PostgreSQL Source Code  git master
_intbig_gist.c
Go to the documentation of this file.
1 /*
2  * contrib/intarray/_intbig_gist.c
3  */
4 #include "postgres.h"
5 
6 #include "_int.h"
7 #include "access/gist.h"
8 #include "access/reloptions.h"
9 #include "access/stratnum.h"
10 #include "port/pg_bitutils.h"
11 
12 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
13 /*
14 ** _intbig methods
15 */
24 
27 
28 Datum
30 {
31  ereport(ERROR,
32  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
33  errmsg("_intbig_in() not implemented")));
34  PG_RETURN_DATUM(0);
35 }
36 
37 Datum
39 {
40  ereport(ERROR,
41  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
42  errmsg("_intbig_out() not implemented")));
43  PG_RETURN_DATUM(0);
44 }
45 
46 static GISTTYPE *
47 _intbig_alloc(bool allistrue, int siglen, BITVECP sign)
48 {
49  int flag = allistrue ? ALLISTRUE : 0;
50  int size = CALCGTSIZE(flag, siglen);
51  GISTTYPE *res = (GISTTYPE *) palloc(size);
52 
53  SET_VARSIZE(res, size);
54  res->flag = flag;
55 
56  if (!allistrue)
57  {
58  if (sign)
59  memcpy(GETSIGN(res), sign, siglen);
60  else
61  memset(GETSIGN(res), 0, siglen);
62  }
63 
64  return res;
65 }
66 
67 
68 /*********************************************************************
69 ** intbig functions
70 *********************************************************************/
71 static bool
72 _intbig_overlap(GISTTYPE *a, ArrayType *b, int siglen)
73 {
74  int num = ARRNELEMS(b);
75  int32 *ptr = ARRPTR(b);
76 
77  CHECKARRVALID(b);
78 
79  while (num--)
80  {
81  if (GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen)))
82  return true;
83  ptr++;
84  }
85 
86  return false;
87 }
88 
89 static bool
90 _intbig_contains(GISTTYPE *a, ArrayType *b, int siglen)
91 {
92  int num = ARRNELEMS(b);
93  int32 *ptr = ARRPTR(b);
94 
95  CHECKARRVALID(b);
96 
97  while (num--)
98  {
99  if (!GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen)))
100  return false;
101  ptr++;
102  }
103 
104  return true;
105 }
106 
107 Datum
109 {
110  GISTTYPE *a = (GISTTYPE *) PG_GETARG_POINTER(0);
111  GISTTYPE *b = (GISTTYPE *) PG_GETARG_POINTER(1);
112  bool *result = (bool *) PG_GETARG_POINTER(2);
113  int siglen = GET_SIGLEN();
114 
115  if (ISALLTRUE(a) && ISALLTRUE(b))
116  *result = true;
117  else if (ISALLTRUE(a))
118  *result = false;
119  else if (ISALLTRUE(b))
120  *result = false;
121  else
122  {
123  int32 i;
124  BITVECP sa = GETSIGN(a),
125  sb = GETSIGN(b);
126 
127  *result = true;
128  LOOPBYTE(siglen)
129  {
130  if (sa[i] != sb[i])
131  {
132  *result = false;
133  break;
134  }
135  }
136  }
137  PG_RETURN_POINTER(result);
138 }
139 
140 Datum
142 {
143  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
144  int siglen = GET_SIGLEN();
145 
146  if (entry->leafkey)
147  {
148  GISTENTRY *retval;
149  ArrayType *in = DatumGetArrayTypeP(entry->key);
150  int32 *ptr;
151  int num;
152  GISTTYPE *res = _intbig_alloc(false, siglen, NULL);
153 
154  CHECKARRVALID(in);
155  if (ARRISEMPTY(in))
156  {
157  ptr = NULL;
158  num = 0;
159  }
160  else
161  {
162  ptr = ARRPTR(in);
163  num = ARRNELEMS(in);
164  }
165 
166  while (num--)
167  {
168  HASH(GETSIGN(res), *ptr, siglen);
169  ptr++;
170  }
171 
172  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
173  gistentryinit(*retval, PointerGetDatum(res),
174  entry->rel, entry->page,
175  entry->offset, false);
176 
177  if (in != DatumGetArrayTypeP(entry->key))
178  pfree(in);
179 
180  PG_RETURN_POINTER(retval);
181  }
182  else if (!ISALLTRUE(DatumGetPointer(entry->key)))
183  {
184  GISTENTRY *retval;
185  int i;
187  GISTTYPE *res;
188 
189  LOOPBYTE(siglen)
190  {
191  if ((sign[i] & 0xff) != 0xff)
192  PG_RETURN_POINTER(entry);
193  }
194 
195  res = _intbig_alloc(true, siglen, sign);
196  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
197  gistentryinit(*retval, PointerGetDatum(res),
198  entry->rel, entry->page,
199  entry->offset, false);
200 
201  PG_RETURN_POINTER(retval);
202  }
203 
204  PG_RETURN_POINTER(entry);
205 }
206 
207 
208 static int32
209 sizebitvec(BITVECP sign, int siglen)
210 {
211  return pg_popcount(sign, siglen);
212 }
213 
214 static int
215 hemdistsign(BITVECP a, BITVECP b, int siglen)
216 {
217  int i,
218  diff,
219  dist = 0;
220 
221  LOOPBYTE(siglen)
222  {
223  diff = (unsigned char) (a[i] ^ b[i]);
224  /* Using the popcount functions here isn't likely to win */
225  dist += pg_number_of_ones[diff];
226  }
227  return dist;
228 }
229 
230 static int
231 hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
232 {
233  if (ISALLTRUE(a))
234  {
235  if (ISALLTRUE(b))
236  return 0;
237  else
238  return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen);
239  }
240  else if (ISALLTRUE(b))
241  return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
242 
243  return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
244 }
245 
246 Datum
248 {
250 }
251 
252 static int32
253 unionkey(BITVECP sbase, GISTTYPE *add, int siglen)
254 {
255  int32 i;
256  BITVECP sadd = GETSIGN(add);
257 
258  if (ISALLTRUE(add))
259  return 1;
260  LOOPBYTE(siglen)
261  sbase[i] |= sadd[i];
262  return 0;
263 }
264 
265 Datum
267 {
269  int *size = (int *) PG_GETARG_POINTER(1);
270  int siglen = GET_SIGLEN();
271  int32 i;
272  GISTTYPE *result = _intbig_alloc(false, siglen, NULL);
273  BITVECP base = GETSIGN(result);
274 
275  for (i = 0; i < entryvec->n; i++)
276  {
277  if (unionkey(base, GETENTRY(entryvec, i), siglen))
278  {
279  result->flag |= ALLISTRUE;
280  SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen));
281  break;
282  }
283  }
284 
285  *size = VARSIZE(result);
286 
287  PG_RETURN_POINTER(result);
288 }
289 
290 Datum
292 {
293  GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
294  GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
295  float *penalty = (float *) PG_GETARG_POINTER(2);
296  GISTTYPE *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
297  GISTTYPE *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
298  int siglen = GET_SIGLEN();
299 
300  *penalty = hemdist(origval, newval, siglen);
301  PG_RETURN_POINTER(penalty);
302 }
303 
304 
305 typedef struct
306 {
307  OffsetNumber pos;
308  int32 cost;
309 } SPLITCOST;
310 
311 static int
312 comparecost(const void *a, const void *b)
313 {
314  return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
315 }
316 
317 
318 Datum
320 {
323  int siglen = GET_SIGLEN();
324  OffsetNumber k,
325  j;
326  GISTTYPE *datum_l,
327  *datum_r;
328  BITVECP union_l,
329  union_r;
330  int32 size_alpha,
331  size_beta;
332  int32 size_waste,
333  waste = -1;
334  int32 nbytes;
335  OffsetNumber seed_1 = 0,
336  seed_2 = 0;
338  *right;
339  OffsetNumber maxoff;
340  BITVECP ptr;
341  int i;
342  SPLITCOST *costvector;
343  GISTTYPE *_k,
344  *_j;
345 
346  maxoff = entryvec->n - 2;
347  nbytes = (maxoff + 2) * sizeof(OffsetNumber);
348  v->spl_left = (OffsetNumber *) palloc(nbytes);
349  v->spl_right = (OffsetNumber *) palloc(nbytes);
350 
351  for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
352  {
353  _k = GETENTRY(entryvec, k);
354  for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
355  {
356  size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
357  if (size_waste > waste)
358  {
359  waste = size_waste;
360  seed_1 = k;
361  seed_2 = j;
362  }
363  }
364  }
365 
366  left = v->spl_left;
367  v->spl_nleft = 0;
368  right = v->spl_right;
369  v->spl_nright = 0;
370 
371  if (seed_1 == 0 || seed_2 == 0)
372  {
373  seed_1 = 1;
374  seed_2 = 2;
375  }
376 
377  /* form initial .. */
378  datum_l = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_1)), siglen,
379  GETSIGN(GETENTRY(entryvec, seed_1)));
380  datum_r = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_2)), siglen,
381  GETSIGN(GETENTRY(entryvec, seed_2)));
382 
383  maxoff = OffsetNumberNext(maxoff);
384  /* sort before ... */
385  costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
386  for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
387  {
388  costvector[j - 1].pos = j;
389  _j = GETENTRY(entryvec, j);
390  size_alpha = hemdist(datum_l, _j, siglen);
391  size_beta = hemdist(datum_r, _j, siglen);
392  costvector[j - 1].cost = Abs(size_alpha - size_beta);
393  }
394  qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
395 
396  union_l = GETSIGN(datum_l);
397  union_r = GETSIGN(datum_r);
398 
399  for (k = 0; k < maxoff; k++)
400  {
401  j = costvector[k].pos;
402  if (j == seed_1)
403  {
404  *left++ = j;
405  v->spl_nleft++;
406  continue;
407  }
408  else if (j == seed_2)
409  {
410  *right++ = j;
411  v->spl_nright++;
412  continue;
413  }
414  _j = GETENTRY(entryvec, j);
415  size_alpha = hemdist(datum_l, _j, siglen);
416  size_beta = hemdist(datum_r, _j, siglen);
417 
418  if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
419  {
420  if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
421  {
422  if (!ISALLTRUE(datum_l))
423  MemSet((void *) union_l, 0xff, siglen);
424  }
425  else
426  {
427  ptr = GETSIGN(_j);
428  LOOPBYTE(siglen)
429  union_l[i] |= ptr[i];
430  }
431  *left++ = j;
432  v->spl_nleft++;
433  }
434  else
435  {
436  if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
437  {
438  if (!ISALLTRUE(datum_r))
439  MemSet((void *) union_r, 0xff, siglen);
440  }
441  else
442  {
443  ptr = GETSIGN(_j);
444  LOOPBYTE(siglen)
445  union_r[i] |= ptr[i];
446  }
447  *right++ = j;
448  v->spl_nright++;
449  }
450  }
451 
452  *right = *left = FirstOffsetNumber;
453  pfree(costvector);
454 
455  v->spl_ldatum = PointerGetDatum(datum_l);
456  v->spl_rdatum = PointerGetDatum(datum_r);
457 
459 }
460 
461 Datum
463 {
464  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
465  ArrayType *query = PG_GETARG_ARRAYTYPE_P(1);
467 
468  /* Oid subtype = PG_GETARG_OID(3); */
469  bool *recheck = (bool *) PG_GETARG_POINTER(4);
470  int siglen = GET_SIGLEN();
471  bool retval;
472 
473  /* All cases served by this function are inexact */
474  *recheck = true;
475 
476  if (ISALLTRUE(DatumGetPointer(entry->key)))
477  PG_RETURN_BOOL(true);
478 
479  if (strategy == BooleanSearchStrategy)
480  {
481  retval = signconsistent((QUERYTYPE *) query,
482  GETSIGN(DatumGetPointer(entry->key)),
483  siglen,
484  false);
485  PG_FREE_IF_COPY(query, 1);
486  PG_RETURN_BOOL(retval);
487  }
488 
489  CHECKARRVALID(query);
490 
491  switch (strategy)
492  {
494  retval = _intbig_overlap((GISTTYPE *) DatumGetPointer(entry->key),
495  query, siglen);
496  break;
498  if (GIST_LEAF(entry))
499  {
500  int i,
501  num = ARRNELEMS(query);
502  int32 *ptr = ARRPTR(query);
503  BITVECP dq = palloc0(siglen),
504  de;
505 
506  while (num--)
507  {
508  HASH(dq, *ptr, siglen);
509  ptr++;
510  }
511 
512  de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
513  retval = true;
514  LOOPBYTE(siglen)
515  {
516  if (de[i] != dq[i])
517  {
518  retval = false;
519  break;
520  }
521  }
522 
523  pfree(dq);
524  }
525  else
526  retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key),
527  query, siglen);
528  break;
531  retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key),
532  query, siglen);
533  break;
536 
537  /*
538  * This code is unreachable as of intarray 1.4, because the <@
539  * operator has been removed from the opclass. We keep it for now
540  * to support older versions of the SQL definitions.
541  */
542  if (GIST_LEAF(entry))
543  {
544  int i,
545  num = ARRNELEMS(query);
546  int32 *ptr = ARRPTR(query);
547  BITVECP dq = palloc0(siglen),
548  de;
549 
550  while (num--)
551  {
552  HASH(dq, *ptr, siglen);
553  ptr++;
554  }
555 
556  de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
557  retval = true;
558  LOOPBYTE(siglen)
559  {
560  if (de[i] & ~dq[i])
561  {
562  retval = false;
563  break;
564  }
565  }
566  }
567  else
568  {
569  /*
570  * Unfortunately, because empty arrays could be anywhere in
571  * the index, we must search the whole tree.
572  */
573  retval = true;
574  }
575  break;
576  default:
577  retval = false;
578  }
579  PG_FREE_IF_COPY(query, 1);
580  PG_RETURN_BOOL(retval);
581 }
582 
583 Datum
585 {
587 
589  add_local_int_reloption(relopts, "siglen",
590  "signature length in bytes",
593 
594  PG_RETURN_VOID();
595 }
#define GIST_LEAF(entry)
Definition: gist.h:162
Relation rel
Definition: gist.h:153
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:360
OffsetNumber pos
Definition: hstore_gist.c:346
Datum g_intbig_same(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:108
void init_local_reloptions(local_relopts *opts, Size relopt_struct_size)
Definition: reloptions.c:710
#define LOOPBYTE(siglen)
Definition: hstore_gist.c:32
#define RTOldContainsStrategyNumber
Definition: stratnum.h:63
Datum g_intbig_penalty(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:291
Datum g_intbig_compress(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:141
#define RTOldContainedByStrategyNumber
Definition: stratnum.h:64
struct NODE * left
#define VARSIZE(PTR)
Definition: postgres.h:303
#define GETSIGN(x)
Definition: hstore_gist.c:61
#define PointerGetDatum(X)
Definition: postgres.h:556
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define CHECKARRVALID(x)
Definition: _int.h:30
Datum g_intbig_decompress(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:247
#define SIGLEN_MAX
Definition: hstore_gist.c:23
OffsetNumber * spl_left
Definition: gist.h:134
Datum spl_rdatum
Definition: gist.h:141
int32 n
Definition: gist.h:227
uint16 StrategyNumber
Definition: stratnum.h:22
#define ALLISTRUE
Definition: hstore_gist.c:54
int errcode(int sqlerrcode)
Definition: elog.c:610
struct NODE * right
#define MemSet(start, val, len)
Definition: c.h:949
#define RTContainedByStrategyNumber
Definition: stratnum.h:58
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
int32 flag
Definition: hstore_gist.c:50
int spl_nleft
Definition: gist.h:135
void add_local_int_reloption(local_relopts *relopts, const char *name, const char *desc, int default_val, int min_val, int max_val, int offset)
Definition: reloptions.c:894
signed int int32
Definition: c.h:362
uint64 pg_popcount(const char *buf, int bytes)
Definition: pg_bitutils.c:282
int32 cost
Definition: hstore_gist.c:347
Datum g_intbig_options(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:584
uint16 OffsetNumber
Definition: off.h:24
Page page
Definition: gist.h:154
#define Abs(x)
Definition: c.h:933
#define CALCGTSIZE(flag, siglen)
Definition: hstore_gist.c:59
#define HASH(sign, val, siglen)
Definition: hstore_gist.c:45
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:251
void pfree(void *pointer)
Definition: mcxt.c:1057
#define GETBIT(x, i)
Definition: blutils.c:33
#define WISH_F(a, b, c)
Definition: hstore_gist.c:76
#define ERROR
Definition: elog.h:43
#define GET_SIGLEN()
Definition: hstore_gist.c:25
Datum _intbig_out(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:38
int spl_nright
Definition: gist.h:140
Datum g_intbig_consistent(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:462
char sign
Definition: informix.c:668
Datum key
Definition: gist.h:152
#define FirstOffsetNumber
Definition: off.h:27
char * flag(int b)
Definition: test-ctype.c:33
#define ARRISEMPTY(x)
Definition: _int.h:38
#define RTSameStrategyNumber
Definition: stratnum.h:56
static bool _intbig_overlap(GISTTYPE *a, ArrayType *b, int siglen)
Definition: _intbig_gist.c:72
#define SIGLEN_DEFAULT
Definition: hstore_gist.c:22
static bool _intbig_contains(GISTTYPE *a, ArrayType *b, int siglen)
Definition: _intbig_gist.c:90
static int32 unionkey(BITVECP sbase, GISTTYPE *add, int siglen)
Definition: _intbig_gist.c:253
bool leafkey
Definition: gist.h:156
static int hemdistsign(BITVECP a, BITVECP b, int siglen)
Definition: _intbig_gist.c:215
static GISTTYPE * _intbig_alloc(bool allistrue, int siglen, BITVECP sign)
Definition: _intbig_gist.c:47
void * palloc0(Size size)
Definition: mcxt.c:981
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:358
uintptr_t Datum
Definition: postgres.h:367
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:352
Datum g_intbig_picksplit(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:319
#define ISALLTRUE(x)
Definition: hstore_gist.c:56
#define ereport(elevel,...)
Definition: elog.h:144
#define HASHVAL(val, siglen)
Definition: hstore_gist.c:44
static int hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
Definition: _intbig_gist.c:231
#define PG_RETURN_VOID()
Definition: fmgr.h:348
Datum spl_ldatum
Definition: gist.h:136
#define SIGLENBIT(siglen)
Definition: hstore_gist.c:24
bool signconsistent(QUERYTYPE *query, BITVECP sign, int siglen, bool calcnot)
Definition: _int_bool.c:300
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:236
#define BooleanSearchStrategy
Definition: _int.h:134
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define newval
OffsetNumber * spl_right
Definition: gist.h:139
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define RTContainsStrategyNumber
Definition: stratnum.h:57
#define ARRNELEMS(x)
Definition: cube.c:25
PG_FUNCTION_INFO_V1(g_intbig_consistent)
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:272
#define DatumGetPointer(X)
Definition: postgres.h:549
PGDLLIMPORT const uint8 pg_number_of_ones[256]
Definition: pg_bitutils.c:87
void * palloc(Size size)
Definition: mcxt.c:950
int errmsg(const char *fmt,...)
Definition: elog.c:824
Datum g_intbig_union(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:266
int i
static int32 sizebitvec(BITVECP sign, int siglen)
Definition: _intbig_gist.c:209
#define RTOverlapStrategyNumber
Definition: stratnum.h:53
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
char * BITVECP
Definition: hstore_gist.c:30
#define ARRPTR(x)
Definition: cube.c:24
#define qsort(a, b, c, d)
Definition: port.h:475
OffsetNumber offset
Definition: gist.h:155
Datum _intbig_in(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:29
static int comparecost(const void *a, const void *b)
Definition: _intbig_gist.c:312
#define offsetof(type, field)
Definition: c.h:668
#define GETENTRY(vec, pos)
Definition: _intbig_gist.c:12
#define DatumGetArrayTypeP(X)
Definition: array.h:249