PostgreSQL Source Code  git master
_intbig_gist.c
Go to the documentation of this file.
1 /*
2  * contrib/intarray/_intbig_gist.c
3  */
4 #include "postgres.h"
5 
6 #include <math.h>
7 
8 #include "_int.h"
9 #include "access/gist.h"
10 #include "access/reloptions.h"
11 #include "access/stratnum.h"
12 #include "port/pg_bitutils.h"
13 
14 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
15 /*
16 ** _intbig methods
17 */
26 
29 
30 Datum
32 {
33  ereport(ERROR,
34  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
35  errmsg("cannot accept a value of type %s", "intbig_gkey")));
36 
37  PG_RETURN_VOID(); /* keep compiler quiet */
38 }
39 
40 Datum
42 {
43  ereport(ERROR,
44  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
45  errmsg("cannot display a value of type %s", "intbig_gkey")));
46 
47  PG_RETURN_VOID(); /* keep compiler quiet */
48 }
49 
50 static GISTTYPE *
51 _intbig_alloc(bool allistrue, int siglen, BITVECP sign)
52 {
53  int flag = allistrue ? ALLISTRUE : 0;
54  int size = CALCGTSIZE(flag, siglen);
55  GISTTYPE *res = (GISTTYPE *) palloc(size);
56 
57  SET_VARSIZE(res, size);
58  res->flag = flag;
59 
60  if (!allistrue)
61  {
62  if (sign)
63  memcpy(GETSIGN(res), sign, siglen);
64  else
65  memset(GETSIGN(res), 0, siglen);
66  }
67 
68  return res;
69 }
70 
71 
72 /*********************************************************************
73 ** intbig functions
74 *********************************************************************/
75 static bool
77 {
78  int num = ARRNELEMS(b);
79  int32 *ptr = ARRPTR(b);
80 
82 
83  while (num--)
84  {
85  if (GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen)))
86  return true;
87  ptr++;
88  }
89 
90  return false;
91 }
92 
93 static bool
95 {
96  int num = ARRNELEMS(b);
97  int32 *ptr = ARRPTR(b);
98 
100 
101  while (num--)
102  {
103  if (!GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen)))
104  return false;
105  ptr++;
106  }
107 
108  return true;
109 }
110 
111 Datum
113 {
116  bool *result = (bool *) PG_GETARG_POINTER(2);
117  int siglen = GET_SIGLEN();
118 
119  if (ISALLTRUE(a) && ISALLTRUE(b))
120  *result = true;
121  else if (ISALLTRUE(a))
122  *result = false;
123  else if (ISALLTRUE(b))
124  *result = false;
125  else
126  {
127  int32 i;
128  BITVECP sa = GETSIGN(a),
129  sb = GETSIGN(b);
130 
131  *result = true;
132  LOOPBYTE(siglen)
133  {
134  if (sa[i] != sb[i])
135  {
136  *result = false;
137  break;
138  }
139  }
140  }
141  PG_RETURN_POINTER(result);
142 }
143 
144 Datum
146 {
147  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
148  int siglen = GET_SIGLEN();
149 
150  if (entry->leafkey)
151  {
152  GISTENTRY *retval;
153  ArrayType *in = DatumGetArrayTypeP(entry->key);
154  int32 *ptr;
155  int num;
156  GISTTYPE *res = _intbig_alloc(false, siglen, NULL);
157 
158  CHECKARRVALID(in);
159  if (ARRISEMPTY(in))
160  {
161  ptr = NULL;
162  num = 0;
163  }
164  else
165  {
166  ptr = ARRPTR(in);
167  num = ARRNELEMS(in);
168  }
169 
170  while (num--)
171  {
172  HASH(GETSIGN(res), *ptr, siglen);
173  ptr++;
174  }
175 
176  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
178  entry->rel, entry->page,
179  entry->offset, false);
180 
181  PG_RETURN_POINTER(retval);
182  }
183  else if (!ISALLTRUE(DatumGetPointer(entry->key)))
184  {
185  GISTENTRY *retval;
186  int i;
188  GISTTYPE *res;
189 
190  LOOPBYTE(siglen)
191  {
192  if ((sign[i] & 0xff) != 0xff)
193  PG_RETURN_POINTER(entry);
194  }
195 
196  res = _intbig_alloc(true, siglen, sign);
197  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
199  entry->rel, entry->page,
200  entry->offset, false);
201 
202  PG_RETURN_POINTER(retval);
203  }
204 
205  PG_RETURN_POINTER(entry);
206 }
207 
208 
209 static int32
210 sizebitvec(BITVECP sign, int siglen)
211 {
212  return pg_popcount(sign, siglen);
213 }
214 
215 static int
217 {
218  int i,
219  diff,
220  dist = 0;
221 
222  LOOPBYTE(siglen)
223  {
224  diff = (unsigned char) (a[i] ^ b[i]);
225  /* Using the popcount functions here isn't likely to win */
226  dist += pg_number_of_ones[diff];
227  }
228  return dist;
229 }
230 
231 static int
232 hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
233 {
234  if (ISALLTRUE(a))
235  {
236  if (ISALLTRUE(b))
237  return 0;
238  else
239  return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen);
240  }
241  else if (ISALLTRUE(b))
242  return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
243 
244  return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
245 }
246 
247 Datum
249 {
251 }
252 
253 static int32
254 unionkey(BITVECP sbase, GISTTYPE *add, int siglen)
255 {
256  int32 i;
257  BITVECP sadd = GETSIGN(add);
258 
259  if (ISALLTRUE(add))
260  return 1;
261  LOOPBYTE(siglen)
262  sbase[i] |= sadd[i];
263  return 0;
264 }
265 
266 Datum
268 {
270  int *size = (int *) PG_GETARG_POINTER(1);
271  int siglen = GET_SIGLEN();
272  int32 i;
273  GISTTYPE *result = _intbig_alloc(false, siglen, NULL);
274  BITVECP base = GETSIGN(result);
275 
276  for (i = 0; i < entryvec->n; i++)
277  {
278  if (unionkey(base, GETENTRY(entryvec, i), siglen))
279  {
280  result->flag |= ALLISTRUE;
281  SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen));
282  break;
283  }
284  }
285 
286  *size = VARSIZE(result);
287 
288  PG_RETURN_POINTER(result);
289 }
290 
291 Datum
293 {
294  GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
295  GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
296  float *penalty = (float *) PG_GETARG_POINTER(2);
297  GISTTYPE *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
298  GISTTYPE *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
299  int siglen = GET_SIGLEN();
300 
301  *penalty = hemdist(origval, newval, siglen);
302  PG_RETURN_POINTER(penalty);
303 }
304 
305 
306 typedef struct
307 {
308  OffsetNumber pos;
309  int32 cost;
310 } SPLITCOST;
311 
312 static int
313 comparecost(const void *a, const void *b)
314 {
315  return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
316 }
317 
318 
319 Datum
321 {
324  int siglen = GET_SIGLEN();
325  OffsetNumber k,
326  j;
327  GISTTYPE *datum_l,
328  *datum_r;
329  BITVECP union_l,
330  union_r;
331  int32 size_alpha,
332  size_beta;
333  int32 size_waste,
334  waste = -1;
335  int32 nbytes;
336  OffsetNumber seed_1 = 0,
337  seed_2 = 0;
338  OffsetNumber *left,
339  *right;
340  OffsetNumber maxoff;
341  BITVECP ptr;
342  int i;
343  SPLITCOST *costvector;
344  GISTTYPE *_k,
345  *_j;
346 
347  maxoff = entryvec->n - 2;
348  nbytes = (maxoff + 2) * sizeof(OffsetNumber);
349  v->spl_left = (OffsetNumber *) palloc(nbytes);
350  v->spl_right = (OffsetNumber *) palloc(nbytes);
351 
352  for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
353  {
354  _k = GETENTRY(entryvec, k);
355  for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
356  {
357  size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
358  if (size_waste > waste)
359  {
360  waste = size_waste;
361  seed_1 = k;
362  seed_2 = j;
363  }
364  }
365  }
366 
367  left = v->spl_left;
368  v->spl_nleft = 0;
369  right = v->spl_right;
370  v->spl_nright = 0;
371 
372  if (seed_1 == 0 || seed_2 == 0)
373  {
374  seed_1 = 1;
375  seed_2 = 2;
376  }
377 
378  /* form initial .. */
379  datum_l = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_1)), siglen,
380  GETSIGN(GETENTRY(entryvec, seed_1)));
381  datum_r = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_2)), siglen,
382  GETSIGN(GETENTRY(entryvec, seed_2)));
383 
384  maxoff = OffsetNumberNext(maxoff);
385  /* sort before ... */
386  costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
387  for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
388  {
389  costvector[j - 1].pos = j;
390  _j = GETENTRY(entryvec, j);
391  size_alpha = hemdist(datum_l, _j, siglen);
392  size_beta = hemdist(datum_r, _j, siglen);
393  costvector[j - 1].cost = abs(size_alpha - size_beta);
394  }
395  qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost);
396 
397  union_l = GETSIGN(datum_l);
398  union_r = GETSIGN(datum_r);
399 
400  for (k = 0; k < maxoff; k++)
401  {
402  j = costvector[k].pos;
403  if (j == seed_1)
404  {
405  *left++ = j;
406  v->spl_nleft++;
407  continue;
408  }
409  else if (j == seed_2)
410  {
411  *right++ = j;
412  v->spl_nright++;
413  continue;
414  }
415  _j = GETENTRY(entryvec, j);
416  size_alpha = hemdist(datum_l, _j, siglen);
417  size_beta = hemdist(datum_r, _j, siglen);
418 
419  if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
420  {
421  if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
422  {
423  if (!ISALLTRUE(datum_l))
424  memset(union_l, 0xff, siglen);
425  }
426  else
427  {
428  ptr = GETSIGN(_j);
429  LOOPBYTE(siglen)
430  union_l[i] |= ptr[i];
431  }
432  *left++ = j;
433  v->spl_nleft++;
434  }
435  else
436  {
437  if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
438  {
439  if (!ISALLTRUE(datum_r))
440  memset(union_r, 0xff, siglen);
441  }
442  else
443  {
444  ptr = GETSIGN(_j);
445  LOOPBYTE(siglen)
446  union_r[i] |= ptr[i];
447  }
448  *right++ = j;
449  v->spl_nright++;
450  }
451  }
452 
453  *right = *left = FirstOffsetNumber;
454  pfree(costvector);
455 
456  v->spl_ldatum = PointerGetDatum(datum_l);
457  v->spl_rdatum = PointerGetDatum(datum_r);
458 
460 }
461 
462 Datum
464 {
465  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
466  ArrayType *query = PG_GETARG_ARRAYTYPE_P(1);
468 
469  /* Oid subtype = PG_GETARG_OID(3); */
470  bool *recheck = (bool *) PG_GETARG_POINTER(4);
471  int siglen = GET_SIGLEN();
472  bool retval;
473 
474  /* All cases served by this function are inexact */
475  *recheck = true;
476 
477  if (ISALLTRUE(DatumGetPointer(entry->key)))
478  PG_RETURN_BOOL(true);
479 
480  if (strategy == BooleanSearchStrategy)
481  {
482  retval = signconsistent((QUERYTYPE *) query,
483  GETSIGN(DatumGetPointer(entry->key)),
484  siglen,
485  false);
486  PG_FREE_IF_COPY(query, 1);
487  PG_RETURN_BOOL(retval);
488  }
489 
490  CHECKARRVALID(query);
491 
492  switch (strategy)
493  {
495  retval = _intbig_overlap((GISTTYPE *) DatumGetPointer(entry->key),
496  query, siglen);
497  break;
499  if (GIST_LEAF(entry))
500  {
501  int i,
502  num = ARRNELEMS(query);
503  int32 *ptr = ARRPTR(query);
504  BITVECP dq = palloc0(siglen),
505  de;
506 
507  while (num--)
508  {
509  HASH(dq, *ptr, siglen);
510  ptr++;
511  }
512 
513  de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
514  retval = true;
515  LOOPBYTE(siglen)
516  {
517  if (de[i] != dq[i])
518  {
519  retval = false;
520  break;
521  }
522  }
523 
524  pfree(dq);
525  }
526  else
527  retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key),
528  query, siglen);
529  break;
532  retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key),
533  query, siglen);
534  break;
537 
538  /*
539  * This code is unreachable as of intarray 1.4, because the <@
540  * operator has been removed from the opclass. We keep it for now
541  * to support older versions of the SQL definitions.
542  */
543  if (GIST_LEAF(entry))
544  {
545  int i,
546  num = ARRNELEMS(query);
547  int32 *ptr = ARRPTR(query);
548  BITVECP dq = palloc0(siglen),
549  de;
550 
551  while (num--)
552  {
553  HASH(dq, *ptr, siglen);
554  ptr++;
555  }
556 
557  de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
558  retval = true;
559  LOOPBYTE(siglen)
560  {
561  if (de[i] & ~dq[i])
562  {
563  retval = false;
564  break;
565  }
566  }
567  }
568  else
569  {
570  /*
571  * Unfortunately, because empty arrays could be anywhere in
572  * the index, we must search the whole tree.
573  */
574  retval = true;
575  }
576  break;
577  default:
578  retval = false;
579  }
580  PG_FREE_IF_COPY(query, 1);
581  PG_RETURN_BOOL(retval);
582 }
583 
584 Datum
586 {
588 
590  add_local_int_reloption(relopts, "siglen",
591  "signature length in bytes",
593  offsetof(GISTIntArrayBigOptions, siglen));
594 
595  PG_RETURN_VOID();
596 }
bool signconsistent(QUERYTYPE *query, BITVECP sign, int siglen, bool calcnot)
Definition: _int_bool.c:299
#define CHECKARRVALID(x)
Definition: _int.h:30
#define BooleanSearchStrategy
Definition: _int.h:134
#define ARRISEMPTY(x)
Definition: _int.h:38
Datum g_intbig_consistent(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:463
static int hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
Definition: _intbig_gist.c:232
static int32 sizebitvec(BITVECP sign, int siglen)
Definition: _intbig_gist.c:210
static int hemdistsign(BITVECP a, BITVECP b, int siglen)
Definition: _intbig_gist.c:216
Datum _intbig_in(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:31
static GISTTYPE * _intbig_alloc(bool allistrue, int siglen, BITVECP sign)
Definition: _intbig_gist.c:51
#define GETENTRY(vec, pos)
Definition: _intbig_gist.c:14
Datum g_intbig_compress(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:145
Datum g_intbig_decompress(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:248
static int32 unionkey(BITVECP sbase, GISTTYPE *add, int siglen)
Definition: _intbig_gist.c:254
PG_FUNCTION_INFO_V1(g_intbig_consistent)
Datum _intbig_out(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:41
Datum g_intbig_same(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:112
static bool _intbig_overlap(GISTTYPE *a, ArrayType *b, int siglen)
Definition: _intbig_gist.c:76
Datum g_intbig_union(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:267
Datum g_intbig_picksplit(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:320
Datum g_intbig_penalty(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:292
Datum g_intbig_options(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:585
static bool _intbig_contains(GISTTYPE *a, ArrayType *b, int siglen)
Definition: _intbig_gist.c:94
static int comparecost(const void *a, const void *b)
Definition: _intbig_gist.c:313
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:256
#define DatumGetArrayTypeP(X)
Definition: array.h:254
#define GETBIT(x, i)
Definition: blutils.c:33
signed int int32
Definition: c.h:483
#define ARRNELEMS(x)
Definition: cube.c:26
#define ARRPTR(x)
Definition: cube.c:25
int errcode(int sqlerrcode)
Definition: elog.c:858
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:272
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
#define GIST_LEAF(entry)
Definition: gist.h:168
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:242
#define newval
#define HASHVAL(val, siglen)
Definition: hstore_gist.c:44
#define LOOPBYTE(siglen)
Definition: hstore_gist.c:32
#define ALLISTRUE
Definition: hstore_gist.c:54
#define WISH_F(a, b, c)
Definition: hstore_gist.c:76
#define CALCGTSIZE(flag, siglen)
Definition: hstore_gist.c:59
#define ISALLTRUE(x)
Definition: hstore_gist.c:56
#define SIGLEN_MAX
Definition: hstore_gist.c:23
#define SIGLEN_DEFAULT
Definition: hstore_gist.c:22
char * BITVECP
Definition: hstore_gist.c:30
#define GET_SIGLEN()
Definition: hstore_gist.c:25
#define SIGLENBIT(siglen)
Definition: hstore_gist.c:24
#define GETSIGN(x)
Definition: hstore_gist.c:61
#define HASH(sign, val, siglen)
Definition: hstore_gist.c:45
char sign
Definition: informix.c:668
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int j
Definition: isn.c:74
int i
Definition: isn.c:73
void pfree(void *pointer)
Definition: mcxt.c:1456
void * palloc0(Size size)
Definition: mcxt.c:1257
void * palloc(Size size)
Definition: mcxt.c:1226
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
PGDLLIMPORT const uint8 pg_number_of_ones[256]
Definition: pg_bitutils.c:87
uint64 pg_popcount(const char *buf, int bytes)
Definition: pg_bitutils.c:296
#define qsort(a, b, c, d)
Definition: port.h:445
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
void init_local_reloptions(local_relopts *relopts, Size relopt_struct_size)
Definition: reloptions.c:736
void add_local_int_reloption(local_relopts *relopts, const char *name, const char *desc, int default_val, int min_val, int max_val, int offset)
Definition: reloptions.c:920
#define RTOldContainsStrategyNumber
Definition: stratnum.h:63
uint16 StrategyNumber
Definition: stratnum.h:22
#define RTOverlapStrategyNumber
Definition: stratnum.h:53
#define RTSameStrategyNumber
Definition: stratnum.h:56
#define RTContainsStrategyNumber
Definition: stratnum.h:57
#define RTOldContainedByStrategyNumber
Definition: stratnum.h:64
#define RTContainedByStrategyNumber
Definition: stratnum.h:58
OffsetNumber offset
Definition: gist.h:161
Datum key
Definition: gist.h:158
Page page
Definition: gist.h:160
Relation rel
Definition: gist.h:159
bool leafkey
Definition: gist.h:162
int32 flag
Definition: hstore_gist.c:50
int spl_nleft
Definition: gist.h:141
OffsetNumber * spl_right
Definition: gist.h:145
Datum spl_ldatum
Definition: gist.h:142
Datum spl_rdatum
Definition: gist.h:147
int spl_nright
Definition: gist.h:146
OffsetNumber * spl_left
Definition: gist.h:140
int32 n
Definition: gist.h:233
int32 cost
Definition: hstore_gist.c:353
OffsetNumber pos
Definition: hstore_gist.c:352
char * flag(int b)
Definition: test-ctype.c:33
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
#define VARSIZE(PTR)
Definition: varatt.h:279