PostgreSQL Source Code  git master
_intbig_gist.c
Go to the documentation of this file.
1 /*
2  * contrib/intarray/_intbig_gist.c
3  */
4 #include "postgres.h"
5 
6 #include <math.h>
7 
8 #include "_int.h"
9 #include "access/gist.h"
10 #include "access/reloptions.h"
11 #include "access/stratnum.h"
12 #include "common/int.h"
13 #include "port/pg_bitutils.h"
14 
15 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
16 /*
17 ** _intbig methods
18 */
27 
30 
31 Datum
33 {
34  ereport(ERROR,
35  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
36  errmsg("cannot accept a value of type %s", "intbig_gkey")));
37 
38  PG_RETURN_VOID(); /* keep compiler quiet */
39 }
40 
41 Datum
43 {
44  ereport(ERROR,
45  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
46  errmsg("cannot display a value of type %s", "intbig_gkey")));
47 
48  PG_RETURN_VOID(); /* keep compiler quiet */
49 }
50 
51 static GISTTYPE *
52 _intbig_alloc(bool allistrue, int siglen, BITVECP sign)
53 {
54  int flag = allistrue ? ALLISTRUE : 0;
55  int size = CALCGTSIZE(flag, siglen);
57 
59  res->flag = flag;
60 
61  if (!allistrue)
62  {
63  if (sign)
64  memcpy(GETSIGN(res), sign, siglen);
65  else
66  memset(GETSIGN(res), 0, siglen);
67  }
68 
69  return res;
70 }
71 
72 
73 /*********************************************************************
74 ** intbig functions
75 *********************************************************************/
76 static bool
78 {
79  int num = ARRNELEMS(b);
80  int32 *ptr = ARRPTR(b);
81 
83 
84  while (num--)
85  {
86  if (GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen)))
87  return true;
88  ptr++;
89  }
90 
91  return false;
92 }
93 
94 static bool
96 {
97  int num = ARRNELEMS(b);
98  int32 *ptr = ARRPTR(b);
99 
100  CHECKARRVALID(b);
101 
102  while (num--)
103  {
104  if (!GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen)))
105  return false;
106  ptr++;
107  }
108 
109  return true;
110 }
111 
112 Datum
114 {
117  bool *result = (bool *) PG_GETARG_POINTER(2);
118  int siglen = GET_SIGLEN();
119 
120  if (ISALLTRUE(a) && ISALLTRUE(b))
121  *result = true;
122  else if (ISALLTRUE(a))
123  *result = false;
124  else if (ISALLTRUE(b))
125  *result = false;
126  else
127  {
128  int32 i;
129  BITVECP sa = GETSIGN(a),
130  sb = GETSIGN(b);
131 
132  *result = true;
133  LOOPBYTE(siglen)
134  {
135  if (sa[i] != sb[i])
136  {
137  *result = false;
138  break;
139  }
140  }
141  }
142  PG_RETURN_POINTER(result);
143 }
144 
145 Datum
147 {
148  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
149  int siglen = GET_SIGLEN();
150 
151  if (entry->leafkey)
152  {
153  GISTENTRY *retval;
154  ArrayType *in = DatumGetArrayTypeP(entry->key);
155  int32 *ptr;
156  int num;
157  GISTTYPE *res = _intbig_alloc(false, siglen, NULL);
158 
159  CHECKARRVALID(in);
160  if (ARRISEMPTY(in))
161  {
162  ptr = NULL;
163  num = 0;
164  }
165  else
166  {
167  ptr = ARRPTR(in);
168  num = ARRNELEMS(in);
169  }
170 
171  while (num--)
172  {
173  HASH(GETSIGN(res), *ptr, siglen);
174  ptr++;
175  }
176 
177  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
179  entry->rel, entry->page,
180  entry->offset, false);
181 
182  PG_RETURN_POINTER(retval);
183  }
184  else if (!ISALLTRUE(DatumGetPointer(entry->key)))
185  {
186  GISTENTRY *retval;
187  int i;
189  GISTTYPE *res;
190 
191  LOOPBYTE(siglen)
192  {
193  if ((sign[i] & 0xff) != 0xff)
194  PG_RETURN_POINTER(entry);
195  }
196 
197  res = _intbig_alloc(true, siglen, sign);
198  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
200  entry->rel, entry->page,
201  entry->offset, false);
202 
203  PG_RETURN_POINTER(retval);
204  }
205 
206  PG_RETURN_POINTER(entry);
207 }
208 
209 
210 static int32
211 sizebitvec(BITVECP sign, int siglen)
212 {
213  return pg_popcount(sign, siglen);
214 }
215 
216 static int
218 {
219  int i,
220  diff,
221  dist = 0;
222 
223  LOOPBYTE(siglen)
224  {
225  diff = (unsigned char) (a[i] ^ b[i]);
226  /* Using the popcount functions here isn't likely to win */
227  dist += pg_number_of_ones[diff];
228  }
229  return dist;
230 }
231 
232 static int
233 hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
234 {
235  if (ISALLTRUE(a))
236  {
237  if (ISALLTRUE(b))
238  return 0;
239  else
240  return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen);
241  }
242  else if (ISALLTRUE(b))
243  return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
244 
245  return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
246 }
247 
248 Datum
250 {
252 }
253 
254 static int32
255 unionkey(BITVECP sbase, GISTTYPE *add, int siglen)
256 {
257  int32 i;
258  BITVECP sadd = GETSIGN(add);
259 
260  if (ISALLTRUE(add))
261  return 1;
262  LOOPBYTE(siglen)
263  sbase[i] |= sadd[i];
264  return 0;
265 }
266 
267 Datum
269 {
271  int *size = (int *) PG_GETARG_POINTER(1);
272  int siglen = GET_SIGLEN();
273  int32 i;
274  GISTTYPE *result = _intbig_alloc(false, siglen, NULL);
275  BITVECP base = GETSIGN(result);
276 
277  for (i = 0; i < entryvec->n; i++)
278  {
279  if (unionkey(base, GETENTRY(entryvec, i), siglen))
280  {
281  result->flag |= ALLISTRUE;
282  SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen));
283  break;
284  }
285  }
286 
287  *size = VARSIZE(result);
288 
289  PG_RETURN_POINTER(result);
290 }
291 
292 Datum
294 {
295  GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
296  GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
297  float *penalty = (float *) PG_GETARG_POINTER(2);
298  GISTTYPE *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
299  GISTTYPE *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
300  int siglen = GET_SIGLEN();
301 
302  *penalty = hemdist(origval, newval, siglen);
303  PG_RETURN_POINTER(penalty);
304 }
305 
306 
307 typedef struct
308 {
309  OffsetNumber pos;
310  int32 cost;
311 } SPLITCOST;
312 
313 static int
314 comparecost(const void *a, const void *b)
315 {
316  return pg_cmp_s32(((const SPLITCOST *) a)->cost,
317  ((const SPLITCOST *) b)->cost);
318 }
319 
320 
321 Datum
323 {
326  int siglen = GET_SIGLEN();
327  OffsetNumber k,
328  j;
329  GISTTYPE *datum_l,
330  *datum_r;
331  BITVECP union_l,
332  union_r;
333  int32 size_alpha,
334  size_beta;
335  int32 size_waste,
336  waste = -1;
337  int32 nbytes;
338  OffsetNumber seed_1 = 0,
339  seed_2 = 0;
340  OffsetNumber *left,
341  *right;
342  OffsetNumber maxoff;
343  BITVECP ptr;
344  int i;
345  SPLITCOST *costvector;
346  GISTTYPE *_k,
347  *_j;
348 
349  maxoff = entryvec->n - 2;
350  nbytes = (maxoff + 2) * sizeof(OffsetNumber);
351  v->spl_left = (OffsetNumber *) palloc(nbytes);
352  v->spl_right = (OffsetNumber *) palloc(nbytes);
353 
354  for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
355  {
356  _k = GETENTRY(entryvec, k);
357  for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
358  {
359  size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
360  if (size_waste > waste)
361  {
362  waste = size_waste;
363  seed_1 = k;
364  seed_2 = j;
365  }
366  }
367  }
368 
369  left = v->spl_left;
370  v->spl_nleft = 0;
371  right = v->spl_right;
372  v->spl_nright = 0;
373 
374  if (seed_1 == 0 || seed_2 == 0)
375  {
376  seed_1 = 1;
377  seed_2 = 2;
378  }
379 
380  /* form initial .. */
381  datum_l = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_1)), siglen,
382  GETSIGN(GETENTRY(entryvec, seed_1)));
383  datum_r = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_2)), siglen,
384  GETSIGN(GETENTRY(entryvec, seed_2)));
385 
386  maxoff = OffsetNumberNext(maxoff);
387  /* sort before ... */
388  costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
389  for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
390  {
391  costvector[j - 1].pos = j;
392  _j = GETENTRY(entryvec, j);
393  size_alpha = hemdist(datum_l, _j, siglen);
394  size_beta = hemdist(datum_r, _j, siglen);
395  costvector[j - 1].cost = abs(size_alpha - size_beta);
396  }
397  qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost);
398 
399  union_l = GETSIGN(datum_l);
400  union_r = GETSIGN(datum_r);
401 
402  for (k = 0; k < maxoff; k++)
403  {
404  j = costvector[k].pos;
405  if (j == seed_1)
406  {
407  *left++ = j;
408  v->spl_nleft++;
409  continue;
410  }
411  else if (j == seed_2)
412  {
413  *right++ = j;
414  v->spl_nright++;
415  continue;
416  }
417  _j = GETENTRY(entryvec, j);
418  size_alpha = hemdist(datum_l, _j, siglen);
419  size_beta = hemdist(datum_r, _j, siglen);
420 
421  if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
422  {
423  if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
424  {
425  if (!ISALLTRUE(datum_l))
426  memset(union_l, 0xff, siglen);
427  }
428  else
429  {
430  ptr = GETSIGN(_j);
431  LOOPBYTE(siglen)
432  union_l[i] |= ptr[i];
433  }
434  *left++ = j;
435  v->spl_nleft++;
436  }
437  else
438  {
439  if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
440  {
441  if (!ISALLTRUE(datum_r))
442  memset(union_r, 0xff, siglen);
443  }
444  else
445  {
446  ptr = GETSIGN(_j);
447  LOOPBYTE(siglen)
448  union_r[i] |= ptr[i];
449  }
450  *right++ = j;
451  v->spl_nright++;
452  }
453  }
454 
455  *right = *left = FirstOffsetNumber;
456  pfree(costvector);
457 
458  v->spl_ldatum = PointerGetDatum(datum_l);
459  v->spl_rdatum = PointerGetDatum(datum_r);
460 
462 }
463 
464 Datum
466 {
467  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
468  ArrayType *query = PG_GETARG_ARRAYTYPE_P(1);
470 
471  /* Oid subtype = PG_GETARG_OID(3); */
472  bool *recheck = (bool *) PG_GETARG_POINTER(4);
473  int siglen = GET_SIGLEN();
474  bool retval;
475 
476  /* All cases served by this function are inexact */
477  *recheck = true;
478 
479  if (ISALLTRUE(DatumGetPointer(entry->key)))
480  PG_RETURN_BOOL(true);
481 
482  if (strategy == BooleanSearchStrategy)
483  {
484  retval = signconsistent((QUERYTYPE *) query,
485  GETSIGN(DatumGetPointer(entry->key)),
486  siglen,
487  false);
488  PG_FREE_IF_COPY(query, 1);
489  PG_RETURN_BOOL(retval);
490  }
491 
492  CHECKARRVALID(query);
493 
494  switch (strategy)
495  {
497  retval = _intbig_overlap((GISTTYPE *) DatumGetPointer(entry->key),
498  query, siglen);
499  break;
501  if (GIST_LEAF(entry))
502  {
503  int i,
504  num = ARRNELEMS(query);
505  int32 *ptr = ARRPTR(query);
506  BITVECP dq = palloc0(siglen),
507  de;
508 
509  while (num--)
510  {
511  HASH(dq, *ptr, siglen);
512  ptr++;
513  }
514 
515  de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
516  retval = true;
517  LOOPBYTE(siglen)
518  {
519  if (de[i] != dq[i])
520  {
521  retval = false;
522  break;
523  }
524  }
525 
526  pfree(dq);
527  }
528  else
529  retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key),
530  query, siglen);
531  break;
534  retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key),
535  query, siglen);
536  break;
539 
540  /*
541  * This code is unreachable as of intarray 1.4, because the <@
542  * operator has been removed from the opclass. We keep it for now
543  * to support older versions of the SQL definitions.
544  */
545  if (GIST_LEAF(entry))
546  {
547  int i,
548  num = ARRNELEMS(query);
549  int32 *ptr = ARRPTR(query);
550  BITVECP dq = palloc0(siglen),
551  de;
552 
553  while (num--)
554  {
555  HASH(dq, *ptr, siglen);
556  ptr++;
557  }
558 
559  de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
560  retval = true;
561  LOOPBYTE(siglen)
562  {
563  if (de[i] & ~dq[i])
564  {
565  retval = false;
566  break;
567  }
568  }
569  }
570  else
571  {
572  /*
573  * Unfortunately, because empty arrays could be anywhere in
574  * the index, we must search the whole tree.
575  */
576  retval = true;
577  }
578  break;
579  default:
580  retval = false;
581  }
582  PG_FREE_IF_COPY(query, 1);
583  PG_RETURN_BOOL(retval);
584 }
585 
586 Datum
588 {
590 
592  add_local_int_reloption(relopts, "siglen",
593  "signature length in bytes",
595  offsetof(GISTIntArrayBigOptions, siglen));
596 
597  PG_RETURN_VOID();
598 }
bool signconsistent(QUERYTYPE *query, BITVECP sign, int siglen, bool calcnot)
Definition: _int_bool.c:298
#define CHECKARRVALID(x)
Definition: _int.h:30
#define BooleanSearchStrategy
Definition: _int.h:134
#define ARRISEMPTY(x)
Definition: _int.h:38
Datum g_intbig_consistent(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:465
static int hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
Definition: _intbig_gist.c:233
static int32 sizebitvec(BITVECP sign, int siglen)
Definition: _intbig_gist.c:211
static int hemdistsign(BITVECP a, BITVECP b, int siglen)
Definition: _intbig_gist.c:217
Datum _intbig_in(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:32
static GISTTYPE * _intbig_alloc(bool allistrue, int siglen, BITVECP sign)
Definition: _intbig_gist.c:52
#define GETENTRY(vec, pos)
Definition: _intbig_gist.c:15
Datum g_intbig_compress(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:146
Datum g_intbig_decompress(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:249
static int32 unionkey(BITVECP sbase, GISTTYPE *add, int siglen)
Definition: _intbig_gist.c:255
PG_FUNCTION_INFO_V1(g_intbig_consistent)
Datum _intbig_out(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:42
Datum g_intbig_same(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:113
static bool _intbig_overlap(GISTTYPE *a, ArrayType *b, int siglen)
Definition: _intbig_gist.c:77
Datum g_intbig_union(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:268
Datum g_intbig_picksplit(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:322
Datum g_intbig_penalty(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:293
Datum g_intbig_options(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:587
static bool _intbig_contains(GISTTYPE *a, ArrayType *b, int siglen)
Definition: _intbig_gist.c:95
static int comparecost(const void *a, const void *b)
Definition: _intbig_gist.c:314
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:263
#define DatumGetArrayTypeP(X)
Definition: array.h:261
#define GETBIT(x, i)
Definition: blutils.c:30
int32_t int32
Definition: c.h:481
#define ARRNELEMS(x)
Definition: cube.c:26
#define ARRPTR(x)
Definition: cube.c:25
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:272
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
#define GIST_LEAF(entry)
Definition: gist.h:170
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:244
#define newval
#define HASHVAL(val, siglen)
Definition: hstore_gist.c:45
#define LOOPBYTE(siglen)
Definition: hstore_gist.c:33
#define ALLISTRUE
Definition: hstore_gist.c:55
#define WISH_F(a, b, c)
Definition: hstore_gist.c:77
#define CALCGTSIZE(flag, siglen)
Definition: hstore_gist.c:60
#define ISALLTRUE(x)
Definition: hstore_gist.c:57
#define SIGLEN_MAX
Definition: hstore_gist.c:24
#define SIGLEN_DEFAULT
Definition: hstore_gist.c:23
char * BITVECP
Definition: hstore_gist.c:31
#define GET_SIGLEN()
Definition: hstore_gist.c:26
#define SIGLENBIT(siglen)
Definition: hstore_gist.c:25
#define GETSIGN(x)
Definition: hstore_gist.c:62
#define HASH(sign, val, siglen)
Definition: hstore_gist.c:46
char sign
Definition: informix.c:693
static int pg_cmp_s32(int32 a, int32 b)
Definition: int.h:646
int b
Definition: isn.c:69
int a
Definition: isn.c:68
int j
Definition: isn.c:73
int i
Definition: isn.c:72
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
void * palloc(Size size)
Definition: mcxt.c:1317
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
PGDLLIMPORT const uint8 pg_number_of_ones[256]
Definition: pg_bitutils.c:87
static uint64 pg_popcount(const char *buf, int bytes)
Definition: pg_bitutils.h:339
#define qsort(a, b, c, d)
Definition: port.h:447
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
void init_local_reloptions(local_relopts *relopts, Size relopt_struct_size)
Definition: reloptions.c:734
void add_local_int_reloption(local_relopts *relopts, const char *name, const char *desc, int default_val, int min_val, int max_val, int offset)
Definition: reloptions.c:918
static pg_noinline void Size size
Definition: slab.c:607
#define RTOldContainsStrategyNumber
Definition: stratnum.h:63
uint16 StrategyNumber
Definition: stratnum.h:22
#define RTOverlapStrategyNumber
Definition: stratnum.h:53
#define RTSameStrategyNumber
Definition: stratnum.h:56
#define RTContainsStrategyNumber
Definition: stratnum.h:57
#define RTOldContainedByStrategyNumber
Definition: stratnum.h:64
#define RTContainedByStrategyNumber
Definition: stratnum.h:58
OffsetNumber offset
Definition: gist.h:163
Datum key
Definition: gist.h:160
Page page
Definition: gist.h:162
Relation rel
Definition: gist.h:161
bool leafkey
Definition: gist.h:164
int32 flag
Definition: hstore_gist.c:51
int spl_nleft
Definition: gist.h:143
OffsetNumber * spl_right
Definition: gist.h:147
Datum spl_ldatum
Definition: gist.h:144
Datum spl_rdatum
Definition: gist.h:149
int spl_nright
Definition: gist.h:148
OffsetNumber * spl_left
Definition: gist.h:142
int32 n
Definition: gist.h:235
int32 cost
Definition: hstore_gist.c:354
OffsetNumber pos
Definition: hstore_gist.c:353
char * flag(int b)
Definition: test-ctype.c:33
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
#define VARSIZE(PTR)
Definition: varatt.h:279