PostgreSQL Source Code  git master
_intbig_gist.c
Go to the documentation of this file.
1 /*
2  * contrib/intarray/_intbig_gist.c
3  */
4 #include "postgres.h"
5 
6 #include <math.h>
7 
8 #include "_int.h"
9 #include "access/gist.h"
10 #include "access/reloptions.h"
11 #include "access/stratnum.h"
12 #include "port/pg_bitutils.h"
13 
14 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
15 /*
16 ** _intbig methods
17 */
26 
29 
30 Datum
32 {
33  ereport(ERROR,
34  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
35  errmsg("cannot accept a value of type %s", "intbig_gkey")));
36 
37  PG_RETURN_VOID(); /* keep compiler quiet */
38 }
39 
40 Datum
42 {
43  ereport(ERROR,
44  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
45  errmsg("cannot display a value of type %s", "intbig_gkey")));
46 
47  PG_RETURN_VOID(); /* keep compiler quiet */
48 }
49 
50 static GISTTYPE *
51 _intbig_alloc(bool allistrue, int siglen, BITVECP sign)
52 {
53  int flag = allistrue ? ALLISTRUE : 0;
54  int size = CALCGTSIZE(flag, siglen);
55  GISTTYPE *res = (GISTTYPE *) palloc(size);
56 
57  SET_VARSIZE(res, size);
58  res->flag = flag;
59 
60  if (!allistrue)
61  {
62  if (sign)
63  memcpy(GETSIGN(res), sign, siglen);
64  else
65  memset(GETSIGN(res), 0, siglen);
66  }
67 
68  return res;
69 }
70 
71 
72 /*********************************************************************
73 ** intbig functions
74 *********************************************************************/
75 static bool
77 {
78  int num = ARRNELEMS(b);
79  int32 *ptr = ARRPTR(b);
80 
82 
83  while (num--)
84  {
85  if (GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen)))
86  return true;
87  ptr++;
88  }
89 
90  return false;
91 }
92 
93 static bool
95 {
96  int num = ARRNELEMS(b);
97  int32 *ptr = ARRPTR(b);
98 
100 
101  while (num--)
102  {
103  if (!GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen)))
104  return false;
105  ptr++;
106  }
107 
108  return true;
109 }
110 
111 Datum
113 {
116  bool *result = (bool *) PG_GETARG_POINTER(2);
117  int siglen = GET_SIGLEN();
118 
119  if (ISALLTRUE(a) && ISALLTRUE(b))
120  *result = true;
121  else if (ISALLTRUE(a))
122  *result = false;
123  else if (ISALLTRUE(b))
124  *result = false;
125  else
126  {
127  int32 i;
128  BITVECP sa = GETSIGN(a),
129  sb = GETSIGN(b);
130 
131  *result = true;
132  LOOPBYTE(siglen)
133  {
134  if (sa[i] != sb[i])
135  {
136  *result = false;
137  break;
138  }
139  }
140  }
141  PG_RETURN_POINTER(result);
142 }
143 
144 Datum
146 {
147  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
148  int siglen = GET_SIGLEN();
149 
150  if (entry->leafkey)
151  {
152  GISTENTRY *retval;
153  ArrayType *in = DatumGetArrayTypeP(entry->key);
154  int32 *ptr;
155  int num;
156  GISTTYPE *res = _intbig_alloc(false, siglen, NULL);
157 
158  CHECKARRVALID(in);
159  if (ARRISEMPTY(in))
160  {
161  ptr = NULL;
162  num = 0;
163  }
164  else
165  {
166  ptr = ARRPTR(in);
167  num = ARRNELEMS(in);
168  }
169 
170  while (num--)
171  {
172  HASH(GETSIGN(res), *ptr, siglen);
173  ptr++;
174  }
175 
176  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
178  entry->rel, entry->page,
179  entry->offset, false);
180 
181  if (in != DatumGetArrayTypeP(entry->key))
182  pfree(in);
183 
184  PG_RETURN_POINTER(retval);
185  }
186  else if (!ISALLTRUE(DatumGetPointer(entry->key)))
187  {
188  GISTENTRY *retval;
189  int i;
191  GISTTYPE *res;
192 
193  LOOPBYTE(siglen)
194  {
195  if ((sign[i] & 0xff) != 0xff)
196  PG_RETURN_POINTER(entry);
197  }
198 
199  res = _intbig_alloc(true, siglen, sign);
200  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
202  entry->rel, entry->page,
203  entry->offset, false);
204 
205  PG_RETURN_POINTER(retval);
206  }
207 
208  PG_RETURN_POINTER(entry);
209 }
210 
211 
212 static int32
213 sizebitvec(BITVECP sign, int siglen)
214 {
215  return pg_popcount(sign, siglen);
216 }
217 
218 static int
220 {
221  int i,
222  diff,
223  dist = 0;
224 
225  LOOPBYTE(siglen)
226  {
227  diff = (unsigned char) (a[i] ^ b[i]);
228  /* Using the popcount functions here isn't likely to win */
229  dist += pg_number_of_ones[diff];
230  }
231  return dist;
232 }
233 
234 static int
235 hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
236 {
237  if (ISALLTRUE(a))
238  {
239  if (ISALLTRUE(b))
240  return 0;
241  else
242  return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen);
243  }
244  else if (ISALLTRUE(b))
245  return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
246 
247  return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
248 }
249 
250 Datum
252 {
254 }
255 
256 static int32
257 unionkey(BITVECP sbase, GISTTYPE *add, int siglen)
258 {
259  int32 i;
260  BITVECP sadd = GETSIGN(add);
261 
262  if (ISALLTRUE(add))
263  return 1;
264  LOOPBYTE(siglen)
265  sbase[i] |= sadd[i];
266  return 0;
267 }
268 
269 Datum
271 {
273  int *size = (int *) PG_GETARG_POINTER(1);
274  int siglen = GET_SIGLEN();
275  int32 i;
276  GISTTYPE *result = _intbig_alloc(false, siglen, NULL);
277  BITVECP base = GETSIGN(result);
278 
279  for (i = 0; i < entryvec->n; i++)
280  {
281  if (unionkey(base, GETENTRY(entryvec, i), siglen))
282  {
283  result->flag |= ALLISTRUE;
284  SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen));
285  break;
286  }
287  }
288 
289  *size = VARSIZE(result);
290 
291  PG_RETURN_POINTER(result);
292 }
293 
294 Datum
296 {
297  GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
298  GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
299  float *penalty = (float *) PG_GETARG_POINTER(2);
300  GISTTYPE *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
301  GISTTYPE *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
302  int siglen = GET_SIGLEN();
303 
304  *penalty = hemdist(origval, newval, siglen);
305  PG_RETURN_POINTER(penalty);
306 }
307 
308 
309 typedef struct
310 {
311  OffsetNumber pos;
312  int32 cost;
313 } SPLITCOST;
314 
315 static int
316 comparecost(const void *a, const void *b)
317 {
318  return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
319 }
320 
321 
322 Datum
324 {
327  int siglen = GET_SIGLEN();
328  OffsetNumber k,
329  j;
330  GISTTYPE *datum_l,
331  *datum_r;
332  BITVECP union_l,
333  union_r;
334  int32 size_alpha,
335  size_beta;
336  int32 size_waste,
337  waste = -1;
338  int32 nbytes;
339  OffsetNumber seed_1 = 0,
340  seed_2 = 0;
341  OffsetNumber *left,
342  *right;
343  OffsetNumber maxoff;
344  BITVECP ptr;
345  int i;
346  SPLITCOST *costvector;
347  GISTTYPE *_k,
348  *_j;
349 
350  maxoff = entryvec->n - 2;
351  nbytes = (maxoff + 2) * sizeof(OffsetNumber);
352  v->spl_left = (OffsetNumber *) palloc(nbytes);
353  v->spl_right = (OffsetNumber *) palloc(nbytes);
354 
355  for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
356  {
357  _k = GETENTRY(entryvec, k);
358  for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
359  {
360  size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
361  if (size_waste > waste)
362  {
363  waste = size_waste;
364  seed_1 = k;
365  seed_2 = j;
366  }
367  }
368  }
369 
370  left = v->spl_left;
371  v->spl_nleft = 0;
372  right = v->spl_right;
373  v->spl_nright = 0;
374 
375  if (seed_1 == 0 || seed_2 == 0)
376  {
377  seed_1 = 1;
378  seed_2 = 2;
379  }
380 
381  /* form initial .. */
382  datum_l = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_1)), siglen,
383  GETSIGN(GETENTRY(entryvec, seed_1)));
384  datum_r = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_2)), siglen,
385  GETSIGN(GETENTRY(entryvec, seed_2)));
386 
387  maxoff = OffsetNumberNext(maxoff);
388  /* sort before ... */
389  costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
390  for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
391  {
392  costvector[j - 1].pos = j;
393  _j = GETENTRY(entryvec, j);
394  size_alpha = hemdist(datum_l, _j, siglen);
395  size_beta = hemdist(datum_r, _j, siglen);
396  costvector[j - 1].cost = abs(size_alpha - size_beta);
397  }
398  qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost);
399 
400  union_l = GETSIGN(datum_l);
401  union_r = GETSIGN(datum_r);
402 
403  for (k = 0; k < maxoff; k++)
404  {
405  j = costvector[k].pos;
406  if (j == seed_1)
407  {
408  *left++ = j;
409  v->spl_nleft++;
410  continue;
411  }
412  else if (j == seed_2)
413  {
414  *right++ = j;
415  v->spl_nright++;
416  continue;
417  }
418  _j = GETENTRY(entryvec, j);
419  size_alpha = hemdist(datum_l, _j, siglen);
420  size_beta = hemdist(datum_r, _j, siglen);
421 
422  if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
423  {
424  if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
425  {
426  if (!ISALLTRUE(datum_l))
427  memset(union_l, 0xff, siglen);
428  }
429  else
430  {
431  ptr = GETSIGN(_j);
432  LOOPBYTE(siglen)
433  union_l[i] |= ptr[i];
434  }
435  *left++ = j;
436  v->spl_nleft++;
437  }
438  else
439  {
440  if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
441  {
442  if (!ISALLTRUE(datum_r))
443  memset(union_r, 0xff, siglen);
444  }
445  else
446  {
447  ptr = GETSIGN(_j);
448  LOOPBYTE(siglen)
449  union_r[i] |= ptr[i];
450  }
451  *right++ = j;
452  v->spl_nright++;
453  }
454  }
455 
456  *right = *left = FirstOffsetNumber;
457  pfree(costvector);
458 
459  v->spl_ldatum = PointerGetDatum(datum_l);
460  v->spl_rdatum = PointerGetDatum(datum_r);
461 
463 }
464 
465 Datum
467 {
468  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
469  ArrayType *query = PG_GETARG_ARRAYTYPE_P(1);
471 
472  /* Oid subtype = PG_GETARG_OID(3); */
473  bool *recheck = (bool *) PG_GETARG_POINTER(4);
474  int siglen = GET_SIGLEN();
475  bool retval;
476 
477  /* All cases served by this function are inexact */
478  *recheck = true;
479 
480  if (ISALLTRUE(DatumGetPointer(entry->key)))
481  PG_RETURN_BOOL(true);
482 
483  if (strategy == BooleanSearchStrategy)
484  {
485  retval = signconsistent((QUERYTYPE *) query,
486  GETSIGN(DatumGetPointer(entry->key)),
487  siglen,
488  false);
489  PG_FREE_IF_COPY(query, 1);
490  PG_RETURN_BOOL(retval);
491  }
492 
493  CHECKARRVALID(query);
494 
495  switch (strategy)
496  {
498  retval = _intbig_overlap((GISTTYPE *) DatumGetPointer(entry->key),
499  query, siglen);
500  break;
502  if (GIST_LEAF(entry))
503  {
504  int i,
505  num = ARRNELEMS(query);
506  int32 *ptr = ARRPTR(query);
507  BITVECP dq = palloc0(siglen),
508  de;
509 
510  while (num--)
511  {
512  HASH(dq, *ptr, siglen);
513  ptr++;
514  }
515 
516  de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
517  retval = true;
518  LOOPBYTE(siglen)
519  {
520  if (de[i] != dq[i])
521  {
522  retval = false;
523  break;
524  }
525  }
526 
527  pfree(dq);
528  }
529  else
530  retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key),
531  query, siglen);
532  break;
535  retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key),
536  query, siglen);
537  break;
540 
541  /*
542  * This code is unreachable as of intarray 1.4, because the <@
543  * operator has been removed from the opclass. We keep it for now
544  * to support older versions of the SQL definitions.
545  */
546  if (GIST_LEAF(entry))
547  {
548  int i,
549  num = ARRNELEMS(query);
550  int32 *ptr = ARRPTR(query);
551  BITVECP dq = palloc0(siglen),
552  de;
553 
554  while (num--)
555  {
556  HASH(dq, *ptr, siglen);
557  ptr++;
558  }
559 
560  de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
561  retval = true;
562  LOOPBYTE(siglen)
563  {
564  if (de[i] & ~dq[i])
565  {
566  retval = false;
567  break;
568  }
569  }
570  }
571  else
572  {
573  /*
574  * Unfortunately, because empty arrays could be anywhere in
575  * the index, we must search the whole tree.
576  */
577  retval = true;
578  }
579  break;
580  default:
581  retval = false;
582  }
583  PG_FREE_IF_COPY(query, 1);
584  PG_RETURN_BOOL(retval);
585 }
586 
587 Datum
589 {
591 
593  add_local_int_reloption(relopts, "siglen",
594  "signature length in bytes",
596  offsetof(GISTIntArrayBigOptions, siglen));
597 
598  PG_RETURN_VOID();
599 }
bool signconsistent(QUERYTYPE *query, BITVECP sign, int siglen, bool calcnot)
Definition: _int_bool.c:299
#define CHECKARRVALID(x)
Definition: _int.h:30
#define BooleanSearchStrategy
Definition: _int.h:134
#define ARRISEMPTY(x)
Definition: _int.h:38
Datum g_intbig_consistent(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:466
static int hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
Definition: _intbig_gist.c:235
static int32 sizebitvec(BITVECP sign, int siglen)
Definition: _intbig_gist.c:213
static int hemdistsign(BITVECP a, BITVECP b, int siglen)
Definition: _intbig_gist.c:219
Datum _intbig_in(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:31
static GISTTYPE * _intbig_alloc(bool allistrue, int siglen, BITVECP sign)
Definition: _intbig_gist.c:51
#define GETENTRY(vec, pos)
Definition: _intbig_gist.c:14
Datum g_intbig_compress(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:145
Datum g_intbig_decompress(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:251
static int32 unionkey(BITVECP sbase, GISTTYPE *add, int siglen)
Definition: _intbig_gist.c:257
PG_FUNCTION_INFO_V1(g_intbig_consistent)
Datum _intbig_out(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:41
Datum g_intbig_same(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:112
static bool _intbig_overlap(GISTTYPE *a, ArrayType *b, int siglen)
Definition: _intbig_gist.c:76
Datum g_intbig_union(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:270
Datum g_intbig_picksplit(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:323
Datum g_intbig_penalty(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:295
Datum g_intbig_options(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:588
static bool _intbig_contains(GISTTYPE *a, ArrayType *b, int siglen)
Definition: _intbig_gist.c:94
static int comparecost(const void *a, const void *b)
Definition: _intbig_gist.c:316
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:256
#define DatumGetArrayTypeP(X)
Definition: array.h:254
#define GETBIT(x, i)
Definition: blutils.c:33
signed int int32
Definition: c.h:478
#define ARRNELEMS(x)
Definition: cube.c:26
#define ARRPTR(x)
Definition: cube.c:25
int errcode(int sqlerrcode)
Definition: elog.c:858
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:272
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
#define GIST_LEAF(entry)
Definition: gist.h:168
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:242
#define newval
#define HASHVAL(val, siglen)
Definition: hstore_gist.c:44
#define LOOPBYTE(siglen)
Definition: hstore_gist.c:32
#define ALLISTRUE
Definition: hstore_gist.c:54
#define WISH_F(a, b, c)
Definition: hstore_gist.c:76
#define CALCGTSIZE(flag, siglen)
Definition: hstore_gist.c:59
#define ISALLTRUE(x)
Definition: hstore_gist.c:56
#define SIGLEN_MAX
Definition: hstore_gist.c:23
#define SIGLEN_DEFAULT
Definition: hstore_gist.c:22
char * BITVECP
Definition: hstore_gist.c:30
#define GET_SIGLEN()
Definition: hstore_gist.c:25
#define SIGLENBIT(siglen)
Definition: hstore_gist.c:24
#define GETSIGN(x)
Definition: hstore_gist.c:61
#define HASH(sign, val, siglen)
Definition: hstore_gist.c:45
char sign
Definition: informix.c:668
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int j
Definition: isn.c:74
int i
Definition: isn.c:73
void pfree(void *pointer)
Definition: mcxt.c:1436
void * palloc0(Size size)
Definition: mcxt.c:1241
void * palloc(Size size)
Definition: mcxt.c:1210
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
PGDLLIMPORT const uint8 pg_number_of_ones[256]
Definition: pg_bitutils.c:87
uint64 pg_popcount(const char *buf, int bytes)
Definition: pg_bitutils.c:296
#define qsort(a, b, c, d)
Definition: port.h:445
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
void init_local_reloptions(local_relopts *relopts, Size relopt_struct_size)
Definition: reloptions.c:736
void add_local_int_reloption(local_relopts *relopts, const char *name, const char *desc, int default_val, int min_val, int max_val, int offset)
Definition: reloptions.c:920
#define RTOldContainsStrategyNumber
Definition: stratnum.h:63
uint16 StrategyNumber
Definition: stratnum.h:22
#define RTOverlapStrategyNumber
Definition: stratnum.h:53
#define RTSameStrategyNumber
Definition: stratnum.h:56
#define RTContainsStrategyNumber
Definition: stratnum.h:57
#define RTOldContainedByStrategyNumber
Definition: stratnum.h:64
#define RTContainedByStrategyNumber
Definition: stratnum.h:58
OffsetNumber offset
Definition: gist.h:161
Datum key
Definition: gist.h:158
Page page
Definition: gist.h:160
Relation rel
Definition: gist.h:159
bool leafkey
Definition: gist.h:162
int32 flag
Definition: hstore_gist.c:50
int spl_nleft
Definition: gist.h:141
OffsetNumber * spl_right
Definition: gist.h:145
Datum spl_ldatum
Definition: gist.h:142
Datum spl_rdatum
Definition: gist.h:147
int spl_nright
Definition: gist.h:146
OffsetNumber * spl_left
Definition: gist.h:140
int32 n
Definition: gist.h:233
int32 cost
Definition: hstore_gist.c:353
OffsetNumber pos
Definition: hstore_gist.c:352
char * flag(int b)
Definition: test-ctype.c:33
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
#define VARSIZE(PTR)
Definition: varatt.h:279