PostgreSQL Source Code  git master
_intbig_gist.c
Go to the documentation of this file.
1 /*
2  * contrib/intarray/_intbig_gist.c
3  */
4 #include "postgres.h"
5 
6 #include <math.h>
7 
8 #include "_int.h"
9 #include "access/gist.h"
10 #include "access/reloptions.h"
11 #include "access/stratnum.h"
12 #include "port/pg_bitutils.h"
13 
14 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
15 /*
16 ** _intbig methods
17 */
26 
29 
30 Datum
32 {
33  ereport(ERROR,
34  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
35  errmsg("_intbig_in() not implemented")));
36  PG_RETURN_DATUM(0);
37 }
38 
39 Datum
41 {
42  ereport(ERROR,
43  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
44  errmsg("_intbig_out() not implemented")));
45  PG_RETURN_DATUM(0);
46 }
47 
48 static GISTTYPE *
49 _intbig_alloc(bool allistrue, int siglen, BITVECP sign)
50 {
51  int flag = allistrue ? ALLISTRUE : 0;
52  int size = CALCGTSIZE(flag, siglen);
53  GISTTYPE *res = (GISTTYPE *) palloc(size);
54 
55  SET_VARSIZE(res, size);
56  res->flag = flag;
57 
58  if (!allistrue)
59  {
60  if (sign)
61  memcpy(GETSIGN(res), sign, siglen);
62  else
63  memset(GETSIGN(res), 0, siglen);
64  }
65 
66  return res;
67 }
68 
69 
70 /*********************************************************************
71 ** intbig functions
72 *********************************************************************/
73 static bool
75 {
76  int num = ARRNELEMS(b);
77  int32 *ptr = ARRPTR(b);
78 
80 
81  while (num--)
82  {
83  if (GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen)))
84  return true;
85  ptr++;
86  }
87 
88  return false;
89 }
90 
91 static bool
93 {
94  int num = ARRNELEMS(b);
95  int32 *ptr = ARRPTR(b);
96 
98 
99  while (num--)
100  {
101  if (!GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen)))
102  return false;
103  ptr++;
104  }
105 
106  return true;
107 }
108 
109 Datum
111 {
114  bool *result = (bool *) PG_GETARG_POINTER(2);
115  int siglen = GET_SIGLEN();
116 
117  if (ISALLTRUE(a) && ISALLTRUE(b))
118  *result = true;
119  else if (ISALLTRUE(a))
120  *result = false;
121  else if (ISALLTRUE(b))
122  *result = false;
123  else
124  {
125  int32 i;
126  BITVECP sa = GETSIGN(a),
127  sb = GETSIGN(b);
128 
129  *result = true;
130  LOOPBYTE(siglen)
131  {
132  if (sa[i] != sb[i])
133  {
134  *result = false;
135  break;
136  }
137  }
138  }
139  PG_RETURN_POINTER(result);
140 }
141 
142 Datum
144 {
145  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
146  int siglen = GET_SIGLEN();
147 
148  if (entry->leafkey)
149  {
150  GISTENTRY *retval;
151  ArrayType *in = DatumGetArrayTypeP(entry->key);
152  int32 *ptr;
153  int num;
154  GISTTYPE *res = _intbig_alloc(false, siglen, NULL);
155 
156  CHECKARRVALID(in);
157  if (ARRISEMPTY(in))
158  {
159  ptr = NULL;
160  num = 0;
161  }
162  else
163  {
164  ptr = ARRPTR(in);
165  num = ARRNELEMS(in);
166  }
167 
168  while (num--)
169  {
170  HASH(GETSIGN(res), *ptr, siglen);
171  ptr++;
172  }
173 
174  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
176  entry->rel, entry->page,
177  entry->offset, false);
178 
179  if (in != DatumGetArrayTypeP(entry->key))
180  pfree(in);
181 
182  PG_RETURN_POINTER(retval);
183  }
184  else if (!ISALLTRUE(DatumGetPointer(entry->key)))
185  {
186  GISTENTRY *retval;
187  int i;
189  GISTTYPE *res;
190 
191  LOOPBYTE(siglen)
192  {
193  if ((sign[i] & 0xff) != 0xff)
194  PG_RETURN_POINTER(entry);
195  }
196 
197  res = _intbig_alloc(true, siglen, sign);
198  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
200  entry->rel, entry->page,
201  entry->offset, false);
202 
203  PG_RETURN_POINTER(retval);
204  }
205 
206  PG_RETURN_POINTER(entry);
207 }
208 
209 
210 static int32
211 sizebitvec(BITVECP sign, int siglen)
212 {
213  return pg_popcount(sign, siglen);
214 }
215 
216 static int
218 {
219  int i,
220  diff,
221  dist = 0;
222 
223  LOOPBYTE(siglen)
224  {
225  diff = (unsigned char) (a[i] ^ b[i]);
226  /* Using the popcount functions here isn't likely to win */
227  dist += pg_number_of_ones[diff];
228  }
229  return dist;
230 }
231 
232 static int
233 hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
234 {
235  if (ISALLTRUE(a))
236  {
237  if (ISALLTRUE(b))
238  return 0;
239  else
240  return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen);
241  }
242  else if (ISALLTRUE(b))
243  return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
244 
245  return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
246 }
247 
248 Datum
250 {
252 }
253 
254 static int32
255 unionkey(BITVECP sbase, GISTTYPE *add, int siglen)
256 {
257  int32 i;
258  BITVECP sadd = GETSIGN(add);
259 
260  if (ISALLTRUE(add))
261  return 1;
262  LOOPBYTE(siglen)
263  sbase[i] |= sadd[i];
264  return 0;
265 }
266 
267 Datum
269 {
271  int *size = (int *) PG_GETARG_POINTER(1);
272  int siglen = GET_SIGLEN();
273  int32 i;
274  GISTTYPE *result = _intbig_alloc(false, siglen, NULL);
275  BITVECP base = GETSIGN(result);
276 
277  for (i = 0; i < entryvec->n; i++)
278  {
279  if (unionkey(base, GETENTRY(entryvec, i), siglen))
280  {
281  result->flag |= ALLISTRUE;
282  SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen));
283  break;
284  }
285  }
286 
287  *size = VARSIZE(result);
288 
289  PG_RETURN_POINTER(result);
290 }
291 
292 Datum
294 {
295  GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
296  GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
297  float *penalty = (float *) PG_GETARG_POINTER(2);
298  GISTTYPE *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
299  GISTTYPE *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
300  int siglen = GET_SIGLEN();
301 
302  *penalty = hemdist(origval, newval, siglen);
303  PG_RETURN_POINTER(penalty);
304 }
305 
306 
307 typedef struct
308 {
309  OffsetNumber pos;
310  int32 cost;
311 } SPLITCOST;
312 
313 static int
314 comparecost(const void *a, const void *b)
315 {
316  return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
317 }
318 
319 
320 Datum
322 {
325  int siglen = GET_SIGLEN();
326  OffsetNumber k,
327  j;
328  GISTTYPE *datum_l,
329  *datum_r;
330  BITVECP union_l,
331  union_r;
332  int32 size_alpha,
333  size_beta;
334  int32 size_waste,
335  waste = -1;
336  int32 nbytes;
337  OffsetNumber seed_1 = 0,
338  seed_2 = 0;
339  OffsetNumber *left,
340  *right;
341  OffsetNumber maxoff;
342  BITVECP ptr;
343  int i;
344  SPLITCOST *costvector;
345  GISTTYPE *_k,
346  *_j;
347 
348  maxoff = entryvec->n - 2;
349  nbytes = (maxoff + 2) * sizeof(OffsetNumber);
350  v->spl_left = (OffsetNumber *) palloc(nbytes);
351  v->spl_right = (OffsetNumber *) palloc(nbytes);
352 
353  for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
354  {
355  _k = GETENTRY(entryvec, k);
356  for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
357  {
358  size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
359  if (size_waste > waste)
360  {
361  waste = size_waste;
362  seed_1 = k;
363  seed_2 = j;
364  }
365  }
366  }
367 
368  left = v->spl_left;
369  v->spl_nleft = 0;
370  right = v->spl_right;
371  v->spl_nright = 0;
372 
373  if (seed_1 == 0 || seed_2 == 0)
374  {
375  seed_1 = 1;
376  seed_2 = 2;
377  }
378 
379  /* form initial .. */
380  datum_l = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_1)), siglen,
381  GETSIGN(GETENTRY(entryvec, seed_1)));
382  datum_r = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_2)), siglen,
383  GETSIGN(GETENTRY(entryvec, seed_2)));
384 
385  maxoff = OffsetNumberNext(maxoff);
386  /* sort before ... */
387  costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
388  for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
389  {
390  costvector[j - 1].pos = j;
391  _j = GETENTRY(entryvec, j);
392  size_alpha = hemdist(datum_l, _j, siglen);
393  size_beta = hemdist(datum_r, _j, siglen);
394  costvector[j - 1].cost = abs(size_alpha - size_beta);
395  }
396  qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
397 
398  union_l = GETSIGN(datum_l);
399  union_r = GETSIGN(datum_r);
400 
401  for (k = 0; k < maxoff; k++)
402  {
403  j = costvector[k].pos;
404  if (j == seed_1)
405  {
406  *left++ = j;
407  v->spl_nleft++;
408  continue;
409  }
410  else if (j == seed_2)
411  {
412  *right++ = j;
413  v->spl_nright++;
414  continue;
415  }
416  _j = GETENTRY(entryvec, j);
417  size_alpha = hemdist(datum_l, _j, siglen);
418  size_beta = hemdist(datum_r, _j, siglen);
419 
420  if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
421  {
422  if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
423  {
424  if (!ISALLTRUE(datum_l))
425  memset((void *) union_l, 0xff, siglen);
426  }
427  else
428  {
429  ptr = GETSIGN(_j);
430  LOOPBYTE(siglen)
431  union_l[i] |= ptr[i];
432  }
433  *left++ = j;
434  v->spl_nleft++;
435  }
436  else
437  {
438  if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
439  {
440  if (!ISALLTRUE(datum_r))
441  memset((void *) union_r, 0xff, siglen);
442  }
443  else
444  {
445  ptr = GETSIGN(_j);
446  LOOPBYTE(siglen)
447  union_r[i] |= ptr[i];
448  }
449  *right++ = j;
450  v->spl_nright++;
451  }
452  }
453 
454  *right = *left = FirstOffsetNumber;
455  pfree(costvector);
456 
457  v->spl_ldatum = PointerGetDatum(datum_l);
458  v->spl_rdatum = PointerGetDatum(datum_r);
459 
461 }
462 
463 Datum
465 {
466  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
467  ArrayType *query = PG_GETARG_ARRAYTYPE_P(1);
469 
470  /* Oid subtype = PG_GETARG_OID(3); */
471  bool *recheck = (bool *) PG_GETARG_POINTER(4);
472  int siglen = GET_SIGLEN();
473  bool retval;
474 
475  /* All cases served by this function are inexact */
476  *recheck = true;
477 
478  if (ISALLTRUE(DatumGetPointer(entry->key)))
479  PG_RETURN_BOOL(true);
480 
481  if (strategy == BooleanSearchStrategy)
482  {
483  retval = signconsistent((QUERYTYPE *) query,
484  GETSIGN(DatumGetPointer(entry->key)),
485  siglen,
486  false);
487  PG_FREE_IF_COPY(query, 1);
488  PG_RETURN_BOOL(retval);
489  }
490 
491  CHECKARRVALID(query);
492 
493  switch (strategy)
494  {
496  retval = _intbig_overlap((GISTTYPE *) DatumGetPointer(entry->key),
497  query, siglen);
498  break;
500  if (GIST_LEAF(entry))
501  {
502  int i,
503  num = ARRNELEMS(query);
504  int32 *ptr = ARRPTR(query);
505  BITVECP dq = palloc0(siglen),
506  de;
507 
508  while (num--)
509  {
510  HASH(dq, *ptr, siglen);
511  ptr++;
512  }
513 
514  de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
515  retval = true;
516  LOOPBYTE(siglen)
517  {
518  if (de[i] != dq[i])
519  {
520  retval = false;
521  break;
522  }
523  }
524 
525  pfree(dq);
526  }
527  else
528  retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key),
529  query, siglen);
530  break;
533  retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key),
534  query, siglen);
535  break;
538 
539  /*
540  * This code is unreachable as of intarray 1.4, because the <@
541  * operator has been removed from the opclass. We keep it for now
542  * to support older versions of the SQL definitions.
543  */
544  if (GIST_LEAF(entry))
545  {
546  int i,
547  num = ARRNELEMS(query);
548  int32 *ptr = ARRPTR(query);
549  BITVECP dq = palloc0(siglen),
550  de;
551 
552  while (num--)
553  {
554  HASH(dq, *ptr, siglen);
555  ptr++;
556  }
557 
558  de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
559  retval = true;
560  LOOPBYTE(siglen)
561  {
562  if (de[i] & ~dq[i])
563  {
564  retval = false;
565  break;
566  }
567  }
568  }
569  else
570  {
571  /*
572  * Unfortunately, because empty arrays could be anywhere in
573  * the index, we must search the whole tree.
574  */
575  retval = true;
576  }
577  break;
578  default:
579  retval = false;
580  }
581  PG_FREE_IF_COPY(query, 1);
582  PG_RETURN_BOOL(retval);
583 }
584 
585 Datum
587 {
589 
591  add_local_int_reloption(relopts, "siglen",
592  "signature length in bytes",
594  offsetof(GISTIntArrayBigOptions, siglen));
595 
596  PG_RETURN_VOID();
597 }
bool signconsistent(QUERYTYPE *query, BITVECP sign, int siglen, bool calcnot)
Definition: _int_bool.c:299
#define CHECKARRVALID(x)
Definition: _int.h:30
#define BooleanSearchStrategy
Definition: _int.h:134
#define ARRISEMPTY(x)
Definition: _int.h:38
Datum g_intbig_consistent(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:464
static int hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
Definition: _intbig_gist.c:233
static int32 sizebitvec(BITVECP sign, int siglen)
Definition: _intbig_gist.c:211
static int hemdistsign(BITVECP a, BITVECP b, int siglen)
Definition: _intbig_gist.c:217
Datum _intbig_in(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:31
static GISTTYPE * _intbig_alloc(bool allistrue, int siglen, BITVECP sign)
Definition: _intbig_gist.c:49
#define GETENTRY(vec, pos)
Definition: _intbig_gist.c:14
Datum g_intbig_compress(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:143
Datum g_intbig_decompress(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:249
static int32 unionkey(BITVECP sbase, GISTTYPE *add, int siglen)
Definition: _intbig_gist.c:255
PG_FUNCTION_INFO_V1(g_intbig_consistent)
Datum _intbig_out(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:40
Datum g_intbig_same(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:110
static bool _intbig_overlap(GISTTYPE *a, ArrayType *b, int siglen)
Definition: _intbig_gist.c:74
Datum g_intbig_union(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:268
Datum g_intbig_picksplit(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:321
Datum g_intbig_penalty(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:293
Datum g_intbig_options(PG_FUNCTION_ARGS)
Definition: _intbig_gist.c:586
static bool _intbig_contains(GISTTYPE *a, ArrayType *b, int siglen)
Definition: _intbig_gist.c:92
static int comparecost(const void *a, const void *b)
Definition: _intbig_gist.c:314
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:256
#define DatumGetArrayTypeP(X)
Definition: array.h:254
#define GETBIT(x, i)
Definition: blutils.c:33
signed int int32
Definition: c.h:430
#define ARRNELEMS(x)
Definition: cube.c:26
#define ARRPTR(x)
Definition: cube.c:25
int errcode(int sqlerrcode)
Definition: elog.c:695
int errmsg(const char *fmt,...)
Definition: elog.c:906
#define ERROR
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:145
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:272
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
#define GIST_LEAF(entry)
Definition: gist.h:168
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:242
#define newval
#define HASHVAL(val, siglen)
Definition: hstore_gist.c:44
#define LOOPBYTE(siglen)
Definition: hstore_gist.c:32
#define ALLISTRUE
Definition: hstore_gist.c:54
#define WISH_F(a, b, c)
Definition: hstore_gist.c:76
#define CALCGTSIZE(flag, siglen)
Definition: hstore_gist.c:59
#define ISALLTRUE(x)
Definition: hstore_gist.c:56
#define SIGLEN_MAX
Definition: hstore_gist.c:23
#define SIGLEN_DEFAULT
Definition: hstore_gist.c:22
char * BITVECP
Definition: hstore_gist.c:30
#define GET_SIGLEN()
Definition: hstore_gist.c:25
#define SIGLENBIT(siglen)
Definition: hstore_gist.c:24
#define GETSIGN(x)
Definition: hstore_gist.c:61
#define HASH(sign, val, siglen)
Definition: hstore_gist.c:45
char sign
Definition: informix.c:668
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int j
Definition: isn.c:74
int i
Definition: isn.c:73
void pfree(void *pointer)
Definition: mcxt.c:1306
void * palloc0(Size size)
Definition: mcxt.c:1230
void * palloc(Size size)
Definition: mcxt.c:1199
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
PGDLLIMPORT const uint8 pg_number_of_ones[256]
Definition: pg_bitutils.c:87
uint64 pg_popcount(const char *buf, int bytes)
Definition: pg_bitutils.c:296
#define qsort(a, b, c, d)
Definition: port.h:445
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:670
uintptr_t Datum
Definition: postgres.h:412
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:660
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:343
#define VARSIZE(PTR)
Definition: postgres.h:317
void init_local_reloptions(local_relopts *relopts, Size relopt_struct_size)
Definition: reloptions.c:736
void add_local_int_reloption(local_relopts *relopts, const char *name, const char *desc, int default_val, int min_val, int max_val, int offset)
Definition: reloptions.c:920
#define RTOldContainsStrategyNumber
Definition: stratnum.h:63
uint16 StrategyNumber
Definition: stratnum.h:22
#define RTOverlapStrategyNumber
Definition: stratnum.h:53
#define RTSameStrategyNumber
Definition: stratnum.h:56
#define RTContainsStrategyNumber
Definition: stratnum.h:57
#define RTOldContainedByStrategyNumber
Definition: stratnum.h:64
#define RTContainedByStrategyNumber
Definition: stratnum.h:58
OffsetNumber offset
Definition: gist.h:161
Datum key
Definition: gist.h:158
Page page
Definition: gist.h:160
Relation rel
Definition: gist.h:159
bool leafkey
Definition: gist.h:162
int32 flag
Definition: hstore_gist.c:50
int spl_nleft
Definition: gist.h:141
OffsetNumber * spl_right
Definition: gist.h:145
Datum spl_ldatum
Definition: gist.h:142
Datum spl_rdatum
Definition: gist.h:147
int spl_nright
Definition: gist.h:146
OffsetNumber * spl_left
Definition: gist.h:140
int32 n
Definition: gist.h:233
int32 cost
Definition: hstore_gist.c:347
OffsetNumber pos
Definition: hstore_gist.c:346
char * flag(int b)
Definition: test-ctype.c:33