PostgreSQL Source Code  git master
_ltree_gist.c
Go to the documentation of this file.
1 /*
2  * contrib/ltree/_ltree_gist.c
3  *
4  *
5  * GiST support for ltree[]
6  * Teodor Sigaev <teodor@stack.net>
7  */
8 #include "postgres.h"
9 
10 #include <math.h>
11 
12 #include "access/gist.h"
13 #include "access/reloptions.h"
14 #include "access/stratnum.h"
15 #include "crc32.h"
16 #include "ltree.h"
17 #include "port/pg_bitutils.h"
18 #include "utils/array.h"
19 
27 
28 #define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key))
29 #define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
30 
31 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
32 
33 
34 static void
35 hashing(BITVECP sign, ltree *t, int siglen)
36 {
37  int tlen = t->numlevel;
39  int hash;
40 
41  while (tlen > 0)
42  {
43  hash = ltree_crc32_sz(cur->name, cur->len);
44  AHASH(sign, hash, siglen);
45  cur = LEVEL_NEXT(cur);
46  tlen--;
47  }
48 }
49 
50 Datum
52 {
53  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
54  GISTENTRY *retval = entry;
55  int siglen = LTREE_GET_ASIGLEN();
56 
57  if (entry->leafkey)
58  { /* ltree */
59  ltree_gist *key;
61  int num = ArrayGetNItems(ARR_NDIM(val), ARR_DIMS(val));
62  ltree *item = (ltree *) ARR_DATA_PTR(val);
63 
64  if (ARR_NDIM(val) > 1)
65  ereport(ERROR,
66  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
67  errmsg("array must be one-dimensional")));
69  ereport(ERROR,
70  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
71  errmsg("array must not contain nulls")));
72 
73  key = ltree_gist_alloc(false, NULL, siglen, NULL, NULL);
74 
75  while (num > 0)
76  {
77  hashing(LTG_SIGN(key), item, siglen);
78  num--;
79  item = NEXTVAL(item);
80  }
81 
82  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
84  entry->rel, entry->page,
85  entry->offset, false);
86  }
87  else if (!LTG_ISALLTRUE(entry->key))
88  {
89  int32 i;
90  ltree_gist *key;
92 
93  ALOOPBYTE(siglen)
94  {
95  if ((sign[i] & 0xff) != 0xff)
96  PG_RETURN_POINTER(retval);
97  }
98 
99  key = ltree_gist_alloc(true, sign, siglen, NULL, NULL);
100  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
102  entry->rel, entry->page,
103  entry->offset, false);
104  }
105  PG_RETURN_POINTER(retval);
106 }
107 
108 Datum
110 {
113  bool *result = (bool *) PG_GETARG_POINTER(2);
114  int siglen = LTREE_GET_ASIGLEN();
115 
116  if (LTG_ISALLTRUE(a) && LTG_ISALLTRUE(b))
117  *result = true;
118  else if (LTG_ISALLTRUE(a))
119  *result = false;
120  else if (LTG_ISALLTRUE(b))
121  *result = false;
122  else
123  {
124  int32 i;
125  BITVECP sa = LTG_SIGN(a),
126  sb = LTG_SIGN(b);
127 
128  *result = true;
129  ALOOPBYTE(siglen)
130  {
131  if (sa[i] != sb[i])
132  {
133  *result = false;
134  break;
135  }
136  }
137  }
138  PG_RETURN_POINTER(result);
139 }
140 
141 static int32
142 unionkey(BITVECP sbase, ltree_gist *add, int siglen)
143 {
144  int32 i;
145  BITVECP sadd = LTG_SIGN(add);
146 
147  if (LTG_ISALLTRUE(add))
148  return 1;
149 
150  ALOOPBYTE(siglen)
151  sbase[i] |= sadd[i];
152  return 0;
153 }
154 
155 Datum
157 {
159  int *size = (int *) PG_GETARG_POINTER(1);
160  int siglen = LTREE_GET_ASIGLEN();
161  int32 i;
162  ltree_gist *result = ltree_gist_alloc(false, NULL, siglen, NULL, NULL);
163  BITVECP base = LTG_SIGN(result);
164 
165  for (i = 0; i < entryvec->n; i++)
166  {
167  if (unionkey(base, GETENTRY(entryvec, i), siglen))
168  {
169  result->flag |= LTG_ALLTRUE;
170  SET_VARSIZE(result, LTG_HDRSIZE);
171  break;
172  }
173  }
174 
175  *size = VARSIZE(result);
176 
177  PG_RETURN_POINTER(result);
178 }
179 
180 static int32
181 sizebitvec(BITVECP sign, int siglen)
182 {
183  return pg_popcount((const char *) sign, siglen);
184 }
185 
186 static int
188 {
189  int i,
190  diff,
191  dist = 0;
192 
193  ALOOPBYTE(siglen)
194  {
195  diff = (unsigned char) (a[i] ^ b[i]);
196  /* Using the popcount functions here isn't likely to win */
197  dist += pg_number_of_ones[diff];
198  }
199  return dist;
200 }
201 
202 static int
203 hemdist(ltree_gist *a, ltree_gist *b, int siglen)
204 {
205  if (LTG_ISALLTRUE(a))
206  {
207  if (LTG_ISALLTRUE(b))
208  return 0;
209  else
210  return ASIGLENBIT(siglen) - sizebitvec(LTG_SIGN(b), siglen);
211  }
212  else if (LTG_ISALLTRUE(b))
213  return ASIGLENBIT(siglen) - sizebitvec(LTG_SIGN(a), siglen);
214 
215  return hemdistsign(LTG_SIGN(a), LTG_SIGN(b), siglen);
216 }
217 
218 
219 Datum
221 {
222  ltree_gist *origval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
224  float *penalty = (float *) PG_GETARG_POINTER(2);
225  int siglen = LTREE_GET_ASIGLEN();
226 
227  *penalty = hemdist(origval, newval, siglen);
228  PG_RETURN_POINTER(penalty);
229 }
230 
231 typedef struct
232 {
233  OffsetNumber pos;
234  int32 cost;
235 } SPLITCOST;
236 
237 static int
238 comparecost(const void *a, const void *b)
239 {
240  return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
241 }
242 
243 Datum
245 {
248  int siglen = LTREE_GET_ASIGLEN();
249  OffsetNumber k,
250  j;
251  ltree_gist *datum_l,
252  *datum_r;
253  BITVECP union_l,
254  union_r;
255  int32 size_alpha,
256  size_beta;
257  int32 size_waste,
258  waste = -1;
259  int32 nbytes;
260  OffsetNumber seed_1 = 0,
261  seed_2 = 0;
262  OffsetNumber *left,
263  *right;
264  OffsetNumber maxoff;
265  BITVECP ptr;
266  int i;
267  SPLITCOST *costvector;
268  ltree_gist *_k,
269  *_j;
270 
271  maxoff = entryvec->n - 2;
272  nbytes = (maxoff + 2) * sizeof(OffsetNumber);
273  v->spl_left = (OffsetNumber *) palloc(nbytes);
274  v->spl_right = (OffsetNumber *) palloc(nbytes);
275 
276  for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
277  {
278  _k = GETENTRY(entryvec, k);
279  for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
280  {
281  size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
282  if (size_waste > waste)
283  {
284  waste = size_waste;
285  seed_1 = k;
286  seed_2 = j;
287  }
288  }
289  }
290 
291  left = v->spl_left;
292  v->spl_nleft = 0;
293  right = v->spl_right;
294  v->spl_nright = 0;
295 
296  if (seed_1 == 0 || seed_2 == 0)
297  {
298  seed_1 = 1;
299  seed_2 = 2;
300  }
301 
302  /* form initial .. */
303  datum_l = ltree_gist_alloc(LTG_ISALLTRUE(GETENTRY(entryvec, seed_1)),
304  LTG_SIGN(GETENTRY(entryvec, seed_1)),
305  siglen, NULL, NULL);
306 
307  datum_r = ltree_gist_alloc(LTG_ISALLTRUE(GETENTRY(entryvec, seed_2)),
308  LTG_SIGN(GETENTRY(entryvec, seed_2)),
309  siglen, NULL, NULL);
310 
311  maxoff = OffsetNumberNext(maxoff);
312  /* sort before ... */
313  costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
314  for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
315  {
316  costvector[j - 1].pos = j;
317  _j = GETENTRY(entryvec, j);
318  size_alpha = hemdist(datum_l, _j, siglen);
319  size_beta = hemdist(datum_r, _j, siglen);
320  costvector[j - 1].cost = abs(size_alpha - size_beta);
321  }
322  qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost);
323 
324  union_l = LTG_SIGN(datum_l);
325  union_r = LTG_SIGN(datum_r);
326 
327  for (k = 0; k < maxoff; k++)
328  {
329  j = costvector[k].pos;
330  if (j == seed_1)
331  {
332  *left++ = j;
333  v->spl_nleft++;
334  continue;
335  }
336  else if (j == seed_2)
337  {
338  *right++ = j;
339  v->spl_nright++;
340  continue;
341  }
342  _j = GETENTRY(entryvec, j);
343  size_alpha = hemdist(datum_l, _j, siglen);
344  size_beta = hemdist(datum_r, _j, siglen);
345 
346  if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
347  {
348  if (LTG_ISALLTRUE(datum_l) || LTG_ISALLTRUE(_j))
349  {
350  if (!LTG_ISALLTRUE(datum_l))
351  memset(union_l, 0xff, siglen);
352  }
353  else
354  {
355  ptr = LTG_SIGN(_j);
356  ALOOPBYTE(siglen)
357  union_l[i] |= ptr[i];
358  }
359  *left++ = j;
360  v->spl_nleft++;
361  }
362  else
363  {
364  if (LTG_ISALLTRUE(datum_r) || LTG_ISALLTRUE(_j))
365  {
366  if (!LTG_ISALLTRUE(datum_r))
367  memset(union_r, 0xff, siglen);
368  }
369  else
370  {
371  ptr = LTG_SIGN(_j);
372  ALOOPBYTE(siglen)
373  union_r[i] |= ptr[i];
374  }
375  *right++ = j;
376  v->spl_nright++;
377  }
378  }
379 
380  *right = *left = FirstOffsetNumber;
381 
382  v->spl_ldatum = PointerGetDatum(datum_l);
383  v->spl_rdatum = PointerGetDatum(datum_r);
384 
386 }
387 
388 static bool
389 gist_te(ltree_gist *key, ltree *query, int siglen)
390 {
391  ltree_level *curq = LTREE_FIRST(query);
393  int qlen = query->numlevel;
394  unsigned int hv;
395 
396  if (LTG_ISALLTRUE(key))
397  return true;
398 
399  while (qlen > 0)
400  {
401  hv = ltree_crc32_sz(curq->name, curq->len);
402  if (!GETBIT(sign, AHASHVAL(hv, siglen)))
403  return false;
404  curq = LEVEL_NEXT(curq);
405  qlen--;
406  }
407 
408  return true;
409 }
410 
411 typedef struct LtreeSignature
412 {
414  int siglen;
416 
417 static bool
419 {
420  LtreeSignature *sig = cxt;
421 
422  return (FLG_CANLOOKSIGN(val->flag)) ? GETBIT(sig->sign, AHASHVAL(val->val, sig->siglen)) : true;
423 }
424 
425 static bool
426 gist_qtxt(ltree_gist *key, ltxtquery *query, int siglen)
427 {
429 
430  if (LTG_ISALLTRUE(key))
431  return true;
432 
433  sig.sign = LTG_SIGN(key);
434  sig.siglen = siglen;
435 
436  return ltree_execute(GETQUERY(query),
437  &sig, false,
439 }
440 
441 static bool
442 gist_qe(ltree_gist *key, lquery *query, int siglen)
443 {
444  lquery_level *curq = LQUERY_FIRST(query);
446  int qlen = query->numlevel;
447 
448  if (LTG_ISALLTRUE(key))
449  return true;
450 
451  while (qlen > 0)
452  {
453  if (curq->numvar && LQL_CANLOOKSIGN(curq))
454  {
455  bool isexist = false;
456  int vlen = curq->numvar;
457  lquery_variant *curv = LQL_FIRST(curq);
458 
459  while (vlen > 0)
460  {
461  if (GETBIT(sign, AHASHVAL(curv->val, siglen)))
462  {
463  isexist = true;
464  break;
465  }
466  curv = LVAR_NEXT(curv);
467  vlen--;
468  }
469  if (!isexist)
470  return false;
471  }
472 
473  curq = LQL_NEXT(curq);
474  qlen--;
475  }
476 
477  return true;
478 }
479 
480 static bool
481 _arrq_cons(ltree_gist *key, ArrayType *_query, int siglen)
482 {
483  lquery *query = (lquery *) ARR_DATA_PTR(_query);
484  int num = ArrayGetNItems(ARR_NDIM(_query), ARR_DIMS(_query));
485 
486  if (ARR_NDIM(_query) > 1)
487  ereport(ERROR,
488  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
489  errmsg("array must be one-dimensional")));
490  if (array_contains_nulls(_query))
491  ereport(ERROR,
492  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
493  errmsg("array must not contain nulls")));
494 
495  while (num > 0)
496  {
497  if (gist_qe(key, query, siglen))
498  return true;
499  num--;
500  query = (lquery *) NEXTVAL(query);
501  }
502  return false;
503 }
504 
505 Datum
507 {
508  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
509  void *query = (void *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
511 
512  /* Oid subtype = PG_GETARG_OID(3); */
513  bool *recheck = (bool *) PG_GETARG_POINTER(4);
514  int siglen = LTREE_GET_ASIGLEN();
516  bool res = false;
517 
518  /* All cases served by this function are inexact */
519  *recheck = true;
520 
521  switch (strategy)
522  {
523  case 10:
524  case 11:
525  res = gist_te(key, (ltree *) query, siglen);
526  break;
527  case 12:
528  case 13:
529  res = gist_qe(key, (lquery *) query, siglen);
530  break;
531  case 14:
532  case 15:
533  res = gist_qtxt(key, (ltxtquery *) query, siglen);
534  break;
535  case 16:
536  case 17:
537  res = _arrq_cons(key, (ArrayType *) query, siglen);
538  break;
539  default:
540  /* internal error */
541  elog(ERROR, "unrecognized StrategyNumber: %d", strategy);
542  }
543  PG_FREE_IF_COPY(query, 1);
545 }
546 
547 Datum
549 {
551 
552  init_local_reloptions(relopts, sizeof(LtreeGistOptions));
553  add_local_int_reloption(relopts, "siglen", "signature length",
555  offsetof(LtreeGistOptions, siglen));
556 
557  PG_RETURN_VOID();
558 }
#define GETQUERY(x)
Definition: _int.h:157
#define NEXTVAL(x)
Definition: _ltree_gist.c:29
static bool gist_qe(ltree_gist *key, lquery *query, int siglen)
Definition: _ltree_gist.c:442
#define WISH_F(a, b, c)
Definition: _ltree_gist.c:31
static bool _arrq_cons(ltree_gist *key, ArrayType *_query, int siglen)
Definition: _ltree_gist.c:481
static bool gist_te(ltree_gist *key, ltree *query, int siglen)
Definition: _ltree_gist.c:389
static void hashing(BITVECP sign, ltree *t, int siglen)
Definition: _ltree_gist.c:35
static int32 sizebitvec(BITVECP sign, int siglen)
Definition: _ltree_gist.c:181
static int hemdistsign(BITVECP a, BITVECP b, int siglen)
Definition: _ltree_gist.c:187
Datum _ltree_union(PG_FUNCTION_ARGS)
Definition: _ltree_gist.c:156
#define GETENTRY(vec, pos)
Definition: _ltree_gist.c:28
PG_FUNCTION_INFO_V1(_ltree_compress)
Datum _ltree_penalty(PG_FUNCTION_ARGS)
Definition: _ltree_gist.c:220
static bool gist_qtxt(ltree_gist *key, ltxtquery *query, int siglen)
Definition: _ltree_gist.c:426
static int hemdist(ltree_gist *a, ltree_gist *b, int siglen)
Definition: _ltree_gist.c:203
static int32 unionkey(BITVECP sbase, ltree_gist *add, int siglen)
Definition: _ltree_gist.c:142
struct LtreeSignature LtreeSignature
Datum _ltree_gist_options(PG_FUNCTION_ARGS)
Definition: _ltree_gist.c:548
static bool checkcondition_bit(void *cxt, ITEM *val)
Definition: _ltree_gist.c:418
Datum _ltree_compress(PG_FUNCTION_ARGS)
Definition: _ltree_gist.c:51
Datum _ltree_consistent(PG_FUNCTION_ARGS)
Definition: _ltree_gist.c:506
Datum _ltree_same(PG_FUNCTION_ARGS)
Definition: _ltree_gist.c:109
Datum _ltree_picksplit(PG_FUNCTION_ARGS)
Definition: _ltree_gist.c:244
static int comparecost(const void *a, const void *b)
Definition: _ltree_gist.c:238
#define ARR_NDIM(a)
Definition: array.h:290
#define ARR_DATA_PTR(a)
Definition: array.h:322
#define DatumGetArrayTypeP(X)
Definition: array.h:261
#define ARR_DIMS(a)
Definition: array.h:294
bool array_contains_nulls(ArrayType *array)
Definition: arrayfuncs.c:3755
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:57
#define GETBIT(x, i)
Definition: blutils.c:33
signed int int32
Definition: c.h:497
unsigned int ltree_crc32_sz(const char *buf, int size)
Definition: crc32.c:24
struct cursor * cur
Definition: ecpg.c:28
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:272
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:244
#define newval
char * BITVECP
Definition: hstore_gist.c:31
long val
Definition: informix.c:689
char sign
Definition: informix.c:693
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int j
Definition: isn.c:74
int i
Definition: isn.c:73
#define LTG_HDRSIZE
Definition: ltree.h:277
#define FLG_CANLOOKSIGN(x)
Definition: ltree.h:107
#define LTREE_FIRST(x)
Definition: ltree.h:51
#define AHASHVAL(val, siglen)
Definition: ltree.h:305
#define LTREE_GET_ASIGLEN()
Definition: ltree.h:297
#define LTG_ALLTRUE
Definition: ltree.h:274
#define LEVEL_NEXT(x)
Definition: ltree.h:40
#define ASIGLENBIT(siglen)
Definition: ltree.h:300
#define LTREE_ASIGLEN_MAX
Definition: ltree.h:296
#define ALOOPBYTE(siglen)
Definition: ltree.h:302
ltree_gist * ltree_gist_alloc(bool isalltrue, BITVECP sign, int siglen, ltree *left, ltree *right)
Definition: ltree_gist.c:42
bool ltree_execute(ITEM *curitem, void *checkval, bool calcnot, bool(*chkcond)(void *checkval, ITEM *val))
Definition: ltxtquery_op.c:20
#define LQL_CANLOOKSIGN(x)
Definition: ltree.h:111
#define LQL_FIRST(x)
Definition: ltree.h:101
#define AHASH(sign, val, siglen)
Definition: ltree.h:306
#define LVAR_NEXT(x)
Definition: ltree.h:72
#define LQL_NEXT(x)
Definition: ltree.h:100
#define LQUERY_FIRST(x)
Definition: ltree.h:124
#define LTG_ISALLTRUE(x)
Definition: ltree.h:281
#define LTG_SIGN(x)
Definition: ltree.h:278
#define LTREE_ASIGLEN_DEFAULT
Definition: ltree.h:295
void * palloc(Size size)
Definition: mcxt.c:1317
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
PGDLLIMPORT const uint8 pg_number_of_ones[256]
Definition: pg_bitutils.c:87
static uint64 pg_popcount(const char *buf, int bytes)
Definition: pg_bitutils.h:339
static int sig
Definition: pg_ctl.c:80
#define qsort(a, b, c, d)
Definition: port.h:447
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
void init_local_reloptions(local_relopts *relopts, Size relopt_struct_size)
Definition: reloptions.c:734
void add_local_int_reloption(local_relopts *relopts, const char *name, const char *desc, int default_val, int min_val, int max_val, int offset)
Definition: reloptions.c:918
static pg_noinline void Size size
Definition: slab.c:607
uint16 StrategyNumber
Definition: stratnum.h:22
OffsetNumber offset
Definition: gist.h:163
Datum key
Definition: gist.h:160
Page page
Definition: gist.h:162
Relation rel
Definition: gist.h:161
bool leafkey
Definition: gist.h:164
int spl_nleft
Definition: gist.h:143
OffsetNumber * spl_right
Definition: gist.h:147
Datum spl_ldatum
Definition: gist.h:144
Datum spl_rdatum
Definition: gist.h:149
int spl_nright
Definition: gist.h:148
OffsetNumber * spl_left
Definition: gist.h:142
int32 n
Definition: gist.h:235
Definition: _int.h:141
int32 cost
Definition: hstore_gist.c:354
OffsetNumber pos
Definition: hstore_gist.c:353
char * name
Definition: type.h:138
uint16 numvar
Definition: ltree.h:92
int32 val
Definition: ltree.h:60
Definition: ltree.h:114
uint16 numlevel
Definition: ltree.h:116
uint32 flag
Definition: ltree.h:269
char name[FLEXIBLE_ARRAY_MEMBER]
Definition: ltree.h:36
uint16 len
Definition: ltree.h:35
Definition: ltree.h:43
uint16 numlevel
Definition: ltree.h:45
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
#define VARSIZE(PTR)
Definition: varatt.h:279