PostgreSQL Source Code git master
_ltree_gist.c
Go to the documentation of this file.
1/*
2 * contrib/ltree/_ltree_gist.c
3 *
4 *
5 * GiST support for ltree[]
6 * Teodor Sigaev <teodor@stack.net>
7 */
8#include "postgres.h"
9
10#include <math.h>
11
12#include "access/gist.h"
13#include "access/reloptions.h"
14#include "access/stratnum.h"
15#include "crc32.h"
16#include "ltree.h"
17#include "port/pg_bitutils.h"
18#include "utils/array.h"
19
27
28#define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key))
29#define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
30
31#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
32
33
34static void
35hashing(BITVECP sign, ltree *t, int siglen)
36{
37 int tlen = t->numlevel;
39 int hash;
40
41 while (tlen > 0)
42 {
43 hash = ltree_crc32_sz(cur->name, cur->len);
44 AHASH(sign, hash, siglen);
46 tlen--;
47 }
48}
49
52{
54 GISTENTRY *retval = entry;
55 int siglen = LTREE_GET_ASIGLEN();
56
57 if (entry->leafkey)
58 { /* ltree */
62 ltree *item = (ltree *) ARR_DATA_PTR(val);
63
64 if (ARR_NDIM(val) > 1)
66 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
67 errmsg("array must be one-dimensional")));
70 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
71 errmsg("array must not contain nulls")));
72
73 key = ltree_gist_alloc(false, NULL, siglen, NULL, NULL);
74
75 while (num > 0)
76 {
77 hashing(LTG_SIGN(key), item, siglen);
78 num--;
79 item = NEXTVAL(item);
80 }
81
82 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
84 entry->rel, entry->page,
85 entry->offset, false);
86 }
87 else if (!LTG_ISALLTRUE(entry->key))
88 {
89 int32 i;
92
93 ALOOPBYTE(siglen)
94 {
95 if ((sign[i] & 0xff) != 0xff)
96 PG_RETURN_POINTER(retval);
97 }
98
99 key = ltree_gist_alloc(true, sign, siglen, NULL, NULL);
100 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
102 entry->rel, entry->page,
103 entry->offset, false);
104 }
105 PG_RETURN_POINTER(retval);
106}
107
108Datum
110{
113 bool *result = (bool *) PG_GETARG_POINTER(2);
114 int siglen = LTREE_GET_ASIGLEN();
115
117 *result = true;
118 else if (LTG_ISALLTRUE(a))
119 *result = false;
120 else if (LTG_ISALLTRUE(b))
121 *result = false;
122 else
123 {
124 int32 i;
125 BITVECP sa = LTG_SIGN(a),
126 sb = LTG_SIGN(b);
127
128 *result = true;
129 ALOOPBYTE(siglen)
130 {
131 if (sa[i] != sb[i])
132 {
133 *result = false;
134 break;
135 }
136 }
137 }
138 PG_RETURN_POINTER(result);
139}
140
141static int32
142unionkey(BITVECP sbase, ltree_gist *add, int siglen)
143{
144 int32 i;
145 BITVECP sadd = LTG_SIGN(add);
146
147 if (LTG_ISALLTRUE(add))
148 return 1;
149
150 ALOOPBYTE(siglen)
151 sbase[i] |= sadd[i];
152 return 0;
153}
154
155Datum
157{
159 int *size = (int *) PG_GETARG_POINTER(1);
160 int siglen = LTREE_GET_ASIGLEN();
161 int32 i;
162 ltree_gist *result = ltree_gist_alloc(false, NULL, siglen, NULL, NULL);
163 BITVECP base = LTG_SIGN(result);
164
165 for (i = 0; i < entryvec->n; i++)
166 {
167 if (unionkey(base, GETENTRY(entryvec, i), siglen))
168 {
169 result->flag |= LTG_ALLTRUE;
170 SET_VARSIZE(result, LTG_HDRSIZE);
171 break;
172 }
173 }
174
175 *size = VARSIZE(result);
176
177 PG_RETURN_POINTER(result);
178}
179
180static int32
182{
183 return pg_popcount((const char *) sign, siglen);
184}
185
186static int
188{
189 int i,
190 diff,
191 dist = 0;
192
193 ALOOPBYTE(siglen)
194 {
195 diff = (unsigned char) (a[i] ^ b[i]);
196 /* Using the popcount functions here isn't likely to win */
197 dist += pg_number_of_ones[diff];
198 }
199 return dist;
200}
201
202static int
204{
205 if (LTG_ISALLTRUE(a))
206 {
207 if (LTG_ISALLTRUE(b))
208 return 0;
209 else
210 return ASIGLENBIT(siglen) - sizebitvec(LTG_SIGN(b), siglen);
211 }
212 else if (LTG_ISALLTRUE(b))
213 return ASIGLENBIT(siglen) - sizebitvec(LTG_SIGN(a), siglen);
214
215 return hemdistsign(LTG_SIGN(a), LTG_SIGN(b), siglen);
216}
217
218
219Datum
221{
224 float *penalty = (float *) PG_GETARG_POINTER(2);
225 int siglen = LTREE_GET_ASIGLEN();
226
227 *penalty = hemdist(origval, newval, siglen);
228 PG_RETURN_POINTER(penalty);
229}
230
231typedef struct
232{
233 OffsetNumber pos;
234 int32 cost;
235} SPLITCOST;
236
237static int
238comparecost(const void *a, const void *b)
239{
240 return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
241}
242
243Datum
245{
248 int siglen = LTREE_GET_ASIGLEN();
249 OffsetNumber k,
250 j;
251 ltree_gist *datum_l,
252 *datum_r;
253 BITVECP union_l,
254 union_r;
255 int32 size_alpha,
256 size_beta;
257 int32 size_waste,
258 waste = -1;
259 int32 nbytes;
260 OffsetNumber seed_1 = 0,
261 seed_2 = 0;
262 OffsetNumber *left,
263 *right;
264 OffsetNumber maxoff;
265 BITVECP ptr;
266 int i;
267 SPLITCOST *costvector;
268 ltree_gist *_k,
269 *_j;
270
271 maxoff = entryvec->n - 2;
272 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
273 v->spl_left = (OffsetNumber *) palloc(nbytes);
274 v->spl_right = (OffsetNumber *) palloc(nbytes);
275
276 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
277 {
278 _k = GETENTRY(entryvec, k);
279 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
280 {
281 size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
282 if (size_waste > waste)
283 {
284 waste = size_waste;
285 seed_1 = k;
286 seed_2 = j;
287 }
288 }
289 }
290
291 left = v->spl_left;
292 v->spl_nleft = 0;
293 right = v->spl_right;
294 v->spl_nright = 0;
295
296 if (seed_1 == 0 || seed_2 == 0)
297 {
298 seed_1 = 1;
299 seed_2 = 2;
300 }
301
302 /* form initial .. */
303 datum_l = ltree_gist_alloc(LTG_ISALLTRUE(GETENTRY(entryvec, seed_1)),
304 LTG_SIGN(GETENTRY(entryvec, seed_1)),
305 siglen, NULL, NULL);
306
307 datum_r = ltree_gist_alloc(LTG_ISALLTRUE(GETENTRY(entryvec, seed_2)),
308 LTG_SIGN(GETENTRY(entryvec, seed_2)),
309 siglen, NULL, NULL);
310
311 maxoff = OffsetNumberNext(maxoff);
312 /* sort before ... */
313 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
314 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
315 {
316 costvector[j - 1].pos = j;
317 _j = GETENTRY(entryvec, j);
318 size_alpha = hemdist(datum_l, _j, siglen);
319 size_beta = hemdist(datum_r, _j, siglen);
320 costvector[j - 1].cost = abs(size_alpha - size_beta);
321 }
322 qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost);
323
324 union_l = LTG_SIGN(datum_l);
325 union_r = LTG_SIGN(datum_r);
326
327 for (k = 0; k < maxoff; k++)
328 {
329 j = costvector[k].pos;
330 if (j == seed_1)
331 {
332 *left++ = j;
333 v->spl_nleft++;
334 continue;
335 }
336 else if (j == seed_2)
337 {
338 *right++ = j;
339 v->spl_nright++;
340 continue;
341 }
342 _j = GETENTRY(entryvec, j);
343 size_alpha = hemdist(datum_l, _j, siglen);
344 size_beta = hemdist(datum_r, _j, siglen);
345
346 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
347 {
348 if (LTG_ISALLTRUE(datum_l) || LTG_ISALLTRUE(_j))
349 {
350 if (!LTG_ISALLTRUE(datum_l))
351 memset(union_l, 0xff, siglen);
352 }
353 else
354 {
355 ptr = LTG_SIGN(_j);
356 ALOOPBYTE(siglen)
357 union_l[i] |= ptr[i];
358 }
359 *left++ = j;
360 v->spl_nleft++;
361 }
362 else
363 {
364 if (LTG_ISALLTRUE(datum_r) || LTG_ISALLTRUE(_j))
365 {
366 if (!LTG_ISALLTRUE(datum_r))
367 memset(union_r, 0xff, siglen);
368 }
369 else
370 {
371 ptr = LTG_SIGN(_j);
372 ALOOPBYTE(siglen)
373 union_r[i] |= ptr[i];
374 }
375 *right++ = j;
376 v->spl_nright++;
377 }
378 }
379
380 *right = *left = FirstOffsetNumber;
381
382 v->spl_ldatum = PointerGetDatum(datum_l);
383 v->spl_rdatum = PointerGetDatum(datum_r);
384
386}
387
388static bool
389gist_te(ltree_gist *key, ltree *query, int siglen)
390{
391 ltree_level *curq = LTREE_FIRST(query);
393 int qlen = query->numlevel;
394 unsigned int hv;
395
396 if (LTG_ISALLTRUE(key))
397 return true;
398
399 while (qlen > 0)
400 {
401 hv = ltree_crc32_sz(curq->name, curq->len);
402 if (!GETBIT(sign, AHASHVAL(hv, siglen)))
403 return false;
404 curq = LEVEL_NEXT(curq);
405 qlen--;
406 }
407
408 return true;
409}
410
411typedef struct LtreeSignature
412{
416
417static bool
419{
420 LtreeSignature *sig = cxt;
421
422 return (FLG_CANLOOKSIGN(val->flag)) ? GETBIT(sig->sign, AHASHVAL(val->val, sig->siglen)) : true;
423}
424
425static bool
426gist_qtxt(ltree_gist *key, ltxtquery *query, int siglen)
427{
429
430 if (LTG_ISALLTRUE(key))
431 return true;
432
433 sig.sign = LTG_SIGN(key);
434 sig.siglen = siglen;
435
436 return ltree_execute(GETQUERY(query),
437 &sig, false,
439}
440
441static bool
442gist_qe(ltree_gist *key, lquery *query, int siglen)
443{
444 lquery_level *curq = LQUERY_FIRST(query);
446 int qlen = query->numlevel;
447
448 if (LTG_ISALLTRUE(key))
449 return true;
450
451 while (qlen > 0)
452 {
453 if (curq->numvar && LQL_CANLOOKSIGN(curq))
454 {
455 bool isexist = false;
456 int vlen = curq->numvar;
457 lquery_variant *curv = LQL_FIRST(curq);
458
459 while (vlen > 0)
460 {
461 if (GETBIT(sign, AHASHVAL(curv->val, siglen)))
462 {
463 isexist = true;
464 break;
465 }
466 curv = LVAR_NEXT(curv);
467 vlen--;
468 }
469 if (!isexist)
470 return false;
471 }
472
473 curq = LQL_NEXT(curq);
474 qlen--;
475 }
476
477 return true;
478}
479
480static bool
481_arrq_cons(ltree_gist *key, ArrayType *_query, int siglen)
482{
483 lquery *query = (lquery *) ARR_DATA_PTR(_query);
484 int num = ArrayGetNItems(ARR_NDIM(_query), ARR_DIMS(_query));
485
486 if (ARR_NDIM(_query) > 1)
488 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
489 errmsg("array must be one-dimensional")));
490 if (array_contains_nulls(_query))
492 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
493 errmsg("array must not contain nulls")));
494
495 while (num > 0)
496 {
497 if (gist_qe(key, query, siglen))
498 return true;
499 num--;
500 query = (lquery *) NEXTVAL(query);
501 }
502 return false;
503}
504
505Datum
507{
508 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
509 void *query = PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
511
512 /* Oid subtype = PG_GETARG_OID(3); */
513 bool *recheck = (bool *) PG_GETARG_POINTER(4);
514 int siglen = LTREE_GET_ASIGLEN();
516 bool res = false;
517
518 /* All cases served by this function are inexact */
519 *recheck = true;
520
521 switch (strategy)
522 {
523 case 10:
524 case 11:
525 res = gist_te(key, (ltree *) query, siglen);
526 break;
527 case 12:
528 case 13:
529 res = gist_qe(key, (lquery *) query, siglen);
530 break;
531 case 14:
532 case 15:
533 res = gist_qtxt(key, (ltxtquery *) query, siglen);
534 break;
535 case 16:
536 case 17:
537 res = _arrq_cons(key, (ArrayType *) query, siglen);
538 break;
539 default:
540 /* internal error */
541 elog(ERROR, "unrecognized StrategyNumber: %d", strategy);
542 }
543 PG_FREE_IF_COPY(query, 1);
545}
546
547Datum
549{
551
552 init_local_reloptions(relopts, sizeof(LtreeGistOptions));
553 add_local_int_reloption(relopts, "siglen", "signature length",
555 offsetof(LtreeGistOptions, siglen));
556
558}
#define GETQUERY(x)
Definition: _int.h:157
#define NEXTVAL(x)
Definition: _ltree_gist.c:29
static bool gist_qe(ltree_gist *key, lquery *query, int siglen)
Definition: _ltree_gist.c:442
#define WISH_F(a, b, c)
Definition: _ltree_gist.c:31
static bool _arrq_cons(ltree_gist *key, ArrayType *_query, int siglen)
Definition: _ltree_gist.c:481
static bool gist_te(ltree_gist *key, ltree *query, int siglen)
Definition: _ltree_gist.c:389
static void hashing(BITVECP sign, ltree *t, int siglen)
Definition: _ltree_gist.c:35
static int32 sizebitvec(BITVECP sign, int siglen)
Definition: _ltree_gist.c:181
static int hemdistsign(BITVECP a, BITVECP b, int siglen)
Definition: _ltree_gist.c:187
Datum _ltree_union(PG_FUNCTION_ARGS)
Definition: _ltree_gist.c:156
#define GETENTRY(vec, pos)
Definition: _ltree_gist.c:28
PG_FUNCTION_INFO_V1(_ltree_compress)
Datum _ltree_penalty(PG_FUNCTION_ARGS)
Definition: _ltree_gist.c:220
static bool gist_qtxt(ltree_gist *key, ltxtquery *query, int siglen)
Definition: _ltree_gist.c:426
static int hemdist(ltree_gist *a, ltree_gist *b, int siglen)
Definition: _ltree_gist.c:203
static int32 unionkey(BITVECP sbase, ltree_gist *add, int siglen)
Definition: _ltree_gist.c:142
struct LtreeSignature LtreeSignature
Datum _ltree_gist_options(PG_FUNCTION_ARGS)
Definition: _ltree_gist.c:548
static bool checkcondition_bit(void *cxt, ITEM *val)
Definition: _ltree_gist.c:418
Datum _ltree_compress(PG_FUNCTION_ARGS)
Definition: _ltree_gist.c:51
Datum _ltree_consistent(PG_FUNCTION_ARGS)
Definition: _ltree_gist.c:506
Datum _ltree_same(PG_FUNCTION_ARGS)
Definition: _ltree_gist.c:109
Datum _ltree_picksplit(PG_FUNCTION_ARGS)
Definition: _ltree_gist.c:244
static int comparecost(const void *a, const void *b)
Definition: _ltree_gist.c:238
#define ARR_NDIM(a)
Definition: array.h:290
#define ARR_DATA_PTR(a)
Definition: array.h:322
#define DatumGetArrayTypeP(X)
Definition: array.h:261
#define ARR_DIMS(a)
Definition: array.h:294
bool array_contains_nulls(ArrayType *array)
Definition: arrayfuncs.c:3767
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:57
#define GETBIT(x, i)
Definition: blutils.c:30
int32_t int32
Definition: c.h:484
unsigned int ltree_crc32_sz(const char *buf, int size)
Definition: crc32.c:24
struct cursor * cur
Definition: ecpg.c:29
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:272
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:245
#define newval
char * BITVECP
Definition: hstore_gist.c:31
long val
Definition: informix.c:689
char sign
Definition: informix.c:693
int b
Definition: isn.c:69
int a
Definition: isn.c:68
int j
Definition: isn.c:73
int i
Definition: isn.c:72
#define LTG_HDRSIZE
Definition: ltree.h:277
#define FLG_CANLOOKSIGN(x)
Definition: ltree.h:107
#define LTREE_FIRST(x)
Definition: ltree.h:51
#define AHASHVAL(val, siglen)
Definition: ltree.h:305
#define LTREE_GET_ASIGLEN()
Definition: ltree.h:297
#define LTG_ALLTRUE
Definition: ltree.h:274
#define LEVEL_NEXT(x)
Definition: ltree.h:40
#define ASIGLENBIT(siglen)
Definition: ltree.h:300
#define LTREE_ASIGLEN_MAX
Definition: ltree.h:296
#define ALOOPBYTE(siglen)
Definition: ltree.h:302
bool ltree_execute(ITEM *curitem, void *checkval, bool calcnot, bool(*chkcond)(void *checkval, ITEM *val))
Definition: ltxtquery_op.c:20
#define LQL_CANLOOKSIGN(x)
Definition: ltree.h:111
#define LQL_FIRST(x)
Definition: ltree.h:101
#define AHASH(sign, val, siglen)
Definition: ltree.h:306
#define LVAR_NEXT(x)
Definition: ltree.h:72
#define LQL_NEXT(x)
Definition: ltree.h:100
#define LQUERY_FIRST(x)
Definition: ltree.h:124
#define LTG_ISALLTRUE(x)
Definition: ltree.h:281
#define LTG_SIGN(x)
Definition: ltree.h:278
ltree_gist * ltree_gist_alloc(bool isalltrue, BITVECP sign, int siglen, ltree *left, ltree *right)
Definition: ltree_gist.c:42
#define LTREE_ASIGLEN_DEFAULT
Definition: ltree.h:295
void * palloc(Size size)
Definition: mcxt.c:1317
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
PGDLLIMPORT const uint8 pg_number_of_ones[256]
Definition: pg_bitutils.c:87
static uint64 pg_popcount(const char *buf, int bytes)
Definition: pg_bitutils.h:337
static int sig
Definition: pg_ctl.c:80
#define qsort(a, b, c, d)
Definition: port.h:475
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:327
uintptr_t Datum
Definition: postgres.h:69
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:317
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
void init_local_reloptions(local_relopts *relopts, Size relopt_struct_size)
Definition: reloptions.c:753
void add_local_int_reloption(local_relopts *relopts, const char *name, const char *desc, int default_val, int min_val, int max_val, int offset)
Definition: reloptions.c:937
static pg_noinline void Size size
Definition: slab.c:607
uint16 StrategyNumber
Definition: stratnum.h:22
OffsetNumber offset
Definition: gist.h:164
Datum key
Definition: gist.h:161
Page page
Definition: gist.h:163
Relation rel
Definition: gist.h:162
bool leafkey
Definition: gist.h:165
int spl_nleft
Definition: gist.h:144
OffsetNumber * spl_right
Definition: gist.h:148
Datum spl_ldatum
Definition: gist.h:145
Datum spl_rdatum
Definition: gist.h:150
int spl_nright
Definition: gist.h:149
OffsetNumber * spl_left
Definition: gist.h:143
int32 n
Definition: gist.h:236
Definition: _int.h:141
int32 cost
Definition: hstore_gist.c:354
OffsetNumber pos
Definition: hstore_gist.c:353
char * name
Definition: type.h:139
uint16 numvar
Definition: ltree.h:92
int32 val
Definition: ltree.h:60
Definition: ltree.h:114
uint16 numlevel
Definition: ltree.h:116
uint32 flag
Definition: ltree.h:269
char name[FLEXIBLE_ARRAY_MEMBER]
Definition: ltree.h:36
uint16 len
Definition: ltree.h:35
Definition: ltree.h:43
uint16 numlevel
Definition: ltree.h:45
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
#define VARSIZE(PTR)
Definition: varatt.h:279