PostgreSQL Source Code git master
trgm_gist.c
Go to the documentation of this file.
1/*
2 * contrib/pg_trgm/trgm_gist.c
3 */
4#include "postgres.h"
5
6#include "access/reloptions.h"
7#include "access/stratnum.h"
8#include "fmgr.h"
9#include "port/pg_bitutils.h"
10#include "trgm.h"
11#include "varatt.h"
12
13/* gist_trgm_ops opclass options */
14typedef struct
15{
16 int32 vl_len_; /* varlena header (do not touch directly!) */
17 int siglen; /* signature length in bytes */
19
20#define GET_SIGLEN() (PG_HAS_OPCLASS_OPTIONS() ? \
21 ((TrgmGistOptions *) PG_GET_OPCLASS_OPTIONS())->siglen : \
22 SIGLEN_DEFAULT)
23
24typedef struct
25{
26 /* most recent inputs to gtrgm_consistent */
29 /* extracted trigrams for query */
31 /* if a regex operator, the extracted graph */
33
34 /*
35 * The "query" and "trigrams" are stored in the same palloc block as this
36 * cache struct, at MAXALIGN'ed offsets. The graph however isn't.
37 */
39
40#define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))
41
42
54
55
58{
60 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
61 errmsg("cannot accept a value of type %s", "gtrgm")));
62
63 PG_RETURN_VOID(); /* keep compiler quiet */
64}
65
68{
70 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
71 errmsg("cannot display a value of type %s", "gtrgm")));
72
73 PG_RETURN_VOID(); /* keep compiler quiet */
74}
75
76static TRGM *
77gtrgm_alloc(bool isalltrue, int siglen, BITVECP sign)
78{
79 int flag = SIGNKEY | (isalltrue ? ALLISTRUE : 0);
80 int size = CALCGTSIZE(flag, siglen);
81 TRGM *res = palloc(size);
82
84 res->flag = flag;
85
86 if (!isalltrue)
87 {
88 if (sign)
89 memcpy(GETSIGN(res), sign, siglen);
90 else
91 memset(GETSIGN(res), 0, siglen);
92 }
93
94 return res;
95}
96
97static void
98makesign(BITVECP sign, TRGM *a, int siglen)
99{
100 int32 k,
101 len = ARRNELEM(a);
102 trgm *ptr = GETARR(a);
103 int32 tmp = 0;
104
105 MemSet(sign, 0, siglen);
106 SETBIT(sign, SIGLENBIT(siglen)); /* set last unused bit */
107 for (k = 0; k < len; k++)
108 {
109 CPTRGM(((char *) &tmp), ptr + k);
110 HASH(sign, tmp, siglen);
111 }
112}
113
114Datum
116{
117 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
118 int siglen = GET_SIGLEN();
119 GISTENTRY *retval = entry;
120
121 if (entry->leafkey)
122 { /* trgm */
123 TRGM *res;
124 text *val = DatumGetTextPP(entry->key);
125
127 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
129 entry->rel, entry->page,
130 entry->offset, false);
131 }
132 else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
133 !ISALLTRUE(DatumGetPointer(entry->key)))
134 {
135 int32 i;
136 TRGM *res;
138
139 LOOPBYTE(siglen)
140 {
141 if ((sign[i] & 0xff) != 0xff)
142 PG_RETURN_POINTER(retval);
143 }
144
145 res = gtrgm_alloc(true, siglen, sign);
146 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
148 entry->rel, entry->page,
149 entry->offset, false);
150 }
151 PG_RETURN_POINTER(retval);
152}
153
154Datum
156{
157 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
158 GISTENTRY *retval;
159 text *key;
160
161 key = DatumGetTextPP(entry->key);
162
163 if (key != (text *) DatumGetPointer(entry->key))
164 {
165 /* need to pass back the decompressed item */
166 retval = palloc(sizeof(GISTENTRY));
168 entry->rel, entry->page, entry->offset, entry->leafkey);
169 PG_RETURN_POINTER(retval);
170 }
171 else
172 {
173 /* we can return the entry as-is */
174 PG_RETURN_POINTER(entry);
175 }
176}
177
178static int32
180{
181 int32 count = 0;
182 int32 k,
183 len = ARRNELEM(qtrg);
184 trgm *ptr = GETARR(qtrg);
185 int32 tmp = 0;
186
187 for (k = 0; k < len; k++)
188 {
189 CPTRGM(((char *) &tmp), ptr + k);
190 count += GETBIT(sign, HASHVAL(tmp, siglen));
191 }
192
193 return count;
194}
195
196Datum
198{
199 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
200 text *query = PG_GETARG_TEXT_P(1);
202
203 /* Oid subtype = PG_GETARG_OID(3); */
204 bool *recheck = (bool *) PG_GETARG_POINTER(4);
205 int siglen = GET_SIGLEN();
206 TRGM *key = (TRGM *) DatumGetPointer(entry->key);
207 TRGM *qtrg;
208 bool res;
209 Size querysize = VARSIZE(query);
211 double nlimit;
212
213 /*
214 * We keep the extracted trigrams in cache, because trigram extraction is
215 * relatively CPU-expensive. When trying to reuse a cached value, check
216 * strategy number not just query itself, because trigram extraction
217 * depends on strategy.
218 *
219 * The cached structure is a single palloc chunk containing the
220 * gtrgm_consistent_cache header, then the input query (4-byte length
221 * word, uncompressed, starting at a MAXALIGN boundary), then the TRGM
222 * value (also starting at a MAXALIGN boundary). However we don't try to
223 * include the regex graph (if any) in that struct. (XXX currently, this
224 * approach can leak regex graphs across index rescans. Not clear if
225 * that's worth fixing.)
226 */
227 cache = (gtrgm_consistent_cache *) fcinfo->flinfo->fn_extra;
228 if (cache == NULL ||
229 cache->strategy != strategy ||
230 VARSIZE(cache->query) != querysize ||
231 memcmp(cache->query, query, querysize) != 0)
232 {
233 gtrgm_consistent_cache *newcache;
234 TrgmPackedGraph *graph = NULL;
235 Size qtrgsize;
236
237 switch (strategy)
238 {
243 qtrg = generate_trgm(VARDATA(query),
244 querysize - VARHDRSZ);
245 break;
247#ifndef IGNORECASE
248 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
249#endif
250 /* FALL THRU */
252 qtrg = generate_wildcard_trgm(VARDATA(query),
253 querysize - VARHDRSZ);
254 break;
256#ifndef IGNORECASE
257 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
258#endif
259 /* FALL THRU */
261 qtrg = createTrgmNFA(query, PG_GET_COLLATION(),
262 &graph, fcinfo->flinfo->fn_mcxt);
263 /* just in case an empty array is returned ... */
264 if (qtrg && ARRNELEM(qtrg) <= 0)
265 {
266 pfree(qtrg);
267 qtrg = NULL;
268 }
269 break;
270 default:
271 elog(ERROR, "unrecognized strategy number: %d", strategy);
272 qtrg = NULL; /* keep compiler quiet */
273 break;
274 }
275
276 qtrgsize = qtrg ? VARSIZE(qtrg) : 0;
277
278 newcache = (gtrgm_consistent_cache *)
279 MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
281 MAXALIGN(querysize) +
282 qtrgsize);
283
284 newcache->strategy = strategy;
285 newcache->query = (text *)
286 ((char *) newcache + MAXALIGN(sizeof(gtrgm_consistent_cache)));
287 memcpy(newcache->query, query, querysize);
288 if (qtrg)
289 {
290 newcache->trigrams = (TRGM *)
291 ((char *) newcache->query + MAXALIGN(querysize));
292 memcpy((char *) newcache->trigrams, qtrg, qtrgsize);
293 /* release qtrg in case it was made in fn_mcxt */
294 pfree(qtrg);
295 }
296 else
297 newcache->trigrams = NULL;
298 newcache->graph = graph;
299
300 if (cache)
301 pfree(cache);
302 fcinfo->flinfo->fn_extra = newcache;
303 cache = newcache;
304 }
305
306 qtrg = cache->trigrams;
307
308 switch (strategy)
309 {
313
314 /*
315 * Similarity search is exact. (Strict) word similarity search is
316 * inexact
317 */
318 *recheck = (strategy != SimilarityStrategyNumber);
319
320 nlimit = index_strategy_get_limit(strategy);
321
322 if (GIST_LEAF(entry))
323 { /* all leafs contains orig trgm */
324 double tmpsml = cnt_sml(qtrg, key, *recheck);
325
326 res = (tmpsml >= nlimit);
327 }
328 else if (ISALLTRUE(key))
329 { /* non-leaf contains signature */
330 res = true;
331 }
332 else
333 { /* non-leaf contains signature */
334 int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key), siglen);
335 int32 len = ARRNELEM(qtrg);
336
337 if (len == 0)
338 res = false;
339 else
340 res = (((((float8) count) / ((float8) len))) >= nlimit);
341 }
342 break;
344#ifndef IGNORECASE
345 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
346#endif
347 /* FALL THRU */
350 /* Wildcard and equal search are inexact */
351 *recheck = true;
352
353 /*
354 * Check if all the extracted trigrams can be present in child
355 * nodes.
356 */
357 if (GIST_LEAF(entry))
358 { /* all leafs contains orig trgm */
359 res = trgm_contained_by(qtrg, key);
360 }
361 else if (ISALLTRUE(key))
362 { /* non-leaf contains signature */
363 res = true;
364 }
365 else
366 { /* non-leaf contains signature */
367 int32 k,
368 tmp = 0,
369 len = ARRNELEM(qtrg);
370 trgm *ptr = GETARR(qtrg);
372
373 res = true;
374 for (k = 0; k < len; k++)
375 {
376 CPTRGM(((char *) &tmp), ptr + k);
377 if (!GETBIT(sign, HASHVAL(tmp, siglen)))
378 {
379 res = false;
380 break;
381 }
382 }
383 }
384 break;
386#ifndef IGNORECASE
387 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
388#endif
389 /* FALL THRU */
391 /* Regexp search is inexact */
392 *recheck = true;
393
394 /* Check regex match as much as we can with available info */
395 if (qtrg)
396 {
397 if (GIST_LEAF(entry))
398 { /* all leafs contains orig trgm */
399 bool *check;
400
401 check = trgm_presence_map(qtrg, key);
402 res = trigramsMatchGraph(cache->graph, check);
403 pfree(check);
404 }
405 else if (ISALLTRUE(key))
406 { /* non-leaf contains signature */
407 res = true;
408 }
409 else
410 { /* non-leaf contains signature */
411 int32 k,
412 tmp = 0,
413 len = ARRNELEM(qtrg);
414 trgm *ptr = GETARR(qtrg);
416 bool *check;
417
418 /*
419 * GETBIT() tests may give false positives, due to limited
420 * size of the sign array. But since trigramsMatchGraph()
421 * implements a monotone boolean function, false positives
422 * in the check array can't lead to false negative answer.
423 * So we can apply trigramsMatchGraph despite uncertainty,
424 * and that usefully improves the quality of the search.
425 */
426 check = (bool *) palloc(len * sizeof(bool));
427 for (k = 0; k < len; k++)
428 {
429 CPTRGM(((char *) &tmp), ptr + k);
430 check[k] = GETBIT(sign, HASHVAL(tmp, siglen));
431 }
432 res = trigramsMatchGraph(cache->graph, check);
433 pfree(check);
434 }
435 }
436 else
437 {
438 /* trigram-free query must be rechecked everywhere */
439 res = true;
440 }
441 break;
442 default:
443 elog(ERROR, "unrecognized strategy number: %d", strategy);
444 res = false; /* keep compiler quiet */
445 break;
446 }
447
449}
450
451Datum
453{
454 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
455 text *query = PG_GETARG_TEXT_P(1);
457
458 /* Oid subtype = PG_GETARG_OID(3); */
459 bool *recheck = (bool *) PG_GETARG_POINTER(4);
460 int siglen = GET_SIGLEN();
461 TRGM *key = (TRGM *) DatumGetPointer(entry->key);
462 TRGM *qtrg;
463 float8 res;
464 Size querysize = VARSIZE(query);
465 char *cache = (char *) fcinfo->flinfo->fn_extra;
466
467 /*
468 * Cache the generated trigrams across multiple calls with the same query.
469 */
470 if (cache == NULL ||
471 VARSIZE(cache) != querysize ||
472 memcmp(cache, query, querysize) != 0)
473 {
474 char *newcache;
475
476 qtrg = generate_trgm(VARDATA(query), querysize - VARHDRSZ);
477
478 newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
479 MAXALIGN(querysize) +
480 VARSIZE(qtrg));
481
482 memcpy(newcache, query, querysize);
483 memcpy(newcache + MAXALIGN(querysize), qtrg, VARSIZE(qtrg));
484
485 if (cache)
486 pfree(cache);
487 fcinfo->flinfo->fn_extra = newcache;
488 cache = newcache;
489 }
490
491 qtrg = (TRGM *) (cache + MAXALIGN(querysize));
492
493 switch (strategy)
494 {
498 /* Only plain trigram distance is exact */
499 *recheck = (strategy != DistanceStrategyNumber);
500 if (GIST_LEAF(entry))
501 { /* all leafs contains orig trgm */
502
503 /*
504 * Prevent gcc optimizing the sml variable using volatile
505 * keyword. Otherwise res can differ from the
506 * word_similarity_dist_op() function.
507 */
508 float4 volatile sml = cnt_sml(qtrg, key, *recheck);
509
510 res = 1.0 - sml;
511 }
512 else if (ISALLTRUE(key))
513 { /* all leafs contains orig trgm */
514 res = 0.0;
515 }
516 else
517 { /* non-leaf contains signature */
518 int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key), siglen);
519 int32 len = ARRNELEM(qtrg);
520
521 res = (len == 0) ? -1.0 : 1.0 - ((float8) count) / ((float8) len);
522 }
523 break;
524 default:
525 elog(ERROR, "unrecognized strategy number: %d", strategy);
526 res = 0; /* keep compiler quiet */
527 break;
528 }
529
531}
532
533static int32
534unionkey(BITVECP sbase, TRGM *add, int siglen)
535{
536 int32 i;
537
538 if (ISSIGNKEY(add))
539 {
540 BITVECP sadd = GETSIGN(add);
541
542 if (ISALLTRUE(add))
543 return 1;
544
545 LOOPBYTE(siglen)
546 sbase[i] |= sadd[i];
547 }
548 else
549 {
550 trgm *ptr = GETARR(add);
551 int32 tmp = 0;
552
553 for (i = 0; i < ARRNELEM(add); i++)
554 {
555 CPTRGM(((char *) &tmp), ptr + i);
556 HASH(sbase, tmp, siglen);
557 }
558 }
559 return 0;
560}
561
562
563Datum
565{
567 int32 len = entryvec->n;
568 int *size = (int *) PG_GETARG_POINTER(1);
569 int siglen = GET_SIGLEN();
570 int32 i;
571 TRGM *result = gtrgm_alloc(false, siglen, NULL);
572 BITVECP base = GETSIGN(result);
573
574 for (i = 0; i < len; i++)
575 {
576 if (unionkey(base, GETENTRY(entryvec, i), siglen))
577 {
578 result->flag = ALLISTRUE;
579 SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen));
580 break;
581 }
582 }
583
584 *size = VARSIZE(result);
585
586 PG_RETURN_POINTER(result);
587}
588
589Datum
591{
592 TRGM *a = (TRGM *) PG_GETARG_POINTER(0);
593 TRGM *b = (TRGM *) PG_GETARG_POINTER(1);
594 bool *result = (bool *) PG_GETARG_POINTER(2);
595 int siglen = GET_SIGLEN();
596
597 if (ISSIGNKEY(a))
598 { /* then b also ISSIGNKEY */
599 if (ISALLTRUE(a) && ISALLTRUE(b))
600 *result = true;
601 else if (ISALLTRUE(a))
602 *result = false;
603 else if (ISALLTRUE(b))
604 *result = false;
605 else
606 {
607 int32 i;
608 BITVECP sa = GETSIGN(a),
609 sb = GETSIGN(b);
610
611 *result = true;
612 LOOPBYTE(siglen)
613 {
614 if (sa[i] != sb[i])
615 {
616 *result = false;
617 break;
618 }
619 }
620 }
621 }
622 else
623 { /* a and b ISARRKEY */
624 int32 lena = ARRNELEM(a),
625 lenb = ARRNELEM(b);
626
627 if (lena != lenb)
628 *result = false;
629 else
630 {
631 trgm *ptra = GETARR(a),
632 *ptrb = GETARR(b);
633 int32 i;
634
635 *result = true;
636 for (i = 0; i < lena; i++)
637 if (CMPTRGM(ptra + i, ptrb + i))
638 {
639 *result = false;
640 break;
641 }
642 }
643 }
644
645 PG_RETURN_POINTER(result);
646}
647
648static int32
650{
651 return pg_popcount(sign, siglen);
652}
653
654static int
656{
657 int i,
658 diff,
659 dist = 0;
660
661 LOOPBYTE(siglen)
662 {
663 diff = (unsigned char) (a[i] ^ b[i]);
664 /* Using the popcount functions here isn't likely to win */
665 dist += pg_number_of_ones[diff];
666 }
667 return dist;
668}
669
670static int
671hemdist(TRGM *a, TRGM *b, int siglen)
672{
673 if (ISALLTRUE(a))
674 {
675 if (ISALLTRUE(b))
676 return 0;
677 else
678 return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen);
679 }
680 else if (ISALLTRUE(b))
681 return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
682
683 return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
684}
685
686Datum
688{
689 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
690 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
691 float *penalty = (float *) PG_GETARG_POINTER(2);
692 int siglen = GET_SIGLEN();
693 TRGM *origval = (TRGM *) DatumGetPointer(origentry->key);
694 TRGM *newval = (TRGM *) DatumGetPointer(newentry->key);
695 BITVECP orig = GETSIGN(origval);
696
697 *penalty = 0.0;
698
699 if (ISARRKEY(newval))
700 {
701 char *cache = (char *) fcinfo->flinfo->fn_extra;
702 TRGM *cachedVal = (TRGM *) (cache + MAXALIGN(siglen));
703 Size newvalsize = VARSIZE(newval);
705
706 /*
707 * Cache the sign data across multiple calls with the same newval.
708 */
709 if (cache == NULL ||
710 VARSIZE(cachedVal) != newvalsize ||
711 memcmp(cachedVal, newval, newvalsize) != 0)
712 {
713 char *newcache;
714
715 newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
716 MAXALIGN(siglen) +
717 newvalsize);
718
719 makesign((BITVECP) newcache, newval, siglen);
720
721 cachedVal = (TRGM *) (newcache + MAXALIGN(siglen));
722 memcpy(cachedVal, newval, newvalsize);
723
724 if (cache)
725 pfree(cache);
726 fcinfo->flinfo->fn_extra = newcache;
727 cache = newcache;
728 }
729
730 sign = (BITVECP) cache;
731
732 if (ISALLTRUE(origval))
733 *penalty = ((float) (SIGLENBIT(siglen) - sizebitvec(sign, siglen))) / (float) (SIGLENBIT(siglen) + 1);
734 else
735 *penalty = hemdistsign(sign, orig, siglen);
736 }
737 else
738 *penalty = hemdist(origval, newval, siglen);
739 PG_RETURN_POINTER(penalty);
740}
741
742typedef struct
743{
746} CACHESIGN;
747
748static void
749fillcache(CACHESIGN *item, TRGM *key, BITVECP sign, int siglen)
750{
751 item->allistrue = false;
752 item->sign = sign;
753 if (ISARRKEY(key))
754 makesign(item->sign, key, siglen);
755 else if (ISALLTRUE(key))
756 item->allistrue = true;
757 else
758 memcpy(item->sign, GETSIGN(key), siglen);
759}
760
761#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
762typedef struct
763{
764 OffsetNumber pos;
765 int32 cost;
766} SPLITCOST;
767
768static int
769comparecost(const void *a, const void *b)
770{
771 if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
772 return 0;
773 else
774 return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
775}
776
777
778static int
780{
781 if (a->allistrue)
782 {
783 if (b->allistrue)
784 return 0;
785 else
786 return SIGLENBIT(siglen) - sizebitvec(b->sign, siglen);
787 }
788 else if (b->allistrue)
789 return SIGLENBIT(siglen) - sizebitvec(a->sign, siglen);
790
791 return hemdistsign(a->sign, b->sign, siglen);
792}
793
794Datum
796{
798 OffsetNumber maxoff = entryvec->n - 1;
800 int siglen = GET_SIGLEN();
801 OffsetNumber k,
802 j;
803 TRGM *datum_l,
804 *datum_r;
805 BITVECP union_l,
806 union_r;
807 int32 size_alpha,
808 size_beta;
809 int32 size_waste,
810 waste = -1;
811 int32 nbytes;
812 OffsetNumber seed_1 = 0,
813 seed_2 = 0;
814 OffsetNumber *left,
815 *right;
816 BITVECP ptr;
817 int i;
818 CACHESIGN *cache;
819 char *cache_sign;
820 SPLITCOST *costvector;
821
822 /* cache the sign data for each existing item */
823 cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 1));
824 cache_sign = palloc(siglen * (maxoff + 1));
825
826 for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k))
827 fillcache(&cache[k], GETENTRY(entryvec, k), &cache_sign[siglen * k],
828 siglen);
829
830 /* now find the two furthest-apart items */
831 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
832 {
833 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
834 {
835 size_waste = hemdistcache(&(cache[j]), &(cache[k]), siglen);
836 if (size_waste > waste)
837 {
838 waste = size_waste;
839 seed_1 = k;
840 seed_2 = j;
841 }
842 }
843 }
844
845 /* just in case we didn't make a selection ... */
846 if (seed_1 == 0 || seed_2 == 0)
847 {
848 seed_1 = 1;
849 seed_2 = 2;
850 }
851
852 /* initialize the result vectors */
853 nbytes = maxoff * sizeof(OffsetNumber);
854 v->spl_left = left = (OffsetNumber *) palloc(nbytes);
855 v->spl_right = right = (OffsetNumber *) palloc(nbytes);
856 v->spl_nleft = 0;
857 v->spl_nright = 0;
858
859 /* form initial .. */
860 datum_l = gtrgm_alloc(cache[seed_1].allistrue, siglen, cache[seed_1].sign);
861 datum_r = gtrgm_alloc(cache[seed_2].allistrue, siglen, cache[seed_2].sign);
862
863 union_l = GETSIGN(datum_l);
864 union_r = GETSIGN(datum_r);
865
866 /* sort before ... */
867 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
868 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
869 {
870 costvector[j - 1].pos = j;
871 size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]), siglen);
872 size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]), siglen);
873 costvector[j - 1].cost = abs(size_alpha - size_beta);
874 }
875 qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost);
876
877 for (k = 0; k < maxoff; k++)
878 {
879 j = costvector[k].pos;
880 if (j == seed_1)
881 {
882 *left++ = j;
883 v->spl_nleft++;
884 continue;
885 }
886 else if (j == seed_2)
887 {
888 *right++ = j;
889 v->spl_nright++;
890 continue;
891 }
892
893 if (ISALLTRUE(datum_l) || cache[j].allistrue)
894 {
895 if (ISALLTRUE(datum_l) && cache[j].allistrue)
896 size_alpha = 0;
897 else
898 size_alpha = SIGLENBIT(siglen) -
899 sizebitvec((cache[j].allistrue) ? GETSIGN(datum_l) :
900 GETSIGN(cache[j].sign),
901 siglen);
902 }
903 else
904 size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l), siglen);
905
906 if (ISALLTRUE(datum_r) || cache[j].allistrue)
907 {
908 if (ISALLTRUE(datum_r) && cache[j].allistrue)
909 size_beta = 0;
910 else
911 size_beta = SIGLENBIT(siglen) -
912 sizebitvec((cache[j].allistrue) ? GETSIGN(datum_r) :
913 GETSIGN(cache[j].sign),
914 siglen);
915 }
916 else
917 size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r), siglen);
918
919 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
920 {
921 if (ISALLTRUE(datum_l) || cache[j].allistrue)
922 {
923 if (!ISALLTRUE(datum_l))
924 memset(GETSIGN(datum_l), 0xff, siglen);
925 }
926 else
927 {
928 ptr = cache[j].sign;
929 LOOPBYTE(siglen)
930 union_l[i] |= ptr[i];
931 }
932 *left++ = j;
933 v->spl_nleft++;
934 }
935 else
936 {
937 if (ISALLTRUE(datum_r) || cache[j].allistrue)
938 {
939 if (!ISALLTRUE(datum_r))
940 memset(GETSIGN(datum_r), 0xff, siglen);
941 }
942 else
943 {
944 ptr = cache[j].sign;
945 LOOPBYTE(siglen)
946 union_r[i] |= ptr[i];
947 }
948 *right++ = j;
949 v->spl_nright++;
950 }
951 }
952
953 v->spl_ldatum = PointerGetDatum(datum_l);
954 v->spl_rdatum = PointerGetDatum(datum_r);
955
957}
958
959Datum
961{
963
964 init_local_reloptions(relopts, sizeof(TrgmGistOptions));
965 add_local_int_reloption(relopts, "siglen",
966 "signature length in bytes",
968 offsetof(TrgmGistOptions, siglen));
969
971}
#define GETBIT(x, i)
Definition: blutils.c:30
#define SETBIT(x, i)
Definition: blutils.c:29
#define MAXALIGN(LEN)
Definition: c.h:768
#define VARHDRSZ
Definition: c.h:649
double float8
Definition: c.h:587
int32_t int32
Definition: c.h:484
float float4
Definition: c.h:586
#define MemSet(start, val, len)
Definition: c.h:977
size_t Size
Definition: c.h:562
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
#define DatumGetTextPP(X)
Definition: fmgr.h:292
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:272
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
#define PG_GETARG_TEXT_P(n)
Definition: fmgr.h:336
#define GIST_LEAF(entry)
Definition: gist.h:171
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:245
#define newval
#define HASHVAL(val, siglen)
Definition: hstore_gist.c:45
#define LOOPBYTE(siglen)
Definition: hstore_gist.c:33
#define ALLISTRUE
Definition: hstore_gist.c:55
#define CALCGTSIZE(flag, siglen)
Definition: hstore_gist.c:60
#define ISALLTRUE(x)
Definition: hstore_gist.c:57
#define SIGLEN_MAX
Definition: hstore_gist.c:24
#define SIGLEN_DEFAULT
Definition: hstore_gist.c:23
char * BITVECP
Definition: hstore_gist.c:31
#define SIGLENBIT(siglen)
Definition: hstore_gist.c:25
#define GETSIGN(x)
Definition: hstore_gist.c:62
#define HASH(sign, val, siglen)
Definition: hstore_gist.c:46
long val
Definition: informix.c:689
char sign
Definition: informix.c:693
int b
Definition: isn.c:69
int a
Definition: isn.c:68
int j
Definition: isn.c:73
int i
Definition: isn.c:72
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1181
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
PGDLLIMPORT const uint8 pg_number_of_ones[256]
Definition: pg_bitutils.c:87
static uint64 pg_popcount(const char *buf, int bytes)
Definition: pg_bitutils.h:337
const void size_t len
#define qsort(a, b, c, d)
Definition: port.h:475
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:327
uintptr_t Datum
Definition: postgres.h:69
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:317
void init_local_reloptions(local_relopts *relopts, Size relopt_struct_size)
Definition: reloptions.c:753
void add_local_int_reloption(local_relopts *relopts, const char *name, const char *desc, int default_val, int min_val, int max_val, int offset)
Definition: reloptions.c:937
static pg_noinline void Size size
Definition: slab.c:607
uint16 StrategyNumber
Definition: stratnum.h:22
BITVECP sign
Definition: trgm_gist.c:745
bool allistrue
Definition: trgm_gist.c:744
OffsetNumber offset
Definition: gist.h:164
Datum key
Definition: gist.h:161
Page page
Definition: gist.h:163
Relation rel
Definition: gist.h:162
bool leafkey
Definition: gist.h:165
int spl_nleft
Definition: gist.h:144
OffsetNumber * spl_right
Definition: gist.h:148
Datum spl_ldatum
Definition: gist.h:145
Datum spl_rdatum
Definition: gist.h:150
int spl_nright
Definition: gist.h:149
OffsetNumber * spl_left
Definition: gist.h:143
int32 n
Definition: gist.h:236
int32 cost
Definition: hstore_gist.c:354
OffsetNumber pos
Definition: hstore_gist.c:353
Definition: trgm.h:61
uint8 flag
Definition: trgm.h:63
StrategyNumber strategy
Definition: trgm_gist.c:27
TrgmPackedGraph * graph
Definition: trgm_gist.c:32
Definition: c.h:644
char * flag(int b)
Definition: test-ctype.c:33
#define RegExpICaseStrategyNumber
Definition: trgm.h:34
#define WordSimilarityStrategyNumber
Definition: trgm.h:35
TRGM * generate_trgm(char *str, int slen)
Definition: trgm_op.c:359
#define DistanceStrategyNumber
Definition: trgm.h:30
#define StrictWordSimilarityStrategyNumber
Definition: trgm.h:37
#define StrictWordDistanceStrategyNumber
Definition: trgm.h:38
#define ISARRKEY(x)
Definition: trgm.h:94
bool * trgm_presence_map(TRGM *query, TRGM *key)
Definition: trgm_op.c:1081
#define ARRNELEM(x)
Definition: trgm.h:101
double index_strategy_get_limit(StrategyNumber strategy)
Definition: trgm_op.c:135
TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:524
#define SimilarityStrategyNumber
Definition: trgm.h:29
bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
Definition: trgm_regexp.c:628
#define CMPTRGM(a, b)
Definition: trgm.h:45
bool trgm_contained_by(TRGM *trg1, TRGM *trg2)
Definition: trgm_op.c:1042
#define SIGNKEY
Definition: trgm.h:91
#define ISSIGNKEY(x)
Definition: trgm.h:95
char trgm[3]
Definition: trgm.h:41
#define CPTRGM(a, b)
Definition: trgm.h:47
#define ILikeStrategyNumber
Definition: trgm.h:32
TRGM * generate_wildcard_trgm(const char *str, int slen)
Definition: trgm_op.c:869
float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
Definition: trgm_op.c:994
#define LikeStrategyNumber
Definition: trgm.h:31
#define GETARR(x)
Definition: trgm.h:100
#define EqualStrategyNumber
Definition: trgm.h:39
#define RegExpStrategyNumber
Definition: trgm.h:33
#define WordDistanceStrategyNumber
Definition: trgm.h:36
Datum gtrgm_out(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:67
static int hemdistcache(CACHESIGN *a, CACHESIGN *b, int siglen)
Definition: trgm_gist.c:779
static void fillcache(CACHESIGN *item, TRGM *key, BITVECP sign, int siglen)
Definition: trgm_gist.c:749
Datum gtrgm_same(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:590
#define WISH_F(a, b, c)
Definition: trgm_gist.c:761
static int32 unionkey(BITVECP sbase, TRGM *add, int siglen)
Definition: trgm_gist.c:534
PG_FUNCTION_INFO_V1(gtrgm_in)
Datum gtrgm_distance(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:452
static int32 sizebitvec(BITVECP sign, int siglen)
Definition: trgm_gist.c:649
Datum gtrgm_in(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:57
Datum gtrgm_options(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:960
static int hemdistsign(BITVECP a, BITVECP b, int siglen)
Definition: trgm_gist.c:655
#define GETENTRY(vec, pos)
Definition: trgm_gist.c:40
Datum gtrgm_compress(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:115
Datum gtrgm_picksplit(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:795
Datum gtrgm_decompress(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:155
static TRGM * gtrgm_alloc(bool isalltrue, int siglen, BITVECP sign)
Definition: trgm_gist.c:77
Datum gtrgm_union(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:564
Datum gtrgm_penalty(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:687
#define GET_SIGLEN()
Definition: trgm_gist.c:20
static void makesign(BITVECP sign, TRGM *a, int siglen)
Definition: trgm_gist.c:98
Datum gtrgm_consistent(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:197
static int32 cnt_sml_sign_common(TRGM *qtrg, BITVECP sign, int siglen)
Definition: trgm_gist.c:179
static int hemdist(TRGM *a, TRGM *b, int siglen)
Definition: trgm_gist.c:671
static int comparecost(const void *a, const void *b)
Definition: trgm_gist.c:769
#define VARDATA(PTR)
Definition: varatt.h:278
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
#define VARSIZE(PTR)
Definition: varatt.h:279
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317