PostgreSQL Source Code  git master
trgm_gist.c
Go to the documentation of this file.
1 /*
2  * contrib/pg_trgm/trgm_gist.c
3  */
4 #include "postgres.h"
5 
6 #include "access/stratnum.h"
7 #include "fmgr.h"
8 #include "port/pg_bitutils.h"
9 #include "trgm.h"
10 
11 typedef struct
12 {
13  /* most recent inputs to gtrgm_consistent */
16  /* extracted trigrams for query */
18  /* if a regex operator, the extracted graph */
20 
21  /*
22  * The "query" and "trigrams" are stored in the same palloc block as this
23  * cache struct, at MAXALIGN'ed offsets. The graph however isn't.
24  */
26 
27 #define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))
28 
29 
40 
41 
42 Datum
44 {
45  elog(ERROR, "not implemented");
46  PG_RETURN_DATUM(0);
47 }
48 
49 Datum
51 {
52  elog(ERROR, "not implemented");
53  PG_RETURN_DATUM(0);
54 }
55 
56 static void
58 {
59  int32 k,
60  len = ARRNELEM(a);
61  trgm *ptr = GETARR(a);
62  int32 tmp = 0;
63 
64  MemSet((void *) sign, 0, sizeof(BITVEC));
65  SETBIT(sign, SIGLENBIT); /* set last unused bit */
66  for (k = 0; k < len; k++)
67  {
68  CPTRGM(((char *) &tmp), ptr + k);
69  HASH(sign, tmp);
70  }
71 }
72 
73 Datum
75 {
76  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
77  GISTENTRY *retval = entry;
78 
79  if (entry->leafkey)
80  { /* trgm */
81  TRGM *res;
82  text *val = DatumGetTextPP(entry->key);
83 
85  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
86  gistentryinit(*retval, PointerGetDatum(res),
87  entry->rel, entry->page,
88  entry->offset, false);
89  }
90  else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
91  !ISALLTRUE(DatumGetPointer(entry->key)))
92  {
93  int32 i,
94  len;
95  TRGM *res;
97 
98  LOOPBYTE
99  {
100  if ((sign[i] & 0xff) != 0xff)
101  PG_RETURN_POINTER(retval);
102  }
103 
104  len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
105  res = (TRGM *) palloc(len);
106  SET_VARSIZE(res, len);
107  res->flag = SIGNKEY | ALLISTRUE;
108 
109  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
110  gistentryinit(*retval, PointerGetDatum(res),
111  entry->rel, entry->page,
112  entry->offset, false);
113  }
114  PG_RETURN_POINTER(retval);
115 }
116 
117 Datum
119 {
120  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
121  GISTENTRY *retval;
122  text *key;
123 
124  key = DatumGetTextPP(entry->key);
125 
126  if (key != (text *) DatumGetPointer(entry->key))
127  {
128  /* need to pass back the decompressed item */
129  retval = palloc(sizeof(GISTENTRY));
130  gistentryinit(*retval, PointerGetDatum(key),
131  entry->rel, entry->page, entry->offset, entry->leafkey);
132  PG_RETURN_POINTER(retval);
133  }
134  else
135  {
136  /* we can return the entry as-is */
137  PG_RETURN_POINTER(entry);
138  }
139 }
140 
141 static int32
143 {
144  int32 count = 0;
145  int32 k,
146  len = ARRNELEM(qtrg);
147  trgm *ptr = GETARR(qtrg);
148  int32 tmp = 0;
149 
150  for (k = 0; k < len; k++)
151  {
152  CPTRGM(((char *) &tmp), ptr + k);
153  count += GETBIT(sign, HASHVAL(tmp));
154  }
155 
156  return count;
157 }
158 
159 Datum
161 {
162  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
163  text *query = PG_GETARG_TEXT_P(1);
165 
166  /* Oid subtype = PG_GETARG_OID(3); */
167  bool *recheck = (bool *) PG_GETARG_POINTER(4);
168  TRGM *key = (TRGM *) DatumGetPointer(entry->key);
169  TRGM *qtrg;
170  bool res;
171  Size querysize = VARSIZE(query);
172  gtrgm_consistent_cache *cache;
173  double nlimit;
174 
175  /*
176  * We keep the extracted trigrams in cache, because trigram extraction is
177  * relatively CPU-expensive. When trying to reuse a cached value, check
178  * strategy number not just query itself, because trigram extraction
179  * depends on strategy.
180  *
181  * The cached structure is a single palloc chunk containing the
182  * gtrgm_consistent_cache header, then the input query (4-byte length
183  * word, uncompressed, starting at a MAXALIGN boundary), then the TRGM
184  * value (also starting at a MAXALIGN boundary). However we don't try to
185  * include the regex graph (if any) in that struct. (XXX currently, this
186  * approach can leak regex graphs across index rescans. Not clear if
187  * that's worth fixing.)
188  */
189  cache = (gtrgm_consistent_cache *) fcinfo->flinfo->fn_extra;
190  if (cache == NULL ||
191  cache->strategy != strategy ||
192  VARSIZE(cache->query) != querysize ||
193  memcmp((char *) cache->query, (char *) query, querysize) != 0)
194  {
195  gtrgm_consistent_cache *newcache;
196  TrgmPackedGraph *graph = NULL;
197  Size qtrgsize;
198 
199  switch (strategy)
200  {
204  qtrg = generate_trgm(VARDATA(query),
205  querysize - VARHDRSZ);
206  break;
207  case ILikeStrategyNumber:
208 #ifndef IGNORECASE
209  elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
210 #endif
211  /* FALL THRU */
212  case LikeStrategyNumber:
213  qtrg = generate_wildcard_trgm(VARDATA(query),
214  querysize - VARHDRSZ);
215  break;
217 #ifndef IGNORECASE
218  elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
219 #endif
220  /* FALL THRU */
222  qtrg = createTrgmNFA(query, PG_GET_COLLATION(),
223  &graph, fcinfo->flinfo->fn_mcxt);
224  /* just in case an empty array is returned ... */
225  if (qtrg && ARRNELEM(qtrg) <= 0)
226  {
227  pfree(qtrg);
228  qtrg = NULL;
229  }
230  break;
231  default:
232  elog(ERROR, "unrecognized strategy number: %d", strategy);
233  qtrg = NULL; /* keep compiler quiet */
234  break;
235  }
236 
237  qtrgsize = qtrg ? VARSIZE(qtrg) : 0;
238 
239  newcache = (gtrgm_consistent_cache *)
240  MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
242  MAXALIGN(querysize) +
243  qtrgsize);
244 
245  newcache->strategy = strategy;
246  newcache->query = (text *)
247  ((char *) newcache + MAXALIGN(sizeof(gtrgm_consistent_cache)));
248  memcpy((char *) newcache->query, (char *) query, querysize);
249  if (qtrg)
250  {
251  newcache->trigrams = (TRGM *)
252  ((char *) newcache->query + MAXALIGN(querysize));
253  memcpy((char *) newcache->trigrams, (char *) qtrg, qtrgsize);
254  /* release qtrg in case it was made in fn_mcxt */
255  pfree(qtrg);
256  }
257  else
258  newcache->trigrams = NULL;
259  newcache->graph = graph;
260 
261  if (cache)
262  pfree(cache);
263  fcinfo->flinfo->fn_extra = (void *) newcache;
264  cache = newcache;
265  }
266 
267  qtrg = cache->trigrams;
268 
269  switch (strategy)
270  {
274 
275  /*
276  * Similarity search is exact. (Strict) word similarity search is
277  * inexact
278  */
279  *recheck = (strategy != SimilarityStrategyNumber);
280 
281  nlimit = index_strategy_get_limit(strategy);
282 
283  if (GIST_LEAF(entry))
284  { /* all leafs contains orig trgm */
285  double tmpsml = cnt_sml(qtrg, key, *recheck);
286 
287  res = (tmpsml >= nlimit);
288  }
289  else if (ISALLTRUE(key))
290  { /* non-leaf contains signature */
291  res = true;
292  }
293  else
294  { /* non-leaf contains signature */
295  int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key));
296  int32 len = ARRNELEM(qtrg);
297 
298  if (len == 0)
299  res = false;
300  else
301  res = (((((float8) count) / ((float8) len))) >= nlimit);
302  }
303  break;
304  case ILikeStrategyNumber:
305 #ifndef IGNORECASE
306  elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
307 #endif
308  /* FALL THRU */
309  case LikeStrategyNumber:
310  /* Wildcard search is inexact */
311  *recheck = true;
312 
313  /*
314  * Check if all the extracted trigrams can be present in child
315  * nodes.
316  */
317  if (GIST_LEAF(entry))
318  { /* all leafs contains orig trgm */
319  res = trgm_contained_by(qtrg, key);
320  }
321  else if (ISALLTRUE(key))
322  { /* non-leaf contains signature */
323  res = true;
324  }
325  else
326  { /* non-leaf contains signature */
327  int32 k,
328  tmp = 0,
329  len = ARRNELEM(qtrg);
330  trgm *ptr = GETARR(qtrg);
331  BITVECP sign = GETSIGN(key);
332 
333  res = true;
334  for (k = 0; k < len; k++)
335  {
336  CPTRGM(((char *) &tmp), ptr + k);
337  if (!GETBIT(sign, HASHVAL(tmp)))
338  {
339  res = false;
340  break;
341  }
342  }
343  }
344  break;
346 #ifndef IGNORECASE
347  elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
348 #endif
349  /* FALL THRU */
351  /* Regexp search is inexact */
352  *recheck = true;
353 
354  /* Check regex match as much as we can with available info */
355  if (qtrg)
356  {
357  if (GIST_LEAF(entry))
358  { /* all leafs contains orig trgm */
359  bool *check;
360 
361  check = trgm_presence_map(qtrg, key);
362  res = trigramsMatchGraph(cache->graph, check);
363  pfree(check);
364  }
365  else if (ISALLTRUE(key))
366  { /* non-leaf contains signature */
367  res = true;
368  }
369  else
370  { /* non-leaf contains signature */
371  int32 k,
372  tmp = 0,
373  len = ARRNELEM(qtrg);
374  trgm *ptr = GETARR(qtrg);
375  BITVECP sign = GETSIGN(key);
376  bool *check;
377 
378  /*
379  * GETBIT() tests may give false positives, due to limited
380  * size of the sign array. But since trigramsMatchGraph()
381  * implements a monotone boolean function, false positives
382  * in the check array can't lead to false negative answer.
383  * So we can apply trigramsMatchGraph despite uncertainty,
384  * and that usefully improves the quality of the search.
385  */
386  check = (bool *) palloc(len * sizeof(bool));
387  for (k = 0; k < len; k++)
388  {
389  CPTRGM(((char *) &tmp), ptr + k);
390  check[k] = GETBIT(sign, HASHVAL(tmp));
391  }
392  res = trigramsMatchGraph(cache->graph, check);
393  pfree(check);
394  }
395  }
396  else
397  {
398  /* trigram-free query must be rechecked everywhere */
399  res = true;
400  }
401  break;
402  default:
403  elog(ERROR, "unrecognized strategy number: %d", strategy);
404  res = false; /* keep compiler quiet */
405  break;
406  }
407 
408  PG_RETURN_BOOL(res);
409 }
410 
411 Datum
413 {
414  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
415  text *query = PG_GETARG_TEXT_P(1);
417 
418  /* Oid subtype = PG_GETARG_OID(3); */
419  bool *recheck = (bool *) PG_GETARG_POINTER(4);
420  TRGM *key = (TRGM *) DatumGetPointer(entry->key);
421  TRGM *qtrg;
422  float8 res;
423  Size querysize = VARSIZE(query);
424  char *cache = (char *) fcinfo->flinfo->fn_extra;
425 
426  /*
427  * Cache the generated trigrams across multiple calls with the same query.
428  */
429  if (cache == NULL ||
430  VARSIZE(cache) != querysize ||
431  memcmp(cache, query, querysize) != 0)
432  {
433  char *newcache;
434 
435  qtrg = generate_trgm(VARDATA(query), querysize - VARHDRSZ);
436 
437  newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
438  MAXALIGN(querysize) +
439  VARSIZE(qtrg));
440 
441  memcpy(newcache, query, querysize);
442  memcpy(newcache + MAXALIGN(querysize), qtrg, VARSIZE(qtrg));
443 
444  if (cache)
445  pfree(cache);
446  fcinfo->flinfo->fn_extra = newcache;
447  cache = newcache;
448  }
449 
450  qtrg = (TRGM *) (cache + MAXALIGN(querysize));
451 
452  switch (strategy)
453  {
457  /* Only plain trigram distance is exact */
458  *recheck = (strategy != DistanceStrategyNumber);
459  if (GIST_LEAF(entry))
460  { /* all leafs contains orig trgm */
461 
462  /*
463  * Prevent gcc optimizing the sml variable using volatile
464  * keyword. Otherwise res can differ from the
465  * word_similarity_dist_op() function.
466  */
467  float4 volatile sml = cnt_sml(qtrg, key, *recheck);
468 
469  res = 1.0 - sml;
470  }
471  else if (ISALLTRUE(key))
472  { /* all leafs contains orig trgm */
473  res = 0.0;
474  }
475  else
476  { /* non-leaf contains signature */
477  int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key));
478  int32 len = ARRNELEM(qtrg);
479 
480  res = (len == 0) ? -1.0 : 1.0 - ((float8) count) / ((float8) len);
481  }
482  break;
483  default:
484  elog(ERROR, "unrecognized strategy number: %d", strategy);
485  res = 0; /* keep compiler quiet */
486  break;
487  }
488 
489  PG_RETURN_FLOAT8(res);
490 }
491 
492 static int32
493 unionkey(BITVECP sbase, TRGM *add)
494 {
495  int32 i;
496 
497  if (ISSIGNKEY(add))
498  {
499  BITVECP sadd = GETSIGN(add);
500 
501  if (ISALLTRUE(add))
502  return 1;
503 
504  LOOPBYTE
505  sbase[i] |= sadd[i];
506  }
507  else
508  {
509  trgm *ptr = GETARR(add);
510  int32 tmp = 0;
511 
512  for (i = 0; i < ARRNELEM(add); i++)
513  {
514  CPTRGM(((char *) &tmp), ptr + i);
515  HASH(sbase, tmp);
516  }
517  }
518  return 0;
519 }
520 
521 
522 Datum
524 {
526  int32 len = entryvec->n;
527  int *size = (int *) PG_GETARG_POINTER(1);
528  BITVEC base;
529  int32 i;
530  int32 flag = 0;
531  TRGM *result;
532 
533  MemSet((void *) base, 0, sizeof(BITVEC));
534  for (i = 0; i < len; i++)
535  {
536  if (unionkey(base, GETENTRY(entryvec, i)))
537  {
538  flag = ALLISTRUE;
539  break;
540  }
541  }
542 
543  flag |= SIGNKEY;
544  len = CALCGTSIZE(flag, 0);
545  result = (TRGM *) palloc(len);
546  SET_VARSIZE(result, len);
547  result->flag = flag;
548  if (!ISALLTRUE(result))
549  memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
550  *size = len;
551 
552  PG_RETURN_POINTER(result);
553 }
554 
555 Datum
557 {
558  TRGM *a = (TRGM *) PG_GETARG_POINTER(0);
559  TRGM *b = (TRGM *) PG_GETARG_POINTER(1);
560  bool *result = (bool *) PG_GETARG_POINTER(2);
561 
562  if (ISSIGNKEY(a))
563  { /* then b also ISSIGNKEY */
564  if (ISALLTRUE(a) && ISALLTRUE(b))
565  *result = true;
566  else if (ISALLTRUE(a))
567  *result = false;
568  else if (ISALLTRUE(b))
569  *result = false;
570  else
571  {
572  int32 i;
573  BITVECP sa = GETSIGN(a),
574  sb = GETSIGN(b);
575 
576  *result = true;
577  LOOPBYTE
578  {
579  if (sa[i] != sb[i])
580  {
581  *result = false;
582  break;
583  }
584  }
585  }
586  }
587  else
588  { /* a and b ISARRKEY */
589  int32 lena = ARRNELEM(a),
590  lenb = ARRNELEM(b);
591 
592  if (lena != lenb)
593  *result = false;
594  else
595  {
596  trgm *ptra = GETARR(a),
597  *ptrb = GETARR(b);
598  int32 i;
599 
600  *result = true;
601  for (i = 0; i < lena; i++)
602  if (CMPTRGM(ptra + i, ptrb + i))
603  {
604  *result = false;
605  break;
606  }
607  }
608  }
609 
610  PG_RETURN_POINTER(result);
611 }
612 
613 static int32
615 {
616  return pg_popcount(sign, SIGLEN);
617 }
618 
619 static int
621 {
622  int i,
623  diff,
624  dist = 0;
625 
626  LOOPBYTE
627  {
628  diff = (unsigned char) (a[i] ^ b[i]);
629  /* Using the popcount functions here isn't likely to win */
630  dist += pg_number_of_ones[diff];
631  }
632  return dist;
633 }
634 
635 static int
637 {
638  if (ISALLTRUE(a))
639  {
640  if (ISALLTRUE(b))
641  return 0;
642  else
643  return SIGLENBIT - sizebitvec(GETSIGN(b));
644  }
645  else if (ISALLTRUE(b))
646  return SIGLENBIT - sizebitvec(GETSIGN(a));
647 
648  return hemdistsign(GETSIGN(a), GETSIGN(b));
649 }
650 
651 Datum
653 {
654  GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
655  GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
656  float *penalty = (float *) PG_GETARG_POINTER(2);
657  TRGM *origval = (TRGM *) DatumGetPointer(origentry->key);
658  TRGM *newval = (TRGM *) DatumGetPointer(newentry->key);
659  BITVECP orig = GETSIGN(origval);
660 
661  *penalty = 0.0;
662 
663  if (ISARRKEY(newval))
664  {
665  char *cache = (char *) fcinfo->flinfo->fn_extra;
666  TRGM *cachedVal = (TRGM *) (cache + MAXALIGN(sizeof(BITVEC)));
667  Size newvalsize = VARSIZE(newval);
668  BITVECP sign;
669 
670  /*
671  * Cache the sign data across multiple calls with the same newval.
672  */
673  if (cache == NULL ||
674  VARSIZE(cachedVal) != newvalsize ||
675  memcmp(cachedVal, newval, newvalsize) != 0)
676  {
677  char *newcache;
678 
679  newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
680  MAXALIGN(sizeof(BITVEC)) +
681  newvalsize);
682 
683  makesign((BITVECP) newcache, newval);
684 
685  cachedVal = (TRGM *) (newcache + MAXALIGN(sizeof(BITVEC)));
686  memcpy(cachedVal, newval, newvalsize);
687 
688  if (cache)
689  pfree(cache);
690  fcinfo->flinfo->fn_extra = newcache;
691  cache = newcache;
692  }
693 
694  sign = (BITVECP) cache;
695 
696  if (ISALLTRUE(origval))
697  *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
698  else
699  *penalty = hemdistsign(sign, orig);
700  }
701  else
702  *penalty = hemdist(origval, newval);
703  PG_RETURN_POINTER(penalty);
704 }
705 
706 typedef struct
707 {
708  bool allistrue;
710 } CACHESIGN;
711 
712 static void
714 {
715  item->allistrue = false;
716  if (ISARRKEY(key))
717  makesign(item->sign, key);
718  else if (ISALLTRUE(key))
719  item->allistrue = true;
720  else
721  memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
722 }
723 
724 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
725 typedef struct
726 {
727  OffsetNumber pos;
728  int32 cost;
729 } SPLITCOST;
730 
731 static int
732 comparecost(const void *a, const void *b)
733 {
734  if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
735  return 0;
736  else
737  return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
738 }
739 
740 
741 static int
743 {
744  if (a->allistrue)
745  {
746  if (b->allistrue)
747  return 0;
748  else
749  return SIGLENBIT - sizebitvec(b->sign);
750  }
751  else if (b->allistrue)
752  return SIGLENBIT - sizebitvec(a->sign);
753 
754  return hemdistsign(a->sign, b->sign);
755 }
756 
757 Datum
759 {
761  OffsetNumber maxoff = entryvec->n - 2;
763  OffsetNumber k,
764  j;
765  TRGM *datum_l,
766  *datum_r;
767  BITVECP union_l,
768  union_r;
769  int32 size_alpha,
770  size_beta;
771  int32 size_waste,
772  waste = -1;
773  int32 nbytes;
774  OffsetNumber seed_1 = 0,
775  seed_2 = 0;
776  OffsetNumber *left,
777  *right;
778  BITVECP ptr;
779  int i;
780  CACHESIGN *cache;
781  SPLITCOST *costvector;
782 
783  /* cache the sign data for each existing item */
784  cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
785  for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k))
786  fillcache(&cache[k], GETENTRY(entryvec, k));
787 
788  /* now find the two furthest-apart items */
789  for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
790  {
791  for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
792  {
793  size_waste = hemdistcache(&(cache[j]), &(cache[k]));
794  if (size_waste > waste)
795  {
796  waste = size_waste;
797  seed_1 = k;
798  seed_2 = j;
799  }
800  }
801  }
802 
803  /* just in case we didn't make a selection ... */
804  if (seed_1 == 0 || seed_2 == 0)
805  {
806  seed_1 = 1;
807  seed_2 = 2;
808  }
809 
810  /* initialize the result vectors */
811  nbytes = (maxoff + 2) * sizeof(OffsetNumber);
812  v->spl_left = left = (OffsetNumber *) palloc(nbytes);
813  v->spl_right = right = (OffsetNumber *) palloc(nbytes);
814  v->spl_nleft = 0;
815  v->spl_nright = 0;
816 
817  /* form initial .. */
818  if (cache[seed_1].allistrue)
819  {
820  datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
821  SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
822  datum_l->flag = SIGNKEY | ALLISTRUE;
823  }
824  else
825  {
826  datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0));
827  SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
828  datum_l->flag = SIGNKEY;
829  memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
830  }
831  if (cache[seed_2].allistrue)
832  {
833  datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
834  SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
835  datum_r->flag = SIGNKEY | ALLISTRUE;
836  }
837  else
838  {
839  datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0));
840  SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
841  datum_r->flag = SIGNKEY;
842  memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
843  }
844 
845  union_l = GETSIGN(datum_l);
846  union_r = GETSIGN(datum_r);
847  maxoff = OffsetNumberNext(maxoff);
848  fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
849  /* sort before ... */
850  costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
851  for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
852  {
853  costvector[j - 1].pos = j;
854  size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
855  size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
856  costvector[j - 1].cost = abs(size_alpha - size_beta);
857  }
858  qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
859 
860  for (k = 0; k < maxoff; k++)
861  {
862  j = costvector[k].pos;
863  if (j == seed_1)
864  {
865  *left++ = j;
866  v->spl_nleft++;
867  continue;
868  }
869  else if (j == seed_2)
870  {
871  *right++ = j;
872  v->spl_nright++;
873  continue;
874  }
875 
876  if (ISALLTRUE(datum_l) || cache[j].allistrue)
877  {
878  if (ISALLTRUE(datum_l) && cache[j].allistrue)
879  size_alpha = 0;
880  else
881  size_alpha = SIGLENBIT - sizebitvec(
882  (cache[j].allistrue) ? GETSIGN(datum_l) : GETSIGN(cache[j].sign)
883  );
884  }
885  else
886  size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
887 
888  if (ISALLTRUE(datum_r) || cache[j].allistrue)
889  {
890  if (ISALLTRUE(datum_r) && cache[j].allistrue)
891  size_beta = 0;
892  else
893  size_beta = SIGLENBIT - sizebitvec(
894  (cache[j].allistrue) ? GETSIGN(datum_r) : GETSIGN(cache[j].sign)
895  );
896  }
897  else
898  size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
899 
900  if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
901  {
902  if (ISALLTRUE(datum_l) || cache[j].allistrue)
903  {
904  if (!ISALLTRUE(datum_l))
905  MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
906  }
907  else
908  {
909  ptr = cache[j].sign;
910  LOOPBYTE
911  union_l[i] |= ptr[i];
912  }
913  *left++ = j;
914  v->spl_nleft++;
915  }
916  else
917  {
918  if (ISALLTRUE(datum_r) || cache[j].allistrue)
919  {
920  if (!ISALLTRUE(datum_r))
921  MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
922  }
923  else
924  {
925  ptr = cache[j].sign;
926  LOOPBYTE
927  union_r[i] |= ptr[i];
928  }
929  *right++ = j;
930  v->spl_nright++;
931  }
932  }
933 
934  *right = *left = FirstOffsetNumber;
935  v->spl_ldatum = PointerGetDatum(datum_l);
936  v->spl_rdatum = PointerGetDatum(datum_r);
937 
939 }
#define GIST_LEAF(entry)
Definition: gist.h:141
Relation rel
Definition: gist.h:132
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:351
OffsetNumber pos
Definition: hstore_gist.c:319
#define SETBIT(x, i)
Definition: blutils.c:31
bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
Definition: trgm_regexp.c:636
#define StrictWordDistanceStrategyNumber
Definition: trgm.h:39
#define CMPTRGM(a, b)
Definition: trgm.h:45
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define VARDATA(PTR)
Definition: postgres.h:302
static int hemdist(TRGM *a, TRGM *b)
Definition: trgm_gist.c:636
#define RegExpStrategyNumber
Definition: trgm.h:34
static int32 cnt_sml_sign_common(TRGM *qtrg, BITVECP sign)
Definition: trgm_gist.c:142
TRGM * generate_wildcard_trgm(const char *str, int slen)
Definition: trgm_op.c:866
#define SimilarityStrategyNumber
Definition: trgm.h:30
#define VARSIZE(PTR)
Definition: postgres.h:303
#define ISSIGNKEY(x)
Definition: trgm.h:102
#define GETSIGN(x)
Definition: hstore_gist.c:50
#define PointerGetDatum(X)
Definition: postgres.h:556
#define DistanceStrategyNumber
Definition: trgm.h:31
#define VARHDRSZ
Definition: c.h:556
PG_FUNCTION_INFO_V1(gtrgm_in)
TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:521
#define StrictWordSimilarityStrategyNumber
Definition: trgm.h:38
#define RegExpICaseStrategyNumber
Definition: trgm.h:35
#define SIGLENBIT
Definition: hstore_gist.c:16
#define DatumGetTextPP(X)
Definition: fmgr.h:286
#define WordDistanceStrategyNumber
Definition: trgm.h:37
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:356
static int hemdistcache(CACHESIGN *a, CACHESIGN *b)
Definition: trgm_gist.c:742
OffsetNumber * spl_left
Definition: gist.h:113
Datum spl_rdatum
Definition: gist.h:120
Datum gtrgm_union(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:523
int32 n
Definition: gist.h:206
static int comparecost(const void *a, const void *b)
Definition: trgm_gist.c:732
uint16 StrategyNumber
Definition: stratnum.h:22
float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
Definition: trgm_op.c:998
#define ALLISTRUE
Definition: hstore_gist.c:43
#define MemSet(start, val, len)
Definition: c.h:956
static void fillcache(CACHESIGN *item, TRGM *key)
Definition: trgm_gist.c:713
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:271
#define HASH(sign, val)
Definition: hstore_gist.c:34
#define ARRNELEM(x)
Definition: trgm.h:108
bool * trgm_presence_map(TRGM *query, TRGM *key)
Definition: trgm_op.c:1085
int spl_nleft
Definition: gist.h:114
Datum gtrgm_same(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:556
#define PG_GET_COLLATION()
Definition: fmgr.h:193
signed int int32
Definition: c.h:347
uint64 pg_popcount(const char *buf, int bytes)
Definition: pg_bitutils.c:282
int32 cost
Definition: hstore_gist.c:320
#define WISH_F(a, b, c)
Definition: trgm_gist.c:724
uint16 OffsetNumber
Definition: off.h:24
Page page
Definition: gist.h:133
#define ISARRKEY(x)
Definition: trgm.h:101
static int32 unionkey(BITVECP sbase, TRGM *add)
Definition: trgm_gist.c:493
#define GETARR(x)
Definition: trgm.h:107
Datum gtrgm_distance(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:412
void pfree(void *pointer)
Definition: mcxt.c:1056
#define GETBIT(x, i)
Definition: blutils.c:32
#define ERROR
Definition: elog.h:43
double float8
Definition: c.h:492
TrgmPackedGraph * graph
Definition: trgm_gist.c:19
Definition: trgm.h:66
int spl_nright
Definition: gist.h:119
Datum gtrgm_consistent(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:160
static void makesign(BITVECP sign, TRGM *a)
Definition: trgm_gist.c:57
char BITVEC[SIGLEN]
Definition: hstore_gist.c:18
BITVEC sign
Definition: trgm_gist.c:709
char sign
Definition: informix.c:688
StrategyNumber strategy
Definition: trgm_gist.c:14
Datum key
Definition: gist.h:131
#define LikeStrategyNumber
Definition: trgm.h:32
#define FirstOffsetNumber
Definition: off.h:27
char * flag(int b)
Definition: test-ctype.c:33
double index_strategy_get_limit(StrategyNumber strategy)
Definition: trgm_op.c:132
#define HASHVAL(val)
Definition: hstore_gist.c:33
bool trgm_contained_by(TRGM *trg1, TRGM *trg2)
Definition: trgm_op.c:1046
bool leafkey
Definition: gist.h:135
Datum gtrgm_out(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:50
float float4
Definition: c.h:491
#define SIGNKEY
Definition: trgm.h:98
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:349
uintptr_t Datum
Definition: postgres.h:367
#define GETENTRY(vec, pos)
Definition: trgm_gist.c:27
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:343
uint8 flag
Definition: trgm.h:69
Datum gtrgm_penalty(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:652
Datum gtrgm_picksplit(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:758
#define ISALLTRUE(x)
Definition: hstore_gist.c:45
#define WordSimilarityStrategyNumber
Definition: trgm.h:36
char trgm[3]
Definition: trgm.h:41
static int hemdistsign(BITVECP a, BITVECP b)
Definition: trgm_gist.c:620
#define CALCGTSIZE(flag)
Definition: hstore_gist.c:48
Datum spl_ldatum
Definition: gist.h:115
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:215
Datum gtrgm_decompress(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:118
TRGM * generate_trgm(char *str, int slen)
Definition: trgm_op.c:356
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
size_t Size
Definition: c.h:467
#define newval
OffsetNumber * spl_right
Definition: gist.h:118
#define MAXALIGN(LEN)
Definition: c.h:686
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:267
#define DatumGetPointer(X)
Definition: postgres.h:549
PGDLLIMPORT const uint8 pg_number_of_ones[256]
Definition: pg_bitutils.c:87
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
void * palloc(Size size)
Definition: mcxt.c:949
#define PG_GETARG_TEXT_P(n)
Definition: fmgr.h:330
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:796
#define LOOPBYTE
Definition: hstore_gist.c:21
#define elog(elevel,...)
Definition: elog.h:228
int i
#define SIGLEN
Definition: hstore_gist.c:15
Definition: c.h:550
#define PG_FUNCTION_ARGS
Definition: fmgr.h:188
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
char * BITVECP
Definition: hstore_gist.c:19
#define ILikeStrategyNumber
Definition: trgm.h:33
bool allistrue
Definition: trgm_gist.c:708
#define qsort(a, b, c, d)
Definition: port.h:492
Datum gtrgm_in(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:43
static int32 sizebitvec(BITVECP sign)
Definition: trgm_gist.c:614
Datum gtrgm_compress(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:74
OffsetNumber offset
Definition: gist.h:134
long val
Definition: informix.c:684
#define CPTRGM(a, b)
Definition: trgm.h:47