PostgreSQL Source Code  git master
trgm_gist.c
Go to the documentation of this file.
1 /*
2  * contrib/pg_trgm/trgm_gist.c
3  */
4 #include "postgres.h"
5 
6 #include "access/reloptions.h"
7 #include "access/stratnum.h"
8 #include "fmgr.h"
9 #include "port/pg_bitutils.h"
10 #include "trgm.h"
11 #include "varatt.h"
12 
13 /* gist_trgm_ops opclass options */
14 typedef struct
15 {
16  int32 vl_len_; /* varlena header (do not touch directly!) */
17  int siglen; /* signature length in bytes */
19 
20 #define GET_SIGLEN() (PG_HAS_OPCLASS_OPTIONS() ? \
21  ((TrgmGistOptions *) PG_GET_OPCLASS_OPTIONS())->siglen : \
22  SIGLEN_DEFAULT)
23 
24 typedef struct
25 {
26  /* most recent inputs to gtrgm_consistent */
29  /* extracted trigrams for query */
31  /* if a regex operator, the extracted graph */
33 
34  /*
35  * The "query" and "trigrams" are stored in the same palloc block as this
36  * cache struct, at MAXALIGN'ed offsets. The graph however isn't.
37  */
39 
40 #define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))
41 
42 
54 
55 
56 Datum
58 {
59  ereport(ERROR,
60  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
61  errmsg("cannot accept a value of type %s", "gtrgm")));
62 
63  PG_RETURN_VOID(); /* keep compiler quiet */
64 }
65 
66 Datum
68 {
69  ereport(ERROR,
70  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
71  errmsg("cannot display a value of type %s", "gtrgm")));
72 
73  PG_RETURN_VOID(); /* keep compiler quiet */
74 }
75 
76 static TRGM *
77 gtrgm_alloc(bool isalltrue, int siglen, BITVECP sign)
78 {
79  int flag = SIGNKEY | (isalltrue ? ALLISTRUE : 0);
80  int size = CALCGTSIZE(flag, siglen);
81  TRGM *res = palloc(size);
82 
84  res->flag = flag;
85 
86  if (!isalltrue)
87  {
88  if (sign)
89  memcpy(GETSIGN(res), sign, siglen);
90  else
91  memset(GETSIGN(res), 0, siglen);
92  }
93 
94  return res;
95 }
96 
97 static void
98 makesign(BITVECP sign, TRGM *a, int siglen)
99 {
100  int32 k,
101  len = ARRNELEM(a);
102  trgm *ptr = GETARR(a);
103  int32 tmp = 0;
104 
105  MemSet(sign, 0, siglen);
106  SETBIT(sign, SIGLENBIT(siglen)); /* set last unused bit */
107  for (k = 0; k < len; k++)
108  {
109  CPTRGM(((char *) &tmp), ptr + k);
110  HASH(sign, tmp, siglen);
111  }
112 }
113 
114 Datum
116 {
117  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
118  int siglen = GET_SIGLEN();
119  GISTENTRY *retval = entry;
120 
121  if (entry->leafkey)
122  { /* trgm */
123  TRGM *res;
124  text *val = DatumGetTextPP(entry->key);
125 
127  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
129  entry->rel, entry->page,
130  entry->offset, false);
131  }
132  else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
133  !ISALLTRUE(DatumGetPointer(entry->key)))
134  {
135  int32 i;
136  TRGM *res;
138 
139  LOOPBYTE(siglen)
140  {
141  if ((sign[i] & 0xff) != 0xff)
142  PG_RETURN_POINTER(retval);
143  }
144 
145  res = gtrgm_alloc(true, siglen, sign);
146  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
148  entry->rel, entry->page,
149  entry->offset, false);
150  }
151  PG_RETURN_POINTER(retval);
152 }
153 
154 Datum
156 {
157  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
158  GISTENTRY *retval;
159  text *key;
160 
161  key = DatumGetTextPP(entry->key);
162 
163  if (key != (text *) DatumGetPointer(entry->key))
164  {
165  /* need to pass back the decompressed item */
166  retval = palloc(sizeof(GISTENTRY));
168  entry->rel, entry->page, entry->offset, entry->leafkey);
169  PG_RETURN_POINTER(retval);
170  }
171  else
172  {
173  /* we can return the entry as-is */
174  PG_RETURN_POINTER(entry);
175  }
176 }
177 
178 static int32
180 {
181  int32 count = 0;
182  int32 k,
183  len = ARRNELEM(qtrg);
184  trgm *ptr = GETARR(qtrg);
185  int32 tmp = 0;
186 
187  for (k = 0; k < len; k++)
188  {
189  CPTRGM(((char *) &tmp), ptr + k);
190  count += GETBIT(sign, HASHVAL(tmp, siglen));
191  }
192 
193  return count;
194 }
195 
196 Datum
198 {
199  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
200  text *query = PG_GETARG_TEXT_P(1);
202 
203  /* Oid subtype = PG_GETARG_OID(3); */
204  bool *recheck = (bool *) PG_GETARG_POINTER(4);
205  int siglen = GET_SIGLEN();
206  TRGM *key = (TRGM *) DatumGetPointer(entry->key);
207  TRGM *qtrg;
208  bool res;
209  Size querysize = VARSIZE(query);
210  gtrgm_consistent_cache *cache;
211  double nlimit;
212 
213  /*
214  * We keep the extracted trigrams in cache, because trigram extraction is
215  * relatively CPU-expensive. When trying to reuse a cached value, check
216  * strategy number not just query itself, because trigram extraction
217  * depends on strategy.
218  *
219  * The cached structure is a single palloc chunk containing the
220  * gtrgm_consistent_cache header, then the input query (4-byte length
221  * word, uncompressed, starting at a MAXALIGN boundary), then the TRGM
222  * value (also starting at a MAXALIGN boundary). However we don't try to
223  * include the regex graph (if any) in that struct. (XXX currently, this
224  * approach can leak regex graphs across index rescans. Not clear if
225  * that's worth fixing.)
226  */
227  cache = (gtrgm_consistent_cache *) fcinfo->flinfo->fn_extra;
228  if (cache == NULL ||
229  cache->strategy != strategy ||
230  VARSIZE(cache->query) != querysize ||
231  memcmp((char *) cache->query, (char *) query, querysize) != 0)
232  {
233  gtrgm_consistent_cache *newcache;
234  TrgmPackedGraph *graph = NULL;
235  Size qtrgsize;
236 
237  switch (strategy)
238  {
242  case EqualStrategyNumber:
243  qtrg = generate_trgm(VARDATA(query),
244  querysize - VARHDRSZ);
245  break;
246  case ILikeStrategyNumber:
247 #ifndef IGNORECASE
248  elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
249 #endif
250  /* FALL THRU */
251  case LikeStrategyNumber:
252  qtrg = generate_wildcard_trgm(VARDATA(query),
253  querysize - VARHDRSZ);
254  break;
256 #ifndef IGNORECASE
257  elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
258 #endif
259  /* FALL THRU */
261  qtrg = createTrgmNFA(query, PG_GET_COLLATION(),
262  &graph, fcinfo->flinfo->fn_mcxt);
263  /* just in case an empty array is returned ... */
264  if (qtrg && ARRNELEM(qtrg) <= 0)
265  {
266  pfree(qtrg);
267  qtrg = NULL;
268  }
269  break;
270  default:
271  elog(ERROR, "unrecognized strategy number: %d", strategy);
272  qtrg = NULL; /* keep compiler quiet */
273  break;
274  }
275 
276  qtrgsize = qtrg ? VARSIZE(qtrg) : 0;
277 
278  newcache = (gtrgm_consistent_cache *)
279  MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
281  MAXALIGN(querysize) +
282  qtrgsize);
283 
284  newcache->strategy = strategy;
285  newcache->query = (text *)
286  ((char *) newcache + MAXALIGN(sizeof(gtrgm_consistent_cache)));
287  memcpy((char *) newcache->query, (char *) query, querysize);
288  if (qtrg)
289  {
290  newcache->trigrams = (TRGM *)
291  ((char *) newcache->query + MAXALIGN(querysize));
292  memcpy((char *) newcache->trigrams, (char *) qtrg, qtrgsize);
293  /* release qtrg in case it was made in fn_mcxt */
294  pfree(qtrg);
295  }
296  else
297  newcache->trigrams = NULL;
298  newcache->graph = graph;
299 
300  if (cache)
301  pfree(cache);
302  fcinfo->flinfo->fn_extra = (void *) newcache;
303  cache = newcache;
304  }
305 
306  qtrg = cache->trigrams;
307 
308  switch (strategy)
309  {
313 
314  /*
315  * Similarity search is exact. (Strict) word similarity search is
316  * inexact
317  */
318  *recheck = (strategy != SimilarityStrategyNumber);
319 
320  nlimit = index_strategy_get_limit(strategy);
321 
322  if (GIST_LEAF(entry))
323  { /* all leafs contains orig trgm */
324  double tmpsml = cnt_sml(qtrg, key, *recheck);
325 
326  res = (tmpsml >= nlimit);
327  }
328  else if (ISALLTRUE(key))
329  { /* non-leaf contains signature */
330  res = true;
331  }
332  else
333  { /* non-leaf contains signature */
334  int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key), siglen);
335  int32 len = ARRNELEM(qtrg);
336 
337  if (len == 0)
338  res = false;
339  else
340  res = (((((float8) count) / ((float8) len))) >= nlimit);
341  }
342  break;
343  case ILikeStrategyNumber:
344 #ifndef IGNORECASE
345  elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
346 #endif
347  /* FALL THRU */
348  case LikeStrategyNumber:
349  case EqualStrategyNumber:
350  /* Wildcard and equal search are inexact */
351  *recheck = true;
352 
353  /*
354  * Check if all the extracted trigrams can be present in child
355  * nodes.
356  */
357  if (GIST_LEAF(entry))
358  { /* all leafs contains orig trgm */
359  res = trgm_contained_by(qtrg, key);
360  }
361  else if (ISALLTRUE(key))
362  { /* non-leaf contains signature */
363  res = true;
364  }
365  else
366  { /* non-leaf contains signature */
367  int32 k,
368  tmp = 0,
369  len = ARRNELEM(qtrg);
370  trgm *ptr = GETARR(qtrg);
371  BITVECP sign = GETSIGN(key);
372 
373  res = true;
374  for (k = 0; k < len; k++)
375  {
376  CPTRGM(((char *) &tmp), ptr + k);
377  if (!GETBIT(sign, HASHVAL(tmp, siglen)))
378  {
379  res = false;
380  break;
381  }
382  }
383  }
384  break;
386 #ifndef IGNORECASE
387  elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
388 #endif
389  /* FALL THRU */
391  /* Regexp search is inexact */
392  *recheck = true;
393 
394  /* Check regex match as much as we can with available info */
395  if (qtrg)
396  {
397  if (GIST_LEAF(entry))
398  { /* all leafs contains orig trgm */
399  bool *check;
400 
401  check = trgm_presence_map(qtrg, key);
402  res = trigramsMatchGraph(cache->graph, check);
403  pfree(check);
404  }
405  else if (ISALLTRUE(key))
406  { /* non-leaf contains signature */
407  res = true;
408  }
409  else
410  { /* non-leaf contains signature */
411  int32 k,
412  tmp = 0,
413  len = ARRNELEM(qtrg);
414  trgm *ptr = GETARR(qtrg);
415  BITVECP sign = GETSIGN(key);
416  bool *check;
417 
418  /*
419  * GETBIT() tests may give false positives, due to limited
420  * size of the sign array. But since trigramsMatchGraph()
421  * implements a monotone boolean function, false positives
422  * in the check array can't lead to false negative answer.
423  * So we can apply trigramsMatchGraph despite uncertainty,
424  * and that usefully improves the quality of the search.
425  */
426  check = (bool *) palloc(len * sizeof(bool));
427  for (k = 0; k < len; k++)
428  {
429  CPTRGM(((char *) &tmp), ptr + k);
430  check[k] = GETBIT(sign, HASHVAL(tmp, siglen));
431  }
432  res = trigramsMatchGraph(cache->graph, check);
433  pfree(check);
434  }
435  }
436  else
437  {
438  /* trigram-free query must be rechecked everywhere */
439  res = true;
440  }
441  break;
442  default:
443  elog(ERROR, "unrecognized strategy number: %d", strategy);
444  res = false; /* keep compiler quiet */
445  break;
446  }
447 
449 }
450 
451 Datum
453 {
454  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
455  text *query = PG_GETARG_TEXT_P(1);
457 
458  /* Oid subtype = PG_GETARG_OID(3); */
459  bool *recheck = (bool *) PG_GETARG_POINTER(4);
460  int siglen = GET_SIGLEN();
461  TRGM *key = (TRGM *) DatumGetPointer(entry->key);
462  TRGM *qtrg;
463  float8 res;
464  Size querysize = VARSIZE(query);
465  char *cache = (char *) fcinfo->flinfo->fn_extra;
466 
467  /*
468  * Cache the generated trigrams across multiple calls with the same query.
469  */
470  if (cache == NULL ||
471  VARSIZE(cache) != querysize ||
472  memcmp(cache, query, querysize) != 0)
473  {
474  char *newcache;
475 
476  qtrg = generate_trgm(VARDATA(query), querysize - VARHDRSZ);
477 
478  newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
479  MAXALIGN(querysize) +
480  VARSIZE(qtrg));
481 
482  memcpy(newcache, query, querysize);
483  memcpy(newcache + MAXALIGN(querysize), qtrg, VARSIZE(qtrg));
484 
485  if (cache)
486  pfree(cache);
487  fcinfo->flinfo->fn_extra = newcache;
488  cache = newcache;
489  }
490 
491  qtrg = (TRGM *) (cache + MAXALIGN(querysize));
492 
493  switch (strategy)
494  {
498  /* Only plain trigram distance is exact */
499  *recheck = (strategy != DistanceStrategyNumber);
500  if (GIST_LEAF(entry))
501  { /* all leafs contains orig trgm */
502 
503  /*
504  * Prevent gcc optimizing the sml variable using volatile
505  * keyword. Otherwise res can differ from the
506  * word_similarity_dist_op() function.
507  */
508  float4 volatile sml = cnt_sml(qtrg, key, *recheck);
509 
510  res = 1.0 - sml;
511  }
512  else if (ISALLTRUE(key))
513  { /* all leafs contains orig trgm */
514  res = 0.0;
515  }
516  else
517  { /* non-leaf contains signature */
518  int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key), siglen);
519  int32 len = ARRNELEM(qtrg);
520 
521  res = (len == 0) ? -1.0 : 1.0 - ((float8) count) / ((float8) len);
522  }
523  break;
524  default:
525  elog(ERROR, "unrecognized strategy number: %d", strategy);
526  res = 0; /* keep compiler quiet */
527  break;
528  }
529 
531 }
532 
533 static int32
534 unionkey(BITVECP sbase, TRGM *add, int siglen)
535 {
536  int32 i;
537 
538  if (ISSIGNKEY(add))
539  {
540  BITVECP sadd = GETSIGN(add);
541 
542  if (ISALLTRUE(add))
543  return 1;
544 
545  LOOPBYTE(siglen)
546  sbase[i] |= sadd[i];
547  }
548  else
549  {
550  trgm *ptr = GETARR(add);
551  int32 tmp = 0;
552 
553  for (i = 0; i < ARRNELEM(add); i++)
554  {
555  CPTRGM(((char *) &tmp), ptr + i);
556  HASH(sbase, tmp, siglen);
557  }
558  }
559  return 0;
560 }
561 
562 
563 Datum
565 {
567  int32 len = entryvec->n;
568  int *size = (int *) PG_GETARG_POINTER(1);
569  int siglen = GET_SIGLEN();
570  int32 i;
571  TRGM *result = gtrgm_alloc(false, siglen, NULL);
572  BITVECP base = GETSIGN(result);
573 
574  for (i = 0; i < len; i++)
575  {
576  if (unionkey(base, GETENTRY(entryvec, i), siglen))
577  {
578  result->flag = ALLISTRUE;
579  SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen));
580  break;
581  }
582  }
583 
584  *size = VARSIZE(result);
585 
586  PG_RETURN_POINTER(result);
587 }
588 
589 Datum
591 {
592  TRGM *a = (TRGM *) PG_GETARG_POINTER(0);
593  TRGM *b = (TRGM *) PG_GETARG_POINTER(1);
594  bool *result = (bool *) PG_GETARG_POINTER(2);
595  int siglen = GET_SIGLEN();
596 
597  if (ISSIGNKEY(a))
598  { /* then b also ISSIGNKEY */
599  if (ISALLTRUE(a) && ISALLTRUE(b))
600  *result = true;
601  else if (ISALLTRUE(a))
602  *result = false;
603  else if (ISALLTRUE(b))
604  *result = false;
605  else
606  {
607  int32 i;
608  BITVECP sa = GETSIGN(a),
609  sb = GETSIGN(b);
610 
611  *result = true;
612  LOOPBYTE(siglen)
613  {
614  if (sa[i] != sb[i])
615  {
616  *result = false;
617  break;
618  }
619  }
620  }
621  }
622  else
623  { /* a and b ISARRKEY */
624  int32 lena = ARRNELEM(a),
625  lenb = ARRNELEM(b);
626 
627  if (lena != lenb)
628  *result = false;
629  else
630  {
631  trgm *ptra = GETARR(a),
632  *ptrb = GETARR(b);
633  int32 i;
634 
635  *result = true;
636  for (i = 0; i < lena; i++)
637  if (CMPTRGM(ptra + i, ptrb + i))
638  {
639  *result = false;
640  break;
641  }
642  }
643  }
644 
645  PG_RETURN_POINTER(result);
646 }
647 
648 static int32
649 sizebitvec(BITVECP sign, int siglen)
650 {
651  return pg_popcount(sign, siglen);
652 }
653 
654 static int
656 {
657  int i,
658  diff,
659  dist = 0;
660 
661  LOOPBYTE(siglen)
662  {
663  diff = (unsigned char) (a[i] ^ b[i]);
664  /* Using the popcount functions here isn't likely to win */
665  dist += pg_number_of_ones[diff];
666  }
667  return dist;
668 }
669 
670 static int
671 hemdist(TRGM *a, TRGM *b, int siglen)
672 {
673  if (ISALLTRUE(a))
674  {
675  if (ISALLTRUE(b))
676  return 0;
677  else
678  return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen);
679  }
680  else if (ISALLTRUE(b))
681  return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
682 
683  return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
684 }
685 
686 Datum
688 {
689  GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
690  GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
691  float *penalty = (float *) PG_GETARG_POINTER(2);
692  int siglen = GET_SIGLEN();
693  TRGM *origval = (TRGM *) DatumGetPointer(origentry->key);
694  TRGM *newval = (TRGM *) DatumGetPointer(newentry->key);
695  BITVECP orig = GETSIGN(origval);
696 
697  *penalty = 0.0;
698 
699  if (ISARRKEY(newval))
700  {
701  char *cache = (char *) fcinfo->flinfo->fn_extra;
702  TRGM *cachedVal = (TRGM *) (cache + MAXALIGN(siglen));
703  Size newvalsize = VARSIZE(newval);
704  BITVECP sign;
705 
706  /*
707  * Cache the sign data across multiple calls with the same newval.
708  */
709  if (cache == NULL ||
710  VARSIZE(cachedVal) != newvalsize ||
711  memcmp(cachedVal, newval, newvalsize) != 0)
712  {
713  char *newcache;
714 
715  newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
716  MAXALIGN(siglen) +
717  newvalsize);
718 
719  makesign((BITVECP) newcache, newval, siglen);
720 
721  cachedVal = (TRGM *) (newcache + MAXALIGN(siglen));
722  memcpy(cachedVal, newval, newvalsize);
723 
724  if (cache)
725  pfree(cache);
726  fcinfo->flinfo->fn_extra = newcache;
727  cache = newcache;
728  }
729 
730  sign = (BITVECP) cache;
731 
732  if (ISALLTRUE(origval))
733  *penalty = ((float) (SIGLENBIT(siglen) - sizebitvec(sign, siglen))) / (float) (SIGLENBIT(siglen) + 1);
734  else
735  *penalty = hemdistsign(sign, orig, siglen);
736  }
737  else
738  *penalty = hemdist(origval, newval, siglen);
739  PG_RETURN_POINTER(penalty);
740 }
741 
742 typedef struct
743 {
744  bool allistrue;
746 } CACHESIGN;
747 
748 static void
749 fillcache(CACHESIGN *item, TRGM *key, BITVECP sign, int siglen)
750 {
751  item->allistrue = false;
752  item->sign = sign;
753  if (ISARRKEY(key))
754  makesign(item->sign, key, siglen);
755  else if (ISALLTRUE(key))
756  item->allistrue = true;
757  else
758  memcpy(item->sign, GETSIGN(key), siglen);
759 }
760 
761 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
762 typedef struct
763 {
764  OffsetNumber pos;
765  int32 cost;
766 } SPLITCOST;
767 
768 static int
769 comparecost(const void *a, const void *b)
770 {
771  if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
772  return 0;
773  else
774  return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
775 }
776 
777 
778 static int
780 {
781  if (a->allistrue)
782  {
783  if (b->allistrue)
784  return 0;
785  else
786  return SIGLENBIT(siglen) - sizebitvec(b->sign, siglen);
787  }
788  else if (b->allistrue)
789  return SIGLENBIT(siglen) - sizebitvec(a->sign, siglen);
790 
791  return hemdistsign(a->sign, b->sign, siglen);
792 }
793 
794 Datum
796 {
798  OffsetNumber maxoff = entryvec->n - 1;
800  int siglen = GET_SIGLEN();
801  OffsetNumber k,
802  j;
803  TRGM *datum_l,
804  *datum_r;
805  BITVECP union_l,
806  union_r;
807  int32 size_alpha,
808  size_beta;
809  int32 size_waste,
810  waste = -1;
811  int32 nbytes;
812  OffsetNumber seed_1 = 0,
813  seed_2 = 0;
814  OffsetNumber *left,
815  *right;
816  BITVECP ptr;
817  int i;
818  CACHESIGN *cache;
819  char *cache_sign;
820  SPLITCOST *costvector;
821 
822  /* cache the sign data for each existing item */
823  cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 1));
824  cache_sign = palloc(siglen * (maxoff + 1));
825 
826  for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k))
827  fillcache(&cache[k], GETENTRY(entryvec, k), &cache_sign[siglen * k],
828  siglen);
829 
830  /* now find the two furthest-apart items */
831  for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
832  {
833  for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
834  {
835  size_waste = hemdistcache(&(cache[j]), &(cache[k]), siglen);
836  if (size_waste > waste)
837  {
838  waste = size_waste;
839  seed_1 = k;
840  seed_2 = j;
841  }
842  }
843  }
844 
845  /* just in case we didn't make a selection ... */
846  if (seed_1 == 0 || seed_2 == 0)
847  {
848  seed_1 = 1;
849  seed_2 = 2;
850  }
851 
852  /* initialize the result vectors */
853  nbytes = maxoff * sizeof(OffsetNumber);
854  v->spl_left = left = (OffsetNumber *) palloc(nbytes);
855  v->spl_right = right = (OffsetNumber *) palloc(nbytes);
856  v->spl_nleft = 0;
857  v->spl_nright = 0;
858 
859  /* form initial .. */
860  datum_l = gtrgm_alloc(cache[seed_1].allistrue, siglen, cache[seed_1].sign);
861  datum_r = gtrgm_alloc(cache[seed_2].allistrue, siglen, cache[seed_2].sign);
862 
863  union_l = GETSIGN(datum_l);
864  union_r = GETSIGN(datum_r);
865 
866  /* sort before ... */
867  costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
868  for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
869  {
870  costvector[j - 1].pos = j;
871  size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]), siglen);
872  size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]), siglen);
873  costvector[j - 1].cost = abs(size_alpha - size_beta);
874  }
875  qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost);
876 
877  for (k = 0; k < maxoff; k++)
878  {
879  j = costvector[k].pos;
880  if (j == seed_1)
881  {
882  *left++ = j;
883  v->spl_nleft++;
884  continue;
885  }
886  else if (j == seed_2)
887  {
888  *right++ = j;
889  v->spl_nright++;
890  continue;
891  }
892 
893  if (ISALLTRUE(datum_l) || cache[j].allistrue)
894  {
895  if (ISALLTRUE(datum_l) && cache[j].allistrue)
896  size_alpha = 0;
897  else
898  size_alpha = SIGLENBIT(siglen) -
899  sizebitvec((cache[j].allistrue) ? GETSIGN(datum_l) :
900  GETSIGN(cache[j].sign),
901  siglen);
902  }
903  else
904  size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l), siglen);
905 
906  if (ISALLTRUE(datum_r) || cache[j].allistrue)
907  {
908  if (ISALLTRUE(datum_r) && cache[j].allistrue)
909  size_beta = 0;
910  else
911  size_beta = SIGLENBIT(siglen) -
912  sizebitvec((cache[j].allistrue) ? GETSIGN(datum_r) :
913  GETSIGN(cache[j].sign),
914  siglen);
915  }
916  else
917  size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r), siglen);
918 
919  if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
920  {
921  if (ISALLTRUE(datum_l) || cache[j].allistrue)
922  {
923  if (!ISALLTRUE(datum_l))
924  memset(GETSIGN(datum_l), 0xff, siglen);
925  }
926  else
927  {
928  ptr = cache[j].sign;
929  LOOPBYTE(siglen)
930  union_l[i] |= ptr[i];
931  }
932  *left++ = j;
933  v->spl_nleft++;
934  }
935  else
936  {
937  if (ISALLTRUE(datum_r) || cache[j].allistrue)
938  {
939  if (!ISALLTRUE(datum_r))
940  memset(GETSIGN(datum_r), 0xff, siglen);
941  }
942  else
943  {
944  ptr = cache[j].sign;
945  LOOPBYTE(siglen)
946  union_r[i] |= ptr[i];
947  }
948  *right++ = j;
949  v->spl_nright++;
950  }
951  }
952 
953  v->spl_ldatum = PointerGetDatum(datum_l);
954  v->spl_rdatum = PointerGetDatum(datum_r);
955 
957 }
958 
959 Datum
961 {
963 
964  init_local_reloptions(relopts, sizeof(TrgmGistOptions));
965  add_local_int_reloption(relopts, "siglen",
966  "signature length in bytes",
968  offsetof(TrgmGistOptions, siglen));
969 
970  PG_RETURN_VOID();
971 }
#define GETBIT(x, i)
Definition: blutils.c:33
#define SETBIT(x, i)
Definition: blutils.c:32
#define MAXALIGN(LEN)
Definition: c.h:802
signed int int32
Definition: c.h:496
#define VARHDRSZ
Definition: c.h:683
double float8
Definition: c.h:621
float float4
Definition: c.h:620
#define MemSet(start, val, len)
Definition: c.h:1011
size_t Size
Definition: c.h:596
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
#define DatumGetTextPP(X)
Definition: fmgr.h:292
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:272
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
#define PG_GETARG_TEXT_P(n)
Definition: fmgr.h:336
#define GIST_LEAF(entry)
Definition: gist.h:170
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:244
#define newval
#define HASHVAL(val, siglen)
Definition: hstore_gist.c:45
#define LOOPBYTE(siglen)
Definition: hstore_gist.c:33
#define ALLISTRUE
Definition: hstore_gist.c:55
#define CALCGTSIZE(flag, siglen)
Definition: hstore_gist.c:60
#define ISALLTRUE(x)
Definition: hstore_gist.c:57
#define SIGLEN_MAX
Definition: hstore_gist.c:24
#define SIGLEN_DEFAULT
Definition: hstore_gist.c:23
char * BITVECP
Definition: hstore_gist.c:31
#define SIGLENBIT(siglen)
Definition: hstore_gist.c:25
#define GETSIGN(x)
Definition: hstore_gist.c:62
#define HASH(sign, val, siglen)
Definition: hstore_gist.c:46
long val
Definition: informix.c:689
char sign
Definition: informix.c:693
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int j
Definition: isn.c:74
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
void pfree(void *pointer)
Definition: mcxt.c:1521
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1181
void * palloc(Size size)
Definition: mcxt.c:1317
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
PGDLLIMPORT const uint8 pg_number_of_ones[256]
Definition: pg_bitutils.c:87
static uint64 pg_popcount(const char *buf, int bytes)
Definition: pg_bitutils.h:339
const void size_t len
#define qsort(a, b, c, d)
Definition: port.h:447
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
void init_local_reloptions(local_relopts *relopts, Size relopt_struct_size)
Definition: reloptions.c:734
void add_local_int_reloption(local_relopts *relopts, const char *name, const char *desc, int default_val, int min_val, int max_val, int offset)
Definition: reloptions.c:918
static pg_noinline void Size size
Definition: slab.c:607
uint16 StrategyNumber
Definition: stratnum.h:22
BITVECP sign
Definition: trgm_gist.c:745
bool allistrue
Definition: trgm_gist.c:744
OffsetNumber offset
Definition: gist.h:163
Datum key
Definition: gist.h:160
Page page
Definition: gist.h:162
Relation rel
Definition: gist.h:161
bool leafkey
Definition: gist.h:164
int spl_nleft
Definition: gist.h:143
OffsetNumber * spl_right
Definition: gist.h:147
Datum spl_ldatum
Definition: gist.h:144
Datum spl_rdatum
Definition: gist.h:149
int spl_nright
Definition: gist.h:148
OffsetNumber * spl_left
Definition: gist.h:142
int32 n
Definition: gist.h:235
int32 cost
Definition: hstore_gist.c:354
OffsetNumber pos
Definition: hstore_gist.c:353
Definition: trgm.h:67
uint8 flag
Definition: trgm.h:69
StrategyNumber strategy
Definition: trgm_gist.c:27
TrgmPackedGraph * graph
Definition: trgm_gist.c:32
Definition: c.h:678
char * flag(int b)
Definition: test-ctype.c:33
#define RegExpICaseStrategyNumber
Definition: trgm.h:35
bool * trgm_presence_map(TRGM *query, TRGM *key)
Definition: trgm_op.c:1079
TRGM * generate_trgm(char *str, int slen)
Definition: trgm_op.c:357
#define WordSimilarityStrategyNumber
Definition: trgm.h:36
#define DistanceStrategyNumber
Definition: trgm.h:31
#define StrictWordSimilarityStrategyNumber
Definition: trgm.h:38
TRGM * generate_wildcard_trgm(const char *str, int slen)
Definition: trgm_op.c:867
#define StrictWordDistanceStrategyNumber
Definition: trgm.h:39
#define ISARRKEY(x)
Definition: trgm.h:100
#define ARRNELEM(x)
Definition: trgm.h:107
double index_strategy_get_limit(StrategyNumber strategy)
Definition: trgm_op.c:133
#define SimilarityStrategyNumber
Definition: trgm.h:30
bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
Definition: trgm_regexp.c:626
#define CMPTRGM(a, b)
Definition: trgm.h:46
bool trgm_contained_by(TRGM *trg1, TRGM *trg2)
Definition: trgm_op.c:1040
#define SIGNKEY
Definition: trgm.h:97
#define ISSIGNKEY(x)
Definition: trgm.h:101
char trgm[3]
Definition: trgm.h:42
#define CPTRGM(a, b)
Definition: trgm.h:48
#define ILikeStrategyNumber
Definition: trgm.h:33
TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:522
float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
Definition: trgm_op.c:992
#define LikeStrategyNumber
Definition: trgm.h:32
#define GETARR(x)
Definition: trgm.h:106
#define EqualStrategyNumber
Definition: trgm.h:40
#define RegExpStrategyNumber
Definition: trgm.h:34
#define WordDistanceStrategyNumber
Definition: trgm.h:37
Datum gtrgm_out(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:67
static int hemdistcache(CACHESIGN *a, CACHESIGN *b, int siglen)
Definition: trgm_gist.c:779
static void fillcache(CACHESIGN *item, TRGM *key, BITVECP sign, int siglen)
Definition: trgm_gist.c:749
Datum gtrgm_same(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:590
#define WISH_F(a, b, c)
Definition: trgm_gist.c:761
static int32 unionkey(BITVECP sbase, TRGM *add, int siglen)
Definition: trgm_gist.c:534
PG_FUNCTION_INFO_V1(gtrgm_in)
Datum gtrgm_distance(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:452
static int32 sizebitvec(BITVECP sign, int siglen)
Definition: trgm_gist.c:649
Datum gtrgm_in(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:57
Datum gtrgm_options(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:960
static int hemdistsign(BITVECP a, BITVECP b, int siglen)
Definition: trgm_gist.c:655
#define GETENTRY(vec, pos)
Definition: trgm_gist.c:40
Datum gtrgm_compress(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:115
Datum gtrgm_picksplit(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:795
Datum gtrgm_decompress(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:155
static TRGM * gtrgm_alloc(bool isalltrue, int siglen, BITVECP sign)
Definition: trgm_gist.c:77
Datum gtrgm_union(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:564
Datum gtrgm_penalty(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:687
#define GET_SIGLEN()
Definition: trgm_gist.c:20
static void makesign(BITVECP sign, TRGM *a, int siglen)
Definition: trgm_gist.c:98
Datum gtrgm_consistent(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:197
static int32 cnt_sml_sign_common(TRGM *qtrg, BITVECP sign, int siglen)
Definition: trgm_gist.c:179
static int hemdist(TRGM *a, TRGM *b, int siglen)
Definition: trgm_gist.c:671
static int comparecost(const void *a, const void *b)
Definition: trgm_gist.c:769
#define VARDATA(PTR)
Definition: varatt.h:278
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
#define VARSIZE(PTR)
Definition: varatt.h:279
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317