PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
trgm_gist.c
Go to the documentation of this file.
1 /*
2  * contrib/pg_trgm/trgm_gist.c
3  */
4 #include "postgres.h"
5 
6 #include "trgm.h"
7 
8 #include "access/stratnum.h"
9 #include "fmgr.h"
10 
11 
12 typedef struct
13 {
14  /* most recent inputs to gtrgm_consistent */
17  /* extracted trigrams for query */
19  /* if a regex operator, the extracted graph */
21 
22  /*
23  * The "query" and "trigrams" are stored in the same palloc block as this
24  * cache struct, at MAXALIGN'ed offsets. The graph however isn't.
25  */
27 
28 #define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))
29 
30 
41 
42 /* Number of one-bits in an unsigned byte */
43 static const uint8 number_of_ones[256] = {
44  0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
45  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
46  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
47  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
48  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
49  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
50  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
51  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
52  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
53  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
54  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
55  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
56  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
57  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
58  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
59  4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
60 };
61 
62 
63 Datum
65 {
66  elog(ERROR, "not implemented");
67  PG_RETURN_DATUM(0);
68 }
69 
70 Datum
72 {
73  elog(ERROR, "not implemented");
74  PG_RETURN_DATUM(0);
75 }
76 
77 static void
79 {
80  int32 k,
81  len = ARRNELEM(a);
82  trgm *ptr = GETARR(a);
83  int32 tmp = 0;
84 
85  MemSet((void *) sign, 0, sizeof(BITVEC));
86  SETBIT(sign, SIGLENBIT); /* set last unused bit */
87  for (k = 0; k < len; k++)
88  {
89  CPTRGM(((char *) &tmp), ptr + k);
90  HASH(sign, tmp);
91  }
92 }
93 
94 Datum
96 {
97  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
98  GISTENTRY *retval = entry;
99 
100  if (entry->leafkey)
101  { /* trgm */
102  TRGM *res;
103  text *val = DatumGetTextP(entry->key);
104 
105  res = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
106  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
107  gistentryinit(*retval, PointerGetDatum(res),
108  entry->rel, entry->page,
109  entry->offset, FALSE);
110  }
111  else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
112  !ISALLTRUE(DatumGetPointer(entry->key)))
113  {
114  int32 i,
115  len;
116  TRGM *res;
118 
119  LOOPBYTE
120  {
121  if ((sign[i] & 0xff) != 0xff)
122  PG_RETURN_POINTER(retval);
123  }
124 
125  len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
126  res = (TRGM *) palloc(len);
127  SET_VARSIZE(res, len);
128  res->flag = SIGNKEY | ALLISTRUE;
129 
130  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
131  gistentryinit(*retval, PointerGetDatum(res),
132  entry->rel, entry->page,
133  entry->offset, FALSE);
134  }
135  PG_RETURN_POINTER(retval);
136 }
137 
138 Datum
140 {
141  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
142  GISTENTRY *retval;
143  text *key;
144 
145  key = DatumGetTextP(entry->key);
146 
147  if (key != (text *) DatumGetPointer(entry->key))
148  {
149  /* need to pass back the decompressed item */
150  retval = palloc(sizeof(GISTENTRY));
151  gistentryinit(*retval, PointerGetDatum(key),
152  entry->rel, entry->page, entry->offset, entry->leafkey);
153  PG_RETURN_POINTER(retval);
154  }
155  else
156  {
157  /* we can return the entry as-is */
158  PG_RETURN_POINTER(entry);
159  }
160 }
161 
162 static int32
164 {
165  int32 count = 0;
166  int32 k,
167  len = ARRNELEM(qtrg);
168  trgm *ptr = GETARR(qtrg);
169  int32 tmp = 0;
170 
171  for (k = 0; k < len; k++)
172  {
173  CPTRGM(((char *) &tmp), ptr + k);
174  count += GETBIT(sign, HASHVAL(tmp));
175  }
176 
177  return count;
178 }
179 
180 Datum
182 {
183  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
184  text *query = PG_GETARG_TEXT_P(1);
186 
187  /* Oid subtype = PG_GETARG_OID(3); */
188  bool *recheck = (bool *) PG_GETARG_POINTER(4);
189  TRGM *key = (TRGM *) DatumGetPointer(entry->key);
190  TRGM *qtrg;
191  bool res;
192  Size querysize = VARSIZE(query);
193  gtrgm_consistent_cache *cache;
194  double nlimit;
195 
196  /*
197  * We keep the extracted trigrams in cache, because trigram extraction is
198  * relatively CPU-expensive. When trying to reuse a cached value, check
199  * strategy number not just query itself, because trigram extraction
200  * depends on strategy.
201  *
202  * The cached structure is a single palloc chunk containing the
203  * gtrgm_consistent_cache header, then the input query (starting at a
204  * MAXALIGN boundary), then the TRGM value (also starting at a MAXALIGN
205  * boundary). However we don't try to include the regex graph (if any) in
206  * that struct. (XXX currently, this approach can leak regex graphs
207  * across index rescans. Not clear if that's worth fixing.)
208  */
209  cache = (gtrgm_consistent_cache *) fcinfo->flinfo->fn_extra;
210  if (cache == NULL ||
211  cache->strategy != strategy ||
212  VARSIZE(cache->query) != querysize ||
213  memcmp((char *) cache->query, (char *) query, querysize) != 0)
214  {
215  gtrgm_consistent_cache *newcache;
216  TrgmPackedGraph *graph = NULL;
217  Size qtrgsize;
218 
219  switch (strategy)
220  {
223  qtrg = generate_trgm(VARDATA(query),
224  querysize - VARHDRSZ);
225  break;
226  case ILikeStrategyNumber:
227 #ifndef IGNORECASE
228  elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
229 #endif
230  /* FALL THRU */
231  case LikeStrategyNumber:
232  qtrg = generate_wildcard_trgm(VARDATA(query),
233  querysize - VARHDRSZ);
234  break;
236 #ifndef IGNORECASE
237  elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
238 #endif
239  /* FALL THRU */
241  qtrg = createTrgmNFA(query, PG_GET_COLLATION(),
242  &graph, fcinfo->flinfo->fn_mcxt);
243  /* just in case an empty array is returned ... */
244  if (qtrg && ARRNELEM(qtrg) <= 0)
245  {
246  pfree(qtrg);
247  qtrg = NULL;
248  }
249  break;
250  default:
251  elog(ERROR, "unrecognized strategy number: %d", strategy);
252  qtrg = NULL; /* keep compiler quiet */
253  break;
254  }
255 
256  qtrgsize = qtrg ? VARSIZE(qtrg) : 0;
257 
258  newcache = (gtrgm_consistent_cache *)
259  MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
261  MAXALIGN(querysize) +
262  qtrgsize);
263 
264  newcache->strategy = strategy;
265  newcache->query = (text *)
266  ((char *) newcache + MAXALIGN(sizeof(gtrgm_consistent_cache)));
267  memcpy((char *) newcache->query, (char *) query, querysize);
268  if (qtrg)
269  {
270  newcache->trigrams = (TRGM *)
271  ((char *) newcache->query + MAXALIGN(querysize));
272  memcpy((char *) newcache->trigrams, (char *) qtrg, qtrgsize);
273  /* release qtrg in case it was made in fn_mcxt */
274  pfree(qtrg);
275  }
276  else
277  newcache->trigrams = NULL;
278  newcache->graph = graph;
279 
280  if (cache)
281  pfree(cache);
282  fcinfo->flinfo->fn_extra = (void *) newcache;
283  cache = newcache;
284  }
285 
286  qtrg = cache->trigrams;
287 
288  switch (strategy)
289  {
292  /* Similarity search is exact. Word similarity search is inexact */
293  *recheck = (strategy == WordSimilarityStrategyNumber);
294  nlimit = (strategy == SimilarityStrategyNumber) ?
296 
297  if (GIST_LEAF(entry))
298  { /* all leafs contains orig trgm */
299  double tmpsml = cnt_sml(qtrg, key, *recheck);
300 
301  res = (tmpsml >= nlimit);
302  }
303  else if (ISALLTRUE(key))
304  { /* non-leaf contains signature */
305  res = true;
306  }
307  else
308  { /* non-leaf contains signature */
309  int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key));
310  int32 len = ARRNELEM(qtrg);
311 
312  if (len == 0)
313  res = false;
314  else
315  res = (((((float8) count) / ((float8) len))) >= nlimit);
316  }
317  break;
318  case ILikeStrategyNumber:
319 #ifndef IGNORECASE
320  elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
321 #endif
322  /* FALL THRU */
323  case LikeStrategyNumber:
324  /* Wildcard search is inexact */
325  *recheck = true;
326 
327  /*
328  * Check if all the extracted trigrams can be present in child
329  * nodes.
330  */
331  if (GIST_LEAF(entry))
332  { /* all leafs contains orig trgm */
333  res = trgm_contained_by(qtrg, key);
334  }
335  else if (ISALLTRUE(key))
336  { /* non-leaf contains signature */
337  res = true;
338  }
339  else
340  { /* non-leaf contains signature */
341  int32 k,
342  tmp = 0,
343  len = ARRNELEM(qtrg);
344  trgm *ptr = GETARR(qtrg);
345  BITVECP sign = GETSIGN(key);
346 
347  res = true;
348  for (k = 0; k < len; k++)
349  {
350  CPTRGM(((char *) &tmp), ptr + k);
351  if (!GETBIT(sign, HASHVAL(tmp)))
352  {
353  res = false;
354  break;
355  }
356  }
357  }
358  break;
360 #ifndef IGNORECASE
361  elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
362 #endif
363  /* FALL THRU */
365  /* Regexp search is inexact */
366  *recheck = true;
367 
368  /* Check regex match as much as we can with available info */
369  if (qtrg)
370  {
371  if (GIST_LEAF(entry))
372  { /* all leafs contains orig trgm */
373  bool *check;
374 
375  check = trgm_presence_map(qtrg, key);
376  res = trigramsMatchGraph(cache->graph, check);
377  pfree(check);
378  }
379  else if (ISALLTRUE(key))
380  { /* non-leaf contains signature */
381  res = true;
382  }
383  else
384  { /* non-leaf contains signature */
385  int32 k,
386  tmp = 0,
387  len = ARRNELEM(qtrg);
388  trgm *ptr = GETARR(qtrg);
389  BITVECP sign = GETSIGN(key);
390  bool *check;
391 
392  /*
393  * GETBIT() tests may give false positives, due to limited
394  * size of the sign array. But since trigramsMatchGraph()
395  * implements a monotone boolean function, false positives
396  * in the check array can't lead to false negative answer.
397  * So we can apply trigramsMatchGraph despite uncertainty,
398  * and that usefully improves the quality of the search.
399  */
400  check = (bool *) palloc(len * sizeof(bool));
401  for (k = 0; k < len; k++)
402  {
403  CPTRGM(((char *) &tmp), ptr + k);
404  check[k] = GETBIT(sign, HASHVAL(tmp));
405  }
406  res = trigramsMatchGraph(cache->graph, check);
407  pfree(check);
408  }
409  }
410  else
411  {
412  /* trigram-free query must be rechecked everywhere */
413  res = true;
414  }
415  break;
416  default:
417  elog(ERROR, "unrecognized strategy number: %d", strategy);
418  res = false; /* keep compiler quiet */
419  break;
420  }
421 
422  PG_RETURN_BOOL(res);
423 }
424 
425 Datum
427 {
428  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
429  text *query = PG_GETARG_TEXT_P(1);
431 
432  /* Oid subtype = PG_GETARG_OID(3); */
433  bool *recheck = (bool *) PG_GETARG_POINTER(4);
434  TRGM *key = (TRGM *) DatumGetPointer(entry->key);
435  TRGM *qtrg;
436  float8 res;
437  Size querysize = VARSIZE(query);
438  char *cache = (char *) fcinfo->flinfo->fn_extra;
439 
440  /*
441  * Cache the generated trigrams across multiple calls with the same query.
442  */
443  if (cache == NULL ||
444  VARSIZE(cache) != querysize ||
445  memcmp(cache, query, querysize) != 0)
446  {
447  char *newcache;
448 
449  qtrg = generate_trgm(VARDATA(query), querysize - VARHDRSZ);
450 
451  newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
452  MAXALIGN(querysize) +
453  VARSIZE(qtrg));
454 
455  memcpy(newcache, query, querysize);
456  memcpy(newcache + MAXALIGN(querysize), qtrg, VARSIZE(qtrg));
457 
458  if (cache)
459  pfree(cache);
460  fcinfo->flinfo->fn_extra = newcache;
461  cache = newcache;
462  }
463 
464  qtrg = (TRGM *) (cache + MAXALIGN(querysize));
465 
466  switch (strategy)
467  {
470  *recheck = strategy == WordDistanceStrategyNumber;
471  if (GIST_LEAF(entry))
472  { /* all leafs contains orig trgm */
473 
474  /*
475  * Prevent gcc optimizing the sml variable using volatile
476  * keyword. Otherwise res can differ from the
477  * word_similarity_dist_op() function.
478  */
479  float4 volatile sml = cnt_sml(qtrg, key, *recheck);
480 
481  res = 1.0 - sml;
482  }
483  else if (ISALLTRUE(key))
484  { /* all leafs contains orig trgm */
485  res = 0.0;
486  }
487  else
488  { /* non-leaf contains signature */
489  int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key));
490  int32 len = ARRNELEM(qtrg);
491 
492  res = (len == 0) ? -1.0 : 1.0 - ((float8) count) / ((float8) len);
493  }
494  break;
495  default:
496  elog(ERROR, "unrecognized strategy number: %d", strategy);
497  res = 0; /* keep compiler quiet */
498  break;
499  }
500 
501  PG_RETURN_FLOAT8(res);
502 }
503 
504 static int32
505 unionkey(BITVECP sbase, TRGM *add)
506 {
507  int32 i;
508 
509  if (ISSIGNKEY(add))
510  {
511  BITVECP sadd = GETSIGN(add);
512 
513  if (ISALLTRUE(add))
514  return 1;
515 
516  LOOPBYTE
517  sbase[i] |= sadd[i];
518  }
519  else
520  {
521  trgm *ptr = GETARR(add);
522  int32 tmp = 0;
523 
524  for (i = 0; i < ARRNELEM(add); i++)
525  {
526  CPTRGM(((char *) &tmp), ptr + i);
527  HASH(sbase, tmp);
528  }
529  }
530  return 0;
531 }
532 
533 
534 Datum
536 {
538  int32 len = entryvec->n;
539  int *size = (int *) PG_GETARG_POINTER(1);
540  BITVEC base;
541  int32 i;
542  int32 flag = 0;
543  TRGM *result;
544 
545  MemSet((void *) base, 0, sizeof(BITVEC));
546  for (i = 0; i < len; i++)
547  {
548  if (unionkey(base, GETENTRY(entryvec, i)))
549  {
550  flag = ALLISTRUE;
551  break;
552  }
553  }
554 
555  flag |= SIGNKEY;
556  len = CALCGTSIZE(flag, 0);
557  result = (TRGM *) palloc(len);
558  SET_VARSIZE(result, len);
559  result->flag = flag;
560  if (!ISALLTRUE(result))
561  memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
562  *size = len;
563 
564  PG_RETURN_POINTER(result);
565 }
566 
567 Datum
569 {
570  TRGM *a = (TRGM *) PG_GETARG_POINTER(0);
571  TRGM *b = (TRGM *) PG_GETARG_POINTER(1);
572  bool *result = (bool *) PG_GETARG_POINTER(2);
573 
574  if (ISSIGNKEY(a))
575  { /* then b also ISSIGNKEY */
576  if (ISALLTRUE(a) && ISALLTRUE(b))
577  *result = true;
578  else if (ISALLTRUE(a))
579  *result = false;
580  else if (ISALLTRUE(b))
581  *result = false;
582  else
583  {
584  int32 i;
585  BITVECP sa = GETSIGN(a),
586  sb = GETSIGN(b);
587 
588  *result = true;
589  LOOPBYTE
590  {
591  if (sa[i] != sb[i])
592  {
593  *result = false;
594  break;
595  }
596  }
597  }
598  }
599  else
600  { /* a and b ISARRKEY */
601  int32 lena = ARRNELEM(a),
602  lenb = ARRNELEM(b);
603 
604  if (lena != lenb)
605  *result = false;
606  else
607  {
608  trgm *ptra = GETARR(a),
609  *ptrb = GETARR(b);
610  int32 i;
611 
612  *result = true;
613  for (i = 0; i < lena; i++)
614  if (CMPTRGM(ptra + i, ptrb + i))
615  {
616  *result = false;
617  break;
618  }
619  }
620  }
621 
622  PG_RETURN_POINTER(result);
623 }
624 
625 static int32
627 {
628  int32 size = 0,
629  i;
630 
631  LOOPBYTE
632  size += number_of_ones[(unsigned char) sign[i]];
633  return size;
634 }
635 
636 static int
638 {
639  int i,
640  diff,
641  dist = 0;
642 
643  LOOPBYTE
644  {
645  diff = (unsigned char) (a[i] ^ b[i]);
646  dist += number_of_ones[diff];
647  }
648  return dist;
649 }
650 
651 static int
653 {
654  if (ISALLTRUE(a))
655  {
656  if (ISALLTRUE(b))
657  return 0;
658  else
659  return SIGLENBIT - sizebitvec(GETSIGN(b));
660  }
661  else if (ISALLTRUE(b))
662  return SIGLENBIT - sizebitvec(GETSIGN(a));
663 
664  return hemdistsign(GETSIGN(a), GETSIGN(b));
665 }
666 
667 Datum
669 {
670  GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
671  GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
672  float *penalty = (float *) PG_GETARG_POINTER(2);
673  TRGM *origval = (TRGM *) DatumGetPointer(origentry->key);
674  TRGM *newval = (TRGM *) DatumGetPointer(newentry->key);
675  BITVECP orig = GETSIGN(origval);
676 
677  *penalty = 0.0;
678 
679  if (ISARRKEY(newval))
680  {
681  char *cache = (char *) fcinfo->flinfo->fn_extra;
682  TRGM *cachedVal = (TRGM *) (cache + MAXALIGN(sizeof(BITVEC)));
683  Size newvalsize = VARSIZE(newval);
684  BITVECP sign;
685 
686  /*
687  * Cache the sign data across multiple calls with the same newval.
688  */
689  if (cache == NULL ||
690  VARSIZE(cachedVal) != newvalsize ||
691  memcmp(cachedVal, newval, newvalsize) != 0)
692  {
693  char *newcache;
694 
695  newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
696  MAXALIGN(sizeof(BITVEC)) +
697  newvalsize);
698 
699  makesign((BITVECP) newcache, newval);
700 
701  cachedVal = (TRGM *) (newcache + MAXALIGN(sizeof(BITVEC)));
702  memcpy(cachedVal, newval, newvalsize);
703 
704  if (cache)
705  pfree(cache);
706  fcinfo->flinfo->fn_extra = newcache;
707  cache = newcache;
708  }
709 
710  sign = (BITVECP) cache;
711 
712  if (ISALLTRUE(origval))
713  *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
714  else
715  *penalty = hemdistsign(sign, orig);
716  }
717  else
718  *penalty = hemdist(origval, newval);
719  PG_RETURN_POINTER(penalty);
720 }
721 
722 typedef struct
723 {
724  bool allistrue;
726 } CACHESIGN;
727 
728 static void
730 {
731  item->allistrue = false;
732  if (ISARRKEY(key))
733  makesign(item->sign, key);
734  else if (ISALLTRUE(key))
735  item->allistrue = true;
736  else
737  memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
738 }
739 
740 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
741 typedef struct
742 {
743  OffsetNumber pos;
744  int32 cost;
745 } SPLITCOST;
746 
747 static int
748 comparecost(const void *a, const void *b)
749 {
750  if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
751  return 0;
752  else
753  return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
754 }
755 
756 
757 static int
759 {
760  if (a->allistrue)
761  {
762  if (b->allistrue)
763  return 0;
764  else
765  return SIGLENBIT - sizebitvec(b->sign);
766  }
767  else if (b->allistrue)
768  return SIGLENBIT - sizebitvec(a->sign);
769 
770  return hemdistsign(a->sign, b->sign);
771 }
772 
773 Datum
775 {
777  OffsetNumber maxoff = entryvec->n - 2;
779  OffsetNumber k,
780  j;
781  TRGM *datum_l,
782  *datum_r;
783  BITVECP union_l,
784  union_r;
785  int32 size_alpha,
786  size_beta;
787  int32 size_waste,
788  waste = -1;
789  int32 nbytes;
790  OffsetNumber seed_1 = 0,
791  seed_2 = 0;
792  OffsetNumber *left,
793  *right;
794  BITVECP ptr;
795  int i;
796  CACHESIGN *cache;
797  SPLITCOST *costvector;
798 
799  /* cache the sign data for each existing item */
800  cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
801  for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k))
802  fillcache(&cache[k], GETENTRY(entryvec, k));
803 
804  /* now find the two furthest-apart items */
805  for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
806  {
807  for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
808  {
809  size_waste = hemdistcache(&(cache[j]), &(cache[k]));
810  if (size_waste > waste)
811  {
812  waste = size_waste;
813  seed_1 = k;
814  seed_2 = j;
815  }
816  }
817  }
818 
819  /* just in case we didn't make a selection ... */
820  if (seed_1 == 0 || seed_2 == 0)
821  {
822  seed_1 = 1;
823  seed_2 = 2;
824  }
825 
826  /* initialize the result vectors */
827  nbytes = (maxoff + 2) * sizeof(OffsetNumber);
828  v->spl_left = left = (OffsetNumber *) palloc(nbytes);
829  v->spl_right = right = (OffsetNumber *) palloc(nbytes);
830  v->spl_nleft = 0;
831  v->spl_nright = 0;
832 
833  /* form initial .. */
834  if (cache[seed_1].allistrue)
835  {
836  datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
837  SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
838  datum_l->flag = SIGNKEY | ALLISTRUE;
839  }
840  else
841  {
842  datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0));
843  SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
844  datum_l->flag = SIGNKEY;
845  memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
846  }
847  if (cache[seed_2].allistrue)
848  {
849  datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
850  SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
851  datum_r->flag = SIGNKEY | ALLISTRUE;
852  }
853  else
854  {
855  datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0));
856  SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
857  datum_r->flag = SIGNKEY;
858  memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
859  }
860 
861  union_l = GETSIGN(datum_l);
862  union_r = GETSIGN(datum_r);
863  maxoff = OffsetNumberNext(maxoff);
864  fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
865  /* sort before ... */
866  costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
867  for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
868  {
869  costvector[j - 1].pos = j;
870  size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
871  size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
872  costvector[j - 1].cost = abs(size_alpha - size_beta);
873  }
874  qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
875 
876  for (k = 0; k < maxoff; k++)
877  {
878  j = costvector[k].pos;
879  if (j == seed_1)
880  {
881  *left++ = j;
882  v->spl_nleft++;
883  continue;
884  }
885  else if (j == seed_2)
886  {
887  *right++ = j;
888  v->spl_nright++;
889  continue;
890  }
891 
892  if (ISALLTRUE(datum_l) || cache[j].allistrue)
893  {
894  if (ISALLTRUE(datum_l) && cache[j].allistrue)
895  size_alpha = 0;
896  else
897  size_alpha = SIGLENBIT - sizebitvec(
898  (cache[j].allistrue) ? GETSIGN(datum_l) : GETSIGN(cache[j].sign)
899  );
900  }
901  else
902  size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
903 
904  if (ISALLTRUE(datum_r) || cache[j].allistrue)
905  {
906  if (ISALLTRUE(datum_r) && cache[j].allistrue)
907  size_beta = 0;
908  else
909  size_beta = SIGLENBIT - sizebitvec(
910  (cache[j].allistrue) ? GETSIGN(datum_r) : GETSIGN(cache[j].sign)
911  );
912  }
913  else
914  size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
915 
916  if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
917  {
918  if (ISALLTRUE(datum_l) || cache[j].allistrue)
919  {
920  if (!ISALLTRUE(datum_l))
921  MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
922  }
923  else
924  {
925  ptr = cache[j].sign;
926  LOOPBYTE
927  union_l[i] |= ptr[i];
928  }
929  *left++ = j;
930  v->spl_nleft++;
931  }
932  else
933  {
934  if (ISALLTRUE(datum_r) || cache[j].allistrue)
935  {
936  if (!ISALLTRUE(datum_r))
937  MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
938  }
939  else
940  {
941  ptr = cache[j].sign;
942  LOOPBYTE
943  union_r[i] |= ptr[i];
944  }
945  *right++ = j;
946  v->spl_nright++;
947  }
948  }
949 
950  *right = *left = FirstOffsetNumber;
951  v->spl_ldatum = PointerGetDatum(datum_l);
952  v->spl_rdatum = PointerGetDatum(datum_r);
953 
955 }
#define GIST_LEAF(entry)
Definition: gist.h:133
Relation rel
Definition: gist.h:124
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:305
OffsetNumber pos
Definition: hstore_gist.c:323
#define SETBIT(x, i)
Definition: blutils.c:33
bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
Definition: trgm_regexp.c:634
#define CMPTRGM(a, b)
Definition: trgm.h:42
#define VARDATA(PTR)
Definition: postgres.h:305
static int hemdist(TRGM *a, TRGM *b)
Definition: trgm_gist.c:652
#define RegExpStrategyNumber
Definition: trgm.h:33
static int32 cnt_sml_sign_common(TRGM *qtrg, BITVECP sign)
Definition: trgm_gist.c:163
TRGM * generate_wildcard_trgm(const char *str, int slen)
Definition: trgm_op.c:796
#define SimilarityStrategyNumber
Definition: trgm.h:29
#define VARSIZE(PTR)
Definition: postgres.h:306
#define ISSIGNKEY(x)
Definition: trgm.h:99
#define GETSIGN(x)
Definition: hstore_gist.c:54
#define PointerGetDatum(X)
Definition: postgres.h:564
#define DistanceStrategyNumber
Definition: trgm.h:30
#define VARHDRSZ
Definition: c.h:440
PG_FUNCTION_INFO_V1(gtrgm_in)
TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:516
#define RegExpICaseStrategyNumber
Definition: trgm.h:34
#define SIGLENBIT
Definition: hstore_gist.c:17
#define WordDistanceStrategyNumber
Definition: trgm.h:36
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:310
double similarity_threshold
Definition: trgm_op.c:19
static int hemdistcache(CACHESIGN *a, CACHESIGN *b)
Definition: trgm_gist.c:758
OffsetNumber * spl_left
Definition: gist.h:105
Datum spl_rdatum
Definition: gist.h:112
unsigned char uint8
Definition: c.h:263
Datum gtrgm_union(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:535
int32 n
Definition: gist.h:160
static int comparecost(const void *a, const void *b)
Definition: trgm_gist.c:748
uint16 StrategyNumber
Definition: stratnum.h:22
float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
Definition: trgm_op.c:928
#define ALLISTRUE
Definition: hstore_gist.c:47
#define MemSet(start, val, len)
Definition: c.h:852
static void fillcache(CACHESIGN *item, TRGM *key)
Definition: trgm_gist.c:729
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:232
#define HASH(sign, val)
Definition: hstore_gist.c:38
double word_similarity_threshold
Definition: trgm_op.c:20
#define ARRNELEM(x)
Definition: trgm.h:105
bool * trgm_presence_map(TRGM *query, TRGM *key)
Definition: trgm_op.c:1015
int spl_nleft
Definition: gist.h:106
Datum gtrgm_same(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:568
static const uint8 number_of_ones[256]
Definition: trgm_gist.c:43
#define PG_GET_COLLATION()
Definition: fmgr.h:155
signed int int32
Definition: c.h:253
int32 cost
Definition: hstore_gist.c:324
#define WISH_F(a, b, c)
Definition: trgm_gist.c:740
uint16 OffsetNumber
Definition: off.h:24
Page page
Definition: gist.h:125
#define ISARRKEY(x)
Definition: trgm.h:98
static int32 unionkey(BITVECP sbase, TRGM *add)
Definition: trgm_gist.c:505
#define GETARR(x)
Definition: trgm.h:104
Datum gtrgm_distance(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:426
void pfree(void *pointer)
Definition: mcxt.c:992
#define GETBIT(x, i)
Definition: blutils.c:34
#define ERROR
Definition: elog.h:43
double float8
Definition: c.h:377
TrgmPackedGraph * graph
Definition: trgm_gist.c:20
Definition: trgm.h:63
#define FALSE
Definition: c.h:218
int spl_nright
Definition: gist.h:111
Datum gtrgm_consistent(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:181
static void makesign(BITVECP sign, TRGM *a)
Definition: trgm_gist.c:78
char BITVEC[SIGLEN]
Definition: hstore_gist.c:19
BITVEC sign
Definition: trgm_gist.c:725
char sign
Definition: informix.c:693
StrategyNumber strategy
Definition: trgm_gist.c:15
Datum key
Definition: gist.h:123
#define LikeStrategyNumber
Definition: trgm.h:31
#define FirstOffsetNumber
Definition: off.h:27
char * flag(int b)
Definition: test-ctype.c:33
#define HASHVAL(val)
Definition: hstore_gist.c:37
bool trgm_contained_by(TRGM *trg1, TRGM *trg2)
Definition: trgm_op.c:976
bool leafkey
Definition: gist.h:127
Datum gtrgm_out(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:71
float float4
Definition: c.h:376
#define SIGNKEY
Definition: trgm.h:95
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:303
uintptr_t Datum
Definition: postgres.h:374
#define GETENTRY(vec, pos)
Definition: trgm_gist.c:28
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:297
uint8 flag
Definition: trgm.h:66
Datum gtrgm_penalty(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:668
Datum gtrgm_picksplit(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:774
#define ISALLTRUE(x)
Definition: hstore_gist.c:49
#define WordSimilarityStrategyNumber
Definition: trgm.h:35
char trgm[3]
Definition: trgm.h:38
static int hemdistsign(BITVECP a, BITVECP b)
Definition: trgm_gist.c:637
#define CALCGTSIZE(flag)
Definition: hstore_gist.c:52
Datum spl_ldatum
Definition: gist.h:107
#define NULL
Definition: c.h:226
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:169
Datum gtrgm_decompress(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:139
TRGM * generate_trgm(char *str, int slen)
Definition: trgm_op.c:321
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
size_t Size
Definition: c.h:352
#define newval
OffsetNumber * spl_right
Definition: gist.h:110
#define MAXALIGN(LEN)
Definition: c.h:583
#define DatumGetTextP(X)
Definition: fmgr.h:248
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:228
#define DatumGetPointer(X)
Definition: postgres.h:557
void * palloc(Size size)
Definition: mcxt.c:891
#define PG_GETARG_TEXT_P(n)
Definition: fmgr.h:269
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:749
#define LOOPBYTE
Definition: hstore_gist.c:25
int i
Definition: c.h:434
#define PG_FUNCTION_ARGS
Definition: fmgr.h:150
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:330
char * BITVECP
Definition: hstore_gist.c:20
#define ILikeStrategyNumber
Definition: trgm.h:32
bool allistrue
Definition: trgm_gist.c:724
#define elog
Definition: elog.h:219
#define qsort(a, b, c, d)
Definition: port.h:440
Datum gtrgm_in(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:64
static int32 sizebitvec(BITVECP sign)
Definition: trgm_gist.c:626
Datum gtrgm_compress(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:95
OffsetNumber offset
Definition: gist.h:126
long val
Definition: informix.c:689
#define CPTRGM(a, b)
Definition: trgm.h:44