PostgreSQL Source Code  git master
trgm.h File Reference
#include "access/gist.h"
#include "access/itup.h"
#include "access/stratnum.h"
#include "storage/bufpage.h"
Include dependency graph for trgm.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  TRGM
 

Macros

#define LPADDING   2
 
#define RPADDING   1
 
#define KEEPONLYALNUM
 
#define IGNORECASE
 
#define DIVUNION
 
#define SimilarityStrategyNumber   1
 
#define DistanceStrategyNumber   2
 
#define LikeStrategyNumber   3
 
#define ILikeStrategyNumber   4
 
#define RegExpStrategyNumber   5
 
#define RegExpICaseStrategyNumber   6
 
#define WordSimilarityStrategyNumber   7
 
#define WordDistanceStrategyNumber   8
 
#define StrictWordSimilarityStrategyNumber   9
 
#define StrictWordDistanceStrategyNumber   10
 
#define EqualStrategyNumber   11
 
#define CMPCHAR(a, b)   ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) )
 
#define CMPPCHAR(a, b, i)   CMPCHAR( *(((const char*)(a))+i), *(((const char*)(b))+i) )
 
#define CMPTRGM(a, b)   ( CMPPCHAR(a,b,0) ? CMPPCHAR(a,b,0) : ( CMPPCHAR(a,b,1) ? CMPPCHAR(a,b,1) : CMPPCHAR(a,b,2) ) )
 
#define CPTRGM(a, b)
 
#define ISWORDCHR(c)   (t_isalpha(c) || t_isdigit(c))
 
#define ISPRINTABLECHAR(a)   ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )
 
#define ISPRINTABLETRGM(t)   ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )
 
#define ISESCAPECHAR(x)   (*(x) == '\\') /* Wildcard escape character */
 
#define ISWILDCARDCHAR(x)
 
#define TRGMHDRSIZE   (VARHDRSZ + sizeof(uint8))
 
#define SIGLEN_DEFAULT   (sizeof(int) * 3)
 
#define SIGLEN_MAX   GISTMaxIndexKeySize
 
#define BITBYTE   8
 
#define SIGLENBIT(siglen)   ((siglen) * BITBYTE - 1) /* see makesign */
 
#define LOOPBYTE(siglen)   for (i = 0; i < (siglen); i++)
 
#define GETBYTE(x, i)   ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
 
#define GETBITBYTE(x, i)   ( (((char)(x)) >> (i)) & 0x01 )
 
#define CLRBIT(x, i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
 
#define SETBIT(x, i)   GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )
 
#define GETBIT(x, i)   ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
 
#define HASHVAL(val, siglen)   (((unsigned int)(val)) % SIGLENBIT(siglen))
 
#define HASH(sign, val, siglen)   SETBIT((sign), HASHVAL(val, siglen))
 
#define ARRKEY   0x01
 
#define SIGNKEY   0x02
 
#define ALLISTRUE   0x04
 
#define ISARRKEY(x)   ( ((TRGM*)x)->flag & ARRKEY )
 
#define ISSIGNKEY(x)   ( ((TRGM*)x)->flag & SIGNKEY )
 
#define ISALLTRUE(x)   ( ((TRGM*)x)->flag & ALLISTRUE )
 
#define CALCGTSIZE(flag, len)   ( TRGMHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(trgm)) : (((flag) & ALLISTRUE) ? 0 : (len)) ) )
 
#define GETSIGN(x)   ( (BITVECP)( (char*)x+TRGMHDRSIZE ) )
 
#define GETARR(x)   ( (trgm*)( (char*)x+TRGMHDRSIZE ) )
 
#define ARRNELEM(x)   ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) )
 
#define CALCSML(count, len1, len2)   ((float4) (count)) / ((float4) ((len1) + (len2) - (count)))
 

Typedefs

typedef char trgm[3]
 
typedef char * BITVECP
 
typedef struct TrgmPackedGraph TrgmPackedGraph
 

Functions

double index_strategy_get_limit (StrategyNumber strategy)
 
uint32 trgm2int (trgm *ptr)
 
void compact_trigram (trgm *tptr, char *str, int bytelen)
 
TRGM * generate_trgm (char *str, int slen)
 
TRGM * generate_wildcard_trgm (const char *str, int slen)
 
float4 cnt_sml (TRGM *trg1, TRGM *trg2, bool inexact)
 
bool trgm_contained_by (TRGM *trg1, TRGM *trg2)
 
bool * trgm_presence_map (TRGM *query, TRGM *key)
 
TRGM * createTrgmNFA (text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
 
bool trigramsMatchGraph (TrgmPackedGraph *graph, bool *check)
 

Variables

double similarity_threshold
 
double word_similarity_threshold
 
double strict_word_similarity_threshold
 

Macro Definition Documentation

◆ ALLISTRUE

#define ALLISTRUE   0x04

Definition at line 99 of file trgm.h.

◆ ARRKEY

#define ARRKEY   0x01

Definition at line 97 of file trgm.h.

Referenced by expandColorTrigrams(), generate_trgm(), and generate_wildcard_trgm().

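◆ ARRNELEM

#define ARRNELEM (   x)    ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) )

Definition at line 108 of file trgm.h.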

◆ BITBYTE

#define BITBYTE   8

Definition at line 79 of file trgm.h.

◆ CALCGTSIZE

#define CALCGTSIZE (   flag,
  len 
)    ( TRGMHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(trgm)) : (((flag) & ALLISTRUE) ? 0 : (len)) ) )

Definition at line 105 of file trgm.h.

◆ CALCSML

#define CALCSML (   count,
  len1,
  len2 
)    ((float4) (count)) / ((float4) ((len1) + (len2) - (count)))

Definition at line 117 of file trgm.h.

Referenced by cnt_sml(), and iterate_word_similarity().

◆ CLRBIT

#define CLRBIT (   x,
  i 
)    GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )

Definition at line 90 of file trgm.h.

◆ CMPCHAR

#define CMPCHAR (   a,
  b 
)    ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) )

Definition at line 44 of file trgm.h.

◆ CMPPCHAR

#define CMPPCHAR (   a,
  b,
  i 
)    CMPCHAR( *(((const char*)(a))+i), *(((const char*)(b))+i) )

Definition at line 45 of file trgm.h.

◆ CMPTRGM

#define CMPTRGM (   a,
  b 
)    ( CMPPCHAR(a,b,0) ? CMPPCHAR(a,b,0) : ( CMPPCHAR(a,b,1) ? CMPPCHAR(a,b,1) : CMPPCHAR(a,b,2) ) )

Definition at line 46 of file trgm.h.

◆ CPTRGM

#define CPTRGM (   a,
  b 
)
Value:
do { \
*(((char*)(a))+0) = *(((char*)(b))+0); \
*(((char*)(a))+1) = *(((char*)(b))+1); \
*(((char*)(a))+2) = *(((char*)(b))+2); \
} while(0)

Definition at line 48 of file trgm.h.

Referenced by cnt_sml_sign_common(), compact_trigram(), gtrgm_consistent(), make_trigrams(), makesign(), show_trgm(), and unionkey().

◆ DistanceStrategyNumber

#define DistanceStrategyNumber   2

Definition at line 31 of file trgm.h.

Referenced by gtrgm_distance().

◆ DIVUNION

#define DIVUNION

Definition at line 27 of file trgm.h.

◆ EqualStrategyNumber

#define EqualStrategyNumber   11

◆ GETARR

#define GETARR (   x)    ( (trgm*)( (char*)x+TRGMHDRSIZE ) )

Definition at line 107 of file trgm.h.

◆ GETBIT

#define GETBIT (   x,
  i 
)    ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )

Definition at line 92 of file trgm.h.

◆ GETBITBYTE

#define GETBITBYTE (   x,
  i 
)    ( (((char)(x)) >> (i)) & 0x01 )

Definition at line 89 of file trgm.h.

◆ GETBYTE

#define GETBYTE (   x,
  i 
)    ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )

Definition at line 88 of file trgm.h.

◆ GETSIGN

#define GETSIGN (   x)    ( (BITVECP)( (char*)x+TRGMHDRSIZE ) )

Definition at line 106 of file trgm.h.

◆ HASH

#define HASH (   sign,
  val,
  siglen 
)    SETBIT((sign), HASHVAL(val, siglen))

Definition at line 95 of file trgm.h.

◆ HASHVAL

#define HASHVAL (   val,
  siglen 
)    (((unsigned int)(val)) % SIGLENBIT(siglen))

Definition at line 94 of file trgm.h.

◆ IGNORECASE

#define IGNORECASE

Definition at line 26 of file trgm.h.

◆ ILikeStrategyNumber

#define ILikeStrategyNumber   4

◆ ISALLTRUE

#define ISALLTRUE (   x)    ( ((TRGM*)x)->flag & ALLISTRUE )

Definition at line 103 of file trgm.h.

◆ ISARRKEY

#define ISARRKEY (   x)    ( ((TRGM*)x)->flag & ARRKEY )

Definition at line 101 of file trgm.h.

Referenced by fillcache(), and gtrgm_penalty().

◆ ISESCAPECHAR

#define ISESCAPECHAR (   x)    (*(x) == '\\') /* Wildcard escape character */

Definition at line 63 of file trgm.h.

Referenced by get_wildcard_part().

◆ ISPRINTABLECHAR

#define ISPRINTABLECHAR (   a)    ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )

Definition at line 56 of file trgm.h.

◆ ISPRINTABLETRGM

#define ISPRINTABLETRGM (   t)    ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )

Definition at line 61 of file trgm.h.

Referenced by show_trgm().

◆ ISSIGNKEY

#define ISSIGNKEY (   x)    ( ((TRGM*)x)->flag & SIGNKEY )

Definition at line 102 of file trgm.h.

Referenced by gtrgm_compress(), gtrgm_same(), and unionkey().

◆ ISWILDCARDCHAR

#define ISWILDCARDCHAR (   x)
Value:
(*(x) == '_' || *(x) == '%') /* Wildcard
* meta-character */

Definition at line 64 of file trgm.h.

Referenced by get_wildcard_part().

◆ ISWORDCHR

#define ISWORDCHR (   c)    (t_isalpha(c) || t_isdigit(c))

Definition at line 55 of file trgm.h.

Referenced by find_word(), get_wildcard_part(), and getColorInfo().

◆ KEEPONLYALNUM

#define KEEPONLYALNUM

Definition at line 18 of file trgm.h.

◆ LikeStrategyNumber

#define LikeStrategyNumber   3

◆ LOOPBYTE

#define LOOPBYTE (   siglen)    for (i = 0; i < (siglen); i++)

Definition at line 85 of file trgm.h.

◆ LPADDING

#define LPADDING   2

Definition at line 16 of file trgm.h.

Referenced by generate_trgm_only(), generate_wildcard_trgm(), and get_wildcard_part().

◆ RegExpICaseStrategyNumber

#define RegExpICaseStrategyNumber   6

◆ RegExpStrategyNumber

#define RegExpStrategyNumber   5

◆ RPADDING

#define RPADDING   1

Definition at line 17 of file trgm.h.

Referenced by generate_trgm_only(), generate_wildcard_trgm(), and get_wildcard_part().

◆ SETBIT

#define SETBIT (   x,
  i 
)    GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )

Definition at line 91 of file trgm.h.

◆ SIGLEN_DEFAULT

#define SIGLEN_DEFAULT   (sizeof(int) * 3)

Definition at line 77 of file trgm.h.

◆ SIGLEN_MAX

#define SIGLEN_MAX   GISTMaxIndexKeySize

Definition at line 78 of file trgm.h.

◆ SIGLENBIT

#define SIGLENBIT (   siglen)    ((siglen) * BITBYTE - 1) /* see makesign */

Definition at line 81 of file trgm.h.

◆ SIGNKEY

#define SIGNKEY   0x02

Definition at line 98 of file trgm.h.

Referenced by gtrgm_alloc().

◆ SimilarityStrategyNumber

#define SimilarityStrategyNumber   1

◆ StrictWordDistanceStrategyNumber

#define StrictWordDistanceStrategyNumber   10

Definition at line 39 of file trgm.h.

Referenced by gtrgm_distance().

◆ StrictWordSimilarityStrategyNumber

#define StrictWordSimilarityStrategyNumber   9

◆ TRGMHDRSIZE

#define TRGMHDRSIZE   (VARHDRSZ + sizeof(uint8))

Definition at line 74 of file trgm.h.

Referenced by expandColorTrigrams(), generate_trgm(), and generate_wildcard_trgm().

◆ WordDistanceStrategyNumber

#define WordDistanceStrategyNumber   8

Definition at line 37 of file trgm.h.

Referenced by gtrgm_distance().

◆ WordSimilarityStrategyNumber

#define WordSimilarityStrategyNumber   7

Typedef Documentation

◆ BITVECP

typedef char* BITVECP

Definition at line 83 of file trgm.h.

◆ trgm

typedef char trgm[3]

Definition at line 42 of file trgm.h.

◆ TrgmPackedGraph

typedef struct TrgmPackedGraph TrgmPackedGraph

Definition at line 122 of file trgm.h.

Function Documentation

◆ cnt_sml()

float4 cnt_sml ( TRGM * trg1,
TRGM * trg2,
bool  inexact 
)

Definition at line 996 of file trgm_op.c.

References ARRNELEM, CALCSML, CMPTRGM, and GETARR.

Referenced by gtrgm_consistent(), gtrgm_distance(), and similarity().

997 {
998  trgm *ptr1,
999  *ptr2;
1000  int count = 0;
1001  int len1,
1002  len2;
1003 
1004  ptr1 = GETARR(trg1);
1005  ptr2 = GETARR(trg2);
1006 
1007  len1 = ARRNELEM(trg1);
1008  len2 = ARRNELEM(trg2);
1009 
1010  /* explicit test is needed to avoid 0/0 division when both lengths are 0 */
1011  if (len1 <= 0 || len2 <= 0)
1012  return (float4) 0.0;
1013 
1014  while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
1015  {
1016  int res = CMPTRGM(ptr1, ptr2);
1017 
1018  if (res < 0)
1019  ptr1++;
1020  else if (res > 0)
1021  ptr2++;
1022  else
1023  {
1024  ptr1++;
1025  ptr2++;
1026  count++;
1027  }
1028  }
1029 
1030  /*
1031  * If inexact then len2 is equal to count, because we don't know actual
1032  * length of second string in inexact search and we can assume that count
1033  * is a lower bound of len2.
1034  */
1035  return CALCSML(count, len1, inexact ? count : len2);
1036 }
#define CMPTRGM(a, b)
Definition: trgm.h:46
#define ARRNELEM(x)
Definition: trgm.h:108
#define GETARR(x)
Definition: trgm.h:107
float float4
Definition: c.h:552
char trgm[3]
Definition: trgm.h:42
#define CALCSML(count, len1, len2)
Definition: trgm.h:117

◆ compact_trigram()

void compact_trigram ( trgm * tptr,
char *  str,
int  bytelen 
)

Definition at line 198 of file trgm_op.c.

References COMP_LEGACY_CRC32, CPTRGM, FIN_LEGACY_CRC32, and INIT_LEGACY_CRC32.

Referenced by fillTrgm(), and make_trigrams().

199 {
200  if (bytelen == 3)
201  {
202  CPTRGM(tptr, str);
203  }
204  else
205  {
206  pg_crc32 crc;
207 
208  INIT_LEGACY_CRC32(crc);
209  COMP_LEGACY_CRC32(crc, str, bytelen);
210  FIN_LEGACY_CRC32(crc);
211 
212  /*
213  * use only 3 upper bytes from crc, hope, it's good enough hashing
214  */
215  CPTRGM(tptr, &crc);
216  }
217 }
#define INIT_LEGACY_CRC32(crc)
Definition: pg_crc.h:79
#define FIN_LEGACY_CRC32(crc)
Definition: pg_crc.h:80
#define COMP_LEGACY_CRC32(crc, data, len)
Definition: pg_crc.h:81
uint32 pg_crc32
Definition: pg_crc.h:37
#define CPTRGM(a, b)
Definition: trgm.h:48

◆ createTrgmNFA()

TRGM* createTrgmNFA ( text * text_re,
Oid  collation,
TrgmPackedGraph **  graph,
MemoryContext  rcontext 
)

Definition at line 521 of file trgm_regexp.c.

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, createTrgmNFAInternal(), CurrentMemoryContext, MemoryContextDelete(), MemoryContextSwitchTo(), PG_END_TRY, PG_FINALLY, pg_regfree(), PG_TRY, RE_compile(), REG_ADVANCED, and REG_ICASE.

Referenced by gin_extract_query_trgm(), and gtrgm_consistent().

523 {
524  TRGM *trg;
525  regex_t regex;
526  MemoryContext tmpcontext;
527  MemoryContext oldcontext;
528 
529  /*
530  * This processing generates a great deal of cruft, which we'd like to
531  * clean up before returning (since this function may be called in a
532  * query-lifespan memory context). Make a temp context we can work in so
533  * that cleanup is easy.
534  */
535  tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
536  "createTrgmNFA temporary context",
537  ALLOCSET_DEFAULT_SIZES);
538  oldcontext = MemoryContextSwitchTo(tmpcontext);
539 
540  /*
541  * Stage 1: Compile the regexp into a NFA, using the regexp library.
542  */
543 #ifdef IGNORECASE
544  RE_compile(&regex, text_re, REG_ADVANCED | REG_ICASE, collation);
545 #else
546  RE_compile(&regex, text_re, REG_ADVANCED, collation);
547 #endif
548 
549  /*
550  * Since the regexp library allocates its internal data structures with
551  * malloc, we need to use a PG_TRY block to ensure that pg_regfree() gets
552  * done even if there's an error.
553  */
554  PG_TRY();
555  {
556  trg = createTrgmNFAInternal(&regex, graph, rcontext);
557  }
558  PG_FINALLY();
559  {
560  pg_regfree(&regex);
561  }
562  PG_END_TRY();
563 
564  /* Clean up all the cruft we created */
565  MemoryContextSwitchTo(oldcontext);
566  MemoryContextDelete(tmpcontext);
567 
568  return trg;
569 }
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:212
#define AllocSetContextCreate
Definition: memutils.h:170
static TRGM * createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:575
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define REG_ICASE
Definition: regex.h:106
TRGM
Definition: trgm.h:67
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:192
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
#define REG_ADVANCED
Definition: regex.h:103
#define PG_FINALLY()
Definition: elog.h:326
static void RE_compile(regex_t *regex, text *text_re, int cflags, Oid collation)
Definition: trgm_regexp.c:729
#define PG_TRY()
Definition: elog.h:309
void pg_regfree(regex_t *re)
Definition: regfree.c:49
Definition: regex.h:55
#define PG_END_TRY()
Definition: elog.h:334

◆ generate_trgm()

TRGM* generate_trgm ( char *  str,
int  slen 
)

Definition at line 356 of file trgm_op.c.

References ARRKEY, CALCGTSIZE, comp_trgm(), TRGM::flag, generate_trgm_only(), GETARR, palloc(), protect_out_of_mem(), qsort, qunique(), SET_VARSIZE, and TRGMHDRSIZE.

Referenced by gin_extract_query_trgm(), gin_extract_value_trgm(), gtrgm_compress(), gtrgm_consistent(), gtrgm_distance(), show_trgm(), and similarity().

357 {
358  TRGM *trg;
359  int len;
360 
361  protect_out_of_mem(slen);
362 
363  trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
364  trg->flag = ARRKEY;
365 
366  len = generate_trgm_only(GETARR(trg), str, slen, NULL);
367  SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
368 
369  if (len == 0)
370  return trg;
371 
372  /*
373  * Make trigrams unique.
374  */
375  if (len > 1)
376  {
377  qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
378  len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
379  }
380 
381  SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
382 
383  return trg;
384 }
#define ARRKEY
Definition: trgm.h:97
static void protect_out_of_mem(int slen)
Definition: trgm_op.c:339
#define CALCGTSIZE(flag, len)
Definition: trgm.h:105
#define GETARR(x)
Definition: trgm.h:107
TRGM
Definition: trgm.h:67
#define TRGMHDRSIZE
Definition: trgm.h:74
uint8 flag
Definition: trgm.h:70
char trgm[3]
Definition: trgm.h:42
static size_t qunique(void *array, size_t elements, size_t width, int(*compare)(const void *, const void *))
Definition: qunique.h:21
void * palloc(Size size)
Definition: mcxt.c:950
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
#define qsort(a, b, c, d)
Definition: port.h:497
static int generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
Definition: trgm_op.c:275
static int comp_trgm(const void *a, const void *b)
Definition: trgm_op.c:161

◆ generate_wildcard_trgm()

TRGM* generate_wildcard_trgm ( const char *  str,
int  slen 
)

Definition at line 866 of file trgm_op.c.

References ARRKEY, buf, CALCGTSIZE, comp_trgm(), TRGM::flag, get_wildcard_part(), GETARR, lowerstr_with_len(), LPADDING, make_trigrams(), palloc(), pfree(), protect_out_of_mem(), qsort, qunique(), RPADDING, SET_VARSIZE, generate_unaccent_rules::str, and TRGMHDRSIZE.

Referenced by gin_extract_query_trgm(), and gtrgm_consistent().

867 {
868  TRGM *trg;
869  char *buf,
870  *buf2;
871  trgm *tptr;
872  int len,
873  charlen,
874  bytelen;
875  const char *eword;
876 
877  protect_out_of_mem(slen);
878 
879  trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
880  trg->flag = ARRKEY;
881  SET_VARSIZE(trg, TRGMHDRSIZE);
882 
883  if (slen + LPADDING + RPADDING < 3 || slen == 0)
884  return trg;
885 
886  tptr = GETARR(trg);
887 
888  /* Allocate a buffer for blank-padded, but not yet case-folded, words */
889  buf = palloc(sizeof(char) * (slen + 4));
890 
891  /*
892  * Extract trigrams from each substring extracted by get_wildcard_part.
893  */
894  eword = str;
895  while ((eword = get_wildcard_part(eword, slen - (eword - str),
896  buf, &bytelen, &charlen)) != NULL)
897  {
898 #ifdef IGNORECASE
899  buf2 = lowerstr_with_len(buf, bytelen);
900  bytelen = strlen(buf2);
901 #else
902  buf2 = buf;
903 #endif
904 
905  /*
906  * count trigrams
907  */
908  tptr = make_trigrams(tptr, buf2, bytelen, charlen);
909 
910 #ifdef IGNORECASE
911  pfree(buf2);
912 #endif
913  }
914 
915  pfree(buf);
916 
917  if ((len = tptr - GETARR(trg)) == 0)
918  return trg;
919 
920  /*
921  * Make trigrams unique.
922  */
923  if (len > 1)
924  {
925  qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
926  len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
927  }
928 
929  SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
930 
931  return trg;
932 }
char * lowerstr_with_len(const char *str, int len)
Definition: ts_locale.c:257
#define ARRKEY
Definition: trgm.h:97
static void protect_out_of_mem(int slen)
Definition: trgm_op.c:339
static trgm * make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
Definition: trgm_op.c:223
#define CALCGTSIZE(flag, len)
Definition: trgm.h:105
#define GETARR(x)
Definition: trgm.h:107
void pfree(void *pointer)
Definition: mcxt.c:1057
TRGM
Definition: trgm.h:67
static char * buf
Definition: pg_test_fsync.c:68
#define TRGMHDRSIZE
Definition: trgm.h:74
uint8 flag
Definition: trgm.h:70
char trgm[3]
Definition: trgm.h:42
static size_t qunique(void *array, size_t elements, size_t width, int(*compare)(const void *, const void *))
Definition: qunique.h:21
#define RPADDING
Definition: trgm.h:17
void * palloc(Size size)
Definition: mcxt.c:950
#define LPADDING
Definition: trgm.h:16
static const char * get_wildcard_part(const char *str, int lenstr, char *buf, int *bytelen, int *charlen)
Definition: trgm_op.c:721
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
#define qsort(a, b, c, d)
Definition: port.h:497
static int comp_trgm(const void *a, const void *b)
Definition: trgm_op.c:161

◆ index_strategy_get_limit()

double index_strategy_get_limit ( StrategyNumber  strategy)

Definition at line 132 of file trgm_op.c.

References elog, ERROR, similarity_threshold, SimilarityStrategyNumber, strict_word_similarity_threshold, StrictWordSimilarityStrategyNumber, word_similarity_threshold, and WordSimilarityStrategyNumber.

Referenced by gin_trgm_consistent(), gin_trgm_triconsistent(), and gtrgm_consistent().

133 {
134  switch (strategy)
135  {
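136  case SimilarityStrategyNumber:
137  return similarity_threshold;
138  case WordSimilarityStrategyNumber:
139  return word_similarity_threshold;
140  case StrictWordSimilarityStrategyNumber:
141  return strict_word_similarity_threshold;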
142  default:
143  elog(ERROR, "unrecognized strategy number: %d", strategy);
144  break;
145  }
146 
147  return 0.0; /* keep compiler quiet */
148 }
#define SimilarityStrategyNumber
Definition: trgm.h:30
#define StrictWordSimilarityStrategyNumber
Definition: trgm.h:38
#define ERROR
Definition: elog.h:43
double strict_word_similarity_threshold
Definition: trgm_op.c:21
#define WordSimilarityStrategyNumber
Definition: trgm.h:36
double word_similarity_threshold
Definition: trgm_op.c:20
#define elog(elevel,...)
Definition: elog.h:228
double similarity_threshold
Definition: trgm_op.c:19

◆ trgm2int()

uint32 trgm2int ( trgm * ptr)

Definition at line 935 of file trgm_op.c.

References val.

Referenced by gin_extract_query_trgm(), gin_extract_value_trgm(), and show_trgm().

936 {
937  uint32 val = 0;
938 
939  val |= *(((unsigned char *) ptr));
940  val <<= 8;
941  val |= *(((unsigned char *) ptr) + 1);
942  val <<= 8;
943  val |= *(((unsigned char *) ptr) + 2);
944 
945  return val;
946 }
unsigned int uint32
Definition: c.h:429
long val
Definition: informix.c:664

◆ trgm_contained_by()

bool trgm_contained_by ( TRGM * trg1,
TRGM * trg2 
)

Definition at line 1044 of file trgm_op.c.

References ARRNELEM, CMPTRGM, and GETARR.

Referenced by gtrgm_consistent().

1045 {
1046  trgm *ptr1,
1047  *ptr2;
1048  int len1,
1049  len2;
1050 
1051  ptr1 = GETARR(trg1);
1052  ptr2 = GETARR(trg2);
1053 
1054  len1 = ARRNELEM(trg1);
1055  len2 = ARRNELEM(trg2);
1056 
1057  while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
1058  {
1059  int res = CMPTRGM(ptr1, ptr2);
1060 
1061  if (res < 0)
1062  return false;
1063  else if (res > 0)
1064  ptr2++;
1065  else
1066  {
1067  ptr1++;
1068  ptr2++;
1069  }
1070  }
1071  if (ptr1 - GETARR(trg1) < len1)
1072  return false;
1073  else
1074  return true;
1075 }
#define CMPTRGM(a, b)
Definition: trgm.h:46
#define ARRNELEM(x)
Definition: trgm.h:108
#define GETARR(x)
Definition: trgm.h:107
char trgm[3]
Definition: trgm.h:42

◆ trgm_presence_map()

bool* trgm_presence_map ( TRGM * query,
TRGM * key 
)

Definition at line 1083 of file trgm_op.c.

References ARRNELEM, CMPTRGM, GETARR, i, and palloc0().

Referenced by gtrgm_consistent().

1084 {
1085  bool *result;
1086  trgm *ptrq = GETARR(query),
1087  *ptrk = GETARR(key);
1088  int lenq = ARRNELEM(query),
1089  lenk = ARRNELEM(key),
1090  i;
1091 
1092  result = (bool *) palloc0(lenq * sizeof(bool));
1093 
1094  /* for each query trigram, do a binary search in the key array */
1095  for (i = 0; i < lenq; i++)
1096  {
1097  int lo = 0;
1098  int hi = lenk;
1099 
1100  while (lo < hi)
1101  {
1102  int mid = (lo + hi) / 2;
1103  int res = CMPTRGM(ptrq, ptrk + mid);
1104 
1105  if (res < 0)
1106  hi = mid;
1107  else if (res > 0)
1108  lo = mid + 1;
1109  else
1110  {
1111  result[i] = true;
1112  break;
1113  }
1114  }
1115  ptrq++;
1116  }
1117 
1118  return result;
1119 }
#define CMPTRGM(a, b)
Definition: trgm.h:46
#define ARRNELEM(x)
Definition: trgm.h:108
#define GETARR(x)
Definition: trgm.h:107
void * palloc0(Size size)
Definition: mcxt.c:981
char trgm[3]
Definition: trgm.h:42
int i

◆ trigramsMatchGraph()

bool trigramsMatchGraph ( TrgmPackedGraph * graph,
bool * check 
)

Definition at line 636 of file trgm_regexp.c.

References TrgmPackedState::arcs, TrgmPackedState::arcsCount, TrgmPackedArc::colorTrgm, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, i, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, TrgmPackedGraph::statesQueue, and TrgmPackedArc::targetState.

Referenced by gin_trgm_consistent(), gin_trgm_triconsistent(), and gtrgm_consistent().

637 {
638  int i,
639  j,
640  k,
641  queueIn,
642  queueOut;
643 
644  /*
645  * Reset temporary working areas.
646  */
647  memset(graph->colorTrigramsActive, 0,
648  sizeof(bool) * graph->colorTrigramsCount);
649  memset(graph->statesActive, 0, sizeof(bool) * graph->statesCount);
650 
651  /*
652  * Check which color trigrams were matched. A match for any simple
653  * trigram associated with a color trigram counts as a match of the color
654  * trigram.
655  */
656  j = 0;
657  for (i = 0; i < graph->colorTrigramsCount; i++)
658  {
659  int cnt = graph->colorTrigramGroups[i];
660 
661  for (k = j; k < j + cnt; k++)
662  {
663  if (check[k])
664  {
665  /*
666  * Found one matched trigram in the group. Can skip the rest
667  * of them and go to the next group.
668  */
669  graph->colorTrigramsActive[i] = true;
670  break;
671  }
672  }
673  j = j + cnt;
674  }
675 
676  /*
677  * Initialize the statesQueue to hold just the initial state. Note:
678  * statesQueue has room for statesCount entries, which is certainly enough
679  * since no state will be put in the queue more than once. The
680  * statesActive array marks which states have been queued.
681  */
682  graph->statesActive[0] = true;
683  graph->statesQueue[0] = 0;
684  queueIn = 0;
685  queueOut = 1;
686 
687  /* Process queued states as long as there are any. */
688  while (queueIn < queueOut)
689  {
690  int stateno = graph->statesQueue[queueIn++];
691  TrgmPackedState *state = &graph->states[stateno];
692  int cnt = state->arcsCount;
693 
694  /* Loop over state's out-arcs */
695  for (i = 0; i < cnt; i++)
696  {
697  TrgmPackedArc *arc = &state->arcs[i];
698 
699  /*
700  * If corresponding color trigram is present then activate the
701  * corresponding state. We're done if that's the final state,
702  * otherwise queue the state if it's not been queued already.
703  */
704  if (graph->colorTrigramsActive[arc->colorTrgm])
705  {
706  int nextstate = arc->targetState;
707 
708  if (nextstate == 1)
709  return true; /* success: final state is reachable */
710 
711  if (!graph->statesActive[nextstate])
712  {
713  graph->statesActive[nextstate] = true;
714  graph->statesQueue[queueOut++] = nextstate;
715  }
716  }
717  }
718  }
719 
720  /* Queue is empty, so match fails. */
721  return false;
722 }
bool * colorTrigramsActive
Definition: trgm_regexp.c:461
Definition: regguts.h:276
TrgmPackedState * states
Definition: trgm_regexp.c:458
TrgmPackedArc * arcs
Definition: trgm_regexp.c:435
bool * statesActive
Definition: trgm_regexp.c:462
Definition: regguts.h:298
int i
int * colorTrigramGroups
Definition: trgm_regexp.c:451

Variable Documentation

◆ similarity_threshold

double similarity_threshold

Definition at line 19 of file trgm_op.c.

Referenced by _PG_init(), index_strategy_get_limit(), set_limit(), show_limit(), and similarity_op().

◆ strict_word_similarity_threshold

double strict_word_similarity_threshold

◆ word_similarity_threshold

double word_similarity_threshold