PostgreSQL Source Code  git master
trgm.h File Reference
#include "access/gist.h"
#include "access/itup.h"
#include "access/stratnum.h"
#include "storage/bufpage.h"
Include dependency graph for trgm.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  TRGM
 

Macros

#define LPADDING   2
 
#define RPADDING   1
 
#define KEEPONLYALNUM
 
#define IGNORECASE
 
#define DIVUNION
 
#define SimilarityStrategyNumber   1
 
#define DistanceStrategyNumber   2
 
#define LikeStrategyNumber   3
 
#define ILikeStrategyNumber   4
 
#define RegExpStrategyNumber   5
 
#define RegExpICaseStrategyNumber   6
 
#define WordSimilarityStrategyNumber   7
 
#define WordDistanceStrategyNumber   8
 
#define StrictWordSimilarityStrategyNumber   9
 
#define StrictWordDistanceStrategyNumber   10
 
#define EqualStrategyNumber   11
 
#define CMPCHAR(a, b)   ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) )
 
#define CMPPCHAR(a, b, i)   CMPCHAR( *(((const char*)(a))+i), *(((const char*)(b))+i) )
 
#define CMPTRGM(a, b)   ( CMPPCHAR(a,b,0) ? CMPPCHAR(a,b,0) : ( CMPPCHAR(a,b,1) ? CMPPCHAR(a,b,1) : CMPPCHAR(a,b,2) ) )
 
#define CPTRGM(a, b)
 
#define ISWORDCHR(c)   (t_isalpha(c) || t_isdigit(c))
 
#define ISPRINTABLECHAR(a)   ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )
 
#define ISPRINTABLETRGM(t)   ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )
 
#define ISESCAPECHAR(x)   (*(x) == '\\') /* Wildcard escape character */
 
#define ISWILDCARDCHAR(x)
 
#define TRGMHDRSIZE   (VARHDRSZ + sizeof(uint8))
 
#define SIGLEN_DEFAULT   (sizeof(int) * 3)
 
#define SIGLEN_MAX   GISTMaxIndexKeySize
 
#define BITBYTE   8
 
#define SIGLENBIT(siglen)   ((siglen) * BITBYTE - 1) /* see makesign */
 
#define LOOPBYTE(siglen)    for (i = 0; i < (siglen); i++)
 
#define GETBYTE(x, i)   ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
 
#define GETBITBYTE(x, i)   ( (((char)(x)) >> (i)) & 0x01 )
 
#define CLRBIT(x, i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
 
#define SETBIT(x, i)   GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )
 
#define GETBIT(x, i)   ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
 
#define HASHVAL(val, siglen)   (((unsigned int)(val)) % SIGLENBIT(siglen))
 
#define HASH(sign, val, siglen)   SETBIT((sign), HASHVAL(val, siglen))
 
#define ARRKEY   0x01
 
#define SIGNKEY   0x02
 
#define ALLISTRUE   0x04
 
#define ISARRKEY(x)   ( ((TRGM*)x)->flag & ARRKEY )
 
#define ISSIGNKEY(x)   ( ((TRGM*)x)->flag & SIGNKEY )
 
#define ISALLTRUE(x)   ( ((TRGM*)x)->flag & ALLISTRUE )
 
#define CALCGTSIZE(flag, len)   ( TRGMHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(trgm)) : (((flag) & ALLISTRUE) ? 0 : (len)) ) )
 
#define GETSIGN(x)   ( (BITVECP)( (char*)x+TRGMHDRSIZE ) )
 
#define GETARR(x)   ( (trgm*)( (char*)x+TRGMHDRSIZE ) )
 
#define ARRNELEM(x)   ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) )
 
#define CALCSML(count, len1, len2)   ((float4) (count)) / ((float4) ((len1) + (len2) - (count)))
 

Typedefs

typedef char trgm[3]
 
typedef char * BITVECP
 
typedef struct TrgmPackedGraph TrgmPackedGraph
 

Functions

double index_strategy_get_limit (StrategyNumber strategy)
 
uint32 trgm2int (trgm *ptr)
 
void compact_trigram (trgm *tptr, char *str, int bytelen)
 
TRGM * generate_trgm (char *str, int slen)
 
TRGM * generate_wildcard_trgm (const char *str, int slen)
 
float4 cnt_sml (TRGM *trg1, TRGM *trg2, bool inexact)
 
bool trgm_contained_by (TRGM *trg1, TRGM *trg2)
 
bool * trgm_presence_map (TRGM *query, TRGM *key)
 
TRGM * createTrgmNFA (text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
 
bool trigramsMatchGraph (TrgmPackedGraph *graph, bool *check)
 

Variables

double similarity_threshold
 
double word_similarity_threshold
 
double strict_word_similarity_threshold
 

Macro Definition Documentation

◆ ALLISTRUE

#define ALLISTRUE   0x04

Definition at line 98 of file trgm.h.

◆ ARRKEY

#define ARRKEY   0x01

Definition at line 96 of file trgm.h.

◆ ARRNELEM

#define ARRNELEM (   x)    ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) )

Definition at line 107 of file trgm.h.

◆ BITBYTE

#define BITBYTE   8

Definition at line 78 of file trgm.h.

◆ CALCGTSIZE

#define CALCGTSIZE (   flag,
  len 
)    ( TRGMHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(trgm)) : (((flag) & ALLISTRUE) ? 0 : (len)) ) )

Definition at line 104 of file trgm.h.

◆ CALCSML

#define CALCSML (   count,
  len1,
  len2 
)    ((float4) (count)) / ((float4) ((len1) + (len2) - (count)))

Definition at line 116 of file trgm.h.

◆ CLRBIT

#define CLRBIT (   x,
  i 
)    GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )

Definition at line 89 of file trgm.h.

◆ CMPCHAR

#define CMPCHAR (   a,
  b 
)    ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) )

Definition at line 44 of file trgm.h.

◆ CMPPCHAR

#define CMPPCHAR (   a,
  b,
  i 
)    CMPCHAR( *(((const char*)(a))+i), *(((const char*)(b))+i) )

Definition at line 45 of file trgm.h.

◆ CMPTRGM

#define CMPTRGM (   a,
  b 
)    ( CMPPCHAR(a,b,0) ? CMPPCHAR(a,b,0) : ( CMPPCHAR(a,b,1) ? CMPPCHAR(a,b,1) : CMPPCHAR(a,b,2) ) )

Definition at line 46 of file trgm.h.

◆ CPTRGM

#define CPTRGM (   a,
  b 
)
Value:
do { \
*(((char*)(a))+0) = *(((char*)(b))+0); \
*(((char*)(a))+1) = *(((char*)(b))+1); \
*(((char*)(a))+2) = *(((char*)(b))+2); \
} while(0)
int b
Definition: isn.c:70
int a
Definition: isn.c:69

Definition at line 48 of file trgm.h.

◆ DistanceStrategyNumber

#define DistanceStrategyNumber   2

Definition at line 31 of file trgm.h.

◆ DIVUNION

#define DIVUNION

Definition at line 27 of file trgm.h.

◆ EqualStrategyNumber

#define EqualStrategyNumber   11

Definition at line 40 of file trgm.h.

◆ GETARR

#define GETARR (   x)    ( (trgm*)( (char*)x+TRGMHDRSIZE ) )

Definition at line 106 of file trgm.h.

◆ GETBIT

#define GETBIT (   x,
  i 
)    ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )

Definition at line 91 of file trgm.h.

◆ GETBITBYTE

#define GETBITBYTE (   x,
  i 
)    ( (((char)(x)) >> (i)) & 0x01 )

Definition at line 88 of file trgm.h.

◆ GETBYTE

#define GETBYTE (   x,
  i 
)    ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )

Definition at line 87 of file trgm.h.

◆ GETSIGN

#define GETSIGN (   x)    ( (BITVECP)( (char*)x+TRGMHDRSIZE ) )

Definition at line 105 of file trgm.h.

◆ HASH

#define HASH (   sign,
  val,
  siglen 
)    SETBIT((sign), HASHVAL(val, siglen))

Definition at line 94 of file trgm.h.

◆ HASHVAL

#define HASHVAL (   val,
  siglen 
)    (((unsigned int)(val)) % SIGLENBIT(siglen))

Definition at line 93 of file trgm.h.

◆ IGNORECASE

#define IGNORECASE

Definition at line 26 of file trgm.h.

◆ ILikeStrategyNumber

#define ILikeStrategyNumber   4

Definition at line 33 of file trgm.h.

◆ ISALLTRUE

#define ISALLTRUE (   x)    ( ((TRGM*)x)->flag & ALLISTRUE )

Definition at line 102 of file trgm.h.

◆ ISARRKEY

#define ISARRKEY (   x)    ( ((TRGM*)x)->flag & ARRKEY )

Definition at line 100 of file trgm.h.

◆ ISESCAPECHAR

#define ISESCAPECHAR (   x)    (*(x) == '\\') /* Wildcard escape character */

Definition at line 63 of file trgm.h.

◆ ISPRINTABLECHAR

#define ISPRINTABLECHAR (   a)    ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )

Definition at line 56 of file trgm.h.

◆ ISPRINTABLETRGM

#define ISPRINTABLETRGM (   t)    ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )

Definition at line 61 of file trgm.h.

◆ ISSIGNKEY

#define ISSIGNKEY (   x)    ( ((TRGM*)x)->flag & SIGNKEY )

Definition at line 101 of file trgm.h.

◆ ISWILDCARDCHAR

#define ISWILDCARDCHAR (   x)
Value:
(*(x) == '_' || *(x) == '%') /* Wildcard
* meta-character */
int x
Definition: isn.c:71

Definition at line 64 of file trgm.h.

◆ ISWORDCHR

#define ISWORDCHR (   c)    (t_isalpha(c) || t_isdigit(c))

Definition at line 55 of file trgm.h.

◆ KEEPONLYALNUM

#define KEEPONLYALNUM

Definition at line 18 of file trgm.h.

◆ LikeStrategyNumber

#define LikeStrategyNumber   3

Definition at line 32 of file trgm.h.

◆ LOOPBYTE

#define LOOPBYTE (   siglen)     for (i = 0; i < (siglen); i++)

Definition at line 84 of file trgm.h.

◆ LPADDING

#define LPADDING   2

Definition at line 16 of file trgm.h.

◆ RegExpICaseStrategyNumber

#define RegExpICaseStrategyNumber   6

Definition at line 35 of file trgm.h.

◆ RegExpStrategyNumber

#define RegExpStrategyNumber   5

Definition at line 34 of file trgm.h.

◆ RPADDING

#define RPADDING   1

Definition at line 17 of file trgm.h.

◆ SETBIT

#define SETBIT (   x,
  i 
)    GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )

Definition at line 90 of file trgm.h.

◆ SIGLEN_DEFAULT

#define SIGLEN_DEFAULT   (sizeof(int) * 3)

Definition at line 76 of file trgm.h.

◆ SIGLEN_MAX

#define SIGLEN_MAX   GISTMaxIndexKeySize

Definition at line 77 of file trgm.h.

◆ SIGLENBIT

#define SIGLENBIT (   siglen)    ((siglen) * BITBYTE - 1) /* see makesign */

Definition at line 80 of file trgm.h.

◆ SIGNKEY

#define SIGNKEY   0x02

Definition at line 97 of file trgm.h.

◆ SimilarityStrategyNumber

#define SimilarityStrategyNumber   1

Definition at line 30 of file trgm.h.

◆ StrictWordDistanceStrategyNumber

#define StrictWordDistanceStrategyNumber   10

Definition at line 39 of file trgm.h.

◆ StrictWordSimilarityStrategyNumber

#define StrictWordSimilarityStrategyNumber   9

Definition at line 38 of file trgm.h.

◆ TRGMHDRSIZE

#define TRGMHDRSIZE   (VARHDRSZ + sizeof(uint8))

Definition at line 73 of file trgm.h.

◆ WordDistanceStrategyNumber

#define WordDistanceStrategyNumber   8

Definition at line 37 of file trgm.h.

◆ WordSimilarityStrategyNumber

#define WordSimilarityStrategyNumber   7

Definition at line 36 of file trgm.h.

Typedef Documentation

◆ BITVECP

typedef char* BITVECP

Definition at line 82 of file trgm.h.

◆ trgm

typedef char trgm[3]

Definition at line 42 of file trgm.h.

◆ TrgmPackedGraph

typedef struct TrgmPackedGraph TrgmPackedGraph

Definition at line 82 of file trgm.h.

Function Documentation

◆ cnt_sml()

float4 cnt_sml ( TRGM *  trg1,
TRGM *  trg2,
bool  inexact 
)

Definition at line 996 of file trgm_op.c.

999 {
1000  trgm *ptr1,
1001  *ptr2;
1002  int count = 0;
1003  int len1,
1004  len2;
1005 
1006  ptr1 = GETARR(trg1);
1007  ptr2 = GETARR(trg2);
1008 
1009  len1 = ARRNELEM(trg1);
1010  len2 = ARRNELEM(trg2);
1011 
1012  /* explicit test is needed to avoid 0/0 division when both lengths are 0 */
1013  if (len1 <= 0 || len2 <= 0)
1014  return (float4) 0.0;
1015 
1016  while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
1017  {
1018  int res = CMPTRGM(ptr1, ptr2);
1019 
1020  if (res < 0)
1021  ptr1++;
1022  else if (res > 0)
1023  ptr2++;
1024  else
1025  {
1026  ptr1++;
1027  ptr2++;
1028  count++;
1029  }
1030  }
1031 
1032  /*
1033  * If inexact then len2 is equal to count, because we don't know actual
1034  * length of second string in inexact search and we can assume that count
1035  * is a lower bound of len2.
1036  */
float float4
Definition: c.h:564
#define ARRNELEM(x)
Definition: trgm.h:107
#define CMPTRGM(a, b)
Definition: trgm.h:46
char trgm[3]
Definition: trgm.h:42
#define GETARR(x)
Definition: trgm.h:106

References ARRNELEM, CALCSML, CMPTRGM, GETARR, and res.

Referenced by gtrgm_consistent(), gtrgm_distance(), and similarity().

◆ compact_trigram()

void compact_trigram ( trgm *  tptr,
char *  str,
int  bytelen 
)

Definition at line 198 of file trgm_op.c.

201 {
202  if (bytelen == 3)
203  {
204  CPTRGM(tptr, str);
205  }
206  else
207  {
208  pg_crc32 crc;
209 
210  INIT_LEGACY_CRC32(crc);
211  COMP_LEGACY_CRC32(crc, str, bytelen);
212  FIN_LEGACY_CRC32(crc);
213 
214  /*
215  * use only 3 upper bytes from crc, hope, it's good enough hashing
216  */
217  CPTRGM(tptr, &crc);
return crc
uint32 pg_crc32
Definition: pg_crc.h:37
#define INIT_LEGACY_CRC32(crc)
Definition: pg_crc.h:79
#define COMP_LEGACY_CRC32(crc, data, len)
Definition: pg_crc.h:81
#define FIN_LEGACY_CRC32(crc)
Definition: pg_crc.h:80
#define CPTRGM(a, b)
Definition: trgm.h:48

References COMP_LEGACY_CRC32, CPTRGM, crc, FIN_LEGACY_CRC32, INIT_LEGACY_CRC32, and generate_unaccent_rules::str.

Referenced by fillTrgm(), and make_trigrams().

◆ createTrgmNFA()

TRGM* createTrgmNFA ( text *  text_re,
Oid  collation,
TrgmPackedGraph **  graph,
MemoryContext  rcontext 
)

Definition at line 521 of file trgm_regexp.c.

523 {
524  TRGM *trg;
525  regex_t regex;
526  MemoryContext tmpcontext;
527  MemoryContext oldcontext;
528 
529  /*
530  * This processing generates a great deal of cruft, which we'd like to
531  * clean up before returning (since this function may be called in a
532  * query-lifespan memory context). Make a temp context we can work in so
533  * that cleanup is easy.
534  */
535  tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
536  "createTrgmNFA temporary context",
537  ALLOCSET_DEFAULT_SIZES);
538  oldcontext = MemoryContextSwitchTo(tmpcontext);
539 
540  /*
541  * Stage 1: Compile the regexp into a NFA, using the regexp library.
542  */
543 #ifdef IGNORECASE
544  RE_compile(&regex, text_re,
545  REG_ADVANCED | REG_NOSUB | REG_ICASE, collation);
546 #else
547  RE_compile(&regex, text_re,
548  REG_ADVANCED | REG_NOSUB, collation);
549 #endif
550 
551  /*
552  * Since the regexp library allocates its internal data structures with
553  * malloc, we need to use a PG_TRY block to ensure that pg_regfree() gets
554  * done even if there's an error.
555  */
556  PG_TRY();
557  {
558  trg = createTrgmNFAInternal(&regex, graph, rcontext);
559  }
560  PG_FINALLY();
561  {
562  pg_regfree(&regex);
563  }
564  PG_END_TRY();
565 
566  /* Clean up all the cruft we created */
567  MemoryContextSwitchTo(oldcontext);
568  MemoryContextDelete(tmpcontext);
569 
570  return trg;
571 }
#define PG_END_TRY()
Definition: elog.h:324
#define PG_TRY()
Definition: elog.h:299
#define PG_FINALLY()
Definition: elog.h:316
MemoryContext CurrentMemoryContext
Definition: mcxt.c:42
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:218
#define AllocSetContextCreate
Definition: memutils.h:173
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:195
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define REG_ICASE
Definition: regex.h:106
#define REG_ADVANCED
Definition: regex.h:103
#define REG_NOSUB
Definition: regex.h:107
void pg_regfree(regex_t *re)
Definition: regfree.c:49
Definition: trgm.h:67
Definition: regex.h:56
static TRGM * createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:577
static void RE_compile(regex_t *regex, text *text_re, int cflags, Oid collation)
Definition: trgm_regexp.c:731

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, createTrgmNFAInternal(), CurrentMemoryContext, MemoryContextDelete(), MemoryContextSwitchTo(), PG_END_TRY, PG_FINALLY, pg_regfree(), PG_TRY, RE_compile(), REG_ADVANCED, REG_ICASE, and REG_NOSUB.

Referenced by gin_extract_query_trgm(), and gtrgm_consistent().

◆ generate_trgm()

TRGM* generate_trgm ( char *  str,
int  slen 
)

Definition at line 356 of file trgm_op.c.

359 {
360  TRGM *trg;
361  int len;
362 
363  protect_out_of_mem(slen);
364 
365  trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
366  trg->flag = ARRKEY;
367 
368  len = generate_trgm_only(GETARR(trg), str, slen, NULL);
369  SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
370 
371  if (len == 0)
372  return trg;
373 
374  /*
375  * Make trigrams unique.
376  */
377  if (len > 1)
378  {
379  qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
380  len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
381  }
382 
384 
#define CALCGTSIZE(flag, siglen)
Definition: hstore_gist.c:59
void * palloc(Size size)
Definition: mcxt.c:1062
const void size_t len
#define qsort(a, b, c, d)
Definition: port.h:495
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:342
static size_t qunique(void *array, size_t elements, size_t width, int(*compare)(const void *, const void *))
Definition: qunique.h:21
uint8 flag
Definition: trgm.h:69
#define ARRKEY
Definition: trgm.h:96
#define TRGMHDRSIZE
Definition: trgm.h:73
static int comp_trgm(const void *a, const void *b)
Definition: trgm_op.c:161
static void protect_out_of_mem(int slen)
Definition: trgm_op.c:339
static int generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
Definition: trgm_op.c:275

References ARRKEY, CALCGTSIZE, comp_trgm(), TRGM::flag, generate_trgm_only(), GETARR, len, palloc(), protect_out_of_mem(), qsort, qunique(), SET_VARSIZE, generate_unaccent_rules::str, and TRGMHDRSIZE.

Referenced by gin_extract_query_trgm(), gin_extract_value_trgm(), gtrgm_compress(), gtrgm_consistent(), gtrgm_distance(), show_trgm(), and similarity().

◆ generate_wildcard_trgm()

TRGM* generate_wildcard_trgm ( const char *  str,
int  slen 
)

Definition at line 866 of file trgm_op.c.

869 {
870  TRGM *trg;
871  char *buf,
872  *buf2;
873  trgm *tptr;
874  int len,
875  charlen,
876  bytelen;
877  const char *eword;
878 
879  protect_out_of_mem(slen);
880 
881  trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
882  trg->flag = ARRKEY;
883  SET_VARSIZE(trg, TRGMHDRSIZE);
884 
885  if (slen + LPADDING + RPADDING < 3 || slen == 0)
886  return trg;
887 
888  tptr = GETARR(trg);
889 
890  /* Allocate a buffer for blank-padded, but not yet case-folded, words */
891  buf = palloc(sizeof(char) * (slen + 4));
892 
893  /*
894  * Extract trigrams from each substring extracted by get_wildcard_part.
895  */
896  eword = str;
897  while ((eword = get_wildcard_part(eword, slen - (eword - str),
898  buf, &bytelen, &charlen)) != NULL)
899  {
900 #ifdef IGNORECASE
901  buf2 = lowerstr_with_len(buf, bytelen);
902  bytelen = strlen(buf2);
903 #else
904  buf2 = buf;
905 #endif
906 
907  /*
908  * count trigrams
909  */
910  tptr = make_trigrams(tptr, buf2, bytelen, charlen);
911 
912 #ifdef IGNORECASE
913  pfree(buf2);
914 #endif
915  }
916 
917  pfree(buf);
918 
919  if ((len = tptr - GETARR(trg)) == 0)
920  return trg;
921 
922  /*
923  * Make trigrams unique.
924  */
925  if (len > 1)
926  {
927  qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
928  len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
929  }
930 
932 
void pfree(void *pointer)
Definition: mcxt.c:1169
static char * buf
Definition: pg_test_fsync.c:70
#define RPADDING
Definition: trgm.h:17
#define LPADDING
Definition: trgm.h:16
static const char * get_wildcard_part(const char *str, int lenstr, char *buf, int *bytelen, int *charlen)
Definition: trgm_op.c:721
static trgm * make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
Definition: trgm_op.c:223
char * lowerstr_with_len(const char *str, int len)
Definition: ts_locale.c:257

References ARRKEY, buf, CALCGTSIZE, comp_trgm(), TRGM::flag, get_wildcard_part(), GETARR, len, lowerstr_with_len(), LPADDING, make_trigrams(), palloc(), pfree(), protect_out_of_mem(), qsort, qunique(), RPADDING, SET_VARSIZE, generate_unaccent_rules::str, and TRGMHDRSIZE.

Referenced by gin_extract_query_trgm(), and gtrgm_consistent().

◆ index_strategy_get_limit()

double index_strategy_get_limit ( StrategyNumber  strategy)

Definition at line 132 of file trgm_op.c.

135 {
136  switch (strategy)
137  {
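138  case SimilarityStrategyNumber:
139  return similarity_threshold;
140  case WordSimilarityStrategyNumber:
141  return word_similarity_threshold;
142  case StrictWordSimilarityStrategyNumber:
143  return strict_word_similarity_threshold;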
144  default:
145  elog(ERROR, "unrecognized strategy number: %d", strategy);
146  break;
147  }
148 
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
#define WordSimilarityStrategyNumber
Definition: trgm.h:36
#define StrictWordSimilarityStrategyNumber
Definition: trgm.h:38
#define SimilarityStrategyNumber
Definition: trgm.h:30
double strict_word_similarity_threshold
Definition: trgm_op.c:21
double word_similarity_threshold
Definition: trgm_op.c:20
double similarity_threshold
Definition: trgm_op.c:19

References elog, ERROR, similarity_threshold, SimilarityStrategyNumber, strict_word_similarity_threshold, StrictWordSimilarityStrategyNumber, word_similarity_threshold, and WordSimilarityStrategyNumber.

Referenced by gin_trgm_consistent(), gin_trgm_triconsistent(), and gtrgm_consistent().

◆ trgm2int()

uint32 trgm2int ( trgm *  ptr)

Definition at line 935 of file trgm_op.c.

938 {
939  uint32 val = 0;
940 
941  val |= *(((unsigned char *) ptr));
942  val <<= 8;
943  val |= *(((unsigned char *) ptr) + 1);
944  val <<= 8;
945  val |= *(((unsigned char *) ptr) + 2);
946 
unsigned int uint32
Definition: c.h:441
long val
Definition: informix.c:664

References val.

Referenced by gin_extract_query_trgm(), gin_extract_value_trgm(), and show_trgm().

◆ trgm_contained_by()

bool trgm_contained_by ( TRGM *  trg1,
TRGM *  trg2 
)

Definition at line 1044 of file trgm_op.c.

1047 {
1048  trgm *ptr1,
1049  *ptr2;
1050  int len1,
1051  len2;
1052 
1053  ptr1 = GETARR(trg1);
1054  ptr2 = GETARR(trg2);
1055 
1056  len1 = ARRNELEM(trg1);
1057  len2 = ARRNELEM(trg2);
1058 
1059  while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
1060  {
1061  int res = CMPTRGM(ptr1, ptr2);
1062 
1063  if (res < 0)
1064  return false;
1065  else if (res > 0)
1066  ptr2++;
1067  else
1068  {
1069  ptr1++;
1070  ptr2++;
1071  }
1072  }
1073  if (ptr1 - GETARR(trg1) < len1)
1074  return false;
1075  else

References ARRNELEM, CMPTRGM, GETARR, and res.

Referenced by gtrgm_consistent().

◆ trgm_presence_map()

bool* trgm_presence_map ( TRGM *  query,
TRGM *  key 
)

Definition at line 1083 of file trgm_op.c.

1086 {
1087  bool *result;
1088  trgm *ptrq = GETARR(query),
1089  *ptrk = GETARR(key);
1090  int lenq = ARRNELEM(query),
1091  lenk = ARRNELEM(key),
1092  i;
1093 
1094  result = (bool *) palloc0(lenq * sizeof(bool));
1095 
1096  /* for each query trigram, do a binary search in the key array */
1097  for (i = 0; i < lenq; i++)
1098  {
1099  int lo = 0;
1100  int hi = lenk;
1101 
1102  while (lo < hi)
1103  {
1104  int mid = (lo + hi) / 2;
1105  int res = CMPTRGM(ptrq, ptrk + mid);
1106 
1107  if (res < 0)
1108  hi = mid;
1109  else if (res > 0)
1110  lo = mid + 1;
1111  else
1112  {
1113  result[i] = true;
1114  break;
1115  }
1116  }
1117  ptrq++;
1118  }
1119 
int i
Definition: isn.c:73
void * palloc0(Size size)
Definition: mcxt.c:1093

References ARRNELEM, CMPTRGM, GETARR, i, sort-test::key, palloc0(), and res.

Referenced by gtrgm_consistent().

◆ trigramsMatchGraph()

bool trigramsMatchGraph ( TrgmPackedGraph *  graph,
bool *  check 
)

Definition at line 638 of file trgm_regexp.c.

639 {
640  int i,
641  j,
642  k,
643  queueIn,
644  queueOut;
645 
646  /*
647  * Reset temporary working areas.
648  */
649  memset(graph->colorTrigramsActive, 0,
650  sizeof(bool) * graph->colorTrigramsCount);
651  memset(graph->statesActive, 0, sizeof(bool) * graph->statesCount);
652 
653  /*
654  * Check which color trigrams were matched. A match for any simple
655  * trigram associated with a color trigram counts as a match of the color
656  * trigram.
657  */
658  j = 0;
659  for (i = 0; i < graph->colorTrigramsCount; i++)
660  {
661  int cnt = graph->colorTrigramGroups[i];
662 
663  for (k = j; k < j + cnt; k++)
664  {
665  if (check[k])
666  {
667  /*
668  * Found one matched trigram in the group. Can skip the rest
669  * of them and go to the next group.
670  */
671  graph->colorTrigramsActive[i] = true;
672  break;
673  }
674  }
675  j = j + cnt;
676  }
677 
678  /*
679  * Initialize the statesQueue to hold just the initial state. Note:
680  * statesQueue has room for statesCount entries, which is certainly enough
681  * since no state will be put in the queue more than once. The
682  * statesActive array marks which states have been queued.
683  */
684  graph->statesActive[0] = true;
685  graph->statesQueue[0] = 0;
686  queueIn = 0;
687  queueOut = 1;
688 
689  /* Process queued states as long as there are any. */
690  while (queueIn < queueOut)
691  {
692  int stateno = graph->statesQueue[queueIn++];
693  TrgmPackedState *state = &graph->states[stateno];
694  int cnt = state->arcsCount;
695 
696  /* Loop over state's out-arcs */
697  for (i = 0; i < cnt; i++)
698  {
699  TrgmPackedArc *arc = &state->arcs[i];
700 
701  /*
702  * If corresponding color trigram is present then activate the
703  * corresponding state. We're done if that's the final state,
704  * otherwise queue the state if it's not been queued already.
705  */
706  if (graph->colorTrigramsActive[arc->colorTrgm])
707  {
708  int nextstate = arc->targetState;
709 
710  if (nextstate == 1)
711  return true; /* success: final state is reachable */
712 
713  if (!graph->statesActive[nextstate])
714  {
715  graph->statesActive[nextstate] = true;
716  graph->statesQueue[queueOut++] = nextstate;
717  }
718  }
719  }
720  }
721 
722  /* Queue is empty, so match fails. */
723  return false;
724 }
int j
Definition: isn.c:74
TrgmPackedState * states
Definition: trgm_regexp.c:458
bool * statesActive
Definition: trgm_regexp.c:462
bool * colorTrigramsActive
Definition: trgm_regexp.c:461
int * colorTrigramGroups
Definition: trgm_regexp.c:451
Definition: regguts.h:291
Definition: regguts.h:318

References TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, i, j, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, and TrgmPackedGraph::statesQueue.

Referenced by gin_trgm_consistent(), gin_trgm_triconsistent(), and gtrgm_consistent().

Variable Documentation

◆ similarity_threshold

double similarity_threshold
extern

Definition at line 19 of file trgm_op.c.

Referenced by _PG_init(), index_strategy_get_limit(), set_limit(), show_limit(), and similarity_op().

◆ strict_word_similarity_threshold

double strict_word_similarity_threshold
extern

◆ word_similarity_threshold

double word_similarity_threshold
extern