PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
trgm.h
Go to the documentation of this file.
1 /*
2  * contrib/pg_trgm/trgm.h
3  */
4 #ifndef __TRGM_H__
5 #define __TRGM_H__
6 
7 #include "access/gist.h"
8 #include "access/itup.h"
9 #include "storage/bufpage.h"
10 
/*
 * Build-time options.
 *
 * Note that trgm_regexp.c effectively assumes the values of LPADDING and
 * RPADDING given here, so they cannot be changed in isolation.
 */
#define LPADDING		2
#define RPADDING		1

/* When defined, ISWORDCHR accepts only alphabetic and digit characters. */
#define KEEPONLYALNUM

/*
 * Caution: the IGNORECASE macro means that trigrams are case-insensitive.
 * If this macro is disabled, the ~* and ~~* operators must be removed from
 * the operator classes, because we can't handle case-insensitive wildcard
 * search with case-sensitive trigrams.  Failure to do this will result in
 * "cannot handle ~*(~~*) with case-sensitive trigrams" errors.
 */
#define IGNORECASE

/* Selects which similarity formula CALCSML uses; see its definition. */
#define DIVUNION
27 
/*
 * Operator strategy numbers.
 */
#define SimilarityStrategyNumber		1
#define DistanceStrategyNumber			2
#define LikeStrategyNumber				3
#define ILikeStrategyNumber				4
#define RegExpStrategyNumber			5
#define RegExpICaseStrategyNumber		6
#define WordSimilarityStrategyNumber	7
#define WordDistanceStrategyNumber		8
37 
/* A trigram occupies exactly three bytes. */
typedef char trgm[3];

/*
 * Three-way comparison helpers; each evaluates to -1, 0 or 1.
 *
 * CMPCHAR(a,b)     compares two character values.
 * CMPPCHAR(a,b,i)  compares the bytes at offset i of the buffers a and b.
 * CMPTRGM(a,b)     compares two trigrams byte by byte; the first differing
 *                  byte (offsets 0, 1, 2 in that order) decides the result.
 *
 * These are macros, so arguments are evaluated more than once: do not pass
 * expressions with side effects.  Every argument, including the offset, is
 * parenthesized so that arbitrary expressions may be passed safely.
 */
#define CMPCHAR(a,b) ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) )
#define CMPPCHAR(a,b,i)  CMPCHAR( *(((const char*)(a))+(i)), *(((const char*)(b))+(i)) )
#define CMPTRGM(a,b) ( CMPPCHAR(a,b,0) ? CMPPCHAR(a,b,0) : ( CMPPCHAR(a,b,1) ? CMPPCHAR(a,b,1) : CMPPCHAR(a,b,2) ) )
43 
/*
 * Copy the three bytes of trigram b into trigram a.
 *
 * The expansion intentionally carries no trailing semicolon: the caller
 * writes it, which lets the macro act as a single statement even when it is
 * the unbraced body of an if/else.  Both arguments are evaluated three
 * times, so they must be free of side effects.
 */
#define CPTRGM(a,b) do { \
	*(((char*)(a))+0) = *(((const char*)(b))+0); \
	*(((char*)(a))+1) = *(((const char*)(b))+1); \
	*(((char*)(a))+2) = *(((const char*)(b))+2); \
} while(0)
49 
/*
 * Character classification.
 *
 * ISWORDCHR(c) hands c unchanged to t_isalpha()/t_isdigit() (or to
 * t_isspace() when KEEPONLYALNUM is not defined); those functions are
 * declared outside this header.
 *
 * ISPRINTABLECHAR(a) takes a pointer and inspects the byte it points at.
 * With KEEPONLYALNUM the byte must be ASCII and either alphanumeric or a
 * space; without it the byte must be ASCII and printable.
 *
 * ISPRINTABLETRGM(t) requires all three bytes starting at t to satisfy
 * ISPRINTABLECHAR.
 */
#ifdef KEEPONLYALNUM

#define ISWORDCHR(c) \
	(t_isalpha(c) || t_isdigit(c))
#define ISPRINTABLECHAR(a) \
	( isascii( *(unsigned char*)(a) ) && \
	  ( isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ' ) )

#else							/* !KEEPONLYALNUM */

#define ISWORDCHR(c) \
	(!t_isspace(c))
#define ISPRINTABLECHAR(a) \
	( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) )

#endif							/* KEEPONLYALNUM */

#define ISPRINTABLETRGM(t) \
	( ISPRINTABLECHAR( ((char*)(t)) ) && \
	  ISPRINTABLECHAR( ((char*)(t))+1 ) && \
	  ISPRINTABLECHAR( ((char*)(t))+2 ) )
58 
/* Wildcard escape character: true when x points at a backslash. */
#define ISESCAPECHAR(x)		(*(x) == '\\')

/* Wildcard meta-character: true when x points at '_' or '%'. */
#define ISWILDCARDCHAR(x)	(*(x) == '_' || *(x) == '%')
62 
63 typedef struct
64 {
65  int32 vl_len_; /* varlena header (do not touch directly!) */
67  char data[FLEXIBLE_ARRAY_MEMBER];
68 } TRGM;
69 
70 #define TRGMHDRSIZE (VARHDRSZ + sizeof(uint8))
71 
/* gist */

/* Bits per signature byte. */
#define BITBYTE 8
#define SIGLENINT  3			/* >122 => key will toast, so very slow!!! */
/* Signature length in bytes. */
#define SIGLEN	( sizeof(int)*SIGLENINT )

/* Modulus used by HASHVAL: one less than the number of signature bits. */
#define SIGLENBIT (SIGLEN*BITBYTE - 1)	/* see makesign */

typedef char BITVEC[SIGLEN];
typedef char *BITVECP;

/*
 * Loop header visiting every signature byte.  The caller must have an
 * integer variable named "i" in scope and supply the loop body.
 */
#define LOOPBYTE \
			for(i=0;i<SIGLEN;i++)

/*
 * Bit manipulation on a signature.
 *
 * GETBYTE     lvalue for the byte of x that holds bit number i.
 * GETBITBYTE  bit i of the single byte value x.
 * CLRBIT      clears bit i of x; SETBIT sets it; GETBIT reads it as 0 or 1.
 *
 * CLRBIT and SETBIT are fully parenthesized, so they remain a single
 * operand when used inside a larger expression.  "i" is evaluated more than
 * once in CLRBIT/SETBIT/GETBIT.
 */
#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
#define GETBITBYTE(x,i) ( (((char)(x)) >> (i)) & 0x01 )
#define CLRBIT(x,i)   ( GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) ) )
#define SETBIT(x,i)   ( GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) ) )
#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )

/* Map a value onto a signature bit number, and set that bit. */
#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
93 
/* Bits stored in the flag field of a TRGM value. */
#define ARRKEY			0x01
#define SIGNKEY			0x02
#define ALLISTRUE		0x04

/*
 * Flag tests.  Each yields the masked bit (non-zero when set), not a
 * normalized 0/1 value.  The pointer argument is parenthesized so that any
 * pointer expression can be passed.
 */
#define ISARRKEY(x) ( ((TRGM*)(x))->flag & ARRKEY )
#define ISSIGNKEY(x)	( ((TRGM*)(x))->flag & SIGNKEY )
#define ISALLTRUE(x)	( ((TRGM*)(x))->flag & ALLISTRUE )

/*
 * Total size in bytes of a value with the given flag: the header plus
 * either len trigrams (ARRKEY), nothing (ALLISTRUE), or a SIGLEN-byte
 * signature.
 */
#define CALCGTSIZE(flag, len) ( TRGMHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(trgm)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )

/* Payload accessors: the data begins TRGMHDRSIZE bytes past the start. */
#define GETSIGN(x)		( (BITVECP)( (char*)(x)+TRGMHDRSIZE ) )
#define GETARR(x)		( (trgm*)( (char*)(x)+TRGMHDRSIZE ) )

/* Number of trigrams in an array-style value, derived from its VARSIZE. */
#define ARRNELEM(x) ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) )
106 
/*
 * If DIVUNION is defined then similarity formula is:
 * count / (len1 + len2 - count)
 * else if DIVUNION is not defined then similarity formula is:
 * count / max(len1, len2)
 *
 * The whole quotient is parenthesized so the macro can be used as an
 * operand of a larger expression.  Arguments may be evaluated more than
 * once.
 */
#ifdef DIVUNION
#define CALCSML(count, len1, len2) ( ((float4) (count)) / ((float4) ((len1) + (len2) - (count))) )
#else
#define CALCSML(count, len1, len2) ( ((float4) (count)) / ((float4) (((len1) > (len2)) ? (len1) : (len2))) )
#endif
118 
120 
121 extern double similarity_threshold;
122 extern double word_similarity_threshold;
123 
124 extern uint32 trgm2int(trgm *ptr);
125 extern void compact_trigram(trgm *tptr, char *str, int bytelen);
126 extern TRGM *generate_trgm(char *str, int slen);
127 extern TRGM *generate_wildcard_trgm(const char *str, int slen);
128 extern float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact);
129 extern bool trgm_contained_by(TRGM *trg1, TRGM *trg2);
130 extern bool *trgm_presence_map(TRGM *query, TRGM *key);
131 extern TRGM *createTrgmNFA(text *text_re, Oid collation,
132  TrgmPackedGraph **graph, MemoryContext rcontext);
133 extern bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check);
134 
135 #endif /* __TRGM_H__ */
bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
Definition: trgm_regexp.c:635
#define SIGLEN
Definition: trgm.h:75
char * BITVECP
Definition: trgm.h:80
TRGM * generate_wildcard_trgm(const char *str, int slen)
Definition: trgm_op.c:796
TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:517
double similarity_threshold
Definition: trgm_op.c:19
unsigned char uint8
Definition: c.h:266
float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
Definition: trgm_op.c:928
char BITVEC[SIGLEN]
Definition: trgm.h:79
double word_similarity_threshold
Definition: trgm_op.c:20
bool * trgm_presence_map(TRGM *query, TRGM *key)
Definition: trgm_op.c:1015
int32 vl_len_
Definition: trgm.h:65
unsigned int Oid
Definition: postgres_ext.h:31
signed int int32
Definition: c.h:256
Definition: trgm.h:63
unsigned int uint32
Definition: c.h:268
bool trgm_contained_by(TRGM *trg1, TRGM *trg2)
Definition: trgm_op.c:976
uint32 trgm2int(trgm *ptr)
Definition: trgm_op.c:865
float float4
Definition: c.h:380
uint8 flag
Definition: trgm.h:66
void compact_trigram(trgm *tptr, char *str, int bytelen)
Definition: trgm_op.c:166
char trgm[3]
Definition: trgm.h:38
TRGM * generate_trgm(char *str, int slen)
Definition: trgm_op.c:321
Definition: c.h:439