PostgreSQL Source Code  git master
trgm_gin.c
Go to the documentation of this file.
1 /*
2  * contrib/pg_trgm/trgm_gin.c
3  */
4 #include "postgres.h"
5 
6 #include "trgm.h"
7 
8 #include "access/gin.h"
9 #include "access/stratnum.h"
10 #include "fmgr.h"
11 
12 
18 
19 /*
20  * This function can only be called if a pre-9.1 version of the GIN operator
21  * class definition is present in the catalogs (probably as a consequence
22  * of upgrade-in-place). Cope.
23  */
24 Datum
26 {
27  if (PG_NARGS() == 3)
28  return gin_extract_value_trgm(fcinfo);
29  if (PG_NARGS() == 7)
30  return gin_extract_query_trgm(fcinfo);
31  elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
33 }
34 
35 Datum
37 {
38  text *val = (text *) PG_GETARG_TEXT_PP(0);
39  int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
40  Datum *entries = NULL;
41  TRGM *trg;
42  int32 trglen;
43 
44  *nentries = 0;
45 
47  trglen = ARRNELEM(trg);
48 
49  if (trglen > 0)
50  {
51  trgm *ptr;
52  int32 i;
53 
54  *nentries = trglen;
55  entries = (Datum *) palloc(sizeof(Datum) * trglen);
56 
57  ptr = GETARR(trg);
58  for (i = 0; i < trglen; i++)
59  {
60  int32 item = trgm2int(ptr);
61 
62  entries[i] = Int32GetDatum(item);
63  ptr++;
64  }
65  }
66 
67  PG_RETURN_POINTER(entries);
68 }
69 
70 Datum
72 {
73  text *val = (text *) PG_GETARG_TEXT_PP(0);
74  int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
75  StrategyNumber strategy = PG_GETARG_UINT16(2);
76 
77  /* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
78  Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
79 
80  /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
81  int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
82  Datum *entries = NULL;
83  TRGM *trg;
84  int32 trglen;
85  trgm *ptr;
86  TrgmPackedGraph *graph;
87  int32 i;
88 
89  switch (strategy)
90  {
94  break;
96 #ifndef IGNORECASE
97  elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
98 #endif
99  /* FALL THRU */
100  case LikeStrategyNumber:
101 
102  /*
103  * For wildcard search we extract all the trigrams that every
104  * potentially-matching string must include.
105  */
107  VARSIZE_ANY_EXHDR(val));
108  break;
110 #ifndef IGNORECASE
111  elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
112 #endif
113  /* FALL THRU */
115  trg = createTrgmNFA(val, PG_GET_COLLATION(),
116  &graph, CurrentMemoryContext);
117  if (trg && ARRNELEM(trg) > 0)
118  {
119  /*
120  * Successful regex processing: store NFA-like graph as
121  * extra_data. GIN API requires an array of nentries
122  * Pointers, but we just put the same value in each element.
123  */
124  trglen = ARRNELEM(trg);
125  *extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
126  for (i = 0; i < trglen; i++)
127  (*extra_data)[i] = (Pointer) graph;
128  }
129  else
130  {
131  /* No result: have to do full index scan. */
132  *nentries = 0;
133  *searchMode = GIN_SEARCH_MODE_ALL;
134  PG_RETURN_POINTER(entries);
135  }
136  break;
137  default:
138  elog(ERROR, "unrecognized strategy number: %d", strategy);
139  trg = NULL; /* keep compiler quiet */
140  break;
141  }
142 
143  trglen = ARRNELEM(trg);
144  *nentries = trglen;
145 
146  if (trglen > 0)
147  {
148  entries = (Datum *) palloc(sizeof(Datum) * trglen);
149  ptr = GETARR(trg);
150  for (i = 0; i < trglen; i++)
151  {
152  int32 item = trgm2int(ptr);
153 
154  entries[i] = Int32GetDatum(item);
155  ptr++;
156  }
157  }
158 
159  /*
160  * If no trigram was extracted then we have to scan all the index.
161  */
162  if (trglen == 0)
163  *searchMode = GIN_SEARCH_MODE_ALL;
164 
165  PG_RETURN_POINTER(entries);
166 }
167 
168 Datum
170 {
171  bool *check = (bool *) PG_GETARG_POINTER(0);
172  StrategyNumber strategy = PG_GETARG_UINT16(1);
173 
174  /* text *query = PG_GETARG_TEXT_PP(2); */
175  int32 nkeys = PG_GETARG_INT32(3);
176  Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
177  bool *recheck = (bool *) PG_GETARG_POINTER(5);
178  bool res;
179  int32 i,
180  ntrue;
181  double nlimit;
182 
183  /* All cases served by this function are inexact */
184  *recheck = true;
185 
186  switch (strategy)
187  {
190  nlimit = (strategy == SimilarityStrategyNumber) ?
192 
193  /* Count the matches */
194  ntrue = 0;
195  for (i = 0; i < nkeys; i++)
196  {
197  if (check[i])
198  ntrue++;
199  }
200 
201  /*--------------------
202  * If DIVUNION is defined then similarity formula is:
203  * c / (len1 + len2 - c)
204  * where c is number of common trigrams and it stands as ntrue in
205  * this code. Here we don't know value of len2 but we can assume
206  * that c (ntrue) is a lower bound of len2, so upper bound of
207  * similarity is:
208  * c / (len1 + c - c) => c / len1
209  * If DIVUNION is not defined then similarity formula is:
210  * c / max(len1, len2)
211  * And again, c (ntrue) is a lower bound of len2, but c <= len1
212  * just by definition and, consequently, upper bound of
213  * similarity is just c / len1.
214  * So, independently on DIVUNION the upper bound formula is the same.
215  */
216  res = (nkeys == 0) ? false :
217  (((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
218  break;
219  case ILikeStrategyNumber:
220 #ifndef IGNORECASE
221  elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
222 #endif
223  /* FALL THRU */
224  case LikeStrategyNumber:
225  /* Check if all extracted trigrams are presented. */
226  res = true;
227  for (i = 0; i < nkeys; i++)
228  {
229  if (!check[i])
230  {
231  res = false;
232  break;
233  }
234  }
235  break;
237 #ifndef IGNORECASE
238  elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
239 #endif
240  /* FALL THRU */
242  if (nkeys < 1)
243  {
244  /* Regex processing gave no result: do full index scan */
245  res = true;
246  }
247  else
248  res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
249  check);
250  break;
251  default:
252  elog(ERROR, "unrecognized strategy number: %d", strategy);
253  res = false; /* keep compiler quiet */
254  break;
255  }
256 
257  PG_RETURN_BOOL(res);
258 }
259 
260 /*
261  * In all cases, GIN_TRUE is at least as favorable to inclusion as
262  * GIN_MAYBE. If no better option is available, simply treat
263  * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
264  * consistent function.
265  */
266 Datum
268 {
270  StrategyNumber strategy = PG_GETARG_UINT16(1);
271 
272  /* text *query = PG_GETARG_TEXT_PP(2); */
273  int32 nkeys = PG_GETARG_INT32(3);
274  Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
276  int32 i,
277  ntrue;
278  bool *boolcheck;
279  double nlimit;
280 
281  switch (strategy)
282  {
285  nlimit = (strategy == SimilarityStrategyNumber) ?
287 
288  /* Count the matches */
289  ntrue = 0;
290  for (i = 0; i < nkeys; i++)
291  {
292  if (check[i] != GIN_FALSE)
293  ntrue++;
294  }
295 
296  /*
297  * See comment in gin_trgm_consistent() about * upper bound
298  * formula
299  */
300  res = (nkeys == 0)
301  ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
302  ? GIN_MAYBE : GIN_FALSE);
303  break;
304  case ILikeStrategyNumber:
305 #ifndef IGNORECASE
306  elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
307 #endif
308  /* FALL THRU */
309  case LikeStrategyNumber:
310  /* Check if all extracted trigrams are presented. */
311  res = GIN_MAYBE;
312  for (i = 0; i < nkeys; i++)
313  {
314  if (check[i] == GIN_FALSE)
315  {
316  res = GIN_FALSE;
317  break;
318  }
319  }
320  break;
322 #ifndef IGNORECASE
323  elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
324 #endif
325  /* FALL THRU */
327  if (nkeys < 1)
328  {
329  /* Regex processing gave no result: do full index scan */
330  res = GIN_MAYBE;
331  }
332  else
333  {
334  /*
335  * As trigramsMatchGraph implements a monotonic boolean
336  * function, promoting all GIN_MAYBE keys to GIN_TRUE will
337  * give a conservative result.
338  */
339  boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
340  for (i = 0; i < nkeys; i++)
341  boolcheck[i] = (check[i] != GIN_FALSE);
342  if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
343  boolcheck))
344  res = GIN_FALSE;
345  pfree(boolcheck);
346  }
347  break;
348  default:
349  elog(ERROR, "unrecognized strategy number: %d", strategy);
350  res = GIN_FALSE; /* keep compiler quiet */
351  break;
352  }
353 
354  /* All cases served by this function are inexact */
355  Assert(res != GIN_TRUE);
357 }
#define GIN_TRUE
Definition: gin.h:60
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:321
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
Definition: trgm_regexp.c:641
#define VARDATA_ANY(PTR)
Definition: postgres.h:347
#define RegExpStrategyNumber
Definition: trgm.h:33
TRGM * generate_wildcard_trgm(const char *str, int slen)
Definition: trgm_op.c:796
#define SimilarityStrategyNumber
Definition: trgm.h:29
#define GIN_MAYBE
Definition: gin.h:61
TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:523
#define RegExpICaseStrategyNumber
Definition: trgm.h:34
double similarity_threshold
Definition: trgm_op.c:19
uint16 StrategyNumber
Definition: stratnum.h:22
PG_FUNCTION_INFO_V1(gin_extract_trgm)
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:241
double word_similarity_threshold
Definition: trgm_op.c:20
#define ARRNELEM(x)
Definition: trgm.h:105
#define PG_GET_COLLATION()
Definition: fmgr.h:163
signed int int32
Definition: c.h:294
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:273
#define GETARR(x)
Definition: trgm.h:104
#define PG_RETURN_GIN_TERNARY_VALUE(x)
Definition: gin.h:66
Datum gin_trgm_triconsistent(PG_FUNCTION_ARGS)
Definition: trgm_gin.c:267
#define GIN_SEARCH_MODE_ALL
Definition: gin.h:35
void pfree(void *pointer)
Definition: mcxt.c:936
Datum gin_extract_trgm(PG_FUNCTION_ARGS)
Definition: trgm_gin.c:25
char * Pointer
Definition: c.h:283
#define ERROR
Definition: elog.h:43
Datum gin_extract_query_trgm(PG_FUNCTION_ARGS)
Definition: trgm_gin.c:71
Definition: trgm.h:63
char GinTernaryValue
Definition: gin.h:57
Datum gin_trgm_consistent(PG_FUNCTION_ARGS)
Definition: trgm_gin.c:169
#define LikeStrategyNumber
Definition: trgm.h:31
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
uint32 trgm2int(trgm *ptr)
Definition: trgm_op.c:865
float float4
Definition: c.h:438
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:319
uintptr_t Datum
Definition: postgres.h:372
#define WordSimilarityStrategyNumber
Definition: trgm.h:35
char trgm[3]
Definition: trgm.h:38
Datum gin_extract_value_trgm(PG_FUNCTION_ARGS)
Definition: trgm_gin.c:36
#define Assert(condition)
Definition: c.h:680
TRGM * generate_trgm(char *str, int slen)
Definition: trgm_op.c:321
#define GIN_FALSE
Definition: gin.h:59
#define PG_NARGS()
Definition: fmgr.h:168
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:237
#define Int32GetDatum(X)
Definition: postgres.h:485
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:340
void * palloc(Size size)
Definition: mcxt.c:835
int i
Definition: c.h:497
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
#define ILikeStrategyNumber
Definition: trgm.h:32
#define elog
Definition: elog.h:219
long val
Definition: informix.c:689
#define PG_RETURN_NULL()
Definition: fmgr.h:305