PostgreSQL Source Code  git master
trgm_gin.c
Go to the documentation of this file.
1 /*
2  * contrib/pg_trgm/trgm_gin.c
3  */
4 #include "postgres.h"
5 
6 #include "access/gin.h"
7 #include "access/stratnum.h"
8 #include "fmgr.h"
9 #include "trgm.h"
10 #include "varatt.h"
11 
17 
18 /*
19  * This function can only be called if a pre-9.1 version of the GIN operator
20  * class definition is present in the catalogs (probably as a consequence
21  * of upgrade-in-place). Cope.
    *
    * The call is dispatched purely on the number of arguments it was invoked
    * with: 3 arguments are forwarded to gin_extract_value_trgm(), 7 arguments
    * to gin_extract_query_trgm(), and any other count raises an ERROR.
    *
    * NOTE(review): the signature line (listing line 24) and listing line 31
    * are missing from this listing.  The name gin_extract_trgm is taken from
    * the error message below and from the cross-reference index at the end of
    * the file; line 31 is presumably a dummy return that is never reached
    * after elog(ERROR) -- confirm against the real source.
22  */
23 Datum
25 {
    /* Three arguments: forward the same call info to the value extractor. */
26  if (PG_NARGS() == 3)
27  return gin_extract_value_trgm(fcinfo);
    /* Seven arguments: forward the same call info to the query extractor. */
28  if (PG_NARGS() == 7)
29  return gin_extract_query_trgm(fcinfo);
    /* Any other argument count is not handled here. */
30  elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
32 }
33 
/*
 * gin_extract_value_trgm
 *
 * Turns the trigram array belonging to a text value into an array of int32
 * Datums, one Datum per trigram.
 *
 * Argument 0: the text value (val).
 * Argument 1: output pointer (nentries); receives the number of Datums
 *             returned.
 *
 * Returns the palloc'd Datum array, or NULL with *nentries == 0 when the
 * trigram array is empty.
 *
 * NOTE(review): the signature line (listing line 35) and the statement that
 * assigns trg (listing line 45) are missing from this listing.  The function
 * name comes from the cross-reference index at the end of the file; trg is
 * presumably produced from val by generate_trgm() -- confirm against the
 * real source.
 */
34 Datum
36 {
37  text *val = (text *) PG_GETARG_TEXT_PP(0);
38  int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
39  Datum *entries = NULL;
40  TRGM *trg;
41  int32 trglen;
42 
    /* Report "no entries" unless trigrams are found below. */
43  *nentries = 0;
44 
    /* NOTE(review): listing line 45, which sets trg, is missing here. */
46  trglen = ARRNELEM(trg);
47 
48  if (trglen > 0)
49  {
50  trgm *ptr;
51  int32 i;
52 
53  *nentries = trglen;
54  entries = (Datum *) palloc(sizeof(Datum) * trglen);
55 
    /* Walk the trigram array, converting each trigram to an int32 Datum. */
56  ptr = GETARR(trg);
57  for (i = 0; i < trglen; i++)
58  {
59  int32 item = trgm2int(ptr);
60 
61  entries[i] = Int32GetDatum(item);
62  ptr++;
63  }
64  }
65 
    /* entries is still NULL here if no trigram was found. */
66  PG_RETURN_POINTER(entries);
67 }
68 
/*
 * gin_extract_query_trgm
 *
 * Builds the array of int32 Datum keys to search for, given a query text
 * and a strategy number.
 *
 * Argument 0: the query text (val).
 * Argument 1: output pointer (nentries); receives the number of keys.
 * Argument 2: strategy number selecting how trigrams are extracted.
 * Argument 4: output pointer (extra_data); set only on the regex path, where
 *             every element points at the same graph.
 * Argument 6: output pointer (searchMode); set to GIN_SEARCH_MODE_ALL when
 *             no trigram could be extracted.
 * Arguments 3 and 5 are not used (see the commented-out declarations).
 *
 * Returns the palloc'd key array, or NULL when there are no keys.
 *
 * NOTE(review): this listing has lost several lines of the function: the
 * signature (listing line 70), most case labels and the calls that assign
 * trg (listing lines 90-94, 96, 107-108, 110, 115-116).  The function name
 * comes from the cross-reference index at the end of the file.  Comments
 * below only describe what is visible; confirm the rest against the real
 * source.
 */
69 Datum
71 {
72  text *val = (text *) PG_GETARG_TEXT_PP(0);
73  int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
74  StrategyNumber strategy = PG_GETARG_UINT16(2);
75 
76  /* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
77  Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
78 
79  /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
80  int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
81  Datum *entries = NULL;
82  TRGM *trg;
83  int32 trglen;
84  trgm *ptr;
85  TrgmPackedGraph *graph;
86  int32 i;
87 
88  switch (strategy)
89  {
    /*
     * NOTE(review): listing lines 90-94 (the first group of case labels and
     * the statement that fills trg for them) are missing here.
     */
95  break;
    /*
     * NOTE(review): listing line 96 is missing; judging by the error text
     * below it is presumably "case ILikeStrategyNumber:".
     */
97 #ifndef IGNORECASE
98  elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
99 #endif
100  /* FALL THRU */
101  case LikeStrategyNumber:
102 
103  /*
104  * For wildcard search we extract all the trigrams that every
105  * potentially-matching string must include.
106  */
    /*
     * NOTE(review): listing lines 107-108 are missing; presumably the call
     * to generate_wildcard_trgm() that assigns trg.
     */
109  break;
    /*
     * NOTE(review): listing lines 110 and 115-116 are missing; presumably
     * the regex case labels and the start of the createTrgmNFA() call whose
     * last two arguments appear on listing line 117.
     */
111 #ifndef IGNORECASE
112  elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
113 #endif
114  /* FALL THRU */
117  &graph, CurrentMemoryContext);
118  if (trg && ARRNELEM(trg) > 0)
119  {
120  /*
121  * Successful regex processing: store NFA-like graph as
122  * extra_data. GIN API requires an array of nentries
123  * Pointers, but we just put the same value in each element.
124  */
125  trglen = ARRNELEM(trg);
126  *extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
127  for (i = 0; i < trglen; i++)
128  (*extra_data)[i] = (Pointer) graph;
129  }
130  else
131  {
132  /* No result: have to do full index scan. */
133  *nentries = 0;
134  *searchMode = GIN_SEARCH_MODE_ALL;
    /* Early exit: entries is still NULL at this point. */
135  PG_RETURN_POINTER(entries);
136  }
137  break;
138  default:
139  elog(ERROR, "unrecognized strategy number: %d", strategy);
140  trg = NULL; /* keep compiler quiet */
141  break;
142  }
143 
    /* Common tail: convert whatever trigrams were extracted into keys. */
144  trglen = ARRNELEM(trg);
145  *nentries = trglen;
146 
147  if (trglen > 0)
148  {
149  entries = (Datum *) palloc(sizeof(Datum) * trglen);
150  ptr = GETARR(trg);
151  for (i = 0; i < trglen; i++)
152  {
153  int32 item = trgm2int(ptr);
154 
155  entries[i] = Int32GetDatum(item);
156  ptr++;
157  }
158  }
159 
160  /*
161  * If no trigram was extracted then we have to scan all the index.
162  */
163  if (trglen == 0)
164  *searchMode = GIN_SEARCH_MODE_ALL;
165 
166  PG_RETURN_POINTER(entries);
167 }
168 
/*
 * gin_trgm_consistent
 *
 * Decides, from the per-key match flags, whether an indexed item may satisfy
 * the query under the given strategy.
 *
 * Argument 0: check[], one bool per query key (true = key is present).
 * Argument 1: strategy number.
 * Argument 3: nkeys, the number of elements in check[].
 * Argument 4: extra_data; element 0 is used as the packed graph on the
 *             regex path.
 * Argument 5: output pointer (recheck); always set to true here.
 * Argument 2 is not used (see the commented-out declaration).
 *
 * NOTE(review): this listing has lost the signature (listing line 170), the
 * case labels on listing lines 189-191, 238 and 243, and the final return
 * statement (listing line 259, presumably PG_RETURN_BOOL(res)).  The
 * function name comes from the cross-reference index at the end of the
 * file; confirm the missing parts against the real source.
 */
169 Datum
171 {
172  bool *check = (bool *) PG_GETARG_POINTER(0);
173  StrategyNumber strategy = PG_GETARG_UINT16(1);
174 
175  /* text *query = PG_GETARG_TEXT_PP(2); */
176  int32 nkeys = PG_GETARG_INT32(3);
177  Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
178  bool *recheck = (bool *) PG_GETARG_POINTER(5);
179  bool res;
180  int32 i,
181  ntrue;
182  double nlimit;
183 
184  /* All cases served by this function are inexact */
185  *recheck = true;
186 
187  switch (strategy)
188  {
    /*
     * NOTE(review): listing lines 189-191 are missing; presumably the case
     * labels of the similarity-style strategies handled by this branch.
     */
192  nlimit = index_strategy_get_limit(strategy);
193 
194  /* Count the matches */
195  ntrue = 0;
196  for (i = 0; i < nkeys; i++)
197  {
198  if (check[i])
199  ntrue++;
200  }
201 
202  /*--------------------
203  * If DIVUNION is defined then similarity formula is:
204  * c / (len1 + len2 - c)
205  * where c is number of common trigrams and it stands as ntrue in
206  * this code. Here we don't know value of len2 but we can assume
207  * that c (ntrue) is a lower bound of len2, so upper bound of
208  * similarity is:
209  * c / (len1 + c - c) => c / len1
210  * If DIVUNION is not defined then similarity formula is:
211  * c / max(len1, len2)
212  * And again, c (ntrue) is a lower bound of len2, but c <= len1
213  * just by definition and, consequently, upper bound of
214  * similarity is just c / len1.
215  * So, independently on DIVUNION the upper bound formula is the same.
216  */
    /* nkeys is the divisor below; zero keys never match (no division). */
217  res = (nkeys == 0) ? false :
218  (((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
219  break;
220  case ILikeStrategyNumber:
221 #ifndef IGNORECASE
222  elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
223 #endif
224  /* FALL THRU */
225  case LikeStrategyNumber:
226  case EqualStrategyNumber:
227  /* Check if all extracted trigrams are present. */
228  res = true;
229  for (i = 0; i < nkeys; i++)
230  {
231  if (!check[i])
232  {
233  res = false;
234  break;
235  }
236  }
237  break;
    /*
     * NOTE(review): listing lines 238 and 243 are missing; presumably the
     * two regex case labels (case-insensitive first, guarded by the
     * IGNORECASE check below, then case-sensitive).
     */
239 #ifndef IGNORECASE
240  elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
241 #endif
242  /* FALL THRU */
244  if (nkeys < 1)
245  {
246  /* Regex processing gave no result: do full index scan */
247  res = true;
248  }
249  else
    /* All extra_data elements hold the same graph, so element 0 suffices. */
250  res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
251  check);
252  break;
253  default:
254  elog(ERROR, "unrecognized strategy number: %d", strategy);
255  res = false; /* keep compiler quiet */
256  break;
257  }
258 
    /* NOTE(review): listing line 259 (the return of res) is missing here. */
260 }
261 
262 /*
263  * In all cases, GIN_TRUE is at least as favorable to inclusion as
264  * GIN_MAYBE. If no better option is available, simply treat
265  * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
266  * consistent function.
     *
     * Ternary counterpart of gin_trgm_consistent(): the per-key inputs are
     * compared against GIN_FALSE rather than tested as booleans, and the
     * result is only ever GIN_MAYBE or GIN_FALSE (see the Assert at the end).
     *
     * Argument 1: strategy number.
     * Argument 3: nkeys, the number of per-key values.
     * Argument 4: extra_data; element 0 is used as the packed graph on the
     *             regex path.
     * Argument 2 is not used (see the commented-out declaration).
     *
     * NOTE(review): this listing has lost the signature (listing line 269),
     * the declarations of check and res (listing lines 271 and 277), the case
     * labels on listing lines 285-287, 324 and 329, and the final return
     * (listing line 359).  The function name comes from the cross-reference
     * index at the end of the file.  The regex branch only ever assigns
     * GIN_FALSE to res on a mismatch, so res is presumably initialized to
     * GIN_MAYBE at its declaration -- confirm against the real source.
267  */
268 Datum
270 {
    /* NOTE(review): listing line 271 (declaration of check) is missing. */
272  StrategyNumber strategy = PG_GETARG_UINT16(1);
273 
274  /* text *query = PG_GETARG_TEXT_PP(2); */
275  int32 nkeys = PG_GETARG_INT32(3);
276  Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
    /* NOTE(review): listing line 277 (declaration of res) is missing. */
278  int32 i,
279  ntrue;
280  bool *boolcheck;
281  double nlimit;
282 
283  switch (strategy)
284  {
    /*
     * NOTE(review): listing lines 285-287 are missing; presumably the case
     * labels of the similarity-style strategies handled by this branch.
     */
288  nlimit = index_strategy_get_limit(strategy);
289 
290  /* Count the matches */
    /* Anything other than GIN_FALSE (i.e. GIN_TRUE or GIN_MAYBE) counts. */
291  ntrue = 0;
292  for (i = 0; i < nkeys; i++)
293  {
294  if (check[i] != GIN_FALSE)
295  ntrue++;
296  }
297 
298  /*
299  * See comment in gin_trgm_consistent() about the upper bound
300  * formula.
301  */
302  res = (nkeys == 0)
303  ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
304  ? GIN_MAYBE : GIN_FALSE);
305  break;
306  case ILikeStrategyNumber:
307 #ifndef IGNORECASE
308  elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
309 #endif
310  /* FALL THRU */
311  case LikeStrategyNumber:
312  case EqualStrategyNumber:
313  /* Check if all extracted trigrams are present. */
314  res = GIN_MAYBE;
315  for (i = 0; i < nkeys; i++)
316  {
317  if (check[i] == GIN_FALSE)
318  {
319  res = GIN_FALSE;
320  break;
321  }
322  }
323  break;
    /*
     * NOTE(review): listing lines 324 and 329 are missing; presumably the
     * two regex case labels (case-insensitive first, guarded by the
     * IGNORECASE check below, then case-sensitive).
     */
325 #ifndef IGNORECASE
326  elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
327 #endif
328  /* FALL THRU */
330  if (nkeys < 1)
331  {
332  /* Regex processing gave no result: do full index scan */
333  res = GIN_MAYBE;
334  }
335  else
336  {
337  /*
338  * As trigramsMatchGraph implements a monotonic boolean
339  * function, promoting all GIN_MAYBE keys to GIN_TRUE will
340  * give a conservative result.
341  */
    /* Temporary bool view of check[]; released right after the match. */
342  boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
343  for (i = 0; i < nkeys; i++)
344  boolcheck[i] = (check[i] != GIN_FALSE);
345  if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
346  boolcheck))
347  res = GIN_FALSE;
348  pfree(boolcheck);
349  }
350  break;
351  default:
352  elog(ERROR, "unrecognized strategy number: %d", strategy);
353  res = GIN_FALSE; /* keep compiler quiet */
354  break;
355  }
356 
357  /* All cases served by this function are inexact */
358  Assert(res != GIN_TRUE);
    /* NOTE(review): listing line 359 (the return of res) is missing here. */
360 }
signed int int32
Definition: c.h:494
char * Pointer
Definition: c.h:483
#define Assert(condition)
Definition: c.h:858
float float4
Definition: c.h:629
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_NARGS()
Definition: fmgr.h:203
#define PG_RETURN_NULL()
Definition: fmgr.h:345
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:272
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
#define PG_RETURN_GIN_TERNARY_VALUE(x)
Definition: gin.h:79
#define GIN_SEARCH_MODE_ALL
Definition: gin.h:36
#define GIN_FALSE
Definition: gin.h:63
char GinTernaryValue
Definition: gin.h:58
#define GIN_MAYBE
Definition: gin.h:65
#define GIN_TRUE
Definition: gin.h:64
long val
Definition: informix.c:670
int i
Definition: isn.c:73
void pfree(void *pointer)
Definition: mcxt.c:1520
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void * palloc(Size size)
Definition: mcxt.c:1316
uintptr_t Datum
Definition: postgres.h:64
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:212
uint16 StrategyNumber
Definition: stratnum.h:22
Definition: trgm.h:67
Definition: c.h:687
#define RegExpICaseStrategyNumber
Definition: trgm.h:35
TRGM * generate_trgm(char *str, int slen)
Definition: trgm_op.c:357
#define WordSimilarityStrategyNumber
Definition: trgm.h:36
#define StrictWordSimilarityStrategyNumber
Definition: trgm.h:38
TRGM * generate_wildcard_trgm(const char *str, int slen)
Definition: trgm_op.c:867
uint32 trgm2int(trgm *ptr)
Definition: trgm_op.c:936
#define ARRNELEM(x)
Definition: trgm.h:107
double index_strategy_get_limit(StrategyNumber strategy)
Definition: trgm_op.c:133
#define SimilarityStrategyNumber
Definition: trgm.h:30
bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
Definition: trgm_regexp.c:626
char trgm[3]
Definition: trgm.h:42
#define ILikeStrategyNumber
Definition: trgm.h:33
TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:522
#define LikeStrategyNumber
Definition: trgm.h:32
#define GETARR(x)
Definition: trgm.h:106
#define EqualStrategyNumber
Definition: trgm.h:40
#define RegExpStrategyNumber
Definition: trgm.h:34
Datum gin_trgm_consistent(PG_FUNCTION_ARGS)
Definition: trgm_gin.c:170
Datum gin_trgm_triconsistent(PG_FUNCTION_ARGS)
Definition: trgm_gin.c:269
Datum gin_extract_value_trgm(PG_FUNCTION_ARGS)
Definition: trgm_gin.c:35
Datum gin_extract_query_trgm(PG_FUNCTION_ARGS)
Definition: trgm_gin.c:70
Datum gin_extract_trgm(PG_FUNCTION_ARGS)
Definition: trgm_gin.c:24
PG_FUNCTION_INFO_V1(gin_extract_trgm)
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317