PostgreSQL Source Code git master
trgm_gin.c
Go to the documentation of this file.
1/*
2 * contrib/pg_trgm/trgm_gin.c
3 */
4#include "postgres.h"
5
6#include "access/gin.h"
7#include "access/stratnum.h"
8#include "fmgr.h"
9#include "trgm.h"
10#include "varatt.h"
11
17
18/*
19 * This function can only be called if a pre-9.1 version of the GIN operator
20 * class definition is present in the catalogs (probably as a consequence
21 * of upgrade-in-place). Cope.
22 */
25{
26 if (PG_NARGS() == 3)
27 return gin_extract_value_trgm(fcinfo);
28 if (PG_NARGS() == 7)
29 return gin_extract_query_trgm(fcinfo);
30 elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
32}
33
36{
38 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
39 Datum *entries = NULL;
40 TRGM *trg;
41 int32 trglen;
42
43 *nentries = 0;
44
46 trglen = ARRNELEM(trg);
47
48 if (trglen > 0)
49 {
50 trgm *ptr;
51 int32 i;
52
53 *nentries = trglen;
54 entries = (Datum *) palloc(sizeof(Datum) * trglen);
55
56 ptr = GETARR(trg);
57 for (i = 0; i < trglen; i++)
58 {
59 int32 item = trgm2int(ptr);
60
61 entries[i] = Int32GetDatum(item);
62 ptr++;
63 }
64 }
65
66 PG_RETURN_POINTER(entries);
67}
68
71{
73 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
74 StrategyNumber strategy = PG_GETARG_UINT16(2);
75
76 /* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
77 Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
78
79 /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
80 int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
81 Datum *entries = NULL;
82 TRGM *trg;
83 int32 trglen;
84 trgm *ptr;
85 TrgmPackedGraph *graph;
86 int32 i;
87
88 switch (strategy)
89 {
95 break;
97#ifndef IGNORECASE
98 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
99#endif
100 /* FALL THRU */
102
103 /*
104 * For wildcard search we extract all the trigrams that every
105 * potentially-matching string must include.
106 */
109 break;
111#ifndef IGNORECASE
112 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
113#endif
114 /* FALL THRU */
117 &graph, CurrentMemoryContext);
118 if (trg && ARRNELEM(trg) > 0)
119 {
120 /*
121 * Successful regex processing: store NFA-like graph as
122 * extra_data. GIN API requires an array of nentries
123 * Pointers, but we just put the same value in each element.
124 */
125 trglen = ARRNELEM(trg);
126 *extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
127 for (i = 0; i < trglen; i++)
128 (*extra_data)[i] = (Pointer) graph;
129 }
130 else
131 {
132 /* No result: have to do full index scan. */
133 *nentries = 0;
134 *searchMode = GIN_SEARCH_MODE_ALL;
135 PG_RETURN_POINTER(entries);
136 }
137 break;
138 default:
139 elog(ERROR, "unrecognized strategy number: %d", strategy);
140 trg = NULL; /* keep compiler quiet */
141 break;
142 }
143
144 trglen = ARRNELEM(trg);
145 *nentries = trglen;
146
147 if (trglen > 0)
148 {
149 entries = (Datum *) palloc(sizeof(Datum) * trglen);
150 ptr = GETARR(trg);
151 for (i = 0; i < trglen; i++)
152 {
153 int32 item = trgm2int(ptr);
154
155 entries[i] = Int32GetDatum(item);
156 ptr++;
157 }
158 }
159
160 /*
161 * If no trigram was extracted then we have to scan the whole index.
162 */
163 if (trglen == 0)
164 *searchMode = GIN_SEARCH_MODE_ALL;
165
166 PG_RETURN_POINTER(entries);
167}
168
169Datum
171{
172 bool *check = (bool *) PG_GETARG_POINTER(0);
173 StrategyNumber strategy = PG_GETARG_UINT16(1);
174
175 /* text *query = PG_GETARG_TEXT_PP(2); */
176 int32 nkeys = PG_GETARG_INT32(3);
177 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
178 bool *recheck = (bool *) PG_GETARG_POINTER(5);
179 bool res;
180 int32 i,
181 ntrue;
182 double nlimit;
183
184 /* All cases served by this function are inexact */
185 *recheck = true;
186
187 switch (strategy)
188 {
192 nlimit = index_strategy_get_limit(strategy);
193
194 /* Count the matches */
195 ntrue = 0;
196 for (i = 0; i < nkeys; i++)
197 {
198 if (check[i])
199 ntrue++;
200 }
201
202 /*--------------------
203 * If DIVUNION is defined then similarity formula is:
204 * c / (len1 + len2 - c)
205 * where c is the number of common trigrams, called ntrue in
206 * this code. Here we don't know the value of len2, but we can assume
207 * that c (ntrue) is a lower bound of len2, so upper bound of
208 * similarity is:
209 * c / (len1 + c - c) => c / len1
210 * If DIVUNION is not defined then similarity formula is:
211 * c / max(len1, len2)
212 * And again, c (ntrue) is a lower bound of len2, but c <= len1
213 * just by definition and, consequently, upper bound of
214 * similarity is just c / len1.
215 * So, independently of DIVUNION, the upper bound formula is the same.
216 */
217 res = (nkeys == 0) ? false :
218 (((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
219 break;
221#ifndef IGNORECASE
222 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
223#endif
224 /* FALL THRU */
227 /* Check if all extracted trigrams are present. */
228 res = true;
229 for (i = 0; i < nkeys; i++)
230 {
231 if (!check[i])
232 {
233 res = false;
234 break;
235 }
236 }
237 break;
239#ifndef IGNORECASE
240 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
241#endif
242 /* FALL THRU */
244 if (nkeys < 1)
245 {
246 /* Regex processing gave no result: do full index scan */
247 res = true;
248 }
249 else
250 res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
251 check);
252 break;
253 default:
254 elog(ERROR, "unrecognized strategy number: %d", strategy);
255 res = false; /* keep compiler quiet */
256 break;
257 }
258
260}
261
262/*
263 * In all cases, GIN_TRUE is at least as favorable to inclusion as
264 * GIN_MAYBE. If no better option is available, simply treat
265 * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
266 * consistent function.
267 */
268Datum
270{
272 StrategyNumber strategy = PG_GETARG_UINT16(1);
273
274 /* text *query = PG_GETARG_TEXT_PP(2); */
275 int32 nkeys = PG_GETARG_INT32(3);
276 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
278 int32 i,
279 ntrue;
280 bool *boolcheck;
281 double nlimit;
282
283 switch (strategy)
284 {
288 nlimit = index_strategy_get_limit(strategy);
289
290 /* Count the matches */
291 ntrue = 0;
292 for (i = 0; i < nkeys; i++)
293 {
294 if (check[i] != GIN_FALSE)
295 ntrue++;
296 }
297
298 /*
299 * See comment in gin_trgm_consistent() about the upper bound
300 * formula.
301 */
302 res = (nkeys == 0)
303 ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
304 ? GIN_MAYBE : GIN_FALSE);
305 break;
307#ifndef IGNORECASE
308 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
309#endif
310 /* FALL THRU */
313 /* Check if all extracted trigrams are present. */
314 res = GIN_MAYBE;
315 for (i = 0; i < nkeys; i++)
316 {
317 if (check[i] == GIN_FALSE)
318 {
319 res = GIN_FALSE;
320 break;
321 }
322 }
323 break;
325#ifndef IGNORECASE
326 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
327#endif
328 /* FALL THRU */
330 if (nkeys < 1)
331 {
332 /* Regex processing gave no result: do full index scan */
333 res = GIN_MAYBE;
334 }
335 else
336 {
337 /*
338 * As trigramsMatchGraph implements a monotonic boolean
339 * function, promoting all GIN_MAYBE keys to GIN_TRUE will
340 * give a conservative result.
341 */
342 boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
343 for (i = 0; i < nkeys; i++)
344 boolcheck[i] = (check[i] != GIN_FALSE);
345 if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
346 boolcheck))
347 res = GIN_FALSE;
348 pfree(boolcheck);
349 }
350 break;
351 default:
352 elog(ERROR, "unrecognized strategy number: %d", strategy);
353 res = GIN_FALSE; /* keep compiler quiet */
354 break;
355 }
356
357 /* All cases served by this function are inexact */
358 Assert(res != GIN_TRUE);
360}
char * Pointer
Definition: c.h:479
#define Assert(condition)
Definition: c.h:815
int32_t int32
Definition: c.h:484
float float4
Definition: c.h:586
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_NARGS()
Definition: fmgr.h:203
#define PG_RETURN_NULL()
Definition: fmgr.h:345
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:272
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
#define PG_RETURN_GIN_TERNARY_VALUE(x)
Definition: gin.h:79
#define GIN_SEARCH_MODE_ALL
Definition: gin.h:36
#define GIN_FALSE
Definition: gin.h:63
char GinTernaryValue
Definition: gin.h:58
#define GIN_MAYBE
Definition: gin.h:65
#define GIN_TRUE
Definition: gin.h:64
long val
Definition: informix.c:689
int i
Definition: isn.c:72
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
uintptr_t Datum
Definition: postgres.h:69
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:217
uint16 StrategyNumber
Definition: stratnum.h:22
Definition: trgm.h:61
Definition: c.h:644
#define RegExpICaseStrategyNumber
Definition: trgm.h:34
#define WordSimilarityStrategyNumber
Definition: trgm.h:35
TRGM * generate_trgm(char *str, int slen)
Definition: trgm_op.c:359
#define StrictWordSimilarityStrategyNumber
Definition: trgm.h:37
uint32 trgm2int(trgm *ptr)
Definition: trgm_op.c:938
#define ARRNELEM(x)
Definition: trgm.h:101
double index_strategy_get_limit(StrategyNumber strategy)
Definition: trgm_op.c:135
TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:524
#define SimilarityStrategyNumber
Definition: trgm.h:29
bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
Definition: trgm_regexp.c:628
char trgm[3]
Definition: trgm.h:41
#define ILikeStrategyNumber
Definition: trgm.h:32
TRGM * generate_wildcard_trgm(const char *str, int slen)
Definition: trgm_op.c:869
#define LikeStrategyNumber
Definition: trgm.h:31
#define GETARR(x)
Definition: trgm.h:100
#define EqualStrategyNumber
Definition: trgm.h:39
#define RegExpStrategyNumber
Definition: trgm.h:33
Datum gin_trgm_consistent(PG_FUNCTION_ARGS)
Definition: trgm_gin.c:170
Datum gin_trgm_triconsistent(PG_FUNCTION_ARGS)
Definition: trgm_gin.c:269
Datum gin_extract_value_trgm(PG_FUNCTION_ARGS)
Definition: trgm_gin.c:35
Datum gin_extract_query_trgm(PG_FUNCTION_ARGS)
Definition: trgm_gin.c:70
Datum gin_extract_trgm(PG_FUNCTION_ARGS)
Definition: trgm_gin.c:24
PG_FUNCTION_INFO_V1(gin_extract_trgm)
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317