PostgreSQL Source Code git master
Loading...
Searching...
No Matches
trgm_gin.c
Go to the documentation of this file.
1/*
2 * contrib/pg_trgm/trgm_gin.c
3 */
4#include "postgres.h"
5
6#include "access/gin.h"
7#include "access/stratnum.h"
8#include "fmgr.h"
9#include "trgm.h"
10#include "varatt.h"

/*
 * Version-1 calling convention info records for every SQL-callable
 * function defined in this file.
 */
PG_FUNCTION_INFO_V1(gin_extract_trgm);
PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
PG_FUNCTION_INFO_V1(gin_trgm_consistent);
PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);

18/*
19 * This function can only be called if a pre-9.1 version of the GIN operator
20 * class definition is present in the catalogs (probably as a consequence
21 * of upgrade-in-place). Cope.
22 */
25{
26 if (PG_NARGS() == 3)
27 return gin_extract_value_trgm(fcinfo);
28 if (PG_NARGS() == 7)
29 return gin_extract_query_trgm(fcinfo);
30 elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
32}
33
36{
38 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
39 Datum *entries = NULL;
40 TRGM *trg;
42
43 *nentries = 0;
44
46 trglen = ARRNELEM(trg);
47
48 if (trglen > 0)
49 {
50 trgm *ptr;
51 int32 i;
52
53 *nentries = trglen;
54 entries = palloc_array(Datum, trglen);
55
56 ptr = GETARR(trg);
57 for (i = 0; i < trglen; i++)
58 {
59 int32 item = trgm2int(ptr);
60
61 entries[i] = Int32GetDatum(item);
62 ptr++;
63 }
64 }
65
66 PG_RETURN_POINTER(entries);
67}
68
71{
73 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
74 StrategyNumber strategy = PG_GETARG_UINT16(2);
75#ifdef NOT_USED
76 bool **pmatch = (bool **) PG_GETARG_POINTER(3);
77#endif
78 Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
79#ifdef NOT_USED
80 bool **nullFlags = (bool **) PG_GETARG_POINTER(5);
81#endif
82 int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
83 Datum *entries = NULL;
84 TRGM *trg;
86 trgm *ptr;
87 TrgmPackedGraph *graph;
88 int32 i;
89
90 switch (strategy)
91 {
97 break;
99#ifndef IGNORECASE
100 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
101#endif
102 /* FALL THRU */
104
105 /*
106 * For wildcard search we extract all the trigrams that every
107 * potentially-matching string must include.
108 */
111 break;
113#ifndef IGNORECASE
114 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
115#endif
116 /* FALL THRU */
119 &graph, CurrentMemoryContext);
120 if (trg && ARRNELEM(trg) > 0)
121 {
122 /*
123 * Successful regex processing: store NFA-like graph as
124 * extra_data. GIN API requires an array of nentries
125 * Pointers, but we just put the same value in each element.
126 */
127 trglen = ARRNELEM(trg);
128 *extra_data = palloc_array(Pointer, trglen);
129 for (i = 0; i < trglen; i++)
130 (*extra_data)[i] = (Pointer) graph;
131 }
132 else
133 {
134 /* No result: have to do full index scan. */
135 *nentries = 0;
136 *searchMode = GIN_SEARCH_MODE_ALL;
137 PG_RETURN_POINTER(entries);
138 }
139 break;
140 default:
141 elog(ERROR, "unrecognized strategy number: %d", strategy);
142 trg = NULL; /* keep compiler quiet */
143 break;
144 }
145
146 trglen = ARRNELEM(trg);
147 *nentries = trglen;
148
149 if (trglen > 0)
150 {
151 entries = palloc_array(Datum, trglen);
152 ptr = GETARR(trg);
153 for (i = 0; i < trglen; i++)
154 {
155 int32 item = trgm2int(ptr);
156
157 entries[i] = Int32GetDatum(item);
158 ptr++;
159 }
160 }
161
162 /*
163 * If no trigram was extracted then we have to scan all the index.
164 */
165 if (trglen == 0)
166 *searchMode = GIN_SEARCH_MODE_ALL;
167
168 PG_RETURN_POINTER(entries);
169}
170
171Datum
173{
174 bool *check = (bool *) PG_GETARG_POINTER(0);
175 StrategyNumber strategy = PG_GETARG_UINT16(1);
176#ifdef NOT_USED
177 text *query = PG_GETARG_TEXT_PP(2);
178#endif
179 int32 nkeys = PG_GETARG_INT32(3);
180 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
181 bool *recheck = (bool *) PG_GETARG_POINTER(5);
182 bool res;
183 int32 i,
184 ntrue;
185 double nlimit;
186
187 /* All cases served by this function are inexact */
188 *recheck = true;
189
190 switch (strategy)
191 {
196
197 /* Count the matches */
198 ntrue = 0;
199 for (i = 0; i < nkeys; i++)
200 {
201 if (check[i])
202 ntrue++;
203 }
204
205 /*--------------------
206 * If DIVUNION is defined then similarity formula is:
207 * c / (len1 + len2 - c)
208 * where c is number of common trigrams and it stands as ntrue in
209 * this code. Here we don't know value of len2 but we can assume
210 * that c (ntrue) is a lower bound of len2, so upper bound of
211 * similarity is:
212 * c / (len1 + c - c) => c / len1
213 * If DIVUNION is not defined then similarity formula is:
214 * c / max(len1, len2)
215 * And again, c (ntrue) is a lower bound of len2, but c <= len1
216 * just by definition and, consequently, upper bound of
217 * similarity is just c / len1.
218 * So, independently on DIVUNION the upper bound formula is the same.
219 */
220 res = (nkeys == 0) ? false :
221 (((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
222 break;
224#ifndef IGNORECASE
225 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
226#endif
227 /* FALL THRU */
230 /* Check if all extracted trigrams are presented. */
231 res = true;
232 for (i = 0; i < nkeys; i++)
233 {
234 if (!check[i])
235 {
236 res = false;
237 break;
238 }
239 }
240 break;
242#ifndef IGNORECASE
243 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
244#endif
245 /* FALL THRU */
247 if (nkeys < 1)
248 {
249 /* Regex processing gave no result: do full index scan */
250 res = true;
251 }
252 else
253 res = trigramsMatchGraph(extra_data[0], check);
254 break;
255 default:
256 elog(ERROR, "unrecognized strategy number: %d", strategy);
257 res = false; /* keep compiler quiet */
258 break;
259 }
260
261 PG_RETURN_BOOL(res);
262}
263
264/*
265 * In all cases, GIN_TRUE is at least as favorable to inclusion as
266 * GIN_MAYBE. If no better option is available, simply treat
267 * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
268 * consistent function.
269 */
270Datum
272{
274 StrategyNumber strategy = PG_GETARG_UINT16(1);
275#ifdef NOT_USED
276 text *query = PG_GETARG_TEXT_PP(2);
277#endif
278 int32 nkeys = PG_GETARG_INT32(3);
279 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
281 int32 i,
282 ntrue;
283 bool *boolcheck;
284 double nlimit;
285
286 switch (strategy)
287 {
292
293 /* Count the matches */
294 ntrue = 0;
295 for (i = 0; i < nkeys; i++)
296 {
297 if (check[i] != GIN_FALSE)
298 ntrue++;
299 }
300
301 /*
302 * See comment in gin_trgm_consistent() about * upper bound
303 * formula
304 */
305 res = (nkeys == 0)
306 ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
307 ? GIN_MAYBE : GIN_FALSE);
308 break;
310#ifndef IGNORECASE
311 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
312#endif
313 /* FALL THRU */
316 /* Check if all extracted trigrams are presented. */
317 res = GIN_MAYBE;
318 for (i = 0; i < nkeys; i++)
319 {
320 if (check[i] == GIN_FALSE)
321 {
322 res = GIN_FALSE;
323 break;
324 }
325 }
326 break;
328#ifndef IGNORECASE
329 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
330#endif
331 /* FALL THRU */
333 if (nkeys < 1)
334 {
335 /* Regex processing gave no result: do full index scan */
336 res = GIN_MAYBE;
337 }
338 else
339 {
340 /*
341 * As trigramsMatchGraph implements a monotonic boolean
342 * function, promoting all GIN_MAYBE keys to GIN_TRUE will
343 * give a conservative result.
344 */
345 boolcheck = palloc_array(bool, nkeys);
346 for (i = 0; i < nkeys; i++)
347 boolcheck[i] = (check[i] != GIN_FALSE);
348 if (!trigramsMatchGraph(extra_data[0], boolcheck))
349 res = GIN_FALSE;
351 }
352 break;
353 default:
354 elog(ERROR, "unrecognized strategy number: %d", strategy);
355 res = GIN_FALSE; /* keep compiler quiet */
356 break;
357 }
358
359 /* All cases served by this function are inexact */
360 Assert(res != GIN_TRUE);
362}
#define Assert(condition)
Definition c.h:873
int32_t int32
Definition c.h:542
float float4
Definition c.h:643
void * Pointer
Definition c.h:537
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define PG_GETARG_TEXT_PP(n)
Definition fmgr.h:310
#define PG_GETARG_POINTER(n)
Definition fmgr.h:277
#define PG_NARGS()
Definition fmgr.h:203
#define PG_RETURN_NULL()
Definition fmgr.h:346
#define PG_FUNCTION_INFO_V1(funcname)
Definition fmgr.h:417
#define PG_GETARG_UINT16(n)
Definition fmgr.h:272
#define PG_GETARG_INT32(n)
Definition fmgr.h:269
#define PG_RETURN_POINTER(x)
Definition fmgr.h:363
#define PG_GET_COLLATION()
Definition fmgr.h:198
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition fmgr.h:360
#define PG_RETURN_GIN_TERNARY_VALUE(x)
Definition gin.h:92
#define GIN_SEARCH_MODE_ALL
Definition gin.h:38
#define GIN_FALSE
Definition gin.h:76
char GinTernaryValue
Definition gin.h:71
#define GIN_MAYBE
Definition gin.h:78
#define GIN_TRUE
Definition gin.h:77
long val
Definition informix.c:689
int i
Definition isn.c:77
void pfree(void *pointer)
Definition mcxt.c:1616
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
uint64_t Datum
Definition postgres.h:70
static Datum Int32GetDatum(int32 X)
Definition postgres.h:222
static int fb(int x)
uint16 StrategyNumber
Definition stratnum.h:22
Definition trgm.h:58
Definition c.h:706
#define RegExpICaseStrategyNumber
Definition trgm.h:34
#define WordSimilarityStrategyNumber
Definition trgm.h:35
TRGM * generate_trgm(char *str, int slen)
Definition trgm_op.c:406
#define StrictWordSimilarityStrategyNumber
Definition trgm.h:37
uint32 trgm2int(trgm *ptr)
Definition trgm_op.c:985
#define ARRNELEM(x)
Definition trgm.h:98
double index_strategy_get_limit(StrategyNumber strategy)
Definition trgm_op.c:182
TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
#define SimilarityStrategyNumber
Definition trgm.h:29
bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
char trgm[3]
Definition trgm.h:41
#define ILikeStrategyNumber
Definition trgm.h:32
TRGM * generate_wildcard_trgm(const char *str, int slen)
Definition trgm_op.c:916
#define LikeStrategyNumber
Definition trgm.h:31
#define GETARR(x)
Definition trgm.h:97
#define EqualStrategyNumber
Definition trgm.h:39
#define RegExpStrategyNumber
Definition trgm.h:33
Datum gin_trgm_consistent(PG_FUNCTION_ARGS)
Definition trgm_gin.c:172
Datum gin_trgm_triconsistent(PG_FUNCTION_ARGS)
Definition trgm_gin.c:271
Datum gin_extract_value_trgm(PG_FUNCTION_ARGS)
Definition trgm_gin.c:35
Datum gin_extract_query_trgm(PG_FUNCTION_ARGS)
Definition trgm_gin.c:70
Datum gin_extract_trgm(PG_FUNCTION_ARGS)
Definition trgm_gin.c:24
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition varatt.h:472
static char * VARDATA_ANY(const void *PTR)
Definition varatt.h:486