PostgreSQL Source Code git master
Loading...
Searching...
No Matches
trgm_gist.c File Reference
#include "postgres.h"
#include "access/reloptions.h"
#include "access/stratnum.h"
#include "fmgr.h"
#include "port/pg_bitutils.h"
#include "trgm.h"
#include "varatt.h"
Include dependency graph for trgm_gist.c:

Go to the source code of this file.

Data Structures

struct  TrgmGistOptions
 
struct  gtrgm_consistent_cache
 
struct  CACHESIGN
 
struct  SPLITCOST
 

Macros

#define GET_SIGLEN()
 
#define GETENTRY(vec, pos)   ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))
 
#define WISH_F(a, b, c)   (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
 

Functions

 PG_FUNCTION_INFO_V1 (gtrgm_in)
 
 PG_FUNCTION_INFO_V1 (gtrgm_out)
 
 PG_FUNCTION_INFO_V1 (gtrgm_compress)
 
 PG_FUNCTION_INFO_V1 (gtrgm_decompress)
 
 PG_FUNCTION_INFO_V1 (gtrgm_consistent)
 
 PG_FUNCTION_INFO_V1 (gtrgm_distance)
 
 PG_FUNCTION_INFO_V1 (gtrgm_union)
 
 PG_FUNCTION_INFO_V1 (gtrgm_same)
 
 PG_FUNCTION_INFO_V1 (gtrgm_penalty)
 
 PG_FUNCTION_INFO_V1 (gtrgm_picksplit)
 
 PG_FUNCTION_INFO_V1 (gtrgm_options)
 
Datum gtrgm_in (PG_FUNCTION_ARGS)
 
Datum gtrgm_out (PG_FUNCTION_ARGS)
 
static TRGMgtrgm_alloc (bool isalltrue, int siglen, BITVECP sign)
 
static void makesign (BITVECP sign, TRGM *a, int siglen)
 
Datum gtrgm_compress (PG_FUNCTION_ARGS)
 
Datum gtrgm_decompress (PG_FUNCTION_ARGS)
 
static int32 cnt_sml_sign_common (TRGM *qtrg, BITVECP sign, int siglen)
 
Datum gtrgm_consistent (PG_FUNCTION_ARGS)
 
Datum gtrgm_distance (PG_FUNCTION_ARGS)
 
static int32 unionkey (BITVECP sbase, TRGM *add, int siglen)
 
Datum gtrgm_union (PG_FUNCTION_ARGS)
 
Datum gtrgm_same (PG_FUNCTION_ARGS)
 
static int32 sizebitvec (BITVECP sign, int siglen)
 
static int hemdistsign (BITVECP a, BITVECP b, int siglen)
 
static int hemdist (TRGM *a, TRGM *b, int siglen)
 
Datum gtrgm_penalty (PG_FUNCTION_ARGS)
 
static void fillcache (CACHESIGN *item, TRGM *key, BITVECP sign, int siglen)
 
static int comparecost (const void *a, const void *b)
 
static int hemdistcache (CACHESIGN *a, CACHESIGN *b, int siglen)
 
Datum gtrgm_picksplit (PG_FUNCTION_ARGS)
 
Datum gtrgm_options (PG_FUNCTION_ARGS)
 

Macro Definition Documentation

◆ GET_SIGLEN

#define GET_SIGLEN ( )
Value:
#define PG_GET_OPCLASS_OPTIONS()
Definition fmgr.h:343
#define PG_HAS_OPCLASS_OPTIONS()
Definition fmgr.h:342
#define SIGLEN_DEFAULT
Definition hstore_gist.c:23

Definition at line 20 of file trgm_gist.c.

21 : \

◆ GETENTRY

#define GETENTRY (   vec,
  pos 
)    ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))

Definition at line 40 of file trgm_gist.c.

◆ WISH_F

#define WISH_F (   a,
  b,
  c 
)    (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )

Definition at line 763 of file trgm_gist.c.

Function Documentation

◆ cnt_sml_sign_common()

static int32 cnt_sml_sign_common ( TRGM qtrg,
BITVECP  sign,
int  siglen 
)
static

Definition at line 179 of file trgm_gist.c.

180{
181 int32 count = 0;
182 int32 k,
183 len = ARRNELEM(qtrg);
184 trgm *ptr = GETARR(qtrg);
185 int32 tmp = 0;
186
187 for (k = 0; k < len; k++)
188 {
189 CPTRGM(&tmp, ptr + k);
190 count += GETBIT(sign, HASHVAL(tmp, siglen));
191 }
192
193 return count;
194}
#define GETBIT(x, i)
Definition blutils.c:30
int32_t int32
Definition c.h:542
#define HASHVAL(val, siglen)
Definition hstore_gist.c:45
char sign
Definition informix.c:693
const void size_t len
static int fb(int x)
#define ARRNELEM(x)
Definition trgm.h:98
char trgm[3]
Definition trgm.h:41
#define CPTRGM(a, b)
Definition trgm.h:43
#define GETARR(x)
Definition trgm.h:97

References ARRNELEM, CPTRGM, fb(), GETARR, GETBIT, HASHVAL, len, and sign.

Referenced by gtrgm_consistent(), and gtrgm_distance().

◆ comparecost()

static int comparecost ( const void a,
const void b 
)
static

Definition at line 771 of file trgm_gist.c.

772{
773 if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
774 return 0;
775 else
776 return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
777}
int b
Definition isn.c:74
int a
Definition isn.c:73

References a, and b.

Referenced by gtrgm_picksplit().

◆ fillcache()

static void fillcache ( CACHESIGN item,
TRGM key,
BITVECP  sign,
int  siglen 
)
static

Definition at line 751 of file trgm_gist.c.

752{
753 item->allistrue = false;
754 item->sign = sign;
755 if (ISARRKEY(key))
756 makesign(item->sign, key, siglen);
757 else if (ISALLTRUE(key))
758 item->allistrue = true;
759 else
760 memcpy(item->sign, GETSIGN(key), siglen);
761}
#define ISALLTRUE(x)
Definition hstore_gist.c:57
#define GETSIGN(x)
Definition hstore_gist.c:62
BITVECP sign
Definition trgm_gist.c:747
bool allistrue
Definition trgm_gist.c:746
#define ISARRKEY(x)
Definition trgm.h:91
static void makesign(BITVECP sign, TRGM *a, int siglen)
Definition trgm_gist.c:98

References CACHESIGN::allistrue, fb(), GETSIGN, ISALLTRUE, ISARRKEY, makesign(), CACHESIGN::sign, and sign.

Referenced by gtrgm_picksplit().

◆ gtrgm_alloc()

static TRGM * gtrgm_alloc ( bool  isalltrue,
int  siglen,
BITVECP  sign 
)
static

Definition at line 77 of file trgm_gist.c.

78{
79 int flag = SIGNKEY | (isalltrue ? ALLISTRUE : 0);
80 int size = CALCGTSIZE(flag, siglen);
81 TRGM *res = palloc(size);
82
83 SET_VARSIZE(res, size);
84 res->flag = flag;
85
86 if (!isalltrue)
87 {
88 if (sign)
89 memcpy(GETSIGN(res), sign, siglen);
90 else
91 memset(GETSIGN(res), 0, siglen);
92 }
93
94 return res;
95}
#define ALLISTRUE
Definition hstore_gist.c:55
#define CALCGTSIZE(flag, siglen)
Definition hstore_gist.c:60
void * palloc(Size size)
Definition mcxt.c:1387
Definition trgm.h:58
uint8 flag
Definition trgm.h:60
char * flag(int b)
Definition test-ctype.c:33
#define SIGNKEY
Definition trgm.h:88
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432

References ALLISTRUE, CALCGTSIZE, fb(), TRGM::flag, flag(), GETSIGN, palloc(), SET_VARSIZE(), sign, and SIGNKEY.

Referenced by gtrgm_compress(), gtrgm_picksplit(), and gtrgm_union().

◆ gtrgm_compress()

Datum gtrgm_compress ( PG_FUNCTION_ARGS  )

Definition at line 115 of file trgm_gist.c.

116{
117 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
118 int siglen = GET_SIGLEN();
119 GISTENTRY *retval = entry;
120
121 if (entry->leafkey)
122 { /* trgm */
123 TRGM *res;
124 text *val = DatumGetTextPP(entry->key);
125
127 retval = palloc_object(GISTENTRY);
128 gistentryinit(*retval, PointerGetDatum(res),
129 entry->rel, entry->page,
130 entry->offset, false);
131 }
132 else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
133 !ISALLTRUE(DatumGetPointer(entry->key)))
134 {
135 int32 i;
136 TRGM *res;
138
139 LOOPBYTE(siglen)
140 {
141 if ((sign[i] & 0xff) != 0xff)
142 PG_RETURN_POINTER(retval);
143 }
144
145 res = gtrgm_alloc(true, siglen, sign);
146 retval = palloc_object(GISTENTRY);
147 gistentryinit(*retval, PointerGetDatum(res),
148 entry->rel, entry->page,
149 entry->offset, false);
150 }
151 PG_RETURN_POINTER(retval);
152}
#define palloc_object(type)
Definition fe_memutils.h:74
#define DatumGetTextPP(X)
Definition fmgr.h:293
#define PG_GETARG_POINTER(n)
Definition fmgr.h:277
#define PG_RETURN_POINTER(x)
Definition fmgr.h:363
#define gistentryinit(e, k, r, pg, o, l)
Definition gist.h:245
#define LOOPBYTE(siglen)
Definition hstore_gist.c:33
char * BITVECP
Definition hstore_gist.c:31
long val
Definition informix.c:689
int i
Definition isn.c:77
static Datum PointerGetDatum(const void *X)
Definition postgres.h:352
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:342
OffsetNumber offset
Definition gist.h:164
Datum key
Definition gist.h:161
Page page
Definition gist.h:163
Relation rel
Definition gist.h:162
bool leafkey
Definition gist.h:165
Definition c.h:706
TRGM * generate_trgm(char *str, int slen)
Definition trgm_op.c:406
#define ISSIGNKEY(x)
Definition trgm.h:92
static TRGM * gtrgm_alloc(bool isalltrue, int siglen, BITVECP sign)
Definition trgm_gist.c:77
#define GET_SIGLEN()
Definition trgm_gist.c:20
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition varatt.h:472
static char * VARDATA_ANY(const void *PTR)
Definition varatt.h:486

References DatumGetPointer(), DatumGetTextPP, generate_trgm(), GET_SIGLEN, GETSIGN, gistentryinit, gtrgm_alloc(), i, ISALLTRUE, ISSIGNKEY, GISTENTRY::key, GISTENTRY::leafkey, LOOPBYTE, GISTENTRY::offset, GISTENTRY::page, palloc_object, PG_GETARG_POINTER, PG_RETURN_POINTER, PointerGetDatum(), GISTENTRY::rel, sign, val, VARDATA_ANY(), and VARSIZE_ANY_EXHDR().

◆ gtrgm_consistent()

Datum gtrgm_consistent ( PG_FUNCTION_ARGS  )

Definition at line 197 of file trgm_gist.c.

198{
199 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
200 text *query = PG_GETARG_TEXT_P(1);
202#ifdef NOT_USED
203 Oid subtype = PG_GETARG_OID(3);
204#endif
205 bool *recheck = (bool *) PG_GETARG_POINTER(4);
206 int siglen = GET_SIGLEN();
207 TRGM *key = (TRGM *) DatumGetPointer(entry->key);
208 TRGM *qtrg;
209 bool res;
210 Size querysize = VARSIZE(query);
212 double nlimit;
213
214 /*
215 * We keep the extracted trigrams in cache, because trigram extraction is
216 * relatively CPU-expensive. When trying to reuse a cached value, check
217 * strategy number not just query itself, because trigram extraction
218 * depends on strategy.
219 *
220 * The cached structure is a single palloc chunk containing the
221 * gtrgm_consistent_cache header, then the input query (4-byte length
222 * word, uncompressed, starting at a MAXALIGN boundary), then the TRGM
223 * value (also starting at a MAXALIGN boundary). However we don't try to
224 * include the regex graph (if any) in that struct. (XXX currently, this
225 * approach can leak regex graphs across index rescans. Not clear if
226 * that's worth fixing.)
227 */
228 cache = (gtrgm_consistent_cache *) fcinfo->flinfo->fn_extra;
229 if (cache == NULL ||
230 cache->strategy != strategy ||
231 VARSIZE(cache->query) != querysize ||
232 memcmp(cache->query, query, querysize) != 0)
233 {
235 TrgmPackedGraph *graph = NULL;
237
238 switch (strategy)
239 {
244 qtrg = generate_trgm(VARDATA(query),
246 break;
248#ifndef IGNORECASE
249 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
250#endif
251 /* FALL THRU */
255 break;
257#ifndef IGNORECASE
258 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
259#endif
260 /* FALL THRU */
263 &graph, fcinfo->flinfo->fn_mcxt);
264 /* just in case an empty array is returned ... */
265 if (qtrg && ARRNELEM(qtrg) <= 0)
266 {
267 pfree(qtrg);
268 qtrg = NULL;
269 }
270 break;
271 default:
272 elog(ERROR, "unrecognized strategy number: %d", strategy);
273 qtrg = NULL; /* keep compiler quiet */
274 break;
275 }
276
277 qtrgsize = qtrg ? VARSIZE(qtrg) : 0;
278
280 MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
283 qtrgsize);
284
285 newcache->strategy = strategy;
286 newcache->query = (text *)
287 ((char *) newcache + MAXALIGN(sizeof(gtrgm_consistent_cache)));
288 memcpy(newcache->query, query, querysize);
289 if (qtrg)
290 {
291 newcache->trigrams = (TRGM *)
292 ((char *) newcache->query + MAXALIGN(querysize));
293 memcpy((char *) newcache->trigrams, qtrg, qtrgsize);
294 /* release qtrg in case it was made in fn_mcxt */
295 pfree(qtrg);
296 }
297 else
298 newcache->trigrams = NULL;
299 newcache->graph = graph;
300
301 if (cache)
302 pfree(cache);
303 fcinfo->flinfo->fn_extra = newcache;
304 cache = newcache;
305 }
306
307 qtrg = cache->trigrams;
308
309 switch (strategy)
310 {
314
315 /*
316 * Similarity search is exact. (Strict) word similarity search is
317 * inexact
318 */
319 *recheck = (strategy != SimilarityStrategyNumber);
320
322
323 if (GIST_LEAF(entry))
324 { /* all leafs contains orig trgm */
325 double tmpsml = cnt_sml(qtrg, key, *recheck);
326
327 res = (tmpsml >= nlimit);
328 }
329 else if (ISALLTRUE(key))
330 { /* non-leaf contains signature */
331 res = true;
332 }
333 else
334 { /* non-leaf contains signature */
335 int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key), siglen);
337
338 if (len == 0)
339 res = false;
340 else
341 res = (((((float8) count) / ((float8) len))) >= nlimit);
342 }
343 break;
345#ifndef IGNORECASE
346 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
347#endif
348 /* FALL THRU */
351 /* Wildcard and equal search are inexact */
352 *recheck = true;
353
354 /*
355 * Check if all the extracted trigrams can be present in child
356 * nodes.
357 */
358 if (GIST_LEAF(entry))
359 { /* all leafs contains orig trgm */
360 res = trgm_contained_by(qtrg, key);
361 }
362 else if (ISALLTRUE(key))
363 { /* non-leaf contains signature */
364 res = true;
365 }
366 else
367 { /* non-leaf contains signature */
368 int32 k,
369 tmp = 0,
370 len = ARRNELEM(qtrg);
371 trgm *ptr = GETARR(qtrg);
372 BITVECP sign = GETSIGN(key);
373
374 res = true;
375 for (k = 0; k < len; k++)
376 {
377 CPTRGM(&tmp, ptr + k);
378 if (!GETBIT(sign, HASHVAL(tmp, siglen)))
379 {
380 res = false;
381 break;
382 }
383 }
384 }
385 break;
387#ifndef IGNORECASE
388 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
389#endif
390 /* FALL THRU */
392 /* Regexp search is inexact */
393 *recheck = true;
394
395 /* Check regex match as much as we can with available info */
396 if (qtrg)
397 {
398 if (GIST_LEAF(entry))
399 { /* all leafs contains orig trgm */
400 bool *check;
401
402 check = trgm_presence_map(qtrg, key);
403 res = trigramsMatchGraph(cache->graph, check);
404 pfree(check);
405 }
406 else if (ISALLTRUE(key))
407 { /* non-leaf contains signature */
408 res = true;
409 }
410 else
411 { /* non-leaf contains signature */
412 int32 k,
413 tmp = 0,
414 len = ARRNELEM(qtrg);
415 trgm *ptr = GETARR(qtrg);
416 BITVECP sign = GETSIGN(key);
417 bool *check;
418
419 /*
420 * GETBIT() tests may give false positives, due to limited
421 * size of the sign array. But since trigramsMatchGraph()
422 * implements a monotone boolean function, false positives
423 * in the check array can't lead to false negative answer.
424 * So we can apply trigramsMatchGraph despite uncertainty,
425 * and that usefully improves the quality of the search.
426 */
427 check = (bool *) palloc(len * sizeof(bool));
428 for (k = 0; k < len; k++)
429 {
430 CPTRGM(&tmp, ptr + k);
431 check[k] = GETBIT(sign, HASHVAL(tmp, siglen));
432 }
433 res = trigramsMatchGraph(cache->graph, check);
434 pfree(check);
435 }
436 }
437 else
438 {
439 /* trigram-free query must be rechecked everywhere */
440 res = true;
441 }
442 break;
443 default:
444 elog(ERROR, "unrecognized strategy number: %d", strategy);
445 res = false; /* keep compiler quiet */
446 break;
447 }
448
449 PG_RETURN_BOOL(res);
450}
#define MAXALIGN(LEN)
Definition c.h:826
#define VARHDRSZ
Definition c.h:711
double float8
Definition c.h:644
size_t Size
Definition c.h:619
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define PG_GETARG_OID(n)
Definition fmgr.h:275
#define PG_GETARG_UINT16(n)
Definition fmgr.h:272
#define PG_GET_COLLATION()
Definition fmgr.h:198
#define PG_RETURN_BOOL(x)
Definition fmgr.h:360
#define PG_GETARG_TEXT_P(n)
Definition fmgr.h:337
#define GIST_LEAF(entry)
Definition gist.h:171
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition mcxt.c:1232
void pfree(void *pointer)
Definition mcxt.c:1616
unsigned int Oid
uint16 StrategyNumber
Definition stratnum.h:22
#define RegExpICaseStrategyNumber
Definition trgm.h:34
#define WordSimilarityStrategyNumber
Definition trgm.h:35
#define StrictWordSimilarityStrategyNumber
Definition trgm.h:37
bool * trgm_presence_map(TRGM *query, TRGM *key)
Definition trgm_op.c:1128
double index_strategy_get_limit(StrategyNumber strategy)
Definition trgm_op.c:182
TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
#define SimilarityStrategyNumber
Definition trgm.h:29
bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
bool trgm_contained_by(TRGM *trg1, TRGM *trg2)
Definition trgm_op.c:1089
#define ILikeStrategyNumber
Definition trgm.h:32
TRGM * generate_wildcard_trgm(const char *str, int slen)
Definition trgm_op.c:916
float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
Definition trgm_op.c:1041
#define LikeStrategyNumber
Definition trgm.h:31
#define EqualStrategyNumber
Definition trgm.h:39
#define RegExpStrategyNumber
Definition trgm.h:33
static int32 cnt_sml_sign_common(TRGM *qtrg, BITVECP sign, int siglen)
Definition trgm_gist.c:179
static Size VARSIZE(const void *PTR)
Definition varatt.h:298
static char * VARDATA(const void *PTR)
Definition varatt.h:305

References ARRNELEM, cnt_sml(), cnt_sml_sign_common(), CPTRGM, createTrgmNFA(), DatumGetPointer(), elog, EqualStrategyNumber, ERROR, fb(), generate_trgm(), generate_wildcard_trgm(), GET_SIGLEN, GETARR, GETBIT, GETSIGN, GIST_LEAF, gtrgm_consistent_cache::graph, HASHVAL, ILikeStrategyNumber, index_strategy_get_limit(), ISALLTRUE, GISTENTRY::key, len, LikeStrategyNumber, MAXALIGN, MemoryContextAlloc(), palloc(), pfree(), PG_GET_COLLATION, PG_GETARG_OID, PG_GETARG_POINTER, PG_GETARG_TEXT_P, PG_GETARG_UINT16, PG_RETURN_BOOL, gtrgm_consistent_cache::query, RegExpICaseStrategyNumber, RegExpStrategyNumber, sign, SimilarityStrategyNumber, gtrgm_consistent_cache::strategy, StrictWordSimilarityStrategyNumber, trgm_contained_by(), trgm_presence_map(), gtrgm_consistent_cache::trigrams, trigramsMatchGraph(), VARDATA(), VARHDRSZ, VARSIZE(), and WordSimilarityStrategyNumber.

◆ gtrgm_decompress()

Datum gtrgm_decompress ( PG_FUNCTION_ARGS  )

Definition at line 155 of file trgm_gist.c.

156{
157 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
158 GISTENTRY *retval;
159 text *key;
160
161 key = DatumGetTextPP(entry->key);
162
163 if (key != (text *) DatumGetPointer(entry->key))
164 {
165 /* need to pass back the decompressed item */
166 retval = palloc_object(GISTENTRY);
167 gistentryinit(*retval, PointerGetDatum(key),
168 entry->rel, entry->page, entry->offset, entry->leafkey);
169 PG_RETURN_POINTER(retval);
170 }
171 else
172 {
173 /* we can return the entry as-is */
174 PG_RETURN_POINTER(entry);
175 }
176}

References DatumGetPointer(), DatumGetTextPP, gistentryinit, GISTENTRY::key, GISTENTRY::leafkey, GISTENTRY::offset, GISTENTRY::page, palloc_object, PG_GETARG_POINTER, PG_RETURN_POINTER, PointerGetDatum(), and GISTENTRY::rel.

◆ gtrgm_distance()

Datum gtrgm_distance ( PG_FUNCTION_ARGS  )

Definition at line 453 of file trgm_gist.c.

454{
455 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
456 text *query = PG_GETARG_TEXT_P(1);
458#ifdef NOT_USED
459 Oid subtype = PG_GETARG_OID(3);
460#endif
461 bool *recheck = (bool *) PG_GETARG_POINTER(4);
462 int siglen = GET_SIGLEN();
463 TRGM *key = (TRGM *) DatumGetPointer(entry->key);
464 TRGM *qtrg;
465 float8 res;
466 Size querysize = VARSIZE(query);
467 char *cache = (char *) fcinfo->flinfo->fn_extra;
468
469 /*
470 * Cache the generated trigrams across multiple calls with the same query.
471 */
472 if (cache == NULL ||
473 VARSIZE(cache) != querysize ||
474 memcmp(cache, query, querysize) != 0)
475 {
476 char *newcache;
477
479
480 newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
482 VARSIZE(qtrg));
483
484 memcpy(newcache, query, querysize);
486
487 if (cache)
488 pfree(cache);
489 fcinfo->flinfo->fn_extra = newcache;
490 cache = newcache;
491 }
492
493 qtrg = (TRGM *) (cache + MAXALIGN(querysize));
494
495 switch (strategy)
496 {
500 /* Only plain trigram distance is exact */
501 *recheck = (strategy != DistanceStrategyNumber);
502 if (GIST_LEAF(entry))
503 { /* all leafs contains orig trgm */
504
505 /*
506 * Prevent gcc optimizing the sml variable using volatile
507 * keyword. Otherwise res can differ from the
508 * word_similarity_dist_op() function.
509 */
510 float4 volatile sml = cnt_sml(qtrg, key, *recheck);
511
512 res = 1.0 - sml;
513 }
514 else if (ISALLTRUE(key))
515 { /* all leafs contains orig trgm */
516 res = 0.0;
517 }
518 else
519 { /* non-leaf contains signature */
520 int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key), siglen);
522
523 res = (len == 0) ? -1.0 : 1.0 - ((float8) count) / ((float8) len);
524 }
525 break;
526 default:
527 elog(ERROR, "unrecognized strategy number: %d", strategy);
528 res = 0; /* keep compiler quiet */
529 break;
530 }
531
532 PG_RETURN_FLOAT8(res);
533}
float float4
Definition c.h:643
#define PG_RETURN_FLOAT8(x)
Definition fmgr.h:369
#define DistanceStrategyNumber
Definition trgm.h:30
#define StrictWordDistanceStrategyNumber
Definition trgm.h:38
#define WordDistanceStrategyNumber
Definition trgm.h:36

References ARRNELEM, cnt_sml(), cnt_sml_sign_common(), DatumGetPointer(), DistanceStrategyNumber, elog, ERROR, fb(), generate_trgm(), GET_SIGLEN, GETSIGN, GIST_LEAF, ISALLTRUE, GISTENTRY::key, len, MAXALIGN, MemoryContextAlloc(), pfree(), PG_GETARG_OID, PG_GETARG_POINTER, PG_GETARG_TEXT_P, PG_GETARG_UINT16, PG_RETURN_FLOAT8, StrictWordDistanceStrategyNumber, VARDATA(), VARHDRSZ, VARSIZE(), and WordDistanceStrategyNumber.

◆ gtrgm_in()

Datum gtrgm_in ( PG_FUNCTION_ARGS  )

Definition at line 57 of file trgm_gist.c.

58{
61 errmsg("cannot accept a value of type %s", "gtrgm")));
62
63 PG_RETURN_VOID(); /* keep compiler quiet */
64}
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define ereport(elevel,...)
Definition elog.h:150
#define PG_RETURN_VOID()
Definition fmgr.h:350

References ereport, errcode(), errmsg(), ERROR, fb(), and PG_RETURN_VOID.

◆ gtrgm_options()

Datum gtrgm_options ( PG_FUNCTION_ARGS  )

Definition at line 962 of file trgm_gist.c.

963{
965
968 "signature length in bytes",
970 offsetof(TrgmGistOptions, siglen));
971
973}
#define SIGLEN_MAX
Definition hstore_gist.c:24
void init_local_reloptions(local_relopts *relopts, Size relopt_struct_size)
Definition reloptions.c:782
void add_local_int_reloption(local_relopts *relopts, const char *name, const char *desc, int default_val, int min_val, int max_val, int offset)

References add_local_int_reloption(), fb(), init_local_reloptions(), PG_GETARG_POINTER, PG_RETURN_VOID, SIGLEN_DEFAULT, and SIGLEN_MAX.

◆ gtrgm_out()

Datum gtrgm_out ( PG_FUNCTION_ARGS  )

Definition at line 67 of file trgm_gist.c.

68{
71 errmsg("cannot display a value of type %s", "gtrgm")));
72
73 PG_RETURN_VOID(); /* keep compiler quiet */
74}

References ereport, errcode(), errmsg(), ERROR, fb(), and PG_RETURN_VOID.

◆ gtrgm_penalty()

Datum gtrgm_penalty ( PG_FUNCTION_ARGS  )

Definition at line 689 of file trgm_gist.c.

690{
691 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
693 float *penalty = (float *) PG_GETARG_POINTER(2);
694 int siglen = GET_SIGLEN();
698
699 *penalty = 0.0;
700
701 if (ISARRKEY(newval))
702 {
703 char *cache = (char *) fcinfo->flinfo->fn_extra;
704 TRGM *cachedVal = (TRGM *) (cache + MAXALIGN(siglen));
707
708 /*
709 * Cache the sign data across multiple calls with the same newval.
710 */
711 if (cache == NULL ||
714 {
715 char *newcache;
716
717 newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
718 MAXALIGN(siglen) +
719 newvalsize);
720
721 makesign((BITVECP) newcache, newval, siglen);
722
723 cachedVal = (TRGM *) (newcache + MAXALIGN(siglen));
725
726 if (cache)
727 pfree(cache);
728 fcinfo->flinfo->fn_extra = newcache;
729 cache = newcache;
730 }
731
732 sign = (BITVECP) cache;
733
734 if (ISALLTRUE(origval))
735 *penalty = ((float) (SIGLENBIT(siglen) - sizebitvec(sign, siglen))) / (float) (SIGLENBIT(siglen) + 1);
736 else
737 *penalty = hemdistsign(sign, orig, siglen);
738 }
739 else
740 *penalty = hemdist(origval, newval, siglen);
741 PG_RETURN_POINTER(penalty);
742}
#define newval
#define SIGLENBIT(siglen)
Definition hstore_gist.c:25
static int32 sizebitvec(BITVECP sign, int siglen)
Definition trgm_gist.c:651
static int hemdistsign(BITVECP a, BITVECP b, int siglen)
Definition trgm_gist.c:657
static int hemdist(TRGM *a, TRGM *b, int siglen)
Definition trgm_gist.c:673

References DatumGetPointer(), fb(), GET_SIGLEN, GETSIGN, hemdist(), hemdistsign(), ISALLTRUE, ISARRKEY, makesign(), MAXALIGN, MemoryContextAlloc(), newval, pfree(), PG_GETARG_POINTER, PG_RETURN_POINTER, SIGLENBIT, sign, sizebitvec(), and VARSIZE().

◆ gtrgm_picksplit()

Datum gtrgm_picksplit ( PG_FUNCTION_ARGS  )

Definition at line 797 of file trgm_gist.c.

798{
800 OffsetNumber maxoff = entryvec->n - 1;
802 int siglen = GET_SIGLEN();
803 OffsetNumber k,
804 j;
805 TRGM *datum_l,
806 *datum_r;
808 union_r;
810 size_beta;
812 waste = -1;
813 int32 nbytes;
815 seed_2 = 0;
816 OffsetNumber *left,
817 *right;
818 BITVECP ptr;
819 int i;
820 CACHESIGN *cache;
821 char *cache_sign;
823
824 /* cache the sign data for each existing item */
825 cache = palloc_array(CACHESIGN, maxoff + 1);
826 cache_sign = palloc(siglen * (maxoff + 1));
827
828 for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k))
829 fillcache(&cache[k], GETENTRY(entryvec, k), &cache_sign[siglen * k],
830 siglen);
831
832 /* now find the two furthest-apart items */
833 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
834 {
835 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
836 {
837 size_waste = hemdistcache(&(cache[j]), &(cache[k]), siglen);
838 if (size_waste > waste)
839 {
840 waste = size_waste;
841 seed_1 = k;
842 seed_2 = j;
843 }
844 }
845 }
846
847 /* just in case we didn't make a selection ... */
848 if (seed_1 == 0 || seed_2 == 0)
849 {
850 seed_1 = 1;
851 seed_2 = 2;
852 }
853
854 /* initialize the result vectors */
855 nbytes = maxoff * sizeof(OffsetNumber);
856 v->spl_left = left = (OffsetNumber *) palloc(nbytes);
857 v->spl_right = right = (OffsetNumber *) palloc(nbytes);
858 v->spl_nleft = 0;
859 v->spl_nright = 0;
860
861 /* form initial .. */
862 datum_l = gtrgm_alloc(cache[seed_1].allistrue, siglen, cache[seed_1].sign);
863 datum_r = gtrgm_alloc(cache[seed_2].allistrue, siglen, cache[seed_2].sign);
864
867
868 /* sort before ... */
870 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
871 {
872 costvector[j - 1].pos = j;
873 size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]), siglen);
874 size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]), siglen);
875 costvector[j - 1].cost = abs(size_alpha - size_beta);
876 }
877 qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost);
878
879 for (k = 0; k < maxoff; k++)
880 {
881 j = costvector[k].pos;
882 if (j == seed_1)
883 {
884 *left++ = j;
885 v->spl_nleft++;
886 continue;
887 }
888 else if (j == seed_2)
889 {
890 *right++ = j;
891 v->spl_nright++;
892 continue;
893 }
894
895 if (ISALLTRUE(datum_l) || cache[j].allistrue)
896 {
897 if (ISALLTRUE(datum_l) && cache[j].allistrue)
898 size_alpha = 0;
899 else
900 size_alpha = SIGLENBIT(siglen) -
901 sizebitvec((cache[j].allistrue) ? GETSIGN(datum_l) :
902 GETSIGN(cache[j].sign),
903 siglen);
904 }
905 else
906 size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l), siglen);
907
908 if (ISALLTRUE(datum_r) || cache[j].allistrue)
909 {
910 if (ISALLTRUE(datum_r) && cache[j].allistrue)
911 size_beta = 0;
912 else
913 size_beta = SIGLENBIT(siglen) -
914 sizebitvec((cache[j].allistrue) ? GETSIGN(datum_r) :
915 GETSIGN(cache[j].sign),
916 siglen);
917 }
918 else
919 size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r), siglen);
920
921 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
922 {
923 if (ISALLTRUE(datum_l) || cache[j].allistrue)
924 {
925 if (!ISALLTRUE(datum_l))
926 memset(GETSIGN(datum_l), 0xff, siglen);
927 }
928 else
929 {
930 ptr = cache[j].sign;
931 LOOPBYTE(siglen)
932 union_l[i] |= ptr[i];
933 }
934 *left++ = j;
935 v->spl_nleft++;
936 }
937 else
938 {
939 if (ISALLTRUE(datum_r) || cache[j].allistrue)
940 {
941 if (!ISALLTRUE(datum_r))
942 memset(GETSIGN(datum_r), 0xff, siglen);
943 }
944 else
945 {
946 ptr = cache[j].sign;
947 LOOPBYTE(siglen)
948 union_r[i] |= ptr[i];
949 }
950 *right++ = j;
951 v->spl_nright++;
952 }
953 }
954
957
959}
#define palloc_array(type, count)
Definition fe_memutils.h:76
int j
Definition isn.c:78
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
uint16 OffsetNumber
Definition off.h:24
#define FirstOffsetNumber
Definition off.h:27
#define qsort(a, b, c, d)
Definition port.h:495
int spl_nleft
Definition gist.h:144
OffsetNumber * spl_right
Definition gist.h:148
Datum spl_ldatum
Definition gist.h:145
Datum spl_rdatum
Definition gist.h:150
int spl_nright
Definition gist.h:149
OffsetNumber * spl_left
Definition gist.h:143
static int hemdistcache(CACHESIGN *a, CACHESIGN *b, int siglen)
Definition trgm_gist.c:781
static void fillcache(CACHESIGN *item, TRGM *key, BITVECP sign, int siglen)
Definition trgm_gist.c:751
#define WISH_F(a, b, c)
Definition trgm_gist.c:763
#define GETENTRY(vec, pos)
Definition trgm_gist.c:40
static int comparecost(const void *a, const void *b)
Definition trgm_gist.c:771

References comparecost(), fb(), fillcache(), FirstOffsetNumber, GET_SIGLEN, GETENTRY, GETSIGN, gtrgm_alloc(), hemdistcache(), hemdistsign(), i, ISALLTRUE, j, LOOPBYTE, GistEntryVector::n, OffsetNumberNext, palloc(), palloc_array, PG_GETARG_POINTER, PG_RETURN_POINTER, PointerGetDatum(), qsort, SIGLENBIT, CACHESIGN::sign, sign, sizebitvec(), GIST_SPLITVEC::spl_ldatum, GIST_SPLITVEC::spl_left, GIST_SPLITVEC::spl_nleft, GIST_SPLITVEC::spl_nright, GIST_SPLITVEC::spl_rdatum, GIST_SPLITVEC::spl_right, and WISH_F.

◆ gtrgm_same()

Datum gtrgm_same ( PG_FUNCTION_ARGS  )

Definition at line 592 of file trgm_gist.c.

593{
594 TRGM *a = (TRGM *) PG_GETARG_POINTER(0);
595 TRGM *b = (TRGM *) PG_GETARG_POINTER(1);
596 bool *result = (bool *) PG_GETARG_POINTER(2);
597 int siglen = GET_SIGLEN();
598
599 if (ISSIGNKEY(a))
600 { /* then b also ISSIGNKEY */
601 if (ISALLTRUE(a) && ISALLTRUE(b))
602 *result = true;
603 else if (ISALLTRUE(a))
604 *result = false;
605 else if (ISALLTRUE(b))
606 *result = false;
607 else
608 {
609 int32 i;
610 BITVECP sa = GETSIGN(a),
611 sb = GETSIGN(b);
612
613 *result = true;
614 LOOPBYTE(siglen)
615 {
616 if (sa[i] != sb[i])
617 {
618 *result = false;
619 break;
620 }
621 }
622 }
623 }
624 else
625 { /* a and b ISARRKEY */
626 int32 lena = ARRNELEM(a),
627 lenb = ARRNELEM(b);
628
629 if (lena != lenb)
630 *result = false;
631 else
632 {
633 trgm *ptra = GETARR(a),
634 *ptrb = GETARR(b);
635 int32 i;
636
637 *result = true;
638 for (i = 0; i < lena; i++)
639 if (CMPTRGM(ptra + i, ptrb + i))
640 {
641 *result = false;
642 break;
643 }
644 }
645 }
646
647 PG_RETURN_POINTER(result);
648}
int(* CMPTRGM)(const void *a, const void *b)
Definition trgm_op.c:49

References a, ARRNELEM, b, CMPTRGM, fb(), GET_SIGLEN, GETARR, GETSIGN, i, ISALLTRUE, ISSIGNKEY, LOOPBYTE, PG_GETARG_POINTER, and PG_RETURN_POINTER.

◆ gtrgm_union()

Datum gtrgm_union ( PG_FUNCTION_ARGS  )

Definition at line 566 of file trgm_gist.c.

567{
569 int32 len = entryvec->n;
570 int *size = (int *) PG_GETARG_POINTER(1);
571 int siglen = GET_SIGLEN();
572 int32 i;
573 TRGM *result = gtrgm_alloc(false, siglen, NULL);
574 BITVECP base = GETSIGN(result);
575
576 for (i = 0; i < len; i++)
577 {
578 if (unionkey(base, GETENTRY(entryvec, i), siglen))
579 {
580 result->flag = ALLISTRUE;
581 SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen));
582 break;
583 }
584 }
585
586 *size = VARSIZE(result);
587
588 PG_RETURN_POINTER(result);
589}
static int32 unionkey(BITVECP sbase, TRGM *add, int siglen)
Definition trgm_gist.c:536

References ALLISTRUE, CALCGTSIZE, fb(), TRGM::flag, GET_SIGLEN, GETENTRY, GETSIGN, gtrgm_alloc(), i, len, GistEntryVector::n, PG_GETARG_POINTER, PG_RETURN_POINTER, SET_VARSIZE(), unionkey(), and VARSIZE().

◆ hemdist()

static int hemdist ( TRGM a,
TRGM b,
int  siglen 
)
static

Definition at line 673 of file trgm_gist.c.

674{
675 if (ISALLTRUE(a))
676 {
677 if (ISALLTRUE(b))
678 return 0;
679 else
680 return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen);
681 }
682 else if (ISALLTRUE(b))
683 return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
684
685 return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
686}

References a, b, GETSIGN, hemdistsign(), ISALLTRUE, SIGLENBIT, and sizebitvec().

Referenced by gtrgm_penalty().

◆ hemdistcache()

static int hemdistcache ( CACHESIGN a,
CACHESIGN b,
int  siglen 
)
static

Definition at line 781 of file trgm_gist.c.

782{
783 if (a->allistrue)
784 {
785 if (b->allistrue)
786 return 0;
787 else
788 return SIGLENBIT(siglen) - sizebitvec(b->sign, siglen);
789 }
790 else if (b->allistrue)
791 return SIGLENBIT(siglen) - sizebitvec(a->sign, siglen);
792
793 return hemdistsign(a->sign, b->sign, siglen);
794}

References a, b, hemdistsign(), SIGLENBIT, and sizebitvec().

Referenced by gtrgm_picksplit().

◆ hemdistsign()

static int hemdistsign ( BITVECP  a,
BITVECP  b,
int  siglen 
)
static

Definition at line 657 of file trgm_gist.c.

658{
659 int i,
660 diff,
661 dist = 0;
662
663 LOOPBYTE(siglen)
664 {
665 diff = (unsigned char) (a[i] ^ b[i]);
666 /* Using the popcount functions here isn't likely to win */
668 }
669 return dist;
670}
PGDLLIMPORT const uint8 pg_number_of_ones[256]
Definition pg_bitutils.c:80

References a, b, fb(), i, LOOPBYTE, and pg_number_of_ones.

Referenced by gtrgm_penalty(), gtrgm_picksplit(), hemdist(), and hemdistcache().

◆ makesign()

static void makesign ( BITVECP  sign,
TRGM a,
int  siglen 
)
static

Definition at line 98 of file trgm_gist.c.

99{
100 int32 k,
101 len = ARRNELEM(a);
102 trgm *ptr = GETARR(a);
103 int32 tmp = 0;
104
105 MemSet(sign, 0, siglen);
106 SETBIT(sign, SIGLENBIT(siglen)); /* set last unused bit */
107 for (k = 0; k < len; k++)
108 {
109 CPTRGM(&tmp, ptr + k);
110 HASH(sign, tmp, siglen);
111 }
112}
#define SETBIT(x, i)
Definition blutils.c:29
#define MemSet(start, val, len)
Definition c.h:1013
#define HASH(sign, val, siglen)
Definition hstore_gist.c:46

References a, ARRNELEM, CPTRGM, GETARR, HASH, len, MemSet, SETBIT, SIGLENBIT, and sign.

Referenced by fillcache(), and gtrgm_penalty().

◆ PG_FUNCTION_INFO_V1() [1/11]

PG_FUNCTION_INFO_V1 ( gtrgm_compress  )

◆ PG_FUNCTION_INFO_V1() [2/11]

PG_FUNCTION_INFO_V1 ( gtrgm_consistent  )

◆ PG_FUNCTION_INFO_V1() [3/11]

PG_FUNCTION_INFO_V1 ( gtrgm_decompress  )

◆ PG_FUNCTION_INFO_V1() [4/11]

PG_FUNCTION_INFO_V1 ( gtrgm_distance  )

◆ PG_FUNCTION_INFO_V1() [5/11]

PG_FUNCTION_INFO_V1 ( gtrgm_in  )

◆ PG_FUNCTION_INFO_V1() [6/11]

PG_FUNCTION_INFO_V1 ( gtrgm_options  )

◆ PG_FUNCTION_INFO_V1() [7/11]

PG_FUNCTION_INFO_V1 ( gtrgm_out  )

◆ PG_FUNCTION_INFO_V1() [8/11]

PG_FUNCTION_INFO_V1 ( gtrgm_penalty  )

◆ PG_FUNCTION_INFO_V1() [9/11]

PG_FUNCTION_INFO_V1 ( gtrgm_picksplit  )

◆ PG_FUNCTION_INFO_V1() [10/11]

PG_FUNCTION_INFO_V1 ( gtrgm_same  )

◆ PG_FUNCTION_INFO_V1() [11/11]

PG_FUNCTION_INFO_V1 ( gtrgm_union  )

◆ sizebitvec()

static int32 sizebitvec ( BITVECP  sign,
int  siglen 
)
static

Definition at line 651 of file trgm_gist.c.

652{
653 return pg_popcount(sign, siglen);
654}
static uint64 pg_popcount(const char *buf, int bytes)

References pg_popcount(), and sign.

Referenced by gtrgm_penalty(), gtrgm_picksplit(), hemdist(), and hemdistcache().

◆ unionkey()

static int32 unionkey ( BITVECP  sbase,
TRGM add,
int  siglen 
)
static

Definition at line 536 of file trgm_gist.c.

537{
538 int32 i;
539
540 if (ISSIGNKEY(add))
541 {
543
544 if (ISALLTRUE(add))
545 return 1;
546
547 LOOPBYTE(siglen)
548 sbase[i] |= sadd[i];
549 }
550 else
551 {
552 trgm *ptr = GETARR(add);
553 int32 tmp = 0;
554
555 for (i = 0; i < ARRNELEM(add); i++)
556 {
557 CPTRGM(&tmp, ptr + i);
558 HASH(sbase, tmp, siglen);
559 }
560 }
561 return 0;
562}

References ARRNELEM, CPTRGM, fb(), GETARR, GETSIGN, HASH, i, ISALLTRUE, ISSIGNKEY, and LOOPBYTE.

Referenced by gtrgm_union().