PostgreSQL Source Code  git master
trgm_op.c File Reference
#include "postgres.h"
#include <ctype.h>
#include "trgm.h"
#include "catalog/pg_type.h"
#include "tsearch/ts_locale.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_crc.h"
Include dependency graph for trgm_op.c:

Go to the source code of this file.

Data Structures

struct  pos_trgm
 

Macros

#define TRGM_BOUND_LEFT   (0x01) /* trigram is left bound of word */
 
#define TRGM_BOUND_RIGHT   (0x02) /* trigram is right bound of word */
 
#define WORD_SIMILARITY_CHECK_ONLY
 
#define WORD_SIMILARITY_STRICT
 

Typedefs

typedef uint8 TrgmBound
 

Functions

void _PG_init (void)
 
 PG_FUNCTION_INFO_V1 (set_limit)
 
 PG_FUNCTION_INFO_V1 (show_limit)
 
 PG_FUNCTION_INFO_V1 (show_trgm)
 
 PG_FUNCTION_INFO_V1 (similarity)
 
 PG_FUNCTION_INFO_V1 (word_similarity)
 
 PG_FUNCTION_INFO_V1 (strict_word_similarity)
 
 PG_FUNCTION_INFO_V1 (similarity_dist)
 
 PG_FUNCTION_INFO_V1 (similarity_op)
 
 PG_FUNCTION_INFO_V1 (word_similarity_op)
 
 PG_FUNCTION_INFO_V1 (word_similarity_commutator_op)
 
 PG_FUNCTION_INFO_V1 (word_similarity_dist_op)
 
 PG_FUNCTION_INFO_V1 (word_similarity_dist_commutator_op)
 
 PG_FUNCTION_INFO_V1 (strict_word_similarity_op)
 
 PG_FUNCTION_INFO_V1 (strict_word_similarity_commutator_op)
 
 PG_FUNCTION_INFO_V1 (strict_word_similarity_dist_op)
 
 PG_FUNCTION_INFO_V1 (strict_word_similarity_dist_commutator_op)
 
Datum set_limit (PG_FUNCTION_ARGS)
 
double index_strategy_get_limit (StrategyNumber strategy)
 
Datum show_limit (PG_FUNCTION_ARGS)
 
static int comp_trgm (const void *a, const void *b)
 
static int unique_array (trgm *a, int len)
 
static char * find_word (char *str, int lenstr, char **endword, int *charlen)
 
void compact_trigram (trgm *tptr, char *str, int bytelen)
 
static trgmmake_trigrams (trgm *tptr, char *str, int bytelen, int charlen)
 
static int generate_trgm_only (trgm *trg, char *str, int slen, TrgmBound *bounds)
 
static void protect_out_of_mem (int slen)
 
TRGMgenerate_trgm (char *str, int slen)
 
static pos_trgmmake_positional_trgm (trgm *trg1, int len1, trgm *trg2, int len2)
 
static int comp_ptrgm (const void *v1, const void *v2)
 
static float4 iterate_word_similarity (int *trg2indexes, bool *found, int ulen1, int len2, int len, uint8 flags, TrgmBound *bounds)
 
static float4 calc_word_similarity (char *str1, int slen1, char *str2, int slen2, uint8 flags)
 
static const char * get_wildcard_part (const char *str, int lenstr, char *buf, int *bytelen, int *charlen)
 
TRGMgenerate_wildcard_trgm (const char *str, int slen)
 
uint32 trgm2int (trgm *ptr)
 
Datum show_trgm (PG_FUNCTION_ARGS)
 
float4 cnt_sml (TRGM *trg1, TRGM *trg2, bool inexact)
 
bool trgm_contained_by (TRGM *trg1, TRGM *trg2)
 
booltrgm_presence_map (TRGM *query, TRGM *key)
 
Datum similarity (PG_FUNCTION_ARGS)
 
Datum word_similarity (PG_FUNCTION_ARGS)
 
Datum strict_word_similarity (PG_FUNCTION_ARGS)
 
Datum similarity_dist (PG_FUNCTION_ARGS)
 
Datum similarity_op (PG_FUNCTION_ARGS)
 
Datum word_similarity_op (PG_FUNCTION_ARGS)
 
Datum word_similarity_commutator_op (PG_FUNCTION_ARGS)
 
Datum word_similarity_dist_op (PG_FUNCTION_ARGS)
 
Datum word_similarity_dist_commutator_op (PG_FUNCTION_ARGS)
 
Datum strict_word_similarity_op (PG_FUNCTION_ARGS)
 
Datum strict_word_similarity_commutator_op (PG_FUNCTION_ARGS)
 
Datum strict_word_similarity_dist_op (PG_FUNCTION_ARGS)
 
Datum strict_word_similarity_dist_commutator_op (PG_FUNCTION_ARGS)
 

Variables

 PG_MODULE_MAGIC
 
double similarity_threshold = 0.3f
 
double word_similarity_threshold = 0.6f
 
double strict_word_similarity_threshold = 0.5f
 

Macro Definition Documentation

◆ TRGM_BOUND_LEFT

#define TRGM_BOUND_LEFT   (0x01) /* trigram is left bound of word */

Definition at line 51 of file trgm_op.c.

Referenced by generate_trgm_only(), and iterate_word_similarity().

◆ TRGM_BOUND_RIGHT

#define TRGM_BOUND_RIGHT   (0x02) /* trigram is right bound of word */

Definition at line 52 of file trgm_op.c.

Referenced by generate_trgm_only(), and iterate_word_similarity().

◆ WORD_SIMILARITY_CHECK_ONLY

#define WORD_SIMILARITY_CHECK_ONLY
Value:
(0x01) /* if set then only check existence
* of similar search pattern in text */

Definition at line 55 of file trgm_op.c.

Referenced by iterate_word_similarity(), strict_word_similarity_commutator_op(), strict_word_similarity_op(), word_similarity_commutator_op(), and word_similarity_op().

◆ WORD_SIMILARITY_STRICT

#define WORD_SIMILARITY_STRICT

Typedef Documentation

◆ TrgmBound

Definition at line 50 of file trgm_op.c.

Function Documentation

◆ _PG_init()

void _PG_init ( void  )

Definition at line 64 of file trgm_op.c.

References DefineCustomRealVariable(), PGC_USERSET, similarity_threshold, strict_word_similarity_threshold, and word_similarity_threshold.

65 {
66  /* Define custom GUC variables. */
67  DefineCustomRealVariable("pg_trgm.similarity_threshold",
68  "Sets the threshold used by the %% operator.",
69  "Valid range is 0.0 .. 1.0.",
71  0.3,
72  0.0,
73  1.0,
75  0,
76  NULL,
77  NULL,
78  NULL);
79  DefineCustomRealVariable("pg_trgm.word_similarity_threshold",
80  "Sets the threshold used by the <%% operator.",
81  "Valid range is 0.0 .. 1.0.",
83  0.6,
84  0.0,
85  1.0,
87  0,
88  NULL,
89  NULL,
90  NULL);
91  DefineCustomRealVariable("pg_trgm.strict_word_similarity_threshold",
92  "Sets the threshold used by the <<%% operator.",
93  "Valid range is 0.0 .. 1.0.",
95  0.5,
96  0.0,
97  1.0,
99  0,
100  NULL,
101  NULL,
102  NULL);
103 }
void DefineCustomRealVariable(const char *name, const char *short_desc, const char *long_desc, double *valueAddr, double bootValue, double minValue, double maxValue, GucContext context, int flags, GucRealCheckHook check_hook, GucRealAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:8037
double strict_word_similarity_threshold
Definition: trgm_op.c:21
double word_similarity_threshold
Definition: trgm_op.c:20
double similarity_threshold
Definition: trgm_op.c:19

◆ calc_word_similarity()

static float4 calc_word_similarity ( char *  str1,
int  slen1,
char *  str2,
int  slen2,
uint8  flags 
)
static

Definition at line 641 of file trgm_op.c.

References cmp(), CMPTRGM, comp_ptrgm(), generate_trgm_only(), i, pos_trgm::index, iterate_word_similarity(), make_positional_trgm(), palloc(), palloc0(), pfree(), protect_out_of_mem(), qsort, and WORD_SIMILARITY_STRICT.

Referenced by strict_word_similarity(), strict_word_similarity_commutator_op(), strict_word_similarity_dist_commutator_op(), strict_word_similarity_dist_op(), strict_word_similarity_op(), word_similarity(), word_similarity_commutator_op(), word_similarity_dist_commutator_op(), word_similarity_dist_op(), and word_similarity_op().

643 {
644  bool *found;
645  pos_trgm *ptrg;
646  trgm *trg1;
647  trgm *trg2;
648  int len1,
649  len2,
650  len,
651  i,
652  j,
653  ulen1;
654  int *trg2indexes;
655  float4 result;
656  TrgmBound *bounds;
657 
658  protect_out_of_mem(slen1 + slen2);
659 
660  /* Make positional trigrams */
661  trg1 = (trgm *) palloc(sizeof(trgm) * (slen1 / 2 + 1) * 3);
662  trg2 = (trgm *) palloc(sizeof(trgm) * (slen2 / 2 + 1) * 3);
663  if (flags & WORD_SIMILARITY_STRICT)
664  bounds = (TrgmBound *) palloc0(sizeof(TrgmBound) * (slen2 / 2 + 1) * 3);
665  else
666  bounds = NULL;
667 
668  len1 = generate_trgm_only(trg1, str1, slen1, NULL);
669  len2 = generate_trgm_only(trg2, str2, slen2, bounds);
670 
671  ptrg = make_positional_trgm(trg1, len1, trg2, len2);
672  len = len1 + len2;
673  qsort(ptrg, len, sizeof(pos_trgm), comp_ptrgm);
674 
675  pfree(trg1);
676  pfree(trg2);
677 
678  /*
679  * Merge positional trigrams array: enumerate each trigram and find its
680  * presence in required word.
681  */
682  trg2indexes = (int *) palloc(sizeof(int) * len2);
683  found = (bool *) palloc0(sizeof(bool) * len);
684 
685  ulen1 = 0;
686  j = 0;
687  for (i = 0; i < len; i++)
688  {
689  if (i > 0)
690  {
691  int cmp = CMPTRGM(ptrg[i - 1].trg, ptrg[i].trg);
692 
693  if (cmp != 0)
694  {
695  if (found[j])
696  ulen1++;
697  j++;
698  }
699  }
700 
701  if (ptrg[i].index >= 0)
702  {
703  trg2indexes[ptrg[i].index] = j;
704  }
705  else
706  {
707  found[j] = true;
708  }
709  }
710  if (found[j])
711  ulen1++;
712 
713  /* Run iterative procedure to find maximum similarity with word */
714  result = iterate_word_similarity(trg2indexes, found, ulen1, len2, len,
715  flags, bounds);
716 
717  pfree(trg2indexes);
718  pfree(found);
719  pfree(ptrg);
720 
721  return result;
722 }
#define CMPTRGM(a, b)
Definition: trgm.h:45
static float4 iterate_word_similarity(int *trg2indexes, bool *found, int ulen1, int len2, int len, uint8 flags, TrgmBound *bounds)
Definition: trgm_op.c:477
int index
Definition: trgm_op.c:46
static void protect_out_of_mem(int slen)
Definition: trgm_op.c:359
Definition: type.h:89
void pfree(void *pointer)
Definition: mcxt.c:1031
static int comp_ptrgm(const void *v1, const void *v2)
Definition: trgm_op.c:444
float float4
Definition: c.h:457
void * palloc0(Size size)
Definition: mcxt.c:955
char trgm[3]
Definition: trgm.h:41
#define WORD_SIMILARITY_STRICT
Definition: trgm_op.c:57
uint8 TrgmBound
Definition: trgm_op.c:50
void * palloc(Size size)
Definition: mcxt.c:924
int i
#define qsort(a, b, c, d)
Definition: port.h:421
static int generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
Definition: trgm_op.c:295
static pos_trgm * make_positional_trgm(trgm *trg1, int len1, trgm *trg2, int len2)
Definition: trgm_op.c:417
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:742

◆ cnt_sml()

float4 cnt_sml ( TRGM trg1,
TRGM trg2,
bool  inexact 
)

Definition at line 1018 of file trgm_op.c.

References ARRNELEM, CALCSML, CMPTRGM, and GETARR.

Referenced by gtrgm_consistent(), gtrgm_distance(), and similarity().

1019 {
1020  trgm *ptr1,
1021  *ptr2;
1022  int count = 0;
1023  int len1,
1024  len2;
1025 
1026  ptr1 = GETARR(trg1);
1027  ptr2 = GETARR(trg2);
1028 
1029  len1 = ARRNELEM(trg1);
1030  len2 = ARRNELEM(trg2);
1031 
1032  /* explicit test is needed to avoid 0/0 division when both lengths are 0 */
1033  if (len1 <= 0 || len2 <= 0)
1034  return (float4) 0.0;
1035 
1036  while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
1037  {
1038  int res = CMPTRGM(ptr1, ptr2);
1039 
1040  if (res < 0)
1041  ptr1++;
1042  else if (res > 0)
1043  ptr2++;
1044  else
1045  {
1046  ptr1++;
1047  ptr2++;
1048  count++;
1049  }
1050  }
1051 
1052  /*
1053  * If inexact then len2 is equal to count, because we don't know actual
1054  * length of second string in inexact search and we can assume that count
1055  * is a lower bound of len2.
1056  */
1057  return CALCSML(count, len1, inexact ? count : len2);
1058 }
#define CMPTRGM(a, b)
Definition: trgm.h:45
#define ARRNELEM(x)
Definition: trgm.h:108
#define GETARR(x)
Definition: trgm.h:107
float float4
Definition: c.h:457
char trgm[3]
Definition: trgm.h:41
#define CALCSML(count, len1, len2)
Definition: trgm.h:117

◆ comp_ptrgm()

static int comp_ptrgm ( const void *  v1,
const void *  v2 
)
static

Definition at line 444 of file trgm_op.c.

References cmp(), CMPTRGM, pos_trgm::index, and pos_trgm::trg.

Referenced by calc_word_similarity().

445 {
446  const pos_trgm *p1 = (const pos_trgm *) v1;
447  const pos_trgm *p2 = (const pos_trgm *) v2;
448  int cmp;
449 
450  cmp = CMPTRGM(p1->trg, p2->trg);
451  if (cmp != 0)
452  return cmp;
453 
454  if (p1->index < p2->index)
455  return -1;
456  else if (p1->index == p2->index)
457  return 0;
458  else
459  return 1;
460 }
#define CMPTRGM(a, b)
Definition: trgm.h:45
int index
Definition: trgm_op.c:46
trgm trg
Definition: trgm_op.c:45
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:742

◆ comp_trgm()

static int comp_trgm ( const void *  a,
const void *  b 
)
static

Definition at line 161 of file trgm_op.c.

References CMPTRGM.

Referenced by generate_trgm(), and generate_wildcard_trgm().

162 {
163  return CMPTRGM(a, b);
164 }
#define CMPTRGM(a, b)
Definition: trgm.h:45

◆ compact_trigram()

void compact_trigram ( trgm tptr,
char *  str,
int  bytelen 
)

Definition at line 218 of file trgm_op.c.

References COMP_LEGACY_CRC32, CPTRGM, FIN_LEGACY_CRC32, and INIT_LEGACY_CRC32.

Referenced by fillTrgm(), and make_trigrams().

219 {
220  if (bytelen == 3)
221  {
222  CPTRGM(tptr, str);
223  }
224  else
225  {
226  pg_crc32 crc;
227 
228  INIT_LEGACY_CRC32(crc);
229  COMP_LEGACY_CRC32(crc, str, bytelen);
230  FIN_LEGACY_CRC32(crc);
231 
232  /*
233  * use only 3 upper bytes from crc, hope, it's good enough hashing
234  */
235  CPTRGM(tptr, &crc);
236  }
237 }
#define INIT_LEGACY_CRC32(crc)
Definition: pg_crc.h:79
#define FIN_LEGACY_CRC32(crc)
Definition: pg_crc.h:80
#define COMP_LEGACY_CRC32(crc, data, len)
Definition: pg_crc.h:81
uint32 pg_crc32
Definition: pg_crc.h:37
#define CPTRGM(a, b)
Definition: trgm.h:47

◆ find_word()

static char* find_word ( char *  str,
int  lenstr,
char **  endword,
int *  charlen 
)
static

Definition at line 191 of file trgm_op.c.

References ISWORDCHR, pg_mblen(), and generate_unaccent_rules::str.

Referenced by generate_trgm_only().

192 {
193  char *beginword = str;
194 
195  while (beginword - str < lenstr && !ISWORDCHR(beginword))
196  beginword += pg_mblen(beginword);
197 
198  if (beginword - str >= lenstr)
199  return NULL;
200 
201  *endword = beginword;
202  *charlen = 0;
203  while (*endword - str < lenstr && ISWORDCHR(*endword))
204  {
205  *endword += pg_mblen(*endword);
206  (*charlen)++;
207  }
208 
209  return beginword;
210 }
#define ISWORDCHR(c)
Definition: trgm.h:54
int pg_mblen(const char *mbstr)
Definition: mbutils.c:760

◆ generate_trgm()

TRGM* generate_trgm ( char *  str,
int  slen 
)

Definition at line 376 of file trgm_op.c.

References ARRKEY, CALCGTSIZE, comp_trgm(), TRGM::flag, generate_trgm_only(), GETARR, palloc(), protect_out_of_mem(), qsort, SET_VARSIZE, TRGMHDRSIZE, and unique_array().

Referenced by gin_extract_query_trgm(), gin_extract_value_trgm(), gtrgm_compress(), gtrgm_consistent(), gtrgm_distance(), show_trgm(), and similarity().

377 {
378  TRGM *trg;
379  int len;
380 
381  protect_out_of_mem(slen);
382 
383  trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
384  trg->flag = ARRKEY;
385 
386  len = generate_trgm_only(GETARR(trg), str, slen, NULL);
387  SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
388 
389  if (len == 0)
390  return trg;
391 
392  /*
393  * Make trigrams unique.
394  */
395  if (len > 1)
396  {
397  qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
398  len = unique_array(GETARR(trg), len);
399  }
400 
401  SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
402 
403  return trg;
404 }
#define ARRKEY
Definition: trgm.h:97
static void protect_out_of_mem(int slen)
Definition: trgm_op.c:359
#define GETARR(x)
Definition: trgm.h:107
Definition: trgm.h:66
#define TRGMHDRSIZE
Definition: trgm.h:73
uint8 flag
Definition: trgm.h:69
char trgm[3]
Definition: trgm.h:41
#define CALCGTSIZE(flag)
Definition: hstore_gist.c:52
static int unique_array(trgm *a, int len)
Definition: trgm_op.c:167
void * palloc(Size size)
Definition: mcxt.c:924
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:327
#define qsort(a, b, c, d)
Definition: port.h:421
static int generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
Definition: trgm_op.c:295
static int comp_trgm(const void *a, const void *b)
Definition: trgm_op.c:161

◆ generate_trgm_only()

static int generate_trgm_only ( trgm trg,
char *  str,
int  slen,
TrgmBound bounds 
)
static

Definition at line 295 of file trgm_op.c.

References buf, find_word(), lowerstr_with_len(), LPADDING, make_trigrams(), palloc(), pfree(), pg_database_encoding_max_length(), RPADDING, generate_unaccent_rules::str, TRGM_BOUND_LEFT, and TRGM_BOUND_RIGHT.

Referenced by calc_word_similarity(), and generate_trgm().

296 {
297  trgm *tptr;
298  char *buf;
299  int charlen,
300  bytelen;
301  char *bword,
302  *eword;
303 
304  if (slen + LPADDING + RPADDING < 3 || slen == 0)
305  return 0;
306 
307  tptr = trg;
308 
309  /* Allocate a buffer for case-folded, blank-padded words */
310  buf = (char *) palloc(slen * pg_database_encoding_max_length() + 4);
311 
312  if (LPADDING > 0)
313  {
314  *buf = ' ';
315  if (LPADDING > 1)
316  *(buf + 1) = ' ';
317  }
318 
319  eword = str;
320  while ((bword = find_word(eword, slen - (eword - str), &eword, &charlen)) != NULL)
321  {
322 #ifdef IGNORECASE
323  bword = lowerstr_with_len(bword, eword - bword);
324  bytelen = strlen(bword);
325 #else
326  bytelen = eword - bword;
327 #endif
328 
329  memcpy(buf + LPADDING, bword, bytelen);
330 
331 #ifdef IGNORECASE
332  pfree(bword);
333 #endif
334 
335  buf[LPADDING + bytelen] = ' ';
336  buf[LPADDING + bytelen + 1] = ' ';
337 
338  /* Calculate trigrams marking their bounds if needed */
339  if (bounds)
340  bounds[tptr - trg] |= TRGM_BOUND_LEFT;
341  tptr = make_trigrams(tptr, buf, bytelen + LPADDING + RPADDING,
342  charlen + LPADDING + RPADDING);
343  if (bounds)
344  bounds[tptr - trg - 1] |= TRGM_BOUND_RIGHT;
345  }
346 
347  pfree(buf);
348 
349  return tptr - trg;
350 }
char * lowerstr_with_len(const char *str, int len)
Definition: ts_locale.c:241
static trgm * make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
Definition: trgm_op.c:243
static char * find_word(char *str, int lenstr, char **endword, int *charlen)
Definition: trgm_op.c:191
#define TRGM_BOUND_RIGHT
Definition: trgm_op.c:52
void pfree(void *pointer)
Definition: mcxt.c:1031
#define TRGM_BOUND_LEFT
Definition: trgm_op.c:51
static char * buf
Definition: pg_test_fsync.c:67
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
char trgm[3]
Definition: trgm.h:41
#define RPADDING
Definition: trgm.h:17
void * palloc(Size size)
Definition: mcxt.c:924
#define LPADDING
Definition: trgm.h:16

◆ generate_wildcard_trgm()

TRGM* generate_wildcard_trgm ( const char *  str,
int  slen 
)

Definition at line 886 of file trgm_op.c.

References ARRKEY, buf, CALCGTSIZE, comp_trgm(), TRGM::flag, get_wildcard_part(), GETARR, lowerstr_with_len(), LPADDING, make_trigrams(), palloc(), pfree(), protect_out_of_mem(), qsort, RPADDING, SET_VARSIZE, generate_unaccent_rules::str, TRGMHDRSIZE, and unique_array().

Referenced by gin_extract_query_trgm(), and gtrgm_consistent().

887 {
888  TRGM *trg;
889  char *buf,
890  *buf2;
891  trgm *tptr;
892  int len,
893  charlen,
894  bytelen;
895  const char *eword;
896 
897  protect_out_of_mem(slen);
898 
899  trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
900  trg->flag = ARRKEY;
901  SET_VARSIZE(trg, TRGMHDRSIZE);
902 
903  if (slen + LPADDING + RPADDING < 3 || slen == 0)
904  return trg;
905 
906  tptr = GETARR(trg);
907 
908  /* Allocate a buffer for blank-padded, but not yet case-folded, words */
909  buf = palloc(sizeof(char) * (slen + 4));
910 
911  /*
912  * Extract trigrams from each substring extracted by get_wildcard_part.
913  */
914  eword = str;
915  while ((eword = get_wildcard_part(eword, slen - (eword - str),
916  buf, &bytelen, &charlen)) != NULL)
917  {
918 #ifdef IGNORECASE
919  buf2 = lowerstr_with_len(buf, bytelen);
920  bytelen = strlen(buf2);
921 #else
922  buf2 = buf;
923 #endif
924 
925  /*
926  * count trigrams
927  */
928  tptr = make_trigrams(tptr, buf2, bytelen, charlen);
929 
930 #ifdef IGNORECASE
931  pfree(buf2);
932 #endif
933  }
934 
935  pfree(buf);
936 
937  if ((len = tptr - GETARR(trg)) == 0)
938  return trg;
939 
940  /*
941  * Make trigrams unique.
942  */
943  if (len > 1)
944  {
945  qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
946  len = unique_array(GETARR(trg), len);
947  }
948 
949  SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
950 
951  return trg;
952 }
char * lowerstr_with_len(const char *str, int len)
Definition: ts_locale.c:241
#define ARRKEY
Definition: trgm.h:97
static void protect_out_of_mem(int slen)
Definition: trgm_op.c:359
static trgm * make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
Definition: trgm_op.c:243
#define GETARR(x)
Definition: trgm.h:107
void pfree(void *pointer)
Definition: mcxt.c:1031
Definition: trgm.h:66
static char * buf
Definition: pg_test_fsync.c:67
#define TRGMHDRSIZE
Definition: trgm.h:73
uint8 flag
Definition: trgm.h:69
char trgm[3]
Definition: trgm.h:41
#define CALCGTSIZE(flag)
Definition: hstore_gist.c:52
#define RPADDING
Definition: trgm.h:17
static int unique_array(trgm *a, int len)
Definition: trgm_op.c:167
void * palloc(Size size)
Definition: mcxt.c:924
#define LPADDING
Definition: trgm.h:16
static const char * get_wildcard_part(const char *str, int lenstr, char *buf, int *bytelen, int *charlen)
Definition: trgm_op.c:741
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:327
#define qsort(a, b, c, d)
Definition: port.h:421
static int comp_trgm(const void *a, const void *b)
Definition: trgm_op.c:161

◆ get_wildcard_part()

static const char* get_wildcard_part ( const char *  str,
int  lenstr,
char *  buf,
int *  bytelen,
int *  charlen 
)
static

Definition at line 741 of file trgm_op.c.

References buf, ISESCAPECHAR, ISWILDCARDCHAR, ISWORDCHR, LPADDING, pg_mblen(), RPADDING, and generate_unaccent_rules::str.

Referenced by generate_wildcard_trgm().

743 {
744  const char *beginword = str;
745  const char *endword;
746  char *s = buf;
747  bool in_leading_wildcard_meta = false;
748  bool in_trailing_wildcard_meta = false;
749  bool in_escape = false;
750  int clen;
751 
752  /*
753  * Find the first word character, remembering whether preceding character
754  * was wildcard meta-character. Note that the in_escape state persists
755  * from this loop to the next one, since we may exit at a word character
756  * that is in_escape.
757  */
758  while (beginword - str < lenstr)
759  {
760  if (in_escape)
761  {
762  if (ISWORDCHR(beginword))
763  break;
764  in_escape = false;
765  in_leading_wildcard_meta = false;
766  }
767  else
768  {
769  if (ISESCAPECHAR(beginword))
770  in_escape = true;
771  else if (ISWILDCARDCHAR(beginword))
772  in_leading_wildcard_meta = true;
773  else if (ISWORDCHR(beginword))
774  break;
775  else
776  in_leading_wildcard_meta = false;
777  }
778  beginword += pg_mblen(beginword);
779  }
780 
781  /*
782  * Handle string end.
783  */
784  if (beginword - str >= lenstr)
785  return NULL;
786 
787  /*
788  * Add left padding spaces if preceding character wasn't wildcard
789  * meta-character.
790  */
791  *charlen = 0;
792  if (!in_leading_wildcard_meta)
793  {
794  if (LPADDING > 0)
795  {
796  *s++ = ' ';
797  (*charlen)++;
798  if (LPADDING > 1)
799  {
800  *s++ = ' ';
801  (*charlen)++;
802  }
803  }
804  }
805 
806  /*
807  * Copy data into buf until wildcard meta-character, non-word character or
808  * string boundary. Strip escapes during copy.
809  */
810  endword = beginword;
811  while (endword - str < lenstr)
812  {
813  clen = pg_mblen(endword);
814  if (in_escape)
815  {
816  if (ISWORDCHR(endword))
817  {
818  memcpy(s, endword, clen);
819  (*charlen)++;
820  s += clen;
821  }
822  else
823  {
824  /*
825  * Back up endword to the escape character when stopping at an
826  * escaped char, so that subsequent get_wildcard_part will
827  * restart from the escape character. We assume here that
828  * escape chars are single-byte.
829  */
830  endword--;
831  break;
832  }
833  in_escape = false;
834  }
835  else
836  {
837  if (ISESCAPECHAR(endword))
838  in_escape = true;
839  else if (ISWILDCARDCHAR(endword))
840  {
841  in_trailing_wildcard_meta = true;
842  break;
843  }
844  else if (ISWORDCHR(endword))
845  {
846  memcpy(s, endword, clen);
847  (*charlen)++;
848  s += clen;
849  }
850  else
851  break;
852  }
853  endword += clen;
854  }
855 
856  /*
857  * Add right padding spaces if next character isn't wildcard
858  * meta-character.
859  */
860  if (!in_trailing_wildcard_meta)
861  {
862  if (RPADDING > 0)
863  {
864  *s++ = ' ';
865  (*charlen)++;
866  if (RPADDING > 1)
867  {
868  *s++ = ' ';
869  (*charlen)++;
870  }
871  }
872  }
873 
874  *bytelen = s - buf;
875  return endword;
876 }
#define ISWILDCARDCHAR(x)
Definition: trgm.h:63
#define ISWORDCHR(c)
Definition: trgm.h:54
static char * buf
Definition: pg_test_fsync.c:67
#define ISESCAPECHAR(x)
Definition: trgm.h:62
#define RPADDING
Definition: trgm.h:17
int pg_mblen(const char *mbstr)
Definition: mbutils.c:760
#define LPADDING
Definition: trgm.h:16

◆ index_strategy_get_limit()

double index_strategy_get_limit ( StrategyNumber  strategy)

Definition at line 132 of file trgm_op.c.

References elog, ERROR, similarity_threshold, SimilarityStrategyNumber, strict_word_similarity_threshold, StrictWordSimilarityStrategyNumber, word_similarity_threshold, and WordSimilarityStrategyNumber.

Referenced by gin_trgm_consistent(), gin_trgm_triconsistent(), and gtrgm_consistent().

133 {
134  switch (strategy)
135  {
137  return similarity_threshold;
142  default:
143  elog(ERROR, "unrecognized strategy number: %d", strategy);
144  break;
145  }
146 
147  return 0.0; /* keep compiler quiet */
148 }
#define SimilarityStrategyNumber
Definition: trgm.h:30
#define StrictWordSimilarityStrategyNumber
Definition: trgm.h:38
#define ERROR
Definition: elog.h:43
double strict_word_similarity_threshold
Definition: trgm_op.c:21
#define WordSimilarityStrategyNumber
Definition: trgm.h:36
double word_similarity_threshold
Definition: trgm_op.c:20
#define elog
Definition: elog.h:219
double similarity_threshold
Definition: trgm_op.c:19

◆ iterate_word_similarity()

static float4 iterate_word_similarity ( int *  trg2indexes,
bool found,
int  ulen1,
int  len2,
int  len,
uint8  flags,
TrgmBound bounds 
)
static

Definition at line 477 of file trgm_op.c.

References Assert, CALCSML, i, lower(), Max, palloc(), pfree(), strict_word_similarity_threshold, TRGM_BOUND_LEFT, TRGM_BOUND_RIGHT, upper(), WORD_SIMILARITY_CHECK_ONLY, WORD_SIMILARITY_STRICT, and word_similarity_threshold.

Referenced by calc_word_similarity().

484 {
485  int *lastpos,
486  i,
487  ulen2 = 0,
488  count = 0,
489  upper = -1,
490  lower;
491  float4 smlr_cur,
492  smlr_max = 0.0f;
493  double threshold;
494 
495  Assert(bounds || !(flags & WORD_SIMILARITY_STRICT));
496 
497  /* Select appropriate threshold */
498  threshold = (flags & WORD_SIMILARITY_STRICT) ?
501 
502  /*
503  * Consider first trigram as initial lower bount for strict word similarity,
504  * or initialize it later with first trigram present for plain word
505  * similarity.
506  */
507  lower = (flags & WORD_SIMILARITY_STRICT) ? 0 : -1;
508 
509  /* Memorise last position of each trigram */
510  lastpos = (int *) palloc(sizeof(int) * len);
511  memset(lastpos, -1, sizeof(int) * len);
512 
513  for (i = 0; i < len2; i++)
514  {
515  /* Get index of next trigram */
516  int trgindex = trg2indexes[i];
517 
518  /* Update last position of this trigram */
519  if (lower >= 0 || found[trgindex])
520  {
521  if (lastpos[trgindex] < 0)
522  {
523  ulen2++;
524  if (found[trgindex])
525  count++;
526  }
527  lastpos[trgindex] = i;
528  }
529 
530  /*
531  * Adjust upper bound if trigram is upper bound of word for strict
532  * word similarity, or if trigram is present in required substring for
533  * plain word similarity
534  */
535  if ((flags & WORD_SIMILARITY_STRICT) ? (bounds[i] & TRGM_BOUND_RIGHT)
536  : found[trgindex])
537  {
538  int prev_lower,
539  tmp_ulen2,
540  tmp_lower,
541  tmp_count;
542 
543  upper = i;
544  if (lower == -1)
545  {
546  lower = i;
547  ulen2 = 1;
548  }
549 
550  smlr_cur = CALCSML(count, ulen1, ulen2);
551 
552  /* Also try to adjust lower bound for greater similarity */
553  tmp_count = count;
554  tmp_ulen2 = ulen2;
555  prev_lower = lower;
556  for (tmp_lower = lower; tmp_lower <= upper; tmp_lower++)
557  {
558  float smlr_tmp;
559  int tmp_trgindex;
560 
561  /*
562  * Adjust lower bound only if trigram is lower bound of word
563  * for strict word similarity, or consider every trigram as
564  * lower bound for plain word similarity.
565  */
566  if (!(flags & WORD_SIMILARITY_STRICT)
567  || (bounds[tmp_lower] & TRGM_BOUND_LEFT))
568  {
569  smlr_tmp = CALCSML(tmp_count, ulen1, tmp_ulen2);
570  if (smlr_tmp > smlr_cur)
571  {
572  smlr_cur = smlr_tmp;
573  ulen2 = tmp_ulen2;
574  lower = tmp_lower;
575  count = tmp_count;
576  }
577 
578  /*
579  * If we only check that word similarity is greater than
580  * threshold we do not need to calculate a maximum
581  * similarity.
582  */
583  if ((flags & WORD_SIMILARITY_CHECK_ONLY)
584  && smlr_cur >= threshold)
585  break;
586  }
587 
588  tmp_trgindex = trg2indexes[tmp_lower];
589  if (lastpos[tmp_trgindex] == tmp_lower)
590  {
591  tmp_ulen2--;
592  if (found[tmp_trgindex])
593  tmp_count--;
594  }
595  }
596 
597  smlr_max = Max(smlr_max, smlr_cur);
598 
599  /*
600  * if we only check that word similarity is greater than
601  * threshold we do not need to calculate a maximum similarity.
602  */
603  if ((flags & WORD_SIMILARITY_CHECK_ONLY) && smlr_max >= threshold)
604  break;
605 
606  for (tmp_lower = prev_lower; tmp_lower < lower; tmp_lower++)
607  {
608  int tmp_trgindex;
609 
610  tmp_trgindex = trg2indexes[tmp_lower];
611  if (lastpos[tmp_trgindex] == tmp_lower)
612  lastpos[tmp_trgindex] = -1;
613  }
614  }
615  }
616 
617  pfree(lastpos);
618 
619  return smlr_max;
620 }
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:44
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:75
#define TRGM_BOUND_RIGHT
Definition: trgm_op.c:52
#define WORD_SIMILARITY_CHECK_ONLY
Definition: trgm_op.c:55
void pfree(void *pointer)
Definition: mcxt.c:1031
double strict_word_similarity_threshold
Definition: trgm_op.c:21
#define TRGM_BOUND_LEFT
Definition: trgm_op.c:51
float float4
Definition: c.h:457
#define Max(x, y)
Definition: c.h:851
#define Assert(condition)
Definition: c.h:699
#define WORD_SIMILARITY_STRICT
Definition: trgm_op.c:57
void * palloc(Size size)
Definition: mcxt.c:924
double word_similarity_threshold
Definition: trgm_op.c:20
int i
#define CALCSML(count, len1, len2)
Definition: trgm.h:117

◆ make_positional_trgm()

static pos_trgm* make_positional_trgm ( trgm trg1,
int  len1,
trgm trg2,
int  len2 
)
static

Definition at line 417 of file trgm_op.c.

References i, pos_trgm::index, and palloc().

Referenced by calc_word_similarity().

418 {
419  pos_trgm *result;
420  int i,
421  len = len1 + len2;
422 
423  result = (pos_trgm *) palloc(sizeof(pos_trgm) * len);
424 
425  for (i = 0; i < len1; i++)
426  {
427  memcpy(&result[i].trg, &trg1[i], sizeof(trgm));
428  result[i].index = -1;
429  }
430 
431  for (i = 0; i < len2; i++)
432  {
433  memcpy(&result[i + len1].trg, &trg2[i], sizeof(trgm));
434  result[i + len1].index = i;
435  }
436 
437  return result;
438 }
int index
Definition: trgm_op.c:46
char trgm[3]
Definition: trgm.h:41
void * palloc(Size size)
Definition: mcxt.c:924
int i

◆ make_trigrams()

static trgm* make_trigrams ( trgm tptr,
char *  str,
int  bytelen,
int  charlen 
)
static

Definition at line 243 of file trgm_op.c.

References Assert, compact_trigram(), CPTRGM, pg_mblen(), and generate_unaccent_rules::str.

Referenced by generate_trgm_only(), and generate_wildcard_trgm().

244 {
245  char *ptr = str;
246 
247  if (charlen < 3)
248  return tptr;
249 
250  if (bytelen > charlen)
251  {
252  /* Find multibyte character boundaries and apply compact_trigram */
253  int lenfirst = pg_mblen(str),
254  lenmiddle = pg_mblen(str + lenfirst),
255  lenlast = pg_mblen(str + lenfirst + lenmiddle);
256 
257  while ((ptr - str) + lenfirst + lenmiddle + lenlast <= bytelen)
258  {
259  compact_trigram(tptr, ptr, lenfirst + lenmiddle + lenlast);
260 
261  ptr += lenfirst;
262  tptr++;
263 
264  lenfirst = lenmiddle;
265  lenmiddle = lenlast;
266  lenlast = pg_mblen(ptr + lenfirst + lenmiddle);
267  }
268  }
269  else
270  {
271  /* Fast path when there are no multibyte characters */
272  Assert(bytelen == charlen);
273 
274  while (ptr - str < bytelen - 2 /* number of trigrams = strlen - 2 */ )
275  {
276  CPTRGM(tptr, ptr);
277  ptr++;
278  tptr++;
279  }
280  }
281 
282  return tptr;
283 }
#define Assert(condition)
Definition: c.h:699
int pg_mblen(const char *mbstr)
Definition: mbutils.c:760
#define CPTRGM(a, b)
Definition: trgm.h:47
void compact_trigram(trgm *tptr, char *str, int bytelen)
Definition: trgm_op.c:218

◆ PG_FUNCTION_INFO_V1() [1/16]

PG_FUNCTION_INFO_V1 ( set_limit  )

◆ PG_FUNCTION_INFO_V1() [2/16]

PG_FUNCTION_INFO_V1 ( show_limit  )

◆ PG_FUNCTION_INFO_V1() [3/16]

PG_FUNCTION_INFO_V1 ( show_trgm  )

◆ PG_FUNCTION_INFO_V1() [4/16]

PG_FUNCTION_INFO_V1 ( similarity  )

◆ PG_FUNCTION_INFO_V1() [5/16]

PG_FUNCTION_INFO_V1 ( word_similarity  )

◆ PG_FUNCTION_INFO_V1() [6/16]

PG_FUNCTION_INFO_V1 ( strict_word_similarity  )

◆ PG_FUNCTION_INFO_V1() [7/16]

PG_FUNCTION_INFO_V1 ( similarity_dist  )

◆ PG_FUNCTION_INFO_V1() [8/16]

PG_FUNCTION_INFO_V1 ( similarity_op  )

◆ PG_FUNCTION_INFO_V1() [9/16]

PG_FUNCTION_INFO_V1 ( word_similarity_op  )

◆ PG_FUNCTION_INFO_V1() [10/16]

PG_FUNCTION_INFO_V1 ( word_similarity_commutator_op  )

◆ PG_FUNCTION_INFO_V1() [11/16]

PG_FUNCTION_INFO_V1 ( word_similarity_dist_op  )

◆ PG_FUNCTION_INFO_V1() [12/16]

PG_FUNCTION_INFO_V1 ( word_similarity_dist_commutator_op  )

◆ PG_FUNCTION_INFO_V1() [13/16]

PG_FUNCTION_INFO_V1 ( strict_word_similarity_op  )

◆ PG_FUNCTION_INFO_V1() [14/16]

PG_FUNCTION_INFO_V1 ( strict_word_similarity_commutator_op  )

◆ PG_FUNCTION_INFO_V1() [15/16]

PG_FUNCTION_INFO_V1 ( strict_word_similarity_dist_op  )

◆ PG_FUNCTION_INFO_V1() [16/16]

PG_FUNCTION_INFO_V1 ( strict_word_similarity_dist_commutator_op  )

◆ protect_out_of_mem()

static void protect_out_of_mem ( int  slen)
static

Definition at line 359 of file trgm_op.c.

References ereport, errcode(), errmsg(), ERROR, MaxAllocSize, and pg_database_encoding_max_length().

Referenced by calc_word_similarity(), generate_trgm(), and generate_wildcard_trgm().

360 {
361  if ((Size) (slen / 2) >= (MaxAllocSize / (sizeof(trgm) * 3)) ||
363  ereport(ERROR,
364  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
365  errmsg("out of memory")));
366 }
int errcode(int sqlerrcode)
Definition: elog.c:575
#define ERROR
Definition: elog.h:43
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
#define ereport(elevel, rest)
Definition: elog.h:122
#define MaxAllocSize
Definition: memutils.h:40
char trgm[3]
Definition: trgm.h:41
size_t Size
Definition: c.h:433
int errmsg(const char *fmt,...)
Definition: elog.c:797

◆ set_limit()

Datum set_limit ( PG_FUNCTION_ARGS  )

Definition at line 110 of file trgm_op.c.

References Float4GetDatum(), getTypeOutputInfo(), OidOutputFunctionCall(), PG_GETARG_FLOAT4, PG_RETURN_FLOAT4, PGC_S_SESSION, PGC_USERSET, SetConfigOption(), and similarity_threshold.

111 {
112  float4 nlimit = PG_GETARG_FLOAT4(0);
113  char *nlimit_str;
114  Oid func_out_oid;
115  bool is_varlena;
116 
117  getTypeOutputInfo(FLOAT4OID, &func_out_oid, &is_varlena);
118 
119  nlimit_str = OidOutputFunctionCall(func_out_oid, Float4GetDatum(nlimit));
120 
121  SetConfigOption("pg_trgm.similarity_threshold", nlimit_str,
123 
125 }
#define PG_RETURN_FLOAT4(x)
Definition: fmgr.h:330
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2650
unsigned int Oid
Definition: postgres_ext.h:31
Datum Float4GetDatum(float4 X)
Definition: fmgr.c:1888
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:6892
#define PG_GETARG_FLOAT4(n)
Definition: fmgr.h:250
float float4
Definition: c.h:457
char * OidOutputFunctionCall(Oid functionId, Datum val)
Definition: fmgr.c:1832
double similarity_threshold
Definition: trgm_op.c:19

◆ show_limit()

Datum show_limit ( PG_FUNCTION_ARGS  )

Definition at line 155 of file trgm_op.c.

References PG_RETURN_FLOAT4, and similarity_threshold.

156 {
158 }
#define PG_RETURN_FLOAT4(x)
Definition: fmgr.h:330
double similarity_threshold
Definition: trgm_op.c:19

◆ show_trgm()

Datum show_trgm ( PG_FUNCTION_ARGS  )

Definition at line 969 of file trgm_op.c.

References ARRNELEM, construct_array(), CPTRGM, DatumGetPointer, generate_trgm(), GETARR, i, ISPRINTABLETRGM, Max, palloc(), pfree(), pg_database_encoding_max_length(), PG_FREE_IF_COPY, PG_GETARG_TEXT_PP, PG_RETURN_POINTER, PointerGetDatum, SET_VARSIZE, snprintf(), trgm2int(), VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY_EXHDR.

970 {
971  text *in = PG_GETARG_TEXT_PP(0);
972  TRGM *trg;
973  Datum *d;
974  ArrayType *a;
975  trgm *ptr;
976  int i;
977 
979  d = (Datum *) palloc(sizeof(Datum) * (1 + ARRNELEM(trg)));
980 
981  for (i = 0, ptr = GETARR(trg); i < ARRNELEM(trg); i++, ptr++)
982  {
983  text *item = (text *) palloc(VARHDRSZ + Max(12, pg_database_encoding_max_length() * 3));
984 
986  {
987  snprintf(VARDATA(item), 12, "0x%06x", trgm2int(ptr));
988  SET_VARSIZE(item, VARHDRSZ + strlen(VARDATA(item)));
989  }
990  else
991  {
992  SET_VARSIZE(item, VARHDRSZ + 3);
993  CPTRGM(VARDATA(item), ptr);
994  }
995  d[i] = PointerGetDatum(item);
996  }
997 
998  a = construct_array(
999  d,
1000  ARRNELEM(trg),
1001  TEXTOID,
1002  -1,
1003  false,
1004  'i'
1005  );
1006 
1007  for (i = 0; i < ARRNELEM(trg); i++)
1008  pfree(DatumGetPointer(d[i]));
1009 
1010  pfree(d);
1011  pfree(trg);
1012  PG_FREE_IF_COPY(in, 0);
1013 
1014  PG_RETURN_POINTER(a);
1015 }
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:326
uint32 trgm2int(trgm *ptr)
Definition: trgm_op.c:955
#define VARDATA_ANY(PTR)
Definition: postgres.h:346
#define VARDATA(PTR)
Definition: postgres.h:302
#define PointerGetDatum(X)
Definition: postgres.h:539
#define VARHDRSZ
Definition: c.h:522
ArrayType * construct_array(Datum *elems, int nelems, Oid elmtype, int elmlen, bool elmbyval, char elmalign)
Definition: arrayfuncs.c:3279
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
#define ARRNELEM(x)
Definition: trgm.h:108
TRGM * generate_trgm(char *str, int slen)
Definition: trgm_op.c:376
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:278
#define GETARR(x)
Definition: trgm.h:107
void pfree(void *pointer)
Definition: mcxt.c:1031
Definition: trgm.h:66
#define ISPRINTABLETRGM(t)
Definition: trgm.h:60
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
uintptr_t Datum
Definition: postgres.h:365
char trgm[3]
Definition: trgm.h:41
#define Max(x, y)
Definition: c.h:851
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:230
#define DatumGetPointer(X)
Definition: postgres.h:532
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:339
void * palloc(Size size)
Definition: mcxt.c:924
int i
Definition: c.h:516
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:327
#define CPTRGM(a, b)
Definition: trgm.h:47

◆ similarity()

Datum similarity ( PG_FUNCTION_ARGS  )

Definition at line 1144 of file trgm_op.c.

References cnt_sml(), generate_trgm(), pfree(), PG_FREE_IF_COPY, PG_GETARG_TEXT_PP, PG_RETURN_FLOAT4, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by similarity_dist(), and similarity_op().

1145 {
1146  text *in1 = PG_GETARG_TEXT_PP(0);
1147  text *in2 = PG_GETARG_TEXT_PP(1);
1148  TRGM *trg1,
1149  *trg2;
1150  float4 res;
1151 
1152  trg1 = generate_trgm(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1));
1153  trg2 = generate_trgm(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2));
1154 
1155  res = cnt_sml(trg1, trg2, false);
1156 
1157  pfree(trg1);
1158  pfree(trg2);
1159  PG_FREE_IF_COPY(in1, 0);
1160  PG_FREE_IF_COPY(in2, 1);
1161 
1162  PG_RETURN_FLOAT4(res);
1163 }
#define PG_RETURN_FLOAT4(x)
Definition: fmgr.h:330
#define VARDATA_ANY(PTR)
Definition: postgres.h:346
TRGM * generate_trgm(char *str, int slen)
Definition: trgm_op.c:376
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:278
void pfree(void *pointer)
Definition: mcxt.c:1031
Definition: trgm.h:66
float float4
Definition: c.h:457
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:230
float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
Definition: trgm_op.c:1018
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:339
Definition: c.h:516

◆ similarity_dist()

Datum similarity_dist ( PG_FUNCTION_ARGS  )

Definition at line 1198 of file trgm_op.c.

References DatumGetFloat4, DirectFunctionCall2, PG_GETARG_DATUM, PG_RETURN_FLOAT4, and similarity().

1199 {
1201  PG_GETARG_DATUM(0),
1202  PG_GETARG_DATUM(1)));
1203 
1204  PG_RETURN_FLOAT4(1.0 - res);
1205 }
#define PG_RETURN_FLOAT4(x)
Definition: fmgr.h:330
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:238
float float4
Definition: c.h:457
Datum similarity(PG_FUNCTION_ARGS)
Definition: trgm_op.c:1144
#define DatumGetFloat4(X)
Definition: postgres.h:663
#define DirectFunctionCall2(func, arg1, arg2)
Definition: fmgr.h:592

◆ similarity_op()

Datum similarity_op ( PG_FUNCTION_ARGS  )

Definition at line 1208 of file trgm_op.c.

References DatumGetFloat4, DirectFunctionCall2, PG_GETARG_DATUM, PG_RETURN_BOOL, similarity(), and similarity_threshold.

1209 {
1211  PG_GETARG_DATUM(0),
1212  PG_GETARG_DATUM(1)));
1213 
1215 }
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:238
float float4
Definition: c.h:457
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:324
Datum similarity(PG_FUNCTION_ARGS)
Definition: trgm_op.c:1144
#define DatumGetFloat4(X)
Definition: postgres.h:663
double similarity_threshold
Definition: trgm_op.c:19
#define DirectFunctionCall2(func, arg1, arg2)
Definition: fmgr.h:592

◆ strict_word_similarity()

Datum strict_word_similarity ( PG_FUNCTION_ARGS  )

Definition at line 1182 of file trgm_op.c.

References calc_word_similarity(), PG_FREE_IF_COPY, PG_GETARG_TEXT_PP, PG_RETURN_FLOAT4, VARDATA_ANY, VARSIZE_ANY_EXHDR, and WORD_SIMILARITY_STRICT.

1183 {
1184  text *in1 = PG_GETARG_TEXT_PP(0);
1185  text *in2 = PG_GETARG_TEXT_PP(1);
1186  float4 res;
1187 
1189  VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
1191 
1192  PG_FREE_IF_COPY(in1, 0);
1193  PG_FREE_IF_COPY(in2, 1);
1194  PG_RETURN_FLOAT4(res);
1195 }
#define PG_RETURN_FLOAT4(x)
Definition: fmgr.h:330
#define VARDATA_ANY(PTR)
Definition: postgres.h:346
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:278
float float4
Definition: c.h:457
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:230
#define WORD_SIMILARITY_STRICT
Definition: trgm_op.c:57
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:339
Definition: c.h:516
static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, uint8 flags)
Definition: trgm_op.c:641

◆ strict_word_similarity_commutator_op()

Datum strict_word_similarity_commutator_op ( PG_FUNCTION_ARGS  )

Definition at line 1298 of file trgm_op.c.

References calc_word_similarity(), PG_FREE_IF_COPY, PG_GETARG_TEXT_PP, PG_RETURN_BOOL, strict_word_similarity_threshold, VARDATA_ANY, VARSIZE_ANY_EXHDR, WORD_SIMILARITY_CHECK_ONLY, and WORD_SIMILARITY_STRICT.

1299 {
1300  text *in1 = PG_GETARG_TEXT_PP(0);
1301  text *in2 = PG_GETARG_TEXT_PP(1);
1302  float4 res;
1303 
1305  VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
1307 
1308  PG_FREE_IF_COPY(in1, 0);
1309  PG_FREE_IF_COPY(in2, 1);
1311 }
#define VARDATA_ANY(PTR)
Definition: postgres.h:346
#define WORD_SIMILARITY_CHECK_ONLY
Definition: trgm_op.c:55
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:278
double strict_word_similarity_threshold
Definition: trgm_op.c:21
float float4
Definition: c.h:457
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:324
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:230
#define WORD_SIMILARITY_STRICT
Definition: trgm_op.c:57
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:339
Definition: c.h:516
static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, uint8 flags)
Definition: trgm_op.c:641

◆ strict_word_similarity_dist_commutator_op()

Datum strict_word_similarity_dist_commutator_op ( PG_FUNCTION_ARGS  )

Definition at line 1330 of file trgm_op.c.

References calc_word_similarity(), PG_FREE_IF_COPY, PG_GETARG_TEXT_PP, PG_RETURN_FLOAT4, VARDATA_ANY, VARSIZE_ANY_EXHDR, and WORD_SIMILARITY_STRICT.

1331 {
1332  text *in1 = PG_GETARG_TEXT_PP(0);
1333  text *in2 = PG_GETARG_TEXT_PP(1);
1334  float4 res;
1335 
1337  VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
1339 
1340  PG_FREE_IF_COPY(in1, 0);
1341  PG_FREE_IF_COPY(in2, 1);
1342  PG_RETURN_FLOAT4(1.0 - res);
1343 }
#define PG_RETURN_FLOAT4(x)
Definition: fmgr.h:330
#define VARDATA_ANY(PTR)
Definition: postgres.h:346
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:278
float float4
Definition: c.h:457
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:230
#define WORD_SIMILARITY_STRICT
Definition: trgm_op.c:57
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:339
Definition: c.h:516
static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, uint8 flags)
Definition: trgm_op.c:641

◆ strict_word_similarity_dist_op()

Datum strict_word_similarity_dist_op ( PG_FUNCTION_ARGS  )

Definition at line 1314 of file trgm_op.c.

References calc_word_similarity(), PG_FREE_IF_COPY, PG_GETARG_TEXT_PP, PG_RETURN_FLOAT4, VARDATA_ANY, VARSIZE_ANY_EXHDR, and WORD_SIMILARITY_STRICT.

1315 {
1316  text *in1 = PG_GETARG_TEXT_PP(0);
1317  text *in2 = PG_GETARG_TEXT_PP(1);
1318  float4 res;
1319 
1321  VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
1323 
1324  PG_FREE_IF_COPY(in1, 0);
1325  PG_FREE_IF_COPY(in2, 1);
1326  PG_RETURN_FLOAT4(1.0 - res);
1327 }
#define PG_RETURN_FLOAT4(x)
Definition: fmgr.h:330
#define VARDATA_ANY(PTR)
Definition: postgres.h:346
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:278
float float4
Definition: c.h:457
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:230
#define WORD_SIMILARITY_STRICT
Definition: trgm_op.c:57
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:339
Definition: c.h:516
static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, uint8 flags)
Definition: trgm_op.c:641

◆ strict_word_similarity_op()

Datum strict_word_similarity_op ( PG_FUNCTION_ARGS  )

Definition at line 1282 of file trgm_op.c.

References calc_word_similarity(), PG_FREE_IF_COPY, PG_GETARG_TEXT_PP, PG_RETURN_BOOL, strict_word_similarity_threshold, VARDATA_ANY, VARSIZE_ANY_EXHDR, WORD_SIMILARITY_CHECK_ONLY, and WORD_SIMILARITY_STRICT.

1283 {
1284  text *in1 = PG_GETARG_TEXT_PP(0);
1285  text *in2 = PG_GETARG_TEXT_PP(1);
1286  float4 res;
1287 
1289  VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
1291 
1292  PG_FREE_IF_COPY(in1, 0);
1293  PG_FREE_IF_COPY(in2, 1);
1295 }
#define VARDATA_ANY(PTR)
Definition: postgres.h:346
#define WORD_SIMILARITY_CHECK_ONLY
Definition: trgm_op.c:55
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:278
double strict_word_similarity_threshold
Definition: trgm_op.c:21
float float4
Definition: c.h:457
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:324
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:230
#define WORD_SIMILARITY_STRICT
Definition: trgm_op.c:57
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:339
Definition: c.h:516
static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, uint8 flags)
Definition: trgm_op.c:641

◆ trgm2int()

uint32 trgm2int ( trgm ptr)

Definition at line 955 of file trgm_op.c.

References val.

Referenced by gin_extract_query_trgm(), gin_extract_value_trgm(), and show_trgm().

956 {
957  uint32 val = 0;
958 
959  val |= *(((unsigned char *) ptr));
960  val <<= 8;
961  val |= *(((unsigned char *) ptr) + 1);
962  val <<= 8;
963  val |= *(((unsigned char *) ptr) + 2);
964 
965  return val;
966 }
unsigned int uint32
Definition: c.h:325
long val
Definition: informix.c:689

◆ trgm_contained_by()

bool trgm_contained_by ( TRGM trg1,
TRGM trg2 
)

Definition at line 1066 of file trgm_op.c.

References ARRNELEM, CMPTRGM, and GETARR.

Referenced by gtrgm_consistent().

1067 {
1068  trgm *ptr1,
1069  *ptr2;
1070  int len1,
1071  len2;
1072 
1073  ptr1 = GETARR(trg1);
1074  ptr2 = GETARR(trg2);
1075 
1076  len1 = ARRNELEM(trg1);
1077  len2 = ARRNELEM(trg2);
1078 
1079  while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
1080  {
1081  int res = CMPTRGM(ptr1, ptr2);
1082 
1083  if (res < 0)
1084  return false;
1085  else if (res > 0)
1086  ptr2++;
1087  else
1088  {
1089  ptr1++;
1090  ptr2++;
1091  }
1092  }
1093  if (ptr1 - GETARR(trg1) < len1)
1094  return false;
1095  else
1096  return true;
1097 }
#define CMPTRGM(a, b)
Definition: trgm.h:45
#define ARRNELEM(x)
Definition: trgm.h:108
#define GETARR(x)
Definition: trgm.h:107
char trgm[3]
Definition: trgm.h:41

◆ trgm_presence_map()

bool* trgm_presence_map ( TRGM query,
TRGM key 
)

Definition at line 1105 of file trgm_op.c.

References ARRNELEM, CMPTRGM, GETARR, i, and palloc0().

Referenced by gtrgm_consistent().

1106 {
1107  bool *result;
1108  trgm *ptrq = GETARR(query),
1109  *ptrk = GETARR(key);
1110  int lenq = ARRNELEM(query),
1111  lenk = ARRNELEM(key),
1112  i;
1113 
1114  result = (bool *) palloc0(lenq * sizeof(bool));
1115 
1116  /* for each query trigram, do a binary search in the key array */
1117  for (i = 0; i < lenq; i++)
1118  {
1119  int lo = 0;
1120  int hi = lenk;
1121 
1122  while (lo < hi)
1123  {
1124  int mid = (lo + hi) / 2;
1125  int res = CMPTRGM(ptrq, ptrk + mid);
1126 
1127  if (res < 0)
1128  hi = mid;
1129  else if (res > 0)
1130  lo = mid + 1;
1131  else
1132  {
1133  result[i] = true;
1134  break;
1135  }
1136  }
1137  ptrq++;
1138  }
1139 
1140  return result;
1141 }
#define CMPTRGM(a, b)
Definition: trgm.h:45
#define ARRNELEM(x)
Definition: trgm.h:108
#define GETARR(x)
Definition: trgm.h:107
void * palloc0(Size size)
Definition: mcxt.c:955
char trgm[3]
Definition: trgm.h:41
int i

◆ unique_array()

static int unique_array ( trgm a,
int  len 
)
static

Definition at line 167 of file trgm_op.c.

References CMPTRGM, and CPTRGM.

Referenced by generate_trgm(), and generate_wildcard_trgm().

168 {
169  trgm *curend,
170  *tmp;
171 
172  curend = tmp = a;
173  while (tmp - a < len)
174  if (CMPTRGM(tmp, curend))
175  {
176  curend++;
177  CPTRGM(curend, tmp);
178  tmp++;
179  }
180  else
181  tmp++;
182 
183  return curend + 1 - a;
184 }
#define CMPTRGM(a, b)
Definition: trgm.h:45
char trgm[3]
Definition: trgm.h:41
#define CPTRGM(a, b)
Definition: trgm.h:47

◆ word_similarity()

Datum word_similarity ( PG_FUNCTION_ARGS  )

Definition at line 1166 of file trgm_op.c.

References calc_word_similarity(), PG_FREE_IF_COPY, PG_GETARG_TEXT_PP, PG_RETURN_FLOAT4, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

1167 {
1168  text *in1 = PG_GETARG_TEXT_PP(0);
1169  text *in2 = PG_GETARG_TEXT_PP(1);
1170  float4 res;
1171 
1173  VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
1174  0);
1175 
1176  PG_FREE_IF_COPY(in1, 0);
1177  PG_FREE_IF_COPY(in2, 1);
1178  PG_RETURN_FLOAT4(res);
1179 }
#define PG_RETURN_FLOAT4(x)
Definition: fmgr.h:330
#define VARDATA_ANY(PTR)
Definition: postgres.h:346
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:278
float float4
Definition: c.h:457
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:230
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:339
Definition: c.h:516
static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, uint8 flags)
Definition: trgm_op.c:641

◆ word_similarity_commutator_op()

Datum word_similarity_commutator_op ( PG_FUNCTION_ARGS  )

Definition at line 1234 of file trgm_op.c.

References calc_word_similarity(), PG_FREE_IF_COPY, PG_GETARG_TEXT_PP, PG_RETURN_BOOL, VARDATA_ANY, VARSIZE_ANY_EXHDR, WORD_SIMILARITY_CHECK_ONLY, and word_similarity_threshold.

1235 {
1236  text *in1 = PG_GETARG_TEXT_PP(0);
1237  text *in2 = PG_GETARG_TEXT_PP(1);
1238  float4 res;
1239 
1241  VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
1243 
1244  PG_FREE_IF_COPY(in1, 0);
1245  PG_FREE_IF_COPY(in2, 1);
1247 }
#define VARDATA_ANY(PTR)
Definition: postgres.h:346
#define WORD_SIMILARITY_CHECK_ONLY
Definition: trgm_op.c:55
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:278
float float4
Definition: c.h:457
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:324
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:230
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:339
double word_similarity_threshold
Definition: trgm_op.c:20
Definition: c.h:516
static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, uint8 flags)
Definition: trgm_op.c:641

◆ word_similarity_dist_commutator_op()

Datum word_similarity_dist_commutator_op ( PG_FUNCTION_ARGS  )

Definition at line 1266 of file trgm_op.c.

References calc_word_similarity(), PG_FREE_IF_COPY, PG_GETARG_TEXT_PP, PG_RETURN_FLOAT4, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

1267 {
1268  text *in1 = PG_GETARG_TEXT_PP(0);
1269  text *in2 = PG_GETARG_TEXT_PP(1);
1270  float4 res;
1271 
1273  VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
1274  0);
1275 
1276  PG_FREE_IF_COPY(in1, 0);
1277  PG_FREE_IF_COPY(in2, 1);
1278  PG_RETURN_FLOAT4(1.0 - res);
1279 }
#define PG_RETURN_FLOAT4(x)
Definition: fmgr.h:330
#define VARDATA_ANY(PTR)
Definition: postgres.h:346
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:278
float float4
Definition: c.h:457
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:230
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:339
Definition: c.h:516
static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, uint8 flags)
Definition: trgm_op.c:641

◆ word_similarity_dist_op()

Datum word_similarity_dist_op ( PG_FUNCTION_ARGS  )

Definition at line 1250 of file trgm_op.c.

References calc_word_similarity(), PG_FREE_IF_COPY, PG_GETARG_TEXT_PP, PG_RETURN_FLOAT4, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

1251 {
1252  text *in1 = PG_GETARG_TEXT_PP(0);
1253  text *in2 = PG_GETARG_TEXT_PP(1);
1254  float4 res;
1255 
1257  VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
1258  0);
1259 
1260  PG_FREE_IF_COPY(in1, 0);
1261  PG_FREE_IF_COPY(in2, 1);
1262  PG_RETURN_FLOAT4(1.0 - res);
1263 }
#define PG_RETURN_FLOAT4(x)
Definition: fmgr.h:330
#define VARDATA_ANY(PTR)
Definition: postgres.h:346
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:278
float float4
Definition: c.h:457
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:230
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:339
Definition: c.h:516
static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, uint8 flags)
Definition: trgm_op.c:641

◆ word_similarity_op()

Datum word_similarity_op ( PG_FUNCTION_ARGS  )

Definition at line 1218 of file trgm_op.c.

References calc_word_similarity(), PG_FREE_IF_COPY, PG_GETARG_TEXT_PP, PG_RETURN_BOOL, VARDATA_ANY, VARSIZE_ANY_EXHDR, WORD_SIMILARITY_CHECK_ONLY, and word_similarity_threshold.

1219 {
1220  text *in1 = PG_GETARG_TEXT_PP(0);
1221  text *in2 = PG_GETARG_TEXT_PP(1);
1222  float4 res;
1223 
1225  VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
1227 
1228  PG_FREE_IF_COPY(in1, 0);
1229  PG_FREE_IF_COPY(in2, 1);
1231 }
#define VARDATA_ANY(PTR)
Definition: postgres.h:346
#define WORD_SIMILARITY_CHECK_ONLY
Definition: trgm_op.c:55
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:278
float float4
Definition: c.h:457
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:324
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:230
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:339
double word_similarity_threshold
Definition: trgm_op.c:20
Definition: c.h:516
static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, uint8 flags)
Definition: trgm_op.c:641

Variable Documentation

◆ PG_MODULE_MAGIC

PG_MODULE_MAGIC

Definition at line 16 of file trgm_op.c.

◆ similarity_threshold

double similarity_threshold = 0.3f

Definition at line 19 of file trgm_op.c.

Referenced by _PG_init(), index_strategy_get_limit(), set_limit(), show_limit(), and similarity_op().

◆ strict_word_similarity_threshold

double strict_word_similarity_threshold = 0.5f

◆ word_similarity_threshold

double word_similarity_threshold = 0.6f