52 #define TRGM_BOUND_LEFT 0x01
53 #define TRGM_BOUND_RIGHT 0x02
56 #define WORD_SIMILARITY_CHECK_ONLY 0x01
58 #define WORD_SIMILARITY_STRICT 0x02
69 "Sets the threshold used by the % operator.",
70 "Valid range is 0.0 .. 1.0.",
81 "Sets the threshold used by the <% operator.",
82 "Valid range is 0.0 .. 1.0.",
93 "Sets the threshold used by the <<% operator.",
94 "Valid range is 0.0 .. 1.0.",
146 elog(
ERROR,
"unrecognized strategy number: %d", strategy);
174 find_word(
char *
str,
int lenstr,
char **endword,
int *charlen)
176 char *beginword =
str;
178 while (beginword -
str < lenstr && !
ISWORDCHR(beginword))
181 if (beginword -
str >= lenstr)
184 *endword = beginword;
233 if (bytelen > charlen)
240 while ((ptr -
str) + lenfirst + lenmiddle + lenlast <= bytelen)
247 lenfirst = lenmiddle;
249 lenlast =
pg_mblen(ptr + lenfirst + lenmiddle);
255 Assert(bytelen == charlen);
257 while (ptr -
str < bytelen - 2 )
303 while ((bword =
find_word(eword, slen - (eword -
str), &eword, &charlen)) != NULL)
307 bytelen = strlen(bword);
309 bytelen = eword - bword;
347 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
348 errmsg(
"out of memory")));
408 for (
i = 0;
i < len1;
i++)
410 memcpy(&result[
i].trg, &trg1[
i],
sizeof(
trgm));
414 for (
i = 0;
i < len2;
i++)
416 memcpy(&result[
i + len1].trg, &trg2[
i],
sizeof(
trgm));
489 lastpos = (
int *)
palloc(
sizeof(
int) *
len);
490 memset(lastpos, -1,
sizeof(
int) *
len);
492 for (
i = 0;
i < len2;
i++)
499 trgindex = trg2indexes[
i];
502 if (
lower >= 0 || found[trgindex])
504 if (lastpos[trgindex] < 0)
510 lastpos[trgindex] =
i;
533 smlr_cur =
CALCSML(count, ulen1, ulen2);
539 for (tmp_lower =
lower; tmp_lower <=
upper; tmp_lower++)
552 smlr_tmp =
CALCSML(tmp_count, ulen1, tmp_ulen2);
553 if (smlr_tmp > smlr_cur)
567 && smlr_cur >= threshold)
571 tmp_trgindex = trg2indexes[tmp_lower];
572 if (lastpos[tmp_trgindex] == tmp_lower)
575 if (found[tmp_trgindex])
580 smlr_max =
Max(smlr_max, smlr_cur);
589 for (tmp_lower = prev_lower; tmp_lower <
lower; tmp_lower++)
593 tmp_trgindex = trg2indexes[tmp_lower];
594 if (lastpos[tmp_trgindex] == tmp_lower)
595 lastpos[tmp_trgindex] = -1;
665 trg2indexes = (
int *)
palloc(
sizeof(
int) * len2);
686 trg2indexes[ptrg[
i].
index] =
j;
725 char *
buf,
int *bytelen,
int *charlen)
727 const char *beginword =
str;
730 bool in_leading_wildcard_meta =
false;
731 bool in_trailing_wildcard_meta =
false;
732 bool in_escape =
false;
741 while (beginword -
str < lenstr)
748 in_leading_wildcard_meta =
false;
755 in_leading_wildcard_meta =
true;
759 in_leading_wildcard_meta =
false;
767 if (beginword -
str >= lenstr)
775 if (!in_leading_wildcard_meta)
794 while (endword -
str < lenstr)
801 memcpy(s, endword, clen);
824 in_trailing_wildcard_meta =
true;
829 memcpy(s, endword, clen);
843 if (!in_trailing_wildcard_meta)
899 buf, &bytelen, &charlen)) != NULL)
903 bytelen = strlen(buf2);
942 val |= *(((
unsigned char *) ptr));
944 val |= *(((
unsigned char *) ptr) + 1);
946 val |= *(((
unsigned char *) ptr) + 2);
1009 if (len1 <= 0 || len2 <= 0)
1012 while (ptr1 -
GETARR(trg1) < len1 && ptr2 -
GETARR(trg2) < len2)
1033 return CALCSML(count, len1, inexact ? count : len2);
1055 while (ptr1 -
GETARR(trg1) < len1 && ptr2 -
GETARR(trg2) < len2)
1069 if (ptr1 -
GETARR(trg1) < len1)
1090 result = (
bool *)
palloc0(lenq *
sizeof(
bool));
1093 for (
i = 0;
i < lenq;
i++)
1100 int mid = (lo + hi) / 2;
ArrayType * construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
#define Assert(condition)
static void PGresult const char * p2
static void PGresult * res
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
char * OidOutputFunctionCall(Oid functionId, Datum val)
#define PG_FREE_IF_COPY(ptr, n)
#define PG_GETARG_TEXT_PP(n)
#define DirectFunctionCall2(func, arg1, arg2)
#define PG_GETARG_DATUM(n)
#define PG_GETARG_FLOAT4(n)
#define PG_RETURN_POINTER(x)
#define PG_RETURN_FLOAT4(x)
#define PG_RETURN_BOOL(x)
void DefineCustomRealVariable(const char *name, const char *short_desc, const char *long_desc, double *valueAddr, double bootValue, double minValue, double maxValue, GucContext context, int flags, GucRealCheckHook check_hook, GucRealAssignHook assign_hook, GucShowHook show_hook)
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
void MarkGUCPrefixReserved(const char *className)
#define CALCGTSIZE(flag, siglen)
static int pg_cmp_s32(int32 a, int32 b)
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
int pg_database_encoding_max_length(void)
int pg_mblen(const char *mbstr)
void pfree(void *pointer)
void * palloc0(Size size)
#define CHECK_FOR_INTERRUPTS()
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
#define INIT_LEGACY_CRC32(crc)
#define COMP_LEGACY_CRC32(crc, data, len)
#define FIN_LEGACY_CRC32(crc)
#define qsort(a, b, c, d)
static Datum PointerGetDatum(const void *X)
static Datum Float4GetDatum(float4 X)
static float4 DatumGetFloat4(Datum X)
static Pointer DatumGetPointer(Datum X)
static size_t qunique(void *array, size_t elements, size_t width, int(*compare)(const void *, const void *))
static int cmp(const chr *x, const chr *y, size_t len)
#define CALCSML(count, len1, len2)
#define WordSimilarityStrategyNumber
#define StrictWordSimilarityStrategyNumber
#define SimilarityStrategyNumber
#define ISWILDCARDCHAR(x)
#define ISPRINTABLETRGM(t)
static float4 iterate_word_similarity(int *trg2indexes, bool *found, int ulen1, int len2, int len, uint8 flags, TrgmBound *bounds)
bool * trgm_presence_map(TRGM *query, TRGM *key)
Datum strict_word_similarity_commutator_op(PG_FUNCTION_ARGS)
TRGM * generate_trgm(char *str, int slen)
Datum set_limit(PG_FUNCTION_ARGS)
static const char * get_wildcard_part(const char *str, int lenstr, char *buf, int *bytelen, int *charlen)
static int comp_trgm(const void *a, const void *b)
double strict_word_similarity_threshold
TRGM * generate_wildcard_trgm(const char *str, int slen)
uint32 trgm2int(trgm *ptr)
#define WORD_SIMILARITY_CHECK_ONLY
static void protect_out_of_mem(int slen)
Datum word_similarity(PG_FUNCTION_ARGS)
Datum strict_word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
void compact_trigram(trgm *tptr, char *str, int bytelen)
PG_FUNCTION_INFO_V1(set_limit)
static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, uint8 flags)
double word_similarity_threshold
Datum word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
double index_strategy_get_limit(StrategyNumber strategy)
static pos_trgm * make_positional_trgm(trgm *trg1, int len1, trgm *trg2, int len2)
Datum similarity(PG_FUNCTION_ARGS)
double similarity_threshold
static int comp_ptrgm(const void *v1, const void *v2)
Datum show_trgm(PG_FUNCTION_ARGS)
static trgm * make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
bool trgm_contained_by(TRGM *trg1, TRGM *trg2)
static char * find_word(char *str, int lenstr, char **endword, int *charlen)
Datum show_limit(PG_FUNCTION_ARGS)
Datum strict_word_similarity_dist_op(PG_FUNCTION_ARGS)
Datum word_similarity_op(PG_FUNCTION_ARGS)
Datum word_similarity_commutator_op(PG_FUNCTION_ARGS)
#define WORD_SIMILARITY_STRICT
float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
static int generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
Datum similarity_op(PG_FUNCTION_ARGS)
Datum word_similarity_dist_op(PG_FUNCTION_ARGS)
Datum strict_word_similarity(PG_FUNCTION_ARGS)
Datum similarity_dist(PG_FUNCTION_ARGS)
Datum strict_word_similarity_op(PG_FUNCTION_ARGS)
char * lowerstr_with_len(const char *str, int len)
#define SET_VARSIZE(PTR, len)
#define VARSIZE_ANY_EXHDR(PTR)