8#include "catalog/pg_collation_d.h"
54#define TRGM_BOUND_LEFT 0x01
55#define TRGM_BOUND_RIGHT 0x02
58#define WORD_SIMILARITY_CHECK_ONLY 0x01
60#define WORD_SIMILARITY_STRICT 0x02
71 "Sets the threshold used by the % operator.",
72 "Valid range is 0.0 .. 1.0.",
83 "Sets the threshold used by the <% operator.",
84 "Valid range is 0.0 .. 1.0.",
95 "Sets the threshold used by the <<% operator.",
96 "Valid range is 0.0 .. 1.0.",
148 elog(
ERROR,
"unrecognized strategy number: %d", strategy);
176find_word(
char *
str,
int lenstr,
char **endword,
int *charlen)
178 char *beginword =
str;
180 while (beginword -
str < lenstr && !
ISWORDCHR(beginword))
183 if (beginword -
str >= lenstr)
186 *endword = beginword;
235 if (bytelen > charlen)
242 while ((ptr -
str) + lenfirst + lenmiddle + lenlast <= bytelen)
249 lenfirst = lenmiddle;
251 lenlast =
pg_mblen(ptr + lenfirst + lenmiddle);
257 Assert(bytelen == charlen);
259 while (ptr -
str < bytelen - 2 )
305 while ((bword =
find_word(eword, slen - (eword -
str), &eword, &charlen)) != NULL)
308 bword =
str_tolower(bword, eword - bword, DEFAULT_COLLATION_OID);
309 bytelen = strlen(bword);
311 bytelen = eword - bword;
349 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
350 errmsg(
"out of memory")));
410 for (
i = 0;
i < len1;
i++)
412 memcpy(&result[
i].trg, &trg1[
i],
sizeof(
trgm));
416 for (
i = 0;
i < len2;
i++)
418 memcpy(&result[
i + len1].trg, &trg2[
i],
sizeof(
trgm));
491 lastpos = (
int *)
palloc(
sizeof(
int) *
len);
492 memset(lastpos, -1,
sizeof(
int) *
len);
494 for (
i = 0;
i < len2;
i++)
501 trgindex = trg2indexes[
i];
504 if (
lower >= 0 || found[trgindex])
506 if (lastpos[trgindex] < 0)
512 lastpos[trgindex] =
i;
535 smlr_cur =
CALCSML(count, ulen1, ulen2);
541 for (tmp_lower =
lower; tmp_lower <=
upper; tmp_lower++)
554 smlr_tmp =
CALCSML(tmp_count, ulen1, tmp_ulen2);
555 if (smlr_tmp > smlr_cur)
569 && smlr_cur >= threshold)
573 tmp_trgindex = trg2indexes[tmp_lower];
574 if (lastpos[tmp_trgindex] == tmp_lower)
577 if (found[tmp_trgindex])
582 smlr_max =
Max(smlr_max, smlr_cur);
591 for (tmp_lower = prev_lower; tmp_lower <
lower; tmp_lower++)
595 tmp_trgindex = trg2indexes[tmp_lower];
596 if (lastpos[tmp_trgindex] == tmp_lower)
597 lastpos[tmp_trgindex] = -1;
667 trg2indexes = (
int *)
palloc(
sizeof(
int) * len2);
688 trg2indexes[ptrg[
i].
index] =
j;
727 char *
buf,
int *bytelen,
int *charlen)
729 const char *beginword =
str;
732 bool in_leading_wildcard_meta =
false;
733 bool in_trailing_wildcard_meta =
false;
734 bool in_escape =
false;
743 while (beginword -
str < lenstr)
750 in_leading_wildcard_meta =
false;
757 in_leading_wildcard_meta =
true;
761 in_leading_wildcard_meta =
false;
769 if (beginword -
str >= lenstr)
777 if (!in_leading_wildcard_meta)
796 while (endword -
str < lenstr)
803 memcpy(s, endword, clen);
826 in_trailing_wildcard_meta =
true;
831 memcpy(s, endword, clen);
845 if (!in_trailing_wildcard_meta)
901 buf, &bytelen, &charlen)) != NULL)
905 bytelen = strlen(buf2);
944 val |= *(((
unsigned char *) ptr));
946 val |= *(((
unsigned char *) ptr) + 1);
948 val |= *(((
unsigned char *) ptr) + 2);
1011 if (len1 <= 0 || len2 <= 0)
1014 while (ptr1 -
GETARR(trg1) < len1 && ptr2 -
GETARR(trg2) < len2)
1035 return CALCSML(count, len1, inexact ? count : len2);
1057 while (ptr1 -
GETARR(trg1) < len1 && ptr2 -
GETARR(trg2) < len2)
1071 if (ptr1 -
GETARR(trg1) < len1)
1092 result = (
bool *)
palloc0(lenq *
sizeof(
bool));
1095 for (
i = 0;
i < lenq;
i++)
1102 int mid = (lo + hi) / 2;
ArrayType * construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
#define Assert(condition)
static void PGresult const char * p2
static void PGresult * res
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
char * OidOutputFunctionCall(Oid functionId, Datum val)
#define PG_FREE_IF_COPY(ptr, n)
#define PG_GETARG_TEXT_PP(n)
#define DirectFunctionCall2(func, arg1, arg2)
#define PG_GETARG_DATUM(n)
#define PG_GETARG_FLOAT4(n)
#define PG_RETURN_POINTER(x)
#define PG_RETURN_FLOAT4(x)
#define PG_RETURN_BOOL(x)
void DefineCustomRealVariable(const char *name, const char *short_desc, const char *long_desc, double *valueAddr, double bootValue, double minValue, double maxValue, GucContext context, int flags, GucRealCheckHook check_hook, GucRealAssignHook assign_hook, GucShowHook show_hook)
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
void MarkGUCPrefixReserved(const char *className)
#define CALCGTSIZE(flag, siglen)
static int pg_cmp_s32(int32 a, int32 b)
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
int pg_database_encoding_max_length(void)
int pg_mblen(const char *mbstr)
void pfree(void *pointer)
void * palloc0(Size size)
#define CHECK_FOR_INTERRUPTS()
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
#define INIT_LEGACY_CRC32(crc)
#define COMP_LEGACY_CRC32(crc, data, len)
#define FIN_LEGACY_CRC32(crc)
#define qsort(a, b, c, d)
static Datum PointerGetDatum(const void *X)
static Datum Float4GetDatum(float4 X)
static float4 DatumGetFloat4(Datum X)
static Pointer DatumGetPointer(Datum X)
static size_t qunique(void *array, size_t elements, size_t width, int(*compare)(const void *, const void *))
static int cmp(const chr *x, const chr *y, size_t len)
#define CALCSML(count, len1, len2)
#define WordSimilarityStrategyNumber
#define StrictWordSimilarityStrategyNumber
#define SimilarityStrategyNumber
#define ISWILDCARDCHAR(x)
#define ISPRINTABLETRGM(t)
static float4 iterate_word_similarity(int *trg2indexes, bool *found, int ulen1, int len2, int len, uint8 flags, TrgmBound *bounds)
Datum strict_word_similarity_commutator_op(PG_FUNCTION_ARGS)
Datum set_limit(PG_FUNCTION_ARGS)
static int comp_trgm(const void *a, const void *b)
double strict_word_similarity_threshold
TRGM * generate_trgm(char *str, int slen)
uint32 trgm2int(trgm *ptr)
#define WORD_SIMILARITY_CHECK_ONLY
static void protect_out_of_mem(int slen)
Datum word_similarity(PG_FUNCTION_ARGS)
Datum strict_word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
void compact_trigram(trgm *tptr, char *str, int bytelen)
bool * trgm_presence_map(TRGM *query, TRGM *key)
PG_FUNCTION_INFO_V1(set_limit)
static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, uint8 flags)
double word_similarity_threshold
Datum word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
double index_strategy_get_limit(StrategyNumber strategy)
Datum similarity(PG_FUNCTION_ARGS)
static pos_trgm * make_positional_trgm(trgm *trg1, int len1, trgm *trg2, int len2)
double similarity_threshold
static int comp_ptrgm(const void *v1, const void *v2)
static char * find_word(char *str, int lenstr, char **endword, int *charlen)
Datum show_trgm(PG_FUNCTION_ARGS)
bool trgm_contained_by(TRGM *trg1, TRGM *trg2)
Datum show_limit(PG_FUNCTION_ARGS)
Datum strict_word_similarity_dist_op(PG_FUNCTION_ARGS)
Datum word_similarity_op(PG_FUNCTION_ARGS)
Datum word_similarity_commutator_op(PG_FUNCTION_ARGS)
#define WORD_SIMILARITY_STRICT
TRGM * generate_wildcard_trgm(const char *str, int slen)
static trgm * make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
static int generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
Datum similarity_op(PG_FUNCTION_ARGS)
Datum word_similarity_dist_op(PG_FUNCTION_ARGS)
static const char * get_wildcard_part(const char *str, int lenstr, char *buf, int *bytelen, int *charlen)
Datum strict_word_similarity(PG_FUNCTION_ARGS)
Datum similarity_dist(PG_FUNCTION_ARGS)
Datum strict_word_similarity_op(PG_FUNCTION_ARGS)
#define SET_VARSIZE(PTR, len)
#define VARSIZE_ANY_EXHDR(PTR)