51 #define TRGM_BOUND_LEFT 0x01
52 #define TRGM_BOUND_RIGHT 0x02
55 #define WORD_SIMILARITY_CHECK_ONLY 0x01
57 #define WORD_SIMILARITY_STRICT 0x02
68 "Sets the threshold used by the % operator.",
69 "Valid range is 0.0 .. 1.0.",
80 "Sets the threshold used by the <% operator.",
81 "Valid range is 0.0 .. 1.0.",
92 "Sets the threshold used by the <<% operator.",
93 "Valid range is 0.0 .. 1.0.",
145 elog(
ERROR,
"unrecognized strategy number: %d", strategy);
173 find_word(
char *
str,
int lenstr,
char **endword,
int *charlen)
175 char *beginword =
str;
177 while (beginword -
str < lenstr && !
ISWORDCHR(beginword))
180 if (beginword -
str >= lenstr)
183 *endword = beginword;
232 if (bytelen > charlen)
239 while ((ptr -
str) + lenfirst + lenmiddle + lenlast <= bytelen)
246 lenfirst = lenmiddle;
248 lenlast =
pg_mblen(ptr + lenfirst + lenmiddle);
254 Assert(bytelen == charlen);
256 while (ptr -
str < bytelen - 2 )
302 while ((bword =
find_word(eword, slen - (eword -
str), &eword, &charlen)) != NULL)
306 bytelen = strlen(bword);
308 bytelen = eword - bword;
346 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
347 errmsg(
"out of memory")));
407 for (
i = 0;
i < len1;
i++)
409 memcpy(&result[
i].trg, &trg1[
i],
sizeof(
trgm));
413 for (
i = 0;
i < len2;
i++)
415 memcpy(&result[
i + len1].trg, &trg2[
i],
sizeof(
trgm));
438 else if (p1->
index ==
p2->index)
493 lastpos = (
int *)
palloc(
sizeof(
int) *
len);
494 memset(lastpos, -1,
sizeof(
int) *
len);
496 for (
i = 0;
i < len2;
i++)
503 trgindex = trg2indexes[
i];
506 if (
lower >= 0 || found[trgindex])
508 if (lastpos[trgindex] < 0)
514 lastpos[trgindex] =
i;
537 smlr_cur =
CALCSML(count, ulen1, ulen2);
543 for (tmp_lower =
lower; tmp_lower <=
upper; tmp_lower++)
556 smlr_tmp =
CALCSML(tmp_count, ulen1, tmp_ulen2);
557 if (smlr_tmp > smlr_cur)
571 && smlr_cur >= threshold)
575 tmp_trgindex = trg2indexes[tmp_lower];
576 if (lastpos[tmp_trgindex] == tmp_lower)
579 if (found[tmp_trgindex])
584 smlr_max =
Max(smlr_max, smlr_cur);
593 for (tmp_lower = prev_lower; tmp_lower <
lower; tmp_lower++)
597 tmp_trgindex = trg2indexes[tmp_lower];
598 if (lastpos[tmp_trgindex] == tmp_lower)
599 lastpos[tmp_trgindex] = -1;
669 trg2indexes = (
int *)
palloc(
sizeof(
int) * len2);
690 trg2indexes[ptrg[
i].
index] =
j;
729 char *
buf,
int *bytelen,
int *charlen)
731 const char *beginword =
str;
734 bool in_leading_wildcard_meta =
false;
735 bool in_trailing_wildcard_meta =
false;
736 bool in_escape =
false;
745 while (beginword -
str < lenstr)
752 in_leading_wildcard_meta =
false;
759 in_leading_wildcard_meta =
true;
763 in_leading_wildcard_meta =
false;
771 if (beginword -
str >= lenstr)
779 if (!in_leading_wildcard_meta)
798 while (endword -
str < lenstr)
805 memcpy(s, endword, clen);
828 in_trailing_wildcard_meta =
true;
833 memcpy(s, endword, clen);
847 if (!in_trailing_wildcard_meta)
903 buf, &bytelen, &charlen)) != NULL)
907 bytelen = strlen(buf2);
946 val |= *(((
unsigned char *) ptr));
948 val |= *(((
unsigned char *) ptr) + 1);
950 val |= *(((
unsigned char *) ptr) + 2);
1013 if (len1 <= 0 || len2 <= 0)
1016 while (ptr1 -
GETARR(trg1) < len1 && ptr2 -
GETARR(trg2) < len2)
1037 return CALCSML(count, len1, inexact ? count : len2);
1059 while (ptr1 -
GETARR(trg1) < len1 && ptr2 -
GETARR(trg2) < len2)
1073 if (ptr1 -
GETARR(trg1) < len1)
1094 result = (
bool *)
palloc0(lenq *
sizeof(
bool));
1097 for (
i = 0;
i < lenq;
i++)
1104 int mid = (lo + hi) / 2;
ArrayType * construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
static void PGresult const char * p2
static void PGresult * res
elog(ERROR, "%s: %s", p2, msg)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
char * OidOutputFunctionCall(Oid functionId, Datum val)
#define PG_FREE_IF_COPY(ptr, n)
#define PG_GETARG_TEXT_PP(n)
#define DirectFunctionCall2(func, arg1, arg2)
#define PG_GETARG_DATUM(n)
#define PG_GETARG_FLOAT4(n)
#define PG_RETURN_POINTER(x)
#define PG_RETURN_FLOAT4(x)
#define PG_RETURN_BOOL(x)
void DefineCustomRealVariable(const char *name, const char *short_desc, const char *long_desc, double *valueAddr, double bootValue, double minValue, double maxValue, GucContext context, int flags, GucRealCheckHook check_hook, GucRealAssignHook assign_hook, GucShowHook show_hook)
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
void MarkGUCPrefixReserved(const char *className)
#define CALCGTSIZE(flag, siglen)
Assert(fmt[strlen(fmt) - 1] !='\n')
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
int pg_database_encoding_max_length(void)
int pg_mblen(const char *mbstr)
void pfree(void *pointer)
void * palloc0(Size size)
#define CHECK_FOR_INTERRUPTS()
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
#define INIT_LEGACY_CRC32(crc)
#define COMP_LEGACY_CRC32(crc, data, len)
#define FIN_LEGACY_CRC32(crc)
#define qsort(a, b, c, d)
static Datum PointerGetDatum(const void *X)
static Datum Float4GetDatum(float4 X)
static float4 DatumGetFloat4(Datum X)
static Pointer DatumGetPointer(Datum X)
static size_t qunique(void *array, size_t elements, size_t width, int(*compare)(const void *, const void *))
static int cmp(const chr *x, const chr *y, size_t len)
#define CALCSML(count, len1, len2)
#define WordSimilarityStrategyNumber
#define StrictWordSimilarityStrategyNumber
#define SimilarityStrategyNumber
#define ISWILDCARDCHAR(x)
#define ISPRINTABLETRGM(t)
static float4 iterate_word_similarity(int *trg2indexes, bool *found, int ulen1, int len2, int len, uint8 flags, TrgmBound *bounds)
bool * trgm_presence_map(TRGM *query, TRGM *key)
Datum strict_word_similarity_commutator_op(PG_FUNCTION_ARGS)
TRGM * generate_trgm(char *str, int slen)
Datum set_limit(PG_FUNCTION_ARGS)
static const char * get_wildcard_part(const char *str, int lenstr, char *buf, int *bytelen, int *charlen)
static int comp_trgm(const void *a, const void *b)
double strict_word_similarity_threshold
TRGM * generate_wildcard_trgm(const char *str, int slen)
uint32 trgm2int(trgm *ptr)
#define WORD_SIMILARITY_CHECK_ONLY
static void protect_out_of_mem(int slen)
Datum word_similarity(PG_FUNCTION_ARGS)
Datum strict_word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
void compact_trigram(trgm *tptr, char *str, int bytelen)
PG_FUNCTION_INFO_V1(set_limit)
static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, uint8 flags)
double word_similarity_threshold
Datum word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
double index_strategy_get_limit(StrategyNumber strategy)
static pos_trgm * make_positional_trgm(trgm *trg1, int len1, trgm *trg2, int len2)
Datum similarity(PG_FUNCTION_ARGS)
double similarity_threshold
static int comp_ptrgm(const void *v1, const void *v2)
Datum show_trgm(PG_FUNCTION_ARGS)
static trgm * make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
bool trgm_contained_by(TRGM *trg1, TRGM *trg2)
static char * find_word(char *str, int lenstr, char **endword, int *charlen)
Datum show_limit(PG_FUNCTION_ARGS)
Datum strict_word_similarity_dist_op(PG_FUNCTION_ARGS)
Datum word_similarity_op(PG_FUNCTION_ARGS)
Datum word_similarity_commutator_op(PG_FUNCTION_ARGS)
#define WORD_SIMILARITY_STRICT
float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
static int generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
Datum similarity_op(PG_FUNCTION_ARGS)
Datum word_similarity_dist_op(PG_FUNCTION_ARGS)
Datum strict_word_similarity(PG_FUNCTION_ARGS)
Datum similarity_dist(PG_FUNCTION_ARGS)
Datum strict_word_similarity_op(PG_FUNCTION_ARGS)
char * lowerstr_with_len(const char *str, int len)
#define SET_VARSIZE(PTR, len)
#define VARSIZE_ANY_EXHDR(PTR)