8#include "catalog/pg_collation_d.h"
60#define TRGM_BOUND_LEFT 0x01
61#define TRGM_BOUND_RIGHT 0x02
64#define WORD_SIMILARITY_CHECK_ONLY 0x01
66#define WORD_SIMILARITY_STRICT 0x02
77 "Sets the threshold used by the % operator.",
78 "Valid range is 0.0 .. 1.0.",
89 "Sets the threshold used by the <% operator.",
90 "Valid range is 0.0 .. 1.0.",
101 "Sets the threshold used by the <<% operator.",
102 "Valid range is 0.0 .. 1.0.",
116#define CMPCHAR(a,b) ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) )
125#define CMPPCHAR_S(a,b,i) CMPCHAR( *(((const signed char*)(a))+i), *(((const signed char*)(b))+i) )
135#define CMPPCHAR_UNS(a,b,i) CMPCHAR( *(((const unsigned char*)(a))+i), *(((const unsigned char*)(b))+i) )
195 elog(
ERROR,
"unrecognized strategy number: %d", strategy);
223find_word(
char *
str,
int lenstr,
char **endword,
int *charlen)
225 char *beginword =
str;
227 while (beginword -
str < lenstr && !
ISWORDCHR(beginword))
230 if (beginword -
str >= lenstr)
233 *endword = beginword;
282 if (bytelen > charlen)
289 while ((ptr -
str) + lenfirst + lenmiddle + lenlast <= bytelen)
296 lenfirst = lenmiddle;
298 lenlast =
pg_mblen(ptr + lenfirst + lenmiddle);
304 Assert(bytelen == charlen);
306 while (ptr -
str < bytelen - 2 )
352 while ((bword =
find_word(eword, slen - (eword -
str), &eword, &charlen)) != NULL)
355 bword =
str_tolower(bword, eword - bword, DEFAULT_COLLATION_OID);
356 bytelen = strlen(bword);
358 bytelen = eword - bword;
396 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
397 errmsg(
"out of memory")));
457 for (
i = 0;
i < len1;
i++)
459 memcpy(&result[
i].trg, &trg1[
i],
sizeof(
trgm));
463 for (
i = 0;
i < len2;
i++)
465 memcpy(&result[
i + len1].trg, &trg2[
i],
sizeof(
trgm));
538 lastpos = (
int *)
palloc(
sizeof(
int) *
len);
539 memset(lastpos, -1,
sizeof(
int) *
len);
541 for (
i = 0;
i < len2;
i++)
548 trgindex = trg2indexes[
i];
551 if (
lower >= 0 || found[trgindex])
553 if (lastpos[trgindex] < 0)
559 lastpos[trgindex] =
i;
582 smlr_cur =
CALCSML(count, ulen1, ulen2);
588 for (tmp_lower =
lower; tmp_lower <=
upper; tmp_lower++)
601 smlr_tmp =
CALCSML(tmp_count, ulen1, tmp_ulen2);
602 if (smlr_tmp > smlr_cur)
616 && smlr_cur >= threshold)
620 tmp_trgindex = trg2indexes[tmp_lower];
621 if (lastpos[tmp_trgindex] == tmp_lower)
624 if (found[tmp_trgindex])
629 smlr_max =
Max(smlr_max, smlr_cur);
638 for (tmp_lower = prev_lower; tmp_lower <
lower; tmp_lower++)
642 tmp_trgindex = trg2indexes[tmp_lower];
643 if (lastpos[tmp_trgindex] == tmp_lower)
644 lastpos[tmp_trgindex] = -1;
714 trg2indexes = (
int *)
palloc(
sizeof(
int) * len2);
735 trg2indexes[ptrg[
i].
index] =
j;
774 char *
buf,
int *bytelen,
int *charlen)
776 const char *beginword =
str;
779 bool in_leading_wildcard_meta =
false;
780 bool in_trailing_wildcard_meta =
false;
781 bool in_escape =
false;
790 while (beginword -
str < lenstr)
797 in_leading_wildcard_meta =
false;
804 in_leading_wildcard_meta =
true;
808 in_leading_wildcard_meta =
false;
816 if (beginword -
str >= lenstr)
824 if (!in_leading_wildcard_meta)
843 while (endword -
str < lenstr)
850 memcpy(s, endword, clen);
873 in_trailing_wildcard_meta =
true;
878 memcpy(s, endword, clen);
892 if (!in_trailing_wildcard_meta)
948 buf, &bytelen, &charlen)) != NULL)
952 bytelen = strlen(buf2);
991 val |= *(((
unsigned char *) ptr));
993 val |= *(((
unsigned char *) ptr) + 1);
995 val |= *(((
unsigned char *) ptr) + 2);
1058 if (len1 <= 0 || len2 <= 0)
1061 while (ptr1 -
GETARR(trg1) < len1 && ptr2 -
GETARR(trg2) < len2)
1063 int res =
CMPTRGM(ptr1, ptr2);
1082 return CALCSML(count, len1, inexact ? count : len2);
1104 while (ptr1 -
GETARR(trg1) < len1 && ptr2 -
GETARR(trg2) < len2)
1106 int res =
CMPTRGM(ptr1, ptr2);
1118 if (ptr1 -
GETARR(trg1) < len1)
1139 result = (
bool *)
palloc0(lenq *
sizeof(
bool));
1142 for (
i = 0;
i < lenq;
i++)
1149 int mid = (lo + hi) / 2;
1150 int res =
CMPTRGM(ptrq, ptrk + mid);
1180 res =
cnt_sml(trg1, trg2,
false);
ArrayType * construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
char * OidOutputFunctionCall(Oid functionId, Datum val)
#define PG_FREE_IF_COPY(ptr, n)
#define PG_GETARG_TEXT_PP(n)
#define DirectFunctionCall2(func, arg1, arg2)
#define PG_GETARG_DATUM(n)
#define PG_GETARG_FLOAT4(n)
#define PG_RETURN_POINTER(x)
#define PG_RETURN_FLOAT4(x)
#define PG_RETURN_BOOL(x)
void DefineCustomRealVariable(const char *name, const char *short_desc, const char *long_desc, double *valueAddr, double bootValue, double minValue, double maxValue, GucContext context, int flags, GucRealCheckHook check_hook, GucRealAssignHook assign_hook, GucShowHook show_hook)
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
void MarkGUCPrefixReserved(const char *className)
Assert(PointerIsAligned(start, uint64))
#define CALCGTSIZE(flag, siglen)
static int pg_cmp_s32(int32 a, int32 b)
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
int pg_database_encoding_max_length(void)
int pg_mblen(const char *mbstr)
void pfree(void *pointer)
void * palloc0(Size size)
#define CHECK_FOR_INTERRUPTS()
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
#define INIT_LEGACY_CRC32(crc)
#define COMP_LEGACY_CRC32(crc, data, len)
#define FIN_LEGACY_CRC32(crc)
#define qsort(a, b, c, d)
static Datum PointerGetDatum(const void *X)
static Datum Float4GetDatum(float4 X)
static float4 DatumGetFloat4(Datum X)
static Pointer DatumGetPointer(Datum X)
static size_t qunique(void *array, size_t elements, size_t width, int(*compare)(const void *, const void *))
static int cmp(const chr *x, const chr *y, size_t len)
#define CALCSML(count, len1, len2)
#define WordSimilarityStrategyNumber
#define StrictWordSimilarityStrategyNumber
#define SimilarityStrategyNumber
#define ISWILDCARDCHAR(x)
#define ISPRINTABLETRGM(t)
static float4 iterate_word_similarity(int *trg2indexes, bool *found, int ulen1, int len2, int len, uint8 flags, TrgmBound *bounds)
Datum strict_word_similarity_commutator_op(PG_FUNCTION_ARGS)
static int CMPTRGM_UNSIGNED(const void *a, const void *b)
Datum set_limit(PG_FUNCTION_ARGS)
static int comp_trgm(const void *a, const void *b)
double strict_word_similarity_threshold
TRGM * generate_trgm(char *str, int slen)
uint32 trgm2int(trgm *ptr)
PG_MODULE_MAGIC_EXT(.name="pg_trgm",.version=PG_VERSION)
static int CMPTRGM_CHOOSE(const void *a, const void *b)
int(* CMPTRGM)(const void *a, const void *b)
#define WORD_SIMILARITY_CHECK_ONLY
static void protect_out_of_mem(int slen)
Datum word_similarity(PG_FUNCTION_ARGS)
Datum strict_word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
void compact_trigram(trgm *tptr, char *str, int bytelen)
bool * trgm_presence_map(TRGM *query, TRGM *key)
PG_FUNCTION_INFO_V1(set_limit)
static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, uint8 flags)
double word_similarity_threshold
Datum word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
double index_strategy_get_limit(StrategyNumber strategy)
Datum similarity(PG_FUNCTION_ARGS)
static pos_trgm * make_positional_trgm(trgm *trg1, int len1, trgm *trg2, int len2)
double similarity_threshold
static int CMPTRGM_SIGNED(const void *a, const void *b)
static int comp_ptrgm(const void *v1, const void *v2)
static char * find_word(char *str, int lenstr, char **endword, int *charlen)
Datum show_trgm(PG_FUNCTION_ARGS)
bool trgm_contained_by(TRGM *trg1, TRGM *trg2)
#define CMPPCHAR_S(a, b, i)
Datum show_limit(PG_FUNCTION_ARGS)
#define CMPPCHAR_UNS(a, b, i)
Datum strict_word_similarity_dist_op(PG_FUNCTION_ARGS)
Datum word_similarity_op(PG_FUNCTION_ARGS)
Datum word_similarity_commutator_op(PG_FUNCTION_ARGS)
#define WORD_SIMILARITY_STRICT
TRGM * generate_wildcard_trgm(const char *str, int slen)
static trgm * make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
static int generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
Datum similarity_op(PG_FUNCTION_ARGS)
Datum word_similarity_dist_op(PG_FUNCTION_ARGS)
static const char * get_wildcard_part(const char *str, int lenstr, char *buf, int *bytelen, int *charlen)
Datum strict_word_similarity(PG_FUNCTION_ARGS)
Datum similarity_dist(PG_FUNCTION_ARGS)
Datum strict_word_similarity_op(PG_FUNCTION_ARGS)
#define SET_VARSIZE(PTR, len)
#define VARSIZE_ANY_EXHDR(PTR)
bool GetDefaultCharSignedness(void)