26 #define MAX_LEVENSHTEIN_STRLEN 255
67 #ifdef LEVENSHTEIN_LESS_EQUAL
69 const char *target,
int tlen,
70 int ins_c,
int del_c,
int sub_c,
71 int max_d,
bool trusted)
74 const char *target,
int tlen,
75 int ins_c,
int del_c,
int sub_c,
83 int *s_char_len = NULL;
92 #ifdef LEVENSHTEIN_LESS_EQUAL
98 #define START_COLUMN start_column
99 #define STOP_COLUMN stop_column
103 #define START_COLUMN 0
104 #define STOP_COLUMN m
131 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
132 errmsg(
"levenshtein argument exceeds maximum length of %d characters",
135 #ifdef LEVENSHTEIN_LESS_EQUAL
150 int net_inserts = n - m;
152 min_theo_d = net_inserts < 0 ?
153 -net_inserts * del_c : net_inserts * ins_c;
154 if (min_theo_d > max_d)
156 if (ins_c + del_c < sub_c)
157 sub_c = ins_c + del_c;
158 max_theo_d = min_theo_d + sub_c *
Min(m, n);
159 if (max_d >= max_theo_d)
161 else if (ins_c + del_c > 0)
175 int slack_d = max_d - min_theo_d;
176 int best_column = net_inserts < 0 ? -net_inserts : 0;
178 stop_column = best_column + (slack_d / (ins_c + del_c)) + 1;
193 if (m != slen || n != tlen)
198 s_char_len = (
int *)
palloc((m + 1) *
sizeof(int));
199 for (
i = 0;
i < m; ++
i)
212 prev = (
int *)
palloc(2 * m *
sizeof(
int));
223 for (
y = target,
j = 1;
j < n;
j++)
227 int y_char_len = n != tlen + 1 ?
pg_mblen(
y) : 1;
230 #ifdef LEVENSHTEIN_LESS_EQUAL
240 prev[stop_column] = max_d + 1;
250 if (start_column == 0)
269 if (s_char_len != NULL)
276 int x_char_len = s_char_len[
i - 1];
287 ins = prev[
i] + ins_c;
288 del = curr[
i - 1] + del_c;
289 if (
x[x_char_len - 1] ==
y[y_char_len - 1]
290 && x_char_len == y_char_len &&
294 sub = prev[
i - 1] + sub_c;
297 curr[
i] =
Min(ins, del);
298 curr[
i] =
Min(curr[
i], sub);
313 ins = prev[
i] + ins_c;
314 del = curr[
i - 1] + del_c;
315 sub = prev[
i - 1] + ((*
x == *
y) ? 0 : sub_c);
318 curr[
i] =
Min(ins, del);
319 curr[
i] =
Min(curr[
i], sub);
334 #ifdef LEVENSHTEIN_LESS_EQUAL
353 int zp =
j - (n - m);
356 while (stop_column > 0)
358 int ii = stop_column - 1;
359 int net_inserts = ii - zp;
361 if (prev[ii] + (net_inserts > 0 ? net_inserts * ins_c :
362 -net_inserts * del_c) <= max_d)
368 while (start_column < stop_column)
370 int net_inserts = start_column - zp;
372 if (prev[start_column] +
373 (net_inserts > 0 ? net_inserts * ins_c :
374 -net_inserts * del_c) <= max_d)
382 prev[start_column] = max_d + 1;
383 curr[start_column] = max_d + 1;
384 if (start_column != 0)
385 source += (s_char_len != NULL) ? s_char_len[start_column - 1] : 1;
390 if (start_column >= stop_column)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
#define MAX_LEVENSHTEIN_STRLEN
int varstr_levenshtein(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, bool trusted)
int pg_mbstrlen_with_len(const char *mbstr, int limit)
int pg_mblen(const char *mbstr)
static rewind_source * source
static bool rest_of_char_same(const char *s1, const char *s2, int len)
int varstr_levenshtein_less_equal(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, int max_d, bool trusted)