78{
79 int m,
80 n;
81 int *prev;
82 int *curr;
83 int *s_char_len = NULL;
86
87
88
89
90
91
92#ifdef LEVENSHTEIN_LESS_EQUAL
93 int start_column,
94 stop_column;
95
96#undef START_COLUMN
97#undef STOP_COLUMN
98#define START_COLUMN start_column
99#define STOP_COLUMN stop_column
100#else
101#undef START_COLUMN
102#undef STOP_COLUMN
103#define START_COLUMN 0
104#define STOP_COLUMN m
105#endif
106
107
110
111
112
113
114
115 if (!m)
116 return n * ins_c;
117 if (!n)
118 return m * del_c;
119
120
121
122
123
124
125
126
127 if (!trusted &&
131 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
132 errmsg(
"levenshtein argument exceeds maximum length of %d characters",
134
135#ifdef LEVENSHTEIN_LESS_EQUAL
136
137 start_column = 0;
138 stop_column = m + 1;
139
140
141
142
143
144
145
146 if (max_d >= 0)
147 {
148 int min_theo_d;
149 int max_theo_d;
150 int net_inserts = n - m;
151
152 min_theo_d = net_inserts < 0 ?
153 -net_inserts * del_c : net_inserts * ins_c;
154 if (min_theo_d > max_d)
155 return max_d + 1;
156 if (ins_c + del_c < sub_c)
157 sub_c = ins_c + del_c;
158 max_theo_d = min_theo_d + sub_c *
Min(m, n);
159 if (max_d >= max_theo_d)
160 max_d = -1;
161 else if (ins_c + del_c > 0)
162 {
163
164
165
166
167
168
169
170
171
172
173
174
175 int slack_d = max_d - min_theo_d;
176 int best_column = net_inserts < 0 ? -net_inserts : 0;
177
178 stop_column = best_column + (slack_d / (ins_c + del_c)) + 1;
179 if (stop_column > m)
180 stop_column = m + 1;
181 }
182 }
183#endif
184
185
186
187
188
189
190
191
192
193 if (m != slen || n != tlen)
194 {
197
198 s_char_len = (
int *)
palloc((m + 1) *
sizeof(int));
199 for (
i = 0;
i < m; ++
i)
200 {
203 }
205 }
206
207
208 ++m;
209 ++n;
210
211
212 prev = (
int *)
palloc(2 * m *
sizeof(
int));
213 curr = prev + m;
214
215
216
217
218
221
222
223 for (
y = target,
j = 1;
j < n;
j++)
224 {
225 int *temp;
227 int y_char_len = n != tlen + 1 ?
pg_mblen(
y) : 1;
229
230#ifdef LEVENSHTEIN_LESS_EQUAL
231
232
233
234
235
236
237
238 if (stop_column < m)
239 {
240 prev[stop_column] = max_d + 1;
241 ++stop_column;
242 }
243
244
245
246
247
248
249
250 if (start_column == 0)
251 {
254 }
255 else
257#else
260#endif
261
262
263
264
265
266
267
268
269 if (s_char_len != NULL)
270 {
272 {
273 int ins;
274 int del;
275 int sub;
276 int x_char_len = s_char_len[
i - 1];
277
278
279
280
281
282
283
284
285
286
287 ins = prev[
i] + ins_c;
288 del = curr[
i - 1] + del_c;
289 if (
x[x_char_len - 1] ==
y[y_char_len - 1]
290 && x_char_len == y_char_len &&
293 else
294 sub = prev[
i - 1] + sub_c;
295
296
297 curr[
i] =
Min(ins, del);
298 curr[
i] =
Min(curr[
i], sub);
299
300
302 }
303 }
304 else
305 {
307 {
308 int ins;
309 int del;
310 int sub;
311
312
313 ins = prev[
i] + ins_c;
314 del = curr[
i - 1] + del_c;
315 sub = prev[
i - 1] + ((*
x == *
y) ? 0 : sub_c);
316
317
318 curr[
i] =
Min(ins, del);
319 curr[
i] =
Min(curr[
i], sub);
320
321
323 }
324 }
325
326
327 temp = curr;
328 curr = prev;
329 prev = temp;
330
331
333
334#ifdef LEVENSHTEIN_LESS_EQUAL
335
336
337
338
339
340
341
342
343 if (max_d >= 0)
344 {
345
346
347
348
349
350
351
352
353 int zp =
j - (n - m);
354
355
356 while (stop_column > 0)
357 {
358 int ii = stop_column - 1;
359 int net_inserts = ii - zp;
360
361 if (prev[ii] + (net_inserts > 0 ? net_inserts * ins_c :
362 -net_inserts * del_c) <= max_d)
363 break;
364 stop_column--;
365 }
366
367
368 while (start_column < stop_column)
369 {
370 int net_inserts = start_column - zp;
371
372 if (prev[start_column] +
373 (net_inserts > 0 ? net_inserts * ins_c :
374 -net_inserts * del_c) <= max_d)
375 break;
376
377
378
379
380
381
382 prev[start_column] = max_d + 1;
383 curr[start_column] = max_d + 1;
384 if (start_column != 0)
385 source += (s_char_len != NULL) ? s_char_len[start_column - 1] : 1;
386 start_column++;
387 }
388
389
390 if (start_column >= stop_column)
391 return max_d + 1;
392 }
393#endif
394 }
395
396
397
398
399
400 return prev[m - 1];
401}
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
#define MAX_LEVENSHTEIN_STRLEN
int pg_mbstrlen_with_len(const char *mbstr, int limit)
int pg_mblen(const char *mbstr)
static rewind_source * source
static bool rest_of_char_same(const char *s1, const char *s2, int len)