PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
fuzzystrmatch.c File Reference
#include "postgres.h"
#include <ctype.h>
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
#include "utils/varlena.h"
Include dependency graph for fuzzystrmatch.c:

Go to the source code of this file.

Macros

#define SOUNDEX_LEN   4
 
#define MAX_METAPHONE_STRLEN   255
 
#define META_ERROR   FALSE
 
#define META_SUCCESS   TRUE
 
#define META_FAILURE   FALSE
 
#define SH   'X'
 
#define TH   '0'
 
#define isvowel(c)   (getcode(c) & 1) /* AEIOU */
 
#define NOCHANGE(c)   (getcode(c) & 2) /* FJMNR */
 
#define AFFECTH(c)   (getcode(c) & 4) /* CGPST */
 
#define MAKESOFT(c)   (getcode(c) & 8) /* EIY */
 
#define NOGHTOF(c)   (getcode(c) & 16) /* BDH */
 
#define Next_Letter   (toupper((unsigned char) word[w_idx+1]))
 
#define Curr_Letter   (toupper((unsigned char) word[w_idx]))
 
#define Look_Back_Letter(n)   (w_idx >= (n) ? toupper((unsigned char) word[w_idx-(n)]) : '\0')
 
#define Prev_Letter   (Look_Back_Letter(1))
 
#define After_Next_Letter   (Next_Letter != '\0' ? toupper((unsigned char) word[w_idx+2]) : '\0')
 
#define Look_Ahead_Letter(n)   toupper((unsigned char) Lookahead(word+w_idx, n))
 
#define Phonize(c)   do {(*phoned_word)[p_idx++] = c;} while (0)
 
#define End_Phoned_Word   do {(*phoned_word)[p_idx] = '\0';} while (0)
 
#define Phone_Len   (p_idx)
 
#define Isbreak(c)   (!isalpha((unsigned char) (c)))
 

Functions

static void _soundex (const char *instr, char *outstr)
 
static char soundex_code (char letter)
 
static char Lookahead (char *word, int how_far)
 
static int _metaphone (char *word, int max_phonemes, char **phoned_word)
 
static int getcode (char c)
 
 PG_FUNCTION_INFO_V1 (levenshtein_with_costs)
 
Datum levenshtein_with_costs (PG_FUNCTION_ARGS)
 
 PG_FUNCTION_INFO_V1 (levenshtein)
 
Datum levenshtein (PG_FUNCTION_ARGS)
 
 PG_FUNCTION_INFO_V1 (levenshtein_less_equal_with_costs)
 
Datum levenshtein_less_equal_with_costs (PG_FUNCTION_ARGS)
 
 PG_FUNCTION_INFO_V1 (levenshtein_less_equal)
 
Datum levenshtein_less_equal (PG_FUNCTION_ARGS)
 
 PG_FUNCTION_INFO_V1 (metaphone)
 
Datum metaphone (PG_FUNCTION_ARGS)
 
 PG_FUNCTION_INFO_V1 (soundex)
 
Datum soundex (PG_FUNCTION_ARGS)
 
 PG_FUNCTION_INFO_V1 (difference)
 
Datum difference (PG_FUNCTION_ARGS)
 

Variables

 PG_MODULE_MAGIC
 
static const char * soundex_table = "01230120022455012623010202"
 
static const char _codes [26]
 

Macro Definition Documentation

#define AFFECTH (   c)    (getcode(c) & 4) /* CGPST */

Definition at line 150 of file fuzzystrmatch.c.

Referenced by _metaphone().

#define After_Next_Letter   (Next_Letter != '\0' ? toupper((unsigned char) word[w_idx+2]) : '\0')

Definition at line 332 of file fuzzystrmatch.c.

Referenced by _metaphone().

#define Curr_Letter   (toupper((unsigned char) word[w_idx]))

Definition at line 325 of file fuzzystrmatch.c.

Referenced by _metaphone().

#define End_Phoned_Word   do {(*phoned_word)[p_idx] = '\0';} while (0)

Definition at line 357 of file fuzzystrmatch.c.

Referenced by _metaphone().

#define Isbreak (   c)    (!isalpha((unsigned char) (c)))

Definition at line 362 of file fuzzystrmatch.c.

Referenced by _metaphone().

#define isvowel (   c)    (getcode(c) & 1) /* AEIOU */

Definition at line 144 of file fuzzystrmatch.c.

Referenced by _metaphone().

#define Look_Ahead_Letter (   n)    toupper((unsigned char) Lookahead(word+w_idx, n))

Definition at line 334 of file fuzzystrmatch.c.

Referenced by _metaphone().

#define Look_Back_Letter (   n)    (w_idx >= (n) ? toupper((unsigned char) word[w_idx-(n)]) : '\0')

Definition at line 327 of file fuzzystrmatch.c.

Referenced by _metaphone().

#define MAKESOFT (   c)    (getcode(c) & 8) /* EIY */

Definition at line 153 of file fuzzystrmatch.c.

Referenced by _metaphone().

#define MAX_METAPHONE_STRLEN   255

Definition at line 72 of file fuzzystrmatch.c.

Referenced by metaphone().

#define META_ERROR   FALSE

Definition at line 104 of file fuzzystrmatch.c.

#define META_FAILURE   FALSE

Definition at line 106 of file fuzzystrmatch.c.

#define META_SUCCESS   TRUE

Definition at line 105 of file fuzzystrmatch.c.

Referenced by _metaphone(), and metaphone().

#define Next_Letter   (toupper((unsigned char) word[w_idx+1]))

Definition at line 323 of file fuzzystrmatch.c.

Referenced by _metaphone().

#define NOCHANGE (   c)    (getcode(c) & 2) /* FJMNR */

Definition at line 147 of file fuzzystrmatch.c.

#define NOGHTOF (   c)    (getcode(c) & 16) /* BDH */

Definition at line 156 of file fuzzystrmatch.c.

Referenced by _metaphone().

#define Phone_Len   (p_idx)

Definition at line 359 of file fuzzystrmatch.c.

Referenced by _metaphone().

#define Phonize (   c)    do {(*phoned_word)[p_idx++] = c;} while (0)

Definition at line 355 of file fuzzystrmatch.c.

Referenced by _metaphone().

#define Prev_Letter   (Look_Back_Letter(1))

Definition at line 330 of file fuzzystrmatch.c.

Referenced by _metaphone().

#define SH   'X'

Definition at line 115 of file fuzzystrmatch.c.

Referenced by _metaphone().

#define SOUNDEX_LEN   4

Definition at line 54 of file fuzzystrmatch.c.

Referenced by _soundex(), difference(), and soundex().

#define TH   '0'

Definition at line 116 of file fuzzystrmatch.c.

Referenced by _metaphone().

Function Documentation

static int _metaphone ( char *  word,
int  max_phonemes,
char **  phoned_word 
)
static

Definition at line 366 of file fuzzystrmatch.c.

References AFFECTH, After_Next_Letter, Curr_Letter, elog, End_Phoned_Word, ERROR, Isbreak, isvowel, Look_Ahead_Letter, Look_Back_Letter, MAKESOFT, META_SUCCESS, Next_Letter, NOGHTOF, NULL, palloc(), Phone_Len, Phonize, Prev_Letter, SH, and TH.

Referenced by metaphone().

369 {
370  int w_idx = 0; /* point in the phonization we're at. */
371  int p_idx = 0; /* end of the phoned phrase */
372 
373  /*-- Parameter checks --*/
374 
375  /*
376  * Shouldn't be necessary, but left these here anyway jec Aug 3, 2001
377  */
378 
379  /* Negative phoneme length is meaningless */
380  if (!(max_phonemes > 0))
381  /* internal error */
382  elog(ERROR, "metaphone: Requested output length must be > 0");
383 
384  /* Empty/null string is meaningless */
385  if ((word == NULL) || !(strlen(word) > 0))
386  /* internal error */
387  elog(ERROR, "metaphone: Input string length must be > 0");
388 
389  /*-- Allocate memory for our phoned_phrase --*/
390  if (max_phonemes == 0)
391  { /* Assume largest possible */
392  *phoned_word = palloc(sizeof(char) * strlen(word) +1);
393  }
394  else
395  {
396  *phoned_word = palloc(sizeof(char) * max_phonemes + 1);
397  }
398 
399  /*-- The first phoneme has to be processed specially. --*/
400  /* Find our first letter */
401  for (; !isalpha((unsigned char) (Curr_Letter)); w_idx++)
402  {
403  /* On the off chance we were given nothing but crap... */
404  if (Curr_Letter == '\0')
405  {
406  End_Phoned_Word;
407  return META_SUCCESS; /* For testing */
408  }
409  }
410 
411  switch (Curr_Letter)
412  {
413  /* AE becomes E */
414  case 'A':
415  if (Next_Letter == 'E')
416  {
417  Phonize('E');
418  w_idx += 2;
419  }
420  /* Remember, preserve vowels at the beginning */
421  else
422  {
423  Phonize('A');
424  w_idx++;
425  }
426  break;
427  /* [GKP]N becomes N */
428  case 'G':
429  case 'K':
430  case 'P':
431  if (Next_Letter == 'N')
432  {
433  Phonize('N');
434  w_idx += 2;
435  }
436  break;
437 
438  /*
439  * WH becomes H, WR becomes R, W is kept if followed by a vowel
440  */
441  case 'W':
442  if (Next_Letter == 'H' ||
443  Next_Letter == 'R')
444  {
445  Phonize(Next_Letter);
446  w_idx += 2;
447  }
448  else if (isvowel(Next_Letter))
449  {
450  Phonize('W');
451  w_idx += 2;
452  }
453  /* else ignore */
454  break;
455  /* X becomes S */
456  case 'X':
457  Phonize('S');
458  w_idx++;
459  break;
460  /* Vowels are kept */
461 
462  /*
463  * We did A already case 'A': case 'a':
464  */
465  case 'E':
466  case 'I':
467  case 'O':
468  case 'U':
469  Phonize(Curr_Letter);
470  w_idx++;
471  break;
472  default:
473  /* do nothing */
474  break;
475  }
476 
477 
478 
479  /* On to the metaphoning */
480  for (; Curr_Letter != '\0' &&
481  (max_phonemes == 0 || Phone_Len < max_phonemes);
482  w_idx++)
483  {
484  /*
485  * How many letters to skip because an earlier encoding handled
486  * multiple letters
487  */
488  unsigned short int skip_letter = 0;
489 
490 
491  /*
492  * THOUGHT: It would be nice if, rather than having things like...
493  * well, SCI. For SCI you encode the S, then have to remember to skip
494  * the C. So the phoneme SCI invades both S and C. It would be
495  * better, IMHO, to skip the C from the S part of the encoding. Hell,
496  * I'm trying it.
497  */
498 
499  /* Ignore non-alphas */
500  if (!isalpha((unsigned char) (Curr_Letter)))
501  continue;
502 
503  /* Drop duplicates, except CC */
504  if (Curr_Letter == Prev_Letter &&
505  Curr_Letter != 'C')
506  continue;
507 
508  switch (Curr_Letter)
509  {
510  /* B -> B unless in MB */
511  case 'B':
512  if (Prev_Letter != 'M')
513  Phonize('B');
514  break;
515 
516  /*
517  * 'sh' if -CIA- or -CH, but not SCH, except SCHW. (SCHW is
518  * handled in S) S if -CI-, -CE- or -CY- dropped if -SCI-,
519  * SCE-, -SCY- (handled in S) else K
520  */
521  case 'C':
522  if (MAKESOFT(Next_Letter))
523  { /* C[IEY] */
524  if (After_Next_Letter == 'A' &&
525  Next_Letter == 'I')
526  { /* CIA */
527  Phonize(SH);
528  }
529  /* SC[IEY] */
530  else if (Prev_Letter == 'S')
531  {
532  /* Dropped */
533  }
534  else
535  Phonize('S');
536  }
537  else if (Next_Letter == 'H')
538  {
539 #ifndef USE_TRADITIONAL_METAPHONE
540  if (After_Next_Letter == 'R' ||
541  Prev_Letter == 'S')
542  { /* Christ, School */
543  Phonize('K');
544  }
545  else
546  Phonize(SH);
547 #else
548  Phonize(SH);
549 #endif
550  skip_letter++;
551  }
552  else
553  Phonize('K');
554  break;
555 
556  /*
557  * J if in -DGE-, -DGI- or -DGY- else T
558  */
559  case 'D':
560  if (Next_Letter == 'G' &&
561  MAKESOFT(After_Next_Letter))
562  {
563  Phonize('J');
564  skip_letter++;
565  }
566  else
567  Phonize('T');
568  break;
569 
570  /*
571  * F if in -GH and not B--GH, D--GH, -H--GH, -H---GH else
572  * dropped if -GNED, -GN, else dropped if -DGE-, -DGI- or
573  * -DGY- (handled in D) else J if in -GE-, -GI, -GY and not GG
574  * else K
575  */
576  case 'G':
577  if (Next_Letter == 'H')
578  {
579  if (!(NOGHTOF(Look_Back_Letter(3)) ||
580  Look_Back_Letter(4) == 'H'))
581  {
582  Phonize('F');
583  skip_letter++;
584  }
585  else
586  {
587  /* silent */
588  }
589  }
590  else if (Next_Letter == 'N')
591  {
592  if (Isbreak(After_Next_Letter) ||
593  (After_Next_Letter == 'E' &&
594  Look_Ahead_Letter(3) == 'D'))
595  {
596  /* dropped */
597  }
598  else
599  Phonize('K');
600  }
601  else if (MAKESOFT(Next_Letter) &&
602  Prev_Letter != 'G')
603  Phonize('J');
604  else
605  Phonize('K');
606  break;
607  /* H if before a vowel and not after C,G,P,S,T */
608  case 'H':
609  if (isvowel(Next_Letter) &&
610  !AFFECTH(Prev_Letter))
611  Phonize('H');
612  break;
613 
614  /*
615  * dropped if after C else K
616  */
617  case 'K':
618  if (Prev_Letter != 'C')
619  Phonize('K');
620  break;
621 
622  /*
623  * F if before H else P
624  */
625  case 'P':
626  if (Next_Letter == 'H')
627  Phonize('F');
628  else
629  Phonize('P');
630  break;
631 
632  /*
633  * K
634  */
635  case 'Q':
636  Phonize('K');
637  break;
638 
639  /*
640  * 'sh' in -SH-, -SIO- or -SIA- or -SCHW- else S
641  */
642  case 'S':
643  if (Next_Letter == 'I' &&
644  (After_Next_Letter == 'O' ||
645  After_Next_Letter == 'A'))
646  Phonize(SH);
647  else if (Next_Letter == 'H')
648  {
649  Phonize(SH);
650  skip_letter++;
651  }
652 #ifndef USE_TRADITIONAL_METAPHONE
653  else if (Next_Letter == 'C' &&
654  Look_Ahead_Letter(2) == 'H' &&
655  Look_Ahead_Letter(3) == 'W')
656  {
657  Phonize(SH);
658  skip_letter += 2;
659  }
660 #endif
661  else
662  Phonize('S');
663  break;
664 
665  /*
666  * 'sh' in -TIA- or -TIO- else 'th' before H else T
667  */
668  case 'T':
669  if (Next_Letter == 'I' &&
670  (After_Next_Letter == 'O' ||
671  After_Next_Letter == 'A'))
672  Phonize(SH);
673  else if (Next_Letter == 'H')
674  {
675  Phonize(TH);
676  skip_letter++;
677  }
678  else
679  Phonize('T');
680  break;
681  /* F */
682  case 'V':
683  Phonize('F');
684  break;
685  /* W before a vowel, else dropped */
686  case 'W':
687  if (isvowel(Next_Letter))
688  Phonize('W');
689  break;
690  /* KS */
691  case 'X':
692  Phonize('K');
693  if (max_phonemes == 0 || Phone_Len < max_phonemes)
694  Phonize('S');
695  break;
696  /* Y if followed by a vowel */
697  case 'Y':
698  if (isvowel(Next_Letter))
699  Phonize('Y');
700  break;
701  /* S */
702  case 'Z':
703  Phonize('S');
704  break;
705  /* No transformation */
706  case 'F':
707  case 'J':
708  case 'L':
709  case 'M':
710  case 'N':
711  case 'R':
712  Phonize(Curr_Letter);
713  break;
714  default:
715  /* nothing */
716  break;
717  } /* END SWITCH */
718 
719  w_idx += skip_letter;
720  } /* END FOR */
721 
722  End_Phoned_Word;
723 
724  return (META_SUCCESS);
725 } /* END metaphone */
#define TH
#define META_SUCCESS
#define isvowel(c)
#define MAKESOFT(c)
#define ERROR
Definition: elog.h:43
#define Prev_Letter
#define End_Phoned_Word
#define Look_Back_Letter(n)
#define Isbreak(c)
#define Curr_Letter
#define Phonize(c)
#define After_Next_Letter
#define Phone_Len
#define NOGHTOF(c)
#define NULL
Definition: c.h:229
#define Next_Letter
#define AFFECTH(c)
static void word(struct vars *, int, struct state *, struct state *)
Definition: regcomp.c:1243
void * palloc(Size size)
Definition: mcxt.c:849
#define elog
Definition: elog.h:219
#define SH
#define Look_Ahead_Letter(n)
static void _soundex ( const char *  instr,
char *  outstr 
)
static

Definition at line 747 of file fuzzystrmatch.c.

References AssertArg, soundex_code(), and SOUNDEX_LEN.

Referenced by difference(), and soundex().

748 {
749  int count;
750 
751  AssertArg(instr);
752  AssertArg(outstr);
753 
754  outstr[SOUNDEX_LEN] = '\0';
755 
756  /* Skip leading non-alphabetic characters */
757  while (!isalpha((unsigned char) instr[0]) && instr[0])
758  ++instr;
759 
760  /* No string left */
761  if (!instr[0])
762  {
763  outstr[0] = (char) 0;
764  return;
765  }
766 
767  /* Take the first letter as is */
768  *outstr++ = (char) toupper((unsigned char) *instr++);
769 
770  count = 1;
771  while (*instr && count < SOUNDEX_LEN)
772  {
773  if (isalpha((unsigned char) *instr) &&
774  soundex_code(*instr) != soundex_code(*(instr - 1)))
775  {
776  *outstr = soundex_code(instr[0]);
777  if (*outstr != '0')
778  {
779  ++outstr;
780  ++count;
781  }
782  }
783  ++instr;
784  }
785 
786  /* Fill with 0's */
787  while (count < SOUNDEX_LEN)
788  {
789  *outstr = '0';
790  ++outstr;
791  ++count;
792  }
793 }
static char soundex_code(char letter)
Definition: fuzzystrmatch.c:60
#define AssertArg(condition)
Definition: c.h:677
#define SOUNDEX_LEN
Definition: fuzzystrmatch.c:54
Datum difference ( PG_FUNCTION_ARGS  )

Definition at line 798 of file fuzzystrmatch.c.

References _soundex(), i, PG_GETARG_TEXT_PP, PG_RETURN_INT32, result, SOUNDEX_LEN, and text_to_cstring().

Referenced by checkcondition_str(), ExtendMultiXactMember(), find_wordentry(), findJsonbValueFromContainer(), findObjectByCatalogId(), findObjectByOid(), findOwningExtension(), hstore_concat(), hstore_delete_array(), hstore_delete_hstore(), hstoreFindKey(), ScanCKeywordLookup(), and ScanKeywordLookup().

799 {
800  char sndx1[SOUNDEX_LEN + 1],
801  sndx2[SOUNDEX_LEN + 1];
802  int i,
803  result;
804 
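805  _soundex(text_to_cstring(PG_GETARG_TEXT_PP(0)), sndx1);
806  _soundex(text_to_cstring(PG_GETARG_TEXT_PP(1)), sndx2);
807 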
808  result = 0;
809  for (i = 0; i < SOUNDEX_LEN; i++)
810  {
811  if (sndx1[i] == sndx2[i])
812  result++;
813  }
814 
815  PG_RETURN_INT32(result);
816 }
#define PG_RETURN_INT32(x)
Definition: fmgr.h:314
return result
Definition: formatting.c:1618
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:273
static void _soundex(const char *instr, char *outstr)
#define SOUNDEX_LEN
Definition: fuzzystrmatch.c:54
char * text_to_cstring(const text *t)
Definition: varlena.c:182
int i
static int getcode ( char  c)
static

Definition at line 132 of file fuzzystrmatch.c.

References _codes.

133 {
134  if (isalpha((unsigned char) c))
135  {
136  c = toupper((unsigned char) c);
137  /* Defend against non-ASCII letters */
138  if (c >= 'A' && c <= 'Z')
139  return _codes[c - 'A'];
140  }
141  return 0;
142 }
static const char _codes[26]
char * c
Datum levenshtein ( PG_FUNCTION_ARGS  )

Definition at line 186 of file fuzzystrmatch.c.

References PG_GETARG_TEXT_PP, PG_RETURN_INT32, VARDATA_ANY, VARSIZE_ANY_EXHDR, and varstr_levenshtein().

187 {
188  text *src = PG_GETARG_TEXT_PP(0);
189  text *dst = PG_GETARG_TEXT_PP(1);
190  const char *s_data;
191  const char *t_data;
192  int s_bytes,
193  t_bytes;
194 
195  /* Extract a pointer to the actual character data */
196  s_data = VARDATA_ANY(src);
197  t_data = VARDATA_ANY(dst);
198  /* Determine length of each string in bytes */
199  s_bytes = VARSIZE_ANY_EXHDR(src);
200  t_bytes = VARSIZE_ANY_EXHDR(dst);
201 
202  PG_RETURN_INT32(varstr_levenshtein(s_data, s_bytes, t_data, t_bytes,
203  1, 1, 1, false));
204 }
#define VARDATA_ANY(PTR)
Definition: postgres.h:347
int varstr_levenshtein(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, bool trusted)
Definition: levenshtein.c:73
#define PG_RETURN_INT32(x)
Definition: fmgr.h:314
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:273
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:340
Definition: c.h:439
Datum levenshtein_less_equal ( PG_FUNCTION_ARGS  )

Definition at line 238 of file fuzzystrmatch.c.

References PG_GETARG_INT32, PG_GETARG_TEXT_PP, PG_RETURN_INT32, VARDATA_ANY, VARSIZE_ANY_EXHDR, and varstr_levenshtein_less_equal().

239 {
240  text *src = PG_GETARG_TEXT_PP(0);
241  text *dst = PG_GETARG_TEXT_PP(1);
242  int max_d = PG_GETARG_INT32(2);
243  const char *s_data;
244  const char *t_data;
245  int s_bytes,
246  t_bytes;
247 
248  /* Extract a pointer to the actual character data */
249  s_data = VARDATA_ANY(src);
250  t_data = VARDATA_ANY(dst);
251  /* Determine length of each string in bytes */
252  s_bytes = VARSIZE_ANY_EXHDR(src);
253  t_bytes = VARSIZE_ANY_EXHDR(dst);
254 
255  PG_RETURN_INT32(varstr_levenshtein_less_equal(s_data, s_bytes,
256  t_data, t_bytes,
257  1, 1, 1,
258  max_d, false));
259 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
#define VARDATA_ANY(PTR)
Definition: postgres.h:347
#define PG_RETURN_INT32(x)
Definition: fmgr.h:314
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:273
int varstr_levenshtein_less_equal(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, int max_d, bool trusted)
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:340
Definition: c.h:439
Datum levenshtein_less_equal_with_costs ( PG_FUNCTION_ARGS  )

Definition at line 209 of file fuzzystrmatch.c.

References PG_GETARG_INT32, PG_GETARG_TEXT_PP, PG_RETURN_INT32, VARDATA_ANY, VARSIZE_ANY_EXHDR, and varstr_levenshtein_less_equal().

210 {
211  text *src = PG_GETARG_TEXT_PP(0);
212  text *dst = PG_GETARG_TEXT_PP(1);
213  int ins_c = PG_GETARG_INT32(2);
214  int del_c = PG_GETARG_INT32(3);
215  int sub_c = PG_GETARG_INT32(4);
216  int max_d = PG_GETARG_INT32(5);
217  const char *s_data;
218  const char *t_data;
219  int s_bytes,
220  t_bytes;
221 
222  /* Extract a pointer to the actual character data */
223  s_data = VARDATA_ANY(src);
224  t_data = VARDATA_ANY(dst);
225  /* Determine length of each string in bytes */
226  s_bytes = VARSIZE_ANY_EXHDR(src);
227  t_bytes = VARSIZE_ANY_EXHDR(dst);
228 
229  PG_RETURN_INT32(varstr_levenshtein_less_equal(s_data, s_bytes,
230  t_data, t_bytes,
231  ins_c, del_c, sub_c,
232  max_d, false));
233 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
#define VARDATA_ANY(PTR)
Definition: postgres.h:347
#define PG_RETURN_INT32(x)
Definition: fmgr.h:314
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:273
int varstr_levenshtein_less_equal(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, int max_d, bool trusted)
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:340
Definition: c.h:439
Datum levenshtein_with_costs ( PG_FUNCTION_ARGS  )

Definition at line 160 of file fuzzystrmatch.c.

References PG_GETARG_INT32, PG_GETARG_TEXT_PP, PG_RETURN_INT32, VARDATA_ANY, VARSIZE_ANY_EXHDR, and varstr_levenshtein().

161 {
162  text *src = PG_GETARG_TEXT_PP(0);
163  text *dst = PG_GETARG_TEXT_PP(1);
164  int ins_c = PG_GETARG_INT32(2);
165  int del_c = PG_GETARG_INT32(3);
166  int sub_c = PG_GETARG_INT32(4);
167  const char *s_data;
168  const char *t_data;
169  int s_bytes,
170  t_bytes;
171 
172  /* Extract a pointer to the actual character data */
173  s_data = VARDATA_ANY(src);
174  t_data = VARDATA_ANY(dst);
175  /* Determine length of each string in bytes */
176  s_bytes = VARSIZE_ANY_EXHDR(src);
177  t_bytes = VARSIZE_ANY_EXHDR(dst);
178 
179  PG_RETURN_INT32(varstr_levenshtein(s_data, s_bytes, t_data, t_bytes,
180  ins_c, del_c, sub_c, false));
181 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
#define VARDATA_ANY(PTR)
Definition: postgres.h:347
int varstr_levenshtein(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, bool trusted)
Definition: levenshtein.c:73
#define PG_RETURN_INT32(x)
Definition: fmgr.h:314
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:273
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:340
Definition: c.h:439
static char Lookahead ( char *  word,
int  how_far 
)
static

Definition at line 340 of file fuzzystrmatch.c.

References idx().

341 {
342  char letter_ahead = '\0'; /* null by default */
343  int idx;
344 
345  for (idx = 0; word[idx] != '\0' && idx < how_far; idx++);
346  /* Edge forward in the string... */
347 
348  letter_ahead = word[idx]; /* idx will be either == to how_far or at the
349  * end of the string */
350  return letter_ahead;
351 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:264
static void word(struct vars *, int, struct state *, struct state *)
Definition: regcomp.c:1243
Datum metaphone ( PG_FUNCTION_ARGS  )

Definition at line 269 of file fuzzystrmatch.c.

References _metaphone(), cstring_to_text(), elog, ereport, errcode(), errmsg(), ERROR, MAX_METAPHONE_STRLEN, META_SUCCESS, PG_GETARG_DATUM, PG_GETARG_INT32, PG_RETURN_NULL, PG_RETURN_TEXT_P, and TextDatumGetCString.

270 {
271  char *str_i = TextDatumGetCString(PG_GETARG_DATUM(0));
272  size_t str_i_len = strlen(str_i);
273  int reqlen;
274  char *metaph;
275  int retval;
276 
277  /* return an empty string if we receive one */
278  if (!(str_i_len > 0))
279  PG_RETURN_TEXT_P(cstring_to_text(""));
280 
281  if (str_i_len > MAX_METAPHONE_STRLEN)
282  ereport(ERROR,
283  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
284  errmsg("argument exceeds the maximum length of %d bytes",
285  MAX_METAPHONE_STRLEN)));
286 
287  reqlen = PG_GETARG_INT32(1);
288  if (reqlen > MAX_METAPHONE_STRLEN)
289  ereport(ERROR,
290  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
291  errmsg("output exceeds the maximum length of %d bytes",
292  MAX_METAPHONE_STRLEN)));
293 
294  if (!(reqlen > 0))
295  ereport(ERROR,
296  (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
297  errmsg("output cannot be empty string")));
298 
299 
300  retval = _metaphone(str_i, reqlen, &metaph);
301  if (retval == META_SUCCESS)
302  PG_RETURN_TEXT_P(cstring_to_text(metaph));
303  else
304  {
305  /* internal error */
306  elog(ERROR, "metaphone: failure");
307  /* keep the compiler quiet */
308  PG_RETURN_NULL();
309  }
310 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
#define META_SUCCESS
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:233
int errcode(int sqlerrcode)
Definition: elog.c:575
static int _metaphone(char *word, int max_phonemes, char **phoned_word)
#define ERROR
Definition: elog.h:43
#define ereport(elevel, rest)
Definition: elog.h:122
#define TextDatumGetCString(d)
Definition: builtins.h:92
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:330
text * cstring_to_text(const char *s)
Definition: varlena.c:149
#define MAX_METAPHONE_STRLEN
Definition: fuzzystrmatch.c:72
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define elog
Definition: elog.h:219
#define PG_RETURN_NULL()
Definition: fmgr.h:305
PG_FUNCTION_INFO_V1 ( levenshtein_with_costs  )
PG_FUNCTION_INFO_V1 ( levenshtein  )
PG_FUNCTION_INFO_V1 ( levenshtein_less_equal_with_costs  )
PG_FUNCTION_INFO_V1 ( levenshtein_less_equal  )
PG_FUNCTION_INFO_V1 ( metaphone  )
PG_FUNCTION_INFO_V1 ( soundex  )
PG_FUNCTION_INFO_V1 ( difference  )
Datum soundex ( PG_FUNCTION_ARGS  )

Definition at line 734 of file fuzzystrmatch.c.

References _soundex(), arg, cstring_to_text(), PG_GETARG_TEXT_PP, PG_RETURN_TEXT_P, SOUNDEX_LEN, and text_to_cstring().

735 {
736  char outstr[SOUNDEX_LEN + 1];
737  char *arg;
738 
739  arg = text_to_cstring(PG_GETARG_TEXT_PP(0));
740 
741  _soundex(arg, outstr);
742 
743  PG_RETURN_TEXT_P(cstring_to_text(outstr));
744 }
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:273
static void _soundex(const char *instr, char *outstr)
#define SOUNDEX_LEN
Definition: fuzzystrmatch.c:54
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:330
text * cstring_to_text(const char *s)
Definition: varlena.c:149
char * text_to_cstring(const text *t)
Definition: varlena.c:182
void * arg
static char soundex_code ( char  letter)
static

Definition at line 60 of file fuzzystrmatch.c.

References soundex_table.

Referenced by _soundex().

61 {
62  letter = toupper((unsigned char) letter);
63  /* Defend against non-ASCII letters */
64  if (letter >= 'A' && letter <= 'Z')
65  return soundex_table[letter - 'A'];
66  return letter;
67 }
static const char * soundex_table
Definition: fuzzystrmatch.c:57

Variable Documentation

const char _codes[26]
static
Initial value:
= {
1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2, 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0
}

Definition at line 126 of file fuzzystrmatch.c.

Referenced by getcode().

PG_MODULE_MAGIC

Definition at line 47 of file fuzzystrmatch.c.

const char* soundex_table = "01230120022455012623010202"
static

Definition at line 57 of file fuzzystrmatch.c.

Referenced by soundex_code().