PostgreSQL Source Code  git master
dmetaphone.c File Reference
#include "postgres.h"
#include "utils/builtins.h"
#include <assert.h>
#include <ctype.h>
Include dependency graph for dmetaphone.c:

Go to the source code of this file.

Data Structures

struct  metastring
 

Macros

#define NDEBUG
 
#define META_MALLOC(v, n, t)   (v = (t*)palloc(((n)*sizeof(t))))
 
#define META_REALLOC(v, n, t)   (v = (t*)repalloc((v),((n)*sizeof(t))))
 
#define META_FREE(x)   ((void)true) /* pfree((x)) */
 

Functions

static void DoubleMetaphone (char *, char **)
 
 PG_FUNCTION_INFO_V1 (dmetaphone)
 
Datum dmetaphone (PG_FUNCTION_ARGS)
 
 PG_FUNCTION_INFO_V1 (dmetaphone_alt)
 
Datum dmetaphone_alt (PG_FUNCTION_ARGS)
 
static metastring * NewMetaString (const char *init_str)
 
static void DestroyMetaString (metastring *s)
 
static void IncreaseBuffer (metastring *s, int chars_needed)
 
static void MakeUpper (metastring *s)
 
static int IsVowel (metastring *s, int pos)
 
static int SlavoGermanic (metastring *s)
 
static char GetAt (metastring *s, int pos)
 
static void SetAt (metastring *s, int pos, char c)
 
static int StringAt (metastring *s, int start, int length,...)
 
static void MetaphAdd (metastring *s, const char *new_str)
 

Macro Definition Documentation

◆ META_FREE

#define META_FREE (   x)    ((void)true) /* pfree((x)) */

Definition at line 200 of file dmetaphone.c.

Referenced by DestroyMetaString().

◆ META_MALLOC

#define META_MALLOC (   v,
  n,
  t 
)    (v = (t*)palloc(((n)*sizeof(t))))

Definition at line 187 of file dmetaphone.c.

Referenced by NewMetaString().

◆ META_REALLOC

#define META_REALLOC (   v,
  n,
  t 
)    (v = (t*)repalloc((v),((n)*sizeof(t))))

Definition at line 190 of file dmetaphone.c.

Referenced by IncreaseBuffer().

◆ NDEBUG

#define NDEBUG

Definition at line 104 of file dmetaphone.c.

Function Documentation

◆ DestroyMetaString()

static void DestroyMetaString ( metastring * s)
static

Definition at line 260 of file dmetaphone.c.

References metastring::free_string_on_destroy, META_FREE, and metastring::str.

Referenced by DoubleMetaphone().

261 {
262  if (s == NULL)
263  return;
264 
265  if (s->free_string_on_destroy && (s->str != NULL))
266  META_FREE(s->str);
267 
268  META_FREE(s);
269 }
#define META_FREE(x)
Definition: dmetaphone.c:200
char * str
Definition: dmetaphone.c:220
int free_string_on_destroy
Definition: dmetaphone.c:223

◆ dmetaphone()

Datum dmetaphone ( PG_FUNCTION_ARGS  )

Definition at line 131 of file dmetaphone.c.

References arg, cstring_to_text(), dmetaphone_alt(), DoubleMetaphone(), PG_ARGISNULL, PG_FUNCTION_INFO_V1(), PG_GETARG_TEXT_PP, PG_RETURN_NULL, PG_RETURN_TEXT_P, and text_to_cstring().

132 {
133  text *arg;
134  char *aptr,
135  *codes[2],
136  *code;
137 
138 #ifdef DMETAPHONE_NOSTRICT
139  if (PG_ARGISNULL(0))
140  PG_RETURN_NULL();
141 #endif
142  arg = PG_GETARG_TEXT_PP(0);
143  aptr = text_to_cstring(arg);
144 
145  DoubleMetaphone(aptr, codes);
146  code = codes[0];
147  if (!code)
148  code = "";
149 
150  PG_RETURN_TEXT_P(cstring_to_text(code));
151 }
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:273
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:331
text * cstring_to_text(const char *s)
Definition: varlena.c:149
#define PG_ARGISNULL(n)
Definition: fmgr.h:174
char * text_to_cstring(const text *t)
Definition: varlena.c:182
void * arg
Definition: c.h:487
static void DoubleMetaphone(char *, char **)
Definition: dmetaphone.c:395
#define PG_RETURN_NULL()
Definition: fmgr.h:305

◆ dmetaphone_alt()

Datum dmetaphone_alt ( PG_FUNCTION_ARGS  )

Definition at line 160 of file dmetaphone.c.

References arg, cstring_to_text(), DoubleMetaphone(), PG_ARGISNULL, PG_GETARG_TEXT_PP, PG_RETURN_NULL, PG_RETURN_TEXT_P, and text_to_cstring().

Referenced by dmetaphone().

161 {
162  text *arg;
163  char *aptr,
164  *codes[2],
165  *code;
166 
167 #ifdef DMETAPHONE_NOSTRICT
168  if (PG_ARGISNULL(0))
169  PG_RETURN_NULL();
170 #endif
171  arg = PG_GETARG_TEXT_PP(0);
172  aptr = text_to_cstring(arg);
173 
174  DoubleMetaphone(aptr, codes);
175  code = codes[1];
176  if (!code)
177  code = "";
178 
179  PG_RETURN_TEXT_P(cstring_to_text(code));
180 }
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:273
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:331
text * cstring_to_text(const char *s)
Definition: varlena.c:149
#define PG_ARGISNULL(n)
Definition: fmgr.h:174
char * text_to_cstring(const text *t)
Definition: varlena.c:182
void * arg
Definition: c.h:487
static void DoubleMetaphone(char *, char **)
Definition: dmetaphone.c:395
#define PG_RETURN_NULL()
Definition: fmgr.h:305

◆ DoubleMetaphone()

static void DoubleMetaphone ( char *  str,
char **  codes 
)
static

Definition at line 395 of file dmetaphone.c.

References DestroyMetaString(), metastring::free_string_on_destroy, GetAt(), IsVowel(), metastring::length, length(), main(), MakeUpper(), MetaphAdd(), NewMetaString(), SetAt(), SlavoGermanic(), metastring::str, and StringAt().

Referenced by dmetaphone(), and dmetaphone_alt().

396 {
397  int length;
398  metastring *original;
399  metastring *primary;
400  metastring *secondary;
401  int current;
402  int last;
403 
404  current = 0;
405  /* we need the real length and last prior to padding */
406  length = strlen(str);
407  last = length - 1;
408  original = NewMetaString(str);
409  /* Pad original so we can index beyond end */
410  MetaphAdd(original, " ");
411 
412  primary = NewMetaString("");
413  secondary = NewMetaString("");
414  primary->free_string_on_destroy = 0;
415  secondary->free_string_on_destroy = 0;
416 
417  MakeUpper(original);
418 
419  /* skip these when at start of word */
420  if (StringAt(original, 0, 2, "GN", "KN", "PN", "WR", "PS", ""))
421  current += 1;
422 
423  /* Initial 'X' is pronounced 'Z' e.g. 'Xavier' */
424  if (GetAt(original, 0) == 'X')
425  {
426  MetaphAdd(primary, "S"); /* 'Z' maps to 'S' */
427  MetaphAdd(secondary, "S");
428  current += 1;
429  }
430 
431  /* main loop */
432  while ((primary->length < 4) || (secondary->length < 4))
433  {
434  if (current >= length)
435  break;
436 
437  switch (GetAt(original, current))
438  {
439  case 'A':
440  case 'E':
441  case 'I':
442  case 'O':
443  case 'U':
444  case 'Y':
445  if (current == 0)
446  {
447  /* all init vowels now map to 'A' */
448  MetaphAdd(primary, "A");
449  MetaphAdd(secondary, "A");
450  }
451  current += 1;
452  break;
453 
454  case 'B':
455 
456  /* "-mb", e.g", "dumb", already skipped over... */
457  MetaphAdd(primary, "P");
458  MetaphAdd(secondary, "P");
459 
460  if (GetAt(original, current + 1) == 'B')
461  current += 2;
462  else
463  current += 1;
464  break;
465 
466  case '\xc7': /* C with cedilla */
467  MetaphAdd(primary, "S");
468  MetaphAdd(secondary, "S");
469  current += 1;
470  break;
471 
472  case 'C':
473  /* various germanic */
474  if ((current > 1)
475  && !IsVowel(original, current - 2)
476  && StringAt(original, (current - 1), 3, "ACH", "")
477  && ((GetAt(original, current + 2) != 'I')
478  && ((GetAt(original, current + 2) != 'E')
479  || StringAt(original, (current - 2), 6, "BACHER",
480  "MACHER", ""))))
481  {
482  MetaphAdd(primary, "K");
483  MetaphAdd(secondary, "K");
484  current += 2;
485  break;
486  }
487 
488  /* special case 'caesar' */
489  if ((current == 0)
490  && StringAt(original, current, 6, "CAESAR", ""))
491  {
492  MetaphAdd(primary, "S");
493  MetaphAdd(secondary, "S");
494  current += 2;
495  break;
496  }
497 
498  /* italian 'chianti' */
499  if (StringAt(original, current, 4, "CHIA", ""))
500  {
501  MetaphAdd(primary, "K");
502  MetaphAdd(secondary, "K");
503  current += 2;
504  break;
505  }
506 
507  if (StringAt(original, current, 2, "CH", ""))
508  {
509  /* find 'michael' */
510  if ((current > 0)
511  && StringAt(original, current, 4, "CHAE", ""))
512  {
513  MetaphAdd(primary, "K");
514  MetaphAdd(secondary, "X");
515  current += 2;
516  break;
517  }
518 
519  /* greek roots e.g. 'chemistry', 'chorus' */
520  if ((current == 0)
521  && (StringAt(original, (current + 1), 5,
522  "HARAC", "HARIS", "")
523  || StringAt(original, (current + 1), 3, "HOR",
524  "HYM", "HIA", "HEM", ""))
525  && !StringAt(original, 0, 5, "CHORE", ""))
526  {
527  MetaphAdd(primary, "K");
528  MetaphAdd(secondary, "K");
529  current += 2;
530  break;
531  }
532 
533  /* germanic, greek, or otherwise 'ch' for 'kh' sound */
534  if (
535  (StringAt(original, 0, 4, "VAN ", "VON ", "")
536  || StringAt(original, 0, 3, "SCH", ""))
537  /* 'architect but not 'arch', 'orchestra', 'orchid' */
538  || StringAt(original, (current - 2), 6, "ORCHES",
539  "ARCHIT", "ORCHID", "")
540  || StringAt(original, (current + 2), 1, "T", "S",
541  "")
542  || ((StringAt(original, (current - 1), 1,
543  "A", "O", "U", "E", "")
544  || (current == 0))
545 
546  /*
547  * e.g., 'wachtler', 'wechsler', but not 'tichner'
548  */
549  && StringAt(original, (current + 2), 1, "L", "R",
550  "N", "M", "B", "H", "F", "V", "W",
551  " ", "")))
552  {
553  MetaphAdd(primary, "K");
554  MetaphAdd(secondary, "K");
555  }
556  else
557  {
558  if (current > 0)
559  {
560  if (StringAt(original, 0, 2, "MC", ""))
561  {
562  /* e.g., "McHugh" */
563  MetaphAdd(primary, "K");
564  MetaphAdd(secondary, "K");
565  }
566  else
567  {
568  MetaphAdd(primary, "X");
569  MetaphAdd(secondary, "K");
570  }
571  }
572  else
573  {
574  MetaphAdd(primary, "X");
575  MetaphAdd(secondary, "X");
576  }
577  }
578  current += 2;
579  break;
580  }
581  /* e.g, 'czerny' */
582  if (StringAt(original, current, 2, "CZ", "")
583  && !StringAt(original, (current - 2), 4, "WICZ", ""))
584  {
585  MetaphAdd(primary, "S");
586  MetaphAdd(secondary, "X");
587  current += 2;
588  break;
589  }
590 
591  /* e.g., 'focaccia' */
592  if (StringAt(original, (current + 1), 3, "CIA", ""))
593  {
594  MetaphAdd(primary, "X");
595  MetaphAdd(secondary, "X");
596  current += 3;
597  break;
598  }
599 
600  /* double 'C', but not if e.g. 'McClellan' */
601  if (StringAt(original, current, 2, "CC", "")
602  && !((current == 1) && (GetAt(original, 0) == 'M')))
603  {
604  /* 'bellocchio' but not 'bacchus' */
605  if (StringAt(original, (current + 2), 1, "I", "E", "H", "")
606  && !StringAt(original, (current + 2), 2, "HU", ""))
607  {
608  /* 'accident', 'accede' 'succeed' */
609  if (
610  ((current == 1)
611  && (GetAt(original, current - 1) == 'A'))
612  || StringAt(original, (current - 1), 5, "UCCEE",
613  "UCCES", ""))
614  {
615  MetaphAdd(primary, "KS");
616  MetaphAdd(secondary, "KS");
617  /* 'bacci', 'bertucci', other italian */
618  }
619  else
620  {
621  MetaphAdd(primary, "X");
622  MetaphAdd(secondary, "X");
623  }
624  current += 3;
625  break;
626  }
627  else
628  { /* Pierce's rule */
629  MetaphAdd(primary, "K");
630  MetaphAdd(secondary, "K");
631  current += 2;
632  break;
633  }
634  }
635 
636  if (StringAt(original, current, 2, "CK", "CG", "CQ", ""))
637  {
638  MetaphAdd(primary, "K");
639  MetaphAdd(secondary, "K");
640  current += 2;
641  break;
642  }
643 
644  if (StringAt(original, current, 2, "CI", "CE", "CY", ""))
645  {
646  /* italian vs. english */
647  if (StringAt
648  (original, current, 3, "CIO", "CIE", "CIA", ""))
649  {
650  MetaphAdd(primary, "S");
651  MetaphAdd(secondary, "X");
652  }
653  else
654  {
655  MetaphAdd(primary, "S");
656  MetaphAdd(secondary, "S");
657  }
658  current += 2;
659  break;
660  }
661 
662  /* else */
663  MetaphAdd(primary, "K");
664  MetaphAdd(secondary, "K");
665 
666  /* name sent in 'mac caffrey', 'mac gregor */
667  if (StringAt(original, (current + 1), 2, " C", " Q", " G", ""))
668  current += 3;
669  else if (StringAt(original, (current + 1), 1, "C", "K", "Q", "")
670  && !StringAt(original, (current + 1), 2,
671  "CE", "CI", ""))
672  current += 2;
673  else
674  current += 1;
675  break;
676 
677  case 'D':
678  if (StringAt(original, current, 2, "DG", ""))
679  {
680  if (StringAt(original, (current + 2), 1,
681  "I", "E", "Y", ""))
682  {
683  /* e.g. 'edge' */
684  MetaphAdd(primary, "J");
685  MetaphAdd(secondary, "J");
686  current += 3;
687  break;
688  }
689  else
690  {
691  /* e.g. 'edgar' */
692  MetaphAdd(primary, "TK");
693  MetaphAdd(secondary, "TK");
694  current += 2;
695  break;
696  }
697  }
698 
699  if (StringAt(original, current, 2, "DT", "DD", ""))
700  {
701  MetaphAdd(primary, "T");
702  MetaphAdd(secondary, "T");
703  current += 2;
704  break;
705  }
706 
707  /* else */
708  MetaphAdd(primary, "T");
709  MetaphAdd(secondary, "T");
710  current += 1;
711  break;
712 
713  case 'F':
714  if (GetAt(original, current + 1) == 'F')
715  current += 2;
716  else
717  current += 1;
718  MetaphAdd(primary, "F");
719  MetaphAdd(secondary, "F");
720  break;
721 
722  case 'G':
723  if (GetAt(original, current + 1) == 'H')
724  {
725  if ((current > 0) && !IsVowel(original, current - 1))
726  {
727  MetaphAdd(primary, "K");
728  MetaphAdd(secondary, "K");
729  current += 2;
730  break;
731  }
732 
733  if (current < 3)
734  {
735  /* 'ghislane', ghiradelli */
736  if (current == 0)
737  {
738  if (GetAt(original, current + 2) == 'I')
739  {
740  MetaphAdd(primary, "J");
741  MetaphAdd(secondary, "J");
742  }
743  else
744  {
745  MetaphAdd(primary, "K");
746  MetaphAdd(secondary, "K");
747  }
748  current += 2;
749  break;
750  }
751  }
752 
753  /*
754  * Parker's rule (with some further refinements) - e.g.,
755  * 'hugh'
756  */
757  if (
758  ((current > 1)
759  && StringAt(original, (current - 2), 1,
760  "B", "H", "D", ""))
761  /* e.g., 'bough' */
762  || ((current > 2)
763  && StringAt(original, (current - 3), 1,
764  "B", "H", "D", ""))
765  /* e.g., 'broughton' */
766  || ((current > 3)
767  && StringAt(original, (current - 4), 1,
768  "B", "H", "")))
769  {
770  current += 2;
771  break;
772  }
773  else
774  {
775  /*
776  * e.g., 'laugh', 'McLaughlin', 'cough', 'gough',
777  * 'rough', 'tough'
778  */
779  if ((current > 2)
780  && (GetAt(original, current - 1) == 'U')
781  && StringAt(original, (current - 3), 1, "C",
782  "G", "L", "R", "T", ""))
783  {
784  MetaphAdd(primary, "F");
785  MetaphAdd(secondary, "F");
786  }
787  else if ((current > 0)
788  && GetAt(original, current - 1) != 'I')
789  {
790 
791 
792  MetaphAdd(primary, "K");
793  MetaphAdd(secondary, "K");
794  }
795 
796  current += 2;
797  break;
798  }
799  }
800 
801  if (GetAt(original, current + 1) == 'N')
802  {
803  if ((current == 1) && IsVowel(original, 0)
804  && !SlavoGermanic(original))
805  {
806  MetaphAdd(primary, "KN");
807  MetaphAdd(secondary, "N");
808  }
809  else
810  /* not e.g. 'cagney' */
811  if (!StringAt(original, (current + 2), 2, "EY", "")
812  && (GetAt(original, current + 1) != 'Y')
813  && !SlavoGermanic(original))
814  {
815  MetaphAdd(primary, "N");
816  MetaphAdd(secondary, "KN");
817  }
818  else
819  {
820  MetaphAdd(primary, "KN");
821  MetaphAdd(secondary, "KN");
822  }
823  current += 2;
824  break;
825  }
826 
827  /* 'tagliaro' */
828  if (StringAt(original, (current + 1), 2, "LI", "")
829  && !SlavoGermanic(original))
830  {
831  MetaphAdd(primary, "KL");
832  MetaphAdd(secondary, "L");
833  current += 2;
834  break;
835  }
836 
837  /* -ges-,-gep-,-gel-, -gie- at beginning */
838  if ((current == 0)
839  && ((GetAt(original, current + 1) == 'Y')
840  || StringAt(original, (current + 1), 2, "ES", "EP",
841  "EB", "EL", "EY", "IB", "IL", "IN", "IE",
842  "EI", "ER", "")))
843  {
844  MetaphAdd(primary, "K");
845  MetaphAdd(secondary, "J");
846  current += 2;
847  break;
848  }
849 
850  /* -ger-, -gy- */
851  if (
852  (StringAt(original, (current + 1), 2, "ER", "")
853  || (GetAt(original, current + 1) == 'Y'))
854  && !StringAt(original, 0, 6,
855  "DANGER", "RANGER", "MANGER", "")
856  && !StringAt(original, (current - 1), 1, "E", "I", "")
857  && !StringAt(original, (current - 1), 3, "RGY", "OGY",
858  ""))
859  {
860  MetaphAdd(primary, "K");
861  MetaphAdd(secondary, "J");
862  current += 2;
863  break;
864  }
865 
866  /* italian e.g, 'biaggi' */
867  if (StringAt(original, (current + 1), 1, "E", "I", "Y", "")
868  || StringAt(original, (current - 1), 4,
869  "AGGI", "OGGI", ""))
870  {
871  /* obvious germanic */
872  if (
873  (StringAt(original, 0, 4, "VAN ", "VON ", "")
874  || StringAt(original, 0, 3, "SCH", ""))
875  || StringAt(original, (current + 1), 2, "ET", ""))
876  {
877  MetaphAdd(primary, "K");
878  MetaphAdd(secondary, "K");
879  }
880  else
881  {
882  /* always soft if french ending */
883  if (StringAt
884  (original, (current + 1), 4, "IER ", ""))
885  {
886  MetaphAdd(primary, "J");
887  MetaphAdd(secondary, "J");
888  }
889  else
890  {
891  MetaphAdd(primary, "J");
892  MetaphAdd(secondary, "K");
893  }
894  }
895  current += 2;
896  break;
897  }
898 
899  if (GetAt(original, current + 1) == 'G')
900  current += 2;
901  else
902  current += 1;
903  MetaphAdd(primary, "K");
904  MetaphAdd(secondary, "K");
905  break;
906 
907  case 'H':
908  /* only keep if first & before vowel or btw. 2 vowels */
909  if (((current == 0) || IsVowel(original, current - 1))
910  && IsVowel(original, current + 1))
911  {
912  MetaphAdd(primary, "H");
913  MetaphAdd(secondary, "H");
914  current += 2;
915  }
916  else
917  /* also takes care of 'HH' */
918  current += 1;
919  break;
920 
921  case 'J':
922  /* obvious spanish, 'jose', 'san jacinto' */
923  if (StringAt(original, current, 4, "JOSE", "")
924  || StringAt(original, 0, 4, "SAN ", ""))
925  {
926  if (((current == 0)
927  && (GetAt(original, current + 4) == ' '))
928  || StringAt(original, 0, 4, "SAN ", ""))
929  {
930  MetaphAdd(primary, "H");
931  MetaphAdd(secondary, "H");
932  }
933  else
934  {
935  MetaphAdd(primary, "J");
936  MetaphAdd(secondary, "H");
937  }
938  current += 1;
939  break;
940  }
941 
942  if ((current == 0)
943  && !StringAt(original, current, 4, "JOSE", ""))
944  {
945  MetaphAdd(primary, "J"); /* Yankelovich/Jankelowicz */
946  MetaphAdd(secondary, "A");
947  }
948  else
949  {
950  /* spanish pron. of e.g. 'bajador' */
951  if (IsVowel(original, current - 1)
952  && !SlavoGermanic(original)
953  && ((GetAt(original, current + 1) == 'A')
954  || (GetAt(original, current + 1) == 'O')))
955  {
956  MetaphAdd(primary, "J");
957  MetaphAdd(secondary, "H");
958  }
959  else
960  {
961  if (current == last)
962  {
963  MetaphAdd(primary, "J");
964  MetaphAdd(secondary, "");
965  }
966  else
967  {
968  if (!StringAt(original, (current + 1), 1, "L", "T",
969  "K", "S", "N", "M", "B", "Z", "")
970  && !StringAt(original, (current - 1), 1,
971  "S", "K", "L", ""))
972  {
973  MetaphAdd(primary, "J");
974  MetaphAdd(secondary, "J");
975  }
976  }
977  }
978  }
979 
980  if (GetAt(original, current + 1) == 'J') /* it could happen! */
981  current += 2;
982  else
983  current += 1;
984  break;
985 
986  case 'K':
987  if (GetAt(original, current + 1) == 'K')
988  current += 2;
989  else
990  current += 1;
991  MetaphAdd(primary, "K");
992  MetaphAdd(secondary, "K");
993  break;
994 
995  case 'L':
996  if (GetAt(original, current + 1) == 'L')
997  {
998  /* spanish e.g. 'cabrillo', 'gallegos' */
999  if (((current == (length - 3))
1000  && StringAt(original, (current - 1), 4, "ILLO",
1001  "ILLA", "ALLE", ""))
1002  || ((StringAt(original, (last - 1), 2, "AS", "OS", "")
1003  || StringAt(original, last, 1, "A", "O", ""))
1004  && StringAt(original, (current - 1), 4,
1005  "ALLE", "")))
1006  {
1007  MetaphAdd(primary, "L");
1008  MetaphAdd(secondary, "");
1009  current += 2;
1010  break;
1011  }
1012  current += 2;
1013  }
1014  else
1015  current += 1;
1016  MetaphAdd(primary, "L");
1017  MetaphAdd(secondary, "L");
1018  break;
1019 
1020  case 'M':
1021  if ((StringAt(original, (current - 1), 3, "UMB", "")
1022  && (((current + 1) == last)
1023  || StringAt(original, (current + 2), 2, "ER", "")))
1024  /* 'dumb','thumb' */
1025  || (GetAt(original, current + 1) == 'M'))
1026  current += 2;
1027  else
1028  current += 1;
1029  MetaphAdd(primary, "M");
1030  MetaphAdd(secondary, "M");
1031  break;
1032 
1033  case 'N':
1034  if (GetAt(original, current + 1) == 'N')
1035  current += 2;
1036  else
1037  current += 1;
1038  MetaphAdd(primary, "N");
1039  MetaphAdd(secondary, "N");
1040  break;
1041 
1042  case '\xd1': /* N with tilde */
1043  current += 1;
1044  MetaphAdd(primary, "N");
1045  MetaphAdd(secondary, "N");
1046  break;
1047 
1048  case 'P':
1049  if (GetAt(original, current + 1) == 'H')
1050  {
1051  MetaphAdd(primary, "F");
1052  MetaphAdd(secondary, "F");
1053  current += 2;
1054  break;
1055  }
1056 
1057  /* also account for "campbell", "raspberry" */
1058  if (StringAt(original, (current + 1), 1, "P", "B", ""))
1059  current += 2;
1060  else
1061  current += 1;
1062  MetaphAdd(primary, "P");
1063  MetaphAdd(secondary, "P");
1064  break;
1065 
1066  case 'Q':
1067  if (GetAt(original, current + 1) == 'Q')
1068  current += 2;
1069  else
1070  current += 1;
1071  MetaphAdd(primary, "K");
1072  MetaphAdd(secondary, "K");
1073  break;
1074 
1075  case 'R':
1076  /* french e.g. 'rogier', but exclude 'hochmeier' */
1077  if ((current == last)
1078  && !SlavoGermanic(original)
1079  && StringAt(original, (current - 2), 2, "IE", "")
1080  && !StringAt(original, (current - 4), 2, "ME", "MA", ""))
1081  {
1082  MetaphAdd(primary, "");
1083  MetaphAdd(secondary, "R");
1084  }
1085  else
1086  {
1087  MetaphAdd(primary, "R");
1088  MetaphAdd(secondary, "R");
1089  }
1090 
1091  if (GetAt(original, current + 1) == 'R')
1092  current += 2;
1093  else
1094  current += 1;
1095  break;
1096 
1097  case 'S':
1098  /* special cases 'island', 'isle', 'carlisle', 'carlysle' */
1099  if (StringAt(original, (current - 1), 3, "ISL", "YSL", ""))
1100  {
1101  current += 1;
1102  break;
1103  }
1104 
1105  /* special case 'sugar-' */
1106  if ((current == 0)
1107  && StringAt(original, current, 5, "SUGAR", ""))
1108  {
1109  MetaphAdd(primary, "X");
1110  MetaphAdd(secondary, "S");
1111  current += 1;
1112  break;
1113  }
1114 
1115  if (StringAt(original, current, 2, "SH", ""))
1116  {
1117  /* germanic */
1118  if (StringAt
1119  (original, (current + 1), 4, "HEIM", "HOEK", "HOLM",
1120  "HOLZ", ""))
1121  {
1122  MetaphAdd(primary, "S");
1123  MetaphAdd(secondary, "S");
1124  }
1125  else
1126  {
1127  MetaphAdd(primary, "X");
1128  MetaphAdd(secondary, "X");
1129  }
1130  current += 2;
1131  break;
1132  }
1133 
1134  /* italian & armenian */
1135  if (StringAt(original, current, 3, "SIO", "SIA", "")
1136  || StringAt(original, current, 4, "SIAN", ""))
1137  {
1138  if (!SlavoGermanic(original))
1139  {
1140  MetaphAdd(primary, "S");
1141  MetaphAdd(secondary, "X");
1142  }
1143  else
1144  {
1145  MetaphAdd(primary, "S");
1146  MetaphAdd(secondary, "S");
1147  }
1148  current += 3;
1149  break;
1150  }
1151 
1152  /*
1153  * german & anglicisations, e.g. 'smith' match 'schmidt',
1154  * 'snider' match 'schneider' also, -sz- in slavic language
1155  * although in hungarian it is pronounced 's'
1156  */
1157  if (((current == 0)
1158  && StringAt(original, (current + 1), 1,
1159  "M", "N", "L", "W", ""))
1160  || StringAt(original, (current + 1), 1, "Z", ""))
1161  {
1162  MetaphAdd(primary, "S");
1163  MetaphAdd(secondary, "X");
1164  if (StringAt(original, (current + 1), 1, "Z", ""))
1165  current += 2;
1166  else
1167  current += 1;
1168  break;
1169  }
1170 
1171  if (StringAt(original, current, 2, "SC", ""))
1172  {
1173  /* Schlesinger's rule */
1174  if (GetAt(original, current + 2) == 'H')
1175  {
1176  /* dutch origin, e.g. 'school', 'schooner' */
1177  if (StringAt(original, (current + 3), 2,
1178  "OO", "ER", "EN",
1179  "UY", "ED", "EM", ""))
1180  {
1181  /* 'schermerhorn', 'schenker' */
1182  if (StringAt(original, (current + 3), 2,
1183  "ER", "EN", ""))
1184  {
1185  MetaphAdd(primary, "X");
1186  MetaphAdd(secondary, "SK");
1187  }
1188  else
1189  {
1190  MetaphAdd(primary, "SK");
1191  MetaphAdd(secondary, "SK");
1192  }
1193  current += 3;
1194  break;
1195  }
1196  else
1197  {
1198  if ((current == 0) && !IsVowel(original, 3)
1199  && (GetAt(original, 3) != 'W'))
1200  {
1201  MetaphAdd(primary, "X");
1202  MetaphAdd(secondary, "S");
1203  }
1204  else
1205  {
1206  MetaphAdd(primary, "X");
1207  MetaphAdd(secondary, "X");
1208  }
1209  current += 3;
1210  break;
1211  }
1212  }
1213 
1214  if (StringAt(original, (current + 2), 1,
1215  "I", "E", "Y", ""))
1216  {
1217  MetaphAdd(primary, "S");
1218  MetaphAdd(secondary, "S");
1219  current += 3;
1220  break;
1221  }
1222  /* else */
1223  MetaphAdd(primary, "SK");
1224  MetaphAdd(secondary, "SK");
1225  current += 3;
1226  break;
1227  }
1228 
1229  /* french e.g. 'resnais', 'artois' */
1230  if ((current == last)
1231  && StringAt(original, (current - 2), 2, "AI", "OI", ""))
1232  {
1233  MetaphAdd(primary, "");
1234  MetaphAdd(secondary, "S");
1235  }
1236  else
1237  {
1238  MetaphAdd(primary, "S");
1239  MetaphAdd(secondary, "S");
1240  }
1241 
1242  if (StringAt(original, (current + 1), 1, "S", "Z", ""))
1243  current += 2;
1244  else
1245  current += 1;
1246  break;
1247 
1248  case 'T':
1249  if (StringAt(original, current, 4, "TION", ""))
1250  {
1251  MetaphAdd(primary, "X");
1252  MetaphAdd(secondary, "X");
1253  current += 3;
1254  break;
1255  }
1256 
1257  if (StringAt(original, current, 3, "TIA", "TCH", ""))
1258  {
1259  MetaphAdd(primary, "X");
1260  MetaphAdd(secondary, "X");
1261  current += 3;
1262  break;
1263  }
1264 
1265  if (StringAt(original, current, 2, "TH", "")
1266  || StringAt(original, current, 3, "TTH", ""))
1267  {
1268  /* special case 'thomas', 'thames' or germanic */
1269  if (StringAt(original, (current + 2), 2, "OM", "AM", "")
1270  || StringAt(original, 0, 4, "VAN ", "VON ", "")
1271  || StringAt(original, 0, 3, "SCH", ""))
1272  {
1273  MetaphAdd(primary, "T");
1274  MetaphAdd(secondary, "T");
1275  }
1276  else
1277  {
1278  MetaphAdd(primary, "0");
1279  MetaphAdd(secondary, "T");
1280  }
1281  current += 2;
1282  break;
1283  }
1284 
1285  if (StringAt(original, (current + 1), 1, "T", "D", ""))
1286  current += 2;
1287  else
1288  current += 1;
1289  MetaphAdd(primary, "T");
1290  MetaphAdd(secondary, "T");
1291  break;
1292 
1293  case 'V':
1294  if (GetAt(original, current + 1) == 'V')
1295  current += 2;
1296  else
1297  current += 1;
1298  MetaphAdd(primary, "F");
1299  MetaphAdd(secondary, "F");
1300  break;
1301 
1302  case 'W':
1303  /* can also be in middle of word */
1304  if (StringAt(original, current, 2, "WR", ""))
1305  {
1306  MetaphAdd(primary, "R");
1307  MetaphAdd(secondary, "R");
1308  current += 2;
1309  break;
1310  }
1311 
1312  if ((current == 0)
1313  && (IsVowel(original, current + 1)
1314  || StringAt(original, current, 2, "WH", "")))
1315  {
1316  /* Wasserman should match Vasserman */
1317  if (IsVowel(original, current + 1))
1318  {
1319  MetaphAdd(primary, "A");
1320  MetaphAdd(secondary, "F");
1321  }
1322  else
1323  {
1324  /* need Uomo to match Womo */
1325  MetaphAdd(primary, "A");
1326  MetaphAdd(secondary, "A");
1327  }
1328  }
1329 
1330  /* Arnow should match Arnoff */
1331  if (((current == last) && IsVowel(original, current - 1))
1332  || StringAt(original, (current - 1), 5, "EWSKI", "EWSKY",
1333  "OWSKI", "OWSKY", "")
1334  || StringAt(original, 0, 3, "SCH", ""))
1335  {
1336  MetaphAdd(primary, "");
1337  MetaphAdd(secondary, "F");
1338  current += 1;
1339  break;
1340  }
1341 
1342  /* polish e.g. 'filipowicz' */
1343  if (StringAt(original, current, 4, "WICZ", "WITZ", ""))
1344  {
1345  MetaphAdd(primary, "TS");
1346  MetaphAdd(secondary, "FX");
1347  current += 4;
1348  break;
1349  }
1350 
1351  /* else skip it */
1352  current += 1;
1353  break;
1354 
1355  case 'X':
1356  /* french e.g. breaux */
1357  if (!((current == last)
1358  && (StringAt(original, (current - 3), 3,
1359  "IAU", "EAU", "")
1360  || StringAt(original, (current - 2), 2,
1361  "AU", "OU", ""))))
1362  {
1363  MetaphAdd(primary, "KS");
1364  MetaphAdd(secondary, "KS");
1365  }
1366 
1367 
1368  if (StringAt(original, (current + 1), 1, "C", "X", ""))
1369  current += 2;
1370  else
1371  current += 1;
1372  break;
1373 
1374  case 'Z':
1375  /* chinese pinyin e.g. 'zhao' */
1376  if (GetAt(original, current + 1) == 'H')
1377  {
1378  MetaphAdd(primary, "J");
1379  MetaphAdd(secondary, "J");
1380  current += 2;
1381  break;
1382  }
1383  else if (StringAt(original, (current + 1), 2,
1384  "ZO", "ZI", "ZA", "")
1385  || (SlavoGermanic(original)
1386  && ((current > 0)
1387  && GetAt(original, current - 1) != 'T')))
1388  {
1389  MetaphAdd(primary, "S");
1390  MetaphAdd(secondary, "TS");
1391  }
1392  else
1393  {
1394  MetaphAdd(primary, "S");
1395  MetaphAdd(secondary, "S");
1396  }
1397 
1398  if (GetAt(original, current + 1) == 'Z')
1399  current += 2;
1400  else
1401  current += 1;
1402  break;
1403 
1404  default:
1405  current += 1;
1406  }
1407 
1408  /*
1409  * printf("PRIMARY: %s\n", primary->str); printf("SECONDARY: %s\n",
1410  * secondary->str);
1411  */
1412  }
1413 
1414 
1415  if (primary->length > 4)
1416  SetAt(primary, 4, '\0');
1417 
1418  if (secondary->length > 4)
1419  SetAt(secondary, 4, '\0');
1420 
1421  *codes = primary->str;
1422  *++codes = secondary->str;
1423 
1424  DestroyMetaString(original);
1425  DestroyMetaString(primary);
1426  DestroyMetaString(secondary);
1427 }
int length(const List *list)
Definition: list.c:1309
static metastring * NewMetaString(const char *init_str)
Definition: dmetaphone.c:235
static void MetaphAdd(metastring *s, const char *new_str)
Definition: dmetaphone.c:378
static char GetAt(metastring *s, int pos)
Definition: dmetaphone.c:325
static void SetAt(metastring *s, int pos, char c)
Definition: dmetaphone.c:335
char * str
Definition: dmetaphone.c:220
int free_string_on_destroy
Definition: dmetaphone.c:223
static void DestroyMetaString(metastring *s)
Definition: dmetaphone.c:260
static void MakeUpper(metastring *s)
Definition: dmetaphone.c:282
static int IsVowel(metastring *s, int pos)
Definition: dmetaphone.c:292
static int StringAt(metastring *s, int start, int length,...)
Definition: dmetaphone.c:348
static int SlavoGermanic(metastring *s)
Definition: dmetaphone.c:309

◆ GetAt()

static char GetAt ( metastring * s,
int  pos 
)
static

Definition at line 325 of file dmetaphone.c.

References metastring::length, and metastring::str.

Referenced by DoubleMetaphone().

326 {
327  if ((pos < 0) || (pos >= s->length))
328  return '\0';
329 
330  return ((char) *(s->str + pos));
331 }
char * str
Definition: dmetaphone.c:220

◆ IncreaseBuffer()

static void IncreaseBuffer ( metastring * s,
int  chars_needed 
)
static

Definition at line 273 of file dmetaphone.c.

References assert, metastring::bufsize, META_REALLOC, and metastring::str.

Referenced by MetaphAdd().

274 {
275  META_REALLOC(s->str, (s->bufsize + chars_needed + 10), char);
276  assert(s->str != NULL);
277  s->bufsize = s->bufsize + chars_needed + 10;
278 }
#define META_REALLOC(v, n, t)
Definition: dmetaphone.c:190
#define assert(TEST)
Definition: imath.c:37
char * str
Definition: dmetaphone.c:220

◆ IsVowel()

static int IsVowel ( metastring * s,
int  pos 
)
static

Definition at line 292 of file dmetaphone.c.

References metastring::length, and metastring::str.

Referenced by DoubleMetaphone().

293 {
294  char c;
295 
296  if ((pos < 0) || (pos >= s->length))
297  return 0;
298 
299  c = *(s->str + pos);
300  if ((c == 'A') || (c == 'E') || (c == 'I') || (c == 'O') ||
301  (c == 'U') || (c == 'Y'))
302  return 1;
303 
304  return 0;
305 }
char * c
char * str
Definition: dmetaphone.c:220

◆ MakeUpper()

static void MakeUpper ( metastring * s)
static

Definition at line 282 of file dmetaphone.c.

References i, and metastring::str.

Referenced by DoubleMetaphone().

283 {
284  char *i;
285 
286  for (i = s->str; *i; i++)
287  *i = toupper((unsigned char) *i);
288 }
char * str
Definition: dmetaphone.c:220
int i

◆ MetaphAdd()

static void MetaphAdd ( metastring * s,
const char *  new_str 
)
static

Definition at line 378 of file dmetaphone.c.

References metastring::bufsize, IncreaseBuffer(), metastring::length, and metastring::str.

Referenced by DoubleMetaphone().

379 {
380  int add_length;
381 
382  if (new_str == NULL)
383  return;
384 
385  add_length = strlen(new_str);
386  if ((s->length + add_length) > (s->bufsize - 1))
387  IncreaseBuffer(s, add_length);
388 
389  strcat(s->str, new_str);
390  s->length += add_length;
391 }
char * str
Definition: dmetaphone.c:220
static void IncreaseBuffer(metastring *s, int chars_needed)
Definition: dmetaphone.c:273

◆ NewMetaString()

static metastring* NewMetaString ( const char *  init_str)
static

Definition at line 235 of file dmetaphone.c.

References assert, metastring::bufsize, metastring::free_string_on_destroy, metastring::length, META_MALLOC, and metastring::str.

Referenced by DoubleMetaphone().

236 {
237  metastring *s;
238  char empty_string[] = "";
239 
240  META_MALLOC(s, 1, metastring);
241  assert(s != NULL);
242 
243  if (init_str == NULL)
244  init_str = empty_string;
245  s->length = strlen(init_str);
246  /* preallocate a bit more for potential growth */
247  s->bufsize = s->length + 7;
248 
249  META_MALLOC(s->str, s->bufsize, char);
250  assert(s->str != NULL);
251 
252  memcpy(s->str, init_str, s->length + 1);
253  s->free_string_on_destroy = 1;
254 
255  return s;
256 }
#define assert(TEST)
Definition: imath.c:37
char * str
Definition: dmetaphone.c:220
int free_string_on_destroy
Definition: dmetaphone.c:223
#define META_MALLOC(v, n, t)
Definition: dmetaphone.c:187

◆ PG_FUNCTION_INFO_V1() [1/2]

PG_FUNCTION_INFO_V1 ( dmetaphone  )

Referenced by dmetaphone().

◆ PG_FUNCTION_INFO_V1() [2/2]

PG_FUNCTION_INFO_V1 ( dmetaphone_alt  )

◆ SetAt()

static void SetAt ( metastring * s,
int  pos,
char  c 
)
static

Definition at line 335 of file dmetaphone.c.

References metastring::length, and metastring::str.

Referenced by DoubleMetaphone().

336 {
337  if ((pos < 0) || (pos >= s->length))
338  return;
339 
340  *(s->str + pos) = c;
341 }
char * c
char * str
Definition: dmetaphone.c:220

◆ SlavoGermanic()

static int SlavoGermanic ( metastring * s)
static

Definition at line 309 of file dmetaphone.c.

References metastring::str.

Referenced by DoubleMetaphone().

310 {
311  if ((char *) strstr(s->str, "W"))
312  return 1;
313  else if ((char *) strstr(s->str, "K"))
314  return 1;
315  else if ((char *) strstr(s->str, "CZ"))
316  return 1;
317  else if ((char *) strstr(s->str, "WITZ"))
318  return 1;
319  else
320  return 0;
321 }
char * str
Definition: dmetaphone.c:220

◆ StringAt()

static int StringAt ( metastring * s,
int  start,
int  length,
  ... 
)
static

Definition at line 348 of file dmetaphone.c.

References metastring::length, metastring::str, and test().

Referenced by DoubleMetaphone().

349 {
350  char *test;
351  char *pos;
352  va_list ap;
353 
354  if ((start < 0) || (start >= s->length))
355  return 0;
356 
357  pos = (s->str + start);
358  va_start(ap, length);
359 
360  do
361  {
362  test = va_arg(ap, char *);
363  if (*test && (strncmp(pos, test, length) == 0))
364  {
365  va_end(ap);
366  return 1;
367  }
368  }
369  while (strcmp(test, "") != 0);
370 
371  va_end(ap);
372 
373  return 0;
374 }
int length(const List *list)
Definition: list.c:1309
static void test(void)
char * str
Definition: dmetaphone.c:220