PostgreSQL Source Code  git master
dmetaphone.c File Reference
#include "postgres.h"
#include "utils/builtins.h"
#include <assert.h>
#include <ctype.h>
Include dependency graph for dmetaphone.c:

Go to the source code of this file.

Data Structures

struct  metastring
 

Macros

#define NDEBUG
 
#define META_MALLOC(v, n, t)    (v = (t*)palloc(((n)*sizeof(t))))
 
#define META_REALLOC(v, n, t)    (v = (t*)repalloc((v),((n)*sizeof(t))))
 
#define META_FREE(x)   ((void)true) /* pfree((x)) */
 

Functions

static void DoubleMetaphone (char *str, char **codes)
 
 PG_FUNCTION_INFO_V1 (dmetaphone)
 
Datum dmetaphone (PG_FUNCTION_ARGS)
 
 PG_FUNCTION_INFO_V1 (dmetaphone_alt)
 
Datum dmetaphone_alt (PG_FUNCTION_ARGS)
 
static metastring * NewMetaString (const char *init_str)
 
static void DestroyMetaString (metastring *s)
 
static void IncreaseBuffer (metastring *s, int chars_needed)
 
static void MakeUpper (metastring *s)
 
static int IsVowel (metastring *s, int pos)
 
static int SlavoGermanic (metastring *s)
 
static char GetAt (metastring *s, int pos)
 
static void SetAt (metastring *s, int pos, char c)
 
static int StringAt (metastring *s, int start, int length,...)
 
static void MetaphAdd (metastring *s, const char *new_str)
 

Macro Definition Documentation

◆ META_FREE

#define META_FREE (   x)    ((void)true) /* pfree((x)) */

Definition at line 200 of file dmetaphone.c.

◆ META_MALLOC

#define META_MALLOC (   v,
  n,
  t 
)     (v = (t*)palloc(((n)*sizeof(t))))

Definition at line 187 of file dmetaphone.c.

◆ META_REALLOC

#define META_REALLOC (   v,
  n,
  t 
)     (v = (t*)repalloc((v),((n)*sizeof(t))))

Definition at line 190 of file dmetaphone.c.

◆ NDEBUG

#define NDEBUG

Definition at line 104 of file dmetaphone.c.

Function Documentation

◆ DestroyMetaString()

static void DestroyMetaString ( metastring *  s)
static

Definition at line 260 of file dmetaphone.c.

261 {
262  if (s == NULL)
263  return;
264 
265  if (s->free_string_on_destroy && (s->str != NULL))
266  META_FREE(s->str);
267 
268  META_FREE(s);
269 }
#define META_FREE(x)
Definition: dmetaphone.c:200
char * str
Definition: dmetaphone.c:220
int free_string_on_destroy
Definition: dmetaphone.c:223

References metastring::free_string_on_destroy, META_FREE, and metastring::str.

Referenced by DoubleMetaphone().

◆ dmetaphone()

Datum dmetaphone ( PG_FUNCTION_ARGS  )

Definition at line 131 of file dmetaphone.c.

132 {
133  text *arg;
134  char *aptr,
135  *codes[2],
136  *code;
137 
138 #ifdef DMETAPHONE_NOSTRICT
139  if (PG_ARGISNULL(0))
140  PG_RETURN_NULL();
141 #endif
142  arg = PG_GETARG_TEXT_PP(0);
143  aptr = text_to_cstring(arg);
144 
145  DoubleMetaphone(aptr, codes);
146  code = codes[0];
147  if (!code)
148  code = "";
149 
150  PG_RETURN_TEXT_P(cstring_to_text(code));
151 }
static void DoubleMetaphone(char *str, char **codes)
Definition: dmetaphone.c:395
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_ARGISNULL(n)
Definition: fmgr.h:209
#define PG_RETURN_NULL()
Definition: fmgr.h:345
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
void * arg
Definition: c.h:674
char * text_to_cstring(const text *t)
Definition: varlena.c:217
text * cstring_to_text(const char *s)
Definition: varlena.c:184

References arg, cstring_to_text(), DoubleMetaphone(), PG_ARGISNULL, PG_GETARG_TEXT_PP, PG_RETURN_NULL, PG_RETURN_TEXT_P, and text_to_cstring().

◆ dmetaphone_alt()

Datum dmetaphone_alt ( PG_FUNCTION_ARGS  )

Definition at line 160 of file dmetaphone.c.

161 {
162  text *arg;
163  char *aptr,
164  *codes[2],
165  *code;
166 
167 #ifdef DMETAPHONE_NOSTRICT
168  if (PG_ARGISNULL(0))
169  PG_RETURN_NULL();
170 #endif
171  arg = PG_GETARG_TEXT_PP(0);
172  aptr = text_to_cstring(arg);
173 
174  DoubleMetaphone(aptr, codes);
175  code = codes[1];
176  if (!code)
177  code = "";
178 
179  PG_RETURN_TEXT_P(cstring_to_text(code));
180 }

References arg, cstring_to_text(), DoubleMetaphone(), PG_ARGISNULL, PG_GETARG_TEXT_PP, PG_RETURN_NULL, PG_RETURN_TEXT_P, and text_to_cstring().

◆ DoubleMetaphone()

static void DoubleMetaphone ( char *  str,
char **  codes 
)
static

Definition at line 395 of file dmetaphone.c.

396 {
397  int length;
398  metastring *original;
399  metastring *primary;
400  metastring *secondary;
401  int current;
402  int last;
403 
404  current = 0;
405  /* we need the real length and last prior to padding */
406  length = strlen(str);
407  last = length - 1;
408  original = NewMetaString(str);
409  /* Pad original so we can index beyond end */
410  MetaphAdd(original, " ");
411 
412  primary = NewMetaString("");
413  secondary = NewMetaString("");
414  primary->free_string_on_destroy = 0;
415  secondary->free_string_on_destroy = 0;
416 
417  MakeUpper(original);
418 
419  /* skip these when at start of word */
420  if (StringAt(original, 0, 2, "GN", "KN", "PN", "WR", "PS", ""))
421  current += 1;
422 
423  /* Initial 'X' is pronounced 'Z' e.g. 'Xavier' */
424  if (GetAt(original, 0) == 'X')
425  {
426  MetaphAdd(primary, "S"); /* 'Z' maps to 'S' */
427  MetaphAdd(secondary, "S");
428  current += 1;
429  }
430 
431  /* main loop */
432  while ((primary->length < 4) || (secondary->length < 4))
433  {
434  if (current >= length)
435  break;
436 
437  switch (GetAt(original, current))
438  {
439  case 'A':
440  case 'E':
441  case 'I':
442  case 'O':
443  case 'U':
444  case 'Y':
445  if (current == 0)
446  {
447  /* all init vowels now map to 'A' */
448  MetaphAdd(primary, "A");
449  MetaphAdd(secondary, "A");
450  }
451  current += 1;
452  break;
453 
454  case 'B':
455 
456  /* "-mb", e.g., "dumb", already skipped over... */
457  MetaphAdd(primary, "P");
458  MetaphAdd(secondary, "P");
459 
460  if (GetAt(original, current + 1) == 'B')
461  current += 2;
462  else
463  current += 1;
464  break;
465 
466  case '\xc7': /* C with cedilla */
467  MetaphAdd(primary, "S");
468  MetaphAdd(secondary, "S");
469  current += 1;
470  break;
471 
472  case 'C':
473  /* various germanic */
474  if ((current > 1)
475  && !IsVowel(original, current - 2)
476  && StringAt(original, (current - 1), 3, "ACH", "")
477  && ((GetAt(original, current + 2) != 'I')
478  && ((GetAt(original, current + 2) != 'E')
479  || StringAt(original, (current - 2), 6, "BACHER",
480  "MACHER", ""))))
481  {
482  MetaphAdd(primary, "K");
483  MetaphAdd(secondary, "K");
484  current += 2;
485  break;
486  }
487 
488  /* special case 'caesar' */
489  if ((current == 0)
490  && StringAt(original, current, 6, "CAESAR", ""))
491  {
492  MetaphAdd(primary, "S");
493  MetaphAdd(secondary, "S");
494  current += 2;
495  break;
496  }
497 
498  /* italian 'chianti' */
499  if (StringAt(original, current, 4, "CHIA", ""))
500  {
501  MetaphAdd(primary, "K");
502  MetaphAdd(secondary, "K");
503  current += 2;
504  break;
505  }
506 
507  if (StringAt(original, current, 2, "CH", ""))
508  {
509  /* find 'michael' */
510  if ((current > 0)
511  && StringAt(original, current, 4, "CHAE", ""))
512  {
513  MetaphAdd(primary, "K");
514  MetaphAdd(secondary, "X");
515  current += 2;
516  break;
517  }
518 
519  /* greek roots e.g. 'chemistry', 'chorus' */
520  if ((current == 0)
521  && (StringAt(original, (current + 1), 5,
522  "HARAC", "HARIS", "")
523  || StringAt(original, (current + 1), 3, "HOR",
524  "HYM", "HIA", "HEM", ""))
525  && !StringAt(original, 0, 5, "CHORE", ""))
526  {
527  MetaphAdd(primary, "K");
528  MetaphAdd(secondary, "K");
529  current += 2;
530  break;
531  }
532 
533  /* germanic, greek, or otherwise 'ch' for 'kh' sound */
534  if ((StringAt(original, 0, 4, "VAN ", "VON ", "")
535  || StringAt(original, 0, 3, "SCH", ""))
536  /* 'architect' (but not 'arch'), 'orchestra', 'orchid' */
537  || StringAt(original, (current - 2), 6, "ORCHES",
538  "ARCHIT", "ORCHID", "")
539  || StringAt(original, (current + 2), 1, "T", "S",
540  "")
541  || ((StringAt(original, (current - 1), 1,
542  "A", "O", "U", "E", "")
543  || (current == 0))
544 
545  /*
546  * e.g., 'wachtler', 'wechsler', but not 'tichner'
547  */
548  && StringAt(original, (current + 2), 1, "L", "R",
549  "N", "M", "B", "H", "F", "V", "W",
550  " ", "")))
551  {
552  MetaphAdd(primary, "K");
553  MetaphAdd(secondary, "K");
554  }
555  else
556  {
557  if (current > 0)
558  {
559  if (StringAt(original, 0, 2, "MC", ""))
560  {
561  /* e.g., "McHugh" */
562  MetaphAdd(primary, "K");
563  MetaphAdd(secondary, "K");
564  }
565  else
566  {
567  MetaphAdd(primary, "X");
568  MetaphAdd(secondary, "K");
569  }
570  }
571  else
572  {
573  MetaphAdd(primary, "X");
574  MetaphAdd(secondary, "X");
575  }
576  }
577  current += 2;
578  break;
579  }
580  /* e.g, 'czerny' */
581  if (StringAt(original, current, 2, "CZ", "")
582  && !StringAt(original, (current - 2), 4, "WICZ", ""))
583  {
584  MetaphAdd(primary, "S");
585  MetaphAdd(secondary, "X");
586  current += 2;
587  break;
588  }
589 
590  /* e.g., 'focaccia' */
591  if (StringAt(original, (current + 1), 3, "CIA", ""))
592  {
593  MetaphAdd(primary, "X");
594  MetaphAdd(secondary, "X");
595  current += 3;
596  break;
597  }
598 
599  /* double 'C', but not if e.g. 'McClellan' */
600  if (StringAt(original, current, 2, "CC", "")
601  && !((current == 1) && (GetAt(original, 0) == 'M')))
602  {
603  /* 'bellocchio' but not 'bacchus' */
604  if (StringAt(original, (current + 2), 1, "I", "E", "H", "")
605  && !StringAt(original, (current + 2), 2, "HU", ""))
606  {
607  /* 'accident', 'accede' 'succeed' */
608  if (((current == 1)
609  && (GetAt(original, current - 1) == 'A'))
610  || StringAt(original, (current - 1), 5, "UCCEE",
611  "UCCES", ""))
612  {
613  MetaphAdd(primary, "KS");
614  MetaphAdd(secondary, "KS");
615  /* 'bacci', 'bertucci', other italian */
616  }
617  else
618  {
619  MetaphAdd(primary, "X");
620  MetaphAdd(secondary, "X");
621  }
622  current += 3;
623  break;
624  }
625  else
626  { /* Pierce's rule */
627  MetaphAdd(primary, "K");
628  MetaphAdd(secondary, "K");
629  current += 2;
630  break;
631  }
632  }
633 
634  if (StringAt(original, current, 2, "CK", "CG", "CQ", ""))
635  {
636  MetaphAdd(primary, "K");
637  MetaphAdd(secondary, "K");
638  current += 2;
639  break;
640  }
641 
642  if (StringAt(original, current, 2, "CI", "CE", "CY", ""))
643  {
644  /* italian vs. english */
645  if (StringAt
646  (original, current, 3, "CIO", "CIE", "CIA", ""))
647  {
648  MetaphAdd(primary, "S");
649  MetaphAdd(secondary, "X");
650  }
651  else
652  {
653  MetaphAdd(primary, "S");
654  MetaphAdd(secondary, "S");
655  }
656  current += 2;
657  break;
658  }
659 
660  /* else */
661  MetaphAdd(primary, "K");
662  MetaphAdd(secondary, "K");
663 
664  /* name sent in 'mac caffrey', 'mac gregor' */
665  if (StringAt(original, (current + 1), 2, " C", " Q", " G", ""))
666  current += 3;
667  else if (StringAt(original, (current + 1), 1, "C", "K", "Q", "")
668  && !StringAt(original, (current + 1), 2,
669  "CE", "CI", ""))
670  current += 2;
671  else
672  current += 1;
673  break;
674 
675  case 'D':
676  if (StringAt(original, current, 2, "DG", ""))
677  {
678  if (StringAt(original, (current + 2), 1,
679  "I", "E", "Y", ""))
680  {
681  /* e.g. 'edge' */
682  MetaphAdd(primary, "J");
683  MetaphAdd(secondary, "J");
684  current += 3;
685  break;
686  }
687  else
688  {
689  /* e.g. 'edgar' */
690  MetaphAdd(primary, "TK");
691  MetaphAdd(secondary, "TK");
692  current += 2;
693  break;
694  }
695  }
696 
697  if (StringAt(original, current, 2, "DT", "DD", ""))
698  {
699  MetaphAdd(primary, "T");
700  MetaphAdd(secondary, "T");
701  current += 2;
702  break;
703  }
704 
705  /* else */
706  MetaphAdd(primary, "T");
707  MetaphAdd(secondary, "T");
708  current += 1;
709  break;
710 
711  case 'F':
712  if (GetAt(original, current + 1) == 'F')
713  current += 2;
714  else
715  current += 1;
716  MetaphAdd(primary, "F");
717  MetaphAdd(secondary, "F");
718  break;
719 
720  case 'G':
721  if (GetAt(original, current + 1) == 'H')
722  {
723  if ((current > 0) && !IsVowel(original, current - 1))
724  {
725  MetaphAdd(primary, "K");
726  MetaphAdd(secondary, "K");
727  current += 2;
728  break;
729  }
730 
731  if (current < 3)
732  {
733  /* 'ghislane', 'ghiradelli' */
734  if (current == 0)
735  {
736  if (GetAt(original, current + 2) == 'I')
737  {
738  MetaphAdd(primary, "J");
739  MetaphAdd(secondary, "J");
740  }
741  else
742  {
743  MetaphAdd(primary, "K");
744  MetaphAdd(secondary, "K");
745  }
746  current += 2;
747  break;
748  }
749  }
750 
751  /*
752  * Parker's rule (with some further refinements) - e.g.,
753  * 'hugh'
754  */
755  if (((current > 1)
756  && StringAt(original, (current - 2), 1,
757  "B", "H", "D", ""))
758  /* e.g., 'bough' */
759  || ((current > 2)
760  && StringAt(original, (current - 3), 1,
761  "B", "H", "D", ""))
762  /* e.g., 'broughton' */
763  || ((current > 3)
764  && StringAt(original, (current - 4), 1,
765  "B", "H", "")))
766  {
767  current += 2;
768  break;
769  }
770  else
771  {
772  /*
773  * e.g., 'laugh', 'McLaughlin', 'cough', 'gough',
774  * 'rough', 'tough'
775  */
776  if ((current > 2)
777  && (GetAt(original, current - 1) == 'U')
778  && StringAt(original, (current - 3), 1, "C",
779  "G", "L", "R", "T", ""))
780  {
781  MetaphAdd(primary, "F");
782  MetaphAdd(secondary, "F");
783  }
784  else if ((current > 0)
785  && GetAt(original, current - 1) != 'I')
786  {
787 
788 
789  MetaphAdd(primary, "K");
790  MetaphAdd(secondary, "K");
791  }
792 
793  current += 2;
794  break;
795  }
796  }
797 
798  if (GetAt(original, current + 1) == 'N')
799  {
800  if ((current == 1) && IsVowel(original, 0)
801  && !SlavoGermanic(original))
802  {
803  MetaphAdd(primary, "KN");
804  MetaphAdd(secondary, "N");
805  }
806  else
807  /* not e.g. 'cagney' */
808  if (!StringAt(original, (current + 2), 2, "EY", "")
809  && (GetAt(original, current + 1) != 'Y')
810  && !SlavoGermanic(original))
811  {
812  MetaphAdd(primary, "N");
813  MetaphAdd(secondary, "KN");
814  }
815  else
816  {
817  MetaphAdd(primary, "KN");
818  MetaphAdd(secondary, "KN");
819  }
820  current += 2;
821  break;
822  }
823 
824  /* 'tagliaro' */
825  if (StringAt(original, (current + 1), 2, "LI", "")
826  && !SlavoGermanic(original))
827  {
828  MetaphAdd(primary, "KL");
829  MetaphAdd(secondary, "L");
830  current += 2;
831  break;
832  }
833 
834  /* -ges-,-gep-,-gel-, -gie- at beginning */
835  if ((current == 0)
836  && ((GetAt(original, current + 1) == 'Y')
837  || StringAt(original, (current + 1), 2, "ES", "EP",
838  "EB", "EL", "EY", "IB", "IL", "IN", "IE",
839  "EI", "ER", "")))
840  {
841  MetaphAdd(primary, "K");
842  MetaphAdd(secondary, "J");
843  current += 2;
844  break;
845  }
846 
847  /* -ger-, -gy- */
848  if ((StringAt(original, (current + 1), 2, "ER", "")
849  || (GetAt(original, current + 1) == 'Y'))
850  && !StringAt(original, 0, 6,
851  "DANGER", "RANGER", "MANGER", "")
852  && !StringAt(original, (current - 1), 1, "E", "I", "")
853  && !StringAt(original, (current - 1), 3, "RGY", "OGY", ""))
854  {
855  MetaphAdd(primary, "K");
856  MetaphAdd(secondary, "J");
857  current += 2;
858  break;
859  }
860 
861  /* italian e.g, 'biaggi' */
862  if (StringAt(original, (current + 1), 1, "E", "I", "Y", "")
863  || StringAt(original, (current - 1), 4,
864  "AGGI", "OGGI", ""))
865  {
866  /* obvious germanic */
867  if ((StringAt(original, 0, 4, "VAN ", "VON ", "")
868  || StringAt(original, 0, 3, "SCH", ""))
869  || StringAt(original, (current + 1), 2, "ET", ""))
870  {
871  MetaphAdd(primary, "K");
872  MetaphAdd(secondary, "K");
873  }
874  else
875  {
876  /* always soft if french ending */
877  if (StringAt
878  (original, (current + 1), 4, "IER ", ""))
879  {
880  MetaphAdd(primary, "J");
881  MetaphAdd(secondary, "J");
882  }
883  else
884  {
885  MetaphAdd(primary, "J");
886  MetaphAdd(secondary, "K");
887  }
888  }
889  current += 2;
890  break;
891  }
892 
893  if (GetAt(original, current + 1) == 'G')
894  current += 2;
895  else
896  current += 1;
897  MetaphAdd(primary, "K");
898  MetaphAdd(secondary, "K");
899  break;
900 
901  case 'H':
902  /* only keep if first & before vowel or btw. 2 vowels */
903  if (((current == 0) || IsVowel(original, current - 1))
904  && IsVowel(original, current + 1))
905  {
906  MetaphAdd(primary, "H");
907  MetaphAdd(secondary, "H");
908  current += 2;
909  }
910  else
911  /* also takes care of 'HH' */
912  current += 1;
913  break;
914 
915  case 'J':
916  /* obvious spanish, 'jose', 'san jacinto' */
917  if (StringAt(original, current, 4, "JOSE", "")
918  || StringAt(original, 0, 4, "SAN ", ""))
919  {
920  if (((current == 0)
921  && (GetAt(original, current + 4) == ' '))
922  || StringAt(original, 0, 4, "SAN ", ""))
923  {
924  MetaphAdd(primary, "H");
925  MetaphAdd(secondary, "H");
926  }
927  else
928  {
929  MetaphAdd(primary, "J");
930  MetaphAdd(secondary, "H");
931  }
932  current += 1;
933  break;
934  }
935 
936  if ((current == 0)
937  && !StringAt(original, current, 4, "JOSE", ""))
938  {
939  MetaphAdd(primary, "J"); /* Yankelovich/Jankelowicz */
940  MetaphAdd(secondary, "A");
941  }
942  else
943  {
944  /* spanish pron. of e.g. 'bajador' */
945  if (IsVowel(original, current - 1)
946  && !SlavoGermanic(original)
947  && ((GetAt(original, current + 1) == 'A')
948  || (GetAt(original, current + 1) == 'O')))
949  {
950  MetaphAdd(primary, "J");
951  MetaphAdd(secondary, "H");
952  }
953  else
954  {
955  if (current == last)
956  {
957  MetaphAdd(primary, "J");
958  MetaphAdd(secondary, "");
959  }
960  else
961  {
962  if (!StringAt(original, (current + 1), 1, "L", "T",
963  "K", "S", "N", "M", "B", "Z", "")
964  && !StringAt(original, (current - 1), 1,
965  "S", "K", "L", ""))
966  {
967  MetaphAdd(primary, "J");
968  MetaphAdd(secondary, "J");
969  }
970  }
971  }
972  }
973 
974  if (GetAt(original, current + 1) == 'J') /* it could happen! */
975  current += 2;
976  else
977  current += 1;
978  break;
979 
980  case 'K':
981  if (GetAt(original, current + 1) == 'K')
982  current += 2;
983  else
984  current += 1;
985  MetaphAdd(primary, "K");
986  MetaphAdd(secondary, "K");
987  break;
988 
989  case 'L':
990  if (GetAt(original, current + 1) == 'L')
991  {
992  /* spanish e.g. 'cabrillo', 'gallegos' */
993  if (((current == (length - 3))
994  && StringAt(original, (current - 1), 4, "ILLO",
995  "ILLA", "ALLE", ""))
996  || ((StringAt(original, (last - 1), 2, "AS", "OS", "")
997  || StringAt(original, last, 1, "A", "O", ""))
998  && StringAt(original, (current - 1), 4,
999  "ALLE", "")))
1000  {
1001  MetaphAdd(primary, "L");
1002  MetaphAdd(secondary, "");
1003  current += 2;
1004  break;
1005  }
1006  current += 2;
1007  }
1008  else
1009  current += 1;
1010  MetaphAdd(primary, "L");
1011  MetaphAdd(secondary, "L");
1012  break;
1013 
1014  case 'M':
1015  if ((StringAt(original, (current - 1), 3, "UMB", "")
1016  && (((current + 1) == last)
1017  || StringAt(original, (current + 2), 2, "ER", "")))
1018  /* 'dumb','thumb' */
1019  || (GetAt(original, current + 1) == 'M'))
1020  current += 2;
1021  else
1022  current += 1;
1023  MetaphAdd(primary, "M");
1024  MetaphAdd(secondary, "M");
1025  break;
1026 
1027  case 'N':
1028  if (GetAt(original, current + 1) == 'N')
1029  current += 2;
1030  else
1031  current += 1;
1032  MetaphAdd(primary, "N");
1033  MetaphAdd(secondary, "N");
1034  break;
1035 
1036  case '\xd1': /* N with tilde */
1037  current += 1;
1038  MetaphAdd(primary, "N");
1039  MetaphAdd(secondary, "N");
1040  break;
1041 
1042  case 'P':
1043  if (GetAt(original, current + 1) == 'H')
1044  {
1045  MetaphAdd(primary, "F");
1046  MetaphAdd(secondary, "F");
1047  current += 2;
1048  break;
1049  }
1050 
1051  /* also account for "campbell", "raspberry" */
1052  if (StringAt(original, (current + 1), 1, "P", "B", ""))
1053  current += 2;
1054  else
1055  current += 1;
1056  MetaphAdd(primary, "P");
1057  MetaphAdd(secondary, "P");
1058  break;
1059 
1060  case 'Q':
1061  if (GetAt(original, current + 1) == 'Q')
1062  current += 2;
1063  else
1064  current += 1;
1065  MetaphAdd(primary, "K");
1066  MetaphAdd(secondary, "K");
1067  break;
1068 
1069  case 'R':
1070  /* french e.g. 'rogier', but exclude 'hochmeier' */
1071  if ((current == last)
1072  && !SlavoGermanic(original)
1073  && StringAt(original, (current - 2), 2, "IE", "")
1074  && !StringAt(original, (current - 4), 2, "ME", "MA", ""))
1075  {
1076  MetaphAdd(primary, "");
1077  MetaphAdd(secondary, "R");
1078  }
1079  else
1080  {
1081  MetaphAdd(primary, "R");
1082  MetaphAdd(secondary, "R");
1083  }
1084 
1085  if (GetAt(original, current + 1) == 'R')
1086  current += 2;
1087  else
1088  current += 1;
1089  break;
1090 
1091  case 'S':
1092  /* special cases 'island', 'isle', 'carlisle', 'carlysle' */
1093  if (StringAt(original, (current - 1), 3, "ISL", "YSL", ""))
1094  {
1095  current += 1;
1096  break;
1097  }
1098 
1099  /* special case 'sugar-' */
1100  if ((current == 0)
1101  && StringAt(original, current, 5, "SUGAR", ""))
1102  {
1103  MetaphAdd(primary, "X");
1104  MetaphAdd(secondary, "S");
1105  current += 1;
1106  break;
1107  }
1108 
1109  if (StringAt(original, current, 2, "SH", ""))
1110  {
1111  /* germanic */
1112  if (StringAt
1113  (original, (current + 1), 4, "HEIM", "HOEK", "HOLM",
1114  "HOLZ", ""))
1115  {
1116  MetaphAdd(primary, "S");
1117  MetaphAdd(secondary, "S");
1118  }
1119  else
1120  {
1121  MetaphAdd(primary, "X");
1122  MetaphAdd(secondary, "X");
1123  }
1124  current += 2;
1125  break;
1126  }
1127 
1128  /* italian & armenian */
1129  if (StringAt(original, current, 3, "SIO", "SIA", "")
1130  || StringAt(original, current, 4, "SIAN", ""))
1131  {
1132  if (!SlavoGermanic(original))
1133  {
1134  MetaphAdd(primary, "S");
1135  MetaphAdd(secondary, "X");
1136  }
1137  else
1138  {
1139  MetaphAdd(primary, "S");
1140  MetaphAdd(secondary, "S");
1141  }
1142  current += 3;
1143  break;
1144  }
1145 
1146  /*
1147  * german & anglicisations, e.g. 'smith' match 'schmidt',
1148  * 'snider' match 'schneider' also, -sz- in slavic language
1149  * although in hungarian it is pronounced 's'
1150  */
1151  if (((current == 0)
1152  && StringAt(original, (current + 1), 1,
1153  "M", "N", "L", "W", ""))
1154  || StringAt(original, (current + 1), 1, "Z", ""))
1155  {
1156  MetaphAdd(primary, "S");
1157  MetaphAdd(secondary, "X");
1158  if (StringAt(original, (current + 1), 1, "Z", ""))
1159  current += 2;
1160  else
1161  current += 1;
1162  break;
1163  }
1164 
1165  if (StringAt(original, current, 2, "SC", ""))
1166  {
1167  /* Schlesinger's rule */
1168  if (GetAt(original, current + 2) == 'H')
1169  {
1170  /* dutch origin, e.g. 'school', 'schooner' */
1171  if (StringAt(original, (current + 3), 2,
1172  "OO", "ER", "EN",
1173  "UY", "ED", "EM", ""))
1174  {
1175  /* 'schermerhorn', 'schenker' */
1176  if (StringAt(original, (current + 3), 2,
1177  "ER", "EN", ""))
1178  {
1179  MetaphAdd(primary, "X");
1180  MetaphAdd(secondary, "SK");
1181  }
1182  else
1183  {
1184  MetaphAdd(primary, "SK");
1185  MetaphAdd(secondary, "SK");
1186  }
1187  current += 3;
1188  break;
1189  }
1190  else
1191  {
1192  if ((current == 0) && !IsVowel(original, 3)
1193  && (GetAt(original, 3) != 'W'))
1194  {
1195  MetaphAdd(primary, "X");
1196  MetaphAdd(secondary, "S");
1197  }
1198  else
1199  {
1200  MetaphAdd(primary, "X");
1201  MetaphAdd(secondary, "X");
1202  }
1203  current += 3;
1204  break;
1205  }
1206  }
1207 
1208  if (StringAt(original, (current + 2), 1,
1209  "I", "E", "Y", ""))
1210  {
1211  MetaphAdd(primary, "S");
1212  MetaphAdd(secondary, "S");
1213  current += 3;
1214  break;
1215  }
1216  /* else */
1217  MetaphAdd(primary, "SK");
1218  MetaphAdd(secondary, "SK");
1219  current += 3;
1220  break;
1221  }
1222 
1223  /* french e.g. 'resnais', 'artois' */
1224  if ((current == last)
1225  && StringAt(original, (current - 2), 2, "AI", "OI", ""))
1226  {
1227  MetaphAdd(primary, "");
1228  MetaphAdd(secondary, "S");
1229  }
1230  else
1231  {
1232  MetaphAdd(primary, "S");
1233  MetaphAdd(secondary, "S");
1234  }
1235 
1236  if (StringAt(original, (current + 1), 1, "S", "Z", ""))
1237  current += 2;
1238  else
1239  current += 1;
1240  break;
1241 
1242  case 'T':
1243  if (StringAt(original, current, 4, "TION", ""))
1244  {
1245  MetaphAdd(primary, "X");
1246  MetaphAdd(secondary, "X");
1247  current += 3;
1248  break;
1249  }
1250 
1251  if (StringAt(original, current, 3, "TIA", "TCH", ""))
1252  {
1253  MetaphAdd(primary, "X");
1254  MetaphAdd(secondary, "X");
1255  current += 3;
1256  break;
1257  }
1258 
1259  if (StringAt(original, current, 2, "TH", "")
1260  || StringAt(original, current, 3, "TTH", ""))
1261  {
1262  /* special case 'thomas', 'thames' or germanic */
1263  if (StringAt(original, (current + 2), 2, "OM", "AM", "")
1264  || StringAt(original, 0, 4, "VAN ", "VON ", "")
1265  || StringAt(original, 0, 3, "SCH", ""))
1266  {
1267  MetaphAdd(primary, "T");
1268  MetaphAdd(secondary, "T");
1269  }
1270  else
1271  {
1272  MetaphAdd(primary, "0");
1273  MetaphAdd(secondary, "T");
1274  }
1275  current += 2;
1276  break;
1277  }
1278 
1279  if (StringAt(original, (current + 1), 1, "T", "D", ""))
1280  current += 2;
1281  else
1282  current += 1;
1283  MetaphAdd(primary, "T");
1284  MetaphAdd(secondary, "T");
1285  break;
1286 
1287  case 'V':
1288  if (GetAt(original, current + 1) == 'V')
1289  current += 2;
1290  else
1291  current += 1;
1292  MetaphAdd(primary, "F");
1293  MetaphAdd(secondary, "F");
1294  break;
1295 
1296  case 'W':
1297  /* can also be in middle of word */
1298  if (StringAt(original, current, 2, "WR", ""))
1299  {
1300  MetaphAdd(primary, "R");
1301  MetaphAdd(secondary, "R");
1302  current += 2;
1303  break;
1304  }
1305 
1306  if ((current == 0)
1307  && (IsVowel(original, current + 1)
1308  || StringAt(original, current, 2, "WH", "")))
1309  {
1310  /* Wasserman should match Vasserman */
1311  if (IsVowel(original, current + 1))
1312  {
1313  MetaphAdd(primary, "A");
1314  MetaphAdd(secondary, "F");
1315  }
1316  else
1317  {
1318  /* need Uomo to match Womo */
1319  MetaphAdd(primary, "A");
1320  MetaphAdd(secondary, "A");
1321  }
1322  }
1323 
1324  /* Arnow should match Arnoff */
1325  if (((current == last) && IsVowel(original, current - 1))
1326  || StringAt(original, (current - 1), 5, "EWSKI", "EWSKY",
1327  "OWSKI", "OWSKY", "")
1328  || StringAt(original, 0, 3, "SCH", ""))
1329  {
1330  MetaphAdd(primary, "");
1331  MetaphAdd(secondary, "F");
1332  current += 1;
1333  break;
1334  }
1335 
1336  /* polish e.g. 'filipowicz' */
1337  if (StringAt(original, current, 4, "WICZ", "WITZ", ""))
1338  {
1339  MetaphAdd(primary, "TS");
1340  MetaphAdd(secondary, "FX");
1341  current += 4;
1342  break;
1343  }
1344 
1345  /* else skip it */
1346  current += 1;
1347  break;
1348 
1349  case 'X':
1350  /* french e.g. breaux */
1351  if (!((current == last)
1352  && (StringAt(original, (current - 3), 3,
1353  "IAU", "EAU", "")
1354  || StringAt(original, (current - 2), 2,
1355  "AU", "OU", ""))))
1356  {
1357  MetaphAdd(primary, "KS");
1358  MetaphAdd(secondary, "KS");
1359  }
1360 
1361 
1362  if (StringAt(original, (current + 1), 1, "C", "X", ""))
1363  current += 2;
1364  else
1365  current += 1;
1366  break;
1367 
1368  case 'Z':
1369  /* chinese pinyin e.g. 'zhao' */
1370  if (GetAt(original, current + 1) == 'H')
1371  {
1372  MetaphAdd(primary, "J");
1373  MetaphAdd(secondary, "J");
1374  current += 2;
1375  break;
1376  }
1377  else if (StringAt(original, (current + 1), 2,
1378  "ZO", "ZI", "ZA", "")
1379  || (SlavoGermanic(original)
1380  && ((current > 0)
1381  && GetAt(original, current - 1) != 'T')))
1382  {
1383  MetaphAdd(primary, "S");
1384  MetaphAdd(secondary, "TS");
1385  }
1386  else
1387  {
1388  MetaphAdd(primary, "S");
1389  MetaphAdd(secondary, "S");
1390  }
1391 
1392  if (GetAt(original, current + 1) == 'Z')
1393  current += 2;
1394  else
1395  current += 1;
1396  break;
1397 
1398  default:
1399  current += 1;
1400  }
1401 
1402  /*
1403  * printf("PRIMARY: %s\n", primary->str); printf("SECONDARY: %s\n",
1404  * secondary->str);
1405  */
1406  }
1407 
1408 
1409  if (primary->length > 4)
1410  SetAt(primary, 4, '\0');
1411 
1412  if (secondary->length > 4)
1413  SetAt(secondary, 4, '\0');
1414 
1415  *codes = primary->str;
1416  *++codes = secondary->str;
1417 
1418  DestroyMetaString(original);
1419  DestroyMetaString(primary);
1420  DestroyMetaString(secondary);
1421 }
static char GetAt(metastring *s, int pos)
Definition: dmetaphone.c:325
static void SetAt(metastring *s, int pos, char c)
Definition: dmetaphone.c:335
static void MetaphAdd(metastring *s, const char *new_str)
Definition: dmetaphone.c:378
static int SlavoGermanic(metastring *s)
Definition: dmetaphone.c:309
static void MakeUpper(metastring *s)
Definition: dmetaphone.c:282
static int StringAt(metastring *s, int start, int length,...)
Definition: dmetaphone.c:348
static int IsVowel(metastring *s, int pos)
Definition: dmetaphone.c:292
static metastring * NewMetaString(const char *init_str)
Definition: dmetaphone.c:235
static void DestroyMetaString(metastring *s)
Definition: dmetaphone.c:260

References DestroyMetaString(), metastring::free_string_on_destroy, GetAt(), IsVowel(), metastring::length, MakeUpper(), MetaphAdd(), NewMetaString(), SetAt(), SlavoGermanic(), metastring::str, generate_unaccent_rules::str, and StringAt().

Referenced by dmetaphone(), and dmetaphone_alt().

◆ GetAt()

static char GetAt ( metastring *  s,
int  pos 
)
static

Definition at line 325 of file dmetaphone.c.

326 {
327  if ((pos < 0) || (pos >= s->length))
328  return '\0';
329 
330  return ((char) *(s->str + pos));
331 }

References metastring::length, and metastring::str.

Referenced by DoubleMetaphone().

◆ IncreaseBuffer()

static void IncreaseBuffer ( metastring *  s,
int  chars_needed 
)
static

Definition at line 273 of file dmetaphone.c.

274 {
275  META_REALLOC(s->str, (s->bufsize + chars_needed + 10), char);
276  assert(s->str != NULL);
277  s->bufsize = s->bufsize + chars_needed + 10;
278 }
#define META_REALLOC(v, n, t)
Definition: dmetaphone.c:190
#define assert(x)
Definition: regcustom.h:56

References assert, metastring::bufsize, META_REALLOC, and metastring::str.

Referenced by MetaphAdd().

◆ IsVowel()

static int IsVowel ( metastring *  s,
int  pos 
)
static

Definition at line 292 of file dmetaphone.c.

293 {
294  char c;
295 
296  if ((pos < 0) || (pos >= s->length))
297  return 0;
298 
299  c = *(s->str + pos);
300  if ((c == 'A') || (c == 'E') || (c == 'I') || (c == 'O') ||
301  (c == 'U') || (c == 'Y'))
302  return 1;
303 
304  return 0;
305 }
char * c

References metastring::length, and metastring::str.

Referenced by DoubleMetaphone().

◆ MakeUpper()

static void MakeUpper ( metastring *  s)
static

Definition at line 282 of file dmetaphone.c.

283 {
284  char *i;
285 
286  for (i = s->str; *i; i++)
287  *i = toupper((unsigned char) *i);
288 }
int i
Definition: isn.c:73

References i, and metastring::str.

Referenced by DoubleMetaphone().

◆ MetaphAdd()

static void MetaphAdd ( metastring *  s,
const char *  new_str 
)
static

Definition at line 378 of file dmetaphone.c.

379 {
380  int add_length;
381 
382  if (new_str == NULL)
383  return;
384 
385  add_length = strlen(new_str);
386  if ((s->length + add_length) > (s->bufsize - 1))
387  IncreaseBuffer(s, add_length);
388 
389  strcat(s->str, new_str);
390  s->length += add_length;
391 }
static void IncreaseBuffer(metastring *s, int chars_needed)
Definition: dmetaphone.c:273

References metastring::bufsize, IncreaseBuffer(), metastring::length, and metastring::str.

Referenced by DoubleMetaphone().

◆ NewMetaString()

static metastring* NewMetaString ( const char *  init_str)
static

Definition at line 235 of file dmetaphone.c.

236 {
237  metastring *s;
238  char empty_string[] = "";
239 
240  META_MALLOC(s, 1, metastring);
241  assert(s != NULL);
242 
243  if (init_str == NULL)
244  init_str = empty_string;
245  s->length = strlen(init_str);
246  /* preallocate a bit more for potential growth */
247  s->bufsize = s->length + 7;
248 
249  META_MALLOC(s->str, s->bufsize, char);
250  assert(s->str != NULL);
251 
252  memcpy(s->str, init_str, s->length + 1);
253  s->free_string_on_destroy = 1;
254 
255  return s;
256 }
#define META_MALLOC(v, n, t)
Definition: dmetaphone.c:187

References assert, metastring::bufsize, metastring::free_string_on_destroy, metastring::length, META_MALLOC, and metastring::str.

Referenced by DoubleMetaphone().

◆ PG_FUNCTION_INFO_V1() [1/2]

PG_FUNCTION_INFO_V1 ( dmetaphone  )

◆ PG_FUNCTION_INFO_V1() [2/2]

PG_FUNCTION_INFO_V1 ( dmetaphone_alt  )

◆ SetAt()

static void SetAt ( metastring *  s,
int  pos,
char  c 
)
static

Definition at line 335 of file dmetaphone.c.

336 {
337  if ((pos < 0) || (pos >= s->length))
338  return;
339 
340  *(s->str + pos) = c;
341 }

References metastring::length, and metastring::str.

Referenced by DoubleMetaphone().

◆ SlavoGermanic()

static int SlavoGermanic ( metastring *  s)
static

Definition at line 309 of file dmetaphone.c.

310 {
311  if ((char *) strstr(s->str, "W"))
312  return 1;
313  else if ((char *) strstr(s->str, "K"))
314  return 1;
315  else if ((char *) strstr(s->str, "CZ"))
316  return 1;
317  else if ((char *) strstr(s->str, "WITZ"))
318  return 1;
319  else
320  return 0;
321 }

References metastring::str.

Referenced by DoubleMetaphone().

◆ StringAt()

static int StringAt ( metastring *  s,
int  start,
int  length,
  ... 
)
static

Definition at line 348 of file dmetaphone.c.

349 {
350  char *test;
351  char *pos;
352  va_list ap;
353 
354  if ((start < 0) || (start >= s->length))
355  return 0;
356 
357  pos = (s->str + start);
358  va_start(ap, length);
359 
360  do
361  {
362  test = va_arg(ap, char *);
363  if (*test && (strncmp(pos, test, length) == 0))
364  {
365  va_end(ap);
366  return 1;
367  }
368  }
369  while (strcmp(test, "") != 0);
370 
371  va_end(ap);
372 
373  return 0;
374 }
va_end(args)
va_start(args, fmt)
static void test(void)

References metastring::length, metastring::str, test(), va_end(), and va_start().

Referenced by DoubleMetaphone().