PostgreSQL Source Code git master
dmetaphone.c File Reference
#include "postgres.h"
#include "utils/builtins.h"
#include <assert.h>
#include <ctype.h>
Include dependency graph for dmetaphone.c:

Go to the source code of this file.

Data Structures

struct  metastring
 

Macros

#define NDEBUG
 
#define META_MALLOC(v, n, t)    (v = (t*)palloc(((n)*sizeof(t))))
 
#define META_REALLOC(v, n, t)    (v = (t*)repalloc((v),((n)*sizeof(t))))
 
#define META_FREE(x)   ((void)true) /* pfree((x)) */
 

Functions

static void DoubleMetaphone (char *str, char **codes)
 
 PG_FUNCTION_INFO_V1 (dmetaphone)
 
Datum dmetaphone (PG_FUNCTION_ARGS)
 
 PG_FUNCTION_INFO_V1 (dmetaphone_alt)
 
Datum dmetaphone_alt (PG_FUNCTION_ARGS)
 
static metastring * NewMetaString (const char *init_str)
 
static void DestroyMetaString (metastring *s)
 
static void IncreaseBuffer (metastring *s, int chars_needed)
 
static void MakeUpper (metastring *s)
 
static int IsVowel (metastring *s, int pos)
 
static int SlavoGermanic (metastring *s)
 
static char GetAt (metastring *s, int pos)
 
static void SetAt (metastring *s, int pos, char c)
 
static int StringAt (metastring *s, int start, int length,...)
 
static void MetaphAdd (metastring *s, const char *new_str)
 

Macro Definition Documentation

◆ META_FREE

#define META_FREE (   x)    ((void)true) /* pfree((x)) */

Definition at line 200 of file dmetaphone.c.

◆ META_MALLOC

#define META_MALLOC (   v,
  n,
  t 
)     (v = (t*)palloc(((n)*sizeof(t))))

Definition at line 187 of file dmetaphone.c.

◆ META_REALLOC

#define META_REALLOC (   v,
  n,
  t 
)     (v = (t*)repalloc((v),((n)*sizeof(t))))

Definition at line 190 of file dmetaphone.c.

◆ NDEBUG

#define NDEBUG

Definition at line 104 of file dmetaphone.c.

Function Documentation

◆ DestroyMetaString()

static void DestroyMetaString ( metastring * s)
static

Definition at line 260 of file dmetaphone.c.

/*
 * DestroyMetaString: tear down a metastring.
 *
 * A NULL argument is accepted and ignored.  The character buffer is passed
 * to META_FREE only when free_string_on_destroy is set and str is non-NULL;
 * the struct itself is always passed to META_FREE.
 *
 * Note: META_FREE is defined in this file as ((void)true), with the pfree()
 * call commented out, so neither call below releases anything here.
 */
261{
262 if (s == NULL)
263 return;
264
265 if (s->free_string_on_destroy && (s->str != NULL))
266 META_FREE(s->str);
267
268 META_FREE(s);
269}
#define META_FREE(x)
Definition: dmetaphone.c:200
char * str
Definition: dmetaphone.c:220
int free_string_on_destroy
Definition: dmetaphone.c:223

References metastring::free_string_on_destroy, META_FREE, and metastring::str.

Referenced by DoubleMetaphone().

◆ dmetaphone()

Datum dmetaphone ( PG_FUNCTION_ARGS  )

Definition at line 131 of file dmetaphone.c.

132{
133 text *arg;
134 char *aptr,
135 *codes[2],
136 *code;
137
138#ifdef DMETAPHONE_NOSTRICT
139 if (PG_ARGISNULL(0))
140 PG_RETURN_NULL();
141#endif
142 arg = PG_GETARG_TEXT_PP(0);
143 aptr = text_to_cstring(arg);
144
145 DoubleMetaphone(aptr, codes);
146 code = codes[0];
147 if (!code)
148 code = "";
149
150 PG_RETURN_TEXT_P(cstring_to_text(code));
151}
static void DoubleMetaphone(char *str, char **codes)
Definition: dmetaphone.c:395
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_ARGISNULL(n)
Definition: fmgr.h:209
#define PG_RETURN_NULL()
Definition: fmgr.h:345
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
void * arg
Definition: c.h:658
text * cstring_to_text(const char *s)
Definition: varlena.c:192
char * text_to_cstring(const text *t)
Definition: varlena.c:225

References arg, cstring_to_text(), DoubleMetaphone(), PG_ARGISNULL, PG_GETARG_TEXT_PP, PG_RETURN_NULL, PG_RETURN_TEXT_P, and text_to_cstring().

◆ dmetaphone_alt()

Datum dmetaphone_alt ( PG_FUNCTION_ARGS  )

Definition at line 160 of file dmetaphone.c.

161{
162 text *arg;
163 char *aptr,
164 *codes[2],
165 *code;
166
167#ifdef DMETAPHONE_NOSTRICT
168 if (PG_ARGISNULL(0))
169 PG_RETURN_NULL();
170#endif
171 arg = PG_GETARG_TEXT_PP(0);
172 aptr = text_to_cstring(arg);
173
174 DoubleMetaphone(aptr, codes);
175 code = codes[1];
176 if (!code)
177 code = "";
178
179 PG_RETURN_TEXT_P(cstring_to_text(code));
180}

References arg, cstring_to_text(), DoubleMetaphone(), PG_ARGISNULL, PG_GETARG_TEXT_PP, PG_RETURN_NULL, PG_RETURN_TEXT_P, and text_to_cstring().

◆ DoubleMetaphone()

static void DoubleMetaphone ( char *  str,
char **  codes 
)
static

Definition at line 395 of file dmetaphone.c.

/*
 * DoubleMetaphone: compute the two phonetic codes of str.
 *
 * str is only read here (strlen and NewMetaString); the work is done on an
 * upper-cased copy that is padded with one trailing space so that the
 * look-ahead tests can index past the real end of the word.
 *
 * The main loop walks the copy one letter group at a time, appending to a
 * "primary" and a "secondary" code.  It stops once both codes hold at least
 * four characters or the (unpadded) input is exhausted.  Both codes are then
 * cut to at most four characters and returned through codes[0] (primary) and
 * codes[1] (secondary).
 */
396{
397 int length;
398 metastring *original;
399 metastring *primary;
400 metastring *secondary;
401 int current;
402 int last;
403
404 current = 0;
405 /* we need the real length and last prior to padding */
406 length = strlen(str);
407 last = length - 1;
408 original = NewMetaString(str);
409 /* Pad original so we can index beyond end */
410 MetaphAdd(original, " ");
411
412 primary = NewMetaString("");
413 secondary = NewMetaString("");
/*
 * The str buffers of primary and secondary are handed back to the caller
 * through codes[], so clear the flag that lets DestroyMetaString pass them
 * to META_FREE.
 */
414 primary->free_string_on_destroy = 0;
415 secondary->free_string_on_destroy = 0;
416
417 MakeUpper(original);
418
419 /* skip these when at start of word */
420 if (StringAt(original, 0, 2, "GN", "KN", "PN", "WR", "PS", ""))
421 current += 1;
422
423 /* Initial 'X' is pronounced 'Z' e.g. 'Xavier' */
424 if (GetAt(original, 0) == 'X')
425 {
426 MetaphAdd(primary, "S"); /* 'Z' maps to 'S' */
427 MetaphAdd(secondary, "S");
428 current += 1;
429 }
430
431 /* main loop */
432 while ((primary->length < 4) || (secondary->length < 4))
433 {
434 if (current >= length)
435 break;
436
437 switch (GetAt(original, current))
438 {
439 case 'A':
440 case 'E':
441 case 'I':
442 case 'O':
443 case 'U':
444 case 'Y':
445 if (current == 0)
446 {
447 /* all init vowels now map to 'A' */
448 MetaphAdd(primary, "A");
449 MetaphAdd(secondary, "A");
450 }
451 current += 1;
452 break;
453
454 case 'B':
455
456 /* "-mb", e.g", "dumb", already skipped over... */
457 MetaphAdd(primary, "P");
458 MetaphAdd(secondary, "P");
459
460 if (GetAt(original, current + 1) == 'B')
461 current += 2;
462 else
463 current += 1;
464 break;
465
466 case '\xc7': /* C with cedilla */
467 MetaphAdd(primary, "S");
468 MetaphAdd(secondary, "S");
469 current += 1;
470 break;
471
472 case 'C':
473 /* various germanic */
474 if ((current > 1)
475 && !IsVowel(original, current - 2)
476 && StringAt(original, (current - 1), 3, "ACH", "")
477 && ((GetAt(original, current + 2) != 'I')
478 && ((GetAt(original, current + 2) != 'E')
479 || StringAt(original, (current - 2), 6, "BACHER",
480 "MACHER", ""))))
481 {
482 MetaphAdd(primary, "K");
483 MetaphAdd(secondary, "K");
484 current += 2;
485 break;
486 }
487
488 /* special case 'caesar' */
489 if ((current == 0)
490 && StringAt(original, current, 6, "CAESAR", ""))
491 {
492 MetaphAdd(primary, "S");
493 MetaphAdd(secondary, "S");
494 current += 2;
495 break;
496 }
497
498 /* italian 'chianti' */
499 if (StringAt(original, current, 4, "CHIA", ""))
500 {
501 MetaphAdd(primary, "K");
502 MetaphAdd(secondary, "K");
503 current += 2;
504 break;
505 }
506
507 if (StringAt(original, current, 2, "CH", ""))
508 {
509 /* find 'michael' */
510 if ((current > 0)
511 && StringAt(original, current, 4, "CHAE", ""))
512 {
513 MetaphAdd(primary, "K");
514 MetaphAdd(secondary, "X");
515 current += 2;
516 break;
517 }
518
519 /* greek roots e.g. 'chemistry', 'chorus' */
520 if ((current == 0)
521 && (StringAt(original, (current + 1), 5,
522 "HARAC", "HARIS", "")
523 || StringAt(original, (current + 1), 3, "HOR",
524 "HYM", "HIA", "HEM", ""))
525 && !StringAt(original, 0, 5, "CHORE", ""))
526 {
527 MetaphAdd(primary, "K");
528 MetaphAdd(secondary, "K");
529 current += 2;
530 break;
531 }
532
533 /* germanic, greek, or otherwise 'ch' for 'kh' sound */
534 if ((StringAt(original, 0, 4, "VAN ", "VON ", "")
535 || StringAt(original, 0, 3, "SCH", ""))
536 /* 'architect but not 'arch', 'orchestra', 'orchid' */
537 || StringAt(original, (current - 2), 6, "ORCHES",
538 "ARCHIT", "ORCHID", "")
539 || StringAt(original, (current + 2), 1, "T", "S",
540 "")
541 || ((StringAt(original, (current - 1), 1,
542 "A", "O", "U", "E", "")
543 || (current == 0))
544
545 /*
546 * e.g., 'wachtler', 'wechsler', but not 'tichner'
547 */
548 && StringAt(original, (current + 2), 1, "L", "R",
549 "N", "M", "B", "H", "F", "V", "W",
550 " ", "")))
551 {
552 MetaphAdd(primary, "K");
553 MetaphAdd(secondary, "K");
554 }
555 else
556 {
557 if (current > 0)
558 {
559 if (StringAt(original, 0, 2, "MC", ""))
560 {
561 /* e.g., "McHugh" */
562 MetaphAdd(primary, "K");
563 MetaphAdd(secondary, "K");
564 }
565 else
566 {
567 MetaphAdd(primary, "X");
568 MetaphAdd(secondary, "K");
569 }
570 }
571 else
572 {
573 MetaphAdd(primary, "X");
574 MetaphAdd(secondary, "X");
575 }
576 }
577 current += 2;
578 break;
579 }
580 /* e.g, 'czerny' */
581 if (StringAt(original, current, 2, "CZ", "")
582 && !StringAt(original, (current - 2), 4, "WICZ", ""))
583 {
584 MetaphAdd(primary, "S");
585 MetaphAdd(secondary, "X");
586 current += 2;
587 break;
588 }
589
590 /* e.g., 'focaccia' */
591 if (StringAt(original, (current + 1), 3, "CIA", ""))
592 {
593 MetaphAdd(primary, "X");
594 MetaphAdd(secondary, "X");
595 current += 3;
596 break;
597 }
598
599 /* double 'C', but not if e.g. 'McClellan' */
600 if (StringAt(original, current, 2, "CC", "")
601 && !((current == 1) && (GetAt(original, 0) == 'M')))
602 {
603 /* 'bellocchio' but not 'bacchus' */
604 if (StringAt(original, (current + 2), 1, "I", "E", "H", "")
605 && !StringAt(original, (current + 2), 2, "HU", ""))
606 {
607 /* 'accident', 'accede' 'succeed' */
608 if (((current == 1)
609 && (GetAt(original, current - 1) == 'A'))
610 || StringAt(original, (current - 1), 5, "UCCEE",
611 "UCCES", ""))
612 {
613 MetaphAdd(primary, "KS");
614 MetaphAdd(secondary, "KS");
615 /* 'bacci', 'bertucci', other italian */
616 }
617 else
618 {
619 MetaphAdd(primary, "X");
620 MetaphAdd(secondary, "X");
621 }
622 current += 3;
623 break;
624 }
625 else
626 { /* Pierce's rule */
627 MetaphAdd(primary, "K");
628 MetaphAdd(secondary, "K");
629 current += 2;
630 break;
631 }
632 }
633
634 if (StringAt(original, current, 2, "CK", "CG", "CQ", ""))
635 {
636 MetaphAdd(primary, "K");
637 MetaphAdd(secondary, "K");
638 current += 2;
639 break;
640 }
641
642 if (StringAt(original, current, 2, "CI", "CE", "CY", ""))
643 {
644 /* italian vs. english */
645 if (StringAt
646 (original, current, 3, "CIO", "CIE", "CIA", ""))
647 {
648 MetaphAdd(primary, "S");
649 MetaphAdd(secondary, "X");
650 }
651 else
652 {
653 MetaphAdd(primary, "S");
654 MetaphAdd(secondary, "S");
655 }
656 current += 2;
657 break;
658 }
659
660 /* else */
661 MetaphAdd(primary, "K");
662 MetaphAdd(secondary, "K");
663
664 /* name sent in 'mac caffrey', 'mac gregor */
665 if (StringAt(original, (current + 1), 2, " C", " Q", " G", ""))
666 current += 3;
667 else if (StringAt(original, (current + 1), 1, "C", "K", "Q", "")
668 && !StringAt(original, (current + 1), 2,
669 "CE", "CI", ""))
670 current += 2;
671 else
672 current += 1;
673 break;
674
675 case 'D':
676 if (StringAt(original, current, 2, "DG", ""))
677 {
678 if (StringAt(original, (current + 2), 1,
679 "I", "E", "Y", ""))
680 {
681 /* e.g. 'edge' */
682 MetaphAdd(primary, "J");
683 MetaphAdd(secondary, "J");
684 current += 3;
685 break;
686 }
687 else
688 {
689 /* e.g. 'edgar' */
690 MetaphAdd(primary, "TK");
691 MetaphAdd(secondary, "TK");
692 current += 2;
693 break;
694 }
695 }
696
697 if (StringAt(original, current, 2, "DT", "DD", ""))
698 {
699 MetaphAdd(primary, "T");
700 MetaphAdd(secondary, "T");
701 current += 2;
702 break;
703 }
704
705 /* else */
706 MetaphAdd(primary, "T");
707 MetaphAdd(secondary, "T");
708 current += 1;
709 break;
710
711 case 'F':
712 if (GetAt(original, current + 1) == 'F')
713 current += 2;
714 else
715 current += 1;
716 MetaphAdd(primary, "F");
717 MetaphAdd(secondary, "F");
718 break;
719
720 case 'G':
721 if (GetAt(original, current + 1) == 'H')
722 {
723 if ((current > 0) && !IsVowel(original, current - 1))
724 {
725 MetaphAdd(primary, "K");
726 MetaphAdd(secondary, "K");
727 current += 2;
728 break;
729 }
730
/*
 * NOTE(review): the only statement inside this test is guarded by
 * current == 0, so the outer current < 3 check has no effect of its own.
 */
731 if (current < 3)
732 {
733 /* 'ghislane', ghiradelli */
734 if (current == 0)
735 {
736 if (GetAt(original, current + 2) == 'I')
737 {
738 MetaphAdd(primary, "J");
739 MetaphAdd(secondary, "J");
740 }
741 else
742 {
743 MetaphAdd(primary, "K");
744 MetaphAdd(secondary, "K");
745 }
746 current += 2;
747 break;
748 }
749 }
750
751 /*
752 * Parker's rule (with some further refinements) - e.g.,
753 * 'hugh'
754 */
755 if (((current > 1)
756 && StringAt(original, (current - 2), 1,
757 "B", "H", "D", ""))
758 /* e.g., 'bough' */
759 || ((current > 2)
760 && StringAt(original, (current - 3), 1,
761 "B", "H", "D", ""))
762 /* e.g., 'broughton' */
763 || ((current > 3)
764 && StringAt(original, (current - 4), 1,
765 "B", "H", "")))
766 {
767 current += 2;
768 break;
769 }
770 else
771 {
772 /*
773 * e.g., 'laugh', 'McLaughlin', 'cough', 'gough',
774 * 'rough', 'tough'
775 */
776 if ((current > 2)
777 && (GetAt(original, current - 1) == 'U')
778 && StringAt(original, (current - 3), 1, "C",
779 "G", "L", "R", "T", ""))
780 {
781 MetaphAdd(primary, "F");
782 MetaphAdd(secondary, "F");
783 }
784 else if ((current > 0)
785 && GetAt(original, current - 1) != 'I')
786 {
787
788
789 MetaphAdd(primary, "K");
790 MetaphAdd(secondary, "K");
791 }
792
793 current += 2;
794 break;
795 }
796 }
797
798 if (GetAt(original, current + 1) == 'N')
799 {
800 if ((current == 1) && IsVowel(original, 0)
801 && !SlavoGermanic(original))
802 {
803 MetaphAdd(primary, "KN");
804 MetaphAdd(secondary, "N");
805 }
806 else
807 /* not e.g. 'cagney' */
/*
 * NOTE(review): this branch is only entered when the character at
 * current + 1 is 'N' (tested at line 798), so the != 'Y' comparison on
 * line 809 is always true here.
 */
808 if (!StringAt(original, (current + 2), 2, "EY", "")
809 && (GetAt(original, current + 1) != 'Y')
810 && !SlavoGermanic(original))
811 {
812 MetaphAdd(primary, "N");
813 MetaphAdd(secondary, "KN");
814 }
815 else
816 {
817 MetaphAdd(primary, "KN");
818 MetaphAdd(secondary, "KN");
819 }
820 current += 2;
821 break;
822 }
823
824 /* 'tagliaro' */
825 if (StringAt(original, (current + 1), 2, "LI", "")
826 && !SlavoGermanic(original))
827 {
828 MetaphAdd(primary, "KL");
829 MetaphAdd(secondary, "L");
830 current += 2;
831 break;
832 }
833
834 /* -ges-,-gep-,-gel-, -gie- at beginning */
835 if ((current == 0)
836 && ((GetAt(original, current + 1) == 'Y')
837 || StringAt(original, (current + 1), 2, "ES", "EP",
838 "EB", "EL", "EY", "IB", "IL", "IN", "IE",
839 "EI", "ER", "")))
840 {
841 MetaphAdd(primary, "K");
842 MetaphAdd(secondary, "J");
843 current += 2;
844 break;
845 }
846
847 /* -ger-, -gy- */
848 if ((StringAt(original, (current + 1), 2, "ER", "")
849 || (GetAt(original, current + 1) == 'Y'))
850 && !StringAt(original, 0, 6,
851 "DANGER", "RANGER", "MANGER", "")
852 && !StringAt(original, (current - 1), 1, "E", "I", "")
853 && !StringAt(original, (current - 1), 3, "RGY", "OGY", ""))
854 {
855 MetaphAdd(primary, "K");
856 MetaphAdd(secondary, "J");
857 current += 2;
858 break;
859 }
860
861 /* italian e.g, 'biaggi' */
862 if (StringAt(original, (current + 1), 1, "E", "I", "Y", "")
863 || StringAt(original, (current - 1), 4,
864 "AGGI", "OGGI", ""))
865 {
866 /* obvious germanic */
867 if ((StringAt(original, 0, 4, "VAN ", "VON ", "")
868 || StringAt(original, 0, 3, "SCH", ""))
869 || StringAt(original, (current + 1), 2, "ET", ""))
870 {
871 MetaphAdd(primary, "K");
872 MetaphAdd(secondary, "K");
873 }
874 else
875 {
876 /* always soft if french ending */
877 if (StringAt
878 (original, (current + 1), 4, "IER ", ""))
879 {
880 MetaphAdd(primary, "J");
881 MetaphAdd(secondary, "J");
882 }
883 else
884 {
885 MetaphAdd(primary, "J");
886 MetaphAdd(secondary, "K");
887 }
888 }
889 current += 2;
890 break;
891 }
892
893 if (GetAt(original, current + 1) == 'G')
894 current += 2;
895 else
896 current += 1;
897 MetaphAdd(primary, "K");
898 MetaphAdd(secondary, "K");
899 break;
900
901 case 'H':
902 /* only keep if first & before vowel or btw. 2 vowels */
903 if (((current == 0) || IsVowel(original, current - 1))
904 && IsVowel(original, current + 1))
905 {
906 MetaphAdd(primary, "H");
907 MetaphAdd(secondary, "H");
908 current += 2;
909 }
910 else
911 /* also takes care of 'HH' */
912 current += 1;
913 break;
914
915 case 'J':
916 /* obvious spanish, 'jose', 'san jacinto' */
917 if (StringAt(original, current, 4, "JOSE", "")
918 || StringAt(original, 0, 4, "SAN ", ""))
919 {
920 if (((current == 0)
921 && (GetAt(original, current + 4) == ' '))
922 || StringAt(original, 0, 4, "SAN ", ""))
923 {
924 MetaphAdd(primary, "H");
925 MetaphAdd(secondary, "H");
926 }
927 else
928 {
929 MetaphAdd(primary, "J");
930 MetaphAdd(secondary, "H");
931 }
932 current += 1;
933 break;
934 }
935
936 if ((current == 0)
937 && !StringAt(original, current, 4, "JOSE", ""))
938 {
939 MetaphAdd(primary, "J"); /* Yankelovich/Jankelowicz */
940 MetaphAdd(secondary, "A");
941 }
942 else
943 {
944 /* spanish pron. of e.g. 'bajador' */
945 if (IsVowel(original, current - 1)
946 && !SlavoGermanic(original)
947 && ((GetAt(original, current + 1) == 'A')
948 || (GetAt(original, current + 1) == 'O')))
949 {
950 MetaphAdd(primary, "J");
951 MetaphAdd(secondary, "H");
952 }
953 else
954 {
955 if (current == last)
956 {
957 MetaphAdd(primary, "J");
958 MetaphAdd(secondary, "");
959 }
960 else
961 {
962 if (!StringAt(original, (current + 1), 1, "L", "T",
963 "K", "S", "N", "M", "B", "Z", "")
964 && !StringAt(original, (current - 1), 1,
965 "S", "K", "L", ""))
966 {
967 MetaphAdd(primary, "J");
968 MetaphAdd(secondary, "J");
969 }
970 }
971 }
972 }
973
974 if (GetAt(original, current + 1) == 'J') /* it could happen! */
975 current += 2;
976 else
977 current += 1;
978 break;
979
980 case 'K':
981 if (GetAt(original, current + 1) == 'K')
982 current += 2;
983 else
984 current += 1;
985 MetaphAdd(primary, "K");
986 MetaphAdd(secondary, "K");
987 break;
988
989 case 'L':
990 if (GetAt(original, current + 1) == 'L')
991 {
992 /* spanish e.g. 'cabrillo', 'gallegos' */
993 if (((current == (length - 3))
994 && StringAt(original, (current - 1), 4, "ILLO",
995 "ILLA", "ALLE", ""))
996 || ((StringAt(original, (last - 1), 2, "AS", "OS", "")
997 || StringAt(original, last, 1, "A", "O", ""))
998 && StringAt(original, (current - 1), 4,
999 "ALLE", "")))
1000 {
1001 MetaphAdd(primary, "L");
1002 MetaphAdd(secondary, "");
1003 current += 2;
1004 break;
1005 }
1006 current += 2;
1007 }
1008 else
1009 current += 1;
1010 MetaphAdd(primary, "L");
1011 MetaphAdd(secondary, "L");
1012 break;
1013
1014 case 'M':
1015 if ((StringAt(original, (current - 1), 3, "UMB", "")
1016 && (((current + 1) == last)
1017 || StringAt(original, (current + 2), 2, "ER", "")))
1018 /* 'dumb','thumb' */
1019 || (GetAt(original, current + 1) == 'M'))
1020 current += 2;
1021 else
1022 current += 1;
1023 MetaphAdd(primary, "M");
1024 MetaphAdd(secondary, "M");
1025 break;
1026
1027 case 'N':
1028 if (GetAt(original, current + 1) == 'N')
1029 current += 2;
1030 else
1031 current += 1;
1032 MetaphAdd(primary, "N");
1033 MetaphAdd(secondary, "N");
1034 break;
1035
1036 case '\xd1': /* N with tilde */
1037 current += 1;
1038 MetaphAdd(primary, "N");
1039 MetaphAdd(secondary, "N");
1040 break;
1041
1042 case 'P':
1043 if (GetAt(original, current + 1) == 'H')
1044 {
1045 MetaphAdd(primary, "F");
1046 MetaphAdd(secondary, "F");
1047 current += 2;
1048 break;
1049 }
1050
1051 /* also account for "campbell", "raspberry" */
1052 if (StringAt(original, (current + 1), 1, "P", "B", ""))
1053 current += 2;
1054 else
1055 current += 1;
1056 MetaphAdd(primary, "P");
1057 MetaphAdd(secondary, "P");
1058 break;
1059
1060 case 'Q':
1061 if (GetAt(original, current + 1) == 'Q')
1062 current += 2;
1063 else
1064 current += 1;
1065 MetaphAdd(primary, "K");
1066 MetaphAdd(secondary, "K");
1067 break;
1068
1069 case 'R':
1070 /* french e.g. 'rogier', but exclude 'hochmeier' */
1071 if ((current == last)
1072 && !SlavoGermanic(original)
1073 && StringAt(original, (current - 2), 2, "IE", "")
1074 && !StringAt(original, (current - 4), 2, "ME", "MA", ""))
1075 {
1076 MetaphAdd(primary, "");
1077 MetaphAdd(secondary, "R");
1078 }
1079 else
1080 {
1081 MetaphAdd(primary, "R");
1082 MetaphAdd(secondary, "R");
1083 }
1084
1085 if (GetAt(original, current + 1) == 'R')
1086 current += 2;
1087 else
1088 current += 1;
1089 break;
1090
1091 case 'S':
1092 /* special cases 'island', 'isle', 'carlisle', 'carlysle' */
1093 if (StringAt(original, (current - 1), 3, "ISL", "YSL", ""))
1094 {
1095 current += 1;
1096 break;
1097 }
1098
1099 /* special case 'sugar-' */
1100 if ((current == 0)
1101 && StringAt(original, current, 5, "SUGAR", ""))
1102 {
1103 MetaphAdd(primary, "X");
1104 MetaphAdd(secondary, "S");
1105 current += 1;
1106 break;
1107 }
1108
1109 if (StringAt(original, current, 2, "SH", ""))
1110 {
1111 /* germanic */
1112 if (StringAt
1113 (original, (current + 1), 4, "HEIM", "HOEK", "HOLM",
1114 "HOLZ", ""))
1115 {
1116 MetaphAdd(primary, "S");
1117 MetaphAdd(secondary, "S");
1118 }
1119 else
1120 {
1121 MetaphAdd(primary, "X");
1122 MetaphAdd(secondary, "X");
1123 }
1124 current += 2;
1125 break;
1126 }
1127
1128 /* italian & armenian */
1129 if (StringAt(original, current, 3, "SIO", "SIA", "")
1130 || StringAt(original, current, 4, "SIAN", ""))
1131 {
1132 if (!SlavoGermanic(original))
1133 {
1134 MetaphAdd(primary, "S");
1135 MetaphAdd(secondary, "X");
1136 }
1137 else
1138 {
1139 MetaphAdd(primary, "S");
1140 MetaphAdd(secondary, "S");
1141 }
1142 current += 3;
1143 break;
1144 }
1145
1146 /*
1147 * german & anglicisations, e.g. 'smith' match 'schmidt',
1148 * 'snider' match 'schneider' also, -sz- in slavic language
1149 * although in hungarian it is pronounced 's'
1150 */
1151 if (((current == 0)
1152 && StringAt(original, (current + 1), 1,
1153 "M", "N", "L", "W", ""))
1154 || StringAt(original, (current + 1), 1, "Z", ""))
1155 {
1156 MetaphAdd(primary, "S");
1157 MetaphAdd(secondary, "X");
1158 if (StringAt(original, (current + 1), 1, "Z", ""))
1159 current += 2;
1160 else
1161 current += 1;
1162 break;
1163 }
1164
1165 if (StringAt(original, current, 2, "SC", ""))
1166 {
1167 /* Schlesinger's rule */
1168 if (GetAt(original, current + 2) == 'H')
1169 {
1170 /* dutch origin, e.g. 'school', 'schooner' */
1171 if (StringAt(original, (current + 3), 2,
1172 "OO", "ER", "EN",
1173 "UY", "ED", "EM", ""))
1174 {
1175 /* 'schermerhorn', 'schenker' */
1176 if (StringAt(original, (current + 3), 2,
1177 "ER", "EN", ""))
1178 {
1179 MetaphAdd(primary, "X");
1180 MetaphAdd(secondary, "SK");
1181 }
1182 else
1183 {
1184 MetaphAdd(primary, "SK");
1185 MetaphAdd(secondary, "SK");
1186 }
1187 current += 3;
1188 break;
1189 }
1190 else
1191 {
1192 if ((current == 0) && !IsVowel(original, 3)
1193 && (GetAt(original, 3) != 'W'))
1194 {
1195 MetaphAdd(primary, "X");
1196 MetaphAdd(secondary, "S");
1197 }
1198 else
1199 {
1200 MetaphAdd(primary, "X");
1201 MetaphAdd(secondary, "X");
1202 }
1203 current += 3;
1204 break;
1205 }
1206 }
1207
1208 if (StringAt(original, (current + 2), 1,
1209 "I", "E", "Y", ""))
1210 {
1211 MetaphAdd(primary, "S");
1212 MetaphAdd(secondary, "S");
1213 current += 3;
1214 break;
1215 }
1216 /* else */
1217 MetaphAdd(primary, "SK");
1218 MetaphAdd(secondary, "SK");
1219 current += 3;
1220 break;
1221 }
1222
1223 /* french e.g. 'resnais', 'artois' */
1224 if ((current == last)
1225 && StringAt(original, (current - 2), 2, "AI", "OI", ""))
1226 {
1227 MetaphAdd(primary, "");
1228 MetaphAdd(secondary, "S");
1229 }
1230 else
1231 {
1232 MetaphAdd(primary, "S");
1233 MetaphAdd(secondary, "S");
1234 }
1235
1236 if (StringAt(original, (current + 1), 1, "S", "Z", ""))
1237 current += 2;
1238 else
1239 current += 1;
1240 break;
1241
1242 case 'T':
1243 if (StringAt(original, current, 4, "TION", ""))
1244 {
1245 MetaphAdd(primary, "X");
1246 MetaphAdd(secondary, "X");
1247 current += 3;
1248 break;
1249 }
1250
1251 if (StringAt(original, current, 3, "TIA", "TCH", ""))
1252 {
1253 MetaphAdd(primary, "X");
1254 MetaphAdd(secondary, "X");
1255 current += 3;
1256 break;
1257 }
1258
1259 if (StringAt(original, current, 2, "TH", "")
1260 || StringAt(original, current, 3, "TTH", ""))
1261 {
1262 /* special case 'thomas', 'thames' or germanic */
1263 if (StringAt(original, (current + 2), 2, "OM", "AM", "")
1264 || StringAt(original, 0, 4, "VAN ", "VON ", "")
1265 || StringAt(original, 0, 3, "SCH", ""))
1266 {
1267 MetaphAdd(primary, "T");
1268 MetaphAdd(secondary, "T");
1269 }
1270 else
1271 {
1272 MetaphAdd(primary, "0");
1273 MetaphAdd(secondary, "T");
1274 }
1275 current += 2;
1276 break;
1277 }
1278
1279 if (StringAt(original, (current + 1), 1, "T", "D", ""))
1280 current += 2;
1281 else
1282 current += 1;
1283 MetaphAdd(primary, "T");
1284 MetaphAdd(secondary, "T");
1285 break;
1286
1287 case 'V':
1288 if (GetAt(original, current + 1) == 'V')
1289 current += 2;
1290 else
1291 current += 1;
1292 MetaphAdd(primary, "F");
1293 MetaphAdd(secondary, "F");
1294 break;
1295
1296 case 'W':
1297 /* can also be in middle of word */
1298 if (StringAt(original, current, 2, "WR", ""))
1299 {
1300 MetaphAdd(primary, "R");
1301 MetaphAdd(secondary, "R");
1302 current += 2;
1303 break;
1304 }
1305
1306 if ((current == 0)
1307 && (IsVowel(original, current + 1)
1308 || StringAt(original, current, 2, "WH", "")))
1309 {
1310 /* Wasserman should match Vasserman */
1311 if (IsVowel(original, current + 1))
1312 {
1313 MetaphAdd(primary, "A");
1314 MetaphAdd(secondary, "F");
1315 }
1316 else
1317 {
1318 /* need Uomo to match Womo */
1319 MetaphAdd(primary, "A");
1320 MetaphAdd(secondary, "A");
1321 }
1322 }
1323
1324 /* Arnow should match Arnoff */
1325 if (((current == last) && IsVowel(original, current - 1))
1326 || StringAt(original, (current - 1), 5, "EWSKI", "EWSKY",
1327 "OWSKI", "OWSKY", "")
1328 || StringAt(original, 0, 3, "SCH", ""))
1329 {
1330 MetaphAdd(primary, "");
1331 MetaphAdd(secondary, "F");
1332 current += 1;
1333 break;
1334 }
1335
1336 /* polish e.g. 'filipowicz' */
1337 if (StringAt(original, current, 4, "WICZ", "WITZ", ""))
1338 {
1339 MetaphAdd(primary, "TS");
1340 MetaphAdd(secondary, "FX");
1341 current += 4;
1342 break;
1343 }
1344
1345 /* else skip it */
1346 current += 1;
1347 break;
1348
1349 case 'X':
1350 /* french e.g. breaux */
1351 if (!((current == last)
1352 && (StringAt(original, (current - 3), 3,
1353 "IAU", "EAU", "")
1354 || StringAt(original, (current - 2), 2,
1355 "AU", "OU", ""))))
1356 {
1357 MetaphAdd(primary, "KS");
1358 MetaphAdd(secondary, "KS");
1359 }
1360
1361
1362 if (StringAt(original, (current + 1), 1, "C", "X", ""))
1363 current += 2;
1364 else
1365 current += 1;
1366 break;
1367
1368 case 'Z':
1369 /* chinese pinyin e.g. 'zhao' */
1370 if (GetAt(original, current + 1) == 'H')
1371 {
1372 MetaphAdd(primary, "J");
1373 MetaphAdd(secondary, "J");
1374 current += 2;
1375 break;
1376 }
1377 else if (StringAt(original, (current + 1), 2,
1378 "ZO", "ZI", "ZA", "")
1379 || (SlavoGermanic(original)
1380 && ((current > 0)
1381 && GetAt(original, current - 1) != 'T')))
1382 {
1383 MetaphAdd(primary, "S");
1384 MetaphAdd(secondary, "TS");
1385 }
1386 else
1387 {
1388 MetaphAdd(primary, "S");
1389 MetaphAdd(secondary, "S");
1390 }
1391
1392 if (GetAt(original, current + 1) == 'Z')
1393 current += 2;
1394 else
1395 current += 1;
1396 break;
1397
1398 default:
1399 current += 1;
1400 }
1401
1402 /*
1403 * printf("PRIMARY: %s\n", primary->str); printf("SECONDARY: %s\n",
1404 * secondary->str);
1405 */
1406 }
1407
1408
/*
 * Cut each code to at most four characters by writing a terminator at
 * index 4.  SetAt only writes the byte; the length field is not changed.
 */
1409 if (primary->length > 4)
1410 SetAt(primary, 4, '\0');
1411
1412 if (secondary->length > 4)
1413 SetAt(secondary, 4, '\0');
1414
/* codes[0] receives the primary code, codes[1] the secondary code */
1415 *codes = primary->str;
1416 *++codes = secondary->str;
1417
1418 DestroyMetaString(original);
1419 DestroyMetaString(primary);
1420 DestroyMetaString(secondary);
1421}
static char GetAt(metastring *s, int pos)
Definition: dmetaphone.c:325
static void SetAt(metastring *s, int pos, char c)
Definition: dmetaphone.c:335
static void MetaphAdd(metastring *s, const char *new_str)
Definition: dmetaphone.c:378
static int SlavoGermanic(metastring *s)
Definition: dmetaphone.c:309
static void MakeUpper(metastring *s)
Definition: dmetaphone.c:282
static int StringAt(metastring *s, int start, int length,...)
Definition: dmetaphone.c:348
static int IsVowel(metastring *s, int pos)
Definition: dmetaphone.c:292
static void DestroyMetaString(metastring *s)
Definition: dmetaphone.c:260
static metastring * NewMetaString(const char *init_str)
Definition: dmetaphone.c:235
const char * str

References DestroyMetaString(), metastring::free_string_on_destroy, GetAt(), IsVowel(), metastring::length, MakeUpper(), MetaphAdd(), NewMetaString(), SetAt(), SlavoGermanic(), metastring::str, str, and StringAt().

Referenced by dmetaphone(), and dmetaphone_alt().

◆ GetAt()

static char GetAt ( metastring * s,
int  pos 
)
static

Definition at line 325 of file dmetaphone.c.

/*
 * GetAt: bounds-checked read of one character of s->str.
 *
 * Returns the character at index pos, or '\0' when pos is negative or
 * not less than s->length.
 */
326{
327 if ((pos < 0) || (pos >= s->length))
328 return '\0';
329
330 return ((char) *(s->str + pos));
331}

References metastring::length, and metastring::str.

Referenced by DoubleMetaphone().

◆ IncreaseBuffer()

static void IncreaseBuffer ( metastring * s,
int  chars_needed 
)
static

Definition at line 273 of file dmetaphone.c.

/*
 * IncreaseBuffer: enlarge the character buffer of s.
 *
 * The buffer is resized through META_REALLOC (repalloc) to the current
 * bufsize plus chars_needed plus 10 spare bytes, and bufsize is updated to
 * that same value.  s->length and the stored text are not touched here.
 */
274{
275 META_REALLOC(s->str, (s->bufsize + chars_needed + 10), char);
276 assert(s->str != NULL);
277 s->bufsize = s->bufsize + chars_needed + 10;
278}
#define META_REALLOC(v, n, t)
Definition: dmetaphone.c:190
#define assert(x)
Definition: regcustom.h:56

References assert, metastring::bufsize, META_REALLOC, and metastring::str.

Referenced by MetaphAdd().

◆ IsVowel()

static int IsVowel ( metastring * s,
int  pos 
)
static

Definition at line 292 of file dmetaphone.c.

/*
 * IsVowel: report whether the character at index pos of s->str is one of
 * the upper-case letters A, E, I, O, U or Y.
 *
 * Returns 1 for a match and 0 otherwise.  An index that is negative or not
 * less than s->length also yields 0.  Only upper-case letters are tested.
 */
293{
294 char c;
295
296 if ((pos < 0) || (pos >= s->length))
297 return 0;
298
299 c = *(s->str + pos);
300 if ((c == 'A') || (c == 'E') || (c == 'I') || (c == 'O') ||
301 (c == 'U') || (c == 'Y'))
302 return 1;
303
304 return 0;
305}
char * c

References metastring::length, and metastring::str.

Referenced by DoubleMetaphone().

◆ MakeUpper()

static void MakeUpper ( metastring * s)
static

Definition at line 282 of file dmetaphone.c.

/*
 * MakeUpper: convert s->str to upper case in place.
 *
 * Walks the buffer up to the terminating NUL and replaces each byte with
 * the result of toupper().  Each byte is cast to unsigned char first, so
 * that a negative char value is never passed to toupper().
 */
283{
284 char *i;
285
286 for (i = s->str; *i; i++)
287 *i = toupper((unsigned char) *i);
288}
int i
Definition: isn.c:74

References i, and metastring::str.

Referenced by DoubleMetaphone().

◆ MetaphAdd()

static void MetaphAdd ( metastring * s,
const char *  new_str 
)
static

Definition at line 378 of file dmetaphone.c.

/*
 * MetaphAdd: append new_str to the text held in s.
 *
 * A NULL new_str is ignored.  When the combined length would exceed
 * bufsize - 1 (one byte is kept for the terminating NUL), the buffer is
 * grown with IncreaseBuffer before the text is appended with strcat.
 * s->length is then advanced by the length of new_str.
 */
379{
380 int add_length;
381
382 if (new_str == NULL)
383 return;
384
385 add_length = strlen(new_str);
386 if ((s->length + add_length) > (s->bufsize - 1))
387 IncreaseBuffer(s, add_length);
388
389 strcat(s->str, new_str);
390 s->length += add_length;
391}
static void IncreaseBuffer(metastring *s, int chars_needed)
Definition: dmetaphone.c:273

References metastring::bufsize, IncreaseBuffer(), metastring::length, and metastring::str.

Referenced by DoubleMetaphone().

◆ NewMetaString()

static metastring * NewMetaString ( const char *  init_str)
static

Definition at line 235 of file dmetaphone.c.

236{
237 metastring *s;
238 char empty_string[] = "";
239
240 META_MALLOC(s, 1, metastring);
241 assert(s != NULL);
242
243 if (init_str == NULL)
244 init_str = empty_string;
245 s->length = strlen(init_str);
246 /* preallocate a bit more for potential growth */
247 s->bufsize = s->length + 7;
248
249 META_MALLOC(s->str, s->bufsize, char);
250 assert(s->str != NULL);
251
252 memcpy(s->str, init_str, s->length + 1);
253 s->free_string_on_destroy = 1;
254
255 return s;
256}
#define META_MALLOC(v, n, t)
Definition: dmetaphone.c:187

References assert, metastring::bufsize, metastring::free_string_on_destroy, metastring::length, META_MALLOC, and metastring::str.

Referenced by DoubleMetaphone().

◆ PG_FUNCTION_INFO_V1() [1/2]

PG_FUNCTION_INFO_V1 ( dmetaphone  )

◆ PG_FUNCTION_INFO_V1() [2/2]

PG_FUNCTION_INFO_V1 ( dmetaphone_alt  )

◆ SetAt()

static void SetAt ( metastring * s,
int  pos,
char  c 
)
static

Definition at line 335 of file dmetaphone.c.

/*
 * SetAt: bounds-checked write of one character into s->str.
 *
 * Stores c at index pos.  The call does nothing when pos is negative or
 * not less than s->length.  s->length itself is never changed, even when
 * c is '\0'.
 */
336{
337 if ((pos < 0) || (pos >= s->length))
338 return;
339
340 *(s->str + pos) = c;
341}

References metastring::length, and metastring::str.

Referenced by DoubleMetaphone().

◆ SlavoGermanic()

static int SlavoGermanic ( metastring * s)
static

Definition at line 309 of file dmetaphone.c.

/*
 * SlavoGermanic: return 1 when s->str contains "W", "K", "CZ" or "WITZ"
 * anywhere, and 0 otherwise.
 *
 * The search is case-sensitive (strstr), so only upper-case occurrences
 * are found.
 *
 * NOTE(review): any string containing "WITZ" also contains "W", which is
 * tested first, so the "WITZ" branch can never be the one that matches.
 */
310{
311 if (strstr(s->str, "W"))
312 return 1;
313 else if (strstr(s->str, "K"))
314 return 1;
315 else if (strstr(s->str, "CZ"))
316 return 1;
317 else if (strstr(s->str, "WITZ"))
318 return 1;
319 else
320 return 0;
321}

References metastring::str.

Referenced by DoubleMetaphone().

◆ StringAt()

static int StringAt ( metastring * s,
int  start,
int  length,
  ... 
)
static

Definition at line 348 of file dmetaphone.c.

/*
 * StringAt: test whether the text of s at offset start begins with any of
 * the candidate strings passed as variadic arguments.
 *
 * Each candidate is compared against s->str + start with strncmp over
 * `length` characters.  The candidate list MUST end with an empty string
 * "", which is what stops the loop; the empty string itself never counts
 * as a match.
 *
 * Returns 1 on the first matching candidate and 0 if none match.  Also
 * returns 0, without reading any candidate, when start is negative or not
 * less than s->length.
 */
349{
350 char *test;
351 char *pos;
352 va_list ap;
353
354 if ((start < 0) || (start >= s->length))
355 return 0;
356
357 pos = (s->str + start);
358 va_start(ap, length);
359
360 do
361 {
362 test = va_arg(ap, char *);
363 if (*test && (strncmp(pos, test, length) == 0))
364 {
365 va_end(ap);
366 return 1;
367 }
368 }
369 while (strcmp(test, "") != 0);
370
371 va_end(ap);
372
373 return 0;
374}
return str start
static void test(void)

References metastring::length, start, metastring::str, and test().

Referenced by DoubleMetaphone().