PostgreSQL Source Code  git master
pgstrcasecmp.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pgstrcasecmp.c
4  * Portable SQL-like case-independent comparisons and conversions.
5  *
6  * SQL99 specifies Unicode-aware case normalization, which we don't yet
7  * have the infrastructure for. Instead we use tolower() to provide a
8  * locale-aware translation. However, there are some locales where this
9  * is not right either (eg, Turkish may do strange things with 'i' and
10  * 'I'). Our current compromise is to use tolower() for characters with
11  * the high bit set, and use an ASCII-only downcasing for 7-bit
12  * characters.
13  *
14  * NB: this code should match downcase_truncate_identifier() in scansup.c.
15  *
16  * We also provide strict ASCII-only case conversion functions, which can
17  * be used to implement C/POSIX case folding semantics no matter what the
18  * C library thinks the locale is.
19  *
20  *
21  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
22  *
23  * src/port/pgstrcasecmp.c
24  *
25  *-------------------------------------------------------------------------
26  */
27 #include "c.h"
28 
29 #include <ctype.h>
30 
31 
/*
 * pg_strcasecmp
 *
 * Compare two NUL-terminated strings, ignoring case.
 *
 * Bytes are folded only when they differ as stored: 'A'..'Z' are shifted
 * to lower case arithmetically (independent of the C library's locale),
 * while bytes with the high bit set are folded with tolower() if isupper()
 * reports them as upper case.
 *
 * Returns zero if the strings match; otherwise the difference between the
 * first pair of folded bytes (taken as unsigned char) that do not match.
 */
int
pg_strcasecmp(const char *s1, const char *s2)
{
	unsigned char lhs;
	unsigned char rhs;

	do
	{
		lhs = (unsigned char) *s1++;
		rhs = (unsigned char) *s2++;

		/* Identical bytes need no folding; go straight to the end test. */
		if (lhs == rhs)
			continue;

		if (lhs >= 'A' && lhs <= 'Z')
			lhs += 'a' - 'A';
		else if (IS_HIGHBIT_SET(lhs) && isupper(lhs))
			lhs = tolower(lhs);

		if (rhs >= 'A' && rhs <= 'Z')
			rhs += 'a' - 'A';
		else if (IS_HIGHBIT_SET(rhs) && isupper(rhs))
			rhs = tolower(rhs);

		/* Still different after folding: the strings really differ. */
		if (lhs != rhs)
			return (int) lhs - (int) rhs;
	} while (lhs != 0);

	return 0;
}
63 
/*
 * pg_strncasecmp
 *
 * Length-limited variant of the case-independent comparison: the inputs
 * need not be NUL-terminated, and no more than n bytes of either string
 * are examined.  Comparison also stops at the first NUL byte that both
 * strings have in common.
 *
 * Folding follows the same rules as the unbounded comparison: 'A'..'Z' are
 * lowered arithmetically, high-bit-set bytes go through isupper()/tolower().
 *
 * Returns zero if the examined prefixes match; otherwise the difference
 * between the first pair of folded bytes (as unsigned char) that differ.
 */
int
pg_strncasecmp(const char *s1, const char *s2, size_t n)
{
	size_t		pos;

	for (pos = 0; pos < n; pos++)
	{
		unsigned char lhs = (unsigned char) s1[pos];
		unsigned char rhs = (unsigned char) s2[pos];

		if (lhs != rhs)
		{
			/* Fold each side only once the raw bytes are known to differ. */
			if (lhs >= 'A' && lhs <= 'Z')
				lhs += 'a' - 'A';
			else if (IS_HIGHBIT_SET(lhs) && isupper(lhs))
				lhs = tolower(lhs);

			if (rhs >= 'A' && rhs <= 'Z')
				rhs += 'a' - 'A';
			else if (IS_HIGHBIT_SET(rhs) && isupper(rhs))
				rhs = tolower(rhs);

			if (lhs != rhs)
				return (int) lhs - (int) rhs;
		}

		/* Reached the terminator in both strings: they are equal. */
		if (lhs == 0)
			return 0;
	}

	return 0;
}
96 
/*
 * pg_toupper
 *
 * Convert one character to upper case.
 *
 * ASCII 'a'..'z' are converted arithmetically; a character with the high
 * bit set is passed to toupper() only when islower() says it is lower
 * case.  Anything else comes back unchanged, so (unlike some toupper()
 * implementations) it is safe to call this on non-lower-case input.
 * Be aware that byte-at-a-time folding is a bit bogus for multibyte
 * character sets.
 */
unsigned char
pg_toupper(unsigned char ch)
{
	if (ch >= 'a' && ch <= 'z')
		return (unsigned char) (ch + ('A' - 'a'));

	if (IS_HIGHBIT_SET(ch) && islower(ch))
		return (unsigned char) toupper(ch);

	return ch;
}
113 
/*
 * pg_tolower
 *
 * Convert one character to lower case.
 *
 * ASCII 'A'..'Z' are converted arithmetically; a character with the high
 * bit set is passed to tolower() only when isupper() says it is upper
 * case.  Anything else comes back unchanged, so (unlike some tolower()
 * implementations) it is safe to call this on non-upper-case input.
 * Be aware that byte-at-a-time folding is a bit bogus for multibyte
 * character sets.
 */
unsigned char
pg_tolower(unsigned char ch)
{
	if (ch >= 'A' && ch <= 'Z')
		return (unsigned char) (ch + ('a' - 'A'));

	if (IS_HIGHBIT_SET(ch) && isupper(ch))
		return (unsigned char) tolower(ch);

	return ch;
}
130 
/*
 * pg_ascii_toupper
 *
 * Upper-case a character using C/POSIX locale rules only: 'a'..'z' map to
 * 'A'..'Z' and every other value is returned untouched.  No C library
 * locale-dependent function is consulted.
 */
unsigned char
pg_ascii_toupper(unsigned char ch)
{
	/* Outside 'a'..'z' there is nothing to fold. */
	if (ch < 'a' || ch > 'z')
		return ch;

	return (unsigned char) (ch + ('A' - 'a'));
}
141 
/*
 * pg_ascii_tolower
 *
 * Lower-case a character using C/POSIX locale rules only: 'A'..'Z' map to
 * 'a'..'z' and every other value is returned untouched.  No C library
 * locale-dependent function is consulted.
 */
unsigned char
pg_ascii_tolower(unsigned char ch)
{
	/* Outside 'A'..'Z' there is nothing to fold. */
	if (ch < 'A' || ch > 'Z')
		return ch;

	return (unsigned char) (ch + ('a' - 'A'));
}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1142
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
unsigned char pg_toupper(unsigned char ch)
Definition: pgstrcasecmp.c:105
unsigned char pg_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:122
unsigned char pg_ascii_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:146
unsigned char pg_ascii_toupper(unsigned char ch)
Definition: pgstrcasecmp.c:135
int pg_strncasecmp(const char *s1, const char *s2, size_t n)
Definition: pgstrcasecmp.c:69
char * s1
char * s2