PostgreSQL Source Code  git master
unicode_case.c File Reference
Include dependency graph for unicode_case.c:

Go to the source code of this file.

Functions

static const pg_case_map * find_case_map (pg_wchar ucs)
 
static size_t convert_case (char *dst, size_t dstsize, const char *src, ssize_t srclen, CaseKind str_casekind, WordBoundaryNext wbnext, void *wbstate)
 
pg_wchar unicode_lowercase_simple (pg_wchar code)
 
pg_wchar unicode_titlecase_simple (pg_wchar code)
 
pg_wchar unicode_uppercase_simple (pg_wchar code)
 
size_t unicode_strlower (char *dst, size_t dstsize, const char *src, ssize_t srclen)
 
size_t unicode_strtitle (char *dst, size_t dstsize, const char *src, ssize_t srclen, WordBoundaryNext wbnext, void *wbstate)
 
size_t unicode_strupper (char *dst, size_t dstsize, const char *src, ssize_t srclen)
 

Function Documentation

◆ convert_case()

static size_t convert_case ( char *  dst,
size_t  dstsize,
const char *  src,
ssize_t  srclen,
CaseKind  str_casekind,
WordBoundaryNext  wbnext,
void *  wbstate 
)
static

Definition at line 137 of file unicode_case.c.

/*
 * Case-convert the UTF-8 text in src and store the result in dst.
 *
 * src is read one code point at a time until srclen bytes are consumed or a
 * NUL byte is reached; a negative srclen means "stop only at NUL".  Each code
 * point is looked up with find_case_map(); when an entry exists its simple
 * mapping for the current case kind is emitted, otherwise the source bytes
 * are copied unchanged.
 *
 * For CaseTitle, wbnext(wbstate) supplies successive word boundaries, which
 * are compared against the byte offset into src.  The character located at a
 * boundary is mapped with CaseUpper and every other character with CaseLower.
 * For the other case kinds wbnext and wbstate must both be NULL.
 *
 * Nothing is written past dstsize bytes, but result_len keeps growing, so the
 * return value is the number of bytes the complete result requires (not
 * counting the terminating NUL).  dst is NUL-terminated only when
 * result_len < dstsize.
 */
139 {
140  /* character CaseKind varies while titlecasing */
141  CaseKind chr_casekind = str_casekind;
142  size_t srcoff = 0; /* byte offset of the next character in src */
143  size_t result_len = 0; /* bytes the full result needs, written or not */
144  size_t boundary = 0; /* most recent offset obtained from wbnext */
145 
146  Assert((str_casekind == CaseTitle && wbnext && wbstate) ||
147  (str_casekind != CaseTitle && !wbnext && !wbstate)); /* callback and state come as a pair, only for titlecasing */
148 
149  if (str_casekind == CaseTitle)
150  {
151  boundary = wbnext(wbstate);
152  Assert(boundary == 0); /* start of text is always a boundary */
153  }
154 
155  while ((srclen < 0 || srcoff < srclen) && src[srcoff] != '\0') /* negative srclen: run until NUL; otherwise stop at srclen bytes or NUL */
156  {
157  pg_wchar u1 = utf8_to_unicode((unsigned char *) src + srcoff);
158  int u1len = unicode_utf8len(u1); /* used below as the byte length of the source character */
159  const pg_case_map *casemap = find_case_map(u1);
160 
161  if (str_casekind == CaseTitle)
162  {
163  if (srcoff == boundary)
164  {
165  chr_casekind = CaseUpper; /* character at a boundary gets the uppercase mapping */
166  boundary = wbnext(wbstate); /* advance to the following boundary */
167  }
168  else
169  chr_casekind = CaseLower;
170  }
171 
172  /* perform mapping, update result_len, and write to dst */
173  if (casemap)
174  {
175  pg_wchar u2 = casemap->simplemap[chr_casekind];
176  pg_wchar u2len = unicode_utf8len(u2);
177 
178  if (result_len + u2len <= dstsize) /* skip the write if it would overrun dst, but keep counting */
179  unicode_to_utf8(u2, (unsigned char *) dst + result_len);
180 
181  result_len += u2len;
182  }
183  else
184  {
185  /* no mapping; copy bytes from src */
186  if (result_len + u1len <= dstsize)
187  memcpy(dst + result_len, src + srcoff, u1len);
188 
189  result_len += u1len;
190  }
191 
192  srcoff += u1len;
193  }
194 
195  if (result_len < dstsize) /* terminate only when there is room for the NUL byte */
196  dst[result_len] = '\0';
197 
198  return result_len;
199 }
#define Assert(condition)
Definition: c.h:858
static pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: mbprint.c:53
unsigned int pg_wchar
Definition: mbprint.c:31
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: pg_wchar.h:575
static int unicode_utf8len(pg_wchar c)
Definition: pg_wchar.h:607
pg_wchar simplemap[NCaseKind]
static const pg_case_map * find_case_map(pg_wchar ucs)
Definition: unicode_case.c:203
@ CaseTitle
@ CaseLower
@ CaseUpper

References Assert, CaseLower, CaseTitle, CaseUpper, find_case_map(), pg_case_map::simplemap, unicode_to_utf8(), unicode_utf8len(), and utf8_to_unicode().

Referenced by unicode_strlower(), unicode_strtitle(), and unicode_strupper().

◆ find_case_map()

static const pg_case_map * find_case_map ( pg_wchar  ucs)
static

Definition at line 203 of file unicode_case.c.

/*
 * Look up the case_map entry for code point ucs.
 *
 * Code points below 0x80 are fetched by direct index into case_map.  Any
 * other code point is located by binary search over the entries from index
 * 0x80 through the end of the array, comparing on the codepoint field.
 * Returns a pointer to the matching entry, or NULL when the search finds
 * none.
 */
204 {
205  int min;
206  int mid;
207  int max;
208 
209  /* all chars < 0x80 are stored at the index equal to their code point for fast lookup */
210  Assert(lengthof(case_map) >= 0x80);
211  if (ucs < 0x80)
212  {
213  const pg_case_map *map = &case_map[ucs];
214 
215  Assert(map->codepoint == ucs);
216  return map;
217  }
218 
219  /* otherwise, binary search (assumes entries from 0x80 on are sorted by ascending codepoint) */
220  min = 0x80;
221  max = lengthof(case_map) - 1;
222  while (max >= min)
223  {
224  mid = (min + max) / 2;
225  if (ucs > case_map[mid].codepoint)
226  min = mid + 1;
227  else if (ucs < case_map[mid].codepoint)
228  max = mid - 1;
229  else
230  return &case_map[mid];
231  }
232 
233  return NULL; /* no entry for this code point */
234 }
#define lengthof(array)
Definition: c.h:788
pg_wchar codepoint
static const pg_case_map case_map[2955]

References Assert, case_map, pg_case_map::codepoint, and lengthof.

Referenced by convert_case(), unicode_lowercase_simple(), unicode_titlecase_simple(), and unicode_uppercase_simple().

◆ unicode_lowercase_simple()

pg_wchar unicode_lowercase_simple ( pg_wchar  code)

Definition at line 29 of file unicode_case.c.

/*
 * Return the simple lowercase mapping of code as stored in its case_map
 * entry, or code itself when find_case_map() has no entry for it.
 */
30 {
31  const pg_case_map *map = find_case_map(code);
32 
33  return map ? map->simplemap[CaseLower] : code; /* no entry: return the input unchanged */
34 }

References CaseLower, find_case_map(), and pg_case_map::simplemap.

Referenced by pg_wc_tolower().

◆ unicode_strlower()

size_t unicode_strlower ( char *  dst,
size_t  dstsize,
const char *  src,
ssize_t  srclen 
)

Definition at line 69 of file unicode_case.c.

/*
 * Lowercase the string src into dst by delegating to convert_case() with
 * CaseLower.  No word-boundary callback or state is passed.  The return
 * value is whatever convert_case() returns.
 */
70 {
71  return convert_case(dst, dstsize, src, srclen, CaseLower, NULL, NULL);
72 }
static size_t convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen, CaseKind str_casekind, WordBoundaryNext wbnext, void *wbstate)
Definition: unicode_case.c:137

References CaseLower, and convert_case().

Referenced by str_tolower(), and test_strlower().

◆ unicode_strtitle()

size_t unicode_strtitle ( char *  dst,
size_t  dstsize,
const char *  src,
ssize_t  srclen,
WordBoundaryNext  wbnext,
void *  wbstate 
)

Definition at line 100 of file unicode_case.c.

/*
 * Titlecase the string src into dst by delegating to convert_case() with
 * CaseTitle, forwarding the caller's word-boundary callback wbnext and its
 * state wbstate.  The return value is whatever convert_case() returns.
 */
102 {
103  return convert_case(dst, dstsize, src, srclen, CaseTitle, wbnext,
104  wbstate);
105 }

References CaseTitle, and convert_case().

Referenced by str_initcap().

◆ unicode_strupper()

size_t unicode_strupper ( char *  dst,
size_t  dstsize,
const char *  src,
ssize_t  srclen 
)

Definition at line 124 of file unicode_case.c.

/*
 * Uppercase the string src into dst by delegating to convert_case() with
 * CaseUpper.  No word-boundary callback or state is passed.  The return
 * value is whatever convert_case() returns.
 */
125 {
126  return convert_case(dst, dstsize, src, srclen, CaseUpper, NULL, NULL);
127 }

References CaseUpper, and convert_case().

Referenced by str_toupper().

◆ unicode_titlecase_simple()

pg_wchar unicode_titlecase_simple ( pg_wchar  code)

Definition at line 37 of file unicode_case.c.

/*
 * Return the simple titlecase mapping of code as stored in its case_map
 * entry, or code itself when find_case_map() has no entry for it.
 */
38 {
39  const pg_case_map *map = find_case_map(code);
40 
41  return map ? map->simplemap[CaseTitle] : code; /* no entry: return the input unchanged */
42 }

References CaseTitle, find_case_map(), and pg_case_map::simplemap.

◆ unicode_uppercase_simple()

pg_wchar unicode_uppercase_simple ( pg_wchar  code)

Definition at line 45 of file unicode_case.c.

/*
 * Return the simple uppercase mapping of code as stored in its case_map
 * entry, or code itself when find_case_map() has no entry for it.
 */
46 {
47  const pg_case_map *map = find_case_map(code);
48 
49  return map ? map->simplemap[CaseUpper] : code; /* no entry: return the input unchanged */
50 }

References CaseUpper, find_case_map(), and pg_case_map::simplemap.

Referenced by pg_wc_toupper().