PostgreSQL Source Code  git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
unicode_case.c File Reference
#include "postgres.h"
#include "common/unicode_case.h"
#include "common/unicode_case_table.h"
#include "mb/pg_wchar.h"
Include dependency graph for unicode_case.c:

Go to the source code of this file.

Functions

static const pg_case_map * find_case_map (pg_wchar ucs)
 
static size_t convert_case (char *dst, size_t dstsize, const char *src, ssize_t srclen, CaseKind str_casekind, WordBoundaryNext wbnext, void *wbstate)
 
pg_wchar unicode_lowercase_simple (pg_wchar code)
 
pg_wchar unicode_titlecase_simple (pg_wchar code)
 
pg_wchar unicode_uppercase_simple (pg_wchar code)
 
size_t unicode_strlower (char *dst, size_t dstsize, const char *src, ssize_t srclen)
 
size_t unicode_strtitle (char *dst, size_t dstsize, const char *src, ssize_t srclen, WordBoundaryNext wbnext, void *wbstate)
 
size_t unicode_strupper (char *dst, size_t dstsize, const char *src, ssize_t srclen)
 

Function Documentation

◆ convert_case()

static size_t convert_case ( char *  dst,
size_t  dstsize,
const char *  src,
ssize_t  srclen,
CaseKind  str_casekind,
WordBoundaryNext  wbnext,
void *  wbstate 
)
static

Definition at line 136 of file unicode_case.c.

138 {
139  /* character CaseKind varies while titlecasing */
140  CaseKind chr_casekind = str_casekind;
141  size_t srcoff = 0;
142  size_t result_len = 0;
143  size_t boundary = 0;
144 
145  Assert((str_casekind == CaseTitle && wbnext && wbstate) ||
146  (str_casekind != CaseTitle && !wbnext && !wbstate));
147 
148  if (str_casekind == CaseTitle)
149  {
150  boundary = wbnext(wbstate);
151  Assert(boundary == 0); /* start of text is always a boundary */
152  }
153 
154  while ((srclen < 0 || srcoff < srclen) && src[srcoff] != '\0')
155  {
156  pg_wchar u1 = utf8_to_unicode((unsigned char *) src + srcoff);
157  int u1len = unicode_utf8len(u1);
158  const pg_case_map *casemap = find_case_map(u1);
159 
160  if (str_casekind == CaseTitle)
161  {
162  if (srcoff == boundary)
163  {
164  chr_casekind = CaseUpper;
165  boundary = wbnext(wbstate);
166  }
167  else
168  chr_casekind = CaseLower;
169  }
170 
171  /* perform mapping, update result_len, and write to dst */
172  if (casemap)
173  {
174  pg_wchar u2 = casemap->simplemap[chr_casekind];
175  pg_wchar u2len = unicode_utf8len(u2);
176 
177  if (result_len + u2len <= dstsize)
178  unicode_to_utf8(u2, (unsigned char *) dst + result_len);
179 
180  result_len += u2len;
181  }
182  else
183  {
184  /* no mapping; copy bytes from src */
185  if (result_len + u1len <= dstsize)
186  memcpy(dst + result_len, src + srcoff, u1len);
187 
188  result_len += u1len;
189  }
190 
191  srcoff += u1len;
192  }
193 
194  if (result_len < dstsize)
195  dst[result_len] = '\0';
196 
197  return result_len;
198 }
#define Assert(condition)
Definition: c.h:861
static pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: mbprint.c:53
unsigned int pg_wchar
Definition: mbprint.c:31
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: pg_wchar.h:575
static int unicode_utf8len(pg_wchar c)
Definition: pg_wchar.h:607
pg_wchar simplemap[NCaseKind]
static const pg_case_map * find_case_map(pg_wchar ucs)
Definition: unicode_case.c:202
@ CaseTitle
@ CaseLower
@ CaseUpper

References Assert, CaseLower, CaseTitle, CaseUpper, find_case_map(), pg_case_map::simplemap, unicode_to_utf8(), unicode_utf8len(), and utf8_to_unicode().

Referenced by unicode_strlower(), unicode_strtitle(), and unicode_strupper().

◆ find_case_map()

static const pg_case_map * find_case_map ( pg_wchar  ucs)
static

Definition at line 202 of file unicode_case.c.

203 {
204  int min;
205  int mid;
206  int max;
207 
208  /* all chars <= 0x80 are stored in array for fast lookup */
209  Assert(lengthof(case_map) >= 0x80);
210  if (ucs < 0x80)
211  {
212  const pg_case_map *map = &case_map[ucs];
213 
214  Assert(map->codepoint == ucs);
215  return map;
216  }
217 
218  /* otherwise, binary search */
219  min = 0x80;
220  max = lengthof(case_map) - 1;
221  while (max >= min)
222  {
223  mid = (min + max) / 2;
224  if (ucs > case_map[mid].codepoint)
225  min = mid + 1;
226  else if (ucs < case_map[mid].codepoint)
227  max = mid - 1;
228  else
229  return &case_map[mid];
230  }
231 
232  return NULL;
233 }
#define lengthof(array)
Definition: c.h:791
pg_wchar codepoint
static const pg_case_map case_map[2955]

References Assert, case_map, pg_case_map::codepoint, and lengthof.

Referenced by convert_case(), unicode_lowercase_simple(), unicode_titlecase_simple(), and unicode_uppercase_simple().

◆ unicode_lowercase_simple()

pg_wchar unicode_lowercase_simple ( pg_wchar  code)

Definition at line 28 of file unicode_case.c.

29 {
30  const pg_case_map *map = find_case_map(code);
31 
32  return map ? map->simplemap[CaseLower] : code;
33 }

References CaseLower, find_case_map(), and pg_case_map::simplemap.

Referenced by pg_wc_tolower().

◆ unicode_strlower()

size_t unicode_strlower ( char *  dst,
size_t  dstsize,
const char *  src,
ssize_t  srclen 
)

Definition at line 68 of file unicode_case.c.

69 {
70  return convert_case(dst, dstsize, src, srclen, CaseLower, NULL, NULL);
71 }
static size_t convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen, CaseKind str_casekind, WordBoundaryNext wbnext, void *wbstate)
Definition: unicode_case.c:136

References CaseLower, and convert_case().

Referenced by str_tolower(), and test_strlower().

◆ unicode_strtitle()

size_t unicode_strtitle ( char *  dst,
size_t  dstsize,
const char *  src,
ssize_t  srclen,
WordBoundaryNext  wbnext,
void *  wbstate 
)

Definition at line 99 of file unicode_case.c.

101 {
102  return convert_case(dst, dstsize, src, srclen, CaseTitle, wbnext,
103  wbstate);
104 }

References CaseTitle, and convert_case().

Referenced by str_initcap().

◆ unicode_strupper()

size_t unicode_strupper ( char *  dst,
size_t  dstsize,
const char *  src,
ssize_t  srclen 
)

Definition at line 123 of file unicode_case.c.

124 {
125  return convert_case(dst, dstsize, src, srclen, CaseUpper, NULL, NULL);
126 }

References CaseUpper, and convert_case().

Referenced by str_toupper().

◆ unicode_titlecase_simple()

pg_wchar unicode_titlecase_simple ( pg_wchar  code)

Definition at line 36 of file unicode_case.c.

37 {
38  const pg_case_map *map = find_case_map(code);
39 
40  return map ? map->simplemap[CaseTitle] : code;
41 }

References CaseTitle, find_case_map(), and pg_case_map::simplemap.

◆ unicode_uppercase_simple()

pg_wchar unicode_uppercase_simple ( pg_wchar  code)

Definition at line 44 of file unicode_case.c.

45 {
46  const pg_case_map *map = find_case_map(code);
47 
48  return map ? map->simplemap[CaseUpper] : code;
49 }

References CaseUpper, find_case_map(), and pg_case_map::simplemap.

Referenced by pg_wc_toupper().