PostgreSQL Source Code  git master
pg_locale_libc.c File Reference
#include "postgres.h"
#include "catalog/pg_collation.h"
#include "mb/pg_wchar.h"
#include "utils/formatting.h"
#include "utils/pg_locale.h"
Include dependency graph for pg_locale_libc.c:

Go to the source code of this file.

Macros

#define TEXTBUFLEN   1024
 

Functions

locale_t make_libc_collator (const char *collate, const char *ctype)
 
int strncoll_libc (const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale)
 
size_t strnxfrm_libc (char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
 
static void report_newlocale_failure (const char *localename)
 
static size_t mbstowcs_l (wchar_t *dest, const char *src, size_t n, locale_t loc)
 
static size_t wcstombs_l (char *dest, const wchar_t *src, size_t n, locale_t loc)
 
size_t wchar2char (char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
 
size_t char2wchar (wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
 

Macro Definition Documentation

◆ TEXTBUFLEN

#define TEXTBUFLEN   1024

Definition at line 25 of file pg_locale_libc.c.

Function Documentation

◆ char2wchar()

size_t char2wchar ( wchar_t *  to,
size_t  tolen,
const char *  from,
size_t  fromlen,
pg_locale_t  locale 
)

Definition at line 433 of file pg_locale_libc.c.

435 { /* Convert the fromlen-byte multibyte string "from" into wide characters in "to" (capacity tolen); returns the converter's count, or reports an ERROR on invalid input. */
436  size_t result;
437 
438  if (tolen == 0) /* no output space at all: nothing to convert */
439  return 0;
440 
441 #ifdef WIN32
442  /* See the WIN32 "Unicode" comment in wchar2char() */
443  if (GetDatabaseEncoding() == PG_UTF8)
444  {
445  /* Win32 API does not work for zero-length input */
446  if (fromlen == 0)
447  result = 0;
448  else
449  {
450  result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1); /* tolen - 1 keeps one slot free for the terminator stored below */
451  /* A zero return is failure */
452  if (result == 0)
453  result = -1; /* result is size_t, so this stores (size_t) -1 as the failure marker */
454  }
455 
456  if (result != -1)
457  {
458  Assert(result < tolen);
459  /* Append trailing null wchar (MultiByteToWideChar() does not) */
460  to[result] = 0;
461  }
462  }
463  else
464 #endif /* WIN32 */
465  {
466  /* mbstowcs requires ending '\0', so work on a NUL-terminated copy of the input */
467  char *str = pnstrdup(from, fromlen);
468 
469  if (locale == (pg_locale_t) 0)
470  {
471  /* Use mbstowcs directly for the default locale */
472  result = mbstowcs(to, str, tolen);
473  }
474  else
475  {
476  /* Use mbstowcs_l for nondefault locales */
477  result = mbstowcs_l(to, str, tolen, locale->info.lt);
478  }
479 
480  pfree(str); /* the temporary copy is no longer needed */
481  }
482 
483  if (result == -1) /* compared as (size_t) -1, the failure marker from either branch above */
484  {
485  /*
486  * Invalid multibyte character encountered. We try to give a useful
487  * error message by letting pg_verifymbstr check the string. But it's
488  * possible that the string is OK to us, and not OK to mbstowcs ---
489  * this suggests that the LC_CTYPE locale is different from the
490  * database encoding. Give a generic error message if pg_verifymbstr
491  * can't find anything wrong.
492  */
493  pg_verifymbstr(from, fromlen, false); /* might not return */
494  /* but if it does ... */
495  ereport(ERROR,
496  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
497  errmsg("invalid multibyte character for locale"),
498  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
499  }
500 
501  return result;
502 }
#define Assert(condition)
Definition: c.h:849
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
const char * str
static char * locale
Definition: initdb.c:140
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition: mbutils.c:1556
char * pnstrdup(const char *in, Size len)
Definition: mcxt.c:1707
void pfree(void *pointer)
Definition: mcxt.c:1521
static size_t mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
@ PG_UTF8
Definition: pg_wchar.h:232

References Assert, ereport, errcode(), errhint(), errmsg(), ERROR, GetDatabaseEncoding(), locale, mbstowcs_l(), pfree(), PG_UTF8, pg_verifymbstr(), pnstrdup(), and str.

Referenced by lowerstr_with_len(), str_initcap(), str_tolower(), str_toupper(), t_isalnum(), t_isalpha(), t_isdigit(), t_isprint(), t_isspace(), and TParserInit().

◆ make_libc_collator()

locale_t make_libc_collator ( const char *  collate,
const char *  ctype 
)

Definition at line 53 of file pg_locale_libc.c.

54 {
55  locale_t loc = 0;
56 
57  if (strcmp(collate, ctype) == 0)
58  {
59  if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
60  {
61  /* Normal case where they're the same */
62  errno = 0;
63 #ifndef WIN32
64  loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
65  NULL);
66 #else
67  loc = _create_locale(LC_ALL, collate);
68 #endif
69  if (!loc)
70  report_newlocale_failure(collate);
71  }
72  }
73  else
74  {
75 #ifndef WIN32
76  /* We need two newlocale() steps */
77  locale_t loc1 = 0;
78 
79  if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
80  {
81  errno = 0;
82  loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
83  if (!loc1)
84  report_newlocale_failure(collate);
85  }
86 
87  if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
88  {
89  errno = 0;
90  loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
91  if (!loc)
92  {
93  if (loc1)
94  freelocale(loc1);
96  }
97  }
98  else
99  loc = loc1;
100 #else
101 
102  /*
103  * XXX The _create_locale() API doesn't appear to support this. Could
104  * perhaps be worked around by changing pg_locale_t to contain two
105  * separate fields.
106  */
107  ereport(ERROR,
108  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
109  errmsg("collations with different collate and ctype values are not supported on this platform")));
110 #endif
111  }
112 
113  return loc;
114 }
static void report_newlocale_failure(const char *localename)
#define locale_t
Definition: win32_port.h:442

References ereport, errcode(), errmsg(), ERROR, locale_t, and report_newlocale_failure().

Referenced by init_database_collation(), and pg_newlocale_from_collation().

◆ mbstowcs_l()

static size_t mbstowcs_l ( wchar_t *  dest,
const char *  src,
size_t  n,
locale_t  loc 
)
static

Definition at line 335 of file pg_locale_libc.c.

336 { /* Run mbstowcs() under the locale "loc" instead of the currently installed one; returns whatever the underlying conversion returns. */
337 #ifdef WIN32
338  return _mbstowcs_l(dest, src, n, loc);
339 #else
340  size_t result;
341  locale_t save_locale = uselocale(loc); /* switch to loc, remembering what uselocale() reports as the previous locale */
342 
343  result = mbstowcs(dest, src, n);
344  uselocale(save_locale); /* put the previous locale back before returning */
345  return result;
346 #endif
347 }

References generate_unaccent_rules::dest, and locale_t.

Referenced by char2wchar().

◆ report_newlocale_failure()

static void report_newlocale_failure ( const char *  localename)
static

Definition at line 301 of file pg_locale_libc.c.

302 { /* Raise an ERROR saying that locale "localename" could not be created. Reads errno as left by the failed call, so callers clear errno just before attempting to create the locale. */
303  int save_errno;
304 
305  /*
306  * Windows doesn't provide any useful error indication from
307  * _create_locale(), and BSD-derived platforms don't seem to feel they
308  * need to set errno either (even though POSIX is pretty clear that
309  * newlocale should do so). So, if errno hasn't been set, assume ENOENT
310  * is what to report.
311  */
312  if (errno == 0)
313  errno = ENOENT;
314 
315  /*
316  * ENOENT means "no such locale", not "no such file", so clarify that
317  * errno with an errdetail message.
318  */
319  save_errno = errno; /* auxiliary funcs might change errno */
320  ereport(ERROR,
321  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
322  errmsg("could not create locale \"%s\": %m",
323  localename),
324  (save_errno == ENOENT ?
325  errdetail("The operating system could not find any locale data for the locale name \"%s\".",
326  localename) : 0))); /* the detail line is attached only for the ENOENT case */
327 }
int errdetail(const char *fmt,...)
Definition: elog.c:1203

References ereport, errcode(), errdetail(), errmsg(), and ERROR.

Referenced by make_libc_collator().

◆ strncoll_libc()

int strncoll_libc ( const char *  arg1,
ssize_t  len1,
const char *  arg2,
ssize_t  len2,
pg_locale_t  locale 
)

Definition at line 124 of file pg_locale_libc.c.

126 { /* Compare arg1 and arg2 with strcoll_l() under locale->info.lt and return its result. A length of -1 marks an argument that is already NUL-terminated; otherwise a terminated copy is made first. */
127  char sbuf[TEXTBUFLEN];
128  char *buf = sbuf;
129  size_t bufsize1 = (len1 == -1) ? 0 : len1 + 1; /* space for a copy of arg1 plus its NUL; none needed when used in place */
130  size_t bufsize2 = (len2 == -1) ? 0 : len2 + 1; /* likewise for arg2 */
131  const char *arg1n;
132  const char *arg2n;
133  int result;
134 
135  Assert(locale->provider == COLLPROVIDER_LIBC);
136 
137 #ifdef WIN32
138  /* check for this case before doing the work for nul-termination */
139  if (GetDatabaseEncoding() == PG_UTF8)
140  return strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale);
141 #endif /* WIN32 */
142 
143  if (bufsize1 + bufsize2 > TEXTBUFLEN) /* both copies share one buffer; fall back to palloc when the stack buffer is too small */
144  buf = palloc(bufsize1 + bufsize2);
145 
146  /* nul-terminate arguments if necessary */
147  if (len1 == -1)
148  {
149  arg1n = arg1;
150  }
151  else
152  {
153  char *buf1 = buf;
154 
155  memcpy(buf1, arg1, len1);
156  buf1[len1] = '\0';
157  arg1n = buf1;
158  }
159 
160  if (len2 == -1)
161  {
162  arg2n = arg2;
163  }
164  else
165  {
166  char *buf2 = buf + bufsize1; /* arg2's copy starts right after the space reserved for arg1 */
167 
168  memcpy(buf2, arg2, len2);
169  buf2[len2] = '\0';
170  arg2n = buf2;
171  }
172 
173  result = strcoll_l(arg1n, arg2n, locale->info.lt);
174 
175  if (buf != sbuf) /* free only if we switched to a palloc'd buffer */
176  pfree(buf);
177 
178  return result;
179 }
void * palloc(Size size)
Definition: mcxt.c:1317
#define TEXTBUFLEN
static char * buf
Definition: pg_test_fsync.c:73
#define strcoll_l
Definition: win32_port.h:465

References Assert, buf, GetDatabaseEncoding(), locale, palloc(), pfree(), PG_UTF8, strcoll_l, and TEXTBUFLEN.

Referenced by pg_strcoll(), and pg_strncoll().

◆ strnxfrm_libc()

size_t strnxfrm_libc ( char *  dest,
size_t  destsize,
const char *  src,
ssize_t  srclen,
pg_locale_t  locale 
)

Definition at line 189 of file pg_locale_libc.c.

191 { /* Transform src with strxfrm_l() under locale->info.lt into dest (destsize passed as the output limit) and return strxfrm_l()'s result. srclen == -1 means src is already NUL-terminated. */
192  char sbuf[TEXTBUFLEN];
193  char *buf = sbuf;
194  size_t bufsize = srclen + 1; /* copy plus its NUL; evaluates to 0 when srclen is -1, but that case returns before bufsize is used */
195  size_t result;
196 
197  Assert(locale->provider == COLLPROVIDER_LIBC);
198 
199  if (srclen == -1) /* already NUL-terminated: no copy needed */
200  return strxfrm_l(dest, src, destsize, locale->info.lt);
201 
202  if (bufsize > TEXTBUFLEN) /* stack buffer too small: use a palloc'd one */
203  buf = palloc(bufsize);
204 
205  /* nul-terminate argument */
206  memcpy(buf, src, srclen);
207  buf[srclen] = '\0';
208 
209  result = strxfrm_l(dest, buf, destsize, locale->info.lt);
210 
211  if (buf != sbuf) /* free only if we switched to a palloc'd buffer */
212  pfree(buf);
213 
214  /* if dest is defined, it should be nul-terminated */
215  Assert(result >= destsize || dest[result] == '\0');
216 
217  return result;
218 }
#define bufsize
Definition: indent_globs.h:36
#define strxfrm_l
Definition: win32_port.h:466

References Assert, buf, bufsize, generate_unaccent_rules::dest, locale, palloc(), pfree(), strxfrm_l, and TEXTBUFLEN.

Referenced by pg_strnxfrm(), and pg_strxfrm().

◆ wchar2char()

size_t wchar2char ( char *  to,
const wchar_t *  from,
size_t  tolen,
pg_locale_t  locale 
)

Definition at line 379 of file pg_locale_libc.c.

380 { /* Convert the wide-character string "from" into multibyte text in "to" (tolen passed as the output limit) and return the converter's result. */
381  size_t result;
382 
383  if (tolen == 0) /* no output space at all: nothing to convert */
384  return 0;
385 
386 #ifdef WIN32
387 
388  /*
389  * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
390  * for some reason mbstowcs and wcstombs won't do this for us, so we use
391  * the Win32 conversion API (WideCharToMultiByte() here) instead.
392  */
393  if (GetDatabaseEncoding() == PG_UTF8)
394  {
395  result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
396  NULL, NULL);
397  /* A zero return is failure (result is size_t, so "<= 0" can only mean zero) */
398  if (result <= 0)
399  result = -1; /* stored as (size_t) -1, the failure marker */
400  else
401  {
402  Assert(result <= tolen);
403  /* Microsoft counts the zero terminator in the result */
404  result--;
405  }
406  }
407  else
408 #endif /* WIN32 */
409  if (locale == (pg_locale_t) 0)
410  {
411  /* Use wcstombs directly for the default locale */
412  result = wcstombs(to, from, tolen);
413  }
414  else
415  {
416  /* Use wcstombs_l for nondefault locales */
417  result = wcstombs_l(to, from, tolen, locale->info.lt);
418  }
419 
420  return result;
421 }
static size_t wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)

References Assert, GetDatabaseEncoding(), locale, PG_UTF8, and wcstombs_l().

Referenced by lowerstr_with_len(), str_initcap(), str_tolower(), and str_toupper().

◆ wcstombs_l()

static size_t wcstombs_l ( char *  dest,
const wchar_t *  src,
size_t  n,
locale_t  loc 
)
static

Definition at line 351 of file pg_locale_libc.c.

352 { /* Run wcstombs() under the locale "loc" instead of the currently installed one; returns whatever the underlying conversion returns. */
353 #ifdef WIN32
354  return _wcstombs_l(dest, src, n, loc);
355 #else
356  size_t result;
357  locale_t save_locale = uselocale(loc); /* switch to loc, remembering what uselocale() reports as the previous locale */
358 
359  result = wcstombs(dest, src, n);
360  uselocale(save_locale); /* put the previous locale back before returning */
361  return result;
362 #endif
363 }

References generate_unaccent_rules::dest, and locale_t.

Referenced by wchar2char().