PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
pg_locale_libc.c
Go to the documentation of this file.
1/*-----------------------------------------------------------------------
2 *
3 * PostgreSQL locale utilities for libc
4 *
5 * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
6 *
7 * src/backend/utils/adt/pg_locale_libc.c
8 *
9 *-----------------------------------------------------------------------
10 */
11
12#include "postgres.h"
13
14#include <limits.h>
15#include <wctype.h>
16
17#include "access/htup_details.h"
18#include "catalog/pg_database.h"
20#include "mb/pg_wchar.h"
21#include "miscadmin.h"
22#include "utils/builtins.h"
23#include "utils/formatting.h"
24#include "utils/memutils.h"
25#include "utils/pg_locale.h"
26#include "utils/syscache.h"
27
28#ifdef __GLIBC__
29#include <gnu/libc-version.h>
30#endif
31
32#ifdef WIN32
33#include <shlwapi.h>
34#endif
35
36/*
37 * Size of stack buffer to use for string transformations, used to avoid heap
38 * allocations in typical cases. This should be large enough that most strings
39 * will fit, but small enough that we feel comfortable putting it on the
40 * stack.
41 */
42#define TEXTBUFLEN 1024
43
45
46extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
47 ssize_t srclen, pg_locale_t locale);
48extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
49 ssize_t srclen, pg_locale_t locale);
50extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
51 ssize_t srclen, pg_locale_t locale);
52
53static int strncoll_libc(const char *arg1, ssize_t len1,
54 const char *arg2, ssize_t len2,
56static size_t strnxfrm_libc(char *dest, size_t destsize,
57 const char *src, ssize_t srclen,
59extern char *get_collation_actual_version_libc(const char *collcollate);
60static locale_t make_libc_collator(const char *collate,
61 const char *ctype);
62static void report_newlocale_failure(const char *localename);
63
64#ifdef WIN32
65static int strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
66 const char *arg2, ssize_t len2,
68#endif
69
70static size_t strlower_libc_sb(char *dest, size_t destsize,
71 const char *src, ssize_t srclen,
73static size_t strlower_libc_mb(char *dest, size_t destsize,
74 const char *src, ssize_t srclen,
76static size_t strtitle_libc_sb(char *dest, size_t destsize,
77 const char *src, ssize_t srclen,
79static size_t strtitle_libc_mb(char *dest, size_t destsize,
80 const char *src, ssize_t srclen,
82static size_t strupper_libc_sb(char *dest, size_t destsize,
83 const char *src, ssize_t srclen,
85static size_t strupper_libc_mb(char *dest, size_t destsize,
86 const char *src, ssize_t srclen,
88
91 .strnxfrm = strnxfrm_libc,
92 .strnxfrm_prefix = NULL,
93
94 /*
95 * Unfortunately, it seems that strxfrm() for non-C collations is broken
96 * on many common platforms; testing of multiple versions of glibc reveals
97 * that, for many locales, strcoll() and strxfrm() do not return
98 * consistent results. While no other libc other than Cygwin has so far
99 * been shown to have a problem, we take the conservative course of action
100 * for right now and disable this categorically. (Users who are certain
101 * this isn't a problem on their system can define TRUST_STRXFRM.)
102 */
103#ifdef TRUST_STRXFRM
104 .strxfrm_is_safe = true,
105#else
106 .strxfrm_is_safe = false,
107#endif
108};
109
#ifdef WIN32
/*
 * Collation method table for Windows: identical to collate_methods_libc
 * except that comparison goes through strncoll_libc_win32_utf8.
 */
static const struct collate_methods collate_methods_libc_win32_utf8 = {
	/* strxfrm() is only trusted when the builder opts in via TRUST_STRXFRM */
#ifdef TRUST_STRXFRM
	.strxfrm_is_safe = true,
#else
	.strxfrm_is_safe = false,
#endif
	.strnxfrm_prefix = NULL,
	.strnxfrm = strnxfrm_libc,
	.strncoll = strncoll_libc_win32_utf8,
};
#endif							/* WIN32 */
122
123size_t
124strlower_libc(char *dst, size_t dstsize, const char *src,
125 ssize_t srclen, pg_locale_t locale)
126{
128 return strlower_libc_mb(dst, dstsize, src, srclen, locale);
129 else
130 return strlower_libc_sb(dst, dstsize, src, srclen, locale);
131}
132
133size_t
134strtitle_libc(char *dst, size_t dstsize, const char *src,
135 ssize_t srclen, pg_locale_t locale)
136{
138 return strtitle_libc_mb(dst, dstsize, src, srclen, locale);
139 else
140 return strtitle_libc_sb(dst, dstsize, src, srclen, locale);
141}
142
143size_t
144strupper_libc(char *dst, size_t dstsize, const char *src,
145 ssize_t srclen, pg_locale_t locale)
146{
148 return strupper_libc_mb(dst, dstsize, src, srclen, locale);
149 else
150 return strupper_libc_sb(dst, dstsize, src, srclen, locale);
151}
152
153static size_t
154strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
156{
157 if (srclen < 0)
158 srclen = strlen(src);
159
160 if (srclen + 1 <= destsize)
161 {
162 locale_t loc = locale->info.lt;
163 char *p;
164
165 if (srclen + 1 > destsize)
166 return srclen;
167
168 memcpy(dest, src, srclen);
169 dest[srclen] = '\0';
170
171 /*
172 * Note: we assume that tolower_l() will not be so broken as to need
173 * an isupper_l() guard test. When using the default collation, we
174 * apply the traditional Postgres behavior that forces ASCII-style
175 * treatment of I/i, but in non-default collations you get exactly
176 * what the collation says.
177 */
178 for (p = dest; *p; p++)
179 {
180 if (locale->is_default)
181 *p = pg_tolower((unsigned char) *p);
182 else
183 *p = tolower_l((unsigned char) *p, loc);
184 }
185 }
186
187 return srclen;
188}
189
190static size_t
191strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
193{
194 locale_t loc = locale->info.lt;
195 size_t result_size;
196 wchar_t *workspace;
197 char *result;
198 size_t curr_char;
199 size_t max_size;
200
201 if (srclen < 0)
202 srclen = strlen(src);
203
204 /* Overflow paranoia */
205 if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
207 (errcode(ERRCODE_OUT_OF_MEMORY),
208 errmsg("out of memory")));
209
210 /* Output workspace cannot have more codes than input bytes */
211 workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
212
213 char2wchar(workspace, srclen + 1, src, srclen, locale);
214
215 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
216 workspace[curr_char] = towlower_l(workspace[curr_char], loc);
217
218 /*
219 * Make result large enough; case change might change number of bytes
220 */
221 max_size = curr_char * pg_database_encoding_max_length();
222 result = palloc(max_size + 1);
223
224 result_size = wchar2char(result, workspace, max_size + 1, locale);
225
226 if (result_size + 1 > destsize)
227 return result_size;
228
229 memcpy(dest, result, result_size);
230 dest[result_size] = '\0';
231
232 pfree(workspace);
233 pfree(result);
234
235 return result_size;
236}
237
238static size_t
239strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
241{
242 if (srclen < 0)
243 srclen = strlen(src);
244
245 if (srclen + 1 <= destsize)
246 {
247 locale_t loc = locale->info.lt;
248 int wasalnum = false;
249 char *p;
250
251 memcpy(dest, src, srclen);
252 dest[srclen] = '\0';
253
254 /*
255 * Note: we assume that toupper_l()/tolower_l() will not be so broken
256 * as to need guard tests. When using the default collation, we apply
257 * the traditional Postgres behavior that forces ASCII-style treatment
258 * of I/i, but in non-default collations you get exactly what the
259 * collation says.
260 */
261 for (p = dest; *p; p++)
262 {
263 if (locale->is_default)
264 {
265 if (wasalnum)
266 *p = pg_tolower((unsigned char) *p);
267 else
268 *p = pg_toupper((unsigned char) *p);
269 }
270 else
271 {
272 if (wasalnum)
273 *p = tolower_l((unsigned char) *p, loc);
274 else
275 *p = toupper_l((unsigned char) *p, loc);
276 }
277 wasalnum = isalnum_l((unsigned char) *p, loc);
278 }
279 }
280
281 return srclen;
282}
283
284static size_t
285strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
287{
288 locale_t loc = locale->info.lt;
289 int wasalnum = false;
290 size_t result_size;
291 wchar_t *workspace;
292 char *result;
293 size_t curr_char;
294 size_t max_size;
295
296 if (srclen < 0)
297 srclen = strlen(src);
298
299 /* Overflow paranoia */
300 if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
302 (errcode(ERRCODE_OUT_OF_MEMORY),
303 errmsg("out of memory")));
304
305 /* Output workspace cannot have more codes than input bytes */
306 workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
307
308 char2wchar(workspace, srclen + 1, src, srclen, locale);
309
310 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
311 {
312 if (wasalnum)
313 workspace[curr_char] = towlower_l(workspace[curr_char], loc);
314 else
315 workspace[curr_char] = towupper_l(workspace[curr_char], loc);
316 wasalnum = iswalnum_l(workspace[curr_char], loc);
317 }
318
319 /*
320 * Make result large enough; case change might change number of bytes
321 */
322 max_size = curr_char * pg_database_encoding_max_length();
323 result = palloc(max_size + 1);
324
325 result_size = wchar2char(result, workspace, max_size + 1, locale);
326
327 if (result_size + 1 > destsize)
328 return result_size;
329
330 memcpy(dest, result, result_size);
331 dest[result_size] = '\0';
332
333 pfree(workspace);
334 pfree(result);
335
336 return result_size;
337}
338
339static size_t
340strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
342{
343 if (srclen < 0)
344 srclen = strlen(src);
345
346 if (srclen + 1 <= destsize)
347 {
348 locale_t loc = locale->info.lt;
349 char *p;
350
351 memcpy(dest, src, srclen);
352 dest[srclen] = '\0';
353
354 /*
355 * Note: we assume that toupper_l() will not be so broken as to need
356 * an islower_l() guard test. When using the default collation, we
357 * apply the traditional Postgres behavior that forces ASCII-style
358 * treatment of I/i, but in non-default collations you get exactly
359 * what the collation says.
360 */
361 for (p = dest; *p; p++)
362 {
363 if (locale->is_default)
364 *p = pg_toupper((unsigned char) *p);
365 else
366 *p = toupper_l((unsigned char) *p, loc);
367 }
368 }
369
370 return srclen;
371}
372
373static size_t
374strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
376{
377 locale_t loc = locale->info.lt;
378 size_t result_size;
379 wchar_t *workspace;
380 char *result;
381 size_t curr_char;
382 size_t max_size;
383
384 if (srclen < 0)
385 srclen = strlen(src);
386
387 /* Overflow paranoia */
388 if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
390 (errcode(ERRCODE_OUT_OF_MEMORY),
391 errmsg("out of memory")));
392
393 /* Output workspace cannot have more codes than input bytes */
394 workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
395
396 char2wchar(workspace, srclen + 1, src, srclen, locale);
397
398 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
399 workspace[curr_char] = towupper_l(workspace[curr_char], loc);
400
401 /*
402 * Make result large enough; case change might change number of bytes
403 */
404 max_size = curr_char * pg_database_encoding_max_length();
405 result = palloc(max_size + 1);
406
407 result_size = wchar2char(result, workspace, max_size + 1, locale);
408
409 if (result_size + 1 > destsize)
410 return result_size;
411
412 memcpy(dest, result, result_size);
413 dest[result_size] = '\0';
414
415 pfree(workspace);
416 pfree(result);
417
418 return result_size;
419}
420
423{
424 const char *collate;
425 const char *ctype;
426 locale_t loc;
427 pg_locale_t result;
428
429 if (collid == DEFAULT_COLLATION_OID)
430 {
431 HeapTuple tp;
432 Datum datum;
433
435 if (!HeapTupleIsValid(tp))
436 elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
437 datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
438 Anum_pg_database_datcollate);
439 collate = TextDatumGetCString(datum);
440 datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
441 Anum_pg_database_datctype);
442 ctype = TextDatumGetCString(datum);
443
444 ReleaseSysCache(tp);
445 }
446 else
447 {
448 HeapTuple tp;
449 Datum datum;
450
452 if (!HeapTupleIsValid(tp))
453 elog(ERROR, "cache lookup failed for collation %u", collid);
454
455 datum = SysCacheGetAttrNotNull(COLLOID, tp,
456 Anum_pg_collation_collcollate);
457 collate = TextDatumGetCString(datum);
458 datum = SysCacheGetAttrNotNull(COLLOID, tp,
459 Anum_pg_collation_collctype);
460 ctype = TextDatumGetCString(datum);
461
462 ReleaseSysCache(tp);
463 }
464
465
466 loc = make_libc_collator(collate, ctype);
467
468 result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
469 result->provider = COLLPROVIDER_LIBC;
470 result->deterministic = true;
471 result->collate_is_c = (strcmp(collate, "C") == 0) ||
472 (strcmp(collate, "POSIX") == 0);
473 result->ctype_is_c = (strcmp(ctype, "C") == 0) ||
474 (strcmp(ctype, "POSIX") == 0);
475 result->info.lt = loc;
476 if (!result->collate_is_c)
477 {
478#ifdef WIN32
480 result->collate = &collate_methods_libc_win32_utf8;
481 else
482#endif
483 result->collate = &collate_methods_libc;
484 }
485
486 return result;
487}
488
489/*
490 * Create a locale_t with the given collation and ctype.
491 *
492 * The "C" and "POSIX" locales are not actually handled by libc, so return
493 * NULL.
494 *
495 * Ensure that no path leaks a locale_t.
496 */
497static locale_t
498make_libc_collator(const char *collate, const char *ctype)
499{
500 locale_t loc = 0;
501
502 if (strcmp(collate, ctype) == 0)
503 {
504 if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
505 {
506 /* Normal case where they're the same */
507 errno = 0;
508#ifndef WIN32
509 loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
510 NULL);
511#else
512 loc = _create_locale(LC_ALL, collate);
513#endif
514 if (!loc)
516 }
517 }
518 else
519 {
520#ifndef WIN32
521 /* We need two newlocale() steps */
522 locale_t loc1 = 0;
523
524 if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
525 {
526 errno = 0;
527 loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
528 if (!loc1)
530 }
531
532 if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
533 {
534 errno = 0;
535 loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
536 if (!loc)
537 {
538 if (loc1)
539 freelocale(loc1);
541 }
542 }
543 else
544 loc = loc1;
545#else
546
547 /*
548 * XXX The _create_locale() API doesn't appear to support this. Could
549 * perhaps be worked around by changing pg_locale_t to contain two
550 * separate fields.
551 */
553 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
554 errmsg("collations with different collate and ctype values are not supported on this platform")));
555#endif
556 }
557
558 return loc;
559}
560
561/*
562 * strncoll_libc
563 *
564 * NUL-terminate arguments, if necessary, and pass to strcoll_l().
565 *
566 * An input string length of -1 means that it's already NUL-terminated.
567 */
568int
569strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
571{
572 char sbuf[TEXTBUFLEN];
573 char *buf = sbuf;
574 size_t bufsize1 = (len1 == -1) ? 0 : len1 + 1;
575 size_t bufsize2 = (len2 == -1) ? 0 : len2 + 1;
576 const char *arg1n;
577 const char *arg2n;
578 int result;
579
580 Assert(locale->provider == COLLPROVIDER_LIBC);
581
582 if (bufsize1 + bufsize2 > TEXTBUFLEN)
583 buf = palloc(bufsize1 + bufsize2);
584
585 /* nul-terminate arguments if necessary */
586 if (len1 == -1)
587 {
588 arg1n = arg1;
589 }
590 else
591 {
592 char *buf1 = buf;
593
594 memcpy(buf1, arg1, len1);
595 buf1[len1] = '\0';
596 arg1n = buf1;
597 }
598
599 if (len2 == -1)
600 {
601 arg2n = arg2;
602 }
603 else
604 {
605 char *buf2 = buf + bufsize1;
606
607 memcpy(buf2, arg2, len2);
608 buf2[len2] = '\0';
609 arg2n = buf2;
610 }
611
612 result = strcoll_l(arg1n, arg2n, locale->info.lt);
613
614 if (buf != sbuf)
615 pfree(buf);
616
617 return result;
618}
619
620/*
621 * strnxfrm_libc
622 *
623 * NUL-terminate src, if necessary, and pass to strxfrm_l().
624 *
625 * A source length of -1 means that it's already NUL-terminated.
626 */
627size_t
628strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
630{
631 char sbuf[TEXTBUFLEN];
632 char *buf = sbuf;
633 size_t bufsize = srclen + 1;
634 size_t result;
635
636 Assert(locale->provider == COLLPROVIDER_LIBC);
637
638 if (srclen == -1)
639 return strxfrm_l(dest, src, destsize, locale->info.lt);
640
641 if (bufsize > TEXTBUFLEN)
642 buf = palloc(bufsize);
643
644 /* nul-terminate argument */
645 memcpy(buf, src, srclen);
646 buf[srclen] = '\0';
647
648 result = strxfrm_l(dest, buf, destsize, locale->info.lt);
649
650 if (buf != sbuf)
651 pfree(buf);
652
653 /* if dest is defined, it should be nul-terminated */
654 Assert(result >= destsize || dest[result] == '\0');
655
656 return result;
657}
658
/*
 * get_collation_actual_version_libc
 *
 * Return a palloc'd version string for the given libc collation name, or
 * NULL when no version is reported.
 *
 * NULL is returned for "C", "POSIX" and names starting with "C." (matched
 * case-insensitively), on platforms matching none of the branches below,
 * and on Windows when GetNLSVersionEx() rejects the name with
 * ERROR_INVALID_PARAMETER.
 */
char *
get_collation_actual_version_libc(const char *collcollate)
{
	char	   *collversion = NULL;

	if (pg_strcasecmp("C", collcollate) != 0 &&
		pg_strncasecmp("C.", collcollate, 2) != 0 &&
		pg_strcasecmp("POSIX", collcollate) != 0)
	{
#if defined(__GLIBC__)
		/* Use the glibc version because we don't have anything better. */
		collversion = pstrdup(gnu_get_libc_version());
#elif defined(LC_VERSION_MASK)
		locale_t	loc;

		/* Look up FreeBSD collation version. */
		loc = newlocale(LC_COLLATE_MASK, collcollate, NULL);
		if (loc)
		{
			collversion =
				pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
			freelocale(loc);
		}
		else
			ereport(ERROR,
					(errmsg("could not load locale \"%s\"", collcollate)));
#elif defined(WIN32)
		/*
		 * If we are targeting Windows Vista and above, we can ask for a name
		 * given a collation name (earlier versions required a location code
		 * that we don't have).
		 */
		NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
		WCHAR		wide_collcollate[LOCALE_NAME_MAX_LENGTH];

		MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
							LOCALE_NAME_MAX_LENGTH);
		if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
		{
			/*
			 * GetNLSVersionEx() wants a language tag such as "en-US", not a
			 * locale name like "English_United States.1252".  Until those
			 * values can be prevented from entering the system, or 100%
			 * reliably converted to the more useful tag format, tolerate the
			 * resulting error and report that we have no version data.
			 */
			if (GetLastError() == ERROR_INVALID_PARAMETER)
				return NULL;

			ereport(ERROR,
					(errmsg("could not get collation version for locale \"%s\": error code %lu",
							collcollate,
							GetLastError())));
		}
		collversion = psprintf("%lu.%lu,%lu.%lu",
							   (version.dwNLSVersion >> 8) & 0xFFFF,
							   version.dwNLSVersion & 0xFF,
							   (version.dwDefinedVersion >> 8) & 0xFFFF,
							   version.dwDefinedVersion & 0xFF);
#endif
	}

	return collversion;
}
723
/*
 * strncoll_libc_win32_utf8
 *
 * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
 * invoke wcscoll_l().
 *
 * An input string length of -1 means that it's NUL-terminated.
 *
 * Conversion or comparison failure raises an error.  The wide-character
 * copies live in a stack buffer of TEXTBUFLEN bytes, or in a palloc'd
 * buffer (freed before returning) when they would not fit.
 */
#ifdef WIN32
static int
strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
						 ssize_t len2, pg_locale_t locale)
{
	char		sbuf[TEXTBUFLEN];
	char	   *buf = sbuf;
	char	   *a1p,
			   *a2p;
	int			a1len;
	int			a2len;
	int			r;
	int			result;

	Assert(locale->provider == COLLPROVIDER_LIBC);
	Assert(GetDatabaseEncoding() == PG_UTF8);

	if (len1 == -1)
		len1 = strlen(arg1);
	if (len2 == -1)
		len2 = strlen(arg2);

	/* two bytes per input byte, plus room for a wide terminator */
	a1len = len1 * 2 + 2;
	a2len = len2 * 2 + 2;

	if (a1len + a2len > TEXTBUFLEN)
		buf = palloc(a1len + a2len);

	a1p = buf;
	a2p = buf + a1len;

	/* API does not work for zero-length input */
	if (len1 == 0)
		r = 0;
	else
	{
		r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
								(LPWSTR) a1p, a1len / 2);
		if (!r)
			ereport(ERROR,
					(errmsg("could not convert string to UTF-16: error code %lu",
							GetLastError())));
	}
	((LPWSTR) a1p)[r] = 0;

	if (len2 == 0)
		r = 0;
	else
	{
		r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
								(LPWSTR) a2p, a2len / 2);
		if (!r)
			ereport(ERROR,
					(errmsg("could not convert string to UTF-16: error code %lu",
							GetLastError())));
	}
	((LPWSTR) a2p)[r] = 0;

	errno = 0;
	result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
	if (result == 2147483647)	/* _NLSCMPERROR; missing from mingw headers */
		ereport(ERROR,
				(errmsg("could not compare Unicode strings: %m")));

	if (buf != sbuf)
		pfree(buf);

	return result;
}
#endif							/* WIN32 */
802
803/* simple subroutine for reporting errors from newlocale() */
804static void
805report_newlocale_failure(const char *localename)
806{
807 int save_errno;
808
809 /*
810 * Windows doesn't provide any useful error indication from
811 * _create_locale(), and BSD-derived platforms don't seem to feel they
812 * need to set errno either (even though POSIX is pretty clear that
813 * newlocale should do so). So, if errno hasn't been set, assume ENOENT
814 * is what to report.
815 */
816 if (errno == 0)
817 errno = ENOENT;
818
819 /*
820 * ENOENT means "no such locale", not "no such file", so clarify that
821 * errno with an errdetail message.
822 */
823 save_errno = errno; /* auxiliary funcs might change errno */
825 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
826 errmsg("could not create locale \"%s\": %m",
827 localename),
828 (save_errno == ENOENT ?
829 errdetail("The operating system could not find any locale data for the locale name \"%s\".",
830 localename) : 0)));
831}
832
833/*
834 * POSIX doesn't define _l-variants of these functions, but several systems
835 * have them. We provide our own replacements here.
836 */
837#ifndef HAVE_MBSTOWCS_L
838static size_t
839mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
840{
841#ifdef WIN32
842 return _mbstowcs_l(dest, src, n, loc);
843#else
844 size_t result;
845 locale_t save_locale = uselocale(loc);
846
847 result = mbstowcs(dest, src, n);
848 uselocale(save_locale);
849 return result;
850#endif
851}
852#endif
853#ifndef HAVE_WCSTOMBS_L
854static size_t
855wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
856{
857#ifdef WIN32
858 return _wcstombs_l(dest, src, n, loc);
859#else
860 size_t result;
861 locale_t save_locale = uselocale(loc);
862
863 result = wcstombs(dest, src, n);
864 uselocale(save_locale);
865 return result;
866#endif
867}
868#endif
869
870/*
871 * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
872 * Therefore we keep them here rather than with the mbutils code.
873 */
874
875/*
876 * wchar2char --- convert wide characters to multibyte format
877 *
878 * This has the same API as the standard wcstombs_l() function; in particular,
879 * tolen is the maximum number of bytes to store at *to, and *from must be
880 * zero-terminated. The output will be zero-terminated iff there is room.
881 */
882size_t
883wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
884{
885 size_t result;
886
887 if (tolen == 0)
888 return 0;
889
890#ifdef WIN32
891
892 /*
893 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
894 * for some reason mbstowcs and wcstombs won't do this for us, so we use
895 * MultiByteToWideChar().
896 */
898 {
899 result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
900 NULL, NULL);
901 /* A zero return is failure */
902 if (result <= 0)
903 result = -1;
904 else
905 {
906 Assert(result <= tolen);
907 /* Microsoft counts the zero terminator in the result */
908 result--;
909 }
910 }
911 else
912#endif /* WIN32 */
913 if (locale == (pg_locale_t) 0)
914 {
915 /* Use wcstombs directly for the default locale */
916 result = wcstombs(to, from, tolen);
917 }
918 else
919 {
920 /* Use wcstombs_l for nondefault locales */
921 result = wcstombs_l(to, from, tolen, locale->info.lt);
922 }
923
924 return result;
925}
926
927/*
928 * char2wchar --- convert multibyte characters to wide characters
929 *
930 * This has almost the API of mbstowcs_l(), except that *from need not be
931 * null-terminated; instead, the number of input bytes is specified as
932 * fromlen. Also, we ereport() rather than returning -1 for invalid
933 * input encoding. tolen is the maximum number of wchar_t's to store at *to.
934 * The output will be zero-terminated iff there is room.
935 */
936size_t
937char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
939{
940 size_t result;
941
942 if (tolen == 0)
943 return 0;
944
945#ifdef WIN32
946 /* See WIN32 "Unicode" comment above */
948 {
949 /* Win32 API does not work for zero-length input */
950 if (fromlen == 0)
951 result = 0;
952 else
953 {
954 result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
955 /* A zero return is failure */
956 if (result == 0)
957 result = -1;
958 }
959
960 if (result != -1)
961 {
962 Assert(result < tolen);
963 /* Append trailing null wchar (MultiByteToWideChar() does not) */
964 to[result] = 0;
965 }
966 }
967 else
968#endif /* WIN32 */
969 {
970 /* mbstowcs requires ending '\0' */
971 char *str = pnstrdup(from, fromlen);
972
973 if (locale == (pg_locale_t) 0)
974 {
975 /* Use mbstowcs directly for the default locale */
976 result = mbstowcs(to, str, tolen);
977 }
978 else
979 {
980 /* Use mbstowcs_l for nondefault locales */
981 result = mbstowcs_l(to, str, tolen, locale->info.lt);
982 }
983
984 pfree(str);
985 }
986
987 if (result == -1)
988 {
989 /*
990 * Invalid multibyte character encountered. We try to give a useful
991 * error message by letting pg_verifymbstr check the string. But it's
992 * possible that the string is OK to us, and not OK to mbstowcs ---
993 * this suggests that the LC_CTYPE locale is different from the
994 * database encoding. Give a generic error message if pg_verifymbstr
995 * can't find anything wrong.
996 */
997 pg_verifymbstr(from, fromlen, false); /* might not return */
998 /* but if it does ... */
1000 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1001 errmsg("invalid multibyte character for locale"),
1002 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
1003 }
1004
1005 return result;
1006}
#define TextDatumGetCString(d)
Definition: builtins.h:98
Oid collid
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
Oid MyDatabaseId
Definition: globals.c:93
Assert(PointerIsAligned(start, uint64))
const char * str
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define bufsize
Definition: indent_globs.h:36
static char * locale
Definition: initdb.c:140
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition: mbutils.c:1556
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1546
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:1215
char * pstrdup(const char *in)
Definition: mcxt.c:1699
void pfree(void *pointer)
Definition: mcxt.c:1524
void * palloc(Size size)
Definition: mcxt.c:1317
char * pnstrdup(const char *in, Size len)
Definition: mcxt.c:1710
static size_t strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
size_t wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
static size_t strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context)
size_t strtitle_libc(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
size_t strupper_libc(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
char * get_collation_actual_version_libc(const char *collcollate)
static locale_t make_libc_collator(const char *collate, const char *ctype)
static size_t wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
static const struct collate_methods collate_methods_libc
size_t strlower_libc(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
static size_t strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
static int strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale)
static size_t strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
static void report_newlocale_failure(const char *localename)
static size_t strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
#define TEXTBUFLEN
static size_t strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
static size_t mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
static size_t strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
static char * buf
Definition: pg_test_fsync.c:72
@ PG_UTF8
Definition: pg_wchar.h:232
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
unsigned char pg_toupper(unsigned char ch)
Definition: pgstrcasecmp.c:105
unsigned char pg_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:122
int pg_strncasecmp(const char *s1, const char *s2, size_t n)
Definition: pgstrcasecmp.c:69
uintptr_t Datum
Definition: postgres.h:69
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:257
unsigned int Oid
Definition: postgres_ext.h:32
char * psprintf(const char *fmt,...)
Definition: psprintf.c:43
int(* strncoll)(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale)
Definition: pg_locale.h:57
const struct collate_methods * collate
Definition: pg_locale.h:104
union pg_locale_struct::@158 info
bool deterministic
Definition: pg_locale.h:99
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:269
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:221
Datum SysCacheGetAttrNotNull(int cacheId, HeapTuple tup, AttrNumber attributeNumber)
Definition: syscache.c:631
#define locale_t
Definition: win32_port.h:432
#define toupper_l
Definition: win32_port.h:434
#define iswalnum_l
Definition: win32_port.h:442
#define towupper_l
Definition: win32_port.h:436
#define strcoll_l
Definition: win32_port.h:455
#define strxfrm_l
Definition: win32_port.h:456
#define towlower_l
Definition: win32_port.h:435
#define wcscoll_l
Definition: win32_port.h:457
#define tolower_l
Definition: win32_port.h:433
#define isalnum_l
Definition: win32_port.h:441