PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
pg_locale_libc.c
Go to the documentation of this file.
1/*-----------------------------------------------------------------------
2 *
3 * PostgreSQL locale utilities for libc
4 *
5 * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
6 *
7 * src/backend/utils/adt/pg_locale_libc.c
8 *
9 *-----------------------------------------------------------------------
10 */
11
#include "postgres.h"

#include <limits.h>
#include <wctype.h>

#include "access/htup_details.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_database.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/formatting.h"
#include "utils/memutils.h"
#include "utils/pg_locale.h"
#include "utils/syscache.h"

#ifdef __GLIBC__
#include <gnu/libc-version.h>
#endif

#ifdef WIN32
#include <shlwapi.h>
#endif
35
36/*
37 * Size of stack buffer to use for string transformations, used to avoid heap
38 * allocations in typical cases. This should be large enough that most strings
39 * will fit, but small enough that we feel comfortable putting it on the
40 * stack.
41 */
42#define TEXTBUFLEN 1024
43
45
46extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
47 ssize_t srclen, pg_locale_t locale);
48extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
49 ssize_t srclen, pg_locale_t locale);
50extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
51 ssize_t srclen, pg_locale_t locale);
52
53static int strncoll_libc(const char *arg1, ssize_t len1,
54 const char *arg2, ssize_t len2,
56static size_t strnxfrm_libc(char *dest, size_t destsize,
57 const char *src, ssize_t srclen,
59extern char *get_collation_actual_version_libc(const char *collcollate);
60static locale_t make_libc_collator(const char *collate,
61 const char *ctype);
62
63#ifdef WIN32
64static int strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
65 const char *arg2, ssize_t len2,
67#endif
68
69static size_t strlower_libc_sb(char *dest, size_t destsize,
70 const char *src, ssize_t srclen,
72static size_t strlower_libc_mb(char *dest, size_t destsize,
73 const char *src, ssize_t srclen,
75static size_t strtitle_libc_sb(char *dest, size_t destsize,
76 const char *src, ssize_t srclen,
78static size_t strtitle_libc_mb(char *dest, size_t destsize,
79 const char *src, ssize_t srclen,
81static size_t strupper_libc_sb(char *dest, size_t destsize,
82 const char *src, ssize_t srclen,
84static size_t strupper_libc_mb(char *dest, size_t destsize,
85 const char *src, ssize_t srclen,
87
90 .strnxfrm = strnxfrm_libc,
91 .strnxfrm_prefix = NULL,
92
93 /*
94 * Unfortunately, it seems that strxfrm() for non-C collations is broken
95 * on many common platforms; testing of multiple versions of glibc reveals
96 * that, for many locales, strcoll() and strxfrm() do not return
97 * consistent results. While no other libc other than Cygwin has so far
98 * been shown to have a problem, we take the conservative course of action
99 * for right now and disable this categorically. (Users who are certain
100 * this isn't a problem on their system can define TRUST_STRXFRM.)
101 */
102#ifdef TRUST_STRXFRM
103 .strxfrm_is_safe = true,
104#else
105 .strxfrm_is_safe = false,
106#endif
107};
108
#ifdef WIN32
/*
 * Collation method table for Windows: identical to collate_methods_libc
 * except that comparison is done by strncoll_libc_win32_utf8().
 */
static const struct collate_methods collate_methods_libc_win32_utf8 = {
	.strncoll = strncoll_libc_win32_utf8,
	.strnxfrm = strnxfrm_libc,
	.strnxfrm_prefix = NULL,
#if defined(TRUST_STRXFRM)
	.strxfrm_is_safe = true,
#else
	.strxfrm_is_safe = false,
#endif
};
#endif							/* WIN32 */
121
122size_t
123strlower_libc(char *dst, size_t dstsize, const char *src,
124 ssize_t srclen, pg_locale_t locale)
125{
127 return strlower_libc_mb(dst, dstsize, src, srclen, locale);
128 else
129 return strlower_libc_sb(dst, dstsize, src, srclen, locale);
130}
131
132size_t
133strtitle_libc(char *dst, size_t dstsize, const char *src,
134 ssize_t srclen, pg_locale_t locale)
135{
137 return strtitle_libc_mb(dst, dstsize, src, srclen, locale);
138 else
139 return strtitle_libc_sb(dst, dstsize, src, srclen, locale);
140}
141
142size_t
143strupper_libc(char *dst, size_t dstsize, const char *src,
144 ssize_t srclen, pg_locale_t locale)
145{
147 return strupper_libc_mb(dst, dstsize, src, srclen, locale);
148 else
149 return strupper_libc_sb(dst, dstsize, src, srclen, locale);
150}
151
152static size_t
153strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
155{
156 if (srclen < 0)
157 srclen = strlen(src);
158
159 if (srclen + 1 <= destsize)
160 {
161 locale_t loc = locale->info.lt;
162 char *p;
163
164 if (srclen + 1 > destsize)
165 return srclen;
166
167 memcpy(dest, src, srclen);
168 dest[srclen] = '\0';
169
170 /*
171 * Note: we assume that tolower_l() will not be so broken as to need
172 * an isupper_l() guard test. When using the default collation, we
173 * apply the traditional Postgres behavior that forces ASCII-style
174 * treatment of I/i, but in non-default collations you get exactly
175 * what the collation says.
176 */
177 for (p = dest; *p; p++)
178 {
179 if (locale->is_default)
180 *p = pg_tolower((unsigned char) *p);
181 else
182 *p = tolower_l((unsigned char) *p, loc);
183 }
184 }
185
186 return srclen;
187}
188
189static size_t
190strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
192{
193 locale_t loc = locale->info.lt;
194 size_t result_size;
195 wchar_t *workspace;
196 char *result;
197 size_t curr_char;
198 size_t max_size;
199
200 if (srclen < 0)
201 srclen = strlen(src);
202
203 /* Overflow paranoia */
204 if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
206 (errcode(ERRCODE_OUT_OF_MEMORY),
207 errmsg("out of memory")));
208
209 /* Output workspace cannot have more codes than input bytes */
210 workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
211
212 char2wchar(workspace, srclen + 1, src, srclen, locale);
213
214 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
215 workspace[curr_char] = towlower_l(workspace[curr_char], loc);
216
217 /*
218 * Make result large enough; case change might change number of bytes
219 */
220 max_size = curr_char * pg_database_encoding_max_length();
221 result = palloc(max_size + 1);
222
223 result_size = wchar2char(result, workspace, max_size + 1, locale);
224
225 if (result_size + 1 > destsize)
226 return result_size;
227
228 memcpy(dest, result, result_size);
229 dest[result_size] = '\0';
230
231 pfree(workspace);
232 pfree(result);
233
234 return result_size;
235}
236
237static size_t
238strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
240{
241 if (srclen < 0)
242 srclen = strlen(src);
243
244 if (srclen + 1 <= destsize)
245 {
246 locale_t loc = locale->info.lt;
247 int wasalnum = false;
248 char *p;
249
250 memcpy(dest, src, srclen);
251 dest[srclen] = '\0';
252
253 /*
254 * Note: we assume that toupper_l()/tolower_l() will not be so broken
255 * as to need guard tests. When using the default collation, we apply
256 * the traditional Postgres behavior that forces ASCII-style treatment
257 * of I/i, but in non-default collations you get exactly what the
258 * collation says.
259 */
260 for (p = dest; *p; p++)
261 {
262 if (locale->is_default)
263 {
264 if (wasalnum)
265 *p = pg_tolower((unsigned char) *p);
266 else
267 *p = pg_toupper((unsigned char) *p);
268 }
269 else
270 {
271 if (wasalnum)
272 *p = tolower_l((unsigned char) *p, loc);
273 else
274 *p = toupper_l((unsigned char) *p, loc);
275 }
276 wasalnum = isalnum_l((unsigned char) *p, loc);
277 }
278 }
279
280 return srclen;
281}
282
283static size_t
284strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
286{
287 locale_t loc = locale->info.lt;
288 int wasalnum = false;
289 size_t result_size;
290 wchar_t *workspace;
291 char *result;
292 size_t curr_char;
293 size_t max_size;
294
295 if (srclen < 0)
296 srclen = strlen(src);
297
298 /* Overflow paranoia */
299 if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
301 (errcode(ERRCODE_OUT_OF_MEMORY),
302 errmsg("out of memory")));
303
304 /* Output workspace cannot have more codes than input bytes */
305 workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
306
307 char2wchar(workspace, srclen + 1, src, srclen, locale);
308
309 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
310 {
311 if (wasalnum)
312 workspace[curr_char] = towlower_l(workspace[curr_char], loc);
313 else
314 workspace[curr_char] = towupper_l(workspace[curr_char], loc);
315 wasalnum = iswalnum_l(workspace[curr_char], loc);
316 }
317
318 /*
319 * Make result large enough; case change might change number of bytes
320 */
321 max_size = curr_char * pg_database_encoding_max_length();
322 result = palloc(max_size + 1);
323
324 result_size = wchar2char(result, workspace, max_size + 1, locale);
325
326 if (result_size + 1 > destsize)
327 return result_size;
328
329 memcpy(dest, result, result_size);
330 dest[result_size] = '\0';
331
332 pfree(workspace);
333 pfree(result);
334
335 return result_size;
336}
337
338static size_t
339strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
341{
342 if (srclen < 0)
343 srclen = strlen(src);
344
345 if (srclen + 1 <= destsize)
346 {
347 locale_t loc = locale->info.lt;
348 char *p;
349
350 memcpy(dest, src, srclen);
351 dest[srclen] = '\0';
352
353 /*
354 * Note: we assume that toupper_l() will not be so broken as to need
355 * an islower_l() guard test. When using the default collation, we
356 * apply the traditional Postgres behavior that forces ASCII-style
357 * treatment of I/i, but in non-default collations you get exactly
358 * what the collation says.
359 */
360 for (p = dest; *p; p++)
361 {
362 if (locale->is_default)
363 *p = pg_toupper((unsigned char) *p);
364 else
365 *p = toupper_l((unsigned char) *p, loc);
366 }
367 }
368
369 return srclen;
370}
371
372static size_t
373strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
375{
376 locale_t loc = locale->info.lt;
377 size_t result_size;
378 wchar_t *workspace;
379 char *result;
380 size_t curr_char;
381 size_t max_size;
382
383 if (srclen < 0)
384 srclen = strlen(src);
385
386 /* Overflow paranoia */
387 if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
389 (errcode(ERRCODE_OUT_OF_MEMORY),
390 errmsg("out of memory")));
391
392 /* Output workspace cannot have more codes than input bytes */
393 workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
394
395 char2wchar(workspace, srclen + 1, src, srclen, locale);
396
397 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
398 workspace[curr_char] = towupper_l(workspace[curr_char], loc);
399
400 /*
401 * Make result large enough; case change might change number of bytes
402 */
403 max_size = curr_char * pg_database_encoding_max_length();
404 result = palloc(max_size + 1);
405
406 result_size = wchar2char(result, workspace, max_size + 1, locale);
407
408 if (result_size + 1 > destsize)
409 return result_size;
410
411 memcpy(dest, result, result_size);
412 dest[result_size] = '\0';
413
414 pfree(workspace);
415 pfree(result);
416
417 return result_size;
418}
419
422{
423 const char *collate;
424 const char *ctype;
425 locale_t loc;
426 pg_locale_t result;
427
428 if (collid == DEFAULT_COLLATION_OID)
429 {
430 HeapTuple tp;
431 Datum datum;
432
434 if (!HeapTupleIsValid(tp))
435 elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
436 datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
437 Anum_pg_database_datcollate);
438 collate = TextDatumGetCString(datum);
439 datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
440 Anum_pg_database_datctype);
441 ctype = TextDatumGetCString(datum);
442
443 ReleaseSysCache(tp);
444 }
445 else
446 {
447 HeapTuple tp;
448 Datum datum;
449
451 if (!HeapTupleIsValid(tp))
452 elog(ERROR, "cache lookup failed for collation %u", collid);
453
454 datum = SysCacheGetAttrNotNull(COLLOID, tp,
455 Anum_pg_collation_collcollate);
456 collate = TextDatumGetCString(datum);
457 datum = SysCacheGetAttrNotNull(COLLOID, tp,
458 Anum_pg_collation_collctype);
459 ctype = TextDatumGetCString(datum);
460
461 ReleaseSysCache(tp);
462 }
463
464
465 loc = make_libc_collator(collate, ctype);
466
467 result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
468 result->provider = COLLPROVIDER_LIBC;
469 result->deterministic = true;
470 result->collate_is_c = (strcmp(collate, "C") == 0) ||
471 (strcmp(collate, "POSIX") == 0);
472 result->ctype_is_c = (strcmp(ctype, "C") == 0) ||
473 (strcmp(ctype, "POSIX") == 0);
474 result->info.lt = loc;
475 if (!result->collate_is_c)
476 {
477#ifdef WIN32
479 result->collate = &collate_methods_libc_win32_utf8;
480 else
481#endif
482 result->collate = &collate_methods_libc;
483 }
484
485 return result;
486}
487
488/*
489 * Create a locale_t with the given collation and ctype.
490 *
491 * The "C" and "POSIX" locales are not actually handled by libc, so return
492 * NULL.
493 *
494 * Ensure that no path leaks a locale_t.
495 */
496static locale_t
497make_libc_collator(const char *collate, const char *ctype)
498{
499 locale_t loc = 0;
500
501 if (strcmp(collate, ctype) == 0)
502 {
503 if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
504 {
505 /* Normal case where they're the same */
506 errno = 0;
507#ifndef WIN32
508 loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
509 NULL);
510#else
511 loc = _create_locale(LC_ALL, collate);
512#endif
513 if (!loc)
515 }
516 }
517 else
518 {
519#ifndef WIN32
520 /* We need two newlocale() steps */
521 locale_t loc1 = 0;
522
523 if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
524 {
525 errno = 0;
526 loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
527 if (!loc1)
529 }
530
531 if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
532 {
533 errno = 0;
534 loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
535 if (!loc)
536 {
537 if (loc1)
538 freelocale(loc1);
540 }
541 }
542 else
543 loc = loc1;
544#else
545
546 /*
547 * XXX The _create_locale() API doesn't appear to support this. Could
548 * perhaps be worked around by changing pg_locale_t to contain two
549 * separate fields.
550 */
552 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
553 errmsg("collations with different collate and ctype values are not supported on this platform")));
554#endif
555 }
556
557 return loc;
558}
559
560/*
561 * strncoll_libc
562 *
563 * NUL-terminate arguments, if necessary, and pass to strcoll_l().
564 *
565 * An input string length of -1 means that it's already NUL-terminated.
566 */
567int
568strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
570{
571 char sbuf[TEXTBUFLEN];
572 char *buf = sbuf;
573 size_t bufsize1 = (len1 == -1) ? 0 : len1 + 1;
574 size_t bufsize2 = (len2 == -1) ? 0 : len2 + 1;
575 const char *arg1n;
576 const char *arg2n;
577 int result;
578
579 Assert(locale->provider == COLLPROVIDER_LIBC);
580
581 if (bufsize1 + bufsize2 > TEXTBUFLEN)
582 buf = palloc(bufsize1 + bufsize2);
583
584 /* nul-terminate arguments if necessary */
585 if (len1 == -1)
586 {
587 arg1n = arg1;
588 }
589 else
590 {
591 char *buf1 = buf;
592
593 memcpy(buf1, arg1, len1);
594 buf1[len1] = '\0';
595 arg1n = buf1;
596 }
597
598 if (len2 == -1)
599 {
600 arg2n = arg2;
601 }
602 else
603 {
604 char *buf2 = buf + bufsize1;
605
606 memcpy(buf2, arg2, len2);
607 buf2[len2] = '\0';
608 arg2n = buf2;
609 }
610
611 result = strcoll_l(arg1n, arg2n, locale->info.lt);
612
613 if (buf != sbuf)
614 pfree(buf);
615
616 return result;
617}
618
619/*
620 * strnxfrm_libc
621 *
622 * NUL-terminate src, if necessary, and pass to strxfrm_l().
623 *
624 * A source length of -1 means that it's already NUL-terminated.
625 */
626size_t
627strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
629{
630 char sbuf[TEXTBUFLEN];
631 char *buf = sbuf;
632 size_t bufsize = srclen + 1;
633 size_t result;
634
635 Assert(locale->provider == COLLPROVIDER_LIBC);
636
637 if (srclen == -1)
638 return strxfrm_l(dest, src, destsize, locale->info.lt);
639
640 if (bufsize > TEXTBUFLEN)
641 buf = palloc(bufsize);
642
643 /* nul-terminate argument */
644 memcpy(buf, src, srclen);
645 buf[srclen] = '\0';
646
647 result = strxfrm_l(dest, buf, destsize, locale->info.lt);
648
649 if (buf != sbuf)
650 pfree(buf);
651
652 /* if dest is defined, it should be nul-terminated */
653 Assert(result >= destsize || dest[result] == '\0');
654
655 return result;
656}
657
/*
 * get_collation_actual_version_libc
 *
 * Return a palloc'd string describing the version of the collation named by
 * collcollate, or NULL when no version is available.
 *
 * No version is reported for "C", "POSIX", or any name starting with "C."
 * (all compared case-insensitively), nor on platforms matching none of the
 * branches below.
 */
char *
get_collation_actual_version_libc(const char *collcollate)
{
	char	   *collversion = NULL;

	if (pg_strcasecmp("C", collcollate) != 0 &&
		pg_strncasecmp("C.", collcollate, 2) != 0 &&
		pg_strcasecmp("POSIX", collcollate) != 0)
	{
#if defined(__GLIBC__)
		/* Use the glibc version because we don't have anything better. */
		collversion = pstrdup(gnu_get_libc_version());
#elif defined(LC_VERSION_MASK)
		locale_t	loc;

		/* Look up FreeBSD collation version. */
		loc = newlocale(LC_COLLATE_MASK, collcollate, NULL);
		if (loc)
		{
			collversion =
				pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
			freelocale(loc);
		}
		else
			ereport(ERROR,
					(errmsg("could not load locale \"%s\"", collcollate)));
#elif defined(WIN32)

		/*
		 * If we are targeting Windows Vista and above, we can ask for a name
		 * given a collation name (earlier versions required a location code
		 * that we don't have).
		 */
		NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
		WCHAR		wide_collcollate[LOCALE_NAME_MAX_LENGTH];

		MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
							LOCALE_NAME_MAX_LENGTH);
		if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
		{
			/*
			 * GetNLSVersionEx() wants a language tag such as "en-US", not a
			 * locale name like "English_United States.1252".  Until those
			 * values can be prevented from entering the system, or 100%
			 * reliably converted to the more useful tag format, tolerate the
			 * resulting error and report that we have no version data.
			 */
			if (GetLastError() == ERROR_INVALID_PARAMETER)
				return NULL;

			ereport(ERROR,
					(errmsg("could not get collation version for locale \"%s\": error code %lu",
							collcollate,
							GetLastError())));
		}
		collversion = psprintf("%lu.%lu,%lu.%lu",
							   (version.dwNLSVersion >> 8) & 0xFFFF,
							   version.dwNLSVersion & 0xFF,
							   (version.dwDefinedVersion >> 8) & 0xFFFF,
							   version.dwDefinedVersion & 0xFF);
#endif
	}

	return collversion;
}
722
/*
 * strncoll_libc_win32_utf8
 *
 * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
 * invoke wcscoll_l().
 *
 * An input string length of -1 means that it's NUL-terminated.
 *
 * Both converted strings share one buffer: the on-stack buffer when they fit
 * in TEXTBUFLEN bytes, otherwise a palloc'd one that is freed before
 * returning.  Conversion or comparison failures raise an error.
 */
#ifdef WIN32
static int
strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
						 ssize_t len2, pg_locale_t locale)
{
	char		sbuf[TEXTBUFLEN];
	char	   *buf = sbuf;
	char	   *a1p,
			   *a2p;
	int			a1len;
	int			a2len;
	int			r;
	int			result;

	Assert(locale->provider == COLLPROVIDER_LIBC);
	Assert(GetDatabaseEncoding() == PG_UTF8);

	if (len1 == -1)
		len1 = strlen(arg1);
	if (len2 == -1)
		len2 = strlen(arg2);

	/* two bytes per input byte, plus room for a wide terminator */
	a1len = len1 * 2 + 2;
	a2len = len2 * 2 + 2;

	if (a1len + a2len > TEXTBUFLEN)
		buf = palloc(a1len + a2len);

	a1p = buf;
	a2p = buf + a1len;

	/* API does not work for zero-length input */
	if (len1 == 0)
		r = 0;
	else
	{
		r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
								(LPWSTR) a1p, a1len / 2);
		if (!r)
			ereport(ERROR,
					(errmsg("could not convert string to UTF-16: error code %lu",
							GetLastError())));
	}
	((LPWSTR) a1p)[r] = 0;

	if (len2 == 0)
		r = 0;
	else
	{
		r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
								(LPWSTR) a2p, a2len / 2);
		if (!r)
			ereport(ERROR,
					(errmsg("could not convert string to UTF-16: error code %lu",
							GetLastError())));
	}
	((LPWSTR) a2p)[r] = 0;

	errno = 0;
	result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
	if (result == 2147483647)	/* _NLSCMPERROR; missing from mingw headers */
		ereport(ERROR,
				(errmsg("could not compare Unicode strings: %m")));

	if (buf != sbuf)
		pfree(buf);

	return result;
}
#endif							/* WIN32 */
801
802/* simple subroutine for reporting errors from newlocale() */
803void
804report_newlocale_failure(const char *localename)
805{
806 int save_errno;
807
808 /*
809 * Windows doesn't provide any useful error indication from
810 * _create_locale(), and BSD-derived platforms don't seem to feel they
811 * need to set errno either (even though POSIX is pretty clear that
812 * newlocale should do so). So, if errno hasn't been set, assume ENOENT
813 * is what to report.
814 */
815 if (errno == 0)
816 errno = ENOENT;
817
818 /*
819 * ENOENT means "no such locale", not "no such file", so clarify that
820 * errno with an errdetail message.
821 */
822 save_errno = errno; /* auxiliary funcs might change errno */
824 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
825 errmsg("could not create locale \"%s\": %m",
826 localename),
827 (save_errno == ENOENT ?
828 errdetail("The operating system could not find any locale data for the locale name \"%s\".",
829 localename) : 0)));
830}
831
832/*
833 * POSIX doesn't define _l-variants of these functions, but several systems
834 * have them. We provide our own replacements here.
835 */
836#ifndef HAVE_MBSTOWCS_L
837static size_t
838mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
839{
840#ifdef WIN32
841 return _mbstowcs_l(dest, src, n, loc);
842#else
843 size_t result;
844 locale_t save_locale = uselocale(loc);
845
846 result = mbstowcs(dest, src, n);
847 uselocale(save_locale);
848 return result;
849#endif
850}
851#endif
852#ifndef HAVE_WCSTOMBS_L
853static size_t
854wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
855{
856#ifdef WIN32
857 return _wcstombs_l(dest, src, n, loc);
858#else
859 size_t result;
860 locale_t save_locale = uselocale(loc);
861
862 result = wcstombs(dest, src, n);
863 uselocale(save_locale);
864 return result;
865#endif
866}
867#endif
868
869/*
870 * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
871 * Therefore we keep them here rather than with the mbutils code.
872 */
873
874/*
875 * wchar2char --- convert wide characters to multibyte format
876 *
877 * This has the same API as the standard wcstombs_l() function; in particular,
878 * tolen is the maximum number of bytes to store at *to, and *from must be
879 * zero-terminated. The output will be zero-terminated iff there is room.
880 */
881size_t
882wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
883{
884 size_t result;
885
886 if (tolen == 0)
887 return 0;
888
889#ifdef WIN32
890
891 /*
892 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
893 * for some reason mbstowcs and wcstombs won't do this for us, so we use
894 * MultiByteToWideChar().
895 */
897 {
898 result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
899 NULL, NULL);
900 /* A zero return is failure */
901 if (result <= 0)
902 result = -1;
903 else
904 {
905 Assert(result <= tolen);
906 /* Microsoft counts the zero terminator in the result */
907 result--;
908 }
909 }
910 else
911#endif /* WIN32 */
912 if (locale == (pg_locale_t) 0)
913 {
914 /* Use wcstombs directly for the default locale */
915 result = wcstombs(to, from, tolen);
916 }
917 else
918 {
919 /* Use wcstombs_l for nondefault locales */
920 result = wcstombs_l(to, from, tolen, locale->info.lt);
921 }
922
923 return result;
924}
925
926/*
927 * char2wchar --- convert multibyte characters to wide characters
928 *
929 * This has almost the API of mbstowcs_l(), except that *from need not be
930 * null-terminated; instead, the number of input bytes is specified as
931 * fromlen. Also, we ereport() rather than returning -1 for invalid
932 * input encoding. tolen is the maximum number of wchar_t's to store at *to.
933 * The output will be zero-terminated iff there is room.
934 */
935size_t
936char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
938{
939 size_t result;
940
941 if (tolen == 0)
942 return 0;
943
944#ifdef WIN32
945 /* See WIN32 "Unicode" comment above */
947 {
948 /* Win32 API does not work for zero-length input */
949 if (fromlen == 0)
950 result = 0;
951 else
952 {
953 result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
954 /* A zero return is failure */
955 if (result == 0)
956 result = -1;
957 }
958
959 if (result != -1)
960 {
961 Assert(result < tolen);
962 /* Append trailing null wchar (MultiByteToWideChar() does not) */
963 to[result] = 0;
964 }
965 }
966 else
967#endif /* WIN32 */
968 {
969 /* mbstowcs requires ending '\0' */
970 char *str = pnstrdup(from, fromlen);
971
972 if (locale == (pg_locale_t) 0)
973 {
974 /* Use mbstowcs directly for the default locale */
975 result = mbstowcs(to, str, tolen);
976 }
977 else
978 {
979 /* Use mbstowcs_l for nondefault locales */
980 result = mbstowcs_l(to, str, tolen, locale->info.lt);
981 }
982
983 pfree(str);
984 }
985
986 if (result == -1)
987 {
988 /*
989 * Invalid multibyte character encountered. We try to give a useful
990 * error message by letting pg_verifymbstr check the string. But it's
991 * possible that the string is OK to us, and not OK to mbstowcs ---
992 * this suggests that the LC_CTYPE locale is different from the
993 * database encoding. Give a generic error message if pg_verifymbstr
994 * can't find anything wrong.
995 */
996 pg_verifymbstr(from, fromlen, false); /* might not return */
997 /* but if it does ... */
999 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1000 errmsg("invalid multibyte character for locale"),
1001 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
1002 }
1003
1004 return result;
1005}
#define TextDatumGetCString(d)
Definition: builtins.h:98
Oid collid
int errdetail(const char *fmt,...)
Definition: elog.c:1204
int errhint(const char *fmt,...)
Definition: elog.c:1318
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
Oid MyDatabaseId
Definition: globals.c:95
Assert(PointerIsAligned(start, uint64))
const char * str
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define bufsize
Definition: indent_globs.h:36
static char * locale
Definition: initdb.c:140
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition: mbutils.c:1556
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1546
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:1294
char * pstrdup(const char *in)
Definition: mcxt.c:2325
void pfree(void *pointer)
Definition: mcxt.c:2150
void * palloc(Size size)
Definition: mcxt.c:1943
char * pnstrdup(const char *in, Size len)
Definition: mcxt.c:2336
static size_t strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
size_t wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
static size_t strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context)
size_t strtitle_libc(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
size_t strupper_libc(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
char * get_collation_actual_version_libc(const char *collcollate)
static locale_t make_libc_collator(const char *collate, const char *ctype)
static size_t wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
static const struct collate_methods collate_methods_libc
size_t strlower_libc(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
static size_t strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
static int strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale)
static size_t strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
static size_t strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
void report_newlocale_failure(const char *localename)
#define TEXTBUFLEN
static size_t strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
static size_t mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
static size_t strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
static char * buf
Definition: pg_test_fsync.c:72
@ PG_UTF8
Definition: pg_wchar.h:232
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
unsigned char pg_toupper(unsigned char ch)
Definition: pgstrcasecmp.c:105
unsigned char pg_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:122
int pg_strncasecmp(const char *s1, const char *s2, size_t n)
Definition: pgstrcasecmp.c:69
uintptr_t Datum
Definition: postgres.h:69
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:257
unsigned int Oid
Definition: postgres_ext.h:30
char * psprintf(const char *fmt,...)
Definition: psprintf.c:43
int(* strncoll)(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale)
Definition: pg_locale.h:57
const struct collate_methods * collate
Definition: pg_locale.h:104
bool deterministic
Definition: pg_locale.h:99
union pg_locale_struct::@161 info
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:269
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:221
Datum SysCacheGetAttrNotNull(int cacheId, HeapTuple tup, AttrNumber attributeNumber)
Definition: syscache.c:631
#define locale_t
Definition: win32_port.h:432
#define toupper_l
Definition: win32_port.h:434
#define iswalnum_l
Definition: win32_port.h:442
#define towupper_l
Definition: win32_port.h:436
#define strcoll_l
Definition: win32_port.h:455
#define strxfrm_l
Definition: win32_port.h:456
#define towlower_l
Definition: win32_port.h:435
#define wcscoll_l
Definition: win32_port.h:457
#define tolower_l
Definition: win32_port.h:433
#define isalnum_l
Definition: win32_port.h:441