PostgreSQL Source Code  git master
pg_locale.c
Go to the documentation of this file.
1 /*-----------------------------------------------------------------------
2  *
3  * PostgreSQL locale utilities
4  *
5  * Portions Copyright (c) 2002-2024, PostgreSQL Global Development Group
6  *
7  * src/backend/utils/adt/pg_locale.c
8  *
9  *-----------------------------------------------------------------------
10  */
11 
12 /*----------
13  * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14  * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15  * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16  * toupper(), etc. are always in the same fixed locale.
17  *
18  * LC_MESSAGES is settable at run time and will take effect
19  * immediately.
20  *
21  * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
22  * settable at run-time. However, we don't actually set those locale
23  * categories permanently. This would have bizarre effects like no
24  * longer accepting standard floating-point literals in some locales.
25  * Instead, we only set these locale categories briefly when needed,
26  * cache the required information obtained from localeconv() or
27  * strftime(), and then set the locale categories back to "C".
28  * The cached information is only used by the formatting functions
29  * (to_char, etc.) and the money type. For the user, this should all be
30  * transparent.
31  *
32  * !!! NOW HEAR THIS !!!
33  *
34  * We've been bitten repeatedly by this bug, so let's try to keep it in
35  * mind in future: on some platforms, the locale functions return pointers
36  * to static data that will be overwritten by any later locale function.
37  * Thus, for example, the obvious-looking sequence
38  * save = setlocale(category, NULL);
39  * if (!setlocale(category, value))
40  * fail = true;
41  * setlocale(category, save);
42  * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
43  * will change the memory save is pointing at. To do this sort of thing
44  * safely, you *must* pstrdup what setlocale returns the first time.
45  *
46  * The POSIX locale standard is available here:
47  *
48  * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
49  *----------
50  */
51 
52 
53 #include "postgres.h"
54 
55 #include <time.h>
56 
57 #include "access/htup_details.h"
58 #include "catalog/pg_collation.h"
59 #include "mb/pg_wchar.h"
60 #include "miscadmin.h"
61 #include "utils/builtins.h"
62 #include "utils/formatting.h"
63 #include "utils/guc_hooks.h"
64 #include "utils/hsearch.h"
65 #include "utils/lsyscache.h"
66 #include "utils/memutils.h"
67 #include "utils/pg_locale.h"
68 #include "utils/syscache.h"
69 
70 #ifdef USE_ICU
71 #include <unicode/ucnv.h>
72 #include <unicode/ustring.h>
73 #endif
74 
75 #ifdef __GLIBC__
76 #include <gnu/libc-version.h>
77 #endif
78 
79 #ifdef WIN32
80 #include <shlwapi.h>
81 #endif
82 
83 /* Error triggered for locale-sensitive subroutines */
84 #define PGLOCALE_SUPPORT_ERROR(provider) \
85  elog(ERROR, "unsupported collprovider for %s: %c", __func__, provider)
86 
87 /*
88  * This should be large enough that most strings will fit, but small enough
89  * that we feel comfortable putting it on the stack
90  */
91 #define TEXTBUFLEN 1024
92 
93 #define MAX_L10N_DATA 80
94 
95 
96 /* GUC settings */
101 
103 
104 /*
105  * lc_time localization cache.
106  *
107  * We use only the first 7 or 12 entries of these arrays. The last array
108  * element is left as NULL for the convenience of outside code that wants
109  * to sequentially scan these arrays.
110  */
112 char *localized_full_days[7 + 1];
114 char *localized_full_months[12 + 1];
115 
116 /* is the databases's LC_CTYPE the C locale? */
117 bool database_ctype_is_c = false;
118 
119 /* indicates whether locale information cache is valid */
120 static bool CurrentLocaleConvValid = false;
121 static bool CurrentLCTimeValid = false;
122 
123 /* Cache for collation-related knowledge */
124 
125 typedef struct
126 {
127  Oid collid; /* hash key: pg_collation OID */
128  bool collate_is_c; /* is collation's LC_COLLATE C? */
129  bool ctype_is_c; /* is collation's LC_CTYPE C? */
130  bool flags_valid; /* true if above flags are valid */
131  pg_locale_t locale; /* locale_t struct, or 0 if not valid */
133 
134 static HTAB *collation_cache = NULL;
135 
136 
137 #if defined(WIN32) && defined(LC_MESSAGES)
138 static char *IsoLocaleName(const char *);
139 #endif
140 
141 #ifdef USE_ICU
142 /*
143  * Converter object for converting between ICU's UChar strings and C strings
144  * in database encoding. Since the database encoding doesn't change, we only
145  * need one of these per session.
146  */
147 static UConverter *icu_converter = NULL;
148 
149 static UCollator *pg_ucol_open(const char *loc_str);
150 static void init_icu_converter(void);
151 static size_t uchar_length(UConverter *converter,
152  const char *str, int32_t len);
153 static int32_t uchar_convert(UConverter *converter,
154  UChar *dest, int32_t destlen,
155  const char *src, int32_t srclen);
156 static void icu_set_collation_attributes(UCollator *collator, const char *loc,
157  UErrorCode *status);
158 #endif
159 
160 /*
161  * POSIX doesn't define _l-variants of these functions, but several systems
162  * have them. We provide our own replacements here.
163  */
164 #ifndef HAVE_MBSTOWCS_L
165 static size_t
166 mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
167 {
168 #ifdef WIN32
169  return _mbstowcs_l(dest, src, n, loc);
170 #else
171  size_t result;
172  locale_t save_locale = uselocale(loc);
173 
174  result = mbstowcs(dest, src, n);
175  uselocale(save_locale);
176  return result;
177 #endif
178 }
179 #endif
180 #ifndef HAVE_WCSTOMBS_L
181 static size_t
182 wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
183 {
184 #ifdef WIN32
185  return _wcstombs_l(dest, src, n, loc);
186 #else
187  size_t result;
188  locale_t save_locale = uselocale(loc);
189 
190  result = wcstombs(dest, src, n);
191  uselocale(save_locale);
192  return result;
193 #endif
194 }
195 #endif
196 
197 /*
198  * pg_perm_setlocale
199  *
200  * This wraps the libc function setlocale(), with two additions. First, when
201  * changing LC_CTYPE, update gettext's encoding for the current message
202  * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but
203  * not on Windows. Second, if the operation is successful, the corresponding
204  * LC_XXX environment variable is set to match. By setting the environment
205  * variable, we ensure that any subsequent use of setlocale(..., "") will
206  * preserve the settings made through this routine. Of course, LC_ALL must
207  * also be unset to fully ensure that, but that has to be done elsewhere after
208  * all the individual LC_XXX variables have been set correctly. (Thank you
209  * Perl for making this kluge necessary.)
210  */
211 char *
212 pg_perm_setlocale(int category, const char *locale)
213 {
214  char *result;
215  const char *envvar;
216 
217 #ifndef WIN32
218  result = setlocale(category, locale);
219 #else
220 
221  /*
222  * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
223  * the given value is good and set it in the environment variables. We
224  * must ignore attempts to set to "", which means "keep using the old
225  * environment value".
226  */
227 #ifdef LC_MESSAGES
228  if (category == LC_MESSAGES)
229  {
230  result = (char *) locale;
231  if (locale == NULL || locale[0] == '\0')
232  return result;
233  }
234  else
235 #endif
236  result = setlocale(category, locale);
237 #endif /* WIN32 */
238 
239  if (result == NULL)
240  return result; /* fall out immediately on failure */
241 
242  /*
243  * Use the right encoding in translated messages. Under ENABLE_NLS, let
244  * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message
245  * format strings are ASCII, but database-encoding strings may enter the
246  * message via %s. This makes the overall message encoding equal to the
247  * database encoding.
248  */
249  if (category == LC_CTYPE)
250  {
251  static char save_lc_ctype[LOCALE_NAME_BUFLEN];
252 
253  /* copy setlocale() return value before callee invokes it again */
254  strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
255  result = save_lc_ctype;
256 
257 #ifdef ENABLE_NLS
258  SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
259 #else
261 #endif
262  }
263 
264  switch (category)
265  {
266  case LC_COLLATE:
267  envvar = "LC_COLLATE";
268  break;
269  case LC_CTYPE:
270  envvar = "LC_CTYPE";
271  break;
272 #ifdef LC_MESSAGES
273  case LC_MESSAGES:
274  envvar = "LC_MESSAGES";
275 #ifdef WIN32
276  result = IsoLocaleName(locale);
277  if (result == NULL)
278  result = (char *) locale;
279  elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
280 #endif /* WIN32 */
281  break;
282 #endif /* LC_MESSAGES */
283  case LC_MONETARY:
284  envvar = "LC_MONETARY";
285  break;
286  case LC_NUMERIC:
287  envvar = "LC_NUMERIC";
288  break;
289  case LC_TIME:
290  envvar = "LC_TIME";
291  break;
292  default:
293  elog(FATAL, "unrecognized LC category: %d", category);
294  return NULL; /* keep compiler quiet */
295  }
296 
297  if (setenv(envvar, result, 1) != 0)
298  return NULL;
299 
300  return result;
301 }
302 
303 
304 /*
305  * Is the locale name valid for the locale category?
306  *
307  * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
308  * canonical name is stored there. This is especially useful for figuring out
309  * what locale name "" means (ie, the server environment value). (Actually,
310  * it seems that on most implementations that's the only thing it's good for;
311  * we could wish that setlocale gave back a canonically spelled version of
312  * the locale name, but typically it doesn't.)
313  */
314 bool
315 check_locale(int category, const char *locale, char **canonname)
316 {
317  char *save;
318  char *res;
319 
320  if (canonname)
321  *canonname = NULL; /* in case of failure */
322 
323  save = setlocale(category, NULL);
324  if (!save)
325  return false; /* won't happen, we hope */
326 
327  /* save may be pointing at a modifiable scratch variable, see above. */
328  save = pstrdup(save);
329 
330  /* set the locale with setlocale, to see if it accepts it. */
331  res = setlocale(category, locale);
332 
333  /* save canonical name if requested. */
334  if (res && canonname)
335  *canonname = pstrdup(res);
336 
337  /* restore old value. */
338  if (!setlocale(category, save))
339  elog(WARNING, "failed to restore old locale \"%s\"", save);
340  pfree(save);
341 
342  return (res != NULL);
343 }
344 
345 
346 /*
347  * GUC check/assign hooks
348  *
349  * For most locale categories, the assign hook doesn't actually set the locale
350  * permanently, just reset flags so that the next use will cache the
351  * appropriate values. (See explanation at the top of this file.)
352  *
353  * Note: we accept value = "" as selecting the postmaster's environment
354  * value, whatever it was (so long as the environment setting is legal).
355  * This will have been locked down by an earlier call to pg_perm_setlocale.
356  */
357 bool
359 {
360  return check_locale(LC_MONETARY, *newval, NULL);
361 }
362 
363 void
364 assign_locale_monetary(const char *newval, void *extra)
365 {
366  CurrentLocaleConvValid = false;
367 }
368 
369 bool
371 {
372  return check_locale(LC_NUMERIC, *newval, NULL);
373 }
374 
375 void
376 assign_locale_numeric(const char *newval, void *extra)
377 {
378  CurrentLocaleConvValid = false;
379 }
380 
381 bool
382 check_locale_time(char **newval, void **extra, GucSource source)
383 {
384  return check_locale(LC_TIME, *newval, NULL);
385 }
386 
387 void
388 assign_locale_time(const char *newval, void *extra)
389 {
390  CurrentLCTimeValid = false;
391 }
392 
393 /*
394  * We allow LC_MESSAGES to actually be set globally.
395  *
396  * Note: we normally disallow value = "" because it wouldn't have consistent
397  * semantics (it'd effectively just use the previous value). However, this
398  * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
399  * not even if the attempted setting fails due to invalid environment value.
400  * The idea there is just to accept the environment setting *if possible*
401  * during startup, until we can read the proper value from postgresql.conf.
402  */
403 bool
405 {
406  if (**newval == '\0')
407  {
408  if (source == PGC_S_DEFAULT)
409  return true;
410  else
411  return false;
412  }
413 
414  /*
415  * LC_MESSAGES category does not exist everywhere, but accept it anyway
416  *
417  * On Windows, we can't even check the value, so accept blindly
418  */
419 #if defined(LC_MESSAGES) && !defined(WIN32)
420  return check_locale(LC_MESSAGES, *newval, NULL);
421 #else
422  return true;
423 #endif
424 }
425 
/*
 * GUC assign hook for lc_messages.
 *
 * Unlike the other categories, this one is really installed, by way of
 * pg_perm_setlocale().  "extra" is unused.
 */
void
assign_locale_messages(const char *newval, void *extra)
{
#ifdef LC_MESSAGES

	/*
	 * The category does not exist on every platform; where it is absent
	 * this hook does nothing.  A failed attempt is deliberately ignored, in
	 * keeping with the leniency of check_locale_messages().
	 */
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
437 
438 
/*
 * free_struct_lconv
 *
 * Release every malloc'd string hanging off *s.  The struct itself is left
 * alone and its pointer fields are not reset.  Only free() is called, so
 * this cannot throw elog(ERROR) -- callers rely on that.
 *
 * The set of fields here must stay in step with struct_lconv_is_valid().
 */
static void
free_struct_lconv(struct lconv *s)
{
	char	   *const owned[] = {
		s->decimal_point,
		s->thousands_sep,
		s->grouping,
		s->int_curr_symbol,
		s->currency_symbol,
		s->mon_decimal_point,
		s->mon_thousands_sep,
		s->mon_grouping,
		s->positive_sign,
		s->negative_sign
	};
	size_t		i;

	/* free(NULL) is a no-op, so unset fields need no special handling */
	for (i = 0; i < sizeof(owned) / sizeof(owned[0]); i++)
		free(owned[i]);
}
457 
/*
 * struct_lconv_is_valid
 *
 * Report whether every string field of *s that we care about has been
 * filled in, i.e. is non-NULL.  The set of fields must stay in step with
 * free_struct_lconv().
 */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	return s->decimal_point != NULL &&
		s->thousands_sep != NULL &&
		s->grouping != NULL &&
		s->int_curr_symbol != NULL &&
		s->currency_symbol != NULL &&
		s->mon_decimal_point != NULL &&
		s->mon_thousands_sep != NULL &&
		s->mon_grouping != NULL &&
		s->positive_sign != NULL &&
		s->negative_sign != NULL;
}
487 
488 
489 /*
490  * Convert the strdup'd string at *str from the specified encoding to the
491  * database encoding.
492  */
493 static void
495 {
496  char *pstr;
497  char *mstr;
498 
499  /* convert the string to the database encoding */
500  pstr = pg_any_to_server(*str, strlen(*str), encoding);
501  if (pstr == *str)
502  return; /* no conversion happened */
503 
504  /* need it malloc'd not palloc'd */
505  mstr = strdup(pstr);
506  if (mstr == NULL)
507  ereport(ERROR,
508  (errcode(ERRCODE_OUT_OF_MEMORY),
509  errmsg("out of memory")));
510 
511  /* replace old string */
512  free(*str);
513  *str = mstr;
514 
515  pfree(pstr);
516 }
517 
518 
519 /*
520  * Return the POSIX lconv struct (contains number/money formatting
521  * information) with locale information for all categories.
522  */
523 struct lconv *
525 {
526  static struct lconv CurrentLocaleConv;
527  static bool CurrentLocaleConvAllocated = false;
528  struct lconv *extlconv;
529  struct lconv worklconv;
530  char *save_lc_monetary;
531  char *save_lc_numeric;
532 #ifdef WIN32
533  char *save_lc_ctype;
534 #endif
535 
536  /* Did we do it already? */
538  return &CurrentLocaleConv;
539 
540  /* Free any already-allocated storage */
541  if (CurrentLocaleConvAllocated)
542  {
543  free_struct_lconv(&CurrentLocaleConv);
544  CurrentLocaleConvAllocated = false;
545  }
546 
547  /*
548  * This is tricky because we really don't want to risk throwing error
549  * while the locale is set to other than our usual settings. Therefore,
550  * the process is: collect the usual settings, set locale to special
551  * setting, copy relevant data into worklconv using strdup(), restore
552  * normal settings, convert data to desired encoding, and finally stash
553  * the collected data in CurrentLocaleConv. This makes it safe if we
554  * throw an error during encoding conversion or run out of memory anywhere
555  * in the process. All data pointed to by struct lconv members is
556  * allocated with strdup, to avoid premature elog(ERROR) and to allow
557  * using a single cleanup routine.
558  */
559  memset(&worklconv, 0, sizeof(worklconv));
560 
561  /* Save prevailing values of monetary and numeric locales */
562  save_lc_monetary = setlocale(LC_MONETARY, NULL);
563  if (!save_lc_monetary)
564  elog(ERROR, "setlocale(NULL) failed");
565  save_lc_monetary = pstrdup(save_lc_monetary);
566 
567  save_lc_numeric = setlocale(LC_NUMERIC, NULL);
568  if (!save_lc_numeric)
569  elog(ERROR, "setlocale(NULL) failed");
570  save_lc_numeric = pstrdup(save_lc_numeric);
571 
572 #ifdef WIN32
573 
574  /*
575  * The POSIX standard explicitly says that it is undefined what happens if
576  * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
577  * that implied by LC_CTYPE. In practice, all Unix-ish platforms seem to
578  * believe that localeconv() should return strings that are encoded in the
579  * codeset implied by the LC_MONETARY or LC_NUMERIC locale name. Hence,
580  * once we have successfully collected the localeconv() results, we will
581  * convert them from that codeset to the desired server encoding.
582  *
583  * Windows, of course, resolutely does things its own way; on that
584  * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
585  * results. Hence, we must temporarily set that category as well.
586  */
587 
588  /* Save prevailing value of ctype locale */
589  save_lc_ctype = setlocale(LC_CTYPE, NULL);
590  if (!save_lc_ctype)
591  elog(ERROR, "setlocale(NULL) failed");
592  save_lc_ctype = pstrdup(save_lc_ctype);
593 
594  /* Here begins the critical section where we must not throw error */
595 
596  /* use numeric to set the ctype */
597  setlocale(LC_CTYPE, locale_numeric);
598 #endif
599 
600  /* Get formatting information for numeric */
601  setlocale(LC_NUMERIC, locale_numeric);
602  extlconv = localeconv();
603 
604  /* Must copy data now in case setlocale() overwrites it */
605  worklconv.decimal_point = strdup(extlconv->decimal_point);
606  worklconv.thousands_sep = strdup(extlconv->thousands_sep);
607  worklconv.grouping = strdup(extlconv->grouping);
608 
609 #ifdef WIN32
610  /* use monetary to set the ctype */
611  setlocale(LC_CTYPE, locale_monetary);
612 #endif
613 
614  /* Get formatting information for monetary */
615  setlocale(LC_MONETARY, locale_monetary);
616  extlconv = localeconv();
617 
618  /* Must copy data now in case setlocale() overwrites it */
619  worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
620  worklconv.currency_symbol = strdup(extlconv->currency_symbol);
621  worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
622  worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
623  worklconv.mon_grouping = strdup(extlconv->mon_grouping);
624  worklconv.positive_sign = strdup(extlconv->positive_sign);
625  worklconv.negative_sign = strdup(extlconv->negative_sign);
626  /* Copy scalar fields as well */
627  worklconv.int_frac_digits = extlconv->int_frac_digits;
628  worklconv.frac_digits = extlconv->frac_digits;
629  worklconv.p_cs_precedes = extlconv->p_cs_precedes;
630  worklconv.p_sep_by_space = extlconv->p_sep_by_space;
631  worklconv.n_cs_precedes = extlconv->n_cs_precedes;
632  worklconv.n_sep_by_space = extlconv->n_sep_by_space;
633  worklconv.p_sign_posn = extlconv->p_sign_posn;
634  worklconv.n_sign_posn = extlconv->n_sign_posn;
635 
636  /*
637  * Restore the prevailing locale settings; failure to do so is fatal.
638  * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
639  * but proceeding with the wrong value of LC_CTYPE would certainly be bad
640  * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
641  * are almost certainly "C", there's really no reason that restoring those
642  * should fail.
643  */
644 #ifdef WIN32
645  if (!setlocale(LC_CTYPE, save_lc_ctype))
646  elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
647 #endif
648  if (!setlocale(LC_MONETARY, save_lc_monetary))
649  elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
650  if (!setlocale(LC_NUMERIC, save_lc_numeric))
651  elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);
652 
653  /*
654  * At this point we've done our best to clean up, and can call functions
655  * that might possibly throw errors with a clean conscience. But let's
656  * make sure we don't leak any already-strdup'd fields in worklconv.
657  */
658  PG_TRY();
659  {
660  int encoding;
661 
662  /* Release the pstrdup'd locale names */
663  pfree(save_lc_monetary);
664  pfree(save_lc_numeric);
665 #ifdef WIN32
666  pfree(save_lc_ctype);
667 #endif
668 
669  /* If any of the preceding strdup calls failed, complain now. */
670  if (!struct_lconv_is_valid(&worklconv))
671  ereport(ERROR,
672  (errcode(ERRCODE_OUT_OF_MEMORY),
673  errmsg("out of memory")));
674 
675  /*
676  * Now we must perform encoding conversion from whatever's associated
677  * with the locales into the database encoding. If we can't identify
678  * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
679  * use PG_SQL_ASCII, which will result in just validating that the
680  * strings are OK in the database encoding.
681  */
683  if (encoding < 0)
685 
686  db_encoding_convert(encoding, &worklconv.decimal_point);
687  db_encoding_convert(encoding, &worklconv.thousands_sep);
688  /* grouping is not text and does not require conversion */
689 
691  if (encoding < 0)
693 
694  db_encoding_convert(encoding, &worklconv.int_curr_symbol);
695  db_encoding_convert(encoding, &worklconv.currency_symbol);
696  db_encoding_convert(encoding, &worklconv.mon_decimal_point);
697  db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
698  /* mon_grouping is not text and does not require conversion */
699  db_encoding_convert(encoding, &worklconv.positive_sign);
700  db_encoding_convert(encoding, &worklconv.negative_sign);
701  }
702  PG_CATCH();
703  {
704  free_struct_lconv(&worklconv);
705  PG_RE_THROW();
706  }
707  PG_END_TRY();
708 
709  /*
710  * Everything is good, so save the results.
711  */
712  CurrentLocaleConv = worklconv;
713  CurrentLocaleConvAllocated = true;
714  CurrentLocaleConvValid = true;
715  return &CurrentLocaleConv;
716 }
717 
718 #ifdef WIN32
719 /*
720  * On Windows, strftime() returns its output in encoding CP_ACP (the default
721  * operating system codepage for the computer), which is likely different
722  * from SERVER_ENCODING. This is especially important in Japanese versions
723  * of Windows which will use SJIS encoding, which we don't support as a
724  * server encoding.
725  *
726  * So, instead of using strftime(), use wcsftime() to return the value in
727  * wide characters (internally UTF16) and then convert to UTF8, which we
728  * know how to handle directly.
729  *
730  * Note that this only affects the calls to strftime() in this file, which are
731  * used to get the locale-aware strings. Other parts of the backend use
732  * pg_strftime(), which isn't locale-aware and does not need to be replaced.
733  */
734 static size_t
735 strftime_win32(char *dst, size_t dstlen,
736  const char *format, const struct tm *tm)
737 {
738  size_t len;
739  wchar_t wformat[8]; /* formats used below need 3 chars */
740  wchar_t wbuf[MAX_L10N_DATA];
741 
742  /*
743  * Get a wchar_t version of the format string. We only actually use
744  * plain-ASCII formats in this file, so we can say that they're UTF8.
745  */
746  len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
747  wformat, lengthof(wformat));
748  if (len == 0)
749  elog(ERROR, "could not convert format string from UTF-8: error code %lu",
750  GetLastError());
751 
752  len = wcsftime(wbuf, MAX_L10N_DATA, wformat, tm);
753  if (len == 0)
754  {
755  /*
756  * wcsftime failed, possibly because the result would not fit in
757  * MAX_L10N_DATA. Return 0 with the contents of dst unspecified.
758  */
759  return 0;
760  }
761 
762  len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1,
763  NULL, NULL);
764  if (len == 0)
765  elog(ERROR, "could not convert string to UTF-8: error code %lu",
766  GetLastError());
767 
768  dst[len] = '\0';
769 
770  return len;
771 }
772 
773 /* redefine strftime() */
774 #define strftime(a,b,c,d) strftime_win32(a,b,c,d)
775 #endif /* WIN32 */
776 
777 /*
778  * Subroutine for cache_locale_time().
779  * Convert the given string from encoding "encoding" to the database
780  * encoding, and store the result at *dst, replacing any previous value.
781  */
782 static void
783 cache_single_string(char **dst, const char *src, int encoding)
784 {
785  char *ptr;
786  char *olddst;
787 
788  /* Convert the string to the database encoding, or validate it's OK */
789  ptr = pg_any_to_server(src, strlen(src), encoding);
790 
791  /* Store the string in long-lived storage, replacing any previous value */
792  olddst = *dst;
794  if (olddst)
795  pfree(olddst);
796 
797  /* Might as well clean up any palloc'd conversion result, too */
798  if (ptr != src)
799  pfree(ptr);
800 }
801 
802 /*
803  * Update the lc_time localization cache variables if needed.
804  */
805 void
807 {
808  char buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
809  char *bufptr;
810  time_t timenow;
811  struct tm *timeinfo;
812  bool strftimefail = false;
813  int encoding;
814  int i;
815  char *save_lc_time;
816 #ifdef WIN32
817  char *save_lc_ctype;
818 #endif
819 
820  /* did we do this already? */
821  if (CurrentLCTimeValid)
822  return;
823 
824  elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
825 
826  /*
827  * As in PGLC_localeconv(), it's critical that we not throw error while
828  * libc's locale settings have nondefault values. Hence, we just call
829  * strftime() within the critical section, and then convert and save its
830  * results afterwards.
831  */
832 
833  /* Save prevailing value of time locale */
834  save_lc_time = setlocale(LC_TIME, NULL);
835  if (!save_lc_time)
836  elog(ERROR, "setlocale(NULL) failed");
837  save_lc_time = pstrdup(save_lc_time);
838 
839 #ifdef WIN32
840 
841  /*
842  * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we
843  * must set it here. This code looks the same as what PGLC_localeconv()
844  * does, but the underlying reason is different: this does NOT determine
845  * the encoding we'll get back from strftime_win32().
846  */
847 
848  /* Save prevailing value of ctype locale */
849  save_lc_ctype = setlocale(LC_CTYPE, NULL);
850  if (!save_lc_ctype)
851  elog(ERROR, "setlocale(NULL) failed");
852  save_lc_ctype = pstrdup(save_lc_ctype);
853 
854  /* use lc_time to set the ctype */
855  setlocale(LC_CTYPE, locale_time);
856 #endif
857 
858  setlocale(LC_TIME, locale_time);
859 
860  /* We use times close to current time as data for strftime(). */
861  timenow = time(NULL);
862  timeinfo = localtime(&timenow);
863 
864  /* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
865  bufptr = buf;
866 
867  /*
868  * MAX_L10N_DATA is sufficient buffer space for every known locale, and
869  * POSIX defines no strftime() errors. (Buffer space exhaustion is not an
870  * error.) An implementation might report errors (e.g. ENOMEM) by
871  * returning 0 (or, less plausibly, a negative value) and setting errno.
872  * Report errno just in case the implementation did that, but clear it in
873  * advance of the calls so we don't emit a stale, unrelated errno.
874  */
875  errno = 0;
876 
877  /* localized days */
878  for (i = 0; i < 7; i++)
879  {
880  timeinfo->tm_wday = i;
881  if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0)
882  strftimefail = true;
883  bufptr += MAX_L10N_DATA;
884  if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0)
885  strftimefail = true;
886  bufptr += MAX_L10N_DATA;
887  }
888 
889  /* localized months */
890  for (i = 0; i < 12; i++)
891  {
892  timeinfo->tm_mon = i;
893  timeinfo->tm_mday = 1; /* make sure we don't have invalid date */
894  if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0)
895  strftimefail = true;
896  bufptr += MAX_L10N_DATA;
897  if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0)
898  strftimefail = true;
899  bufptr += MAX_L10N_DATA;
900  }
901 
902  /*
903  * Restore the prevailing locale settings; as in PGLC_localeconv(),
904  * failure to do so is fatal.
905  */
906 #ifdef WIN32
907  if (!setlocale(LC_CTYPE, save_lc_ctype))
908  elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
909 #endif
910  if (!setlocale(LC_TIME, save_lc_time))
911  elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);
912 
913  /*
914  * At this point we've done our best to clean up, and can throw errors, or
915  * call functions that might throw errors, with a clean conscience.
916  */
917  if (strftimefail)
918  elog(ERROR, "strftime() failed: %m");
919 
920  /* Release the pstrdup'd locale names */
921  pfree(save_lc_time);
922 #ifdef WIN32
923  pfree(save_lc_ctype);
924 #endif
925 
926 #ifndef WIN32
927 
928  /*
929  * As in PGLC_localeconv(), we must convert strftime()'s output from the
930  * encoding implied by LC_TIME to the database encoding. If we can't
931  * identify the LC_TIME encoding, just perform encoding validation.
932  */
934  if (encoding < 0)
936 
937 #else
938 
939  /*
940  * On Windows, strftime_win32() always returns UTF8 data, so convert from
941  * that if necessary.
942  */
943  encoding = PG_UTF8;
944 
945 #endif /* WIN32 */
946 
947  bufptr = buf;
948 
949  /* localized days */
950  for (i = 0; i < 7; i++)
951  {
953  bufptr += MAX_L10N_DATA;
955  bufptr += MAX_L10N_DATA;
956  }
957  localized_abbrev_days[7] = NULL;
958  localized_full_days[7] = NULL;
959 
960  /* localized months */
961  for (i = 0; i < 12; i++)
962  {
964  bufptr += MAX_L10N_DATA;
966  bufptr += MAX_L10N_DATA;
967  }
968  localized_abbrev_months[12] = NULL;
969  localized_full_months[12] = NULL;
970 
971  CurrentLCTimeValid = true;
972 }
973 
974 
975 #if defined(WIN32) && defined(LC_MESSAGES)
976 /*
977  * Convert a Windows setlocale() argument to a Unix-style one.
978  *
979  * Regardless of platform, we install message catalogs under a Unix-style
980  * LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings
981  * following that style will elicit localized interface strings.
982  *
983  * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
984  * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
985  * case-insensitive. setlocale() returns the fully-qualified form; for
986  * example, setlocale("thaI") returns "Thai_Thailand.874". Internally,
987  * setlocale() and _create_locale() select a "locale identifier"[1] and store
988  * it in an undocumented _locale_t field. From that LCID, we can retrieve the
989  * ISO 639 language and the ISO 3166 country. Character encoding does not
990  * matter, because the server and client encodings govern that.
991  *
992  * Windows Vista introduced the "locale name" concept[2], closely following
993  * RFC 4646. Locale identifiers are now deprecated. Starting with Visual
994  * Studio 2012, setlocale() accepts locale names in addition to the strings it
995  * accepted historically. It does not standardize them; setlocale("Th-tH")
996  * returns "Th-tH". setlocale(category, "") still returns a traditional
997  * string. Furthermore, msvcr110.dll changed the undocumented _locale_t
998  * content to carry locale names instead of locale identifiers.
999  *
1000  * Visual Studio 2015 should still be able to do the same as Visual Studio
1001  * 2012, but the declaration of locale_name is missing in _locale_t, causing
1002  * this code compilation to fail, hence this falls back instead on to
1003  * enumerating all system locales by using EnumSystemLocalesEx to find the
1004  * required locale name. If the input argument is in Unix-style then we can
1005  * get ISO Locale name directly by using GetLocaleInfoEx() with LCType as
1006  * LOCALE_SNAME.
1007  *
1008  * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol in
1009  * releases before Windows 8. IsoLocaleName() always fails in a MinGW-built
1010  * postgres.exe, so only Unix-style values of the lc_messages GUC can elicit
1011  * localized messages. In particular, every lc_messages setting that initdb
1012  * can select automatically will yield only C-locale messages. XXX This could
1013  * be fixed by running the fully-qualified locale name through a lookup table.
1014  *
1015  * This function returns a pointer to a static buffer bearing the converted
1016  * name or NULL if conversion fails.
1017  *
1018  * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers
1019  * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names
1020  */
1021 
1022 #if defined(_MSC_VER)
1023 
1024 /*
1025  * Callback function for EnumSystemLocalesEx() in get_iso_localename().
1026  *
1027  * This function enumerates all system locales, searching for one that matches
1028  * an input with the format: <Language>[_<Country>], e.g.
1029  * English[_United States]
1030  *
1031  * The input is a three wchar_t array as an LPARAM. The first element is the
1032  * locale_name we want to match, the second element is an allocated buffer
1033  * where the Unix-style locale is copied if a match is found, and the third
1034  * element is the search status, 1 if a match was found, 0 otherwise.
1035  */
1036 static BOOL CALLBACK
1037 search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
1038 {
1039  wchar_t test_locale[LOCALE_NAME_MAX_LENGTH];
1040  wchar_t **argv;
1041 
1042  (void) (dwFlags);
1043 
1044  argv = (wchar_t **) lparam;
1045  *argv[2] = (wchar_t) 0;
1046 
1047  memset(test_locale, 0, sizeof(test_locale));
1048 
1049  /* Get the name of the <Language> in English */
1050  if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME,
1051  test_locale, LOCALE_NAME_MAX_LENGTH))
1052  {
1053  /*
1054  * If the enumerated locale does not have a hyphen ("en") OR the
1055  * locale_name input does not have an underscore ("English"), we only
1056  * need to compare the <Language> tags.
1057  */
1058  if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL)
1059  {
1060  if (_wcsicmp(argv[0], test_locale) == 0)
1061  {
1062  wcscpy(argv[1], pStr);
1063  *argv[2] = (wchar_t) 1;
1064  return FALSE;
1065  }
1066  }
1067 
1068  /*
1069  * We have to compare a full <Language>_<Country> tag, so we append
1070  * the underscore and name of the country/region in English, e.g.
1071  * "English_United States".
1072  */
1073  else
1074  {
1075  size_t len;
1076 
1077  wcscat(test_locale, L"_");
1078  len = wcslen(test_locale);
1079  if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME,
1080  test_locale + len,
1081  LOCALE_NAME_MAX_LENGTH - len))
1082  {
1083  if (_wcsicmp(argv[0], test_locale) == 0)
1084  {
1085  wcscpy(argv[1], pStr);
1086  *argv[2] = (wchar_t) 1;
1087  return FALSE;
1088  }
1089  }
1090  }
1091  }
1092 
1093  return TRUE;
1094 }
1095 
1096 /*
1097  * This function converts a Windows locale name to an ISO formatted version
1098  * for Visual Studio 2015 or greater.
1099  *
1100  * Returns NULL, if no valid conversion was found.
1101  */
1102 static char *
1103 get_iso_localename(const char *winlocname)
1104 {
1105  wchar_t wc_locale_name[LOCALE_NAME_MAX_LENGTH];
1106  wchar_t buffer[LOCALE_NAME_MAX_LENGTH];
1107  static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1108  char *period;
1109  int len;
1110  int ret_val;
1111 
1112  /*
1113  * Valid locales have the following syntax:
1114  * <Language>[_<Country>[.<CodePage>]]
1115  *
1116  * GetLocaleInfoEx can only take locale name without code-page and for the
1117  * purpose of this API the code-page doesn't matter.
1118  */
1119  period = strchr(winlocname, '.');
1120  if (period != NULL)
1121  len = period - winlocname;
1122  else
1123  len = pg_mbstrlen(winlocname);
1124 
1125  memset(wc_locale_name, 0, sizeof(wc_locale_name));
1126  memset(buffer, 0, sizeof(buffer));
1127  MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name,
1128  LOCALE_NAME_MAX_LENGTH);
1129 
1130  /*
1131  * If the lc_messages is already a Unix-style string, we have a direct
1132  * match with LOCALE_SNAME, e.g. en-US, en_US.
1133  */
1134  ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer,
1135  LOCALE_NAME_MAX_LENGTH);
1136  if (!ret_val)
1137  {
1138  /*
1139  * Search for a locale in the system that matches language and country
1140  * name.
1141  */
1142  wchar_t *argv[3];
1143 
1144  argv[0] = wc_locale_name;
1145  argv[1] = buffer;
1146  argv[2] = (wchar_t *) &ret_val;
1147  EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv,
1148  NULL);
1149  }
1150 
1151  if (ret_val)
1152  {
1153  size_t rc;
1154  char *hyphen;
1155 
1156  /* Locale names use only ASCII, any conversion locale suffices. */
1157  rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
1158  if (rc == -1 || rc == sizeof(iso_lc_messages))
1159  return NULL;
1160 
1161  /*
1162  * Since the message catalogs sit on a case-insensitive filesystem, we
1163  * need not standardize letter case here. So long as we do not ship
1164  * message catalogs for which it would matter, we also need not
1165  * translate the script/variant portion, e.g. uz-Cyrl-UZ to
1166  * uz_UZ@cyrillic. Simply replace the hyphen with an underscore.
1167  */
1168  hyphen = strchr(iso_lc_messages, '-');
1169  if (hyphen)
1170  *hyphen = '_';
1171  return iso_lc_messages;
1172  }
1173 
1174  return NULL;
1175 }
1176 
1177 static char *
1178 IsoLocaleName(const char *winlocname)
1179 {
1180  static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1181 
1182  if (pg_strcasecmp("c", winlocname) == 0 ||
1183  pg_strcasecmp("posix", winlocname) == 0)
1184  {
1185  strcpy(iso_lc_messages, "C");
1186  return iso_lc_messages;
1187  }
1188  else
1189  return get_iso_localename(winlocname);
1190 }
1191 
1192 #else /* !defined(_MSC_VER) */
1193 
/*
 * Stub for builds without MSVC: the conversion is not available, so every
 * input is reported as unconvertible.
 */
static char *
IsoLocaleName(const char *winlocname)
{
	char	   *unsupported = NULL; /* Not supported on MinGW */

	return unsupported;
}
1199 
1200 #endif /* defined(_MSC_VER) */
1201 
1202 #endif /* WIN32 && LC_MESSAGES */
1203 
1204 
1205 /*
1206  * Cache mechanism for collation information.
1207  *
1208  * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
1209  * (or POSIX), so we can optimize a few code paths in various places.
1210  * For the built-in C and POSIX collations, we can know that without even
1211  * doing a cache lookup, but we want to support aliases for C/POSIX too.
1212  * For the "default" collation, there are separate static cache variables,
1213  * since consulting the pg_collation catalog doesn't tell us what we need.
1214  *
1215  * Also, if a pg_locale_t has been requested for a collation, we cache that
1216  * for the life of a backend.
1217  *
1218  * Note that some code relies on the flags not reporting false negatives
1219  * (that is, saying it's not C when it is). For example, char2wchar()
1220  * could fail if the locale is C, so str_tolower() shouldn't call it
1221  * in that case.
1222  *
1223  * Note that we currently lack any way to flush the cache. Since we don't
1224  * support ALTER COLLATION, this is OK. The worst case is that someone
1225  * drops a collation, and a useless cache entry hangs around in existing
1226  * backends.
1227  */
1228 
1229 static collation_cache_entry *
1230 lookup_collation_cache(Oid collation, bool set_flags)
1231 {
1232  collation_cache_entry *cache_entry;
1233  bool found;
1234 
1235  Assert(OidIsValid(collation));
1236  Assert(collation != DEFAULT_COLLATION_OID);
1237 
1238  if (collation_cache == NULL)
1239  {
1240  /* First time through, initialize the hash table */
1241  HASHCTL ctl;
1242 
1243  ctl.keysize = sizeof(Oid);
1244  ctl.entrysize = sizeof(collation_cache_entry);
1245  collation_cache = hash_create("Collation cache", 100, &ctl,
1246  HASH_ELEM | HASH_BLOBS);
1247  }
1248 
1249  cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
1250  if (!found)
1251  {
1252  /*
1253  * Make sure cache entry is marked invalid, in case we fail before
1254  * setting things.
1255  */
1256  cache_entry->flags_valid = false;
1257  cache_entry->locale = 0;
1258  }
1259 
1260  if (set_flags && !cache_entry->flags_valid)
1261  {
1262  /* Attempt to set the flags */
1263  HeapTuple tp;
1264  Form_pg_collation collform;
1265 
1266  tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
1267  if (!HeapTupleIsValid(tp))
1268  elog(ERROR, "cache lookup failed for collation %u", collation);
1269  collform = (Form_pg_collation) GETSTRUCT(tp);
1270 
1271  if (collform->collprovider == COLLPROVIDER_BUILTIN)
1272  {
1273  Datum datum;
1274  const char *colllocale;
1275 
1276  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
1277  colllocale = TextDatumGetCString(datum);
1278 
1279  cache_entry->collate_is_c = true;
1280  cache_entry->ctype_is_c = (strcmp(colllocale, "C") == 0);
1281  }
1282  else if (collform->collprovider == COLLPROVIDER_LIBC)
1283  {
1284  Datum datum;
1285  const char *collcollate;
1286  const char *collctype;
1287 
1288  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
1289  collcollate = TextDatumGetCString(datum);
1290  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
1291  collctype = TextDatumGetCString(datum);
1292 
1293  cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
1294  (strcmp(collcollate, "POSIX") == 0));
1295  cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
1296  (strcmp(collctype, "POSIX") == 0));
1297  }
1298  else
1299  {
1300  cache_entry->collate_is_c = false;
1301  cache_entry->ctype_is_c = false;
1302  }
1303 
1304  cache_entry->flags_valid = true;
1305 
1306  ReleaseSysCache(tp);
1307  }
1308 
1309  return cache_entry;
1310 }
1311 
1312 
1313 /*
1314  * Detect whether collation's LC_COLLATE property is C
1315  */
1316 bool
1318 {
1319  /*
1320  * If we're asked about "collation 0", return false, so that the code will
1321  * go into the non-C path and report that the collation is bogus.
1322  */
1323  if (!OidIsValid(collation))
1324  return false;
1325 
1326  /*
1327  * If we're asked about the default collation, we have to inquire of the C
1328  * library. Cache the result so we only have to compute it once.
1329  */
1330  if (collation == DEFAULT_COLLATION_OID)
1331  {
1332  static int result = -1;
1333  const char *localeptr;
1334 
1335  if (result >= 0)
1336  return (bool) result;
1337 
1338  if (default_locale.provider == COLLPROVIDER_BUILTIN)
1339  {
1340  result = true;
1341  return (bool) result;
1342  }
1343  else if (default_locale.provider == COLLPROVIDER_ICU)
1344  {
1345  result = false;
1346  return (bool) result;
1347  }
1348  else if (default_locale.provider == COLLPROVIDER_LIBC)
1349  {
1350  localeptr = setlocale(LC_CTYPE, NULL);
1351  if (!localeptr)
1352  elog(ERROR, "invalid LC_CTYPE setting");
1353  }
1354  else
1355  elog(ERROR, "unexpected collation provider '%c'",
1357 
1358  if (strcmp(localeptr, "C") == 0)
1359  result = true;
1360  else if (strcmp(localeptr, "POSIX") == 0)
1361  result = true;
1362  else
1363  result = false;
1364  return (bool) result;
1365  }
1366 
1367  /*
1368  * If we're asked about the built-in C/POSIX collations, we know that.
1369  */
1370  if (collation == C_COLLATION_OID ||
1371  collation == POSIX_COLLATION_OID)
1372  return true;
1373 
1374  /*
1375  * Otherwise, we have to consult pg_collation, but we cache that.
1376  */
1377  return (lookup_collation_cache(collation, true))->collate_is_c;
1378 }
1379 
1380 /*
1381  * Detect whether collation's LC_CTYPE property is C
1382  */
1383 bool
1384 lc_ctype_is_c(Oid collation)
1385 {
1386  /*
1387  * If we're asked about "collation 0", return false, so that the code will
1388  * go into the non-C path and report that the collation is bogus.
1389  */
1390  if (!OidIsValid(collation))
1391  return false;
1392 
1393  /*
1394  * If we're asked about the default collation, we have to inquire of the C
1395  * library. Cache the result so we only have to compute it once.
1396  */
1397  if (collation == DEFAULT_COLLATION_OID)
1398  {
1399  static int result = -1;
1400  const char *localeptr;
1401 
1402  if (result >= 0)
1403  return (bool) result;
1404 
1405  if (default_locale.provider == COLLPROVIDER_BUILTIN)
1406  {
1407  localeptr = default_locale.info.builtin.locale;
1408  }
1409  else if (default_locale.provider == COLLPROVIDER_ICU)
1410  {
1411  result = false;
1412  return (bool) result;
1413  }
1414  else if (default_locale.provider == COLLPROVIDER_LIBC)
1415  {
1416  localeptr = setlocale(LC_CTYPE, NULL);
1417  if (!localeptr)
1418  elog(ERROR, "invalid LC_CTYPE setting");
1419  }
1420  else
1421  elog(ERROR, "unexpected collation provider '%c'",
1423 
1424  if (strcmp(localeptr, "C") == 0)
1425  result = true;
1426  else if (strcmp(localeptr, "POSIX") == 0)
1427  result = true;
1428  else
1429  result = false;
1430  return (bool) result;
1431  }
1432 
1433  /*
1434  * If we're asked about the built-in C/POSIX collations, we know that.
1435  */
1436  if (collation == C_COLLATION_OID ||
1437  collation == POSIX_COLLATION_OID)
1438  return true;
1439 
1440  /*
1441  * Otherwise, we have to consult pg_collation, but we cache that.
1442  */
1443  return (lookup_collation_cache(collation, true))->ctype_is_c;
1444 }
1445 
1447 
1448 void
1449 make_icu_collator(const char *iculocstr,
1450  const char *icurules,
1451  struct pg_locale_struct *resultp)
1452 {
1453 #ifdef USE_ICU
1454  UCollator *collator;
1455 
1456  collator = pg_ucol_open(iculocstr);
1457 
1458  /*
1459  * If rules are specified, we extract the rules of the standard collation,
1460  * add our own rules, and make a new collator with the combined rules.
1461  */
1462  if (icurules)
1463  {
1464  const UChar *default_rules;
1465  UChar *agg_rules;
1466  UChar *my_rules;
1467  UErrorCode status;
1468  int32_t length;
1469 
1470  default_rules = ucol_getRules(collator, &length);
1471  icu_to_uchar(&my_rules, icurules, strlen(icurules));
1472 
1473  agg_rules = palloc_array(UChar, u_strlen(default_rules) + u_strlen(my_rules) + 1);
1474  u_strcpy(agg_rules, default_rules);
1475  u_strcat(agg_rules, my_rules);
1476 
1477  ucol_close(collator);
1478 
1479  status = U_ZERO_ERROR;
1480  collator = ucol_openRules(agg_rules, u_strlen(agg_rules),
1481  UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL, &status);
1482  if (U_FAILURE(status))
1483  ereport(ERROR,
1484  (errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
1485  iculocstr, icurules, u_errorName(status))));
1486  }
1487 
1488  /* We will leak this string if the caller errors later :-( */
1489  resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext, iculocstr);
1490  resultp->info.icu.ucol = collator;
1491 #else /* not USE_ICU */
1492  /* could get here if a collation was created by a build with ICU */
1493  ereport(ERROR,
1494  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1495  errmsg("ICU is not supported in this build")));
1496 #endif /* not USE_ICU */
1497 }
1498 
1499 
1500 /* simple subroutine for reporting errors from newlocale() */
1501 static void
1502 report_newlocale_failure(const char *localename)
1503 {
1504  int save_errno;
1505 
1506  /*
1507  * Windows doesn't provide any useful error indication from
1508  * _create_locale(), and BSD-derived platforms don't seem to feel they
1509  * need to set errno either (even though POSIX is pretty clear that
1510  * newlocale should do so). So, if errno hasn't been set, assume ENOENT
1511  * is what to report.
1512  */
1513  if (errno == 0)
1514  errno = ENOENT;
1515 
1516  /*
1517  * ENOENT means "no such locale", not "no such file", so clarify that
1518  * errno with an errdetail message.
1519  */
1520  save_errno = errno; /* auxiliary funcs might change errno */
1521  ereport(ERROR,
1522  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1523  errmsg("could not create locale \"%s\": %m",
1524  localename),
1525  (save_errno == ENOENT ?
1526  errdetail("The operating system could not find any locale data for the locale name \"%s\".",
1527  localename) : 0)));
1528 }
1529 
1530 bool
1532 {
1533  /* default locale must always be deterministic */
1534  if (locale == NULL)
1535  return true;
1536  else
1537  return locale->deterministic;
1538 }
1539 
1540 /*
1541  * Create a locale_t from a collation OID. Results are cached for the
1542  * lifetime of the backend. Thus, do not free the result with freelocale().
1543  *
1544  * As a special optimization, the default/database collation returns 0.
1545  *
1546  * For simplicity, we always generate COLLATE + CTYPE even though we
1547  * might only need one of them. Since this is called only once per session,
1548  * it shouldn't cost much.
1549  */
1552 {
1553  collation_cache_entry *cache_entry;
1554 
1555  /* Callers must pass a valid OID */
1557 
1558  if (collid == DEFAULT_COLLATION_OID)
1559  {
1560  if (default_locale.provider == COLLPROVIDER_LIBC)
1561  return (pg_locale_t) 0;
1562  else
1563  return &default_locale;
1564  }
1565 
1566  cache_entry = lookup_collation_cache(collid, false);
1567 
1568  if (cache_entry->locale == 0)
1569  {
1570  /* We haven't computed this yet in this session, so do it */
1571  HeapTuple tp;
1572  Form_pg_collation collform;
1573  struct pg_locale_struct result;
1574  pg_locale_t resultp;
1575  Datum datum;
1576  bool isnull;
1577 
1578  tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1579  if (!HeapTupleIsValid(tp))
1580  elog(ERROR, "cache lookup failed for collation %u", collid);
1581  collform = (Form_pg_collation) GETSTRUCT(tp);
1582 
1583  /* We'll fill in the result struct locally before allocating memory */
1584  memset(&result, 0, sizeof(result));
1585  result.provider = collform->collprovider;
1586  result.deterministic = collform->collisdeterministic;
1587 
1588  if (collform->collprovider == COLLPROVIDER_BUILTIN)
1589  {
1590  const char *locstr;
1591 
1592  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
1593  locstr = TextDatumGetCString(datum);
1594 
1596 
1598  locstr);
1599  }
1600  else if (collform->collprovider == COLLPROVIDER_LIBC)
1601  {
1602  const char *collcollate;
1603  const char *collctype pg_attribute_unused();
1604  locale_t loc;
1605 
1606  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
1607  collcollate = TextDatumGetCString(datum);
1608  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
1609  collctype = TextDatumGetCString(datum);
1610 
1611  if (strcmp(collcollate, collctype) == 0)
1612  {
1613  /* Normal case where they're the same */
1614  errno = 0;
1615 #ifndef WIN32
1616  loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
1617  NULL);
1618 #else
1619  loc = _create_locale(LC_ALL, collcollate);
1620 #endif
1621  if (!loc)
1622  report_newlocale_failure(collcollate);
1623  }
1624  else
1625  {
1626 #ifndef WIN32
1627  /* We need two newlocale() steps */
1628  locale_t loc1;
1629 
1630  errno = 0;
1631  loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
1632  if (!loc1)
1633  report_newlocale_failure(collcollate);
1634  errno = 0;
1635  loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
1636  if (!loc)
1637  report_newlocale_failure(collctype);
1638 #else
1639 
1640  /*
1641  * XXX The _create_locale() API doesn't appear to support
1642  * this. Could perhaps be worked around by changing
1643  * pg_locale_t to contain two separate fields.
1644  */
1645  ereport(ERROR,
1646  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1647  errmsg("collations with different collate and ctype values are not supported on this platform")));
1648 #endif
1649  }
1650 
1651  result.info.lt = loc;
1652  }
1653  else if (collform->collprovider == COLLPROVIDER_ICU)
1654  {
1655  const char *iculocstr;
1656  const char *icurules;
1657 
1658  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
1659  iculocstr = TextDatumGetCString(datum);
1660 
1661  datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
1662  if (!isnull)
1663  icurules = TextDatumGetCString(datum);
1664  else
1665  icurules = NULL;
1666 
1667  make_icu_collator(iculocstr, icurules, &result);
1668  }
1669 
1670  datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1671  &isnull);
1672  if (!isnull)
1673  {
1674  char *actual_versionstr;
1675  char *collversionstr;
1676 
1677  collversionstr = TextDatumGetCString(datum);
1678 
1679  if (collform->collprovider == COLLPROVIDER_LIBC)
1680  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
1681  else
1682  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
1683 
1684  actual_versionstr = get_collation_actual_version(collform->collprovider,
1685  TextDatumGetCString(datum));
1686  if (!actual_versionstr)
1687  {
1688  /*
1689  * This could happen when specifying a version in CREATE
1690  * COLLATION but the provider does not support versioning, or
1691  * manually creating a mess in the catalogs.
1692  */
1693  ereport(ERROR,
1694  (errmsg("collation \"%s\" has no actual version, but a version was recorded",
1695  NameStr(collform->collname))));
1696  }
1697 
1698  if (strcmp(actual_versionstr, collversionstr) != 0)
1699  ereport(WARNING,
1700  (errmsg("collation \"%s\" has version mismatch",
1701  NameStr(collform->collname)),
1702  errdetail("The collation in the database was created using version %s, "
1703  "but the operating system provides version %s.",
1704  collversionstr, actual_versionstr),
1705  errhint("Rebuild all objects affected by this collation and run "
1706  "ALTER COLLATION %s REFRESH VERSION, "
1707  "or build PostgreSQL with the right library version.",
1708  quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1709  NameStr(collform->collname)))));
1710  }
1711 
1712  ReleaseSysCache(tp);
1713 
1714  /* We'll keep the pg_locale_t structures in TopMemoryContext */
1715  resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp));
1716  *resultp = result;
1717 
1718  cache_entry->locale = resultp;
1719  }
1720 
1721  return cache_entry->locale;
1722 }
1723 
1724 /*
1725  * Get provider-specific collation version string for the given collation from
1726  * the operating system/library.
1727  */
1728 char *
1729 get_collation_actual_version(char collprovider, const char *collcollate)
1730 {
1731  char *collversion = NULL;
1732 
1733  /*
1734  * The only two supported locales (C and C.UTF-8) are both based on memcmp
1735  * and are not expected to change, but track the version anyway.
1736  *
1737  * Note that the character semantics may change for some locales, but the
1738  * collation version only tracks changes to sort order.
1739  */
1740  if (collprovider == COLLPROVIDER_BUILTIN)
1741  {
1742  if (strcmp(collcollate, "C") == 0)
1743  return "1";
1744  else if (strcmp(collcollate, "C.UTF-8") == 0)
1745  return "1";
1746  else
1747  ereport(ERROR,
1748  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1749  errmsg("invalid locale name \"%s\" for builtin provider",
1750  collcollate)));
1751  }
1752 
1753 #ifdef USE_ICU
1754  if (collprovider == COLLPROVIDER_ICU)
1755  {
1756  UCollator *collator;
1757  UVersionInfo versioninfo;
1758  char buf[U_MAX_VERSION_STRING_LENGTH];
1759 
1760  collator = pg_ucol_open(collcollate);
1761 
1762  ucol_getVersion(collator, versioninfo);
1763  ucol_close(collator);
1764 
1765  u_versionToString(versioninfo, buf);
1766  collversion = pstrdup(buf);
1767  }
1768  else
1769 #endif
1770  if (collprovider == COLLPROVIDER_LIBC &&
1771  pg_strcasecmp("C", collcollate) != 0 &&
1772  pg_strncasecmp("C.", collcollate, 2) != 0 &&
1773  pg_strcasecmp("POSIX", collcollate) != 0)
1774  {
1775 #if defined(__GLIBC__)
1776  /* Use the glibc version because we don't have anything better. */
1777  collversion = pstrdup(gnu_get_libc_version());
1778 #elif defined(LC_VERSION_MASK)
1779  locale_t loc;
1780 
1781  /* Look up FreeBSD collation version. */
1782  loc = newlocale(LC_COLLATE, collcollate, NULL);
1783  if (loc)
1784  {
1785  collversion =
1786  pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
1787  freelocale(loc);
1788  }
1789  else
1790  ereport(ERROR,
1791  (errmsg("could not load locale \"%s\"", collcollate)));
1792 #elif defined(WIN32)
1793  /*
1794  * If we are targeting Windows Vista and above, we can ask for a name
1795  * given a collation name (earlier versions required a location code
1796  * that we don't have).
1797  */
1798  NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
1799  WCHAR wide_collcollate[LOCALE_NAME_MAX_LENGTH];
1800 
1801  MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
1802  LOCALE_NAME_MAX_LENGTH);
1803  if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
1804  {
1805  /*
1806  * GetNLSVersionEx() wants a language tag such as "en-US", not a
1807  * locale name like "English_United States.1252". Until those
1808  * values can be prevented from entering the system, or 100%
1809  * reliably converted to the more useful tag format, tolerate the
1810  * resulting error and report that we have no version data.
1811  */
1812  if (GetLastError() == ERROR_INVALID_PARAMETER)
1813  return NULL;
1814 
1815  ereport(ERROR,
1816  (errmsg("could not get collation version for locale \"%s\": error code %lu",
1817  collcollate,
1818  GetLastError())));
1819  }
1820  collversion = psprintf("%lu.%lu,%lu.%lu",
1821  (version.dwNLSVersion >> 8) & 0xFFFF,
1822  version.dwNLSVersion & 0xFF,
1823  (version.dwDefinedVersion >> 8) & 0xFFFF,
1824  version.dwDefinedVersion & 0xFF);
1825 #endif
1826  }
1827 
1828  return collversion;
1829 }
1830 
1831 /*
1832  * pg_strncoll_libc_win32_utf8
1833  *
1834  * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
1835  * invoke wcscoll() or wcscoll_l().
1836  */
1837 #ifdef WIN32
1838 static int
1839 pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
1840  size_t len2, pg_locale_t locale)
1841 {
1842  char sbuf[TEXTBUFLEN];
1843  char *buf = sbuf;
1844  char *a1p,
1845  *a2p;
1846  int a1len = len1 * 2 + 2;
1847  int a2len = len2 * 2 + 2;
1848  int r;
1849  int result;
1850 
1851  Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1853 #ifndef WIN32
1854  Assert(false);
1855 #endif
1856 
1857  if (a1len + a2len > TEXTBUFLEN)
1858  buf = palloc(a1len + a2len);
1859 
1860  a1p = buf;
1861  a2p = buf + a1len;
1862 
1863  /* API does not work for zero-length input */
1864  if (len1 == 0)
1865  r = 0;
1866  else
1867  {
1868  r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1869  (LPWSTR) a1p, a1len / 2);
1870  if (!r)
1871  ereport(ERROR,
1872  (errmsg("could not convert string to UTF-16: error code %lu",
1873  GetLastError())));
1874  }
1875  ((LPWSTR) a1p)[r] = 0;
1876 
1877  if (len2 == 0)
1878  r = 0;
1879  else
1880  {
1881  r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1882  (LPWSTR) a2p, a2len / 2);
1883  if (!r)
1884  ereport(ERROR,
1885  (errmsg("could not convert string to UTF-16: error code %lu",
1886  GetLastError())));
1887  }
1888  ((LPWSTR) a2p)[r] = 0;
1889 
1890  errno = 0;
1891  if (locale)
1892  result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
1893  else
1894  result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1895  if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */
1896  ereport(ERROR,
1897  (errmsg("could not compare Unicode strings: %m")));
1898 
1899  if (buf != sbuf)
1900  pfree(buf);
1901 
1902  return result;
1903 }
1904 #endif /* WIN32 */
1905 
1906 /*
1907  * pg_strcoll_libc
1908  *
1909  * Call strcoll(), strcoll_l(), wcscoll(), or wcscoll_l() as appropriate for
1910  * the given locale, platform, and database encoding. If the locale is NULL,
1911  * use the database collation.
1912  *
1913  * Arguments must be encoded in the database encoding and nul-terminated.
1914  */
1915 static int
1916 pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
1917 {
1918  int result;
1919 
1920  Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1921 #ifdef WIN32
1922  if (GetDatabaseEncoding() == PG_UTF8)
1923  {
1924  size_t len1 = strlen(arg1);
1925  size_t len2 = strlen(arg2);
1926 
1927  result = pg_strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale);
1928  }
1929  else
1930 #endif /* WIN32 */
1931  if (locale)
1932  result = strcoll_l(arg1, arg2, locale->info.lt);
1933  else
1934  result = strcoll(arg1, arg2);
1935 
1936  return result;
1937 }
1938 
1939 /*
1940  * pg_strncoll_libc
1941  *
1942  * Nul-terminate the arguments and call pg_strcoll_libc().
1943  */
1944 static int
1945 pg_strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2,
1947 {
1948  char sbuf[TEXTBUFLEN];
1949  char *buf = sbuf;
1950  size_t bufsize1 = len1 + 1;
1951  size_t bufsize2 = len2 + 1;
1952  char *arg1n;
1953  char *arg2n;
1954  int result;
1955 
1956  Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1957 
1958 #ifdef WIN32
1959  /* check for this case before doing the work for nul-termination */
1960  if (GetDatabaseEncoding() == PG_UTF8)
1961  return pg_strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale);
1962 #endif /* WIN32 */
1963 
1964  if (bufsize1 + bufsize2 > TEXTBUFLEN)
1965  buf = palloc(bufsize1 + bufsize2);
1966 
1967  arg1n = buf;
1968  arg2n = buf + bufsize1;
1969 
1970  /* nul-terminate arguments */
1971  memcpy(arg1n, arg1, len1);
1972  arg1n[len1] = '\0';
1973  memcpy(arg2n, arg2, len2);
1974  arg2n[len2] = '\0';
1975 
1976  result = pg_strcoll_libc(arg1n, arg2n, locale);
1977 
1978  if (buf != sbuf)
1979  pfree(buf);
1980 
1981  return result;
1982 }
1983 
1984 #ifdef USE_ICU
1985 
1986 /*
1987  * pg_strncoll_icu_no_utf8
1988  *
1989  * Convert the arguments from the database encoding to UChar strings, then
1990  * call ucol_strcoll(). An argument length of -1 means that the string is
1991  * NUL-terminated.
1992  *
1993  * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(),
1994  * caller should call that instead.
1995  */
1996 static int
1997 pg_strncoll_icu_no_utf8(const char *arg1, int32_t len1,
1998  const char *arg2, int32_t len2, pg_locale_t locale)
1999 {
2000  char sbuf[TEXTBUFLEN];
2001  char *buf = sbuf;
2002  int32_t ulen1;
2003  int32_t ulen2;
2004  size_t bufsize1;
2005  size_t bufsize2;
2006  UChar *uchar1,
2007  *uchar2;
2008  int result;
2009 
2010  Assert(locale->provider == COLLPROVIDER_ICU);
2011 #ifdef HAVE_UCOL_STRCOLLUTF8
2013 #endif
2014 
2015  init_icu_converter();
2016 
2017  ulen1 = uchar_length(icu_converter, arg1, len1);
2018  ulen2 = uchar_length(icu_converter, arg2, len2);
2019 
2020  bufsize1 = (ulen1 + 1) * sizeof(UChar);
2021  bufsize2 = (ulen2 + 1) * sizeof(UChar);
2022 
2023  if (bufsize1 + bufsize2 > TEXTBUFLEN)
2024  buf = palloc(bufsize1 + bufsize2);
2025 
2026  uchar1 = (UChar *) buf;
2027  uchar2 = (UChar *) (buf + bufsize1);
2028 
2029  ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
2030  ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
2031 
2032  result = ucol_strcoll(locale->info.icu.ucol,
2033  uchar1, ulen1,
2034  uchar2, ulen2);
2035 
2036  if (buf != sbuf)
2037  pfree(buf);
2038 
2039  return result;
2040 }
2041 
2042 /*
2043  * pg_strncoll_icu
2044  *
2045  * Call ucol_strcollUTF8() or ucol_strcoll() as appropriate for the given
2046  * database encoding. An argument length of -1 means the string is
2047  * NUL-terminated.
2048  *
2049  * Arguments must be encoded in the database encoding.
2050  */
2051 static int
2052 pg_strncoll_icu(const char *arg1, int32_t len1, const char *arg2, int32_t len2,
2054 {
2055  int result;
2056 
2057  Assert(locale->provider == COLLPROVIDER_ICU);
2058 
2059 #ifdef HAVE_UCOL_STRCOLLUTF8
2060  if (GetDatabaseEncoding() == PG_UTF8)
2061  {
2062  UErrorCode status;
2063 
2064  status = U_ZERO_ERROR;
2065  result = ucol_strcollUTF8(locale->info.icu.ucol,
2066  arg1, len1,
2067  arg2, len2,
2068  &status);
2069  if (U_FAILURE(status))
2070  ereport(ERROR,
2071  (errmsg("collation failed: %s", u_errorName(status))));
2072  }
2073  else
2074 #endif
2075  {
2076  result = pg_strncoll_icu_no_utf8(arg1, len1, arg2, len2, locale);
2077  }
2078 
2079  return result;
2080 }
2081 
2082 #endif /* USE_ICU */
2083 
2084 /*
2085  * pg_strcoll
2086  *
2087  * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll(), strcoll_l(), wcscoll(),
2088  * or wcscoll_l() as appropriate for the given locale, platform, and database
2089  * encoding. If the locale is not specified, use the database collation.
2090  *
2091  * Arguments must be encoded in the database encoding and nul-terminated.
2092  *
2093  * The caller is responsible for breaking ties if the collation is
2094  * deterministic; this maintains consistency with pg_strxfrm(), which cannot
2095  * easily account for deterministic collations.
2096  */
2097 int
2098 pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
2099 {
2100  int result;
2101 
2102  if (!locale || locale->provider == COLLPROVIDER_LIBC)
2103  result = pg_strcoll_libc(arg1, arg2, locale);
2104 #ifdef USE_ICU
2105  else if (locale->provider == COLLPROVIDER_ICU)
2106  result = pg_strncoll_icu(arg1, -1, arg2, -1, locale);
2107 #endif
2108  else
2109  /* shouldn't happen */
2110  PGLOCALE_SUPPORT_ERROR(locale->provider);
2111 
2112  return result;
2113 }
2114 
2115 /*
2116  * pg_strncoll
2117  *
2118  * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll(), strcoll_l(), wcscoll(),
2119  * or wcscoll_l() as appropriate for the given locale, platform, and database
2120  * encoding. If the locale is not specified, use the database collation.
2121  *
2122  * Arguments must be encoded in the database encoding.
2123  *
2124  * This function may need to nul-terminate the arguments for libc functions;
2125  * so if the caller already has nul-terminated strings, it should call
2126  * pg_strcoll() instead.
2127  *
2128  * The caller is responsible for breaking ties if the collation is
2129  * deterministic; this maintains consistency with pg_strnxfrm(), which cannot
2130  * easily account for deterministic collations.
2131  */
2132 int
2133 pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2,
2135 {
2136  int result;
2137 
2138  if (!locale || locale->provider == COLLPROVIDER_LIBC)
2139  result = pg_strncoll_libc(arg1, len1, arg2, len2, locale);
2140 #ifdef USE_ICU
2141  else if (locale->provider == COLLPROVIDER_ICU)
2142  result = pg_strncoll_icu(arg1, len1, arg2, len2, locale);
2143 #endif
2144  else
2145  /* shouldn't happen */
2146  PGLOCALE_SUPPORT_ERROR(locale->provider);
2147 
2148  return result;
2149 }
2150 
2151 
2152 static size_t
2153 pg_strxfrm_libc(char *dest, const char *src, size_t destsize,
2155 {
2156  Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
2157 
2158 #ifdef TRUST_STRXFRM
2159  if (locale)
2160  return strxfrm_l(dest, src, destsize, locale->info.lt);
2161  else
2162  return strxfrm(dest, src, destsize);
2163 #else
2164  /* shouldn't happen */
2165  PGLOCALE_SUPPORT_ERROR(locale->provider);
2166  return 0; /* keep compiler quiet */
2167 #endif
2168 }
2169 
2170 static size_t
2171 pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize,
2173 {
2174  char sbuf[TEXTBUFLEN];
2175  char *buf = sbuf;
2176  size_t bufsize = srclen + 1;
2177  size_t result;
2178 
2179  Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
2180 
2181  if (bufsize > TEXTBUFLEN)
2182  buf = palloc(bufsize);
2183 
2184  /* nul-terminate arguments */
2185  memcpy(buf, src, srclen);
2186  buf[srclen] = '\0';
2187 
2188  result = pg_strxfrm_libc(dest, buf, destsize, locale);
2189 
2190  if (buf != sbuf)
2191  pfree(buf);
2192 
2193  /* if dest is defined, it should be nul-terminated */
2194  Assert(result >= destsize || dest[result] == '\0');
2195 
2196  return result;
2197 }
2198 
2199 #ifdef USE_ICU
2200 
2201 /* 'srclen' of -1 means the strings are NUL-terminated */
2202 static size_t
2203 pg_strnxfrm_icu(char *dest, const char *src, int32_t srclen, int32_t destsize,
2205 {
2206  char sbuf[TEXTBUFLEN];
2207  char *buf = sbuf;
2208  UChar *uchar;
2209  int32_t ulen;
2210  size_t uchar_bsize;
2211  Size result_bsize;
2212 
2213  Assert(locale->provider == COLLPROVIDER_ICU);
2214 
2215  init_icu_converter();
2216 
2217  ulen = uchar_length(icu_converter, src, srclen);
2218 
2219  uchar_bsize = (ulen + 1) * sizeof(UChar);
2220 
2221  if (uchar_bsize > TEXTBUFLEN)
2222  buf = palloc(uchar_bsize);
2223 
2224  uchar = (UChar *) buf;
2225 
2226  ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
2227 
2228  result_bsize = ucol_getSortKey(locale->info.icu.ucol,
2229  uchar, ulen,
2230  (uint8_t *) dest, destsize);
2231 
2232  /*
2233  * ucol_getSortKey() counts the nul-terminator in the result length, but
2234  * this function should not.
2235  */
2236  Assert(result_bsize > 0);
2237  result_bsize--;
2238 
2239  if (buf != sbuf)
2240  pfree(buf);
2241 
2242  /* if dest is defined, it should be nul-terminated */
2243  Assert(result_bsize >= destsize || dest[result_bsize] == '\0');
2244 
2245  return result_bsize;
2246 }
2247 
2248 /* 'srclen' of -1 means the strings are NUL-terminated */
2249 static size_t
2250 pg_strnxfrm_prefix_icu_no_utf8(char *dest, const char *src, int32_t srclen,
2251  int32_t destsize, pg_locale_t locale)
2252 {
2253  char sbuf[TEXTBUFLEN];
2254  char *buf = sbuf;
2255  UCharIterator iter;
2256  uint32_t state[2];
2257  UErrorCode status;
2258  int32_t ulen = -1;
2259  UChar *uchar = NULL;
2260  size_t uchar_bsize;
2261  Size result_bsize;
2262 
2263  Assert(locale->provider == COLLPROVIDER_ICU);
2265 
2266  init_icu_converter();
2267 
2268  ulen = uchar_length(icu_converter, src, srclen);
2269 
2270  uchar_bsize = (ulen + 1) * sizeof(UChar);
2271 
2272  if (uchar_bsize > TEXTBUFLEN)
2273  buf = palloc(uchar_bsize);
2274 
2275  uchar = (UChar *) buf;
2276 
2277  ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
2278 
2279  uiter_setString(&iter, uchar, ulen);
2280  state[0] = state[1] = 0; /* won't need that again */
2281  status = U_ZERO_ERROR;
2282  result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol,
2283  &iter,
2284  state,
2285  (uint8_t *) dest,
2286  destsize,
2287  &status);
2288  if (U_FAILURE(status))
2289  ereport(ERROR,
2290  (errmsg("sort key generation failed: %s",
2291  u_errorName(status))));
2292 
2293  return result_bsize;
2294 }
2295 
2296 /* 'srclen' of -1 means the strings are NUL-terminated */
2297 static size_t
2298 pg_strnxfrm_prefix_icu(char *dest, const char *src, int32_t srclen,
2299  int32_t destsize, pg_locale_t locale)
2300 {
2301  size_t result;
2302 
2303  Assert(locale->provider == COLLPROVIDER_ICU);
2304 
2305  if (GetDatabaseEncoding() == PG_UTF8)
2306  {
2307  UCharIterator iter;
2308  uint32_t state[2];
2309  UErrorCode status;
2310 
2311  uiter_setUTF8(&iter, src, srclen);
2312  state[0] = state[1] = 0; /* won't need that again */
2313  status = U_ZERO_ERROR;
2314  result = ucol_nextSortKeyPart(locale->info.icu.ucol,
2315  &iter,
2316  state,
2317  (uint8_t *) dest,
2318  destsize,
2319  &status);
2320  if (U_FAILURE(status))
2321  ereport(ERROR,
2322  (errmsg("sort key generation failed: %s",
2323  u_errorName(status))));
2324  }
2325  else
2326  result = pg_strnxfrm_prefix_icu_no_utf8(dest, src, srclen, destsize,
2327  locale);
2328 
2329  return result;
2330 }
2331 
2332 #endif
2333 
2334 /*
2335  * Return true if the collation provider supports pg_strxfrm() and
2336  * pg_strnxfrm(); otherwise false.
2337  *
2338  * Unfortunately, it seems that strxfrm() for non-C collations is broken on
2339  * many common platforms; testing of multiple versions of glibc reveals that,
2340  * for many locales, strcoll() and strxfrm() do not return consistent
2341  * results. While no other libc other than Cygwin has so far been shown to
2342  * have a problem, we take the conservative course of action for right now and
2343  * disable this categorically. (Users who are certain this isn't a problem on
2344  * their system can define TRUST_STRXFRM.)
2345  *
2346  * No similar problem is known for the ICU provider.
2347  */
2348 bool
2350 {
2351  if (!locale || locale->provider == COLLPROVIDER_LIBC)
2352 #ifdef TRUST_STRXFRM
2353  return true;
2354 #else
2355  return false;
2356 #endif
2357  else if (locale->provider == COLLPROVIDER_ICU)
2358  return true;
2359  else
2360  /* shouldn't happen */
2361  PGLOCALE_SUPPORT_ERROR(locale->provider);
2362 
2363  return false; /* keep compiler quiet */
2364 }
2365 
2366 /*
2367  * pg_strxfrm
2368  *
2369  * Transforms 'src' to a nul-terminated string stored in 'dest' such that
2370  * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on
2371  * untransformed strings.
2372  *
2373  * The provided 'src' must be nul-terminated. If 'destsize' is zero, 'dest'
2374  * may be NULL.
2375  *
2376  * Returns the number of bytes needed to store the transformed string,
2377  * excluding the terminating nul byte. If the value returned is 'destsize' or
2378  * greater, the resulting contents of 'dest' are undefined.
2379  */
2380 size_t
2381 pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
2382 {
2383  size_t result = 0; /* keep compiler quiet */
2384 
2385  if (!locale || locale->provider == COLLPROVIDER_LIBC)
2386  result = pg_strxfrm_libc(dest, src, destsize, locale);
2387 #ifdef USE_ICU
2388  else if (locale->provider == COLLPROVIDER_ICU)
2389  result = pg_strnxfrm_icu(dest, src, -1, destsize, locale);
2390 #endif
2391  else
2392  /* shouldn't happen */
2393  PGLOCALE_SUPPORT_ERROR(locale->provider);
2394 
2395  return result;
2396 }
2397 
2398 /*
2399  * pg_strnxfrm
2400  *
2401  * Transforms 'src' to a nul-terminated string stored in 'dest' such that
2402  * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on
2403  * untransformed strings.
2404  *
2405  * 'src' does not need to be nul-terminated. If 'destsize' is zero, 'dest' may
2406  * be NULL.
2407  *
2408  * Returns the number of bytes needed to store the transformed string,
2409  * excluding the terminating nul byte. If the value returned is 'destsize' or
2410  * greater, the resulting contents of 'dest' are undefined.
2411  *
2412  * This function may need to nul-terminate the argument for libc functions;
2413  * so if the caller already has a nul-terminated string, it should call
2414  * pg_strxfrm() instead.
2415  */
2416 size_t
2417 pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen,
2419 {
2420  size_t result = 0; /* keep compiler quiet */
2421 
2422  if (!locale || locale->provider == COLLPROVIDER_LIBC)
2423  result = pg_strnxfrm_libc(dest, src, srclen, destsize, locale);
2424 #ifdef USE_ICU
2425  else if (locale->provider == COLLPROVIDER_ICU)
2426  result = pg_strnxfrm_icu(dest, src, srclen, destsize, locale);
2427 #endif
2428  else
2429  /* shouldn't happen */
2430  PGLOCALE_SUPPORT_ERROR(locale->provider);
2431 
2432  return result;
2433 }
2434 
2435 /*
2436  * Return true if the collation provider supports pg_strxfrm_prefix() and
2437  * pg_strnxfrm_prefix(); otherwise false.
2438  */
2439 bool
2441 {
2442  if (!locale || locale->provider == COLLPROVIDER_LIBC)
2443  return false;
2444  else if (locale->provider == COLLPROVIDER_ICU)
2445  return true;
2446  else
2447  /* shouldn't happen */
2448  PGLOCALE_SUPPORT_ERROR(locale->provider);
2449 
2450  return false; /* keep compiler quiet */
2451 }
2452 
2453 /*
2454  * pg_strxfrm_prefix
2455  *
2456  * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary
2457  * memcmp() on the byte sequence is equivalent to pg_strcoll() on
2458  * untransformed strings. The result is not nul-terminated.
2459  *
2460  * The provided 'src' must be nul-terminated.
2461  *
2462  * If destsize is not large enough to hold the resulting byte sequence, stores
2463  * only the first destsize bytes in 'dest'. Returns the number of bytes
2464  * actually copied to 'dest'.
2465  */
2466 size_t
2467 pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
2469 {
2470  size_t result = 0; /* keep compiler quiet */
2471 
2472  if (!locale)
2473  PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC);
2474 #ifdef USE_ICU
2475  else if (locale->provider == COLLPROVIDER_ICU)
2476  result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale);
2477 #endif
2478  else
2479  PGLOCALE_SUPPORT_ERROR(locale->provider);
2480 
2481  return result;
2482 }
2483 
2484 /*
2485  * pg_strnxfrm_prefix
2486  *
2487  * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary
2488  * memcmp() on the byte sequence is equivalent to pg_strcoll() on
2489  * untransformed strings. The result is not nul-terminated.
2490  *
2491  * The provided 'src' must be nul-terminated.
2492  *
2493  * If destsize is not large enough to hold the resulting byte sequence, stores
2494  * only the first destsize bytes in 'dest'. Returns the number of bytes
2495  * actually copied to 'dest'.
2496  *
2497  * This function may need to nul-terminate the argument for libc functions;
2498  * so if the caller already has a nul-terminated string, it should call
2499  * pg_strxfrm_prefix() instead.
2500  */
2501 size_t
2502 pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
2503  size_t srclen, pg_locale_t locale)
2504 {
2505  size_t result = 0; /* keep compiler quiet */
2506 
2507  if (!locale)
2508  PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC);
2509 #ifdef USE_ICU
2510  else if (locale->provider == COLLPROVIDER_ICU)
2511  result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale);
2512 #endif
2513  else
2514  PGLOCALE_SUPPORT_ERROR(locale->provider);
2515 
2516  return result;
2517 }
2518 
2519 /*
2520  * Return required encoding ID for the given locale, or -1 if any encoding is
2521  * valid for the locale.
2522  *
2523  * The only supported locale for the builtin provider is "C", and it's
2524  * available for any encoding.
2525  */
2526 int
2528 {
2529  if (strcmp(locale, "C") == 0)
2530  return -1;
2531  if (strcmp(locale, "C.UTF-8") == 0)
2532  return PG_UTF8;
2533 
2534  ereport(ERROR,
2535  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2536  errmsg("invalid locale name \"%s\" for builtin provider",
2537  locale)));
2538 
2539  return 0; /* keep compiler quiet */
2540 }
2541 
2542 
2543 /*
2544  * Validate the locale and encoding combination, and return the canonical form
2545  * of the locale name.
2546  *
2547  * The only supported locale for the builtin provider is "C", and it's
2548  * available for any encoding.
2549  */
2550 const char *
2552 {
2553  const char *canonical_name = NULL;
2554  int required_encoding;
2555 
2556  if (strcmp(locale, "C") == 0)
2557  canonical_name = "C";
2558  else if (strcmp(locale, "C.UTF-8") == 0 || strcmp(locale, "C.UTF8") == 0)
2559  canonical_name = "C.UTF-8";
2560 
2561  if (!canonical_name)
2562  ereport(ERROR,
2563  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2564  errmsg("invalid locale name \"%s\" for builtin provider",
2565  locale)));
2566 
2567  required_encoding = builtin_locale_encoding(canonical_name);
2568  if (required_encoding >= 0 && encoding != required_encoding)
2569  ereport(ERROR,
2570  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2571  errmsg("encoding \"%s\" does not match locale \"%s\"",
2573 
2574  return canonical_name;
2575 }
2576 
2577 
2578 #ifdef USE_ICU
2579 
2580 /*
2581  * Wrapper around ucol_open() to handle API differences for older ICU
2582  * versions.
2583  */
2584 static UCollator *
2585 pg_ucol_open(const char *loc_str)
2586 {
2587  UCollator *collator;
2588  UErrorCode status;
2589  const char *orig_str = loc_str;
2590  char *fixed_str = NULL;
2591 
2592  /*
2593  * Must never open default collator, because it depends on the environment
2594  * and may change at any time. Should not happen, but check here to catch
2595  * bugs that might be hard to catch otherwise.
2596  *
2597  * NB: the default collator is not the same as the collator for the root
2598  * locale. The root locale may be specified as the empty string, "und", or
2599  * "root". The default collator is opened by passing NULL to ucol_open().
2600  */
2601  if (loc_str == NULL)
2602  elog(ERROR, "opening default collator is not supported");
2603 
2604  /*
2605  * In ICU versions 54 and earlier, "und" is not a recognized spelling of
2606  * the root locale. If the first component of the locale is "und", replace
2607  * with "root" before opening.
2608  */
2609  if (U_ICU_VERSION_MAJOR_NUM < 55)
2610  {
2611  char lang[ULOC_LANG_CAPACITY];
2612 
2613  status = U_ZERO_ERROR;
2614  uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
2615  if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
2616  {
2617  ereport(ERROR,
2618  (errmsg("could not get language from locale \"%s\": %s",
2619  loc_str, u_errorName(status))));
2620  }
2621 
2622  if (strcmp(lang, "und") == 0)
2623  {
2624  const char *remainder = loc_str + strlen("und");
2625 
2626  fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
2627  strcpy(fixed_str, "root");
2628  strcat(fixed_str, remainder);
2629 
2630  loc_str = fixed_str;
2631  }
2632  }
2633 
2634  status = U_ZERO_ERROR;
2635  collator = ucol_open(loc_str, &status);
2636  if (U_FAILURE(status))
2637  ereport(ERROR,
2638  /* use original string for error report */
2639  (errmsg("could not open collator for locale \"%s\": %s",
2640  orig_str, u_errorName(status))));
2641 
2642  if (U_ICU_VERSION_MAJOR_NUM < 54)
2643  {
2644  status = U_ZERO_ERROR;
2645  icu_set_collation_attributes(collator, loc_str, &status);
2646 
2647  /*
2648  * Pretend the error came from ucol_open(), for consistent error
2649  * message across ICU versions.
2650  */
2651  if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
2652  {
2653  ucol_close(collator);
2654  ereport(ERROR,
2655  (errmsg("could not open collator for locale \"%s\": %s",
2656  orig_str, u_errorName(status))));
2657  }
2658  }
2659 
2660  if (fixed_str != NULL)
2661  pfree(fixed_str);
2662 
2663  return collator;
2664 }
2665 
2666 static void
2667 init_icu_converter(void)
2668 {
2669  const char *icu_encoding_name;
2670  UErrorCode status;
2671  UConverter *conv;
2672 
2673  if (icu_converter)
2674  return; /* already done */
2675 
2676  icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
2677  if (!icu_encoding_name)
2678  ereport(ERROR,
2679  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2680  errmsg("encoding \"%s\" not supported by ICU",
2682 
2683  status = U_ZERO_ERROR;
2684  conv = ucnv_open(icu_encoding_name, &status);
2685  if (U_FAILURE(status))
2686  ereport(ERROR,
2687  (errmsg("could not open ICU converter for encoding \"%s\": %s",
2688  icu_encoding_name, u_errorName(status))));
2689 
2690  icu_converter = conv;
2691 }
2692 
2693 /*
2694  * Find length, in UChars, of given string if converted to UChar string.
2695  */
2696 static size_t
2697 uchar_length(UConverter *converter, const char *str, int32_t len)
2698 {
2699  UErrorCode status = U_ZERO_ERROR;
2700  int32_t ulen;
2701 
2702  ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status);
2703  if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
2704  ereport(ERROR,
2705  (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
2706  return ulen;
2707 }
2708 
2709 /*
2710  * Convert the given source string into a UChar string, stored in dest, and
2711  * return the length (in UChars).
2712  */
2713 static int32_t
2714 uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
2715  const char *src, int32_t srclen)
2716 {
2717  UErrorCode status = U_ZERO_ERROR;
2718  int32_t ulen;
2719 
2720  status = U_ZERO_ERROR;
2721  ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status);
2722  if (U_FAILURE(status))
2723  ereport(ERROR,
2724  (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
2725  return ulen;
2726 }
2727 
2728 /*
2729  * Convert a string in the database encoding into a string of UChars.
2730  *
2731  * The source string at buff is of length nbytes
2732  * (it needn't be nul-terminated)
2733  *
2734  * *buff_uchar receives a pointer to the palloc'd result string, and
2735  * the function's result is the number of UChars generated.
2736  *
2737  * The result string is nul-terminated, though most callers rely on the
2738  * result length instead.
2739  */
2740 int32_t
2741 icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
2742 {
2743  int32_t len_uchar;
2744 
2745  init_icu_converter();
2746 
2747  len_uchar = uchar_length(icu_converter, buff, nbytes);
2748 
2749  *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
2750  len_uchar = uchar_convert(icu_converter,
2751  *buff_uchar, len_uchar + 1, buff, nbytes);
2752 
2753  return len_uchar;
2754 }
2755 
2756 /*
2757  * Convert a string of UChars into the database encoding.
2758  *
2759  * The source string at buff_uchar is of length len_uchar
2760  * (it needn't be nul-terminated)
2761  *
2762  * *result receives a pointer to the palloc'd result string, and the
2763  * function's result is the number of bytes generated (not counting nul).
2764  *
2765  * The result string is nul-terminated.
2766  */
2767 int32_t
2768 icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
2769 {
2770  UErrorCode status;
2771  int32_t len_result;
2772 
2773  init_icu_converter();
2774 
2775  status = U_ZERO_ERROR;
2776  len_result = ucnv_fromUChars(icu_converter, NULL, 0,
2777  buff_uchar, len_uchar, &status);
2778  if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
2779  ereport(ERROR,
2780  (errmsg("%s failed: %s", "ucnv_fromUChars",
2781  u_errorName(status))));
2782 
2783  *result = palloc(len_result + 1);
2784 
2785  status = U_ZERO_ERROR;
2786  len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
2787  buff_uchar, len_uchar, &status);
2788  if (U_FAILURE(status) ||
2789  status == U_STRING_NOT_TERMINATED_WARNING)
2790  ereport(ERROR,
2791  (errmsg("%s failed: %s", "ucnv_fromUChars",
2792  u_errorName(status))));
2793 
2794  return len_result;
2795 }
2796 
2797 /*
2798  * Parse collation attributes from the given locale string and apply them to
2799  * the open collator.
2800  *
2801  * First, the locale string is canonicalized to an ICU format locale ID such
2802  * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
2803  * the key-value arguments.
2804  *
2805  * Starting with ICU version 54, the attributes are processed automatically by
2806  * ucol_open(), so this is only necessary for emulating this behavior on older
2807  * versions.
2808  */
2810 static void
2811 icu_set_collation_attributes(UCollator *collator, const char *loc,
2812  UErrorCode *status)
2813 {
2814  int32_t len;
2815  char *icu_locale_id;
2816  char *lower_str;
2817  char *str;
2818 
2819  /*
2820  * The input locale may be a BCP 47 language tag, e.g.
2821  * "und-u-kc-ks-level1", which expresses the same attributes in a
2822  * different form. It will be converted to the equivalent ICU format
2823  * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
2824  * uloc_canonicalize().
2825  */
2826  *status = U_ZERO_ERROR;
2827  len = uloc_canonicalize(loc, NULL, 0, status);
2828  icu_locale_id = palloc(len + 1);
2829  *status = U_ZERO_ERROR;
2830  len = uloc_canonicalize(loc, icu_locale_id, len + 1, status);
2831  if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING)
2832  return;
2833 
2834  lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
2835 
2836  pfree(icu_locale_id);
2837 
2838  str = strchr(lower_str, '@');
2839  if (!str)
2840  return;
2841  str++;
2842 
2843  for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";"))
2844  {
2845  char *e = strchr(token, '=');
2846 
2847  if (e)
2848  {
2849  char *name;
2850  char *value;
2851  UColAttribute uattr;
2852  UColAttributeValue uvalue;
2853 
2854  *status = U_ZERO_ERROR;
2855 
2856  *e = '\0';
2857  name = token;
2858  value = e + 1;
2859 
2860  /*
2861  * See attribute name and value lists in ICU i18n/coll.cpp
2862  */
2863  if (strcmp(name, "colstrength") == 0)
2864  uattr = UCOL_STRENGTH;
2865  else if (strcmp(name, "colbackwards") == 0)
2866  uattr = UCOL_FRENCH_COLLATION;
2867  else if (strcmp(name, "colcaselevel") == 0)
2868  uattr = UCOL_CASE_LEVEL;
2869  else if (strcmp(name, "colcasefirst") == 0)
2870  uattr = UCOL_CASE_FIRST;
2871  else if (strcmp(name, "colalternate") == 0)
2872  uattr = UCOL_ALTERNATE_HANDLING;
2873  else if (strcmp(name, "colnormalization") == 0)
2874  uattr = UCOL_NORMALIZATION_MODE;
2875  else if (strcmp(name, "colnumeric") == 0)
2876  uattr = UCOL_NUMERIC_COLLATION;
2877  else
2878  /* ignore if unknown */
2879  continue;
2880 
2881  if (strcmp(value, "primary") == 0)
2882  uvalue = UCOL_PRIMARY;
2883  else if (strcmp(value, "secondary") == 0)
2884  uvalue = UCOL_SECONDARY;
2885  else if (strcmp(value, "tertiary") == 0)
2886  uvalue = UCOL_TERTIARY;
2887  else if (strcmp(value, "quaternary") == 0)
2888  uvalue = UCOL_QUATERNARY;
2889  else if (strcmp(value, "identical") == 0)
2890  uvalue = UCOL_IDENTICAL;
2891  else if (strcmp(value, "no") == 0)
2892  uvalue = UCOL_OFF;
2893  else if (strcmp(value, "yes") == 0)
2894  uvalue = UCOL_ON;
2895  else if (strcmp(value, "shifted") == 0)
2896  uvalue = UCOL_SHIFTED;
2897  else if (strcmp(value, "non-ignorable") == 0)
2898  uvalue = UCOL_NON_IGNORABLE;
2899  else if (strcmp(value, "lower") == 0)
2900  uvalue = UCOL_LOWER_FIRST;
2901  else if (strcmp(value, "upper") == 0)
2902  uvalue = UCOL_UPPER_FIRST;
2903  else
2904  {
2905  *status = U_ILLEGAL_ARGUMENT_ERROR;
2906  break;
2907  }
2908 
2909  ucol_setAttribute(collator, uattr, uvalue, status);
2910  }
2911  }
2912 
2913  pfree(lower_str);
2914 }
2915 #endif
2916 
2917 /*
2918  * Return the BCP47 language tag representation of the requested locale.
2919  *
2920  * This function should be called before passing the string to ucol_open(),
2921  * because conversion to a language tag also performs "level 2
2922  * canonicalization". In addition to producing a consistent format, level 2
2923  * canonicalization is able to more accurately interpret different input
2924  * locale string formats, such as POSIX and .NET IDs.
2925  */
2926 char *
2927 icu_language_tag(const char *loc_str, int elevel)
2928 {
2929 #ifdef USE_ICU
2930  UErrorCode status;
2931  char *langtag;
2932  size_t buflen = 32; /* arbitrary starting buffer size */
2933  const bool strict = true;
2934 
2935  /*
2936  * A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
2937  * RFC5646 section 4.4). Additionally, in older ICU versions,
2938  * uloc_toLanguageTag() doesn't always return the ultimate length on the
2939  * first call, necessitating a loop.
2940  */
2941  langtag = palloc(buflen);
2942  while (true)
2943  {
2944  status = U_ZERO_ERROR;
2945  uloc_toLanguageTag(loc_str, langtag, buflen, strict, &status);
2946 
2947  /* try again if the buffer is not large enough */
2948  if ((status == U_BUFFER_OVERFLOW_ERROR ||
2949  status == U_STRING_NOT_TERMINATED_WARNING) &&
2950  buflen < MaxAllocSize)
2951  {
2952  buflen = Min(buflen * 2, MaxAllocSize);
2953  langtag = repalloc(langtag, buflen);
2954  continue;
2955  }
2956 
2957  break;
2958  }
2959 
2960  if (U_FAILURE(status))
2961  {
2962  pfree(langtag);
2963 
2964  if (elevel > 0)
2965  ereport(elevel,
2966  (errmsg("could not convert locale name \"%s\" to language tag: %s",
2967  loc_str, u_errorName(status))));
2968  return NULL;
2969  }
2970 
2971  return langtag;
2972 #else /* not USE_ICU */
2973  ereport(ERROR,
2974  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2975  errmsg("ICU is not supported in this build")));
2976  return NULL; /* keep compiler quiet */
2977 #endif /* not USE_ICU */
2978 }
2979 
2980 /*
2981  * Perform best-effort check that the locale is a valid one.
2982  */
2983 void
2984 icu_validate_locale(const char *loc_str)
2985 {
2986 #ifdef USE_ICU
2987  UCollator *collator;
2988  UErrorCode status;
2989  char lang[ULOC_LANG_CAPACITY];
2990  bool found = false;
2991  int elevel = icu_validation_level;
2992 
2993  /* no validation */
2994  if (elevel < 0)
2995  return;
2996 
2997  /* downgrade to WARNING during pg_upgrade */
2998  if (IsBinaryUpgrade && elevel > WARNING)
2999  elevel = WARNING;
3000 
3001  /* validate that we can extract the language */
3002  status = U_ZERO_ERROR;
3003  uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
3004  if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
3005  {
3006  ereport(elevel,
3007  (errmsg("could not get language from ICU locale \"%s\": %s",
3008  loc_str, u_errorName(status)),
3009  errhint("To disable ICU locale validation, set the parameter %s to \"%s\".",
3010  "icu_validation_level", "disabled")));
3011  return;
3012  }
3013 
3014  /* check for special language name */
3015  if (strcmp(lang, "") == 0 ||
3016  strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
3017  found = true;
3018 
3019  /* search for matching language within ICU */
3020  for (int32_t i = 0; !found && i < uloc_countAvailable(); i++)
3021  {
3022  const char *otherloc = uloc_getAvailable(i);
3023  char otherlang[ULOC_LANG_CAPACITY];
3024 
3025  status = U_ZERO_ERROR;
3026  uloc_getLanguage(otherloc, otherlang, ULOC_LANG_CAPACITY, &status);
3027  if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
3028  continue;
3029 
3030  if (strcmp(lang, otherlang) == 0)
3031  found = true;
3032  }
3033 
3034  if (!found)
3035  ereport(elevel,
3036  (errmsg("ICU locale \"%s\" has unknown language \"%s\"",
3037  loc_str, lang),
3038  errhint("To disable ICU locale validation, set the parameter %s to \"%s\".",
3039  "icu_validation_level", "disabled")));
3040 
3041  /* check that it can be opened */
3042  collator = pg_ucol_open(loc_str);
3043  ucol_close(collator);
3044 #else /* not USE_ICU */
3045  /* could get here if a collation was created by a build with ICU */
3046  ereport(ERROR,
3047  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3048  errmsg("ICU is not supported in this build")));
3049 #endif /* not USE_ICU */
3050 }
3051 
3052 /*
3053  * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
3054  * Therefore we keep them here rather than with the mbutils code.
3055  */
3056 
3057 /*
3058  * wchar2char --- convert wide characters to multibyte format
3059  *
3060  * This has the same API as the standard wcstombs_l() function; in particular,
3061  * tolen is the maximum number of bytes to store at *to, and *from must be
3062  * zero-terminated. The output will be zero-terminated iff there is room.
3063  */
3064 size_t
3065 wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
3066 {
3067  size_t result;
3068 
3069  Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
3070 
3071  if (tolen == 0)
3072  return 0;
3073 
3074 #ifdef WIN32
3075 
3076  /*
3077  * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
3078  * for some reason mbstowcs and wcstombs won't do this for us, so we use
3079  * MultiByteToWideChar().
3080  */
3081  if (GetDatabaseEncoding() == PG_UTF8)
3082  {
3083  result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
3084  NULL, NULL);
3085  /* A zero return is failure */
3086  if (result <= 0)
3087  result = -1;
3088  else
3089  {
3090  Assert(result <= tolen);
3091  /* Microsoft counts the zero terminator in the result */
3092  result--;
3093  }
3094  }
3095  else
3096 #endif /* WIN32 */
3097  if (locale == (pg_locale_t) 0)
3098  {
3099  /* Use wcstombs directly for the default locale */
3100  result = wcstombs(to, from, tolen);
3101  }
3102  else
3103  {
3104  /* Use wcstombs_l for nondefault locales */
3105  result = wcstombs_l(to, from, tolen, locale->info.lt);
3106  }
3107 
3108  return result;
3109 }
3110 
3111 /*
3112  * char2wchar --- convert multibyte characters to wide characters
3113  *
3114  * This has almost the API of mbstowcs_l(), except that *from need not be
3115  * null-terminated; instead, the number of input bytes is specified as
3116  * fromlen. Also, we ereport() rather than returning -1 for invalid
3117  * input encoding. tolen is the maximum number of wchar_t's to store at *to.
3118  * The output will be zero-terminated iff there is room.
3119  */
3120 size_t
3121 char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
3123 {
3124  size_t result;
3125 
3126  Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
3127 
3128  if (tolen == 0)
3129  return 0;
3130 
3131 #ifdef WIN32
3132  /* See WIN32 "Unicode" comment above */
3133  if (GetDatabaseEncoding() == PG_UTF8)
3134  {
3135  /* Win32 API does not work for zero-length input */
3136  if (fromlen == 0)
3137  result = 0;
3138  else
3139  {
3140  result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
3141  /* A zero return is failure */
3142  if (result == 0)
3143  result = -1;
3144  }
3145 
3146  if (result != -1)
3147  {
3148  Assert(result < tolen);
3149  /* Append trailing null wchar (MultiByteToWideChar() does not) */
3150  to[result] = 0;
3151  }
3152  }
3153  else
3154 #endif /* WIN32 */
3155  {
3156  /* mbstowcs requires ending '\0' */
3157  char *str = pnstrdup(from, fromlen);
3158 
3159  if (locale == (pg_locale_t) 0)
3160  {
3161  /* Use mbstowcs directly for the default locale */
3162  result = mbstowcs(to, str, tolen);
3163  }
3164  else
3165  {
3166  /* Use mbstowcs_l for nondefault locales */
3167  result = mbstowcs_l(to, str, tolen, locale->info.lt);
3168  }
3169 
3170  pfree(str);
3171  }
3172 
3173  if (result == -1)
3174  {
3175  /*
3176  * Invalid multibyte character encountered. We try to give a useful
3177  * error message by letting pg_verifymbstr check the string. But it's
3178  * possible that the string is OK to us, and not OK to mbstowcs ---
3179  * this suggests that the LC_CTYPE locale is different from the
3180  * database encoding. Give a generic error message if pg_verifymbstr
3181  * can't find anything wrong.
3182  */
3183  pg_verifymbstr(from, fromlen, false); /* might not return */
3184  /* but if it does ... */
3185  ereport(ERROR,
3186  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
3187  errmsg("invalid multibyte character for locale"),
3188  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
3189  }
3190 
3191  return result;
3192 }
#define TextDatumGetCString(d)
Definition: builtins.h:98
#define NameStr(name)
Definition: c.h:746
#define Min(x, y)
Definition: c.h:1004
#define pg_attribute_unused()
Definition: c.h:123
#define Assert(condition)
Definition: c.h:858
#define lengthof(array)
Definition: c.h:788
#define OidIsValid(objectId)
Definition: c.h:775
size_t Size
Definition: c.h:605
Oid collid
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
int errdetail(const char *fmt,...)
Definition: elog.c:1205
int errhint(const char *fmt,...)
Definition: elog.c:1319
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define PG_RE_THROW()
Definition: elog.h:411
#define DEBUG3
Definition: elog.h:28
#define FATAL
Definition: elog.h:41
#define PG_TRY(...)
Definition: elog.h:370
#define WARNING
Definition: elog.h:36
#define PG_END_TRY(...)
Definition: elog.h:395
#define ERROR
Definition: elog.h:39
#define PG_CATCH(...)
Definition: elog.h:380
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
const char * get_encoding_name_for_icu(int encoding)
Definition: encnames.c:472
#define palloc_array(type, count)
Definition: fe_memutils.h:64
char * asc_tolower(const char *buff, size_t nbytes)
Definition: formatting.c:2158
bool IsBinaryUpgrade
Definition: globals.c:118
#define newval
GucSource
Definition: guc.h:108
@ PGC_S_DEFAULT
Definition: guc.h:109
const char * str
#define free(a)
Definition: header.h:65
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define GETSTRUCT(TUP)
Definition: htup_details.h:653
#define period
Definition: indent_codes.h:66
#define token
Definition: indent_globs.h:126
#define bufsize
Definition: indent_globs.h:36
static struct @155 value
static char * locale
Definition: initdb.c:140
int i
Definition: isn.c:73
static struct pg_tm tm
Definition: localtime.c:104
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3366
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:676
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
int pg_mbstrlen(const char *mbstr)
Definition: mbutils.c:1037
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition: mbutils.c:1556
void SetMessageEncoding(int encoding)
Definition: mbutils.c:1171
char * pnstrdup(const char *in, Size len)
Definition: mcxt.c:1706
char * pstrdup(const char *in)
Definition: mcxt.c:1695
void pfree(void *pointer)
Definition: mcxt.c:1520
MemoryContext TopMemoryContext
Definition: mcxt.c:149
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1540
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1180
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition: mcxt.c:1682
void * palloc(Size size)
Definition: mcxt.c:1316
#define MaxAllocSize
Definition: memutils.h:40
static char format
FormData_pg_collation * Form_pg_collation
Definition: pg_collation.h:58
const void size_t len
int32 encoding
Definition: pg_database.h:41
size_t pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale)
Definition: pg_locale.c:2417
static size_t pg_strxfrm_libc(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:2153
int icu_validation_level
Definition: pg_locale.c:102
void cache_locale_time(void)
Definition: pg_locale.c:806
bool pg_strxfrm_enabled(pg_locale_t locale)
Definition: pg_locale.c:2349
char * localized_full_months[12+1]
Definition: pg_locale.c:114
size_t wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
Definition: pg_locale.c:3065
struct lconv * PGLC_localeconv(void)
Definition: pg_locale.c:524
int pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2, pg_locale_t locale)
Definition: pg_locale.c:2133
void make_icu_collator(const char *iculocstr, const char *icurules, struct pg_locale_struct *resultp)
Definition: pg_locale.c:1449
bool lc_collate_is_c(Oid collation)
Definition: pg_locale.c:1317
struct pg_locale_struct default_locale
Definition: pg_locale.c:1446
void icu_validate_locale(const char *loc_str)
Definition: pg_locale.c:2984
static bool CurrentLCTimeValid
Definition: pg_locale.c:121
void assign_locale_time(const char *newval, void *extra)
Definition: pg_locale.c:388
bool check_locale_time(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:382
char * locale_messages
Definition: pg_locale.c:97
char * locale_numeric
Definition: pg_locale.c:99
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1551
int builtin_locale_encoding(const char *locale)
Definition: pg_locale.c:2527
bool database_ctype_is_c
Definition: pg_locale.c:117
#define PGLOCALE_SUPPORT_ERROR(provider)
Definition: pg_locale.c:84
char * locale_time
Definition: pg_locale.c:100
static void cache_single_string(char **dst, const char *src, int encoding)
Definition: pg_locale.c:783
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1384
bool check_locale_numeric(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:370
bool pg_locale_deterministic(pg_locale_t locale)
Definition: pg_locale.c:1531
static void db_encoding_convert(int encoding, char **str)
Definition: pg_locale.c:494
static size_t wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
Definition: pg_locale.c:182
void assign_locale_numeric(const char *newval, void *extra)
Definition: pg_locale.c:376
bool check_locale_messages(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:404
#define MAX_L10N_DATA
Definition: pg_locale.c:93
char * get_collation_actual_version(char collprovider, const char *collcollate)
Definition: pg_locale.c:1729
static void free_struct_lconv(struct lconv *s)
Definition: pg_locale.c:444
char * pg_perm_setlocale(int category, const char *locale)
Definition: pg_locale.c:212
void assign_locale_messages(const char *newval, void *extra)
Definition: pg_locale.c:427
static bool CurrentLocaleConvValid
Definition: pg_locale.c:120
char * icu_language_tag(const char *loc_str, int elevel)
Definition: pg_locale.c:2927
int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
Definition: pg_locale.c:2098
static int pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
Definition: pg_locale.c:1916
static HTAB * collation_cache
Definition: pg_locale.c:134
bool pg_strxfrm_prefix_enabled(pg_locale_t locale)
Definition: pg_locale.c:2440
static void report_newlocale_failure(const char *localename)
Definition: pg_locale.c:1502
char * localized_abbrev_months[12+1]
Definition: pg_locale.c:113
static int pg_strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2, pg_locale_t locale)
Definition: pg_locale.c:1945
size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale)
Definition: pg_locale.c:2502
static bool struct_lconv_is_valid(struct lconv *s)
Definition: pg_locale.c:463
char * localized_full_days[7+1]
Definition: pg_locale.c:112
size_t pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:2381
const char * builtin_validate_locale(int encoding, const char *locale)
Definition: pg_locale.c:2551
static collation_cache_entry * lookup_collation_cache(Oid collation, bool set_flags)
Definition: pg_locale.c:1230
void assign_locale_monetary(const char *newval, void *extra)
Definition: pg_locale.c:364
#define TEXTBUFLEN
Definition: pg_locale.c:91
bool check_locale(int category, const char *locale, char **canonname)
Definition: pg_locale.c:315
char * localized_abbrev_days[7+1]
Definition: pg_locale.c:111
size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:2467
char * locale_monetary
Definition: pg_locale.c:98
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
Definition: pg_locale.c:3121
bool check_locale_monetary(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:358
static size_t mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
Definition: pg_locale.c:166
static size_t pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:2171
#define LOCALE_NAME_BUFLEN
Definition: pg_locale.h:36
static rewind_source * source
Definition: pg_rewind.c:89
static char * buf
Definition: pg_test_fsync.c:73
@ PG_SQL_ASCII
Definition: pg_wchar.h:226
@ PG_UTF8
Definition: pg_wchar.h:232
#define pg_encoding_to_char
Definition: pg_wchar.h:630
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
int pg_get_encoding_from_locale(const char *ctype, bool write_message)
Definition: chklocale.c:428
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int pg_strncasecmp(const char *s1, const char *s2, size_t n)
Definition: pgstrcasecmp.c:69
uintptr_t Datum
Definition: postgres.h:64
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:252
unsigned int Oid
Definition: postgres_ext.h:31
e
Definition: preproc-init.c:82
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
tree ctl
Definition: radixtree.h:1847
char * quote_qualified_identifier(const char *qualifier, const char *ident)
Definition: ruleutils.c:12707
Definition: dynahash.c:220
Definition: pg_locale.c:126
bool collate_is_c
Definition: pg_locale.c:128
Oid collid
Definition: pg_locale.c:127
pg_locale_t locale
Definition: pg_locale.c:131
bool flags_valid
Definition: pg_locale.c:130
bool ctype_is_c
Definition: pg_locale.c:129
locale_t lt
Definition: pg_locale.h:83
struct pg_locale_struct::@151::@152 builtin
const char * locale
Definition: pg_locale.h:81
union pg_locale_struct::@151 info
bool deterministic
Definition: pg_locale.h:76
Definition: regguts.h:323
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:266
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:218
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:479
Datum SysCacheGetAttrNotNull(int cacheId, HeapTuple tup, AttrNumber attributeNumber)
Definition: syscache.c:510
const char * name
#define locale_t
Definition: win32_port.h:424
#define strcoll_l
Definition: win32_port.h:447
#define strxfrm_l
Definition: win32_port.h:448
#define wcscoll_l
Definition: win32_port.h:449
#define setenv(x, y, z)
Definition: win32_port.h:537
#define setlocale(a, b)
Definition: win32_port.h:467