PostgreSQL Source Code  git master
pg_locale.c
Go to the documentation of this file.
1 /*-----------------------------------------------------------------------
2  *
3  * PostgreSQL locale utilities
4  *
5  * Portions Copyright (c) 2002-2020, PostgreSQL Global Development Group
6  *
7  * src/backend/utils/adt/pg_locale.c
8  *
9  *-----------------------------------------------------------------------
10  */
11 
12 /*----------
13  * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14  * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15  * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16  * toupper(), etc. are always in the same fixed locale.
17  *
18  * LC_MESSAGES is settable at run time and will take effect
19  * immediately.
20  *
21  * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
22  * settable at run-time. However, we don't actually set those locale
23  * categories permanently. This would have bizarre effects like no
24  * longer accepting standard floating-point literals in some locales.
25  * Instead, we only set these locale categories briefly when needed,
26  * cache the required information obtained from localeconv() or
27  * strftime(), and then set the locale categories back to "C".
28  * The cached information is only used by the formatting functions
29  * (to_char, etc.) and the money type. For the user, this should all be
30  * transparent.
31  *
32  * !!! NOW HEAR THIS !!!
33  *
34  * We've been bitten repeatedly by this bug, so let's try to keep it in
35  * mind in future: on some platforms, the locale functions return pointers
36  * to static data that will be overwritten by any later locale function.
37  * Thus, for example, the obvious-looking sequence
38  * save = setlocale(category, NULL);
39  * if (!setlocale(category, value))
40  * fail = true;
41  * setlocale(category, save);
42  * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
43  * will change the memory save is pointing at. To do this sort of thing
44  * safely, you *must* pstrdup what setlocale returns the first time.
45  *
46  * The POSIX locale standard is available here:
47  *
48  * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
49  *----------
50  */
51 
52 
53 #include "postgres.h"
54 
55 #include <time.h>
56 
57 #include "access/htup_details.h"
58 #include "catalog/pg_collation.h"
59 #include "catalog/pg_control.h"
60 #include "mb/pg_wchar.h"
61 #include "utils/builtins.h"
62 #include "utils/formatting.h"
63 #include "utils/hsearch.h"
64 #include "utils/lsyscache.h"
65 #include "utils/memutils.h"
66 #include "utils/pg_locale.h"
67 #include "utils/syscache.h"
68 
69 #ifdef USE_ICU
70 #include <unicode/ucnv.h>
71 #endif
72 
73 #ifdef __GLIBC__
74 #include <gnu/libc-version.h>
75 #endif
76 
77 #ifdef WIN32
78 #include <shlwapi.h>
79 #endif
80 
81 #define MAX_L10N_DATA 80
82 
83 
84 /* GUC settings */
89 
90 /*
91  * lc_time localization cache.
92  *
93  * We use only the first 7 or 12 entries of these arrays. The last array
94  * element is left as NULL for the convenience of outside code that wants
95  * to sequentially scan these arrays.
96  */
/* Day names: 7 usable entries plus a NULL terminator slot. */
char	   *localized_abbrev_days[7 + 1];
char	   *localized_full_days[7 + 1];

/* Month names: 12 usable entries plus a NULL terminator slot. */
char	   *localized_abbrev_months[12 + 1];
char	   *localized_full_months[12 + 1];
101 
102 /* indicates whether locale information cache is valid */
103 static bool CurrentLocaleConvValid = false;
104 static bool CurrentLCTimeValid = false;
105 
106 /* Environment variable storage area */
107 
108 #define LC_ENV_BUFSIZE (NAMEDATALEN + 20)
109 
112 
113 #ifdef LC_MESSAGES
114 static char lc_messages_envbuf[LC_ENV_BUFSIZE];
115 #endif
119 
120 /* Cache for collation-related knowledge */
121 
122 typedef struct
123 {
124  Oid collid; /* hash key: pg_collation OID */
125  bool collate_is_c; /* is collation's LC_COLLATE C? */
126  bool ctype_is_c; /* is collation's LC_CTYPE C? */
127  bool flags_valid; /* true if above flags are valid */
128  pg_locale_t locale; /* locale_t struct, or 0 if not valid */
130 
131 static HTAB *collation_cache = NULL;
132 
133 
134 #if defined(WIN32) && defined(LC_MESSAGES)
135 static char *IsoLocaleName(const char *); /* MSVC specific */
136 #endif
137 
138 #ifdef USE_ICU
139 static void icu_set_collation_attributes(UCollator *collator, const char *loc);
140 #endif
141 
142 /*
143  * pg_perm_setlocale
144  *
145  * This wraps the libc function setlocale(), with two additions. First, when
146  * changing LC_CTYPE, update gettext's encoding for the current message
147  * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but
148  * not on Windows. Second, if the operation is successful, the corresponding
149  * LC_XXX environment variable is set to match. By setting the environment
150  * variable, we ensure that any subsequent use of setlocale(..., "") will
151  * preserve the settings made through this routine. Of course, LC_ALL must
152  * also be unset to fully ensure that, but that has to be done elsewhere after
153  * all the individual LC_XXX variables have been set correctly. (Thank you
154  * Perl for making this kluge necessary.)
155  */
156 char *
157 pg_perm_setlocale(int category, const char *locale)
158 {
159  char *result;
160  const char *envvar;
161  char *envbuf;
162 
163 #ifndef WIN32
164  result = setlocale(category, locale);
165 #else
166 
167  /*
168  * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
169  * the given value is good and set it in the environment variables. We
170  * must ignore attempts to set to "", which means "keep using the old
171  * environment value".
172  */
173 #ifdef LC_MESSAGES
174  if (category == LC_MESSAGES)
175  {
176  result = (char *) locale;
177  if (locale == NULL || locale[0] == '\0')
178  return result;
179  }
180  else
181 #endif
182  result = setlocale(category, locale);
183 #endif /* WIN32 */
184 
185  if (result == NULL)
186  return result; /* fall out immediately on failure */
187 
188  /*
189  * Use the right encoding in translated messages. Under ENABLE_NLS, let
190  * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message
191  * format strings are ASCII, but database-encoding strings may enter the
192  * message via %s. This makes the overall message encoding equal to the
193  * database encoding.
194  */
195  if (category == LC_CTYPE)
196  {
197  static char save_lc_ctype[LC_ENV_BUFSIZE];
198 
199  /* copy setlocale() return value before callee invokes it again */
200  strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
201  result = save_lc_ctype;
202 
203 #ifdef ENABLE_NLS
204  SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
205 #else
207 #endif
208  }
209 
210  switch (category)
211  {
212  case LC_COLLATE:
213  envvar = "LC_COLLATE";
214  envbuf = lc_collate_envbuf;
215  break;
216  case LC_CTYPE:
217  envvar = "LC_CTYPE";
218  envbuf = lc_ctype_envbuf;
219  break;
220 #ifdef LC_MESSAGES
221  case LC_MESSAGES:
222  envvar = "LC_MESSAGES";
223  envbuf = lc_messages_envbuf;
224 #ifdef WIN32
225  result = IsoLocaleName(locale);
226  if (result == NULL)
227  result = (char *) locale;
228  elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
229 #endif /* WIN32 */
230  break;
231 #endif /* LC_MESSAGES */
232  case LC_MONETARY:
233  envvar = "LC_MONETARY";
234  envbuf = lc_monetary_envbuf;
235  break;
236  case LC_NUMERIC:
237  envvar = "LC_NUMERIC";
238  envbuf = lc_numeric_envbuf;
239  break;
240  case LC_TIME:
241  envvar = "LC_TIME";
242  envbuf = lc_time_envbuf;
243  break;
244  default:
245  elog(FATAL, "unrecognized LC category: %d", category);
246  envvar = NULL; /* keep compiler quiet */
247  envbuf = NULL;
248  return NULL;
249  }
250 
251  snprintf(envbuf, LC_ENV_BUFSIZE - 1, "%s=%s", envvar, result);
252 
253  if (putenv(envbuf))
254  return NULL;
255 
256  return result;
257 }
258 
259 
260 /*
261  * Is the locale name valid for the locale category?
262  *
263  * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
264  * canonical name is stored there. This is especially useful for figuring out
265  * what locale name "" means (ie, the server environment value). (Actually,
266  * it seems that on most implementations that's the only thing it's good for;
267  * we could wish that setlocale gave back a canonically spelled version of
268  * the locale name, but typically it doesn't.)
269  */
270 bool
271 check_locale(int category, const char *locale, char **canonname)
272 {
273  char *save;
274  char *res;
275 
276  if (canonname)
277  *canonname = NULL; /* in case of failure */
278 
279  save = setlocale(category, NULL);
280  if (!save)
281  return false; /* won't happen, we hope */
282 
283  /* save may be pointing at a modifiable scratch variable, see above. */
284  save = pstrdup(save);
285 
286  /* set the locale with setlocale, to see if it accepts it. */
287  res = setlocale(category, locale);
288 
289  /* save canonical name if requested. */
290  if (res && canonname)
291  *canonname = pstrdup(res);
292 
293  /* restore old value. */
294  if (!setlocale(category, save))
295  elog(WARNING, "failed to restore old locale \"%s\"", save);
296  pfree(save);
297 
298  return (res != NULL);
299 }
300 
301 
302 /*
303  * GUC check/assign hooks
304  *
305  * For most locale categories, the assign hook doesn't actually set the locale
306  * permanently, just reset flags so that the next use will cache the
307  * appropriate values. (See explanation at the top of this file.)
308  *
309  * Note: we accept value = "" as selecting the postmaster's environment
310  * value, whatever it was (so long as the environment setting is legal).
311  * This will have been locked down by an earlier call to pg_perm_setlocale.
312  */
313 bool
314 check_locale_monetary(char **newval, void **extra, GucSource source)
315 {
316  return check_locale(LC_MONETARY, *newval, NULL);
317 }
318 
319 void
320 assign_locale_monetary(const char *newval, void *extra)
321 {
322  CurrentLocaleConvValid = false;
323 }
324 
325 bool
326 check_locale_numeric(char **newval, void **extra, GucSource source)
327 {
328  return check_locale(LC_NUMERIC, *newval, NULL);
329 }
330 
331 void
332 assign_locale_numeric(const char *newval, void *extra)
333 {
334  CurrentLocaleConvValid = false;
335 }
336 
337 bool
338 check_locale_time(char **newval, void **extra, GucSource source)
339 {
340  return check_locale(LC_TIME, *newval, NULL);
341 }
342 
343 void
344 assign_locale_time(const char *newval, void *extra)
345 {
346  CurrentLCTimeValid = false;
347 }
348 
349 /*
350  * We allow LC_MESSAGES to actually be set globally.
351  *
352  * Note: we normally disallow value = "" because it wouldn't have consistent
353  * semantics (it'd effectively just use the previous value). However, this
354  * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
355  * not even if the attempted setting fails due to invalid environment value.
356  * The idea there is just to accept the environment setting *if possible*
357  * during startup, until we can read the proper value from postgresql.conf.
358  */
359 bool
360 check_locale_messages(char **newval, void **extra, GucSource source)
361 {
362  if (**newval == '\0')
363  {
364  if (source == PGC_S_DEFAULT)
365  return true;
366  else
367  return false;
368  }
369 
370  /*
371  * LC_MESSAGES category does not exist everywhere, but accept it anyway
372  *
373  * On Windows, we can't even check the value, so accept blindly
374  */
375 #if defined(LC_MESSAGES) && !defined(WIN32)
376  return check_locale(LC_MESSAGES, *newval, NULL);
377 #else
378  return true;
379 #endif
380 }
381 
void
assign_locale_messages(const char *newval, void *extra)
{
#ifdef LC_MESSAGES

	/*
	 * Apply the setting for real.  The result is deliberately discarded: a
	 * failure to set LC_MESSAGES is not treated as an error here.  On
	 * platforms without LC_MESSAGES this function does nothing.
	 */
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
393 
394 
395 /*
396  * Frees the malloced content of a struct lconv. (But not the struct
397  * itself.) It's important that this not throw elog(ERROR).
398  */
static void
free_struct_lconv(struct lconv *s)
{
	/* Every malloc'd member; keep in sync with struct_lconv_is_valid(). */
	char	  **fields[] = {
		&s->decimal_point,
		&s->thousands_sep,
		&s->grouping,
		&s->int_curr_symbol,
		&s->currency_symbol,
		&s->mon_decimal_point,
		&s->mon_thousands_sep,
		&s->mon_grouping,
		&s->positive_sign,
		&s->negative_sign
	};
	size_t		i;

	for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++)
	{
		/* free(NULL) is a no-op, so unset members need no special casing */
		free(*fields[i]);
		/* clear the member so a repeated call cannot double-free */
		*fields[i] = NULL;
	}
}
423 
424 /*
425  * Check that all fields of a struct lconv (or at least, the ones we care
426  * about) are non-NULL. The field list must match free_struct_lconv().
427  */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	/* Valid only if every string member we copy has been filled in. */
	return s->decimal_point != NULL &&
		s->thousands_sep != NULL &&
		s->grouping != NULL &&
		s->int_curr_symbol != NULL &&
		s->currency_symbol != NULL &&
		s->mon_decimal_point != NULL &&
		s->mon_thousands_sep != NULL &&
		s->mon_grouping != NULL &&
		s->positive_sign != NULL &&
		s->negative_sign != NULL;
}
453 
454 
455 /*
456  * Convert the strdup'd string at *str from the specified encoding to the
457  * database encoding.
458  */
459 static void
461 {
462  char *pstr;
463  char *mstr;
464 
465  /* convert the string to the database encoding */
466  pstr = pg_any_to_server(*str, strlen(*str), encoding);
467  if (pstr == *str)
468  return; /* no conversion happened */
469 
470  /* need it malloc'd not palloc'd */
471  mstr = strdup(pstr);
472  if (mstr == NULL)
473  ereport(ERROR,
474  (errcode(ERRCODE_OUT_OF_MEMORY),
475  errmsg("out of memory")));
476 
477  /* replace old string */
478  free(*str);
479  *str = mstr;
480 
481  pfree(pstr);
482 }
483 
484 
485 /*
486  * Return the POSIX lconv struct (contains number/money formatting
487  * information) with locale information for all categories.
488  */
489 struct lconv *
491 {
492  static struct lconv CurrentLocaleConv;
493  static bool CurrentLocaleConvAllocated = false;
494  struct lconv *extlconv;
495  struct lconv worklconv;
496  char *save_lc_monetary;
497  char *save_lc_numeric;
498 #ifdef WIN32
499  char *save_lc_ctype;
500 #endif
501 
502  /* Did we do it already? */
504  return &CurrentLocaleConv;
505 
506  /* Free any already-allocated storage */
507  if (CurrentLocaleConvAllocated)
508  {
509  free_struct_lconv(&CurrentLocaleConv);
510  CurrentLocaleConvAllocated = false;
511  }
512 
513  /*
514  * This is tricky because we really don't want to risk throwing error
515  * while the locale is set to other than our usual settings. Therefore,
516  * the process is: collect the usual settings, set locale to special
517  * setting, copy relevant data into worklconv using strdup(), restore
518  * normal settings, convert data to desired encoding, and finally stash
519  * the collected data in CurrentLocaleConv. This makes it safe if we
520  * throw an error during encoding conversion or run out of memory anywhere
521  * in the process. All data pointed to by struct lconv members is
522  * allocated with strdup, to avoid premature elog(ERROR) and to allow
523  * using a single cleanup routine.
524  */
525  memset(&worklconv, 0, sizeof(worklconv));
526 
527  /* Save prevailing values of monetary and numeric locales */
528  save_lc_monetary = setlocale(LC_MONETARY, NULL);
529  if (!save_lc_monetary)
530  elog(ERROR, "setlocale(NULL) failed");
531  save_lc_monetary = pstrdup(save_lc_monetary);
532 
533  save_lc_numeric = setlocale(LC_NUMERIC, NULL);
534  if (!save_lc_numeric)
535  elog(ERROR, "setlocale(NULL) failed");
536  save_lc_numeric = pstrdup(save_lc_numeric);
537 
538 #ifdef WIN32
539 
540  /*
541  * The POSIX standard explicitly says that it is undefined what happens if
542  * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
543  * that implied by LC_CTYPE. In practice, all Unix-ish platforms seem to
544  * believe that localeconv() should return strings that are encoded in the
545  * codeset implied by the LC_MONETARY or LC_NUMERIC locale name. Hence,
546  * once we have successfully collected the localeconv() results, we will
547  * convert them from that codeset to the desired server encoding.
548  *
549  * Windows, of course, resolutely does things its own way; on that
550  * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
551  * results. Hence, we must temporarily set that category as well.
552  */
553 
554  /* Save prevailing value of ctype locale */
555  save_lc_ctype = setlocale(LC_CTYPE, NULL);
556  if (!save_lc_ctype)
557  elog(ERROR, "setlocale(NULL) failed");
558  save_lc_ctype = pstrdup(save_lc_ctype);
559 
560  /* Here begins the critical section where we must not throw error */
561 
562  /* use numeric to set the ctype */
563  setlocale(LC_CTYPE, locale_numeric);
564 #endif
565 
566  /* Get formatting information for numeric */
567  setlocale(LC_NUMERIC, locale_numeric);
568  extlconv = localeconv();
569 
570  /* Must copy data now in case setlocale() overwrites it */
571  worklconv.decimal_point = strdup(extlconv->decimal_point);
572  worklconv.thousands_sep = strdup(extlconv->thousands_sep);
573  worklconv.grouping = strdup(extlconv->grouping);
574 
575 #ifdef WIN32
576  /* use monetary to set the ctype */
577  setlocale(LC_CTYPE, locale_monetary);
578 #endif
579 
580  /* Get formatting information for monetary */
581  setlocale(LC_MONETARY, locale_monetary);
582  extlconv = localeconv();
583 
584  /* Must copy data now in case setlocale() overwrites it */
585  worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
586  worklconv.currency_symbol = strdup(extlconv->currency_symbol);
587  worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
588  worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
589  worklconv.mon_grouping = strdup(extlconv->mon_grouping);
590  worklconv.positive_sign = strdup(extlconv->positive_sign);
591  worklconv.negative_sign = strdup(extlconv->negative_sign);
592  /* Copy scalar fields as well */
593  worklconv.int_frac_digits = extlconv->int_frac_digits;
594  worklconv.frac_digits = extlconv->frac_digits;
595  worklconv.p_cs_precedes = extlconv->p_cs_precedes;
596  worklconv.p_sep_by_space = extlconv->p_sep_by_space;
597  worklconv.n_cs_precedes = extlconv->n_cs_precedes;
598  worklconv.n_sep_by_space = extlconv->n_sep_by_space;
599  worklconv.p_sign_posn = extlconv->p_sign_posn;
600  worklconv.n_sign_posn = extlconv->n_sign_posn;
601 
602  /*
603  * Restore the prevailing locale settings; failure to do so is fatal.
604  * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
605  * but proceeding with the wrong value of LC_CTYPE would certainly be bad
606  * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
607  * are almost certainly "C", there's really no reason that restoring those
608  * should fail.
609  */
610 #ifdef WIN32
611  if (!setlocale(LC_CTYPE, save_lc_ctype))
612  elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
613 #endif
614  if (!setlocale(LC_MONETARY, save_lc_monetary))
615  elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
616  if (!setlocale(LC_NUMERIC, save_lc_numeric))
617  elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);
618 
619  /*
620  * At this point we've done our best to clean up, and can call functions
621  * that might possibly throw errors with a clean conscience. But let's
622  * make sure we don't leak any already-strdup'd fields in worklconv.
623  */
624  PG_TRY();
625  {
626  int encoding;
627 
628  /* Release the pstrdup'd locale names */
629  pfree(save_lc_monetary);
630  pfree(save_lc_numeric);
631 #ifdef WIN32
632  pfree(save_lc_ctype);
633 #endif
634 
635  /* If any of the preceding strdup calls failed, complain now. */
636  if (!struct_lconv_is_valid(&worklconv))
637  ereport(ERROR,
638  (errcode(ERRCODE_OUT_OF_MEMORY),
639  errmsg("out of memory")));
640 
641  /*
642  * Now we must perform encoding conversion from whatever's associated
643  * with the locales into the database encoding. If we can't identify
644  * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
645  * use PG_SQL_ASCII, which will result in just validating that the
646  * strings are OK in the database encoding.
647  */
649  if (encoding < 0)
650  encoding = PG_SQL_ASCII;
651 
652  db_encoding_convert(encoding, &worklconv.decimal_point);
653  db_encoding_convert(encoding, &worklconv.thousands_sep);
654  /* grouping is not text and does not require conversion */
655 
657  if (encoding < 0)
658  encoding = PG_SQL_ASCII;
659 
660  db_encoding_convert(encoding, &worklconv.int_curr_symbol);
661  db_encoding_convert(encoding, &worklconv.currency_symbol);
662  db_encoding_convert(encoding, &worklconv.mon_decimal_point);
663  db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
664  /* mon_grouping is not text and does not require conversion */
665  db_encoding_convert(encoding, &worklconv.positive_sign);
666  db_encoding_convert(encoding, &worklconv.negative_sign);
667  }
668  PG_CATCH();
669  {
670  free_struct_lconv(&worklconv);
671  PG_RE_THROW();
672  }
673  PG_END_TRY();
674 
675  /*
676  * Everything is good, so save the results.
677  */
678  CurrentLocaleConv = worklconv;
679  CurrentLocaleConvAllocated = true;
680  CurrentLocaleConvValid = true;
681  return &CurrentLocaleConv;
682 }
683 
684 #ifdef WIN32
685 /*
686  * On Windows, strftime() returns its output in encoding CP_ACP (the default
687  * operating system codepage for the computer), which is likely different
688  * from SERVER_ENCODING. This is especially important in Japanese versions
689  * of Windows which will use SJIS encoding, which we don't support as a
690  * server encoding.
691  *
692  * So, instead of using strftime(), use wcsftime() to return the value in
693  * wide characters (internally UTF16) and then convert to UTF8, which we
694  * know how to handle directly.
695  *
696  * Note that this only affects the calls to strftime() in this file, which are
697  * used to get the locale-aware strings. Other parts of the backend use
698  * pg_strftime(), which isn't locale-aware and does not need to be replaced.
699  */
700 static size_t
701 strftime_win32(char *dst, size_t dstlen,
702  const char *format, const struct tm *tm)
703 {
704  size_t len;
705  wchar_t wformat[8]; /* formats used below need 3 chars */
706  wchar_t wbuf[MAX_L10N_DATA];
707 
708  /*
709  * Get a wchar_t version of the format string. We only actually use
710  * plain-ASCII formats in this file, so we can say that they're UTF8.
711  */
712  len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
713  wformat, lengthof(wformat));
714  if (len == 0)
715  elog(ERROR, "could not convert format string from UTF-8: error code %lu",
716  GetLastError());
717 
718  len = wcsftime(wbuf, MAX_L10N_DATA, wformat, tm);
719  if (len == 0)
720  {
721  /*
722  * wcsftime failed, possibly because the result would not fit in
723  * MAX_L10N_DATA. Return 0 with the contents of dst unspecified.
724  */
725  return 0;
726  }
727 
728  len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1,
729  NULL, NULL);
730  if (len == 0)
731  elog(ERROR, "could not convert string to UTF-8: error code %lu",
732  GetLastError());
733 
734  dst[len] = '\0';
735 
736  return len;
737 }
738 
739 /* redefine strftime() */
740 #define strftime(a,b,c,d) strftime_win32(a,b,c,d)
741 #endif /* WIN32 */
742 
743 /*
744  * Subroutine for cache_locale_time().
745  * Convert the given string from encoding "encoding" to the database
746  * encoding, and store the result at *dst, replacing any previous value.
747  */
748 static void
749 cache_single_string(char **dst, const char *src, int encoding)
750 {
751  char *ptr;
752  char *olddst;
753 
754  /* Convert the string to the database encoding, or validate it's OK */
755  ptr = pg_any_to_server(src, strlen(src), encoding);
756 
757  /* Store the string in long-lived storage, replacing any previous value */
758  olddst = *dst;
760  if (olddst)
761  pfree(olddst);
762 
763  /* Might as well clean up any palloc'd conversion result, too */
764  if (ptr != src)
765  pfree(ptr);
766 }
767 
768 /*
769  * Update the lc_time localization cache variables if needed.
770  */
771 void
773 {
774  char buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
775  char *bufptr;
776  time_t timenow;
777  struct tm *timeinfo;
778  bool strftimefail = false;
779  int encoding;
780  int i;
781  char *save_lc_time;
782 #ifdef WIN32
783  char *save_lc_ctype;
784 #endif
785 
786  /* did we do this already? */
787  if (CurrentLCTimeValid)
788  return;
789 
790  elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
791 
792  /*
793  * As in PGLC_localeconv(), it's critical that we not throw error while
794  * libc's locale settings have nondefault values. Hence, we just call
795  * strftime() within the critical section, and then convert and save its
796  * results afterwards.
797  */
798 
799  /* Save prevailing value of time locale */
800  save_lc_time = setlocale(LC_TIME, NULL);
801  if (!save_lc_time)
802  elog(ERROR, "setlocale(NULL) failed");
803  save_lc_time = pstrdup(save_lc_time);
804 
805 #ifdef WIN32
806 
807  /*
808  * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we
809  * must set it here. This code looks the same as what PGLC_localeconv()
810  * does, but the underlying reason is different: this does NOT determine
811  * the encoding we'll get back from strftime_win32().
812  */
813 
814  /* Save prevailing value of ctype locale */
815  save_lc_ctype = setlocale(LC_CTYPE, NULL);
816  if (!save_lc_ctype)
817  elog(ERROR, "setlocale(NULL) failed");
818  save_lc_ctype = pstrdup(save_lc_ctype);
819 
820  /* use lc_time to set the ctype */
821  setlocale(LC_CTYPE, locale_time);
822 #endif
823 
824  setlocale(LC_TIME, locale_time);
825 
826  /* We use times close to current time as data for strftime(). */
827  timenow = time(NULL);
828  timeinfo = localtime(&timenow);
829 
830  /* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
831  bufptr = buf;
832 
833  /*
834  * MAX_L10N_DATA is sufficient buffer space for every known locale, and
835  * POSIX defines no strftime() errors. (Buffer space exhaustion is not an
836  * error.) An implementation might report errors (e.g. ENOMEM) by
837  * returning 0 (or, less plausibly, a negative value) and setting errno.
838  * Report errno just in case the implementation did that, but clear it in
839  * advance of the calls so we don't emit a stale, unrelated errno.
840  */
841  errno = 0;
842 
843  /* localized days */
844  for (i = 0; i < 7; i++)
845  {
846  timeinfo->tm_wday = i;
847  if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0)
848  strftimefail = true;
849  bufptr += MAX_L10N_DATA;
850  if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0)
851  strftimefail = true;
852  bufptr += MAX_L10N_DATA;
853  }
854 
855  /* localized months */
856  for (i = 0; i < 12; i++)
857  {
858  timeinfo->tm_mon = i;
859  timeinfo->tm_mday = 1; /* make sure we don't have invalid date */
860  if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0)
861  strftimefail = true;
862  bufptr += MAX_L10N_DATA;
863  if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0)
864  strftimefail = true;
865  bufptr += MAX_L10N_DATA;
866  }
867 
868  /*
869  * Restore the prevailing locale settings; as in PGLC_localeconv(),
870  * failure to do so is fatal.
871  */
872 #ifdef WIN32
873  if (!setlocale(LC_CTYPE, save_lc_ctype))
874  elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
875 #endif
876  if (!setlocale(LC_TIME, save_lc_time))
877  elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);
878 
879  /*
880  * At this point we've done our best to clean up, and can throw errors, or
881  * call functions that might throw errors, with a clean conscience.
882  */
883  if (strftimefail)
884  elog(ERROR, "strftime() failed: %m");
885 
886  /* Release the pstrdup'd locale names */
887  pfree(save_lc_time);
888 #ifdef WIN32
889  pfree(save_lc_ctype);
890 #endif
891 
892 #ifndef WIN32
893 
894  /*
895  * As in PGLC_localeconv(), we must convert strftime()'s output from the
896  * encoding implied by LC_TIME to the database encoding. If we can't
897  * identify the LC_TIME encoding, just perform encoding validation.
898  */
899  encoding = pg_get_encoding_from_locale(locale_time, true);
900  if (encoding < 0)
901  encoding = PG_SQL_ASCII;
902 
903 #else
904 
905  /*
906  * On Windows, strftime_win32() always returns UTF8 data, so convert from
907  * that if necessary.
908  */
909  encoding = PG_UTF8;
910 
911 #endif /* WIN32 */
912 
913  bufptr = buf;
914 
915  /* localized days */
916  for (i = 0; i < 7; i++)
917  {
918  cache_single_string(&localized_abbrev_days[i], bufptr, encoding);
919  bufptr += MAX_L10N_DATA;
920  cache_single_string(&localized_full_days[i], bufptr, encoding);
921  bufptr += MAX_L10N_DATA;
922  }
923  localized_abbrev_days[7] = NULL;
924  localized_full_days[7] = NULL;
925 
926  /* localized months */
927  for (i = 0; i < 12; i++)
928  {
929  cache_single_string(&localized_abbrev_months[i], bufptr, encoding);
930  bufptr += MAX_L10N_DATA;
931  cache_single_string(&localized_full_months[i], bufptr, encoding);
932  bufptr += MAX_L10N_DATA;
933  }
934  localized_abbrev_months[12] = NULL;
935  localized_full_months[12] = NULL;
936 
937  CurrentLCTimeValid = true;
938 }
939 
940 
941 #if defined(WIN32) && defined(LC_MESSAGES)
942 /*
943  * Convert a Windows setlocale() argument to a Unix-style one.
944  *
945  * Regardless of platform, we install message catalogs under a Unix-style
946  * LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings
947  * following that style will elicit localized interface strings.
948  *
949  * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
950  * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
951  * case-insensitive. setlocale() returns the fully-qualified form; for
952  * example, setlocale("thaI") returns "Thai_Thailand.874". Internally,
953  * setlocale() and _create_locale() select a "locale identifier"[1] and store
954  * it in an undocumented _locale_t field. From that LCID, we can retrieve the
955  * ISO 639 language and the ISO 3166 country. Character encoding does not
956  * matter, because the server and client encodings govern that.
957  *
958  * Windows Vista introduced the "locale name" concept[2], closely following
959  * RFC 4646. Locale identifiers are now deprecated. Starting with Visual
960  * Studio 2012, setlocale() accepts locale names in addition to the strings it
961  * accepted historically. It does not standardize them; setlocale("Th-tH")
962  * returns "Th-tH". setlocale(category, "") still returns a traditional
963  * string. Furthermore, msvcr110.dll changed the undocumented _locale_t
964  * content to carry locale names instead of locale identifiers.
965  *
 * Visual Studio 2015 should still be able to do the same as Visual Studio
 * 2012, but the declaration of locale_name is missing from _locale_t, so
 * this code fails to compile there. We therefore fall back to enumerating
 * all system locales with EnumSystemLocalesEx() to find the required locale
 * name. If the input argument is already Unix-style, we can get the ISO
 * locale name directly by calling GetLocaleInfoEx() with LCType
 * LOCALE_SNAME.
973  *
974  * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol in
975  * releases before Windows 8. IsoLocaleName() always fails in a MinGW-built
976  * postgres.exe, so only Unix-style values of the lc_messages GUC can elicit
977  * localized messages. In particular, every lc_messages setting that initdb
978  * can select automatically will yield only C-locale messages. XXX This could
979  * be fixed by running the fully-qualified locale name through a lookup table.
980  *
981  * This function returns a pointer to a static buffer bearing the converted
982  * name or NULL if conversion fails.
983  *
984  * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers
985  * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names
986  */
987 
988 #if _MSC_VER >= 1900
989 /*
990  * Callback function for EnumSystemLocalesEx() in get_iso_localename().
991  *
992  * This function enumerates all system locales, searching for one that matches
993  * an input with the format: <Language>[_<Country>], e.g.
994  * English[_United States]
995  *
996  * The input is a three wchar_t array as an LPARAM. The first element is the
997  * locale_name we want to match, the second element is an allocated buffer
998  * where the Unix-style locale is copied if a match is found, and the third
999  * element is the search status, 1 if a match was found, 0 otherwise.
1000  */
1001 static BOOL CALLBACK
1002 search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
1003 {
1004  wchar_t test_locale[LOCALE_NAME_MAX_LENGTH];
1005  wchar_t **argv;
1006 
1007  (void) (dwFlags);
1008 
1009  argv = (wchar_t **) lparam;
1010  *argv[2] = (wchar_t) 0;
1011 
1012  memset(test_locale, 0, sizeof(test_locale));
1013 
1014  /* Get the name of the <Language> in English */
1015  if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME,
1016  test_locale, LOCALE_NAME_MAX_LENGTH))
1017  {
1018  /*
1019  * If the enumerated locale does not have a hyphen ("en") OR the
1020  * lc_message input does not have an underscore ("English"), we only
1021  * need to compare the <Language> tags.
1022  */
1023  if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL)
1024  {
1025  if (_wcsicmp(argv[0], test_locale) == 0)
1026  {
1027  wcscpy(argv[1], pStr);
1028  *argv[2] = (wchar_t) 1;
1029  return FALSE;
1030  }
1031  }
1032 
1033  /*
1034  * We have to compare a full <Language>_<Country> tag, so we append
1035  * the underscore and name of the country/region in English, e.g.
1036  * "English_United States".
1037  */
1038  else
1039  {
1040  size_t len;
1041 
1042  wcscat(test_locale, L"_");
1043  len = wcslen(test_locale);
1044  if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME,
1045  test_locale + len,
1046  LOCALE_NAME_MAX_LENGTH - len))
1047  {
1048  if (_wcsicmp(argv[0], test_locale) == 0)
1049  {
1050  wcscpy(argv[1], pStr);
1051  *argv[2] = (wchar_t) 1;
1052  return FALSE;
1053  }
1054  }
1055  }
1056  }
1057 
1058  return TRUE;
1059 }
1060 
1061 /*
1062  * This function converts a Windows locale name to an ISO formatted version
1063  * for Visual Studio 2015 or greater.
1064  *
1065  * Returns NULL, if no valid conversion was found.
1066  */
1067 static char *
1068 get_iso_localename(const char *winlocname)
1069 {
1070  wchar_t wc_locale_name[LOCALE_NAME_MAX_LENGTH];
1071  wchar_t buffer[LOCALE_NAME_MAX_LENGTH];
1072  static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1073  char *period;
1074  int len;
1075  int ret_val;
1076 
1077  /*
1078  * Valid locales have the following syntax:
1079  * <Language>[_<Country>[.<CodePage>]]
1080  *
1081  * GetLocaleInfoEx can only take locale name without code-page and for the
1082  * purpose of this API the code-page doesn't matter.
1083  */
1084  period = strchr(winlocname, '.');
1085  if (period != NULL)
1086  len = period - winlocname;
1087  else
1088  len = pg_mbstrlen(winlocname);
1089 
1090  memset(wc_locale_name, 0, sizeof(wc_locale_name));
1091  memset(buffer, 0, sizeof(buffer));
1092  MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name,
1093  LOCALE_NAME_MAX_LENGTH);
1094 
1095  /*
1096  * If the lc_messages is already an Unix-style string, we have a direct
1097  * match with LOCALE_SNAME, e.g. en-US, en_US.
1098  */
1099  ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer,
1100  LOCALE_NAME_MAX_LENGTH);
1101  if (!ret_val)
1102  {
1103  /*
1104  * Search for a locale in the system that matches language and country
1105  * name.
1106  */
1107  wchar_t *argv[3];
1108 
1109  argv[0] = wc_locale_name;
1110  argv[1] = buffer;
1111  argv[2] = (wchar_t *) &ret_val;
1112  EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv,
1113  NULL);
1114  }
1115 
1116  if (ret_val)
1117  {
1118  size_t rc;
1119  char *hyphen;
1120 
1121  /* Locale names use only ASCII, any conversion locale suffices. */
1122  rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
1123  if (rc == -1 || rc == sizeof(iso_lc_messages))
1124  return NULL;
1125 
1126  /*
1127  * Simply replace the hyphen with an underscore. See comments in
1128  * IsoLocaleName.
1129  */
1130  hyphen = strchr(iso_lc_messages, '-');
1131  if (hyphen)
1132  *hyphen = '_';
1133  return iso_lc_messages;
1134  }
1135 
1136  return NULL;
1137 }
1138 #endif /* _MSC_VER >= 1900 */
1139 
static char *
IsoLocaleName(const char *winlocname)
{
#if defined(_MSC_VER)
	static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];

	/* "C" and "POSIX" (in any letter case) both map to plain "C" */
	if (pg_strcasecmp("c", winlocname) == 0 ||
		pg_strcasecmp("posix", winlocname) == 0)
	{
		strcpy(iso_lc_messages, "C");
		return iso_lc_messages;
	}

#if (_MSC_VER >= 1900)			/* Visual Studio 2015 or later */
	return get_iso_localename(winlocname);
#else
	{
		_locale_t	loct;
		size_t		rc;
		char	   *hyphen;

		loct = _create_locale(LC_CTYPE, winlocname);
		if (loct == NULL)
			return NULL;

		/* Locale names use only ASCII, any conversion locale suffices. */
		rc = wchar2char(iso_lc_messages, loct->locinfo->locale_name[LC_CTYPE],
						sizeof(iso_lc_messages), NULL);
		_free_locale(loct);
		if (rc == -1 || rc == sizeof(iso_lc_messages))
			return NULL;

		/*
		 * The message catalogs live on a case-insensitive filesystem, so
		 * letter case need not be standardized here.  As long as we ship no
		 * message catalogs for which it would matter, the script/variant
		 * portion need not be translated either (e.g. uz-Cyrl-UZ to
		 * uz_UZ@cyrillic).  Replacing the hyphen with an underscore is
		 * enough.
		 *
		 * The locale name can be less specific than the value derived under
		 * earlier Visual Studio releases; for example, French_France.1252
		 * yields just "fr".  That does not affect any of the
		 * country-specific message catalogs available as of this writing
		 * (pt_BR, zh_CN, zh_TW).
		 */
		hyphen = strchr(iso_lc_messages, '-');
		if (hyphen)
			*hyphen = '_';
		return iso_lc_messages;
	}
#endif							/* Visual Studio 2015 or later */
#else
	return NULL;				/* Not supported on this version of msvc/mingw */
#endif							/* defined(_MSC_VER) */
}
1196 #endif /* WIN32 && LC_MESSAGES */
1197 
1198 
1199 /*
1200  * Detect aging strxfrm() implementations that, in a subset of locales, write
1201  * past the specified buffer length. Affected users must update OS packages
1202  * before using PostgreSQL 9.5 or later.
1203  *
1204  * Assume that the bug can come and go from one postmaster startup to another
1205  * due to physical replication among diverse machines. Assume that the bug's
1206  * presence will not change during the life of a particular postmaster. Given
1207  * those assumptions, call this no less than once per postmaster startup per
1208  * LC_COLLATE setting used. No known-affected system offers strxfrm_l(), so
1209  * there is no need to consider pg_collation locales.
1210  */
1211 void
1213 {
1214  char buf[32];
1215  const int canary = 0x7F;
1216  bool ok = true;
1217 
1218  /*
1219  * Given a two-byte ASCII string and length limit 7, 8 or 9, Solaris 10
1220  * 05/08 returns 18 and modifies 10 bytes. It respects limits above or
1221  * below that range.
1222  *
1223  * The bug is present in Solaris 8 as well; it is absent in Solaris 10
1224  * 01/13 and Solaris 11.2. Affected locales include is_IS.ISO8859-1,
1225  * en_US.UTF-8, en_US.ISO8859-1, and ru_RU.KOI8-R. Unaffected locales
1226  * include de_DE.UTF-8, de_DE.ISO8859-1, zh_TW.UTF-8, and C.
1227  */
1228  buf[7] = canary;
1229  (void) strxfrm(buf, "ab", 7);
1230  if (buf[7] != canary)
1231  ok = false;
1232 
1233  /*
1234  * illumos bug #1594 was present in the source tree from 2010-10-11 to
1235  * 2012-02-01. Given an ASCII string of any length and length limit 1,
1236  * affected systems ignore the length limit and modify a number of bytes
1237  * one less than the return value. The problem inputs for this bug do not
1238  * overlap those for the Solaris bug, hence a distinct test.
1239  *
1240  * Affected systems include smartos-20110926T021612Z. Affected locales
1241  * include en_US.ISO8859-1 and en_US.UTF-8. Unaffected locales include C.
1242  */
1243  buf[1] = canary;
1244  (void) strxfrm(buf, "a", 1);
1245  if (buf[1] != canary)
1246  ok = false;
1247 
1248  if (!ok)
1249  ereport(ERROR,
1250  (errcode(ERRCODE_SYSTEM_ERROR),
1251  errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length",
1252  setlocale(LC_COLLATE, NULL)),
1253  errhint("Apply system library package updates.")));
1254 }
1255 
1256 
1257 /*
1258  * Cache mechanism for collation information.
1259  *
1260  * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
1261  * (or POSIX), so we can optimize a few code paths in various places.
1262  * For the built-in C and POSIX collations, we can know that without even
1263  * doing a cache lookup, but we want to support aliases for C/POSIX too.
1264  * For the "default" collation, there are separate static cache variables,
1265  * since consulting the pg_collation catalog doesn't tell us what we need.
1266  *
1267  * Also, if a pg_locale_t has been requested for a collation, we cache that
1268  * for the life of a backend.
1269  *
1270  * Note that some code relies on the flags not reporting false negatives
1271  * (that is, saying it's not C when it is). For example, char2wchar()
1272  * could fail if the locale is C, so str_tolower() shouldn't call it
1273  * in that case.
1274  *
1275  * Note that we currently lack any way to flush the cache. Since we don't
1276  * support ALTER COLLATION, this is OK. The worst case is that someone
1277  * drops a collation, and a useless cache entry hangs around in existing
1278  * backends.
1279  */
1280 
1281 static collation_cache_entry *
1282 lookup_collation_cache(Oid collation, bool set_flags)
1283 {
1284  collation_cache_entry *cache_entry;
1285  bool found;
1286 
1287  Assert(OidIsValid(collation));
1288  Assert(collation != DEFAULT_COLLATION_OID);
1289 
1290  if (collation_cache == NULL)
1291  {
1292  /* First time through, initialize the hash table */
1293  HASHCTL ctl;
1294 
1295  memset(&ctl, 0, sizeof(ctl));
1296  ctl.keysize = sizeof(Oid);
1297  ctl.entrysize = sizeof(collation_cache_entry);
1298  collation_cache = hash_create("Collation cache", 100, &ctl,
1299  HASH_ELEM | HASH_BLOBS);
1300  }
1301 
1302  cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
1303  if (!found)
1304  {
1305  /*
1306  * Make sure cache entry is marked invalid, in case we fail before
1307  * setting things.
1308  */
1309  cache_entry->flags_valid = false;
1310  cache_entry->locale = 0;
1311  }
1312 
1313  if (set_flags && !cache_entry->flags_valid)
1314  {
1315  /* Attempt to set the flags */
1316  HeapTuple tp;
1317  Form_pg_collation collform;
1318  const char *collcollate;
1319  const char *collctype;
1320 
1321  tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
1322  if (!HeapTupleIsValid(tp))
1323  elog(ERROR, "cache lookup failed for collation %u", collation);
1324  collform = (Form_pg_collation) GETSTRUCT(tp);
1325 
1326  collcollate = NameStr(collform->collcollate);
1327  collctype = NameStr(collform->collctype);
1328 
1329  cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
1330  (strcmp(collcollate, "POSIX") == 0));
1331  cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
1332  (strcmp(collctype, "POSIX") == 0));
1333 
1334  cache_entry->flags_valid = true;
1335 
1336  ReleaseSysCache(tp);
1337  }
1338 
1339  return cache_entry;
1340 }
1341 
1342 
1343 /*
1344  * Detect whether collation's LC_COLLATE property is C
1345  */
1346 bool
1348 {
1349  /*
1350  * If we're asked about "collation 0", return false, so that the code will
1351  * go into the non-C path and report that the collation is bogus.
1352  */
1353  if (!OidIsValid(collation))
1354  return false;
1355 
1356  /*
1357  * If we're asked about the default collation, we have to inquire of the C
1358  * library. Cache the result so we only have to compute it once.
1359  */
1360  if (collation == DEFAULT_COLLATION_OID)
1361  {
1362  static int result = -1;
1363  char *localeptr;
1364 
1365  if (result >= 0)
1366  return (bool) result;
1367  localeptr = setlocale(LC_COLLATE, NULL);
1368  if (!localeptr)
1369  elog(ERROR, "invalid LC_COLLATE setting");
1370 
1371  if (strcmp(localeptr, "C") == 0)
1372  result = true;
1373  else if (strcmp(localeptr, "POSIX") == 0)
1374  result = true;
1375  else
1376  result = false;
1377  return (bool) result;
1378  }
1379 
1380  /*
1381  * If we're asked about the built-in C/POSIX collations, we know that.
1382  */
1383  if (collation == C_COLLATION_OID ||
1384  collation == POSIX_COLLATION_OID)
1385  return true;
1386 
1387  /*
1388  * Otherwise, we have to consult pg_collation, but we cache that.
1389  */
1390  return (lookup_collation_cache(collation, true))->collate_is_c;
1391 }
1392 
1393 /*
1394  * Detect whether collation's LC_CTYPE property is C
1395  */
1396 bool
1397 lc_ctype_is_c(Oid collation)
1398 {
1399  /*
1400  * If we're asked about "collation 0", return false, so that the code will
1401  * go into the non-C path and report that the collation is bogus.
1402  */
1403  if (!OidIsValid(collation))
1404  return false;
1405 
1406  /*
1407  * If we're asked about the default collation, we have to inquire of the C
1408  * library. Cache the result so we only have to compute it once.
1409  */
1410  if (collation == DEFAULT_COLLATION_OID)
1411  {
1412  static int result = -1;
1413  char *localeptr;
1414 
1415  if (result >= 0)
1416  return (bool) result;
1417  localeptr = setlocale(LC_CTYPE, NULL);
1418  if (!localeptr)
1419  elog(ERROR, "invalid LC_CTYPE setting");
1420 
1421  if (strcmp(localeptr, "C") == 0)
1422  result = true;
1423  else if (strcmp(localeptr, "POSIX") == 0)
1424  result = true;
1425  else
1426  result = false;
1427  return (bool) result;
1428  }
1429 
1430  /*
1431  * If we're asked about the built-in C/POSIX collations, we know that.
1432  */
1433  if (collation == C_COLLATION_OID ||
1434  collation == POSIX_COLLATION_OID)
1435  return true;
1436 
1437  /*
1438  * Otherwise, we have to consult pg_collation, but we cache that.
1439  */
1440  return (lookup_collation_cache(collation, true))->ctype_is_c;
1441 }
1442 
1443 
1444 /* simple subroutine for reporting errors from newlocale() */
1445 #ifdef HAVE_LOCALE_T
1446 static void
1447 report_newlocale_failure(const char *localename)
1448 {
1449  int save_errno;
1450 
1451  /*
1452  * Windows doesn't provide any useful error indication from
1453  * _create_locale(), and BSD-derived platforms don't seem to feel they
1454  * need to set errno either (even though POSIX is pretty clear that
1455  * newlocale should do so). So, if errno hasn't been set, assume ENOENT
1456  * is what to report.
1457  */
1458  if (errno == 0)
1459  errno = ENOENT;
1460 
1461  /*
1462  * ENOENT means "no such locale", not "no such file", so clarify that
1463  * errno with an errdetail message.
1464  */
1465  save_errno = errno; /* auxiliary funcs might change errno */
1466  ereport(ERROR,
1467  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1468  errmsg("could not create locale \"%s\": %m",
1469  localename),
1470  (save_errno == ENOENT ?
1471  errdetail("The operating system could not find any locale data for the locale name \"%s\".",
1472  localename) : 0)));
1473 }
1474 #endif /* HAVE_LOCALE_T */
1475 
1476 
1477 /*
1478  * Create a locale_t from a collation OID. Results are cached for the
1479  * lifetime of the backend. Thus, do not free the result with freelocale().
1480  *
1481  * As a special optimization, the default/database collation returns 0.
1482  * Callers should then revert to the non-locale_t-enabled code path.
1483  * In fact, they shouldn't call this function at all when they are dealing
1484  * with the default locale. That can save quite a bit in hotspots.
1485  * Also, callers should avoid calling this before going down a C/POSIX
1486  * fastpath, because such a fastpath should work even on platforms without
1487  * locale_t support in the C library.
1488  *
 * For simplicity, we always generate COLLATE + CTYPE even though we
 * might only need one of them. Since the result is computed only once per
 * collation per session, it shouldn't cost much.
1492  */
1495 {
1496  collation_cache_entry *cache_entry;
1497 
1498  /* Callers must pass a valid OID */
1499  Assert(OidIsValid(collid));
1500 
1501  /* Return 0 for "default" collation, just in case caller forgets */
1502  if (collid == DEFAULT_COLLATION_OID)
1503  return (pg_locale_t) 0;
1504 
1505  cache_entry = lookup_collation_cache(collid, false);
1506 
1507  if (cache_entry->locale == 0)
1508  {
1509  /* We haven't computed this yet in this session, so do it */
1510  HeapTuple tp;
1511  Form_pg_collation collform;
1512  const char *collcollate;
1513  const char *collctype pg_attribute_unused();
1514  struct pg_locale_struct result;
1515  pg_locale_t resultp;
1516  Datum collversion;
1517  bool isnull;
1518 
1519  tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1520  if (!HeapTupleIsValid(tp))
1521  elog(ERROR, "cache lookup failed for collation %u", collid);
1522  collform = (Form_pg_collation) GETSTRUCT(tp);
1523 
1524  collcollate = NameStr(collform->collcollate);
1525  collctype = NameStr(collform->collctype);
1526 
1527  /* We'll fill in the result struct locally before allocating memory */
1528  memset(&result, 0, sizeof(result));
1529  result.provider = collform->collprovider;
1530  result.deterministic = collform->collisdeterministic;
1531 
1532  if (collform->collprovider == COLLPROVIDER_LIBC)
1533  {
1534 #ifdef HAVE_LOCALE_T
1535  locale_t loc;
1536 
1537  if (strcmp(collcollate, collctype) == 0)
1538  {
1539  /* Normal case where they're the same */
1540  errno = 0;
1541 #ifndef WIN32
1542  loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
1543  NULL);
1544 #else
1545  loc = _create_locale(LC_ALL, collcollate);
1546 #endif
1547  if (!loc)
1548  report_newlocale_failure(collcollate);
1549  }
1550  else
1551  {
1552 #ifndef WIN32
1553  /* We need two newlocale() steps */
1554  locale_t loc1;
1555 
1556  errno = 0;
1557  loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
1558  if (!loc1)
1559  report_newlocale_failure(collcollate);
1560  errno = 0;
1561  loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
1562  if (!loc)
1563  report_newlocale_failure(collctype);
1564 #else
1565 
1566  /*
1567  * XXX The _create_locale() API doesn't appear to support
1568  * this. Could perhaps be worked around by changing
1569  * pg_locale_t to contain two separate fields.
1570  */
1571  ereport(ERROR,
1572  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1573  errmsg("collations with different collate and ctype values are not supported on this platform")));
1574 #endif
1575  }
1576 
1577  result.info.lt = loc;
1578 #else /* not HAVE_LOCALE_T */
1579  /* platform that doesn't support locale_t */
1580  ereport(ERROR,
1581  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1582  errmsg("collation provider LIBC is not supported on this platform")));
1583 #endif /* not HAVE_LOCALE_T */
1584  }
1585  else if (collform->collprovider == COLLPROVIDER_ICU)
1586  {
1587 #ifdef USE_ICU
1588  UCollator *collator;
1589  UErrorCode status;
1590 
1591  if (strcmp(collcollate, collctype) != 0)
1592  ereport(ERROR,
1593  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1594  errmsg("collations with different collate and ctype values are not supported by ICU")));
1595 
1596  status = U_ZERO_ERROR;
1597  collator = ucol_open(collcollate, &status);
1598  if (U_FAILURE(status))
1599  ereport(ERROR,
1600  (errmsg("could not open collator for locale \"%s\": %s",
1601  collcollate, u_errorName(status))));
1602 
1603  if (U_ICU_VERSION_MAJOR_NUM < 54)
1604  icu_set_collation_attributes(collator, collcollate);
1605 
1606  /* We will leak this string if we get an error below :-( */
1607  result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
1608  collcollate);
1609  result.info.icu.ucol = collator;
1610 #else /* not USE_ICU */
1611  /* could get here if a collation was created by a build with ICU */
1612  ereport(ERROR,
1613  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1614  errmsg("ICU is not supported in this build"), \
1615  errhint("You need to rebuild PostgreSQL using --with-icu.")));
1616 #endif /* not USE_ICU */
1617  }
1618 
1619  collversion = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1620  &isnull);
1621  if (!isnull)
1622  {
1623  char *actual_versionstr;
1624  char *collversionstr;
1625 
1626  actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate);
1627  if (!actual_versionstr)
1628  {
1629  /*
1630  * This could happen when specifying a version in CREATE
1631  * COLLATION for a libc locale, or manually creating a mess in
1632  * the catalogs.
1633  */
1634  ereport(ERROR,
1635  (errmsg("collation \"%s\" has no actual version, but a version was specified",
1636  NameStr(collform->collname))));
1637  }
1638  collversionstr = TextDatumGetCString(collversion);
1639 
1640  if (strcmp(actual_versionstr, collversionstr) != 0)
1641  ereport(WARNING,
1642  (errmsg("collation \"%s\" has version mismatch",
1643  NameStr(collform->collname)),
1644  errdetail("The collation in the database was created using version %s, "
1645  "but the operating system provides version %s.",
1646  collversionstr, actual_versionstr),
1647  errhint("Rebuild all objects affected by this collation and run "
1648  "ALTER COLLATION %s REFRESH VERSION, "
1649  "or build PostgreSQL with the right library version.",
1650  quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1651  NameStr(collform->collname)))));
1652  }
1653 
1654  ReleaseSysCache(tp);
1655 
1656  /* We'll keep the pg_locale_t structures in TopMemoryContext */
1657  resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp));
1658  *resultp = result;
1659 
1660  cache_entry->locale = resultp;
1661  }
1662 
1663  return cache_entry->locale;
1664 }
1665 
1666 /*
1667  * Get provider-specific collation version string for the given collation from
1668  * the operating system/library.
1669  */
1670 char *
1671 get_collation_actual_version(char collprovider, const char *collcollate)
1672 {
1673  char *collversion = NULL;
1674 
1675 #ifdef USE_ICU
1676  if (collprovider == COLLPROVIDER_ICU)
1677  {
1678  UCollator *collator;
1679  UErrorCode status;
1680  UVersionInfo versioninfo;
1681  char buf[U_MAX_VERSION_STRING_LENGTH];
1682 
1683  status = U_ZERO_ERROR;
1684  collator = ucol_open(collcollate, &status);
1685  if (U_FAILURE(status))
1686  ereport(ERROR,
1687  (errmsg("could not open collator for locale \"%s\": %s",
1688  collcollate, u_errorName(status))));
1689  ucol_getVersion(collator, versioninfo);
1690  ucol_close(collator);
1691 
1692  u_versionToString(versioninfo, buf);
1693  collversion = pstrdup(buf);
1694  }
1695  else
1696 #endif
1697  if (collprovider == COLLPROVIDER_LIBC)
1698  {
1699 #if defined(__GLIBC__)
1700  char *copy = pstrdup(collcollate);
1701  char *copy_suffix = strstr(copy, ".");
1702  bool need_version = true;
1703 
1704  /*
1705  * Check for names like C.UTF-8 by chopping off the encoding suffix on
1706  * our temporary copy, so we can skip the version.
1707  */
1708  if (copy_suffix)
1709  *copy_suffix = '\0';
1710  if (pg_strcasecmp("c", copy) == 0 ||
1711  pg_strcasecmp("posix", copy) == 0)
1712  need_version = false;
1713  pfree(copy);
1714  if (!need_version)
1715  return NULL;
1716 
1717  /* Use the glibc version because we don't have anything better. */
1718  collversion = pstrdup(gnu_get_libc_version());
1719 #elif defined(WIN32) && _WIN32_WINNT >= 0x0600
1720  /*
1721  * If we are targeting Windows Vista and above, we can ask for a name
1722  * given a collation name (earlier versions required a location code
1723  * that we don't have).
1724  */
1725  NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
1726  WCHAR wide_collcollate[LOCALE_NAME_MAX_LENGTH];
1727 
1728  /* These would be invalid arguments, but have no version. */
1729  if (pg_strcasecmp("c", collcollate) == 0 ||
1730  pg_strcasecmp("posix", collcollate) == 0)
1731  return NULL;
1732 
1733  /* For all other names, ask the OS. */
1734  MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
1735  LOCALE_NAME_MAX_LENGTH);
1736  if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
1737  ereport(ERROR,
1738  (errmsg("could not get collation version for locale \"%s\": error code %lu",
1739  collcollate,
1740  GetLastError())));
1741  collversion = psprintf("%d.%d,%d.%d",
1742  (version.dwNLSVersion >> 8) & 0xFFFF,
1743  version.dwNLSVersion & 0xFF,
1744  (version.dwDefinedVersion >> 8) & 0xFFFF,
1745  version.dwDefinedVersion & 0xFF);
1746 #endif
1747  }
1748 
1749  return collversion;
1750 }
1751 
1752 
1753 #ifdef USE_ICU
1754 /*
1755  * Converter object for converting between ICU's UChar strings and C strings
1756  * in database encoding. Since the database encoding doesn't change, we only
1757  * need one of these per session.
1758  */
1759 static UConverter *icu_converter = NULL;
1760 
1761 static void
1762 init_icu_converter(void)
1763 {
1764  const char *icu_encoding_name;
1765  UErrorCode status;
1766  UConverter *conv;
1767 
1768  if (icu_converter)
1769  return; /* already done */
1770 
1771  icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
1772  if (!icu_encoding_name)
1773  ereport(ERROR,
1774  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1775  errmsg("encoding \"%s\" not supported by ICU",
1777 
1778  status = U_ZERO_ERROR;
1779  conv = ucnv_open(icu_encoding_name, &status);
1780  if (U_FAILURE(status))
1781  ereport(ERROR,
1782  (errmsg("could not open ICU converter for encoding \"%s\": %s",
1783  icu_encoding_name, u_errorName(status))));
1784 
1785  icu_converter = conv;
1786 }
1787 
1788 /*
1789  * Convert a string in the database encoding into a string of UChars.
1790  *
1791  * The source string at buff is of length nbytes
1792  * (it needn't be nul-terminated)
1793  *
1794  * *buff_uchar receives a pointer to the palloc'd result string, and
1795  * the function's result is the number of UChars generated.
1796  *
1797  * The result string is nul-terminated, though most callers rely on the
1798  * result length instead.
1799  */
1800 int32_t
1801 icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
1802 {
1803  UErrorCode status;
1804  int32_t len_uchar;
1805 
1806  init_icu_converter();
1807 
1808  status = U_ZERO_ERROR;
1809  len_uchar = ucnv_toUChars(icu_converter, NULL, 0,
1810  buff, nbytes, &status);
1811  if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1812  ereport(ERROR,
1813  (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
1814 
1815  *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
1816 
1817  status = U_ZERO_ERROR;
1818  len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar + 1,
1819  buff, nbytes, &status);
1820  if (U_FAILURE(status))
1821  ereport(ERROR,
1822  (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
1823 
1824  return len_uchar;
1825 }
1826 
1827 /*
1828  * Convert a string of UChars into the database encoding.
1829  *
1830  * The source string at buff_uchar is of length len_uchar
1831  * (it needn't be nul-terminated)
1832  *
1833  * *result receives a pointer to the palloc'd result string, and the
1834  * function's result is the number of bytes generated (not counting nul).
1835  *
1836  * The result string is nul-terminated.
1837  */
1838 int32_t
1839 icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
1840 {
1841  UErrorCode status;
1842  int32_t len_result;
1843 
1844  init_icu_converter();
1845 
1846  status = U_ZERO_ERROR;
1847  len_result = ucnv_fromUChars(icu_converter, NULL, 0,
1848  buff_uchar, len_uchar, &status);
1849  if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1850  ereport(ERROR,
1851  (errmsg("%s failed: %s", "ucnv_fromUChars",
1852  u_errorName(status))));
1853 
1854  *result = palloc(len_result + 1);
1855 
1856  status = U_ZERO_ERROR;
1857  len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
1858  buff_uchar, len_uchar, &status);
1859  if (U_FAILURE(status))
1860  ereport(ERROR,
1861  (errmsg("%s failed: %s", "ucnv_fromUChars",
1862  u_errorName(status))));
1863 
1864  return len_result;
1865 }
1866 
1867 /*
1868  * Parse collation attributes and apply them to the open collator. This takes
1869  * a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
1870  * applies the key-value arguments.
1871  *
1872  * Starting with ICU version 54, the attributes are processed automatically by
1873  * ucol_open(), so this is only necessary for emulating this behavior on older
1874  * versions.
1875  */
1877 static void
1878 icu_set_collation_attributes(UCollator *collator, const char *loc)
1879 {
1880  char *str = asc_tolower(loc, strlen(loc));
1881 
1882  str = strchr(str, '@');
1883  if (!str)
1884  return;
1885  str++;
1886 
1887  for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";"))
1888  {
1889  char *e = strchr(token, '=');
1890 
1891  if (e)
1892  {
1893  char *name;
1894  char *value;
1895  UColAttribute uattr;
1896  UColAttributeValue uvalue;
1897  UErrorCode status;
1898 
1899  status = U_ZERO_ERROR;
1900 
1901  *e = '\0';
1902  name = token;
1903  value = e + 1;
1904 
1905  /*
1906  * See attribute name and value lists in ICU i18n/coll.cpp
1907  */
1908  if (strcmp(name, "colstrength") == 0)
1909  uattr = UCOL_STRENGTH;
1910  else if (strcmp(name, "colbackwards") == 0)
1911  uattr = UCOL_FRENCH_COLLATION;
1912  else if (strcmp(name, "colcaselevel") == 0)
1913  uattr = UCOL_CASE_LEVEL;
1914  else if (strcmp(name, "colcasefirst") == 0)
1915  uattr = UCOL_CASE_FIRST;
1916  else if (strcmp(name, "colalternate") == 0)
1917  uattr = UCOL_ALTERNATE_HANDLING;
1918  else if (strcmp(name, "colnormalization") == 0)
1919  uattr = UCOL_NORMALIZATION_MODE;
1920  else if (strcmp(name, "colnumeric") == 0)
1921  uattr = UCOL_NUMERIC_COLLATION;
1922  else
1923  /* ignore if unknown */
1924  continue;
1925 
1926  if (strcmp(value, "primary") == 0)
1927  uvalue = UCOL_PRIMARY;
1928  else if (strcmp(value, "secondary") == 0)
1929  uvalue = UCOL_SECONDARY;
1930  else if (strcmp(value, "tertiary") == 0)
1931  uvalue = UCOL_TERTIARY;
1932  else if (strcmp(value, "quaternary") == 0)
1933  uvalue = UCOL_QUATERNARY;
1934  else if (strcmp(value, "identical") == 0)
1935  uvalue = UCOL_IDENTICAL;
1936  else if (strcmp(value, "no") == 0)
1937  uvalue = UCOL_OFF;
1938  else if (strcmp(value, "yes") == 0)
1939  uvalue = UCOL_ON;
1940  else if (strcmp(value, "shifted") == 0)
1941  uvalue = UCOL_SHIFTED;
1942  else if (strcmp(value, "non-ignorable") == 0)
1943  uvalue = UCOL_NON_IGNORABLE;
1944  else if (strcmp(value, "lower") == 0)
1945  uvalue = UCOL_LOWER_FIRST;
1946  else if (strcmp(value, "upper") == 0)
1947  uvalue = UCOL_UPPER_FIRST;
1948  else
1949  status = U_ILLEGAL_ARGUMENT_ERROR;
1950 
1951  if (status == U_ZERO_ERROR)
1952  ucol_setAttribute(collator, uattr, uvalue, &status);
1953 
1954  /*
1955  * Pretend the error came from ucol_open(), for consistent error
1956  * message across ICU versions.
1957  */
1958  if (U_FAILURE(status))
1959  ereport(ERROR,
1960  (errmsg("could not open collator for locale \"%s\": %s",
1961  loc, u_errorName(status))));
1962  }
1963  }
1964 }
1965 
1966 #endif /* USE_ICU */
1967 
1968 /*
1969  * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
1970  * Therefore we keep them here rather than with the mbutils code.
1971  */
1972 
1973 /*
1974  * wchar2char --- convert wide characters to multibyte format
1975  *
1976  * This has the same API as the standard wcstombs_l() function; in particular,
1977  * tolen is the maximum number of bytes to store at *to, and *from must be
1978  * zero-terminated. The output will be zero-terminated iff there is room.
1979  */
1980 size_t
1981 wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
1982 {
1983  size_t result;
1984 
1985  Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1986 
1987  if (tolen == 0)
1988  return 0;
1989 
1990 #ifdef WIN32
1991 
1992  /*
1993  * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
1994  * for some reason mbstowcs and wcstombs won't do this for us, so we use
1995  * MultiByteToWideChar().
1996  */
1997  if (GetDatabaseEncoding() == PG_UTF8)
1998  {
1999  result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
2000  NULL, NULL);
2001  /* A zero return is failure */
2002  if (result <= 0)
2003  result = -1;
2004  else
2005  {
2006  Assert(result <= tolen);
2007  /* Microsoft counts the zero terminator in the result */
2008  result--;
2009  }
2010  }
2011  else
2012 #endif /* WIN32 */
2013  if (locale == (pg_locale_t) 0)
2014  {
2015  /* Use wcstombs directly for the default locale */
2016  result = wcstombs(to, from, tolen);
2017  }
2018  else
2019  {
2020 #ifdef HAVE_LOCALE_T
2021 #ifdef HAVE_WCSTOMBS_L
2022  /* Use wcstombs_l for nondefault locales */
2023  result = wcstombs_l(to, from, tolen, locale->info.lt);
2024 #else /* !HAVE_WCSTOMBS_L */
2025  /* We have to temporarily set the locale as current ... ugh */
2026  locale_t save_locale = uselocale(locale->info.lt);
2027 
2028  result = wcstombs(to, from, tolen);
2029 
2030  uselocale(save_locale);
2031 #endif /* HAVE_WCSTOMBS_L */
2032 #else /* !HAVE_LOCALE_T */
2033  /* Can't have locale != 0 without HAVE_LOCALE_T */
2034  elog(ERROR, "wcstombs_l is not available");
2035  result = 0; /* keep compiler quiet */
2036 #endif /* HAVE_LOCALE_T */
2037  }
2038 
2039  return result;
2040 }
2041 
2042 /*
2043  * char2wchar --- convert multibyte characters to wide characters
2044  *
2045  * This has almost the API of mbstowcs_l(), except that *from need not be
2046  * null-terminated; instead, the number of input bytes is specified as
2047  * fromlen. Also, we ereport() rather than returning -1 for invalid
2048  * input encoding. tolen is the maximum number of wchar_t's to store at *to.
2049  * The output will be zero-terminated iff there is room.
2050  */
2051 size_t
2052 char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
2054 {
2055  size_t result;
2056 
2057  Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
2058 
2059  if (tolen == 0)
2060  return 0;
2061 
2062 #ifdef WIN32
2063  /* See WIN32 "Unicode" comment above */
2064  if (GetDatabaseEncoding() == PG_UTF8)
2065  {
2066  /* Win32 API does not work for zero-length input */
2067  if (fromlen == 0)
2068  result = 0;
2069  else
2070  {
2071  result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
2072  /* A zero return is failure */
2073  if (result == 0)
2074  result = -1;
2075  }
2076 
2077  if (result != -1)
2078  {
2079  Assert(result < tolen);
2080  /* Append trailing null wchar (MultiByteToWideChar() does not) */
2081  to[result] = 0;
2082  }
2083  }
2084  else
2085 #endif /* WIN32 */
2086  {
2087  /* mbstowcs requires ending '\0' */
2088  char *str = pnstrdup(from, fromlen);
2089 
2090  if (locale == (pg_locale_t) 0)
2091  {
2092  /* Use mbstowcs directly for the default locale */
2093  result = mbstowcs(to, str, tolen);
2094  }
2095  else
2096  {
2097 #ifdef HAVE_LOCALE_T
2098 #ifdef HAVE_MBSTOWCS_L
2099  /* Use mbstowcs_l for nondefault locales */
2100  result = mbstowcs_l(to, str, tolen, locale->info.lt);
2101 #else /* !HAVE_MBSTOWCS_L */
2102  /* We have to temporarily set the locale as current ... ugh */
2103  locale_t save_locale = uselocale(locale->info.lt);
2104 
2105  result = mbstowcs(to, str, tolen);
2106 
2107  uselocale(save_locale);
2108 #endif /* HAVE_MBSTOWCS_L */
2109 #else /* !HAVE_LOCALE_T */
2110  /* Can't have locale != 0 without HAVE_LOCALE_T */
2111  elog(ERROR, "mbstowcs_l is not available");
2112  result = 0; /* keep compiler quiet */
2113 #endif /* HAVE_LOCALE_T */
2114  }
2115 
2116  pfree(str);
2117  }
2118 
2119  if (result == -1)
2120  {
2121  /*
2122  * Invalid multibyte character encountered. We try to give a useful
2123  * error message by letting pg_verifymbstr check the string. But it's
2124  * possible that the string is OK to us, and not OK to mbstowcs ---
2125  * this suggests that the LC_CTYPE locale is different from the
2126  * database encoding. Give a generic error message if pg_verifymbstr
2127  * can't find anything wrong.
2128  */
2129  pg_verifymbstr(from, fromlen, false); /* might not return */
2130  /* but if it does ... */
2131  ereport(ERROR,
2132  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
2133  errmsg("invalid multibyte character for locale"),
2134  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
2135  }
2136 
2137  return result;
2138 }
static char lc_numeric_envbuf[LC_ENV_BUFSIZE]
Definition: pg_locale.c:117
void SetMessageEncoding(int encoding)
Definition: mbutils.c:1055
bool flags_valid
Definition: pg_locale.c:127
int errhint(const char *fmt,...)
Definition: elog.c:1068
char * pnstrdup(const char *in, Size len)
Definition: mcxt.c:1198
static bool CurrentLocaleConvValid
Definition: pg_locale.c:103
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
Oid collid
Definition: pg_locale.c:124
#define HASH_ELEM
Definition: hsearch.h:85
bool ctype_is_c
Definition: pg_locale.c:126
static char lc_monetary_envbuf[LC_ENV_BUFSIZE]
Definition: pg_locale.c:116
#define setlocale(a, b)
Definition: win32_port.h:436
#define DEBUG3
Definition: elog.h:23
char * pstrdup(const char *in)
Definition: mcxt.c:1187
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
#define LC_ENV_BUFSIZE
Definition: pg_locale.c:108
bool check_locale(int category, const char *locale, char **canonname)
Definition: pg_locale.c:271
bool check_locale_time(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:338
Size entrysize
Definition: hsearch.h:72
int errcode(int sqlerrcode)
Definition: elog.c:610
#define wcstombs_l
Definition: win32_port.h:417
void assign_locale_numeric(const char *newval, void *extra)
Definition: pg_locale.c:332
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define lengthof(array)
Definition: c.h:676
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:919
char * locale_numeric
Definition: pg_locale.c:87
unsigned int Oid
Definition: postgres_ext.h:31
#define OidIsValid(objectId)
Definition: c.h:652
bool collate_is_c
Definition: pg_locale.c:125
void assign_locale_monetary(const char *newval, void *extra)
Definition: pg_locale.c:320
static char lc_time_envbuf[LC_ENV_BUFSIZE]
Definition: pg_locale.c:118
GucSource
Definition: guc.h:105
char * pg_perm_setlocale(int category, const char *locale)
Definition: pg_locale.c:157
static void free_struct_lconv(struct lconv *s)
Definition: pg_locale.c:400
static struct pg_tm tm
Definition: localtime.c:102
void assign_locale_time(const char *newval, void *extra)
Definition: pg_locale.c:344
Definition: dynahash.c:218
void pfree(void *pointer)
Definition: mcxt.c:1057
#define MAX_L10N_DATA
Definition: pg_locale.c:81
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
bool lc_collate_is_c(Oid collation)
Definition: pg_locale.c:1347
#define FATAL
Definition: elog.h:52
char * localized_abbrev_months[12+1]
Definition: pg_locale.c:99
const char * get_encoding_name_for_icu(int encoding)
Definition: encnames.c:470
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3191
void check_strxfrm_bug(void)
Definition: pg_locale.c:1212
static char * buf
Definition: pg_test_fsync.c:68
int errdetail(const char *fmt,...)
Definition: elog.c:954
char * localized_full_days[7+1]
Definition: pg_locale.c:98
#define locale_t
Definition: win32_port.h:391
MemoryContext TopMemoryContext
Definition: mcxt.c:44
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
Definition: pg_locale.c:2052
static char lc_ctype_envbuf[LC_ENV_BUFSIZE]
Definition: pg_locale.c:111
char * quote_qualified_identifier(const char *qualifier, const char *ident)
Definition: ruleutils.c:10793
#define WARNING
Definition: elog.h:40
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1116
pg_locale_t locale
Definition: pg_locale.c:128
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1494
#define HASH_BLOBS
Definition: hsearch.h:86
#define TextDatumGetCString(d)
Definition: builtins.h:87
bool check_locale_numeric(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:326
uintptr_t Datum
Definition: postgres.h:367
HTAB * hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
Definition: dynahash.c:326
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1164
int GetDatabaseEncoding(void)
Definition: mbutils.c:1151
bool deterministic
Definition: pg_locale.h:85
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:1377
Size keysize
Definition: hsearch.h:71
int pg_mbstrlen(const char *mbstr)
Definition: mbutils.c:921
static HTAB * collation_cache
Definition: pg_locale.c:131
int pg_get_encoding_from_locale(const char *ctype, bool write_message)
Definition: chklocale.c:452
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition: mbutils.c:1446
static struct @143 value
#define ereport(elevel,...)
Definition: elog.h:144
#define free(a)
Definition: header.h:65
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
const char * pg_encoding_to_char(int encoding)
Definition: encnames.c:588
int errmsg_internal(const char *fmt,...)
Definition: elog.c:908
#define PG_CATCH()
Definition: elog.h:305
char * localized_full_months[12+1]
Definition: pg_locale.c:100
char * locale_messages
Definition: pg_locale.c:85
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define Assert(condition)
Definition: c.h:746
void cache_locale_time(void)
Definition: pg_locale.c:772
void assign_locale_messages(const char *newval, void *extra)
Definition: pg_locale.c:383
bool check_locale_messages(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:360
static bool struct_lconv_is_valid(struct lconv *s)
Definition: pg_locale.c:429
bool check_locale_monetary(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:314
pg_attribute_unused()
Definition: isn.c:63
struct lconv * PGLC_localeconv(void)
Definition: pg_locale.c:490
#define newval
static void cache_single_string(char **dst, const char *src, int encoding)
Definition: pg_locale.c:749
#define PG_RE_THROW()
Definition: elog.h:336
int32 encoding
Definition: pg_database.h:41
#define putenv(x)
Definition: win32_port.h:502
const char * name
Definition: encode.c:561
FormData_pg_collation * Form_pg_collation
Definition: pg_collation.h:51
#define mbstowcs_l
Definition: win32_port.h:418
e
Definition: preproc-init.c:82
void * palloc(Size size)
Definition: mcxt.c:950
int errmsg(const char *fmt,...)
Definition: elog.c:821
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition: mcxt.c:1174
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:797
size_t wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
Definition: pg_locale.c:1981
#define elog(elevel,...)
Definition: elog.h:214
int i
#define NameStr(name)
Definition: c.h:623
static char * locale
Definition: initdb.c:126
char * localized_abbrev_days[7+1]
Definition: pg_locale.c:97
union pg_locale_struct::@142 info
static char format
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1397
static collation_cache_entry * lookup_collation_cache(Oid collation, bool set_flags)
Definition: pg_locale.c:1282
Definition: pg_locale.c:122
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:227
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:619
#define PG_TRY()
Definition: elog.h:295
static char lc_collate_envbuf[LC_ENV_BUFSIZE]
Definition: pg_locale.c:110
char * locale_monetary
Definition: pg_locale.c:86
char * asc_tolower(const char *buff, size_t nbytes)
Definition: formatting.c:2035
static void db_encoding_convert(int encoding, char **str)
Definition: pg_locale.c:460
#define snprintf
Definition: port.h:215
#define PG_END_TRY()
Definition: elog.h:320
char * locale_time
Definition: pg_locale.c:88
static bool CurrentLCTimeValid
Definition: pg_locale.c:104
char * get_collation_actual_version(char collprovider, const char *collcollate)
Definition: pg_locale.c:1671