PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
pg_locale.c
Go to the documentation of this file.
1 /*-----------------------------------------------------------------------
2  *
3  * PostgreSQL locale utilities
4  *
5  * Portions Copyright (c) 2002-2017, PostgreSQL Global Development Group
6  *
7  * src/backend/utils/adt/pg_locale.c
8  *
9  *-----------------------------------------------------------------------
10  */
11 
12 /*----------
13  * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14  * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15  * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16  * toupper(), etc. are always in the same fixed locale.
17  *
18  * LC_MESSAGES is settable at run time and will take effect
19  * immediately.
20  *
21  * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
22  * settable at run-time. However, we don't actually set those locale
23  * categories permanently. This would have bizarre effects like no
24  * longer accepting standard floating-point literals in some locales.
25  * Instead, we only set the locales briefly when needed, cache the
26  * required information obtained from localeconv(), and set them back.
27  * The cached information is only used by the formatting functions
28  * (to_char, etc.) and the money type. For the user, this should all be
29  * transparent.
30  *
31  * !!! NOW HEAR THIS !!!
32  *
33  * We've been bitten repeatedly by this bug, so let's try to keep it in
34  * mind in future: on some platforms, the locale functions return pointers
35  * to static data that will be overwritten by any later locale function.
36  * Thus, for example, the obvious-looking sequence
37  * save = setlocale(category, NULL);
38  * if (!setlocale(category, value))
39  * fail = true;
40  * setlocale(category, save);
41  * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
42  * will change the memory save is pointing at. To do this sort of thing
43  * safely, you *must* pstrdup what setlocale returns the first time.
44  *
45  * FYI, The Open Group locale standard is defined here:
46  *
47  * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
48  *----------
49  */
50 
51 
52 #include "postgres.h"
53 
54 #include <time.h>
55 
56 #include "access/htup_details.h"
57 #include "catalog/pg_collation.h"
58 #include "catalog/pg_control.h"
59 #include "mb/pg_wchar.h"
60 #include "utils/hsearch.h"
61 #include "utils/memutils.h"
62 #include "utils/pg_locale.h"
63 #include "utils/syscache.h"
64 
#ifdef WIN32
/*
 * <shlwapi.h> defines its own StrNCpy.  We don't need it here, so undefine
 * any existing StrNCpy before the include to keep the compiler quiet, and
 * undefine the name again afterwards so that nothing in this file can
 * accidentally pick up the Windows version.
 */
#undef StrNCpy
#include <shlwapi.h>
#ifdef StrNCpy
#undef StrNCpy
#endif
#endif
77 
78 #define MAX_L10N_DATA 80
79 
80 
81 /* GUC settings */
86 
87 /* lc_time localization cache */
92 
93 /* indicates whether locale information cache is valid */
94 static bool CurrentLocaleConvValid = false;
95 static bool CurrentLCTimeValid = false;
96 
97 /* Environment variable storage area */
98 
99 #define LC_ENV_BUFSIZE (NAMEDATALEN + 20)
100 
103 
104 #ifdef LC_MESSAGES
105 static char lc_messages_envbuf[LC_ENV_BUFSIZE];
106 #endif
110 
111 /* Cache for collation-related knowledge */
112 
113 typedef struct
114 {
115  Oid collid; /* hash key: pg_collation OID */
116  bool collate_is_c; /* is collation's LC_COLLATE C? */
117  bool ctype_is_c; /* is collation's LC_CTYPE C? */
118  bool flags_valid; /* true if above flags are valid */
119  pg_locale_t locale; /* locale_t struct, or 0 if not valid */
121 
123 
124 
125 #if defined(WIN32) && defined(LC_MESSAGES)
126 static char *IsoLocaleName(const char *); /* MSVC specific */
127 #endif
128 
129 
130 /*
131  * pg_perm_setlocale
132  *
133  * This wraps the libc function setlocale(), with two additions. First, when
134  * changing LC_CTYPE, update gettext's encoding for the current message
135  * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but
136  * not on Windows. Second, if the operation is successful, the corresponding
137  * LC_XXX environment variable is set to match. By setting the environment
138  * variable, we ensure that any subsequent use of setlocale(..., "") will
139  * preserve the settings made through this routine. Of course, LC_ALL must
140  * also be unset to fully ensure that, but that has to be done elsewhere after
141  * all the individual LC_XXX variables have been set correctly. (Thank you
142  * Perl for making this kluge necessary.)
143  */
/*
 * Returns NULL when setlocale() rejects the value, when the category is not
 * recognized, or when putenv() fails.  Otherwise returns the locale name that
 * was written into the environment; for LC_CTYPE this points into a
 * function-static buffer.  On Windows, an LC_MESSAGES request with a NULL or
 * empty locale returns the argument unchanged without touching the
 * environment.
 */
144 char *
145 pg_perm_setlocale(int category, const char *locale)
146 {
147  char *result;
148  const char *envvar;
149  char *envbuf;
150 
151 #ifndef WIN32
152  result = setlocale(category, locale);
153 #else
154 
155  /*
156  * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
157  * the given value is good and set it in the environment variables. We
158  * must ignore attempts to set to "", which means "keep using the old
159  * environment value".
160  */
161 #ifdef LC_MESSAGES
162  if (category == LC_MESSAGES)
163  {
164  result = (char *) locale;
165  if (locale == NULL || locale[0] == '\0')
166  return result;
167  }
168  else
169 #endif
170  result = setlocale(category, locale);
171 #endif /* WIN32 */
172 
173  if (result == NULL)
174  return result; /* fall out immediately on failure */
175 
176  /*
177  * Use the right encoding in translated messages. Under ENABLE_NLS, let
178  * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message
179  * format strings are ASCII, but database-encoding strings may enter the
180  * message via %s. This makes the overall message encoding equal to the
181  * database encoding.
182  */
183  if (category == LC_CTYPE)
184  {
185  static char save_lc_ctype[LC_ENV_BUFSIZE];
186 
187  /* copy setlocale() return value before callee invokes it again */
188  strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
189  result = save_lc_ctype;
190 
191 #ifdef ENABLE_NLS
192  SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
193 #else
 /*
  * NOTE(review): this branch is empty in this listing even though the
  * comment above describes a !ENABLE_NLS action, and the embedded line
  * numbering skips a line here -- confirm against the upstream file.
  */
195 #endif
196  }
197 
 /* Pick the environment variable name and its storage for this category. */
198  switch (category)
199  {
200  case LC_COLLATE:
201  envvar = "LC_COLLATE";
202  envbuf = lc_collate_envbuf;
203  break;
204  case LC_CTYPE:
205  envvar = "LC_CTYPE";
206  envbuf = lc_ctype_envbuf;
207  break;
208 #ifdef LC_MESSAGES
209  case LC_MESSAGES:
210  envvar = "LC_MESSAGES";
211  envbuf = lc_messages_envbuf;
212 #ifdef WIN32
 /*
  * Export the Unix-style name when IsoLocaleName() can produce one;
  * otherwise fall back to the caller's string as given.
  */
213  result = IsoLocaleName(locale);
214  if (result == NULL)
215  result = (char *) locale;
216 #endif /* WIN32 */
217  break;
218 #endif /* LC_MESSAGES */
219  case LC_MONETARY:
220  envvar = "LC_MONETARY";
221  envbuf = lc_monetary_envbuf;
222  break;
223  case LC_NUMERIC:
224  envvar = "LC_NUMERIC";
225  envbuf = lc_numeric_envbuf;
226  break;
227  case LC_TIME:
228  envvar = "LC_TIME";
229  envbuf = lc_time_envbuf;
230  break;
231  default:
232  elog(FATAL, "unrecognized LC category: %d", category);
233  envvar = NULL; /* keep compiler quiet */
234  envbuf = NULL;
235  return NULL;
236  }
237 
 /*
  * Build "NAME=value" in the per-category buffer chosen above.  snprintf()
  * truncates silently when the value does not fit; no check is made here.
  * NOTE(review): the buffer itself is handed to putenv(), so it presumably
  * must stay allocated for the life of the process -- confirm against the
  * envbuf declarations.
  */
238  snprintf(envbuf, LC_ENV_BUFSIZE - 1, "%s=%s", envvar, result);
239 
240  if (putenv(envbuf))
241  return NULL;
242 
243  return result;
244 }
245 
246 
247 /*
248  * Is the locale name valid for the locale category?
249  *
250  * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
251  * canonical name is stored there. This is especially useful for figuring out
252  * what locale name "" means (ie, the server environment value). (Actually,
253  * it seems that on most implementations that's the only thing it's good for;
254  * we could wish that setlocale gave back a canonically spelled version of
255  * the locale name, but typically it doesn't.)
256  */
257 bool
258 check_locale(int category, const char *locale, char **canonname)
259 {
260  char *save;
261  char *res;
262 
263  if (canonname)
264  *canonname = NULL; /* in case of failure */
265 
266  save = setlocale(category, NULL);
267  if (!save)
268  return false; /* won't happen, we hope */
269 
270  /* save may be pointing at a modifiable scratch variable, see above. */
271  save = pstrdup(save);
272 
273  /* set the locale with setlocale, to see if it accepts it. */
274  res = setlocale(category, locale);
275 
276  /* save canonical name if requested. */
277  if (res && canonname)
278  *canonname = pstrdup(res);
279 
280  /* restore old value. */
281  if (!setlocale(category, save))
282  elog(WARNING, "failed to restore old locale \"%s\"", save);
283  pfree(save);
284 
285  return (res != NULL);
286 }
287 
288 
289 /*
290  * GUC check/assign hooks
291  *
292  * For most locale categories, the assign hook doesn't actually set the locale
293  * permanently, just reset flags so that the next use will cache the
294  * appropriate values. (See explanation at the top of this file.)
295  *
296  * Note: we accept value = "" as selecting the postmaster's environment
297  * value, whatever it was (so long as the environment setting is legal).
298  * This will have been locked down by an earlier call to pg_perm_setlocale.
299  */
300 bool
301 check_locale_monetary(char **newval, void **extra, GucSource source)
302 {
303  return check_locale(LC_MONETARY, *newval, NULL);
304 }
305 
306 void
307 assign_locale_monetary(const char *newval, void *extra)
308 {
309  CurrentLocaleConvValid = false;
310 }
311 
312 bool
313 check_locale_numeric(char **newval, void **extra, GucSource source)
314 {
315  return check_locale(LC_NUMERIC, *newval, NULL);
316 }
317 
318 void
319 assign_locale_numeric(const char *newval, void *extra)
320 {
321  CurrentLocaleConvValid = false;
322 }
323 
324 bool
325 check_locale_time(char **newval, void **extra, GucSource source)
326 {
327  return check_locale(LC_TIME, *newval, NULL);
328 }
329 
330 void
331 assign_locale_time(const char *newval, void *extra)
332 {
333  CurrentLCTimeValid = false;
334 }
335 
336 /*
337  * We allow LC_MESSAGES to actually be set globally.
338  *
339  * Note: we normally disallow value = "" because it wouldn't have consistent
340  * semantics (it'd effectively just use the previous value). However, this
341  * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
342  * not even if the attempted setting fails due to invalid environment value.
343  * The idea there is just to accept the environment setting *if possible*
344  * during startup, until we can read the proper value from postgresql.conf.
345  */
346 bool
347 check_locale_messages(char **newval, void **extra, GucSource source)
348 {
349  if (**newval == '\0')
350  {
351  if (source == PGC_S_DEFAULT)
352  return true;
353  else
354  return false;
355  }
356 
357  /*
358  * LC_MESSAGES category does not exist everywhere, but accept it anyway
359  *
360  * On Windows, we can't even check the value, so accept blindly
361  */
362 #if defined(LC_MESSAGES) && !defined(WIN32)
363  return check_locale(LC_MESSAGES, *newval, NULL);
364 #else
365  return true;
366 #endif
367 }
368 
void
assign_locale_messages(const char *newval, void *extra)
{
#ifdef LC_MESSAGES

    /*
     * Apply the value for real.  The result of pg_perm_setlocale() is
     * deliberately discarded: a value that cannot be applied is not treated
     * as an error here.  Where the LC_MESSAGES category does not exist, the
     * setting is accepted and nothing is done.
     */
    (void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
380 
381 
/*
 * Release the malloc'd strings referenced by a struct lconv.  The struct
 * itself is not freed, and its members are left holding their old (now
 * dangling) values.  It's important that this not throw elog(ERROR).
 *
 * Members that are still NULL need no special handling: free(NULL) is
 * defined to do nothing.  The member list must stay in step with
 * struct_lconv_is_valid().
 */
static void
free_struct_lconv(struct lconv * s)
{
    /* numeric formatting */
    free(s->decimal_point);
    free(s->thousands_sep);
    free(s->grouping);

    /* monetary formatting */
    free(s->int_curr_symbol);
    free(s->currency_symbol);
    free(s->mon_decimal_point);
    free(s->mon_thousands_sep);
    free(s->mon_grouping);
    free(s->positive_sign);
    free(s->negative_sign);
}
410 
/*
 * Report whether every string member of a struct lconv that we care about
 * has been filled in, i.e. is non-NULL.  The set of members tested here must
 * be the same set that free_struct_lconv() releases.
 */
static bool
struct_lconv_is_valid(struct lconv * s)
{
    return s->decimal_point != NULL &&
        s->thousands_sep != NULL &&
        s->grouping != NULL &&
        s->int_curr_symbol != NULL &&
        s->currency_symbol != NULL &&
        s->mon_decimal_point != NULL &&
        s->mon_thousands_sep != NULL &&
        s->mon_grouping != NULL &&
        s->positive_sign != NULL &&
        s->negative_sign != NULL;
}
440 
441 
442 /*
443  * Convert the strdup'd string at *str from the specified encoding to the
444  * database encoding.
445  */
446 static void
448 {
449  char *pstr;
450  char *mstr;
451 
452  /* convert the string to the database encoding */
453  pstr = pg_any_to_server(*str, strlen(*str), encoding);
454  if (pstr == *str)
455  return; /* no conversion happened */
456 
457  /* need it malloc'd not palloc'd */
458  mstr = strdup(pstr);
459  if (mstr == NULL)
460  ereport(ERROR,
461  (errcode(ERRCODE_OUT_OF_MEMORY),
462  errmsg("out of memory")));
463 
464  /* replace old string */
465  free(*str);
466  *str = mstr;
467 
468  pfree(pstr);
469 }
470 
471 
472 /*
473  * Return the POSIX lconv struct (contains number/money formatting
474  * information) with locale information for all categories.
475  */
476 struct lconv *
478 {
479  static struct lconv CurrentLocaleConv;
480  static bool CurrentLocaleConvAllocated = false;
481  struct lconv *extlconv;
482  struct lconv worklconv;
483  bool trouble = false;
484  char *save_lc_monetary;
485  char *save_lc_numeric;
486 #ifdef WIN32
487  char *save_lc_ctype;
488 #endif
489 
490  /* Did we do it already? */
492  return &CurrentLocaleConv;
493 
494  /* Free any already-allocated storage */
495  if (CurrentLocaleConvAllocated)
496  {
497  free_struct_lconv(&CurrentLocaleConv);
498  CurrentLocaleConvAllocated = false;
499  }
500 
501  /*
502  * This is tricky because we really don't want to risk throwing error
503  * while the locale is set to other than our usual settings. Therefore,
504  * the process is: collect the usual settings, set locale to special
505  * setting, copy relevant data into worklconv using strdup(), restore
506  * normal settings, convert data to desired encoding, and finally stash
507  * the collected data in CurrentLocaleConv. This makes it safe if we
508  * throw an error during encoding conversion or run out of memory anywhere
509  * in the process. All data pointed to by struct lconv members is
510  * allocated with strdup, to avoid premature elog(ERROR) and to allow
511  * using a single cleanup routine.
512  */
513  memset(&worklconv, 0, sizeof(worklconv));
514 
515  /* Save user's values of monetary and numeric locales */
516  save_lc_monetary = setlocale(LC_MONETARY, NULL);
517  if (save_lc_monetary)
518  save_lc_monetary = pstrdup(save_lc_monetary);
519 
520  save_lc_numeric = setlocale(LC_NUMERIC, NULL);
521  if (save_lc_numeric)
522  save_lc_numeric = pstrdup(save_lc_numeric);
523 
524 #ifdef WIN32
525 
526  /*
527  * Ideally, monetary and numeric local symbols could be returned in any
528  * server encoding. Unfortunately, the WIN32 API does not allow
529  * setlocale() to return values in a codepage/CTYPE that uses more than
530  * two bytes per character, such as UTF-8:
531  *
532  * http://msdn.microsoft.com/en-us/library/x99tb11d.aspx
533  *
534  * Evidently, LC_CTYPE allows us to control the encoding used for strings
535  * returned by localeconv(). The Open Group standard, mentioned at the
536  * top of this C file, doesn't explicitly state this.
537  *
538  * Therefore, we set LC_CTYPE to match LC_NUMERIC or LC_MONETARY (which
539  * cannot be UTF8), call localeconv(), and then convert from the
540  * numeric/monetary LC_CTYPE to the server encoding. One example use of
541  * this is for the Euro symbol.
542  *
543  * Perhaps someday we will use GetLocaleInfoW() which returns values in
544  * UTF16 and convert from that.
545  */
546 
547  /* save user's value of ctype locale */
548  save_lc_ctype = setlocale(LC_CTYPE, NULL);
549  if (save_lc_ctype)
550  save_lc_ctype = pstrdup(save_lc_ctype);
551 
552  /* Here begins the critical section where we must not throw error */
553 
554  /* use numeric to set the ctype */
555  setlocale(LC_CTYPE, locale_numeric);
556 #endif
557 
558  /* Get formatting information for numeric */
559  setlocale(LC_NUMERIC, locale_numeric);
560  extlconv = localeconv();
561 
562  /* Must copy data now in case setlocale() overwrites it */
563  worklconv.decimal_point = strdup(extlconv->decimal_point);
564  worklconv.thousands_sep = strdup(extlconv->thousands_sep);
565  worklconv.grouping = strdup(extlconv->grouping);
566 
567 #ifdef WIN32
568  /* use monetary to set the ctype */
569  setlocale(LC_CTYPE, locale_monetary);
570 #endif
571 
572  /* Get formatting information for monetary */
573  setlocale(LC_MONETARY, locale_monetary);
574  extlconv = localeconv();
575 
576  /* Must copy data now in case setlocale() overwrites it */
577  worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
578  worklconv.currency_symbol = strdup(extlconv->currency_symbol);
579  worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
580  worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
581  worklconv.mon_grouping = strdup(extlconv->mon_grouping);
582  worklconv.positive_sign = strdup(extlconv->positive_sign);
583  worklconv.negative_sign = strdup(extlconv->negative_sign);
584  /* Copy scalar fields as well */
585  worklconv.int_frac_digits = extlconv->int_frac_digits;
586  worklconv.frac_digits = extlconv->frac_digits;
587  worklconv.p_cs_precedes = extlconv->p_cs_precedes;
588  worklconv.p_sep_by_space = extlconv->p_sep_by_space;
589  worklconv.n_cs_precedes = extlconv->n_cs_precedes;
590  worklconv.n_sep_by_space = extlconv->n_sep_by_space;
591  worklconv.p_sign_posn = extlconv->p_sign_posn;
592  worklconv.n_sign_posn = extlconv->n_sign_posn;
593 
594  /* Try to restore internal settings */
595  if (save_lc_monetary)
596  {
597  if (!setlocale(LC_MONETARY, save_lc_monetary))
598  trouble = true;
599  }
600 
601  if (save_lc_numeric)
602  {
603  if (!setlocale(LC_NUMERIC, save_lc_numeric))
604  trouble = true;
605  }
606 
607 #ifdef WIN32
608  /* Try to restore internal ctype settings */
609  if (save_lc_ctype)
610  {
611  if (!setlocale(LC_CTYPE, save_lc_ctype))
612  trouble = true;
613  }
614 #endif
615 
616  /*
617  * At this point we've done our best to clean up, and can call functions
618  * that might possibly throw errors with a clean conscience. But let's
619  * make sure we don't leak any already-strdup'd fields in worklconv.
620  */
621  PG_TRY();
622  {
623  int encoding;
624 
625  /*
626  * Report it if we failed to restore anything. Perhaps this should be
627  * FATAL, rather than continuing with bad locale settings?
628  */
629  if (trouble)
630  elog(WARNING, "failed to restore old locale");
631 
632  /* Release the pstrdup'd locale names */
633  if (save_lc_monetary)
634  pfree(save_lc_monetary);
635  if (save_lc_numeric)
636  pfree(save_lc_numeric);
637 #ifdef WIN32
638  if (save_lc_ctype)
639  pfree(save_lc_ctype);
640 #endif
641 
642  /* If any of the preceding strdup calls failed, complain now. */
643  if (!struct_lconv_is_valid(&worklconv))
644  ereport(ERROR,
645  (errcode(ERRCODE_OUT_OF_MEMORY),
646  errmsg("out of memory")));
647 
648  /*
649  * Now we must perform encoding conversion from whatever's associated
650  * with the locale into the database encoding.
651  */
653 
654  db_encoding_convert(encoding, &worklconv.decimal_point);
655  db_encoding_convert(encoding, &worklconv.thousands_sep);
656  /* grouping is not text and does not require conversion */
657 
659 
660  db_encoding_convert(encoding, &worklconv.int_curr_symbol);
661  db_encoding_convert(encoding, &worklconv.currency_symbol);
662  db_encoding_convert(encoding, &worklconv.mon_decimal_point);
663  db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
664  /* mon_grouping is not text and does not require conversion */
665  db_encoding_convert(encoding, &worklconv.positive_sign);
666  db_encoding_convert(encoding, &worklconv.negative_sign);
667  }
668  PG_CATCH();
669  {
670  free_struct_lconv(&worklconv);
671  PG_RE_THROW();
672  }
673  PG_END_TRY();
674 
675  /*
676  * Everything is good, so save the results.
677  */
678  CurrentLocaleConv = worklconv;
679  CurrentLocaleConvAllocated = true;
680  CurrentLocaleConvValid = true;
681  return &CurrentLocaleConv;
682 }
683 
684 #ifdef WIN32
685 /*
686  * On WIN32, strftime() returns strings encoded in CP_ACP (the default
687  * operating system codepage for that computer), which is likely different
688  * from SERVER_ENCODING. This is especially important in Japanese versions
689  * of Windows which will use SJIS encoding, which we don't support as a
690  * server encoding.
691  *
692  * So, instead of using strftime(), use wcsftime() to return the value in
693  * wide characters (internally UTF16) and then convert it to the appropriate
694  * database encoding.
695  *
696  * Note that this only affects the calls to strftime() in this file, which are
697  * used to get the locale-aware strings. Other parts of the backend use
698  * pg_strftime(), which isn't locale-aware and does not need to be replaced.
699  */
700 static size_t
701 strftime_win32(char *dst, size_t dstlen,
702  const char *format, const struct tm * tm)
703 {
704  size_t len;
705  wchar_t wformat[8]; /* formats used below need 3 bytes */
706  wchar_t wbuf[MAX_L10N_DATA];
707 
708  /* get a wchar_t version of the format string */
709  len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
710  wformat, lengthof(wformat));
711  if (len == 0)
712  elog(ERROR, "could not convert format string from UTF-8: error code %lu",
713  GetLastError());
714 
715  len = wcsftime(wbuf, MAX_L10N_DATA, wformat, tm);
716  if (len == 0)
717  {
718  /*
719  * strftime failed, possibly because the result would not fit in
720  * MAX_L10N_DATA. Return 0 with the contents of dst unspecified.
721  */
722  return 0;
723  }
724 
725  len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1,
726  NULL, NULL);
727  if (len == 0)
728  elog(ERROR, "could not convert string to UTF-8: error code %lu",
729  GetLastError());
730 
731  dst[len] = '\0';
732  if (GetDatabaseEncoding() != PG_UTF8)
733  {
734  char *convstr = pg_any_to_server(dst, len, PG_UTF8);
735 
736  if (convstr != dst)
737  {
738  strlcpy(dst, convstr, dstlen);
739  len = strlen(dst);
740  pfree(convstr);
741  }
742  }
743 
744  return len;
745 }
746 
747 /* redefine strftime() */
748 #define strftime(a,b,c,d) strftime_win32(a,b,c,d)
749 #endif /* WIN32 */
750 
751 /*
 * Subroutine for cache_locale_time().
 *
 * Formats tm with the given strftime() format into a local buffer, raises
 * ERROR if strftime() reports no output, then frees any string previously
 * held in *dst and stores the new pointer there.
 */
752 static void
753 cache_single_time(char **dst, const char *format, const struct tm * tm)
754 {
755  char buf[MAX_L10N_DATA];
756  char *ptr;
757 
758  /*
759  * MAX_L10N_DATA is sufficient buffer space for every known locale, and
760  * POSIX defines no strftime() errors. (Buffer space exhaustion is not an
761  * error.) An implementation might report errors (e.g. ENOMEM) by
762  * returning 0 (or, less plausibly, a negative value) and setting errno.
763  * Report errno just in case the implementation did that, but clear it in
764  * advance of the call so we don't emit a stale, unrelated errno.
765  */
766  errno = 0;
767  if (strftime(buf, MAX_L10N_DATA, format, tm) <= 0)
768  elog(ERROR, "strftime(%s) failed: %m", format);
769 
 /*
  * NOTE(review): nothing in this listing assigns ptr before it is stored
  * into *dst, and the embedded line numbering skips a line here; the
  * statement that copies buf is presumably missing from the listing --
  * confirm against the upstream file.
  */
771  if (*dst)
772  pfree(*dst);
773  *dst = ptr;
774 }
775 
776 /*
777  * Update the lc_time localization cache variables if needed.
778  */
779 void
781 {
782  char *save_lc_time;
783  time_t timenow;
784  struct tm *timeinfo;
785  int i;
786 
787 #ifdef WIN32
788  char *save_lc_ctype;
789 #endif
790 
791  /* did we do this already? */
792  if (CurrentLCTimeValid)
793  return;
794 
795  elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
796 
797  /* save user's value of time locale */
798  save_lc_time = setlocale(LC_TIME, NULL);
799  if (save_lc_time)
800  save_lc_time = pstrdup(save_lc_time);
801 
802 #ifdef WIN32
803 
804  /*
805  * On WIN32, there is no way to get locale-specific time values in a
806  * specified locale, like we do for monetary/numeric. We can only get
807  * CP_ACP (see strftime_win32) or UTF16. Therefore, we get UTF16 and
808  * convert it to the database locale. However, wcsftime() internally uses
809  * LC_CTYPE, so we set it here. See the WIN32 comment near the top of
810  * PGLC_localeconv().
811  */
812 
813  /* save user's value of ctype locale */
814  save_lc_ctype = setlocale(LC_CTYPE, NULL);
815  if (save_lc_ctype)
816  save_lc_ctype = pstrdup(save_lc_ctype);
817 
818  /* use lc_time to set the ctype */
819  setlocale(LC_CTYPE, locale_time);
820 #endif
821 
822  setlocale(LC_TIME, locale_time);
823 
824  timenow = time(NULL);
825  timeinfo = localtime(&timenow);
826 
827  /* localized days */
828  for (i = 0; i < 7; i++)
829  {
830  timeinfo->tm_wday = i;
831  cache_single_time(&localized_abbrev_days[i], "%a", timeinfo);
832  cache_single_time(&localized_full_days[i], "%A", timeinfo);
833  }
834 
835  /* localized months */
836  for (i = 0; i < 12; i++)
837  {
838  timeinfo->tm_mon = i;
839  timeinfo->tm_mday = 1; /* make sure we don't have invalid date */
840  cache_single_time(&localized_abbrev_months[i], "%b", timeinfo);
841  cache_single_time(&localized_full_months[i], "%B", timeinfo);
842  }
843 
844  /* try to restore internal settings */
845  if (save_lc_time)
846  {
847  if (!setlocale(LC_TIME, save_lc_time))
848  elog(WARNING, "failed to restore old locale");
849  pfree(save_lc_time);
850  }
851 
852 #ifdef WIN32
853  /* try to restore internal ctype settings */
854  if (save_lc_ctype)
855  {
856  if (!setlocale(LC_CTYPE, save_lc_ctype))
857  elog(WARNING, "failed to restore old locale");
858  pfree(save_lc_ctype);
859  }
860 #endif
861 
862  CurrentLCTimeValid = true;
863 }
864 
865 
866 #if defined(WIN32) && defined(LC_MESSAGES)
867 /*
868  * Convert a Windows setlocale() argument to a Unix-style one.
869  *
870  * Regardless of platform, we install message catalogs under a Unix-style
871  * LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings
872  * following that style will elicit localized interface strings.
873  *
874  * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
875  * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
876  * case-insensitive. setlocale() returns the fully-qualified form; for
877  * example, setlocale("thaI") returns "Thai_Thailand.874". Internally,
878  * setlocale() and _create_locale() select a "locale identifier"[1] and store
879  * it in an undocumented _locale_t field. From that LCID, we can retrieve the
880  * ISO 639 language and the ISO 3166 country. Character encoding does not
881  * matter, because the server and client encodings govern that.
882  *
883  * Windows Vista introduced the "locale name" concept[2], closely following
884  * RFC 4646. Locale identifiers are now deprecated. Starting with Visual
885  * Studio 2012, setlocale() accepts locale names in addition to the strings it
886  * accepted historically. It does not standardize them; setlocale("Th-tH")
887  * returns "Th-tH". setlocale(category, "") still returns a traditional
888  * string. Furthermore, msvcr110.dll changed the undocumented _locale_t
889  * content to carry locale names instead of locale identifiers.
890  *
891  * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol.
892  * IsoLocaleName() always fails in a MinGW-built postgres.exe, so only
893  * Unix-style values of the lc_messages GUC can elicit localized messages. In
894  * particular, every lc_messages setting that initdb can select automatically
895  * will yield only C-locale messages. XXX This could be fixed by running the
896  * fully-qualified locale name through a lookup table.
897  *
898  * This function returns a pointer to a static buffer bearing the converted
899  * name or NULL if conversion fails.
900  *
901  * [1] http://msdn.microsoft.com/en-us/library/windows/desktop/dd373763.aspx
902  * [2] http://msdn.microsoft.com/en-us/library/windows/desktop/dd373814.aspx
903  */
/*
 * winlocname: Windows-style locale name to convert.
 *
 * Returns a pointer to a function-static buffer that every call overwrites,
 * or NULL if the name cannot be converted or the compiler is not supported.
 */
904 static char *
905 IsoLocaleName(const char *winlocname)
906 {
907 #if (_MSC_VER >= 1400) /* VC8.0 or later */
908  static char iso_lc_messages[32];
909  _locale_t loct = NULL;
910 
 /* "C" and "POSIX" are mapped to "C" without consulting the CRT. */
911  if (pg_strcasecmp("c", winlocname) == 0 ||
912  pg_strcasecmp("posix", winlocname) == 0)
913  {
914  strcpy(iso_lc_messages, "C");
915  return iso_lc_messages;
916  }
917 
 /* Any other name is resolved through a temporary CRT locale object. */
918  loct = _create_locale(LC_CTYPE, winlocname);
919  if (loct != NULL)
920  {
921 #if (_MSC_VER >= 1700) /* Visual Studio 2012 or later */
922  size_t rc;
923  char *hyphen;
924 
925  /* Locale names use only ASCII, any conversion locale suffices. */
926  rc = wchar2char(iso_lc_messages, loct->locinfo->locale_name[LC_CTYPE],
927  sizeof(iso_lc_messages), NULL);
928  _free_locale(loct);
 /*
  * NOTE(review): presumably -1 signals a conversion failure and a result
  * equal to the buffer size means the name did not fit -- confirm against
  * wchar2char().
  */
929  if (rc == -1 || rc == sizeof(iso_lc_messages))
930  return NULL;
931 
932  /*
933  * Since the message catalogs sit on a case-insensitive filesystem, we
934  * need not standardize letter case here. So long as we do not ship
935  * message catalogs for which it would matter, we also need not
936  * translate the script/variant portion, e.g. uz-Cyrl-UZ to
937  * uz_UZ@cyrillic. Simply replace the hyphen with an underscore.
938  *
939  * Note that the locale name can be less-specific than the value we
940  * would derive under earlier Visual Studio releases. For example,
941  * French_France.1252 yields just "fr". This does not affect any of
942  * the country-specific message catalogs available as of this writing
943  * (pt_BR, zh_CN, zh_TW).
944  */
 /* strchr() finds only the first '-'; any later hyphen is left as is. */
945  hyphen = strchr(iso_lc_messages, '-');
946  if (hyphen)
947  *hyphen = '_';
948 #else
949  char isolang[32],
950  isocrty[32];
951  LCID lcid;
952 
 /* An LCID of 0 is replaced with US English before the lookups below. */
953  lcid = loct->locinfo->lc_handle[LC_CTYPE];
954  if (lcid == 0)
955  lcid = MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), SORT_DEFAULT);
956  _free_locale(loct);
957 
 /* Combine the ISO 639 language and ISO 3166 country as "LL_CC". */
958  if (!GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, isolang, sizeof(isolang)))
959  return NULL;
960  if (!GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, isocrty, sizeof(isocrty)))
961  return NULL;
962  snprintf(iso_lc_messages, sizeof(iso_lc_messages) - 1, "%s_%s", isolang, isocrty);
963 #endif
964  return iso_lc_messages;
965  }
966  return NULL;
967 #else
968  return NULL; /* Not supported on this version of msvc/mingw */
969 #endif /* _MSC_VER >= 1400 */
970 }
971 #endif /* WIN32 && LC_MESSAGES */
972 
973 
974 /*
975  * Detect aging strxfrm() implementations that, in a subset of locales, write
976  * past the specified buffer length. Affected users must update OS packages
977  * before using PostgreSQL 9.5 or later.
978  *
979  * Assume that the bug can come and go from one postmaster startup to another
980  * due to physical replication among diverse machines. Assume that the bug's
981  * presence will not change during the life of a particular postmaster. Given
982  * those assumptions, call this no less than once per postmaster startup per
983  * LC_COLLATE setting used. No known-affected system offers strxfrm_l(), so
984  * there is no need to consider pg_collation locales.
985  */
986 void
988 {
989  char buf[32];
990  const int canary = 0x7F;
991  bool ok = true;
992 
993  /*
994  * Given a two-byte ASCII string and length limit 7, 8 or 9, Solaris 10
995  * 05/08 returns 18 and modifies 10 bytes. It respects limits above or
996  * below that range.
997  *
998  * The bug is present in Solaris 8 as well; it is absent in Solaris 10
999  * 01/13 and Solaris 11.2. Affected locales include is_IS.ISO8859-1,
1000  * en_US.UTF-8, en_US.ISO8859-1, and ru_RU.KOI8-R. Unaffected locales
1001  * include de_DE.UTF-8, de_DE.ISO8859-1, zh_TW.UTF-8, and C.
1002  */
1003  buf[7] = canary;
1004  (void) strxfrm(buf, "ab", 7);
1005  if (buf[7] != canary)
1006  ok = false;
1007 
1008  /*
1009  * illumos bug #1594 was present in the source tree from 2010-10-11 to
1010  * 2012-02-01. Given an ASCII string of any length and length limit 1,
1011  * affected systems ignore the length limit and modify a number of bytes
1012  * one less than the return value. The problem inputs for this bug do not
1013  * overlap those for the Solaris bug, hence a distinct test.
1014  *
1015  * Affected systems include smartos-20110926T021612Z. Affected locales
1016  * include en_US.ISO8859-1 and en_US.UTF-8. Unaffected locales include C.
1017  */
1018  buf[1] = canary;
1019  (void) strxfrm(buf, "a", 1);
1020  if (buf[1] != canary)
1021  ok = false;
1022 
1023  if (!ok)
1024  ereport(ERROR,
1025  (errcode(ERRCODE_SYSTEM_ERROR),
1026  errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length",
1027  setlocale(LC_COLLATE, NULL)),
1028  errhint("Apply system library package updates.")));
1029 }
1030 
1031 
/*
 * Cache mechanism for collation information.
 *
 * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
 * (or POSIX), so we can optimize a few code paths in various places.
 * For the built-in C and POSIX collations, we can know that without even
 * doing a cache lookup, but we want to support aliases for C/POSIX too.
 * For the "default" collation, there are separate static cache variables,
 * since consulting the pg_collation catalog doesn't tell us what we need.
 *
 * Also, if a pg_locale_t has been requested for a collation, we cache that
 * for the life of a backend.
 *
 * Note that some code relies on the flags not reporting false negatives
 * (that is, saying it's not C when it is).  For example, char2wchar()
 * could fail if the locale is C, so str_tolower() shouldn't call it
 * in that case.
 *
 * Note that we currently lack any way to flush the cache.  Since we don't
 * support ALTER COLLATION, this is OK.  The worst case is that someone
 * drops a collation, and a useless cache entry hangs around in existing
 * backends.
 */
1055 
1056 static collation_cache_entry *
1057 lookup_collation_cache(Oid collation, bool set_flags)
1058 {
1059  collation_cache_entry *cache_entry;
1060  bool found;
1061 
1062  Assert(OidIsValid(collation));
1063  Assert(collation != DEFAULT_COLLATION_OID);
1064 
1065  if (collation_cache == NULL)
1066  {
1067  /* First time through, initialize the hash table */
1068  HASHCTL ctl;
1069 
1070  memset(&ctl, 0, sizeof(ctl));
1071  ctl.keysize = sizeof(Oid);
1072  ctl.entrysize = sizeof(collation_cache_entry);
1073  collation_cache = hash_create("Collation cache", 100, &ctl,
1074  HASH_ELEM | HASH_BLOBS);
1075  }
1076 
1077  cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
1078  if (!found)
1079  {
1080  /*
1081  * Make sure cache entry is marked invalid, in case we fail before
1082  * setting things.
1083  */
1084  cache_entry->flags_valid = false;
1085  cache_entry->locale = 0;
1086  }
1087 
1088  if (set_flags && !cache_entry->flags_valid)
1089  {
1090  /* Attempt to set the flags */
1091  HeapTuple tp;
1092  Form_pg_collation collform;
1093  const char *collcollate;
1094  const char *collctype;
1095 
1096  tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
1097  if (!HeapTupleIsValid(tp))
1098  elog(ERROR, "cache lookup failed for collation %u", collation);
1099  collform = (Form_pg_collation) GETSTRUCT(tp);
1100 
1101  collcollate = NameStr(collform->collcollate);
1102  collctype = NameStr(collform->collctype);
1103 
1104  cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
1105  (strcmp(collcollate, "POSIX") == 0));
1106  cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
1107  (strcmp(collctype, "POSIX") == 0));
1108 
1109  cache_entry->flags_valid = true;
1110 
1111  ReleaseSysCache(tp);
1112  }
1113 
1114  return cache_entry;
1115 }
1116 
1117 
1118 /*
1119  * Detect whether collation's LC_COLLATE property is C
1120  */
1121 bool
1123 {
1124  /*
1125  * If we're asked about "collation 0", return false, so that the code will
1126  * go into the non-C path and report that the collation is bogus.
1127  */
1128  if (!OidIsValid(collation))
1129  return false;
1130 
1131  /*
1132  * If we're asked about the default collation, we have to inquire of the C
1133  * library. Cache the result so we only have to compute it once.
1134  */
1135  if (collation == DEFAULT_COLLATION_OID)
1136  {
1137  static int result = -1;
1138  char *localeptr;
1139 
1140  if (result >= 0)
1141  return (bool) result;
1142  localeptr = setlocale(LC_COLLATE, NULL);
1143  if (!localeptr)
1144  elog(ERROR, "invalid LC_COLLATE setting");
1145 
1146  if (strcmp(localeptr, "C") == 0)
1147  result = true;
1148  else if (strcmp(localeptr, "POSIX") == 0)
1149  result = true;
1150  else
1151  result = false;
1152  return (bool) result;
1153  }
1154 
1155  /*
1156  * If we're asked about the built-in C/POSIX collations, we know that.
1157  */
1158  if (collation == C_COLLATION_OID ||
1159  collation == POSIX_COLLATION_OID)
1160  return true;
1161 
1162  /*
1163  * Otherwise, we have to consult pg_collation, but we cache that.
1164  */
1165  return (lookup_collation_cache(collation, true))->collate_is_c;
1166 }
1167 
1168 /*
1169  * Detect whether collation's LC_CTYPE property is C
1170  */
1171 bool
1172 lc_ctype_is_c(Oid collation)
1173 {
1174  /*
1175  * If we're asked about "collation 0", return false, so that the code will
1176  * go into the non-C path and report that the collation is bogus.
1177  */
1178  if (!OidIsValid(collation))
1179  return false;
1180 
1181  /*
1182  * If we're asked about the default collation, we have to inquire of the C
1183  * library. Cache the result so we only have to compute it once.
1184  */
1185  if (collation == DEFAULT_COLLATION_OID)
1186  {
1187  static int result = -1;
1188  char *localeptr;
1189 
1190  if (result >= 0)
1191  return (bool) result;
1192  localeptr = setlocale(LC_CTYPE, NULL);
1193  if (!localeptr)
1194  elog(ERROR, "invalid LC_CTYPE setting");
1195 
1196  if (strcmp(localeptr, "C") == 0)
1197  result = true;
1198  else if (strcmp(localeptr, "POSIX") == 0)
1199  result = true;
1200  else
1201  result = false;
1202  return (bool) result;
1203  }
1204 
1205  /*
1206  * If we're asked about the built-in C/POSIX collations, we know that.
1207  */
1208  if (collation == C_COLLATION_OID ||
1209  collation == POSIX_COLLATION_OID)
1210  return true;
1211 
1212  /*
1213  * Otherwise, we have to consult pg_collation, but we cache that.
1214  */
1215  return (lookup_collation_cache(collation, true))->ctype_is_c;
1216 }
1217 
1218 
/* simple subroutine for reporting errors from newlocale() */
#ifdef HAVE_LOCALE_T
/*
 * report_newlocale_failure --- raise an ERROR for a locale that could not be
 * created.
 *
 * localename is the name that was being created; it appears in the message.
 * errno is sampled on entry, so callers should invoke this directly after
 * the failing call.
 */
static void
report_newlocale_failure(const char *localename)
{
	/* grab errno first: the ereport helper functions might disturb it */
	const int	failure_errno = errno;
	const bool	locale_not_found = (failure_errno == ENOENT);

	/*
	 * In this context ENOENT stands for "no such locale" rather than "no
	 * such file", so that case gets an explanatory errdetail.
	 */
	ereport(ERROR,
			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
			 errmsg("could not create locale \"%s\": %m",
					localename),
			 (locale_not_found ?
			  errdetail("The operating system could not find any locale data for the locale name \"%s\".",
						localename) : 0)));
}
#endif   /* HAVE_LOCALE_T */
1240 
1241 
1242 /*
1243  * Create a locale_t from a collation OID. Results are cached for the
1244  * lifetime of the backend. Thus, do not free the result with freelocale().
1245  *
1246  * As a special optimization, the default/database collation returns 0.
1247  * Callers should then revert to the non-locale_t-enabled code path.
1248  * In fact, they shouldn't call this function at all when they are dealing
1249  * with the default locale. That can save quite a bit in hotspots.
1250  * Also, callers should avoid calling this before going down a C/POSIX
1251  * fastpath, because such a fastpath should work even on platforms without
1252  * locale_t support in the C library.
1253  *
1254  * For simplicity, we always generate COLLATE + CTYPE even though we
1255  * might only need one of them. Since this is called only once per session,
1256  * it shouldn't cost much.
1257  */
1260 {
1261  collation_cache_entry *cache_entry;
1262 
1263  /* Callers must pass a valid OID */
1264  Assert(OidIsValid(collid));
1265 
1266  /* Return 0 for "default" collation, just in case caller forgets */
1267  if (collid == DEFAULT_COLLATION_OID)
1268  return (pg_locale_t) 0;
1269 
1270  cache_entry = lookup_collation_cache(collid, false);
1271 
1272  if (cache_entry->locale == 0)
1273  {
1274  /* We haven't computed this yet in this session, so do it */
1275 #ifdef HAVE_LOCALE_T
1276  HeapTuple tp;
1277  Form_pg_collation collform;
1278  const char *collcollate;
1279  const char *collctype;
1280  locale_t result;
1281 
1282  tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1283  if (!HeapTupleIsValid(tp))
1284  elog(ERROR, "cache lookup failed for collation %u", collid);
1285  collform = (Form_pg_collation) GETSTRUCT(tp);
1286 
1287  collcollate = NameStr(collform->collcollate);
1288  collctype = NameStr(collform->collctype);
1289 
1290  if (strcmp(collcollate, collctype) == 0)
1291  {
1292  /* Normal case where they're the same */
1293 #ifndef WIN32
1294  result = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
1295  NULL);
1296 #else
1297  result = _create_locale(LC_ALL, collcollate);
1298 #endif
1299  if (!result)
1300  report_newlocale_failure(collcollate);
1301  }
1302  else
1303  {
1304 #ifndef WIN32
1305  /* We need two newlocale() steps */
1306  locale_t loc1;
1307 
1308  loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
1309  if (!loc1)
1310  report_newlocale_failure(collcollate);
1311  result = newlocale(LC_CTYPE_MASK, collctype, loc1);
1312  if (!result)
1313  report_newlocale_failure(collctype);
1314 #else
1315 
1316  /*
1317  * XXX The _create_locale() API doesn't appear to support this.
1318  * Could perhaps be worked around by changing pg_locale_t to
1319  * contain two separate fields.
1320  */
1321  ereport(ERROR,
1322  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1323  errmsg("collations with different collate and ctype values are not supported on this platform")));
1324 #endif
1325  }
1326 
1327  cache_entry->locale = result;
1328 
1329  ReleaseSysCache(tp);
1330 #else /* not HAVE_LOCALE_T */
1331 
1332  /*
1333  * For platforms that don't support locale_t, we can't do anything
1334  * with non-default collations.
1335  */
1336  ereport(ERROR,
1337  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1338  errmsg("nondefault collations are not supported on this platform")));
1339 #endif /* not HAVE_LOCALE_T */
1340  }
1341 
1342  return cache_entry->locale;
1343 }
1344 
1345 
/*
 * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
 * Therefore we keep them here rather than with the mbutils code.
 */
1350 
1351 #ifdef USE_WIDE_UPPER_LOWER
1352 
1353 /*
1354  * wchar2char --- convert wide characters to multibyte format
1355  *
1356  * This has the same API as the standard wcstombs_l() function; in particular,
1357  * tolen is the maximum number of bytes to store at *to, and *from must be
1358  * zero-terminated. The output will be zero-terminated iff there is room.
1359  */
1360 size_t
1361 wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
1362 {
1363  size_t result;
1364 
1365  if (tolen == 0)
1366  return 0;
1367 
1368 #ifdef WIN32
1369 
1370  /*
1371  * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
1372  * for some reason mbstowcs and wcstombs won't do this for us, so we use
1373  * MultiByteToWideChar().
1374  */
1375  if (GetDatabaseEncoding() == PG_UTF8)
1376  {
1377  result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
1378  NULL, NULL);
1379  /* A zero return is failure */
1380  if (result <= 0)
1381  result = -1;
1382  else
1383  {
1384  Assert(result <= tolen);
1385  /* Microsoft counts the zero terminator in the result */
1386  result--;
1387  }
1388  }
1389  else
1390 #endif /* WIN32 */
1391  if (locale == (pg_locale_t) 0)
1392  {
1393  /* Use wcstombs directly for the default locale */
1394  result = wcstombs(to, from, tolen);
1395  }
1396  else
1397  {
1398 #ifdef HAVE_LOCALE_T
1399 #ifdef HAVE_WCSTOMBS_L
1400  /* Use wcstombs_l for nondefault locales */
1401  result = wcstombs_l(to, from, tolen, locale);
1402 #else /* !HAVE_WCSTOMBS_L */
1403  /* We have to temporarily set the locale as current ... ugh */
1404  locale_t save_locale = uselocale(locale);
1405 
1406  result = wcstombs(to, from, tolen);
1407 
1408  uselocale(save_locale);
1409 #endif /* HAVE_WCSTOMBS_L */
1410 #else /* !HAVE_LOCALE_T */
1411  /* Can't have locale != 0 without HAVE_LOCALE_T */
1412  elog(ERROR, "wcstombs_l is not available");
1413  result = 0; /* keep compiler quiet */
1414 #endif /* HAVE_LOCALE_T */
1415  }
1416 
1417  return result;
1418 }
1419 
1420 /*
1421  * char2wchar --- convert multibyte characters to wide characters
1422  *
1423  * This has almost the API of mbstowcs_l(), except that *from need not be
1424  * null-terminated; instead, the number of input bytes is specified as
1425  * fromlen. Also, we ereport() rather than returning -1 for invalid
1426  * input encoding. tolen is the maximum number of wchar_t's to store at *to.
1427  * The output will be zero-terminated iff there is room.
1428  */
1429 size_t
1430 char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
1431  pg_locale_t locale)
1432 {
1433  size_t result;
1434 
1435  if (tolen == 0)
1436  return 0;
1437 
1438 #ifdef WIN32
1439  /* See WIN32 "Unicode" comment above */
1440  if (GetDatabaseEncoding() == PG_UTF8)
1441  {
1442  /* Win32 API does not work for zero-length input */
1443  if (fromlen == 0)
1444  result = 0;
1445  else
1446  {
1447  result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
1448  /* A zero return is failure */
1449  if (result == 0)
1450  result = -1;
1451  }
1452 
1453  if (result != -1)
1454  {
1455  Assert(result < tolen);
1456  /* Append trailing null wchar (MultiByteToWideChar() does not) */
1457  to[result] = 0;
1458  }
1459  }
1460  else
1461 #endif /* WIN32 */
1462  {
1463  /* mbstowcs requires ending '\0' */
1464  char *str = pnstrdup(from, fromlen);
1465 
1466  if (locale == (pg_locale_t) 0)
1467  {
1468  /* Use mbstowcs directly for the default locale */
1469  result = mbstowcs(to, str, tolen);
1470  }
1471  else
1472  {
1473 #ifdef HAVE_LOCALE_T
1474 #ifdef HAVE_MBSTOWCS_L
1475  /* Use mbstowcs_l for nondefault locales */
1476  result = mbstowcs_l(to, str, tolen, locale);
1477 #else /* !HAVE_MBSTOWCS_L */
1478  /* We have to temporarily set the locale as current ... ugh */
1479  locale_t save_locale = uselocale(locale);
1480 
1481  result = mbstowcs(to, str, tolen);
1482 
1483  uselocale(save_locale);
1484 #endif /* HAVE_MBSTOWCS_L */
1485 #else /* !HAVE_LOCALE_T */
1486  /* Can't have locale != 0 without HAVE_LOCALE_T */
1487  elog(ERROR, "mbstowcs_l is not available");
1488  result = 0; /* keep compiler quiet */
1489 #endif /* HAVE_LOCALE_T */
1490  }
1491 
1492  pfree(str);
1493  }
1494 
1495  if (result == -1)
1496  {
1497  /*
1498  * Invalid multibyte character encountered. We try to give a useful
1499  * error message by letting pg_verifymbstr check the string. But it's
1500  * possible that the string is OK to us, and not OK to mbstowcs ---
1501  * this suggests that the LC_CTYPE locale is different from the
1502  * database encoding. Give a generic error message if verifymbstr
1503  * can't find anything wrong.
1504  */
1505  pg_verifymbstr(from, fromlen, false); /* might not return */
1506  /* but if it does ... */
1507  ereport(ERROR,
1508  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1509  errmsg("invalid multibyte character for locale"),
1510  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
1511  }
1512 
1513  return result;
1514 }
1515 
1516 #endif /* USE_WIDE_UPPER_LOWER */
static char lc_numeric_envbuf[LC_ENV_BUFSIZE]
Definition: pg_locale.c:108
void SetMessageEncoding(int encoding)
Definition: mbutils.c:919
bool flags_valid
Definition: pg_locale.c:118
int errhint(const char *fmt,...)
Definition: elog.c:987
char * pnstrdup(const char *in, Size len)
Definition: mcxt.c:1176
static bool CurrentLocaleConvValid
Definition: pg_locale.c:94
#define GETSTRUCT(TUP)
Definition: htup_details.h:656
Oid collid
Definition: pg_locale.c:115
#define HASH_ELEM
Definition: hsearch.h:87
bool ctype_is_c
Definition: pg_locale.c:117
static char lc_monetary_envbuf[LC_ENV_BUFSIZE]
Definition: pg_locale.c:107
#define locale_t
Definition: win32.h:333
#define DEBUG3
Definition: elog.h:23
#define wcstombs_l
Definition: win32.h:359
char * pstrdup(const char *in)
Definition: mcxt.c:1165
Datum timenow(PG_FUNCTION_ARGS)
Definition: nabstime.c:997
#define LC_ENV_BUFSIZE
Definition: pg_locale.c:99
bool check_locale(int category, const char *locale, char **canonname)
Definition: pg_locale.c:258
bool check_locale_time(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:325
Size entrysize
Definition: hsearch.h:73
int errcode(int sqlerrcode)
Definition: elog.c:575
void assign_locale_numeric(const char *newval, void *extra)
Definition: pg_locale.c:319
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define lengthof(array)
Definition: c.h:558
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:885
char * locale_numeric
Definition: pg_locale.c:84
unsigned int Oid
Definition: postgres_ext.h:31
char * localized_abbrev_months[12]
Definition: pg_locale.c:90
#define putenv(x)
Definition: win32.h:421
#define OidIsValid(objectId)
Definition: c.h:534
bool collate_is_c
Definition: pg_locale.c:116
void assign_locale_monetary(const char *newval, void *extra)
Definition: pg_locale.c:307
static char lc_time_envbuf[LC_ENV_BUFSIZE]
Definition: pg_locale.c:109
#define SearchSysCache1(cacheId, key1)
Definition: syscache.h:149
GucSource
Definition: guc.h:105
char * pg_perm_setlocale(int category, const char *locale)
Definition: pg_locale.c:145
int pg_locale_t
Definition: pg_locale.h:70
static void free_struct_lconv(struct lconv *s)
Definition: pg_locale.c:387
static struct pg_tm tm
Definition: localtime.c:103
#define mbstowcs_l
Definition: win32.h:360
void assign_locale_time(const char *newval, void *extra)
Definition: pg_locale.c:331
Definition: dynahash.c:193
void pfree(void *pointer)
Definition: mcxt.c:992
#define MAX_L10N_DATA
Definition: pg_locale.c:78
#define ObjectIdGetDatum(X)
Definition: postgres.h:515
#define ERROR
Definition: elog.h:43
bool lc_collate_is_c(Oid collation)
Definition: pg_locale.c:1122
#define FATAL
Definition: elog.h:52
void check_strxfrm_bug(void)
Definition: pg_locale.c:987
static char * buf
Definition: pg_test_fsync.c:65
#define DEFAULT_COLLATION_OID
Definition: pg_collation.h:68
int errdetail(const char *fmt,...)
Definition: elog.c:873
#define ereport(elevel, rest)
Definition: elog.h:122
MemoryContext TopMemoryContext
Definition: mcxt.c:43
static char lc_ctype_envbuf[LC_ENV_BUFSIZE]
Definition: pg_locale.c:102
#define WARNING
Definition: elog.h:40
pg_locale_t locale
Definition: pg_locale.c:119
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1259
#define HASH_BLOBS
Definition: hsearch.h:88
bool check_locale_numeric(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:313
HTAB * hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
Definition: dynahash.c:301
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1083
int GetDatabaseEncoding(void)
Definition: mbutils.c:1015
Size keysize
Definition: hsearch.h:72
static HTAB * collation_cache
Definition: pg_locale.c:122
int pg_get_encoding_from_locale(const char *ctype, bool write_message)
Definition: chklocale.c:433
char * localized_full_days[7]
Definition: pg_locale.c:89
static char * encoding
Definition: initdb.c:120
#define free(a)
Definition: header.h:60
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int errmsg_internal(const char *fmt,...)
Definition: elog.c:827
#define PG_CATCH()
Definition: elog.h:293
char * locale_messages
Definition: pg_locale.c:82
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
void cache_locale_time(void)
Definition: pg_locale.c:780
void assign_locale_messages(const char *newval, void *extra)
Definition: pg_locale.c:370
bool check_locale_messages(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:347
static bool struct_lconv_is_valid(struct lconv *s)
Definition: pg_locale.c:416
bool check_locale_monetary(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:301
struct lconv * PGLC_localeconv(void)
Definition: pg_locale.c:477
#define newval
#define PG_RE_THROW()
Definition: elog.h:314
#define C_COLLATION_OID
Definition: pg_collation.h:71
FormData_pg_collation * Form_pg_collation
Definition: pg_collation.h:47
#define POSIX_COLLATION_OID
Definition: pg_collation.h:74
int errmsg(const char *fmt,...)
Definition: elog.c:797
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition: mcxt.c:1152
int i
#define NameStr(name)
Definition: c.h:495
static char * locale
Definition: initdb.c:121
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition: wchar.c:1866
static char format
Definition: pg_basebackup.c:82
#define elog
Definition: elog.h:219
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1172
static collation_cache_entry * lookup_collation_cache(Oid collation, bool set_flags)
Definition: pg_locale.c:1057
Definition: pg_locale.c:113
char * localized_full_months[12]
Definition: pg_locale.c:91
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:572
#define PG_TRY()
Definition: elog.h:284
static char lc_collate_envbuf[LC_ENV_BUFSIZE]
Definition: pg_locale.c:101
char * locale_monetary
Definition: pg_locale.c:83
char * localized_abbrev_days[7]
Definition: pg_locale.c:88
static void db_encoding_convert(int encoding, char **str)
Definition: pg_locale.c:447
#define PG_END_TRY()
Definition: elog.h:300
static void cache_single_time(char **dst, const char *format, const struct tm *tm)
Definition: pg_locale.c:753
char * locale_time
Definition: pg_locale.c:85
static bool CurrentLCTimeValid
Definition: pg_locale.c:95