/*-----------------------------------------------------------------------
 *
 * PostgreSQL locale utilities
 *
 * Portions Copyright (c) 2002-2024, PostgreSQL Global Development Group
 *
 * src/backend/utils/adt/pg_locale.c
 *
 *-----------------------------------------------------------------------
 */
11 
/*----------
 * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
 * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
 * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
 * toupper(), etc. are always in the same fixed locale.
 *
 * LC_MESSAGES is settable at run time and will take effect
 * immediately.
 *
 * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
 * settable at run-time.  However, we don't actually set those locale
 * categories permanently.  This would have bizarre effects like no
 * longer accepting standard floating-point literals in some locales.
 * Instead, we only set these locale categories briefly when needed,
 * cache the required information obtained from localeconv() or
 * strftime(), and then set the locale categories back to "C".
 * The cached information is only used by the formatting functions
 * (to_char, etc.) and the money type.  For the user, this should all be
 * transparent.
 *
 * !!! NOW HEAR THIS !!!
 *
 * We've been bitten repeatedly by this bug, so let's try to keep it in
 * mind in future: on some platforms, the locale functions return pointers
 * to static data that will be overwritten by any later locale function.
 * Thus, for example, the obvious-looking sequence
 *			save = setlocale(category, NULL);
 *			if (!setlocale(category, value))
 *				fail = true;
 *			setlocale(category, save);
 * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
 * will change the memory save is pointing at.  To do this sort of thing
 * safely, you *must* pstrdup what setlocale returns the first time.
 *
 * The POSIX locale standard is available here:
 *
 *	http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
 *----------
 */
51 
52 
53 #include "postgres.h"
54 
55 #include <time.h>
56 
57 #include "access/htup_details.h"
58 #include "catalog/pg_collation.h"
59 #include "mb/pg_wchar.h"
60 #include "miscadmin.h"
61 #include "utils/builtins.h"
62 #include "utils/formatting.h"
63 #include "utils/guc_hooks.h"
64 #include "utils/hsearch.h"
65 #include "utils/lsyscache.h"
66 #include "utils/memutils.h"
67 #include "utils/pg_locale.h"
68 #include "utils/syscache.h"
69 
70 #ifdef USE_ICU
71 #include <unicode/ucnv.h>
72 #include <unicode/ustring.h>
73 #endif
74 
75 #ifdef __GLIBC__
76 #include <gnu/libc-version.h>
77 #endif
78 
79 #ifdef WIN32
80 #include <shlwapi.h>
81 #endif
82 
/* Error triggered for locale-sensitive subroutines */
#define PGLOCALE_SUPPORT_ERROR(provider) \
	elog(ERROR, "unsupported collprovider for %s: %c", __func__, provider)

/*
 * This should be large enough that most strings will fit, but small enough
 * that we feel comfortable putting it on the stack
 */
#define TEXTBUFLEN 1024

/*
 * Size of the buffer slot used for one localized day or month name; see
 * cache_locale_time() and strftime_win32().
 */
#define MAX_L10N_DATA 80
94 
95 
96 /* GUC settings */
101 
103 
104 /*
105  * lc_time localization cache.
106  *
107  * We use only the first 7 or 12 entries of these arrays. The last array
108  * element is left as NULL for the convenience of outside code that wants
109  * to sequentially scan these arrays.
110  */
112 char *localized_full_days[7 + 1];
114 char *localized_full_months[12 + 1];
115 
116 /* is the databases's LC_CTYPE the C locale? */
117 bool database_ctype_is_c = false;
118 
119 /* indicates whether locale information cache is valid */
120 static bool CurrentLocaleConvValid = false;
121 static bool CurrentLCTimeValid = false;
122 
123 /* Cache for collation-related knowledge */
124 
125 typedef struct
126 {
127  Oid collid; /* hash key: pg_collation OID */
128  bool collate_is_c; /* is collation's LC_COLLATE C? */
129  bool ctype_is_c; /* is collation's LC_CTYPE C? */
130  bool flags_valid; /* true if above flags are valid */
131  pg_locale_t locale; /* locale_t struct, or 0 if not valid */
133 
134 static HTAB *collation_cache = NULL;
135 
136 
#if defined(WIN32) && defined(LC_MESSAGES)
/* Converts a Windows locale name to a Unix-style one; defined further down */
static char *IsoLocaleName(const char *);
#endif

#ifdef USE_ICU
/*
 * Converter object for converting between ICU's UChar strings and C strings
 * in database encoding.  Since the database encoding doesn't change, we only
 * need one of these per session.
 */
static UConverter *icu_converter = NULL;

/* Forward declarations of the ICU helper routines used in this file */
static UCollator *pg_ucol_open(const char *loc_str);
static void init_icu_converter(void);
static size_t uchar_length(UConverter *converter,
						   const char *str, int32_t len);
static int32_t uchar_convert(UConverter *converter,
							 UChar *dest, int32_t destlen,
							 const char *src, int32_t srclen);
static void icu_set_collation_attributes(UCollator *collator, const char *loc,
										 UErrorCode *status);
#endif
159 
160 /*
161  * POSIX doesn't define _l-variants of these functions, but several systems
162  * have them. We provide our own replacements here.
163  */
164 #ifndef HAVE_MBSTOWCS_L
165 static size_t
166 mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
167 {
168 #ifdef WIN32
169  return _mbstowcs_l(dest, src, n, loc);
170 #else
171  size_t result;
172  locale_t save_locale = uselocale(loc);
173 
174  result = mbstowcs(dest, src, n);
175  uselocale(save_locale);
176  return result;
177 #endif
178 }
179 #endif
180 #ifndef HAVE_WCSTOMBS_L
181 static size_t
182 wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
183 {
184 #ifdef WIN32
185  return _wcstombs_l(dest, src, n, loc);
186 #else
187  size_t result;
188  locale_t save_locale = uselocale(loc);
189 
190  result = wcstombs(dest, src, n);
191  uselocale(save_locale);
192  return result;
193 #endif
194 }
195 #endif
196 
197 /*
198  * pg_perm_setlocale
199  *
200  * This wraps the libc function setlocale(), with two additions. First, when
201  * changing LC_CTYPE, update gettext's encoding for the current message
202  * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but
203  * not on Windows. Second, if the operation is successful, the corresponding
204  * LC_XXX environment variable is set to match. By setting the environment
205  * variable, we ensure that any subsequent use of setlocale(..., "") will
206  * preserve the settings made through this routine. Of course, LC_ALL must
207  * also be unset to fully ensure that, but that has to be done elsewhere after
208  * all the individual LC_XXX variables have been set correctly. (Thank you
209  * Perl for making this kluge necessary.)
210  */
211 char *
212 pg_perm_setlocale(int category, const char *locale)
213 {
214  char *result;
215  const char *envvar;
216 
217 #ifndef WIN32
218  result = setlocale(category, locale);
219 #else
220 
221  /*
222  * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
223  * the given value is good and set it in the environment variables. We
224  * must ignore attempts to set to "", which means "keep using the old
225  * environment value".
226  */
227 #ifdef LC_MESSAGES
228  if (category == LC_MESSAGES)
229  {
230  result = (char *) locale;
231  if (locale == NULL || locale[0] == '\0')
232  return result;
233  }
234  else
235 #endif
236  result = setlocale(category, locale);
237 #endif /* WIN32 */
238 
239  if (result == NULL)
240  return result; /* fall out immediately on failure */
241 
242  /*
243  * Use the right encoding in translated messages. Under ENABLE_NLS, let
244  * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message
245  * format strings are ASCII, but database-encoding strings may enter the
246  * message via %s. This makes the overall message encoding equal to the
247  * database encoding.
248  */
249  if (category == LC_CTYPE)
250  {
251  static char save_lc_ctype[LOCALE_NAME_BUFLEN];
252 
253  /* copy setlocale() return value before callee invokes it again */
254  strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
255  result = save_lc_ctype;
256 
257 #ifdef ENABLE_NLS
258  SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
259 #else
261 #endif
262  }
263 
264  switch (category)
265  {
266  case LC_COLLATE:
267  envvar = "LC_COLLATE";
268  break;
269  case LC_CTYPE:
270  envvar = "LC_CTYPE";
271  break;
272 #ifdef LC_MESSAGES
273  case LC_MESSAGES:
274  envvar = "LC_MESSAGES";
275 #ifdef WIN32
276  result = IsoLocaleName(locale);
277  if (result == NULL)
278  result = (char *) locale;
279  elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
280 #endif /* WIN32 */
281  break;
282 #endif /* LC_MESSAGES */
283  case LC_MONETARY:
284  envvar = "LC_MONETARY";
285  break;
286  case LC_NUMERIC:
287  envvar = "LC_NUMERIC";
288  break;
289  case LC_TIME:
290  envvar = "LC_TIME";
291  break;
292  default:
293  elog(FATAL, "unrecognized LC category: %d", category);
294  return NULL; /* keep compiler quiet */
295  }
296 
297  if (setenv(envvar, result, 1) != 0)
298  return NULL;
299 
300  return result;
301 }
302 
303 
304 /*
305  * Is the locale name valid for the locale category?
306  *
307  * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
308  * canonical name is stored there. This is especially useful for figuring out
309  * what locale name "" means (ie, the server environment value). (Actually,
310  * it seems that on most implementations that's the only thing it's good for;
311  * we could wish that setlocale gave back a canonically spelled version of
312  * the locale name, but typically it doesn't.)
313  */
314 bool
315 check_locale(int category, const char *locale, char **canonname)
316 {
317  char *save;
318  char *res;
319 
320  if (canonname)
321  *canonname = NULL; /* in case of failure */
322 
323  save = setlocale(category, NULL);
324  if (!save)
325  return false; /* won't happen, we hope */
326 
327  /* save may be pointing at a modifiable scratch variable, see above. */
328  save = pstrdup(save);
329 
330  /* set the locale with setlocale, to see if it accepts it. */
331  res = setlocale(category, locale);
332 
333  /* save canonical name if requested. */
334  if (res && canonname)
335  *canonname = pstrdup(res);
336 
337  /* restore old value. */
338  if (!setlocale(category, save))
339  elog(WARNING, "failed to restore old locale \"%s\"", save);
340  pfree(save);
341 
342  return (res != NULL);
343 }
344 
345 
346 /*
347  * GUC check/assign hooks
348  *
349  * For most locale categories, the assign hook doesn't actually set the locale
350  * permanently, just reset flags so that the next use will cache the
351  * appropriate values. (See explanation at the top of this file.)
352  *
353  * Note: we accept value = "" as selecting the postmaster's environment
354  * value, whatever it was (so long as the environment setting is legal).
355  * This will have been locked down by an earlier call to pg_perm_setlocale.
356  */
357 bool
359 {
360  return check_locale(LC_MONETARY, *newval, NULL);
361 }
362 
363 void
364 assign_locale_monetary(const char *newval, void *extra)
365 {
366  CurrentLocaleConvValid = false;
367 }
368 
369 bool
371 {
372  return check_locale(LC_NUMERIC, *newval, NULL);
373 }
374 
375 void
376 assign_locale_numeric(const char *newval, void *extra)
377 {
378  CurrentLocaleConvValid = false;
379 }
380 
381 bool
382 check_locale_time(char **newval, void **extra, GucSource source)
383 {
384  return check_locale(LC_TIME, *newval, NULL);
385 }
386 
387 void
388 assign_locale_time(const char *newval, void *extra)
389 {
390  CurrentLCTimeValid = false;
391 }
392 
393 /*
394  * We allow LC_MESSAGES to actually be set globally.
395  *
396  * Note: we normally disallow value = "" because it wouldn't have consistent
397  * semantics (it'd effectively just use the previous value). However, this
398  * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
399  * not even if the attempted setting fails due to invalid environment value.
400  * The idea there is just to accept the environment setting *if possible*
401  * during startup, until we can read the proper value from postgresql.conf.
402  */
403 bool
405 {
406  if (**newval == '\0')
407  {
408  if (source == PGC_S_DEFAULT)
409  return true;
410  else
411  return false;
412  }
413 
414  /*
415  * LC_MESSAGES category does not exist everywhere, but accept it anyway
416  *
417  * On Windows, we can't even check the value, so accept blindly
418  */
419 #if defined(LC_MESSAGES) && !defined(WIN32)
420  return check_locale(LC_MESSAGES, *newval, NULL);
421 #else
422  return true;
423 #endif
424 }
425 
/*
 * Assign hook for lc_messages: apply the new value through
 * pg_perm_setlocale() where the LC_MESSAGES category exists.  "extra" is
 * unused.
 */
void
assign_locale_messages(const char *newval, void *extra)
{
	/*
	 * LC_MESSAGES category does not exist everywhere, but accept it anyway.
	 * We ignore failure, as per comment above.
	 */
#ifdef LC_MESSAGES
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
437 
438 
/*
 * Frees the malloced content of a struct lconv.  (But not the struct
 * itself.)  It's important that this not throw elog(ERROR).
 *
 * Fields that are NULL are harmless, since free(NULL) is a no-op.  The
 * pointers are not reset, so the caller must not use or free them again.
 * The field list must match struct_lconv_is_valid().
 */
static void
free_struct_lconv(struct lconv *s)
{
	/* numeric formatting fields */
	free(s->decimal_point);
	free(s->thousands_sep);
	free(s->grouping);

	/* monetary formatting fields */
	free(s->int_curr_symbol);
	free(s->currency_symbol);
	free(s->mon_decimal_point);
	free(s->mon_thousands_sep);
	free(s->mon_grouping);
	free(s->positive_sign);
	free(s->negative_sign);
}
457 
/*
 * Check that all fields of a struct lconv (or at least, the ones we care
 * about) are non-NULL.  The field list must match free_struct_lconv().
 *
 * Returns true only if every one of those pointer fields is set.
 */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	return s->decimal_point != NULL &&
		s->thousands_sep != NULL &&
		s->grouping != NULL &&
		s->int_curr_symbol != NULL &&
		s->currency_symbol != NULL &&
		s->mon_decimal_point != NULL &&
		s->mon_thousands_sep != NULL &&
		s->mon_grouping != NULL &&
		s->positive_sign != NULL &&
		s->negative_sign != NULL;
}
487 
488 
489 /*
490  * Convert the strdup'd string at *str from the specified encoding to the
491  * database encoding.
492  */
493 static void
495 {
496  char *pstr;
497  char *mstr;
498 
499  /* convert the string to the database encoding */
500  pstr = pg_any_to_server(*str, strlen(*str), encoding);
501  if (pstr == *str)
502  return; /* no conversion happened */
503 
504  /* need it malloc'd not palloc'd */
505  mstr = strdup(pstr);
506  if (mstr == NULL)
507  ereport(ERROR,
508  (errcode(ERRCODE_OUT_OF_MEMORY),
509  errmsg("out of memory")));
510 
511  /* replace old string */
512  free(*str);
513  *str = mstr;
514 
515  pfree(pstr);
516 }
517 
518 
519 /*
520  * Return the POSIX lconv struct (contains number/money formatting
521  * information) with locale information for all categories.
522  */
523 struct lconv *
525 {
526  static struct lconv CurrentLocaleConv;
527  static bool CurrentLocaleConvAllocated = false;
528  struct lconv *extlconv;
529  struct lconv worklconv;
530  char *save_lc_monetary;
531  char *save_lc_numeric;
532 #ifdef WIN32
533  char *save_lc_ctype;
534 #endif
535 
536  /* Did we do it already? */
538  return &CurrentLocaleConv;
539 
540  /* Free any already-allocated storage */
541  if (CurrentLocaleConvAllocated)
542  {
543  free_struct_lconv(&CurrentLocaleConv);
544  CurrentLocaleConvAllocated = false;
545  }
546 
547  /*
548  * This is tricky because we really don't want to risk throwing error
549  * while the locale is set to other than our usual settings. Therefore,
550  * the process is: collect the usual settings, set locale to special
551  * setting, copy relevant data into worklconv using strdup(), restore
552  * normal settings, convert data to desired encoding, and finally stash
553  * the collected data in CurrentLocaleConv. This makes it safe if we
554  * throw an error during encoding conversion or run out of memory anywhere
555  * in the process. All data pointed to by struct lconv members is
556  * allocated with strdup, to avoid premature elog(ERROR) and to allow
557  * using a single cleanup routine.
558  */
559  memset(&worklconv, 0, sizeof(worklconv));
560 
561  /* Save prevailing values of monetary and numeric locales */
562  save_lc_monetary = setlocale(LC_MONETARY, NULL);
563  if (!save_lc_monetary)
564  elog(ERROR, "setlocale(NULL) failed");
565  save_lc_monetary = pstrdup(save_lc_monetary);
566 
567  save_lc_numeric = setlocale(LC_NUMERIC, NULL);
568  if (!save_lc_numeric)
569  elog(ERROR, "setlocale(NULL) failed");
570  save_lc_numeric = pstrdup(save_lc_numeric);
571 
572 #ifdef WIN32
573 
574  /*
575  * The POSIX standard explicitly says that it is undefined what happens if
576  * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
577  * that implied by LC_CTYPE. In practice, all Unix-ish platforms seem to
578  * believe that localeconv() should return strings that are encoded in the
579  * codeset implied by the LC_MONETARY or LC_NUMERIC locale name. Hence,
580  * once we have successfully collected the localeconv() results, we will
581  * convert them from that codeset to the desired server encoding.
582  *
583  * Windows, of course, resolutely does things its own way; on that
584  * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
585  * results. Hence, we must temporarily set that category as well.
586  */
587 
588  /* Save prevailing value of ctype locale */
589  save_lc_ctype = setlocale(LC_CTYPE, NULL);
590  if (!save_lc_ctype)
591  elog(ERROR, "setlocale(NULL) failed");
592  save_lc_ctype = pstrdup(save_lc_ctype);
593 
594  /* Here begins the critical section where we must not throw error */
595 
596  /* use numeric to set the ctype */
597  setlocale(LC_CTYPE, locale_numeric);
598 #endif
599 
600  /* Get formatting information for numeric */
601  setlocale(LC_NUMERIC, locale_numeric);
602  extlconv = localeconv();
603 
604  /* Must copy data now in case setlocale() overwrites it */
605  worklconv.decimal_point = strdup(extlconv->decimal_point);
606  worklconv.thousands_sep = strdup(extlconv->thousands_sep);
607  worklconv.grouping = strdup(extlconv->grouping);
608 
609 #ifdef WIN32
610  /* use monetary to set the ctype */
611  setlocale(LC_CTYPE, locale_monetary);
612 #endif
613 
614  /* Get formatting information for monetary */
615  setlocale(LC_MONETARY, locale_monetary);
616  extlconv = localeconv();
617 
618  /* Must copy data now in case setlocale() overwrites it */
619  worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
620  worklconv.currency_symbol = strdup(extlconv->currency_symbol);
621  worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
622  worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
623  worklconv.mon_grouping = strdup(extlconv->mon_grouping);
624  worklconv.positive_sign = strdup(extlconv->positive_sign);
625  worklconv.negative_sign = strdup(extlconv->negative_sign);
626  /* Copy scalar fields as well */
627  worklconv.int_frac_digits = extlconv->int_frac_digits;
628  worklconv.frac_digits = extlconv->frac_digits;
629  worklconv.p_cs_precedes = extlconv->p_cs_precedes;
630  worklconv.p_sep_by_space = extlconv->p_sep_by_space;
631  worklconv.n_cs_precedes = extlconv->n_cs_precedes;
632  worklconv.n_sep_by_space = extlconv->n_sep_by_space;
633  worklconv.p_sign_posn = extlconv->p_sign_posn;
634  worklconv.n_sign_posn = extlconv->n_sign_posn;
635 
636  /*
637  * Restore the prevailing locale settings; failure to do so is fatal.
638  * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
639  * but proceeding with the wrong value of LC_CTYPE would certainly be bad
640  * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
641  * are almost certainly "C", there's really no reason that restoring those
642  * should fail.
643  */
644 #ifdef WIN32
645  if (!setlocale(LC_CTYPE, save_lc_ctype))
646  elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
647 #endif
648  if (!setlocale(LC_MONETARY, save_lc_monetary))
649  elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
650  if (!setlocale(LC_NUMERIC, save_lc_numeric))
651  elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);
652 
653  /*
654  * At this point we've done our best to clean up, and can call functions
655  * that might possibly throw errors with a clean conscience. But let's
656  * make sure we don't leak any already-strdup'd fields in worklconv.
657  */
658  PG_TRY();
659  {
660  int encoding;
661 
662  /* Release the pstrdup'd locale names */
663  pfree(save_lc_monetary);
664  pfree(save_lc_numeric);
665 #ifdef WIN32
666  pfree(save_lc_ctype);
667 #endif
668 
669  /* If any of the preceding strdup calls failed, complain now. */
670  if (!struct_lconv_is_valid(&worklconv))
671  ereport(ERROR,
672  (errcode(ERRCODE_OUT_OF_MEMORY),
673  errmsg("out of memory")));
674 
675  /*
676  * Now we must perform encoding conversion from whatever's associated
677  * with the locales into the database encoding. If we can't identify
678  * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
679  * use PG_SQL_ASCII, which will result in just validating that the
680  * strings are OK in the database encoding.
681  */
683  if (encoding < 0)
685 
686  db_encoding_convert(encoding, &worklconv.decimal_point);
687  db_encoding_convert(encoding, &worklconv.thousands_sep);
688  /* grouping is not text and does not require conversion */
689 
691  if (encoding < 0)
693 
694  db_encoding_convert(encoding, &worklconv.int_curr_symbol);
695  db_encoding_convert(encoding, &worklconv.currency_symbol);
696  db_encoding_convert(encoding, &worklconv.mon_decimal_point);
697  db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
698  /* mon_grouping is not text and does not require conversion */
699  db_encoding_convert(encoding, &worklconv.positive_sign);
700  db_encoding_convert(encoding, &worklconv.negative_sign);
701  }
702  PG_CATCH();
703  {
704  free_struct_lconv(&worklconv);
705  PG_RE_THROW();
706  }
707  PG_END_TRY();
708 
709  /*
710  * Everything is good, so save the results.
711  */
712  CurrentLocaleConv = worklconv;
713  CurrentLocaleConvAllocated = true;
714  CurrentLocaleConvValid = true;
715  return &CurrentLocaleConv;
716 }
717 
#ifdef WIN32
/*
 * On Windows, strftime() returns its output in encoding CP_ACP (the default
 * operating system codepage for the computer), which is likely different
 * from SERVER_ENCODING.  This is especially important in Japanese versions
 * of Windows which will use SJIS encoding, which we don't support as a
 * server encoding.
 *
 * So, instead of using strftime(), use wcsftime() to return the value in
 * wide characters (internally UTF16) and then convert to UTF8, which we
 * know how to handle directly.
 *
 * Note that this only affects the calls to strftime() in this file, which are
 * used to get the locale-aware strings.  Other parts of the backend use
 * pg_strftime(), which isn't locale-aware and does not need to be replaced.
 *
 * Returns the number of bytes stored in dst, not counting the terminating
 * NUL, or 0 if wcsftime() produced nothing.  Throws ERROR if either
 * character-set conversion fails.
 */
static size_t
strftime_win32(char *dst, size_t dstlen,
			   const char *format, const struct tm *tm)
{
	size_t		len;
	wchar_t		wformat[8];		/* formats used below need 3 chars */
	wchar_t		wbuf[MAX_L10N_DATA];

	/*
	 * Get a wchar_t version of the format string.  We only actually use
	 * plain-ASCII formats in this file, so we can say that they're UTF8.
	 */
	len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
							  wformat, lengthof(wformat));
	if (len == 0)
		elog(ERROR, "could not convert format string from UTF-8: error code %lu",
			 GetLastError());

	len = wcsftime(wbuf, MAX_L10N_DATA, wformat, tm);
	if (len == 0)
	{
		/*
		 * wcsftime failed, possibly because the result would not fit in
		 * MAX_L10N_DATA.  Return 0 with the contents of dst unspecified.
		 */
		return 0;
	}

	/* dstlen - 1 leaves room for the terminator appended below */
	len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1,
							  NULL, NULL);
	if (len == 0)
		elog(ERROR, "could not convert string to UTF-8: error code %lu",
			 GetLastError());

	dst[len] = '\0';

	return len;
}

/* redefine strftime() */
#define strftime(a,b,c,d) strftime_win32(a,b,c,d)
#endif							/* WIN32 */
776 
777 /*
778  * Subroutine for cache_locale_time().
779  * Convert the given string from encoding "encoding" to the database
780  * encoding, and store the result at *dst, replacing any previous value.
781  */
782 static void
783 cache_single_string(char **dst, const char *src, int encoding)
784 {
785  char *ptr;
786  char *olddst;
787 
788  /* Convert the string to the database encoding, or validate it's OK */
789  ptr = pg_any_to_server(src, strlen(src), encoding);
790 
791  /* Store the string in long-lived storage, replacing any previous value */
792  olddst = *dst;
794  if (olddst)
795  pfree(olddst);
796 
797  /* Might as well clean up any palloc'd conversion result, too */
798  if (ptr != src)
799  pfree(ptr);
800 }
801 
802 /*
803  * Update the lc_time localization cache variables if needed.
804  */
805 void
807 {
808  char buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
809  char *bufptr;
810  time_t timenow;
811  struct tm *timeinfo;
812  bool strftimefail = false;
813  int encoding;
814  int i;
815  char *save_lc_time;
816 #ifdef WIN32
817  char *save_lc_ctype;
818 #endif
819 
820  /* did we do this already? */
821  if (CurrentLCTimeValid)
822  return;
823 
824  elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
825 
826  /*
827  * As in PGLC_localeconv(), it's critical that we not throw error while
828  * libc's locale settings have nondefault values. Hence, we just call
829  * strftime() within the critical section, and then convert and save its
830  * results afterwards.
831  */
832 
833  /* Save prevailing value of time locale */
834  save_lc_time = setlocale(LC_TIME, NULL);
835  if (!save_lc_time)
836  elog(ERROR, "setlocale(NULL) failed");
837  save_lc_time = pstrdup(save_lc_time);
838 
839 #ifdef WIN32
840 
841  /*
842  * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we
843  * must set it here. This code looks the same as what PGLC_localeconv()
844  * does, but the underlying reason is different: this does NOT determine
845  * the encoding we'll get back from strftime_win32().
846  */
847 
848  /* Save prevailing value of ctype locale */
849  save_lc_ctype = setlocale(LC_CTYPE, NULL);
850  if (!save_lc_ctype)
851  elog(ERROR, "setlocale(NULL) failed");
852  save_lc_ctype = pstrdup(save_lc_ctype);
853 
854  /* use lc_time to set the ctype */
855  setlocale(LC_CTYPE, locale_time);
856 #endif
857 
858  setlocale(LC_TIME, locale_time);
859 
860  /* We use times close to current time as data for strftime(). */
861  timenow = time(NULL);
862  timeinfo = localtime(&timenow);
863 
864  /* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
865  bufptr = buf;
866 
867  /*
868  * MAX_L10N_DATA is sufficient buffer space for every known locale, and
869  * POSIX defines no strftime() errors. (Buffer space exhaustion is not an
870  * error.) An implementation might report errors (e.g. ENOMEM) by
871  * returning 0 (or, less plausibly, a negative value) and setting errno.
872  * Report errno just in case the implementation did that, but clear it in
873  * advance of the calls so we don't emit a stale, unrelated errno.
874  */
875  errno = 0;
876 
877  /* localized days */
878  for (i = 0; i < 7; i++)
879  {
880  timeinfo->tm_wday = i;
881  if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0)
882  strftimefail = true;
883  bufptr += MAX_L10N_DATA;
884  if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0)
885  strftimefail = true;
886  bufptr += MAX_L10N_DATA;
887  }
888 
889  /* localized months */
890  for (i = 0; i < 12; i++)
891  {
892  timeinfo->tm_mon = i;
893  timeinfo->tm_mday = 1; /* make sure we don't have invalid date */
894  if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0)
895  strftimefail = true;
896  bufptr += MAX_L10N_DATA;
897  if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0)
898  strftimefail = true;
899  bufptr += MAX_L10N_DATA;
900  }
901 
902  /*
903  * Restore the prevailing locale settings; as in PGLC_localeconv(),
904  * failure to do so is fatal.
905  */
906 #ifdef WIN32
907  if (!setlocale(LC_CTYPE, save_lc_ctype))
908  elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
909 #endif
910  if (!setlocale(LC_TIME, save_lc_time))
911  elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);
912 
913  /*
914  * At this point we've done our best to clean up, and can throw errors, or
915  * call functions that might throw errors, with a clean conscience.
916  */
917  if (strftimefail)
918  elog(ERROR, "strftime() failed: %m");
919 
920  /* Release the pstrdup'd locale names */
921  pfree(save_lc_time);
922 #ifdef WIN32
923  pfree(save_lc_ctype);
924 #endif
925 
926 #ifndef WIN32
927 
928  /*
929  * As in PGLC_localeconv(), we must convert strftime()'s output from the
930  * encoding implied by LC_TIME to the database encoding. If we can't
931  * identify the LC_TIME encoding, just perform encoding validation.
932  */
934  if (encoding < 0)
936 
937 #else
938 
939  /*
940  * On Windows, strftime_win32() always returns UTF8 data, so convert from
941  * that if necessary.
942  */
943  encoding = PG_UTF8;
944 
945 #endif /* WIN32 */
946 
947  bufptr = buf;
948 
949  /* localized days */
950  for (i = 0; i < 7; i++)
951  {
953  bufptr += MAX_L10N_DATA;
955  bufptr += MAX_L10N_DATA;
956  }
957  localized_abbrev_days[7] = NULL;
958  localized_full_days[7] = NULL;
959 
960  /* localized months */
961  for (i = 0; i < 12; i++)
962  {
964  bufptr += MAX_L10N_DATA;
966  bufptr += MAX_L10N_DATA;
967  }
968  localized_abbrev_months[12] = NULL;
969  localized_full_months[12] = NULL;
970 
971  CurrentLCTimeValid = true;
972 }
973 
974 
975 #if defined(WIN32) && defined(LC_MESSAGES)
/*
 * Convert a Windows setlocale() argument to a Unix-style one.
 *
 * Regardless of platform, we install message catalogs under a Unix-style
 * LL[_CC][.ENCODING][@VARIANT] naming convention.  Only LC_MESSAGES settings
 * following that style will elicit localized interface strings.
 *
 * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
 * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
 * case-insensitive.  setlocale() returns the fully-qualified form; for
 * example, setlocale("thaI") returns "Thai_Thailand.874".  Internally,
 * setlocale() and _create_locale() select a "locale identifier"[1] and store
 * it in an undocumented _locale_t field.  From that LCID, we can retrieve the
 * ISO 639 language and the ISO 3166 country.  Character encoding does not
 * matter, because the server and client encodings govern that.
 *
 * Windows Vista introduced the "locale name" concept[2], closely following
 * RFC 4646.  Locale identifiers are now deprecated.  Starting with Visual
 * Studio 2012, setlocale() accepts locale names in addition to the strings it
 * accepted historically.  It does not standardize them; setlocale("Th-tH")
 * returns "Th-tH".  setlocale(category, "") still returns a traditional
 * string.  Furthermore, msvcr110.dll changed the undocumented _locale_t
 * content to carry locale names instead of locale identifiers.
 *
 * Visual Studio 2015 should still be able to do the same as Visual Studio
 * 2012, but the declaration of locale_name is missing in _locale_t, causing
 * this code compilation to fail, hence this falls back instead on to
 * enumerating all system locales by using EnumSystemLocalesEx to find the
 * required locale name.  If the input argument is in Unix-style then we can
 * get ISO Locale name directly by using GetLocaleInfoEx() with LCType as
 * LOCALE_SNAME.
 *
 * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol in
 * releases before Windows 8.  IsoLocaleName() always fails in a MinGW-built
 * postgres.exe, so only Unix-style values of the lc_messages GUC can elicit
 * localized messages.  In particular, every lc_messages setting that initdb
 * can select automatically will yield only C-locale messages.  XXX This could
 * be fixed by running the fully-qualified locale name through a lookup table.
 *
 * This function returns a pointer to a static buffer bearing the converted
 * name or NULL if conversion fails.
 *
 * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers
 * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names
 */
1021 
1022 #if defined(_MSC_VER)
1023 
1024 /*
1025  * Callback function for EnumSystemLocalesEx() in get_iso_localename().
1026  *
1027  * This function enumerates all system locales, searching for one that matches
1028  * an input with the format: <Language>[_<Country>], e.g.
1029  * English[_United States]
1030  *
1031  * The input is a three wchar_t array as an LPARAM. The first element is the
1032  * locale_name we want to match, the second element is an allocated buffer
1033  * where the Unix-style locale is copied if a match is found, and the third
1034  * element is the search status, 1 if a match was found, 0 otherwise.
1035  */
1036 static BOOL CALLBACK
1037 search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
1038 {
1039  wchar_t test_locale[LOCALE_NAME_MAX_LENGTH];
1040  wchar_t **argv;
1041 
1042  (void) (dwFlags);
1043 
1044  argv = (wchar_t **) lparam;
1045  *argv[2] = (wchar_t) 0;
1046 
1047  memset(test_locale, 0, sizeof(test_locale));
1048 
1049  /* Get the name of the <Language> in English */
1050  if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME,
1051  test_locale, LOCALE_NAME_MAX_LENGTH))
1052  {
1053  /*
1054  * If the enumerated locale does not have a hyphen ("en") OR the
1055  * locale_name input does not have an underscore ("English"), we only
1056  * need to compare the <Language> tags.
1057  */
1058  if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL)
1059  {
1060  if (_wcsicmp(argv[0], test_locale) == 0)
1061  {
1062  wcscpy(argv[1], pStr);
1063  *argv[2] = (wchar_t) 1;
1064  return FALSE;
1065  }
1066  }
1067 
1068  /*
1069  * We have to compare a full <Language>_<Country> tag, so we append
1070  * the underscore and name of the country/region in English, e.g.
1071  * "English_United States".
1072  */
1073  else
1074  {
1075  size_t len;
1076 
1077  wcscat(test_locale, L"_");
1078  len = wcslen(test_locale);
1079  if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME,
1080  test_locale + len,
1081  LOCALE_NAME_MAX_LENGTH - len))
1082  {
1083  if (_wcsicmp(argv[0], test_locale) == 0)
1084  {
1085  wcscpy(argv[1], pStr);
1086  *argv[2] = (wchar_t) 1;
1087  return FALSE;
1088  }
1089  }
1090  }
1091  }
1092 
1093  return TRUE;
1094 }
1095 
1096 /*
1097  * This function converts a Windows locale name to an ISO formatted version
1098  * for Visual Studio 2015 or greater.
1099  *
1100  * Returns NULL, if no valid conversion was found.
1101  */
1102 static char *
1103 get_iso_localename(const char *winlocname)
1104 {
1105  wchar_t wc_locale_name[LOCALE_NAME_MAX_LENGTH];
1106  wchar_t buffer[LOCALE_NAME_MAX_LENGTH];
1107  static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1108  char *period;
1109  int len;
1110  int ret_val;
1111 
1112  /*
1113  * Valid locales have the following syntax:
1114  * <Language>[_<Country>[.<CodePage>]]
1115  *
1116  * GetLocaleInfoEx can only take locale name without code-page and for the
1117  * purpose of this API the code-page doesn't matter.
1118  */
1119  period = strchr(winlocname, '.');
1120  if (period != NULL)
1121  len = period - winlocname;
1122  else
1123  len = pg_mbstrlen(winlocname);
1124 
1125  memset(wc_locale_name, 0, sizeof(wc_locale_name));
1126  memset(buffer, 0, sizeof(buffer));
1127  MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name,
1128  LOCALE_NAME_MAX_LENGTH);
1129 
1130  /*
1131  * If the lc_messages is already a Unix-style string, we have a direct
1132  * match with LOCALE_SNAME, e.g. en-US, en_US.
1133  */
1134  ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer,
1135  LOCALE_NAME_MAX_LENGTH);
1136  if (!ret_val)
1137  {
1138  /*
1139  * Search for a locale in the system that matches language and country
1140  * name.
1141  */
1142  wchar_t *argv[3];
1143 
1144  argv[0] = wc_locale_name;
1145  argv[1] = buffer;
1146  argv[2] = (wchar_t *) &ret_val;
1147  EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv,
1148  NULL);
1149  }
1150 
1151  if (ret_val)
1152  {
1153  size_t rc;
1154  char *hyphen;
1155 
1156  /* Locale names use only ASCII, any conversion locale suffices. */
1157  rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
1158  if (rc == -1 || rc == sizeof(iso_lc_messages))
1159  return NULL;
1160 
1161  /*
1162  * Since the message catalogs sit on a case-insensitive filesystem, we
1163  * need not standardize letter case here. So long as we do not ship
1164  * message catalogs for which it would matter, we also need not
1165  * translate the script/variant portion, e.g. uz-Cyrl-UZ to
1166  * uz_UZ@cyrillic. Simply replace the hyphen with an underscore.
1167  */
1168  hyphen = strchr(iso_lc_messages, '-');
1169  if (hyphen)
1170  *hyphen = '_';
1171  return iso_lc_messages;
1172  }
1173 
1174  return NULL;
1175 }
1176 
1177 static char *
1178 IsoLocaleName(const char *winlocname)
1179 {
1180  static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1181 
1182  if (pg_strcasecmp("c", winlocname) == 0 ||
1183  pg_strcasecmp("posix", winlocname) == 0)
1184  {
1185  strcpy(iso_lc_messages, "C");
1186  return iso_lc_messages;
1187  }
1188  else
1189  return get_iso_localename(winlocname);
1190 }
1191 
1192 #else /* !defined(_MSC_VER) */
1193 
/*
 * MinGW build: no conversion is available, so every name is reported as
 * unconvertible.
 */
static char *
IsoLocaleName(const char *winlocname)
{
    (void) winlocname;          /* unused in this variant */

    return NULL;                /* Not supported on MinGW */
}
1199 
1200 #endif /* defined(_MSC_VER) */
1201 
1202 #endif /* WIN32 && LC_MESSAGES */
1203 
1204 
1205 /*
1206  * Cache mechanism for collation information.
1207  *
1208  * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
1209  * (or POSIX), so we can optimize a few code paths in various places.
1210  * For the built-in C and POSIX collations, we can know that without even
1211  * doing a cache lookup, but we want to support aliases for C/POSIX too.
1212  * For the "default" collation, there are separate static cache variables,
1213  * since consulting the pg_collation catalog doesn't tell us what we need.
1214  *
1215  * Also, if a pg_locale_t has been requested for a collation, we cache that
1216  * for the life of a backend.
1217  *
1218  * Note that some code relies on the flags not reporting false negatives
1219  * (that is, saying it's not C when it is). For example, char2wchar()
1220  * could fail if the locale is C, so str_tolower() shouldn't call it
1221  * in that case.
1222  *
1223  * Note that we currently lack any way to flush the cache. Since we don't
1224  * support ALTER COLLATION, this is OK. The worst case is that someone
1225  * drops a collation, and a useless cache entry hangs around in existing
1226  * backends.
1227  */
1228 
1229 static collation_cache_entry *
1230 lookup_collation_cache(Oid collation, bool set_flags)
1231 {
1232  collation_cache_entry *cache_entry;
1233  bool found;
1234 
1235  Assert(OidIsValid(collation));
1236  Assert(collation != DEFAULT_COLLATION_OID);
1237 
1238  if (collation_cache == NULL)
1239  {
1240  /* First time through, initialize the hash table */
1241  HASHCTL ctl;
1242 
1243  ctl.keysize = sizeof(Oid);
1244  ctl.entrysize = sizeof(collation_cache_entry);
1245  collation_cache = hash_create("Collation cache", 100, &ctl,
1246  HASH_ELEM | HASH_BLOBS);
1247  }
1248 
1249  cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
1250  if (!found)
1251  {
1252  /*
1253  * Make sure cache entry is marked invalid, in case we fail before
1254  * setting things.
1255  */
1256  cache_entry->flags_valid = false;
1257  cache_entry->locale = 0;
1258  }
1259 
1260  if (set_flags && !cache_entry->flags_valid)
1261  {
1262  /* Attempt to set the flags */
1263  HeapTuple tp;
1264  Form_pg_collation collform;
1265 
1266  tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
1267  if (!HeapTupleIsValid(tp))
1268  elog(ERROR, "cache lookup failed for collation %u", collation);
1269  collform = (Form_pg_collation) GETSTRUCT(tp);
1270 
1271  if (collform->collprovider == COLLPROVIDER_BUILTIN)
1272  {
1273  Datum datum;
1274  const char *colllocale;
1275 
1276  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
1277  colllocale = TextDatumGetCString(datum);
1278 
1279  cache_entry->collate_is_c = true;
1280  cache_entry->ctype_is_c = (strcmp(colllocale, "C") == 0);
1281  }
1282  else if (collform->collprovider == COLLPROVIDER_LIBC)
1283  {
1284  Datum datum;
1285  const char *collcollate;
1286  const char *collctype;
1287 
1288  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
1289  collcollate = TextDatumGetCString(datum);
1290  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
1291  collctype = TextDatumGetCString(datum);
1292 
1293  cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
1294  (strcmp(collcollate, "POSIX") == 0));
1295  cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
1296  (strcmp(collctype, "POSIX") == 0));
1297  }
1298  else
1299  {
1300  cache_entry->collate_is_c = false;
1301  cache_entry->ctype_is_c = false;
1302  }
1303 
1304  cache_entry->flags_valid = true;
1305 
1306  ReleaseSysCache(tp);
1307  }
1308 
1309  return cache_entry;
1310 }
1311 
1312 
1313 /*
1314  * Detect whether collation's LC_COLLATE property is C
1315  */
1316 bool
1318 {
1319  /*
1320  * If we're asked about "collation 0", return false, so that the code will
1321  * go into the non-C path and report that the collation is bogus.
1322  */
1323  if (!OidIsValid(collation))
1324  return false;
1325 
1326  /*
1327  * If we're asked about the default collation, we have to inquire of the C
1328  * library. Cache the result so we only have to compute it once.
1329  */
1330  if (collation == DEFAULT_COLLATION_OID)
1331  {
1332  static int result = -1;
1333  const char *localeptr;
1334 
1335  if (result >= 0)
1336  return (bool) result;
1337 
1338  if (default_locale.provider == COLLPROVIDER_BUILTIN)
1339  {
1340  result = true;
1341  return (bool) result;
1342  }
1343  else if (default_locale.provider == COLLPROVIDER_ICU)
1344  {
1345  result = false;
1346  return (bool) result;
1347  }
1348  else if (default_locale.provider == COLLPROVIDER_LIBC)
1349  {
1350  localeptr = setlocale(LC_CTYPE, NULL);
1351  if (!localeptr)
1352  elog(ERROR, "invalid LC_CTYPE setting");
1353  }
1354  else
1355  elog(ERROR, "unexpected collation provider '%c'",
1357 
1358  if (strcmp(localeptr, "C") == 0)
1359  result = true;
1360  else if (strcmp(localeptr, "POSIX") == 0)
1361  result = true;
1362  else
1363  result = false;
1364  return (bool) result;
1365  }
1366 
1367  /*
1368  * If we're asked about the built-in C/POSIX collations, we know that.
1369  */
1370  if (collation == C_COLLATION_OID ||
1371  collation == POSIX_COLLATION_OID)
1372  return true;
1373 
1374  /*
1375  * Otherwise, we have to consult pg_collation, but we cache that.
1376  */
1377  return (lookup_collation_cache(collation, true))->collate_is_c;
1378 }
1379 
1380 /*
1381  * Detect whether collation's LC_CTYPE property is C
1382  */
1383 bool
1384 lc_ctype_is_c(Oid collation)
1385 {
1386  /*
1387  * If we're asked about "collation 0", return false, so that the code will
1388  * go into the non-C path and report that the collation is bogus.
1389  */
1390  if (!OidIsValid(collation))
1391  return false;
1392 
1393  /*
1394  * If we're asked about the default collation, we have to inquire of the C
1395  * library. Cache the result so we only have to compute it once.
1396  */
1397  if (collation == DEFAULT_COLLATION_OID)
1398  {
1399  static int result = -1;
1400  const char *localeptr;
1401 
1402  if (result >= 0)
1403  return (bool) result;
1404 
1405  if (default_locale.provider == COLLPROVIDER_BUILTIN)
1406  {
1407  localeptr = default_locale.info.builtin.locale;
1408  }
1409  else if (default_locale.provider == COLLPROVIDER_ICU)
1410  {
1411  result = false;
1412  return (bool) result;
1413  }
1414  else if (default_locale.provider == COLLPROVIDER_LIBC)
1415  {
1416  localeptr = setlocale(LC_CTYPE, NULL);
1417  if (!localeptr)
1418  elog(ERROR, "invalid LC_CTYPE setting");
1419  }
1420  else
1421  elog(ERROR, "unexpected collation provider '%c'",
1423 
1424  if (strcmp(localeptr, "C") == 0)
1425  result = true;
1426  else if (strcmp(localeptr, "POSIX") == 0)
1427  result = true;
1428  else
1429  result = false;
1430  return (bool) result;
1431  }
1432 
1433  /*
1434  * If we're asked about the built-in C/POSIX collations, we know that.
1435  */
1436  if (collation == C_COLLATION_OID ||
1437  collation == POSIX_COLLATION_OID)
1438  return true;
1439 
1440  /*
1441  * Otherwise, we have to consult pg_collation, but we cache that.
1442  */
1443  return (lookup_collation_cache(collation, true))->ctype_is_c;
1444 }
1445 
1447 
1448 void
1449 make_icu_collator(const char *iculocstr,
1450  const char *icurules,
1451  struct pg_locale_struct *resultp)
1452 {
1453 #ifdef USE_ICU
1454  UCollator *collator;
1455 
1456  collator = pg_ucol_open(iculocstr);
1457 
1458  /*
1459  * If rules are specified, we extract the rules of the standard collation,
1460  * add our own rules, and make a new collator with the combined rules.
1461  */
1462  if (icurules)
1463  {
1464  const UChar *default_rules;
1465  UChar *agg_rules;
1466  UChar *my_rules;
1467  UErrorCode status;
1468  int32_t length;
1469 
1470  default_rules = ucol_getRules(collator, &length);
1471  icu_to_uchar(&my_rules, icurules, strlen(icurules));
1472 
1473  agg_rules = palloc_array(UChar, u_strlen(default_rules) + u_strlen(my_rules) + 1);
1474  u_strcpy(agg_rules, default_rules);
1475  u_strcat(agg_rules, my_rules);
1476 
1477  ucol_close(collator);
1478 
1479  status = U_ZERO_ERROR;
1480  collator = ucol_openRules(agg_rules, u_strlen(agg_rules),
1481  UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL, &status);
1482  if (U_FAILURE(status))
1483  ereport(ERROR,
1484  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1485  errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
1486  iculocstr, icurules, u_errorName(status))));
1487  }
1488 
1489  /* We will leak this string if the caller errors later :-( */
1490  resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext, iculocstr);
1491  resultp->info.icu.ucol = collator;
1492 #else /* not USE_ICU */
1493  /* could get here if a collation was created by a build with ICU */
1494  ereport(ERROR,
1495  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1496  errmsg("ICU is not supported in this build")));
1497 #endif /* not USE_ICU */
1498 }
1499 
1500 
1501 /* simple subroutine for reporting errors from newlocale() */
1502 static void
1503 report_newlocale_failure(const char *localename)
1504 {
1505  int save_errno;
1506 
1507  /*
1508  * Windows doesn't provide any useful error indication from
1509  * _create_locale(), and BSD-derived platforms don't seem to feel they
1510  * need to set errno either (even though POSIX is pretty clear that
1511  * newlocale should do so). So, if errno hasn't been set, assume ENOENT
1512  * is what to report.
1513  */
1514  if (errno == 0)
1515  errno = ENOENT;
1516 
1517  /*
1518  * ENOENT means "no such locale", not "no such file", so clarify that
1519  * errno with an errdetail message.
1520  */
1521  save_errno = errno; /* auxiliary funcs might change errno */
1522  ereport(ERROR,
1523  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1524  errmsg("could not create locale \"%s\": %m",
1525  localename),
1526  (save_errno == ENOENT ?
1527  errdetail("The operating system could not find any locale data for the locale name \"%s\".",
1528  localename) : 0)));
1529 }
1530 
1531 bool
1533 {
1534  /* default locale must always be deterministic */
1535  if (locale == NULL)
1536  return true;
1537  else
1538  return locale->deterministic;
1539 }
1540 
1541 /*
1542  * Create a locale_t from a collation OID. Results are cached for the
1543  * lifetime of the backend. Thus, do not free the result with freelocale().
1544  *
1545  * As a special optimization, the default/database collation returns 0.
1546  *
1547  * For simplicity, we always generate COLLATE + CTYPE even though we
1548  * might only need one of them. Since this is called only once per session,
1549  * it shouldn't cost much.
1550  */
1553 {
1554  collation_cache_entry *cache_entry;
1555 
1556  /* Callers must pass a valid OID */
1558 
1559  if (collid == DEFAULT_COLLATION_OID)
1560  {
1561  if (default_locale.provider == COLLPROVIDER_LIBC)
1562  return (pg_locale_t) 0;
1563  else
1564  return &default_locale;
1565  }
1566 
1567  cache_entry = lookup_collation_cache(collid, false);
1568 
1569  if (cache_entry->locale == 0)
1570  {
1571  /* We haven't computed this yet in this session, so do it */
1572  HeapTuple tp;
1573  Form_pg_collation collform;
1574  struct pg_locale_struct result;
1575  pg_locale_t resultp;
1576  Datum datum;
1577  bool isnull;
1578 
1579  tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1580  if (!HeapTupleIsValid(tp))
1581  elog(ERROR, "cache lookup failed for collation %u", collid);
1582  collform = (Form_pg_collation) GETSTRUCT(tp);
1583 
1584  /* We'll fill in the result struct locally before allocating memory */
1585  memset(&result, 0, sizeof(result));
1586  result.provider = collform->collprovider;
1587  result.deterministic = collform->collisdeterministic;
1588 
1589  if (collform->collprovider == COLLPROVIDER_BUILTIN)
1590  {
1591  const char *locstr;
1592 
1593  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
1594  locstr = TextDatumGetCString(datum);
1595 
1597 
1599  locstr);
1600  }
1601  else if (collform->collprovider == COLLPROVIDER_LIBC)
1602  {
1603  const char *collcollate;
1604  const char *collctype pg_attribute_unused();
1605  locale_t loc;
1606 
1607  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
1608  collcollate = TextDatumGetCString(datum);
1609  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
1610  collctype = TextDatumGetCString(datum);
1611 
1612  if (strcmp(collcollate, collctype) == 0)
1613  {
1614  /* Normal case where they're the same */
1615  errno = 0;
1616 #ifndef WIN32
1617  loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
1618  NULL);
1619 #else
1620  loc = _create_locale(LC_ALL, collcollate);
1621 #endif
1622  if (!loc)
1623  report_newlocale_failure(collcollate);
1624  }
1625  else
1626  {
1627 #ifndef WIN32
1628  /* We need two newlocale() steps */
1629  locale_t loc1;
1630 
1631  errno = 0;
1632  loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
1633  if (!loc1)
1634  report_newlocale_failure(collcollate);
1635  errno = 0;
1636  loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
1637  if (!loc)
1638  report_newlocale_failure(collctype);
1639 #else
1640 
1641  /*
1642  * XXX The _create_locale() API doesn't appear to support
1643  * this. Could perhaps be worked around by changing
1644  * pg_locale_t to contain two separate fields.
1645  */
1646  ereport(ERROR,
1647  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1648  errmsg("collations with different collate and ctype values are not supported on this platform")));
1649 #endif
1650  }
1651 
1652  result.info.lt = loc;
1653  }
1654  else if (collform->collprovider == COLLPROVIDER_ICU)
1655  {
1656  const char *iculocstr;
1657  const char *icurules;
1658 
1659  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
1660  iculocstr = TextDatumGetCString(datum);
1661 
1662  datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
1663  if (!isnull)
1664  icurules = TextDatumGetCString(datum);
1665  else
1666  icurules = NULL;
1667 
1668  make_icu_collator(iculocstr, icurules, &result);
1669  }
1670 
1671  datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1672  &isnull);
1673  if (!isnull)
1674  {
1675  char *actual_versionstr;
1676  char *collversionstr;
1677 
1678  collversionstr = TextDatumGetCString(datum);
1679 
1680  if (collform->collprovider == COLLPROVIDER_LIBC)
1681  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
1682  else
1683  datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
1684 
1685  actual_versionstr = get_collation_actual_version(collform->collprovider,
1686  TextDatumGetCString(datum));
1687  if (!actual_versionstr)
1688  {
1689  /*
1690  * This could happen when specifying a version in CREATE
1691  * COLLATION but the provider does not support versioning, or
1692  * manually creating a mess in the catalogs.
1693  */
1694  ereport(ERROR,
1695  (errmsg("collation \"%s\" has no actual version, but a version was recorded",
1696  NameStr(collform->collname))));
1697  }
1698 
1699  if (strcmp(actual_versionstr, collversionstr) != 0)
1700  ereport(WARNING,
1701  (errmsg("collation \"%s\" has version mismatch",
1702  NameStr(collform->collname)),
1703  errdetail("The collation in the database was created using version %s, "
1704  "but the operating system provides version %s.",
1705  collversionstr, actual_versionstr),
1706  errhint("Rebuild all objects affected by this collation and run "
1707  "ALTER COLLATION %s REFRESH VERSION, "
1708  "or build PostgreSQL with the right library version.",
1709  quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1710  NameStr(collform->collname)))));
1711  }
1712 
1713  ReleaseSysCache(tp);
1714 
1715  /* We'll keep the pg_locale_t structures in TopMemoryContext */
1716  resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp));
1717  *resultp = result;
1718 
1719  cache_entry->locale = resultp;
1720  }
1721 
1722  return cache_entry->locale;
1723 }
1724 
1725 /*
1726  * Get provider-specific collation version string for the given collation from
1727  * the operating system/library.
1728  */
1729 char *
1730 get_collation_actual_version(char collprovider, const char *collcollate)
1731 {
1732  char *collversion = NULL;
1733 
1734  /*
1735  * The only two supported locales (C and C.UTF-8) are both based on memcmp
1736  * and are not expected to change, but track the version anyway.
1737  *
1738  * Note that the character semantics may change for some locales, but the
1739  * collation version only tracks changes to sort order.
1740  */
1741  if (collprovider == COLLPROVIDER_BUILTIN)
1742  {
1743  if (strcmp(collcollate, "C") == 0)
1744  return "1";
1745  else if (strcmp(collcollate, "C.UTF-8") == 0)
1746  return "1";
1747  else
1748  ereport(ERROR,
1749  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1750  errmsg("invalid locale name \"%s\" for builtin provider",
1751  collcollate)));
1752  }
1753 
1754 #ifdef USE_ICU
1755  if (collprovider == COLLPROVIDER_ICU)
1756  {
1757  UCollator *collator;
1758  UVersionInfo versioninfo;
1759  char buf[U_MAX_VERSION_STRING_LENGTH];
1760 
1761  collator = pg_ucol_open(collcollate);
1762 
1763  ucol_getVersion(collator, versioninfo);
1764  ucol_close(collator);
1765 
1766  u_versionToString(versioninfo, buf);
1767  collversion = pstrdup(buf);
1768  }
1769  else
1770 #endif
1771  if (collprovider == COLLPROVIDER_LIBC &&
1772  pg_strcasecmp("C", collcollate) != 0 &&
1773  pg_strncasecmp("C.", collcollate, 2) != 0 &&
1774  pg_strcasecmp("POSIX", collcollate) != 0)
1775  {
1776 #if defined(__GLIBC__)
1777  /* Use the glibc version because we don't have anything better. */
1778  collversion = pstrdup(gnu_get_libc_version());
1779 #elif defined(LC_VERSION_MASK)
1780  locale_t loc;
1781 
1782  /* Look up FreeBSD collation version. */
1783  loc = newlocale(LC_COLLATE, collcollate, NULL);
1784  if (loc)
1785  {
1786  collversion =
1787  pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
1788  freelocale(loc);
1789  }
1790  else
1791  ereport(ERROR,
1792  (errmsg("could not load locale \"%s\"", collcollate)));
1793 #elif defined(WIN32)
1794  /*
1795  * If we are targeting Windows Vista and above, we can ask for a name
1796  * given a collation name (earlier versions required a location code
1797  * that we don't have).
1798  */
1799  NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
1800  WCHAR wide_collcollate[LOCALE_NAME_MAX_LENGTH];
1801 
1802  MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
1803  LOCALE_NAME_MAX_LENGTH);
1804  if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
1805  {
1806  /*
1807  * GetNLSVersionEx() wants a language tag such as "en-US", not a
1808  * locale name like "English_United States.1252". Until those
1809  * values can be prevented from entering the system, or 100%
1810  * reliably converted to the more useful tag format, tolerate the
1811  * resulting error and report that we have no version data.
1812  */
1813  if (GetLastError() == ERROR_INVALID_PARAMETER)
1814  return NULL;
1815 
1816  ereport(ERROR,
1817  (errmsg("could not get collation version for locale \"%s\": error code %lu",
1818  collcollate,
1819  GetLastError())));
1820  }
1821  collversion = psprintf("%lu.%lu,%lu.%lu",
1822  (version.dwNLSVersion >> 8) & 0xFFFF,
1823  version.dwNLSVersion & 0xFF,
1824  (version.dwDefinedVersion >> 8) & 0xFFFF,
1825  version.dwDefinedVersion & 0xFF);
1826 #endif
1827  }
1828 
1829  return collversion;
1830 }
1831 
1832 /*
1833  * pg_strncoll_libc_win32_utf8
1834  *
1835  * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
1836  * invoke wcscoll() or wcscoll_l().
1837  */
1838 #ifdef WIN32
1839 static int
1840 pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
1841  size_t len2, pg_locale_t locale)
1842 {
1843  char sbuf[TEXTBUFLEN];
1844  char *buf = sbuf;
1845  char *a1p,
1846  *a2p;
1847  int a1len = len1 * 2 + 2;
1848  int a2len = len2 * 2 + 2;
1849  int r;
1850  int result;
1851 
1852  Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1854 #ifndef WIN32
1855  Assert(false);
1856 #endif
1857 
1858  if (a1len + a2len > TEXTBUFLEN)
1859  buf = palloc(a1len + a2len);
1860 
1861  a1p = buf;
1862  a2p = buf + a1len;
1863 
1864  /* API does not work for zero-length input */
1865  if (len1 == 0)
1866  r = 0;
1867  else
1868  {
1869  r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1870  (LPWSTR) a1p, a1len / 2);
1871  if (!r)
1872  ereport(ERROR,
1873  (errmsg("could not convert string to UTF-16: error code %lu",
1874  GetLastError())));
1875  }
1876  ((LPWSTR) a1p)[r] = 0;
1877 
1878  if (len2 == 0)
1879  r = 0;
1880  else
1881  {
1882  r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1883  (LPWSTR) a2p, a2len / 2);
1884  if (!r)
1885  ereport(ERROR,
1886  (errmsg("could not convert string to UTF-16: error code %lu",
1887  GetLastError())));
1888  }
1889  ((LPWSTR) a2p)[r] = 0;
1890 
1891  errno = 0;
1892  if (locale)
1893  result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
1894  else
1895  result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1896  if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */
1897  ereport(ERROR,
1898  (errmsg("could not compare Unicode strings: %m")));
1899 
1900  if (buf != sbuf)
1901  pfree(buf);
1902 
1903  return result;
1904 }
1905 #endif /* WIN32 */
1906 
1907 /*
1908  * pg_strcoll_libc
1909  *
1910  * Call strcoll(), strcoll_l(), wcscoll(), or wcscoll_l() as appropriate for
1911  * the given locale, platform, and database encoding. If the locale is NULL,
1912  * use the database collation.
1913  *
1914  * Arguments must be encoded in the database encoding and nul-terminated.
1915  */
1916 static int
1917 pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
1918 {
1919  int result;
1920 
1921  Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1922 #ifdef WIN32
1923  if (GetDatabaseEncoding() == PG_UTF8)
1924  {
1925  size_t len1 = strlen(arg1);
1926  size_t len2 = strlen(arg2);
1927 
1928  result = pg_strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale);
1929  }
1930  else
1931 #endif /* WIN32 */
1932  if (locale)
1933  result = strcoll_l(arg1, arg2, locale->info.lt);
1934  else
1935  result = strcoll(arg1, arg2);
1936 
1937  return result;
1938 }
1939 
1940 /*
1941  * pg_strncoll_libc
1942  *
1943  * Nul-terminate the arguments and call pg_strcoll_libc().
1944  */
1945 static int
1946 pg_strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2,
1948 {
1949  char sbuf[TEXTBUFLEN];
1950  char *buf = sbuf;
1951  size_t bufsize1 = len1 + 1;
1952  size_t bufsize2 = len2 + 1;
1953  char *arg1n;
1954  char *arg2n;
1955  int result;
1956 
1957  Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1958 
1959 #ifdef WIN32
1960  /* check for this case before doing the work for nul-termination */
1961  if (GetDatabaseEncoding() == PG_UTF8)
1962  return pg_strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale);
1963 #endif /* WIN32 */
1964 
1965  if (bufsize1 + bufsize2 > TEXTBUFLEN)
1966  buf = palloc(bufsize1 + bufsize2);
1967 
1968  arg1n = buf;
1969  arg2n = buf + bufsize1;
1970 
1971  /* nul-terminate arguments */
1972  memcpy(arg1n, arg1, len1);
1973  arg1n[len1] = '\0';
1974  memcpy(arg2n, arg2, len2);
1975  arg2n[len2] = '\0';
1976 
1977  result = pg_strcoll_libc(arg1n, arg2n, locale);
1978 
1979  if (buf != sbuf)
1980  pfree(buf);
1981 
1982  return result;
1983 }
1984 
1985 #ifdef USE_ICU
1986 
1987 /*
1988  * pg_strncoll_icu_no_utf8
1989  *
1990  * Convert the arguments from the database encoding to UChar strings, then
1991  * call ucol_strcoll(). An argument length of -1 means that the string is
1992  * NUL-terminated.
1993  *
1994  * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(),
1995  * caller should call that instead.
1996  */
1997 static int
1998 pg_strncoll_icu_no_utf8(const char *arg1, int32_t len1,
1999  const char *arg2, int32_t len2, pg_locale_t locale)
2000 {
2001  char sbuf[TEXTBUFLEN];
2002  char *buf = sbuf;
2003  int32_t ulen1;
2004  int32_t ulen2;
2005  size_t bufsize1;
2006  size_t bufsize2;
2007  UChar *uchar1,
2008  *uchar2;
2009  int result;
2010 
2011  Assert(locale->provider == COLLPROVIDER_ICU);
2012 #ifdef HAVE_UCOL_STRCOLLUTF8
2014 #endif
2015 
2016  init_icu_converter();
2017 
2018  ulen1 = uchar_length(icu_converter, arg1, len1);
2019  ulen2 = uchar_length(icu_converter, arg2, len2);
2020 
2021  bufsize1 = (ulen1 + 1) * sizeof(UChar);
2022  bufsize2 = (ulen2 + 1) * sizeof(UChar);
2023 
2024  if (bufsize1 + bufsize2 > TEXTBUFLEN)
2025  buf = palloc(bufsize1 + bufsize2);
2026 
2027  uchar1 = (UChar *) buf;
2028  uchar2 = (UChar *) (buf + bufsize1);
2029 
2030  ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
2031  ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
2032 
2033  result = ucol_strcoll(locale->info.icu.ucol,
2034  uchar1, ulen1,
2035  uchar2, ulen2);
2036 
2037  if (buf != sbuf)
2038  pfree(buf);
2039 
2040  return result;
2041 }
2042 
2043 /*
2044  * pg_strncoll_icu
2045  *
2046  * Call ucol_strcollUTF8() or ucol_strcoll() as appropriate for the given
2047  * database encoding. An argument length of -1 means the string is
2048  * NUL-terminated.
2049  *
2050  * Arguments must be encoded in the database encoding.
2051  */
2052 static int
2053 pg_strncoll_icu(const char *arg1, int32_t len1, const char *arg2, int32_t len2,
2055 {
2056  int result;
2057 
2058  Assert(locale->provider == COLLPROVIDER_ICU);
2059 
2060 #ifdef HAVE_UCOL_STRCOLLUTF8
2061  if (GetDatabaseEncoding() == PG_UTF8)
2062  {
2063  UErrorCode status;
2064 
2065  status = U_ZERO_ERROR;
2066  result = ucol_strcollUTF8(locale->info.icu.ucol,
2067  arg1, len1,
2068  arg2, len2,
2069  &status);
2070  if (U_FAILURE(status))
2071  ereport(ERROR,
2072  (errmsg("collation failed: %s", u_errorName(status))));
2073  }
2074  else
2075 #endif
2076  {
2077  result = pg_strncoll_icu_no_utf8(arg1, len1, arg2, len2, locale);
2078  }
2079 
2080  return result;
2081 }
2082 
2083 #endif /* USE_ICU */
2084 
2085 /*
2086  * pg_strcoll
2087  *
2088  * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll(), strcoll_l(), wcscoll(),
2089  * or wcscoll_l() as appropriate for the given locale, platform, and database
2090  * encoding. If the locale is not specified, use the database collation.
2091  *
2092  * Arguments must be encoded in the database encoding and nul-terminated.
2093  *
2094  * The caller is responsible for breaking ties if the collation is
2095  * deterministic; this maintains consistency with pg_strxfrm(), which cannot
2096  * easily account for deterministic collations.
2097  */
2098 int
2099 pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
2100 {
2101  int result;
2102 
2103  if (!locale || locale->provider == COLLPROVIDER_LIBC)
2104  result = pg_strcoll_libc(arg1, arg2, locale);
2105 #ifdef USE_ICU
2106  else if (locale->provider == COLLPROVIDER_ICU)
2107  result = pg_strncoll_icu(arg1, -1, arg2, -1, locale);
2108 #endif
2109  else
2110  /* shouldn't happen */
2111  PGLOCALE_SUPPORT_ERROR(locale->provider);
2112 
2113  return result;
2114 }
2115 
2116 /*
2117  * pg_strncoll
2118  *
2119  * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll(), strcoll_l(), wcscoll(),
2120  * or wcscoll_l() as appropriate for the given locale, platform, and database
2121  * encoding. If the locale is not specified, use the database collation.
2122  *
2123  * Arguments must be encoded in the database encoding.
2124  *
2125  * This function may need to nul-terminate the arguments for libc functions;
2126  * so if the caller already has nul-terminated strings, it should call
2127  * pg_strcoll() instead.
2128  *
2129  * The caller is responsible for breaking ties if the collation is
2130  * deterministic; this maintains consistency with pg_strnxfrm(), which cannot
2131  * easily account for deterministic collations.
2132  */
2133 int
2134 pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2,
2136 {
2137  int result;
2138 
2139  if (!locale || locale->provider == COLLPROVIDER_LIBC)
2140  result = pg_strncoll_libc(arg1, len1, arg2, len2, locale);
2141 #ifdef USE_ICU
2142  else if (locale->provider == COLLPROVIDER_ICU)
2143  result = pg_strncoll_icu(arg1, len1, arg2, len2, locale);
2144 #endif
2145  else
2146  /* shouldn't happen */
2147  PGLOCALE_SUPPORT_ERROR(locale->provider);
2148 
2149  return result;
2150 }
2151 
2152 
2153 static size_t
2154 pg_strxfrm_libc(char *dest, const char *src, size_t destsize,
2156 {
2157  Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
2158 
2159 #ifdef TRUST_STRXFRM
2160  if (locale)
2161  return strxfrm_l(dest, src, destsize, locale->info.lt);
2162  else
2163  return strxfrm(dest, src, destsize);
2164 #else
2165  /* shouldn't happen */
2166  PGLOCALE_SUPPORT_ERROR(locale->provider);
2167  return 0; /* keep compiler quiet */
2168 #endif
2169 }
2170 
2171 static size_t
2172 pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize,
2174 {
2175  char sbuf[TEXTBUFLEN];
2176  char *buf = sbuf;
2177  size_t bufsize = srclen + 1;
2178  size_t result;
2179 
2180  Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
2181 
2182  if (bufsize > TEXTBUFLEN)
2183  buf = palloc(bufsize);
2184 
2185  /* nul-terminate arguments */
2186  memcpy(buf, src, srclen);
2187  buf[srclen] = '\0';
2188 
2189  result = pg_strxfrm_libc(dest, buf, destsize, locale);
2190 
2191  if (buf != sbuf)
2192  pfree(buf);
2193 
2194  /* if dest is defined, it should be nul-terminated */
2195  Assert(result >= destsize || dest[result] == '\0');
2196 
2197  return result;
2198 }
2199 
2200 #ifdef USE_ICU
2201 
2202 /* 'srclen' of -1 means the strings are NUL-terminated */
2203 static size_t
2204 pg_strnxfrm_icu(char *dest, const char *src, int32_t srclen, int32_t destsize,
2206 {
2207  char sbuf[TEXTBUFLEN];
2208  char *buf = sbuf;
2209  UChar *uchar;
2210  int32_t ulen;
2211  size_t uchar_bsize;
2212  Size result_bsize;
2213 
2214  Assert(locale->provider == COLLPROVIDER_ICU);
2215 
2216  init_icu_converter();
2217 
2218  ulen = uchar_length(icu_converter, src, srclen);
2219 
2220  uchar_bsize = (ulen + 1) * sizeof(UChar);
2221 
2222  if (uchar_bsize > TEXTBUFLEN)
2223  buf = palloc(uchar_bsize);
2224 
2225  uchar = (UChar *) buf;
2226 
2227  ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
2228 
2229  result_bsize = ucol_getSortKey(locale->info.icu.ucol,
2230  uchar, ulen,
2231  (uint8_t *) dest, destsize);
2232 
2233  /*
2234  * ucol_getSortKey() counts the nul-terminator in the result length, but
2235  * this function should not.
2236  */
2237  Assert(result_bsize > 0);
2238  result_bsize--;
2239 
2240  if (buf != sbuf)
2241  pfree(buf);
2242 
2243  /* if dest is defined, it should be nul-terminated */
2244  Assert(result_bsize >= destsize || dest[result_bsize] == '\0');
2245 
2246  return result_bsize;
2247 }
2248 
2249 /* 'srclen' of -1 means the strings are NUL-terminated */
2250 static size_t
2251 pg_strnxfrm_prefix_icu_no_utf8(char *dest, const char *src, int32_t srclen,
2252  int32_t destsize, pg_locale_t locale)
2253 {
2254  char sbuf[TEXTBUFLEN];
2255  char *buf = sbuf;
2256  UCharIterator iter;
2257  uint32_t state[2];
2258  UErrorCode status;
2259  int32_t ulen = -1;
2260  UChar *uchar = NULL;
2261  size_t uchar_bsize;
2262  Size result_bsize;
2263 
2264  Assert(locale->provider == COLLPROVIDER_ICU);
2266 
2267  init_icu_converter();
2268 
2269  ulen = uchar_length(icu_converter, src, srclen);
2270 
2271  uchar_bsize = (ulen + 1) * sizeof(UChar);
2272 
2273  if (uchar_bsize > TEXTBUFLEN)
2274  buf = palloc(uchar_bsize);
2275 
2276  uchar = (UChar *) buf;
2277 
2278  ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
2279 
2280  uiter_setString(&iter, uchar, ulen);
2281  state[0] = state[1] = 0; /* won't need that again */
2282  status = U_ZERO_ERROR;
2283  result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol,
2284  &iter,
2285  state,
2286  (uint8_t *) dest,
2287  destsize,
2288  &status);
2289  if (U_FAILURE(status))
2290  ereport(ERROR,
2291  (errmsg("sort key generation failed: %s",
2292  u_errorName(status))));
2293 
2294  return result_bsize;
2295 }
2296 
2297 /* 'srclen' of -1 means the strings are NUL-terminated */
2298 static size_t
2299 pg_strnxfrm_prefix_icu(char *dest, const char *src, int32_t srclen,
2300  int32_t destsize, pg_locale_t locale)
2301 {
2302  size_t result;
2303 
2304  Assert(locale->provider == COLLPROVIDER_ICU);
2305 
2306  if (GetDatabaseEncoding() == PG_UTF8)
2307  {
2308  UCharIterator iter;
2309  uint32_t state[2];
2310  UErrorCode status;
2311 
2312  uiter_setUTF8(&iter, src, srclen);
2313  state[0] = state[1] = 0; /* won't need that again */
2314  status = U_ZERO_ERROR;
2315  result = ucol_nextSortKeyPart(locale->info.icu.ucol,
2316  &iter,
2317  state,
2318  (uint8_t *) dest,
2319  destsize,
2320  &status);
2321  if (U_FAILURE(status))
2322  ereport(ERROR,
2323  (errmsg("sort key generation failed: %s",
2324  u_errorName(status))));
2325  }
2326  else
2327  result = pg_strnxfrm_prefix_icu_no_utf8(dest, src, srclen, destsize,
2328  locale);
2329 
2330  return result;
2331 }
2332 
2333 #endif
2334 
2335 /*
2336  * Return true if the collation provider supports pg_strxfrm() and
2337  * pg_strnxfrm(); otherwise false.
2338  *
2339  * Unfortunately, it seems that strxfrm() for non-C collations is broken on
2340  * many common platforms; testing of multiple versions of glibc reveals that,
2341  * for many locales, strcoll() and strxfrm() do not return consistent
2342  * results. While no other libc other than Cygwin has so far been shown to
2343  * have a problem, we take the conservative course of action for right now and
2344  * disable this categorically. (Users who are certain this isn't a problem on
2345  * their system can define TRUST_STRXFRM.)
2346  *
2347  * No similar problem is known for the ICU provider.
2348  */
2349 bool
2351 {
2352  if (!locale || locale->provider == COLLPROVIDER_LIBC)
2353 #ifdef TRUST_STRXFRM
2354  return true;
2355 #else
2356  return false;
2357 #endif
2358  else if (locale->provider == COLLPROVIDER_ICU)
2359  return true;
2360  else
2361  /* shouldn't happen */
2362  PGLOCALE_SUPPORT_ERROR(locale->provider);
2363 
2364  return false; /* keep compiler quiet */
2365 }
2366 
2367 /*
2368  * pg_strxfrm
2369  *
2370  * Transforms 'src' to a nul-terminated string stored in 'dest' such that
2371  * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on
2372  * untransformed strings.
2373  *
2374  * The provided 'src' must be nul-terminated. If 'destsize' is zero, 'dest'
2375  * may be NULL.
2376  *
2377  * Returns the number of bytes needed to store the transformed string,
2378  * excluding the terminating nul byte. If the value returned is 'destsize' or
2379  * greater, the resulting contents of 'dest' are undefined.
2380  */
2381 size_t
2382 pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
2383 {
2384  size_t result = 0; /* keep compiler quiet */
2385 
2386  if (!locale || locale->provider == COLLPROVIDER_LIBC)
2387  result = pg_strxfrm_libc(dest, src, destsize, locale);
2388 #ifdef USE_ICU
2389  else if (locale->provider == COLLPROVIDER_ICU)
2390  result = pg_strnxfrm_icu(dest, src, -1, destsize, locale);
2391 #endif
2392  else
2393  /* shouldn't happen */
2394  PGLOCALE_SUPPORT_ERROR(locale->provider);
2395 
2396  return result;
2397 }
2398 
2399 /*
2400  * pg_strnxfrm
2401  *
2402  * Transforms 'src' to a nul-terminated string stored in 'dest' such that
2403  * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on
2404  * untransformed strings.
2405  *
2406  * 'src' does not need to be nul-terminated. If 'destsize' is zero, 'dest' may
2407  * be NULL.
2408  *
2409  * Returns the number of bytes needed to store the transformed string,
2410  * excluding the terminating nul byte. If the value returned is 'destsize' or
2411  * greater, the resulting contents of 'dest' are undefined.
2412  *
2413  * This function may need to nul-terminate the argument for libc functions;
2414  * so if the caller already has a nul-terminated string, it should call
2415  * pg_strxfrm() instead.
2416  */
2417 size_t
2418 pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen,
2420 {
2421  size_t result = 0; /* keep compiler quiet */
2422 
2423  if (!locale || locale->provider == COLLPROVIDER_LIBC)
2424  result = pg_strnxfrm_libc(dest, src, srclen, destsize, locale);
2425 #ifdef USE_ICU
2426  else if (locale->provider == COLLPROVIDER_ICU)
2427  result = pg_strnxfrm_icu(dest, src, srclen, destsize, locale);
2428 #endif
2429  else
2430  /* shouldn't happen */
2431  PGLOCALE_SUPPORT_ERROR(locale->provider);
2432 
2433  return result;
2434 }
2435 
2436 /*
2437  * Return true if the collation provider supports pg_strxfrm_prefix() and
2438  * pg_strnxfrm_prefix(); otherwise false.
2439  */
2440 bool
2442 {
2443  if (!locale || locale->provider == COLLPROVIDER_LIBC)
2444  return false;
2445  else if (locale->provider == COLLPROVIDER_ICU)
2446  return true;
2447  else
2448  /* shouldn't happen */
2449  PGLOCALE_SUPPORT_ERROR(locale->provider);
2450 
2451  return false; /* keep compiler quiet */
2452 }
2453 
2454 /*
2455  * pg_strxfrm_prefix
2456  *
2457  * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary
2458  * memcmp() on the byte sequence is equivalent to pg_strcoll() on
2459  * untransformed strings. The result is not nul-terminated.
2460  *
2461  * The provided 'src' must be nul-terminated.
2462  *
2463  * If destsize is not large enough to hold the resulting byte sequence, stores
2464  * only the first destsize bytes in 'dest'. Returns the number of bytes
2465  * actually copied to 'dest'.
2466  */
2467 size_t
2468 pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
2470 {
2471  size_t result = 0; /* keep compiler quiet */
2472 
2473  if (!locale)
2474  PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC);
2475 #ifdef USE_ICU
2476  else if (locale->provider == COLLPROVIDER_ICU)
2477  result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale);
2478 #endif
2479  else
2480  PGLOCALE_SUPPORT_ERROR(locale->provider);
2481 
2482  return result;
2483 }
2484 
2485 /*
2486  * pg_strnxfrm_prefix
2487  *
2488  * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary
2489  * memcmp() on the byte sequence is equivalent to pg_strcoll() on
2490  * untransformed strings. The result is not nul-terminated.
2491  *
2492  * The provided 'src' must be nul-terminated.
2493  *
2494  * If destsize is not large enough to hold the resulting byte sequence, stores
2495  * only the first destsize bytes in 'dest'. Returns the number of bytes
2496  * actually copied to 'dest'.
2497  *
2498  * This function may need to nul-terminate the argument for libc functions;
2499  * so if the caller already has a nul-terminated string, it should call
2500  * pg_strxfrm_prefix() instead.
2501  */
2502 size_t
2503 pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
2504  size_t srclen, pg_locale_t locale)
2505 {
2506  size_t result = 0; /* keep compiler quiet */
2507 
2508  if (!locale)
2509  PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC);
2510 #ifdef USE_ICU
2511  else if (locale->provider == COLLPROVIDER_ICU)
2512  result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale);
2513 #endif
2514  else
2515  PGLOCALE_SUPPORT_ERROR(locale->provider);
2516 
2517  return result;
2518 }
2519 
2520 /*
2521  * Return required encoding ID for the given locale, or -1 if any encoding is
2522  * valid for the locale.
2523  */
2524 int
2526 {
2527  if (strcmp(locale, "C") == 0)
2528  return -1;
2529  if (strcmp(locale, "C.UTF-8") == 0)
2530  return PG_UTF8;
2531 
2532  ereport(ERROR,
2533  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2534  errmsg("invalid locale name \"%s\" for builtin provider",
2535  locale)));
2536 
2537  return 0; /* keep compiler quiet */
2538 }
2539 
2540 
2541 /*
2542  * Validate the locale and encoding combination, and return the canonical form
2543  * of the locale name.
2544  */
2545 const char *
2547 {
2548  const char *canonical_name = NULL;
2549  int required_encoding;
2550 
2551  if (strcmp(locale, "C") == 0)
2552  canonical_name = "C";
2553  else if (strcmp(locale, "C.UTF-8") == 0 || strcmp(locale, "C.UTF8") == 0)
2554  canonical_name = "C.UTF-8";
2555 
2556  if (!canonical_name)
2557  ereport(ERROR,
2558  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2559  errmsg("invalid locale name \"%s\" for builtin provider",
2560  locale)));
2561 
2562  required_encoding = builtin_locale_encoding(canonical_name);
2563  if (required_encoding >= 0 && encoding != required_encoding)
2564  ereport(ERROR,
2565  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2566  errmsg("encoding \"%s\" does not match locale \"%s\"",
2568 
2569  return canonical_name;
2570 }
2571 
2572 
2573 #ifdef USE_ICU
2574 
2575 /*
2576  * Wrapper around ucol_open() to handle API differences for older ICU
2577  * versions.
2578  */
2579 static UCollator *
2580 pg_ucol_open(const char *loc_str)
2581 {
2582  UCollator *collator;
2583  UErrorCode status;
2584  const char *orig_str = loc_str;
2585  char *fixed_str = NULL;
2586 
2587  /*
2588  * Must never open default collator, because it depends on the environment
2589  * and may change at any time. Should not happen, but check here to catch
2590  * bugs that might be hard to catch otherwise.
2591  *
2592  * NB: the default collator is not the same as the collator for the root
2593  * locale. The root locale may be specified as the empty string, "und", or
2594  * "root". The default collator is opened by passing NULL to ucol_open().
2595  */
2596  if (loc_str == NULL)
2597  elog(ERROR, "opening default collator is not supported");
2598 
2599  /*
2600  * In ICU versions 54 and earlier, "und" is not a recognized spelling of
2601  * the root locale. If the first component of the locale is "und", replace
2602  * with "root" before opening.
2603  */
2604  if (U_ICU_VERSION_MAJOR_NUM < 55)
2605  {
2606  char lang[ULOC_LANG_CAPACITY];
2607 
2608  status = U_ZERO_ERROR;
2609  uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
2610  if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
2611  {
2612  ereport(ERROR,
2613  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2614  errmsg("could not get language from locale \"%s\": %s",
2615  loc_str, u_errorName(status))));
2616  }
2617 
2618  if (strcmp(lang, "und") == 0)
2619  {
2620  const char *remainder = loc_str + strlen("und");
2621 
2622  fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
2623  strcpy(fixed_str, "root");
2624  strcat(fixed_str, remainder);
2625 
2626  loc_str = fixed_str;
2627  }
2628  }
2629 
2630  status = U_ZERO_ERROR;
2631  collator = ucol_open(loc_str, &status);
2632  if (U_FAILURE(status))
2633  ereport(ERROR,
2634  /* use original string for error report */
2635  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2636  errmsg("could not open collator for locale \"%s\": %s",
2637  orig_str, u_errorName(status))));
2638 
2639  if (U_ICU_VERSION_MAJOR_NUM < 54)
2640  {
2641  status = U_ZERO_ERROR;
2642  icu_set_collation_attributes(collator, loc_str, &status);
2643 
2644  /*
2645  * Pretend the error came from ucol_open(), for consistent error
2646  * message across ICU versions.
2647  */
2648  if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
2649  {
2650  ucol_close(collator);
2651  ereport(ERROR,
2652  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2653  errmsg("could not open collator for locale \"%s\": %s",
2654  orig_str, u_errorName(status))));
2655  }
2656  }
2657 
2658  if (fixed_str != NULL)
2659  pfree(fixed_str);
2660 
2661  return collator;
2662 }
2663 
2664 static void
2665 init_icu_converter(void)
2666 {
2667  const char *icu_encoding_name;
2668  UErrorCode status;
2669  UConverter *conv;
2670 
2671  if (icu_converter)
2672  return; /* already done */
2673 
2674  icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
2675  if (!icu_encoding_name)
2676  ereport(ERROR,
2677  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2678  errmsg("encoding \"%s\" not supported by ICU",
2680 
2681  status = U_ZERO_ERROR;
2682  conv = ucnv_open(icu_encoding_name, &status);
2683  if (U_FAILURE(status))
2684  ereport(ERROR,
2685  (errmsg("could not open ICU converter for encoding \"%s\": %s",
2686  icu_encoding_name, u_errorName(status))));
2687 
2688  icu_converter = conv;
2689 }
2690 
2691 /*
2692  * Find length, in UChars, of given string if converted to UChar string.
2693  */
2694 static size_t
2695 uchar_length(UConverter *converter, const char *str, int32_t len)
2696 {
2697  UErrorCode status = U_ZERO_ERROR;
2698  int32_t ulen;
2699 
2700  ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status);
2701  if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
2702  ereport(ERROR,
2703  (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
2704  return ulen;
2705 }
2706 
2707 /*
2708  * Convert the given source string into a UChar string, stored in dest, and
2709  * return the length (in UChars).
2710  */
2711 static int32_t
2712 uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
2713  const char *src, int32_t srclen)
2714 {
2715  UErrorCode status = U_ZERO_ERROR;
2716  int32_t ulen;
2717 
2718  status = U_ZERO_ERROR;
2719  ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status);
2720  if (U_FAILURE(status))
2721  ereport(ERROR,
2722  (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
2723  return ulen;
2724 }
2725 
2726 /*
2727  * Convert a string in the database encoding into a string of UChars.
2728  *
2729  * The source string at buff is of length nbytes
2730  * (it needn't be nul-terminated)
2731  *
2732  * *buff_uchar receives a pointer to the palloc'd result string, and
2733  * the function's result is the number of UChars generated.
2734  *
2735  * The result string is nul-terminated, though most callers rely on the
2736  * result length instead.
2737  */
2738 int32_t
2739 icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
2740 {
2741  int32_t len_uchar;
2742 
2743  init_icu_converter();
2744 
2745  len_uchar = uchar_length(icu_converter, buff, nbytes);
2746 
2747  *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
2748  len_uchar = uchar_convert(icu_converter,
2749  *buff_uchar, len_uchar + 1, buff, nbytes);
2750 
2751  return len_uchar;
2752 }
2753 
2754 /*
2755  * Convert a string of UChars into the database encoding.
2756  *
2757  * The source string at buff_uchar is of length len_uchar
2758  * (it needn't be nul-terminated)
2759  *
2760  * *result receives a pointer to the palloc'd result string, and the
2761  * function's result is the number of bytes generated (not counting nul).
2762  *
2763  * The result string is nul-terminated.
2764  */
2765 int32_t
2766 icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
2767 {
2768  UErrorCode status;
2769  int32_t len_result;
2770 
2771  init_icu_converter();
2772 
2773  status = U_ZERO_ERROR;
2774  len_result = ucnv_fromUChars(icu_converter, NULL, 0,
2775  buff_uchar, len_uchar, &status);
2776  if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
2777  ereport(ERROR,
2778  (errmsg("%s failed: %s", "ucnv_fromUChars",
2779  u_errorName(status))));
2780 
2781  *result = palloc(len_result + 1);
2782 
2783  status = U_ZERO_ERROR;
2784  len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
2785  buff_uchar, len_uchar, &status);
2786  if (U_FAILURE(status) ||
2787  status == U_STRING_NOT_TERMINATED_WARNING)
2788  ereport(ERROR,
2789  (errmsg("%s failed: %s", "ucnv_fromUChars",
2790  u_errorName(status))));
2791 
2792  return len_result;
2793 }
2794 
2795 /*
2796  * Parse collation attributes from the given locale string and apply them to
2797  * the open collator.
2798  *
2799  * First, the locale string is canonicalized to an ICU format locale ID such
2800  * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
2801  * the key-value arguments.
2802  *
2803  * Starting with ICU version 54, the attributes are processed automatically by
2804  * ucol_open(), so this is only necessary for emulating this behavior on older
2805  * versions.
2806  */
2808 static void
2809 icu_set_collation_attributes(UCollator *collator, const char *loc,
2810  UErrorCode *status)
2811 {
2812  int32_t len;
2813  char *icu_locale_id;
2814  char *lower_str;
2815  char *str;
2816  char *token;
2817 
2818  /*
2819  * The input locale may be a BCP 47 language tag, e.g.
2820  * "und-u-kc-ks-level1", which expresses the same attributes in a
2821  * different form. It will be converted to the equivalent ICU format
2822  * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
2823  * uloc_canonicalize().
2824  */
2825  *status = U_ZERO_ERROR;
2826  len = uloc_canonicalize(loc, NULL, 0, status);
2827  icu_locale_id = palloc(len + 1);
2828  *status = U_ZERO_ERROR;
2829  len = uloc_canonicalize(loc, icu_locale_id, len + 1, status);
2830  if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING)
2831  return;
2832 
2833  lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
2834 
2835  pfree(icu_locale_id);
2836 
2837  str = strchr(lower_str, '@');
2838  if (!str)
2839  return;
2840  str++;
2841 
2842  while ((token = strsep(&str, ";")))
2843  {
2844  char *e = strchr(token, '=');
2845 
2846  if (e)
2847  {
2848  char *name;
2849  char *value;
2850  UColAttribute uattr;
2851  UColAttributeValue uvalue;
2852 
2853  *status = U_ZERO_ERROR;
2854 
2855  *e = '\0';
2856  name = token;
2857  value = e + 1;
2858 
2859  /*
2860  * See attribute name and value lists in ICU i18n/coll.cpp
2861  */
2862  if (strcmp(name, "colstrength") == 0)
2863  uattr = UCOL_STRENGTH;
2864  else if (strcmp(name, "colbackwards") == 0)
2865  uattr = UCOL_FRENCH_COLLATION;
2866  else if (strcmp(name, "colcaselevel") == 0)
2867  uattr = UCOL_CASE_LEVEL;
2868  else if (strcmp(name, "colcasefirst") == 0)
2869  uattr = UCOL_CASE_FIRST;
2870  else if (strcmp(name, "colalternate") == 0)
2871  uattr = UCOL_ALTERNATE_HANDLING;
2872  else if (strcmp(name, "colnormalization") == 0)
2873  uattr = UCOL_NORMALIZATION_MODE;
2874  else if (strcmp(name, "colnumeric") == 0)
2875  uattr = UCOL_NUMERIC_COLLATION;
2876  else
2877  /* ignore if unknown */
2878  continue;
2879 
2880  if (strcmp(value, "primary") == 0)
2881  uvalue = UCOL_PRIMARY;
2882  else if (strcmp(value, "secondary") == 0)
2883  uvalue = UCOL_SECONDARY;
2884  else if (strcmp(value, "tertiary") == 0)
2885  uvalue = UCOL_TERTIARY;
2886  else if (strcmp(value, "quaternary") == 0)
2887  uvalue = UCOL_QUATERNARY;
2888  else if (strcmp(value, "identical") == 0)
2889  uvalue = UCOL_IDENTICAL;
2890  else if (strcmp(value, "no") == 0)
2891  uvalue = UCOL_OFF;
2892  else if (strcmp(value, "yes") == 0)
2893  uvalue = UCOL_ON;
2894  else if (strcmp(value, "shifted") == 0)
2895  uvalue = UCOL_SHIFTED;
2896  else if (strcmp(value, "non-ignorable") == 0)
2897  uvalue = UCOL_NON_IGNORABLE;
2898  else if (strcmp(value, "lower") == 0)
2899  uvalue = UCOL_LOWER_FIRST;
2900  else if (strcmp(value, "upper") == 0)
2901  uvalue = UCOL_UPPER_FIRST;
2902  else
2903  {
2904  *status = U_ILLEGAL_ARGUMENT_ERROR;
2905  break;
2906  }
2907 
2908  ucol_setAttribute(collator, uattr, uvalue, status);
2909  }
2910  }
2911 
2912  pfree(lower_str);
2913 }
2914 #endif
2915 
2916 /*
2917  * Return the BCP47 language tag representation of the requested locale.
2918  *
2919  * This function should be called before passing the string to ucol_open(),
2920  * because conversion to a language tag also performs "level 2
2921  * canonicalization". In addition to producing a consistent format, level 2
2922  * canonicalization is able to more accurately interpret different input
2923  * locale string formats, such as POSIX and .NET IDs.
2924  */
2925 char *
2926 icu_language_tag(const char *loc_str, int elevel)
2927 {
2928 #ifdef USE_ICU
2929  UErrorCode status;
2930  char *langtag;
2931  size_t buflen = 32; /* arbitrary starting buffer size */
2932  const bool strict = true;
2933 
2934  /*
2935  * A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
2936  * RFC5646 section 4.4). Additionally, in older ICU versions,
2937  * uloc_toLanguageTag() doesn't always return the ultimate length on the
2938  * first call, necessitating a loop.
2939  */
2940  langtag = palloc(buflen);
2941  while (true)
2942  {
2943  status = U_ZERO_ERROR;
2944  uloc_toLanguageTag(loc_str, langtag, buflen, strict, &status);
2945 
2946  /* try again if the buffer is not large enough */
2947  if ((status == U_BUFFER_OVERFLOW_ERROR ||
2948  status == U_STRING_NOT_TERMINATED_WARNING) &&
2949  buflen < MaxAllocSize)
2950  {
2951  buflen = Min(buflen * 2, MaxAllocSize);
2952  langtag = repalloc(langtag, buflen);
2953  continue;
2954  }
2955 
2956  break;
2957  }
2958 
2959  if (U_FAILURE(status))
2960  {
2961  pfree(langtag);
2962 
2963  if (elevel > 0)
2964  ereport(elevel,
2965  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2966  errmsg("could not convert locale name \"%s\" to language tag: %s",
2967  loc_str, u_errorName(status))));
2968  return NULL;
2969  }
2970 
2971  return langtag;
2972 #else /* not USE_ICU */
2973  ereport(ERROR,
2974  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2975  errmsg("ICU is not supported in this build")));
2976  return NULL; /* keep compiler quiet */
2977 #endif /* not USE_ICU */
2978 }
2979 
2980 /*
2981  * Perform best-effort check that the locale is a valid one.
2982  */
2983 void
2984 icu_validate_locale(const char *loc_str)
2985 {
2986 #ifdef USE_ICU
2987  UCollator *collator;
2988  UErrorCode status;
2989  char lang[ULOC_LANG_CAPACITY];
2990  bool found = false;
2991  int elevel = icu_validation_level;
2992 
2993  /* no validation */
2994  if (elevel < 0)
2995  return;
2996 
2997  /* downgrade to WARNING during pg_upgrade */
2998  if (IsBinaryUpgrade && elevel > WARNING)
2999  elevel = WARNING;
3000 
3001  /* validate that we can extract the language */
3002  status = U_ZERO_ERROR;
3003  uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
3004  if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
3005  {
3006  ereport(elevel,
3007  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3008  errmsg("could not get language from ICU locale \"%s\": %s",
3009  loc_str, u_errorName(status)),
3010  errhint("To disable ICU locale validation, set the parameter \"%s\" to \"%s\".",
3011  "icu_validation_level", "disabled")));
3012  return;
3013  }
3014 
3015  /* check for special language name */
3016  if (strcmp(lang, "") == 0 ||
3017  strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
3018  found = true;
3019 
3020  /* search for matching language within ICU */
3021  for (int32_t i = 0; !found && i < uloc_countAvailable(); i++)
3022  {
3023  const char *otherloc = uloc_getAvailable(i);
3024  char otherlang[ULOC_LANG_CAPACITY];
3025 
3026  status = U_ZERO_ERROR;
3027  uloc_getLanguage(otherloc, otherlang, ULOC_LANG_CAPACITY, &status);
3028  if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
3029  continue;
3030 
3031  if (strcmp(lang, otherlang) == 0)
3032  found = true;
3033  }
3034 
3035  if (!found)
3036  ereport(elevel,
3037  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3038  errmsg("ICU locale \"%s\" has unknown language \"%s\"",
3039  loc_str, lang),
3040  errhint("To disable ICU locale validation, set the parameter \"%s\" to \"%s\".",
3041  "icu_validation_level", "disabled")));
3042 
3043  /* check that it can be opened */
3044  collator = pg_ucol_open(loc_str);
3045  ucol_close(collator);
3046 #else /* not USE_ICU */
3047  /* could get here if a collation was created by a build with ICU */
3048  ereport(ERROR,
3049  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3050  errmsg("ICU is not supported in this build")));
3051 #endif /* not USE_ICU */
3052 }
3053 
3054 /*
3055  * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
3056  * Therefore we keep them here rather than with the mbutils code.
3057  */
3058 
3059 /*
3060  * wchar2char --- convert wide characters to multibyte format
3061  *
3062  * This has the same API as the standard wcstombs_l() function; in particular,
3063  * tolen is the maximum number of bytes to store at *to, and *from must be
3064  * zero-terminated. The output will be zero-terminated iff there is room.
3065  */
3066 size_t
3067 wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
3068 {
3069  size_t result;
3070 
3071  Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
3072 
3073  if (tolen == 0)
3074  return 0;
3075 
3076 #ifdef WIN32
3077 
3078  /*
3079  * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
3080  * for some reason mbstowcs and wcstombs won't do this for us, so we use
3081  * MultiByteToWideChar().
3082  */
3083  if (GetDatabaseEncoding() == PG_UTF8)
3084  {
3085  result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
3086  NULL, NULL);
3087  /* A zero return is failure */
3088  if (result <= 0)
3089  result = -1;
3090  else
3091  {
3092  Assert(result <= tolen);
3093  /* Microsoft counts the zero terminator in the result */
3094  result--;
3095  }
3096  }
3097  else
3098 #endif /* WIN32 */
3099  if (locale == (pg_locale_t) 0)
3100  {
3101  /* Use wcstombs directly for the default locale */
3102  result = wcstombs(to, from, tolen);
3103  }
3104  else
3105  {
3106  /* Use wcstombs_l for nondefault locales */
3107  result = wcstombs_l(to, from, tolen, locale->info.lt);
3108  }
3109 
3110  return result;
3111 }
3112 
3113 /*
3114  * char2wchar --- convert multibyte characters to wide characters
3115  *
3116  * This has almost the API of mbstowcs_l(), except that *from need not be
3117  * null-terminated; instead, the number of input bytes is specified as
3118  * fromlen. Also, we ereport() rather than returning -1 for invalid
3119  * input encoding. tolen is the maximum number of wchar_t's to store at *to.
3120  * The output will be zero-terminated iff there is room.
3121  */
3122 size_t
3123 char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
3125 {
3126  size_t result;
3127 
3128  Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
3129 
3130  if (tolen == 0)
3131  return 0;
3132 
3133 #ifdef WIN32
3134  /* See WIN32 "Unicode" comment above */
3135  if (GetDatabaseEncoding() == PG_UTF8)
3136  {
3137  /* Win32 API does not work for zero-length input */
3138  if (fromlen == 0)
3139  result = 0;
3140  else
3141  {
3142  result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
3143  /* A zero return is failure */
3144  if (result == 0)
3145  result = -1;
3146  }
3147 
3148  if (result != -1)
3149  {
3150  Assert(result < tolen);
3151  /* Append trailing null wchar (MultiByteToWideChar() does not) */
3152  to[result] = 0;
3153  }
3154  }
3155  else
3156 #endif /* WIN32 */
3157  {
3158  /* mbstowcs requires ending '\0' */
3159  char *str = pnstrdup(from, fromlen);
3160 
3161  if (locale == (pg_locale_t) 0)
3162  {
3163  /* Use mbstowcs directly for the default locale */
3164  result = mbstowcs(to, str, tolen);
3165  }
3166  else
3167  {
3168  /* Use mbstowcs_l for nondefault locales */
3169  result = mbstowcs_l(to, str, tolen, locale->info.lt);
3170  }
3171 
3172  pfree(str);
3173  }
3174 
3175  if (result == -1)
3176  {
3177  /*
3178  * Invalid multibyte character encountered. We try to give a useful
3179  * error message by letting pg_verifymbstr check the string. But it's
3180  * possible that the string is OK to us, and not OK to mbstowcs ---
3181  * this suggests that the LC_CTYPE locale is different from the
3182  * database encoding. Give a generic error message if pg_verifymbstr
3183  * can't find anything wrong.
3184  */
3185  pg_verifymbstr(from, fromlen, false); /* might not return */
3186  /* but if it does ... */
3187  ereport(ERROR,
3188  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
3189  errmsg("invalid multibyte character for locale"),
3190  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
3191  }
3192 
3193  return result;
3194 }
#define TextDatumGetCString(d)
Definition: builtins.h:98
#define NameStr(name)
Definition: c.h:746
#define Min(x, y)
Definition: c.h:1004
#define pg_attribute_unused()
Definition: c.h:123
#define Assert(condition)
Definition: c.h:858
#define lengthof(array)
Definition: c.h:788
#define OidIsValid(objectId)
Definition: c.h:775
size_t Size
Definition: c.h:605
Oid collid
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define PG_RE_THROW()
Definition: elog.h:411
#define DEBUG3
Definition: elog.h:28
#define FATAL
Definition: elog.h:41
#define PG_TRY(...)
Definition: elog.h:370
#define WARNING
Definition: elog.h:36
#define PG_END_TRY(...)
Definition: elog.h:395
#define ERROR
Definition: elog.h:39
#define PG_CATCH(...)
Definition: elog.h:380
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
const char * get_encoding_name_for_icu(int encoding)
Definition: encnames.c:472
#define palloc_array(type, count)
Definition: fe_memutils.h:64
char * asc_tolower(const char *buff, size_t nbytes)
Definition: formatting.c:2158
bool IsBinaryUpgrade
Definition: globals.c:119
#define newval
GucSource
Definition: guc.h:108
@ PGC_S_DEFAULT
Definition: guc.h:109
const char * str
#define free(a)
Definition: header.h:65
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define GETSTRUCT(TUP)
Definition: htup_details.h:653
#define period
Definition: indent_codes.h:66
#define token
Definition: indent_globs.h:126
#define bufsize
Definition: indent_globs.h:36
static struct @155 value
static char * locale
Definition: initdb.c:140
int i
Definition: isn.c:73
static struct pg_tm tm
Definition: localtime.c:104
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3366
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:676
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
int pg_mbstrlen(const char *mbstr)
Definition: mbutils.c:1037
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition: mbutils.c:1556
void SetMessageEncoding(int encoding)
Definition: mbutils.c:1171
char * pnstrdup(const char *in, Size len)
Definition: mcxt.c:1707
char * pstrdup(const char *in)
Definition: mcxt.c:1696
void pfree(void *pointer)
Definition: mcxt.c:1521
MemoryContext TopMemoryContext
Definition: mcxt.c:149
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1181
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition: mcxt.c:1683
void * palloc(Size size)
Definition: mcxt.c:1317
#define MaxAllocSize
Definition: memutils.h:40
static char format
FormData_pg_collation * Form_pg_collation
Definition: pg_collation.h:58
const void size_t len
int32 encoding
Definition: pg_database.h:41
size_t pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale)
Definition: pg_locale.c:2418
static size_t pg_strxfrm_libc(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:2154
int icu_validation_level
Definition: pg_locale.c:102
void cache_locale_time(void)
Definition: pg_locale.c:806
bool pg_strxfrm_enabled(pg_locale_t locale)
Definition: pg_locale.c:2350
char * localized_full_months[12+1]
Definition: pg_locale.c:114
size_t wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
Definition: pg_locale.c:3067
struct lconv * PGLC_localeconv(void)
Definition: pg_locale.c:524
int pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2, pg_locale_t locale)
Definition: pg_locale.c:2134
void make_icu_collator(const char *iculocstr, const char *icurules, struct pg_locale_struct *resultp)
Definition: pg_locale.c:1449
bool lc_collate_is_c(Oid collation)
Definition: pg_locale.c:1317
struct pg_locale_struct default_locale
Definition: pg_locale.c:1446
void icu_validate_locale(const char *loc_str)
Definition: pg_locale.c:2984
static bool CurrentLCTimeValid
Definition: pg_locale.c:121
void assign_locale_time(const char *newval, void *extra)
Definition: pg_locale.c:388
bool check_locale_time(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:382
char * locale_messages
Definition: pg_locale.c:97
char * locale_numeric
Definition: pg_locale.c:99
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1552
int builtin_locale_encoding(const char *locale)
Definition: pg_locale.c:2525
bool database_ctype_is_c
Definition: pg_locale.c:117
#define PGLOCALE_SUPPORT_ERROR(provider)
Definition: pg_locale.c:84
char * locale_time
Definition: pg_locale.c:100
static void cache_single_string(char **dst, const char *src, int encoding)
Definition: pg_locale.c:783
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1384
bool check_locale_numeric(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:370
bool pg_locale_deterministic(pg_locale_t locale)
Definition: pg_locale.c:1532
static void db_encoding_convert(int encoding, char **str)
Definition: pg_locale.c:494
static size_t wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
Definition: pg_locale.c:182
void assign_locale_numeric(const char *newval, void *extra)
Definition: pg_locale.c:376
bool check_locale_messages(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:404
#define MAX_L10N_DATA
Definition: pg_locale.c:93
char * get_collation_actual_version(char collprovider, const char *collcollate)
Definition: pg_locale.c:1730
static void free_struct_lconv(struct lconv *s)
Definition: pg_locale.c:444
char * pg_perm_setlocale(int category, const char *locale)
Definition: pg_locale.c:212
void assign_locale_messages(const char *newval, void *extra)
Definition: pg_locale.c:427
static bool CurrentLocaleConvValid
Definition: pg_locale.c:120
char * icu_language_tag(const char *loc_str, int elevel)
Definition: pg_locale.c:2926
int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
Definition: pg_locale.c:2099
static int pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
Definition: pg_locale.c:1917
static HTAB * collation_cache
Definition: pg_locale.c:134
bool pg_strxfrm_prefix_enabled(pg_locale_t locale)
Definition: pg_locale.c:2441
static void report_newlocale_failure(const char *localename)
Definition: pg_locale.c:1503
char * localized_abbrev_months[12+1]
Definition: pg_locale.c:113
static int pg_strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2, pg_locale_t locale)
Definition: pg_locale.c:1946
size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale)
Definition: pg_locale.c:2503
static bool struct_lconv_is_valid(struct lconv *s)
Definition: pg_locale.c:463
char * localized_full_days[7+1]
Definition: pg_locale.c:112
size_t pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:2382
const char * builtin_validate_locale(int encoding, const char *locale)
Definition: pg_locale.c:2546
static collation_cache_entry * lookup_collation_cache(Oid collation, bool set_flags)
Definition: pg_locale.c:1230
void assign_locale_monetary(const char *newval, void *extra)
Definition: pg_locale.c:364
#define TEXTBUFLEN
Definition: pg_locale.c:91
bool check_locale(int category, const char *locale, char **canonname)
Definition: pg_locale.c:315
char * localized_abbrev_days[7+1]
Definition: pg_locale.c:111
size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:2468
char * locale_monetary
Definition: pg_locale.c:98
size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale)
Definition: pg_locale.c:3123
bool check_locale_monetary(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:358
static size_t mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
Definition: pg_locale.c:166
static size_t pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:2172
#define LOCALE_NAME_BUFLEN
Definition: pg_locale.h:36
static rewind_source * source
Definition: pg_rewind.c:89
static char * buf
Definition: pg_test_fsync.c:73
@ PG_SQL_ASCII
Definition: pg_wchar.h:226
@ PG_UTF8
Definition: pg_wchar.h:232
#define pg_encoding_to_char
Definition: pg_wchar.h:630
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
char * strsep(char **stringp, const char *delim)
Definition: strsep.c:49
int pg_get_encoding_from_locale(const char *ctype, bool write_message)
Definition: chklocale.c:428
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int pg_strncasecmp(const char *s1, const char *s2, size_t n)
Definition: pgstrcasecmp.c:69
uintptr_t Datum
Definition: postgres.h:64
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:252
unsigned int Oid
Definition: postgres_ext.h:31
e
Definition: preproc-init.c:82
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
tree ctl
Definition: radixtree.h:1853
char * quote_qualified_identifier(const char *qualifier, const char *ident)
Definition: ruleutils.c:12680
Definition: dynahash.c:220
Definition: pg_locale.c:126
bool collate_is_c
Definition: pg_locale.c:128
Oid collid
Definition: pg_locale.c:127
pg_locale_t locale
Definition: pg_locale.c:131
bool flags_valid
Definition: pg_locale.c:130
bool ctype_is_c
Definition: pg_locale.c:129
locale_t lt
Definition: pg_locale.h:83
struct pg_locale_struct::@151::@152 builtin
const char * locale
Definition: pg_locale.h:81
union pg_locale_struct::@151 info
bool deterministic
Definition: pg_locale.h:76
Definition: regguts.h:323
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:266
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:218
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:479
Datum SysCacheGetAttrNotNull(int cacheId, HeapTuple tup, AttrNumber attributeNumber)
Definition: syscache.c:510
const char * name
#define locale_t
Definition: win32_port.h:429
#define strcoll_l
Definition: win32_port.h:452
#define strxfrm_l
Definition: win32_port.h:453
#define wcscoll_l
Definition: win32_port.h:454
#define setenv(x, y, z)
Definition: win32_port.h:542
#define setlocale(a, b)
Definition: win32_port.h:472