15#include <unicode/ucnv.h>
16#include <unicode/ustring.h>
23#if U_ICU_VERSION_MAJOR_NUM >= 53
24#define HAVE_UCOL_STRCOLLUTF8 1
26#undef HAVE_UCOL_STRCOLLUTF8
48#define TEXTBUFLEN 1024
/*
 * Prototypes for the four ICU case-mapping entry points: lower, title,
 * upper and fold.  Each one takes a destination buffer, the size of that
 * buffer and a source string, and returns a size_t.
 *
 * NOTE(review): the trailing parameters and the closing parenthesis of each
 * prototype are not present in this listing.
 */
51extern size_t strlower_icu(
char *dst,
size_t dstsize,
const char *src,
53extern size_t strtitle_icu(
char *dst,
size_t dstsize,
const char *src,
55extern size_t strupper_icu(
char *dst,
size_t dstsize,
const char *src,
57extern size_t strfold_icu(
char *dst,
size_t dstsize,
const char *src,
/*
 * Prototypes for the collation-related routines.
 *
 * pg_ucol_open takes a locale string and returns a UCollator pointer.
 * strncoll_icu compares two (pointer, length) string arguments and returns
 * an int.  strnxfrm_icu and strnxfrm_prefix_icu write into a caller supplied
 * buffer of destsize bytes and return a size_t.
 * get_collation_actual_version_icu takes a collation name and returns a
 * char pointer.
 *
 * NOTE(review): several of these prototypes are truncated in this listing
 * (their last parameter line is missing).
 */
62extern UCollator *pg_ucol_open(
const char *loc_str);
64static int strncoll_icu(
const char *arg1, ssize_t len1,
65 const char *arg2, ssize_t len2,
67static size_t strnxfrm_icu(
char *
dest,
size_t destsize,
68 const char *src, ssize_t srclen,
70static size_t strnxfrm_prefix_icu(
char *
dest,
size_t destsize,
71 const char *src, ssize_t srclen,
73extern char *get_collation_actual_version_icu(
const char *collcollate);
/*
 * Function pointer type shared by the case-conversion callbacks handed to
 * icu_convert_case.  The visible parameters are a UChar destination with its
 * capacity, a UChar source with its length, and a UErrorCode out-parameter;
 * the return type is int32_t.
 *
 * NOTE(review): the embedded numbering jumps from 76 to 78, so one parameter
 * line appears to be missing from this listing.
 */
75typedef int32_t (*ICU_Convert_Func) (UChar *
dest, int32_t destCapacity,
76 const UChar *src, int32_t srcLength,
78 UErrorCode *pErrorCode);
/*
 * File-local converter handle, NULL until init_icu_converter stores an
 * opened converter in it.
 */
85static UConverter *icu_converter = NULL;
/*
 * Forward declarations of the file-local helpers: collator construction,
 * string comparison and sort-key generation (plain and UTF8 variants),
 * converter setup, conversion between the char encoding and UChar, and the
 * case-conversion wrappers.
 *
 * NOTE(review): many of these declarations are truncated in this listing;
 * only the visible parameters are shown.
 */
87static UCollator *make_icu_collator(
const char *iculocstr,
88 const char *icurules);
89static int strncoll_icu(
const char *arg1, ssize_t len1,
90 const char *arg2, ssize_t len2,
92static size_t strnxfrm_prefix_icu(
char *
dest,
size_t destsize,
93 const char *src, ssize_t srclen,
/* The UTF8 variants are declared only when HAVE_UCOL_STRCOLLUTF8 is defined. */
95#ifdef HAVE_UCOL_STRCOLLUTF8
96static int strncoll_icu_utf8(
const char *arg1, ssize_t len1,
97 const char *arg2, ssize_t len2,
100static size_t strnxfrm_prefix_icu_utf8(
char *
dest,
size_t destsize,
101 const char *src, ssize_t srclen,
103static void init_icu_converter(
void);
104static size_t uchar_length(UConverter *converter,
105 const char *
str, int32_t
len);
106static int32_t uchar_convert(UConverter *converter,
107 UChar *
dest, int32_t destlen,
108 const char *src, int32_t srclen);
109static int32_t icu_to_uchar(UChar **buff_uchar,
const char *buff,
111static size_t icu_from_uchar(
char *
dest,
size_t destsize,
112 const UChar *buff_uchar, int32_t len_uchar);
113static void icu_set_collation_attributes(UCollator *collator,
const char *loc,
115static int32_t icu_convert_case(ICU_Convert_Func func,
pg_locale_t mylocale,
116 UChar **buff_dest, UChar *buff_source,
/* Both wrappers below have the parameter shape of ICU_Convert_Func. */
118static int32_t u_strToTitle_default_BI(UChar *
dest, int32_t destCapacity,
119 const UChar *src, int32_t srcLength,
121 UErrorCode *pErrorCode);
122static int32_t u_strFoldCase_default(UChar *
dest, int32_t destCapacity,
123 const UChar *src, int32_t srcLength,
125 UErrorCode *pErrorCode);
/*
 * Designated initializers for two tables of collation callbacks.
 *
 * NOTE(review): the struct headers, closing braces and some initializer
 * lines are missing from this listing.  Presumably the first group belongs
 * to collate_methods_icu and the second to collate_methods_icu_utf8 (both
 * names are referenced where result->collate is assigned) - confirm against
 * the full file.
 */
129 .strnxfrm = strnxfrm_icu,
130 .strnxfrm_prefix = strnxfrm_prefix_icu,
131 .strxfrm_is_safe =
true,
/*
 * Second group: it uses the UTF8 prefix transform.  The conditional below
 * selects between strncoll implementations; only one arm is visible here.
 */
135#ifdef HAVE_UCOL_STRCOLLUTF8
138 .strncoll = strncoll_icu,
140 .strnxfrm = strnxfrm_icu,
141 .strnxfrm_prefix = strnxfrm_prefix_icu_utf8,
142 .strxfrm_is_safe =
true,
/*
 * Body fragment that builds an ICU-backed locale object.
 *
 * NOTE(review): the function header is not in this listing; presumably this
 * is create_pg_locale_icu(Oid collid, MemoryContext context) - confirm.
 * Many interior lines (catalog lookups, braces, conditionals) are elided.
 */
152 const char *iculocstr;
153 const char *icurules = NULL;
/*
 * Default collation: deterministic is forced to true and the locale and
 * rules attributes named here are those of pg_database.
 */
157 if (
collid == DEFAULT_COLLATION_OID)
168 deterministic =
true;
170 Anum_pg_database_datlocale);
173 Anum_pg_database_daticurules, &isnull);
/*
 * Other path: deterministic comes from the collation form, and the locale
 * and rules attributes are those of pg_collation.
 */
190 deterministic = collform->collisdeterministic;
192 Anum_pg_collation_colllocale);
195 Anum_pg_collation_collicurules, &isnull);
/* Open the collator, applying any custom rules. */
202 collator = make_icu_collator(iculocstr, icurules);
/* Record the collator and mark the provider as ICU. */
206 result->
info.icu.ucol = collator;
207 result->
provider = COLLPROVIDER_ICU;
/*
 * Pick one of the two callback tables; the condition that chooses between
 * them is not visible in this listing.
 */
212 result->
collate = &collate_methods_icu_utf8;
214 result->
collate = &collate_methods_icu;
/* Error report used when the build has no ICU support. */
220 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
221 errmsg(
"ICU is not supported in this build")));
/*
 * pg_ucol_open
 *
 * Opens an ICU collator for loc_str with ucol_open and reports an error if
 * that fails.  Error messages print orig_str, the locale string as passed
 * in, even when loc_str has been replaced by a fixed-up copy.
 *
 * NOTE(review): braces, some declarations and several statements are elided
 * in this listing; the comments cover only the visible lines.
 */
236pg_ucol_open(
const char *loc_str)
240 const char *orig_str = loc_str;
241 char *fixed_str = NULL;
/* Error raised for a case this function refuses to handle. */
253 elog(
ERROR,
"opening default collator is not supported");
/*
 * For ICU major versions below 55: extract the language of the locale.  When
 * the language is und, the visible code takes the text after that prefix
 * and copies root into fixed_str.
 */
260 if (U_ICU_VERSION_MAJOR_NUM < 55)
262 char lang[ULOC_LANG_CAPACITY];
264 status = U_ZERO_ERROR;
265 uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
/* A result that was not NUL-terminated is treated as a failure too. */
266 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
269 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
270 errmsg(
"could not get language from locale \"%s\": %s",
271 loc_str, u_errorName(status))));
274 if (strcmp(lang,
"und") == 0)
276 const char *
remainder = loc_str + strlen(
"und");
279 strcpy(fixed_str,
"root");
/* Open the collator itself. */
286 status = U_ZERO_ERROR;
287 collator = ucol_open(loc_str, &status);
288 if (U_FAILURE(status))
291 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
292 errmsg(
"could not open collator for locale \"%s\": %s",
293 orig_str, u_errorName(status))));
/*
 * For ICU major versions below 54 the collation attributes are applied by
 * icu_set_collation_attributes.
 */
295 if (U_ICU_VERSION_MAJOR_NUM < 54)
297 status = U_ZERO_ERROR;
298 icu_set_collation_attributes(collator, loc_str, &status);
/* On failure, close the collator before reporting the error. */
304 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
306 ucol_close(collator);
308 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
309 errmsg(
"could not open collator for locale \"%s\": %s",
310 orig_str, u_errorName(status))));
/* The statement guarded by this test is not visible in the listing. */
314 if (fixed_str != NULL)
/*
 * make_icu_collator
 *
 * Returns a collator for iculocstr.  One visible path simply returns
 * pg_ucol_open(iculocstr).  The other path appends the custom rules in
 * icurules to the standard rules of the locale and opens a collator from
 * the combined rule string.
 *
 * NOTE(review): the condition that selects between the two paths, the
 * allocation of all_rules and the braces are elided in this listing.
 */
326make_icu_collator(
const char *iculocstr,
const char *icurules)
331 return pg_ucol_open(iculocstr);
335 UCollator *collator_std_rules;
336 UCollator *collator_all_rules;
337 const UChar *std_rules;
/* Convert the custom rules to UChar form. */
349 icu_to_uchar(&my_rules, icurules, strlen(icurules));
/* Open the standard collator only to read its rule string. */
351 collator_std_rules = pg_ucol_open(iculocstr);
353 std_rules = ucol_getRules(collator_std_rules, &length);
/* Combined length: both rule strings plus one extra UChar. */
355 total = u_strlen(std_rules) + u_strlen(my_rules) + 1;
/* Out-of-memory path: close the standard collator, then report. */
361 ucol_close(collator_std_rules);
363 (
errcode(ERRCODE_OUT_OF_MEMORY),
364 errmsg(
"out of memory")));
/* Standard rules first, custom rules appended after them. */
367 u_strcpy(all_rules, std_rules);
368 u_strcat(all_rules, my_rules);
370 ucol_close(collator_std_rules);
372 status = U_ZERO_ERROR;
373 collator_all_rules = ucol_openRules(all_rules, u_strlen(all_rules),
374 UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,
376 if (U_FAILURE(status))
379 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
380 errmsg(
"could not open collator for locale \"%s\" with rules \"%s\": %s",
381 iculocstr, icurules, u_errorName(status))));
384 return collator_all_rules;
/*
 * Lower-casing pipeline: convert src to UChar, apply u_strToLower through
 * icu_convert_case, then convert the result back into dest.
 *
 * NOTE(review): the function header is not in this listing; presumably this
 * is the body of strlower_icu - confirm.
 */
398 len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
399 len_conv = icu_convert_case(u_strToLower,
locale,
400 &buff_conv, buff_uchar, len_uchar);
401 result_len = icu_from_uchar(
dest, destsize, buff_conv, len_conv);
/*
 * Title-casing pipeline: convert src to UChar, apply u_strToTitle_default_BI
 * through icu_convert_case, then convert the result back into dest.
 *
 * NOTE(review): the function header is not in this listing; presumably this
 * is the body of strtitle_icu - confirm.
 */
418 len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
419 len_conv = icu_convert_case(u_strToTitle_default_BI,
locale,
420 &buff_conv, buff_uchar, len_uchar);
421 result_len = icu_from_uchar(
dest, destsize, buff_conv, len_conv);
/*
 * Upper-casing pipeline: convert src to UChar, apply u_strToUpper through
 * icu_convert_case, then convert the result back into dest.
 *
 * NOTE(review): the function header is not in this listing; presumably this
 * is the body of strupper_icu - confirm.
 */
438 len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
439 len_conv = icu_convert_case(u_strToUpper,
locale,
440 &buff_conv, buff_uchar, len_uchar);
441 result_len = icu_from_uchar(
dest, destsize, buff_conv, len_conv);
/*
 * strfold_icu
 *
 * Case-folds src into dest: convert src to UChar, apply
 * u_strFoldCase_default through icu_convert_case, then convert the folded
 * text back into dest (at most destsize bytes are available there).
 *
 * NOTE(review): the last parameter, local declarations, cleanup and the
 * return statement are elided in this listing.
 */
449strfold_icu(
char *
dest,
size_t destsize,
const char *src, ssize_t srclen,
458 len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
459 len_conv = icu_convert_case(u_strFoldCase_default,
locale,
460 &buff_conv, buff_uchar, len_uchar);
461 result_len = icu_from_uchar(
dest, destsize, buff_conv, len_conv);
475#ifdef HAVE_UCOL_STRCOLLUTF8
/*
 * strncoll_icu_utf8
 *
 * Compares arg1 and arg2 with ucol_strcollUTF8 using the collator stored in
 * the locale, and reports an error if ICU signals a failure.
 *
 * NOTE(review): the remaining arguments of the ICU call, the result mapping
 * and the return statement are elided in this listing.
 */
477strncoll_icu_utf8(
const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2,
487 status = U_ZERO_ERROR;
488 result = ucol_strcollUTF8(
locale->info.icu.ucol,
492 if (U_FAILURE(status))
494 (
errmsg(
"collation failed: %s", u_errorName(status))));
/*
 * strnxfrm_icu
 *
 * Produces an ICU sort key for src in dest.  The source is first converted
 * to UChar using the shared converter, then ucol_getSortKey writes the key.
 *
 * NOTE(review): the allocation of buf, the remaining ucol_getSortKey
 * arguments and the return statement are elided in this listing.
 */
502strnxfrm_icu(
char *
dest,
size_t destsize,
const char *src, ssize_t srclen,
514 init_icu_converter();
/* Number of UChars needed for src. */
516 ulen = uchar_length(icu_converter, src, srclen);
/* Buffer size in bytes, with room for one extra UChar. */
518 uchar_bsize = (ulen + 1) *
sizeof(UChar);
523 uchar = (UChar *)
buf;
525 ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
527 result_bsize = ucol_getSortKey(
locale->info.icu.ucol,
529 (uint8_t *)
dest, destsize);
/*
 * Sanity check: when the key fits within destsize, the byte at
 * dest[result_bsize] is expected to be NUL.
 */
542 Assert(result_bsize >= destsize ||
dest[result_bsize] ==
'\0');
/*
 * strnxfrm_prefix_icu_utf8
 *
 * Generates part of a sort key for src by iterating directly over its UTF8
 * bytes: a character iterator is set up with uiter_setUTF8 and passed to
 * ucol_nextSortKeyPart.  A failure status is reported as an error.
 *
 * NOTE(review): the remaining arguments of the ICU call and the return
 * statement are elided in this listing.
 */
549strnxfrm_prefix_icu_utf8(
char *
dest,
size_t destsize,
550 const char *src, ssize_t srclen,
562 uiter_setUTF8(&iter, src, srclen);
564 status = U_ZERO_ERROR;
565 result = ucol_nextSortKeyPart(
locale->info.icu.ucol,
571 if (U_FAILURE(status))
573 (
errmsg(
"sort key generation failed: %s",
574 u_errorName(status))));
/*
 * get_collation_actual_version_icu
 *
 * Opens a collator for collcollate, reads its version with ucol_getVersion,
 * closes the collator again and formats the version into buf.
 *
 * NOTE(review): the return statement is elided in this listing; the
 * prototype says a char pointer is returned.
 */
580get_collation_actual_version_icu(
const char *collcollate)
583 UVersionInfo versioninfo;
584 char buf[U_MAX_VERSION_STRING_LENGTH];
586 collator = pg_ucol_open(collcollate);
588 ucol_getVersion(collator, versioninfo);
/* The collator is only needed for the version query. */
589 ucol_close(collator);
591 u_versionToString(versioninfo,
buf);
/*
 * icu_to_uchar
 *
 * Converts nbytes of buff into a newly palloc-ed UChar buffer stored in
 * *buff_uchar.  The required length is measured first, then the buffer is
 * allocated with one extra UChar and the conversion is run into it.
 *
 * NOTE(review): the return statement is elided in this listing; the
 * prototype says an int32_t is returned.
 */
608icu_to_uchar(UChar **buff_uchar,
const char *buff,
size_t nbytes)
612 init_icu_converter();
614 len_uchar = uchar_length(icu_converter, buff, nbytes);
616 *buff_uchar =
palloc((len_uchar + 1) *
sizeof(**buff_uchar));
617 len_uchar = uchar_convert(icu_converter,
618 *buff_uchar, len_uchar + 1, buff, nbytes);
/*
 * icu_from_uchar
 *
 * Converts len_uchar UChars from buff_uchar into dest using the shared
 * converter.  The conversion is called twice: once with no output buffer to
 * learn the required length, and once to write the bytes.
 *
 * NOTE(review): the statement guarded by the destsize test and the final
 * return are elided in this listing.
 */
635icu_from_uchar(
char *
dest,
size_t destsize,
const UChar *buff_uchar, int32_t len_uchar)
640 init_icu_converter();
/*
 * Measuring pass: with a NULL destination of capacity 0, a buffer overflow
 * status is tolerated and any other failure is reported.
 */
642 status = U_ZERO_ERROR;
643 len_result = ucnv_fromUChars(icu_converter, NULL, 0,
644 buff_uchar, len_uchar, &status);
645 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
647 (
errmsg(
"%s failed: %s",
"ucnv_fromUChars",
648 u_errorName(status))));
/* Result plus one extra byte does not fit in dest. */
650 if (len_result + 1 > destsize)
/* Writing pass; an unterminated result counts as a failure as well. */
653 status = U_ZERO_ERROR;
654 len_result = ucnv_fromUChars(icu_converter,
dest, len_result + 1,
655 buff_uchar, len_uchar, &status);
656 if (U_FAILURE(status) ||
657 status == U_STRING_NOT_TERMINATED_WARNING)
659 (
errmsg(
"%s failed: %s",
"ucnv_fromUChars",
660 u_errorName(status))));
/*
 * icu_convert_case
 *
 * Runs the case-conversion callback func over buff_source and leaves the
 * output in a palloc-ed buffer stored in *buff_dest.  The first attempt
 * uses a destination as long as the source; if ICU reports a buffer
 * overflow, a new buffer of the length returned by the first call is
 * allocated and the callback is run again.
 *
 * NOTE(review): the trailing arguments of both func calls, any release of
 * the first buffer and the return statement are elided in this listing.
 */
666icu_convert_case(ICU_Convert_Func func,
pg_locale_t mylocale,
667 UChar **buff_dest, UChar *buff_source, int32_t len_source)
/* First try: assume the output is no longer than the input. */
672 len_dest = len_source;
673 *buff_dest =
palloc(len_dest *
sizeof(**buff_dest));
674 status = U_ZERO_ERROR;
675 len_dest = func(*buff_dest, len_dest, buff_source, len_source,
/* Retry with the length the first call asked for. */
677 if (status == U_BUFFER_OVERFLOW_ERROR)
681 *buff_dest =
palloc(len_dest *
sizeof(**buff_dest));
682 status = U_ZERO_ERROR;
683 len_dest = func(*buff_dest, len_dest, buff_source, len_source,
686 if (U_FAILURE(status))
688 (
errmsg(
"case conversion failed: %s", u_errorName(status))));
/*
 * u_strToTitle_default_BI
 *
 * Wrapper that gives u_strToTitle the parameter shape of ICU_Convert_Func.
 * It forwards all arguments and passes NULL in the position between
 * srcLength and locale.
 *
 * NOTE(review): the locale parameter line of the signature is elided in
 * this listing; per the ICU documentation a NULL break iterator selects the
 * default one - confirm.
 */
693u_strToTitle_default_BI(UChar *
dest, int32_t destCapacity,
694 const UChar *src, int32_t srcLength,
696 UErrorCode *pErrorCode)
698 return u_strToTitle(
dest, destCapacity, src, srcLength,
699 NULL,
locale, pErrorCode);
/*
 * u_strFoldCase_default
 *
 * Wrapper that gives u_strFoldCase the parameter shape of ICU_Convert_Func.
 * It looks up the language of the locale and, when that language is tr or
 * az, selects the U_FOLD_CASE_EXCLUDE_SPECIAL_I option.
 *
 * NOTE(review): the locale parameter line, the declarations of lang and
 * options, and the trailing arguments of u_strFoldCase are elided in this
 * listing.
 */
703u_strFoldCase_default(UChar *
dest, int32_t destCapacity,
704 const UChar *src, int32_t srcLength,
706 UErrorCode *pErrorCode)
718 status = U_ZERO_ERROR;
/* Capacity 3 leaves room for a two-letter code plus the terminator. */
719 uloc_getLanguage(
locale, lang, 3, &status);
/* The language is only inspected when the lookup succeeded. */
720 if (U_SUCCESS(status))
726 if (strcmp(lang,
"tr") == 0 || strcmp(lang,
"az") == 0)
727 options = U_FOLD_CASE_EXCLUDE_SPECIAL_I;
730 return u_strFoldCase(
dest, destCapacity, src, srcLength,
/*
 * strncoll_icu
 *
 * Compares arg1 and arg2 with ucol_strcoll after converting both to UChar
 * with the shared converter.  Both converted strings live in one buffer:
 * the first at its start, the second bufsize1 bytes further on.
 *
 * NOTE(review): the second parameter pair, the allocation of buf, the code
 * under the conditional below and the return statement are elided in this
 * listing.
 */
745strncoll_icu(
const char *arg1, ssize_t len1,
761#ifdef HAVE_UCOL_STRCOLLUTF8
765 init_icu_converter();
/* Measure both inputs in UChars. */
767 ulen1 = uchar_length(icu_converter, arg1, len1);
768 ulen2 = uchar_length(icu_converter, arg2, len2);
/* Byte sizes, each with room for one extra UChar. */
770 bufsize1 = (ulen1 + 1) *
sizeof(UChar);
771 bufsize2 = (ulen2 + 1) *
sizeof(UChar);
776 uchar1 = (UChar *)
buf;
777 uchar2 = (UChar *) (
buf + bufsize1);
779 ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
780 ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
782 result = ucol_strcoll(
locale->info.icu.ucol,
/*
 * strnxfrm_prefix_icu
 *
 * Generates part of a sort key for src.  The source is converted to UChar
 * with the shared converter, a character iterator is set over the converted
 * text, and ucol_nextSortKeyPart produces the key bytes.  A failure status
 * is reported as an error.
 *
 * NOTE(review): the allocation of buf, the remaining arguments of the ICU
 * call and the return statement are elided in this listing.
 */
794strnxfrm_prefix_icu(
char *
dest,
size_t destsize,
795 const char *src, ssize_t srclen,
813 init_icu_converter();
815 ulen = uchar_length(icu_converter, src, srclen);
/* Buffer size in bytes, with room for one extra UChar. */
817 uchar_bsize = (ulen + 1) *
sizeof(UChar);
822 uchar = (UChar *)
buf;
824 ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
826 uiter_setString(&iter, uchar, ulen);
828 status = U_ZERO_ERROR;
829 result_bsize = ucol_nextSortKeyPart(
locale->info.icu.ucol,
835 if (U_FAILURE(status))
837 (
errmsg(
"sort key generation failed: %s",
838 u_errorName(status))));
/*
 * init_icu_converter
 *
 * Opens an ICU converter for an encoding name and stores it in the
 * file-local icu_converter.  Errors are reported when no encoding name is
 * available or when ucnv_open fails.
 *
 * NOTE(review): the lines that compute icu_encoding_name, and any early
 * exit for an already opened converter, are elided in this listing.
 */
844init_icu_converter(
void)
846 const char *icu_encoding_name;
854 if (!icu_encoding_name)
856 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
857 errmsg(
"encoding \"%s\" not supported by ICU",
860 status = U_ZERO_ERROR;
861 conv = ucnv_open(icu_encoding_name, &status);
862 if (U_FAILURE(status))
864 (
errmsg(
"could not open ICU converter for encoding \"%s\": %s",
865 icu_encoding_name, u_errorName(status))));
/* Publish the converter only after it opened successfully. */
867 icu_converter = conv;
/*
 * uchar_length
 *
 * Measures how many UChars the first len bytes of str need, by calling
 * ucnv_toUChars with a NULL destination of capacity 0.  A buffer overflow
 * status is tolerated for this measuring call; any other failure is
 * reported as an error.
 *
 * NOTE(review): the return statement is elided in this listing; the
 * prototype says a size_t is returned.
 */
876uchar_length(UConverter *converter,
const char *
str, int32_t
len)
878 UErrorCode status = U_ZERO_ERROR;
881 ulen = ucnv_toUChars(converter, NULL, 0,
str,
len, &status);
882 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
884 (
errmsg(
"%s failed: %s",
"ucnv_toUChars", u_errorName(status))));
/*
 * uchar_convert
 *
 * Converts srclen bytes of src into dest, which has room for destlen
 * UChars, using ucnv_toUChars.  Any failure status is reported as an error.
 *
 * NOTE(review): the return statement is elided in this listing; the
 * prototype says an int32_t is returned.
 */
895uchar_convert(UConverter *converter, UChar *
dest, int32_t destlen,
896 const char *src, int32_t srclen)
898 UErrorCode status = U_ZERO_ERROR;
/* status is reset again here although it was just initialized above. */
901 status = U_ZERO_ERROR;
902 ulen = ucnv_toUChars(converter,
dest, destlen, src, srclen, &status);
903 if (U_FAILURE(status))
905 (
errmsg(
"%s failed: %s",
"ucnv_toUChars", u_errorName(status))));
/*
 * icu_set_collation_attributes
 *
 * Parses keyword settings out of the locale string loc and applies them to
 * collator with ucol_setAttribute.  The locale is canonicalized and
 * lower-cased, the part from the at-sign onward is located, and each token
 * is examined for an equals sign.  Recognized names are mapped to a
 * collation attribute and recognized values to an attribute value.
 *
 * NOTE(review): the status parameter line, the tokenizing loop, several
 * assignments and all braces are elided in this listing.
 */
923icu_set_collation_attributes(UCollator *collator, const
char *loc,
/* First call measures the canonical form, second call writes it. */
939 *status = U_ZERO_ERROR;
940 len = uloc_canonicalize(loc, NULL, 0, status);
942 *status = U_ZERO_ERROR;
943 len = uloc_canonicalize(loc, icu_locale_id,
len + 1, status);
944 if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING)
/* Work on a lower-cased copy; the canonical buffer is freed right after. */
947 lower_str =
asc_tolower(icu_locale_id, strlen(icu_locale_id));
949 pfree(icu_locale_id);
/* Locate the at-sign in the lower-cased locale string. */
951 str = strchr(lower_str,
'@');
/* Look for the equals sign inside the current token. */
958 char *
e = strchr(
token,
'=');
965 UColAttributeValue uvalue;
967 *status = U_ZERO_ERROR;
/* Map the keyword name to a collation attribute. */
976 if (strcmp(
name,
"colstrength") == 0)
977 uattr = UCOL_STRENGTH;
978 else if (strcmp(
name,
"colbackwards") == 0)
979 uattr = UCOL_FRENCH_COLLATION;
980 else if (strcmp(
name,
"colcaselevel") == 0)
981 uattr = UCOL_CASE_LEVEL;
982 else if (strcmp(
name,
"colcasefirst") == 0)
983 uattr = UCOL_CASE_FIRST;
984 else if (strcmp(
name,
"colalternate") == 0)
985 uattr = UCOL_ALTERNATE_HANDLING;
986 else if (strcmp(
name,
"colnormalization") == 0)
987 uattr = UCOL_NORMALIZATION_MODE;
988 else if (strcmp(
name,
"colnumeric") == 0)
989 uattr = UCOL_NUMERIC_COLLATION;
/* Map the keyword value to an attribute value. */
994 if (strcmp(
value,
"primary") == 0)
995 uvalue = UCOL_PRIMARY;
996 else if (strcmp(
value,
"secondary") == 0)
997 uvalue = UCOL_SECONDARY;
998 else if (strcmp(
value,
"tertiary") == 0)
999 uvalue = UCOL_TERTIARY;
1000 else if (strcmp(
value,
"quaternary") == 0)
1001 uvalue = UCOL_QUATERNARY;
1002 else if (strcmp(
value,
"identical") == 0)
1003 uvalue = UCOL_IDENTICAL;
/* The assignments for the no and yes values are elided in this listing. */
1004 else if (strcmp(
value,
"no") == 0)
1006 else if (strcmp(
value,
"yes") == 0)
1008 else if (strcmp(
value,
"shifted") == 0)
1009 uvalue = UCOL_SHIFTED;
1010 else if (strcmp(
value,
"non-ignorable") == 0)
1011 uvalue = UCOL_NON_IGNORABLE;
1012 else if (strcmp(
value,
"lower") == 0)
1013 uvalue = UCOL_LOWER_FIRST;
1014 else if (strcmp(
value,
"upper") == 0)
1015 uvalue = UCOL_UPPER_FIRST;
/*
 * Presumably the fallback for an unrecognized value; the branch header is
 * not visible in this listing - confirm.
 */
1018 *status = U_ILLEGAL_ARGUMENT_ERROR;
/* Apply the attribute and value to the collator. */
1022 ucol_setAttribute(collator, uattr, uvalue, status);
#define TextDatumGetCString(d)
#define pg_attribute_unused()
#define Assert(condition)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
const char * get_encoding_name_for_icu(int encoding)
#define MCXT_ALLOC_NO_OOM
#define HeapTupleIsValid(tuple)
static void * GETSTRUCT(const HeapTupleData *tuple)
int GetDatabaseEncoding(void)
char * MemoryContextStrdup(MemoryContext context, const char *string)
void * MemoryContextAllocZero(MemoryContext context, Size size)
char * pstrdup(const char *in)
void pfree(void *pointer)
void * palloc_extended(Size size, int flags)
FormData_pg_collation * Form_pg_collation
size_t strfold_icu(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
size_t strupper_icu(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
size_t strlower_icu(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
size_t strtitle_icu(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context)
#define pg_encoding_to_char
char * strsep(char **stringp, const char *delim)
static Datum ObjectIdGetDatum(Oid X)
int(* strncoll)(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale)
const struct collate_methods * collate
union pg_locale_struct::@158 info
void ReleaseSysCache(HeapTuple tuple)
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Datum SysCacheGetAttrNotNull(int cacheId, HeapTuple tup, AttrNumber attributeNumber)