15#include <unicode/ucnv.h>
16#include <unicode/ustring.h>
/*
 * HAVE_UCOL_STRCOLLUTF8 is defined when the ICU major version is 53 or
 * later.  The #undef presumably belongs to an #else branch that is not
 * visible in this excerpt.  The macro gates the UTF-8-specific collation
 * routines declared and defined further down.
 */
23#if U_ICU_VERSION_MAJOR_NUM >= 53
24#define HAVE_UCOL_STRCOLLUTF8 1
26#undef HAVE_UCOL_STRCOLLUTF8
/* NOTE(review): buffer-size constant; its users are not visible in this excerpt. */
48#define TEXTBUFLEN 1024
/*
 * Forward declarations.  pg_ucol_open() and
 * get_collation_actual_version_icu() are extern; the case-mapping,
 * comparison and sort-key routines are file-local (static).
 * NOTE(review): several parameter lists are cut short in this excerpt.
 */
54extern UCollator *pg_ucol_open(
const char *loc_str);
56static size_t strlower_icu(
char *
dest,
size_t destsize,
const char *src,
58static size_t strtitle_icu(
char *
dest,
size_t destsize,
const char *src,
60static size_t strupper_icu(
char *
dest,
size_t destsize,
const char *src,
62static size_t strfold_icu(
char *
dest,
size_t destsize,
const char *src,
64static int strncoll_icu(
const char *arg1, ssize_t len1,
65 const char *arg2, ssize_t len2,
67static size_t strnxfrm_icu(
char *
dest,
size_t destsize,
68 const char *src, ssize_t srclen,
70extern char *get_collation_actual_version_icu(
const char *collcollate);
/*
 * Signature of the UChar case-conversion callbacks handed to
 * icu_convert_case(): ICU's u_strToLower/u_strToUpper and the local
 * u_strToTitle_default_BI/u_strFoldCase_default wrappers.
 * NOTE(review): one parameter line is missing from this excerpt.
 */
72typedef int32_t (*ICU_Convert_Func) (UChar *
dest, int32_t destCapacity,
73 const UChar *src, int32_t srcLength,
75 UErrorCode *pErrorCode);
/*
 * Converter handle shared by the conversion helpers in this file; starts
 * out NULL and is assigned in init_icu_converter().
 */
82static UConverter *icu_converter = NULL;
/*
 * Forward declarations of the file-local helpers: collator construction,
 * comparison and sort-key routines (with UTF-8 variants compiled only when
 * HAVE_UCOL_STRCOLLUTF8 is defined), converter setup, char <-> UChar
 * conversion, and the case-mapping wrappers.
 * NOTE(review): strncoll_icu is declared a second time here, and several
 * parameter lists (and the matching #endif) are cut off in this excerpt.
 */
84static UCollator *make_icu_collator(
const char *iculocstr,
85 const char *icurules);
86static int strncoll_icu(
const char *arg1, ssize_t len1,
87 const char *arg2, ssize_t len2,
89static size_t strnxfrm_prefix_icu(
char *
dest,
size_t destsize,
90 const char *src, ssize_t srclen,
92#ifdef HAVE_UCOL_STRCOLLUTF8
93static int strncoll_icu_utf8(
const char *arg1, ssize_t len1,
94 const char *arg2, ssize_t len2,
97static size_t strnxfrm_prefix_icu_utf8(
char *
dest,
size_t destsize,
98 const char *src, ssize_t srclen,
100static void init_icu_converter(
void);
101static size_t uchar_length(UConverter *converter,
102 const char *
str, int32_t
len);
103static int32_t uchar_convert(UConverter *converter,
104 UChar *
dest, int32_t destlen,
105 const char *src, int32_t srclen);
106static int32_t icu_to_uchar(UChar **buff_uchar,
const char *buff,
108static size_t icu_from_uchar(
char *
dest,
size_t destsize,
109 const UChar *buff_uchar, int32_t len_uchar);
110static void icu_set_collation_attributes(UCollator *collator,
const char *loc,
112static int32_t icu_convert_case(ICU_Convert_Func func,
pg_locale_t mylocale,
113 UChar **buff_dest, UChar *buff_source,
115static int32_t u_strToTitle_default_BI(UChar *
dest, int32_t destCapacity,
116 const UChar *src, int32_t srcLength,
118 UErrorCode *pErrorCode);
119static int32_t u_strFoldCase_default(UChar *
dest, int32_t destCapacity,
120 const UChar *src, int32_t srcLength,
122 UErrorCode *pErrorCode);
/*
 * Tail of a boolean expression that is true when ch is an ASCII letter
 * (A-Z or a-z).  Presumably part of char_is_cased_icu()'s return value; the
 * function header and the leading operand are not visible in this excerpt.
 */
128 (ch >=
'A' && ch <=
'Z') || (ch >=
'a' && ch <=
'z');
/*
 * Simple case mapping of a single code point through ICU.  Presumably the
 * bodies of toupper_icu() and tolower_icu(), which the ctype method table
 * installs as wc_toupper/wc_tolower; the function headers are not visible.
 */
139 return u_toupper(wc);
145 return u_tolower(wc);
/*
 * Initializer fragment of a collation method table, presumably
 * collate_methods_icu: sort keys come from strnxfrm_icu() and
 * strnxfrm_prefix_icu(), and strxfrm is flagged as safe.
 */
150 .strnxfrm = strnxfrm_icu,
151 .strnxfrm_prefix = strnxfrm_prefix_icu,
152 .strxfrm_is_safe =
true,
/*
 * Initializer fragment of a second collation method table, presumably
 * collate_methods_icu_utf8.  It differs from the table above in using
 * strnxfrm_prefix_icu_utf8() for prefix sort keys.
 * NOTE(review): the member chosen when HAVE_UCOL_STRCOLLUTF8 is defined, and
 * the #else/#endif lines, are missing from this excerpt; the strncoll_icu
 * line below is presumably the fallback branch.
 */
156#ifdef HAVE_UCOL_STRCOLLUTF8
159 .strncoll = strncoll_icu,
161 .strnxfrm = strnxfrm_icu,
162 .strnxfrm_prefix = strnxfrm_prefix_icu_utf8,
163 .strxfrm_is_safe =
true,
/*
 * Character classification of a single code point, each delegating to the
 * corresponding ICU predicate.  Presumably the bodies of the wc_is*_icu()
 * functions installed in the ctype method table; the function headers are
 * not visible in this excerpt.
 */
169 return u_isdigit(wc);
175 return u_isalpha(wc);
181 return u_isalnum(wc);
187 return u_isupper(wc);
193 return u_islower(wc);
199 return u_isgraph(wc);
205 return u_isprint(wc);
211 return u_ispunct(wc);
217 return u_isspace(wc);
223 return u_isxdigit(wc);
/* "cased" is answered from the Unicode Cased binary property */
229 return u_hasBinaryProperty(wc, UCHAR_CASED);
/*
 * Initializer fragment of the ctype method table, presumably
 * ctype_methods_icu: wires the string case-mapping routines, the
 * per-code-point classification predicates and the single-code-point case
 * mappers to their ICU-backed implementations.
 * NOTE(review): the struct header and any members before .strtitle are not
 * visible in this excerpt.
 */
234 .strtitle = strtitle_icu,
235 .strupper = strupper_icu,
236 .strfold = strfold_icu,
237 .wc_isdigit = wc_isdigit_icu,
238 .wc_isalpha = wc_isalpha_icu,
239 .wc_isalnum = wc_isalnum_icu,
240 .wc_isupper = wc_isupper_icu,
241 .wc_islower = wc_islower_icu,
242 .wc_isgraph = wc_isgraph_icu,
243 .wc_isprint = wc_isprint_icu,
244 .wc_ispunct = wc_ispunct_icu,
245 .wc_isspace = wc_isspace_icu,
246 .wc_isxdigit = wc_isxdigit_icu,
247 .char_is_cased = char_is_cased_icu,
248 .wc_iscased = wc_iscased_icu,
249 .wc_toupper = toupper_icu,
250 .wc_tolower = tolower_icu,
/*
 * Fragment of the ICU locale constructor (presumably create_pg_locale_icu();
 * its header is not visible in this excerpt).
 *
 * The locale name and optional custom rules are read from pg_database
 * (datlocale / daticurules) when collid is DEFAULT_COLLATION_OID, and
 * apparently from pg_collation (colllocale / collicurules) otherwise.  The
 * collator built by make_icu_collator() is stored in result->icu.ucol and
 * the collate/ctype method tables are attached to the result.
 */
259 const char *iculocstr;
260 const char *icurules = NULL;
264 if (
collid == DEFAULT_COLLATION_OID)
/* the database default collation is treated as deterministic */
275 deterministic =
true;
277 Anum_pg_database_datlocale);
280 Anum_pg_database_daticurules, &isnull);
/* for a pg_collation entry, determinism comes from the catalog row */
297 deterministic = collform->collisdeterministic;
299 Anum_pg_collation_colllocale);
302 Anum_pg_collation_collicurules, &isnull);
309 collator = make_icu_collator(iculocstr, icurules);
313 result->icu.ucol = collator;
/*
 * Two alternative collate method tables; the condition that selects between
 * them is not visible in this excerpt.
 */
318 result->
collate = &collate_methods_icu_utf8;
320 result->
collate = &collate_methods_icu;
321 result->
ctype = &ctype_methods_icu;
/* error reported when the build has no ICU support (guard not visible) */
327 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
328 errmsg(
"ICU is not supported in this build")));
/*
 * Open a UCollator for loc_str with ucol_open().  Failures are reported
 * with the caller's original locale string (orig_str), even if the string
 * actually passed to ICU was rewritten.
 *
 * Compatibility handling for old ICU releases:
 *  - major version < 55: a locale whose language is "und" is rewritten to
 *    begin with "root" before opening;
 *  - major version < 54: collation attributes embedded in the locale string
 *    are applied by hand via icu_set_collation_attributes().
 */
343pg_ucol_open(
const char *loc_str)
347 const char *orig_str = loc_str;
348 char *fixed_str = NULL;
/* the condition guarding this error is not visible in this excerpt */
360 elog(
ERROR,
"opening default collator is not supported");
367 if (U_ICU_VERSION_MAJOR_NUM < 55)
369 char lang[ULOC_LANG_CAPACITY];
371 status = U_ZERO_ERROR;
372 uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
/* an unterminated result is treated like a failure */
373 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
376 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
377 errmsg(
"could not get language from locale \"%s\": %s",
378 loc_str, u_errorName(status))));
381 if (strcmp(lang,
"und") == 0)
/* remainder is whatever follows the leading "und" in the locale string */
383 const char *
remainder = loc_str + strlen(
"und");
386 strcpy(fixed_str,
"root");
393 status = U_ZERO_ERROR;
394 collator = ucol_open(loc_str, &status);
395 if (U_FAILURE(status))
398 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
399 errmsg(
"could not open collator for locale \"%s\": %s",
400 orig_str, u_errorName(status))));
402 if (U_ICU_VERSION_MAJOR_NUM < 54)
404 status = U_ZERO_ERROR;
405 icu_set_collation_attributes(collator, loc_str, &status);
411 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
/* release the collator before raising the error */
413 ucol_close(collator);
415 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
416 errmsg(
"could not open collator for locale \"%s\": %s",
417 orig_str, u_errorName(status))));
/* fixed_str is only set on the rewrite path; presumably freed here */
421 if (fixed_str != NULL)
/*
 * Build a collator for iculocstr, optionally extended with custom rules.
 *
 * One path simply returns pg_ucol_open(iculocstr) (presumably when icurules
 * is NULL; the condition is not visible in this excerpt).  The other path
 * converts icurules to UChar, fetches the locale's standard rules from a
 * temporary collator, concatenates standard + custom rules, and opens the
 * final collator from the combined rule string with ucol_openRules().
 */
433make_icu_collator(
const char *iculocstr,
const char *icurules)
438 return pg_ucol_open(iculocstr);
442 UCollator *collator_std_rules;
443 UCollator *collator_all_rules;
444 const UChar *std_rules;
456 icu_to_uchar(&my_rules, icurules, strlen(icurules));
458 collator_std_rules = pg_ucol_open(iculocstr);
460 std_rules = ucol_getRules(collator_std_rules, &length);
/* room for both rule strings plus a terminating NUL */
462 total = u_strlen(std_rules) + u_strlen(my_rules) + 1;
/* allocation-failure path: close the temporary collator before erroring */
468 ucol_close(collator_std_rules);
470 (
errcode(ERRCODE_OUT_OF_MEMORY),
471 errmsg(
"out of memory")));
474 u_strcpy(all_rules, std_rules);
475 u_strcat(all_rules, my_rules);
/* std_rules has been copied out, so the temporary collator can go */
477 ucol_close(collator_std_rules);
479 status = U_ZERO_ERROR;
480 collator_all_rules = ucol_openRules(all_rules, u_strlen(all_rules),
481 UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,
483 if (U_FAILURE(status))
486 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
487 errmsg(
"could not open collator for locale \"%s\" with rules \"%s\": %s",
488 iculocstr, icurules, u_errorName(status))));
491 return collator_all_rules;
/*
 * Lowercase src into dest using ICU: convert the input to UChar, apply
 * u_strToLower through icu_convert_case(), then convert the result back
 * into dest (bounded by destsize) with icu_from_uchar().
 * NOTE(review): the return statement and buffer cleanup are not visible in
 * this excerpt.
 */
496strlower_icu(
char *
dest,
size_t destsize,
const char *src, ssize_t srclen,
505 len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
506 len_conv = icu_convert_case(u_strToLower,
locale,
507 &buff_conv, buff_uchar, len_uchar);
508 result_len = icu_from_uchar(
dest, destsize, buff_conv, len_conv);
/*
 * Titlecase src into dest using ICU: convert the input to UChar, apply
 * u_strToTitle_default_BI through icu_convert_case(), then convert the
 * result back into dest (bounded by destsize) with icu_from_uchar().
 * NOTE(review): the return statement and buffer cleanup are not visible in
 * this excerpt.
 */
516strtitle_icu(
char *
dest,
size_t destsize,
const char *src, ssize_t srclen,
525 len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
526 len_conv = icu_convert_case(u_strToTitle_default_BI,
locale,
527 &buff_conv, buff_uchar, len_uchar);
528 result_len = icu_from_uchar(
dest, destsize, buff_conv, len_conv);
/*
 * Uppercase src into dest using ICU: convert the input to UChar, apply
 * u_strToUpper through icu_convert_case(), then convert the result back
 * into dest (bounded by destsize) with icu_from_uchar().
 * NOTE(review): the return statement and buffer cleanup are not visible in
 * this excerpt.
 */
536strupper_icu(
char *
dest,
size_t destsize,
const char *src, ssize_t srclen,
545 len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
546 len_conv = icu_convert_case(u_strToUpper,
locale,
547 &buff_conv, buff_uchar, len_uchar);
548 result_len = icu_from_uchar(
dest, destsize, buff_conv, len_conv);
/*
 * Case-fold src into dest using ICU: convert the input to UChar, apply
 * u_strFoldCase_default through icu_convert_case(), then convert the result
 * back into dest (bounded by destsize) with icu_from_uchar().
 * NOTE(review): the return statement and buffer cleanup are not visible in
 * this excerpt.
 */
556strfold_icu(
char *
dest,
size_t destsize,
const char *src, ssize_t srclen,
565 len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
566 len_conv = icu_convert_case(u_strFoldCase_default,
locale,
567 &buff_conv, buff_uchar, len_uchar);
568 result_len = icu_from_uchar(
dest, destsize, buff_conv, len_conv);
/*
 * Compare two strings with ucol_strcollUTF8() on the locale's collator,
 * without converting them to UChar first.  Only compiled when
 * HAVE_UCOL_STRCOLLUTF8 is defined.  An ICU failure is raised as
 * "collation failed".
 * NOTE(review): the call's remaining arguments and the return statement are
 * not visible in this excerpt.
 */
582#ifdef HAVE_UCOL_STRCOLLUTF8
584strncoll_icu_utf8(
const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2,
592 status = U_ZERO_ERROR;
593 result = ucol_strcollUTF8(
locale->icu.ucol,
597 if (U_FAILURE(status))
599 (
errmsg(
"collation failed: %s", u_errorName(status))));
/*
 * Produce an ICU sort key for src in dest.  The input is first converted to
 * UChar in a scratch buffer sized for ulen + 1 code units, then
 * ucol_getSortKey() writes at most destsize bytes of key into dest.
 * NOTE(review): buffer allocation, the adjustment of result_bsize and the
 * return statement are not visible in this excerpt.
 */
607strnxfrm_icu(
char *
dest,
size_t destsize,
const char *src, ssize_t srclen,
617 init_icu_converter();
619 ulen = uchar_length(icu_converter, src, srclen);
/* +1 leaves room for the terminator written by the conversion */
621 uchar_bsize = (ulen + 1) *
sizeof(UChar);
626 uchar = (UChar *)
buf;
628 ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
630 result_bsize = ucol_getSortKey(
locale->icu.ucol,
632 (uint8_t *)
dest, destsize);
/* when the key fits in dest, it must be NUL-terminated at result_bsize */
645 Assert(result_bsize >= destsize ||
dest[result_bsize] ==
'\0');
/*
 * Produce a sort-key prefix for UTF-8 input without converting to UChar: a
 * UCharIterator is pointed directly at the UTF-8 bytes and
 * ucol_nextSortKeyPart() generates the key.  An ICU failure is raised as
 * "sort key generation failed".
 * NOTE(review): the call's remaining arguments and the return statement are
 * not visible in this excerpt.
 */
652strnxfrm_prefix_icu_utf8(
char *
dest,
size_t destsize,
653 const char *src, ssize_t srclen,
663 uiter_setUTF8(&iter, src, srclen);
665 status = U_ZERO_ERROR;
666 result = ucol_nextSortKeyPart(
locale->icu.ucol,
672 if (U_FAILURE(status))
674 (
errmsg(
"sort key generation failed: %s",
675 u_errorName(status))));
/*
 * Report the version of the ICU collator for collcollate: open the collator
 * with pg_ucol_open(), read its version with ucol_getVersion(), close it,
 * and format the version into buf with u_versionToString().
 * NOTE(review): how buf is returned to the caller is not visible in this
 * excerpt.
 */
681get_collation_actual_version_icu(
const char *collcollate)
684 UVersionInfo versioninfo;
685 char buf[U_MAX_VERSION_STRING_LENGTH];
687 collator = pg_ucol_open(collcollate);
689 ucol_getVersion(collator, versioninfo);
/* the collator is only needed for the version lookup */
690 ucol_close(collator);
692 u_versionToString(versioninfo,
buf);
/*
 * Convert nbytes of buff into a freshly palloc'd UChar string stored in
 * *buff_uchar.  The required length is measured first with uchar_length(),
 * then len_uchar + 1 code units are allocated and uchar_convert() does the
 * conversion.
 * NOTE(review): nbytes is size_t here but is passed to helpers whose length
 * parameter is int32_t, so very large inputs would be narrowed implicitly.
 * The return statement is not visible in this excerpt.
 */
709icu_to_uchar(UChar **buff_uchar,
const char *buff,
size_t nbytes)
713 init_icu_converter();
715 len_uchar = uchar_length(icu_converter, buff, nbytes);
/* +1 leaves room for the terminator */
717 *buff_uchar =
palloc((len_uchar + 1) *
sizeof(**buff_uchar));
718 len_uchar = uchar_convert(icu_converter,
719 *buff_uchar, len_uchar + 1, buff, nbytes);
/*
 * Convert a UChar string back into dest using the shared converter.
 *
 * A first ucnv_fromUChars() call with a NULL/0 destination only measures the
 * required length, so U_BUFFER_OVERFLOW_ERROR is expected there and not
 * treated as a failure.  The real conversion is then done with a capacity of
 * len_result + 1; a failure or an unterminated result is reported as an
 * error.
 * NOTE(review): the statement controlled by the destsize check (presumably
 * an early return when the result does not fit) and the final return are not
 * visible in this excerpt.
 */
736icu_from_uchar(
char *
dest,
size_t destsize,
const UChar *buff_uchar, int32_t len_uchar)
741 init_icu_converter();
743 status = U_ZERO_ERROR;
/* measuring pass: no output buffer */
744 len_result = ucnv_fromUChars(icu_converter, NULL, 0,
745 buff_uchar, len_uchar, &status);
746 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
748 (
errmsg(
"%s failed: %s",
"ucnv_fromUChars",
749 u_errorName(status))));
/* result plus terminator must fit in the caller's buffer */
751 if (len_result + 1 > destsize)
754 status = U_ZERO_ERROR;
755 len_result = ucnv_fromUChars(icu_converter,
dest, len_result + 1,
756 buff_uchar, len_uchar, &status);
757 if (U_FAILURE(status) ||
758 status == U_STRING_NOT_TERMINATED_WARNING)
760 (
errmsg(
"%s failed: %s",
"ucnv_fromUChars",
761 u_errorName(status))));
/*
 * Run the case-conversion callback func over buff_source and store the
 * palloc'd result in *buff_dest.
 *
 * The first attempt uses an output buffer of the same length as the input.
 * If ICU reports U_BUFFER_OVERFLOW_ERROR, the length it returned is used to
 * allocate a buffer of the required size and the conversion is repeated.
 * Any remaining failure is reported as "case conversion failed".
 * NOTE(review): freeing of the first buffer and the return statement are not
 * visible in this excerpt.
 */
767icu_convert_case(ICU_Convert_Func func,
pg_locale_t mylocale,
768 UChar **buff_dest, UChar *buff_source, int32_t len_source)
/* first guess: output is as long as the input */
773 len_dest = len_source;
774 *buff_dest =
palloc(len_dest *
sizeof(**buff_dest));
775 status = U_ZERO_ERROR;
776 len_dest = func(*buff_dest, len_dest, buff_source, len_source,
777 mylocale->icu.
locale, &status);
778 if (status == U_BUFFER_OVERFLOW_ERROR)
/* retry with the length reported by the first call */
782 *buff_dest =
palloc(len_dest *
sizeof(**buff_dest));
783 status = U_ZERO_ERROR;
784 len_dest = func(*buff_dest, len_dest, buff_source, len_source,
785 mylocale->icu.
locale, &status);
787 if (U_FAILURE(status))
789 (
errmsg(
"case conversion failed: %s", u_errorName(status))));
/*
 * Adapter giving u_strToTitle() the ICU_Convert_Func shape: forwards all
 * arguments and passes NULL for the break-iterator argument.
 * NOTE(review): the locale parameter line is missing from this excerpt.
 */
794u_strToTitle_default_BI(UChar *
dest, int32_t destCapacity,
795 const UChar *src, int32_t srcLength,
797 UErrorCode *pErrorCode)
799 return u_strToTitle(
dest, destCapacity, src, srcLength,
800 NULL,
locale, pErrorCode);
/*
 * Adapter giving u_strFoldCase() the ICU_Convert_Func shape.  The language
 * is extracted from the locale, and for "tr" and "az" the
 * U_FOLD_CASE_EXCLUDE_SPECIAL_I option is selected; if the language lookup
 * fails, options keeps its initial value.
 * NOTE(review): the declarations (including the initial value of options),
 * the locale parameter line and the tail of the u_strFoldCase() call are not
 * visible in this excerpt.
 */
804u_strFoldCase_default(UChar *
dest, int32_t destCapacity,
805 const UChar *src, int32_t srcLength,
807 UErrorCode *pErrorCode)
819 status = U_ZERO_ERROR;
/* capacity 3: room for a two-letter language code plus terminator */
820 uloc_getLanguage(
locale, lang, 3, &status);
821 if (U_SUCCESS(status))
827 if (strcmp(lang,
"tr") == 0 || strcmp(lang,
"az") == 0)
828 options = U_FOLD_CASE_EXCLUDE_SPECIAL_I;
831 return u_strFoldCase(
dest, destCapacity, src, srcLength,
/*
 * Compare two strings by converting both to UChar and calling
 * ucol_strcoll() on the locale's collator.  Both converted strings share one
 * scratch buffer: the first occupies bufsize1 bytes at the start and the
 * second follows immediately after it.
 * NOTE(review): the code inside the HAVE_UCOL_STRCOLLUTF8 conditional, the
 * buffer allocation and the return statement are not visible in this
 * excerpt.
 */
846strncoll_icu(
const char *arg1, ssize_t len1,
860#ifdef HAVE_UCOL_STRCOLLUTF8
864 init_icu_converter();
866 ulen1 = uchar_length(icu_converter, arg1, len1);
867 ulen2 = uchar_length(icu_converter, arg2, len2);
/* each string gets room for its code units plus a terminator */
869 bufsize1 = (ulen1 + 1) *
sizeof(UChar);
870 bufsize2 = (ulen2 + 1) *
sizeof(UChar);
875 uchar1 = (UChar *)
buf;
876 uchar2 = (UChar *) (
buf + bufsize1);
878 ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
879 ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
881 result = ucol_strcoll(
locale->icu.ucol,
/*
 * Produce a sort-key prefix for src: convert the input to UChar in a scratch
 * buffer, point a UCharIterator at the converted string, and let
 * ucol_nextSortKeyPart() generate the key.  An ICU failure is raised as
 * "sort key generation failed".
 * NOTE(review): buffer allocation, the call's remaining arguments and the
 * return statement are not visible in this excerpt.
 */
893strnxfrm_prefix_icu(
char *
dest,
size_t destsize,
894 const char *src, ssize_t srclen,
910 init_icu_converter();
912 ulen = uchar_length(icu_converter, src, srclen);
/* +1 leaves room for the terminator written by the conversion */
914 uchar_bsize = (ulen + 1) *
sizeof(UChar);
919 uchar = (UChar *)
buf;
921 ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
923 uiter_setString(&iter, uchar, ulen);
925 status = U_ZERO_ERROR;
926 result_bsize = ucol_nextSortKeyPart(
locale->icu.ucol,
932 if (U_FAILURE(status))
934 (
errmsg(
"sort key generation failed: %s",
935 u_errorName(status))));
/*
 * Set up the file-level icu_converter.  An encoding with no ICU name is
 * rejected as unsupported; otherwise the converter is opened with
 * ucnv_open() and stored in icu_converter, with an error raised if opening
 * fails.
 * NOTE(review): how icu_encoding_name is obtained, and any early exit when a
 * converter already exists, are not visible in this excerpt.
 */
941init_icu_converter(
void)
943 const char *icu_encoding_name;
951 if (!icu_encoding_name)
953 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
954 errmsg(
"encoding \"%s\" not supported by ICU",
957 status = U_ZERO_ERROR;
958 conv = ucnv_open(icu_encoding_name, &status);
959 if (U_FAILURE(status))
961 (
errmsg(
"could not open ICU converter for encoding \"%s\": %s",
962 icu_encoding_name, u_errorName(status))));
/* publish the handle only after a successful open */
964 icu_converter = conv;
/*
 * Measure how many UChar code units str (len bytes) converts to, by calling
 * ucnv_toUChars() with a NULL/0 destination.  U_BUFFER_OVERFLOW_ERROR is the
 * expected outcome of such a measuring call and is not treated as a failure;
 * any other failure is reported.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
973uchar_length(UConverter *converter,
const char *
str, int32_t
len)
975 UErrorCode status = U_ZERO_ERROR;
978 ulen = ucnv_toUChars(converter, NULL, 0,
str,
len, &status);
979 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
981 (
errmsg(
"%s failed: %s",
"ucnv_toUChars", u_errorName(status))));
/*
 * Convert srclen bytes of src into dest, which has room for destlen UChar
 * code units, using ucnv_toUChars().  Any ICU failure is reported as an
 * error.
 * NOTE(review): status is initialised at its declaration and then assigned
 * U_ZERO_ERROR again before the call; the second assignment is redundant.
 * The return statement is not visible in this excerpt.
 */
992uchar_convert(UConverter *converter, UChar *
dest, int32_t destlen,
993 const char *src, int32_t srclen)
995 UErrorCode status = U_ZERO_ERROR;
998 status = U_ZERO_ERROR;
999 ulen = ucnv_toUChars(converter,
dest, destlen, src, srclen, &status);
1000 if (U_FAILURE(status))
1002 (
errmsg(
"%s failed: %s",
"ucnv_toUChars", u_errorName(status))));
/*
 * Parse collation attributes out of the locale string loc and apply them to
 * collator with ucol_setAttribute().  pg_ucol_open() calls this only for ICU
 * major versions below 54.
 *
 * The locale is canonicalized (one call to measure, one to fill the buffer),
 * lowercased, and searched for '@'.  Tokens are then examined for a '=' that
 * separates a keyword name from its value.  Recognized names map to a
 * UColAttribute and recognized values to a UColAttributeValue;
 * U_ILLEGAL_ARGUMENT_ERROR is stored in *status on a path whose condition is
 * not visible here (presumably an unrecognized value).
 * NOTE(review): the tokenizing loop, the buffer allocation, the assignments
 * for the "no"/"yes" values and the fallback branches are not visible in
 * this excerpt.
 */
1020icu_set_collation_attributes(UCollator *collator, const
char *loc,
1024 char *icu_locale_id;
1036 *status = U_ZERO_ERROR;
/* measuring call: NULL/0 destination yields the needed length */
1037 len = uloc_canonicalize(loc, NULL, 0, status);
1039 *status = U_ZERO_ERROR;
1040 len = uloc_canonicalize(loc, icu_locale_id,
len + 1, status);
1041 if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING)
/* keyword matching below is done on a lowercased copy */
1044 lower_str =
asc_tolower(icu_locale_id, strlen(icu_locale_id));
1046 pfree(icu_locale_id);
/* search for the '@' in the canonicalized, lowercased locale */
1048 str = strchr(lower_str,
'@');
/* '=' splits a token into keyword name and value */
1055 char *
e = strchr(
token,
'=');
1061 UColAttribute uattr;
1062 UColAttributeValue uvalue;
1064 *status = U_ZERO_ERROR;
/* map the keyword name to the ICU attribute it controls */
1073 if (strcmp(
name,
"colstrength") == 0)
1074 uattr = UCOL_STRENGTH;
1075 else if (strcmp(
name,
"colbackwards") == 0)
1076 uattr = UCOL_FRENCH_COLLATION;
1077 else if (strcmp(
name,
"colcaselevel") == 0)
1078 uattr = UCOL_CASE_LEVEL;
1079 else if (strcmp(
name,
"colcasefirst") == 0)
1080 uattr = UCOL_CASE_FIRST;
1081 else if (strcmp(
name,
"colalternate") == 0)
1082 uattr = UCOL_ALTERNATE_HANDLING;
1083 else if (strcmp(
name,
"colnormalization") == 0)
1084 uattr = UCOL_NORMALIZATION_MODE;
1085 else if (strcmp(
name,
"colnumeric") == 0)
1086 uattr = UCOL_NUMERIC_COLLATION;
/* map the keyword value to the ICU attribute value */
1091 if (strcmp(
value,
"primary") == 0)
1092 uvalue = UCOL_PRIMARY;
1093 else if (strcmp(
value,
"secondary") == 0)
1094 uvalue = UCOL_SECONDARY;
1095 else if (strcmp(
value,
"tertiary") == 0)
1096 uvalue = UCOL_TERTIARY;
1097 else if (strcmp(
value,
"quaternary") == 0)
1098 uvalue = UCOL_QUATERNARY;
1099 else if (strcmp(
value,
"identical") == 0)
1100 uvalue = UCOL_IDENTICAL;
1101 else if (strcmp(
value,
"no") == 0)
1103 else if (strcmp(
value,
"yes") == 0)
1105 else if (strcmp(
value,
"shifted") == 0)
1106 uvalue = UCOL_SHIFTED;
1107 else if (strcmp(
value,
"non-ignorable") == 0)
1108 uvalue = UCOL_NON_IGNORABLE;
1109 else if (strcmp(
value,
"lower") == 0)
1110 uvalue = UCOL_LOWER_FIRST;
1111 else if (strcmp(
value,
"upper") == 0)
1112 uvalue = UCOL_UPPER_FIRST;
1115 *status = U_ILLEGAL_ARGUMENT_ERROR;
1119 ucol_setAttribute(collator, uattr, uvalue, status);
#define TextDatumGetCString(d)
#define pg_attribute_unused()
#define IS_HIGHBIT_SET(ch)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
const char * get_encoding_name_for_icu(int encoding)
#define MCXT_ALLOC_NO_OOM
Assert(PointerIsAligned(start, uint64))
#define HeapTupleIsValid(tuple)
static void * GETSTRUCT(const HeapTupleData *tuple)
int GetDatabaseEncoding(void)
char * MemoryContextStrdup(MemoryContext context, const char *string)
void * MemoryContextAllocZero(MemoryContext context, Size size)
char * pstrdup(const char *in)
void pfree(void *pointer)
void * palloc_extended(Size size, int flags)
FormData_pg_collation * Form_pg_collation
pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context)
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define pg_encoding_to_char
char * strsep(char **stringp, const char *delim)
static Datum ObjectIdGetDatum(Oid X)
int(* strncoll)(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale)
size_t(* strlower)(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
const struct ctype_methods * ctype
const struct collate_methods * collate
void ReleaseSysCache(HeapTuple tuple)
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Datum SysCacheGetAttrNotNull(int cacheId, HeapTuple tup, AttrNumber attributeNumber)