21#include <unicode/ucasemap.h>
22#include <unicode/uchar.h>
32static UCaseMap * casemap = NULL;
35typedef size_t (*
TestFunc) (
char *dst,
size_t dstsize,
const char *src,
62 size_t prev_offset = wbstate->
offset;
88 pg_wchar icufold = u_foldCase(code, U_FOLD_CASE_DEFAULT);
90 if (
lower != iculower || title != icutitle ||
upper != icuupper ||
93 printf(
"case_test: FAILURE for codepoint 0x%06x\n", code);
94 printf(
"case_test: Postgres lower/title/upper/fold: 0x%06x/0x%06x/0x%06x/0x%06x\n",
96 printf(
"case_test: ICU lower/title/upper/fold: 0x%06x/0x%06x/0x%06x/0x%06x\n",
97 iculower, icutitle, icuupper, icufold);
104icu_test_full(
char *
str)
110 char icu_lower[
BUFSZ];
111 char icu_title[
BUFSZ];
112 char icu_upper[
BUFSZ];
113 char icu_fold[
BUFSZ];
127 status = U_ZERO_ERROR;
128 ucasemap_utf8ToLower(casemap, icu_lower,
BUFSZ,
str, -1, &status);
129 status = U_ZERO_ERROR;
130 ucasemap_utf8ToTitle(casemap, icu_title,
BUFSZ,
str, -1, &status);
131 status = U_ZERO_ERROR;
132 ucasemap_utf8ToUpper(casemap, icu_upper,
BUFSZ,
str, -1, &status);
133 status = U_ZERO_ERROR;
134 ucasemap_utf8FoldCase(casemap, icu_fold,
BUFSZ,
str, -1, &status);
136 if (strcmp(
lower, icu_lower) != 0)
138 printf(
"case_test: str='%s' lower='%s' icu_lower='%s'\n",
str,
lower,
142 if (strcmp(title, icu_title) != 0)
144 printf(
"case_test: str='%s' title='%s' icu_title='%s'\n",
str, title,
148 if (strcmp(
upper, icu_upper) != 0)
150 printf(
"case_test: str='%s' upper='%s' icu_upper='%s'\n",
str,
upper,
154 if (strcmp(fold, icu_fold) != 0)
156 printf(
"case_test: str='%s' fold='%s' icu_fold='%s'\n",
str, fold,
169 int skipped_mismatch = 0;
171 for (
pg_wchar code = 0; code <= 0x10ffff; code++)
177 uint8_t icu_category = u_charType(code);
178 char code_str[5] = {0};
186 icu_test_simple(code);
188 icu_test_full(code_str);
194 if (skipped_mismatch > 0)
195 printf(
"case_test: skipped %d codepoints unassigned in ICU due to Unicode version mismatch\n",
198 printf(
"case_test: ICU simple mapping test: %d codepoints successful\n",
206 size_t src1len = strlen(test_string);
208 size_t dst1len = strlen(expected);
209 size_t dst2len = strlen(expected) + 1;
210 char *src1 =
malloc(src1len);
211 char *dst1 =
malloc(dst1len);
212 char *src2 = strdup(test_string);
213 char *dst2 =
malloc(dst2len);
216 memcpy(src1, test_string, src1len);
219 memset(dst1, 0x7F, dst1len);
220 needed = tfunc(dst1, dst1len, src1, src1len);
221 if (needed != strlen(expected))
223 printf(
"case_test: convert_case test1 FAILURE: '%s' needed %zu expected %zu\n",
224 test_string, needed, strlen(expected));
227 if (memcmp(dst1, expected, dst1len) != 0)
229 printf(
"case_test: convert_case test1 FAILURE: test: '%s' result: '%.*s' expected: '%s'\n",
230 test_string, (
int) dst1len, dst1, expected);
235 memset(dst2, 0x7F, dst2len);
236 needed = tfunc(dst2, dst2len, src1, src1len);
237 if (needed != strlen(expected))
239 printf(
"case_test: convert_case test2 FAILURE: '%s' needed %zu expected %zu\n",
240 test_string, needed, strlen(expected));
243 if (strcmp(dst2, expected) != 0)
245 printf(
"case_test: convert_case test2 FAILURE: test: '%s' result: '%s' expected: '%s'\n",
246 test_string, dst2, expected);
251 memset(dst1, 0x7F, dst1len);
252 needed = tfunc(dst1, dst1len, src2, src2len);
253 if (needed != strlen(expected))
255 printf(
"case_test: convert_case test3 FAILURE: '%s' needed %zu expected %zu\n",
256 test_string, needed, strlen(expected));
257 printf(
"case_test: convert_case test3 FAILURE: needed %zu\n", needed);
260 if (memcmp(dst1, expected, dst1len) != 0)
262 printf(
"case_test: convert_case test3 FAILURE: test: '%s' result: '%.*s' expected: '%s'\n",
263 test_string, (
int) dst1len, dst1, expected);
268 memset(dst2, 0x7F, dst2len);
269 needed = tfunc(dst2, dst2len, src2, src2len);
270 if (needed != strlen(expected))
272 printf(
"case_test: convert_case test4 FAILURE: '%s' needed %zu expected %zu\n",
273 test_string, needed, strlen(expected));
276 if (strcmp(dst2, expected) != 0)
278 printf(
"case_test: convert_case test4 FAILURE: test: '%s' result: '%s' expected: '%s'\n",
279 test_string, dst2, expected);
350 icu_test_full(
"ȺȺȺ");
351 icu_test_full(
"ßßß");
353 icu_test_full(
"a b");
354 icu_test_full(
"abc 123xyz");
355 icu_test_full(
"σςΣ ΣΣΣ");
356 icu_test_full(
"ıiIİ");
358 icu_test_full(
"\u0391\u0345\u0301");
361 printf(
"case_test: convert_case: success\n");
368 UErrorCode status = U_ZERO_ERROR;
374 casemap = ucasemap_open(
"und", U_TITLECASE_NO_BREAK_ADJUSTMENT, &status);
375 if (U_FAILURE(status))
377 printf(
"case_test: failure opening UCaseMap: %s\n",
378 u_errorName(status));
385 printf(
"case_test: ICU Unicode version:\t\t%s\n", U_UNICODE_VERSION);
388 printf(
"case_test: ICU not available; skipping\n");
394 ucasemap_close(casemap);
static void test_convert_case()
static void test_convert(TestFunc tfunc, const char *test_string, const char *expected)
int main(int argc, char **argv)
static size_t tfunc_lower(char *dst, size_t dstsize, const char *src, ssize_t srclen)
static size_t initcap_wbnext(void *state)
static size_t tfunc_title(char *dst, size_t dstsize, const char *src, ssize_t srclen)
static size_t tfunc_upper(char *dst, size_t dstsize, const char *src, ssize_t srclen)
static size_t tfunc_fold(char *dst, size_t dstsize, const char *src, ssize_t srclen)
size_t(* TestFunc)(char *dst, size_t dstsize, const char *src, ssize_t srclen)
static pg_wchar utf8_to_unicode(const unsigned char *c)
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
static int unicode_utf8len(pg_wchar c)
pg_wchar unicode_uppercase_simple(pg_wchar code)
pg_wchar unicode_titlecase_simple(pg_wchar code)
size_t unicode_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
size_t unicode_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
size_t unicode_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full, WordBoundaryNext wbnext, void *wbstate)
pg_wchar unicode_lowercase_simple(pg_wchar code)
size_t unicode_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
pg_wchar unicode_casefold_simple(pg_wchar code)
bool pg_u_isalnum(pg_wchar code, bool posix)
pg_unicode_category unicode_category(pg_wchar code)
#define PG_UNICODE_VERSION