21#include <unicode/ucasemap.h>
22#include <unicode/uchar.h>
33static UCaseMap *
casemap = NULL;
36typedef size_t (*
TestFunc) (
char *dst,
size_t dstsize,
const char *src,
64 size_t prev_offset = wbstate->
offset;
81icu_test_simple(
char32_t code)
87 char32_t iculower = u_tolower(code);
88 char32_t icutitle = u_totitle(code);
89 char32_t icuupper = u_toupper(code);
90 char32_t icufold = u_foldCase(code, U_FOLD_CASE_DEFAULT);
92 if (
lower != iculower || title != icutitle ||
upper != icuupper ||
95 printf(
"case_test: FAILURE for codepoint 0x%06x\n", code);
96 printf(
"case_test: Postgres lower/title/upper/fold: 0x%06x/0x%06x/0x%06x/0x%06x\n",
98 printf(
"case_test: ICU lower/title/upper/fold: 0x%06x/0x%06x/0x%06x/0x%06x\n",
99 iculower, icutitle, icuupper, icufold);
106icu_test_full(
char *
str)
112 char icu_lower[
BUFSZ];
113 char icu_title[
BUFSZ];
114 char icu_upper[
BUFSZ];
115 char icu_fold[
BUFSZ];
132 status = U_ZERO_ERROR;
134 status = U_ZERO_ERROR;
136 status = U_ZERO_ERROR;
138 status = U_ZERO_ERROR;
141 if (strcmp(
lower, icu_lower) != 0)
143 printf(
"case_test: str='%s' lower='%s' icu_lower='%s'\n",
str,
lower,
147 if (strcmp(title, icu_title) != 0)
149 printf(
"case_test: str='%s' title='%s' icu_title='%s'\n",
str, title,
153 if (strcmp(
upper, icu_upper) != 0)
155 printf(
"case_test: str='%s' upper='%s' icu_upper='%s'\n",
str,
upper,
159 if (strcmp(fold, icu_fold) != 0)
161 printf(
"case_test: str='%s' fold='%s' icu_fold='%s'\n",
str, fold,
174 int skipped_mismatch = 0;
176 for (
char32_t code = 0; code <= 0x10ffff; code++)
182 uint8_t icu_category = u_charType(code);
183 char code_str[5] = {0};
191 icu_test_simple(code);
193 icu_test_full(code_str);
199 if (skipped_mismatch > 0)
200 printf(
"case_test: skipped %d codepoints unassigned in ICU due to Unicode version mismatch\n",
203 printf(
"case_test: ICU simple mapping test: %d codepoints successful\n",
211 size_t src1len = strlen(test_string);
213 size_t dst1len = strlen(expected);
214 size_t dst2len = strlen(expected) + 1;
215 char *src1 =
malloc(src1len);
216 char *dst1 =
malloc(dst1len);
217 char *src2 = strdup(test_string);
218 char *dst2 =
malloc(dst2len);
221 memcpy(src1, test_string, src1len);
224 memset(dst1, 0x7F, dst1len);
225 needed = tfunc(dst1, dst1len, src1, src1len);
226 if (needed != strlen(expected))
228 printf(
"case_test: convert_case test1 FAILURE: '%s' needed %zu expected %zu\n",
229 test_string, needed, strlen(expected));
232 if (memcmp(dst1, expected, dst1len) != 0)
234 printf(
"case_test: convert_case test1 FAILURE: test: '%s' result: '%.*s' expected: '%s'\n",
235 test_string, (
int) dst1len, dst1, expected);
240 memset(dst2, 0x7F, dst2len);
241 needed = tfunc(dst2, dst2len, src1, src1len);
242 if (needed != strlen(expected))
244 printf(
"case_test: convert_case test2 FAILURE: '%s' needed %zu expected %zu\n",
245 test_string, needed, strlen(expected));
248 if (strcmp(dst2, expected) != 0)
250 printf(
"case_test: convert_case test2 FAILURE: test: '%s' result: '%s' expected: '%s'\n",
251 test_string, dst2, expected);
256 memset(dst1, 0x7F, dst1len);
257 needed = tfunc(dst1, dst1len, src2, src2len);
258 if (needed != strlen(expected))
260 printf(
"case_test: convert_case test3 FAILURE: '%s' needed %zu expected %zu\n",
261 test_string, needed, strlen(expected));
262 printf(
"case_test: convert_case test3 FAILURE: needed %zu\n", needed);
265 if (memcmp(dst1, expected, dst1len) != 0)
267 printf(
"case_test: convert_case test3 FAILURE: test: '%s' result: '%.*s' expected: '%s'\n",
268 test_string, (
int) dst1len, dst1, expected);
273 memset(dst2, 0x7F, dst2len);
274 needed = tfunc(dst2, dst2len, src2, src2len);
275 if (needed != strlen(expected))
277 printf(
"case_test: convert_case test4 FAILURE: '%s' needed %zu expected %zu\n",
278 test_string, needed, strlen(expected));
281 if (strcmp(dst2, expected) != 0)
283 printf(
"case_test: convert_case test4 FAILURE: test: '%s' result: '%s' expected: '%s'\n",
284 test_string, dst2, expected);
361 icu_test_full(
"ȺȺȺ");
362 icu_test_full(
"ßßß");
364 icu_test_full(
"a b");
365 icu_test_full(
"abc 123xyz");
366 icu_test_full(
"σςΣ ΣΣΣ");
367 icu_test_full(
"ıiIİ");
368 icu_test_full(
"\uFF11a");
370 icu_test_full(
"\u0391\u0345\u0301");
373 printf(
"case_test: convert_case: success\n");
380 UErrorCode status = U_ZERO_ERROR;
386 casemap = ucasemap_open(
"und", U_TITLECASE_NO_BREAK_ADJUSTMENT, &status);
387 if (U_FAILURE(status))
389 printf(
"case_test: failure opening UCaseMap: %s\n",
390 u_errorName(status));
397 printf(
"case_test: ICU Unicode version:\t\t%s\n", U_UNICODE_VERSION);
400 printf(
"case_test: ICU not available; skipping\n");
static void test_convert_case()
static void test_convert(TestFunc tfunc, const char *test_string, const char *expected)
int main(int argc, char **argv)
static size_t tfunc_lower(char *dst, size_t dstsize, const char *src, ssize_t srclen)
static size_t initcap_wbnext(void *state)
static size_t tfunc_title(char *dst, size_t dstsize, const char *src, ssize_t srclen)
static size_t tfunc_upper(char *dst, size_t dstsize, const char *src, ssize_t srclen)
static size_t tfunc_fold(char *dst, size_t dstsize, const char *src, ssize_t srclen)
size_t(* TestFunc)(char *dst, size_t dstsize, const char *src, ssize_t srclen)
static char32_t utf8_to_unicode(const unsigned char *c)
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
static unsigned char * unicode_to_utf8(char32_t c, unsigned char *utf8string)
static int unicode_utf8len(char32_t c)
char32_t unicode_titlecase_simple(char32_t code)
size_t unicode_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
size_t unicode_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
char32_t unicode_casefold_simple(char32_t code)
char32_t unicode_lowercase_simple(char32_t code)
size_t unicode_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full, WordBoundaryNext wbnext, void *wbstate)
size_t unicode_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
static enum CaseMapResult casemap(char32_t u1, CaseKind casekind, bool full, const char *src, size_t srclen, size_t srcoff, char32_t *simple, const char32_t **special)
char32_t unicode_uppercase_simple(char32_t code)
bool pg_u_isalnum(char32_t code, bool posix)
pg_unicode_category unicode_category(char32_t code)
#define PG_UNICODE_VERSION