PostgreSQL Source Code  git master
case_test.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  * case_test.c
3  * Program to test Unicode case mapping functions.
4  *
5  * Portions Copyright (c) 2017-2024, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  * src/common/unicode/case_test.c
9  *
10  *-------------------------------------------------------------------------
11  */
12 #include "postgres_fe.h"
13 
14 #include <locale.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <wctype.h>
19 
20 #ifdef USE_ICU
21 #include <unicode/uchar.h>
22 #endif
23 #include "common/unicode_case.h"
25 #include "common/unicode_version.h"
26 
27 #ifdef USE_ICU
28 
29 static void
30 icu_test_simple(pg_wchar code)
31 {
33  pg_wchar title = unicode_titlecase_simple(code);
35  pg_wchar iculower = u_tolower(code);
36  pg_wchar icutitle = u_totitle(code);
37  pg_wchar icuupper = u_toupper(code);
38 
39  if (lower != iculower || title != icutitle || upper != icuupper)
40  {
41  printf("case_test: FAILURE for codepoint 0x%06x\n", code);
42  printf("case_test: Postgres lower/title/upper: 0x%06x/0x%06x/0x%06x\n",
43  lower, title, upper);
44  printf("case_test: ICU lower/title/upper: 0x%06x/0x%06x/0x%06x\n",
45  iculower, icutitle, icuupper);
46  printf("\n");
47  exit(1);
48  }
49 }
50 
51 /*
52  * Exhaustively compare case mappings with the results from ICU.
53  */
54 static void
55 test_icu(void)
56 {
57  int successful = 0;
58  int skipped_mismatch = 0;
59 
60  for (pg_wchar code = 0; code <= 0x10ffff; code++)
61  {
62  pg_unicode_category category = unicode_category(code);
63 
64  if (category != PG_U_UNASSIGNED)
65  {
66  uint8_t icu_category = u_charType(code);
67 
68  if (icu_category == PG_U_UNASSIGNED)
69  {
70  skipped_mismatch++;
71  continue;
72  }
73 
74  icu_test_simple(code);
75  successful++;
76  }
77  }
78 
79  if (skipped_mismatch > 0)
80  printf("case_test: skipped %d codepoints unassigned in ICU due to Unicode version mismatch\n",
81  skipped_mismatch);
82 
83  printf("case_test: ICU simple mapping test: %d codepoints successful\n",
84  successful);
85 }
86 #endif
87 
/*
 * Run unicode_strlower() on test_string and verify that it produces expected.
 *
 * The conversion is exercised four times, covering every combination of:
 *   - source passed with an explicit length (buffer not NUL-terminated)
 *     versus source passed with length -1 (NUL-terminated), and
 *   - destination sized exactly to the expected bytes (no room for a NUL)
 *     versus destination with one extra byte for the terminating NUL.
 *
 * For each combination both the returned "needed" size and the produced
 * bytes are checked.  Any failure prints a diagnostic and terminates the
 * program with exit status 1.
 */
static void
test_strlower(const char *test_string, const char *expected)
{
	size_t		src1len = strlen(test_string);
	ssize_t		src2len = -1;	/* NUL-terminated */
	size_t		dst1len = strlen(expected);
	size_t		dst2len = strlen(expected) + 1; /* NUL-terminated */
	char	   *src1 = malloc(src1len);
	char	   *dst1 = malloc(dst1len);
	char	   *src2 = strdup(test_string);
	char	   *dst2 = malloc(dst2len);
	size_t		needed;

	/*
	 * Buffers are deliberately allocated at their exact size, so a
	 * zero-length request may legitimately yield NULL; only treat NULL as
	 * failure when bytes were actually requested.
	 */
	if ((src1 == NULL && src1len > 0) ||
		(dst1 == NULL && dst1len > 0) ||
		src2 == NULL || dst2 == NULL)
	{
		printf("case_test: out of memory\n");
		exit(1);
	}

	if (src1len > 0)
		memcpy(src1, test_string, src1len); /* not NUL-terminated */

	/* neither source nor destination are NUL-terminated */
	if (dst1len > 0)
		memset(dst1, 0x7F, dst1len);
	needed = unicode_strlower(dst1, dst1len, src1, src1len);
	if (needed != strlen(expected))
	{
		printf("case_test: convert_case test1 FAILURE: needed %zu\n", needed);
		exit(1);
	}
	if (dst1len > 0 && memcmp(dst1, expected, dst1len) != 0)
	{
		printf("case_test: convert_case test1 FAILURE: test: '%s' result: '%.*s' expected: '%s'\n",
			   test_string, (int) dst1len, dst1, expected);
		exit(1);
	}

	/* destination is NUL-terminated and source is not */
	memset(dst2, 0x7F, dst2len);
	needed = unicode_strlower(dst2, dst2len, src1, src1len);
	if (needed != strlen(expected))
	{
		printf("case_test: convert_case test2 FAILURE: needed %zu\n", needed);
		exit(1);
	}
	if (strcmp(dst2, expected) != 0)
	{
		printf("case_test: convert_case test2 FAILURE: test: '%s' result: '%s' expected: '%s'\n",
			   test_string, dst2, expected);
		exit(1);
	}

	/* source is NUL-terminated and destination is not */
	if (dst1len > 0)
		memset(dst1, 0x7F, dst1len);
	needed = unicode_strlower(dst1, dst1len, src2, src2len);
	if (needed != strlen(expected))
	{
		printf("case_test: convert_case test3 FAILURE: needed %zu\n", needed);
		exit(1);
	}
	if (dst1len > 0 && memcmp(dst1, expected, dst1len) != 0)
	{
		printf("case_test: convert_case test3 FAILURE: test: '%s' result: '%.*s' expected: '%s'\n",
			   test_string, (int) dst1len, dst1, expected);
		exit(1);
	}

	/* both source and destination are NUL-terminated */
	memset(dst2, 0x7F, dst2len);
	needed = unicode_strlower(dst2, dst2len, src2, src2len);
	if (needed != strlen(expected))
	{
		printf("case_test: convert_case test4 FAILURE: needed %zu\n", needed);
		exit(1);
	}
	if (strcmp(dst2, expected) != 0)
	{
		printf("case_test: convert_case test4 FAILURE: test: '%s' result: '%s' expected: '%s'\n",
			   test_string, dst2, expected);
		exit(1);
	}

	free(src1);
	free(dst1);
	free(src2);
	free(dst2);
}
168 
/*
 * Run the string case-conversion checks over a small set of fixed inputs
 * and report success.  test_strlower() exits the program on any failure,
 * so reaching the final printf means every case passed.
 */
static void
test_convert_case(void)
{
	/* test string with no case changes */
	test_strlower("√∞", "√∞");
	/* test string with case changes */
	test_strlower("ABC", "abc");
	/* test string with case changes and byte length changes */
	test_strlower("ȺȺȺ", "ⱥⱥⱥ");

	printf("case_test: convert_case: success\n");
}
181 
182 int
183 main(int argc, char **argv)
184 {
185  printf("case_test: Postgres Unicode version:\t%s\n", PG_UNICODE_VERSION);
186 #ifdef USE_ICU
187  printf("case_test: ICU Unicode version:\t\t%s\n", U_UNICODE_VERSION);
188  test_icu();
189 #else
190  printf("case_test: ICU not available; skipping\n");
191 #endif
192 
194  exit(0);
195 }
static void test_convert_case()
Definition: case_test.c:170
int main(int argc, char **argv)
Definition: case_test.c:183
static void test_strlower(const char *test_string, const char *expected)
Definition: case_test.c:89
#define free(a)
Definition: header.h:65
#define malloc(a)
Definition: header.h:50
exit(1)
unsigned int pg_wchar
Definition: mbprint.c:31
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:80
#define printf(...)
Definition: port.h:244
pg_wchar unicode_uppercase_simple(pg_wchar code)
Definition: unicode_case.c:45
size_t unicode_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen)
Definition: unicode_case.c:69
pg_wchar unicode_titlecase_simple(pg_wchar code)
Definition: unicode_case.c:37
pg_wchar unicode_lowercase_simple(pg_wchar code)
Definition: unicode_case.c:29
pg_unicode_category unicode_category(pg_wchar code)
pg_unicode_category
@ PG_U_UNASSIGNED
#define PG_UNICODE_VERSION