1048{
1049 char32_t *input_chars = NULL;
1050 char32_t *output_chars = NULL;
1051 int input_size;
1052 char *result;
1053 int result_size;
1054 int count;
1056 bool contains_RandALCat;
1057 unsigned char *p;
1058 char32_t *wp;
1059
1060
1062
1063
1064
1065
1066
1068 {
1071 goto oom;
1073 }
1074
1075
1076
1077
1078
1079
1081 if (input_size < 0)
1084 goto oom;
1085
1086 input_chars =
ALLOC((input_size + 1) *
sizeof(
char32_t));
1087 if (!input_chars)
1088 goto oom;
1089
1090 p = (
unsigned char *)
input;
1091 for (
i = 0;
i < input_size;
i++)
1092 {
1095 }
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107 count = 0;
1108 for (
i = 0;
i < input_size;
i++)
1109 {
1110 char32_t code = input_chars[
i];
1111
1113 input_chars[count++] = 0x0020;
1115 {
1116
1117 }
1118 else
1119 input_chars[count++] = code;
1120 }
1121 input_chars[count] = (
char32_t)
'\0';
1122 input_size = count;
1123
1124 if (input_size == 0)
1125 goto prohibited;
1126
1127
1128
1129
1130
1132 if (!output_chars)
1133 goto oom;
1134
1135
1136
1137
1138
1139 for (
i = 0;
i < input_size;
i++)
1140 {
1141 char32_t code = input_chars[
i];
1142
1144 goto prohibited;
1146 goto prohibited;
1147 }
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170 contains_RandALCat = false;
1171 for (
i = 0;
i < input_size;
i++)
1172 {
1173 char32_t code = input_chars[
i];
1174
1176 {
1177 contains_RandALCat = true;
1178 break;
1179 }
1180 }
1181
1182 if (contains_RandALCat)
1183 {
1184 char32_t first = input_chars[0];
1185 char32_t last = input_chars[input_size - 1];
1186
1187 for (
i = 0;
i < input_size;
i++)
1188 {
1189 char32_t code = input_chars[
i];
1190
1192 goto prohibited;
1193 }
1194
1197 goto prohibited;
1198 }
1199
1200
1201
1202
1203 result_size = 0;
1204 for (wp = output_chars; *wp; wp++)
1205 {
1206 unsigned char buf[4];
1207
1210 }
1211
1212 result =
ALLOC(result_size + 1);
1213 if (!result)
1214 goto oom;
1215
1216
1217
1218
1219
1220 p = (unsigned char *) result;
1221 for (wp = output_chars; *wp; wp++)
1222 {
1225 }
1226 Assert((
char *) p == result + result_size);
1227 *p = '\0';
1228
1231
1234
1235prohibited:
1236 if (input_chars)
1238 if (output_chars)
1240
1242
1243oom:
1244 if (input_chars)
1246 if (output_chars)
1248
1250}
Assert(PointerIsAligned(start, uint64))
static char32_t utf8_to_unicode(const unsigned char *c)
static unsigned char * unicode_to_utf8(char32_t c, unsigned char *utf8string)
static const char32_t LCat_codepoint_ranges[]
static const char32_t prohibited_output_ranges[]
static const char32_t commonly_mapped_to_nothing_ranges[]
#define IS_CODE_IN_TABLE(code, map)
static const char32_t unassigned_codepoint_ranges[]
static const char32_t RandALCat_codepoint_ranges[]
static int pg_utf8_string_len(const char *source)
static const char32_t non_ascii_space_ranges[]
bool pg_is_ascii(const char *str)
char32_t * unicode_normalize(UnicodeNormalizationForm form, const char32_t *input)