PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
unicode_norm.c File Reference
Include dependency graph for unicode_norm.c:

Go to the source code of this file.

Macros

#define ALLOC(size)   palloc(size)
 
#define FREE(size)   pfree(size)
 
#define SBASE   0xAC00 /* U+AC00 */
 
#define LBASE   0x1100 /* U+1100 */
 
#define VBASE   0x1161 /* U+1161 */
 
#define TBASE   0x11A7 /* U+11A7 */
 
#define LCOUNT   19
 
#define VCOUNT   21
 
#define TCOUNT   28
 
#define NCOUNT   VCOUNT * TCOUNT
 
#define SCOUNT   LCOUNT * NCOUNT
 

Functions

static const pg_unicode_decomposition * get_code_entry (pg_wchar code)
 
static uint8 get_canonical_class (pg_wchar code)
 
static const pg_wchar * get_code_decomposition (const pg_unicode_decomposition *entry, int *dec_size)
 
static int get_decomposed_size (pg_wchar code, bool compat)
 
static bool recompose_code (uint32 start, uint32 code, uint32 *result)
 
static void decompose_code (pg_wchar code, bool compat, pg_wchar **result, int *current)
 
pg_wchar * unicode_normalize (UnicodeNormalizationForm form, const pg_wchar *input)
 
static const pg_unicode_normprops * qc_hash_lookup (pg_wchar ch, const pg_unicode_norminfo *norminfo)
 
static UnicodeNormalizationQC qc_is_allowed (UnicodeNormalizationForm form, pg_wchar ch)
 
UnicodeNormalizationQC unicode_is_normalized_quickcheck (UnicodeNormalizationForm form, const pg_wchar *input)
 

Macro Definition Documentation

◆ ALLOC

#define ALLOC (   size)    palloc(size)

Definition at line 31 of file unicode_norm.c.

◆ FREE

#define FREE (   size)    pfree(size)

Definition at line 32 of file unicode_norm.c.

◆ LBASE

#define LBASE   0x1100 /* U+1100 */

Definition at line 40 of file unicode_norm.c.

◆ LCOUNT

#define LCOUNT   19

Definition at line 43 of file unicode_norm.c.

◆ NCOUNT

#define NCOUNT   VCOUNT * TCOUNT

Definition at line 46 of file unicode_norm.c.

◆ SBASE

#define SBASE   0xAC00 /* U+AC00 */

Definition at line 39 of file unicode_norm.c.

◆ SCOUNT

#define SCOUNT   LCOUNT * NCOUNT

Definition at line 47 of file unicode_norm.c.

◆ TBASE

#define TBASE   0x11A7 /* U+11A7 */

Definition at line 42 of file unicode_norm.c.

◆ TCOUNT

#define TCOUNT   28

Definition at line 45 of file unicode_norm.c.

◆ VBASE

#define VBASE   0x1161 /* U+1161 */

Definition at line 41 of file unicode_norm.c.

◆ VCOUNT

#define VCOUNT   21

Definition at line 44 of file unicode_norm.c.

Function Documentation

◆ decompose_code()

static void decompose_code ( pg_wchar  code,
bool  compat,
pg_wchar **  result,
int *  current 
)
static

Definition at line 321 of file unicode_norm.c.

322{
323 const pg_unicode_decomposition *entry;
324 int i;
325 const uint32 *decomp;
326 int dec_size;
327
328 /*
329 * Fast path for Hangul characters not stored in tables to save memory as
330 * decomposition is algorithmic. See
331 * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details
332 * on the matter.
333 */
334 if (code >= SBASE && code < SBASE + SCOUNT)
335 {
336 uint32 l,
337 v,
338 tindex,
339 sindex;
340 pg_wchar *res = *result;
341
342 sindex = code - SBASE;
343 l = LBASE + sindex / (VCOUNT * TCOUNT);
344 v = VBASE + (sindex % (VCOUNT * TCOUNT)) / TCOUNT;
345 tindex = sindex % TCOUNT;
346
347 res[*current] = l;
348 (*current)++;
349 res[*current] = v;
350 (*current)++;
351
352 if (tindex != 0)
353 {
354 res[*current] = TBASE + tindex;
355 (*current)++;
356 }
357
358 return;
359 }
360
361 entry = get_code_entry(code);
362
363 /*
364 * Just fill in with the current decomposition if there are no
365 * decomposition codes to recurse to. A NULL entry is equivalent to a
366 * character with class 0 and no decompositions, so just leave also in
367 * this case.
368 */
369 if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||
370 (!compat && DECOMPOSITION_IS_COMPAT(entry)))
371 {
372 pg_wchar *res = *result;
373
374 res[*current] = code;
375 (*current)++;
376 return;
377 }
378
379 /*
380 * If this entry has other decomposition codes look at them as well.
381 */
382 decomp = get_code_decomposition(entry, &dec_size);
383 for (i = 0; i < dec_size; i++)
384 {
385 pg_wchar lcode = (pg_wchar) decomp[i];
386
387 /* Leave if no more decompositions */
388 decompose_code(lcode, compat, result, current);
389 }
390}
uint32_t uint32
Definition: c.h:502
enum COMPAT_MODE compat
Definition: ecpg.c:26
int i
Definition: isn.c:77
unsigned int pg_wchar
Definition: mbprint.c:31
#define TCOUNT
Definition: unicode_norm.c:45
#define TBASE
Definition: unicode_norm.c:42
static const pg_unicode_decomposition * get_code_entry(pg_wchar code)
Definition: unicode_norm.c:72
static void decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)
Definition: unicode_norm.c:321
#define VBASE
Definition: unicode_norm.c:41
#define VCOUNT
Definition: unicode_norm.c:44
static const pg_wchar * get_code_decomposition(const pg_unicode_decomposition *entry, int *dec_size)
Definition: unicode_norm.c:134
#define LBASE
Definition: unicode_norm.c:40
#define SBASE
Definition: unicode_norm.c:39
#define SCOUNT
Definition: unicode_norm.c:47
#define DECOMPOSITION_IS_COMPAT(x)
#define DECOMPOSITION_SIZE(x)

References compat, decompose_code(), DECOMPOSITION_IS_COMPAT, DECOMPOSITION_SIZE, get_code_decomposition(), get_code_entry(), i, LBASE, SBASE, SCOUNT, TBASE, TCOUNT, VBASE, and VCOUNT.

Referenced by decompose_code(), and unicode_normalize().

◆ get_canonical_class()

static uint8 get_canonical_class ( pg_wchar  code)
static

Definition at line 112 of file unicode_norm.c.

113{
114 const pg_unicode_decomposition *entry = get_code_entry(code);
115
116 /*
117 * If no entries are found, the character used is either an Hangul
118 * character or a character with a class of 0 and no decompositions.
119 */
120 if (!entry)
121 return 0;
122 else
123 return entry->comb_class;
124}

References pg_unicode_decomposition::comb_class, and get_code_entry().

Referenced by unicode_is_normalized_quickcheck(), and unicode_normalize().

◆ get_code_decomposition()

static const pg_wchar * get_code_decomposition ( const pg_unicode_decomposition *  entry,
int *  dec_size 
)
static

Definition at line 134 of file unicode_norm.c.

135{
136 static pg_wchar x;
137
138 if (DECOMPOSITION_IS_INLINE(entry))
139 {
140 Assert(DECOMPOSITION_SIZE(entry) == 1);
141 x = (pg_wchar) entry->dec_index;
142 *dec_size = 1;
143 return &x;
144 }
145 else
146 {
147 *dec_size = DECOMPOSITION_SIZE(entry);
148 return &UnicodeDecomp_codepoints[entry->dec_index];
149 }
150}
Assert(PointerIsAligned(start, uint64))
int x
Definition: isn.c:75
static const uint32 UnicodeDecomp_codepoints[5138]
#define DECOMPOSITION_IS_INLINE(x)

References Assert(), pg_unicode_decomposition::dec_index, DECOMPOSITION_IS_INLINE, DECOMPOSITION_SIZE, UnicodeDecomp_codepoints, and x.

Referenced by decompose_code(), and get_decomposed_size().

◆ get_code_entry()

static const pg_unicode_decomposition * get_code_entry ( pg_wchar  code)
static

Definition at line 72 of file unicode_norm.c.

73{
74#ifndef FRONTEND
75 int h;
76 uint32 hashkey;
77 pg_unicode_decompinfo decompinfo = UnicodeDecompInfo;
78
79 /*
80 * Compute the hash function. The hash key is the codepoint with the bytes
81 * in network order.
82 */
83 hashkey = pg_hton32(code);
84 h = decompinfo.hash(&hashkey);
85
86 /* An out-of-range result implies no match */
87 if (h < 0 || h >= decompinfo.num_decomps)
88 return NULL;
89
90 /*
91 * Since it's a perfect hash, we need only match to the specific codepoint
92 * it identifies.
93 */
94 if (code != decompinfo.decomps[h].codepoint)
95 return NULL;
96
97 /* Success! */
98 return &decompinfo.decomps[h];
99#else
100 return bsearch(&(code),
101 UnicodeDecompMain,
102 lengthof(UnicodeDecompMain),
103 sizeof(pg_unicode_decomposition),
104 conv_compare);
105#endif
106}
#define lengthof(array)
Definition: c.h:759
#define pg_hton32(x)
Definition: pg_bswap.h:121
const pg_unicode_decomposition * decomps
static const pg_unicode_decompinfo UnicodeDecompInfo
static const pg_unicode_decomposition UnicodeDecompMain[6843]

References pg_unicode_decomposition::codepoint, pg_unicode_decompinfo::decomps, pg_unicode_decompinfo::hash, lengthof, pg_unicode_decompinfo::num_decomps, pg_hton32, UnicodeDecompInfo, and UnicodeDecompMain.

Referenced by decompose_code(), get_canonical_class(), and get_decomposed_size().

◆ get_decomposed_size()

static int get_decomposed_size ( pg_wchar  code,
bool  compat 
)
static

Definition at line 159 of file unicode_norm.c.

160{
161 const pg_unicode_decomposition *entry;
162 int size = 0;
163 int i;
164 const uint32 *decomp;
165 int dec_size;
166
167 /*
168 * Fast path for Hangul characters not stored in tables to save memory as
169 * decomposition is algorithmic. See
170 * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details
171 * on the matter.
172 */
173 if (code >= SBASE && code < SBASE + SCOUNT)
174 {
175 uint32 tindex,
176 sindex;
177
178 sindex = code - SBASE;
179 tindex = sindex % TCOUNT;
180
181 if (tindex != 0)
182 return 3;
183 return 2;
184 }
185
186 entry = get_code_entry(code);
187
188 /*
189 * Just count current code if no other decompositions. A NULL entry is
190 * equivalent to a character with class 0 and no decompositions.
191 */
192 if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||
193 (!compat && DECOMPOSITION_IS_COMPAT(entry)))
194 return 1;
195
196 /*
197 * If this entry has other decomposition codes look at them as well. First
198 * get its decomposition in the list of tables available.
199 */
200 decomp = get_code_decomposition(entry, &dec_size);
201 for (i = 0; i < dec_size; i++)
202 {
203 uint32 lcode = decomp[i];
204
205 size += get_decomposed_size(lcode, compat);
206 }
207
208 return size;
209}
static int get_decomposed_size(pg_wchar code, bool compat)
Definition: unicode_norm.c:159

References compat, DECOMPOSITION_IS_COMPAT, DECOMPOSITION_SIZE, get_code_decomposition(), get_code_entry(), get_decomposed_size(), i, SBASE, SCOUNT, and TCOUNT.

Referenced by get_decomposed_size(), and unicode_normalize().

◆ qc_hash_lookup()

static const pg_unicode_normprops * qc_hash_lookup ( pg_wchar  ch,
const pg_unicode_norminfo *  norminfo 
)
static

Definition at line 543 of file unicode_norm.c.

544{
545 int h;
546 uint32 hashkey;
547
548 /*
549 * Compute the hash function. The hash key is the codepoint with the bytes
550 * in network order.
551 */
552 hashkey = pg_hton32(ch);
553 h = norminfo->hash(&hashkey);
554
555 /* An out-of-range result implies no match */
556 if (h < 0 || h >= norminfo->num_normprops)
557 return NULL;
558
559 /*
560 * Since it's a perfect hash, we need only match to the specific codepoint
561 * it identifies.
562 */
563 if (ch != norminfo->normprops[h].codepoint)
564 return NULL;
565
566 /* Success! */
567 return &norminfo->normprops[h];
568}
const pg_unicode_normprops * normprops

References pg_unicode_normprops::codepoint, pg_unicode_norminfo::hash, pg_unicode_norminfo::normprops, pg_unicode_norminfo::num_normprops, and pg_hton32.

Referenced by qc_is_allowed().

◆ qc_is_allowed()

static UnicodeNormalizationQC qc_is_allowed ( UnicodeNormalizationForm  form,
pg_wchar  ch 
)
static

Definition at line 574 of file unicode_norm.c.

575{
576 const pg_unicode_normprops *found = NULL;
577
578 switch (form)
579 {
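580 case UNICODE_NFC:
581 found = qc_hash_lookup(ch, &UnicodeNormInfo_NFC_QC);
582 break;
583 case UNICODE_NFKC:
584 found = qc_hash_lookup(ch, &UnicodeNormInfo_NFKC_QC);
585 break;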
586 default:
587 Assert(false);
588 break;
589 }
590
591 if (found)
592 return found->quickcheck;
593 else
594 return UNICODE_NORM_QC_YES;
595}
static const pg_unicode_normprops * qc_hash_lookup(pg_wchar ch, const pg_unicode_norminfo *norminfo)
Definition: unicode_norm.c:543
@ UNICODE_NFC
Definition: unicode_norm.h:21
@ UNICODE_NFKC
Definition: unicode_norm.h:23
@ UNICODE_NORM_QC_YES
Definition: unicode_norm.h:31
static const pg_unicode_norminfo UnicodeNormInfo_NFKC_QC
static const pg_unicode_norminfo UnicodeNormInfo_NFC_QC

References Assert(), qc_hash_lookup(), pg_unicode_normprops::quickcheck, UNICODE_NFC, UNICODE_NFKC, UNICODE_NORM_QC_YES, UnicodeNormInfo_NFC_QC, and UnicodeNormInfo_NFKC_QC.

Referenced by unicode_is_normalized_quickcheck().

◆ recompose_code()

static bool recompose_code ( uint32  start,
uint32  code,
uint32 *  result 
)
static

Definition at line 218 of file unicode_norm.c.

219{
220 /*
221 * Handle Hangul characters algorithmically, per the Unicode spec.
222 *
223 * Check if two current characters are L and V.
224 */
225 if (start >= LBASE && start < LBASE + LCOUNT &&
226 code >= VBASE && code < VBASE + VCOUNT)
227 {
228 /* make syllable of form LV */
229 uint32 lindex = start - LBASE;
230 uint32 vindex = code - VBASE;
231
232 *result = SBASE + (lindex * VCOUNT + vindex) * TCOUNT;
233 return true;
234 }
235 /* Check if two current characters are LV and T */
236 else if (start >= SBASE && start < (SBASE + SCOUNT) &&
237 ((start - SBASE) % TCOUNT) == 0 &&
238 code >= TBASE && code < (TBASE + TCOUNT))
239 {
240 /* make syllable of form LVT */
241 uint32 tindex = code - TBASE;
242
243 *result = start + tindex;
244 return true;
245 }
246 else
247 {
248 const pg_unicode_decomposition *entry;
249
250 /*
251 * Do an inverse lookup of the decomposition tables to see if anything
252 * matches. The comparison just needs to be a perfect match on the
253 * sub-table of size two, because the start character has already been
254 * recomposed partially. This lookup uses a perfect hash function for
255 * the backend code.
256 */
257#ifndef FRONTEND
258
259 int h,
260 inv_lookup_index;
261 uint64 hashkey;
262 pg_unicode_recompinfo recompinfo = UnicodeRecompInfo;
263
264 /*
265 * Compute the hash function. The hash key is formed by concatenating
266 * bytes of the two codepoints in network order. See also
267 * src/common/unicode/generate-unicode_norm_table.pl.
268 */
269 hashkey = pg_hton64(((uint64) start << 32) | (uint64) code);
270 h = recompinfo.hash(&hashkey);
271
272 /* An out-of-range result implies no match */
273 if (h < 0 || h >= recompinfo.num_recomps)
274 return false;
275
276 inv_lookup_index = recompinfo.inverse_lookup[h];
277 entry = &UnicodeDecompMain[inv_lookup_index];
278
279 if (start == UnicodeDecomp_codepoints[entry->dec_index] &&
280 code == UnicodeDecomp_codepoints[entry->dec_index + 1])
281 {
282 *result = entry->codepoint;
283 return true;
284 }
285
286#else
287
288 int i;
289
290 for (i = 0; i < lengthof(UnicodeDecompMain); i++)
291 {
292 entry = &UnicodeDecompMain[i];
293
294 if (DECOMPOSITION_SIZE(entry) != 2)
295 continue;
296
297 if (DECOMPOSITION_NO_COMPOSE(entry))
298 continue;
299
300 if (start == UnicodeDecomp_codepoints[entry->dec_index] &&
301 code == UnicodeDecomp_codepoints[entry->dec_index + 1])
302 {
303 *result = entry->codepoint;
304 return true;
305 }
306 }
307#endif /* !FRONTEND */
308 }
309
310 return false;
311}
uint64_t uint64
Definition: c.h:503
return str start
#define pg_hton64(x)
Definition: pg_bswap.h:122
#define LCOUNT
Definition: unicode_norm.c:43
static const pg_unicode_recompinfo UnicodeRecompInfo
#define DECOMPOSITION_NO_COMPOSE(x)

References pg_unicode_decomposition::codepoint, pg_unicode_decomposition::dec_index, DECOMPOSITION_NO_COMPOSE, DECOMPOSITION_SIZE, pg_unicode_recompinfo::hash, i, pg_unicode_recompinfo::inverse_lookup, LBASE, LCOUNT, lengthof, pg_unicode_recompinfo::num_recomps, pg_hton64, SBASE, SCOUNT, start, TBASE, TCOUNT, UnicodeDecomp_codepoints, UnicodeDecompMain, UnicodeRecompInfo, VBASE, and VCOUNT.

Referenced by unicode_normalize().

◆ unicode_is_normalized_quickcheck()

UnicodeNormalizationQC unicode_is_normalized_quickcheck ( UnicodeNormalizationForm  form,
const pg_wchar *  input 
)

Definition at line 598 of file unicode_norm.c.

599{
600 uint8 lastCanonicalClass = 0;
601 UnicodeNormalizationQC result = UNICODE_NORM_QC_YES;
602
603 /*
604 * For the "D" forms, we don't run the quickcheck. We don't include the
605 * lookup tables for those because they are huge, checking for these
606 * particular forms is less common, and running the slow path is faster
607 * for the "D" forms than the "C" forms because you don't need to
608 * recompose, which is slow.
609 */
610 if (form == UNICODE_NFD || form == UNICODE_NFKD)
611 return UNICODE_NORM_QC_MAYBE;
612
613 for (const pg_wchar *p = input; *p; p++)
614 {
615 pg_wchar ch = *p;
616 uint8 canonicalClass;
617 UnicodeNormalizationQC check;
618
619 canonicalClass = get_canonical_class(ch);
620 if (lastCanonicalClass > canonicalClass && canonicalClass != 0)
621 return UNICODE_NORM_QC_NO;
622
623 check = qc_is_allowed(form, ch);
624 if (check == UNICODE_NORM_QC_NO)
625 return UNICODE_NORM_QC_NO;
626 else if (check == UNICODE_NORM_QC_MAYBE)
627 result = UNICODE_NORM_QC_MAYBE;
628
629 lastCanonicalClass = canonicalClass;
630 }
631 return result;
632}
uint8_t uint8
Definition: c.h:500
FILE * input
static uint8 get_canonical_class(pg_wchar code)
Definition: unicode_norm.c:112
static UnicodeNormalizationQC qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch)
Definition: unicode_norm.c:574
@ UNICODE_NFKD
Definition: unicode_norm.h:24
@ UNICODE_NFD
Definition: unicode_norm.h:22
UnicodeNormalizationQC
Definition: unicode_norm.h:29
@ UNICODE_NORM_QC_NO
Definition: unicode_norm.h:30
@ UNICODE_NORM_QC_MAYBE
Definition: unicode_norm.h:32

References get_canonical_class(), input, qc_is_allowed(), UNICODE_NFD, UNICODE_NFKD, UNICODE_NORM_QC_MAYBE, UNICODE_NORM_QC_NO, and UNICODE_NORM_QC_YES.

Referenced by unicode_is_normalized().

◆ unicode_normalize()

pg_wchar * unicode_normalize ( UnicodeNormalizationForm  form,
const pg_wchar *  input 
)

Definition at line 402 of file unicode_norm.c.

403{
404 bool compat = (form == UNICODE_NFKC || form == UNICODE_NFKD);
405 bool recompose = (form == UNICODE_NFC || form == UNICODE_NFKC);
406 pg_wchar *decomp_chars;
407 pg_wchar *recomp_chars;
408 int decomp_size,
409 current_size;
410 int count;
411 const pg_wchar *p;
412
413 /* variables for recomposition */
414 int last_class;
415 int starter_pos;
416 int target_pos;
417 uint32 starter_ch;
418
419 /* First, do character decomposition */
420
421 /*
422 * Calculate how many characters long the decomposed version will be.
423 */
424 decomp_size = 0;
425 for (p = input; *p; p++)
426 decomp_size += get_decomposed_size(*p, compat);
427
428 decomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));
429 if (decomp_chars == NULL)
430 return NULL;
431
432 /*
433 * Now fill in each entry recursively. This needs a second pass on the
434 * decomposition table.
435 */
436 current_size = 0;
437 for (p = input; *p; p++)
438 decompose_code(*p, compat, &decomp_chars, &current_size);
439 decomp_chars[decomp_size] = '\0';
440 Assert(decomp_size == current_size);
441
442 /* Leave if there is nothing to decompose */
443 if (decomp_size == 0)
444 return decomp_chars;
445
446 /*
447 * Now apply canonical ordering.
448 */
449 for (count = 1; count < decomp_size; count++)
450 {
451 pg_wchar prev = decomp_chars[count - 1];
452 pg_wchar next = decomp_chars[count];
453 pg_wchar tmp;
454 const uint8 prevClass = get_canonical_class(prev);
455 const uint8 nextClass = get_canonical_class(next);
456
457 /*
458 * Per Unicode (https://www.unicode.org/reports/tr15/tr15-18.html)
459 * annex 4, a sequence of two adjacent characters in a string is an
460 * exchangeable pair if the combining class (from the Unicode
461 * Character Database) for the first character is greater than the
462 * combining class for the second, and the second is not a starter. A
463 * character is a starter if its combining class is 0.
464 */
465 if (prevClass == 0 || nextClass == 0)
466 continue;
467
468 if (prevClass <= nextClass)
469 continue;
470
471 /* exchange can happen */
472 tmp = decomp_chars[count - 1];
473 decomp_chars[count - 1] = decomp_chars[count];
474 decomp_chars[count] = tmp;
475
476 /* backtrack to check again */
477 if (count > 1)
478 count -= 2;
479 }
480
481 if (!recompose)
482 return decomp_chars;
483
484 /*
485 * The last phase of NFC and NFKC is the recomposition of the reordered
486 * Unicode string using combining classes. The recomposed string cannot be
487 * longer than the decomposed one, so make the allocation of the output
488 * string based on that assumption.
489 */
490 recomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));
491 if (!recomp_chars)
492 {
493 FREE(decomp_chars);
494 return NULL;
495 }
496
497 last_class = -1; /* this eliminates a special check */
498 starter_pos = 0;
499 target_pos = 1;
500 starter_ch = recomp_chars[0] = decomp_chars[0];
501
502 for (count = 1; count < decomp_size; count++)
503 {
504 pg_wchar ch = decomp_chars[count];
505 int ch_class = get_canonical_class(ch);
506 pg_wchar composite;
507
508 if (last_class < ch_class &&
509 recompose_code(starter_ch, ch, &composite))
510 {
511 recomp_chars[starter_pos] = composite;
512 starter_ch = composite;
513 }
514 else if (ch_class == 0)
515 {
516 starter_pos = target_pos;
517 starter_ch = ch;
518 last_class = -1;
519 recomp_chars[target_pos++] = ch;
520 }
521 else
522 {
523 last_class = ch_class;
524 recomp_chars[target_pos++] = ch;
525 }
526 }
527 recomp_chars[target_pos] = (pg_wchar) '\0';
528
529 FREE(decomp_chars);
530
531 return recomp_chars;
532}
static int32 next
Definition: blutils.c:224
static int64 current_size
Definition: pg_checksums.c:63
#define ALLOC(size)
Definition: unicode_norm.c:31
#define FREE(size)
Definition: unicode_norm.c:32
static bool recompose_code(uint32 start, uint32 code, uint32 *result)
Definition: unicode_norm.c:218

References ALLOC, Assert(), compat, current_size, decompose_code(), FREE, get_canonical_class(), get_decomposed_size(), input, next, recompose_code(), UNICODE_NFC, UNICODE_NFKC, and UNICODE_NFKD.

Referenced by main(), pg_saslprep(), unicode_is_normalized(), and unicode_normalize_func().