PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
unicode_category.h
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * unicode_category.h
 *	  Routines for determining the category of Unicode characters.
 *
 * These definitions can be used by both frontend and backend code.
 *
 * Copyright (c) 2017-2025, PostgreSQL Global Development Group
 *
 * src/include/common/unicode_category.h
 *
 *-------------------------------------------------------------------------
 */
14#ifndef UNICODE_CATEGORY_H
15#define UNICODE_CATEGORY_H
16
17#include "mb/pg_wchar.h"
18
/*
 * Unicode General Category Values
 *
 * See: https://www.unicode.org/reports/tr44/#General_Category_Values
 *
 * The Unicode stability policy guarantees: "The enumeration of
 * General_Category property values is fixed. No new values will be
 * added". See: https://www.unicode.org/policies/stability_policy.html
 *
 * Numeric values chosen to match corresponding ICU UCharCategory.
 *
 * The trailing comment on each enumerator is the two-letter
 * General_Category abbreviation from UAX #44.  The values are contiguous
 * from 0 through 29.
 */
typedef enum pg_unicode_category
{
	PG_U_UNASSIGNED = 0,		/* Cn */
	PG_U_UPPERCASE_LETTER = 1,	/* Lu */
	PG_U_LOWERCASE_LETTER = 2,	/* Ll */
	PG_U_TITLECASE_LETTER = 3,	/* Lt */
	PG_U_MODIFIER_LETTER = 4,	/* Lm */
	PG_U_OTHER_LETTER = 5,		/* Lo */
	PG_U_NONSPACING_MARK = 6,	/* Mn */
	PG_U_ENCLOSING_MARK = 7,	/* Me */
	PG_U_SPACING_MARK = 8,		/* Mc */
	PG_U_DECIMAL_NUMBER = 9,	/* Nd */
	PG_U_LETTER_NUMBER = 10,	/* Nl */
	PG_U_OTHER_NUMBER = 11,		/* No */
	PG_U_SPACE_SEPARATOR = 12,	/* Zs */
	PG_U_LINE_SEPARATOR = 13,	/* Zl */
	PG_U_PARAGRAPH_SEPARATOR = 14,	/* Zp */
	PG_U_CONTROL = 15,			/* Cc */
	PG_U_FORMAT = 16,			/* Cf */
	PG_U_PRIVATE_USE = 17,		/* Co */
	PG_U_SURROGATE = 18,		/* Cs */
	PG_U_DASH_PUNCTUATION = 19, /* Pd */
	PG_U_OPEN_PUNCTUATION = 20, /* Ps */
	PG_U_CLOSE_PUNCTUATION = 21,	/* Pe */
	PG_U_CONNECTOR_PUNCTUATION = 22,	/* Pc */
	PG_U_OTHER_PUNCTUATION = 23,	/* Po */
	PG_U_MATH_SYMBOL = 24,		/* Sm */
	PG_U_CURRENCY_SYMBOL = 25,	/* Sc */
	PG_U_MODIFIER_SYMBOL = 26,	/* Sk */
	PG_U_OTHER_SYMBOL = 27,		/* So */
	PG_U_INITIAL_PUNCTUATION = 28,	/* Pi */
	PG_U_FINAL_PUNCTUATION = 29 /* Pf */
} pg_unicode_category;
65extern const char *unicode_category_string(pg_unicode_category category);
66extern const char *unicode_category_abbrev(pg_unicode_category category);
67
68extern bool pg_u_prop_alphabetic(pg_wchar code);
69extern bool pg_u_prop_lowercase(pg_wchar code);
70extern bool pg_u_prop_uppercase(pg_wchar code);
71extern bool pg_u_prop_cased(pg_wchar code);
72extern bool pg_u_prop_case_ignorable(pg_wchar code);
73extern bool pg_u_prop_white_space(pg_wchar code);
74extern bool pg_u_prop_hex_digit(pg_wchar code);
75extern bool pg_u_prop_join_control(pg_wchar code);
76
77extern bool pg_u_isdigit(pg_wchar code, bool posix);
78extern bool pg_u_isalpha(pg_wchar code);
79extern bool pg_u_isalnum(pg_wchar code, bool posix);
80extern bool pg_u_isword(pg_wchar code);
81extern bool pg_u_isupper(pg_wchar code);
82extern bool pg_u_islower(pg_wchar code);
83extern bool pg_u_isblank(pg_wchar code);
84extern bool pg_u_iscntrl(pg_wchar code);
85extern bool pg_u_isgraph(pg_wchar code);
86extern bool pg_u_isprint(pg_wchar code);
87extern bool pg_u_ispunct(pg_wchar code, bool posix);
88extern bool pg_u_isspace(pg_wchar code);
89extern bool pg_u_isxdigit(pg_wchar code, bool posix);
90
91#endif /* UNICODE_CATEGORY_H */
unsigned int pg_wchar
Definition: mbprint.c:31
bool pg_u_prop_uppercase(pg_wchar code)
bool pg_u_isspace(pg_wchar code)
bool pg_u_isxdigit(pg_wchar code, bool posix)
bool pg_u_ispunct(pg_wchar code, bool posix)
const char * unicode_category_string(pg_unicode_category category)
bool pg_u_isprint(pg_wchar code)
bool pg_u_islower(pg_wchar code)
const char * unicode_category_abbrev(pg_unicode_category category)
pg_unicode_category
@ PG_U_CONNECTOR_PUNCTUATION
@ PG_U_OTHER_SYMBOL
@ PG_U_DASH_PUNCTUATION
@ PG_U_UPPERCASE_LETTER
@ PG_U_DECIMAL_NUMBER
@ PG_U_CLOSE_PUNCTUATION
@ PG_U_NONSPACING_MARK
@ PG_U_INITIAL_PUNCTUATION
@ PG_U_CURRENCY_SYMBOL
@ PG_U_LETTER_NUMBER
@ PG_U_MODIFIER_SYMBOL
@ PG_U_SPACE_SEPARATOR
@ PG_U_OPEN_PUNCTUATION
@ PG_U_FORMAT
@ PG_U_PRIVATE_USE
@ PG_U_OTHER_LETTER
@ PG_U_PARAGRAPH_SEPARATOR
@ PG_U_CONTROL
@ PG_U_SPACING_MARK
@ PG_U_TITLECASE_LETTER
@ PG_U_OTHER_NUMBER
@ PG_U_MATH_SYMBOL
@ PG_U_LOWERCASE_LETTER
@ PG_U_LINE_SEPARATOR
@ PG_U_UNASSIGNED
@ PG_U_SURROGATE
@ PG_U_FINAL_PUNCTUATION
@ PG_U_MODIFIER_LETTER
@ PG_U_OTHER_PUNCTUATION
@ PG_U_ENCLOSING_MARK
bool pg_u_prop_white_space(pg_wchar code)
bool pg_u_isblank(pg_wchar code)
bool pg_u_prop_cased(pg_wchar code)
bool pg_u_isalpha(pg_wchar code)
bool pg_u_prop_lowercase(pg_wchar code)
bool pg_u_isalnum(pg_wchar code, bool posix)
bool pg_u_isupper(pg_wchar code)
bool pg_u_prop_alphabetic(pg_wchar code)
bool pg_u_isdigit(pg_wchar code, bool posix)
bool pg_u_iscntrl(pg_wchar code)
bool pg_u_prop_join_control(pg_wchar code)
bool pg_u_isgraph(pg_wchar code)
bool pg_u_isword(pg_wchar code)
bool pg_u_prop_case_ignorable(pg_wchar code)
bool pg_u_prop_hex_digit(pg_wchar code)
pg_unicode_category unicode_category(pg_wchar code)