PostgreSQL Source Code git master
unicode_category.c File Reference
Include dependency graph for unicode_category.c:

Go to the source code of this file.

Macros

#define PG_U_CATEGORY_MASK(X)   ((uint32)(1 << (X)))
 
#define PG_U_LU_MASK   PG_U_CATEGORY_MASK(PG_U_UPPERCASE_LETTER)
 
#define PG_U_LL_MASK   PG_U_CATEGORY_MASK(PG_U_LOWERCASE_LETTER)
 
#define PG_U_LT_MASK   PG_U_CATEGORY_MASK(PG_U_TITLECASE_LETTER)
 
#define PG_U_LC_MASK   (PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK)
 
#define PG_U_LM_MASK   PG_U_CATEGORY_MASK(PG_U_MODIFIER_LETTER)
 
#define PG_U_LO_MASK   PG_U_CATEGORY_MASK(PG_U_OTHER_LETTER)
 
#define PG_U_L_MASK   (PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK|PG_U_LM_MASK|PG_U_LO_MASK)
 
#define PG_U_MN_MASK   PG_U_CATEGORY_MASK(PG_U_NONSPACING_MARK)
 
#define PG_U_ME_MASK   PG_U_CATEGORY_MASK(PG_U_ENCLOSING_MARK)
 
#define PG_U_MC_MASK   PG_U_CATEGORY_MASK(PG_U_SPACING_MARK)
 
#define PG_U_M_MASK   (PG_U_MN_MASK|PG_U_MC_MASK|PG_U_ME_MASK)
 
#define PG_U_ND_MASK   PG_U_CATEGORY_MASK(PG_U_DECIMAL_NUMBER)
 
#define PG_U_NL_MASK   PG_U_CATEGORY_MASK(PG_U_LETTER_NUMBER)
 
#define PG_U_NO_MASK   PG_U_CATEGORY_MASK(PG_U_OTHER_NUMBER)
 
#define PG_U_N_MASK   (PG_U_ND_MASK|PG_U_NL_MASK|PG_U_NO_MASK)
 
#define PG_U_PC_MASK   PG_U_CATEGORY_MASK(PG_U_CONNECTOR_PUNCTUATION)
 
#define PG_U_PD_MASK   PG_U_CATEGORY_MASK(PG_U_DASH_PUNCTUATION)
 
#define PG_U_PS_MASK   PG_U_CATEGORY_MASK(PG_U_OPEN_PUNCTUATION)
 
#define PG_U_PE_MASK   PG_U_CATEGORY_MASK(PG_U_CLOSE_PUNCTUATION)
 
#define PG_U_PI_MASK   PG_U_CATEGORY_MASK(PG_U_INITIAL_PUNCTUATION)
 
#define PG_U_PF_MASK   PG_U_CATEGORY_MASK(PG_U_FINAL_PUNCTUATION)
 
#define PG_U_PO_MASK   PG_U_CATEGORY_MASK(PG_U_OTHER_PUNCTUATION)
 
#define PG_U_P_MASK   (PG_U_PC_MASK|PG_U_PD_MASK|PG_U_PS_MASK|PG_U_PE_MASK|PG_U_PI_MASK|PG_U_PF_MASK|PG_U_PO_MASK)
 
#define PG_U_SM_MASK   PG_U_CATEGORY_MASK(PG_U_MATH_SYMBOL)
 
#define PG_U_SC_MASK   PG_U_CATEGORY_MASK(PG_U_CURRENCY_SYMBOL)
 
#define PG_U_SK_MASK   PG_U_CATEGORY_MASK(PG_U_MODIFIER_SYMBOL)
 
#define PG_U_SO_MASK   PG_U_CATEGORY_MASK(PG_U_OTHER_SYMBOL)
 
#define PG_U_S_MASK   (PG_U_SM_MASK|PG_U_SC_MASK|PG_U_SK_MASK|PG_U_SO_MASK)
 
#define PG_U_ZS_MASK   PG_U_CATEGORY_MASK(PG_U_SPACE_SEPARATOR)
 
#define PG_U_ZL_MASK   PG_U_CATEGORY_MASK(PG_U_LINE_SEPARATOR)
 
#define PG_U_ZP_MASK   PG_U_CATEGORY_MASK(PG_U_PARAGRAPH_SEPARATOR)
 
#define PG_U_Z_MASK   (PG_U_ZS_MASK|PG_U_ZL_MASK|PG_U_ZP_MASK)
 
#define PG_U_CC_MASK   PG_U_CATEGORY_MASK(PG_U_CONTROL)
 
#define PG_U_CF_MASK   PG_U_CATEGORY_MASK(PG_U_FORMAT)
 
#define PG_U_CS_MASK   PG_U_CATEGORY_MASK(PG_U_SURROGATE)
 
#define PG_U_CO_MASK   PG_U_CATEGORY_MASK(PG_U_PRIVATE_USE)
 
#define PG_U_CN_MASK   PG_U_CATEGORY_MASK(PG_U_UNASSIGNED)
 
#define PG_U_C_MASK   (PG_U_CC_MASK|PG_U_CF_MASK|PG_U_CS_MASK|PG_U_CO_MASK|PG_U_CN_MASK)
 
#define PG_U_CHARACTER_TAB   0x09
 

Functions

static bool range_search (const pg_unicode_range *tbl, size_t size, pg_wchar code)
 
pg_unicode_category unicode_category (pg_wchar code)
 
bool pg_u_prop_alphabetic (pg_wchar code)
 
bool pg_u_prop_lowercase (pg_wchar code)
 
bool pg_u_prop_uppercase (pg_wchar code)
 
bool pg_u_prop_cased (pg_wchar code)
 
bool pg_u_prop_case_ignorable (pg_wchar code)
 
bool pg_u_prop_white_space (pg_wchar code)
 
bool pg_u_prop_hex_digit (pg_wchar code)
 
bool pg_u_prop_join_control (pg_wchar code)
 
bool pg_u_isdigit (pg_wchar code, bool posix)
 
bool pg_u_isalpha (pg_wchar code)
 
bool pg_u_isalnum (pg_wchar code, bool posix)
 
bool pg_u_isword (pg_wchar code)
 
bool pg_u_isupper (pg_wchar code)
 
bool pg_u_islower (pg_wchar code)
 
bool pg_u_isblank (pg_wchar code)
 
bool pg_u_iscntrl (pg_wchar code)
 
bool pg_u_isgraph (pg_wchar code)
 
bool pg_u_isprint (pg_wchar code)
 
bool pg_u_ispunct (pg_wchar code, bool posix)
 
bool pg_u_isspace (pg_wchar code)
 
bool pg_u_isxdigit (pg_wchar code, bool posix)
 
const char * unicode_category_string (pg_unicode_category category)
 
const char * unicode_category_abbrev (pg_unicode_category category)
 

Macro Definition Documentation

◆ PG_U_C_MASK

#define PG_U_C_MASK
Value:
(PG_U_CC_MASK|PG_U_CF_MASK|PG_U_CS_MASK|PG_U_CO_MASK|\
 PG_U_CN_MASK)
#define PG_U_CC_MASK
#define PG_U_CO_MASK
#define PG_U_CS_MASK
#define PG_U_CF_MASK

Definition at line 73 of file unicode_category.c.

◆ PG_U_CATEGORY_MASK

#define PG_U_CATEGORY_MASK (   X)    ((uint32)(1 << (X)))

Definition at line 32 of file unicode_category.c.

◆ PG_U_CC_MASK

#define PG_U_CC_MASK   PG_U_CATEGORY_MASK(PG_U_CONTROL)

Definition at line 68 of file unicode_category.c.

◆ PG_U_CF_MASK

#define PG_U_CF_MASK   PG_U_CATEGORY_MASK(PG_U_FORMAT)

Definition at line 69 of file unicode_category.c.

◆ PG_U_CHARACTER_TAB

#define PG_U_CHARACTER_TAB   0x09

Definition at line 76 of file unicode_category.c.

◆ PG_U_CN_MASK

#define PG_U_CN_MASK   PG_U_CATEGORY_MASK(PG_U_UNASSIGNED)

Definition at line 72 of file unicode_category.c.

◆ PG_U_CO_MASK

#define PG_U_CO_MASK   PG_U_CATEGORY_MASK(PG_U_PRIVATE_USE)

Definition at line 71 of file unicode_category.c.

◆ PG_U_CS_MASK

#define PG_U_CS_MASK   PG_U_CATEGORY_MASK(PG_U_SURROGATE)

Definition at line 70 of file unicode_category.c.

◆ PG_U_L_MASK

#define PG_U_L_MASK
Value:
(PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK|PG_U_LM_MASK|\
 PG_U_LO_MASK)
#define PG_U_LL_MASK
#define PG_U_LT_MASK
#define PG_U_LM_MASK
#define PG_U_LU_MASK

Definition at line 40 of file unicode_category.c.

◆ PG_U_LC_MASK

#define PG_U_LC_MASK   (PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK)

Definition at line 37 of file unicode_category.c.

◆ PG_U_LL_MASK

#define PG_U_LL_MASK   PG_U_CATEGORY_MASK(PG_U_LOWERCASE_LETTER)

Definition at line 35 of file unicode_category.c.

◆ PG_U_LM_MASK

#define PG_U_LM_MASK   PG_U_CATEGORY_MASK(PG_U_MODIFIER_LETTER)

Definition at line 38 of file unicode_category.c.

◆ PG_U_LO_MASK

#define PG_U_LO_MASK   PG_U_CATEGORY_MASK(PG_U_OTHER_LETTER)

Definition at line 39 of file unicode_category.c.

◆ PG_U_LT_MASK

#define PG_U_LT_MASK   PG_U_CATEGORY_MASK(PG_U_TITLECASE_LETTER)

Definition at line 36 of file unicode_category.c.

◆ PG_U_LU_MASK

#define PG_U_LU_MASK   PG_U_CATEGORY_MASK(PG_U_UPPERCASE_LETTER)

Definition at line 34 of file unicode_category.c.

◆ PG_U_M_MASK

#define PG_U_M_MASK   (PG_U_MN_MASK|PG_U_MC_MASK|PG_U_ME_MASK)

Definition at line 45 of file unicode_category.c.

◆ PG_U_MC_MASK

#define PG_U_MC_MASK   PG_U_CATEGORY_MASK(PG_U_SPACING_MARK)

Definition at line 44 of file unicode_category.c.

◆ PG_U_ME_MASK

#define PG_U_ME_MASK   PG_U_CATEGORY_MASK(PG_U_ENCLOSING_MARK)

Definition at line 43 of file unicode_category.c.

◆ PG_U_MN_MASK

#define PG_U_MN_MASK   PG_U_CATEGORY_MASK(PG_U_NONSPACING_MARK)

Definition at line 42 of file unicode_category.c.

◆ PG_U_N_MASK

#define PG_U_N_MASK   (PG_U_ND_MASK|PG_U_NL_MASK|PG_U_NO_MASK)

Definition at line 49 of file unicode_category.c.

◆ PG_U_ND_MASK

#define PG_U_ND_MASK   PG_U_CATEGORY_MASK(PG_U_DECIMAL_NUMBER)

Definition at line 46 of file unicode_category.c.

◆ PG_U_NL_MASK

#define PG_U_NL_MASK   PG_U_CATEGORY_MASK(PG_U_LETTER_NUMBER)

Definition at line 47 of file unicode_category.c.

◆ PG_U_NO_MASK

#define PG_U_NO_MASK   PG_U_CATEGORY_MASK(PG_U_OTHER_NUMBER)

Definition at line 48 of file unicode_category.c.

◆ PG_U_P_MASK

#define PG_U_P_MASK
Value:
(PG_U_PC_MASK|PG_U_PD_MASK|PG_U_PS_MASK|PG_U_PE_MASK|\
 PG_U_PI_MASK|PG_U_PF_MASK|PG_U_PO_MASK)
#define PG_U_PO_MASK
#define PG_U_PS_MASK
#define PG_U_PC_MASK
#define PG_U_PD_MASK
#define PG_U_PF_MASK
#define PG_U_PE_MASK

Definition at line 57 of file unicode_category.c.

◆ PG_U_PC_MASK

#define PG_U_PC_MASK   PG_U_CATEGORY_MASK(PG_U_CONNECTOR_PUNCTUATION)

Definition at line 50 of file unicode_category.c.

◆ PG_U_PD_MASK

#define PG_U_PD_MASK   PG_U_CATEGORY_MASK(PG_U_DASH_PUNCTUATION)

Definition at line 51 of file unicode_category.c.

◆ PG_U_PE_MASK

#define PG_U_PE_MASK   PG_U_CATEGORY_MASK(PG_U_CLOSE_PUNCTUATION)

Definition at line 53 of file unicode_category.c.

◆ PG_U_PF_MASK

#define PG_U_PF_MASK   PG_U_CATEGORY_MASK(PG_U_FINAL_PUNCTUATION)

Definition at line 55 of file unicode_category.c.

◆ PG_U_PI_MASK

#define PG_U_PI_MASK   PG_U_CATEGORY_MASK(PG_U_INITIAL_PUNCTUATION)

Definition at line 54 of file unicode_category.c.

◆ PG_U_PO_MASK

#define PG_U_PO_MASK   PG_U_CATEGORY_MASK(PG_U_OTHER_PUNCTUATION)

Definition at line 56 of file unicode_category.c.

◆ PG_U_PS_MASK

#define PG_U_PS_MASK   PG_U_CATEGORY_MASK(PG_U_OPEN_PUNCTUATION)

Definition at line 52 of file unicode_category.c.

◆ PG_U_S_MASK

#define PG_U_S_MASK   (PG_U_SM_MASK|PG_U_SC_MASK|PG_U_SK_MASK|PG_U_SO_MASK)

Definition at line 63 of file unicode_category.c.

◆ PG_U_SC_MASK

#define PG_U_SC_MASK   PG_U_CATEGORY_MASK(PG_U_CURRENCY_SYMBOL)

Definition at line 60 of file unicode_category.c.

◆ PG_U_SK_MASK

#define PG_U_SK_MASK   PG_U_CATEGORY_MASK(PG_U_MODIFIER_SYMBOL)

Definition at line 61 of file unicode_category.c.

◆ PG_U_SM_MASK

#define PG_U_SM_MASK   PG_U_CATEGORY_MASK(PG_U_MATH_SYMBOL)

Definition at line 59 of file unicode_category.c.

◆ PG_U_SO_MASK

#define PG_U_SO_MASK   PG_U_CATEGORY_MASK(PG_U_OTHER_SYMBOL)

Definition at line 62 of file unicode_category.c.

◆ PG_U_Z_MASK

#define PG_U_Z_MASK   (PG_U_ZS_MASK|PG_U_ZL_MASK|PG_U_ZP_MASK)

Definition at line 67 of file unicode_category.c.

◆ PG_U_ZL_MASK

#define PG_U_ZL_MASK   PG_U_CATEGORY_MASK(PG_U_LINE_SEPARATOR)

Definition at line 65 of file unicode_category.c.

◆ PG_U_ZP_MASK

#define PG_U_ZP_MASK   PG_U_CATEGORY_MASK(PG_U_PARAGRAPH_SEPARATOR)

Definition at line 66 of file unicode_category.c.

◆ PG_U_ZS_MASK

#define PG_U_ZS_MASK   PG_U_CATEGORY_MASK(PG_U_SPACE_SEPARATOR)

Definition at line 64 of file unicode_category.c.

Function Documentation

◆ pg_u_isalnum()

bool pg_u_isalnum ( pg_wchar  code,
bool  posix 
)

Definition at line 226 of file unicode_category.c.

227{
228 return pg_u_isalpha(code) || pg_u_isdigit(code, posix);
229}
bool pg_u_isalpha(pg_wchar code)
bool pg_u_isdigit(pg_wchar code, bool posix)

References pg_u_isalpha(), and pg_u_isdigit().

Referenced by initcap_wbnext(), and pg_wc_isalnum().

◆ pg_u_isalpha()

bool pg_u_isalpha ( pg_wchar  code)

Definition at line 220 of file unicode_category.c.

221{
222 return pg_u_prop_alphabetic(code);
223}
bool pg_u_prop_alphabetic(pg_wchar code)

References pg_u_prop_alphabetic().

Referenced by pg_u_isalnum(), pg_u_ispunct(), pg_u_isword(), and pg_wc_isalpha().

◆ pg_u_isblank()

bool pg_u_isblank ( pg_wchar  code)

Definition at line 255 of file unicode_category.c.

256{
257 return code == PG_U_CHARACTER_TAB ||
258 unicode_category(code) == PG_U_SPACE_SEPARATOR;
259}
#define PG_U_CHARACTER_TAB
pg_unicode_category unicode_category(pg_wchar code)
@ PG_U_SPACE_SEPARATOR

References PG_U_CHARACTER_TAB, PG_U_SPACE_SEPARATOR, and unicode_category().

Referenced by pg_u_isprint().

◆ pg_u_iscntrl()

bool pg_u_iscntrl ( pg_wchar  code)

Definition at line 262 of file unicode_category.c.

263{
264 return unicode_category(code) == PG_U_CONTROL;
265}
@ PG_U_CONTROL

References PG_U_CONTROL, and unicode_category().

◆ pg_u_isdigit()

bool pg_u_isdigit ( pg_wchar  code,
bool  posix 
)

Definition at line 211 of file unicode_category.c.

212{
213 if (posix)
214 return ('0' <= code && code <= '9');
215 else
216 return unicode_category(code) == PG_U_DECIMAL_NUMBER;
217}
@ PG_U_DECIMAL_NUMBER

References PG_U_DECIMAL_NUMBER, and unicode_category().

Referenced by pg_u_isalnum(), and pg_wc_isdigit().

◆ pg_u_isgraph()

bool pg_u_isgraph ( pg_wchar  code)

Definition at line 268 of file unicode_category.c.

269{
270 uint32 category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
271
272 if (category_mask & (PG_U_CC_MASK | PG_U_CS_MASK | PG_U_CN_MASK) ||
273 pg_u_isspace(code))
274 return false;
275 return true;
276}
uint32_t uint32
Definition: c.h:502
bool pg_u_isspace(pg_wchar code)
#define PG_U_CATEGORY_MASK(X)
#define PG_U_CN_MASK

References PG_U_CATEGORY_MASK, PG_U_CC_MASK, PG_U_CN_MASK, PG_U_CS_MASK, pg_u_isspace(), and unicode_category().

Referenced by pg_u_isprint(), and pg_wc_isgraph().

◆ pg_u_islower()

bool pg_u_islower ( pg_wchar  code)

Definition at line 249 of file unicode_category.c.

250{
251 return pg_u_prop_lowercase(code);
252}
bool pg_u_prop_lowercase(pg_wchar code)

References pg_u_prop_lowercase().

Referenced by pg_wc_islower().

◆ pg_u_isprint()

bool pg_u_isprint ( pg_wchar  code)

Definition at line 279 of file unicode_category.c.

280{
281 pg_unicode_category category = unicode_category(code);
282
283 if (category == PG_U_CONTROL)
284 return false;
285
286 return pg_u_isgraph(code) || pg_u_isblank(code);
287}
bool pg_u_isblank(pg_wchar code)
bool pg_u_isgraph(pg_wchar code)
pg_unicode_category

References PG_U_CONTROL, pg_u_isblank(), pg_u_isgraph(), and unicode_category().

Referenced by pg_wc_isprint().

◆ pg_u_ispunct()

bool pg_u_ispunct ( pg_wchar  code,
bool  posix 
)

Definition at line 290 of file unicode_category.c.

291{
292 uint32 category_mask;
293
294 if (posix)
295 {
296 if (pg_u_isalpha(code))
297 return false;
298
299 category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
300 return category_mask & (PG_U_P_MASK | PG_U_S_MASK);
301 }
302 else
303 {
304 category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
305
306 return category_mask & PG_U_P_MASK;
307 }
308}
#define PG_U_P_MASK
#define PG_U_S_MASK

References PG_U_CATEGORY_MASK, pg_u_isalpha(), PG_U_P_MASK, PG_U_S_MASK, and unicode_category().

Referenced by pg_wc_ispunct().

◆ pg_u_isspace()

bool pg_u_isspace ( pg_wchar  code)

Definition at line 311 of file unicode_category.c.

312{
313 return pg_u_prop_white_space(code);
314}
bool pg_u_prop_white_space(pg_wchar code)

References pg_u_prop_white_space().

Referenced by pg_u_isgraph(), and pg_wc_isspace().

◆ pg_u_isupper()

bool pg_u_isupper ( pg_wchar  code)

Definition at line 243 of file unicode_category.c.

244{
245 return pg_u_prop_uppercase(code);
246}
bool pg_u_prop_uppercase(pg_wchar code)

References pg_u_prop_uppercase().

Referenced by pg_wc_isupper().

◆ pg_u_isword()

bool pg_u_isword ( pg_wchar  code)

Definition at line 232 of file unicode_category.c.

233{
234 uint32 category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
235
236 return
237 category_mask & (PG_U_M_MASK | PG_U_ND_MASK | PG_U_PC_MASK) ||
238 pg_u_isalpha(code) ||
239 pg_u_prop_join_control(code);
240}
#define PG_U_ND_MASK
#define PG_U_M_MASK
bool pg_u_prop_join_control(pg_wchar code)

References PG_U_CATEGORY_MASK, pg_u_isalpha(), PG_U_M_MASK, PG_U_ND_MASK, PG_U_PC_MASK, pg_u_prop_join_control(), and unicode_category().

◆ pg_u_isxdigit()

bool pg_u_isxdigit ( pg_wchar  code,
bool  posix 
)

Definition at line 317 of file unicode_category.c.

318{
319 if (posix)
320 return (('0' <= code && code <= '9') ||
321 ('A' <= code && code <= 'F') ||
322 ('a' <= code && code <= 'f'));
323 else
324 return unicode_category(code) == PG_U_DECIMAL_NUMBER ||
325 pg_u_prop_hex_digit(code);
326}
bool pg_u_prop_hex_digit(pg_wchar code)

References PG_U_DECIMAL_NUMBER, pg_u_prop_hex_digit(), and unicode_category().

◆ pg_u_prop_alphabetic()

bool pg_u_prop_alphabetic ( pg_wchar  code)

Definition at line 111 of file unicode_category.c.

112{
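113 if (code < 0x80)
114 return unicode_opt_ascii[code].properties & PG_U_PROP_ALPHABETIC;
115
116 return range_search(unicode_alphabetic,
117 lengthof(unicode_alphabetic),
118 code);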
119}
#define lengthof(array)
Definition: c.h:759
static bool range_search(const pg_unicode_range *tbl, size_t size, pg_wchar code)
#define PG_U_PROP_ALPHABETIC
static const pg_unicode_range unicode_alphabetic[1141]
static const pg_unicode_properties unicode_opt_ascii[128]

References lengthof, PG_U_PROP_ALPHABETIC, pg_unicode_properties::properties, range_search(), unicode_alphabetic, and unicode_opt_ascii.

Referenced by pg_u_isalpha().

◆ pg_u_prop_case_ignorable()

bool pg_u_prop_case_ignorable ( pg_wchar  code)

Definition at line 159 of file unicode_category.c.

160{
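161 if (code < 0x80)
162 return unicode_opt_ascii[code].properties & PG_U_PROP_CASE_IGNORABLE;
163
164 return range_search(unicode_case_ignorable,
165 lengthof(unicode_case_ignorable),
166 code);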
167}
static const pg_unicode_range unicode_case_ignorable[491]
#define PG_U_PROP_CASE_IGNORABLE

References lengthof, PG_U_PROP_CASE_IGNORABLE, pg_unicode_properties::properties, range_search(), unicode_case_ignorable, and unicode_opt_ascii.

Referenced by check_final_sigma().

◆ pg_u_prop_cased()

bool pg_u_prop_cased ( pg_wchar  code)

Definition at line 144 of file unicode_category.c.

145{
146 uint32 category_mask;
147
148 if (code < 0x80)
149 return unicode_opt_ascii[code].properties & PG_U_PROP_CASED;
150
151 category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
152
153 return category_mask & PG_U_LT_MASK ||
154 pg_u_prop_lowercase(code) ||
155 pg_u_prop_uppercase(code);
156}
#define PG_U_PROP_CASED

References PG_U_CATEGORY_MASK, PG_U_LT_MASK, PG_U_PROP_CASED, pg_u_prop_lowercase(), pg_u_prop_uppercase(), pg_unicode_properties::properties, unicode_category(), and unicode_opt_ascii.

Referenced by check_final_sigma().

◆ pg_u_prop_hex_digit()

bool pg_u_prop_hex_digit ( pg_wchar  code)

Definition at line 181 of file unicode_category.c.

182{
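183 if (code < 0x80)
184 return unicode_opt_ascii[code].properties & PG_U_PROP_HEX_DIGIT;
185
186 return range_search(unicode_hex_digit,
187 lengthof(unicode_hex_digit),
188 code);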
189}
#define PG_U_PROP_HEX_DIGIT
static const pg_unicode_range unicode_hex_digit[6]

References lengthof, PG_U_PROP_HEX_DIGIT, pg_unicode_properties::properties, range_search(), unicode_hex_digit, and unicode_opt_ascii.

Referenced by pg_u_isxdigit().

◆ pg_u_prop_join_control()

bool pg_u_prop_join_control ( pg_wchar  code)

Definition at line 192 of file unicode_category.c.

193{
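194 if (code < 0x80)
195 return unicode_opt_ascii[code].properties & PG_U_PROP_JOIN_CONTROL;
196
197 return range_search(unicode_join_control,
198 lengthof(unicode_join_control),
199 code);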
200}
#define PG_U_PROP_JOIN_CONTROL
static const pg_unicode_range unicode_join_control[1]

References lengthof, PG_U_PROP_JOIN_CONTROL, pg_unicode_properties::properties, range_search(), unicode_join_control, and unicode_opt_ascii.

Referenced by pg_u_isword().

◆ pg_u_prop_lowercase()

bool pg_u_prop_lowercase ( pg_wchar  code)

Definition at line 122 of file unicode_category.c.

123{
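124 if (code < 0x80)
125 return unicode_opt_ascii[code].properties & PG_U_PROP_LOWERCASE;
126
127 return range_search(unicode_lowercase,
128 lengthof(unicode_lowercase),
129 code);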
130}
#define PG_U_PROP_LOWERCASE
static const pg_unicode_range unicode_lowercase[686]

References lengthof, PG_U_PROP_LOWERCASE, pg_unicode_properties::properties, range_search(), unicode_lowercase, and unicode_opt_ascii.

Referenced by pg_u_islower(), and pg_u_prop_cased().

◆ pg_u_prop_uppercase()

bool pg_u_prop_uppercase ( pg_wchar  code)

Definition at line 133 of file unicode_category.c.

134{
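135 if (code < 0x80)
136 return unicode_opt_ascii[code].properties & PG_U_PROP_UPPERCASE;
137
138 return range_search(unicode_uppercase,
139 lengthof(unicode_uppercase),
140 code);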
141}
#define PG_U_PROP_UPPERCASE
static const pg_unicode_range unicode_uppercase[651]

References lengthof, PG_U_PROP_UPPERCASE, pg_unicode_properties::properties, range_search(), unicode_opt_ascii, and unicode_uppercase.

Referenced by pg_u_isupper(), and pg_u_prop_cased().

◆ pg_u_prop_white_space()

bool pg_u_prop_white_space ( pg_wchar  code)

Definition at line 170 of file unicode_category.c.

171{
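172 if (code < 0x80)
173 return unicode_opt_ascii[code].properties & PG_U_PROP_WHITE_SPACE;
174
175 return range_search(unicode_white_space,
176 lengthof(unicode_white_space),
177 code);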
178}
static const pg_unicode_range unicode_white_space[11]
#define PG_U_PROP_WHITE_SPACE

References lengthof, PG_U_PROP_WHITE_SPACE, pg_unicode_properties::properties, range_search(), unicode_opt_ascii, and unicode_white_space.

Referenced by pg_u_isspace().

◆ range_search()

static bool range_search ( const pg_unicode_range tbl,
size_t  size,
pg_wchar  code 
)
static

Definition at line 481 of file unicode_category.c.

482{
483 int min = 0;
484 int mid;
485 int max = size - 1;
486
487 Assert(code <= 0x10ffff);
488
489 while (max >= min)
490 {
491 mid = (min + max) / 2;
492 if (code > tbl[mid].last)
493 min = mid + 1;
494 else if (code < tbl[mid].first)
495 max = mid - 1;
496 else
497 return true;
498 }
499
500 return false;
501}
Assert(PointerIsAligned(start, uint64))

References Assert().

Referenced by pg_u_prop_alphabetic(), pg_u_prop_case_ignorable(), pg_u_prop_hex_digit(), pg_u_prop_join_control(), pg_u_prop_lowercase(), pg_u_prop_uppercase(), and pg_u_prop_white_space().

◆ unicode_category()

pg_unicode_category unicode_category ( pg_wchar  code)

Definition at line 85 of file unicode_category.c.

86{
87 int min = 0;
88 int mid;
89 int max = lengthof(unicode_categories) - 1;
90
91 Assert(code <= 0x10ffff);
92
93 if (code < 0x80)
94 return unicode_opt_ascii[code].category;
95
96 while (max >= min)
97 {
98 mid = (min + max) / 2;
99 if (code > unicode_categories[mid].last)
100 min = mid + 1;
101 else if (code < unicode_categories[mid].first)
102 max = mid - 1;
103 else
104 return unicode_categories[mid].category;
105 }
106
107 return PG_U_UNASSIGNED;
108}
@ PG_U_UNASSIGNED
static const pg_category_range unicode_categories[3302]

References Assert(), pg_category_range::category, pg_unicode_properties::category, lengthof, PG_U_UNASSIGNED, unicode_categories, and unicode_opt_ascii.

Referenced by pg_u_isblank(), pg_u_iscntrl(), pg_u_isdigit(), pg_u_isgraph(), pg_u_isprint(), pg_u_ispunct(), pg_u_isword(), pg_u_isxdigit(), pg_u_prop_cased(), and unicode_assigned().

◆ unicode_category_abbrev()

const char * unicode_category_abbrev ( pg_unicode_category  category)

Definition at line 406 of file unicode_category.c.

407{
408 switch (category)
409 {
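410 case PG_U_UNASSIGNED:
411 return "Cn";
412 case PG_U_UPPERCASE_LETTER:
413 return "Lu";
414 case PG_U_LOWERCASE_LETTER:
415 return "Ll";
416 case PG_U_TITLECASE_LETTER:
417 return "Lt";
418 case PG_U_MODIFIER_LETTER:
419 return "Lm";
420 case PG_U_OTHER_LETTER:
421 return "Lo";
422 case PG_U_NONSPACING_MARK:
423 return "Mn";
424 case PG_U_ENCLOSING_MARK:
425 return "Me";
426 case PG_U_SPACING_MARK:
427 return "Mc";
428 case PG_U_DECIMAL_NUMBER:
429 return "Nd";
430 case PG_U_LETTER_NUMBER:
431 return "Nl";
432 case PG_U_OTHER_NUMBER:
433 return "No";
434 case PG_U_SPACE_SEPARATOR:
435 return "Zs";
436 case PG_U_LINE_SEPARATOR:
437 return "Zl";
438 case PG_U_PARAGRAPH_SEPARATOR:
439 return "Zp";
440 case PG_U_CONTROL:
441 return "Cc";
442 case PG_U_FORMAT:
443 return "Cf";
444 case PG_U_PRIVATE_USE:
445 return "Co";
446 case PG_U_SURROGATE:
447 return "Cs";
448 case PG_U_DASH_PUNCTUATION:
449 return "Pd";
450 case PG_U_OPEN_PUNCTUATION:
451 return "Ps";
452 case PG_U_CLOSE_PUNCTUATION:
453 return "Pe";
454 case PG_U_CONNECTOR_PUNCTUATION:
455 return "Pc";
456 case PG_U_OTHER_PUNCTUATION:
457 return "Po";
458 case PG_U_MATH_SYMBOL:
459 return "Sm";
460 case PG_U_CURRENCY_SYMBOL:
461 return "Sc";
462 case PG_U_MODIFIER_SYMBOL:
463 return "Sk";
464 case PG_U_OTHER_SYMBOL:
465 return "So";
466 case PG_U_INITIAL_PUNCTUATION:
467 return "Pi";
468 case PG_U_FINAL_PUNCTUATION:
469 return "Pf";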
470 }
471
472 Assert(false);
473 return "??"; /* keep compiler quiet */
474}
@ PG_U_CONNECTOR_PUNCTUATION
@ PG_U_OTHER_SYMBOL
@ PG_U_DASH_PUNCTUATION
@ PG_U_UPPERCASE_LETTER
@ PG_U_CLOSE_PUNCTUATION
@ PG_U_NONSPACING_MARK
@ PG_U_INITIAL_PUNCTUATION
@ PG_U_CURRENCY_SYMBOL
@ PG_U_LETTER_NUMBER
@ PG_U_MODIFIER_SYMBOL
@ PG_U_OPEN_PUNCTUATION
@ PG_U_FORMAT
@ PG_U_PRIVATE_USE
@ PG_U_OTHER_LETTER
@ PG_U_PARAGRAPH_SEPARATOR
@ PG_U_SPACING_MARK
@ PG_U_TITLECASE_LETTER
@ PG_U_OTHER_NUMBER
@ PG_U_MATH_SYMBOL
@ PG_U_LOWERCASE_LETTER
@ PG_U_LINE_SEPARATOR
@ PG_U_SURROGATE
@ PG_U_FINAL_PUNCTUATION
@ PG_U_MODIFIER_LETTER
@ PG_U_OTHER_PUNCTUATION
@ PG_U_ENCLOSING_MARK

References Assert(), PG_U_CLOSE_PUNCTUATION, PG_U_CONNECTOR_PUNCTUATION, PG_U_CONTROL, PG_U_CURRENCY_SYMBOL, PG_U_DASH_PUNCTUATION, PG_U_DECIMAL_NUMBER, PG_U_ENCLOSING_MARK, PG_U_FINAL_PUNCTUATION, PG_U_FORMAT, PG_U_INITIAL_PUNCTUATION, PG_U_LETTER_NUMBER, PG_U_LINE_SEPARATOR, PG_U_LOWERCASE_LETTER, PG_U_MATH_SYMBOL, PG_U_MODIFIER_LETTER, PG_U_MODIFIER_SYMBOL, PG_U_NONSPACING_MARK, PG_U_OPEN_PUNCTUATION, PG_U_OTHER_LETTER, PG_U_OTHER_NUMBER, PG_U_OTHER_PUNCTUATION, PG_U_OTHER_SYMBOL, PG_U_PARAGRAPH_SEPARATOR, PG_U_PRIVATE_USE, PG_U_SPACE_SEPARATOR, PG_U_SPACING_MARK, PG_U_SURROGATE, PG_U_TITLECASE_LETTER, PG_U_UNASSIGNED, and PG_U_UPPERCASE_LETTER.

◆ unicode_category_string()

const char * unicode_category_string ( pg_unicode_category  category)

Definition at line 332 of file unicode_category.c.

333{
334 switch (category)
335 {
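336 case PG_U_UNASSIGNED:
337 return "Unassigned";
338 case PG_U_UPPERCASE_LETTER:
339 return "Uppercase_Letter";
340 case PG_U_LOWERCASE_LETTER:
341 return "Lowercase_Letter";
342 case PG_U_TITLECASE_LETTER:
343 return "Titlecase_Letter";
344 case PG_U_MODIFIER_LETTER:
345 return "Modifier_Letter";
346 case PG_U_OTHER_LETTER:
347 return "Other_Letter";
348 case PG_U_NONSPACING_MARK:
349 return "Nonspacing_Mark";
350 case PG_U_ENCLOSING_MARK:
351 return "Enclosing_Mark";
352 case PG_U_SPACING_MARK:
353 return "Spacing_Mark";
354 case PG_U_DECIMAL_NUMBER:
355 return "Decimal_Number";
356 case PG_U_LETTER_NUMBER:
357 return "Letter_Number";
358 case PG_U_OTHER_NUMBER:
359 return "Other_Number";
360 case PG_U_SPACE_SEPARATOR:
361 return "Space_Separator";
362 case PG_U_LINE_SEPARATOR:
363 return "Line_Separator";
364 case PG_U_PARAGRAPH_SEPARATOR:
365 return "Paragraph_Separator";
366 case PG_U_CONTROL:
367 return "Control";
368 case PG_U_FORMAT:
369 return "Format";
370 case PG_U_PRIVATE_USE:
371 return "Private_Use";
372 case PG_U_SURROGATE:
373 return "Surrogate";
374 case PG_U_DASH_PUNCTUATION:
375 return "Dash_Punctuation";
376 case PG_U_OPEN_PUNCTUATION:
377 return "Open_Punctuation";
378 case PG_U_CLOSE_PUNCTUATION:
379 return "Close_Punctuation";
380 case PG_U_CONNECTOR_PUNCTUATION:
381 return "Connector_Punctuation";
382 case PG_U_OTHER_PUNCTUATION:
383 return "Other_Punctuation";
384 case PG_U_MATH_SYMBOL:
385 return "Math_Symbol";
386 case PG_U_CURRENCY_SYMBOL:
387 return "Currency_Symbol";
388 case PG_U_MODIFIER_SYMBOL:
389 return "Modifier_Symbol";
390 case PG_U_OTHER_SYMBOL:
391 return "Other_Symbol";
392 case PG_U_INITIAL_PUNCTUATION:
393 return "Initial_Punctuation";
394 case PG_U_FINAL_PUNCTUATION:
395 return "Final_Punctuation";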
396 }
397
398 Assert(false);
399 return "Unrecognized"; /* keep compiler quiet */
400}

References Assert(), PG_U_CLOSE_PUNCTUATION, PG_U_CONNECTOR_PUNCTUATION, PG_U_CONTROL, PG_U_CURRENCY_SYMBOL, PG_U_DASH_PUNCTUATION, PG_U_DECIMAL_NUMBER, PG_U_ENCLOSING_MARK, PG_U_FINAL_PUNCTUATION, PG_U_FORMAT, PG_U_INITIAL_PUNCTUATION, PG_U_LETTER_NUMBER, PG_U_LINE_SEPARATOR, PG_U_LOWERCASE_LETTER, PG_U_MATH_SYMBOL, PG_U_MODIFIER_LETTER, PG_U_MODIFIER_SYMBOL, PG_U_NONSPACING_MARK, PG_U_OPEN_PUNCTUATION, PG_U_OTHER_LETTER, PG_U_OTHER_NUMBER, PG_U_OTHER_PUNCTUATION, PG_U_OTHER_SYMBOL, PG_U_PARAGRAPH_SEPARATOR, PG_U_PRIVATE_USE, PG_U_SPACE_SEPARATOR, PG_U_SPACING_MARK, PG_U_SURROGATE, PG_U_TITLECASE_LETTER, PG_U_UNASSIGNED, and PG_U_UPPERCASE_LETTER.