PostgreSQL Source Code  git master
unicode_category.c File Reference
Include dependency graph for unicode_category.c:

Go to the source code of this file.

Macros

#define PG_U_CATEGORY_MASK(X)   ((uint32)(1 << (X)))
 
#define PG_U_LU_MASK   PG_U_CATEGORY_MASK(PG_U_UPPERCASE_LETTER)
 
#define PG_U_LL_MASK   PG_U_CATEGORY_MASK(PG_U_LOWERCASE_LETTER)
 
#define PG_U_LT_MASK   PG_U_CATEGORY_MASK(PG_U_TITLECASE_LETTER)
 
#define PG_U_LC_MASK   (PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK)
 
#define PG_U_LM_MASK   PG_U_CATEGORY_MASK(PG_U_MODIFIER_LETTER)
 
#define PG_U_LO_MASK   PG_U_CATEGORY_MASK(PG_U_OTHER_LETTER)
 
#define PG_U_L_MASK
 
#define PG_U_MN_MASK   PG_U_CATEGORY_MASK(PG_U_NONSPACING_MARK)
 
#define PG_U_ME_MASK   PG_U_CATEGORY_MASK(PG_U_ENCLOSING_MARK)
 
#define PG_U_MC_MASK   PG_U_CATEGORY_MASK(PG_U_SPACING_MARK)
 
#define PG_U_M_MASK   (PG_U_MN_MASK|PG_U_MC_MASK|PG_U_ME_MASK)
 
#define PG_U_ND_MASK   PG_U_CATEGORY_MASK(PG_U_DECIMAL_NUMBER)
 
#define PG_U_NL_MASK   PG_U_CATEGORY_MASK(PG_U_LETTER_NUMBER)
 
#define PG_U_NO_MASK   PG_U_CATEGORY_MASK(PG_U_OTHER_NUMBER)
 
#define PG_U_N_MASK   (PG_U_ND_MASK|PG_U_NL_MASK|PG_U_NO_MASK)
 
#define PG_U_PC_MASK   PG_U_CATEGORY_MASK(PG_U_CONNECTOR_PUNCTUATION)
 
#define PG_U_PD_MASK   PG_U_CATEGORY_MASK(PG_U_DASH_PUNCTUATION)
 
#define PG_U_PS_MASK   PG_U_CATEGORY_MASK(PG_U_OPEN_PUNCTUATION)
 
#define PG_U_PE_MASK   PG_U_CATEGORY_MASK(PG_U_CLOSE_PUNCTUATION)
 
#define PG_U_PI_MASK   PG_U_CATEGORY_MASK(PG_U_INITIAL_PUNCTUATION)
 
#define PG_U_PF_MASK   PG_U_CATEGORY_MASK(PG_U_FINAL_PUNCTUATION)
 
#define PG_U_PO_MASK   PG_U_CATEGORY_MASK(PG_U_OTHER_PUNCTUATION)
 
#define PG_U_P_MASK
 
#define PG_U_SM_MASK   PG_U_CATEGORY_MASK(PG_U_MATH_SYMBOL)
 
#define PG_U_SC_MASK   PG_U_CATEGORY_MASK(PG_U_CURRENCY_SYMBOL)
 
#define PG_U_SK_MASK   PG_U_CATEGORY_MASK(PG_U_MODIFIER_SYMBOL)
 
#define PG_U_SO_MASK   PG_U_CATEGORY_MASK(PG_U_OTHER_SYMBOL)
 
#define PG_U_S_MASK   (PG_U_SM_MASK|PG_U_SC_MASK|PG_U_SK_MASK|PG_U_SO_MASK)
 
#define PG_U_ZS_MASK   PG_U_CATEGORY_MASK(PG_U_SPACE_SEPARATOR)
 
#define PG_U_ZL_MASK   PG_U_CATEGORY_MASK(PG_U_LINE_SEPARATOR)
 
#define PG_U_ZP_MASK   PG_U_CATEGORY_MASK(PG_U_PARAGRAPH_SEPARATOR)
 
#define PG_U_Z_MASK   (PG_U_ZS_MASK|PG_U_ZL_MASK|PG_U_ZP_MASK)
 
#define PG_U_CC_MASK   PG_U_CATEGORY_MASK(PG_U_CONTROL)
 
#define PG_U_CF_MASK   PG_U_CATEGORY_MASK(PG_U_FORMAT)
 
#define PG_U_CS_MASK   PG_U_CATEGORY_MASK(PG_U_SURROGATE)
 
#define PG_U_CO_MASK   PG_U_CATEGORY_MASK(PG_U_PRIVATE_USE)
 
#define PG_U_CN_MASK   PG_U_CATEGORY_MASK(PG_U_UNASSIGNED)
 
#define PG_U_C_MASK
 
#define PG_U_CHARACTER_TAB   0x09
 

Functions

static bool range_search (const pg_unicode_range *tbl, size_t size, pg_wchar code)
 
pg_unicode_category unicode_category (pg_wchar code)
 
bool pg_u_prop_alphabetic (pg_wchar code)
 
bool pg_u_prop_lowercase (pg_wchar code)
 
bool pg_u_prop_uppercase (pg_wchar code)
 
bool pg_u_prop_cased (pg_wchar code)
 
bool pg_u_prop_case_ignorable (pg_wchar code)
 
bool pg_u_prop_white_space (pg_wchar code)
 
bool pg_u_prop_hex_digit (pg_wchar code)
 
bool pg_u_prop_join_control (pg_wchar code)
 
bool pg_u_isdigit (pg_wchar code, bool posix)
 
bool pg_u_isalpha (pg_wchar code)
 
bool pg_u_isalnum (pg_wchar code, bool posix)
 
bool pg_u_isword (pg_wchar code)
 
bool pg_u_isupper (pg_wchar code)
 
bool pg_u_islower (pg_wchar code)
 
bool pg_u_isblank (pg_wchar code)
 
bool pg_u_iscntrl (pg_wchar code)
 
bool pg_u_isgraph (pg_wchar code)
 
bool pg_u_isprint (pg_wchar code)
 
bool pg_u_ispunct (pg_wchar code, bool posix)
 
bool pg_u_isspace (pg_wchar code)
 
bool pg_u_isxdigit (pg_wchar code, bool posix)
 
const char * unicode_category_string (pg_unicode_category category)
 
const char * unicode_category_abbrev (pg_unicode_category category)
 

Macro Definition Documentation

◆ PG_U_C_MASK

#define PG_U_C_MASK
Value:
(PG_U_CC_MASK|PG_U_CF_MASK|PG_U_CS_MASK|PG_U_CO_MASK|\
PG_U_CN_MASK)
#define PG_U_CC_MASK
#define PG_U_CO_MASK
#define PG_U_CS_MASK
#define PG_U_CF_MASK

Definition at line 73 of file unicode_category.c.

◆ PG_U_CATEGORY_MASK

#define PG_U_CATEGORY_MASK (   X)    ((uint32)(1 << (X)))

Definition at line 32 of file unicode_category.c.

◆ PG_U_CC_MASK

#define PG_U_CC_MASK   PG_U_CATEGORY_MASK(PG_U_CONTROL)

Definition at line 68 of file unicode_category.c.

◆ PG_U_CF_MASK

#define PG_U_CF_MASK   PG_U_CATEGORY_MASK(PG_U_FORMAT)

Definition at line 69 of file unicode_category.c.

◆ PG_U_CHARACTER_TAB

#define PG_U_CHARACTER_TAB   0x09

Definition at line 76 of file unicode_category.c.

◆ PG_U_CN_MASK

#define PG_U_CN_MASK   PG_U_CATEGORY_MASK(PG_U_UNASSIGNED)

Definition at line 72 of file unicode_category.c.

◆ PG_U_CO_MASK

#define PG_U_CO_MASK   PG_U_CATEGORY_MASK(PG_U_PRIVATE_USE)

Definition at line 71 of file unicode_category.c.

◆ PG_U_CS_MASK

#define PG_U_CS_MASK   PG_U_CATEGORY_MASK(PG_U_SURROGATE)

Definition at line 70 of file unicode_category.c.

◆ PG_U_L_MASK

#define PG_U_L_MASK
Value:
(PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK|PG_U_LM_MASK|\
PG_U_LO_MASK)
#define PG_U_LL_MASK
#define PG_U_LT_MASK
#define PG_U_LM_MASK
#define PG_U_LU_MASK

Definition at line 40 of file unicode_category.c.

◆ PG_U_LC_MASK

#define PG_U_LC_MASK   (PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK)

Definition at line 37 of file unicode_category.c.

◆ PG_U_LL_MASK

#define PG_U_LL_MASK   PG_U_CATEGORY_MASK(PG_U_LOWERCASE_LETTER)

Definition at line 35 of file unicode_category.c.

◆ PG_U_LM_MASK

#define PG_U_LM_MASK   PG_U_CATEGORY_MASK(PG_U_MODIFIER_LETTER)

Definition at line 38 of file unicode_category.c.

◆ PG_U_LO_MASK

#define PG_U_LO_MASK   PG_U_CATEGORY_MASK(PG_U_OTHER_LETTER)

Definition at line 39 of file unicode_category.c.

◆ PG_U_LT_MASK

#define PG_U_LT_MASK   PG_U_CATEGORY_MASK(PG_U_TITLECASE_LETTER)

Definition at line 36 of file unicode_category.c.

◆ PG_U_LU_MASK

#define PG_U_LU_MASK   PG_U_CATEGORY_MASK(PG_U_UPPERCASE_LETTER)

Definition at line 34 of file unicode_category.c.

◆ PG_U_M_MASK

#define PG_U_M_MASK   (PG_U_MN_MASK|PG_U_MC_MASK|PG_U_ME_MASK)

Definition at line 45 of file unicode_category.c.

◆ PG_U_MC_MASK

#define PG_U_MC_MASK   PG_U_CATEGORY_MASK(PG_U_SPACING_MARK)

Definition at line 44 of file unicode_category.c.

◆ PG_U_ME_MASK

#define PG_U_ME_MASK   PG_U_CATEGORY_MASK(PG_U_ENCLOSING_MARK)

Definition at line 43 of file unicode_category.c.

◆ PG_U_MN_MASK

#define PG_U_MN_MASK   PG_U_CATEGORY_MASK(PG_U_NONSPACING_MARK)

Definition at line 42 of file unicode_category.c.

◆ PG_U_N_MASK

#define PG_U_N_MASK   (PG_U_ND_MASK|PG_U_NL_MASK|PG_U_NO_MASK)

Definition at line 49 of file unicode_category.c.

◆ PG_U_ND_MASK

#define PG_U_ND_MASK   PG_U_CATEGORY_MASK(PG_U_DECIMAL_NUMBER)

Definition at line 46 of file unicode_category.c.

◆ PG_U_NL_MASK

#define PG_U_NL_MASK   PG_U_CATEGORY_MASK(PG_U_LETTER_NUMBER)

Definition at line 47 of file unicode_category.c.

◆ PG_U_NO_MASK

#define PG_U_NO_MASK   PG_U_CATEGORY_MASK(PG_U_OTHER_NUMBER)

Definition at line 48 of file unicode_category.c.

◆ PG_U_P_MASK

#define PG_U_P_MASK
Value:
(PG_U_PC_MASK|PG_U_PD_MASK|PG_U_PS_MASK|PG_U_PE_MASK|\
PG_U_PI_MASK|PG_U_PF_MASK|PG_U_PO_MASK)
#define PG_U_PO_MASK
#define PG_U_PS_MASK
#define PG_U_PC_MASK
#define PG_U_PD_MASK
#define PG_U_PF_MASK
#define PG_U_PE_MASK

Definition at line 57 of file unicode_category.c.

◆ PG_U_PC_MASK

#define PG_U_PC_MASK   PG_U_CATEGORY_MASK(PG_U_CONNECTOR_PUNCTUATION)

Definition at line 50 of file unicode_category.c.

◆ PG_U_PD_MASK

#define PG_U_PD_MASK   PG_U_CATEGORY_MASK(PG_U_DASH_PUNCTUATION)

Definition at line 51 of file unicode_category.c.

◆ PG_U_PE_MASK

#define PG_U_PE_MASK   PG_U_CATEGORY_MASK(PG_U_CLOSE_PUNCTUATION)

Definition at line 53 of file unicode_category.c.

◆ PG_U_PF_MASK

#define PG_U_PF_MASK   PG_U_CATEGORY_MASK(PG_U_FINAL_PUNCTUATION)

Definition at line 55 of file unicode_category.c.

◆ PG_U_PI_MASK

#define PG_U_PI_MASK   PG_U_CATEGORY_MASK(PG_U_INITIAL_PUNCTUATION)

Definition at line 54 of file unicode_category.c.

◆ PG_U_PO_MASK

#define PG_U_PO_MASK   PG_U_CATEGORY_MASK(PG_U_OTHER_PUNCTUATION)

Definition at line 56 of file unicode_category.c.

◆ PG_U_PS_MASK

#define PG_U_PS_MASK   PG_U_CATEGORY_MASK(PG_U_OPEN_PUNCTUATION)

Definition at line 52 of file unicode_category.c.

◆ PG_U_S_MASK

#define PG_U_S_MASK   (PG_U_SM_MASK|PG_U_SC_MASK|PG_U_SK_MASK|PG_U_SO_MASK)

Definition at line 63 of file unicode_category.c.

◆ PG_U_SC_MASK

#define PG_U_SC_MASK   PG_U_CATEGORY_MASK(PG_U_CURRENCY_SYMBOL)

Definition at line 60 of file unicode_category.c.

◆ PG_U_SK_MASK

#define PG_U_SK_MASK   PG_U_CATEGORY_MASK(PG_U_MODIFIER_SYMBOL)

Definition at line 61 of file unicode_category.c.

◆ PG_U_SM_MASK

#define PG_U_SM_MASK   PG_U_CATEGORY_MASK(PG_U_MATH_SYMBOL)

Definition at line 59 of file unicode_category.c.

◆ PG_U_SO_MASK

#define PG_U_SO_MASK   PG_U_CATEGORY_MASK(PG_U_OTHER_SYMBOL)

Definition at line 62 of file unicode_category.c.

◆ PG_U_Z_MASK

#define PG_U_Z_MASK   (PG_U_ZS_MASK|PG_U_ZL_MASK|PG_U_ZP_MASK)

Definition at line 67 of file unicode_category.c.

◆ PG_U_ZL_MASK

#define PG_U_ZL_MASK   PG_U_CATEGORY_MASK(PG_U_LINE_SEPARATOR)

Definition at line 65 of file unicode_category.c.

◆ PG_U_ZP_MASK

#define PG_U_ZP_MASK   PG_U_CATEGORY_MASK(PG_U_PARAGRAPH_SEPARATOR)

Definition at line 66 of file unicode_category.c.

◆ PG_U_ZS_MASK

#define PG_U_ZS_MASK   PG_U_CATEGORY_MASK(PG_U_SPACE_SEPARATOR)

Definition at line 64 of file unicode_category.c.

Function Documentation

◆ pg_u_isalnum()

bool pg_u_isalnum ( pg_wchar  code,
bool  posix 
)

Definition at line 226 of file unicode_category.c.

227 {
228  return pg_u_isalpha(code) || pg_u_isdigit(code, posix);
229 }
bool pg_u_isalpha(pg_wchar code)
bool pg_u_isdigit(pg_wchar code, bool posix)

References pg_u_isalpha(), and pg_u_isdigit().

Referenced by initcap_wbnext(), and pg_wc_isalnum().

◆ pg_u_isalpha()

bool pg_u_isalpha ( pg_wchar  code)

Definition at line 220 of file unicode_category.c.

221 {
222  return pg_u_prop_alphabetic(code);
223 }
bool pg_u_prop_alphabetic(pg_wchar code)

References pg_u_prop_alphabetic().

Referenced by pg_u_isalnum(), pg_u_ispunct(), pg_u_isword(), and pg_wc_isalpha().

◆ pg_u_isblank()

bool pg_u_isblank ( pg_wchar  code)

Definition at line 255 of file unicode_category.c.

256 {
257  return code == PG_U_CHARACTER_TAB ||
258  unicode_category(code) == PG_U_SPACE_SEPARATOR;
259 }
#define PG_U_CHARACTER_TAB
pg_unicode_category unicode_category(pg_wchar code)
@ PG_U_SPACE_SEPARATOR

References PG_U_CHARACTER_TAB, PG_U_SPACE_SEPARATOR, and unicode_category().

Referenced by pg_u_isprint().

◆ pg_u_iscntrl()

bool pg_u_iscntrl ( pg_wchar  code)

Definition at line 262 of file unicode_category.c.

263 {
264  return unicode_category(code) == PG_U_CONTROL;
265 }
@ PG_U_CONTROL

References PG_U_CONTROL, and unicode_category().

◆ pg_u_isdigit()

bool pg_u_isdigit ( pg_wchar  code,
bool  posix 
)

Definition at line 211 of file unicode_category.c.

212 {
213  if (posix)
214  return ('0' <= code && code <= '9');
215  else
216  return unicode_category(code) == PG_U_DECIMAL_NUMBER;
217 }
@ PG_U_DECIMAL_NUMBER

References PG_U_DECIMAL_NUMBER, and unicode_category().

Referenced by pg_u_isalnum(), and pg_wc_isdigit().

◆ pg_u_isgraph()

bool pg_u_isgraph ( pg_wchar  code)

Definition at line 268 of file unicode_category.c.

269 {
270  uint32 category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
271 
272  if (category_mask & (PG_U_CC_MASK | PG_U_CS_MASK | PG_U_CN_MASK) ||
273  pg_u_isspace(code))
274  return false;
275  return true;
276 }
uint32_t uint32
Definition: c.h:485
bool pg_u_isspace(pg_wchar code)
#define PG_U_CATEGORY_MASK(X)
#define PG_U_CN_MASK

References PG_U_CATEGORY_MASK, PG_U_CC_MASK, PG_U_CN_MASK, PG_U_CS_MASK, pg_u_isspace(), and unicode_category().

Referenced by pg_u_isprint(), and pg_wc_isgraph().

◆ pg_u_islower()

bool pg_u_islower ( pg_wchar  code)

Definition at line 249 of file unicode_category.c.

250 {
251  return pg_u_prop_lowercase(code);
252 }
bool pg_u_prop_lowercase(pg_wchar code)

References pg_u_prop_lowercase().

Referenced by pg_wc_islower().

◆ pg_u_isprint()

bool pg_u_isprint ( pg_wchar  code)

Definition at line 279 of file unicode_category.c.

280 {
281  pg_unicode_category category = unicode_category(code);
282 
283  if (category == PG_U_CONTROL)
284  return false;
285 
286  return pg_u_isgraph(code) || pg_u_isblank(code);
287 }
bool pg_u_isblank(pg_wchar code)
bool pg_u_isgraph(pg_wchar code)
pg_unicode_category

References PG_U_CONTROL, pg_u_isblank(), pg_u_isgraph(), and unicode_category().

Referenced by pg_wc_isprint().

◆ pg_u_ispunct()

bool pg_u_ispunct ( pg_wchar  code,
bool  posix 
)

Definition at line 290 of file unicode_category.c.

291 {
292  uint32 category_mask;
293 
294  if (posix)
295  {
296  if (pg_u_isalpha(code))
297  return false;
298 
299  category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
300  return category_mask & (PG_U_P_MASK | PG_U_S_MASK);
301  }
302  else
303  {
304  category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
305 
306  return category_mask & PG_U_P_MASK;
307  }
308 }
#define PG_U_P_MASK
#define PG_U_S_MASK

References PG_U_CATEGORY_MASK, pg_u_isalpha(), PG_U_P_MASK, PG_U_S_MASK, and unicode_category().

Referenced by pg_wc_ispunct().

◆ pg_u_isspace()

bool pg_u_isspace ( pg_wchar  code)

Definition at line 311 of file unicode_category.c.

312 {
313  return pg_u_prop_white_space(code);
314 }
bool pg_u_prop_white_space(pg_wchar code)

References pg_u_prop_white_space().

Referenced by pg_u_isgraph(), and pg_wc_isspace().

◆ pg_u_isupper()

bool pg_u_isupper ( pg_wchar  code)

Definition at line 243 of file unicode_category.c.

244 {
245  return pg_u_prop_uppercase(code);
246 }
bool pg_u_prop_uppercase(pg_wchar code)

References pg_u_prop_uppercase().

Referenced by pg_wc_isupper().

◆ pg_u_isword()

bool pg_u_isword ( pg_wchar  code)

Definition at line 232 of file unicode_category.c.

233 {
234  uint32 category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
235 
236  return
237  category_mask & (PG_U_M_MASK | PG_U_ND_MASK | PG_U_PC_MASK) ||
238  pg_u_isalpha(code) ||
239  pg_u_prop_join_control(code);
240 }
#define PG_U_ND_MASK
#define PG_U_M_MASK
bool pg_u_prop_join_control(pg_wchar code)

References PG_U_CATEGORY_MASK, pg_u_isalpha(), PG_U_M_MASK, PG_U_ND_MASK, PG_U_PC_MASK, pg_u_prop_join_control(), and unicode_category().

◆ pg_u_isxdigit()

bool pg_u_isxdigit ( pg_wchar  code,
bool  posix 
)

Definition at line 317 of file unicode_category.c.

318 {
319  if (posix)
320  return (('0' <= code && code <= '9') ||
321  ('A' <= code && code <= 'F') ||
322  ('a' <= code && code <= 'f'));
323  else
324  return unicode_category(code) == PG_U_DECIMAL_NUMBER ||
325  pg_u_prop_hex_digit(code);
326 }
bool pg_u_prop_hex_digit(pg_wchar code)

References PG_U_DECIMAL_NUMBER, pg_u_prop_hex_digit(), and unicode_category().

◆ pg_u_prop_alphabetic()

bool pg_u_prop_alphabetic ( pg_wchar  code)

Definition at line 111 of file unicode_category.c.

112 {
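113  if (code < 0x80)
114  return unicode_opt_ascii[code].properties & PG_U_PROP_ALPHABETIC;
115 
116  return range_search(unicode_alphabetic,
117  lengthof(unicode_alphabetic),
118  code);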
119 }
#define lengthof(array)
Definition: c.h:742
static bool range_search(const pg_unicode_range *tbl, size_t size, pg_wchar code)
#define PG_U_PROP_ALPHABETIC
static const pg_unicode_range unicode_alphabetic[1141]
static const pg_unicode_properties unicode_opt_ascii[128]

References lengthof, PG_U_PROP_ALPHABETIC, pg_unicode_properties::properties, range_search(), unicode_alphabetic, and unicode_opt_ascii.

Referenced by pg_u_isalpha().

◆ pg_u_prop_case_ignorable()

bool pg_u_prop_case_ignorable ( pg_wchar  code)

Definition at line 159 of file unicode_category.c.

160 {
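161  if (code < 0x80)
162  return unicode_opt_ascii[code].properties & PG_U_PROP_CASE_IGNORABLE;
163 
164  return range_search(unicode_case_ignorable,
165  lengthof(unicode_case_ignorable),
166  code);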
167 }
static const pg_unicode_range unicode_case_ignorable[491]
#define PG_U_PROP_CASE_IGNORABLE

References lengthof, PG_U_PROP_CASE_IGNORABLE, pg_unicode_properties::properties, range_search(), unicode_case_ignorable, and unicode_opt_ascii.

◆ pg_u_prop_cased()

bool pg_u_prop_cased ( pg_wchar  code)

Definition at line 144 of file unicode_category.c.

145 {
146  uint32 category_mask;
147 
148  if (code < 0x80)
149  return unicode_opt_ascii[code].properties & PG_U_PROP_CASED;
150 
151  category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
152 
153  return category_mask & PG_U_LT_MASK ||
154  pg_u_prop_lowercase(code) ||
155  pg_u_prop_uppercase(code);
156 }
#define PG_U_PROP_CASED

References PG_U_CATEGORY_MASK, PG_U_LT_MASK, PG_U_PROP_CASED, pg_u_prop_lowercase(), pg_u_prop_uppercase(), pg_unicode_properties::properties, unicode_category(), and unicode_opt_ascii.

◆ pg_u_prop_hex_digit()

bool pg_u_prop_hex_digit ( pg_wchar  code)

Definition at line 181 of file unicode_category.c.

182 {
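183  if (code < 0x80)
184  return unicode_opt_ascii[code].properties & PG_U_PROP_HEX_DIGIT;
185 
186  return range_search(unicode_hex_digit,
187  lengthof(unicode_hex_digit),
188  code);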
189 }
#define PG_U_PROP_HEX_DIGIT
static const pg_unicode_range unicode_hex_digit[6]

References lengthof, PG_U_PROP_HEX_DIGIT, pg_unicode_properties::properties, range_search(), unicode_hex_digit, and unicode_opt_ascii.

Referenced by pg_u_isxdigit().

◆ pg_u_prop_join_control()

bool pg_u_prop_join_control ( pg_wchar  code)

Definition at line 192 of file unicode_category.c.

193 {
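194  if (code < 0x80)
195  return unicode_opt_ascii[code].properties & PG_U_PROP_JOIN_CONTROL;
196 
197  return range_search(unicode_join_control,
198  lengthof(unicode_join_control),
199  code);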
200 }
#define PG_U_PROP_JOIN_CONTROL
static const pg_unicode_range unicode_join_control[1]

References lengthof, PG_U_PROP_JOIN_CONTROL, pg_unicode_properties::properties, range_search(), unicode_join_control, and unicode_opt_ascii.

Referenced by pg_u_isword().

◆ pg_u_prop_lowercase()

bool pg_u_prop_lowercase ( pg_wchar  code)

Definition at line 122 of file unicode_category.c.

123 {
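124  if (code < 0x80)
125  return unicode_opt_ascii[code].properties & PG_U_PROP_LOWERCASE;
126 
127  return range_search(unicode_lowercase,
128  lengthof(unicode_lowercase),
129  code);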
130 }
#define PG_U_PROP_LOWERCASE
static const pg_unicode_range unicode_lowercase[686]

References lengthof, PG_U_PROP_LOWERCASE, pg_unicode_properties::properties, range_search(), unicode_lowercase, and unicode_opt_ascii.

Referenced by pg_u_islower(), and pg_u_prop_cased().

◆ pg_u_prop_uppercase()

bool pg_u_prop_uppercase ( pg_wchar  code)

Definition at line 133 of file unicode_category.c.

134 {
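135  if (code < 0x80)
136  return unicode_opt_ascii[code].properties & PG_U_PROP_UPPERCASE;
137 
138  return range_search(unicode_uppercase,
139  lengthof(unicode_uppercase),
140  code);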
141 }
#define PG_U_PROP_UPPERCASE
static const pg_unicode_range unicode_uppercase[651]

References lengthof, PG_U_PROP_UPPERCASE, pg_unicode_properties::properties, range_search(), unicode_opt_ascii, and unicode_uppercase.

Referenced by pg_u_isupper(), and pg_u_prop_cased().

◆ pg_u_prop_white_space()

bool pg_u_prop_white_space ( pg_wchar  code)

Definition at line 170 of file unicode_category.c.

171 {
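172  if (code < 0x80)
173  return unicode_opt_ascii[code].properties & PG_U_PROP_WHITE_SPACE;
174 
175  return range_search(unicode_white_space,
176  lengthof(unicode_white_space),
177  code);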
178 }
static const pg_unicode_range unicode_white_space[11]
#define PG_U_PROP_WHITE_SPACE

References lengthof, PG_U_PROP_WHITE_SPACE, pg_unicode_properties::properties, range_search(), unicode_opt_ascii, and unicode_white_space.

Referenced by pg_u_isspace().

◆ range_search()

static bool range_search ( const pg_unicode_range tbl,
size_t  size,
pg_wchar  code 
)
static

Definition at line 481 of file unicode_category.c.

482 {
483  int min = 0;
484  int mid;
485  int max = size - 1;
486 
487  Assert(code <= 0x10ffff);
488 
489  while (max >= min)
490  {
491  mid = (min + max) / 2;
492  if (code > tbl[mid].last)
493  min = mid + 1;
494  else if (code < tbl[mid].first)
495  max = mid - 1;
496  else
497  return true;
498  }
499 
500  return false;
501 }
#define Assert(condition)
Definition: c.h:812
static pg_noinline void Size size
Definition: slab.c:607

References Assert, and size.

Referenced by pg_u_prop_alphabetic(), pg_u_prop_case_ignorable(), pg_u_prop_hex_digit(), pg_u_prop_join_control(), pg_u_prop_lowercase(), pg_u_prop_uppercase(), and pg_u_prop_white_space().

◆ unicode_category()

pg_unicode_category unicode_category ( pg_wchar  code)

Definition at line 85 of file unicode_category.c.

86 {
87  int min = 0;
88  int mid;
89  int max = lengthof(unicode_categories) - 1;
90 
91  Assert(code <= 0x10ffff);
92 
93  if (code < 0x80)
94  return unicode_opt_ascii[code].category;
95 
96  while (max >= min)
97  {
98  mid = (min + max) / 2;
99  if (code > unicode_categories[mid].last)
100  min = mid + 1;
101  else if (code < unicode_categories[mid].first)
102  max = mid - 1;
103  else
104  return unicode_categories[mid].category;
105  }
106 
107  return PG_U_UNASSIGNED;
108 }
@ PG_U_UNASSIGNED
static const pg_category_range unicode_categories[3302]

References Assert, pg_category_range::category, pg_unicode_properties::category, lengthof, PG_U_UNASSIGNED, unicode_categories, and unicode_opt_ascii.

Referenced by pg_u_isblank(), pg_u_iscntrl(), pg_u_isdigit(), pg_u_isgraph(), pg_u_isprint(), pg_u_ispunct(), pg_u_isword(), pg_u_isxdigit(), pg_u_prop_cased(), and unicode_assigned().

◆ unicode_category_abbrev()

const char* unicode_category_abbrev ( pg_unicode_category  category)

Definition at line 406 of file unicode_category.c.

407 {
408  switch (category)
409  {
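410  case PG_U_UNASSIGNED:
411  return "Cn";
412  case PG_U_UPPERCASE_LETTER:
413  return "Lu";
414  case PG_U_LOWERCASE_LETTER:
415  return "Ll";
416  case PG_U_TITLECASE_LETTER:
417  return "Lt";
418  case PG_U_MODIFIER_LETTER:
419  return "Lm";
420  case PG_U_OTHER_LETTER:
421  return "Lo";
422  case PG_U_NONSPACING_MARK:
423  return "Mn";
424  case PG_U_ENCLOSING_MARK:
425  return "Me";
426  case PG_U_SPACING_MARK:
427  return "Mc";
428  case PG_U_DECIMAL_NUMBER:
429  return "Nd";
430  case PG_U_LETTER_NUMBER:
431  return "Nl";
432  case PG_U_OTHER_NUMBER:
433  return "No";
434  case PG_U_SPACE_SEPARATOR:
435  return "Zs";
436  case PG_U_LINE_SEPARATOR:
437  return "Zl";
438  case PG_U_PARAGRAPH_SEPARATOR:
439  return "Zp";
440  case PG_U_CONTROL:
441  return "Cc";
442  case PG_U_FORMAT:
443  return "Cf";
444  case PG_U_PRIVATE_USE:
445  return "Co";
446  case PG_U_SURROGATE:
447  return "Cs";
448  case PG_U_DASH_PUNCTUATION:
449  return "Pd";
450  case PG_U_OPEN_PUNCTUATION:
451  return "Ps";
452  case PG_U_CLOSE_PUNCTUATION:
453  return "Pe";
454  case PG_U_CONNECTOR_PUNCTUATION:
455  return "Pc";
456  case PG_U_OTHER_PUNCTUATION:
457  return "Po";
458  case PG_U_MATH_SYMBOL:
459  return "Sm";
460  case PG_U_CURRENCY_SYMBOL:
461  return "Sc";
462  case PG_U_MODIFIER_SYMBOL:
463  return "Sk";
464  case PG_U_OTHER_SYMBOL:
465  return "So";
466  case PG_U_INITIAL_PUNCTUATION:
467  return "Pi";
468  case PG_U_FINAL_PUNCTUATION:
469  return "Pf";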
470  }
471 
472  Assert(false);
473  return "??"; /* keep compiler quiet */
474 }
@ PG_U_CONNECTOR_PUNCTUATION
@ PG_U_OTHER_SYMBOL
@ PG_U_DASH_PUNCTUATION
@ PG_U_UPPERCASE_LETTER
@ PG_U_CLOSE_PUNCTUATION
@ PG_U_NONSPACING_MARK
@ PG_U_INITIAL_PUNCTUATION
@ PG_U_CURRENCY_SYMBOL
@ PG_U_LETTER_NUMBER
@ PG_U_MODIFIER_SYMBOL
@ PG_U_OPEN_PUNCTUATION
@ PG_U_FORMAT
@ PG_U_PRIVATE_USE
@ PG_U_OTHER_LETTER
@ PG_U_PARAGRAPH_SEPARATOR
@ PG_U_SPACING_MARK
@ PG_U_TITLECASE_LETTER
@ PG_U_OTHER_NUMBER
@ PG_U_MATH_SYMBOL
@ PG_U_LOWERCASE_LETTER
@ PG_U_LINE_SEPARATOR
@ PG_U_SURROGATE
@ PG_U_FINAL_PUNCTUATION
@ PG_U_MODIFIER_LETTER
@ PG_U_OTHER_PUNCTUATION
@ PG_U_ENCLOSING_MARK

References Assert, PG_U_CLOSE_PUNCTUATION, PG_U_CONNECTOR_PUNCTUATION, PG_U_CONTROL, PG_U_CURRENCY_SYMBOL, PG_U_DASH_PUNCTUATION, PG_U_DECIMAL_NUMBER, PG_U_ENCLOSING_MARK, PG_U_FINAL_PUNCTUATION, PG_U_FORMAT, PG_U_INITIAL_PUNCTUATION, PG_U_LETTER_NUMBER, PG_U_LINE_SEPARATOR, PG_U_LOWERCASE_LETTER, PG_U_MATH_SYMBOL, PG_U_MODIFIER_LETTER, PG_U_MODIFIER_SYMBOL, PG_U_NONSPACING_MARK, PG_U_OPEN_PUNCTUATION, PG_U_OTHER_LETTER, PG_U_OTHER_NUMBER, PG_U_OTHER_PUNCTUATION, PG_U_OTHER_SYMBOL, PG_U_PARAGRAPH_SEPARATOR, PG_U_PRIVATE_USE, PG_U_SPACE_SEPARATOR, PG_U_SPACING_MARK, PG_U_SURROGATE, PG_U_TITLECASE_LETTER, PG_U_UNASSIGNED, and PG_U_UPPERCASE_LETTER.

◆ unicode_category_string()

const char* unicode_category_string ( pg_unicode_category  category)

Definition at line 332 of file unicode_category.c.

333 {
334  switch (category)
335  {
336  case PG_U_UNASSIGNED:
337  return "Unassigned";
338  case PG_U_UPPERCASE_LETTER:
339  return "Uppercase_Letter";
340  case PG_U_LOWERCASE_LETTER:
341  return "Lowercase_Letter";
342  case PG_U_TITLECASE_LETTER:
343  return "Titlecase_Letter";
344  case PG_U_MODIFIER_LETTER:
345  return "Modifier_Letter";
346  case PG_U_OTHER_LETTER:
347  return "Other_Letter";
348  case PG_U_NONSPACING_MARK:
349  return "Nonspacing_Mark";
350  case PG_U_ENCLOSING_MARK:
351  return "Enclosing_Mark";
352  case PG_U_SPACING_MARK:
353  return "Spacing_Mark";
354  case PG_U_DECIMAL_NUMBER:
355  return "Decimal_Number";
356  case PG_U_LETTER_NUMBER:
357  return "Letter_Number";
358  case PG_U_OTHER_NUMBER:
359  return "Other_Number";
360  case PG_U_SPACE_SEPARATOR:
361  return "Space_Separator";
362  case PG_U_LINE_SEPARATOR:
363  return "Line_Separator";
364  case PG_U_PARAGRAPH_SEPARATOR:
365  return "Paragraph_Separator";
366  case PG_U_CONTROL:
367  return "Control";
368  case PG_U_FORMAT:
369  return "Format";
370  case PG_U_PRIVATE_USE:
371  return "Private_Use";
372  case PG_U_SURROGATE:
373  return "Surrogate";
374  case PG_U_DASH_PUNCTUATION:
375  return "Dash_Punctuation";
376  case PG_U_OPEN_PUNCTUATION:
377  return "Open_Punctuation";
378  case PG_U_CLOSE_PUNCTUATION:
379  return "Close_Punctuation";
380  case PG_U_CONNECTOR_PUNCTUATION:
381  return "Connector_Punctuation";
382  case PG_U_OTHER_PUNCTUATION:
383  return "Other_Punctuation";
384  case PG_U_MATH_SYMBOL:
385  return "Math_Symbol";
386  case PG_U_CURRENCY_SYMBOL:
387  return "Currency_Symbol";
388  case PG_U_MODIFIER_SYMBOL:
389  return "Modifier_Symbol";
390  case PG_U_OTHER_SYMBOL:
391  return "Other_Symbol";
392  case PG_U_INITIAL_PUNCTUATION:
393  return "Initial_Punctuation";
394  case PG_U_FINAL_PUNCTUATION:
395  return "Final_Punctuation";
396  }
397 
398  Assert(false);
399  return "Unrecognized"; /* keep compiler quiet */
400 }

References Assert, PG_U_CLOSE_PUNCTUATION, PG_U_CONNECTOR_PUNCTUATION, PG_U_CONTROL, PG_U_CURRENCY_SYMBOL, PG_U_DASH_PUNCTUATION, PG_U_DECIMAL_NUMBER, PG_U_ENCLOSING_MARK, PG_U_FINAL_PUNCTUATION, PG_U_FORMAT, PG_U_INITIAL_PUNCTUATION, PG_U_LETTER_NUMBER, PG_U_LINE_SEPARATOR, PG_U_LOWERCASE_LETTER, PG_U_MATH_SYMBOL, PG_U_MODIFIER_LETTER, PG_U_MODIFIER_SYMBOL, PG_U_NONSPACING_MARK, PG_U_OPEN_PUNCTUATION, PG_U_OTHER_LETTER, PG_U_OTHER_NUMBER, PG_U_OTHER_PUNCTUATION, PG_U_OTHER_SYMBOL, PG_U_PARAGRAPH_SEPARATOR, PG_U_PRIVATE_USE, PG_U_SPACE_SEPARATOR, PG_U_SPACING_MARK, PG_U_SURROGATE, PG_U_TITLECASE_LETTER, PG_U_UNASSIGNED, and PG_U_UPPERCASE_LETTER.