#include "postgres.h"
#include "common/unicode_category.h"
#include "common/unicode_category_table.h"

Include dependency graph for unicode_category.c:

Macros
#define	PG_U_CATEGORY_MASK(X) ((uint32)(1 << (X)))

#define	PG_U_LU_MASK PG_U_CATEGORY_MASK(PG_U_UPPERCASE_LETTER)

#define	PG_U_LL_MASK PG_U_CATEGORY_MASK(PG_U_LOWERCASE_LETTER)

#define	PG_U_LT_MASK PG_U_CATEGORY_MASK(PG_U_TITLECASE_LETTER)

#define	PG_U_LC_MASK (PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK)

#define	PG_U_LM_MASK PG_U_CATEGORY_MASK(PG_U_MODIFIER_LETTER)

#define	PG_U_LO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_LETTER)

#define	PG_U_L_MASK

#define	PG_U_MN_MASK PG_U_CATEGORY_MASK(PG_U_NONSPACING_MARK)

#define	PG_U_ME_MASK PG_U_CATEGORY_MASK(PG_U_ENCLOSING_MARK)

#define	PG_U_MC_MASK PG_U_CATEGORY_MASK(PG_U_SPACING_MARK)

#define	PG_U_M_MASK (PG_U_MN_MASK|PG_U_MC_MASK|PG_U_ME_MASK)

#define	PG_U_ND_MASK PG_U_CATEGORY_MASK(PG_U_DECIMAL_NUMBER)

#define	PG_U_NL_MASK PG_U_CATEGORY_MASK(PG_U_LETTER_NUMBER)

#define	PG_U_NO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_NUMBER)

#define	PG_U_N_MASK (PG_U_ND_MASK|PG_U_NL_MASK|PG_U_NO_MASK)

#define	PG_U_PC_MASK PG_U_CATEGORY_MASK(PG_U_CONNECTOR_PUNCTUATION)

#define	PG_U_PD_MASK PG_U_CATEGORY_MASK(PG_U_DASH_PUNCTUATION)

#define	PG_U_PS_MASK PG_U_CATEGORY_MASK(PG_U_OPEN_PUNCTUATION)

#define	PG_U_PE_MASK PG_U_CATEGORY_MASK(PG_U_CLOSE_PUNCTUATION)

#define	PG_U_PI_MASK PG_U_CATEGORY_MASK(PG_U_INITIAL_PUNCTUATION)

#define	PG_U_PF_MASK PG_U_CATEGORY_MASK(PG_U_FINAL_PUNCTUATION)

#define	PG_U_PO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_PUNCTUATION)

#define	PG_U_P_MASK

#define	PG_U_SM_MASK PG_U_CATEGORY_MASK(PG_U_MATH_SYMBOL)

#define	PG_U_SC_MASK PG_U_CATEGORY_MASK(PG_U_CURRENCY_SYMBOL)

#define	PG_U_SK_MASK PG_U_CATEGORY_MASK(PG_U_MODIFIER_SYMBOL)

#define	PG_U_SO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_SYMBOL)

#define	PG_U_S_MASK (PG_U_SM_MASK|PG_U_SC_MASK|PG_U_SK_MASK|PG_U_SO_MASK)

#define	PG_U_ZS_MASK PG_U_CATEGORY_MASK(PG_U_SPACE_SEPARATOR)

#define	PG_U_ZL_MASK PG_U_CATEGORY_MASK(PG_U_LINE_SEPARATOR)

#define	PG_U_ZP_MASK PG_U_CATEGORY_MASK(PG_U_PARAGRAPH_SEPARATOR)

#define	PG_U_Z_MASK (PG_U_ZS_MASK|PG_U_ZL_MASK|PG_U_ZP_MASK)

#define	PG_U_CC_MASK PG_U_CATEGORY_MASK(PG_U_CONTROL)

#define	PG_U_CF_MASK PG_U_CATEGORY_MASK(PG_U_FORMAT)

#define	PG_U_CS_MASK PG_U_CATEGORY_MASK(PG_U_SURROGATE)

#define	PG_U_CO_MASK PG_U_CATEGORY_MASK(PG_U_PRIVATE_USE)

#define	PG_U_CN_MASK PG_U_CATEGORY_MASK(PG_U_UNASSIGNED)

#define	PG_U_C_MASK

#define	PG_U_CHARACTER_TAB 0x09

Functions
static bool	range_search (const pg_unicode_range *tbl, size_t size, pg_wchar code)

pg_unicode_category	unicode_category (pg_wchar code)

bool	pg_u_prop_alphabetic (pg_wchar code)

bool	pg_u_prop_lowercase (pg_wchar code)

bool	pg_u_prop_uppercase (pg_wchar code)

bool	pg_u_prop_cased (pg_wchar code)

bool	pg_u_prop_case_ignorable (pg_wchar code)

bool	pg_u_prop_white_space (pg_wchar code)

bool	pg_u_prop_hex_digit (pg_wchar code)

bool	pg_u_prop_join_control (pg_wchar code)

bool	pg_u_isdigit (pg_wchar code, bool posix)

bool	pg_u_isalpha (pg_wchar code)

bool	pg_u_isalnum (pg_wchar code, bool posix)

bool	pg_u_isword (pg_wchar code)

bool	pg_u_isupper (pg_wchar code)

bool	pg_u_islower (pg_wchar code)

bool	pg_u_isblank (pg_wchar code)

bool	pg_u_iscntrl (pg_wchar code)

bool	pg_u_isgraph (pg_wchar code)

bool	pg_u_isprint (pg_wchar code)

bool	pg_u_ispunct (pg_wchar code, bool posix)

bool	pg_u_isspace (pg_wchar code)

bool	pg_u_isxdigit (pg_wchar code, bool posix)

const char *	unicode_category_string (pg_unicode_category category)

const char *	unicode_category_abbrev (pg_unicode_category category)

Macro Definition Documentation

◆ PG_U_C_MASK

#define PG_U_C_MASK

Value:

(PG_U_CC_MASK|PG_U_CF_MASK|PG_U_CS_MASK|PG_U_CO_MASK|\
 PG_U_CN_MASK)

PG_U_CC_MASK

#define PG_U_CC_MASK

Definition: unicode_category.c:68

PG_U_CO_MASK

#define PG_U_CO_MASK

Definition: unicode_category.c:71

PG_U_CS_MASK

#define PG_U_CS_MASK

Definition: unicode_category.c:70

PG_U_CF_MASK

#define PG_U_CF_MASK

Definition: unicode_category.c:69

Definition at line 73 of file unicode_category.c.

◆ PG_U_CATEGORY_MASK

#define PG_U_CATEGORY_MASK(X) ((uint32)(1 << (X)))

Definition at line 32 of file unicode_category.c.

◆ PG_U_CC_MASK

#define PG_U_CC_MASK PG_U_CATEGORY_MASK(PG_U_CONTROL)

Definition at line 68 of file unicode_category.c.

◆ PG_U_CF_MASK

#define PG_U_CF_MASK PG_U_CATEGORY_MASK(PG_U_FORMAT)

Definition at line 69 of file unicode_category.c.

◆ PG_U_CHARACTER_TAB

#define PG_U_CHARACTER_TAB 0x09

Definition at line 76 of file unicode_category.c.

◆ PG_U_CN_MASK

#define PG_U_CN_MASK PG_U_CATEGORY_MASK(PG_U_UNASSIGNED)

Definition at line 72 of file unicode_category.c.

◆ PG_U_CO_MASK

#define PG_U_CO_MASK PG_U_CATEGORY_MASK(PG_U_PRIVATE_USE)

Definition at line 71 of file unicode_category.c.

◆ PG_U_CS_MASK

#define PG_U_CS_MASK PG_U_CATEGORY_MASK(PG_U_SURROGATE)

Definition at line 70 of file unicode_category.c.

◆ PG_U_L_MASK

#define PG_U_L_MASK

Value:

(PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK|PG_U_LM_MASK|\
 PG_U_LO_MASK)

PG_U_LL_MASK

#define PG_U_LL_MASK

Definition: unicode_category.c:35

PG_U_LT_MASK

#define PG_U_LT_MASK

Definition: unicode_category.c:36

PG_U_LM_MASK

#define PG_U_LM_MASK

Definition: unicode_category.c:38

PG_U_LU_MASK

#define PG_U_LU_MASK

Definition: unicode_category.c:34

Definition at line 40 of file unicode_category.c.

◆ PG_U_LC_MASK

#define PG_U_LC_MASK (PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK)

Definition at line 37 of file unicode_category.c.

◆ PG_U_LL_MASK

#define PG_U_LL_MASK PG_U_CATEGORY_MASK(PG_U_LOWERCASE_LETTER)

Definition at line 35 of file unicode_category.c.

◆ PG_U_LM_MASK

#define PG_U_LM_MASK PG_U_CATEGORY_MASK(PG_U_MODIFIER_LETTER)

Definition at line 38 of file unicode_category.c.

◆ PG_U_LO_MASK

#define PG_U_LO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_LETTER)

Definition at line 39 of file unicode_category.c.

◆ PG_U_LT_MASK

#define PG_U_LT_MASK PG_U_CATEGORY_MASK(PG_U_TITLECASE_LETTER)

Definition at line 36 of file unicode_category.c.

◆ PG_U_LU_MASK

#define PG_U_LU_MASK PG_U_CATEGORY_MASK(PG_U_UPPERCASE_LETTER)

Definition at line 34 of file unicode_category.c.

◆ PG_U_M_MASK

#define PG_U_M_MASK (PG_U_MN_MASK|PG_U_MC_MASK|PG_U_ME_MASK)

Definition at line 45 of file unicode_category.c.

◆ PG_U_MC_MASK

#define PG_U_MC_MASK PG_U_CATEGORY_MASK(PG_U_SPACING_MARK)

Definition at line 44 of file unicode_category.c.

◆ PG_U_ME_MASK

#define PG_U_ME_MASK PG_U_CATEGORY_MASK(PG_U_ENCLOSING_MARK)

Definition at line 43 of file unicode_category.c.

◆ PG_U_MN_MASK

#define PG_U_MN_MASK PG_U_CATEGORY_MASK(PG_U_NONSPACING_MARK)

Definition at line 42 of file unicode_category.c.

◆ PG_U_N_MASK

#define PG_U_N_MASK (PG_U_ND_MASK|PG_U_NL_MASK|PG_U_NO_MASK)

Definition at line 49 of file unicode_category.c.

◆ PG_U_ND_MASK

#define PG_U_ND_MASK PG_U_CATEGORY_MASK(PG_U_DECIMAL_NUMBER)

Definition at line 46 of file unicode_category.c.

◆ PG_U_NL_MASK

#define PG_U_NL_MASK PG_U_CATEGORY_MASK(PG_U_LETTER_NUMBER)

Definition at line 47 of file unicode_category.c.

◆ PG_U_NO_MASK

#define PG_U_NO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_NUMBER)

Definition at line 48 of file unicode_category.c.

◆ PG_U_P_MASK

#define PG_U_P_MASK

Value:

(PG_U_PC_MASK|PG_U_PD_MASK|PG_U_PS_MASK|PG_U_PE_MASK|\
 PG_U_PI_MASK|PG_U_PF_MASK|PG_U_PO_MASK)

PG_U_PO_MASK

#define PG_U_PO_MASK

Definition: unicode_category.c:56

PG_U_PS_MASK

#define PG_U_PS_MASK

Definition: unicode_category.c:52

PG_U_PC_MASK

#define PG_U_PC_MASK

Definition: unicode_category.c:50

PG_U_PD_MASK

#define PG_U_PD_MASK

Definition: unicode_category.c:51

PG_U_PF_MASK

#define PG_U_PF_MASK

Definition: unicode_category.c:55

PG_U_PE_MASK

#define PG_U_PE_MASK

Definition: unicode_category.c:53

Definition at line 57 of file unicode_category.c.

◆ PG_U_PC_MASK

#define PG_U_PC_MASK PG_U_CATEGORY_MASK(PG_U_CONNECTOR_PUNCTUATION)

Definition at line 50 of file unicode_category.c.

◆ PG_U_PD_MASK

#define PG_U_PD_MASK PG_U_CATEGORY_MASK(PG_U_DASH_PUNCTUATION)

Definition at line 51 of file unicode_category.c.

◆ PG_U_PE_MASK

#define PG_U_PE_MASK PG_U_CATEGORY_MASK(PG_U_CLOSE_PUNCTUATION)

Definition at line 53 of file unicode_category.c.

◆ PG_U_PF_MASK

#define PG_U_PF_MASK PG_U_CATEGORY_MASK(PG_U_FINAL_PUNCTUATION)

Definition at line 55 of file unicode_category.c.

◆ PG_U_PI_MASK

#define PG_U_PI_MASK PG_U_CATEGORY_MASK(PG_U_INITIAL_PUNCTUATION)

Definition at line 54 of file unicode_category.c.

◆ PG_U_PO_MASK

#define PG_U_PO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_PUNCTUATION)

Definition at line 56 of file unicode_category.c.

◆ PG_U_PS_MASK

#define PG_U_PS_MASK PG_U_CATEGORY_MASK(PG_U_OPEN_PUNCTUATION)

Definition at line 52 of file unicode_category.c.

◆ PG_U_S_MASK

#define PG_U_S_MASK (PG_U_SM_MASK|PG_U_SC_MASK|PG_U_SK_MASK|PG_U_SO_MASK)

Definition at line 63 of file unicode_category.c.

◆ PG_U_SC_MASK

#define PG_U_SC_MASK PG_U_CATEGORY_MASK(PG_U_CURRENCY_SYMBOL)

Definition at line 60 of file unicode_category.c.

◆ PG_U_SK_MASK

#define PG_U_SK_MASK PG_U_CATEGORY_MASK(PG_U_MODIFIER_SYMBOL)

Definition at line 61 of file unicode_category.c.

◆ PG_U_SM_MASK

#define PG_U_SM_MASK PG_U_CATEGORY_MASK(PG_U_MATH_SYMBOL)

Definition at line 59 of file unicode_category.c.

◆ PG_U_SO_MASK

#define PG_U_SO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_SYMBOL)

Definition at line 62 of file unicode_category.c.

◆ PG_U_Z_MASK

#define PG_U_Z_MASK (PG_U_ZS_MASK|PG_U_ZL_MASK|PG_U_ZP_MASK)

Definition at line 67 of file unicode_category.c.

◆ PG_U_ZL_MASK

#define PG_U_ZL_MASK PG_U_CATEGORY_MASK(PG_U_LINE_SEPARATOR)

Definition at line 65 of file unicode_category.c.

◆ PG_U_ZP_MASK

#define PG_U_ZP_MASK PG_U_CATEGORY_MASK(PG_U_PARAGRAPH_SEPARATOR)

Definition at line 66 of file unicode_category.c.

◆ PG_U_ZS_MASK

#define PG_U_ZS_MASK PG_U_CATEGORY_MASK(PG_U_SPACE_SEPARATOR)

Definition at line 64 of file unicode_category.c.

Function Documentation

◆ pg_u_isalnum()

bool pg_u_isalnum	(	pg_wchar	code,
		bool	posix
	)

Definition at line 226 of file unicode_category.c.

{
    /* An alphabetic character qualifies regardless of the posix flag. */
    if (pg_u_isalpha(code))
        return true;

    /* Otherwise defer to the digit test, which is given the posix flag. */
    return pg_u_isdigit(code, posix);
}

References pg_u_isalpha(), and pg_u_isdigit().

Referenced by initcap_wbnext(), and pg_wc_isalnum().

◆ pg_u_isalpha()

bool pg_u_isalpha ( pg_wchar code )

Definition at line 220 of file unicode_category.c.

{
    /* "alpha" is answered directly by the Alphabetic property lookup. */
    bool        is_alphabetic = pg_u_prop_alphabetic(code);

    return is_alphabetic;
}

References pg_u_prop_alphabetic().

Referenced by pg_u_isalnum(), pg_u_ispunct(), pg_u_isword(), and pg_wc_isalpha().

◆ pg_u_isblank()

bool pg_u_isblank ( pg_wchar code )

Definition at line 255 of file unicode_category.c.

{
    /* TAB is accepted explicitly, without consulting the category table. */
    if (code == PG_U_CHARACTER_TAB)
        return true;

    /* Everything else is blank only if its category is Space_Separator. */
    return unicode_category(code) == PG_U_SPACE_SEPARATOR;
}

References PG_U_CHARACTER_TAB, PG_U_SPACE_SEPARATOR, and unicode_category().

Referenced by pg_u_isprint().

◆ pg_u_iscntrl()

bool pg_u_iscntrl ( pg_wchar code )

Definition at line 262 of file unicode_category.c.

{
    /* A control character is one whose general category is Control. */
    pg_unicode_category cat = unicode_category(code);

    return cat == PG_U_CONTROL;
}

References PG_U_CONTROL, and unicode_category().

◆ pg_u_isdigit()

bool pg_u_isdigit	(	pg_wchar	code,
		bool	posix
	)

Definition at line 211 of file unicode_category.c.

{
    /* Non-POSIX mode: any code point in category Decimal_Number. */
    if (!posix)
        return unicode_category(code) == PG_U_DECIMAL_NUMBER;

    /* POSIX mode: only the ASCII digits '0'..'9'. */
    return (code >= '0' && code <= '9');
}

References PG_U_DECIMAL_NUMBER, and unicode_category().

Referenced by pg_u_isalnum(), and pg_wc_isdigit().

◆ pg_u_isgraph()

bool pg_u_isgraph ( pg_wchar code )

Definition at line 268 of file unicode_category.c.

{
    /* Categories that are never graphic: Control, Surrogate, Unassigned. */
    const uint32 excluded = PG_U_CC_MASK | PG_U_CS_MASK | PG_U_CN_MASK;
    uint32      mask = PG_U_CATEGORY_MASK(unicode_category(code));

    if ((mask & excluded) != 0)
        return false;

    /* Of the remaining code points, only white space is non-graphic. */
    return !pg_u_isspace(code);
}

References PG_U_CATEGORY_MASK, PG_U_CC_MASK, PG_U_CN_MASK, PG_U_CS_MASK, pg_u_isspace(), and unicode_category().

Referenced by pg_u_isprint(), and pg_wc_isgraph().

◆ pg_u_islower()

bool pg_u_islower ( pg_wchar code )

Definition at line 249 of file unicode_category.c.

{
    /* "lower" is answered directly by the Lowercase property lookup. */
    bool        is_lowercase = pg_u_prop_lowercase(code);

    return is_lowercase;
}

References pg_u_prop_lowercase().

Referenced by pg_wc_islower().

◆ pg_u_isprint()

bool pg_u_isprint ( pg_wchar code )

Definition at line 279 of file unicode_category.c.

{
    /* Control characters are rejected before the graph/blank tests run. */
    if (unicode_category(code) == PG_U_CONTROL)
        return false;

    /* Printable means graphic or blank. */
    if (pg_u_isgraph(code))
        return true;

    return pg_u_isblank(code);
}

References PG_U_CONTROL, pg_u_isblank(), pg_u_isgraph(), and unicode_category().

Referenced by pg_wc_isprint().

◆ pg_u_ispunct()

bool pg_u_ispunct	(	pg_wchar	code,
		bool	posix
	)

Definition at line 290 of file unicode_category.c.

{
    /* Punctuation categories are accepted in both modes. */
    uint32      accepted = PG_U_P_MASK;

    if (posix)
    {
        /* In POSIX mode an alphabetic character is never punctuation ... */
        if (pg_u_isalpha(code))
            return false;

        /* ... and the symbol categories are accepted as well. */
        accepted |= PG_U_S_MASK;
    }

    return PG_U_CATEGORY_MASK(unicode_category(code)) & accepted;
}

References PG_U_CATEGORY_MASK, pg_u_isalpha(), PG_U_P_MASK, PG_U_S_MASK, and unicode_category().

Referenced by pg_wc_ispunct().

◆ pg_u_isspace()

bool pg_u_isspace ( pg_wchar code )

Definition at line 311 of file unicode_category.c.

{
    /* "space" is answered directly by the White_Space property lookup. */
    bool        is_white_space = pg_u_prop_white_space(code);

    return is_white_space;
}

References pg_u_prop_white_space().

Referenced by pg_u_isgraph(), and pg_wc_isspace().

◆ pg_u_isupper()

bool pg_u_isupper ( pg_wchar code )

Definition at line 243 of file unicode_category.c.

{
    /* "upper" is answered directly by the Uppercase property lookup. */
    bool        is_uppercase = pg_u_prop_uppercase(code);

    return is_uppercase;
}

References pg_u_prop_uppercase().

Referenced by pg_wc_isupper().

◆ pg_u_isword()

bool pg_u_isword ( pg_wchar code )

Definition at line 232 of file unicode_category.c.

{
    /* Categories that make a word character: marks, decimal digits, connectors. */
    const uint32 word_categories = PG_U_M_MASK | PG_U_ND_MASK | PG_U_PC_MASK;
    uint32      mask = PG_U_CATEGORY_MASK(unicode_category(code));

    if (mask & word_categories)
        return true;

    /* Otherwise fall back to the Alphabetic and Join_Control properties. */
    return pg_u_isalpha(code) || pg_u_prop_join_control(code);
}

References PG_U_CATEGORY_MASK, pg_u_isalpha(), PG_U_M_MASK, PG_U_ND_MASK, PG_U_PC_MASK, pg_u_prop_join_control(), and unicode_category().

◆ pg_u_isxdigit()

bool pg_u_isxdigit	(	pg_wchar	code,
		bool	posix
	)

Definition at line 317 of file unicode_category.c.

{
    /* Non-POSIX mode: Decimal_Number category or the Hex_Digit property. */
    if (!posix)
        return unicode_category(code) == PG_U_DECIMAL_NUMBER ||
            pg_u_prop_hex_digit(code);

    /* POSIX mode: only the ASCII ranges 0-9, A-F and a-f. */
    if (code >= '0' && code <= '9')
        return true;
    if (code >= 'A' && code <= 'F')
        return true;

    return (code >= 'a' && code <= 'f');
}

References PG_U_DECIMAL_NUMBER, pg_u_prop_hex_digit(), and unicode_category().

◆ pg_u_prop_alphabetic()

bool pg_u_prop_alphabetic ( pg_wchar code )

Definition at line 111 of file unicode_category.c.

{
    /* Code points at or above 0x80 are looked up in the range table. */
    if (code >= 0x80)
        return range_search(unicode_alphabetic, lengthof(unicode_alphabetic), code);

    /* Below 0x80 the answer is a flag bit in the per-code-point table. */
    return unicode_opt_ascii[code].properties & PG_U_PROP_ALPHABETIC;
}

References lengthof, PG_U_PROP_ALPHABETIC, pg_unicode_properties::properties, range_search(), unicode_alphabetic, and unicode_opt_ascii.

Referenced by pg_u_isalpha().

◆ pg_u_prop_case_ignorable()

bool pg_u_prop_case_ignorable ( pg_wchar code )

Definition at line 159 of file unicode_category.c.

{
    /* Code points at or above 0x80 are looked up in the range table. */
    if (code >= 0x80)
        return range_search(unicode_case_ignorable, lengthof(unicode_case_ignorable), code);

    /* Below 0x80 the answer is a flag bit in the per-code-point table. */
    return unicode_opt_ascii[code].properties & PG_U_PROP_CASE_IGNORABLE;
}

References lengthof, PG_U_PROP_CASE_IGNORABLE, pg_unicode_properties::properties, range_search(), unicode_case_ignorable, and unicode_opt_ascii.

Referenced by check_final_sigma().

◆ pg_u_prop_cased()

bool pg_u_prop_cased ( pg_wchar code )

Definition at line 144 of file unicode_category.c.

{
    if (code >= 0x80)
    {
        /* A Titlecase_Letter is cased by category alone. */
        if (PG_U_CATEGORY_MASK(unicode_category(code)) & PG_U_LT_MASK)
            return true;

        /* Otherwise it is cased if it has the Lowercase or Uppercase property. */
        return pg_u_prop_lowercase(code) || pg_u_prop_uppercase(code);
    }

    /* Below 0x80 the answer is a flag bit in the per-code-point table. */
    return unicode_opt_ascii[code].properties & PG_U_PROP_CASED;
}

References PG_U_CATEGORY_MASK, PG_U_LT_MASK, PG_U_PROP_CASED, pg_u_prop_lowercase(), pg_u_prop_uppercase(), pg_unicode_properties::properties, unicode_category(), and unicode_opt_ascii.

Referenced by check_final_sigma().

◆ pg_u_prop_hex_digit()

bool pg_u_prop_hex_digit ( pg_wchar code )

Definition at line 181 of file unicode_category.c.

{
    /* Code points at or above 0x80 are looked up in the range table. */
    if (code >= 0x80)
        return range_search(unicode_hex_digit, lengthof(unicode_hex_digit), code);

    /* Below 0x80 the answer is a flag bit in the per-code-point table. */
    return unicode_opt_ascii[code].properties & PG_U_PROP_HEX_DIGIT;
}

References lengthof, PG_U_PROP_HEX_DIGIT, pg_unicode_properties::properties, range_search(), unicode_hex_digit, and unicode_opt_ascii.

Referenced by pg_u_isxdigit().

◆ pg_u_prop_join_control()

bool pg_u_prop_join_control ( pg_wchar code )

Definition at line 192 of file unicode_category.c.

{
    /* Code points at or above 0x80 are looked up in the range table. */
    if (code >= 0x80)
        return range_search(unicode_join_control, lengthof(unicode_join_control), code);

    /* Below 0x80 the answer is a flag bit in the per-code-point table. */
    return unicode_opt_ascii[code].properties & PG_U_PROP_JOIN_CONTROL;
}

References lengthof, PG_U_PROP_JOIN_CONTROL, pg_unicode_properties::properties, range_search(), unicode_join_control, and unicode_opt_ascii.

Referenced by pg_u_isword().

◆ pg_u_prop_lowercase()

bool pg_u_prop_lowercase ( pg_wchar code )

Definition at line 122 of file unicode_category.c.

{
    /* Code points at or above 0x80 are looked up in the range table. */
    if (code >= 0x80)
        return range_search(unicode_lowercase, lengthof(unicode_lowercase), code);

    /* Below 0x80 the answer is a flag bit in the per-code-point table. */
    return unicode_opt_ascii[code].properties & PG_U_PROP_LOWERCASE;
}

References lengthof, PG_U_PROP_LOWERCASE, pg_unicode_properties::properties, range_search(), unicode_lowercase, and unicode_opt_ascii.

Referenced by pg_u_islower(), and pg_u_prop_cased().

◆ pg_u_prop_uppercase()

bool pg_u_prop_uppercase ( pg_wchar code )

Definition at line 133 of file unicode_category.c.

{
    /* Code points at or above 0x80 are looked up in the range table. */
    if (code >= 0x80)
        return range_search(unicode_uppercase, lengthof(unicode_uppercase), code);

    /* Below 0x80 the answer is a flag bit in the per-code-point table. */
    return unicode_opt_ascii[code].properties & PG_U_PROP_UPPERCASE;
}

References lengthof, PG_U_PROP_UPPERCASE, pg_unicode_properties::properties, range_search(), unicode_opt_ascii, and unicode_uppercase.

Referenced by pg_u_isupper(), and pg_u_prop_cased().

◆ pg_u_prop_white_space()

bool pg_u_prop_white_space ( pg_wchar code )

Definition at line 170 of file unicode_category.c.

{
    /* Code points at or above 0x80 are looked up in the range table. */
    if (code >= 0x80)
        return range_search(unicode_white_space, lengthof(unicode_white_space), code);

    /* Below 0x80 the answer is a flag bit in the per-code-point table. */
    return unicode_opt_ascii[code].properties & PG_U_PROP_WHITE_SPACE;
}

References lengthof, PG_U_PROP_WHITE_SPACE, pg_unicode_properties::properties, range_search(), unicode_opt_ascii, and unicode_white_space.

Referenced by pg_u_isspace().

◆ range_search()

static bool range_search	(	const pg_unicode_range *	tbl,
		size_t	size,
		pg_wchar	code
	)

static

Definition at line 481 of file unicode_category.c.

{
    /*
     * Binary search over tbl[0 .. size-1] for a range whose [first, last]
     * interval contains code.  Returns true on a hit, false once the search
     * window is empty.
     *
     * NOTE(review): presumably tbl is sorted by code point with
     * non-overlapping ranges; confirm against the table generator.
     */
    int         lo = 0;
    int         hi = size - 1;

    Assert(code <= 0x10ffff);

    while (lo <= hi)
    {
        int         probe = (lo + hi) / 2;

        if (code > tbl[probe].last)
        {
            /* code lies above this range: discard the lower half */
            lo = probe + 1;
            continue;
        }

        if (code < tbl[probe].first)
        {
            /* code lies below this range: discard the upper half */
            hi = probe - 1;
            continue;
        }

        return true;
    }

    return false;
}

References Assert().

Referenced by pg_u_prop_alphabetic(), pg_u_prop_case_ignorable(), pg_u_prop_hex_digit(), pg_u_prop_join_control(), pg_u_prop_lowercase(), pg_u_prop_uppercase(), and pg_u_prop_white_space().

◆ unicode_category()

pg_unicode_category unicode_category ( pg_wchar code )

Definition at line 85 of file unicode_category.c.

{
    /*
     * Look up the general category of code.  Code points below 0x80 are
     * read straight from unicode_opt_ascii; all others are found by binary
     * search over unicode_categories.  A code point covered by no range is
     * reported as PG_U_UNASSIGNED.
     */
    int         lo = 0;
    int         hi = lengthof(unicode_categories) - 1;

    Assert(code <= 0x10ffff);

    if (code < 0x80)
        return unicode_opt_ascii[code].category;

    while (lo <= hi)
    {
        int         probe = (lo + hi) / 2;

        if (code > unicode_categories[probe].last)
        {
            lo = probe + 1;
            continue;
        }

        if (code < unicode_categories[probe].first)
        {
            hi = probe - 1;
            continue;
        }

        return unicode_categories[probe].category;
    }

    return PG_U_UNASSIGNED;
}

References Assert(), pg_category_range::category, pg_unicode_properties::category, lengthof, PG_U_UNASSIGNED, unicode_categories, and unicode_opt_ascii.

Referenced by pg_u_isblank(), pg_u_iscntrl(), pg_u_isdigit(), pg_u_isgraph(), pg_u_isprint(), pg_u_ispunct(), pg_u_isword(), pg_u_isxdigit(), pg_u_prop_cased(), and unicode_assigned().

◆ unicode_category_abbrev()

const char * unicode_category_abbrev ( pg_unicode_category category )

Definition at line 406 of file unicode_category.c.

{
    /*
     * Map a general category to its two-letter abbreviation.  Cases are
     * grouped by major class.  There is deliberately no default label, so
     * an unhandled value falls out of the switch to the Assert below.
     */
    switch (category)
    {
            /* Letters */
        case PG_U_UPPERCASE_LETTER:
            return "Lu";
        case PG_U_LOWERCASE_LETTER:
            return "Ll";
        case PG_U_TITLECASE_LETTER:
            return "Lt";
        case PG_U_MODIFIER_LETTER:
            return "Lm";
        case PG_U_OTHER_LETTER:
            return "Lo";

            /* Marks */
        case PG_U_NONSPACING_MARK:
            return "Mn";
        case PG_U_ENCLOSING_MARK:
            return "Me";
        case PG_U_SPACING_MARK:
            return "Mc";

            /* Numbers */
        case PG_U_DECIMAL_NUMBER:
            return "Nd";
        case PG_U_LETTER_NUMBER:
            return "Nl";
        case PG_U_OTHER_NUMBER:
            return "No";

            /* Punctuation */
        case PG_U_CONNECTOR_PUNCTUATION:
            return "Pc";
        case PG_U_DASH_PUNCTUATION:
            return "Pd";
        case PG_U_OPEN_PUNCTUATION:
            return "Ps";
        case PG_U_CLOSE_PUNCTUATION:
            return "Pe";
        case PG_U_INITIAL_PUNCTUATION:
            return "Pi";
        case PG_U_FINAL_PUNCTUATION:
            return "Pf";
        case PG_U_OTHER_PUNCTUATION:
            return "Po";

            /* Symbols */
        case PG_U_MATH_SYMBOL:
            return "Sm";
        case PG_U_CURRENCY_SYMBOL:
            return "Sc";
        case PG_U_MODIFIER_SYMBOL:
            return "Sk";
        case PG_U_OTHER_SYMBOL:
            return "So";

            /* Separators */
        case PG_U_SPACE_SEPARATOR:
            return "Zs";
        case PG_U_LINE_SEPARATOR:
            return "Zl";
        case PG_U_PARAGRAPH_SEPARATOR:
            return "Zp";

            /* Other */
        case PG_U_CONTROL:
            return "Cc";
        case PG_U_FORMAT:
            return "Cf";
        case PG_U_SURROGATE:
            return "Cs";
        case PG_U_PRIVATE_USE:
            return "Co";
        case PG_U_UNASSIGNED:
            return "Cn";
    }

    Assert(false);
    return "??";                /* keep compiler quiet */
}

References Assert(), PG_U_CLOSE_PUNCTUATION, PG_U_CONNECTOR_PUNCTUATION, PG_U_CONTROL, PG_U_CURRENCY_SYMBOL, PG_U_DASH_PUNCTUATION, PG_U_DECIMAL_NUMBER, PG_U_ENCLOSING_MARK, PG_U_FINAL_PUNCTUATION, PG_U_FORMAT, PG_U_INITIAL_PUNCTUATION, PG_U_LETTER_NUMBER, PG_U_LINE_SEPARATOR, PG_U_LOWERCASE_LETTER, PG_U_MATH_SYMBOL, PG_U_MODIFIER_LETTER, PG_U_MODIFIER_SYMBOL, PG_U_NONSPACING_MARK, PG_U_OPEN_PUNCTUATION, PG_U_OTHER_LETTER, PG_U_OTHER_NUMBER, PG_U_OTHER_PUNCTUATION, PG_U_OTHER_SYMBOL, PG_U_PARAGRAPH_SEPARATOR, PG_U_PRIVATE_USE, PG_U_SPACE_SEPARATOR, PG_U_SPACING_MARK, PG_U_SURROGATE, PG_U_TITLECASE_LETTER, PG_U_UNASSIGNED, and PG_U_UPPERCASE_LETTER.

◆ unicode_category_string()

const char * unicode_category_string ( pg_unicode_category category )

Definition at line 332 of file unicode_category.c.

{
    /*
     * Map a general category to its long name.  Cases are grouped by major
     * class.  There is deliberately no default label, so an unhandled value
     * falls out of the switch to the Assert below.
     */
    switch (category)
    {
            /* Letters */
        case PG_U_UPPERCASE_LETTER:
            return "Uppercase_Letter";
        case PG_U_LOWERCASE_LETTER:
            return "Lowercase_Letter";
        case PG_U_TITLECASE_LETTER:
            return "Titlecase_Letter";
        case PG_U_MODIFIER_LETTER:
            return "Modifier_Letter";
        case PG_U_OTHER_LETTER:
            return "Other_Letter";

            /* Marks */
        case PG_U_NONSPACING_MARK:
            return "Nonspacing_Mark";
        case PG_U_ENCLOSING_MARK:
            return "Enclosing_Mark";
        case PG_U_SPACING_MARK:
            return "Spacing_Mark";

            /* Numbers */
        case PG_U_DECIMAL_NUMBER:
            return "Decimal_Number";
        case PG_U_LETTER_NUMBER:
            return "Letter_Number";
        case PG_U_OTHER_NUMBER:
            return "Other_Number";

            /* Punctuation */
        case PG_U_CONNECTOR_PUNCTUATION:
            return "Connector_Punctuation";
        case PG_U_DASH_PUNCTUATION:
            return "Dash_Punctuation";
        case PG_U_OPEN_PUNCTUATION:
            return "Open_Punctuation";
        case PG_U_CLOSE_PUNCTUATION:
            return "Close_Punctuation";
        case PG_U_INITIAL_PUNCTUATION:
            return "Initial_Punctuation";
        case PG_U_FINAL_PUNCTUATION:
            return "Final_Punctuation";
        case PG_U_OTHER_PUNCTUATION:
            return "Other_Punctuation";

            /* Symbols */
        case PG_U_MATH_SYMBOL:
            return "Math_Symbol";
        case PG_U_CURRENCY_SYMBOL:
            return "Currency_Symbol";
        case PG_U_MODIFIER_SYMBOL:
            return "Modifier_Symbol";
        case PG_U_OTHER_SYMBOL:
            return "Other_Symbol";

            /* Separators */
        case PG_U_SPACE_SEPARATOR:
            return "Space_Separator";
        case PG_U_LINE_SEPARATOR:
            return "Line_Separator";
        case PG_U_PARAGRAPH_SEPARATOR:
            return "Paragraph_Separator";

            /* Other */
        case PG_U_CONTROL:
            return "Control";
        case PG_U_FORMAT:
            return "Format";
        case PG_U_SURROGATE:
            return "Surrogate";
        case PG_U_PRIVATE_USE:
            return "Private_Use";
        case PG_U_UNASSIGNED:
            return "Unassigned";
    }

    Assert(false);
    return "Unrecognized";      /* keep compiler quiet */
}

References Assert(), PG_U_CLOSE_PUNCTUATION, PG_U_CONNECTOR_PUNCTUATION, PG_U_CONTROL, PG_U_CURRENCY_SYMBOL, PG_U_DASH_PUNCTUATION, PG_U_DECIMAL_NUMBER, PG_U_ENCLOSING_MARK, PG_U_FINAL_PUNCTUATION, PG_U_FORMAT, PG_U_INITIAL_PUNCTUATION, PG_U_LETTER_NUMBER, PG_U_LINE_SEPARATOR, PG_U_LOWERCASE_LETTER, PG_U_MATH_SYMBOL, PG_U_MODIFIER_LETTER, PG_U_MODIFIER_SYMBOL, PG_U_NONSPACING_MARK, PG_U_OPEN_PUNCTUATION, PG_U_OTHER_LETTER, PG_U_OTHER_NUMBER, PG_U_OTHER_PUNCTUATION, PG_U_OTHER_SYMBOL, PG_U_PARAGRAPH_SEPARATOR, PG_U_PRIVATE_USE, PG_U_SPACE_SEPARATOR, PG_U_SPACING_MARK, PG_U_SURROGATE, PG_U_TITLECASE_LETTER, PG_U_UNASSIGNED, and PG_U_UPPERCASE_LETTER.

Macros

Functions

Macro Definition Documentation

◆ PG_U_C_MASK

◆ PG_U_CATEGORY_MASK

◆ PG_U_CC_MASK

◆ PG_U_CF_MASK

◆ PG_U_CHARACTER_TAB

◆ PG_U_CN_MASK

◆ PG_U_CO_MASK

◆ PG_U_CS_MASK

◆ PG_U_L_MASK

◆ PG_U_LC_MASK

◆ PG_U_LL_MASK

◆ PG_U_LM_MASK

◆ PG_U_LO_MASK

◆ PG_U_LT_MASK

◆ PG_U_LU_MASK

◆ PG_U_M_MASK

◆ PG_U_MC_MASK

◆ PG_U_ME_MASK

◆ PG_U_MN_MASK

◆ PG_U_N_MASK

◆ PG_U_ND_MASK

◆ PG_U_NL_MASK

◆ PG_U_NO_MASK

◆ PG_U_P_MASK

◆ PG_U_PC_MASK

◆ PG_U_PD_MASK

◆ PG_U_PE_MASK

◆ PG_U_PF_MASK

◆ PG_U_PI_MASK

◆ PG_U_PO_MASK

◆ PG_U_PS_MASK

◆ PG_U_S_MASK

◆ PG_U_SC_MASK

◆ PG_U_SK_MASK

◆ PG_U_SM_MASK

◆ PG_U_SO_MASK

◆ PG_U_Z_MASK

◆ PG_U_ZL_MASK

◆ PG_U_ZP_MASK

◆ PG_U_ZS_MASK

Function Documentation

◆ pg_u_isalnum()

◆ pg_u_isalpha()

◆ pg_u_isblank()

◆ pg_u_iscntrl()

◆ pg_u_isdigit()

◆ pg_u_isgraph()

◆ pg_u_islower()

◆ pg_u_isprint()

◆ pg_u_ispunct()

◆ pg_u_isspace()

◆ pg_u_isupper()

◆ pg_u_isword()

◆ pg_u_isxdigit()

◆ pg_u_prop_alphabetic()

◆ pg_u_prop_case_ignorable()

◆ pg_u_prop_cased()

◆ pg_u_prop_hex_digit()

◆ pg_u_prop_join_control()

◆ pg_u_prop_lowercase()

◆ pg_u_prop_uppercase()

◆ pg_u_prop_white_space()

◆ range_search()

◆ unicode_category()

◆ unicode_category_abbrev()

◆ unicode_category_string()