unicode__category_8c_source.html

/*-------------------------------------------------------------------------

 * unicode_category.c

 *      Determine general category and character properties of Unicode

 *      characters. Encoding must be UTF8, where we assume that the pg_wchar

 *      representation is a code point.

 *

 * Portions Copyright (c) 2017-2025, PostgreSQL Global Development Group

 *

 * IDENTIFICATION

 *    src/common/unicode_category.c

 *

 *-------------------------------------------------------------------------

 */

#ifndef FRONTEND

#include "postgres.h"

#else

#include "postgres_fe.h"

#endif


#include "common/unicode_category.h"

#include "common/unicode_category_table.h"


/*

 * Create bitmasks from pg_unicode_category values for efficient comparison of

 * multiple categories. For instance, PG_U_MN_MASK is a bitmask representing

 * the general category Mn; and PG_U_M_MASK represents general categories Mn,

 * Me, and Mc.

 *

 * The number of Unicode General Categories should never grow, so a 32-bit

 * mask is fine.

 */

/*
 * Bit for a single pg_unicode_category value X.
 *
 * The literal is cast to uint32 *before* shifting so that the shift is
 * carried out in unsigned 32-bit arithmetic; shifting a signed int into its
 * sign bit would be undefined behavior.
 */
#define PG_U_CATEGORY_MASK(X) ((uint32) 1 << (X))


/* Letters (L*); PG_U_LC_MASK is the subset Lu, Ll and Lt */
#define PG_U_LU_MASK PG_U_CATEGORY_MASK(PG_U_UPPERCASE_LETTER)
#define PG_U_LL_MASK PG_U_CATEGORY_MASK(PG_U_LOWERCASE_LETTER)
#define PG_U_LT_MASK PG_U_CATEGORY_MASK(PG_U_TITLECASE_LETTER)
#define PG_U_LC_MASK (PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK)
#define PG_U_LM_MASK PG_U_CATEGORY_MASK(PG_U_MODIFIER_LETTER)
#define PG_U_LO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_LETTER)
#define PG_U_L_MASK (PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK|PG_U_LM_MASK|\
                     PG_U_LO_MASK)

/* Marks (M*) */
#define PG_U_MN_MASK PG_U_CATEGORY_MASK(PG_U_NONSPACING_MARK)
#define PG_U_ME_MASK PG_U_CATEGORY_MASK(PG_U_ENCLOSING_MARK)
#define PG_U_MC_MASK PG_U_CATEGORY_MASK(PG_U_SPACING_MARK)
#define PG_U_M_MASK (PG_U_MN_MASK|PG_U_MC_MASK|PG_U_ME_MASK)

/* Numbers (N*) */
#define PG_U_ND_MASK PG_U_CATEGORY_MASK(PG_U_DECIMAL_NUMBER)
#define PG_U_NL_MASK PG_U_CATEGORY_MASK(PG_U_LETTER_NUMBER)
#define PG_U_NO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_NUMBER)
#define PG_U_N_MASK (PG_U_ND_MASK|PG_U_NL_MASK|PG_U_NO_MASK)

/* Punctuation (P*) */
#define PG_U_PC_MASK PG_U_CATEGORY_MASK(PG_U_CONNECTOR_PUNCTUATION)
#define PG_U_PD_MASK PG_U_CATEGORY_MASK(PG_U_DASH_PUNCTUATION)
#define PG_U_PS_MASK PG_U_CATEGORY_MASK(PG_U_OPEN_PUNCTUATION)
#define PG_U_PE_MASK PG_U_CATEGORY_MASK(PG_U_CLOSE_PUNCTUATION)
#define PG_U_PI_MASK PG_U_CATEGORY_MASK(PG_U_INITIAL_PUNCTUATION)
#define PG_U_PF_MASK PG_U_CATEGORY_MASK(PG_U_FINAL_PUNCTUATION)
#define PG_U_PO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_PUNCTUATION)
#define PG_U_P_MASK (PG_U_PC_MASK|PG_U_PD_MASK|PG_U_PS_MASK|PG_U_PE_MASK|\
                     PG_U_PI_MASK|PG_U_PF_MASK|PG_U_PO_MASK)

/* Symbols (S*) */
#define PG_U_SM_MASK PG_U_CATEGORY_MASK(PG_U_MATH_SYMBOL)
#define PG_U_SC_MASK PG_U_CATEGORY_MASK(PG_U_CURRENCY_SYMBOL)
#define PG_U_SK_MASK PG_U_CATEGORY_MASK(PG_U_MODIFIER_SYMBOL)
#define PG_U_SO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_SYMBOL)
#define PG_U_S_MASK (PG_U_SM_MASK|PG_U_SC_MASK|PG_U_SK_MASK|PG_U_SO_MASK)

/* Separators (Z*) */
#define PG_U_ZS_MASK PG_U_CATEGORY_MASK(PG_U_SPACE_SEPARATOR)
#define PG_U_ZL_MASK PG_U_CATEGORY_MASK(PG_U_LINE_SEPARATOR)
#define PG_U_ZP_MASK PG_U_CATEGORY_MASK(PG_U_PARAGRAPH_SEPARATOR)
#define PG_U_Z_MASK (PG_U_ZS_MASK|PG_U_ZL_MASK|PG_U_ZP_MASK)

/* Other (C*): control, format, surrogate, private use, unassigned */
#define PG_U_CC_MASK PG_U_CATEGORY_MASK(PG_U_CONTROL)
#define PG_U_CF_MASK PG_U_CATEGORY_MASK(PG_U_FORMAT)
#define PG_U_CS_MASK PG_U_CATEGORY_MASK(PG_U_SURROGATE)
#define PG_U_CO_MASK PG_U_CATEGORY_MASK(PG_U_PRIVATE_USE)
#define PG_U_CN_MASK PG_U_CATEGORY_MASK(PG_U_UNASSIGNED)
#define PG_U_C_MASK (PG_U_CC_MASK|PG_U_CF_MASK|PG_U_CS_MASK|PG_U_CO_MASK|\
                     PG_U_CN_MASK)

/* U+0009 CHARACTER TABULATION, compared against in pg_u_isblank() */
#define PG_U_CHARACTER_TAB  0x09


/*
 * Forward declaration: range_search() is defined at the end of this file but
 * is called by the pg_u_prop_*() functions that precede its definition.
 */
static bool range_search(const pg_unicode_range *tbl, size_t size,

                         pg_wchar code);


/*
 * Return the Unicode general category of the given code point.
 *
 * Code points below 0x80 are answered directly from unicode_opt_ascii.
 * Everything else is located by binary search over the range table
 * unicode_categories (which is assumed to be ordered by code point); a code
 * point that falls in none of the ranges is reported as PG_U_UNASSIGNED.
 */
pg_unicode_category
unicode_category(pg_wchar code)
{
    int         lo;
    int         hi;

    Assert(code <= 0x10ffff);

    /* fast path: ASCII is looked up by direct index */
    if (code < 0x80)
        return unicode_opt_ascii[code].category;

    lo = 0;
    hi = lengthof(unicode_categories) - 1;

    while (lo <= hi)
    {
        int         probe = (lo + hi) / 2;

        if (code > unicode_categories[probe].last)
        {
            /* target lies above this range */
            lo = probe + 1;
            continue;
        }

        if (code < unicode_categories[probe].first)
        {
            /* target lies below this range */
            hi = probe - 1;
            continue;
        }

        /* first <= code <= last: found the containing range */
        return unicode_categories[probe].category;
    }

    return PG_U_UNASSIGNED;
}


/*
 * Does the code point have the Alphabetic property?
 *
 * Non-ASCII code points are searched for in the unicode_alphabetic range
 * table; ASCII is answered from the property bits in unicode_opt_ascii.
 */
bool
pg_u_prop_alphabetic(pg_wchar code)
{
    if (code >= 0x80)
        return range_search(unicode_alphabetic,
                            lengthof(unicode_alphabetic),
                            code);

    return (unicode_opt_ascii[code].properties & PG_U_PROP_ALPHABETIC) != 0;
}


/*
 * Does the code point have the Lowercase property?
 *
 * Non-ASCII code points are searched for in the unicode_lowercase range
 * table; ASCII is answered from the property bits in unicode_opt_ascii.
 */
bool
pg_u_prop_lowercase(pg_wchar code)
{
    if (code >= 0x80)
        return range_search(unicode_lowercase,
                            lengthof(unicode_lowercase),
                            code);

    return (unicode_opt_ascii[code].properties & PG_U_PROP_LOWERCASE) != 0;
}


/*
 * Does the code point have the Uppercase property?
 *
 * Non-ASCII code points are searched for in the unicode_uppercase range
 * table; ASCII is answered from the property bits in unicode_opt_ascii.
 */
bool
pg_u_prop_uppercase(pg_wchar code)
{
    if (code >= 0x80)
        return range_search(unicode_uppercase,
                            lengthof(unicode_uppercase),
                            code);

    return (unicode_opt_ascii[code].properties & PG_U_PROP_UPPERCASE) != 0;
}


/*
 * Does the code point have the Cased property?
 *
 * ASCII is answered from the property bits in unicode_opt_ascii.  Any other
 * code point is cased if its general category is Lt (titlecase letter), or
 * if it has the Lowercase or the Uppercase property.
 */
bool
pg_u_prop_cased(pg_wchar code)
{
    uint32      titlecase_bit;

    if (code < 0x80)
        return (unicode_opt_ascii[code].properties & PG_U_PROP_CASED) != 0;

    titlecase_bit = PG_U_CATEGORY_MASK(unicode_category(code)) & PG_U_LT_MASK;
    if (titlecase_bit != 0)
        return true;

    if (pg_u_prop_lowercase(code))
        return true;

    return pg_u_prop_uppercase(code);
}


/*
 * Does the code point have the Case_Ignorable property?
 *
 * Non-ASCII code points are searched for in the unicode_case_ignorable range
 * table; ASCII is answered from the property bits in unicode_opt_ascii.
 */
bool
pg_u_prop_case_ignorable(pg_wchar code)
{
    if (code >= 0x80)
        return range_search(unicode_case_ignorable,
                            lengthof(unicode_case_ignorable),
                            code);

    return (unicode_opt_ascii[code].properties & PG_U_PROP_CASE_IGNORABLE) != 0;
}


/*
 * Does the code point have the White_Space property?
 *
 * Non-ASCII code points are searched for in the unicode_white_space range
 * table; ASCII is answered from the property bits in unicode_opt_ascii.
 */
bool
pg_u_prop_white_space(pg_wchar code)
{
    if (code >= 0x80)
        return range_search(unicode_white_space,
                            lengthof(unicode_white_space),
                            code);

    return (unicode_opt_ascii[code].properties & PG_U_PROP_WHITE_SPACE) != 0;
}


/*
 * Does the code point have the Hex_Digit property?
 *
 * Non-ASCII code points are searched for in the unicode_hex_digit range
 * table; ASCII is answered from the property bits in unicode_opt_ascii.
 */
bool
pg_u_prop_hex_digit(pg_wchar code)
{
    if (code >= 0x80)
        return range_search(unicode_hex_digit,
                            lengthof(unicode_hex_digit),
                            code);

    return (unicode_opt_ascii[code].properties & PG_U_PROP_HEX_DIGIT) != 0;
}


/*
 * Does the code point have the Join_Control property?
 *
 * Non-ASCII code points are searched for in the unicode_join_control range
 * table; ASCII is answered from the property bits in unicode_opt_ascii.
 */
bool
pg_u_prop_join_control(pg_wchar code)
{
    if (code >= 0x80)
        return range_search(unicode_join_control,
                            lengthof(unicode_join_control),
                            code);

    return (unicode_opt_ascii[code].properties & PG_U_PROP_JOIN_CONTROL) != 0;
}


/*

 * The following functions implement the Compatibility Properties described

 * at: http://www.unicode.org/reports/tr18/#Compatibility_Properties

 *

 * If 'posix' is true, implements the "POSIX Compatible" variant, otherwise

 * the "Standard" variant.

 */


/*
 * "digit" compatibility property.
 *
 * With 'posix' set only the ASCII digits '0'..'9' qualify; otherwise any
 * code point whose general category is Nd (decimal number) does.
 */
bool
pg_u_isdigit(pg_wchar code, bool posix)
{
    if (!posix)
        return unicode_category(code) == PG_U_DECIMAL_NUMBER;

    return (code >= '0' && code <= '9');
}


/*
 * "alpha" compatibility property: identical to the Alphabetic property.
 */
bool
pg_u_isalpha(pg_wchar code)
{
    bool        alphabetic = pg_u_prop_alphabetic(code);

    return alphabetic;
}


/*
 * "alnum" compatibility property: alpha, or digit (the latter evaluated with
 * the caller's 'posix' setting).
 */
bool
pg_u_isalnum(pg_wchar code, bool posix)
{
    if (pg_u_isalpha(code))
        return true;

    return pg_u_isdigit(code, posix);
}


/*
 * "word" compatibility property.
 *
 * True for code points whose general category is a mark (Mn, Mc, Me), a
 * decimal number (Nd) or connector punctuation (Pc), as well as for anything
 * that is alpha or has the Join_Control property.
 */
bool
pg_u_isword(pg_wchar code)
{
    uint32      word_categories = PG_U_M_MASK | PG_U_ND_MASK | PG_U_PC_MASK;
    uint32      code_bit = PG_U_CATEGORY_MASK(unicode_category(code));

    if ((code_bit & word_categories) != 0)
        return true;

    if (pg_u_isalpha(code))
        return true;

    return pg_u_prop_join_control(code);
}


/*
 * "upper" compatibility property: identical to the Uppercase property.
 */
bool
pg_u_isupper(pg_wchar code)
{
    bool        uppercase = pg_u_prop_uppercase(code);

    return uppercase;
}


/*
 * "lower" compatibility property: identical to the Lowercase property.
 */
bool
pg_u_islower(pg_wchar code)
{
    bool        lowercase = pg_u_prop_lowercase(code);

    return lowercase;
}


/*
 * "blank" compatibility property: the tab character, or any code point whose
 * general category is Zs (space separator).
 */
bool
pg_u_isblank(pg_wchar code)
{
    if (code == PG_U_CHARACTER_TAB)
        return true;

    return unicode_category(code) == PG_U_SPACE_SEPARATOR;
}


/*
 * "cntrl" compatibility property: general category Cc (control).
 */
bool
pg_u_iscntrl(pg_wchar code)
{
    if (unicode_category(code) != PG_U_CONTROL)
        return false;

    return true;
}


/*
 * "graph" compatibility property.
 *
 * Everything qualifies except code points whose general category is Cc
 * (control), Cs (surrogate) or Cn (unassigned), and code points that are
 * space according to pg_u_isspace().
 */
bool
pg_u_isgraph(pg_wchar code)
{
    uint32      excluded = PG_U_CC_MASK | PG_U_CS_MASK | PG_U_CN_MASK;
    uint32      code_bit = PG_U_CATEGORY_MASK(unicode_category(code));

    return (code_bit & excluded) == 0 && !pg_u_isspace(code);
}


/*
 * "print" compatibility property: graph or blank, but never a control
 * character (general category Cc).
 */
bool
pg_u_isprint(pg_wchar code)
{
    /* controls are rejected up front, even if they would count as blank */
    if (unicode_category(code) == PG_U_CONTROL)
        return false;

    if (pg_u_isgraph(code))
        return true;

    return pg_u_isblank(code);
}


/*
 * "punct" compatibility property.
 *
 * Standard variant: general category is one of the punctuation categories
 * (P*).  POSIX variant: additionally accepts the symbol categories (S*), but
 * rejects anything that is alpha.
 */
bool
pg_u_ispunct(pg_wchar code, bool posix)
{
    uint32      accepted = PG_U_P_MASK;

    if (posix)
    {
        /* alphabetic code points never count as punctuation here */
        if (pg_u_isalpha(code))
            return false;

        accepted |= PG_U_S_MASK;
    }

    return (PG_U_CATEGORY_MASK(unicode_category(code)) & accepted) != 0;
}


/*
 * "space" compatibility property: identical to the White_Space property.
 */
bool
pg_u_isspace(pg_wchar code)
{
    bool        white_space = pg_u_prop_white_space(code);

    return white_space;
}


/*
 * "xdigit" compatibility property.
 *
 * With 'posix' set only the ASCII characters 0-9, A-F and a-f qualify;
 * otherwise any code point with general category Nd (decimal number) or with
 * the Hex_Digit property does.
 */
bool
pg_u_isxdigit(pg_wchar code, bool posix)
{
    if (!posix)
        return unicode_category(code) == PG_U_DECIMAL_NUMBER ||
            pg_u_prop_hex_digit(code);

    if (code >= '0' && code <= '9')
        return true;

    if (code >= 'A' && code <= 'F')
        return true;

    return (code >= 'a' && code <= 'f');
}


/*
 * Long descriptive name of a Unicode general category, e.g.
 * "Uppercase_Letter".
 *
 * The returned pointer refers to a string literal.  A value not handled by
 * the switch trips an assertion and yields "Unrecognized".
 */
const char *
unicode_category_string(pg_unicode_category category)
{
    const char *name = NULL;

    /* deliberately no default: so the switch lists every category itself */
    switch (category)
    {
            /* letters */
        case PG_U_UPPERCASE_LETTER:
            name = "Uppercase_Letter";
            break;
        case PG_U_LOWERCASE_LETTER:
            name = "Lowercase_Letter";
            break;
        case PG_U_TITLECASE_LETTER:
            name = "Titlecase_Letter";
            break;
        case PG_U_MODIFIER_LETTER:
            name = "Modifier_Letter";
            break;
        case PG_U_OTHER_LETTER:
            name = "Other_Letter";
            break;

            /* marks */
        case PG_U_NONSPACING_MARK:
            name = "Nonspacing_Mark";
            break;
        case PG_U_ENCLOSING_MARK:
            name = "Enclosing_Mark";
            break;
        case PG_U_SPACING_MARK:
            name = "Spacing_Mark";
            break;

            /* numbers */
        case PG_U_DECIMAL_NUMBER:
            name = "Decimal_Number";
            break;
        case PG_U_LETTER_NUMBER:
            name = "Letter_Number";
            break;
        case PG_U_OTHER_NUMBER:
            name = "Other_Number";
            break;

            /* punctuation */
        case PG_U_CONNECTOR_PUNCTUATION:
            name = "Connector_Punctuation";
            break;
        case PG_U_DASH_PUNCTUATION:
            name = "Dash_Punctuation";
            break;
        case PG_U_OPEN_PUNCTUATION:
            name = "Open_Punctuation";
            break;
        case PG_U_CLOSE_PUNCTUATION:
            name = "Close_Punctuation";
            break;
        case PG_U_INITIAL_PUNCTUATION:
            name = "Initial_Punctuation";
            break;
        case PG_U_FINAL_PUNCTUATION:
            name = "Final_Punctuation";
            break;
        case PG_U_OTHER_PUNCTUATION:
            name = "Other_Punctuation";
            break;

            /* symbols */
        case PG_U_MATH_SYMBOL:
            name = "Math_Symbol";
            break;
        case PG_U_CURRENCY_SYMBOL:
            name = "Currency_Symbol";
            break;
        case PG_U_MODIFIER_SYMBOL:
            name = "Modifier_Symbol";
            break;
        case PG_U_OTHER_SYMBOL:
            name = "Other_Symbol";
            break;

            /* separators */
        case PG_U_SPACE_SEPARATOR:
            name = "Space_Separator";
            break;
        case PG_U_LINE_SEPARATOR:
            name = "Line_Separator";
            break;
        case PG_U_PARAGRAPH_SEPARATOR:
            name = "Paragraph_Separator";
            break;

            /* other */
        case PG_U_CONTROL:
            name = "Control";
            break;
        case PG_U_FORMAT:
            name = "Format";
            break;
        case PG_U_SURROGATE:
            name = "Surrogate";
            break;
        case PG_U_PRIVATE_USE:
            name = "Private_Use";
            break;
        case PG_U_UNASSIGNED:
            name = "Unassigned";
            break;
    }

    if (name != NULL)
        return name;

    Assert(false);
    return "Unrecognized";      /* keep compiler quiet */
}


/*
 * Two-letter abbreviation of a Unicode general category, e.g. "Lu".
 *
 * The returned pointer refers to a string literal.  A value not handled by
 * the switch trips an assertion and yields "??".
 */
const char *
unicode_category_abbrev(pg_unicode_category category)
{
    const char *abbrev = NULL;

    /* deliberately no default: so the switch lists every category itself */
    switch (category)
    {
            /* letters */
        case PG_U_UPPERCASE_LETTER:
            abbrev = "Lu";
            break;
        case PG_U_LOWERCASE_LETTER:
            abbrev = "Ll";
            break;
        case PG_U_TITLECASE_LETTER:
            abbrev = "Lt";
            break;
        case PG_U_MODIFIER_LETTER:
            abbrev = "Lm";
            break;
        case PG_U_OTHER_LETTER:
            abbrev = "Lo";
            break;

            /* marks */
        case PG_U_NONSPACING_MARK:
            abbrev = "Mn";
            break;
        case PG_U_ENCLOSING_MARK:
            abbrev = "Me";
            break;
        case PG_U_SPACING_MARK:
            abbrev = "Mc";
            break;

            /* numbers */
        case PG_U_DECIMAL_NUMBER:
            abbrev = "Nd";
            break;
        case PG_U_LETTER_NUMBER:
            abbrev = "Nl";
            break;
        case PG_U_OTHER_NUMBER:
            abbrev = "No";
            break;

            /* punctuation */
        case PG_U_CONNECTOR_PUNCTUATION:
            abbrev = "Pc";
            break;
        case PG_U_DASH_PUNCTUATION:
            abbrev = "Pd";
            break;
        case PG_U_OPEN_PUNCTUATION:
            abbrev = "Ps";
            break;
        case PG_U_CLOSE_PUNCTUATION:
            abbrev = "Pe";
            break;
        case PG_U_INITIAL_PUNCTUATION:
            abbrev = "Pi";
            break;
        case PG_U_FINAL_PUNCTUATION:
            abbrev = "Pf";
            break;
        case PG_U_OTHER_PUNCTUATION:
            abbrev = "Po";
            break;

            /* symbols */
        case PG_U_MATH_SYMBOL:
            abbrev = "Sm";
            break;
        case PG_U_CURRENCY_SYMBOL:
            abbrev = "Sc";
            break;
        case PG_U_MODIFIER_SYMBOL:
            abbrev = "Sk";
            break;
        case PG_U_OTHER_SYMBOL:
            abbrev = "So";
            break;

            /* separators */
        case PG_U_SPACE_SEPARATOR:
            abbrev = "Zs";
            break;
        case PG_U_LINE_SEPARATOR:
            abbrev = "Zl";
            break;
        case PG_U_PARAGRAPH_SEPARATOR:
            abbrev = "Zp";
            break;

            /* other */
        case PG_U_CONTROL:
            abbrev = "Cc";
            break;
        case PG_U_FORMAT:
            abbrev = "Cf";
            break;
        case PG_U_SURROGATE:
            abbrev = "Cs";
            break;
        case PG_U_PRIVATE_USE:
            abbrev = "Co";
            break;
        case PG_U_UNASSIGNED:
            abbrev = "Cn";
            break;
    }

    if (abbrev != NULL)
        return abbrev;

    Assert(false);
    return "??";                /* keep compiler quiet */
}


/*
 * Report whether 'code' falls inside any of the 'size' inclusive
 * [first, last] ranges of 'tbl'.
 *
 * The lookup is a binary search, so the table is assumed to be ordered by
 * code point.  An empty table (size == 0) yields false.
 */
static bool
range_search(const pg_unicode_range *tbl, size_t size, pg_wchar code)
{
    int         lo = 0;
    int         hi = size - 1;

    Assert(code <= 0x10ffff);

    while (lo <= hi)
    {
        int         probe = (lo + hi) / 2;
        const pg_unicode_range *range = &tbl[probe];

        if (code > range->last)
        {
            /* target lies above this range */
            lo = probe + 1;
            continue;
        }

        if (code < range->first)
        {
            /* target lies below this range */
            hi = probe - 1;
            continue;
        }

        /* first <= code <= last */
        return true;
    }

    return false;
}

uint32
uint32_t uint32
Definition: c.h:502

lengthof
#define lengthof(array)
Definition: c.h:759

Assert
Assert(PointerIsAligned(start, uint64))

pg_wchar
unsigned int pg_wchar
Definition: mbprint.c:31

postgres.h

postgres_fe.h

pg_category_range::category
uint8 category
Definition: unicode_category_table.h:25

pg_unicode_properties::properties
uint8 properties
Definition: unicode_category_table.h:37

pg_unicode_properties::category
uint8 category
Definition: unicode_category_table.h:36

pg_unicode_range
Definition: unicode_category_table.h:29

pg_u_prop_uppercase
bool pg_u_prop_uppercase(pg_wchar code)
Definition: unicode_category.c:133

PG_U_CC_MASK
#define PG_U_CC_MASK
Definition: unicode_category.c:68

pg_u_isspace
bool pg_u_isspace(pg_wchar code)
Definition: unicode_category.c:311

pg_u_isxdigit
bool pg_u_isxdigit(pg_wchar code, bool posix)
Definition: unicode_category.c:317

pg_u_ispunct
bool pg_u_ispunct(pg_wchar code, bool posix)
Definition: unicode_category.c:290

unicode_category_string
const char * unicode_category_string(pg_unicode_category category)
Definition: unicode_category.c:332

pg_u_isprint
bool pg_u_isprint(pg_wchar code)
Definition: unicode_category.c:279

pg_u_islower
bool pg_u_islower(pg_wchar code)
Definition: unicode_category.c:249

unicode_category_abbrev
const char * unicode_category_abbrev(pg_unicode_category category)
Definition: unicode_category.c:406

PG_U_CATEGORY_MASK
#define PG_U_CATEGORY_MASK(X)
Definition: unicode_category.c:32

PG_U_CHARACTER_TAB
#define PG_U_CHARACTER_TAB
Definition: unicode_category.c:76

PG_U_CS_MASK
#define PG_U_CS_MASK
Definition: unicode_category.c:70

pg_u_prop_white_space
bool pg_u_prop_white_space(pg_wchar code)
Definition: unicode_category.c:170

PG_U_PC_MASK
#define PG_U_PC_MASK
Definition: unicode_category.c:50

pg_u_isblank
bool pg_u_isblank(pg_wchar code)
Definition: unicode_category.c:255

pg_u_prop_cased
bool pg_u_prop_cased(pg_wchar code)
Definition: unicode_category.c:144

pg_u_isalpha
bool pg_u_isalpha(pg_wchar code)
Definition: unicode_category.c:220

pg_u_prop_lowercase
bool pg_u_prop_lowercase(pg_wchar code)
Definition: unicode_category.c:122

PG_U_ND_MASK
#define PG_U_ND_MASK
Definition: unicode_category.c:46

PG_U_M_MASK
#define PG_U_M_MASK
Definition: unicode_category.c:45

range_search
static bool range_search(const pg_unicode_range *tbl, size_t size, pg_wchar code)
Definition: unicode_category.c:481

PG_U_P_MASK
#define PG_U_P_MASK
Definition: unicode_category.c:57

pg_u_isalnum
bool pg_u_isalnum(pg_wchar code, bool posix)
Definition: unicode_category.c:226

pg_u_isupper
bool pg_u_isupper(pg_wchar code)
Definition: unicode_category.c:243

pg_u_prop_alphabetic
bool pg_u_prop_alphabetic(pg_wchar code)
Definition: unicode_category.c:111

PG_U_LT_MASK
#define PG_U_LT_MASK
Definition: unicode_category.c:36

pg_u_isdigit
bool pg_u_isdigit(pg_wchar code, bool posix)
Definition: unicode_category.c:211

pg_u_iscntrl
bool pg_u_iscntrl(pg_wchar code)
Definition: unicode_category.c:262

pg_u_prop_join_control
bool pg_u_prop_join_control(pg_wchar code)
Definition: unicode_category.c:192

pg_u_isgraph
bool pg_u_isgraph(pg_wchar code)
Definition: unicode_category.c:268

pg_u_isword
bool pg_u_isword(pg_wchar code)
Definition: unicode_category.c:232

pg_u_prop_case_ignorable
bool pg_u_prop_case_ignorable(pg_wchar code)
Definition: unicode_category.c:159

pg_u_prop_hex_digit
bool pg_u_prop_hex_digit(pg_wchar code)
Definition: unicode_category.c:181

unicode_category
pg_unicode_category unicode_category(pg_wchar code)
Definition: unicode_category.c:85

PG_U_S_MASK
#define PG_U_S_MASK
Definition: unicode_category.c:63

PG_U_CN_MASK
#define PG_U_CN_MASK
Definition: unicode_category.c:72

unicode_category.h

pg_unicode_category
pg_unicode_category
Definition: unicode_category.h:31

PG_U_CONNECTOR_PUNCTUATION
@ PG_U_CONNECTOR_PUNCTUATION
Definition: unicode_category.h:54

PG_U_OTHER_SYMBOL
@ PG_U_OTHER_SYMBOL
Definition: unicode_category.h:59

PG_U_DASH_PUNCTUATION
@ PG_U_DASH_PUNCTUATION
Definition: unicode_category.h:51

PG_U_UPPERCASE_LETTER
@ PG_U_UPPERCASE_LETTER
Definition: unicode_category.h:33

PG_U_DECIMAL_NUMBER
@ PG_U_DECIMAL_NUMBER
Definition: unicode_category.h:41

PG_U_CLOSE_PUNCTUATION
@ PG_U_CLOSE_PUNCTUATION
Definition: unicode_category.h:53

PG_U_NONSPACING_MARK
@ PG_U_NONSPACING_MARK
Definition: unicode_category.h:38

PG_U_INITIAL_PUNCTUATION
@ PG_U_INITIAL_PUNCTUATION
Definition: unicode_category.h:60

PG_U_CURRENCY_SYMBOL
@ PG_U_CURRENCY_SYMBOL
Definition: unicode_category.h:57

PG_U_LETTER_NUMBER
@ PG_U_LETTER_NUMBER
Definition: unicode_category.h:42

PG_U_MODIFIER_SYMBOL
@ PG_U_MODIFIER_SYMBOL
Definition: unicode_category.h:58

PG_U_SPACE_SEPARATOR
@ PG_U_SPACE_SEPARATOR
Definition: unicode_category.h:44

PG_U_OPEN_PUNCTUATION
@ PG_U_OPEN_PUNCTUATION
Definition: unicode_category.h:52

PG_U_FORMAT
@ PG_U_FORMAT
Definition: unicode_category.h:48

PG_U_PRIVATE_USE
@ PG_U_PRIVATE_USE
Definition: unicode_category.h:49

PG_U_OTHER_LETTER
@ PG_U_OTHER_LETTER
Definition: unicode_category.h:37

PG_U_PARAGRAPH_SEPARATOR
@ PG_U_PARAGRAPH_SEPARATOR
Definition: unicode_category.h:46

PG_U_CONTROL
@ PG_U_CONTROL
Definition: unicode_category.h:47

PG_U_SPACING_MARK
@ PG_U_SPACING_MARK
Definition: unicode_category.h:40

PG_U_TITLECASE_LETTER
@ PG_U_TITLECASE_LETTER
Definition: unicode_category.h:35

PG_U_OTHER_NUMBER
@ PG_U_OTHER_NUMBER
Definition: unicode_category.h:43

PG_U_MATH_SYMBOL
@ PG_U_MATH_SYMBOL
Definition: unicode_category.h:56

PG_U_LOWERCASE_LETTER
@ PG_U_LOWERCASE_LETTER
Definition: unicode_category.h:34

PG_U_LINE_SEPARATOR
@ PG_U_LINE_SEPARATOR
Definition: unicode_category.h:45

PG_U_UNASSIGNED
@ PG_U_UNASSIGNED
Definition: unicode_category.h:32

PG_U_SURROGATE
@ PG_U_SURROGATE
Definition: unicode_category.h:50

PG_U_FINAL_PUNCTUATION
@ PG_U_FINAL_PUNCTUATION
Definition: unicode_category.h:61

PG_U_MODIFIER_LETTER
@ PG_U_MODIFIER_LETTER
Definition: unicode_category.h:36

PG_U_OTHER_PUNCTUATION
@ PG_U_OTHER_PUNCTUATION
Definition: unicode_category.h:55

PG_U_ENCLOSING_MARK
@ PG_U_ENCLOSING_MARK
Definition: unicode_category.h:39

unicode_category_table.h

PG_U_PROP_ALPHABETIC
#define PG_U_PROP_ALPHABETIC
Definition: unicode_category_table.h:44

unicode_white_space
static const pg_unicode_range unicode_white_space[11]
Definition: unicode_category_table.h:7123

PG_U_PROP_LOWERCASE
#define PG_U_PROP_LOWERCASE
Definition: unicode_category_table.h:45

PG_U_PROP_JOIN_CONTROL
#define PG_U_PROP_JOIN_CONTROL
Definition: unicode_category_table.h:50

PG_U_PROP_UPPERCASE
#define PG_U_PROP_UPPERCASE
Definition: unicode_category_table.h:46

PG_U_PROP_HEX_DIGIT
#define PG_U_PROP_HEX_DIGIT
Definition: unicode_category_table.h:51

unicode_join_control
static const pg_unicode_range unicode_join_control[1]
Definition: unicode_category_table.h:7150

unicode_alphabetic
static const pg_unicode_range unicode_alphabetic[1179]
Definition: unicode_category_table.h:4072

unicode_lowercase
static const pg_unicode_range unicode_lowercase[690]
Definition: unicode_category_table.h:5256

unicode_uppercase
static const pg_unicode_range unicode_uppercase[656]
Definition: unicode_category_table.h:5951

unicode_case_ignorable
static const pg_unicode_range unicode_case_ignorable[506]
Definition: unicode_category_table.h:6612

unicode_opt_ascii
static const pg_unicode_properties unicode_opt_ascii[128]
Definition: unicode_category_table.h:54

PG_U_PROP_WHITE_SPACE
#define PG_U_PROP_WHITE_SPACE
Definition: unicode_category_table.h:49

unicode_hex_digit
static const pg_unicode_range unicode_hex_digit[6]
Definition: unicode_category_table.h:7139

PG_U_PROP_CASED
#define PG_U_PROP_CASED
Definition: unicode_category_table.h:47

unicode_categories
static const pg_category_range unicode_categories[3368]
Definition: unicode_category_table.h:699

PG_U_PROP_CASE_IGNORABLE
#define PG_U_PROP_CASE_IGNORABLE
Definition: unicode_category_table.h:48