34#define NONUTF8_INVALID_BYTE0 (0x8d)
35#define NONUTF8_INVALID_BYTE1 (' ')
72 while (
len > 0 && *from)
93 if (*s < 0x20 || *s == 0x7f)
107 while (
len > 0 && *from)
109 if (*from ==
SS2 &&
len >= 2)
113 *to = (
SS2 << 8) | *from++;
116 else if (*from ==
SS3 &&
len >= 3)
119 *to = (
SS3 << 16) | (*from++ << 8);
234 while (
len > 0 && *from)
236 if (*from ==
SS2 &&
len >= 3)
239 *to = (
SS2 << 16) | (*from++ << 8);
243 else if (*from ==
SS3 &&
len >= 3)
246 *to = (
SS3 << 16) | (*from++ << 8);
301 while (
len > 0 && *from)
303 if (*from ==
SS2 &&
len >= 4)
306 *to = (((
uint32)
SS2) << 24) | (*from++ << 16);
311 else if (*from ==
SS3 &&
len >= 3)
314 *to = (
SS3 << 16) | (*from++ << 8);
379 while (
len > 0 && *from)
383 if ((
c = (*from >> 24)))
386 *to++ = (*from >> 16) & 0xff;
387 *to++ = (*from >> 8) & 0xff;
388 *to++ = *from & 0xff;
391 else if ((
c = (*from >> 16)))
394 *to++ = (*from >> 8) & 0xff;
395 *to++ = *from & 0xff;
398 else if ((
c = (*from >> 8)))
401 *to++ = *from & 0xff;
447 while (
len > 0 && *from)
449 if ((*from & 0x80) == 0)
454 else if ((*from & 0xe0) == 0xc0)
460 *to = (c1 << 6) | c2;
463 else if ((*from & 0xf0) == 0xe0)
470 *to = (c1 << 12) | (c2 << 6) | c3;
473 else if ((*from & 0xf8) == 0xf0)
481 *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
509 while (
len > 0 && *from)
540 if ((*s & 0x80) == 0)
542 else if ((*s & 0xe0) == 0xc0)
544 else if ((*s & 0xf0) == 0xe0)
546 else if ((*s & 0xf8) == 0xf0)
549 else if ((*s & 0xfc) == 0xf8)
551 else if ((*s & 0xfe) == 0xfc)
584 if (ucs <
table[0].first || ucs >
table[max].last)
588 mid = (min + max) / 2;
589 if (ucs >
table[mid].last)
591 else if (ucs <
table[mid].first)
635 if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
676 while (
len > 0 && *from)
708 *to = (
unsigned char) *from++;
729 while (
len > 0 && *from)
733 lb = (*from >> 16) & 0xff;
737 *to++ = *from & 0xff;
743 *to++ = (*from >> 8) & 0xff;
744 *to++ = *from & 0xff;
751 *to++ = *from & 0xff;
758 *to++ = *from & 0xff;
765 *to++ = (*from >> 8) & 0xff;
766 *to++ = *from & 0xff;
773 *to++ = (*from >> 8) & 0xff;
774 *to++ = *from & 0xff;
779 *to++ = *from & 0xff;
841 while (
len > 0 && *from)
863 while (
len > 0 && *from)
893 if (*s >= 0xa1 && *s <= 0xdf)
907 if (*s >= 0xa1 && *s <= 0xdf)
1019 else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1069 const unsigned char *nullpos = memchr(s, 0,
len);
1071 if (nullpos == NULL)
1077#define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
1095 if (c2 < 0xa1 || c2 > 0xdf)
1137 const unsigned char *
start = s;
1195 const unsigned char *
start = s;
1222#define pg_euccn_verifychar pg_euckr_verifychar
1223#define pg_euccn_verifystr pg_euckr_verifystr
1241 if (c2 < 0xa1 || c2 > 0xa7)
1278 const unsigned char *
start = s;
1331 const unsigned char *
start = s;
1381 const unsigned char *
start = s;
1416 const unsigned char *nullpos = memchr(s, 0,
len);
1418 if (nullpos == NULL)
1450 const unsigned char *
start = s;
1504 const unsigned char *
start = s;
1558 const unsigned char *
start = s;
1612 const unsigned char *
start = s;
1645 else if (
len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1648 if (*s >= 0x81 && *s <= 0xfe &&
1649 *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
1650 *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
1655 else if (
len >= 2 && *s >= 0x81 && *s <= 0xfe)
1658 if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
1659 (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
1672 const unsigned char *
start = s;
1703 if ((*s & 0x80) == 0)
1709 else if ((*s & 0xe0) == 0xc0)
1711 else if ((*s & 0xf0) == 0xe0)
1713 else if ((*s & 0xf8) == 0xf0)
1795#define ASC (END << BGN)
1797#define L2A (CS1 << BGN)
1799#define L3A (P3A << BGN)
1800#define L3B (CS2 << BGN)
1801#define L3C (P3B << BGN)
1803#define L4A (P4A << BGN)
1804#define L4B (CS3 << BGN)
1805#define L4C (P4B << BGN)
1807#define CR1 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)
1808#define CR2 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)
1809#define CR3 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)
1891 const unsigned char *
start = s;
1892 const int orig_len =
len;
1900#define STRIDE_LENGTH (2 * sizeof(Vector8))
1998 if (a < 0x80 || a > 0xBF)
2003 if (a < 0x80 || a > 0xBF)
2011 if (a < 0xA0 || a > 0xBF)
2015 if (a < 0x80 || a > 0x9F)
2019 if (a < 0x90 || a > 0xBF)
2023 if (a < 0x80 || a > 0x8F)
2027 if (a < 0x80 || a > 0xBF)
2034 if (
a >= 0x80 &&
a < 0xC2)
static bool is_valid_ascii(const unsigned char *s, int len)
#define IS_HIGHBIT_SET(ch)
Assert(PointerIsAligned(start, uint64))
static pg_wchar utf8_to_unicode(const unsigned char *c)
static const struct lconv_member_info table[]
static rewind_source * source
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
#define IS_LCPRV2_B_RANGE(c)
#define IS_LCPRV1_A_RANGE(c)
#define PG_VALID_ENCODING(_enc)
#define IS_LCPRV1_B_RANGE(c)
#define IS_LCPRV2_A_RANGE(c)
size_t strnlen(const char *str, size_t maxlen)
static const struct mbinterval east_asian_fw[]
static const struct mbinterval nonspacing[]
static int pg_uhc_verifystr(const unsigned char *s, int len)
static int pg_latin1_dsplen(const unsigned char *s)
int pg_encoding_mblen_bounded(int encoding, const char *mbstr)
static int pg_euctw_mblen(const unsigned char *s)
static int pg_euckr_dsplen(const unsigned char *s)
static const uint32 Utf8Transition[256]
bool pg_utf8_islegal(const unsigned char *source, int length)
static int pg_ascii_verifystr(const unsigned char *s, int len)
static int pg_latin1_verifychar(const unsigned char *s, int len)
static int pg_sjis_dsplen(const unsigned char *s)
static int pg_sjis_verifychar(const unsigned char *s, int len)
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_eucjp_dsplen(const unsigned char *s)
static int pg_ascii2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_uhc_verifychar(const unsigned char *s, int len)
static int pg_gbk_dsplen(const unsigned char *s)
static int pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
#define pg_euccn_verifychar
static int pg_sjis_verifystr(const unsigned char *s, int len)
static int pg_johab_mblen(const unsigned char *s)
static int pg_johab_dsplen(const unsigned char *s)
static int pg_big5_verifystr(const unsigned char *s, int len)
static int pg_mule_verifychar(const unsigned char *s, int len)
static int pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_latin1_verifystr(const unsigned char *s, int len)
static int pg_latin1_mblen(const unsigned char *s)
static int pg_ascii_verifychar(const unsigned char *s, int len)
static int pg_ascii_mblen(const unsigned char *s)
void pg_encoding_set_invalid(int encoding, char *dst)
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
static int pg_big5_dsplen(const unsigned char *s)
#define pg_euccn_verifystr
int pg_utf_mblen(const unsigned char *s)
#define NONUTF8_INVALID_BYTE0
static int pg_eucjp_mblen(const unsigned char *s)
static int pg_gbk_verifychar(const unsigned char *s, int len)
static int pg_big5_mblen(const unsigned char *s)
static int pg_euccn_dsplen(const unsigned char *s)
static int pg_euctw_verifychar(const unsigned char *s, int len)
static int pg_euckr_verifychar(const unsigned char *s, int len)
static int pg_euctw_verifystr(const unsigned char *s, int len)
static int pg_gbk_verifystr(const unsigned char *s, int len)
static int pg_gb18030_dsplen(const unsigned char *s)
static int pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
int pg_mule_mblen(const unsigned char *s)
static int pg_euccn_mblen(const unsigned char *s)
static int pg_gbk_mblen(const unsigned char *s)
static int pg_eucjp_verifystr(const unsigned char *s, int len)
static int pg_johab_verifystr(const unsigned char *s, int len)
static int pg_euc_dsplen(const unsigned char *s)
static int pg_gb18030_verifystr(const unsigned char *s, int len)
static int pg_euckr_verifystr(const unsigned char *s, int len)
static int pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_sjis_mblen(const unsigned char *s)
#define IS_EUC_RANGE_VALID(c)
static int pg_uhc_dsplen(const unsigned char *s)
static int pg_eucjp_verifychar(const unsigned char *s, int len)
static int pg_big5_verifychar(const unsigned char *s, int len)
static int pg_gb18030_verifychar(const unsigned char *s, int len)
static int pg_mule_verifystr(const unsigned char *s, int len)
static int pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_utf8_verifychar(const unsigned char *s, int len)
static int pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_gb18030_mblen(const unsigned char *s)
int pg_encoding_dsplen(int encoding, const char *mbstr)
static void utf8_advance(const unsigned char *s, uint32 *state, int len)
static int pg_euctw_dsplen(const unsigned char *s)
static int pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_uhc_mblen(const unsigned char *s)
static int pg_euc_mblen(const unsigned char *s)
static int pg_mule_dsplen(const unsigned char *s)
int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)
#define NONUTF8_INVALID_BYTE1
static int pg_utf8_verifystr(const unsigned char *s, int len)
static int pg_euckr_mblen(const unsigned char *s)
const pg_wchar_tbl pg_wchar_table[]
static int pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
int pg_encoding_max_length(int encoding)
int pg_encoding_mblen(int encoding, const char *mbstr)
static int pg_johab_verifychar(const unsigned char *s, int len)
static int pg_ascii_dsplen(const unsigned char *s)
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
static int ucs_wcwidth(pg_wchar ucs)
static int pg_utf_dsplen(const unsigned char *s)