36#define NONUTF8_INVALID_BYTE0 (0x8d)
37#define NONUTF8_INVALID_BYTE1 (' ')
67#define MB2CHAR_NEED_AT_LEAST(len, need) if ((len) < (need)) break
77 while (
len > 0 && *from)
98 if (*s < 0x20 || *s == 0x7f)
112 while (
len > 0 && *from)
118 *to = (
SS2 << 8) | *from++;
121 else if (*from ==
SS3)
125 *to = (
SS3 << 16) | (*from++ << 8);
241 while (
len > 0 && *from)
247 *to = (
SS2 << 16) | (*from++ << 8);
251 else if (*from ==
SS3)
255 *to = (
SS3 << 16) | (*from++ << 8);
321 while (
len > 0 && *from)
327 *to = (((
uint32)
SS2) << 24) | (*from++ << 16);
332 else if (*from ==
SS3)
336 *to = (
SS3 << 16) | (*from++ << 8);
402 while (
len > 0 && *from)
406 if ((
c = (*from >> 24)))
409 *to++ = (*from >> 16) & 0xff;
410 *to++ = (*from >> 8) & 0xff;
411 *to++ = *from & 0xff;
414 else if ((
c = (*from >> 16)))
417 *to++ = (*from >> 8) & 0xff;
418 *to++ = *from & 0xff;
421 else if ((
c = (*from >> 8)))
424 *to++ = *from & 0xff;
470 while (
len > 0 && *from)
472 if ((*from & 0x80) == 0)
477 else if ((*from & 0xe0) == 0xc0)
482 *to = (
c1 << 6) |
c2;
485 else if ((*from & 0xf0) == 0xe0)
491 *to = (
c1 << 12) | (
c2 << 6) |
c3;
494 else if ((*from & 0xf8) == 0xf0)
501 *to = (
c1 << 18) | (
c2 << 12) | (
c3 << 6) |
c4;
529 while (
len > 0 && *from)
560 if ((*s & 0x80) == 0)
562 else if ((*s & 0xe0) == 0xc0)
564 else if ((*s & 0xf0) == 0xe0)
566 else if ((*s & 0xf8) == 0xf0)
569 else if ((*s & 0xfc) == 0xf8)
571 else if ((*s & 0xfe) == 0xfc)
608 mid = (min + max) / 2;
655 if (
ucs < 0x20 || (
ucs >= 0x7f &&
ucs < 0xa0) ||
ucs > 0x0010ffff)
696 while (
len > 0 && *from)
732 *to = (
unsigned char) *from++;
753 while (
len > 0 && *from)
757 lb = (*from >> 16) & 0xff;
761 *to++ = *from & 0xff;
767 *to++ = (*from >> 8) & 0xff;
768 *to++ = *from & 0xff;
775 *to++ = *from & 0xff;
782 *to++ = *from & 0xff;
789 *to++ = (*from >> 8) & 0xff;
790 *to++ = *from & 0xff;
797 *to++ = (*from >> 8) & 0xff;
798 *to++ = *from & 0xff;
803 *to++ = *from & 0xff;
865 while (
len > 0 && *from)
887 while (
len > 0 && *from)
917 if (*s >= 0xa1 && *s <= 0xdf)
931 if (*s >= 0xa1 && *s <= 0xdf)
1043 else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1101#define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
1161 const unsigned char *
start = s;
1219 const unsigned char *
start = s;
1246#define pg_euccn_verifychar pg_euckr_verifychar
1247#define pg_euccn_verifystr pg_euckr_verifystr
1302 const unsigned char *
start = s;
1355 const unsigned char *
start = s;
1405 const unsigned char *
start = s;
1474 const unsigned char *
start = s;
1528 const unsigned char *
start = s;
1582 const unsigned char *
start = s;
1636 const unsigned char *
start = s;
1669 else if (
len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1672 if (*s >= 0x81 && *s <= 0xfe &&
1673 *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
1674 *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
1679 else if (
len >= 2 && *s >= 0x81 && *s <= 0xfe)
1682 if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
1683 (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
1696 const unsigned char *
start = s;
1727 if ((*s & 0x80) == 0)
1733 else if ((*s & 0xe0) == 0xc0)
1735 else if ((*s & 0xf0) == 0xe0)
1737 else if ((*s & 0xf8) == 0xf0)
1819#define ASC (END << BGN)
1821#define L2A (CS1 << BGN)
1823#define L3A (P3A << BGN)
1824#define L3B (CS2 << BGN)
1825#define L3C (P3B << BGN)
1827#define L4A (P4A << BGN)
1828#define L4B (CS3 << BGN)
1829#define L4C (P4B << BGN)
1831#define CR1 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)
1832#define CR2 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)
1833#define CR3 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)
1915 const unsigned char *
start = s;
1924#define STRIDE_LENGTH (2 * sizeof(Vector8))
2058 if (
a >= 0x80 &&
a < 0xC2)
static bool is_valid_ascii(const unsigned char *s, int len)
#define IS_HIGHBIT_SET(ch)
#define Assert(condition)
static char32_t utf8_to_unicode(const unsigned char *c)
static const struct lconv_member_info table[]
static rewind_source * source
static unsigned char * unicode_to_utf8(char32_t c, unsigned char *utf8string)
#define IS_LCPRV2_B_RANGE(c)
#define IS_LCPRV1_A_RANGE(c)
#define PG_VALID_ENCODING(_enc)
#define IS_LCPRV1_B_RANGE(c)
#define IS_LCPRV2_A_RANGE(c)
mbstr_verifier mbverifystr
mbdisplaylen_converter dsplen
mbchar_verifier mbverifychar
static const struct mbinterval east_asian_fw[]
static const struct mbinterval nonspacing[]
static int pg_uhc_verifystr(const unsigned char *s, int len)
static int pg_latin1_dsplen(const unsigned char *s)
int pg_encoding_mblen_bounded(int encoding, const char *mbstr)
static int pg_euctw_mblen(const unsigned char *s)
static int pg_euckr_dsplen(const unsigned char *s)
static const uint32 Utf8Transition[256]
bool pg_utf8_islegal(const unsigned char *source, int length)
static int pg_ascii_verifystr(const unsigned char *s, int len)
static int pg_latin1_verifychar(const unsigned char *s, int len)
static int pg_sjis_dsplen(const unsigned char *s)
static int pg_sjis_verifychar(const unsigned char *s, int len)
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_eucjp_dsplen(const unsigned char *s)
static int pg_ascii2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_uhc_verifychar(const unsigned char *s, int len)
static int pg_gbk_dsplen(const unsigned char *s)
static int pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
#define pg_euccn_verifychar
static int pg_sjis_verifystr(const unsigned char *s, int len)
static int pg_johab_mblen(const unsigned char *s)
static int pg_johab_dsplen(const unsigned char *s)
static int pg_big5_verifystr(const unsigned char *s, int len)
static int pg_mule_verifychar(const unsigned char *s, int len)
static int pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_latin1_verifystr(const unsigned char *s, int len)
static int pg_latin1_mblen(const unsigned char *s)
static int pg_ascii_verifychar(const unsigned char *s, int len)
static int pg_ascii_mblen(const unsigned char *s)
void pg_encoding_set_invalid(int encoding, char *dst)
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
static int pg_big5_dsplen(const unsigned char *s)
#define pg_euccn_verifystr
int pg_encoding_mblen_or_incomplete(int encoding, const char *mbstr, size_t remaining)
#define NONUTF8_INVALID_BYTE0
static int pg_eucjp_mblen(const unsigned char *s)
static int pg_gbk_verifychar(const unsigned char *s, int len)
static int pg_big5_mblen(const unsigned char *s)
static int pg_euccn_dsplen(const unsigned char *s)
static int pg_euctw_verifychar(const unsigned char *s, int len)
static int pg_euckr_verifychar(const unsigned char *s, int len)
static int pg_euctw_verifystr(const unsigned char *s, int len)
static int pg_gbk_verifystr(const unsigned char *s, int len)
static int pg_gb18030_dsplen(const unsigned char *s)
static int pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
int pg_mule_mblen(const unsigned char *s)
static int pg_euccn_mblen(const unsigned char *s)
static int pg_gbk_mblen(const unsigned char *s)
static int pg_eucjp_verifystr(const unsigned char *s, int len)
static int pg_johab_verifystr(const unsigned char *s, int len)
static int pg_euc_dsplen(const unsigned char *s)
static int pg_gb18030_verifystr(const unsigned char *s, int len)
static int pg_euckr_verifystr(const unsigned char *s, int len)
static int pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_sjis_mblen(const unsigned char *s)
#define IS_EUC_RANGE_VALID(c)
static int pg_uhc_dsplen(const unsigned char *s)
static int pg_eucjp_verifychar(const unsigned char *s, int len)
static int pg_big5_verifychar(const unsigned char *s, int len)
static int pg_gb18030_verifychar(const unsigned char *s, int len)
static int pg_mule_verifystr(const unsigned char *s, int len)
static int pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_utf8_verifychar(const unsigned char *s, int len)
#define MB2CHAR_NEED_AT_LEAST(len, need)
static int pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_gb18030_mblen(const unsigned char *s)
int pg_encoding_dsplen(int encoding, const char *mbstr)
static void utf8_advance(const unsigned char *s, uint32 *state, int len)
static int pg_euctw_dsplen(const unsigned char *s)
static int pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_uhc_mblen(const unsigned char *s)
static int pg_euc_mblen(const unsigned char *s)
static int pg_mule_dsplen(const unsigned char *s)
int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)
#define NONUTF8_INVALID_BYTE1
static int pg_utf8_verifystr(const unsigned char *s, int len)
static int pg_euckr_mblen(const unsigned char *s)
const pg_wchar_tbl pg_wchar_table[]
static int pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
int pg_encoding_max_length(int encoding)
int pg_encoding_mblen(int encoding, const char *mbstr)
static int pg_johab_verifychar(const unsigned char *s, int len)
static int pg_ascii_dsplen(const unsigned char *s)
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
static int ucs_wcwidth(pg_wchar ucs)
static int pg_utf_dsplen(const unsigned char *s)