36#define NONUTF8_INVALID_BYTE0 (0x8d)
37#define NONUTF8_INVALID_BYTE1 (' ')
67#define MB2CHAR_NEED_AT_LEAST(len, need) if ((len) < (need)) break
77 while (
len > 0 && *from)
98 if (*s < 0x20 || *s == 0x7f)
112 while (
len > 0 && *from)
118 *to = (
SS2 << 8) | *from++;
121 else if (*from ==
SS3)
125 *to = (
SS3 << 16) | (*from++ << 8);
241 while (
len > 0 && *from)
247 *to = (
SS2 << 16) | (*from++ << 8);
251 else if (*from ==
SS3)
255 *to = (
SS3 << 16) | (*from++ << 8);
321 while (
len > 0 && *from)
327 *to = (((
uint32)
SS2) << 24) | (*from++ << 16);
332 else if (*from ==
SS3)
336 *to = (
SS3 << 16) | (*from++ << 8);
402 while (
len > 0 && *from)
406 if ((
c = (*from >> 24)))
409 *to++ = (*from >> 16) & 0xff;
410 *to++ = (*from >> 8) & 0xff;
411 *to++ = *from & 0xff;
414 else if ((
c = (*from >> 16)))
417 *to++ = (*from >> 8) & 0xff;
418 *to++ = *from & 0xff;
421 else if ((
c = (*from >> 8)))
424 *to++ = *from & 0xff;
470 while (
len > 0 && *from)
472 if ((*from & 0x80) == 0)
477 else if ((*from & 0xe0) == 0xc0)
482 *to = (
c1 << 6) |
c2;
485 else if ((*from & 0xf0) == 0xe0)
491 *to = (
c1 << 12) | (
c2 << 6) |
c3;
494 else if ((*from & 0xf8) == 0xf0)
501 *to = (
c1 << 18) | (
c2 << 12) | (
c3 << 6) |
c4;
529 while (
len > 0 && *from)
560 if ((*s & 0x80) == 0)
562 else if ((*s & 0xe0) == 0xc0)
564 else if ((*s & 0xf0) == 0xe0)
566 else if ((*s & 0xf8) == 0xf0)
569 else if ((*s & 0xfc) == 0xf8)
571 else if ((*s & 0xfe) == 0xfc)
608 mid = (min + max) / 2;
655 if (
ucs < 0x20 || (
ucs >= 0x7f &&
ucs < 0xa0) ||
ucs > 0x0010ffff)
693 while (
len > 0 && *from)
715 while (
len > 0 && *from)
745 if (*s >= 0xa1 && *s <= 0xdf)
759 if (*s >= 0xa1 && *s <= 0xdf)
871 else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
929#define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
989 const unsigned char *
start = s;
1047 const unsigned char *
start = s;
1074#define pg_euccn_verifychar pg_euckr_verifychar
1075#define pg_euccn_verifystr pg_euckr_verifystr
1130 const unsigned char *
start = s;
1183 const unsigned char *
start = s;
1252 const unsigned char *
start = s;
1306 const unsigned char *
start = s;
1360 const unsigned char *
start = s;
1414 const unsigned char *
start = s;
1447 else if (
len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1450 if (*s >= 0x81 && *s <= 0xfe &&
1451 *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
1452 *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
1457 else if (
len >= 2 && *s >= 0x81 && *s <= 0xfe)
1460 if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
1461 (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
1474 const unsigned char *
start = s;
1505 if ((*s & 0x80) == 0)
1511 else if ((*s & 0xe0) == 0xc0)
1513 else if ((*s & 0xf0) == 0xe0)
1515 else if ((*s & 0xf8) == 0xf0)
1597#define ASC (END << BGN)
1599#define L2A (CS1 << BGN)
1601#define L3A (P3A << BGN)
1602#define L3B (CS2 << BGN)
1603#define L3C (P3B << BGN)
1605#define L4A (P4A << BGN)
1606#define L4B (CS3 << BGN)
1607#define L4C (P4B << BGN)
1609#define CR1 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)
1610#define CR2 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)
1611#define CR3 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)
1693 const unsigned char *
start = s;
1702#define STRIDE_LENGTH (2 * sizeof(Vector8))
1836 if (
a >= 0x80 &&
a < 0xC2)
static bool is_valid_ascii(const unsigned char *s, int len)
#define IS_HIGHBIT_SET(ch)
#define Assert(condition)
static char32_t utf8_to_unicode(const unsigned char *c)
static const struct lconv_member_info table[]
static rewind_source * source
static unsigned char * unicode_to_utf8(char32_t c, unsigned char *utf8string)
#define PG_VALID_ENCODING(_enc)
mbstr_verifier mbverifystr
mbdisplaylen_converter dsplen
mbchar_verifier mbverifychar
static const struct mbinterval east_asian_fw[]
static const struct mbinterval nonspacing[]
static int pg_uhc_verifystr(const unsigned char *s, int len)
static int pg_latin1_dsplen(const unsigned char *s)
int pg_encoding_mblen_bounded(int encoding, const char *mbstr)
static int pg_euctw_mblen(const unsigned char *s)
static int pg_euckr_dsplen(const unsigned char *s)
static const uint32 Utf8Transition[256]
bool pg_utf8_islegal(const unsigned char *source, int length)
static int pg_ascii_verifystr(const unsigned char *s, int len)
static int pg_latin1_verifychar(const unsigned char *s, int len)
static int pg_sjis_dsplen(const unsigned char *s)
static int pg_sjis_verifychar(const unsigned char *s, int len)
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_eucjp_dsplen(const unsigned char *s)
static int pg_ascii2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_uhc_verifychar(const unsigned char *s, int len)
static int pg_gbk_dsplen(const unsigned char *s)
static int pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
#define pg_euccn_verifychar
static int pg_sjis_verifystr(const unsigned char *s, int len)
static int pg_johab_mblen(const unsigned char *s)
static int pg_johab_dsplen(const unsigned char *s)
static int pg_big5_verifystr(const unsigned char *s, int len)
static int pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_latin1_verifystr(const unsigned char *s, int len)
static int pg_latin1_mblen(const unsigned char *s)
static int pg_ascii_verifychar(const unsigned char *s, int len)
static int pg_ascii_mblen(const unsigned char *s)
void pg_encoding_set_invalid(int encoding, char *dst)
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
static int pg_big5_dsplen(const unsigned char *s)
#define pg_euccn_verifystr
int pg_encoding_mblen_or_incomplete(int encoding, const char *mbstr, size_t remaining)
#define NONUTF8_INVALID_BYTE0
static int pg_eucjp_mblen(const unsigned char *s)
static int pg_gbk_verifychar(const unsigned char *s, int len)
static int pg_big5_mblen(const unsigned char *s)
static int pg_euccn_dsplen(const unsigned char *s)
static int pg_euctw_verifychar(const unsigned char *s, int len)
static int pg_euckr_verifychar(const unsigned char *s, int len)
static int pg_euctw_verifystr(const unsigned char *s, int len)
static int pg_gbk_verifystr(const unsigned char *s, int len)
static int pg_gb18030_dsplen(const unsigned char *s)
static int pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_euccn_mblen(const unsigned char *s)
static int pg_gbk_mblen(const unsigned char *s)
static int pg_eucjp_verifystr(const unsigned char *s, int len)
static int pg_johab_verifystr(const unsigned char *s, int len)
static int pg_euc_dsplen(const unsigned char *s)
static int pg_gb18030_verifystr(const unsigned char *s, int len)
static int pg_euckr_verifystr(const unsigned char *s, int len)
static int pg_sjis_mblen(const unsigned char *s)
#define IS_EUC_RANGE_VALID(c)
static int pg_uhc_dsplen(const unsigned char *s)
static int pg_eucjp_verifychar(const unsigned char *s, int len)
static int pg_big5_verifychar(const unsigned char *s, int len)
static int pg_gb18030_verifychar(const unsigned char *s, int len)
static int pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_utf8_verifychar(const unsigned char *s, int len)
#define MB2CHAR_NEED_AT_LEAST(len, need)
static int pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_gb18030_mblen(const unsigned char *s)
int pg_encoding_dsplen(int encoding, const char *mbstr)
static void utf8_advance(const unsigned char *s, uint32 *state, int len)
static int pg_euctw_dsplen(const unsigned char *s)
static int pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_uhc_mblen(const unsigned char *s)
static int pg_euc_mblen(const unsigned char *s)
int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)
#define NONUTF8_INVALID_BYTE1
static int pg_utf8_verifystr(const unsigned char *s, int len)
static int pg_euckr_mblen(const unsigned char *s)
const pg_wchar_tbl pg_wchar_table[]
int pg_encoding_max_length(int encoding)
int pg_encoding_mblen(int encoding, const char *mbstr)
static int pg_johab_verifychar(const unsigned char *s, int len)
static int pg_ascii_dsplen(const unsigned char *s)
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
static int ucs_wcwidth(pg_wchar ucs)
static int pg_utf_dsplen(const unsigned char *s)