52 while (
len > 0 && *from)
73 if (*s < 0x20 || *s == 0x7f)
87 while (
len > 0 && *from)
89 if (*from ==
SS2 &&
len >= 2)
93 *to = (
SS2 << 8) | *from++;
96 else if (*from ==
SS3 &&
len >= 3)
99 *to = (
SS3 << 16) | (*from++ << 8);
214 while (
len > 0 && *from)
216 if (*from ==
SS2 &&
len >= 3)
219 *to = (
SS2 << 16) | (*from++ << 8);
223 else if (*from ==
SS3 &&
len >= 3)
226 *to = (
SS3 << 16) | (*from++ << 8);
281 while (
len > 0 && *from)
283 if (*from ==
SS2 &&
len >= 4)
286 *to = (((
uint32)
SS2) << 24) | (*from++ << 16);
291 else if (*from ==
SS3 &&
len >= 3)
294 *to = (
SS3 << 16) | (*from++ << 8);
359 while (
len > 0 && *from)
363 if ((
c = (*from >> 24)))
366 *to++ = (*from >> 16) & 0xff;
367 *to++ = (*from >> 8) & 0xff;
368 *to++ = *from & 0xff;
371 else if ((
c = (*from >> 16)))
374 *to++ = (*from >> 8) & 0xff;
375 *to++ = *from & 0xff;
378 else if ((
c = (*from >> 8)))
381 *to++ = *from & 0xff;
427 while (
len > 0 && *from)
429 if ((*from & 0x80) == 0)
434 else if ((*from & 0xe0) == 0xc0)
440 *to = (c1 << 6) | c2;
443 else if ((*from & 0xf0) == 0xe0)
450 *to = (c1 << 12) | (c2 << 6) | c3;
453 else if ((*from & 0xf8) == 0xf0)
461 *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
491 utf8string[0] = 0xC0 | ((
c >> 6) & 0x1F);
492 utf8string[1] = 0x80 | (
c & 0x3F);
494 else if (
c <= 0xFFFF)
496 utf8string[0] = 0xE0 | ((
c >> 12) & 0x0F);
497 utf8string[1] = 0x80 | ((
c >> 6) & 0x3F);
498 utf8string[2] = 0x80 | (
c & 0x3F);
502 utf8string[0] = 0xF0 | ((
c >> 18) & 0x07);
503 utf8string[1] = 0x80 | ((
c >> 12) & 0x3F);
504 utf8string[2] = 0x80 | ((
c >> 6) & 0x3F);
505 utf8string[3] = 0x80 | (
c & 0x3F);
522 while (
len > 0 && *from)
553 if ((*s & 0x80) == 0)
555 else if ((*s & 0xe0) == 0xc0)
557 else if ((*s & 0xf0) == 0xe0)
559 else if ((*s & 0xf8) == 0xf0)
562 else if ((*s & 0xfc) == 0xf8)
564 else if ((*s & 0xfe) == 0xfc)
597 if (ucs < table[0].first || ucs > table[max].last)
601 mid = (min + max) / 2;
602 if (ucs > table[mid].last)
604 else if (ucs < table[mid].first)
648 if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
681 if ((*
c & 0x80) == 0)
683 else if ((*
c & 0xe0) == 0xc0)
684 return (
pg_wchar) (((
c[0] & 0x1f) << 6) |
686 else if ((*
c & 0xf0) == 0xe0)
687 return (
pg_wchar) (((
c[0] & 0x0f) << 12) |
688 ((
c[1] & 0x3f) << 6) |
690 else if ((*
c & 0xf8) == 0xf0)
691 return (
pg_wchar) (((
c[0] & 0x07) << 18) |
692 ((
c[1] & 0x3f) << 12) |
693 ((
c[2] & 0x3f) << 6) |
717 while (
len > 0 && *from)
749 *to = (
unsigned char) *from++;
770 while (
len > 0 && *from)
774 lb = (*from >> 16) & 0xff;
778 *to++ = *from & 0xff;
784 *to++ = (*from >> 8) & 0xff;
785 *to++ = *from & 0xff;
792 *to++ = *from & 0xff;
799 *to++ = *from & 0xff;
806 *to++ = (*from >> 8) & 0xff;
807 *to++ = *from & 0xff;
814 *to++ = (*from >> 8) & 0xff;
815 *to++ = *from & 0xff;
820 *to++ = *from & 0xff;
882 while (
len > 0 && *from)
904 while (
len > 0 && *from)
934 if (*s >= 0xa1 && *s <= 0xdf)
948 if (*s >= 0xa1 && *s <= 0xdf)
1060 else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1110 const unsigned char *nullpos = memchr(s, 0,
len);
1112 if (nullpos == NULL)
1118 #define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
1136 if (c2 < 0xa1 || c2 > 0xdf)
1178 const unsigned char *start = s;
1236 const unsigned char *start = s;
1263 #define pg_euccn_verifychar pg_euckr_verifychar
1264 #define pg_euccn_verifystr pg_euckr_verifystr
1282 if (c2 < 0xa1 || c2 > 0xa7)
1319 const unsigned char *start = s;
1372 const unsigned char *start = s;
1422 const unsigned char *start = s;
1457 const unsigned char *nullpos = memchr(s, 0,
len);
1459 if (nullpos == NULL)
1491 const unsigned char *start = s;
1540 const unsigned char *start = s;
1589 const unsigned char *start = s;
1638 const unsigned char *start = s;
1671 else if (
len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1674 if (*s >= 0x81 && *s <= 0xfe &&
1675 *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
1676 *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
1681 else if (
len >= 2 && *s >= 0x81 && *s <= 0xfe)
1684 if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
1685 (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
1698 const unsigned char *start = s;
1729 if ((*s & 0x80) == 0)
1735 else if ((*s & 0xe0) == 0xc0)
1737 else if ((*s & 0xf0) == 0xe0)
1739 else if ((*s & 0xf8) == 0xf0)
1821 #define ASC (END << BGN)
1823 #define L2A (CS1 << BGN)
1825 #define L3A (P3A << BGN)
1826 #define L3B (CS2 << BGN)
1827 #define L3C (P3B << BGN)
1829 #define L4A (P4A << BGN)
1830 #define L4B (CS3 << BGN)
1831 #define L4C (P4B << BGN)
1833 #define CR1 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)
1834 #define CR2 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)
1835 #define CR3 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)
1917 const unsigned char *start = s;
1918 const int orig_len =
len;
1926 #define STRIDE_LENGTH (2 * sizeof(Vector8))
2024 if (a < 0x80 || a > 0xBF)
2029 if (a < 0x80 || a > 0xBF)
2037 if (a < 0xA0 || a > 0xBF)
2041 if (a < 0x80 || a > 0x9F)
2045 if (a < 0x90 || a > 0xBF)
2049 if (a < 0x80 || a > 0x8F)
2053 if (a < 0x80 || a > 0xBF)
2060 if (
a >= 0x80 &&
a < 0xC2)
#define IS_HIGHBIT_SET(ch)
Assert(fmt[strlen(fmt) - 1] !='\n')
static rewind_source * source
#define IS_LCPRV2_B_RANGE(c)
static bool is_valid_ascii(const unsigned char *s, int len)
#define IS_LCPRV1_A_RANGE(c)
#define PG_VALID_ENCODING(_enc)
#define IS_LCPRV1_B_RANGE(c)
#define IS_LCPRV2_A_RANGE(c)
size_t strnlen(const char *str, size_t maxlen)
static const struct mbinterval east_asian_fw[]
static const struct mbinterval nonspacing[]
static int pg_uhc_verifystr(const unsigned char *s, int len)
static int pg_latin1_dsplen(const unsigned char *s)
int pg_encoding_mblen_bounded(int encoding, const char *mbstr)
static int pg_euctw_mblen(const unsigned char *s)
static int pg_euckr_dsplen(const unsigned char *s)
static const uint32 Utf8Transition[256]
bool pg_utf8_islegal(const unsigned char *source, int length)
static int pg_ascii_verifystr(const unsigned char *s, int len)
static int pg_latin1_verifychar(const unsigned char *s, int len)
static int pg_sjis_dsplen(const unsigned char *s)
static int pg_sjis_verifychar(const unsigned char *s, int len)
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_eucjp_dsplen(const unsigned char *s)
static int pg_ascii2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_uhc_verifychar(const unsigned char *s, int len)
static int pg_gbk_dsplen(const unsigned char *s)
static int pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
#define pg_euccn_verifychar
static int pg_sjis_verifystr(const unsigned char *s, int len)
static int pg_johab_mblen(const unsigned char *s)
unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
static int pg_johab_dsplen(const unsigned char *s)
static int pg_big5_verifystr(const unsigned char *s, int len)
static int pg_mule_verifychar(const unsigned char *s, int len)
static int pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_latin1_verifystr(const unsigned char *s, int len)
static int pg_latin1_mblen(const unsigned char *s)
static int pg_ascii_verifychar(const unsigned char *s, int len)
static int pg_ascii_mblen(const unsigned char *s)
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
static int pg_big5_dsplen(const unsigned char *s)
#define pg_euccn_verifystr
int pg_utf_mblen(const unsigned char *s)
static int pg_eucjp_mblen(const unsigned char *s)
static int pg_gbk_verifychar(const unsigned char *s, int len)
static int pg_big5_mblen(const unsigned char *s)
static int pg_euccn_dsplen(const unsigned char *s)
static int pg_euctw_verifychar(const unsigned char *s, int len)
static int pg_euckr_verifychar(const unsigned char *s, int len)
static int pg_euctw_verifystr(const unsigned char *s, int len)
static int pg_gbk_verifystr(const unsigned char *s, int len)
static int pg_gb18030_dsplen(const unsigned char *s)
static int pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
int pg_mule_mblen(const unsigned char *s)
static int pg_euccn_mblen(const unsigned char *s)
static int pg_gbk_mblen(const unsigned char *s)
static int pg_eucjp_verifystr(const unsigned char *s, int len)
static int pg_johab_verifystr(const unsigned char *s, int len)
static int pg_euc_dsplen(const unsigned char *s)
static int pg_gb18030_verifystr(const unsigned char *s, int len)
static int pg_euckr_verifystr(const unsigned char *s, int len)
static int pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_sjis_mblen(const unsigned char *s)
#define IS_EUC_RANGE_VALID(c)
pg_wchar utf8_to_unicode(const unsigned char *c)
static int pg_uhc_dsplen(const unsigned char *s)
static int pg_eucjp_verifychar(const unsigned char *s, int len)
static int pg_big5_verifychar(const unsigned char *s, int len)
static int pg_gb18030_verifychar(const unsigned char *s, int len)
static int pg_mule_verifystr(const unsigned char *s, int len)
static int pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_utf8_verifychar(const unsigned char *s, int len)
static int pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_gb18030_mblen(const unsigned char *s)
int pg_encoding_dsplen(int encoding, const char *mbstr)
static void utf8_advance(const unsigned char *s, uint32 *state, int len)
static int pg_euctw_dsplen(const unsigned char *s)
static int pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_uhc_mblen(const unsigned char *s)
static int pg_euc_mblen(const unsigned char *s)
static int pg_mule_dsplen(const unsigned char *s)
int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)
static int pg_utf8_verifystr(const unsigned char *s, int len)
static int pg_euckr_mblen(const unsigned char *s)
const pg_wchar_tbl pg_wchar_table[]
static int pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
int pg_encoding_max_length(int encoding)
int pg_encoding_mblen(int encoding, const char *mbstr)
static int pg_johab_verifychar(const unsigned char *s, int len)
static int pg_ascii_dsplen(const unsigned char *s)
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
static int ucs_wcwidth(pg_wchar ucs)
static int pg_utf_dsplen(const unsigned char *s)