PostgreSQL Source Code git master
wchar.c File Reference
#include "c.h"
#include <limits.h>
#include "mb/pg_wchar.h"
#include "utils/ascii.h"
#include "common/unicode_nonspacing_table.h"
#include "common/unicode_east_asian_fw_table.h"
Include dependency graph for wchar.c:

Go to the source code of this file.

Data Structures

struct  mbinterval
 

Macros

#define NONUTF8_INVALID_BYTE0   (0x8d)
 
#define NONUTF8_INVALID_BYTE1   (' ')
 
#define MB2CHAR_NEED_AT_LEAST(len, need)   if ((len) < (need)) break
 
#define IS_EUC_RANGE_VALID(c)   ((c) >= 0xa1 && (c) <= 0xfe)
 
#define pg_euccn_verifychar   pg_euckr_verifychar
 
#define pg_euccn_verifystr   pg_euckr_verifystr
 
#define ERR   0
 
#define BGN   11
 
#define CS1   16
 
#define CS2   1
 
#define CS3   5
 
#define P3A   6 /* Lead was E0, check for 3-byte overlong */
 
#define P3B   20 /* Lead was ED, check for surrogate */
 
#define P4A   25 /* Lead was F0, check for 4-byte overlong */
 
#define P4B   30 /* Lead was F4, check for too-large */
 
#define END   BGN
 
#define ASC   (END << BGN)
 
#define L2A   (CS1 << BGN)
 
#define L3A   (P3A << BGN)
 
#define L3B   (CS2 << BGN)
 
#define L3C   (P3B << BGN)
 
#define L4A   (P4A << BGN)
 
#define L4B   (CS3 << BGN)
 
#define L4C   (P4B << BGN)
 
#define CR1   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)
 
#define CR2   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)
 
#define CR3   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)
 
#define ILL   ERR
 
#define STRIDE_LENGTH   (2 * sizeof(Vector8))
 

Functions

static int pg_ascii2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_ascii_mblen (const unsigned char *s)
 
static int pg_ascii_dsplen (const unsigned char *s)
 
static int pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euc_mblen (const unsigned char *s)
 
static int pg_euc_dsplen (const unsigned char *s)
 
static int pg_eucjp2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_eucjp_mblen (const unsigned char *s)
 
static int pg_eucjp_dsplen (const unsigned char *s)
 
static int pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euckr_mblen (const unsigned char *s)
 
static int pg_euckr_dsplen (const unsigned char *s)
 
static int pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euccn_mblen (const unsigned char *s)
 
static int pg_euccn_dsplen (const unsigned char *s)
 
static int pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euctw_mblen (const unsigned char *s)
 
static int pg_euctw_dsplen (const unsigned char *s)
 
static int pg_wchar2euc_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_johab_mblen (const unsigned char *s)
 
static int pg_johab_dsplen (const unsigned char *s)
 
static int pg_utf2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2utf_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_utf_mblen (const unsigned char *s)
 
static int mbbisearch (pg_wchar ucs, const struct mbinterval *table, int max)
 
static int ucs_wcwidth (pg_wchar ucs)
 
static int pg_utf_dsplen (const unsigned char *s)
 
static int pg_mule2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2mule_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_mule_mblen (const unsigned char *s)
 
static int pg_mule_dsplen (const unsigned char *s)
 
static int pg_latin12wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2single_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_latin1_mblen (const unsigned char *s)
 
static int pg_latin1_dsplen (const unsigned char *s)
 
static int pg_sjis_mblen (const unsigned char *s)
 
static int pg_sjis_dsplen (const unsigned char *s)
 
static int pg_big5_mblen (const unsigned char *s)
 
static int pg_big5_dsplen (const unsigned char *s)
 
static int pg_gbk_mblen (const unsigned char *s)
 
static int pg_gbk_dsplen (const unsigned char *s)
 
static int pg_uhc_mblen (const unsigned char *s)
 
static int pg_uhc_dsplen (const unsigned char *s)
 
static int pg_gb18030_mblen (const unsigned char *s)
 
static int pg_gb18030_dsplen (const unsigned char *s)
 
static int pg_ascii_verifychar (const unsigned char *s, int len)
 
static int pg_ascii_verifystr (const unsigned char *s, int len)
 
static int pg_eucjp_verifychar (const unsigned char *s, int len)
 
static int pg_eucjp_verifystr (const unsigned char *s, int len)
 
static int pg_euckr_verifychar (const unsigned char *s, int len)
 
static int pg_euckr_verifystr (const unsigned char *s, int len)
 
static int pg_euctw_verifychar (const unsigned char *s, int len)
 
static int pg_euctw_verifystr (const unsigned char *s, int len)
 
static int pg_johab_verifychar (const unsigned char *s, int len)
 
static int pg_johab_verifystr (const unsigned char *s, int len)
 
static int pg_mule_verifychar (const unsigned char *s, int len)
 
static int pg_mule_verifystr (const unsigned char *s, int len)
 
static int pg_latin1_verifychar (const unsigned char *s, int len)
 
static int pg_latin1_verifystr (const unsigned char *s, int len)
 
static int pg_sjis_verifychar (const unsigned char *s, int len)
 
static int pg_sjis_verifystr (const unsigned char *s, int len)
 
static int pg_big5_verifychar (const unsigned char *s, int len)
 
static int pg_big5_verifystr (const unsigned char *s, int len)
 
static int pg_gbk_verifychar (const unsigned char *s, int len)
 
static int pg_gbk_verifystr (const unsigned char *s, int len)
 
static int pg_uhc_verifychar (const unsigned char *s, int len)
 
static int pg_uhc_verifystr (const unsigned char *s, int len)
 
static int pg_gb18030_verifychar (const unsigned char *s, int len)
 
static int pg_gb18030_verifystr (const unsigned char *s, int len)
 
static int pg_utf8_verifychar (const unsigned char *s, int len)
 
static void utf8_advance (const unsigned char *s, uint32 *state, int len)
 
static int pg_utf8_verifystr (const unsigned char *s, int len)
 
bool pg_utf8_islegal (const unsigned char *source, int length)
 
void pg_encoding_set_invalid (int encoding, char *dst)
 
int pg_encoding_mblen (int encoding, const char *mbstr)
 
int pg_encoding_mblen_or_incomplete (int encoding, const char *mbstr, size_t remaining)
 
int pg_encoding_mblen_bounded (int encoding, const char *mbstr)
 
int pg_encoding_dsplen (int encoding, const char *mbstr)
 
int pg_encoding_verifymbchar (int encoding, const char *mbstr, int len)
 
int pg_encoding_verifymbstr (int encoding, const char *mbstr, int len)
 
int pg_encoding_max_length (int encoding)
 

Variables

static const uint32 Utf8Transition [256]
 
const pg_wchar_tbl pg_wchar_table []
 

Macro Definition Documentation

◆ ASC

#define ASC   (END << BGN)

Definition at line 1819 of file wchar.c.

◆ BGN

#define BGN   11

Definition at line 1803 of file wchar.c.

◆ CR1

#define CR1   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)

Definition at line 1831 of file wchar.c.

◆ CR2

#define CR2   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)

Definition at line 1832 of file wchar.c.

◆ CR3

#define CR3   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)

Definition at line 1833 of file wchar.c.

◆ CS1

#define CS1   16

Definition at line 1805 of file wchar.c.

◆ CS2

#define CS2   1

Definition at line 1806 of file wchar.c.

◆ CS3

#define CS3   5

Definition at line 1807 of file wchar.c.

◆ END

#define END   BGN

Definition at line 1814 of file wchar.c.

◆ ERR

#define ERR   0

Definition at line 1801 of file wchar.c.

◆ ILL

#define ILL   ERR

Definition at line 1835 of file wchar.c.

◆ IS_EUC_RANGE_VALID

#define IS_EUC_RANGE_VALID (   c)    ((c) >= 0xa1 && (c) <= 0xfe)

Definition at line 1101 of file wchar.c.

◆ L2A

#define L2A   (CS1 << BGN)

Definition at line 1821 of file wchar.c.

◆ L3A

#define L3A   (P3A << BGN)

Definition at line 1823 of file wchar.c.

◆ L3B

#define L3B   (CS2 << BGN)

Definition at line 1824 of file wchar.c.

◆ L3C

#define L3C   (P3B << BGN)

Definition at line 1825 of file wchar.c.

◆ L4A

#define L4A   (P4A << BGN)

Definition at line 1827 of file wchar.c.

◆ L4B

#define L4B   (CS3 << BGN)

Definition at line 1828 of file wchar.c.

◆ L4C

#define L4C   (P4B << BGN)

Definition at line 1829 of file wchar.c.

◆ MB2CHAR_NEED_AT_LEAST

#define MB2CHAR_NEED_AT_LEAST (   len,
  need 
)    if ((len) < (need)) break

Definition at line 67 of file wchar.c.

◆ NONUTF8_INVALID_BYTE0

#define NONUTF8_INVALID_BYTE0   (0x8d)

Definition at line 36 of file wchar.c.

◆ NONUTF8_INVALID_BYTE1

#define NONUTF8_INVALID_BYTE1   (' ')

Definition at line 37 of file wchar.c.

◆ P3A

#define P3A   6 /* Lead was E0, check for 3-byte overlong */

Definition at line 1809 of file wchar.c.

◆ P3B

#define P3B   20 /* Lead was ED, check for surrogate */

Definition at line 1810 of file wchar.c.

◆ P4A

#define P4A   25 /* Lead was F0, check for 4-byte overlong */

Definition at line 1811 of file wchar.c.

◆ P4B

#define P4B   30 /* Lead was F4, check for too-large */

Definition at line 1812 of file wchar.c.

◆ pg_euccn_verifychar

#define pg_euccn_verifychar   pg_euckr_verifychar

Definition at line 1246 of file wchar.c.

◆ pg_euccn_verifystr

#define pg_euccn_verifystr   pg_euckr_verifystr

Definition at line 1247 of file wchar.c.

◆ STRIDE_LENGTH

#define STRIDE_LENGTH   (2 * sizeof(Vector8))

Function Documentation

◆ mbbisearch()

static int mbbisearch ( pg_wchar  ucs,
const struct mbinterval *table,
int  max 
)
static

Definition at line 599 of file wchar.c.

600{
601 int min = 0;
602 int mid;
603
604 if (ucs < table[0].first || ucs > table[max].last)
605 return 0;
606 while (max >= min)
607 {
608 mid = (min + max) / 2;
609 if (ucs > table[mid].last)
610 min = mid + 1;
611 else if (ucs < table[mid].first)
612 max = mid - 1;
613 else
614 return 1;
615 }
616
617 return 0;
618}
static const struct lconv_member_info table[]
static int fb(int x)

References fb(), and table.

Referenced by ucs_wcwidth().

◆ pg_ascii2wchar_with_len()

static int pg_ascii2wchar_with_len ( const unsigned char *from,
pg_wchar *to,
int  len 
)
static

Definition at line 73 of file wchar.c.

74{
75 int cnt = 0;
76
77 while (len > 0 && *from)
78 {
79 *to++ = *from++;
80 len--;
81 cnt++;
82 }
83 *to = 0;
84 return cnt;
85}
const void size_t len

References len.

◆ pg_ascii_dsplen()

static int pg_ascii_dsplen ( const unsigned char *s)
static

Definition at line 94 of file wchar.c.

95{
96 if (*s == '\0')
97 return 0;
98 if (*s < 0x20 || *s == 0x7f)
99 return -1;
100
101 return 1;
102}

Referenced by pg_big5_dsplen(), pg_euc_dsplen(), pg_euccn_dsplen(), pg_eucjp_dsplen(), pg_euctw_dsplen(), pg_gb18030_dsplen(), pg_gbk_dsplen(), pg_latin1_dsplen(), pg_sjis_dsplen(), and pg_uhc_dsplen().

◆ pg_ascii_mblen()

static int pg_ascii_mblen ( const unsigned char *s)
static

Definition at line 88 of file wchar.c.

89{
90 return 1;
91}

◆ pg_ascii_verifychar()

static int pg_ascii_verifychar ( const unsigned char *s,
int  len 
)
static

Definition at line 1085 of file wchar.c.

1086{
1087 return 1;
1088}

◆ pg_ascii_verifystr()

static int pg_ascii_verifystr ( const unsigned char *s,
int  len 
)
static

Definition at line 1091 of file wchar.c.

1092{
1093 const unsigned char *nullpos = memchr(s, 0, len);
1094
1095 if (nullpos == NULL)
1096 return len;
1097 else
1098 return nullpos - s;
1099}

References fb(), and len.

◆ pg_big5_dsplen()

static int pg_big5_dsplen ( const unsigned char *s)
static

Definition at line 956 of file wchar.c.

957{
958 int len;
959
960 if (IS_HIGHBIT_SET(*s))
961 len = 2; /* kanji? */
962 else
963 len = pg_ascii_dsplen(s); /* should be ASCII */
964 return len;
965}
#define IS_HIGHBIT_SET(ch)
Definition c.h:1172
static int pg_ascii_dsplen(const unsigned char *s)
Definition wchar.c:94

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_big5_mblen()

static int pg_big5_mblen ( const unsigned char *s)
static

Definition at line 944 of file wchar.c.

945{
946 int len;
947
948 if (IS_HIGHBIT_SET(*s))
949 len = 2; /* kanji? */
950 else
951 len = 1; /* should be ASCII */
952 return len;
953}

References IS_HIGHBIT_SET, and len.

Referenced by pg_big5_verifychar().

◆ pg_big5_verifychar()

static int pg_big5_verifychar ( const unsigned char *s,
int  len 
)
static

Definition at line 1501 of file wchar.c.

1502{
1503 int l,
1504 mbl;
1505
1506 l = mbl = pg_big5_mblen(s);
1507
1508 if (len < l)
1509 return -1;
1510
1511 if (l == 2 &&
1512 s[0] == NONUTF8_INVALID_BYTE0 &&
1513 s[1] == NONUTF8_INVALID_BYTE1)
1514 return -1;
1515
1516 while (--l > 0)
1517 {
1518 if (*++s == '\0')
1519 return -1;
1520 }
1521
1522 return mbl;
1523}
#define NONUTF8_INVALID_BYTE0
Definition wchar.c:36
static int pg_big5_mblen(const unsigned char *s)
Definition wchar.c:944
#define NONUTF8_INVALID_BYTE1
Definition wchar.c:37

References fb(), len, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, and pg_big5_mblen().

Referenced by pg_big5_verifystr().

◆ pg_big5_verifystr()

static int pg_big5_verifystr ( const unsigned char *s,
int  len 
)
static

Definition at line 1526 of file wchar.c.

1527{
1528 const unsigned char *start = s;
1529
1530 while (len > 0)
1531 {
1532 int l;
1533
1534 /* fast path for ASCII-subset characters */
1535 if (!IS_HIGHBIT_SET(*s))
1536 {
1537 if (*s == '\0')
1538 break;
1539 l = 1;
1540 }
1541 else
1542 {
1543 l = pg_big5_verifychar(s, len);
1544 if (l == -1)
1545 break;
1546 }
1547 s += l;
1548 len -= l;
1549 }
1550
1551 return s - start;
1552}
return str start
static int pg_big5_verifychar(const unsigned char *s, int len)
Definition wchar.c:1501

References IS_HIGHBIT_SET, len, pg_big5_verifychar(), and start.

◆ pg_encoding_dsplen()

int pg_encoding_dsplen ( int  encoding,
const char *mbstr 
)

Definition at line 2198 of file wchar.c.

2199{
2200 return (PG_VALID_ENCODING(encoding) ?
2201 pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
2202 pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
2203}
static char * encoding
Definition initdb.c:139
@ PG_SQL_ASCII
Definition pg_wchar.h:226
#define PG_VALID_ENCODING(_enc)
Definition pg_wchar.h:287
const pg_wchar_tbl pg_wchar_table[]
Definition wchar.c:2086

References pg_wchar_tbl::dsplen, encoding, fb(), PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by PQdsplen(), and reportErrorPosition().

◆ pg_encoding_max_length()

int pg_encoding_max_length ( int  encoding)

Definition at line 2235 of file wchar.c.

2236{
2237 Assert(PG_VALID_ENCODING(encoding));
2238
2239 /*
2240 * Check for the encoding despite the assert, due to some mingw versions
2241 * otherwise issuing bogus warnings.
2242 */
2243 return PG_VALID_ENCODING(encoding) ?
2244 pg_wchar_table[encoding].maxmblen :
2245 pg_wchar_table[PG_SQL_ASCII].maxmblen;
2246}
#define Assert(condition)
Definition c.h:885

References Assert, encoding, pg_wchar_tbl::maxmblen, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by ascii(), chr(), CopyConvertBuf(), make_trigrams(), pg_encoding_mbcliplen(), pg_encoding_set_invalid(), pg_verify_mbstr_len(), reportErrorPosition(), test_enc_setup(), test_wchars_to_text(), and type_maximum_size().

◆ pg_encoding_mblen()

int pg_encoding_mblen ( int  encoding,
const char *mbstr 
)

Definition at line 2157 of file wchar.c.

◆ pg_encoding_mblen_bounded()

int pg_encoding_mblen_bounded ( int  encoding,
const char *mbstr 
)

Definition at line 2189 of file wchar.c.

2190{
2191 return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr));
2192}
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition wchar.c:2157

References encoding, fb(), and pg_encoding_mblen().

◆ pg_encoding_mblen_or_incomplete()

int pg_encoding_mblen_or_incomplete ( int  encoding,
const char *mbstr,
size_t  remaining 
)

Definition at line 2169 of file wchar.c.

2171{
2172 /*
2173 * Define zero remaining as too few, even for single-byte encodings.
2174 * pg_gb18030_mblen() reads one or two bytes; single-byte encodings read
2175 * zero; others read one.
2176 */
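2177 if (remaining < 1 ||
2178 (encoding == PG_GB18030 && IS_HIGHBIT_SET(*mbstr) && remaining < 2))
2179 return INT_MAX;
2180 return pg_encoding_mblen(encoding, mbstr);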
2181}
int remaining
Definition informix.c:692
@ PG_GB18030
Definition pg_wchar.h:268

References encoding, fb(), IS_HIGHBIT_SET, pg_encoding_mblen(), PG_GB18030, and remaining.

Referenced by PQescapeInternal(), PQescapeStringInternal(), report_invalid_encoding(), and report_untranslatable_char().

◆ pg_encoding_set_invalid()

void pg_encoding_set_invalid ( int  encoding,
char *dst 
)

Definition at line 2073 of file wchar.c.

2074{
2075 Assert(pg_encoding_max_length(encoding) > 1);
2076
2077 dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0);
2078 dst[1] = NONUTF8_INVALID_BYTE1;
2079}
#define PG_UTF8
Definition mbprint.c:43
int pg_encoding_max_length(int encoding)
Definition wchar.c:2235

References Assert, encoding, fb(), NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, pg_encoding_max_length(), and PG_UTF8.

Referenced by appendStringLiteral(), fmtIdEnc(), PQescapeStringInternal(), and test_enc_setup().

◆ pg_encoding_verifymbchar()

int pg_encoding_verifymbchar ( int  encoding,
const char *mbstr,
int  len 
)

◆ pg_encoding_verifymbstr()

int pg_encoding_verifymbstr ( int  encoding,
const char *mbstr,
int  len 
)

◆ pg_euc2wchar_with_len()

static int pg_euc2wchar_with_len ( const unsigned char *from,
pg_wchar *to,
int  len 
)
static

Definition at line 108 of file wchar.c.

109{
110 int cnt = 0;
111
112 while (len > 0 && *from)
113 {
114 if (*from == SS2) /* JIS X 0201 (so called "1 byte KANA") */
115 {
116 MB2CHAR_NEED_AT_LEAST(len, 2);
117 from++;
118 *to = (SS2 << 8) | *from++;
119 len -= 2;
120 }
121 else if (*from == SS3) /* JIS X 0212 KANJI */
122 {
123 MB2CHAR_NEED_AT_LEAST(len, 3);
124 from++;
125 *to = (SS3 << 16) | (*from++ << 8);
126 *to |= *from++;
127 len -= 3;
128 }
129 else if (IS_HIGHBIT_SET(*from)) /* JIS X 0208 KANJI */
130 {
131 MB2CHAR_NEED_AT_LEAST(len, 2);
132 *to = *from++ << 8;
133 *to |= *from++;
134 len -= 2;
135 }
136 else /* must be ASCII */
137 {
138 *to = *from++;
139 len--;
140 }
141 to++;
142 cnt++;
143 }
144 *to = 0;
145 return cnt;
146}
#define SS2
Definition pg_wchar.h:38
#define SS3
Definition pg_wchar.h:39
#define MB2CHAR_NEED_AT_LEAST(len, need)
Definition wchar.c:67

References IS_HIGHBIT_SET, len, MB2CHAR_NEED_AT_LEAST, SS2, and SS3.

Referenced by pg_eucjp2wchar_with_len(), and pg_euckr2wchar_with_len().

◆ pg_euc_dsplen()

static int pg_euc_dsplen ( const unsigned char *s)
inline static

Definition at line 165 of file wchar.c.

166{
167 int len;
168
169 if (*s == SS2)
170 len = 2;
171 else if (*s == SS3)
172 len = 2;
173 else if (IS_HIGHBIT_SET(*s))
174 len = 2;
175 else
176 len = pg_ascii_dsplen(s);
177 return len;
178}

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

Referenced by pg_euckr_dsplen(), and pg_johab_dsplen().

◆ pg_euc_mblen()

static int pg_euc_mblen ( const unsigned char *s)
inline static

Definition at line 149 of file wchar.c.

150{
151 int len;
152
153 if (*s == SS2)
154 len = 2;
155 else if (*s == SS3)
156 len = 3;
157 else if (IS_HIGHBIT_SET(*s))
158 len = 2;
159 else
160 len = 1;
161 return len;
162}

References IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp_mblen(), pg_euckr_mblen(), and pg_johab_mblen().

◆ pg_euccn2wchar_with_len()

static int pg_euccn2wchar_with_len ( const unsigned char *from,
pg_wchar *to,
int  len 
)
static

Definition at line 237 of file wchar.c.

238{
239 int cnt = 0;
240
241 while (len > 0 && *from)
242 {
243 if (*from == SS2) /* code set 2 (unused?) */
244 {
245 MB2CHAR_NEED_AT_LEAST(len, 3);
246 from++;
247 *to = (SS2 << 16) | (*from++ << 8);
248 *to |= *from++;
249 len -= 3;
250 }
251 else if (*from == SS3) /* code set 3 (unused ?) */
252 {
253 MB2CHAR_NEED_AT_LEAST(len, 3);
254 from++;
255 *to = (SS3 << 16) | (*from++ << 8);
256 *to |= *from++;
257 len -= 3;
258 }
259 else if (IS_HIGHBIT_SET(*from)) /* code set 1 */
260 {
261 MB2CHAR_NEED_AT_LEAST(len, 2);
262 *to = *from++ << 8;
263 *to |= *from++;
264 len -= 2;
265 }
266 else
267 {
268 *to = *from++;
269 len--;
270 }
271 to++;
272 cnt++;
273 }
274 *to = 0;
275 return cnt;
276}

References IS_HIGHBIT_SET, len, MB2CHAR_NEED_AT_LEAST, SS2, and SS3.

◆ pg_euccn_dsplen()

static int pg_euccn_dsplen ( const unsigned char *s)
static

Definition at line 301 of file wchar.c.

302{
303 int len;
304
305 if (IS_HIGHBIT_SET(*s))
306 len = 2;
307 else
308 len = pg_ascii_dsplen(s);
309 return len;
310}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_euccn_mblen()

static int pg_euccn_mblen ( const unsigned char *s)
static

Definition at line 285 of file wchar.c.

286{
287 int len;
288
289 if (*s == SS2)
290 len = 3;
291 else if (*s == SS3)
292 len = 3;
293 else if (IS_HIGHBIT_SET(*s))
294 len = 2;
295 else
296 len = 1;
297 return len;
298}

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_eucjp2wchar_with_len()

static int pg_eucjp2wchar_with_len ( const unsigned char *from,
pg_wchar *to,
int  len 
)
static

Definition at line 184 of file wchar.c.

185{
186 return pg_euc2wchar_with_len(from, to, len);
187}
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:108

References len, and pg_euc2wchar_with_len().

◆ pg_eucjp_dsplen()

static int pg_eucjp_dsplen ( const unsigned char *s)
static

Definition at line 196 of file wchar.c.

197{
198 int len;
199
200 if (*s == SS2)
201 len = 1;
202 else if (*s == SS3)
203 len = 2;
204 else if (IS_HIGHBIT_SET(*s))
205 len = 2;
206 else
207 len = pg_ascii_dsplen(s);
208 return len;
209}

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

◆ pg_eucjp_mblen()

static int pg_eucjp_mblen ( const unsigned char *s)
static

Definition at line 190 of file wchar.c.

191{
192 return pg_euc_mblen(s);
193}
static int pg_euc_mblen(const unsigned char *s)
Definition wchar.c:149

References pg_euc_mblen().

◆ pg_eucjp_verifychar()

static int pg_eucjp_verifychar ( const unsigned char *s,
int  len 
)
static

Definition at line 1104 of file wchar.c.

1105{
1106 int l;
1107 unsigned char c1,
1108 c2;
1109
1110 c1 = *s++;
1111
1112 switch (c1)
1113 {
1114 case SS2: /* JIS X 0201 */
1115 l = 2;
1116 if (l > len)
1117 return -1;
1118 c2 = *s++;
1119 if (c2 < 0xa1 || c2 > 0xdf)
1120 return -1;
1121 break;
1122
1123 case SS3: /* JIS X 0212 */
1124 l = 3;
1125 if (l > len)
1126 return -1;
1127 c2 = *s++;
1128 if (!IS_EUC_RANGE_VALID(c2))
1129 return -1;
1130 c2 = *s++;
1131 if (!IS_EUC_RANGE_VALID(c2))
1132 return -1;
1133 break;
1134
1135 default:
1136 if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1137 {
1138 l = 2;
1139 if (l > len)
1140 return -1;
1141 if (!IS_EUC_RANGE_VALID(c1))
1142 return -1;
1143 c2 = *s++;
1144 if (!IS_EUC_RANGE_VALID(c2))
1145 return -1;
1146 }
1147 else
1148 /* must be ASCII */
1149 {
1150 l = 1;
1151 }
1152 break;
1153 }
1154
1155 return l;
1156}
#define IS_EUC_RANGE_VALID(c)
Definition wchar.c:1101

References fb(), IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp_verifystr().

◆ pg_eucjp_verifystr()

static int pg_eucjp_verifystr ( const unsigned char *s,
int  len 
)
static

Definition at line 1159 of file wchar.c.

1160{
1161 const unsigned char *start = s;
1162
1163 while (len > 0)
1164 {
1165 int l;
1166
1167 /* fast path for ASCII-subset characters */
1168 if (!IS_HIGHBIT_SET(*s))
1169 {
1170 if (*s == '\0')
1171 break;
1172 l = 1;
1173 }
1174 else
1175 {
1176 l = pg_eucjp_verifychar(s, len);
1177 if (l == -1)
1178 break;
1179 }
1180 s += l;
1181 len -= l;
1182 }
1183
1184 return s - start;
1185}
static int pg_eucjp_verifychar(const unsigned char *s, int len)
Definition wchar.c:1104

References IS_HIGHBIT_SET, len, pg_eucjp_verifychar(), and start.

◆ pg_euckr2wchar_with_len()

static int pg_euckr2wchar_with_len ( const unsigned char *from,
pg_wchar *to,
int  len 
)
static

Definition at line 215 of file wchar.c.

216{
217 return pg_euc2wchar_with_len(from, to, len);
218}

References len, and pg_euc2wchar_with_len().

◆ pg_euckr_dsplen()

static int pg_euckr_dsplen ( const unsigned char *s)
static

Definition at line 227 of file wchar.c.

228{
229 return pg_euc_dsplen(s);
230}
static int pg_euc_dsplen(const unsigned char *s)
Definition wchar.c:165

References pg_euc_dsplen().

◆ pg_euckr_mblen()

static int pg_euckr_mblen ( const unsigned char *s)
static

Definition at line 221 of file wchar.c.

222{
223 return pg_euc_mblen(s);
224}

References pg_euc_mblen().

◆ pg_euckr_verifychar()

static int pg_euckr_verifychar ( const unsigned char *s,
int  len 
)
static

Definition at line 1188 of file wchar.c.

1189{
1190 int l;
1191 unsigned char c1,
1192 c2;
1193
1194 c1 = *s++;
1195
1196 if (IS_HIGHBIT_SET(c1))
1197 {
1198 l = 2;
1199 if (l > len)
1200 return -1;
1201 if (!IS_EUC_RANGE_VALID(c1))
1202 return -1;
1203 c2 = *s++;
1204 if (!IS_EUC_RANGE_VALID(c2))
1205 return -1;
1206 }
1207 else
1208 /* must be ASCII */
1209 {
1210 l = 1;
1211 }
1212
1213 return l;
1214}

References fb(), IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, and len.

Referenced by pg_euckr_verifystr().

◆ pg_euckr_verifystr()

static int pg_euckr_verifystr ( const unsigned char *s,
int  len 
)
static

Definition at line 1217 of file wchar.c.

1218{
1219 const unsigned char *start = s;
1220
1221 while (len > 0)
1222 {
1223 int l;
1224
1225 /* fast path for ASCII-subset characters */
1226 if (!IS_HIGHBIT_SET(*s))
1227 {
1228 if (*s == '\0')
1229 break;
1230 l = 1;
1231 }
1232 else
1233 {
1234 l = pg_euckr_verifychar(s, len);
1235 if (l == -1)
1236 break;
1237 }
1238 s += l;
1239 len -= l;
1240 }
1241
1242 return s - start;
1243}
static int pg_euckr_verifychar(const unsigned char *s, int len)
Definition wchar.c:1188

References IS_HIGHBIT_SET, len, pg_euckr_verifychar(), and start.

◆ pg_euctw2wchar_with_len()

static int pg_euctw2wchar_with_len ( const unsigned char *from,
pg_wchar *to,
int  len 
)
static

Definition at line 317 of file wchar.c.

318{
319 int cnt = 0;
320
321 while (len > 0 && *from)
322 {
323 if (*from == SS2) /* code set 2 */
324 {
325 MB2CHAR_NEED_AT_LEAST(len, 4);
326 from++;
327 *to = (((uint32) SS2) << 24) | (*from++ << 16);
328 *to |= *from++ << 8;
329 *to |= *from++;
330 len -= 4;
331 }
332 else if (*from == SS3) /* code set 3 (unused?) */
333 {
334 MB2CHAR_NEED_AT_LEAST(len, 3);
335 from++;
336 *to = (SS3 << 16) | (*from++ << 8);
337 *to |= *from++;
338 len -= 3;
339 }
340 else if (IS_HIGHBIT_SET(*from)) /* code set 2 */
341 {
342 MB2CHAR_NEED_AT_LEAST(len, 2);
343 *to = *from++ << 8;
344 *to |= *from++;
345 len -= 2;
346 }
347 else
348 {
349 *to = *from++;
350 len--;
351 }
352 to++;
353 cnt++;
354 }
355 *to = 0;
356 return cnt;
357}
uint32_t uint32
Definition c.h:558

References IS_HIGHBIT_SET, len, MB2CHAR_NEED_AT_LEAST, SS2, and SS3.

◆ pg_euctw_dsplen()

static int pg_euctw_dsplen ( const unsigned char *s)
static

Definition at line 376 of file wchar.c.

377{
378 int len;
379
380 if (*s == SS2)
381 len = 2;
382 else if (*s == SS3)
383 len = 2;
384 else if (IS_HIGHBIT_SET(*s))
385 len = 2;
386 else
387 len = pg_ascii_dsplen(s);
388 return len;
389}

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

◆ pg_euctw_mblen()

static int pg_euctw_mblen ( const unsigned char *s)
static

Definition at line 360 of file wchar.c.

361{
362 int len;
363
364 if (*s == SS2)
365 len = 4;
366 else if (*s == SS3)
367 len = 3;
368 else if (IS_HIGHBIT_SET(*s))
369 len = 2;
370 else
371 len = 1;
372 return len;
373}

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euctw_verifychar()

static int pg_euctw_verifychar ( const unsigned char *s,
int  len 
)
static

Definition at line 1250 of file wchar.c.

1251{
1252 int l;
1253 unsigned char c1,
1254 c2;
1255
1256 c1 = *s++;
1257
1258 switch (c1)
1259 {
1260 case SS2: /* CNS 11643 Plane 1-7 */
1261 l = 4;
1262 if (l > len)
1263 return -1;
1264 c2 = *s++;
1265 if (c2 < 0xa1 || c2 > 0xa7)
1266 return -1;
1267 c2 = *s++;
1268 if (!IS_EUC_RANGE_VALID(c2))
1269 return -1;
1270 c2 = *s++;
1271 if (!IS_EUC_RANGE_VALID(c2))
1272 return -1;
1273 break;
1274
1275 case SS3: /* unused */
1276 return -1;
1277
1278 default:
1279 if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */
1280 {
1281 l = 2;
1282 if (l > len)
1283 return -1;
1284 /* no further range check on c1? */
1285 c2 = *s++;
1286 if (!IS_EUC_RANGE_VALID(c2))
1287 return -1;
1288 }
1289 else
1290 /* must be ASCII */
1291 {
1292 l = 1;
1293 }
1294 break;
1295 }
1296 return l;
1297}

References fb(), IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_euctw_verifystr().

◆ pg_euctw_verifystr()

static int pg_euctw_verifystr ( const unsigned char *s,
int  len 
)
static

Definition at line 1300 of file wchar.c.

1301{
1302 const unsigned char *start = s;
1303
1304 while (len > 0)
1305 {
1306 int l;
1307
1308 /* fast path for ASCII-subset characters */
1309 if (!IS_HIGHBIT_SET(*s))
1310 {
1311 if (*s == '\0')
1312 break;
1313 l = 1;
1314 }
1315 else
1316 {
1317 l = pg_euctw_verifychar(s, len);
1318 if (l == -1)
1319 break;
1320 }
1321 s += l;
1322 len -= l;
1323 }
1324
1325 return s - start;
1326}
static int pg_euctw_verifychar(const unsigned char *s, int len)
Definition wchar.c:1250

References IS_HIGHBIT_SET, len, pg_euctw_verifychar(), and start.

◆ pg_gb18030_dsplen()

static int pg_gb18030_dsplen ( const unsigned char *s)
static

Definition at line 1051 of file wchar.c.

1052{
1053 int len;
1054
1055 if (IS_HIGHBIT_SET(*s))
1056 len = 2;
1057 else
1058 len = pg_ascii_dsplen(s); /* ASCII */
1059 return len;
1060}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_gb18030_mblen()

static int pg_gb18030_mblen ( const unsigned char *s)
static

Definition at line 1037 of file wchar.c.

1038{
1039 int len;
1040
1041 if (!IS_HIGHBIT_SET(*s))
1042 len = 1; /* ASCII */
1043 else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1044 len = 4;
1045 else
1046 len = 2;
1047 return len;
1048}

References IS_HIGHBIT_SET, and len.

◆ pg_gb18030_verifychar()

static int pg_gb18030_verifychar ( const unsigned char *s,
int  len 
)
static

Definition at line 1663 of file wchar.c.

1664{
1665 int l;
1666
1667 if (!IS_HIGHBIT_SET(*s))
1668 l = 1; /* ASCII */
1669 else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1670 {
1671 /* Should be 4-byte, validate remaining bytes */
1672 if (*s >= 0x81 && *s <= 0xfe &&
1673 *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
1674 *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
1675 l = 4;
1676 else
1677 l = -1;
1678 }
1679 else if (len >= 2 && *s >= 0x81 && *s <= 0xfe)
1680 {
1681 /* Should be 2-byte, validate */
1682 if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
1683 (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
1684 l = 2;
1685 else
1686 l = -1;
1687 }
1688 else
1689 l = -1;
1690 return l;
1691}

References IS_HIGHBIT_SET, and len.

Referenced by pg_gb18030_verifystr().

◆ pg_gb18030_verifystr()

static int pg_gb18030_verifystr ( const unsigned char *s,
int  len 
)
static

Definition at line 1694 of file wchar.c.

1695{
1696 const unsigned char *start = s;
1697
1698 while (len > 0)
1699 {
1700 int l;
1701
1702 /* fast path for ASCII-subset characters */
1703 if (!IS_HIGHBIT_SET(*s))
1704 {
1705 if (*s == '\0')
1706 break;
1707 l = 1;
1708 }
1709 else
1710 {
1711 l = pg_gb18030_verifychar(s, len);
1712 if (l == -1)
1713 break;
1714 }
1715 s += l;
1716 len -= l;
1717 }
1718
1719 return s - start;
1720}
static int pg_gb18030_verifychar(const unsigned char *s, int len)
Definition wchar.c:1663

References IS_HIGHBIT_SET, len, pg_gb18030_verifychar(), and start.

◆ pg_gbk_dsplen()

static int pg_gbk_dsplen ( const unsigned char *s)
static

Definition at line 983 of file wchar.c.

984{
985 int len;
986
987 if (IS_HIGHBIT_SET(*s))
988 len = 2; /* kanji? */
989 else
990 len = pg_ascii_dsplen(s); /* should be ASCII */
991 return len;
992}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_gbk_mblen()

static int pg_gbk_mblen ( const unsigned char *s)
static

Definition at line 971 of file wchar.c.

972{
973 int len;
974
975 if (IS_HIGHBIT_SET(*s))
976 len = 2; /* kanji? */
977 else
978 len = 1; /* should be ASCII */
979 return len;
980}

References IS_HIGHBIT_SET, and len.

Referenced by pg_gbk_verifychar().

◆ pg_gbk_verifychar()

static int pg_gbk_verifychar ( const unsigned char *s,
int  len 
)
static

Definition at line 1555 of file wchar.c.

1556{
1557 int l,
1558 mbl;
1559
1560 l = mbl = pg_gbk_mblen(s);
1561
1562 if (len < l)
1563 return -1;
1564
1565 if (l == 2 &&
1566 s[0] == NONUTF8_INVALID_BYTE0 &&
1567 s[1] == NONUTF8_INVALID_BYTE1)
1568 return -1;
1569
1570 while (--l > 0)
1571 {
1572 if (*++s == '\0')
1573 return -1;
1574 }
1575
1576 return mbl;
1577}
static int pg_gbk_mblen(const unsigned char *s)
Definition wchar.c:971

References fb(), len, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, and pg_gbk_mblen().

Referenced by pg_gbk_verifystr().

◆ pg_gbk_verifystr()

static int pg_gbk_verifystr ( const unsigned char * s,
int  len 
)
static

Definition at line 1580 of file wchar.c.

1581{
1582 const unsigned char *start = s;
1583
1584 while (len > 0)
1585 {
1586 int l;
1587
1588 /* fast path for ASCII-subset characters */
1589 if (!IS_HIGHBIT_SET(*s))
1590 {
1591 if (*s == '\0')
1592 break;
1593 l = 1;
1594 }
1595 else
1596 {
1597 l = pg_gbk_verifychar(s, len);
1598 if (l == -1)
1599 break;
1600 }
1601 s += l;
1602 len -= l;
1603 }
1604
1605 return s - start;
1606}
static int pg_gbk_verifychar(const unsigned char *s, int len)
Definition wchar.c:1555

References IS_HIGHBIT_SET, len, pg_gbk_verifychar(), and start.

◆ pg_johab_dsplen()

static int pg_johab_dsplen ( const unsigned char * s)
static

Definition at line 450 of file wchar.c.

451{
452 return pg_euc_dsplen(s);
453}

References pg_euc_dsplen().

◆ pg_johab_mblen()

static int pg_johab_mblen ( const unsigned char * s)
static

Definition at line 444 of file wchar.c.

445{
446 return pg_euc_mblen(s);
447}

References pg_euc_mblen().

Referenced by pg_johab_verifychar().

◆ pg_johab_verifychar()

static int pg_johab_verifychar ( const unsigned char * s,
int  len 
)
static

Definition at line 1329 of file wchar.c.

1330{
1331 int l,
1332 mbl;
1333 unsigned char c;
1334
1335 l = mbl = pg_johab_mblen(s);
1336
1337 if (len < l)
1338 return -1;
1339
1340 if (!IS_HIGHBIT_SET(*s))
1341 return mbl;
1342
1343 while (--l > 0)
1344 {
1345 c = *++s;
1346 if (!IS_EUC_RANGE_VALID(c))
1347 return -1;
1348 }
1349 return mbl;
1350}
char * c
static int pg_johab_mblen(const unsigned char *s)
Definition wchar.c:444

References fb(), IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, and pg_johab_mblen().

Referenced by pg_johab_verifystr().

◆ pg_johab_verifystr()

static int pg_johab_verifystr ( const unsigned char * s,
int  len 
)
static

Definition at line 1353 of file wchar.c.

1354{
1355 const unsigned char *start = s;
1356
1357 while (len > 0)
1358 {
1359 int l;
1360
1361 /* fast path for ASCII-subset characters */
1362 if (!IS_HIGHBIT_SET(*s))
1363 {
1364 if (*s == '\0')
1365 break;
1366 l = 1;
1367 }
1368 else
1369 {
1370 l = pg_johab_verifychar(s, len);
1371 if (l == -1)
1372 break;
1373 }
1374 s += l;
1375 len -= l;
1376 }
1377
1378 return s - start;
1379}
static int pg_johab_verifychar(const unsigned char *s, int len)
Definition wchar.c:1329

References IS_HIGHBIT_SET, len, pg_johab_verifychar(), and start.

◆ pg_latin12wchar_with_len()

static int pg_latin12wchar_with_len ( const unsigned char * from,
pg_wchar * to,
int  len 
)
static

Definition at line 861 of file wchar.c.

862{
863 int cnt = 0;
864
865 while (len > 0 && *from)
866 {
867 *to++ = *from++;
868 len--;
869 cnt++;
870 }
871 *to = 0;
872 return cnt;
873}

References len.

◆ pg_latin1_dsplen()

static int pg_latin1_dsplen ( const unsigned char * s)
static

Definition at line 904 of file wchar.c.

905{
906 return pg_ascii_dsplen(s);
907}

References pg_ascii_dsplen().

◆ pg_latin1_mblen()

static int pg_latin1_mblen ( const unsigned char * s)
static

Definition at line 898 of file wchar.c.

899{
900 return 1;
901}

◆ pg_latin1_verifychar()

static int pg_latin1_verifychar ( const unsigned char * s,
int  len 
)
static

Definition at line 1432 of file wchar.c.

1433{
1434 return 1;
1435}

◆ pg_latin1_verifystr()

static int pg_latin1_verifystr ( const unsigned char * s,
int  len 
)
static

Definition at line 1438 of file wchar.c.

1439{
1440 const unsigned char *nullpos = memchr(s, 0, len);
1441
1442 if (nullpos == NULL)
1443 return len;
1444 else
1445 return nullpos - s;
1446}

References fb(), and len.

◆ pg_mule2wchar_with_len()

static int pg_mule2wchar_with_len ( const unsigned char * from,
pg_wchar * to,
int  len 
)
static

Definition at line 692 of file wchar.c.

693{
694 int cnt = 0;
695
696 while (len > 0 && *from)
697 {
698 if (IS_LC1(*from))
699 {
700 MB2CHAR_NEED_AT_LEAST(len, 2);
701 *to = *from++ << 16;
702 *to |= *from++;
703 len -= 2;
704 }
705 else if (IS_LCPRV1(*from))
706 {
707 MB2CHAR_NEED_AT_LEAST(len, 3);
708 from++;
709 *to = *from++ << 16;
710 *to |= *from++;
711 len -= 3;
712 }
713 else if (IS_LC2(*from))
714 {
715 MB2CHAR_NEED_AT_LEAST(len, 3);
716 *to = *from++ << 16;
717 *to |= *from++ << 8;
718 *to |= *from++;
719 len -= 3;
720 }
721 else if (IS_LCPRV2(*from))
722 {
723 MB2CHAR_NEED_AT_LEAST(len, 4);
724 from++;
725 *to = *from++ << 16;
726 *to |= *from++ << 8;
727 *to |= *from++;
728 len -= 4;
729 }
730 else
731 { /* assume ASCII */
732 *to = (unsigned char) *from++;
733 len--;
734 }
735 to++;
736 cnt++;
737 }
738 *to = 0;
739 return cnt;
740}
#define IS_LCPRV2(c)
Definition pg_wchar.h:164
#define IS_LC2(c)
Definition pg_wchar.h:144
#define IS_LCPRV1(c)
Definition pg_wchar.h:152
#define IS_LC1(c)
Definition pg_wchar.h:126

References fb(), IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, len, and MB2CHAR_NEED_AT_LEAST.

◆ pg_mule_dsplen()

static int pg_mule_dsplen ( const unsigned char * s)
static

Definition at line 833 of file wchar.c.

834{
835 int len;
836
837 /*
838 * Note: it's not really appropriate to assume that all multibyte charsets
839 * are double-wide on screen. But this seems an okay approximation for
840 * the MULE charsets we currently support.
841 */
842
843 if (IS_LC1(*s))
844 len = 1;
845 else if (IS_LCPRV1(*s))
846 len = 1;
847 else if (IS_LC2(*s))
848 len = 2;
849 else if (IS_LCPRV2(*s))
850 len = 2;
851 else
852 len = 1; /* assume ASCII */
853
854 return len;
855}

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

◆ pg_mule_mblen()

int pg_mule_mblen ( const unsigned char * s)

Definition at line 815 of file wchar.c.

816{
817 int len;
818
819 if (IS_LC1(*s))
820 len = 2;
821 else if (IS_LCPRV1(*s))
822 len = 3;
823 else if (IS_LC2(*s))
824 len = 3;
825 else if (IS_LCPRV2(*s))
826 len = 4;
827 else
828 len = 1; /* assume ASCII */
829 return len;
830}

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

Referenced by mic2latin(), mic2latin_with_table(), and pg_mule_verifychar().

◆ pg_mule_verifychar()

static int pg_mule_verifychar ( const unsigned char * s,
int  len 
)
static

Definition at line 1382 of file wchar.c.

1383{
1384 int l,
1385 mbl;
1386 unsigned char c;
1387
1388 l = mbl = pg_mule_mblen(s);
1389
1390 if (len < l)
1391 return -1;
1392
1393 while (--l > 0)
1394 {
1395 c = *++s;
1396 if (!IS_HIGHBIT_SET(c))
1397 return -1;
1398 }
1399 return mbl;
1400}
int pg_mule_mblen(const unsigned char *s)
Definition wchar.c:815

References fb(), IS_HIGHBIT_SET, len, and pg_mule_mblen().

Referenced by pg_mule_verifystr().

◆ pg_mule_verifystr()

static int pg_mule_verifystr ( const unsigned char * s,
int  len 
)
static

Definition at line 1403 of file wchar.c.

1404{
1405 const unsigned char *start = s;
1406
1407 while (len > 0)
1408 {
1409 int l;
1410
1411 /* fast path for ASCII-subset characters */
1412 if (!IS_HIGHBIT_SET(*s))
1413 {
1414 if (*s == '\0')
1415 break;
1416 l = 1;
1417 }
1418 else
1419 {
1420 l = pg_mule_verifychar(s, len);
1421 if (l == -1)
1422 break;
1423 }
1424 s += l;
1425 len -= l;
1426 }
1427
1428 return s - start;
1429}
static int pg_mule_verifychar(const unsigned char *s, int len)
Definition wchar.c:1382

References IS_HIGHBIT_SET, len, pg_mule_verifychar(), and start.

◆ pg_sjis_dsplen()

static int pg_sjis_dsplen ( const unsigned char * s)
static

Definition at line 927 of file wchar.c.

928{
929 int len;
930
931 if (*s >= 0xa1 && *s <= 0xdf)
932 len = 1; /* 1 byte kana? */
933 else if (IS_HIGHBIT_SET(*s))
934 len = 2; /* kanji? */
935 else
936 len = pg_ascii_dsplen(s); /* should be ASCII */
937 return len;
938}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_sjis_mblen()

static int pg_sjis_mblen ( const unsigned char * s)
static

Definition at line 913 of file wchar.c.

914{
915 int len;
916
917 if (*s >= 0xa1 && *s <= 0xdf)
918 len = 1; /* 1 byte kana? */
919 else if (IS_HIGHBIT_SET(*s))
920 len = 2; /* kanji? */
921 else
922 len = 1; /* should be ASCII */
923 return len;
924}

References IS_HIGHBIT_SET, and len.

Referenced by pg_sjis_verifychar().

◆ pg_sjis_verifychar()

static int pg_sjis_verifychar ( const unsigned char * s,
int  len 
)
static

Definition at line 1449 of file wchar.c.

1450{
1451 int l,
1452 mbl;
1453 unsigned char c1,
1454 c2;
1455
1456 l = mbl = pg_sjis_mblen(s);
1457
1458 if (len < l)
1459 return -1;
1460
1461 if (l == 1) /* pg_sjis_mblen already verified it */
1462 return mbl;
1463
1464 c1 = *s++;
1465 c2 = *s;
1466 if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2))
1467 return -1;
1468 return mbl;
1469}
#define ISSJISTAIL(c)
Definition pg_wchar.h:45
#define ISSJISHEAD(c)
Definition pg_wchar.h:44
static int pg_sjis_mblen(const unsigned char *s)
Definition wchar.c:913

References fb(), ISSJISHEAD, ISSJISTAIL, len, and pg_sjis_mblen().

Referenced by pg_sjis_verifystr().

◆ pg_sjis_verifystr()

static int pg_sjis_verifystr ( const unsigned char * s,
int  len 
)
static

Definition at line 1472 of file wchar.c.

1473{
1474 const unsigned char *start = s;
1475
1476 while (len > 0)
1477 {
1478 int l;
1479
1480 /* fast path for ASCII-subset characters */
1481 if (!IS_HIGHBIT_SET(*s))
1482 {
1483 if (*s == '\0')
1484 break;
1485 l = 1;
1486 }
1487 else
1488 {
1489 l = pg_sjis_verifychar(s, len);
1490 if (l == -1)
1491 break;
1492 }
1493 s += l;
1494 len -= l;
1495 }
1496
1497 return s - start;
1498}
static int pg_sjis_verifychar(const unsigned char *s, int len)
Definition wchar.c:1449

References IS_HIGHBIT_SET, len, pg_sjis_verifychar(), and start.

◆ pg_uhc_dsplen()

static int pg_uhc_dsplen ( const unsigned char * s)
static

Definition at line 1010 of file wchar.c.

1011{
1012 int len;
1013
1014 if (IS_HIGHBIT_SET(*s))
1015 len = 2; /* 2byte? */
1016 else
1017 len = pg_ascii_dsplen(s); /* should be ASCII */
1018 return len;
1019}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_uhc_mblen()

static int pg_uhc_mblen ( const unsigned char * s)
static

Definition at line 998 of file wchar.c.

999{
1000 int len;
1001
1002 if (IS_HIGHBIT_SET(*s))
1003 len = 2; /* 2byte? */
1004 else
1005 len = 1; /* should be ASCII */
1006 return len;
1007}

References IS_HIGHBIT_SET, and len.

Referenced by pg_uhc_verifychar().

◆ pg_uhc_verifychar()

static int pg_uhc_verifychar ( const unsigned char * s,
int  len 
)
static

Definition at line 1609 of file wchar.c.

1610{
1611 int l,
1612 mbl;
1613
1614 l = mbl = pg_uhc_mblen(s);
1615
1616 if (len < l)
1617 return -1;
1618
1619 if (l == 2 &&
1620 s[0] == NONUTF8_INVALID_BYTE0 &&
1621 s[1] == NONUTF8_INVALID_BYTE1)
1622 return -1;
1623
1624 while (--l > 0)
1625 {
1626 if (*++s == '\0')
1627 return -1;
1628 }
1629
1630 return mbl;
1631}
static int pg_uhc_mblen(const unsigned char *s)
Definition wchar.c:998

References fb(), len, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, and pg_uhc_mblen().

Referenced by pg_uhc_verifystr().

◆ pg_uhc_verifystr()

static int pg_uhc_verifystr ( const unsigned char * s,
int  len 
)
static

Definition at line 1634 of file wchar.c.

1635{
1636 const unsigned char *start = s;
1637
1638 while (len > 0)
1639 {
1640 int l;
1641
1642 /* fast path for ASCII-subset characters */
1643 if (!IS_HIGHBIT_SET(*s))
1644 {
1645 if (*s == '\0')
1646 break;
1647 l = 1;
1648 }
1649 else
1650 {
1651 l = pg_uhc_verifychar(s, len);
1652 if (l == -1)
1653 break;
1654 }
1655 s += l;
1656 len -= l;
1657 }
1658
1659 return s - start;
1660}
static int pg_uhc_verifychar(const unsigned char *s, int len)
Definition wchar.c:1609

References IS_HIGHBIT_SET, len, pg_uhc_verifychar(), and start.

◆ pg_utf2wchar_with_len()

static int pg_utf2wchar_with_len ( const unsigned char * from,
pg_wchar * to,
int  len 
)
static

Definition at line 462 of file wchar.c.

463{
464 int cnt = 0;
465 uint32 c1,
466 c2,
467 c3,
468 c4;
469
470 while (len > 0 && *from)
471 {
472 if ((*from & 0x80) == 0)
473 {
474 *to = *from++;
475 len--;
476 }
477 else if ((*from & 0xe0) == 0xc0)
478 {
479 MB2CHAR_NEED_AT_LEAST(len, 2);
480 c1 = *from++ & 0x1f;
481 c2 = *from++ & 0x3f;
482 *to = (c1 << 6) | c2;
483 len -= 2;
484 }
485 else if ((*from & 0xf0) == 0xe0)
486 {
487 MB2CHAR_NEED_AT_LEAST(len, 3);
488 c1 = *from++ & 0x0f;
489 c2 = *from++ & 0x3f;
490 c3 = *from++ & 0x3f;
491 *to = (c1 << 12) | (c2 << 6) | c3;
492 len -= 3;
493 }
494 else if ((*from & 0xf8) == 0xf0)
495 {
496 MB2CHAR_NEED_AT_LEAST(len, 4);
497 c1 = *from++ & 0x07;
498 c2 = *from++ & 0x3f;
499 c3 = *from++ & 0x3f;
500 c4 = *from++ & 0x3f;
501 *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
502 len -= 4;
503 }
504 else
505 {
506 /* treat a bogus char as length 1; not ours to raise error */
507 *to = *from++;
508 len--;
509 }
510 to++;
511 cnt++;
512 }
513 *to = 0;
514 return cnt;
515}

References fb(), len, and MB2CHAR_NEED_AT_LEAST.

◆ pg_utf8_islegal()

bool pg_utf8_islegal ( const unsigned char * source,
int  length 
)

Definition at line 2011 of file wchar.c.

2012{
2013 unsigned char a;
2014
2015 switch (length)
2016 {
2017 default:
2018 /* reject lengths 5 and 6 for now */
2019 return false;
2020 case 4:
2021 a = source[3];
2022 if (a < 0x80 || a > 0xBF)
2023 return false;
2024 pg_fallthrough;
2025 case 3:
2026 a = source[2];
2027 if (a < 0x80 || a > 0xBF)
2028 return false;
2029 pg_fallthrough;
2030 case 2:
2031 a = source[1];
2032 switch (*source)
2033 {
2034 case 0xE0:
2035 if (a < 0xA0 || a > 0xBF)
2036 return false;
2037 break;
2038 case 0xED:
2039 if (a < 0x80 || a > 0x9F)
2040 return false;
2041 break;
2042 case 0xF0:
2043 if (a < 0x90 || a > 0xBF)
2044 return false;
2045 break;
2046 case 0xF4:
2047 if (a < 0x80 || a > 0x8F)
2048 return false;
2049 break;
2050 default:
2051 if (a < 0x80 || a > 0xBF)
2052 return false;
2053 break;
2054 }
2055 pg_fallthrough;
2056 case 1:
2057 a = *source;
2058 if (a >= 0x80 && a < 0xC2)
2059 return false;
2060 if (a > 0xF4)
2061 return false;
2062 break;
2063 }
2064 return true;
2065}
#define pg_fallthrough
Definition c.h:144
int a
Definition isn.c:73
static rewind_source * source
Definition pg_rewind.c:89

References a, fb(), pg_fallthrough, and source.

Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifychar(), utf8_to_iso8859_1(), and UtfToLocal().

◆ pg_utf8_verifychar()

static int pg_utf8_verifychar ( const unsigned char * s,
int  len 
)
static

Definition at line 1723 of file wchar.c.

1724{
1725 int l;
1726
1727 if ((*s & 0x80) == 0)
1728 {
1729 if (*s == '\0')
1730 return -1;
1731 return 1;
1732 }
1733 else if ((*s & 0xe0) == 0xc0)
1734 l = 2;
1735 else if ((*s & 0xf0) == 0xe0)
1736 l = 3;
1737 else if ((*s & 0xf8) == 0xf0)
1738 l = 4;
1739 else
1740 l = 1;
1741
1742 if (l > len)
1743 return -1;
1744
1745 if (!pg_utf8_islegal(s, l))
1746 return -1;
1747
1748 return l;
1749}
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition wchar.c:2011

References len, and pg_utf8_islegal().

Referenced by pg_utf8_verifystr().

◆ pg_utf8_verifystr()

static int pg_utf8_verifystr ( const unsigned char * s,
int  len 
)
static

Definition at line 1913 of file wchar.c.

1914{
1915 const unsigned char *start = s;
1916 const int orig_len = len;
1917 uint32 state = BGN;
1918
1919/*
1920 * With a stride of two vector widths, gcc will unroll the loop. Even if
1921 * the compiler can unroll a longer loop, it's not worth it because we
1922 * must fall back to the byte-wise algorithm if we find any non-ASCII.
1923 */
1924#define STRIDE_LENGTH (2 * sizeof(Vector8))
1925
1926 if (len >= STRIDE_LENGTH)
1927 {
1928 while (len >= STRIDE_LENGTH)
1929 {
1930 /*
1931 * If the chunk is all ASCII, we can skip the full UTF-8 check,
1932 * but we must first check for a non-END state, which means the
1933 * previous chunk ended in the middle of a multibyte sequence.
1934 */
1935 if (state != END || !is_valid_ascii(s, STRIDE_LENGTH))
1936 utf8_advance(s, &state, STRIDE_LENGTH);
1937
1938 s += STRIDE_LENGTH;
1939 len -= STRIDE_LENGTH;
1940 }
1941
1942 /* The error state persists, so we only need to check for it here. */
1943 if (state == ERR)
1944 {
1945 /*
1946 * Start over from the beginning with the slow path so we can
1947 * count the valid bytes.
1948 */
1949 len = orig_len;
1950 s = start;
1951 }
1952 else if (state != END)
1953 {
1954 /*
1955 * The fast path exited in the middle of a multibyte sequence.
1956 * Walk backwards to find the leading byte so that the slow path
1957 * can resume checking from there. We must always backtrack at
1958 * least one byte, since the current byte could be e.g. an ASCII
1959 * byte after a 2-byte lead, which is invalid.
1960 */
1961 do
1962 {
1963 Assert(s > start);
1964 s--;
1965 len++;
1966 Assert(IS_HIGHBIT_SET(*s));
1967 } while (pg_utf_mblen(s) <= 1);
1968 }
1969 }
1970
1971 /* check remaining bytes */
1972 while (len > 0)
1973 {
1974 int l;
1975
1976 /* fast path for ASCII-subset characters */
1977 if (!IS_HIGHBIT_SET(*s))
1978 {
1979 if (*s == '\0')
1980 break;
1981 l = 1;
1982 }
1983 else
1984 {
1985 l = pg_utf8_verifychar(s, len);
1986 if (l == -1)
1987 break;
1988 }
1989 s += l;
1990 len -= l;
1991 }
1992
1993 return s - start;
1994}
static bool is_valid_ascii(const unsigned char *s, int len)
Definition ascii.h:25
#define pg_utf_mblen
Definition pg_wchar.h:633
#define END
Definition wchar.c:1814
#define ERR
Definition wchar.c:1801
static int pg_utf8_verifychar(const unsigned char *s, int len)
Definition wchar.c:1723
static void utf8_advance(const unsigned char *s, uint32 *state, int len)
Definition wchar.c:1895
#define BGN
Definition wchar.c:1803
#define STRIDE_LENGTH

References Assert, BGN, END, ERR, fb(), IS_HIGHBIT_SET, is_valid_ascii(), len, pg_utf8_verifychar(), pg_utf_mblen, start, STRIDE_LENGTH, and utf8_advance().

◆ pg_utf_dsplen()

static int pg_utf_dsplen ( const unsigned char * s)
static

Definition at line 680 of file wchar.c.

681{
682 return ucs_wcwidth(utf8_to_unicode(s));
683}
static char32_t utf8_to_unicode(const unsigned char *c)
Definition mbprint.c:53
static int ucs_wcwidth(pg_wchar ucs)
Definition wchar.c:646

References ucs_wcwidth(), and utf8_to_unicode().

◆ pg_utf_mblen()

int pg_utf_mblen ( const unsigned char * s)

Definition at line 556 of file wchar.c.

557{
558 int len;
559
560 if ((*s & 0x80) == 0)
561 len = 1;
562 else if ((*s & 0xe0) == 0xc0)
563 len = 2;
564 else if ((*s & 0xf0) == 0xe0)
565 len = 3;
566 else if ((*s & 0xf8) == 0xf0)
567 len = 4;
568#ifdef NOT_USED
569 else if ((*s & 0xfc) == 0xf8)
570 len = 5;
571 else if ((*s & 0xfe) == 0xfc)
572 len = 6;
573#endif
574 else
575 len = 1;
576 return len;
577}

References len.

◆ pg_wchar2euc_with_len()

static int pg_wchar2euc_with_len ( const pg_wchar * from,
unsigned char * to,
int  len 
)
static

Definition at line 398 of file wchar.c.

399{
400 int cnt = 0;
401
402 while (len > 0 && *from)
403 {
404 unsigned char c;
405
406 if ((c = (*from >> 24)))
407 {
408 *to++ = c;
409 *to++ = (*from >> 16) & 0xff;
410 *to++ = (*from >> 8) & 0xff;
411 *to++ = *from & 0xff;
412 cnt += 4;
413 }
414 else if ((c = (*from >> 16)))
415 {
416 *to++ = c;
417 *to++ = (*from >> 8) & 0xff;
418 *to++ = *from & 0xff;
419 cnt += 3;
420 }
421 else if ((c = (*from >> 8)))
422 {
423 *to++ = c;
424 *to++ = *from & 0xff;
425 cnt += 2;
426 }
427 else
428 {
429 *to++ = *from;
430 cnt++;
431 }
432 from++;
433 len--;
434 }
435 *to = 0;
436 return cnt;
437}

References len.

◆ pg_wchar2mule_with_len()

static int pg_wchar2mule_with_len ( const pg_wchar * from,
unsigned char * to,
int  len 
)
static

Definition at line 749 of file wchar.c.

750{
751 int cnt = 0;
752
753 while (len > 0 && *from)
754 {
755 unsigned char lb;
756
757 lb = (*from >> 16) & 0xff;
758 if (IS_LC1(lb))
759 {
760 *to++ = lb;
761 *to++ = *from & 0xff;
762 cnt += 2;
763 }
764 else if (IS_LC2(lb))
765 {
766 *to++ = lb;
767 *to++ = (*from >> 8) & 0xff;
768 *to++ = *from & 0xff;
769 cnt += 3;
770 }
771 else if (IS_LCPRV1_A_RANGE(lb))
772 {
773 *to++ = LCPRV1_A;
774 *to++ = lb;
775 *to++ = *from & 0xff;
776 cnt += 3;
777 }
778 else if (IS_LCPRV1_B_RANGE(lb))
779 {
780 *to++ = LCPRV1_B;
781 *to++ = lb;
782 *to++ = *from & 0xff;
783 cnt += 3;
784 }
785 else if (IS_LCPRV2_A_RANGE(lb))
786 {
787 *to++ = LCPRV2_A;
788 *to++ = lb;
789 *to++ = (*from >> 8) & 0xff;
790 *to++ = *from & 0xff;
791 cnt += 4;
792 }
793 else if (IS_LCPRV2_B_RANGE(lb))
794 {
795 *to++ = LCPRV2_B;
796 *to++ = lb;
797 *to++ = (*from >> 8) & 0xff;
798 *to++ = *from & 0xff;
799 cnt += 4;
800 }
801 else
802 {
803 *to++ = *from & 0xff;
804 cnt += 1;
805 }
806 from++;
807 len--;
808 }
809 *to = 0;
810 return cnt;
811}
#define LCPRV1_A
Definition pg_wchar.h:150
#define LCPRV1_B
Definition pg_wchar.h:151
#define LCPRV2_A
Definition pg_wchar.h:162
#define IS_LCPRV2_B_RANGE(c)
Definition pg_wchar.h:167
#define IS_LCPRV1_A_RANGE(c)
Definition pg_wchar.h:153
#define IS_LCPRV1_B_RANGE(c)
Definition pg_wchar.h:155
#define IS_LCPRV2_A_RANGE(c)
Definition pg_wchar.h:165
#define LCPRV2_B
Definition pg_wchar.h:163

References IS_LC1, IS_LC2, IS_LCPRV1_A_RANGE, IS_LCPRV1_B_RANGE, IS_LCPRV2_A_RANGE, IS_LCPRV2_B_RANGE, LCPRV1_A, LCPRV1_B, LCPRV2_A, LCPRV2_B, and len.

◆ pg_wchar2single_with_len()

static int pg_wchar2single_with_len ( const pg_wchar * from,
unsigned char * to,
int  len 
)
static

Definition at line 883 of file wchar.c.

884{
885 int cnt = 0;
886
887 while (len > 0 && *from)
888 {
889 *to++ = *from++;
890 len--;
891 cnt++;
892 }
893 *to = 0;
894 return cnt;
895}

References len.

◆ pg_wchar2utf_with_len()

static int pg_wchar2utf_with_len ( const pg_wchar * from,
unsigned char * to,
int  len 
)
static

Definition at line 525 of file wchar.c.

526{
527 int cnt = 0;
528
529 while (len > 0 && *from)
530 {
531 int char_len;
532
533 unicode_to_utf8(*from, to);
534 char_len = pg_utf_mblen(to);
535 cnt += char_len;
536 to += char_len;
537 from++;
538 len--;
539 }
540 *to = 0;
541 return cnt;
542}
static unsigned char * unicode_to_utf8(char32_t c, unsigned char *utf8string)
Definition pg_wchar.h:575

References fb(), len, pg_utf_mblen, and unicode_to_utf8().

◆ ucs_wcwidth()

static int ucs_wcwidth ( pg_wchar  ucs)
static

Definition at line 646 of file wchar.c.

647{
648#include "common/unicode_nonspacing_table.h"
649#include "common/unicode_east_asian_fw_table.h"
650
651 /* test for 8-bit control characters */
652 if (ucs == 0)
653 return 0;
654
655 if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
656 return -1;
657
658 /*
659 * binary search in table of non-spacing characters
660 *
661 * XXX: In the official Unicode sources, it is possible for a character to
662 * be described as both non-spacing and wide at the same time. As of
663 * Unicode 13.0, treating the non-spacing property as the determining
664 * factor for display width leads to the correct behavior, so do that
665 * search first.
666 */
668 sizeof(nonspacing) / sizeof(struct mbinterval) - 1))
669 return 0;
670
671 /* binary search in table of wide characters */
672 if (mbbisearch(ucs, east_asian_fw,
673 sizeof(east_asian_fw) / sizeof(struct mbinterval) - 1))
674 return 2;
675
676 return 1;
677}
static const struct mbinterval east_asian_fw[]
static const struct mbinterval nonspacing[]
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
Definition wchar.c:599

References east_asian_fw, fb(), mbbisearch(), and nonspacing.

Referenced by pg_utf_dsplen().

◆ utf8_advance()

static void utf8_advance ( const unsigned char * s,
uint32 * state,
int  len 
)
static

Definition at line 1895 of file wchar.c.

1896{
1897 /* Note: We deliberately don't check the state's value here. */
1898 while (len > 0)
1899 {
1900 /*
1901 * It's important that the mask value is 31: In most instruction sets,
1902 * a shift by a 32-bit operand is understood to be a shift by its mod
1903 * 32, so the compiler should elide the mask operation.
1904 */
1905 *state = Utf8Transition[*s++] >> (*state & 31);
1906 len--;
1907 }
1908
1909 *state &= 31;
1910}
static const uint32 Utf8Transition[256]
Definition wchar.c:1837

References len, and Utf8Transition.

Referenced by pg_utf8_verifystr().

Variable Documentation

◆ pg_wchar_table

const pg_wchar_tbl pg_wchar_table[]

Definition at line 2086 of file wchar.c.

2086 {
2129};
@ PG_WIN1254
Definition pg_wchar.h:257
@ PG_LATIN4
Definition pg_wchar.h:237
@ PG_LATIN9
Definition pg_wchar.h:242
@ PG_JOHAB
Definition pg_wchar.h:269
@ PG_KOI8R
Definition pg_wchar.h:248
@ PG_ISO_8859_6
Definition pg_wchar.h:252
@ PG_WIN1253
Definition pg_wchar.h:256
@ PG_KOI8U
Definition pg_wchar.h:260
@ PG_LATIN6
Definition pg_wchar.h:239
@ PG_MULE_INTERNAL
Definition pg_wchar.h:233
@ PG_LATIN5
Definition pg_wchar.h:238
@ PG_EUC_CN
Definition pg_wchar.h:228
@ PG_UHC
Definition pg_wchar.h:267
@ PG_LATIN2
Definition pg_wchar.h:235
@ PG_ISO_8859_5
Definition pg_wchar.h:251
@ PG_LATIN10
Definition pg_wchar.h:243
@ PG_WIN1250
Definition pg_wchar.h:255
@ PG_ISO_8859_7
Definition pg_wchar.h:253
@ PG_SJIS
Definition pg_wchar.h:264
@ PG_LATIN8
Definition pg_wchar.h:241
@ PG_EUC_JP
Definition pg_wchar.h:227
@ PG_GBK
Definition pg_wchar.h:266
@ PG_LATIN3
Definition pg_wchar.h:236
@ PG_WIN1256
Definition pg_wchar.h:244
@ PG_LATIN1
Definition pg_wchar.h:234
@ PG_EUC_TW
Definition pg_wchar.h:230
@ PG_WIN1258
Definition pg_wchar.h:245
@ PG_SHIFT_JIS_2004
Definition pg_wchar.h:270
@ PG_WIN1252
Definition pg_wchar.h:250
@ PG_LATIN7
Definition pg_wchar.h:240
@ PG_WIN1255
Definition pg_wchar.h:258
@ PG_WIN1257
Definition pg_wchar.h:259
@ PG_WIN1251
Definition pg_wchar.h:249
@ PG_EUC_KR
Definition pg_wchar.h:229
@ PG_WIN866
Definition pg_wchar.h:246
@ PG_ISO_8859_8
Definition pg_wchar.h:254
@ PG_WIN874
Definition pg_wchar.h:247
@ PG_EUC_JIS_2004
Definition pg_wchar.h:231
@ PG_BIG5
Definition pg_wchar.h:265
static int pg_uhc_verifystr(const unsigned char *s, int len)
Definition wchar.c:1634
static int pg_latin1_dsplen(const unsigned char *s)
Definition wchar.c:904
static int pg_euctw_mblen(const unsigned char *s)
Definition wchar.c:360
static int pg_euckr_dsplen(const unsigned char *s)
Definition wchar.c:227
static int pg_ascii_verifystr(const unsigned char *s, int len)
Definition wchar.c:1091
static int pg_latin1_verifychar(const unsigned char *s, int len)
Definition wchar.c:1432
static int pg_sjis_dsplen(const unsigned char *s)
Definition wchar.c:927
static int pg_eucjp_dsplen(const unsigned char *s)
Definition wchar.c:196
static int pg_ascii2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:73
static int pg_gbk_dsplen(const unsigned char *s)
Definition wchar.c:983
static int pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:215
static int pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:317
#define pg_euccn_verifychar
Definition wchar.c:1246
static int pg_sjis_verifystr(const unsigned char *s, int len)
Definition wchar.c:1472
static int pg_johab_dsplen(const unsigned char *s)
Definition wchar.c:450
static int pg_big5_verifystr(const unsigned char *s, int len)
Definition wchar.c:1526
static int pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:184
static int pg_latin1_verifystr(const unsigned char *s, int len)
Definition wchar.c:1438
static int pg_latin1_mblen(const unsigned char *s)
Definition wchar.c:898
static int pg_ascii_verifychar(const unsigned char *s, int len)
Definition wchar.c:1085
static int pg_ascii_mblen(const unsigned char *s)
Definition wchar.c:88
static int pg_big5_dsplen(const unsigned char *s)
Definition wchar.c:956
#define pg_euccn_verifystr
Definition wchar.c:1247
static int pg_eucjp_mblen(const unsigned char *s)
Definition wchar.c:190
static int pg_euccn_dsplen(const unsigned char *s)
Definition wchar.c:301
static int pg_euctw_verifystr(const unsigned char *s, int len)
Definition wchar.c:1300
static int pg_gbk_verifystr(const unsigned char *s, int len)
Definition wchar.c:1580
static int pg_gb18030_dsplen(const unsigned char *s)
Definition wchar.c:1051
static int pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:462
static int pg_euccn_mblen(const unsigned char *s)
Definition wchar.c:285
static int pg_eucjp_verifystr(const unsigned char *s, int len)
Definition wchar.c:1159
static int pg_johab_verifystr(const unsigned char *s, int len)
Definition wchar.c:1353
static int pg_gb18030_verifystr(const unsigned char *s, int len)
Definition wchar.c:1694
static int pg_euckr_verifystr(const unsigned char *s, int len)
Definition wchar.c:1217
static int pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
Definition wchar.c:749
static int pg_uhc_dsplen(const unsigned char *s)
Definition wchar.c:1010
static int pg_mule_verifystr(const unsigned char *s, int len)
Definition wchar.c:1403
static int pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
Definition wchar.c:398
static int pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
Definition wchar.c:883
static int pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
Definition wchar.c:525
static int pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:237
static int pg_gb18030_mblen(const unsigned char *s)
Definition wchar.c:1037
static int pg_euctw_dsplen(const unsigned char *s)
Definition wchar.c:376
static int pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:861
static int pg_mule_dsplen(const unsigned char *s)
Definition wchar.c:833
static int pg_utf8_verifystr(const unsigned char *s, int len)
Definition wchar.c:1913
static int pg_euckr_mblen(const unsigned char *s)
Definition wchar.c:221
static int pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:692
static int pg_utf_dsplen(const unsigned char *s)
Definition wchar.c:680

Referenced by pg_database_encoding_max_length(), pg_dsplen(), pg_encoding_dsplen(), pg_encoding_max_length(), pg_encoding_max_length_sql(), pg_encoding_mb2wchar_with_len(), pg_encoding_mbcliplen(), pg_encoding_mblen(), pg_encoding_verifymbchar(), pg_encoding_verifymbstr(), pg_encoding_wchar2mb_with_len(), pg_generic_charinc(), pg_mb2wchar(), pg_mb2wchar_with_len(), pg_mblen_cstr(), pg_mblen_range(), pg_mblen_unbounded(), pg_mblen_with_len(), pg_verify_mbstr(), pg_verify_mbstr_len(), pg_wchar2mb(), and pg_wchar2mb_with_len().

◆ Utf8Transition

const uint32 Utf8Transition[256]
static

Definition at line 1837 of file wchar.c.

1838{
1839 /* ASCII */
1840
1841 ILL, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1842 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1843 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1844 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1845
1846 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1847 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1848 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1849 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1850
1851 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1852 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1853 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1854 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1855
1856 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1857 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1858 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1859 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1860
1861 /* continuation bytes */
1862
1863 /* 80..8F */
1864 CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1,
1865 CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1,
1866
1867 /* 90..9F */
1868 CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2,
1869 CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2,
1870
1871 /* A0..BF */
1872 CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
1873 CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
1874 CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
1875 CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
1876
1877 /* leading bytes */
1878
1879 /* C0..DF */
1880 ILL, ILL, L2A, L2A, L2A, L2A, L2A, L2A,
1881 L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
1882 L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
1883 L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
1884
1885 /* E0..EF */
1886 L3A, L3B, L3B, L3B, L3B, L3B, L3B, L3B,
1887 L3B, L3B, L3B, L3B, L3B, L3C, L3B, L3B,
1888
1889 /* F0..FF */
1890 L4A, L4B, L4B, L4B, L4C, ILL, ILL, ILL,
1891 ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL
1892};
#define CR3
Definition wchar.c:1833
#define L3B
Definition wchar.c:1824
#define L2A
Definition wchar.c:1821
#define L4C
Definition wchar.c:1829
#define CR2
Definition wchar.c:1832
#define ASC
Definition wchar.c:1819
#define L3C
Definition wchar.c:1825
#define CR1
Definition wchar.c:1831
#define L3A
Definition wchar.c:1823
#define L4B
Definition wchar.c:1828
#define ILL
Definition wchar.c:1835
#define L4A
Definition wchar.c:1827

Referenced by utf8_advance().