PostgreSQL Source Code git master
Loading...
Searching...
No Matches
wchar.c File Reference
#include "c.h"
#include <limits.h>
#include "mb/pg_wchar.h"
#include "utils/ascii.h"
#include "common/unicode_nonspacing_table.h"
#include "common/unicode_east_asian_fw_table.h"
Include dependency graph for wchar.c:

Go to the source code of this file.

Data Structures

struct  mbinterval
 

Macros

#define NONUTF8_INVALID_BYTE0   (0x8d)
 
#define NONUTF8_INVALID_BYTE1   (' ')
 
#define IS_EUC_RANGE_VALID(c)   ((c) >= 0xa1 && (c) <= 0xfe)
 
#define pg_euccn_verifychar   pg_euckr_verifychar
 
#define pg_euccn_verifystr   pg_euckr_verifystr
 
#define ERR   0
 
#define BGN   11
 
#define CS1   16
 
#define CS2   1
 
#define CS3   5
 
#define P3A   6 /* Lead was E0, check for 3-byte overlong */
 
#define P3B   20 /* Lead was ED, check for surrogate */
 
#define P4A   25 /* Lead was F0, check for 4-byte overlong */
 
#define P4B   30 /* Lead was F4, check for too-large */
 
#define END   BGN
 
#define ASC   (END << BGN)
 
#define L2A   (CS1 << BGN)
 
#define L3A   (P3A << BGN)
 
#define L3B   (CS2 << BGN)
 
#define L3C   (P3B << BGN)
 
#define L4A   (P4A << BGN)
 
#define L4B   (CS3 << BGN)
 
#define L4C   (P4B << BGN)
 
#define CR1   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)
 
#define CR2   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)
 
#define CR3   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)
 
#define ILL   ERR
 
#define STRIDE_LENGTH   (2 * sizeof(Vector8))
 

Functions

static int pg_ascii2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_ascii_mblen (const unsigned char *s)
 
static int pg_ascii_dsplen (const unsigned char *s)
 
static int pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euc_mblen (const unsigned char *s)
 
static int pg_euc_dsplen (const unsigned char *s)
 
static int pg_eucjp2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_eucjp_mblen (const unsigned char *s)
 
static int pg_eucjp_dsplen (const unsigned char *s)
 
static int pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euckr_mblen (const unsigned char *s)
 
static int pg_euckr_dsplen (const unsigned char *s)
 
static int pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euccn_mblen (const unsigned char *s)
 
static int pg_euccn_dsplen (const unsigned char *s)
 
static int pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euctw_mblen (const unsigned char *s)
 
static int pg_euctw_dsplen (const unsigned char *s)
 
static int pg_wchar2euc_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_johab_mblen (const unsigned char *s)
 
static int pg_johab_dsplen (const unsigned char *s)
 
static int pg_utf2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2utf_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_utf_mblen (const unsigned char *s)
 
static int mbbisearch (pg_wchar ucs, const struct mbinterval *table, int max)
 
static int ucs_wcwidth (pg_wchar ucs)
 
static int pg_utf_dsplen (const unsigned char *s)
 
static int pg_mule2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2mule_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_mule_mblen (const unsigned char *s)
 
static int pg_mule_dsplen (const unsigned char *s)
 
static int pg_latin12wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2single_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_latin1_mblen (const unsigned char *s)
 
static int pg_latin1_dsplen (const unsigned char *s)
 
static int pg_sjis_mblen (const unsigned char *s)
 
static int pg_sjis_dsplen (const unsigned char *s)
 
static int pg_big5_mblen (const unsigned char *s)
 
static int pg_big5_dsplen (const unsigned char *s)
 
static int pg_gbk_mblen (const unsigned char *s)
 
static int pg_gbk_dsplen (const unsigned char *s)
 
static int pg_uhc_mblen (const unsigned char *s)
 
static int pg_uhc_dsplen (const unsigned char *s)
 
static int pg_gb18030_mblen (const unsigned char *s)
 
static int pg_gb18030_dsplen (const unsigned char *s)
 
static int pg_ascii_verifychar (const unsigned char *s, int len)
 
static int pg_ascii_verifystr (const unsigned char *s, int len)
 
static int pg_eucjp_verifychar (const unsigned char *s, int len)
 
static int pg_eucjp_verifystr (const unsigned char *s, int len)
 
static int pg_euckr_verifychar (const unsigned char *s, int len)
 
static int pg_euckr_verifystr (const unsigned char *s, int len)
 
static int pg_euctw_verifychar (const unsigned char *s, int len)
 
static int pg_euctw_verifystr (const unsigned char *s, int len)
 
static int pg_johab_verifychar (const unsigned char *s, int len)
 
static int pg_johab_verifystr (const unsigned char *s, int len)
 
static int pg_mule_verifychar (const unsigned char *s, int len)
 
static int pg_mule_verifystr (const unsigned char *s, int len)
 
static int pg_latin1_verifychar (const unsigned char *s, int len)
 
static int pg_latin1_verifystr (const unsigned char *s, int len)
 
static int pg_sjis_verifychar (const unsigned char *s, int len)
 
static int pg_sjis_verifystr (const unsigned char *s, int len)
 
static int pg_big5_verifychar (const unsigned char *s, int len)
 
static int pg_big5_verifystr (const unsigned char *s, int len)
 
static int pg_gbk_verifychar (const unsigned char *s, int len)
 
static int pg_gbk_verifystr (const unsigned char *s, int len)
 
static int pg_uhc_verifychar (const unsigned char *s, int len)
 
static int pg_uhc_verifystr (const unsigned char *s, int len)
 
static int pg_gb18030_verifychar (const unsigned char *s, int len)
 
static int pg_gb18030_verifystr (const unsigned char *s, int len)
 
static int pg_utf8_verifychar (const unsigned char *s, int len)
 
static void utf8_advance (const unsigned char *s, uint32 *state, int len)
 
static int pg_utf8_verifystr (const unsigned char *s, int len)
 
bool pg_utf8_islegal (const unsigned char *source, int length)
 
void pg_encoding_set_invalid (int encoding, char *dst)
 
int pg_encoding_mblen (int encoding, const char *mbstr)
 
int pg_encoding_mblen_or_incomplete (int encoding, const char *mbstr, size_t remaining)
 
int pg_encoding_mblen_bounded (int encoding, const char *mbstr)
 
int pg_encoding_dsplen (int encoding, const char *mbstr)
 
int pg_encoding_verifymbchar (int encoding, const char *mbstr, int len)
 
int pg_encoding_verifymbstr (int encoding, const char *mbstr, int len)
 
int pg_encoding_max_length (int encoding)
 

Variables

static const uint32 Utf8Transition [256]
 
const pg_wchar_tbl pg_wchar_table []
 

Macro Definition Documentation

◆ ASC

#define ASC   (END << BGN)

Definition at line 1797 of file wchar.c.

◆ BGN

#define BGN   11

Definition at line 1781 of file wchar.c.

◆ CR1

#define CR1   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)

Definition at line 1809 of file wchar.c.

◆ CR2

#define CR2   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)

Definition at line 1810 of file wchar.c.

◆ CR3

#define CR3   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)

Definition at line 1811 of file wchar.c.

◆ CS1

#define CS1   16

Definition at line 1783 of file wchar.c.

◆ CS2

#define CS2   1

Definition at line 1784 of file wchar.c.

◆ CS3

#define CS3   5

Definition at line 1785 of file wchar.c.

◆ END

#define END   BGN

Definition at line 1792 of file wchar.c.

◆ ERR

#define ERR   0

Definition at line 1779 of file wchar.c.

◆ ILL

#define ILL   ERR

Definition at line 1813 of file wchar.c.

◆ IS_EUC_RANGE_VALID

#define IS_EUC_RANGE_VALID (   c)    ((c) >= 0xa1 && (c) <= 0xfe)

Definition at line 1079 of file wchar.c.

◆ L2A

#define L2A   (CS1 << BGN)

Definition at line 1799 of file wchar.c.

◆ L3A

#define L3A   (P3A << BGN)

Definition at line 1801 of file wchar.c.

◆ L3B

#define L3B   (CS2 << BGN)

Definition at line 1802 of file wchar.c.

◆ L3C

#define L3C   (P3B << BGN)

Definition at line 1803 of file wchar.c.

◆ L4A

#define L4A   (P4A << BGN)

Definition at line 1805 of file wchar.c.

◆ L4B

#define L4B   (CS3 << BGN)

Definition at line 1806 of file wchar.c.

◆ L4C

#define L4C   (P4B << BGN)

Definition at line 1807 of file wchar.c.

◆ NONUTF8_INVALID_BYTE0

#define NONUTF8_INVALID_BYTE0   (0x8d)

Definition at line 36 of file wchar.c.

◆ NONUTF8_INVALID_BYTE1

#define NONUTF8_INVALID_BYTE1   (' ')

Definition at line 37 of file wchar.c.

◆ P3A

#define P3A   6 /* Lead was E0, check for 3-byte overlong */

Definition at line 1787 of file wchar.c.

◆ P3B

#define P3B   20 /* Lead was ED, check for surrogate */

Definition at line 1788 of file wchar.c.

◆ P4A

#define P4A   25 /* Lead was F0, check for 4-byte overlong */

Definition at line 1789 of file wchar.c.

◆ P4B

#define P4B   30 /* Lead was F4, check for too-large */

Definition at line 1790 of file wchar.c.

◆ pg_euccn_verifychar

#define pg_euccn_verifychar   pg_euckr_verifychar

Definition at line 1224 of file wchar.c.

◆ pg_euccn_verifystr

#define pg_euccn_verifystr   pg_euckr_verifystr

Definition at line 1225 of file wchar.c.

◆ STRIDE_LENGTH

#define STRIDE_LENGTH   (2 * sizeof(Vector8))

Function Documentation

◆ mbbisearch()

static int mbbisearch ( pg_wchar  ucs,
const struct mbinterval * table,
int  max 
)
static

Definition at line 581 of file wchar.c.

582{
583 int min = 0;
584 int mid;
585
586 if (ucs < table[0].first || ucs > table[max].last)
587 return 0;
588 while (max >= min)
589 {
590 mid = (min + max) / 2;
591 if (ucs > table[mid].last)
592 min = mid + 1;
593 else if (ucs < table[mid].first)
594 max = mid - 1;
595 else
596 return 1;
597 }
598
599 return 0;
600}
static const struct lconv_member_info table[]
static int fb(int x)

References fb(), and table.

Referenced by ucs_wcwidth().

◆ pg_ascii2wchar_with_len()

static int pg_ascii2wchar_with_len ( const unsigned char * from,
pg_wchar * to,
int  len 
)
static

Definition at line 70 of file wchar.c.

71{
72 int cnt = 0;
73
74 while (len > 0 && *from)
75 {
76 *to++ = *from++;
77 len--;
78 cnt++;
79 }
80 *to = 0;
81 return cnt;
82}
const void size_t len

References len.

◆ pg_ascii_dsplen()

static int pg_ascii_dsplen ( const unsigned char * s)
static

Definition at line 91 of file wchar.c.

92{
93 if (*s == '\0')
94 return 0;
95 if (*s < 0x20 || *s == 0x7f)
96 return -1;
97
98 return 1;
99}

Referenced by pg_big5_dsplen(), pg_euc_dsplen(), pg_euccn_dsplen(), pg_eucjp_dsplen(), pg_euctw_dsplen(), pg_gb18030_dsplen(), pg_gbk_dsplen(), pg_latin1_dsplen(), pg_sjis_dsplen(), and pg_uhc_dsplen().

◆ pg_ascii_mblen()

static int pg_ascii_mblen ( const unsigned char * s)
static

Definition at line 85 of file wchar.c.

86{
87 return 1;
88}

◆ pg_ascii_verifychar()

static int pg_ascii_verifychar ( const unsigned char * s,
int  len 
)
static

Definition at line 1063 of file wchar.c.

1064{
1065 return 1;
1066}

◆ pg_ascii_verifystr()

static int pg_ascii_verifystr ( const unsigned char * s,
int  len 
)
static

Definition at line 1069 of file wchar.c.

1070{
1071 const unsigned char *nullpos = memchr(s, 0, len);
1072
1073 if (nullpos == NULL)
1074 return len;
1075 else
1076 return nullpos - s;
1077}

References fb(), and len.

◆ pg_big5_dsplen()

static int pg_big5_dsplen ( const unsigned char * s)
static

Definition at line 934 of file wchar.c.

935{
936 int len;
937
938 if (IS_HIGHBIT_SET(*s))
939 len = 2; /* kanji? */
940 else
941 len = pg_ascii_dsplen(s); /* should be ASCII */
942 return len;
943}
#define IS_HIGHBIT_SET(ch)
Definition c.h:1150
static int pg_ascii_dsplen(const unsigned char *s)
Definition wchar.c:91

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_big5_mblen()

static int pg_big5_mblen ( const unsigned char * s)
static

Definition at line 922 of file wchar.c.

923{
924 int len;
925
926 if (IS_HIGHBIT_SET(*s))
927 len = 2; /* kanji? */
928 else
929 len = 1; /* should be ASCII */
930 return len;
931}

References IS_HIGHBIT_SET, and len.

Referenced by pg_big5_verifychar().

◆ pg_big5_verifychar()

static int pg_big5_verifychar ( const unsigned char * s,
int  len 
)
static

Definition at line 1479 of file wchar.c.

1480{
1481 int l,
1482 mbl;
1483
1484 l = mbl = pg_big5_mblen(s);
1485
1486 if (len < l)
1487 return -1;
1488
1489 if (l == 2 &&
1490 s[0] == NONUTF8_INVALID_BYTE0 &&
1491 s[1] == NONUTF8_INVALID_BYTE1)
1492 return -1;
1493
1494 while (--l > 0)
1495 {
1496 if (*++s == '\0')
1497 return -1;
1498 }
1499
1500 return mbl;
1501}
#define NONUTF8_INVALID_BYTE0
Definition wchar.c:36
static int pg_big5_mblen(const unsigned char *s)
Definition wchar.c:922
#define NONUTF8_INVALID_BYTE1
Definition wchar.c:37

References fb(), len, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, and pg_big5_mblen().

Referenced by pg_big5_verifystr().

◆ pg_big5_verifystr()

static int pg_big5_verifystr ( const unsigned char * s,
int  len 
)
static

Definition at line 1504 of file wchar.c.

1505{
1506 const unsigned char *start = s;
1507
1508 while (len > 0)
1509 {
1510 int l;
1511
1512 /* fast path for ASCII-subset characters */
1513 if (!IS_HIGHBIT_SET(*s))
1514 {
1515 if (*s == '\0')
1516 break;
1517 l = 1;
1518 }
1519 else
1520 {
1521 l = pg_big5_verifychar(s, len);
1522 if (l == -1)
1523 break;
1524 }
1525 s += l;
1526 len -= l;
1527 }
1528
1529 return s - start;
1530}
return str start
static int pg_big5_verifychar(const unsigned char *s, int len)
Definition wchar.c:1479

References IS_HIGHBIT_SET, len, pg_big5_verifychar(), and start.

◆ pg_encoding_dsplen()

int pg_encoding_dsplen ( int  encoding,
const char * mbstr 
)

Definition at line 2176 of file wchar.c.

2177{
2178 return (PG_VALID_ENCODING(encoding) ?
2179 pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
2180 pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
2181}
static char * encoding
Definition initdb.c:139
@ PG_SQL_ASCII
Definition pg_wchar.h:226
#define PG_VALID_ENCODING(_enc)
Definition pg_wchar.h:287
const pg_wchar_tbl pg_wchar_table[]
Definition wchar.c:2064

References pg_wchar_tbl::dsplen, encoding, fb(), PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by PQdsplen(), and reportErrorPosition().

◆ pg_encoding_max_length()

int pg_encoding_max_length ( int  encoding)

Definition at line 2213 of file wchar.c.

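2214{
2215 Assert(PG_VALID_ENCODING(encoding));
2216
2217 /*
2218 * Check for the encoding despite the assert, due to some mingw versions
2219 * otherwise issuing bogus warnings.
2220 */
2221 return PG_VALID_ENCODING(encoding) ?
2222 pg_wchar_table[encoding].maxmblen :
2223 pg_wchar_table[PG_SQL_ASCII].maxmblen;
2224}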
#define Assert(condition)
Definition c.h:873

References Assert, encoding, pg_wchar_tbl::maxmblen, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by ascii(), chr(), CopyConvertBuf(), pg_encoding_mbcliplen(), pg_encoding_set_invalid(), pg_verify_mbstr_len(), reportErrorPosition(), test_enc_setup(), and type_maximum_size().

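◆ pg_encoding_mblen()

int pg_encoding_mblen ( int  encoding,
const char * mbstr 
)

Definition at line 2135 of file wchar.c.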

◆ pg_encoding_mblen_bounded()

int pg_encoding_mblen_bounded ( int  encoding,
const char * mbstr 
)

Definition at line 2167 of file wchar.c.

2168{
2169 return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr));
2170}
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition wchar.c:2135

References encoding, fb(), and pg_encoding_mblen().

◆ pg_encoding_mblen_or_incomplete()

int pg_encoding_mblen_or_incomplete ( int  encoding,
const char * mbstr,
size_t  remaining 
)

Definition at line 2147 of file wchar.c.

2149{
2150 /*
2151 * Define zero remaining as too few, even for single-byte encodings.
2152 * pg_gb18030_mblen() reads one or two bytes; single-byte encodings read
2153 * zero; others read one.
2154 */
2155 if (remaining < 1 ||
2156 (encoding == PG_GB18030 && IS_HIGHBIT_SET(*mbstr) && remaining < 2))
2157 return INT_MAX;
2158 return pg_encoding_mblen(encoding, mbstr);
2159}
int remaining
Definition informix.c:692
@ PG_GB18030
Definition pg_wchar.h:268

References encoding, fb(), IS_HIGHBIT_SET, pg_encoding_mblen(), PG_GB18030, and remaining.

Referenced by PQescapeInternal(), PQescapeStringInternal(), report_invalid_encoding(), and report_untranslatable_char().

◆ pg_encoding_set_invalid()

void pg_encoding_set_invalid ( int  encoding,
char * dst 
)

Definition at line 2051 of file wchar.c.

2052{
2053 Assert(pg_encoding_max_length(encoding) > 1);
2054
2055 dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0);
2056 dst[1] = NONUTF8_INVALID_BYTE1;
2057}
#define PG_UTF8
Definition mbprint.c:43
int pg_encoding_max_length(int encoding)
Definition wchar.c:2213

References Assert, encoding, fb(), NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, pg_encoding_max_length(), and PG_UTF8.

Referenced by appendStringLiteral(), fmtIdEnc(), PQescapeStringInternal(), and test_enc_setup().

◆ pg_encoding_verifymbchar()

int pg_encoding_verifymbchar ( int  encoding,
const char * mbstr,
int  len 
)

◆ pg_encoding_verifymbstr()

int pg_encoding_verifymbstr ( int  encoding,
const char * mbstr,
int  len 
)

◆ pg_euc2wchar_with_len()

static int pg_euc2wchar_with_len ( const unsigned char * from,
pg_wchar * to,
int  len 
)
static

Definition at line 105 of file wchar.c.

106{
107 int cnt = 0;
108
109 while (len > 0 && *from)
110 {
111 if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte
112 * KANA") */
113 {
114 from++;
115 *to = (SS2 << 8) | *from++;
116 len -= 2;
117 }
118 else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
119 {
120 from++;
121 *to = (SS3 << 16) | (*from++ << 8);
122 *to |= *from++;
123 len -= 3;
124 }
125 else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
126 {
127 *to = *from++ << 8;
128 *to |= *from++;
129 len -= 2;
130 }
131 else /* must be ASCII */
132 {
133 *to = *from++;
134 len--;
135 }
136 to++;
137 cnt++;
138 }
139 *to = 0;
140 return cnt;
141}
#define SS2
Definition pg_wchar.h:38
#define SS3
Definition pg_wchar.h:39

References IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp2wchar_with_len(), and pg_euckr2wchar_with_len().

◆ pg_euc_dsplen()

static int pg_euc_dsplen ( const unsigned char * s)
inline static

Definition at line 160 of file wchar.c.

161{
162 int len;
163
164 if (*s == SS2)
165 len = 2;
166 else if (*s == SS3)
167 len = 2;
168 else if (IS_HIGHBIT_SET(*s))
169 len = 2;
170 else
171 len = pg_ascii_dsplen(s);
172 return len;
173}

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

Referenced by pg_euckr_dsplen(), and pg_johab_dsplen().

◆ pg_euc_mblen()

static int pg_euc_mblen ( const unsigned char * s)
inline static

Definition at line 144 of file wchar.c.

145{
146 int len;
147
148 if (*s == SS2)
149 len = 2;
150 else if (*s == SS3)
151 len = 3;
152 else if (IS_HIGHBIT_SET(*s))
153 len = 2;
154 else
155 len = 1;
156 return len;
157}

References IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp_mblen(), pg_euckr_mblen(), and pg_johab_mblen().

◆ pg_euccn2wchar_with_len()

static int pg_euccn2wchar_with_len ( const unsigned char * from,
pg_wchar * to,
int  len 
)
static

Definition at line 232 of file wchar.c.

233{
234 int cnt = 0;
235
236 while (len > 0 && *from)
237 {
238 if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
239 {
240 from++;
241 *to = (SS2 << 16) | (*from++ << 8);
242 *to |= *from++;
243 len -= 3;
244 }
245 else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */
246 {
247 from++;
248 *to = (SS3 << 16) | (*from++ << 8);
249 *to |= *from++;
250 len -= 3;
251 }
252 else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
253 {
254 *to = *from++ << 8;
255 *to |= *from++;
256 len -= 2;
257 }
258 else
259 {
260 *to = *from++;
261 len--;
262 }
263 to++;
264 cnt++;
265 }
266 *to = 0;
267 return cnt;
268}

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euccn_dsplen()

static int pg_euccn_dsplen ( const unsigned char * s)
static

Definition at line 283 of file wchar.c.

284{
285 int len;
286
287 if (IS_HIGHBIT_SET(*s))
288 len = 2;
289 else
290 len = pg_ascii_dsplen(s);
291 return len;
292}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_euccn_mblen()

static int pg_euccn_mblen ( const unsigned char * s)
static

Definition at line 271 of file wchar.c.

272{
273 int len;
274
275 if (IS_HIGHBIT_SET(*s))
276 len = 2;
277 else
278 len = 1;
279 return len;
280}

References IS_HIGHBIT_SET, and len.

◆ pg_eucjp2wchar_with_len()

static int pg_eucjp2wchar_with_len ( const unsigned char * from,
pg_wchar * to,
int  len 
)
static

Definition at line 179 of file wchar.c.

180{
181 return pg_euc2wchar_with_len(from, to, len);
182}
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:105

References len, and pg_euc2wchar_with_len().

◆ pg_eucjp_dsplen()

static int pg_eucjp_dsplen ( const unsigned char * s)
static

Definition at line 191 of file wchar.c.

192{
193 int len;
194
195 if (*s == SS2)
196 len = 1;
197 else if (*s == SS3)
198 len = 2;
199 else if (IS_HIGHBIT_SET(*s))
200 len = 2;
201 else
202 len = pg_ascii_dsplen(s);
203 return len;
204}

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

◆ pg_eucjp_mblen()

static int pg_eucjp_mblen ( const unsigned char * s)
static

Definition at line 185 of file wchar.c.

186{
187 return pg_euc_mblen(s);
188}
static int pg_euc_mblen(const unsigned char *s)
Definition wchar.c:144

References pg_euc_mblen().

◆ pg_eucjp_verifychar()

static int pg_eucjp_verifychar ( const unsigned char * s,
int  len 
)
static

Definition at line 1082 of file wchar.c.

1083{
1084 int l;
1085 unsigned char c1,
1086 c2;
1087
1088 c1 = *s++;
1089
1090 switch (c1)
1091 {
1092 case SS2: /* JIS X 0201 */
1093 l = 2;
1094 if (l > len)
1095 return -1;
1096 c2 = *s++;
1097 if (c2 < 0xa1 || c2 > 0xdf)
1098 return -1;
1099 break;
1100
1101 case SS3: /* JIS X 0212 */
1102 l = 3;
1103 if (l > len)
1104 return -1;
1105 c2 = *s++;
1106 if (!IS_EUC_RANGE_VALID(c2))
1107 return -1;
1108 c2 = *s++;
1109 if (!IS_EUC_RANGE_VALID(c2))
1110 return -1;
1111 break;
1112
1113 default:
1114 if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1115 {
1116 l = 2;
1117 if (l > len)
1118 return -1;
1119 if (!IS_EUC_RANGE_VALID(c1))
1120 return -1;
1121 c2 = *s++;
1122 if (!IS_EUC_RANGE_VALID(c2))
1123 return -1;
1124 }
1125 else
1126 /* must be ASCII */
1127 {
1128 l = 1;
1129 }
1130 break;
1131 }
1132
1133 return l;
1134}
#define IS_EUC_RANGE_VALID(c)
Definition wchar.c:1079

References fb(), IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp_verifystr().

◆ pg_eucjp_verifystr()

static int pg_eucjp_verifystr ( const unsigned char * s,
int  len 
)
static

Definition at line 1137 of file wchar.c.

1138{
1139 const unsigned char *start = s;
1140
1141 while (len > 0)
1142 {
1143 int l;
1144
1145 /* fast path for ASCII-subset characters */
1146 if (!IS_HIGHBIT_SET(*s))
1147 {
1148 if (*s == '\0')
1149 break;
1150 l = 1;
1151 }
1152 else
1153 {
1154 l = pg_eucjp_verifychar(s, len);
1155 if (l == -1)
1156 break;
1157 }
1158 s += l;
1159 len -= l;
1160 }
1161
1162 return s - start;
1163}
static int pg_eucjp_verifychar(const unsigned char *s, int len)
Definition wchar.c:1082

References IS_HIGHBIT_SET, len, pg_eucjp_verifychar(), and start.

◆ pg_euckr2wchar_with_len()

static int pg_euckr2wchar_with_len ( const unsigned char * from,
pg_wchar * to,
int  len 
)
static

Definition at line 210 of file wchar.c.

211{
212 return pg_euc2wchar_with_len(from, to, len);
213}

References len, and pg_euc2wchar_with_len().

◆ pg_euckr_dsplen()

static int pg_euckr_dsplen ( const unsigned char * s)
static

Definition at line 222 of file wchar.c.

223{
224 return pg_euc_dsplen(s);
225}
static int pg_euc_dsplen(const unsigned char *s)
Definition wchar.c:160

References pg_euc_dsplen().

◆ pg_euckr_mblen()

static int pg_euckr_mblen ( const unsigned char * s)
static

Definition at line 216 of file wchar.c.

217{
218 return pg_euc_mblen(s);
219}

References pg_euc_mblen().

◆ pg_euckr_verifychar()

static int pg_euckr_verifychar ( const unsigned char * s,
int  len 
)
static

Definition at line 1166 of file wchar.c.

1167{
1168 int l;
1169 unsigned char c1,
1170 c2;
1171
1172 c1 = *s++;
1173
1174 if (IS_HIGHBIT_SET(c1))
1175 {
1176 l = 2;
1177 if (l > len)
1178 return -1;
1179 if (!IS_EUC_RANGE_VALID(c1))
1180 return -1;
1181 c2 = *s++;
1182 if (!IS_EUC_RANGE_VALID(c2))
1183 return -1;
1184 }
1185 else
1186 /* must be ASCII */
1187 {
1188 l = 1;
1189 }
1190
1191 return l;
1192}

References fb(), IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, and len.

Referenced by pg_euckr_verifystr().

◆ pg_euckr_verifystr()

static int pg_euckr_verifystr ( const unsigned char * s,
int  len 
)
static

Definition at line 1195 of file wchar.c.

1196{
1197 const unsigned char *start = s;
1198
1199 while (len > 0)
1200 {
1201 int l;
1202
1203 /* fast path for ASCII-subset characters */
1204 if (!IS_HIGHBIT_SET(*s))
1205 {
1206 if (*s == '\0')
1207 break;
1208 l = 1;
1209 }
1210 else
1211 {
1212 l = pg_euckr_verifychar(s, len);
1213 if (l == -1)
1214 break;
1215 }
1216 s += l;
1217 len -= l;
1218 }
1219
1220 return s - start;
1221}
static int pg_euckr_verifychar(const unsigned char *s, int len)
Definition wchar.c:1166

References IS_HIGHBIT_SET, len, pg_euckr_verifychar(), and start.

◆ pg_euctw2wchar_with_len()

static int pg_euctw2wchar_with_len ( const unsigned char * from,
pg_wchar * to,
int  len 
)
static

Definition at line 299 of file wchar.c.

300{
301 int cnt = 0;
302
303 while (len > 0 && *from)
304 {
305 if (*from == SS2 && len >= 4) /* code set 2 */
306 {
307 from++;
308 *to = (((uint32) SS2) << 24) | (*from++ << 16);
309 *to |= *from++ << 8;
310 *to |= *from++;
311 len -= 4;
312 }
313 else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
314 {
315 from++;
316 *to = (SS3 << 16) | (*from++ << 8);
317 *to |= *from++;
318 len -= 3;
319 }
320 else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */
321 {
322 *to = *from++ << 8;
323 *to |= *from++;
324 len -= 2;
325 }
326 else
327 {
328 *to = *from++;
329 len--;
330 }
331 to++;
332 cnt++;
333 }
334 *to = 0;
335 return cnt;
336}
uint32_t uint32
Definition c.h:546

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euctw_dsplen()

static int pg_euctw_dsplen ( const unsigned char * s)
static

Definition at line 355 of file wchar.c.

356{
357 int len;
358
359 if (*s == SS2)
360 len = 2;
361 else if (*s == SS3)
362 len = 2;
363 else if (IS_HIGHBIT_SET(*s))
364 len = 2;
365 else
366 len = pg_ascii_dsplen(s);
367 return len;
368}

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

◆ pg_euctw_mblen()

static int pg_euctw_mblen ( const unsigned char * s)
static

Definition at line 339 of file wchar.c.

340{
341 int len;
342
343 if (*s == SS2)
344 len = 4;
345 else if (*s == SS3)
346 len = 3;
347 else if (IS_HIGHBIT_SET(*s))
348 len = 2;
349 else
350 len = 1;
351 return len;
352}

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euctw_verifychar()

static int pg_euctw_verifychar ( const unsigned char * s,
int  len 
)
static

Definition at line 1228 of file wchar.c.

1229{
1230 int l;
1231 unsigned char c1,
1232 c2;
1233
1234 c1 = *s++;
1235
1236 switch (c1)
1237 {
1238 case SS2: /* CNS 11643 Plane 1-7 */
1239 l = 4;
1240 if (l > len)
1241 return -1;
1242 c2 = *s++;
1243 if (c2 < 0xa1 || c2 > 0xa7)
1244 return -1;
1245 c2 = *s++;
1246 if (!IS_EUC_RANGE_VALID(c2))
1247 return -1;
1248 c2 = *s++;
1249 if (!IS_EUC_RANGE_VALID(c2))
1250 return -1;
1251 break;
1252
1253 case SS3: /* unused */
1254 return -1;
1255
1256 default:
1257 if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */
1258 {
1259 l = 2;
1260 if (l > len)
1261 return -1;
1262 /* no further range check on c1? */
1263 c2 = *s++;
1264 if (!IS_EUC_RANGE_VALID(c2))
1265 return -1;
1266 }
1267 else
1268 /* must be ASCII */
1269 {
1270 l = 1;
1271 }
1272 break;
1273 }
1274 return l;
1275}

References fb(), IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_euctw_verifystr().

◆ pg_euctw_verifystr()

static int pg_euctw_verifystr ( const unsigned char * s,
int  len 
)
static

Definition at line 1278 of file wchar.c.

1279{
1280 const unsigned char *start = s;
1281
1282 while (len > 0)
1283 {
1284 int l;
1285
1286 /* fast path for ASCII-subset characters */
1287 if (!IS_HIGHBIT_SET(*s))
1288 {
1289 if (*s == '\0')
1290 break;
1291 l = 1;
1292 }
1293 else
1294 {
1295 l = pg_euctw_verifychar(s, len);
1296 if (l == -1)
1297 break;
1298 }
1299 s += l;
1300 len -= l;
1301 }
1302
1303 return s - start;
1304}
static int pg_euctw_verifychar(const unsigned char *s, int len)
Definition wchar.c:1228

References IS_HIGHBIT_SET, len, pg_euctw_verifychar(), and start.

◆ pg_gb18030_dsplen()

static int pg_gb18030_dsplen ( const unsigned char * s)
static

Definition at line 1029 of file wchar.c.

1030{
1031 int len;
1032
1033 if (IS_HIGHBIT_SET(*s))
1034 len = 2;
1035 else
1036 len = pg_ascii_dsplen(s); /* ASCII */
1037 return len;
1038}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_gb18030_mblen()

static int pg_gb18030_mblen ( const unsigned char * s)
static

Definition at line 1015 of file wchar.c.

1016{
1017 int len;
1018
1019 if (!IS_HIGHBIT_SET(*s))
1020 len = 1; /* ASCII */
1021 else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1022 len = 4;
1023 else
1024 len = 2;
1025 return len;
1026}

References IS_HIGHBIT_SET, and len.

◆ pg_gb18030_verifychar()

static int pg_gb18030_verifychar ( const unsigned char * s,
int  len 
)
static

Definition at line 1641 of file wchar.c.

1642{
1643 int l;
1644
1645 if (!IS_HIGHBIT_SET(*s))
1646 l = 1; /* ASCII */
1647 else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1648 {
1649 /* Should be 4-byte, validate remaining bytes */
1650 if (*s >= 0x81 && *s <= 0xfe &&
1651 *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
1652 *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
1653 l = 4;
1654 else
1655 l = -1;
1656 }
1657 else if (len >= 2 && *s >= 0x81 && *s <= 0xfe)
1658 {
1659 /* Should be 2-byte, validate */
1660 if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
1661 (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
1662 l = 2;
1663 else
1664 l = -1;
1665 }
1666 else
1667 l = -1;
1668 return l;
1669}

References IS_HIGHBIT_SET, and len.

Referenced by pg_gb18030_verifystr().

◆ pg_gb18030_verifystr()

static int pg_gb18030_verifystr ( const unsigned char * s,
int  len 
)
static

Definition at line 1672 of file wchar.c.

1673{
1674 const unsigned char *start = s;
1675
1676 while (len > 0)
1677 {
1678 int l;
1679
1680 /* fast path for ASCII-subset characters */
1681 if (!IS_HIGHBIT_SET(*s))
1682 {
1683 if (*s == '\0')
1684 break;
1685 l = 1;
1686 }
1687 else
1688 {
1689 l = pg_gb18030_verifychar(s, len);
1690 if (l == -1)
1691 break;
1692 }
1693 s += l;
1694 len -= l;
1695 }
1696
1697 return s - start;
1698}
static int pg_gb18030_verifychar(const unsigned char *s, int len)
Definition wchar.c:1641

References IS_HIGHBIT_SET, len, pg_gb18030_verifychar(), and start.

◆ pg_gbk_dsplen()

static int pg_gbk_dsplen ( const unsigned char * s)
static

Definition at line 961 of file wchar.c.

962{
963 int len;
964
965 if (IS_HIGHBIT_SET(*s))
966 len = 2; /* kanji? */
967 else
968 len = pg_ascii_dsplen(s); /* should be ASCII */
969 return len;
970}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_gbk_mblen()

static int pg_gbk_mblen ( const unsigned char * s)
static

Definition at line 949 of file wchar.c.

950{
951 int len;
952
953 if (IS_HIGHBIT_SET(*s))
954 len = 2; /* kanji? */
955 else
956 len = 1; /* should be ASCII */
957 return len;
958}

References IS_HIGHBIT_SET, and len.

Referenced by pg_gbk_verifychar().

◆ pg_gbk_verifychar()

static int pg_gbk_verifychar ( const unsigned char * s,
int  len 
)
static

Definition at line 1533 of file wchar.c.

1534{
1535 int l,
1536 mbl;
1537
1538 l = mbl = pg_gbk_mblen(s);
1539
1540 if (len < l)
1541 return -1;
1542
1543 if (l == 2 &&
1544 s[0] == NONUTF8_INVALID_BYTE0 &&
1545 s[1] == NONUTF8_INVALID_BYTE1)
1546 return -1;
1547
1548 while (--l > 0)
1549 {
1550 if (*++s == '\0')
1551 return -1;
1552 }
1553
1554 return mbl;
1555}
static int pg_gbk_mblen(const unsigned char *s)
Definition wchar.c:949

References fb(), len, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, and pg_gbk_mblen().

Referenced by pg_gbk_verifystr().

◆ pg_gbk_verifystr()

static int pg_gbk_verifystr ( const unsigned char *s,
int  len 
)
static

Definition at line 1558 of file wchar.c.

1559{
1560 const unsigned char *start = s;
1561
1562 while (len > 0)
1563 {
1564 int l;
1565
1566 /* fast path for ASCII-subset characters */
1567 if (!IS_HIGHBIT_SET(*s))
1568 {
1569 if (*s == '\0')
1570 break;
1571 l = 1;
1572 }
1573 else
1574 {
1575 l = pg_gbk_verifychar(s, len);
1576 if (l == -1)
1577 break;
1578 }
1579 s += l;
1580 len -= l;
1581 }
1582
1583 return s - start;
1584}
static int pg_gbk_verifychar(const unsigned char *s, int len)
Definition wchar.c:1533

References IS_HIGHBIT_SET, len, pg_gbk_verifychar(), and start.

◆ pg_johab_dsplen()

static int pg_johab_dsplen ( const unsigned char *s)
static

Definition at line 429 of file wchar.c.

430{
431 return pg_euc_dsplen(s);
432}

References pg_euc_dsplen().

◆ pg_johab_mblen()

static int pg_johab_mblen ( const unsigned char *s)
static

Definition at line 423 of file wchar.c.

424{
425 return pg_euc_mblen(s);
426}

References pg_euc_mblen().

Referenced by pg_johab_verifychar().

◆ pg_johab_verifychar()

static int pg_johab_verifychar ( const unsigned char *s,
int  len 
)
static

Definition at line 1307 of file wchar.c.

1308{
1309 int l,
1310 mbl;
1311 unsigned char c;
1312
1313 l = mbl = pg_johab_mblen(s);
1314
1315 if (len < l)
1316 return -1;
1317
1318 if (!IS_HIGHBIT_SET(*s))
1319 return mbl;
1320
1321 while (--l > 0)
1322 {
1323 c = *++s;
1324 if (!IS_EUC_RANGE_VALID(c))
1325 return -1;
1326 }
1327 return mbl;
1328}
char * c
static int pg_johab_mblen(const unsigned char *s)
Definition wchar.c:423

References fb(), IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, and pg_johab_mblen().

Referenced by pg_johab_verifystr().

◆ pg_johab_verifystr()

static int pg_johab_verifystr ( const unsigned char *s,
int  len 
)
static

Definition at line 1331 of file wchar.c.

1332{
1333 const unsigned char *start = s;
1334
1335 while (len > 0)
1336 {
1337 int l;
1338
1339 /* fast path for ASCII-subset characters */
1340 if (!IS_HIGHBIT_SET(*s))
1341 {
1342 if (*s == '\0')
1343 break;
1344 l = 1;
1345 }
1346 else
1347 {
1348 l = pg_johab_verifychar(s, len);
1349 if (l == -1)
1350 break;
1351 }
1352 s += l;
1353 len -= l;
1354 }
1355
1356 return s - start;
1357}
static int pg_johab_verifychar(const unsigned char *s, int len)
Definition wchar.c:1307

References IS_HIGHBIT_SET, len, pg_johab_verifychar(), and start.

◆ pg_latin12wchar_with_len()

static int pg_latin12wchar_with_len ( const unsigned char *from,
pg_wchar *to,
int  len 
)
static

Definition at line 839 of file wchar.c.

840{
841 int cnt = 0;
842
843 while (len > 0 && *from)
844 {
845 *to++ = *from++;
846 len--;
847 cnt++;
848 }
849 *to = 0;
850 return cnt;
851}

References len.

◆ pg_latin1_dsplen()

static int pg_latin1_dsplen ( const unsigned char *s)
static

Definition at line 882 of file wchar.c.

883{
884 return pg_ascii_dsplen(s);
885}

References pg_ascii_dsplen().

◆ pg_latin1_mblen()

static int pg_latin1_mblen ( const unsigned char *s)
static

Definition at line 876 of file wchar.c.

877{
878 return 1;
879}

◆ pg_latin1_verifychar()

static int pg_latin1_verifychar ( const unsigned char *s,
int  len 
)
static

Definition at line 1410 of file wchar.c.

1411{
1412 return 1;
1413}

◆ pg_latin1_verifystr()

static int pg_latin1_verifystr ( const unsigned char *s,
int  len 
)
static

Definition at line 1416 of file wchar.c.

1417{
1418 const unsigned char *nullpos = memchr(s, 0, len);
1419
1420 if (nullpos == NULL)
1421 return len;
1422 else
1423 return nullpos - s;
1424}

References fb(), and len.

◆ pg_mule2wchar_with_len()

static int pg_mule2wchar_with_len ( const unsigned char *from,
pg_wchar *to,
int  len 
)
static

Definition at line 674 of file wchar.c.

675{
676 int cnt = 0;
677
678 while (len > 0 && *from)
679 {
680 if (IS_LC1(*from) && len >= 2)
681 {
682 *to = *from++ << 16;
683 *to |= *from++;
684 len -= 2;
685 }
686 else if (IS_LCPRV1(*from) && len >= 3)
687 {
688 from++;
689 *to = *from++ << 16;
690 *to |= *from++;
691 len -= 3;
692 }
693 else if (IS_LC2(*from) && len >= 3)
694 {
695 *to = *from++ << 16;
696 *to |= *from++ << 8;
697 *to |= *from++;
698 len -= 3;
699 }
700 else if (IS_LCPRV2(*from) && len >= 4)
701 {
702 from++;
703 *to = *from++ << 16;
704 *to |= *from++ << 8;
705 *to |= *from++;
706 len -= 4;
707 }
708 else
709 { /* assume ASCII */
710 *to = (unsigned char) *from++;
711 len--;
712 }
713 to++;
714 cnt++;
715 }
716 *to = 0;
717 return cnt;
718}
#define IS_LCPRV2(c)
Definition pg_wchar.h:164
#define IS_LC2(c)
Definition pg_wchar.h:144
#define IS_LCPRV1(c)
Definition pg_wchar.h:152
#define IS_LC1(c)
Definition pg_wchar.h:126

References fb(), IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

◆ pg_mule_dsplen()

static int pg_mule_dsplen ( const unsigned char *s)
static

Definition at line 811 of file wchar.c.

812{
813 int len;
814
815 /*
816 * Note: it's not really appropriate to assume that all multibyte charsets
817 * are double-wide on screen. But this seems an okay approximation for
818 * the MULE charsets we currently support.
819 */
820
821 if (IS_LC1(*s))
822 len = 1;
823 else if (IS_LCPRV1(*s))
824 len = 1;
825 else if (IS_LC2(*s))
826 len = 2;
827 else if (IS_LCPRV2(*s))
828 len = 2;
829 else
830 len = 1; /* assume ASCII */
831
832 return len;
833}

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

◆ pg_mule_mblen()

int pg_mule_mblen ( const unsigned char *s)

Definition at line 793 of file wchar.c.

794{
795 int len;
796
797 if (IS_LC1(*s))
798 len = 2;
799 else if (IS_LCPRV1(*s))
800 len = 3;
801 else if (IS_LC2(*s))
802 len = 3;
803 else if (IS_LCPRV2(*s))
804 len = 4;
805 else
806 len = 1; /* assume ASCII */
807 return len;
808}

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

Referenced by mic2latin(), mic2latin_with_table(), and pg_mule_verifychar().

◆ pg_mule_verifychar()

static int pg_mule_verifychar ( const unsigned char *s,
int  len 
)
static

Definition at line 1360 of file wchar.c.

1361{
1362 int l,
1363 mbl;
1364 unsigned char c;
1365
1366 l = mbl = pg_mule_mblen(s);
1367
1368 if (len < l)
1369 return -1;
1370
1371 while (--l > 0)
1372 {
1373 c = *++s;
1374 if (!IS_HIGHBIT_SET(c))
1375 return -1;
1376 }
1377 return mbl;
1378}
int pg_mule_mblen(const unsigned char *s)
Definition wchar.c:793

References fb(), IS_HIGHBIT_SET, len, and pg_mule_mblen().

Referenced by pg_mule_verifystr().

◆ pg_mule_verifystr()

static int pg_mule_verifystr ( const unsigned char *s,
int  len 
)
static

Definition at line 1381 of file wchar.c.

1382{
1383 const unsigned char *start = s;
1384
1385 while (len > 0)
1386 {
1387 int l;
1388
1389 /* fast path for ASCII-subset characters */
1390 if (!IS_HIGHBIT_SET(*s))
1391 {
1392 if (*s == '\0')
1393 break;
1394 l = 1;
1395 }
1396 else
1397 {
1398 l = pg_mule_verifychar(s, len);
1399 if (l == -1)
1400 break;
1401 }
1402 s += l;
1403 len -= l;
1404 }
1405
1406 return s - start;
1407}
static int pg_mule_verifychar(const unsigned char *s, int len)
Definition wchar.c:1360

References IS_HIGHBIT_SET, len, pg_mule_verifychar(), and start.

◆ pg_sjis_dsplen()

static int pg_sjis_dsplen ( const unsigned char *s)
static

Definition at line 905 of file wchar.c.

906{
907 int len;
908
909 if (*s >= 0xa1 && *s <= 0xdf)
910 len = 1; /* 1 byte kana? */
911 else if (IS_HIGHBIT_SET(*s))
912 len = 2; /* kanji? */
913 else
914 len = pg_ascii_dsplen(s); /* should be ASCII */
915 return len;
916}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_sjis_mblen()

static int pg_sjis_mblen ( const unsigned char *s)
static

Definition at line 891 of file wchar.c.

892{
893 int len;
894
895 if (*s >= 0xa1 && *s <= 0xdf)
896 len = 1; /* 1 byte kana? */
897 else if (IS_HIGHBIT_SET(*s))
898 len = 2; /* kanji? */
899 else
900 len = 1; /* should be ASCII */
901 return len;
902}

References IS_HIGHBIT_SET, and len.

Referenced by pg_sjis_verifychar().

◆ pg_sjis_verifychar()

static int pg_sjis_verifychar ( const unsigned char *s,
int  len 
)
static

Definition at line 1427 of file wchar.c.

1428{
1429 int l,
1430 mbl;
1431 unsigned char c1,
1432 c2;
1433
1434 l = mbl = pg_sjis_mblen(s);
1435
1436 if (len < l)
1437 return -1;
1438
1439 if (l == 1) /* pg_sjis_mblen already verified it */
1440 return mbl;
1441
1442 c1 = *s++;
1443 c2 = *s;
1444 if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2))
1445 return -1;
1446 return mbl;
1447}
#define ISSJISTAIL(c)
Definition pg_wchar.h:45
#define ISSJISHEAD(c)
Definition pg_wchar.h:44
static int pg_sjis_mblen(const unsigned char *s)
Definition wchar.c:891

References fb(), ISSJISHEAD, ISSJISTAIL, len, and pg_sjis_mblen().

Referenced by pg_sjis_verifystr().

◆ pg_sjis_verifystr()

static int pg_sjis_verifystr ( const unsigned char *s,
int  len 
)
static

Definition at line 1450 of file wchar.c.

1451{
1452 const unsigned char *start = s;
1453
1454 while (len > 0)
1455 {
1456 int l;
1457
1458 /* fast path for ASCII-subset characters */
1459 if (!IS_HIGHBIT_SET(*s))
1460 {
1461 if (*s == '\0')
1462 break;
1463 l = 1;
1464 }
1465 else
1466 {
1467 l = pg_sjis_verifychar(s, len);
1468 if (l == -1)
1469 break;
1470 }
1471 s += l;
1472 len -= l;
1473 }
1474
1475 return s - start;
1476}
static int pg_sjis_verifychar(const unsigned char *s, int len)
Definition wchar.c:1427

References IS_HIGHBIT_SET, len, pg_sjis_verifychar(), and start.

◆ pg_uhc_dsplen()

static int pg_uhc_dsplen ( const unsigned char *s)
static

Definition at line 988 of file wchar.c.

989{
990 int len;
991
992 if (IS_HIGHBIT_SET(*s))
993 len = 2; /* 2byte? */
994 else
995 len = pg_ascii_dsplen(s); /* should be ASCII */
996 return len;
997}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_uhc_mblen()

static int pg_uhc_mblen ( const unsigned char *s)
static

Definition at line 976 of file wchar.c.

977{
978 int len;
979
980 if (IS_HIGHBIT_SET(*s))
981 len = 2; /* 2byte? */
982 else
983 len = 1; /* should be ASCII */
984 return len;
985}

References IS_HIGHBIT_SET, and len.

Referenced by pg_uhc_verifychar().

◆ pg_uhc_verifychar()

static int pg_uhc_verifychar ( const unsigned char *s,
int  len 
)
static

Definition at line 1587 of file wchar.c.

1588{
1589 int l,
1590 mbl;
1591
1592 l = mbl = pg_uhc_mblen(s);
1593
1594 if (len < l)
1595 return -1;
1596
1597 if (l == 2 &&
1598 s[0] == NONUTF8_INVALID_BYTE0 &&
1599 s[1] == NONUTF8_INVALID_BYTE1)
1600 return -1;
1601
1602 while (--l > 0)
1603 {
1604 if (*++s == '\0')
1605 return -1;
1606 }
1607
1608 return mbl;
1609}
static int pg_uhc_mblen(const unsigned char *s)
Definition wchar.c:976

References fb(), len, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, and pg_uhc_mblen().

Referenced by pg_uhc_verifystr().

◆ pg_uhc_verifystr()

static int pg_uhc_verifystr ( const unsigned char *s,
int  len 
)
static

Definition at line 1612 of file wchar.c.

1613{
1614 const unsigned char *start = s;
1615
1616 while (len > 0)
1617 {
1618 int l;
1619
1620 /* fast path for ASCII-subset characters */
1621 if (!IS_HIGHBIT_SET(*s))
1622 {
1623 if (*s == '\0')
1624 break;
1625 l = 1;
1626 }
1627 else
1628 {
1629 l = pg_uhc_verifychar(s, len);
1630 if (l == -1)
1631 break;
1632 }
1633 s += l;
1634 len -= l;
1635 }
1636
1637 return s - start;
1638}
static int pg_uhc_verifychar(const unsigned char *s, int len)
Definition wchar.c:1587

References IS_HIGHBIT_SET, len, pg_uhc_verifychar(), and start.

◆ pg_utf2wchar_with_len()

static int pg_utf2wchar_with_len ( const unsigned char *from,
pg_wchar *to,
int  len 
)
static

Definition at line 441 of file wchar.c.

442{
443 int cnt = 0;
444 uint32 c1,
445 c2,
446 c3,
447 c4;
448
449 while (len > 0 && *from)
450 {
451 if ((*from & 0x80) == 0)
452 {
453 *to = *from++;
454 len--;
455 }
456 else if ((*from & 0xe0) == 0xc0)
457 {
458 if (len < 2)
459 break; /* drop trailing incomplete char */
460 c1 = *from++ & 0x1f;
461 c2 = *from++ & 0x3f;
462 *to = (c1 << 6) | c2;
463 len -= 2;
464 }
465 else if ((*from & 0xf0) == 0xe0)
466 {
467 if (len < 3)
468 break; /* drop trailing incomplete char */
469 c1 = *from++ & 0x0f;
470 c2 = *from++ & 0x3f;
471 c3 = *from++ & 0x3f;
472 *to = (c1 << 12) | (c2 << 6) | c3;
473 len -= 3;
474 }
475 else if ((*from & 0xf8) == 0xf0)
476 {
477 if (len < 4)
478 break; /* drop trailing incomplete char */
479 c1 = *from++ & 0x07;
480 c2 = *from++ & 0x3f;
481 c3 = *from++ & 0x3f;
482 c4 = *from++ & 0x3f;
483 *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
484 len -= 4;
485 }
486 else
487 {
488 /* treat a bogus char as length 1; not ours to raise error */
489 *to = *from++;
490 len--;
491 }
492 to++;
493 cnt++;
494 }
495 *to = 0;
496 return cnt;
497}

References fb(), and len.

◆ pg_utf8_islegal()

bool pg_utf8_islegal ( const unsigned char *source,
int  length 
)

Definition at line 1989 of file wchar.c.

1990{
1991 unsigned char a;
1992
1993 switch (length)
1994 {
1995 default:
1996 /* reject lengths 5 and 6 for now */
1997 return false;
1998 case 4:
1999 a = source[3];
2000 if (a < 0x80 || a > 0xBF)
2001 return false;
2002 /* FALL THRU */
2003 case 3:
2004 a = source[2];
2005 if (a < 0x80 || a > 0xBF)
2006 return false;
2007 /* FALL THRU */
2008 case 2:
2009 a = source[1];
2010 switch (*source)
2011 {
2012 case 0xE0:
2013 if (a < 0xA0 || a > 0xBF)
2014 return false;
2015 break;
2016 case 0xED:
2017 if (a < 0x80 || a > 0x9F)
2018 return false;
2019 break;
2020 case 0xF0:
2021 if (a < 0x90 || a > 0xBF)
2022 return false;
2023 break;
2024 case 0xF4:
2025 if (a < 0x80 || a > 0x8F)
2026 return false;
2027 break;
2028 default:
2029 if (a < 0x80 || a > 0xBF)
2030 return false;
2031 break;
2032 }
2033 /* FALL THRU */
2034 case 1:
2035 a = *source;
2036 if (a >= 0x80 && a < 0xC2)
2037 return false;
2038 if (a > 0xF4)
2039 return false;
2040 break;
2041 }
2042 return true;
2043}
int a
Definition isn.c:73
static rewind_source * source
Definition pg_rewind.c:89

References a, fb(), and source.

Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifychar(), utf8_to_iso8859_1(), and UtfToLocal().

◆ pg_utf8_verifychar()

static int pg_utf8_verifychar ( const unsigned char *s,
int  len 
)
static

Definition at line 1701 of file wchar.c.

1702{
1703 int l;
1704
1705 if ((*s & 0x80) == 0)
1706 {
1707 if (*s == '\0')
1708 return -1;
1709 return 1;
1710 }
1711 else if ((*s & 0xe0) == 0xc0)
1712 l = 2;
1713 else if ((*s & 0xf0) == 0xe0)
1714 l = 3;
1715 else if ((*s & 0xf8) == 0xf0)
1716 l = 4;
1717 else
1718 l = 1;
1719
1720 if (l > len)
1721 return -1;
1722
1723 if (!pg_utf8_islegal(s, l))
1724 return -1;
1725
1726 return l;
1727}
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition wchar.c:1989

References len, and pg_utf8_islegal().

Referenced by pg_utf8_verifystr().

◆ pg_utf8_verifystr()

static int pg_utf8_verifystr ( const unsigned char *s,
int  len 
)
static

Definition at line 1891 of file wchar.c.

1892{
1893 const unsigned char *start = s;
1894 const int orig_len = len;
1895 uint32 state = BGN;
1896
1897/*
1898 * With a stride of two vector widths, gcc will unroll the loop. Even if
1899 * the compiler can unroll a longer loop, it's not worth it because we
1900 * must fall back to the byte-wise algorithm if we find any non-ASCII.
1901 */
1902#define STRIDE_LENGTH (2 * sizeof(Vector8))
1903
1904 if (len >= STRIDE_LENGTH)
1905 {
1906 while (len >= STRIDE_LENGTH)
1907 {
1908 /*
1909 * If the chunk is all ASCII, we can skip the full UTF-8 check,
1910 * but we must first check for a non-END state, which means the
1911 * previous chunk ended in the middle of a multibyte sequence.
1912 */
1913 if (state != END || !is_valid_ascii(s, STRIDE_LENGTH))
1914 utf8_advance(s, &state, STRIDE_LENGTH);
1915
1916 s += STRIDE_LENGTH;
1917 len -= STRIDE_LENGTH;
1918 }
1919
1920 /* The error state persists, so we only need to check for it here. */
1921 if (state == ERR)
1922 {
1923 /*
1924 * Start over from the beginning with the slow path so we can
1925 * count the valid bytes.
1926 */
1927 len = orig_len;
1928 s = start;
1929 }
1930 else if (state != END)
1931 {
1932 /*
1933 * The fast path exited in the middle of a multibyte sequence.
1934 * Walk backwards to find the leading byte so that the slow path
1935 * can resume checking from there. We must always backtrack at
1936 * least one byte, since the current byte could be e.g. an ASCII
1937 * byte after a 2-byte lead, which is invalid.
1938 */
1939 do
1940 {
1941 Assert(s > start);
1942 s--;
1943 len++;
1944 Assert(IS_HIGHBIT_SET(*s));
1945 } while (pg_utf_mblen(s) <= 1);
1946 }
1947 }
1948
1949 /* check remaining bytes */
1950 while (len > 0)
1951 {
1952 int l;
1953
1954 /* fast path for ASCII-subset characters */
1955 if (!IS_HIGHBIT_SET(*s))
1956 {
1957 if (*s == '\0')
1958 break;
1959 l = 1;
1960 }
1961 else
1962 {
1963 l = pg_utf8_verifychar(s, len);
1964 if (l == -1)
1965 break;
1966 }
1967 s += l;
1968 len -= l;
1969 }
1970
1971 return s - start;
1972}
static bool is_valid_ascii(const unsigned char *s, int len)
Definition ascii.h:25
#define pg_utf_mblen
Definition pg_wchar.h:633
#define END
Definition wchar.c:1792
#define ERR
Definition wchar.c:1779
static int pg_utf8_verifychar(const unsigned char *s, int len)
Definition wchar.c:1701
static void utf8_advance(const unsigned char *s, uint32 *state, int len)
Definition wchar.c:1873
#define BGN
Definition wchar.c:1781
#define STRIDE_LENGTH

References Assert, BGN, END, ERR, fb(), IS_HIGHBIT_SET, is_valid_ascii(), len, pg_utf8_verifychar(), pg_utf_mblen, start, STRIDE_LENGTH, and utf8_advance().

◆ pg_utf_dsplen()

static int pg_utf_dsplen ( const unsigned char *s)
static

Definition at line 662 of file wchar.c.

663{
664 return ucs_wcwidth(utf8_to_unicode(s));
665}
static char32_t utf8_to_unicode(const unsigned char *c)
Definition mbprint.c:53
static int ucs_wcwidth(pg_wchar ucs)
Definition wchar.c:628

References ucs_wcwidth(), and utf8_to_unicode().

◆ pg_utf_mblen()

int pg_utf_mblen ( const unsigned char *s)

Definition at line 538 of file wchar.c.

539{
540 int len;
541
542 if ((*s & 0x80) == 0)
543 len = 1;
544 else if ((*s & 0xe0) == 0xc0)
545 len = 2;
546 else if ((*s & 0xf0) == 0xe0)
547 len = 3;
548 else if ((*s & 0xf8) == 0xf0)
549 len = 4;
550#ifdef NOT_USED
551 else if ((*s & 0xfc) == 0xf8)
552 len = 5;
553 else if ((*s & 0xfe) == 0xfc)
554 len = 6;
555#endif
556 else
557 len = 1;
558 return len;
559}

References len.

◆ pg_wchar2euc_with_len()

static int pg_wchar2euc_with_len ( const pg_wchar *from,
unsigned char *to,
int  len 
)
static

Definition at line 377 of file wchar.c.

378{
379 int cnt = 0;
380
381 while (len > 0 && *from)
382 {
383 unsigned char c;
384
385 if ((c = (*from >> 24)))
386 {
387 *to++ = c;
388 *to++ = (*from >> 16) & 0xff;
389 *to++ = (*from >> 8) & 0xff;
390 *to++ = *from & 0xff;
391 cnt += 4;
392 }
393 else if ((c = (*from >> 16)))
394 {
395 *to++ = c;
396 *to++ = (*from >> 8) & 0xff;
397 *to++ = *from & 0xff;
398 cnt += 3;
399 }
400 else if ((c = (*from >> 8)))
401 {
402 *to++ = c;
403 *to++ = *from & 0xff;
404 cnt += 2;
405 }
406 else
407 {
408 *to++ = *from;
409 cnt++;
410 }
411 from++;
412 len--;
413 }
414 *to = 0;
415 return cnt;
416}

References len.

◆ pg_wchar2mule_with_len()

static int pg_wchar2mule_with_len ( const pg_wchar *from,
unsigned char *to,
int  len 
)
static

Definition at line 727 of file wchar.c.

728{
729 int cnt = 0;
730
731 while (len > 0 && *from)
732 {
733 unsigned char lb;
734
735 lb = (*from >> 16) & 0xff;
736 if (IS_LC1(lb))
737 {
738 *to++ = lb;
739 *to++ = *from & 0xff;
740 cnt += 2;
741 }
742 else if (IS_LC2(lb))
743 {
744 *to++ = lb;
745 *to++ = (*from >> 8) & 0xff;
746 *to++ = *from & 0xff;
747 cnt += 3;
748 }
749 else if (IS_LCPRV1_A_RANGE(lb))
750 {
751 *to++ = LCPRV1_A;
752 *to++ = lb;
753 *to++ = *from & 0xff;
754 cnt += 3;
755 }
756 else if (IS_LCPRV1_B_RANGE(lb))
757 {
758 *to++ = LCPRV1_B;
759 *to++ = lb;
760 *to++ = *from & 0xff;
761 cnt += 3;
762 }
763 else if (IS_LCPRV2_A_RANGE(lb))
764 {
765 *to++ = LCPRV2_A;
766 *to++ = lb;
767 *to++ = (*from >> 8) & 0xff;
768 *to++ = *from & 0xff;
769 cnt += 4;
770 }
771 else if (IS_LCPRV2_B_RANGE(lb))
772 {
773 *to++ = LCPRV2_B;
774 *to++ = lb;
775 *to++ = (*from >> 8) & 0xff;
776 *to++ = *from & 0xff;
777 cnt += 4;
778 }
779 else
780 {
781 *to++ = *from & 0xff;
782 cnt += 1;
783 }
784 from++;
785 len--;
786 }
787 *to = 0;
788 return cnt;
789}
#define LCPRV1_A
Definition pg_wchar.h:150
#define LCPRV1_B
Definition pg_wchar.h:151
#define LCPRV2_A
Definition pg_wchar.h:162
#define IS_LCPRV2_B_RANGE(c)
Definition pg_wchar.h:167
#define IS_LCPRV1_A_RANGE(c)
Definition pg_wchar.h:153
#define IS_LCPRV1_B_RANGE(c)
Definition pg_wchar.h:155
#define IS_LCPRV2_A_RANGE(c)
Definition pg_wchar.h:165
#define LCPRV2_B
Definition pg_wchar.h:163

References IS_LC1, IS_LC2, IS_LCPRV1_A_RANGE, IS_LCPRV1_B_RANGE, IS_LCPRV2_A_RANGE, IS_LCPRV2_B_RANGE, LCPRV1_A, LCPRV1_B, LCPRV2_A, LCPRV2_B, and len.

◆ pg_wchar2single_with_len()

static int pg_wchar2single_with_len ( const pg_wchar *from,
unsigned char *to,
int  len 
)
static

Definition at line 861 of file wchar.c.

862{
863 int cnt = 0;
864
865 while (len > 0 && *from)
866 {
867 *to++ = *from++;
868 len--;
869 cnt++;
870 }
871 *to = 0;
872 return cnt;
873}

References len.

◆ pg_wchar2utf_with_len()

static int pg_wchar2utf_with_len ( const pg_wchar *from,
unsigned char *to,
int  len 
)
static

Definition at line 507 of file wchar.c.

508{
509 int cnt = 0;
510
511 while (len > 0 && *from)
512 {
513 int char_len;
514
515 unicode_to_utf8(*from, to);
516 char_len = pg_utf_mblen(to);
517 cnt += char_len;
518 to += char_len;
519 from++;
520 len--;
521 }
522 *to = 0;
523 return cnt;
524}
static unsigned char * unicode_to_utf8(char32_t c, unsigned char *utf8string)
Definition pg_wchar.h:575

References fb(), len, pg_utf_mblen, and unicode_to_utf8().

◆ ucs_wcwidth()

static int ucs_wcwidth ( pg_wchar  ucs)
static

Definition at line 628 of file wchar.c.

629{
630#include "common/unicode_nonspacing_table.h"
631#include "common/unicode_east_asian_fw_table.h"
632
633 /* test for 8-bit control characters */
634 if (ucs == 0)
635 return 0;
636
637 if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
638 return -1;
639
640 /*
641 * binary search in table of non-spacing characters
642 *
643 * XXX: In the official Unicode sources, it is possible for a character to
644 * be described as both non-spacing and wide at the same time. As of
645 * Unicode 13.0, treating the non-spacing property as the determining
646 * factor for display width leads to the correct behavior, so do that
647 * search first.
648 */
649 if (mbbisearch(ucs, nonspacing,
650 sizeof(nonspacing) / sizeof(struct mbinterval) - 1))
651 return 0;
652
653 /* binary search in table of wide characters */
654 if (mbbisearch(ucs, east_asian_fw,
655 sizeof(east_asian_fw) / sizeof(struct mbinterval) - 1))
656 return 2;
657
658 return 1;
659}
static const struct mbinterval east_asian_fw[]
static const struct mbinterval nonspacing[]
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
Definition wchar.c:581

References east_asian_fw, fb(), mbbisearch(), and nonspacing.

Referenced by pg_utf_dsplen().

◆ utf8_advance()

static void utf8_advance ( const unsigned char *s,
uint32 *state,
int  len 
)
static

Definition at line 1873 of file wchar.c.

1874{
1875 /* Note: We deliberately don't check the state's value here. */
1876 while (len > 0)
1877 {
1878 /*
1879 * It's important that the mask value is 31: In most instruction sets,
1880 * a shift by a 32-bit operand is understood to be a shift by its mod
1881 * 32, so the compiler should elide the mask operation.
1882 */
1883 *state = Utf8Transition[*s++] >> (*state & 31);
1884 len--;
1885 }
1886
1887 *state &= 31;
1888}
static const uint32 Utf8Transition[256]
Definition wchar.c:1815

References len, and Utf8Transition.

Referenced by pg_utf8_verifystr().

Variable Documentation

◆ pg_wchar_table

const pg_wchar_tbl pg_wchar_table[]

Definition at line 2064 of file wchar.c.

2064 {
2107};
@ PG_WIN1254
Definition pg_wchar.h:257
@ PG_LATIN4
Definition pg_wchar.h:237
@ PG_LATIN9
Definition pg_wchar.h:242
@ PG_JOHAB
Definition pg_wchar.h:269
@ PG_KOI8R
Definition pg_wchar.h:248
@ PG_ISO_8859_6
Definition pg_wchar.h:252
@ PG_WIN1253
Definition pg_wchar.h:256
@ PG_KOI8U
Definition pg_wchar.h:260
@ PG_LATIN6
Definition pg_wchar.h:239
@ PG_MULE_INTERNAL
Definition pg_wchar.h:233
@ PG_LATIN5
Definition pg_wchar.h:238
@ PG_EUC_CN
Definition pg_wchar.h:228
@ PG_UHC
Definition pg_wchar.h:267
@ PG_LATIN2
Definition pg_wchar.h:235
@ PG_ISO_8859_5
Definition pg_wchar.h:251
@ PG_LATIN10
Definition pg_wchar.h:243
@ PG_WIN1250
Definition pg_wchar.h:255
@ PG_ISO_8859_7
Definition pg_wchar.h:253
@ PG_SJIS
Definition pg_wchar.h:264
@ PG_LATIN8
Definition pg_wchar.h:241
@ PG_EUC_JP
Definition pg_wchar.h:227
@ PG_GBK
Definition pg_wchar.h:266
@ PG_LATIN3
Definition pg_wchar.h:236
@ PG_WIN1256
Definition pg_wchar.h:244
@ PG_LATIN1
Definition pg_wchar.h:234
@ PG_EUC_TW
Definition pg_wchar.h:230
@ PG_WIN1258
Definition pg_wchar.h:245
@ PG_SHIFT_JIS_2004
Definition pg_wchar.h:270
@ PG_WIN1252
Definition pg_wchar.h:250
@ PG_LATIN7
Definition pg_wchar.h:240
@ PG_WIN1255
Definition pg_wchar.h:258
@ PG_WIN1257
Definition pg_wchar.h:259
@ PG_WIN1251
Definition pg_wchar.h:249
@ PG_EUC_KR
Definition pg_wchar.h:229
@ PG_WIN866
Definition pg_wchar.h:246
@ PG_ISO_8859_8
Definition pg_wchar.h:254
@ PG_WIN874
Definition pg_wchar.h:247
@ PG_EUC_JIS_2004
Definition pg_wchar.h:231
@ PG_BIG5
Definition pg_wchar.h:265
static int pg_uhc_verifystr(const unsigned char *s, int len)
Definition wchar.c:1612
static int pg_latin1_dsplen(const unsigned char *s)
Definition wchar.c:882
static int pg_euctw_mblen(const unsigned char *s)
Definition wchar.c:339
static int pg_euckr_dsplen(const unsigned char *s)
Definition wchar.c:222
static int pg_ascii_verifystr(const unsigned char *s, int len)
Definition wchar.c:1069
static int pg_latin1_verifychar(const unsigned char *s, int len)
Definition wchar.c:1410
static int pg_sjis_dsplen(const unsigned char *s)
Definition wchar.c:905
static int pg_eucjp_dsplen(const unsigned char *s)
Definition wchar.c:191
static int pg_ascii2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:70
static int pg_gbk_dsplen(const unsigned char *s)
Definition wchar.c:961
static int pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:210
static int pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:299
#define pg_euccn_verifychar
Definition wchar.c:1224
static int pg_sjis_verifystr(const unsigned char *s, int len)
Definition wchar.c:1450
static int pg_johab_dsplen(const unsigned char *s)
Definition wchar.c:429
static int pg_big5_verifystr(const unsigned char *s, int len)
Definition wchar.c:1504
static int pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:179
static int pg_latin1_verifystr(const unsigned char *s, int len)
Definition wchar.c:1416
static int pg_latin1_mblen(const unsigned char *s)
Definition wchar.c:876
static int pg_ascii_verifychar(const unsigned char *s, int len)
Definition wchar.c:1063
static int pg_ascii_mblen(const unsigned char *s)
Definition wchar.c:85
static int pg_big5_dsplen(const unsigned char *s)
Definition wchar.c:934
#define pg_euccn_verifystr
Definition wchar.c:1225
static int pg_eucjp_mblen(const unsigned char *s)
Definition wchar.c:185
static int pg_euccn_dsplen(const unsigned char *s)
Definition wchar.c:283
static int pg_euctw_verifystr(const unsigned char *s, int len)
Definition wchar.c:1278
static int pg_gbk_verifystr(const unsigned char *s, int len)
Definition wchar.c:1558
static int pg_gb18030_dsplen(const unsigned char *s)
Definition wchar.c:1029
static int pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:441
static int pg_euccn_mblen(const unsigned char *s)
Definition wchar.c:271
static int pg_eucjp_verifystr(const unsigned char *s, int len)
Definition wchar.c:1137
static int pg_johab_verifystr(const unsigned char *s, int len)
Definition wchar.c:1331
static int pg_gb18030_verifystr(const unsigned char *s, int len)
Definition wchar.c:1672
static int pg_euckr_verifystr(const unsigned char *s, int len)
Definition wchar.c:1195
static int pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
Definition wchar.c:727
static int pg_uhc_dsplen(const unsigned char *s)
Definition wchar.c:988
static int pg_mule_verifystr(const unsigned char *s, int len)
Definition wchar.c:1381
static int pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
Definition wchar.c:377
static int pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
Definition wchar.c:861
static int pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
Definition wchar.c:507
static int pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:232
static int pg_gb18030_mblen(const unsigned char *s)
Definition wchar.c:1015
static int pg_euctw_dsplen(const unsigned char *s)
Definition wchar.c:355
static int pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:839
static int pg_mule_dsplen(const unsigned char *s)
Definition wchar.c:811
static int pg_utf8_verifystr(const unsigned char *s, int len)
Definition wchar.c:1891
static int pg_euckr_mblen(const unsigned char *s)
Definition wchar.c:216
static int pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition wchar.c:674
static int pg_utf_dsplen(const unsigned char *s)
Definition wchar.c:662

Referenced by pg_database_encoding_max_length(), pg_dsplen(), pg_encoding_dsplen(), pg_encoding_max_length(), pg_encoding_max_length_sql(), pg_encoding_mb2wchar_with_len(), pg_encoding_mbcliplen(), pg_encoding_mblen(), pg_encoding_verifymbchar(), pg_encoding_verifymbstr(), pg_encoding_wchar2mb_with_len(), pg_generic_charinc(), pg_mb2wchar(), pg_mb2wchar_with_len(), pg_mblen(), pg_verify_mbstr(), pg_verify_mbstr_len(), pg_wchar2mb(), and pg_wchar2mb_with_len().

◆ Utf8Transition

const uint32 Utf8Transition[256]
static

Definition at line 1815 of file wchar.c.

1816{
1817 /* ASCII */
1818
1819 ILL, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1820 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1821 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1822 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1823
1824 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1825 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1826 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1827 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1828
1829 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1830 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1831 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1832 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1833
1834 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1835 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1836 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1837 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1838
1839 /* continuation bytes */
1840
1841 /* 80..8F */
1842 CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1,
1843 CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1,
1844
1845 /* 90..9F */
1846 CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2,
1847 CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2,
1848
1849 /* A0..BF */
1850 CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
1851 CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
1852 CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
1853 CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
1854
1855 /* leading bytes */
1856
1857 /* C0..DF */
1858 ILL, ILL, L2A, L2A, L2A, L2A, L2A, L2A,
1859 L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
1860 L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
1861 L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
1862
1863 /* E0..EF */
1864 L3A, L3B, L3B, L3B, L3B, L3B, L3B, L3B,
1865 L3B, L3B, L3B, L3B, L3B, L3C, L3B, L3B,
1866
1867 /* F0..FF */
1868 L4A, L4B, L4B, L4B, L4C, ILL, ILL, ILL,
1869 ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL
1870};
#define CR3
Definition wchar.c:1811
#define L3B
Definition wchar.c:1802
#define L2A
Definition wchar.c:1799
#define L4C
Definition wchar.c:1807
#define CR2
Definition wchar.c:1810
#define ASC
Definition wchar.c:1797
#define L3C
Definition wchar.c:1803
#define CR1
Definition wchar.c:1809
#define L3A
Definition wchar.c:1801
#define L4B
Definition wchar.c:1806
#define ILL
Definition wchar.c:1813
#define L4A
Definition wchar.c:1805

Referenced by utf8_advance().