PostgreSQL Source Code git master
wchar.c File Reference
Include dependency graph for wchar.c:

Go to the source code of this file.

Data Structures

struct  mbinterval
 

Macros

#define IS_EUC_RANGE_VALID(c)   ((c) >= 0xa1 && (c) <= 0xfe)
 
#define pg_euccn_verifychar   pg_euckr_verifychar
 
#define pg_euccn_verifystr   pg_euckr_verifystr
 
#define ERR   0
 
#define BGN   11
 
#define CS1   16
 
#define CS2   1
 
#define CS3   5
 
#define P3A   6 /* Lead was E0, check for 3-byte overlong */
 
#define P3B   20 /* Lead was ED, check for surrogate */
 
#define P4A   25 /* Lead was F0, check for 4-byte overlong */
 
#define P4B   30 /* Lead was F4, check for too-large */
 
#define END   BGN
 
#define ASC   (END << BGN)
 
#define L2A   (CS1 << BGN)
 
#define L3A   (P3A << BGN)
 
#define L3B   (CS2 << BGN)
 
#define L3C   (P3B << BGN)
 
#define L4A   (P4A << BGN)
 
#define L4B   (CS3 << BGN)
 
#define L4C   (P4B << BGN)
 
#define CR1   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)
 
#define CR2   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)
 
#define CR3   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)
 
#define ILL   ERR
 
#define STRIDE_LENGTH   (2 * sizeof(Vector8))
 

Functions

static int pg_ascii2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_ascii_mblen (const unsigned char *s)
 
static int pg_ascii_dsplen (const unsigned char *s)
 
static int pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euc_mblen (const unsigned char *s)
 
static int pg_euc_dsplen (const unsigned char *s)
 
static int pg_eucjp2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_eucjp_mblen (const unsigned char *s)
 
static int pg_eucjp_dsplen (const unsigned char *s)
 
static int pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euckr_mblen (const unsigned char *s)
 
static int pg_euckr_dsplen (const unsigned char *s)
 
static int pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euccn_mblen (const unsigned char *s)
 
static int pg_euccn_dsplen (const unsigned char *s)
 
static int pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euctw_mblen (const unsigned char *s)
 
static int pg_euctw_dsplen (const unsigned char *s)
 
static int pg_wchar2euc_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_johab_mblen (const unsigned char *s)
 
static int pg_johab_dsplen (const unsigned char *s)
 
static int pg_utf2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2utf_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_utf_mblen (const unsigned char *s)
 
static int mbbisearch (pg_wchar ucs, const struct mbinterval *table, int max)
 
static int ucs_wcwidth (pg_wchar ucs)
 
static int pg_utf_dsplen (const unsigned char *s)
 
static int pg_mule2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2mule_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_mule_mblen (const unsigned char *s)
 
static int pg_mule_dsplen (const unsigned char *s)
 
static int pg_latin12wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2single_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_latin1_mblen (const unsigned char *s)
 
static int pg_latin1_dsplen (const unsigned char *s)
 
static int pg_sjis_mblen (const unsigned char *s)
 
static int pg_sjis_dsplen (const unsigned char *s)
 
static int pg_big5_mblen (const unsigned char *s)
 
static int pg_big5_dsplen (const unsigned char *s)
 
static int pg_gbk_mblen (const unsigned char *s)
 
static int pg_gbk_dsplen (const unsigned char *s)
 
static int pg_uhc_mblen (const unsigned char *s)
 
static int pg_uhc_dsplen (const unsigned char *s)
 
static int pg_gb18030_mblen (const unsigned char *s)
 
static int pg_gb18030_dsplen (const unsigned char *s)
 
static int pg_ascii_verifychar (const unsigned char *s, int len)
 
static int pg_ascii_verifystr (const unsigned char *s, int len)
 
static int pg_eucjp_verifychar (const unsigned char *s, int len)
 
static int pg_eucjp_verifystr (const unsigned char *s, int len)
 
static int pg_euckr_verifychar (const unsigned char *s, int len)
 
static int pg_euckr_verifystr (const unsigned char *s, int len)
 
static int pg_euctw_verifychar (const unsigned char *s, int len)
 
static int pg_euctw_verifystr (const unsigned char *s, int len)
 
static int pg_johab_verifychar (const unsigned char *s, int len)
 
static int pg_johab_verifystr (const unsigned char *s, int len)
 
static int pg_mule_verifychar (const unsigned char *s, int len)
 
static int pg_mule_verifystr (const unsigned char *s, int len)
 
static int pg_latin1_verifychar (const unsigned char *s, int len)
 
static int pg_latin1_verifystr (const unsigned char *s, int len)
 
static int pg_sjis_verifychar (const unsigned char *s, int len)
 
static int pg_sjis_verifystr (const unsigned char *s, int len)
 
static int pg_big5_verifychar (const unsigned char *s, int len)
 
static int pg_big5_verifystr (const unsigned char *s, int len)
 
static int pg_gbk_verifychar (const unsigned char *s, int len)
 
static int pg_gbk_verifystr (const unsigned char *s, int len)
 
static int pg_uhc_verifychar (const unsigned char *s, int len)
 
static int pg_uhc_verifystr (const unsigned char *s, int len)
 
static int pg_gb18030_verifychar (const unsigned char *s, int len)
 
static int pg_gb18030_verifystr (const unsigned char *s, int len)
 
static int pg_utf8_verifychar (const unsigned char *s, int len)
 
static void utf8_advance (const unsigned char *s, uint32 *state, int len)
 
static int pg_utf8_verifystr (const unsigned char *s, int len)
 
bool pg_utf8_islegal (const unsigned char *source, int length)
 
int pg_encoding_mblen (int encoding, const char *mbstr)
 
int pg_encoding_mblen_bounded (int encoding, const char *mbstr)
 
int pg_encoding_dsplen (int encoding, const char *mbstr)
 
int pg_encoding_verifymbchar (int encoding, const char *mbstr, int len)
 
int pg_encoding_verifymbstr (int encoding, const char *mbstr, int len)
 
int pg_encoding_max_length (int encoding)
 

Variables

static const uint32 Utf8Transition [256]
 
const pg_wchar_tbl pg_wchar_table []
 

Macro Definition Documentation

◆ ASC

#define ASC   (END << BGN)

Definition at line 1761 of file wchar.c.

◆ BGN

#define BGN   11

Definition at line 1745 of file wchar.c.

◆ CR1

#define CR1   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)

Definition at line 1773 of file wchar.c.

◆ CR2

#define CR2   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)

Definition at line 1774 of file wchar.c.

◆ CR3

#define CR3   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)

Definition at line 1775 of file wchar.c.

◆ CS1

#define CS1   16

Definition at line 1747 of file wchar.c.

◆ CS2

#define CS2   1

Definition at line 1748 of file wchar.c.

◆ CS3

#define CS3   5

Definition at line 1749 of file wchar.c.

◆ END

#define END   BGN

Definition at line 1756 of file wchar.c.

◆ ERR

#define ERR   0

Definition at line 1743 of file wchar.c.

◆ ILL

#define ILL   ERR

Definition at line 1777 of file wchar.c.

◆ IS_EUC_RANGE_VALID

#define IS_EUC_RANGE_VALID(c)   ((c) >= 0xa1 && (c) <= 0xfe)

Definition at line 1058 of file wchar.c.

◆ L2A

#define L2A   (CS1 << BGN)

Definition at line 1763 of file wchar.c.

◆ L3A

#define L3A   (P3A << BGN)

Definition at line 1765 of file wchar.c.

◆ L3B

#define L3B   (CS2 << BGN)

Definition at line 1766 of file wchar.c.

◆ L3C

#define L3C   (P3B << BGN)

Definition at line 1767 of file wchar.c.

◆ L4A

#define L4A   (P4A << BGN)

Definition at line 1769 of file wchar.c.

◆ L4B

#define L4B   (CS3 << BGN)

Definition at line 1770 of file wchar.c.

◆ L4C

#define L4C   (P4B << BGN)

Definition at line 1771 of file wchar.c.

◆ P3A

#define P3A   6 /* Lead was E0, check for 3-byte overlong */

Definition at line 1751 of file wchar.c.

◆ P3B

#define P3B   20 /* Lead was ED, check for surrogate */

Definition at line 1752 of file wchar.c.

◆ P4A

#define P4A   25 /* Lead was F0, check for 4-byte overlong */

Definition at line 1753 of file wchar.c.

◆ P4B

#define P4B   30 /* Lead was F4, check for too-large */

Definition at line 1754 of file wchar.c.

◆ pg_euccn_verifychar

#define pg_euccn_verifychar   pg_euckr_verifychar

Definition at line 1203 of file wchar.c.

◆ pg_euccn_verifystr

#define pg_euccn_verifystr   pg_euckr_verifystr

Definition at line 1204 of file wchar.c.

◆ STRIDE_LENGTH

#define STRIDE_LENGTH   (2 * sizeof(Vector8))

Function Documentation

◆ mbbisearch()

static int mbbisearch ( pg_wchar  ucs,
const struct mbinterval *  table,
int  max 
)
static

Definition at line 560 of file wchar.c.

561{
562 int min = 0;
563 int mid;
564
565 if (ucs < table[0].first || ucs > table[max].last)
566 return 0;
567 while (max >= min)
568 {
569 mid = (min + max) / 2;
570 if (ucs > table[mid].last)
571 min = mid + 1;
572 else if (ucs < table[mid].first)
573 max = mid - 1;
574 else
575 return 1;
576 }
577
578 return 0;
579}

Referenced by ucs_wcwidth().

◆ pg_ascii2wchar_with_len()

static int pg_ascii2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 49 of file wchar.c.

50{
51 int cnt = 0;
52
53 while (len > 0 && *from)
54 {
55 *to++ = *from++;
56 len--;
57 cnt++;
58 }
59 *to = 0;
60 return cnt;
61}
const void size_t len

References len.

◆ pg_ascii_dsplen()

static int pg_ascii_dsplen ( const unsigned char *  s)
static

Definition at line 70 of file wchar.c.

71{
72 if (*s == '\0')
73 return 0;
74 if (*s < 0x20 || *s == 0x7f)
75 return -1;
76
77 return 1;
78}

Referenced by pg_big5_dsplen(), pg_euc_dsplen(), pg_euccn_dsplen(), pg_eucjp_dsplen(), pg_euctw_dsplen(), pg_gb18030_dsplen(), pg_gbk_dsplen(), pg_latin1_dsplen(), pg_sjis_dsplen(), and pg_uhc_dsplen().

◆ pg_ascii_mblen()

static int pg_ascii_mblen ( const unsigned char *  s)
static

Definition at line 64 of file wchar.c.

65{
66 return 1;
67}

◆ pg_ascii_verifychar()

static int pg_ascii_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1042 of file wchar.c.

1043{
1044 return 1;
1045}

◆ pg_ascii_verifystr()

static int pg_ascii_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1048 of file wchar.c.

1049{
1050 const unsigned char *nullpos = memchr(s, 0, len);
1051
1052 if (nullpos == NULL)
1053 return len;
1054 else
1055 return nullpos - s;
1056}

References len.

◆ pg_big5_dsplen()

static int pg_big5_dsplen ( const unsigned char *  s)
static

Definition at line 913 of file wchar.c.

914{
915 int len;
916
917 if (IS_HIGHBIT_SET(*s))
918 len = 2; /* kanji? */
919 else
920 len = pg_ascii_dsplen(s); /* should be ASCII */
921 return len;
922}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1109
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:70

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_big5_mblen()

static int pg_big5_mblen ( const unsigned char *  s)
static

Definition at line 901 of file wchar.c.

902{
903 int len;
904
905 if (IS_HIGHBIT_SET(*s))
906 len = 2; /* kanji? */
907 else
908 len = 1; /* should be ASCII */
909 return len;
910}

References IS_HIGHBIT_SET, and len.

Referenced by pg_big5_verifychar().

◆ pg_big5_verifychar()

static int pg_big5_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1458 of file wchar.c.

1459{
1460 int l,
1461 mbl;
1462
1463 l = mbl = pg_big5_mblen(s);
1464
1465 if (len < l)
1466 return -1;
1467
1468 while (--l > 0)
1469 {
1470 if (*++s == '\0')
1471 return -1;
1472 }
1473
1474 return mbl;
1475}
static int pg_big5_mblen(const unsigned char *s)
Definition: wchar.c:901

References len, and pg_big5_mblen().

Referenced by pg_big5_verifystr().

◆ pg_big5_verifystr()

static int pg_big5_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1478 of file wchar.c.

1479{
1480 const unsigned char *start = s;
1481
1482 while (len > 0)
1483 {
1484 int l;
1485
1486 /* fast path for ASCII-subset characters */
1487 if (!IS_HIGHBIT_SET(*s))
1488 {
1489 if (*s == '\0')
1490 break;
1491 l = 1;
1492 }
1493 else
1494 {
1495 l = pg_big5_verifychar(s, len);
1496 if (l == -1)
1497 break;
1498 }
1499 s += l;
1500 len -= l;
1501 }
1502
1503 return s - start;
1504}
return str start
static int pg_big5_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1458

References IS_HIGHBIT_SET, len, pg_big5_verifychar(), and start.

◆ pg_encoding_dsplen()

int pg_encoding_dsplen ( int  encoding,
const char *  mbstr 
)

Definition at line 2090 of file wchar.c.

2091{
2092 return (PG_VALID_ENCODING(encoding) ?
2093 pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
2094 pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
2095}
int32 encoding
Definition: pg_database.h:41
@ PG_SQL_ASCII
Definition: pg_wchar.h:226
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:287
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:2015

References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by PQdsplen(), and reportErrorPosition().

◆ pg_encoding_max_length()

int pg_encoding_max_length ( int  encoding)

◆ pg_encoding_mblen()

int pg_encoding_mblen ( int  encoding,
const char *  mbstr 
)

Definition at line 2069 of file wchar.c.

◆ pg_encoding_mblen_bounded()

int pg_encoding_mblen_bounded ( int  encoding,
const char *  mbstr 
)

Definition at line 2081 of file wchar.c.

2082{
2083 return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr));
2084}
size_t strnlen(const char *str, size_t maxlen)
Definition: strnlen.c:26
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:2069

References encoding, pg_encoding_mblen(), and strnlen().

◆ pg_encoding_verifymbchar()

int pg_encoding_verifymbchar ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 2103 of file wchar.c.

2104{
2105 return (PG_VALID_ENCODING(encoding) ?
2106 pg_wchar_table[encoding].mbverifychar((const unsigned char *) mbstr, len) :
2107 pg_wchar_table[PG_SQL_ASCII].mbverifychar((const unsigned char *) mbstr, len));
2108}

References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by big52euc_tw(), big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), shift_jis_20042euc_jis_2004(), and sjis2euc_jp().

◆ pg_encoding_verifymbstr()

int pg_encoding_verifymbstr ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 2116 of file wchar.c.

2117{
2118 return (PG_VALID_ENCODING(encoding) ?
2119 pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len) :
2120 pg_wchar_table[PG_SQL_ASCII].mbverifystr((const unsigned char *) mbstr, len));
2121}

References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by add_file_to_manifest(), CopyConvertBuf(), and test_enc_conversion().

◆ pg_euc2wchar_with_len()

static int pg_euc2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 84 of file wchar.c.

85{
86 int cnt = 0;
87
88 while (len > 0 && *from)
89 {
90 if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte
91 * KANA") */
92 {
93 from++;
94 *to = (SS2 << 8) | *from++;
95 len -= 2;
96 }
97 else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
98 {
99 from++;
100 *to = (SS3 << 16) | (*from++ << 8);
101 *to |= *from++;
102 len -= 3;
103 }
104 else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
105 {
106 *to = *from++ << 8;
107 *to |= *from++;
108 len -= 2;
109 }
110 else /* must be ASCII */
111 {
112 *to = *from++;
113 len--;
114 }
115 to++;
116 cnt++;
117 }
118 *to = 0;
119 return cnt;
120}
#define SS2
Definition: pg_wchar.h:38
#define SS3
Definition: pg_wchar.h:39

References IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp2wchar_with_len(), and pg_euckr2wchar_with_len().

◆ pg_euc_dsplen()

static int pg_euc_dsplen ( const unsigned char *  s)
inline static

Definition at line 139 of file wchar.c.

140{
141 int len;
142
143 if (*s == SS2)
144 len = 2;
145 else if (*s == SS3)
146 len = 2;
147 else if (IS_HIGHBIT_SET(*s))
148 len = 2;
149 else
150 len = pg_ascii_dsplen(s);
151 return len;
152}

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

Referenced by pg_euckr_dsplen(), and pg_johab_dsplen().

◆ pg_euc_mblen()

static int pg_euc_mblen ( const unsigned char *  s)
inline static

Definition at line 123 of file wchar.c.

124{
125 int len;
126
127 if (*s == SS2)
128 len = 2;
129 else if (*s == SS3)
130 len = 3;
131 else if (IS_HIGHBIT_SET(*s))
132 len = 2;
133 else
134 len = 1;
135 return len;
136}

References IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp_mblen(), pg_euckr_mblen(), and pg_johab_mblen().

◆ pg_euccn2wchar_with_len()

static int pg_euccn2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 211 of file wchar.c.

212{
213 int cnt = 0;
214
215 while (len > 0 && *from)
216 {
217 if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
218 {
219 from++;
220 *to = (SS2 << 16) | (*from++ << 8);
221 *to |= *from++;
222 len -= 3;
223 }
224 else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */
225 {
226 from++;
227 *to = (SS3 << 16) | (*from++ << 8);
228 *to |= *from++;
229 len -= 3;
230 }
231 else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
232 {
233 *to = *from++ << 8;
234 *to |= *from++;
235 len -= 2;
236 }
237 else
238 {
239 *to = *from++;
240 len--;
241 }
242 to++;
243 cnt++;
244 }
245 *to = 0;
246 return cnt;
247}

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euccn_dsplen()

static int pg_euccn_dsplen ( const unsigned char *  s)
static

Definition at line 262 of file wchar.c.

263{
264 int len;
265
266 if (IS_HIGHBIT_SET(*s))
267 len = 2;
268 else
269 len = pg_ascii_dsplen(s);
270 return len;
271}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_euccn_mblen()

static int pg_euccn_mblen ( const unsigned char *  s)
static

Definition at line 250 of file wchar.c.

251{
252 int len;
253
254 if (IS_HIGHBIT_SET(*s))
255 len = 2;
256 else
257 len = 1;
258 return len;
259}

References IS_HIGHBIT_SET, and len.

◆ pg_eucjp2wchar_with_len()

static int pg_eucjp2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 158 of file wchar.c.

159{
160 return pg_euc2wchar_with_len(from, to, len);
161}
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition: wchar.c:84

References len, and pg_euc2wchar_with_len().

◆ pg_eucjp_dsplen()

static int pg_eucjp_dsplen ( const unsigned char *  s)
static

Definition at line 170 of file wchar.c.

171{
172 int len;
173
174 if (*s == SS2)
175 len = 1;
176 else if (*s == SS3)
177 len = 2;
178 else if (IS_HIGHBIT_SET(*s))
179 len = 2;
180 else
181 len = pg_ascii_dsplen(s);
182 return len;
183}

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

◆ pg_eucjp_mblen()

static int pg_eucjp_mblen ( const unsigned char *  s)
static

Definition at line 164 of file wchar.c.

165{
166 return pg_euc_mblen(s);
167}
static int pg_euc_mblen(const unsigned char *s)
Definition: wchar.c:123

References pg_euc_mblen().

◆ pg_eucjp_verifychar()

static int pg_eucjp_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1061 of file wchar.c.

1062{
1063 int l;
1064 unsigned char c1,
1065 c2;
1066
1067 c1 = *s++;
1068
1069 switch (c1)
1070 {
1071 case SS2: /* JIS X 0201 */
1072 l = 2;
1073 if (l > len)
1074 return -1;
1075 c2 = *s++;
1076 if (c2 < 0xa1 || c2 > 0xdf)
1077 return -1;
1078 break;
1079
1080 case SS3: /* JIS X 0212 */
1081 l = 3;
1082 if (l > len)
1083 return -1;
1084 c2 = *s++;
1085 if (!IS_EUC_RANGE_VALID(c2))
1086 return -1;
1087 c2 = *s++;
1088 if (!IS_EUC_RANGE_VALID(c2))
1089 return -1;
1090 break;
1091
1092 default:
1093 if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1094 {
1095 l = 2;
1096 if (l > len)
1097 return -1;
1098 if (!IS_EUC_RANGE_VALID(c1))
1099 return -1;
1100 c2 = *s++;
1101 if (!IS_EUC_RANGE_VALID(c2))
1102 return -1;
1103 }
1104 else
1105 /* must be ASCII */
1106 {
1107 l = 1;
1108 }
1109 break;
1110 }
1111
1112 return l;
1113}
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1058

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp_verifystr().

◆ pg_eucjp_verifystr()

static int pg_eucjp_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1116 of file wchar.c.

1117{
1118 const unsigned char *start = s;
1119
1120 while (len > 0)
1121 {
1122 int l;
1123
1124 /* fast path for ASCII-subset characters */
1125 if (!IS_HIGHBIT_SET(*s))
1126 {
1127 if (*s == '\0')
1128 break;
1129 l = 1;
1130 }
1131 else
1132 {
1133 l = pg_eucjp_verifychar(s, len);
1134 if (l == -1)
1135 break;
1136 }
1137 s += l;
1138 len -= l;
1139 }
1140
1141 return s - start;
1142}
static int pg_eucjp_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1061

References IS_HIGHBIT_SET, len, pg_eucjp_verifychar(), and start.

◆ pg_euckr2wchar_with_len()

static int pg_euckr2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 189 of file wchar.c.

190{
191 return pg_euc2wchar_with_len(from, to, len);
192}

References len, and pg_euc2wchar_with_len().

◆ pg_euckr_dsplen()

static int pg_euckr_dsplen ( const unsigned char *  s)
static

Definition at line 201 of file wchar.c.

202{
203 return pg_euc_dsplen(s);
204}
static int pg_euc_dsplen(const unsigned char *s)
Definition: wchar.c:139

References pg_euc_dsplen().

◆ pg_euckr_mblen()

static int pg_euckr_mblen ( const unsigned char *  s)
static

Definition at line 195 of file wchar.c.

196{
197 return pg_euc_mblen(s);
198}

References pg_euc_mblen().

◆ pg_euckr_verifychar()

static int pg_euckr_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1145 of file wchar.c.

1146{
1147 int l;
1148 unsigned char c1,
1149 c2;
1150
1151 c1 = *s++;
1152
1153 if (IS_HIGHBIT_SET(c1))
1154 {
1155 l = 2;
1156 if (l > len)
1157 return -1;
1158 if (!IS_EUC_RANGE_VALID(c1))
1159 return -1;
1160 c2 = *s++;
1161 if (!IS_EUC_RANGE_VALID(c2))
1162 return -1;
1163 }
1164 else
1165 /* must be ASCII */
1166 {
1167 l = 1;
1168 }
1169
1170 return l;
1171}

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, and len.

Referenced by pg_euckr_verifystr().

◆ pg_euckr_verifystr()

static int pg_euckr_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1174 of file wchar.c.

1175{
1176 const unsigned char *start = s;
1177
1178 while (len > 0)
1179 {
1180 int l;
1181
1182 /* fast path for ASCII-subset characters */
1183 if (!IS_HIGHBIT_SET(*s))
1184 {
1185 if (*s == '\0')
1186 break;
1187 l = 1;
1188 }
1189 else
1190 {
1191 l = pg_euckr_verifychar(s, len);
1192 if (l == -1)
1193 break;
1194 }
1195 s += l;
1196 len -= l;
1197 }
1198
1199 return s - start;
1200}
static int pg_euckr_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1145

References IS_HIGHBIT_SET, len, pg_euckr_verifychar(), and start.

◆ pg_euctw2wchar_with_len()

static int pg_euctw2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 278 of file wchar.c.

279{
280 int cnt = 0;
281
282 while (len > 0 && *from)
283 {
284 if (*from == SS2 && len >= 4) /* code set 2 */
285 {
286 from++;
287 *to = (((uint32) SS2) << 24) | (*from++ << 16);
288 *to |= *from++ << 8;
289 *to |= *from++;
290 len -= 4;
291 }
292 else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
293 {
294 from++;
295 *to = (SS3 << 16) | (*from++ << 8);
296 *to |= *from++;
297 len -= 3;
298 }
299 else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
300 {
301 *to = *from++ << 8;
302 *to |= *from++;
303 len -= 2;
304 }
305 else
306 {
307 *to = *from++;
308 len--;
309 }
310 to++;
311 cnt++;
312 }
313 *to = 0;
314 return cnt;
315}
uint32_t uint32
Definition: c.h:485

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euctw_dsplen()

static int pg_euctw_dsplen ( const unsigned char *  s)
static

Definition at line 334 of file wchar.c.

335{
336 int len;
337
338 if (*s == SS2)
339 len = 2;
340 else if (*s == SS3)
341 len = 2;
342 else if (IS_HIGHBIT_SET(*s))
343 len = 2;
344 else
345 len = pg_ascii_dsplen(s);
346 return len;
347}

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

◆ pg_euctw_mblen()

static int pg_euctw_mblen ( const unsigned char *  s)
static

Definition at line 318 of file wchar.c.

319{
320 int len;
321
322 if (*s == SS2)
323 len = 4;
324 else if (*s == SS3)
325 len = 3;
326 else if (IS_HIGHBIT_SET(*s))
327 len = 2;
328 else
329 len = 1;
330 return len;
331}

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euctw_verifychar()

static int pg_euctw_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1207 of file wchar.c.

1208{
1209 int l;
1210 unsigned char c1,
1211 c2;
1212
1213 c1 = *s++;
1214
1215 switch (c1)
1216 {
1217 case SS2: /* CNS 11643 Plane 1-7 */
1218 l = 4;
1219 if (l > len)
1220 return -1;
1221 c2 = *s++;
1222 if (c2 < 0xa1 || c2 > 0xa7)
1223 return -1;
1224 c2 = *s++;
1225 if (!IS_EUC_RANGE_VALID(c2))
1226 return -1;
1227 c2 = *s++;
1228 if (!IS_EUC_RANGE_VALID(c2))
1229 return -1;
1230 break;
1231
1232 case SS3: /* unused */
1233 return -1;
1234
1235 default:
1236 if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */
1237 {
1238 l = 2;
1239 if (l > len)
1240 return -1;
1241 /* no further range check on c1? */
1242 c2 = *s++;
1243 if (!IS_EUC_RANGE_VALID(c2))
1244 return -1;
1245 }
1246 else
1247 /* must be ASCII */
1248 {
1249 l = 1;
1250 }
1251 break;
1252 }
1253 return l;
1254}

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_euctw_verifystr().

◆ pg_euctw_verifystr()

static int pg_euctw_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1257 of file wchar.c.

1258{
1259 const unsigned char *start = s;
1260
1261 while (len > 0)
1262 {
1263 int l;
1264
1265 /* fast path for ASCII-subset characters */
1266 if (!IS_HIGHBIT_SET(*s))
1267 {
1268 if (*s == '\0')
1269 break;
1270 l = 1;
1271 }
1272 else
1273 {
1274 l = pg_euctw_verifychar(s, len);
1275 if (l == -1)
1276 break;
1277 }
1278 s += l;
1279 len -= l;
1280 }
1281
1282 return s - start;
1283}
static int pg_euctw_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1207

References IS_HIGHBIT_SET, len, pg_euctw_verifychar(), and start.

◆ pg_gb18030_dsplen()

static int pg_gb18030_dsplen ( const unsigned char *  s)
static

Definition at line 1008 of file wchar.c.

1009{
1010 int len;
1011
1012 if (IS_HIGHBIT_SET(*s))
1013 len = 2;
1014 else
1015 len = pg_ascii_dsplen(s); /* ASCII */
1016 return len;
1017}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_gb18030_mblen()

static int pg_gb18030_mblen ( const unsigned char *  s)
static

Definition at line 994 of file wchar.c.

995{
996 int len;
997
998 if (!IS_HIGHBIT_SET(*s))
999 len = 1; /* ASCII */
1000 else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1001 len = 4;
1002 else
1003 len = 2;
1004 return len;
1005}

References IS_HIGHBIT_SET, and len.

◆ pg_gb18030_verifychar()

static int pg_gb18030_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1605 of file wchar.c.

1606{
1607 int l;
1608
1609 if (!IS_HIGHBIT_SET(*s))
1610 l = 1; /* ASCII */
1611 else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1612 {
1613 /* Should be 4-byte, validate remaining bytes */
1614 if (*s >= 0x81 && *s <= 0xfe &&
1615 *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
1616 *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
1617 l = 4;
1618 else
1619 l = -1;
1620 }
1621 else if (len >= 2 && *s >= 0x81 && *s <= 0xfe)
1622 {
1623 /* Should be 2-byte, validate */
1624 if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
1625 (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
1626 l = 2;
1627 else
1628 l = -1;
1629 }
1630 else
1631 l = -1;
1632 return l;
1633}

References IS_HIGHBIT_SET, and len.

Referenced by pg_gb18030_verifystr().

◆ pg_gb18030_verifystr()

static int pg_gb18030_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1636 of file wchar.c.

1637{
1638 const unsigned char *start = s;
1639
1640 while (len > 0)
1641 {
1642 int l;
1643
1644 /* fast path for ASCII-subset characters */
1645 if (!IS_HIGHBIT_SET(*s))
1646 {
1647 if (*s == '\0')
1648 break;
1649 l = 1;
1650 }
1651 else
1652 {
1653 l = pg_gb18030_verifychar(s, len);
1654 if (l == -1)
1655 break;
1656 }
1657 s += l;
1658 len -= l;
1659 }
1660
1661 return s - start;
1662}
static int pg_gb18030_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1605

References IS_HIGHBIT_SET, len, pg_gb18030_verifychar(), and start.

◆ pg_gbk_dsplen()

static int pg_gbk_dsplen ( const unsigned char *  s)
static

Definition at line 940 of file wchar.c.

941{
942 int len;
943
944 if (IS_HIGHBIT_SET(*s))
945 len = 2; /* kanji? */
946 else
947 len = pg_ascii_dsplen(s); /* should be ASCII */
948 return len;
949}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_gbk_mblen()

static int pg_gbk_mblen ( const unsigned char *  s)
static

Definition at line 928 of file wchar.c.

929{
930 int len;
931
932 if (IS_HIGHBIT_SET(*s))
933 len = 2; /* kanji? */
934 else
935 len = 1; /* should be ASCII */
936 return len;
937}

References IS_HIGHBIT_SET, and len.

Referenced by pg_gbk_verifychar().

◆ pg_gbk_verifychar()

static int pg_gbk_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1507 of file wchar.c.

1508{
1509 int l,
1510 mbl;
1511
1512 l = mbl = pg_gbk_mblen(s);
1513
1514 if (len < l)
1515 return -1;
1516
1517 while (--l > 0)
1518 {
1519 if (*++s == '\0')
1520 return -1;
1521 }
1522
1523 return mbl;
1524}
static int pg_gbk_mblen(const unsigned char *s)
Definition: wchar.c:928

References len, and pg_gbk_mblen().

Referenced by pg_gbk_verifystr().

◆ pg_gbk_verifystr()

static int pg_gbk_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1527 of file wchar.c.

1528{
1529 const unsigned char *start = s;
1530
1531 while (len > 0)
1532 {
1533 int l;
1534
1535 /* fast path for ASCII-subset characters */
1536 if (!IS_HIGHBIT_SET(*s))
1537 {
1538 if (*s == '\0')
1539 break;
1540 l = 1;
1541 }
1542 else
1543 {
1544 l = pg_gbk_verifychar(s, len);
1545 if (l == -1)
1546 break;
1547 }
1548 s += l;
1549 len -= l;
1550 }
1551
1552 return s - start;
1553}
static int pg_gbk_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1507

References IS_HIGHBIT_SET, len, pg_gbk_verifychar(), and start.

◆ pg_johab_dsplen()

static int pg_johab_dsplen ( const unsigned char *  s)
static

Definition at line 408 of file wchar.c.

409{
410 return pg_euc_dsplen(s);
411}

References pg_euc_dsplen().

◆ pg_johab_mblen()

static int pg_johab_mblen ( const unsigned char *  s)
static

Definition at line 402 of file wchar.c.

403{
404 return pg_euc_mblen(s);
405}

References pg_euc_mblen().

Referenced by pg_johab_verifychar().

◆ pg_johab_verifychar()

static int pg_johab_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1286 of file wchar.c.

1287{
1288 int l,
1289 mbl;
1290 unsigned char c;
1291
1292 l = mbl = pg_johab_mblen(s);
1293
1294 if (len < l)
1295 return -1;
1296
1297 if (!IS_HIGHBIT_SET(*s))
1298 return mbl;
1299
1300 while (--l > 0)
1301 {
1302 c = *++s;
1303 if (!IS_EUC_RANGE_VALID(c))
1304 return -1;
1305 }
1306 return mbl;
1307}
char * c
static int pg_johab_mblen(const unsigned char *s)
Definition: wchar.c:402

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, and pg_johab_mblen().

Referenced by pg_johab_verifystr().

◆ pg_johab_verifystr()

static int pg_johab_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1310 of file wchar.c.

1311{
1312 const unsigned char *start = s;
1313
1314 while (len > 0)
1315 {
1316 int l;
1317
1318 /* fast path for ASCII-subset characters */
1319 if (!IS_HIGHBIT_SET(*s))
1320 {
1321 if (*s == '\0')
1322 break;
1323 l = 1;
1324 }
1325 else
1326 {
1327 l = pg_johab_verifychar(s, len);
1328 if (l == -1)
1329 break;
1330 }
1331 s += l;
1332 len -= l;
1333 }
1334
1335 return s - start;
1336}
static int pg_johab_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1286

References IS_HIGHBIT_SET, len, pg_johab_verifychar(), and start.

◆ pg_latin12wchar_with_len()

static int pg_latin12wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 818 of file wchar.c.

819{
820 int cnt = 0;
821
822 while (len > 0 && *from)
823 {
824 *to++ = *from++;
825 len--;
826 cnt++;
827 }
828 *to = 0;
829 return cnt;
830}

References len.

◆ pg_latin1_dsplen()

static int pg_latin1_dsplen ( const unsigned char *  s)
static

Definition at line 861 of file wchar.c.

862{
863 return pg_ascii_dsplen(s);
864}

References pg_ascii_dsplen().

◆ pg_latin1_mblen()

static int pg_latin1_mblen ( const unsigned char *  s)
static

Definition at line 855 of file wchar.c.

856{
857 return 1;
858}

◆ pg_latin1_verifychar()

static int pg_latin1_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1389 of file wchar.c.

1390{
1391 return 1;
1392}

◆ pg_latin1_verifystr()

static int pg_latin1_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1395 of file wchar.c.

1396{
1397 const unsigned char *nullpos = memchr(s, 0, len);
1398
1399 if (nullpos == NULL)
1400 return len;
1401 else
1402 return nullpos - s;
1403}

References len.

◆ pg_mule2wchar_with_len()

static int pg_mule2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 653 of file wchar.c.

654{
655 int cnt = 0;
656
657 while (len > 0 && *from)
658 {
659 if (IS_LC1(*from) && len >= 2)
660 {
661 *to = *from++ << 16;
662 *to |= *from++;
663 len -= 2;
664 }
665 else if (IS_LCPRV1(*from) && len >= 3)
666 {
667 from++;
668 *to = *from++ << 16;
669 *to |= *from++;
670 len -= 3;
671 }
672 else if (IS_LC2(*from) && len >= 3)
673 {
674 *to = *from++ << 16;
675 *to |= *from++ << 8;
676 *to |= *from++;
677 len -= 3;
678 }
679 else if (IS_LCPRV2(*from) && len >= 4)
680 {
681 from++;
682 *to = *from++ << 16;
683 *to |= *from++ << 8;
684 *to |= *from++;
685 len -= 4;
686 }
687 else
688 { /* assume ASCII */
689 *to = (unsigned char) *from++;
690 len--;
691 }
692 to++;
693 cnt++;
694 }
695 *to = 0;
696 return cnt;
697}
#define IS_LCPRV2(c)
Definition: pg_wchar.h:164
#define IS_LC2(c)
Definition: pg_wchar.h:144
#define IS_LCPRV1(c)
Definition: pg_wchar.h:152
#define IS_LC1(c)
Definition: pg_wchar.h:126

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

◆ pg_mule_dsplen()

static int pg_mule_dsplen ( const unsigned char *  s)
static

Definition at line 790 of file wchar.c.

791{
792 int len;
793
794 /*
795 * Note: it's not really appropriate to assume that all multibyte charsets
796 * are double-wide on screen. But this seems an okay approximation for
797 * the MULE charsets we currently support.
798 */
799
800 if (IS_LC1(*s))
801 len = 1;
802 else if (IS_LCPRV1(*s))
803 len = 1;
804 else if (IS_LC2(*s))
805 len = 2;
806 else if (IS_LCPRV2(*s))
807 len = 2;
808 else
809 len = 1; /* assume ASCII */
810
811 return len;
812}

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

◆ pg_mule_mblen()

int pg_mule_mblen ( const unsigned char *  s)

Definition at line 772 of file wchar.c.

773{
774 int len;
775
776 if (IS_LC1(*s))
777 len = 2;
778 else if (IS_LCPRV1(*s))
779 len = 3;
780 else if (IS_LC2(*s))
781 len = 3;
782 else if (IS_LCPRV2(*s))
783 len = 4;
784 else
785 len = 1; /* assume ASCII */
786 return len;
787}

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

Referenced by mic2latin(), mic2latin_with_table(), and pg_mule_verifychar().

◆ pg_mule_verifychar()

static int pg_mule_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1339 of file wchar.c.

1340{
1341 int l,
1342 mbl;
1343 unsigned char c;
1344
1345 l = mbl = pg_mule_mblen(s);
1346
1347 if (len < l)
1348 return -1;
1349
1350 while (--l > 0)
1351 {
1352 c = *++s;
1353 if (!IS_HIGHBIT_SET(c))
1354 return -1;
1355 }
1356 return mbl;
1357}
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:772

References IS_HIGHBIT_SET, len, and pg_mule_mblen().

Referenced by pg_mule_verifystr().

◆ pg_mule_verifystr()

static int pg_mule_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1360 of file wchar.c.

1361{
1362 const unsigned char *start = s;
1363
1364 while (len > 0)
1365 {
1366 int l;
1367
1368 /* fast path for ASCII-subset characters */
1369 if (!IS_HIGHBIT_SET(*s))
1370 {
1371 if (*s == '\0')
1372 break;
1373 l = 1;
1374 }
1375 else
1376 {
1377 l = pg_mule_verifychar(s, len);
1378 if (l == -1)
1379 break;
1380 }
1381 s += l;
1382 len -= l;
1383 }
1384
1385 return s - start;
1386}
static int pg_mule_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1339

References IS_HIGHBIT_SET, len, pg_mule_verifychar(), and start.

◆ pg_sjis_dsplen()

static int pg_sjis_dsplen ( const unsigned char *  s)
static

Definition at line 884 of file wchar.c.

885{
886 int len;
887
888 if (*s >= 0xa1 && *s <= 0xdf)
889 len = 1; /* 1 byte kana? */
890 else if (IS_HIGHBIT_SET(*s))
891 len = 2; /* kanji? */
892 else
893 len = pg_ascii_dsplen(s); /* should be ASCII */
894 return len;
895}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_sjis_mblen()

static int pg_sjis_mblen ( const unsigned char *  s)
static

Definition at line 870 of file wchar.c.

871{
872 int len;
873
874 if (*s >= 0xa1 && *s <= 0xdf)
875 len = 1; /* 1 byte kana? */
876 else if (IS_HIGHBIT_SET(*s))
877 len = 2; /* kanji? */
878 else
879 len = 1; /* should be ASCII */
880 return len;
881}

References IS_HIGHBIT_SET, and len.

Referenced by pg_sjis_verifychar().

◆ pg_sjis_verifychar()

static int pg_sjis_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1406 of file wchar.c.

1407{
1408 int l,
1409 mbl;
1410 unsigned char c1,
1411 c2;
1412
1413 l = mbl = pg_sjis_mblen(s);
1414
1415 if (len < l)
1416 return -1;
1417
1418 if (l == 1) /* pg_sjis_mblen already verified it */
1419 return mbl;
1420
1421 c1 = *s++;
1422 c2 = *s;
1423 if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2))
1424 return -1;
1425 return mbl;
1426}
#define ISSJISTAIL(c)
Definition: pg_wchar.h:45
#define ISSJISHEAD(c)
Definition: pg_wchar.h:44
static int pg_sjis_mblen(const unsigned char *s)
Definition: wchar.c:870

References ISSJISHEAD, ISSJISTAIL, len, and pg_sjis_mblen().

Referenced by pg_sjis_verifystr().

◆ pg_sjis_verifystr()

static int pg_sjis_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1429 of file wchar.c.

1430{
1431 const unsigned char *start = s;
1432
1433 while (len > 0)
1434 {
1435 int l;
1436
1437 /* fast path for ASCII-subset characters */
1438 if (!IS_HIGHBIT_SET(*s))
1439 {
1440 if (*s == '\0')
1441 break;
1442 l = 1;
1443 }
1444 else
1445 {
1446 l = pg_sjis_verifychar(s, len);
1447 if (l == -1)
1448 break;
1449 }
1450 s += l;
1451 len -= l;
1452 }
1453
1454 return s - start;
1455}
static int pg_sjis_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1406

References IS_HIGHBIT_SET, len, pg_sjis_verifychar(), and start.

◆ pg_uhc_dsplen()

static int pg_uhc_dsplen ( const unsigned char *  s)
static

Definition at line 967 of file wchar.c.

968{
969 int len;
970
971 if (IS_HIGHBIT_SET(*s))
972 len = 2; /* 2byte? */
973 else
974 len = pg_ascii_dsplen(s); /* should be ASCII */
975 return len;
976}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_uhc_mblen()

static int pg_uhc_mblen ( const unsigned char *  s)
static

Definition at line 955 of file wchar.c.

956{
957 int len;
958
959 if (IS_HIGHBIT_SET(*s))
960 len = 2; /* 2byte? */
961 else
962 len = 1; /* should be ASCII */
963 return len;
964}

References IS_HIGHBIT_SET, and len.

Referenced by pg_uhc_verifychar().

◆ pg_uhc_verifychar()

static int pg_uhc_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1556 of file wchar.c.

1557{
1558 int l,
1559 mbl;
1560
1561 l = mbl = pg_uhc_mblen(s);
1562
1563 if (len < l)
1564 return -1;
1565
1566 while (--l > 0)
1567 {
1568 if (*++s == '\0')
1569 return -1;
1570 }
1571
1572 return mbl;
1573}
static int pg_uhc_mblen(const unsigned char *s)
Definition: wchar.c:955

References len, and pg_uhc_mblen().

Referenced by pg_uhc_verifystr().

◆ pg_uhc_verifystr()

static int pg_uhc_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1576 of file wchar.c.

1577{
1578 const unsigned char *start = s;
1579
1580 while (len > 0)
1581 {
1582 int l;
1583
1584 /* fast path for ASCII-subset characters */
1585 if (!IS_HIGHBIT_SET(*s))
1586 {
1587 if (*s == '\0')
1588 break;
1589 l = 1;
1590 }
1591 else
1592 {
1593 l = pg_uhc_verifychar(s, len);
1594 if (l == -1)
1595 break;
1596 }
1597 s += l;
1598 len -= l;
1599 }
1600
1601 return s - start;
1602}
static int pg_uhc_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1556

References IS_HIGHBIT_SET, len, pg_uhc_verifychar(), and start.

◆ pg_utf2wchar_with_len()

static int pg_utf2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 420 of file wchar.c.

421{
422 int cnt = 0;
423 uint32 c1,
424 c2,
425 c3,
426 c4;
427
428 while (len > 0 && *from)
429 {
430 if ((*from & 0x80) == 0)
431 {
432 *to = *from++;
433 len--;
434 }
435 else if ((*from & 0xe0) == 0xc0)
436 {
437 if (len < 2)
438 break; /* drop trailing incomplete char */
439 c1 = *from++ & 0x1f;
440 c2 = *from++ & 0x3f;
441 *to = (c1 << 6) | c2;
442 len -= 2;
443 }
444 else if ((*from & 0xf0) == 0xe0)
445 {
446 if (len < 3)
447 break; /* drop trailing incomplete char */
448 c1 = *from++ & 0x0f;
449 c2 = *from++ & 0x3f;
450 c3 = *from++ & 0x3f;
451 *to = (c1 << 12) | (c2 << 6) | c3;
452 len -= 3;
453 }
454 else if ((*from & 0xf8) == 0xf0)
455 {
456 if (len < 4)
457 break; /* drop trailing incomplete char */
458 c1 = *from++ & 0x07;
459 c2 = *from++ & 0x3f;
460 c3 = *from++ & 0x3f;
461 c4 = *from++ & 0x3f;
462 *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
463 len -= 4;
464 }
465 else
466 {
467 /* treat a bogus char as length 1; not ours to raise error */
468 *to = *from++;
469 len--;
470 }
471 to++;
472 cnt++;
473 }
474 *to = 0;
475 return cnt;
476}

References len.

◆ pg_utf8_islegal()

bool pg_utf8_islegal ( const unsigned char *  source,
int  length 
)

Definition at line 1953 of file wchar.c.

1954{
1955 unsigned char a;
1956
1957 switch (length)
1958 {
1959 default:
1960 /* reject lengths 5 and 6 for now */
1961 return false;
1962 case 4:
1963 a = source[3];
1964 if (a < 0x80 || a > 0xBF)
1965 return false;
1966 /* FALL THRU */
1967 case 3:
1968 a = source[2];
1969 if (a < 0x80 || a > 0xBF)
1970 return false;
1971 /* FALL THRU */
1972 case 2:
1973 a = source[1];
1974 switch (*source)
1975 {
1976 case 0xE0:
1977 if (a < 0xA0 || a > 0xBF)
1978 return false;
1979 break;
1980 case 0xED:
1981 if (a < 0x80 || a > 0x9F)
1982 return false;
1983 break;
1984 case 0xF0:
1985 if (a < 0x90 || a > 0xBF)
1986 return false;
1987 break;
1988 case 0xF4:
1989 if (a < 0x80 || a > 0x8F)
1990 return false;
1991 break;
1992 default:
1993 if (a < 0x80 || a > 0xBF)
1994 return false;
1995 break;
1996 }
1997 /* FALL THRU */
1998 case 1:
1999 a = *source;
2000 if (a >= 0x80 && a < 0xC2)
2001 return false;
2002 if (a > 0xF4)
2003 return false;
2004 break;
2005 }
2006 return true;
2007}
int a
Definition: isn.c:68
static rewind_source * source
Definition: pg_rewind.c:89

References a, and source.

Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifychar(), utf8_to_iso8859_1(), and UtfToLocal().

◆ pg_utf8_verifychar()

static int pg_utf8_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1665 of file wchar.c.

1666{
1667 int l;
1668
1669 if ((*s & 0x80) == 0)
1670 {
1671 if (*s == '\0')
1672 return -1;
1673 return 1;
1674 }
1675 else if ((*s & 0xe0) == 0xc0)
1676 l = 2;
1677 else if ((*s & 0xf0) == 0xe0)
1678 l = 3;
1679 else if ((*s & 0xf8) == 0xf0)
1680 l = 4;
1681 else
1682 l = 1;
1683
1684 if (l > len)
1685 return -1;
1686
1687 if (!pg_utf8_islegal(s, l))
1688 return -1;
1689
1690 return l;
1691}
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1953

References len, and pg_utf8_islegal().

Referenced by pg_utf8_verifystr().

◆ pg_utf8_verifystr()

static int pg_utf8_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1855 of file wchar.c.

1856{
1857 const unsigned char *start = s;
1858 const int orig_len = len;
1859 uint32 state = BGN;
1860
1861/*
1862 * With a stride of two vector widths, gcc will unroll the loop. Even if
1863 * the compiler can unroll a longer loop, it's not worth it because we
1864 * must fall back to the byte-wise algorithm if we find any non-ASCII.
1865 */
1866#define STRIDE_LENGTH (2 * sizeof(Vector8))
1867
1868 if (len >= STRIDE_LENGTH)
1869 {
1870 while (len >= STRIDE_LENGTH)
1871 {
1872 /*
1873 * If the chunk is all ASCII, we can skip the full UTF-8 check,
1874 * but we must first check for a non-END state, which means the
1875 * previous chunk ended in the middle of a multibyte sequence.
1876 */
1877 if (state != END || !is_valid_ascii(s, STRIDE_LENGTH))
1878 utf8_advance(s, &state, STRIDE_LENGTH);
1879
1880 s += STRIDE_LENGTH;
1881 len -= STRIDE_LENGTH;
1882 }
1883
1884 /* The error state persists, so we only need to check for it here. */
1885 if (state == ERR)
1886 {
1887 /*
1888 * Start over from the beginning with the slow path so we can
1889 * count the valid bytes.
1890 */
1891 len = orig_len;
1892 s = start;
1893 }
1894 else if (state != END)
1895 {
1896 /*
1897 * The fast path exited in the middle of a multibyte sequence.
1898 * Walk backwards to find the leading byte so that the slow path
1899 * can resume checking from there. We must always backtrack at
1900 * least one byte, since the current byte could be e.g. an ASCII
1901 * byte after a 2-byte lead, which is invalid.
1902 */
1903 do
1904 {
1905 Assert(s > start);
1906 s--;
1907 len++;
1908 Assert(IS_HIGHBIT_SET(*s));
1909 } while (pg_utf_mblen(s) <= 1);
1910 }
1911 }
1912
1913 /* check remaining bytes */
1914 while (len > 0)
1915 {
1916 int l;
1917
1918 /* fast path for ASCII-subset characters */
1919 if (!IS_HIGHBIT_SET(*s))
1920 {
1921 if (*s == '\0')
1922 break;
1923 l = 1;
1924 }
1925 else
1926 {
1927 l = pg_utf8_verifychar(s, len);
1928 if (l == -1)
1929 break;
1930 }
1931 s += l;
1932 len -= l;
1933 }
1934
1935 return s - start;
1936}
static bool is_valid_ascii(const unsigned char *s, int len)
Definition: ascii.h:25
Definition: regguts.h:323
#define END
Definition: wchar.c:1756
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:517
#define ERR
Definition: wchar.c:1743
static int pg_utf8_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1665
static void utf8_advance(const unsigned char *s, uint32 *state, int len)
Definition: wchar.c:1837
#define BGN
Definition: wchar.c:1745
#define STRIDE_LENGTH

References Assert, BGN, END, ERR, IS_HIGHBIT_SET, is_valid_ascii(), len, pg_utf8_verifychar(), pg_utf_mblen(), start, STRIDE_LENGTH, and utf8_advance().

◆ pg_utf_dsplen()

static int pg_utf_dsplen ( const unsigned char *  s)
static

Definition at line 641 of file wchar.c.

642{
643 return ucs_wcwidth(utf8_to_unicode(s));
644}
static pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: mbprint.c:53
static int ucs_wcwidth(pg_wchar ucs)
Definition: wchar.c:607

References ucs_wcwidth(), and utf8_to_unicode().

◆ pg_utf_mblen()

int pg_utf_mblen ( const unsigned char *  s)

Definition at line 517 of file wchar.c.

518{
519 int len;
520
521 if ((*s & 0x80) == 0)
522 len = 1;
523 else if ((*s & 0xe0) == 0xc0)
524 len = 2;
525 else if ((*s & 0xf0) == 0xe0)
526 len = 3;
527 else if ((*s & 0xf8) == 0xf0)
528 len = 4;
529#ifdef NOT_USED
530 else if ((*s & 0xfc) == 0xf8)
531 len = 5;
532 else if ((*s & 0xfe) == 0xfc)
533 len = 6;
534#endif
535 else
536 len = 1;
537 return len;
538}

References len.

Referenced by pg_utf8_verifystr(), and pg_wchar2utf_with_len().

◆ pg_wchar2euc_with_len()

static int pg_wchar2euc_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 356 of file wchar.c.

357{
358 int cnt = 0;
359
360 while (len > 0 && *from)
361 {
362 unsigned char c;
363
364 if ((c = (*from >> 24)))
365 {
366 *to++ = c;
367 *to++ = (*from >> 16) & 0xff;
368 *to++ = (*from >> 8) & 0xff;
369 *to++ = *from & 0xff;
370 cnt += 4;
371 }
372 else if ((c = (*from >> 16)))
373 {
374 *to++ = c;
375 *to++ = (*from >> 8) & 0xff;
376 *to++ = *from & 0xff;
377 cnt += 3;
378 }
379 else if ((c = (*from >> 8)))
380 {
381 *to++ = c;
382 *to++ = *from & 0xff;
383 cnt += 2;
384 }
385 else
386 {
387 *to++ = *from;
388 cnt++;
389 }
390 from++;
391 len--;
392 }
393 *to = 0;
394 return cnt;
395}

References len.

◆ pg_wchar2mule_with_len()

static int pg_wchar2mule_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 706 of file wchar.c.

707{
708 int cnt = 0;
709
710 while (len > 0 && *from)
711 {
712 unsigned char lb;
713
714 lb = (*from >> 16) & 0xff;
715 if (IS_LC1(lb))
716 {
717 *to++ = lb;
718 *to++ = *from & 0xff;
719 cnt += 2;
720 }
721 else if (IS_LC2(lb))
722 {
723 *to++ = lb;
724 *to++ = (*from >> 8) & 0xff;
725 *to++ = *from & 0xff;
726 cnt += 3;
727 }
728 else if (IS_LCPRV1_A_RANGE(lb))
729 {
730 *to++ = LCPRV1_A;
731 *to++ = lb;
732 *to++ = *from & 0xff;
733 cnt += 3;
734 }
735 else if (IS_LCPRV1_B_RANGE(lb))
736 {
737 *to++ = LCPRV1_B;
738 *to++ = lb;
739 *to++ = *from & 0xff;
740 cnt += 3;
741 }
742 else if (IS_LCPRV2_A_RANGE(lb))
743 {
744 *to++ = LCPRV2_A;
745 *to++ = lb;
746 *to++ = (*from >> 8) & 0xff;
747 *to++ = *from & 0xff;
748 cnt += 4;
749 }
750 else if (IS_LCPRV2_B_RANGE(lb))
751 {
752 *to++ = LCPRV2_B;
753 *to++ = lb;
754 *to++ = (*from >> 8) & 0xff;
755 *to++ = *from & 0xff;
756 cnt += 4;
757 }
758 else
759 {
760 *to++ = *from & 0xff;
761 cnt += 1;
762 }
763 from++;
764 len--;
765 }
766 *to = 0;
767 return cnt;
768}
#define LCPRV1_A
Definition: pg_wchar.h:150
#define LCPRV1_B
Definition: pg_wchar.h:151
#define LCPRV2_A
Definition: pg_wchar.h:162
#define IS_LCPRV2_B_RANGE(c)
Definition: pg_wchar.h:167
#define IS_LCPRV1_A_RANGE(c)
Definition: pg_wchar.h:153
#define IS_LCPRV1_B_RANGE(c)
Definition: pg_wchar.h:155
#define IS_LCPRV2_A_RANGE(c)
Definition: pg_wchar.h:165
#define LCPRV2_B
Definition: pg_wchar.h:163

References IS_LC1, IS_LC2, IS_LCPRV1_A_RANGE, IS_LCPRV1_B_RANGE, IS_LCPRV2_A_RANGE, IS_LCPRV2_B_RANGE, LCPRV1_A, LCPRV1_B, LCPRV2_A, LCPRV2_B, and len.

◆ pg_wchar2single_with_len()

static int pg_wchar2single_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 840 of file wchar.c.

841{
842 int cnt = 0;
843
844 while (len > 0 && *from)
845 {
846 *to++ = *from++;
847 len--;
848 cnt++;
849 }
850 *to = 0;
851 return cnt;
852}

References len.

◆ pg_wchar2utf_with_len()

static int pg_wchar2utf_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 486 of file wchar.c.

487{
488 int cnt = 0;
489
490 while (len > 0 && *from)
491 {
492 int char_len;
493
494 unicode_to_utf8(*from, to);
495 char_len = pg_utf_mblen(to);
496 cnt += char_len;
497 to += char_len;
498 from++;
499 len--;
500 }
501 *to = 0;
502 return cnt;
503}
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: pg_wchar.h:575

References len, pg_utf_mblen(), and unicode_to_utf8().

◆ ucs_wcwidth()

static int ucs_wcwidth ( pg_wchar  ucs)
static

Definition at line 607 of file wchar.c.

608{
609#include "common/unicode_nonspacing_table.h"
610#include "common/unicode_east_asian_fw_table.h"
611
612 /* test for 8-bit control characters */
613 if (ucs == 0)
614 return 0;
615
616 if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
617 return -1;
618
619 /*
620 * binary search in table of non-spacing characters
621 *
622 * XXX: In the official Unicode sources, it is possible for a character to
623 * be described as both non-spacing and wide at the same time. As of
624 * Unicode 13.0, treating the non-spacing property as the determining
625 * factor for display width leads to the correct behavior, so do that
626 * search first.
627 */
628 if (mbbisearch(ucs, nonspacing,
629 sizeof(nonspacing) / sizeof(struct mbinterval) - 1))
630 return 0;
631
632 /* binary search in table of wide characters */
633 if (mbbisearch(ucs, east_asian_fw,
634 sizeof(east_asian_fw) / sizeof(struct mbinterval) - 1))
635 return 2;
636
637 return 1;
638}
static const struct mbinterval east_asian_fw[]
static const struct mbinterval nonspacing[]
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
Definition: wchar.c:560

References east_asian_fw, mbbisearch(), and nonspacing.

Referenced by pg_utf_dsplen().

◆ utf8_advance()

static void utf8_advance ( const unsigned char *  s,
uint32 *  state,
int  len 
)
static

Definition at line 1837 of file wchar.c.

1838{
1839 /* Note: We deliberately don't check the state's value here. */
1840 while (len > 0)
1841 {
1842 /*
1843 * It's important that the mask value is 31: In most instruction sets,
1844 * a shift by a 32-bit operand is understood to be a shift by its mod
1845 * 32, so the compiler should elide the mask operation.
1846 */
1847 *state = Utf8Transition[*s++] >> (*state & 31);
1848 len--;
1849 }
1850
1851 *state &= 31;
1852}
static const uint32 Utf8Transition[256]
Definition: wchar.c:1779

References len, and Utf8Transition.

Referenced by pg_utf8_verifystr().

Variable Documentation

◆ pg_wchar_table

◆ Utf8Transition

const uint32 Utf8Transition[256]
static

Definition at line 1779 of file wchar.c.

Referenced by utf8_advance().