PostgreSQL Source Code git master
wchar.c File Reference
Include dependency graph for wchar.c:

Go to the source code of this file.

Data Structures

struct  mbinterval
 

Macros

#define NONUTF8_INVALID_BYTE0   (0x8d)
 
#define NONUTF8_INVALID_BYTE1   (' ')
 
#define IS_EUC_RANGE_VALID(c)   ((c) >= 0xa1 && (c) <= 0xfe)
 
#define pg_euccn_verifychar   pg_euckr_verifychar
 
#define pg_euccn_verifystr   pg_euckr_verifystr
 
#define ERR   0
 
#define BGN   11
 
#define CS1   16
 
#define CS2   1
 
#define CS3   5
 
#define P3A   6 /* Lead was E0, check for 3-byte overlong */
 
#define P3B   20 /* Lead was ED, check for surrogate */
 
#define P4A   25 /* Lead was F0, check for 4-byte overlong */
 
#define P4B   30 /* Lead was F4, check for too-large */
 
#define END   BGN
 
#define ASC   (END << BGN)
 
#define L2A   (CS1 << BGN)
 
#define L3A   (P3A << BGN)
 
#define L3B   (CS2 << BGN)
 
#define L3C   (P3B << BGN)
 
#define L4A   (P4A << BGN)
 
#define L4B   (CS3 << BGN)
 
#define L4C   (P4B << BGN)
 
#define CR1   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)
 
#define CR2   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)
 
#define CR3   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)
 
#define ILL   ERR
 
#define STRIDE_LENGTH   (2 * sizeof(Vector8))
 

Functions

static int pg_ascii2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_ascii_mblen (const unsigned char *s)
 
static int pg_ascii_dsplen (const unsigned char *s)
 
static int pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euc_mblen (const unsigned char *s)
 
static int pg_euc_dsplen (const unsigned char *s)
 
static int pg_eucjp2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_eucjp_mblen (const unsigned char *s)
 
static int pg_eucjp_dsplen (const unsigned char *s)
 
static int pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euckr_mblen (const unsigned char *s)
 
static int pg_euckr_dsplen (const unsigned char *s)
 
static int pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euccn_mblen (const unsigned char *s)
 
static int pg_euccn_dsplen (const unsigned char *s)
 
static int pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euctw_mblen (const unsigned char *s)
 
static int pg_euctw_dsplen (const unsigned char *s)
 
static int pg_wchar2euc_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_johab_mblen (const unsigned char *s)
 
static int pg_johab_dsplen (const unsigned char *s)
 
static int pg_utf2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2utf_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_utf_mblen (const unsigned char *s)
 
static int mbbisearch (pg_wchar ucs, const struct mbinterval *table, int max)
 
static int ucs_wcwidth (pg_wchar ucs)
 
static int pg_utf_dsplen (const unsigned char *s)
 
static int pg_mule2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2mule_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_mule_mblen (const unsigned char *s)
 
static int pg_mule_dsplen (const unsigned char *s)
 
static int pg_latin12wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2single_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_latin1_mblen (const unsigned char *s)
 
static int pg_latin1_dsplen (const unsigned char *s)
 
static int pg_sjis_mblen (const unsigned char *s)
 
static int pg_sjis_dsplen (const unsigned char *s)
 
static int pg_big5_mblen (const unsigned char *s)
 
static int pg_big5_dsplen (const unsigned char *s)
 
static int pg_gbk_mblen (const unsigned char *s)
 
static int pg_gbk_dsplen (const unsigned char *s)
 
static int pg_uhc_mblen (const unsigned char *s)
 
static int pg_uhc_dsplen (const unsigned char *s)
 
static int pg_gb18030_mblen (const unsigned char *s)
 
static int pg_gb18030_dsplen (const unsigned char *s)
 
static int pg_ascii_verifychar (const unsigned char *s, int len)
 
static int pg_ascii_verifystr (const unsigned char *s, int len)
 
static int pg_eucjp_verifychar (const unsigned char *s, int len)
 
static int pg_eucjp_verifystr (const unsigned char *s, int len)
 
static int pg_euckr_verifychar (const unsigned char *s, int len)
 
static int pg_euckr_verifystr (const unsigned char *s, int len)
 
static int pg_euctw_verifychar (const unsigned char *s, int len)
 
static int pg_euctw_verifystr (const unsigned char *s, int len)
 
static int pg_johab_verifychar (const unsigned char *s, int len)
 
static int pg_johab_verifystr (const unsigned char *s, int len)
 
static int pg_mule_verifychar (const unsigned char *s, int len)
 
static int pg_mule_verifystr (const unsigned char *s, int len)
 
static int pg_latin1_verifychar (const unsigned char *s, int len)
 
static int pg_latin1_verifystr (const unsigned char *s, int len)
 
static int pg_sjis_verifychar (const unsigned char *s, int len)
 
static int pg_sjis_verifystr (const unsigned char *s, int len)
 
static int pg_big5_verifychar (const unsigned char *s, int len)
 
static int pg_big5_verifystr (const unsigned char *s, int len)
 
static int pg_gbk_verifychar (const unsigned char *s, int len)
 
static int pg_gbk_verifystr (const unsigned char *s, int len)
 
static int pg_uhc_verifychar (const unsigned char *s, int len)
 
static int pg_uhc_verifystr (const unsigned char *s, int len)
 
static int pg_gb18030_verifychar (const unsigned char *s, int len)
 
static int pg_gb18030_verifystr (const unsigned char *s, int len)
 
static int pg_utf8_verifychar (const unsigned char *s, int len)
 
static void utf8_advance (const unsigned char *s, uint32 *state, int len)
 
static int pg_utf8_verifystr (const unsigned char *s, int len)
 
bool pg_utf8_islegal (const unsigned char *source, int length)
 
void pg_encoding_set_invalid (int encoding, char *dst)
 
int pg_encoding_mblen (int encoding, const char *mbstr)
 
int pg_encoding_mblen_bounded (int encoding, const char *mbstr)
 
int pg_encoding_dsplen (int encoding, const char *mbstr)
 
int pg_encoding_verifymbchar (int encoding, const char *mbstr, int len)
 
int pg_encoding_verifymbstr (int encoding, const char *mbstr, int len)
 
int pg_encoding_max_length (int encoding)
 

Variables

static const uint32 Utf8Transition [256]
 
const pg_wchar_tbl pg_wchar_table []
 

Macro Definition Documentation

◆ ASC

#define ASC   (END << BGN)

Definition at line 1795 of file wchar.c.

◆ BGN

#define BGN   11

Definition at line 1779 of file wchar.c.

◆ CR1

#define CR1   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)

Definition at line 1807 of file wchar.c.

◆ CR2

#define CR2   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)

Definition at line 1808 of file wchar.c.

◆ CR3

#define CR3   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)

Definition at line 1809 of file wchar.c.

◆ CS1

#define CS1   16

Definition at line 1781 of file wchar.c.

◆ CS2

#define CS2   1

Definition at line 1782 of file wchar.c.

◆ CS3

#define CS3   5

Definition at line 1783 of file wchar.c.

◆ END

#define END   BGN

Definition at line 1790 of file wchar.c.

◆ ERR

#define ERR   0

Definition at line 1777 of file wchar.c.

◆ ILL

#define ILL   ERR

Definition at line 1811 of file wchar.c.

◆ IS_EUC_RANGE_VALID

#define IS_EUC_RANGE_VALID (   c)    ((c) >= 0xa1 && (c) <= 0xfe)

Definition at line 1077 of file wchar.c.

◆ L2A

#define L2A   (CS1 << BGN)

Definition at line 1797 of file wchar.c.

◆ L3A

#define L3A   (P3A << BGN)

Definition at line 1799 of file wchar.c.

◆ L3B

#define L3B   (CS2 << BGN)

Definition at line 1800 of file wchar.c.

◆ L3C

#define L3C   (P3B << BGN)

Definition at line 1801 of file wchar.c.

◆ L4A

#define L4A   (P4A << BGN)

Definition at line 1803 of file wchar.c.

◆ L4B

#define L4B   (CS3 << BGN)

Definition at line 1804 of file wchar.c.

◆ L4C

#define L4C   (P4B << BGN)

Definition at line 1805 of file wchar.c.

◆ NONUTF8_INVALID_BYTE0

#define NONUTF8_INVALID_BYTE0   (0x8d)

Definition at line 34 of file wchar.c.

◆ NONUTF8_INVALID_BYTE1

#define NONUTF8_INVALID_BYTE1   (' ')

Definition at line 35 of file wchar.c.

◆ P3A

#define P3A   6 /* Lead was E0, check for 3-byte overlong */

Definition at line 1785 of file wchar.c.

◆ P3B

#define P3B   20 /* Lead was ED, check for surrogate */

Definition at line 1786 of file wchar.c.

◆ P4A

#define P4A   25 /* Lead was F0, check for 4-byte overlong */

Definition at line 1787 of file wchar.c.

◆ P4B

#define P4B   30 /* Lead was F4, check for too-large */

Definition at line 1788 of file wchar.c.

◆ pg_euccn_verifychar

#define pg_euccn_verifychar   pg_euckr_verifychar

Definition at line 1222 of file wchar.c.

◆ pg_euccn_verifystr

#define pg_euccn_verifystr   pg_euckr_verifystr

Definition at line 1223 of file wchar.c.

◆ STRIDE_LENGTH

#define STRIDE_LENGTH   (2 * sizeof(Vector8))

Function Documentation

◆ mbbisearch()

static int mbbisearch ( pg_wchar  ucs,
const struct mbinterval *  table,
int  max 
)
static

Definition at line 579 of file wchar.c.

580{
581 int min = 0;
582 int mid;
583
584 if (ucs < table[0].first || ucs > table[max].last)
585 return 0;
586 while (max >= min)
587 {
588 mid = (min + max) / 2;
589 if (ucs > table[mid].last)
590 min = mid + 1;
591 else if (ucs < table[mid].first)
592 max = mid - 1;
593 else
594 return 1;
595 }
596
597 return 0;
598}

Referenced by ucs_wcwidth().

◆ pg_ascii2wchar_with_len()

static int pg_ascii2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 68 of file wchar.c.

69{
70 int cnt = 0;
71
72 while (len > 0 && *from)
73 {
74 *to++ = *from++;
75 len--;
76 cnt++;
77 }
78 *to = 0;
79 return cnt;
80}
const void size_t len

References len.

◆ pg_ascii_dsplen()

static int pg_ascii_dsplen ( const unsigned char *  s)
static

Definition at line 89 of file wchar.c.

90{
91 if (*s == '\0')
92 return 0;
93 if (*s < 0x20 || *s == 0x7f)
94 return -1;
95
96 return 1;
97}

Referenced by pg_big5_dsplen(), pg_euc_dsplen(), pg_euccn_dsplen(), pg_eucjp_dsplen(), pg_euctw_dsplen(), pg_gb18030_dsplen(), pg_gbk_dsplen(), pg_latin1_dsplen(), pg_sjis_dsplen(), and pg_uhc_dsplen().

◆ pg_ascii_mblen()

static int pg_ascii_mblen ( const unsigned char *  s)
static

Definition at line 83 of file wchar.c.

84{
85 return 1;
86}

◆ pg_ascii_verifychar()

static int pg_ascii_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1061 of file wchar.c.

1062{
1063 return 1;
1064}

◆ pg_ascii_verifystr()

static int pg_ascii_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1067 of file wchar.c.

1068{
1069 const unsigned char *nullpos = memchr(s, 0, len);
1070
1071 if (nullpos == NULL)
1072 return len;
1073 else
1074 return nullpos - s;
1075}

References len.

◆ pg_big5_dsplen()

static int pg_big5_dsplen ( const unsigned char *  s)
static

Definition at line 932 of file wchar.c.

933{
934 int len;
935
936 if (IS_HIGHBIT_SET(*s))
937 len = 2; /* kanji? */
938 else
939 len = pg_ascii_dsplen(s); /* should be ASCII */
940 return len;
941}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1126
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:89

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_big5_mblen()

static int pg_big5_mblen ( const unsigned char *  s)
static

Definition at line 920 of file wchar.c.

921{
922 int len;
923
924 if (IS_HIGHBIT_SET(*s))
925 len = 2; /* kanji? */
926 else
927 len = 1; /* should be ASCII */
928 return len;
929}

References IS_HIGHBIT_SET, and len.

Referenced by pg_big5_verifychar().

◆ pg_big5_verifychar()

static int pg_big5_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1477 of file wchar.c.

1478{
1479 int l,
1480 mbl;
1481
1482 l = mbl = pg_big5_mblen(s);
1483
1484 if (len < l)
1485 return -1;
1486
1487 if (l == 2 &&
1488 s[0] == NONUTF8_INVALID_BYTE0 &&
1489 s[1] == NONUTF8_INVALID_BYTE1)
1490 return -1;
1491
1492 while (--l > 0)
1493 {
1494 if (*++s == '\0')
1495 return -1;
1496 }
1497
1498 return mbl;
1499}
#define NONUTF8_INVALID_BYTE0
Definition: wchar.c:34
static int pg_big5_mblen(const unsigned char *s)
Definition: wchar.c:920
#define NONUTF8_INVALID_BYTE1
Definition: wchar.c:35

References len, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, and pg_big5_mblen().

Referenced by pg_big5_verifystr().

◆ pg_big5_verifystr()

static int pg_big5_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1502 of file wchar.c.

1503{
1504 const unsigned char *start = s;
1505
1506 while (len > 0)
1507 {
1508 int l;
1509
1510 /* fast path for ASCII-subset characters */
1511 if (!IS_HIGHBIT_SET(*s))
1512 {
1513 if (*s == '\0')
1514 break;
1515 l = 1;
1516 }
1517 else
1518 {
1519 l = pg_big5_verifychar(s, len);
1520 if (l == -1)
1521 break;
1522 }
1523 s += l;
1524 len -= l;
1525 }
1526
1527 return s - start;
1528}
return str start
static int pg_big5_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1477

References IS_HIGHBIT_SET, len, pg_big5_verifychar(), and start.

◆ pg_encoding_dsplen()

int pg_encoding_dsplen ( int  encoding,
const char *  mbstr 
)

Definition at line 2137 of file wchar.c.

2138{
2139 return (PG_VALID_ENCODING(encoding) ?
2140 pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
2141 pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
2142}
int32 encoding
Definition: pg_database.h:41
@ PG_SQL_ASCII
Definition: pg_wchar.h:226
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:287
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:2062

References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by PQdsplen(), and reportErrorPosition().

◆ pg_encoding_max_length()

int pg_encoding_max_length ( int  encoding)

Definition at line 2174 of file wchar.c.

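2175{
2176 Assert(PG_VALID_ENCODING(encoding));
2177
2178 /*
2179 * Check for the encoding despite the assert, due to some mingw versions
2180 * otherwise issuing bogus warnings.
2181 */
2182 return PG_VALID_ENCODING(encoding) ?
2183 pg_wchar_table[encoding].maxmblen :
2184 pg_wchar_table[PG_SQL_ASCII].maxmblen;
2185}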
Assert(PointerIsAligned(start, uint64))
int maxmblen
Definition: pg_wchar.h:386

References Assert(), encoding, pg_wchar_tbl::maxmblen, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by ascii(), chr(), CopyConvertBuf(), pg_encoding_mbcliplen(), pg_encoding_set_invalid(), pg_verify_mbstr_len(), reportErrorPosition(), test_enc_setup(), and type_maximum_size().

◆ pg_encoding_mblen()

int pg_encoding_mblen ( int  encoding,
const char *  mbstr 
)

Definition at line 2116 of file wchar.c.

◆ pg_encoding_mblen_bounded()

int pg_encoding_mblen_bounded ( int  encoding,
const char *  mbstr 
)

Definition at line 2128 of file wchar.c.

2129{
2130 return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr));
2131}
size_t strnlen(const char *str, size_t maxlen)
Definition: strnlen.c:26
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:2116

References encoding, pg_encoding_mblen(), and strnlen().

◆ pg_encoding_set_invalid()

void pg_encoding_set_invalid ( int  encoding,
char *  dst 
)

Definition at line 2049 of file wchar.c.

2050{
2051 Assert(pg_encoding_max_length(encoding) > 1);
2052
2053 dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0);
2054 dst[1] = NONUTF8_INVALID_BYTE1;
2055}
@ PG_UTF8
Definition: pg_wchar.h:232
int pg_encoding_max_length(int encoding)
Definition: wchar.c:2174

References Assert(), encoding, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, pg_encoding_max_length(), and PG_UTF8.

Referenced by appendStringLiteral(), fmtIdEnc(), PQescapeStringInternal(), and test_enc_setup().

◆ pg_encoding_verifymbchar()

int pg_encoding_verifymbchar ( int  encoding,
const char *  mbstr,
int  len 
)

◆ pg_encoding_verifymbstr()

int pg_encoding_verifymbstr ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 2163 of file wchar.c.

2164{
2165 return (PG_VALID_ENCODING(encoding) ?
2166 pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len) :
2167 pg_wchar_table[PG_SQL_ASCII].mbverifystr((const unsigned char *) mbstr, len));
2168}

References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by add_file_to_manifest(), CopyConvertBuf(), handle_oauth_sasl_error(), parse_oauth_json(), PQescapeInternal(), test_enc_conversion(), test_enc_setup(), and test_one_vector_escape().

◆ pg_euc2wchar_with_len()

static int pg_euc2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 103 of file wchar.c.

104{
105 int cnt = 0;
106
107 while (len > 0 && *from)
108 {
109 if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte
110 * KANA") */
111 {
112 from++;
113 *to = (SS2 << 8) | *from++;
114 len -= 2;
115 }
116 else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
117 {
118 from++;
119 *to = (SS3 << 16) | (*from++ << 8);
120 *to |= *from++;
121 len -= 3;
122 }
123 else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
124 {
125 *to = *from++ << 8;
126 *to |= *from++;
127 len -= 2;
128 }
129 else /* must be ASCII */
130 {
131 *to = *from++;
132 len--;
133 }
134 to++;
135 cnt++;
136 }
137 *to = 0;
138 return cnt;
139}
#define SS2
Definition: pg_wchar.h:38
#define SS3
Definition: pg_wchar.h:39

References IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp2wchar_with_len(), and pg_euckr2wchar_with_len().

◆ pg_euc_dsplen()

static int pg_euc_dsplen ( const unsigned char *  s)
inline static

Definition at line 158 of file wchar.c.

159{
160 int len;
161
162 if (*s == SS2)
163 len = 2;
164 else if (*s == SS3)
165 len = 2;
166 else if (IS_HIGHBIT_SET(*s))
167 len = 2;
168 else
169 len = pg_ascii_dsplen(s);
170 return len;
171}

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

Referenced by pg_euckr_dsplen(), and pg_johab_dsplen().

◆ pg_euc_mblen()

static int pg_euc_mblen ( const unsigned char *  s)
inline static

Definition at line 142 of file wchar.c.

143{
144 int len;
145
146 if (*s == SS2)
147 len = 2;
148 else if (*s == SS3)
149 len = 3;
150 else if (IS_HIGHBIT_SET(*s))
151 len = 2;
152 else
153 len = 1;
154 return len;
155}

References IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp_mblen(), pg_euckr_mblen(), and pg_johab_mblen().

◆ pg_euccn2wchar_with_len()

static int pg_euccn2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 230 of file wchar.c.

231{
232 int cnt = 0;
233
234 while (len > 0 && *from)
235 {
236 if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
237 {
238 from++;
239 *to = (SS2 << 16) | (*from++ << 8);
240 *to |= *from++;
241 len -= 3;
242 }
243 else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */
244 {
245 from++;
246 *to = (SS3 << 16) | (*from++ << 8);
247 *to |= *from++;
248 len -= 3;
249 }
250 else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
251 {
252 *to = *from++ << 8;
253 *to |= *from++;
254 len -= 2;
255 }
256 else
257 {
258 *to = *from++;
259 len--;
260 }
261 to++;
262 cnt++;
263 }
264 *to = 0;
265 return cnt;
266}

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euccn_dsplen()

static int pg_euccn_dsplen ( const unsigned char *  s)
static

Definition at line 281 of file wchar.c.

282{
283 int len;
284
285 if (IS_HIGHBIT_SET(*s))
286 len = 2;
287 else
288 len = pg_ascii_dsplen(s);
289 return len;
290}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_euccn_mblen()

static int pg_euccn_mblen ( const unsigned char *  s)
static

Definition at line 269 of file wchar.c.

270{
271 int len;
272
273 if (IS_HIGHBIT_SET(*s))
274 len = 2;
275 else
276 len = 1;
277 return len;
278}

References IS_HIGHBIT_SET, and len.

◆ pg_eucjp2wchar_with_len()

static int pg_eucjp2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 177 of file wchar.c.

178{
179 return pg_euc2wchar_with_len(from, to, len);
180}
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition: wchar.c:103

References len, and pg_euc2wchar_with_len().

◆ pg_eucjp_dsplen()

static int pg_eucjp_dsplen ( const unsigned char *  s)
static

Definition at line 189 of file wchar.c.

190{
191 int len;
192
193 if (*s == SS2)
194 len = 1;
195 else if (*s == SS3)
196 len = 2;
197 else if (IS_HIGHBIT_SET(*s))
198 len = 2;
199 else
200 len = pg_ascii_dsplen(s);
201 return len;
202}

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

◆ pg_eucjp_mblen()

static int pg_eucjp_mblen ( const unsigned char *  s)
static

Definition at line 183 of file wchar.c.

184{
185 return pg_euc_mblen(s);
186}
static int pg_euc_mblen(const unsigned char *s)
Definition: wchar.c:142

References pg_euc_mblen().

◆ pg_eucjp_verifychar()

static int pg_eucjp_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1080 of file wchar.c.

1081{
1082 int l;
1083 unsigned char c1,
1084 c2;
1085
1086 c1 = *s++;
1087
1088 switch (c1)
1089 {
1090 case SS2: /* JIS X 0201 */
1091 l = 2;
1092 if (l > len)
1093 return -1;
1094 c2 = *s++;
1095 if (c2 < 0xa1 || c2 > 0xdf)
1096 return -1;
1097 break;
1098
1099 case SS3: /* JIS X 0212 */
1100 l = 3;
1101 if (l > len)
1102 return -1;
1103 c2 = *s++;
1104 if (!IS_EUC_RANGE_VALID(c2))
1105 return -1;
1106 c2 = *s++;
1107 if (!IS_EUC_RANGE_VALID(c2))
1108 return -1;
1109 break;
1110
1111 default:
1112 if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1113 {
1114 l = 2;
1115 if (l > len)
1116 return -1;
1117 if (!IS_EUC_RANGE_VALID(c1))
1118 return -1;
1119 c2 = *s++;
1120 if (!IS_EUC_RANGE_VALID(c2))
1121 return -1;
1122 }
1123 else
1124 /* must be ASCII */
1125 {
1126 l = 1;
1127 }
1128 break;
1129 }
1130
1131 return l;
1132}
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1077

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp_verifystr().

◆ pg_eucjp_verifystr()

static int pg_eucjp_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1135 of file wchar.c.

1136{
1137 const unsigned char *start = s;
1138
1139 while (len > 0)
1140 {
1141 int l;
1142
1143 /* fast path for ASCII-subset characters */
1144 if (!IS_HIGHBIT_SET(*s))
1145 {
1146 if (*s == '\0')
1147 break;
1148 l = 1;
1149 }
1150 else
1151 {
1152 l = pg_eucjp_verifychar(s, len);
1153 if (l == -1)
1154 break;
1155 }
1156 s += l;
1157 len -= l;
1158 }
1159
1160 return s - start;
1161}
static int pg_eucjp_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1080

References IS_HIGHBIT_SET, len, pg_eucjp_verifychar(), and start.

◆ pg_euckr2wchar_with_len()

static int pg_euckr2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 208 of file wchar.c.

209{
210 return pg_euc2wchar_with_len(from, to, len);
211}

References len, and pg_euc2wchar_with_len().

◆ pg_euckr_dsplen()

static int pg_euckr_dsplen ( const unsigned char *  s)
static

Definition at line 220 of file wchar.c.

221{
222 return pg_euc_dsplen(s);
223}
static int pg_euc_dsplen(const unsigned char *s)
Definition: wchar.c:158

References pg_euc_dsplen().

◆ pg_euckr_mblen()

static int pg_euckr_mblen ( const unsigned char *  s)
static

Definition at line 214 of file wchar.c.

215{
216 return pg_euc_mblen(s);
217}

References pg_euc_mblen().

◆ pg_euckr_verifychar()

static int pg_euckr_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1164 of file wchar.c.

1165{
1166 int l;
1167 unsigned char c1,
1168 c2;
1169
1170 c1 = *s++;
1171
1172 if (IS_HIGHBIT_SET(c1))
1173 {
1174 l = 2;
1175 if (l > len)
1176 return -1;
1177 if (!IS_EUC_RANGE_VALID(c1))
1178 return -1;
1179 c2 = *s++;
1180 if (!IS_EUC_RANGE_VALID(c2))
1181 return -1;
1182 }
1183 else
1184 /* must be ASCII */
1185 {
1186 l = 1;
1187 }
1188
1189 return l;
1190}

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, and len.

Referenced by pg_euckr_verifystr().

◆ pg_euckr_verifystr()

static int pg_euckr_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1193 of file wchar.c.

1194{
1195 const unsigned char *start = s;
1196
1197 while (len > 0)
1198 {
1199 int l;
1200
1201 /* fast path for ASCII-subset characters */
1202 if (!IS_HIGHBIT_SET(*s))
1203 {
1204 if (*s == '\0')
1205 break;
1206 l = 1;
1207 }
1208 else
1209 {
1210 l = pg_euckr_verifychar(s, len);
1211 if (l == -1)
1212 break;
1213 }
1214 s += l;
1215 len -= l;
1216 }
1217
1218 return s - start;
1219}
static int pg_euckr_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1164

References IS_HIGHBIT_SET, len, pg_euckr_verifychar(), and start.

◆ pg_euctw2wchar_with_len()

static int pg_euctw2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 297 of file wchar.c.

298{
299 int cnt = 0;
300
301 while (len > 0 && *from)
302 {
303 if (*from == SS2 && len >= 4) /* code set 2 */
304 {
305 from++;
306 *to = (((uint32) SS2) << 24) | (*from++ << 16);
307 *to |= *from++ << 8;
308 *to |= *from++;
309 len -= 4;
310 }
311 else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
312 {
313 from++;
314 *to = (SS3 << 16) | (*from++ << 8);
315 *to |= *from++;
316 len -= 3;
317 }
318 else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */
319 {
320 *to = *from++ << 8;
321 *to |= *from++;
322 len -= 2;
323 }
324 else
325 {
326 *to = *from++;
327 len--;
328 }
329 to++;
330 cnt++;
331 }
332 *to = 0;
333 return cnt;
334}
uint32_t uint32
Definition: c.h:502

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euctw_dsplen()

static int pg_euctw_dsplen ( const unsigned char *  s)
static

Definition at line 353 of file wchar.c.

354{
355 int len;
356
357 if (*s == SS2)
358 len = 2;
359 else if (*s == SS3)
360 len = 2;
361 else if (IS_HIGHBIT_SET(*s))
362 len = 2;
363 else
364 len = pg_ascii_dsplen(s);
365 return len;
366}

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

◆ pg_euctw_mblen()

static int pg_euctw_mblen ( const unsigned char *  s)
static

Definition at line 337 of file wchar.c.

338{
339 int len;
340
341 if (*s == SS2)
342 len = 4;
343 else if (*s == SS3)
344 len = 3;
345 else if (IS_HIGHBIT_SET(*s))
346 len = 2;
347 else
348 len = 1;
349 return len;
350}

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euctw_verifychar()

static int pg_euctw_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1226 of file wchar.c.

1227{
1228 int l;
1229 unsigned char c1,
1230 c2;
1231
1232 c1 = *s++;
1233
1234 switch (c1)
1235 {
1236 case SS2: /* CNS 11643 Plane 1-7 */
1237 l = 4;
1238 if (l > len)
1239 return -1;
1240 c2 = *s++;
1241 if (c2 < 0xa1 || c2 > 0xa7)
1242 return -1;
1243 c2 = *s++;
1244 if (!IS_EUC_RANGE_VALID(c2))
1245 return -1;
1246 c2 = *s++;
1247 if (!IS_EUC_RANGE_VALID(c2))
1248 return -1;
1249 break;
1250
1251 case SS3: /* unused */
1252 return -1;
1253
1254 default:
1255 if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */
1256 {
1257 l = 2;
1258 if (l > len)
1259 return -1;
1260 /* no further range check on c1? */
1261 c2 = *s++;
1262 if (!IS_EUC_RANGE_VALID(c2))
1263 return -1;
1264 }
1265 else
1266 /* must be ASCII */
1267 {
1268 l = 1;
1269 }
1270 break;
1271 }
1272 return l;
1273}

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_euctw_verifystr().

◆ pg_euctw_verifystr()

static int pg_euctw_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1276 of file wchar.c.

1277{
1278 const unsigned char *start = s;
1279
1280 while (len > 0)
1281 {
1282 int l;
1283
1284 /* fast path for ASCII-subset characters */
1285 if (!IS_HIGHBIT_SET(*s))
1286 {
1287 if (*s == '\0')
1288 break;
1289 l = 1;
1290 }
1291 else
1292 {
1293 l = pg_euctw_verifychar(s, len);
1294 if (l == -1)
1295 break;
1296 }
1297 s += l;
1298 len -= l;
1299 }
1300
1301 return s - start;
1302}
static int pg_euctw_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1226

References IS_HIGHBIT_SET, len, pg_euctw_verifychar(), and start.

◆ pg_gb18030_dsplen()

static int pg_gb18030_dsplen ( const unsigned char *  s)
static

Definition at line 1027 of file wchar.c.

1028{
1029 int len;
1030
1031 if (IS_HIGHBIT_SET(*s))
1032 len = 2;
1033 else
1034 len = pg_ascii_dsplen(s); /* ASCII */
1035 return len;
1036}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_gb18030_mblen()

static int pg_gb18030_mblen ( const unsigned char *  s)
static

Definition at line 1013 of file wchar.c.

1014{
1015 int len;
1016
1017 if (!IS_HIGHBIT_SET(*s))
1018 len = 1; /* ASCII */
1019 else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1020 len = 4;
1021 else
1022 len = 2;
1023 return len;
1024}

References IS_HIGHBIT_SET, and len.

◆ pg_gb18030_verifychar()

static int pg_gb18030_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1639 of file wchar.c.

1640{
1641 int l;
1642
1643 if (!IS_HIGHBIT_SET(*s))
1644 l = 1; /* ASCII */
1645 else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1646 {
1647 /* Should be 4-byte, validate remaining bytes */
1648 if (*s >= 0x81 && *s <= 0xfe &&
1649 *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
1650 *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
1651 l = 4;
1652 else
1653 l = -1;
1654 }
1655 else if (len >= 2 && *s >= 0x81 && *s <= 0xfe)
1656 {
1657 /* Should be 2-byte, validate */
1658 if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
1659 (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
1660 l = 2;
1661 else
1662 l = -1;
1663 }
1664 else
1665 l = -1;
1666 return l;
1667}

References IS_HIGHBIT_SET, and len.

Referenced by pg_gb18030_verifystr().

◆ pg_gb18030_verifystr()

static int pg_gb18030_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1670 of file wchar.c.

1671{
1672 const unsigned char *start = s;
1673
1674 while (len > 0)
1675 {
1676 int l;
1677
1678 /* fast path for ASCII-subset characters */
1679 if (!IS_HIGHBIT_SET(*s))
1680 {
1681 if (*s == '\0')
1682 break;
1683 l = 1;
1684 }
1685 else
1686 {
1687 l = pg_gb18030_verifychar(s, len);
1688 if (l == -1)
1689 break;
1690 }
1691 s += l;
1692 len -= l;
1693 }
1694
1695 return s - start;
1696}
static int pg_gb18030_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1639

References IS_HIGHBIT_SET, len, pg_gb18030_verifychar(), and start.

◆ pg_gbk_dsplen()

static int pg_gbk_dsplen ( const unsigned char *  s)
static

Definition at line 959 of file wchar.c.

960{
961 int len;
962
963 if (IS_HIGHBIT_SET(*s))
964 len = 2; /* kanji? */
965 else
966 len = pg_ascii_dsplen(s); /* should be ASCII */
967 return len;
968}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_gbk_mblen()

static int pg_gbk_mblen ( const unsigned char *  s)
static

Definition at line 947 of file wchar.c.

948{
949 int len;
950
951 if (IS_HIGHBIT_SET(*s))
952 len = 2; /* kanji? */
953 else
954 len = 1; /* should be ASCII */
955 return len;
956}

References IS_HIGHBIT_SET, and len.

Referenced by pg_gbk_verifychar().

◆ pg_gbk_verifychar()

static int pg_gbk_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1531 of file wchar.c.

1532{
1533 int l,
1534 mbl;
1535
1536 l = mbl = pg_gbk_mblen(s);
1537
1538 if (len < l)
1539 return -1;
1540
1541 if (l == 2 &&
1542 s[0] == NONUTF8_INVALID_BYTE0 &&
1543 s[1] == NONUTF8_INVALID_BYTE1)
1544 return -1;
1545
1546 while (--l > 0)
1547 {
1548 if (*++s == '\0')
1549 return -1;
1550 }
1551
1552 return mbl;
1553}
static int pg_gbk_mblen(const unsigned char *s)
Definition: wchar.c:947

References len, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, and pg_gbk_mblen().

Referenced by pg_gbk_verifystr().

◆ pg_gbk_verifystr()

static int pg_gbk_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1556 of file wchar.c.

1557{
1558 const unsigned char *start = s;
1559
1560 while (len > 0)
1561 {
1562 int l;
1563
1564 /* fast path for ASCII-subset characters */
1565 if (!IS_HIGHBIT_SET(*s))
1566 {
1567 if (*s == '\0')
1568 break;
1569 l = 1;
1570 }
1571 else
1572 {
1573 l = pg_gbk_verifychar(s, len);
1574 if (l == -1)
1575 break;
1576 }
1577 s += l;
1578 len -= l;
1579 }
1580
1581 return s - start;
1582}
static int pg_gbk_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1531

References IS_HIGHBIT_SET, len, pg_gbk_verifychar(), and start.

◆ pg_johab_dsplen()

static int pg_johab_dsplen ( const unsigned char *  s)
static

Definition at line 427 of file wchar.c.

428{
429 return pg_euc_dsplen(s);
430}

References pg_euc_dsplen().

◆ pg_johab_mblen()

static int pg_johab_mblen ( const unsigned char *  s)
static

Definition at line 421 of file wchar.c.

422{
423 return pg_euc_mblen(s);
424}

References pg_euc_mblen().

Referenced by pg_johab_verifychar().

◆ pg_johab_verifychar()

static int pg_johab_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1305 of file wchar.c.

1306{
1307 int l,
1308 mbl;
1309 unsigned char c;
1310
1311 l = mbl = pg_johab_mblen(s);
1312
1313 if (len < l)
1314 return -1;
1315
1316 if (!IS_HIGHBIT_SET(*s))
1317 return mbl;
1318
1319 while (--l > 0)
1320 {
1321 c = *++s;
1322 if (!IS_EUC_RANGE_VALID(c))
1323 return -1;
1324 }
1325 return mbl;
1326}
char * c
static int pg_johab_mblen(const unsigned char *s)
Definition: wchar.c:421

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, and pg_johab_mblen().

Referenced by pg_johab_verifystr().

◆ pg_johab_verifystr()

static int pg_johab_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1329 of file wchar.c.

1330{
1331 const unsigned char *start = s;
1332
1333 while (len > 0)
1334 {
1335 int l;
1336
1337 /* fast path for ASCII-subset characters */
1338 if (!IS_HIGHBIT_SET(*s))
1339 {
1340 if (*s == '\0')
1341 break;
1342 l = 1;
1343 }
1344 else
1345 {
1346 l = pg_johab_verifychar(s, len);
1347 if (l == -1)
1348 break;
1349 }
1350 s += l;
1351 len -= l;
1352 }
1353
1354 return s - start;
1355}
static int pg_johab_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1305

References IS_HIGHBIT_SET, len, pg_johab_verifychar(), and start.

◆ pg_latin12wchar_with_len()

static int pg_latin12wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 837 of file wchar.c.

838{
839 int cnt = 0;
840
841 while (len > 0 && *from)
842 {
843 *to++ = *from++;
844 len--;
845 cnt++;
846 }
847 *to = 0;
848 return cnt;
849}

References len.

◆ pg_latin1_dsplen()

static int pg_latin1_dsplen ( const unsigned char *  s)
static

Definition at line 880 of file wchar.c.

881{
882 return pg_ascii_dsplen(s);
883}

References pg_ascii_dsplen().

◆ pg_latin1_mblen()

static int pg_latin1_mblen ( const unsigned char *  s)
static

Definition at line 874 of file wchar.c.

875{
876 return 1;
877}

◆ pg_latin1_verifychar()

static int pg_latin1_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1408 of file wchar.c.

1409{
1410 return 1;
1411}

◆ pg_latin1_verifystr()

static int pg_latin1_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1414 of file wchar.c.

1415{
1416 const unsigned char *nullpos = memchr(s, 0, len);
1417
1418 if (nullpos == NULL)
1419 return len;
1420 else
1421 return nullpos - s;
1422}

References len.

◆ pg_mule2wchar_with_len()

static int pg_mule2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 672 of file wchar.c.

673{
674 int cnt = 0;
675
676 while (len > 0 && *from)
677 {
678 if (IS_LC1(*from) && len >= 2)
679 {
680 *to = *from++ << 16;
681 *to |= *from++;
682 len -= 2;
683 }
684 else if (IS_LCPRV1(*from) && len >= 3)
685 {
686 from++;
687 *to = *from++ << 16;
688 *to |= *from++;
689 len -= 3;
690 }
691 else if (IS_LC2(*from) && len >= 3)
692 {
693 *to = *from++ << 16;
694 *to |= *from++ << 8;
695 *to |= *from++;
696 len -= 3;
697 }
698 else if (IS_LCPRV2(*from) && len >= 4)
699 {
700 from++;
701 *to = *from++ << 16;
702 *to |= *from++ << 8;
703 *to |= *from++;
704 len -= 4;
705 }
706 else
707 { /* assume ASCII */
708 *to = (unsigned char) *from++;
709 len--;
710 }
711 to++;
712 cnt++;
713 }
714 *to = 0;
715 return cnt;
716}
#define IS_LCPRV2(c)
Definition: pg_wchar.h:164
#define IS_LC2(c)
Definition: pg_wchar.h:144
#define IS_LCPRV1(c)
Definition: pg_wchar.h:152
#define IS_LC1(c)
Definition: pg_wchar.h:126

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

◆ pg_mule_dsplen()

static int pg_mule_dsplen ( const unsigned char *  s)
static

Definition at line 809 of file wchar.c.

810{
811 int len;
812
813 /*
814 * Note: it's not really appropriate to assume that all multibyte charsets
815 * are double-wide on screen. But this seems an okay approximation for
816 * the MULE charsets we currently support.
817 */
818
819 if (IS_LC1(*s))
820 len = 1;
821 else if (IS_LCPRV1(*s))
822 len = 1;
823 else if (IS_LC2(*s))
824 len = 2;
825 else if (IS_LCPRV2(*s))
826 len = 2;
827 else
828 len = 1; /* assume ASCII */
829
830 return len;
831}

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

◆ pg_mule_mblen()

int pg_mule_mblen ( const unsigned char *  s)

Definition at line 791 of file wchar.c.

792{
793 int len;
794
795 if (IS_LC1(*s))
796 len = 2;
797 else if (IS_LCPRV1(*s))
798 len = 3;
799 else if (IS_LC2(*s))
800 len = 3;
801 else if (IS_LCPRV2(*s))
802 len = 4;
803 else
804 len = 1; /* assume ASCII */
805 return len;
806}

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

Referenced by mic2latin(), mic2latin_with_table(), and pg_mule_verifychar().

◆ pg_mule_verifychar()

static int pg_mule_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1358 of file wchar.c.

1359{
1360 int l,
1361 mbl;
1362 unsigned char c;
1363
1364 l = mbl = pg_mule_mblen(s);
1365
1366 if (len < l)
1367 return -1;
1368
1369 while (--l > 0)
1370 {
1371 c = *++s;
1372 if (!IS_HIGHBIT_SET(c))
1373 return -1;
1374 }
1375 return mbl;
1376}
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:791

References IS_HIGHBIT_SET, len, and pg_mule_mblen().

Referenced by pg_mule_verifystr().

◆ pg_mule_verifystr()

static int pg_mule_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1379 of file wchar.c.

1380{
1381 const unsigned char *start = s;
1382
1383 while (len > 0)
1384 {
1385 int l;
1386
1387 /* fast path for ASCII-subset characters */
1388 if (!IS_HIGHBIT_SET(*s))
1389 {
1390 if (*s == '\0')
1391 break;
1392 l = 1;
1393 }
1394 else
1395 {
1396 l = pg_mule_verifychar(s, len);
1397 if (l == -1)
1398 break;
1399 }
1400 s += l;
1401 len -= l;
1402 }
1403
1404 return s - start;
1405}
static int pg_mule_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1358

References IS_HIGHBIT_SET, len, pg_mule_verifychar(), and start.

◆ pg_sjis_dsplen()

static int pg_sjis_dsplen ( const unsigned char *  s)
static

Definition at line 903 of file wchar.c.

904{
905 int len;
906
907 if (*s >= 0xa1 && *s <= 0xdf)
908 len = 1; /* 1 byte kana? */
909 else if (IS_HIGHBIT_SET(*s))
910 len = 2; /* kanji? */
911 else
912 len = pg_ascii_dsplen(s); /* should be ASCII */
913 return len;
914}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_sjis_mblen()

static int pg_sjis_mblen ( const unsigned char *  s)
static

Definition at line 889 of file wchar.c.

890{
891 int len;
892
893 if (*s >= 0xa1 && *s <= 0xdf)
894 len = 1; /* 1 byte kana? */
895 else if (IS_HIGHBIT_SET(*s))
896 len = 2; /* kanji? */
897 else
898 len = 1; /* should be ASCII */
899 return len;
900}

References IS_HIGHBIT_SET, and len.

Referenced by pg_sjis_verifychar().

◆ pg_sjis_verifychar()

static int pg_sjis_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1425 of file wchar.c.

1426{
1427 int l,
1428 mbl;
1429 unsigned char c1,
1430 c2;
1431
1432 l = mbl = pg_sjis_mblen(s);
1433
1434 if (len < l)
1435 return -1;
1436
1437 if (l == 1) /* pg_sjis_mblen already verified it */
1438 return mbl;
1439
1440 c1 = *s++;
1441 c2 = *s;
1442 if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2))
1443 return -1;
1444 return mbl;
1445}
#define ISSJISTAIL(c)
Definition: pg_wchar.h:45
#define ISSJISHEAD(c)
Definition: pg_wchar.h:44
static int pg_sjis_mblen(const unsigned char *s)
Definition: wchar.c:889

References ISSJISHEAD, ISSJISTAIL, len, and pg_sjis_mblen().

Referenced by pg_sjis_verifystr().

◆ pg_sjis_verifystr()

static int pg_sjis_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1448 of file wchar.c.

1449{
1450 const unsigned char *start = s;
1451
1452 while (len > 0)
1453 {
1454 int l;
1455
1456 /* fast path for ASCII-subset characters */
1457 if (!IS_HIGHBIT_SET(*s))
1458 {
1459 if (*s == '\0')
1460 break;
1461 l = 1;
1462 }
1463 else
1464 {
1465 l = pg_sjis_verifychar(s, len);
1466 if (l == -1)
1467 break;
1468 }
1469 s += l;
1470 len -= l;
1471 }
1472
1473 return s - start;
1474}
static int pg_sjis_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1425

References IS_HIGHBIT_SET, len, pg_sjis_verifychar(), and start.

◆ pg_uhc_dsplen()

static int pg_uhc_dsplen ( const unsigned char *  s)
static

Definition at line 986 of file wchar.c.

987{
988 int len;
989
990 if (IS_HIGHBIT_SET(*s))
991 len = 2; /* 2byte? */
992 else
993 len = pg_ascii_dsplen(s); /* should be ASCII */
994 return len;
995}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_uhc_mblen()

static int pg_uhc_mblen ( const unsigned char *  s)
static

Definition at line 974 of file wchar.c.

975{
976 int len;
977
978 if (IS_HIGHBIT_SET(*s))
979 len = 2; /* 2byte? */
980 else
981 len = 1; /* should be ASCII */
982 return len;
983}

References IS_HIGHBIT_SET, and len.

Referenced by pg_uhc_verifychar().

◆ pg_uhc_verifychar()

static int pg_uhc_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1585 of file wchar.c.

1586{
1587 int l,
1588 mbl;
1589
1590 l = mbl = pg_uhc_mblen(s);
1591
1592 if (len < l)
1593 return -1;
1594
1595 if (l == 2 &&
1596 s[0] == NONUTF8_INVALID_BYTE0 &&
1597 s[1] == NONUTF8_INVALID_BYTE1)
1598 return -1;
1599
1600 while (--l > 0)
1601 {
1602 if (*++s == '\0')
1603 return -1;
1604 }
1605
1606 return mbl;
1607}
static int pg_uhc_mblen(const unsigned char *s)
Definition: wchar.c:974

References len, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, and pg_uhc_mblen().

Referenced by pg_uhc_verifystr().

◆ pg_uhc_verifystr()

static int pg_uhc_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1610 of file wchar.c.

1611{
1612 const unsigned char *start = s;
1613
1614 while (len > 0)
1615 {
1616 int l;
1617
1618 /* fast path for ASCII-subset characters */
1619 if (!IS_HIGHBIT_SET(*s))
1620 {
1621 if (*s == '\0')
1622 break;
1623 l = 1;
1624 }
1625 else
1626 {
1627 l = pg_uhc_verifychar(s, len);
1628 if (l == -1)
1629 break;
1630 }
1631 s += l;
1632 len -= l;
1633 }
1634
1635 return s - start;
1636}
static int pg_uhc_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1585

References IS_HIGHBIT_SET, len, pg_uhc_verifychar(), and start.

◆ pg_utf2wchar_with_len()

static int pg_utf2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 439 of file wchar.c.

440{
441 int cnt = 0;
442 uint32 c1,
443 c2,
444 c3,
445 c4;
446
447 while (len > 0 && *from)
448 {
449 if ((*from & 0x80) == 0)
450 {
451 *to = *from++;
452 len--;
453 }
454 else if ((*from & 0xe0) == 0xc0)
455 {
456 if (len < 2)
457 break; /* drop trailing incomplete char */
458 c1 = *from++ & 0x1f;
459 c2 = *from++ & 0x3f;
460 *to = (c1 << 6) | c2;
461 len -= 2;
462 }
463 else if ((*from & 0xf0) == 0xe0)
464 {
465 if (len < 3)
466 break; /* drop trailing incomplete char */
467 c1 = *from++ & 0x0f;
468 c2 = *from++ & 0x3f;
469 c3 = *from++ & 0x3f;
470 *to = (c1 << 12) | (c2 << 6) | c3;
471 len -= 3;
472 }
473 else if ((*from & 0xf8) == 0xf0)
474 {
475 if (len < 4)
476 break; /* drop trailing incomplete char */
477 c1 = *from++ & 0x07;
478 c2 = *from++ & 0x3f;
479 c3 = *from++ & 0x3f;
480 c4 = *from++ & 0x3f;
481 *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
482 len -= 4;
483 }
484 else
485 {
486 /* treat a bogus char as length 1; not ours to raise error */
487 *to = *from++;
488 len--;
489 }
490 to++;
491 cnt++;
492 }
493 *to = 0;
494 return cnt;
495}

References len.

◆ pg_utf8_islegal()

bool pg_utf8_islegal ( const unsigned char *  source,
int  length 
)

Definition at line 1987 of file wchar.c.

1988{
1989 unsigned char a;
1990
1991 switch (length)
1992 {
1993 default:
1994 /* reject lengths 5 and 6 for now */
1995 return false;
1996 case 4:
1997 a = source[3];
1998 if (a < 0x80 || a > 0xBF)
1999 return false;
2000 /* FALL THRU */
2001 case 3:
2002 a = source[2];
2003 if (a < 0x80 || a > 0xBF)
2004 return false;
2005 /* FALL THRU */
2006 case 2:
2007 a = source[1];
2008 switch (*source)
2009 {
2010 case 0xE0:
2011 if (a < 0xA0 || a > 0xBF)
2012 return false;
2013 break;
2014 case 0xED:
2015 if (a < 0x80 || a > 0x9F)
2016 return false;
2017 break;
2018 case 0xF0:
2019 if (a < 0x90 || a > 0xBF)
2020 return false;
2021 break;
2022 case 0xF4:
2023 if (a < 0x80 || a > 0x8F)
2024 return false;
2025 break;
2026 default:
2027 if (a < 0x80 || a > 0xBF)
2028 return false;
2029 break;
2030 }
2031 /* FALL THRU */
2032 case 1:
2033 a = *source;
2034 if (a >= 0x80 && a < 0xC2)
2035 return false;
2036 if (a > 0xF4)
2037 return false;
2038 break;
2039 }
2040 return true;
2041}
int a
Definition: isn.c:70
static rewind_source * source
Definition: pg_rewind.c:89

References a, and source.

Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifychar(), utf8_to_iso8859_1(), and UtfToLocal().

◆ pg_utf8_verifychar()

static int pg_utf8_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1699 of file wchar.c.

1700{
1701 int l;
1702
1703 if ((*s & 0x80) == 0)
1704 {
1705 if (*s == '\0')
1706 return -1;
1707 return 1;
1708 }
1709 else if ((*s & 0xe0) == 0xc0)
1710 l = 2;
1711 else if ((*s & 0xf0) == 0xe0)
1712 l = 3;
1713 else if ((*s & 0xf8) == 0xf0)
1714 l = 4;
1715 else
1716 l = 1;
1717
1718 if (l > len)
1719 return -1;
1720
1721 if (!pg_utf8_islegal(s, l))
1722 return -1;
1723
1724 return l;
1725}
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1987

References len, and pg_utf8_islegal().

Referenced by pg_utf8_verifystr().

◆ pg_utf8_verifystr()

static int pg_utf8_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1889 of file wchar.c.

1890{
1891 const unsigned char *start = s;
1892 const int orig_len = len;
1893 uint32 state = BGN;
1894
1895/*
1896 * With a stride of two vector widths, gcc will unroll the loop. Even if
1897 * the compiler can unroll a longer loop, it's not worth it because we
1898 * must fall back to the byte-wise algorithm if we find any non-ASCII.
1899 */
1900#define STRIDE_LENGTH (2 * sizeof(Vector8))
1901
1902 if (len >= STRIDE_LENGTH)
1903 {
1904 while (len >= STRIDE_LENGTH)
1905 {
1906 /*
1907 * If the chunk is all ASCII, we can skip the full UTF-8 check,
1908 * but we must first check for a non-END state, which means the
1909 * previous chunk ended in the middle of a multibyte sequence.
1910 */
1911 if (state != END || !is_valid_ascii(s, STRIDE_LENGTH))
1912 utf8_advance(s, &state, STRIDE_LENGTH);
1913
1914 s += STRIDE_LENGTH;
1915 len -= STRIDE_LENGTH;
1916 }
1917
1918 /* The error state persists, so we only need to check for it here. */
1919 if (state == ERR)
1920 {
1921 /*
1922 * Start over from the beginning with the slow path so we can
1923 * count the valid bytes.
1924 */
1925 len = orig_len;
1926 s = start;
1927 }
1928 else if (state != END)
1929 {
1930 /*
1931 * The fast path exited in the middle of a multibyte sequence.
1932 * Walk backwards to find the leading byte so that the slow path
1933 * can resume checking from there. We must always backtrack at
1934 * least one byte, since the current byte could be e.g. an ASCII
1935 * byte after a 2-byte lead, which is invalid.
1936 */
1937 do
1938 {
1939 Assert(s > start);
1940 s--;
1941 len++;
1942 Assert(IS_HIGHBIT_SET(*s));
1943 } while (pg_utf_mblen(s) <= 1);
1944 }
1945 }
1946
1947 /* check remaining bytes */
1948 while (len > 0)
1949 {
1950 int l;
1951
1952 /* fast path for ASCII-subset characters */
1953 if (!IS_HIGHBIT_SET(*s))
1954 {
1955 if (*s == '\0')
1956 break;
1957 l = 1;
1958 }
1959 else
1960 {
1961 l = pg_utf8_verifychar(s, len);
1962 if (l == -1)
1963 break;
1964 }
1965 s += l;
1966 len -= l;
1967 }
1968
1969 return s - start;
1970}
static bool is_valid_ascii(const unsigned char *s, int len)
Definition: ascii.h:25
Definition: regguts.h:323
#define END
Definition: wchar.c:1790
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:536
#define ERR
Definition: wchar.c:1777
static int pg_utf8_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1699
static void utf8_advance(const unsigned char *s, uint32 *state, int len)
Definition: wchar.c:1871
#define BGN
Definition: wchar.c:1779
#define STRIDE_LENGTH

References Assert(), BGN, END, ERR, IS_HIGHBIT_SET, is_valid_ascii(), len, pg_utf8_verifychar(), pg_utf_mblen(), start, STRIDE_LENGTH, and utf8_advance().

◆ pg_utf_dsplen()

static int pg_utf_dsplen ( const unsigned char *  s)
static

Definition at line 660 of file wchar.c.

661{
662 return ucs_wcwidth(utf8_to_unicode(s));
663}
static pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: mbprint.c:53
static int ucs_wcwidth(pg_wchar ucs)
Definition: wchar.c:626

References ucs_wcwidth(), and utf8_to_unicode().

◆ pg_utf_mblen()

int pg_utf_mblen ( const unsigned char *  s)

Definition at line 536 of file wchar.c.

537{
538 int len;
539
540 if ((*s & 0x80) == 0)
541 len = 1;
542 else if ((*s & 0xe0) == 0xc0)
543 len = 2;
544 else if ((*s & 0xf0) == 0xe0)
545 len = 3;
546 else if ((*s & 0xf8) == 0xf0)
547 len = 4;
548#ifdef NOT_USED
549 else if ((*s & 0xfc) == 0xf8)
550 len = 5;
551 else if ((*s & 0xfe) == 0xfc)
552 len = 6;
553#endif
554 else
555 len = 1;
556 return len;
557}

References len.

Referenced by pg_utf8_verifystr(), and pg_wchar2utf_with_len().

◆ pg_wchar2euc_with_len()

static int pg_wchar2euc_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 375 of file wchar.c.

376{
377 int cnt = 0;
378
379 while (len > 0 && *from)
380 {
381 unsigned char c;
382
383 if ((c = (*from >> 24)))
384 {
385 *to++ = c;
386 *to++ = (*from >> 16) & 0xff;
387 *to++ = (*from >> 8) & 0xff;
388 *to++ = *from & 0xff;
389 cnt += 4;
390 }
391 else if ((c = (*from >> 16)))
392 {
393 *to++ = c;
394 *to++ = (*from >> 8) & 0xff;
395 *to++ = *from & 0xff;
396 cnt += 3;
397 }
398 else if ((c = (*from >> 8)))
399 {
400 *to++ = c;
401 *to++ = *from & 0xff;
402 cnt += 2;
403 }
404 else
405 {
406 *to++ = *from;
407 cnt++;
408 }
409 from++;
410 len--;
411 }
412 *to = 0;
413 return cnt;
414}

References len.

◆ pg_wchar2mule_with_len()

static int pg_wchar2mule_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 725 of file wchar.c.

726{
727 int cnt = 0;
728
729 while (len > 0 && *from)
730 {
731 unsigned char lb;
732
733 lb = (*from >> 16) & 0xff;
734 if (IS_LC1(lb))
735 {
736 *to++ = lb;
737 *to++ = *from & 0xff;
738 cnt += 2;
739 }
740 else if (IS_LC2(lb))
741 {
742 *to++ = lb;
743 *to++ = (*from >> 8) & 0xff;
744 *to++ = *from & 0xff;
745 cnt += 3;
746 }
747 else if (IS_LCPRV1_A_RANGE(lb))
748 {
749 *to++ = LCPRV1_A;
750 *to++ = lb;
751 *to++ = *from & 0xff;
752 cnt += 3;
753 }
754 else if (IS_LCPRV1_B_RANGE(lb))
755 {
756 *to++ = LCPRV1_B;
757 *to++ = lb;
758 *to++ = *from & 0xff;
759 cnt += 3;
760 }
761 else if (IS_LCPRV2_A_RANGE(lb))
762 {
763 *to++ = LCPRV2_A;
764 *to++ = lb;
765 *to++ = (*from >> 8) & 0xff;
766 *to++ = *from & 0xff;
767 cnt += 4;
768 }
769 else if (IS_LCPRV2_B_RANGE(lb))
770 {
771 *to++ = LCPRV2_B;
772 *to++ = lb;
773 *to++ = (*from >> 8) & 0xff;
774 *to++ = *from & 0xff;
775 cnt += 4;
776 }
777 else
778 {
779 *to++ = *from & 0xff;
780 cnt += 1;
781 }
782 from++;
783 len--;
784 }
785 *to = 0;
786 return cnt;
787}
#define LCPRV1_A
Definition: pg_wchar.h:150
#define LCPRV1_B
Definition: pg_wchar.h:151
#define LCPRV2_A
Definition: pg_wchar.h:162
#define IS_LCPRV2_B_RANGE(c)
Definition: pg_wchar.h:167
#define IS_LCPRV1_A_RANGE(c)
Definition: pg_wchar.h:153
#define IS_LCPRV1_B_RANGE(c)
Definition: pg_wchar.h:155
#define IS_LCPRV2_A_RANGE(c)
Definition: pg_wchar.h:165
#define LCPRV2_B
Definition: pg_wchar.h:163

References IS_LC1, IS_LC2, IS_LCPRV1_A_RANGE, IS_LCPRV1_B_RANGE, IS_LCPRV2_A_RANGE, IS_LCPRV2_B_RANGE, LCPRV1_A, LCPRV1_B, LCPRV2_A, LCPRV2_B, and len.

◆ pg_wchar2single_with_len()

static int pg_wchar2single_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 859 of file wchar.c.

860{
861 int cnt = 0;
862
863 while (len > 0 && *from)
864 {
865 *to++ = *from++;
866 len--;
867 cnt++;
868 }
869 *to = 0;
870 return cnt;
871}

References len.

◆ pg_wchar2utf_with_len()

static int pg_wchar2utf_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 505 of file wchar.c.

506{
507 int cnt = 0;
508
509 while (len > 0 && *from)
510 {
511 int char_len;
512
513 unicode_to_utf8(*from, to);
514 char_len = pg_utf_mblen(to);
515 cnt += char_len;
516 to += char_len;
517 from++;
518 len--;
519 }
520 *to = 0;
521 return cnt;
522}
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: pg_wchar.h:575

References len, pg_utf_mblen(), and unicode_to_utf8().

◆ ucs_wcwidth()

static int ucs_wcwidth ( pg_wchar  ucs)
static

Definition at line 626 of file wchar.c.

627{
628#include "common/unicode_nonspacing_table.h"
629#include "common/unicode_east_asian_fw_table.h"
630
631 /* test for 8-bit control characters */
632 if (ucs == 0)
633 return 0;
634
635 if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
636 return -1;
637
638 /*
639 * binary search in table of non-spacing characters
640 *
641 * XXX: In the official Unicode sources, it is possible for a character to
642 * be described as both non-spacing and wide at the same time. As of
643 * Unicode 13.0, treating the non-spacing property as the determining
644 * factor for display width leads to the correct behavior, so do that
645 * search first.
646 */
647 if (mbbisearch(ucs, nonspacing,
648 sizeof(nonspacing) / sizeof(struct mbinterval) - 1))
649 return 0;
650
651 /* binary search in table of wide characters */
652 if (mbbisearch(ucs, east_asian_fw,
653 sizeof(east_asian_fw) / sizeof(struct mbinterval) - 1))
654 return 2;
655
656 return 1;
657}
static const struct mbinterval east_asian_fw[]
static const struct mbinterval nonspacing[]
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
Definition: wchar.c:579

References east_asian_fw, mbbisearch(), and nonspacing.

Referenced by pg_utf_dsplen().

◆ utf8_advance()

static void utf8_advance ( const unsigned char *  s,
uint32 *  state,
int  len 
)
static

Definition at line 1871 of file wchar.c.

1872{
1873 /* Note: We deliberately don't check the state's value here. */
1874 while (len > 0)
1875 {
1876 /*
1877 * It's important that the mask value is 31: In most instruction sets,
1878 * a shift by a 32-bit operand is understood to be a shift by its mod
1879 * 32, so the compiler should elide the mask operation.
1880 */
1881 *state = Utf8Transition[*s++] >> (*state & 31);
1882 len--;
1883 }
1884
1885 *state &= 31;
1886}
static const uint32 Utf8Transition[256]
Definition: wchar.c:1813

References len, and Utf8Transition.

Referenced by pg_utf8_verifystr().

Variable Documentation

◆ pg_wchar_table

◆ Utf8Transition

const uint32 Utf8Transition[256]
static

Definition at line 1813 of file wchar.c.

Referenced by utf8_advance().