PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
wchar.c File Reference
#include "c.h"
#include <limits.h>
#include "mb/pg_wchar.h"
#include "utils/ascii.h"
#include "common/unicode_nonspacing_table.h"
#include "common/unicode_east_asian_fw_table.h"
Include dependency graph for wchar.c:

Go to the source code of this file.

Data Structures

struct  mbinterval
 

Macros

#define NONUTF8_INVALID_BYTE0   (0x8d)
 
#define NONUTF8_INVALID_BYTE1   (' ')
 
#define IS_EUC_RANGE_VALID(c)   ((c) >= 0xa1 && (c) <= 0xfe)
 
#define pg_euccn_verifychar   pg_euckr_verifychar
 
#define pg_euccn_verifystr   pg_euckr_verifystr
 
#define ERR   0
 
#define BGN   11
 
#define CS1   16
 
#define CS2   1
 
#define CS3   5
 
#define P3A   6 /* Lead was E0, check for 3-byte overlong */
 
#define P3B   20 /* Lead was ED, check for surrogate */
 
#define P4A   25 /* Lead was F0, check for 4-byte overlong */
 
#define P4B   30 /* Lead was F4, check for too-large */
 
#define END   BGN
 
#define ASC   (END << BGN)
 
#define L2A   (CS1 << BGN)
 
#define L3A   (P3A << BGN)
 
#define L3B   (CS2 << BGN)
 
#define L3C   (P3B << BGN)
 
#define L4A   (P4A << BGN)
 
#define L4B   (CS3 << BGN)
 
#define L4C   (P4B << BGN)
 
#define CR1   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)
 
#define CR2   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)
 
#define CR3   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)
 
#define ILL   ERR
 
#define STRIDE_LENGTH   (2 * sizeof(Vector8))
 

Functions

static int pg_ascii2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_ascii_mblen (const unsigned char *s)
 
static int pg_ascii_dsplen (const unsigned char *s)
 
static int pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euc_mblen (const unsigned char *s)
 
static int pg_euc_dsplen (const unsigned char *s)
 
static int pg_eucjp2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_eucjp_mblen (const unsigned char *s)
 
static int pg_eucjp_dsplen (const unsigned char *s)
 
static int pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euckr_mblen (const unsigned char *s)
 
static int pg_euckr_dsplen (const unsigned char *s)
 
static int pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euccn_mblen (const unsigned char *s)
 
static int pg_euccn_dsplen (const unsigned char *s)
 
static int pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euctw_mblen (const unsigned char *s)
 
static int pg_euctw_dsplen (const unsigned char *s)
 
static int pg_wchar2euc_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_johab_mblen (const unsigned char *s)
 
static int pg_johab_dsplen (const unsigned char *s)
 
static int pg_utf2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2utf_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_utf_mblen (const unsigned char *s)
 
static int mbbisearch (pg_wchar ucs, const struct mbinterval *table, int max)
 
static int ucs_wcwidth (pg_wchar ucs)
 
static int pg_utf_dsplen (const unsigned char *s)
 
static int pg_mule2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2mule_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_mule_mblen (const unsigned char *s)
 
static int pg_mule_dsplen (const unsigned char *s)
 
static int pg_latin12wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2single_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_latin1_mblen (const unsigned char *s)
 
static int pg_latin1_dsplen (const unsigned char *s)
 
static int pg_sjis_mblen (const unsigned char *s)
 
static int pg_sjis_dsplen (const unsigned char *s)
 
static int pg_big5_mblen (const unsigned char *s)
 
static int pg_big5_dsplen (const unsigned char *s)
 
static int pg_gbk_mblen (const unsigned char *s)
 
static int pg_gbk_dsplen (const unsigned char *s)
 
static int pg_uhc_mblen (const unsigned char *s)
 
static int pg_uhc_dsplen (const unsigned char *s)
 
static int pg_gb18030_mblen (const unsigned char *s)
 
static int pg_gb18030_dsplen (const unsigned char *s)
 
static int pg_ascii_verifychar (const unsigned char *s, int len)
 
static int pg_ascii_verifystr (const unsigned char *s, int len)
 
static int pg_eucjp_verifychar (const unsigned char *s, int len)
 
static int pg_eucjp_verifystr (const unsigned char *s, int len)
 
static int pg_euckr_verifychar (const unsigned char *s, int len)
 
static int pg_euckr_verifystr (const unsigned char *s, int len)
 
static int pg_euctw_verifychar (const unsigned char *s, int len)
 
static int pg_euctw_verifystr (const unsigned char *s, int len)
 
static int pg_johab_verifychar (const unsigned char *s, int len)
 
static int pg_johab_verifystr (const unsigned char *s, int len)
 
static int pg_mule_verifychar (const unsigned char *s, int len)
 
static int pg_mule_verifystr (const unsigned char *s, int len)
 
static int pg_latin1_verifychar (const unsigned char *s, int len)
 
static int pg_latin1_verifystr (const unsigned char *s, int len)
 
static int pg_sjis_verifychar (const unsigned char *s, int len)
 
static int pg_sjis_verifystr (const unsigned char *s, int len)
 
static int pg_big5_verifychar (const unsigned char *s, int len)
 
static int pg_big5_verifystr (const unsigned char *s, int len)
 
static int pg_gbk_verifychar (const unsigned char *s, int len)
 
static int pg_gbk_verifystr (const unsigned char *s, int len)
 
static int pg_uhc_verifychar (const unsigned char *s, int len)
 
static int pg_uhc_verifystr (const unsigned char *s, int len)
 
static int pg_gb18030_verifychar (const unsigned char *s, int len)
 
static int pg_gb18030_verifystr (const unsigned char *s, int len)
 
static int pg_utf8_verifychar (const unsigned char *s, int len)
 
static void utf8_advance (const unsigned char *s, uint32 *state, int len)
 
static int pg_utf8_verifystr (const unsigned char *s, int len)
 
bool pg_utf8_islegal (const unsigned char *source, int length)
 
void pg_encoding_set_invalid (int encoding, char *dst)
 
int pg_encoding_mblen (int encoding, const char *mbstr)
 
int pg_encoding_mblen_or_incomplete (int encoding, const char *mbstr, size_t remaining)
 
int pg_encoding_mblen_bounded (int encoding, const char *mbstr)
 
int pg_encoding_dsplen (int encoding, const char *mbstr)
 
int pg_encoding_verifymbchar (int encoding, const char *mbstr, int len)
 
int pg_encoding_verifymbstr (int encoding, const char *mbstr, int len)
 
int pg_encoding_max_length (int encoding)
 

Variables

static const uint32 Utf8Transition [256]
 
const pg_wchar_tbl pg_wchar_table []
 

Macro Definition Documentation

◆ ASC

#define ASC   (END << BGN)

Definition at line 1797 of file wchar.c.

◆ BGN

#define BGN   11

Definition at line 1781 of file wchar.c.

◆ CR1

#define CR1   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)

Definition at line 1809 of file wchar.c.

◆ CR2

#define CR2   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)

Definition at line 1810 of file wchar.c.

◆ CR3

#define CR3   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)

Definition at line 1811 of file wchar.c.

◆ CS1

#define CS1   16

Definition at line 1783 of file wchar.c.

◆ CS2

#define CS2   1

Definition at line 1784 of file wchar.c.

◆ CS3

#define CS3   5

Definition at line 1785 of file wchar.c.

◆ END

#define END   BGN

Definition at line 1792 of file wchar.c.

◆ ERR

#define ERR   0

Definition at line 1779 of file wchar.c.

◆ ILL

#define ILL   ERR

Definition at line 1813 of file wchar.c.

◆ IS_EUC_RANGE_VALID

#define IS_EUC_RANGE_VALID(c)   ((c) >= 0xa1 && (c) <= 0xfe)

Definition at line 1079 of file wchar.c.

◆ L2A

#define L2A   (CS1 << BGN)

Definition at line 1799 of file wchar.c.

◆ L3A

#define L3A   (P3A << BGN)

Definition at line 1801 of file wchar.c.

◆ L3B

#define L3B   (CS2 << BGN)

Definition at line 1802 of file wchar.c.

◆ L3C

#define L3C   (P3B << BGN)

Definition at line 1803 of file wchar.c.

◆ L4A

#define L4A   (P4A << BGN)

Definition at line 1805 of file wchar.c.

◆ L4B

#define L4B   (CS3 << BGN)

Definition at line 1806 of file wchar.c.

◆ L4C

#define L4C   (P4B << BGN)

Definition at line 1807 of file wchar.c.

◆ NONUTF8_INVALID_BYTE0

#define NONUTF8_INVALID_BYTE0   (0x8d)

Definition at line 36 of file wchar.c.

◆ NONUTF8_INVALID_BYTE1

#define NONUTF8_INVALID_BYTE1   (' ')

Definition at line 37 of file wchar.c.

◆ P3A

#define P3A   6 /* Lead was E0, check for 3-byte overlong */

Definition at line 1787 of file wchar.c.

◆ P3B

#define P3B   20 /* Lead was ED, check for surrogate */

Definition at line 1788 of file wchar.c.

◆ P4A

#define P4A   25 /* Lead was F0, check for 4-byte overlong */

Definition at line 1789 of file wchar.c.

◆ P4B

#define P4B   30 /* Lead was F4, check for too-large */

Definition at line 1790 of file wchar.c.

◆ pg_euccn_verifychar

#define pg_euccn_verifychar   pg_euckr_verifychar

Definition at line 1224 of file wchar.c.

◆ pg_euccn_verifystr

#define pg_euccn_verifystr   pg_euckr_verifystr

Definition at line 1225 of file wchar.c.

◆ STRIDE_LENGTH

#define STRIDE_LENGTH   (2 * sizeof(Vector8))

Function Documentation

◆ mbbisearch()

static int mbbisearch ( pg_wchar  ucs,
const struct mbinterval *  table,
int  max 
)
static

Definition at line 581 of file wchar.c.

582{
583 int min = 0;
584 int mid;
585
586 if (ucs < table[0].first || ucs > table[max].last)
587 return 0;
588 while (max >= min)
589 {
590 mid = (min + max) / 2;
591 if (ucs > table[mid].last)
592 min = mid + 1;
593 else if (ucs < table[mid].first)
594 max = mid - 1;
595 else
596 return 1;
597 }
598
599 return 0;
600}
static const struct lconv_member_info table[]

References table.

Referenced by ucs_wcwidth().

◆ pg_ascii2wchar_with_len()

static int pg_ascii2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 70 of file wchar.c.

71{
72 int cnt = 0;
73
74 while (len > 0 && *from)
75 {
76 *to++ = *from++;
77 len--;
78 cnt++;
79 }
80 *to = 0;
81 return cnt;
82}
const void size_t len

References len.

◆ pg_ascii_dsplen()

static int pg_ascii_dsplen ( const unsigned char *  s)
static

Definition at line 91 of file wchar.c.

92{
93 if (*s == '\0')
94 return 0;
95 if (*s < 0x20 || *s == 0x7f)
96 return -1;
97
98 return 1;
99}

Referenced by pg_big5_dsplen(), pg_euc_dsplen(), pg_euccn_dsplen(), pg_eucjp_dsplen(), pg_euctw_dsplen(), pg_gb18030_dsplen(), pg_gbk_dsplen(), pg_latin1_dsplen(), pg_sjis_dsplen(), and pg_uhc_dsplen().

◆ pg_ascii_mblen()

static int pg_ascii_mblen ( const unsigned char *  s)
static

Definition at line 85 of file wchar.c.

86{
87 return 1;
88}

◆ pg_ascii_verifychar()

static int pg_ascii_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1063 of file wchar.c.

1064{
1065 return 1;
1066}

◆ pg_ascii_verifystr()

static int pg_ascii_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1069 of file wchar.c.

1070{
1071 const unsigned char *nullpos = memchr(s, 0, len);
1072
1073 if (nullpos == NULL)
1074 return len;
1075 else
1076 return nullpos - s;
1077}

References len.

◆ pg_big5_dsplen()

static int pg_big5_dsplen ( const unsigned char *  s)
static

Definition at line 934 of file wchar.c.

935{
936 int len;
937
938 if (IS_HIGHBIT_SET(*s))
939 len = 2; /* kanji? */
940 else
941 len = pg_ascii_dsplen(s); /* should be ASCII */
942 return len;
943}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1126
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:91

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_big5_mblen()

static int pg_big5_mblen ( const unsigned char *  s)
static

Definition at line 922 of file wchar.c.

923{
924 int len;
925
926 if (IS_HIGHBIT_SET(*s))
927 len = 2; /* kanji? */
928 else
929 len = 1; /* should be ASCII */
930 return len;
931}

References IS_HIGHBIT_SET, and len.

Referenced by pg_big5_verifychar().

◆ pg_big5_verifychar()

static int pg_big5_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1479 of file wchar.c.

1480{
1481 int l,
1482 mbl;
1483
1484 l = mbl = pg_big5_mblen(s);
1485
1486 if (len < l)
1487 return -1;
1488
1489 if (l == 2 &&
1490 s[0] == NONUTF8_INVALID_BYTE0 &&
1491 s[1] == NONUTF8_INVALID_BYTE1)
1492 return -1;
1493
1494 while (--l > 0)
1495 {
1496 if (*++s == '\0')
1497 return -1;
1498 }
1499
1500 return mbl;
1501}
#define NONUTF8_INVALID_BYTE0
Definition: wchar.c:36
static int pg_big5_mblen(const unsigned char *s)
Definition: wchar.c:922
#define NONUTF8_INVALID_BYTE1
Definition: wchar.c:37

References len, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, and pg_big5_mblen().

Referenced by pg_big5_verifystr().

◆ pg_big5_verifystr()

static int pg_big5_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1504 of file wchar.c.

1505{
1506 const unsigned char *start = s;
1507
1508 while (len > 0)
1509 {
1510 int l;
1511
1512 /* fast path for ASCII-subset characters */
1513 if (!IS_HIGHBIT_SET(*s))
1514 {
1515 if (*s == '\0')
1516 break;
1517 l = 1;
1518 }
1519 else
1520 {
1521 l = pg_big5_verifychar(s, len);
1522 if (l == -1)
1523 break;
1524 }
1525 s += l;
1526 len -= l;
1527 }
1528
1529 return s - start;
1530}
return str start
static int pg_big5_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1479

References IS_HIGHBIT_SET, len, pg_big5_verifychar(), and start.

◆ pg_encoding_dsplen()

int pg_encoding_dsplen ( int  encoding,
const char *  mbstr 
)

Definition at line 2176 of file wchar.c.

2177{
2178 return (PG_VALID_ENCODING(encoding) ?
2179 pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
2180 pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
2181}
int32 encoding
Definition: pg_database.h:41
@ PG_SQL_ASCII
Definition: pg_wchar.h:226
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:287
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:2064

References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by PQdsplen(), and reportErrorPosition().

◆ pg_encoding_max_length()

int pg_encoding_max_length ( int  encoding)

Definition at line 2213 of file wchar.c.

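2214{
2215 Assert(PG_VALID_ENCODING(encoding));
2216
2217 /*
2218 * Check for the encoding despite the assert, due to some mingw versions
2219 * otherwise issuing bogus warnings.
2220 */
2221 return PG_VALID_ENCODING(encoding) ?
2222 pg_wchar_table[encoding].maxmblen :
2223 pg_wchar_table[PG_SQL_ASCII].maxmblen;
2224}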
Assert(PointerIsAligned(start, uint64))
int maxmblen
Definition: pg_wchar.h:386

References Assert(), encoding, pg_wchar_tbl::maxmblen, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by ascii(), chr(), CopyConvertBuf(), pg_encoding_mbcliplen(), pg_encoding_set_invalid(), pg_verify_mbstr_len(), reportErrorPosition(), test_enc_setup(), and type_maximum_size().

◆ pg_encoding_mblen()

int pg_encoding_mblen ( int  encoding,
const char *  mbstr 
)

Definition at line 2135 of file wchar.c.

2136{
2137 return (PG_VALID_ENCODING(encoding) ?
2138 pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) :
2139 pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
2140}

References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), fmtIdEnc(), pg_encoding_mblen_bounded(), pg_encoding_mblen_or_incomplete(), PQescapeInternal(), PQmblen(), PQmblenBounded(), and test_enc_setup().

◆ pg_encoding_mblen_bounded()

int pg_encoding_mblen_bounded ( int  encoding,
const char *  mbstr 
)

Definition at line 2167 of file wchar.c.

2168{
2169 return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr));
2170}
size_t strnlen(const char *str, size_t maxlen)
Definition: strnlen.c:26
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:2135

References encoding, pg_encoding_mblen(), and strnlen().

◆ pg_encoding_mblen_or_incomplete()

int pg_encoding_mblen_or_incomplete ( int  encoding,
const char *  mbstr,
size_t  remaining 
)

Definition at line 2147 of file wchar.c.

2149{
2150 /*
2151 * Define zero remaining as too few, even for single-byte encodings.
2152 * pg_gb18030_mblen() reads one or two bytes; single-byte encodings read
2153 * zero; others read one.
2154 */
2155 if (remaining < 1 ||
2156 (encoding == PG_GB18030 && IS_HIGHBIT_SET(*mbstr) && remaining < 2))
2157 return INT_MAX;
2158 return pg_encoding_mblen(encoding, mbstr);
2159}
int remaining
Definition: informix.c:692
@ PG_GB18030
Definition: pg_wchar.h:268

References encoding, IS_HIGHBIT_SET, pg_encoding_mblen(), PG_GB18030, and remaining.

Referenced by PQescapeInternal(), PQescapeStringInternal(), report_invalid_encoding(), and report_untranslatable_char().

◆ pg_encoding_set_invalid()

void pg_encoding_set_invalid ( int  encoding,
char *  dst 
)

Definition at line 2051 of file wchar.c.

2052{
2053 Assert(pg_encoding_max_length(encoding) > 1);
2054
2055 dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0);
2056 dst[1] = NONUTF8_INVALID_BYTE1;
2057}
@ PG_UTF8
Definition: pg_wchar.h:232
int pg_encoding_max_length(int encoding)
Definition: wchar.c:2213

References Assert(), encoding, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, pg_encoding_max_length(), and PG_UTF8.

Referenced by appendStringLiteral(), fmtIdEnc(), PQescapeStringInternal(), and test_enc_setup().

◆ pg_encoding_verifymbchar()

int pg_encoding_verifymbchar ( int  encoding,
const char *  mbstr,
int  len 
)

◆ pg_encoding_verifymbstr()

int pg_encoding_verifymbstr ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 2202 of file wchar.c.

2203{
2204 return (PG_VALID_ENCODING(encoding) ?
2205 pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len) :
2206 pg_wchar_table[PG_SQL_ASCII].mbverifystr((const unsigned char *) mbstr, len));
2207}

References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by add_file_to_manifest(), CopyConvertBuf(), handle_oauth_sasl_error(), parse_oauth_json(), PQescapeInternal(), test_enc_conversion(), test_enc_setup(), and test_one_vector_escape().

◆ pg_euc2wchar_with_len()

static int pg_euc2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 105 of file wchar.c.

106{
107 int cnt = 0;
108
109 while (len > 0 && *from)
110 {
111 if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte
112 * KANA") */
113 {
114 from++;
115 *to = (SS2 << 8) | *from++;
116 len -= 2;
117 }
118 else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
119 {
120 from++;
121 *to = (SS3 << 16) | (*from++ << 8);
122 *to |= *from++;
123 len -= 3;
124 }
125 else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
126 {
127 *to = *from++ << 8;
128 *to |= *from++;
129 len -= 2;
130 }
131 else /* must be ASCII */
132 {
133 *to = *from++;
134 len--;
135 }
136 to++;
137 cnt++;
138 }
139 *to = 0;
140 return cnt;
141}
#define SS2
Definition: pg_wchar.h:38
#define SS3
Definition: pg_wchar.h:39

References IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp2wchar_with_len(), and pg_euckr2wchar_with_len().

◆ pg_euc_dsplen()

static int pg_euc_dsplen ( const unsigned char *  s)
inline static

Definition at line 160 of file wchar.c.

161{
162 int len;
163
164 if (*s == SS2)
165 len = 2;
166 else if (*s == SS3)
167 len = 2;
168 else if (IS_HIGHBIT_SET(*s))
169 len = 2;
170 else
171 len = pg_ascii_dsplen(s);
172 return len;
173}

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

Referenced by pg_euckr_dsplen(), and pg_johab_dsplen().

◆ pg_euc_mblen()

static int pg_euc_mblen ( const unsigned char *  s)
inline static

Definition at line 144 of file wchar.c.

145{
146 int len;
147
148 if (*s == SS2)
149 len = 2;
150 else if (*s == SS3)
151 len = 3;
152 else if (IS_HIGHBIT_SET(*s))
153 len = 2;
154 else
155 len = 1;
156 return len;
157}

References IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp_mblen(), pg_euckr_mblen(), and pg_johab_mblen().

◆ pg_euccn2wchar_with_len()

static int pg_euccn2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 232 of file wchar.c.

233{
234 int cnt = 0;
235
236 while (len > 0 && *from)
237 {
238 if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
239 {
240 from++;
241 *to = (SS2 << 16) | (*from++ << 8);
242 *to |= *from++;
243 len -= 3;
244 }
245 else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */
246 {
247 from++;
248 *to = (SS3 << 16) | (*from++ << 8);
249 *to |= *from++;
250 len -= 3;
251 }
252 else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
253 {
254 *to = *from++ << 8;
255 *to |= *from++;
256 len -= 2;
257 }
258 else
259 {
260 *to = *from++;
261 len--;
262 }
263 to++;
264 cnt++;
265 }
266 *to = 0;
267 return cnt;
268}

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euccn_dsplen()

static int pg_euccn_dsplen ( const unsigned char *  s)
static

Definition at line 283 of file wchar.c.

284{
285 int len;
286
287 if (IS_HIGHBIT_SET(*s))
288 len = 2;
289 else
290 len = pg_ascii_dsplen(s);
291 return len;
292}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_euccn_mblen()

static int pg_euccn_mblen ( const unsigned char *  s)
static

Definition at line 271 of file wchar.c.

272{
273 int len;
274
275 if (IS_HIGHBIT_SET(*s))
276 len = 2;
277 else
278 len = 1;
279 return len;
280}

References IS_HIGHBIT_SET, and len.

◆ pg_eucjp2wchar_with_len()

static int pg_eucjp2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 179 of file wchar.c.

180{
181 return pg_euc2wchar_with_len(from, to, len);
182}
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition: wchar.c:105

References len, and pg_euc2wchar_with_len().

◆ pg_eucjp_dsplen()

static int pg_eucjp_dsplen ( const unsigned char *  s)
static

Definition at line 191 of file wchar.c.

192{
193 int len;
194
195 if (*s == SS2)
196 len = 1;
197 else if (*s == SS3)
198 len = 2;
199 else if (IS_HIGHBIT_SET(*s))
200 len = 2;
201 else
202 len = pg_ascii_dsplen(s);
203 return len;
204}

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

◆ pg_eucjp_mblen()

static int pg_eucjp_mblen ( const unsigned char *  s)
static

Definition at line 185 of file wchar.c.

186{
187 return pg_euc_mblen(s);
188}
static int pg_euc_mblen(const unsigned char *s)
Definition: wchar.c:144

References pg_euc_mblen().

◆ pg_eucjp_verifychar()

static int pg_eucjp_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1082 of file wchar.c.

1083{
1084 int l;
1085 unsigned char c1,
1086 c2;
1087
1088 c1 = *s++;
1089
1090 switch (c1)
1091 {
1092 case SS2: /* JIS X 0201 */
1093 l = 2;
1094 if (l > len)
1095 return -1;
1096 c2 = *s++;
1097 if (c2 < 0xa1 || c2 > 0xdf)
1098 return -1;
1099 break;
1100
1101 case SS3: /* JIS X 0212 */
1102 l = 3;
1103 if (l > len)
1104 return -1;
1105 c2 = *s++;
1106 if (!IS_EUC_RANGE_VALID(c2))
1107 return -1;
1108 c2 = *s++;
1109 if (!IS_EUC_RANGE_VALID(c2))
1110 return -1;
1111 break;
1112
1113 default:
1114 if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1115 {
1116 l = 2;
1117 if (l > len)
1118 return -1;
1119 if (!IS_EUC_RANGE_VALID(c1))
1120 return -1;
1121 c2 = *s++;
1122 if (!IS_EUC_RANGE_VALID(c2))
1123 return -1;
1124 }
1125 else
1126 /* must be ASCII */
1127 {
1128 l = 1;
1129 }
1130 break;
1131 }
1132
1133 return l;
1134}
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1079

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp_verifystr().

◆ pg_eucjp_verifystr()

static int pg_eucjp_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1137 of file wchar.c.

1138{
1139 const unsigned char *start = s;
1140
1141 while (len > 0)
1142 {
1143 int l;
1144
1145 /* fast path for ASCII-subset characters */
1146 if (!IS_HIGHBIT_SET(*s))
1147 {
1148 if (*s == '\0')
1149 break;
1150 l = 1;
1151 }
1152 else
1153 {
1154 l = pg_eucjp_verifychar(s, len);
1155 if (l == -1)
1156 break;
1157 }
1158 s += l;
1159 len -= l;
1160 }
1161
1162 return s - start;
1163}
static int pg_eucjp_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1082

References IS_HIGHBIT_SET, len, pg_eucjp_verifychar(), and start.

◆ pg_euckr2wchar_with_len()

static int pg_euckr2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 210 of file wchar.c.

211{
212 return pg_euc2wchar_with_len(from, to, len);
213}

References len, and pg_euc2wchar_with_len().

◆ pg_euckr_dsplen()

static int pg_euckr_dsplen ( const unsigned char *  s)
static

Definition at line 222 of file wchar.c.

223{
224 return pg_euc_dsplen(s);
225}
static int pg_euc_dsplen(const unsigned char *s)
Definition: wchar.c:160

References pg_euc_dsplen().

◆ pg_euckr_mblen()

static int pg_euckr_mblen ( const unsigned char *  s)
static

Definition at line 216 of file wchar.c.

217{
218 return pg_euc_mblen(s);
219}

References pg_euc_mblen().

◆ pg_euckr_verifychar()

static int pg_euckr_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1166 of file wchar.c.

1167{
1168 int l;
1169 unsigned char c1,
1170 c2;
1171
1172 c1 = *s++;
1173
1174 if (IS_HIGHBIT_SET(c1))
1175 {
1176 l = 2;
1177 if (l > len)
1178 return -1;
1179 if (!IS_EUC_RANGE_VALID(c1))
1180 return -1;
1181 c2 = *s++;
1182 if (!IS_EUC_RANGE_VALID(c2))
1183 return -1;
1184 }
1185 else
1186 /* must be ASCII */
1187 {
1188 l = 1;
1189 }
1190
1191 return l;
1192}

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, and len.

Referenced by pg_euckr_verifystr().

◆ pg_euckr_verifystr()

static int pg_euckr_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1195 of file wchar.c.

1196{
1197 const unsigned char *start = s;
1198
1199 while (len > 0)
1200 {
1201 int l;
1202
1203 /* fast path for ASCII-subset characters */
1204 if (!IS_HIGHBIT_SET(*s))
1205 {
1206 if (*s == '\0')
1207 break;
1208 l = 1;
1209 }
1210 else
1211 {
1212 l = pg_euckr_verifychar(s, len);
1213 if (l == -1)
1214 break;
1215 }
1216 s += l;
1217 len -= l;
1218 }
1219
1220 return s - start;
1221}
static int pg_euckr_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1166

References IS_HIGHBIT_SET, len, pg_euckr_verifychar(), and start.

◆ pg_euctw2wchar_with_len()

static int pg_euctw2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 299 of file wchar.c.

300{
301 int cnt = 0;
302
303 while (len > 0 && *from)
304 {
305 if (*from == SS2 && len >= 4) /* code set 2 */
306 {
307 from++;
308 *to = (((uint32) SS2) << 24) | (*from++ << 16);
309 *to |= *from++ << 8;
310 *to |= *from++;
311 len -= 4;
312 }
313 else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
314 {
315 from++;
316 *to = (SS3 << 16) | (*from++ << 8);
317 *to |= *from++;
318 len -= 3;
319 }
320 else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
321 {
322 *to = *from++ << 8;
323 *to |= *from++;
324 len -= 2;
325 }
326 else
327 {
328 *to = *from++;
329 len--;
330 }
331 to++;
332 cnt++;
333 }
334 *to = 0;
335 return cnt;
336}
uint32_t uint32
Definition: c.h:502

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euctw_dsplen()

static int pg_euctw_dsplen ( const unsigned char *  s)
static

Definition at line 355 of file wchar.c.

356{
357 int len;
358
359 if (*s == SS2)
360 len = 2;
361 else if (*s == SS3)
362 len = 2;
363 else if (IS_HIGHBIT_SET(*s))
364 len = 2;
365 else
366 len = pg_ascii_dsplen(s);
367 return len;
368}

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

◆ pg_euctw_mblen()

static int pg_euctw_mblen ( const unsigned char *  s)
static

Definition at line 339 of file wchar.c.

340{
341 int len;
342
343 if (*s == SS2)
344 len = 4;
345 else if (*s == SS3)
346 len = 3;
347 else if (IS_HIGHBIT_SET(*s))
348 len = 2;
349 else
350 len = 1;
351 return len;
352}

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euctw_verifychar()

static int pg_euctw_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1228 of file wchar.c.

1229{
1230 int l;
1231 unsigned char c1,
1232 c2;
1233
1234 c1 = *s++;
1235
1236 switch (c1)
1237 {
1238 case SS2: /* CNS 11643 Plane 1-7 */
1239 l = 4;
1240 if (l > len)
1241 return -1;
1242 c2 = *s++;
1243 if (c2 < 0xa1 || c2 > 0xa7)
1244 return -1;
1245 c2 = *s++;
1246 if (!IS_EUC_RANGE_VALID(c2))
1247 return -1;
1248 c2 = *s++;
1249 if (!IS_EUC_RANGE_VALID(c2))
1250 return -1;
1251 break;
1252
1253 case SS3: /* unused */
1254 return -1;
1255
1256 default:
1257 if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */
1258 {
1259 l = 2;
1260 if (l > len)
1261 return -1;
1262 /* no further range check on c1? */
1263 c2 = *s++;
1264 if (!IS_EUC_RANGE_VALID(c2))
1265 return -1;
1266 }
1267 else
1268 /* must be ASCII */
1269 {
1270 l = 1;
1271 }
1272 break;
1273 }
1274 return l;
1275}

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_euctw_verifystr().

◆ pg_euctw_verifystr()

static int pg_euctw_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1278 of file wchar.c.

1279{
1280 const unsigned char *start = s;
1281
1282 while (len > 0)
1283 {
1284 int l;
1285
1286 /* fast path for ASCII-subset characters */
1287 if (!IS_HIGHBIT_SET(*s))
1288 {
1289 if (*s == '\0')
1290 break;
1291 l = 1;
1292 }
1293 else
1294 {
1295 l = pg_euctw_verifychar(s, len);
1296 if (l == -1)
1297 break;
1298 }
1299 s += l;
1300 len -= l;
1301 }
1302
1303 return s - start;
1304}
static int pg_euctw_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1228

References IS_HIGHBIT_SET, len, pg_euctw_verifychar(), and start.

◆ pg_gb18030_dsplen()

static int pg_gb18030_dsplen ( const unsigned char *  s)
static

Definition at line 1029 of file wchar.c.

1030{
1031 int len;
1032
1033 if (IS_HIGHBIT_SET(*s))
1034 len = 2;
1035 else
1036 len = pg_ascii_dsplen(s); /* ASCII */
1037 return len;
1038}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_gb18030_mblen()

static int pg_gb18030_mblen ( const unsigned char *  s)
static

Definition at line 1015 of file wchar.c.

1016{
1017 int len;
1018
1019 if (!IS_HIGHBIT_SET(*s))
1020 len = 1; /* ASCII */
1021 else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1022 len = 4;
1023 else
1024 len = 2;
1025 return len;
1026}

References IS_HIGHBIT_SET, and len.

◆ pg_gb18030_verifychar()

static int pg_gb18030_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1641 of file wchar.c.

1642{
1643 int l;
1644
1645 if (!IS_HIGHBIT_SET(*s))
1646 l = 1; /* ASCII */
1647 else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1648 {
1649 /* Should be 4-byte, validate remaining bytes */
1650 if (*s >= 0x81 && *s <= 0xfe &&
1651 *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
1652 *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
1653 l = 4;
1654 else
1655 l = -1;
1656 }
1657 else if (len >= 2 && *s >= 0x81 && *s <= 0xfe)
1658 {
1659 /* Should be 2-byte, validate */
1660 if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
1661 (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
1662 l = 2;
1663 else
1664 l = -1;
1665 }
1666 else
1667 l = -1;
1668 return l;
1669}

References IS_HIGHBIT_SET, and len.

Referenced by pg_gb18030_verifystr().

◆ pg_gb18030_verifystr()

static int pg_gb18030_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1672 of file wchar.c.

1673{
1674 const unsigned char *start = s;
1675
1676 while (len > 0)
1677 {
1678 int l;
1679
1680 /* fast path for ASCII-subset characters */
1681 if (!IS_HIGHBIT_SET(*s))
1682 {
1683 if (*s == '\0')
1684 break;
1685 l = 1;
1686 }
1687 else
1688 {
1689 l = pg_gb18030_verifychar(s, len);
1690 if (l == -1)
1691 break;
1692 }
1693 s += l;
1694 len -= l;
1695 }
1696
1697 return s - start;
1698}
static int pg_gb18030_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1641

References IS_HIGHBIT_SET, len, pg_gb18030_verifychar(), and start.

◆ pg_gbk_dsplen()

static int pg_gbk_dsplen ( const unsigned char *  s)
static

Definition at line 961 of file wchar.c.

962{
963 int len;
964
965 if (IS_HIGHBIT_SET(*s))
966 len = 2; /* kanji? */
967 else
968 len = pg_ascii_dsplen(s); /* should be ASCII */
969 return len;
970}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_gbk_mblen()

static int pg_gbk_mblen ( const unsigned char *  s)
static

Definition at line 949 of file wchar.c.

950{
951 int len;
952
953 if (IS_HIGHBIT_SET(*s))
954 len = 2; /* kanji? */
955 else
956 len = 1; /* should be ASCII */
957 return len;
958}

References IS_HIGHBIT_SET, and len.

Referenced by pg_gbk_verifychar().

◆ pg_gbk_verifychar()

static int pg_gbk_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1533 of file wchar.c.

1534{
1535 int l,
1536 mbl;
1537
1538 l = mbl = pg_gbk_mblen(s);
1539
1540 if (len < l)
1541 return -1;
1542
1543 if (l == 2 &&
1544 s[0] == NONUTF8_INVALID_BYTE0 &&
1545 s[1] == NONUTF8_INVALID_BYTE1)
1546 return -1;
1547
1548 while (--l > 0)
1549 {
1550 if (*++s == '\0')
1551 return -1;
1552 }
1553
1554 return mbl;
1555}
static int pg_gbk_mblen(const unsigned char *s)
Definition: wchar.c:949

References len, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, and pg_gbk_mblen().

Referenced by pg_gbk_verifystr().

◆ pg_gbk_verifystr()

static int pg_gbk_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1558 of file wchar.c.

1559{
1560 const unsigned char *start = s;
1561
1562 while (len > 0)
1563 {
1564 int l;
1565
1566 /* fast path for ASCII-subset characters */
1567 if (!IS_HIGHBIT_SET(*s))
1568 {
1569 if (*s == '\0')
1570 break;
1571 l = 1;
1572 }
1573 else
1574 {
1575 l = pg_gbk_verifychar(s, len);
1576 if (l == -1)
1577 break;
1578 }
1579 s += l;
1580 len -= l;
1581 }
1582
1583 return s - start;
1584}
static int pg_gbk_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1533

References IS_HIGHBIT_SET, len, pg_gbk_verifychar(), and start.

◆ pg_johab_dsplen()

static int pg_johab_dsplen ( const unsigned char *  s)
static

Definition at line 429 of file wchar.c.

430{
431 return pg_euc_dsplen(s);
432}

References pg_euc_dsplen().

◆ pg_johab_mblen()

static int pg_johab_mblen ( const unsigned char *  s)
static

Definition at line 423 of file wchar.c.

424{
425 return pg_euc_mblen(s);
426}

References pg_euc_mblen().

Referenced by pg_johab_verifychar().

◆ pg_johab_verifychar()

static int pg_johab_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1307 of file wchar.c.

1308{
1309 int l,
1310 mbl;
1311 unsigned char c;
1312
1313 l = mbl = pg_johab_mblen(s);
1314
1315 if (len < l)
1316 return -1;
1317
1318 if (!IS_HIGHBIT_SET(*s))
1319 return mbl;
1320
1321 while (--l > 0)
1322 {
1323 c = *++s;
1324 if (!IS_EUC_RANGE_VALID(c))
1325 return -1;
1326 }
1327 return mbl;
1328}
char * c
static int pg_johab_mblen(const unsigned char *s)
Definition: wchar.c:423

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, and pg_johab_mblen().

Referenced by pg_johab_verifystr().

◆ pg_johab_verifystr()

static int pg_johab_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1331 of file wchar.c.

1332{
1333 const unsigned char *start = s;
1334
1335 while (len > 0)
1336 {
1337 int l;
1338
1339 /* fast path for ASCII-subset characters */
1340 if (!IS_HIGHBIT_SET(*s))
1341 {
1342 if (*s == '\0')
1343 break;
1344 l = 1;
1345 }
1346 else
1347 {
1348 l = pg_johab_verifychar(s, len);
1349 if (l == -1)
1350 break;
1351 }
1352 s += l;
1353 len -= l;
1354 }
1355
1356 return s - start;
1357}
static int pg_johab_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1307

References IS_HIGHBIT_SET, len, pg_johab_verifychar(), and start.

◆ pg_latin12wchar_with_len()

static int pg_latin12wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 839 of file wchar.c.

840{
841 int cnt = 0;
842
843 while (len > 0 && *from)
844 {
845 *to++ = *from++;
846 len--;
847 cnt++;
848 }
849 *to = 0;
850 return cnt;
851}

References len.

◆ pg_latin1_dsplen()

static int pg_latin1_dsplen ( const unsigned char *  s)
static

Definition at line 882 of file wchar.c.

883{
884 return pg_ascii_dsplen(s);
885}

References pg_ascii_dsplen().

◆ pg_latin1_mblen()

static int pg_latin1_mblen ( const unsigned char *  s)
static

Definition at line 876 of file wchar.c.

877{
878 return 1;
879}

◆ pg_latin1_verifychar()

static int pg_latin1_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1410 of file wchar.c.

1411{
1412 return 1;
1413}

◆ pg_latin1_verifystr()

static int pg_latin1_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1416 of file wchar.c.

1417{
1418 const unsigned char *nullpos = memchr(s, 0, len);
1419
1420 if (nullpos == NULL)
1421 return len;
1422 else
1423 return nullpos - s;
1424}

References len.

◆ pg_mule2wchar_with_len()

static int pg_mule2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 674 of file wchar.c.

675{
676 int cnt = 0;
677
678 while (len > 0 && *from)
679 {
680 if (IS_LC1(*from) && len >= 2)
681 {
682 *to = *from++ << 16;
683 *to |= *from++;
684 len -= 2;
685 }
686 else if (IS_LCPRV1(*from) && len >= 3)
687 {
688 from++;
689 *to = *from++ << 16;
690 *to |= *from++;
691 len -= 3;
692 }
693 else if (IS_LC2(*from) && len >= 3)
694 {
695 *to = *from++ << 16;
696 *to |= *from++ << 8;
697 *to |= *from++;
698 len -= 3;
699 }
700 else if (IS_LCPRV2(*from) && len >= 4)
701 {
702 from++;
703 *to = *from++ << 16;
704 *to |= *from++ << 8;
705 *to |= *from++;
706 len -= 4;
707 }
708 else
709 { /* assume ASCII */
710 *to = (unsigned char) *from++;
711 len--;
712 }
713 to++;
714 cnt++;
715 }
716 *to = 0;
717 return cnt;
718}
#define IS_LCPRV2(c)
Definition: pg_wchar.h:164
#define IS_LC2(c)
Definition: pg_wchar.h:144
#define IS_LCPRV1(c)
Definition: pg_wchar.h:152
#define IS_LC1(c)
Definition: pg_wchar.h:126

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

◆ pg_mule_dsplen()

static int pg_mule_dsplen ( const unsigned char *  s)
static

Definition at line 811 of file wchar.c.

812{
813 int len;
814
815 /*
816 * Note: it's not really appropriate to assume that all multibyte charsets
817 * are double-wide on screen. But this seems an okay approximation for
818 * the MULE charsets we currently support.
819 */
820
821 if (IS_LC1(*s))
822 len = 1;
823 else if (IS_LCPRV1(*s))
824 len = 1;
825 else if (IS_LC2(*s))
826 len = 2;
827 else if (IS_LCPRV2(*s))
828 len = 2;
829 else
830 len = 1; /* assume ASCII */
831
832 return len;
833}

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

◆ pg_mule_mblen()

int pg_mule_mblen ( const unsigned char *  s)

Definition at line 793 of file wchar.c.

794{
795 int len;
796
797 if (IS_LC1(*s))
798 len = 2;
799 else if (IS_LCPRV1(*s))
800 len = 3;
801 else if (IS_LC2(*s))
802 len = 3;
803 else if (IS_LCPRV2(*s))
804 len = 4;
805 else
806 len = 1; /* assume ASCII */
807 return len;
808}

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

Referenced by mic2latin(), mic2latin_with_table(), and pg_mule_verifychar().

◆ pg_mule_verifychar()

static int pg_mule_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1360 of file wchar.c.

1361{
1362 int l,
1363 mbl;
1364 unsigned char c;
1365
1366 l = mbl = pg_mule_mblen(s);
1367
1368 if (len < l)
1369 return -1;
1370
1371 while (--l > 0)
1372 {
1373 c = *++s;
1374 if (!IS_HIGHBIT_SET(c))
1375 return -1;
1376 }
1377 return mbl;
1378}
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:793

References IS_HIGHBIT_SET, len, and pg_mule_mblen().

Referenced by pg_mule_verifystr().

◆ pg_mule_verifystr()

static int pg_mule_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1381 of file wchar.c.

1382{
1383 const unsigned char *start = s;
1384
1385 while (len > 0)
1386 {
1387 int l;
1388
1389 /* fast path for ASCII-subset characters */
1390 if (!IS_HIGHBIT_SET(*s))
1391 {
1392 if (*s == '\0')
1393 break;
1394 l = 1;
1395 }
1396 else
1397 {
1398 l = pg_mule_verifychar(s, len);
1399 if (l == -1)
1400 break;
1401 }
1402 s += l;
1403 len -= l;
1404 }
1405
1406 return s - start;
1407}
static int pg_mule_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1360

References IS_HIGHBIT_SET, len, pg_mule_verifychar(), and start.

◆ pg_sjis_dsplen()

static int pg_sjis_dsplen ( const unsigned char *  s)
static

Definition at line 905 of file wchar.c.

906{
907 int len;
908
909 if (*s >= 0xa1 && *s <= 0xdf)
910 len = 1; /* 1 byte kana? */
911 else if (IS_HIGHBIT_SET(*s))
912 len = 2; /* kanji? */
913 else
914 len = pg_ascii_dsplen(s); /* should be ASCII */
915 return len;
916}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_sjis_mblen()

static int pg_sjis_mblen ( const unsigned char *  s)
static

Definition at line 891 of file wchar.c.

892{
893 int len;
894
895 if (*s >= 0xa1 && *s <= 0xdf)
896 len = 1; /* 1 byte kana? */
897 else if (IS_HIGHBIT_SET(*s))
898 len = 2; /* kanji? */
899 else
900 len = 1; /* should be ASCII */
901 return len;
902}

References IS_HIGHBIT_SET, and len.

Referenced by pg_sjis_verifychar().

◆ pg_sjis_verifychar()

static int pg_sjis_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1427 of file wchar.c.

1428{
1429 int l,
1430 mbl;
1431 unsigned char c1,
1432 c2;
1433
1434 l = mbl = pg_sjis_mblen(s);
1435
1436 if (len < l)
1437 return -1;
1438
1439 if (l == 1) /* pg_sjis_mblen already verified it */
1440 return mbl;
1441
1442 c1 = *s++;
1443 c2 = *s;
1444 if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2))
1445 return -1;
1446 return mbl;
1447}
#define ISSJISTAIL(c)
Definition: pg_wchar.h:45
#define ISSJISHEAD(c)
Definition: pg_wchar.h:44
static int pg_sjis_mblen(const unsigned char *s)
Definition: wchar.c:891

References ISSJISHEAD, ISSJISTAIL, len, and pg_sjis_mblen().

Referenced by pg_sjis_verifystr().

◆ pg_sjis_verifystr()

static int pg_sjis_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1450 of file wchar.c.

1451{
1452 const unsigned char *start = s;
1453
1454 while (len > 0)
1455 {
1456 int l;
1457
1458 /* fast path for ASCII-subset characters */
1459 if (!IS_HIGHBIT_SET(*s))
1460 {
1461 if (*s == '\0')
1462 break;
1463 l = 1;
1464 }
1465 else
1466 {
1467 l = pg_sjis_verifychar(s, len);
1468 if (l == -1)
1469 break;
1470 }
1471 s += l;
1472 len -= l;
1473 }
1474
1475 return s - start;
1476}
static int pg_sjis_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1427

References IS_HIGHBIT_SET, len, pg_sjis_verifychar(), and start.

◆ pg_uhc_dsplen()

static int pg_uhc_dsplen ( const unsigned char *  s)
static

Definition at line 988 of file wchar.c.

989{
990 int len;
991
992 if (IS_HIGHBIT_SET(*s))
993 len = 2; /* 2byte? */
994 else
995 len = pg_ascii_dsplen(s); /* should be ASCII */
996 return len;
997}

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_uhc_mblen()

static int pg_uhc_mblen ( const unsigned char *  s)
static

Definition at line 976 of file wchar.c.

977{
978 int len;
979
980 if (IS_HIGHBIT_SET(*s))
981 len = 2; /* 2byte? */
982 else
983 len = 1; /* should be ASCII */
984 return len;
985}

References IS_HIGHBIT_SET, and len.

Referenced by pg_uhc_verifychar().

◆ pg_uhc_verifychar()

static int pg_uhc_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1587 of file wchar.c.

1588{
1589 int l,
1590 mbl;
1591
1592 l = mbl = pg_uhc_mblen(s);
1593
1594 if (len < l)
1595 return -1;
1596
1597 if (l == 2 &&
1598 s[0] == NONUTF8_INVALID_BYTE0 &&
1599 s[1] == NONUTF8_INVALID_BYTE1)
1600 return -1;
1601
1602 while (--l > 0)
1603 {
1604 if (*++s == '\0')
1605 return -1;
1606 }
1607
1608 return mbl;
1609}
static int pg_uhc_mblen(const unsigned char *s)
Definition: wchar.c:976

References len, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, and pg_uhc_mblen().

Referenced by pg_uhc_verifystr().

◆ pg_uhc_verifystr()

static int pg_uhc_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1612 of file wchar.c.

1613{
1614 const unsigned char *start = s;
1615
1616 while (len > 0)
1617 {
1618 int l;
1619
1620 /* fast path for ASCII-subset characters */
1621 if (!IS_HIGHBIT_SET(*s))
1622 {
1623 if (*s == '\0')
1624 break;
1625 l = 1;
1626 }
1627 else
1628 {
1629 l = pg_uhc_verifychar(s, len);
1630 if (l == -1)
1631 break;
1632 }
1633 s += l;
1634 len -= l;
1635 }
1636
1637 return s - start;
1638}
static int pg_uhc_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1587

References IS_HIGHBIT_SET, len, pg_uhc_verifychar(), and start.

◆ pg_utf2wchar_with_len()

static int pg_utf2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 441 of file wchar.c.

442{
443 int cnt = 0;
444 uint32 c1,
445 c2,
446 c3,
447 c4;
448
449 while (len > 0 && *from)
450 {
451 if ((*from & 0x80) == 0)
452 {
453 *to = *from++;
454 len--;
455 }
456 else if ((*from & 0xe0) == 0xc0)
457 {
458 if (len < 2)
459 break; /* drop trailing incomplete char */
460 c1 = *from++ & 0x1f;
461 c2 = *from++ & 0x3f;
462 *to = (c1 << 6) | c2;
463 len -= 2;
464 }
465 else if ((*from & 0xf0) == 0xe0)
466 {
467 if (len < 3)
468 break; /* drop trailing incomplete char */
469 c1 = *from++ & 0x0f;
470 c2 = *from++ & 0x3f;
471 c3 = *from++ & 0x3f;
472 *to = (c1 << 12) | (c2 << 6) | c3;
473 len -= 3;
474 }
475 else if ((*from & 0xf8) == 0xf0)
476 {
477 if (len < 4)
478 break; /* drop trailing incomplete char */
479 c1 = *from++ & 0x07;
480 c2 = *from++ & 0x3f;
481 c3 = *from++ & 0x3f;
482 c4 = *from++ & 0x3f;
483 *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
484 len -= 4;
485 }
486 else
487 {
488 /* treat a bogus char as length 1; not ours to raise error */
489 *to = *from++;
490 len--;
491 }
492 to++;
493 cnt++;
494 }
495 *to = 0;
496 return cnt;
497}

References len.

◆ pg_utf8_islegal()

bool pg_utf8_islegal ( const unsigned char *  source,
int  length 
)

Definition at line 1989 of file wchar.c.

1990{
1991 unsigned char a;
1992
1993 switch (length)
1994 {
1995 default:
1996 /* reject lengths 5 and 6 for now */
1997 return false;
1998 case 4:
1999 a = source[3];
2000 if (a < 0x80 || a > 0xBF)
2001 return false;
2002 /* FALL THRU */
2003 case 3:
2004 a = source[2];
2005 if (a < 0x80 || a > 0xBF)
2006 return false;
2007 /* FALL THRU */
2008 case 2:
2009 a = source[1];
2010 switch (*source)
2011 {
2012 case 0xE0:
2013 if (a < 0xA0 || a > 0xBF)
2014 return false;
2015 break;
2016 case 0xED:
2017 if (a < 0x80 || a > 0x9F)
2018 return false;
2019 break;
2020 case 0xF0:
2021 if (a < 0x90 || a > 0xBF)
2022 return false;
2023 break;
2024 case 0xF4:
2025 if (a < 0x80 || a > 0x8F)
2026 return false;
2027 break;
2028 default:
2029 if (a < 0x80 || a > 0xBF)
2030 return false;
2031 break;
2032 }
2033 /* FALL THRU */
2034 case 1:
2035 a = *source;
2036 if (a >= 0x80 && a < 0xC2)
2037 return false;
2038 if (a > 0xF4)
2039 return false;
2040 break;
2041 }
2042 return true;
2043}
int a
Definition: isn.c:73
static rewind_source * source
Definition: pg_rewind.c:89

References a, and source.

Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifychar(), utf8_to_iso8859_1(), and UtfToLocal().

◆ pg_utf8_verifychar()

static int pg_utf8_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1701 of file wchar.c.

1702{
1703 int l;
1704
1705 if ((*s & 0x80) == 0)
1706 {
1707 if (*s == '\0')
1708 return -1;
1709 return 1;
1710 }
1711 else if ((*s & 0xe0) == 0xc0)
1712 l = 2;
1713 else if ((*s & 0xf0) == 0xe0)
1714 l = 3;
1715 else if ((*s & 0xf8) == 0xf0)
1716 l = 4;
1717 else
1718 l = 1;
1719
1720 if (l > len)
1721 return -1;
1722
1723 if (!pg_utf8_islegal(s, l))
1724 return -1;
1725
1726 return l;
1727}
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1989

References len, and pg_utf8_islegal().

Referenced by pg_utf8_verifystr().

◆ pg_utf8_verifystr()

static int pg_utf8_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1891 of file wchar.c.

1892{
1893 const unsigned char *start = s;
1894 const int orig_len = len;
1895 uint32 state = BGN;
1896
1897/*
1898 * With a stride of two vector widths, gcc will unroll the loop. Even if
1899 * the compiler can unroll a longer loop, it's not worth it because we
1900 * must fall back to the byte-wise algorithm if we find any non-ASCII.
1901 */
1902#define STRIDE_LENGTH (2 * sizeof(Vector8))
1903
1904 if (len >= STRIDE_LENGTH)
1905 {
1906 while (len >= STRIDE_LENGTH)
1907 {
1908 /*
1909 * If the chunk is all ASCII, we can skip the full UTF-8 check,
1910 * but we must first check for a non-END state, which means the
1911 * previous chunk ended in the middle of a multibyte sequence.
1912 */
1913 if (state != END || !is_valid_ascii(s, STRIDE_LENGTH))
1914 utf8_advance(s, &state, STRIDE_LENGTH);
1915
1916 s += STRIDE_LENGTH;
1917 len -= STRIDE_LENGTH;
1918 }
1919
1920 /* The error state persists, so we only need to check for it here. */
1921 if (state == ERR)
1922 {
1923 /*
1924 * Start over from the beginning with the slow path so we can
1925 * count the valid bytes.
1926 */
1927 len = orig_len;
1928 s = start;
1929 }
1930 else if (state != END)
1931 {
1932 /*
1933 * The fast path exited in the middle of a multibyte sequence.
1934 * Walk backwards to find the leading byte so that the slow path
1935 * can resume checking from there. We must always backtrack at
1936 * least one byte, since the current byte could be e.g. an ASCII
1937 * byte after a 2-byte lead, which is invalid.
1938 */
1939 do
1940 {
1941 Assert(s > start);
1942 s--;
1943 len++;
1944 Assert(IS_HIGHBIT_SET(*s));
1945 } while (pg_utf_mblen(s) <= 1);
1946 }
1947 }
1948
1949 /* check remaining bytes */
1950 while (len > 0)
1951 {
1952 int l;
1953
1954 /* fast path for ASCII-subset characters */
1955 if (!IS_HIGHBIT_SET(*s))
1956 {
1957 if (*s == '\0')
1958 break;
1959 l = 1;
1960 }
1961 else
1962 {
1963 l = pg_utf8_verifychar(s, len);
1964 if (l == -1)
1965 break;
1966 }
1967 s += l;
1968 len -= l;
1969 }
1970
1971 return s - start;
1972}
static bool is_valid_ascii(const unsigned char *s, int len)
Definition: ascii.h:25
Definition: regguts.h:323
#define END
Definition: wchar.c:1792
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:538
#define ERR
Definition: wchar.c:1779
static int pg_utf8_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1701
static void utf8_advance(const unsigned char *s, uint32 *state, int len)
Definition: wchar.c:1873
#define BGN
Definition: wchar.c:1781
#define STRIDE_LENGTH

References Assert(), BGN, END, ERR, IS_HIGHBIT_SET, is_valid_ascii(), len, pg_utf8_verifychar(), pg_utf_mblen(), start, STRIDE_LENGTH, and utf8_advance().

◆ pg_utf_dsplen()

static int pg_utf_dsplen ( const unsigned char *  s)
static

Definition at line 662 of file wchar.c.

663{
664 return ucs_wcwidth(utf8_to_unicode(s));
665}
static pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: mbprint.c:53
static int ucs_wcwidth(pg_wchar ucs)
Definition: wchar.c:628

References ucs_wcwidth(), and utf8_to_unicode().

◆ pg_utf_mblen()

int pg_utf_mblen ( const unsigned char *  s)

Definition at line 538 of file wchar.c.

539{
540 int len;
541
542 if ((*s & 0x80) == 0)
543 len = 1;
544 else if ((*s & 0xe0) == 0xc0)
545 len = 2;
546 else if ((*s & 0xf0) == 0xe0)
547 len = 3;
548 else if ((*s & 0xf8) == 0xf0)
549 len = 4;
550#ifdef NOT_USED
551 else if ((*s & 0xfc) == 0xf8)
552 len = 5;
553 else if ((*s & 0xfe) == 0xfc)
554 len = 6;
555#endif
556 else
557 len = 1;
558 return len;
559}

References len.

Referenced by pg_utf8_verifystr(), and pg_wchar2utf_with_len().

◆ pg_wchar2euc_with_len()

static int pg_wchar2euc_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 377 of file wchar.c.

378{
379 int cnt = 0;
380
381 while (len > 0 && *from)
382 {
383 unsigned char c;
384
385 if ((c = (*from >> 24)))
386 {
387 *to++ = c;
388 *to++ = (*from >> 16) & 0xff;
389 *to++ = (*from >> 8) & 0xff;
390 *to++ = *from & 0xff;
391 cnt += 4;
392 }
393 else if ((c = (*from >> 16)))
394 {
395 *to++ = c;
396 *to++ = (*from >> 8) & 0xff;
397 *to++ = *from & 0xff;
398 cnt += 3;
399 }
400 else if ((c = (*from >> 8)))
401 {
402 *to++ = c;
403 *to++ = *from & 0xff;
404 cnt += 2;
405 }
406 else
407 {
408 *to++ = *from;
409 cnt++;
410 }
411 from++;
412 len--;
413 }
414 *to = 0;
415 return cnt;
416}

References len.

◆ pg_wchar2mule_with_len()

static int pg_wchar2mule_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 727 of file wchar.c.

728{
729 int cnt = 0;
730
731 while (len > 0 && *from)
732 {
733 unsigned char lb;
734
735 lb = (*from >> 16) & 0xff;
736 if (IS_LC1(lb))
737 {
738 *to++ = lb;
739 *to++ = *from & 0xff;
740 cnt += 2;
741 }
742 else if (IS_LC2(lb))
743 {
744 *to++ = lb;
745 *to++ = (*from >> 8) & 0xff;
746 *to++ = *from & 0xff;
747 cnt += 3;
748 }
749 else if (IS_LCPRV1_A_RANGE(lb))
750 {
751 *to++ = LCPRV1_A;
752 *to++ = lb;
753 *to++ = *from & 0xff;
754 cnt += 3;
755 }
756 else if (IS_LCPRV1_B_RANGE(lb))
757 {
758 *to++ = LCPRV1_B;
759 *to++ = lb;
760 *to++ = *from & 0xff;
761 cnt += 3;
762 }
763 else if (IS_LCPRV2_A_RANGE(lb))
764 {
765 *to++ = LCPRV2_A;
766 *to++ = lb;
767 *to++ = (*from >> 8) & 0xff;
768 *to++ = *from & 0xff;
769 cnt += 4;
770 }
771 else if (IS_LCPRV2_B_RANGE(lb))
772 {
773 *to++ = LCPRV2_B;
774 *to++ = lb;
775 *to++ = (*from >> 8) & 0xff;
776 *to++ = *from & 0xff;
777 cnt += 4;
778 }
779 else
780 {
781 *to++ = *from & 0xff;
782 cnt += 1;
783 }
784 from++;
785 len--;
786 }
787 *to = 0;
788 return cnt;
789}
#define LCPRV1_A
Definition: pg_wchar.h:150
#define LCPRV1_B
Definition: pg_wchar.h:151
#define LCPRV2_A
Definition: pg_wchar.h:162
#define IS_LCPRV2_B_RANGE(c)
Definition: pg_wchar.h:167
#define IS_LCPRV1_A_RANGE(c)
Definition: pg_wchar.h:153
#define IS_LCPRV1_B_RANGE(c)
Definition: pg_wchar.h:155
#define IS_LCPRV2_A_RANGE(c)
Definition: pg_wchar.h:165
#define LCPRV2_B
Definition: pg_wchar.h:163

References IS_LC1, IS_LC2, IS_LCPRV1_A_RANGE, IS_LCPRV1_B_RANGE, IS_LCPRV2_A_RANGE, IS_LCPRV2_B_RANGE, LCPRV1_A, LCPRV1_B, LCPRV2_A, LCPRV2_B, and len.

◆ pg_wchar2single_with_len()

static int pg_wchar2single_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 861 of file wchar.c.

862{
863 int cnt = 0;
864
865 while (len > 0 && *from)
866 {
867 *to++ = *from++;
868 len--;
869 cnt++;
870 }
871 *to = 0;
872 return cnt;
873}

References len.

◆ pg_wchar2utf_with_len()

static int pg_wchar2utf_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 507 of file wchar.c.

508{
509 int cnt = 0;
510
511 while (len > 0 && *from)
512 {
513 int char_len;
514
515 unicode_to_utf8(*from, to);
516 char_len = pg_utf_mblen(to);
517 cnt += char_len;
518 to += char_len;
519 from++;
520 len--;
521 }
522 *to = 0;
523 return cnt;
524}
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: pg_wchar.h:575

References len, pg_utf_mblen(), and unicode_to_utf8().

◆ ucs_wcwidth()

static int ucs_wcwidth ( pg_wchar  ucs)
static

Definition at line 628 of file wchar.c.

629{
630#include "common/unicode_nonspacing_table.h"
631#include "common/unicode_east_asian_fw_table.h"
632
633 /* test for 8-bit control characters */
634 if (ucs == 0)
635 return 0;
636
637 if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
638 return -1;
639
640 /*
641 * binary search in table of non-spacing characters
642 *
643 * XXX: In the official Unicode sources, it is possible for a character to
644 * be described as both non-spacing and wide at the same time. As of
645 * Unicode 13.0, treating the non-spacing property as the determining
646 * factor for display width leads to the correct behavior, so do that
647 * search first.
648 */
649 if (mbbisearch(ucs, nonspacing,
650 sizeof(nonspacing) / sizeof(struct mbinterval) - 1))
651 return 0;
652
653 /* binary search in table of wide characters */
654 if (mbbisearch(ucs, east_asian_fw,
655 sizeof(east_asian_fw) / sizeof(struct mbinterval) - 1))
656 return 2;
657
658 return 1;
659}
static const struct mbinterval east_asian_fw[]
static const struct mbinterval nonspacing[]
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
Definition: wchar.c:581

References east_asian_fw, mbbisearch(), and nonspacing.

Referenced by pg_utf_dsplen().

◆ utf8_advance()

static void utf8_advance ( const unsigned char *  s,
uint32 *  state,
int  len 
)
static

Definition at line 1873 of file wchar.c.

1874{
1875 /* Note: We deliberately don't check the state's value here. */
1876 while (len > 0)
1877 {
1878 /*
1879 * It's important that the mask value is 31: In most instruction sets,
1880 * a shift by a 32-bit operand is understood to be a shift by its mod
1881 * 32, so the compiler should elide the mask operation.
1882 */
1883 *state = Utf8Transition[*s++] >> (*state & 31);
1884 len--;
1885 }
1886
1887 *state &= 31;
1888}
static const uint32 Utf8Transition[256]
Definition: wchar.c:1815

References len, and Utf8Transition.

Referenced by pg_utf8_verifystr().

Variable Documentation

◆ pg_wchar_table

◆ Utf8Transition

const uint32 Utf8Transition[256]
static

Definition at line 1815 of file wchar.c.

Referenced by utf8_advance().