PostgreSQL Source Code  git master
wchar.c File Reference
Include dependency graph for wchar.c:

Go to the source code of this file.

Data Structures

struct  mbinterval
 

Macros

#define IS_EUC_RANGE_VALID(c)   ((c) >= 0xa1 && (c) <= 0xfe)
 
#define pg_euccn_verifychar   pg_euckr_verifychar
 
#define pg_euccn_verifystr   pg_euckr_verifystr
 
#define ERR   0
 
#define BGN   11
 
#define CS1   16
 
#define CS2   1
 
#define CS3   5
 
#define P3A   6 /* Lead was E0, check for 3-byte overlong */
 
#define P3B   20 /* Lead was ED, check for surrogate */
 
#define P4A   25 /* Lead was F0, check for 4-byte overlong */
 
#define P4B   30 /* Lead was F4, check for too-large */
 
#define END   BGN
 
#define ASC   (END << BGN)
 
#define L2A   (CS1 << BGN)
 
#define L3A   (P3A << BGN)
 
#define L3B   (CS2 << BGN)
 
#define L3C   (P3B << BGN)
 
#define L4A   (P4A << BGN)
 
#define L4B   (CS3 << BGN)
 
#define L4C   (P4B << BGN)
 
#define CR1   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)
 
#define CR2   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)
 
#define CR3   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)
 
#define ILL   ERR
 
#define STRIDE_LENGTH   (2 * sizeof(Vector8))
 

Functions

static int pg_ascii2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_ascii_mblen (const unsigned char *s)
 
static int pg_ascii_dsplen (const unsigned char *s)
 
static int pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euc_mblen (const unsigned char *s)
 
static int pg_euc_dsplen (const unsigned char *s)
 
static int pg_eucjp2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_eucjp_mblen (const unsigned char *s)
 
static int pg_eucjp_dsplen (const unsigned char *s)
 
static int pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euckr_mblen (const unsigned char *s)
 
static int pg_euckr_dsplen (const unsigned char *s)
 
static int pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euccn_mblen (const unsigned char *s)
 
static int pg_euccn_dsplen (const unsigned char *s)
 
static int pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euctw_mblen (const unsigned char *s)
 
static int pg_euctw_dsplen (const unsigned char *s)
 
static int pg_wchar2euc_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_johab_mblen (const unsigned char *s)
 
static int pg_johab_dsplen (const unsigned char *s)
 
static int pg_utf2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2utf_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_utf_mblen (const unsigned char *s)
 
static int mbbisearch (pg_wchar ucs, const struct mbinterval *table, int max)
 
static int ucs_wcwidth (pg_wchar ucs)
 
static int pg_utf_dsplen (const unsigned char *s)
 
static int pg_mule2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2mule_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_mule_mblen (const unsigned char *s)
 
static int pg_mule_dsplen (const unsigned char *s)
 
static int pg_latin12wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2single_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_latin1_mblen (const unsigned char *s)
 
static int pg_latin1_dsplen (const unsigned char *s)
 
static int pg_sjis_mblen (const unsigned char *s)
 
static int pg_sjis_dsplen (const unsigned char *s)
 
static int pg_big5_mblen (const unsigned char *s)
 
static int pg_big5_dsplen (const unsigned char *s)
 
static int pg_gbk_mblen (const unsigned char *s)
 
static int pg_gbk_dsplen (const unsigned char *s)
 
static int pg_uhc_mblen (const unsigned char *s)
 
static int pg_uhc_dsplen (const unsigned char *s)
 
static int pg_gb18030_mblen (const unsigned char *s)
 
static int pg_gb18030_dsplen (const unsigned char *s)
 
static int pg_ascii_verifychar (const unsigned char *s, int len)
 
static int pg_ascii_verifystr (const unsigned char *s, int len)
 
static int pg_eucjp_verifychar (const unsigned char *s, int len)
 
static int pg_eucjp_verifystr (const unsigned char *s, int len)
 
static int pg_euckr_verifychar (const unsigned char *s, int len)
 
static int pg_euckr_verifystr (const unsigned char *s, int len)
 
static int pg_euctw_verifychar (const unsigned char *s, int len)
 
static int pg_euctw_verifystr (const unsigned char *s, int len)
 
static int pg_johab_verifychar (const unsigned char *s, int len)
 
static int pg_johab_verifystr (const unsigned char *s, int len)
 
static int pg_mule_verifychar (const unsigned char *s, int len)
 
static int pg_mule_verifystr (const unsigned char *s, int len)
 
static int pg_latin1_verifychar (const unsigned char *s, int len)
 
static int pg_latin1_verifystr (const unsigned char *s, int len)
 
static int pg_sjis_verifychar (const unsigned char *s, int len)
 
static int pg_sjis_verifystr (const unsigned char *s, int len)
 
static int pg_big5_verifychar (const unsigned char *s, int len)
 
static int pg_big5_verifystr (const unsigned char *s, int len)
 
static int pg_gbk_verifychar (const unsigned char *s, int len)
 
static int pg_gbk_verifystr (const unsigned char *s, int len)
 
static int pg_uhc_verifychar (const unsigned char *s, int len)
 
static int pg_uhc_verifystr (const unsigned char *s, int len)
 
static int pg_gb18030_verifychar (const unsigned char *s, int len)
 
static int pg_gb18030_verifystr (const unsigned char *s, int len)
 
static int pg_utf8_verifychar (const unsigned char *s, int len)
 
static void utf8_advance (const unsigned char *s, uint32 *state, int len)
 
static int pg_utf8_verifystr (const unsigned char *s, int len)
 
bool pg_utf8_islegal (const unsigned char *source, int length)
 
int pg_encoding_mblen (int encoding, const char *mbstr)
 
int pg_encoding_mblen_bounded (int encoding, const char *mbstr)
 
int pg_encoding_dsplen (int encoding, const char *mbstr)
 
int pg_encoding_verifymbchar (int encoding, const char *mbstr, int len)
 
int pg_encoding_verifymbstr (int encoding, const char *mbstr, int len)
 
int pg_encoding_max_length (int encoding)
 

Variables

static const uint32 Utf8Transition [256]
 
const pg_wchar_tbl pg_wchar_table []
 

Macro Definition Documentation

◆ ASC

#define ASC   (END << BGN)

Definition at line 1761 of file wchar.c.

◆ BGN

#define BGN   11

Definition at line 1745 of file wchar.c.

◆ CR1

#define CR1   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)

Definition at line 1773 of file wchar.c.

◆ CR2

#define CR2   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)

Definition at line 1774 of file wchar.c.

◆ CR3

#define CR3   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)

Definition at line 1775 of file wchar.c.

◆ CS1

#define CS1   16

Definition at line 1747 of file wchar.c.

◆ CS2

#define CS2   1

Definition at line 1748 of file wchar.c.

◆ CS3

#define CS3   5

Definition at line 1749 of file wchar.c.

◆ END

#define END   BGN

Definition at line 1756 of file wchar.c.

◆ ERR

#define ERR   0

Definition at line 1743 of file wchar.c.

◆ ILL

#define ILL   ERR

Definition at line 1777 of file wchar.c.

◆ IS_EUC_RANGE_VALID

#define IS_EUC_RANGE_VALID (   c)    ((c) >= 0xa1 && (c) <= 0xfe)

Definition at line 1058 of file wchar.c.

◆ L2A

#define L2A   (CS1 << BGN)

Definition at line 1763 of file wchar.c.

◆ L3A

#define L3A   (P3A << BGN)

Definition at line 1765 of file wchar.c.

◆ L3B

#define L3B   (CS2 << BGN)

Definition at line 1766 of file wchar.c.

◆ L3C

#define L3C   (P3B << BGN)

Definition at line 1767 of file wchar.c.

◆ L4A

#define L4A   (P4A << BGN)

Definition at line 1769 of file wchar.c.

◆ L4B

#define L4B   (CS3 << BGN)

Definition at line 1770 of file wchar.c.

◆ L4C

#define L4C   (P4B << BGN)

Definition at line 1771 of file wchar.c.

◆ P3A

#define P3A   6 /* Lead was E0, check for 3-byte overlong */

Definition at line 1751 of file wchar.c.

◆ P3B

#define P3B   20 /* Lead was ED, check for surrogate */

Definition at line 1752 of file wchar.c.

◆ P4A

#define P4A   25 /* Lead was F0, check for 4-byte overlong */

Definition at line 1753 of file wchar.c.

◆ P4B

#define P4B   30 /* Lead was F4, check for too-large */

Definition at line 1754 of file wchar.c.

◆ pg_euccn_verifychar

#define pg_euccn_verifychar   pg_euckr_verifychar

Definition at line 1203 of file wchar.c.

◆ pg_euccn_verifystr

#define pg_euccn_verifystr   pg_euckr_verifystr

Definition at line 1204 of file wchar.c.

◆ STRIDE_LENGTH

#define STRIDE_LENGTH   (2 * sizeof(Vector8))

Function Documentation

◆ mbbisearch()

static int mbbisearch ( pg_wchar  ucs,
const struct mbinterval *  table,
int  max 
)
static

Definition at line 560 of file wchar.c.

561 {
562  int min = 0;
563  int mid;
564 
565  if (ucs < table[0].first || ucs > table[max].last)
566  return 0;
567  while (max >= min)
568  {
569  mid = (min + max) / 2;
570  if (ucs > table[mid].last)
571  min = mid + 1;
572  else if (ucs < table[mid].first)
573  max = mid - 1;
574  else
575  return 1;
576  }
577 
578  return 0;
579 }

Referenced by ucs_wcwidth().

◆ pg_ascii2wchar_with_len()

static int pg_ascii2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 49 of file wchar.c.

50 {
51  int cnt = 0;
52 
53  while (len > 0 && *from)
54  {
55  *to++ = *from++;
56  len--;
57  cnt++;
58  }
59  *to = 0;
60  return cnt;
61 }
const void size_t len

References len.

◆ pg_ascii_dsplen()

static int pg_ascii_dsplen ( const unsigned char *  s)
static

Definition at line 70 of file wchar.c.

71 {
72  if (*s == '\0')
73  return 0;
74  if (*s < 0x20 || *s == 0x7f)
75  return -1;
76 
77  return 1;
78 }

Referenced by pg_big5_dsplen(), pg_euc_dsplen(), pg_euccn_dsplen(), pg_eucjp_dsplen(), pg_euctw_dsplen(), pg_gb18030_dsplen(), pg_gbk_dsplen(), pg_latin1_dsplen(), pg_sjis_dsplen(), and pg_uhc_dsplen().

◆ pg_ascii_mblen()

static int pg_ascii_mblen ( const unsigned char *  s)
static

Definition at line 64 of file wchar.c.

65 {
66  return 1;
67 }

◆ pg_ascii_verifychar()

static int pg_ascii_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1042 of file wchar.c.

1043 {
1044  return 1;
1045 }

◆ pg_ascii_verifystr()

static int pg_ascii_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1048 of file wchar.c.

1049 {
1050  const unsigned char *nullpos = memchr(s, 0, len);
1051 
1052  if (nullpos == NULL)
1053  return len;
1054  else
1055  return nullpos - s;
1056 }

References len.

◆ pg_big5_dsplen()

static int pg_big5_dsplen ( const unsigned char *  s)
static

Definition at line 913 of file wchar.c.

914 {
915  int len;
916 
917  if (IS_HIGHBIT_SET(*s))
918  len = 2; /* kanji? */
919  else
920  len = pg_ascii_dsplen(s); /* should be ASCII */
921  return len;
922 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1155
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:70

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_big5_mblen()

static int pg_big5_mblen ( const unsigned char *  s)
static

Definition at line 901 of file wchar.c.

902 {
903  int len;
904 
905  if (IS_HIGHBIT_SET(*s))
906  len = 2; /* kanji? */
907  else
908  len = 1; /* should be ASCII */
909  return len;
910 }

References IS_HIGHBIT_SET, and len.

Referenced by pg_big5_verifychar().

◆ pg_big5_verifychar()

static int pg_big5_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1458 of file wchar.c.

1459 {
1460  int l,
1461  mbl;
1462 
1463  l = mbl = pg_big5_mblen(s);
1464 
1465  if (len < l)
1466  return -1;
1467 
1468  while (--l > 0)
1469  {
1470  if (*++s == '\0')
1471  return -1;
1472  }
1473 
1474  return mbl;
1475 }
static int pg_big5_mblen(const unsigned char *s)
Definition: wchar.c:901

References len, and pg_big5_mblen().

Referenced by pg_big5_verifystr().

◆ pg_big5_verifystr()

static int pg_big5_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1478 of file wchar.c.

1479 {
1480  const unsigned char *start = s;
1481 
1482  while (len > 0)
1483  {
1484  int l;
1485 
1486  /* fast path for ASCII-subset characters */
1487  if (!IS_HIGHBIT_SET(*s))
1488  {
1489  if (*s == '\0')
1490  break;
1491  l = 1;
1492  }
1493  else
1494  {
1495  l = pg_big5_verifychar(s, len);
1496  if (l == -1)
1497  break;
1498  }
1499  s += l;
1500  len -= l;
1501  }
1502 
1503  return s - start;
1504 }
return str start
static int pg_big5_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1458

References IS_HIGHBIT_SET, len, pg_big5_verifychar(), and start.

◆ pg_encoding_dsplen()

int pg_encoding_dsplen ( int  encoding,
const char *  mbstr 
)

Definition at line 2090 of file wchar.c.

2091 {
2092  return (PG_VALID_ENCODING(encoding) ?
2093  pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
2094  pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
2095 }
int32 encoding
Definition: pg_database.h:41
@ PG_SQL_ASCII
Definition: pg_wchar.h:226
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:287
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:2015

References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by PQdsplen(), and reportErrorPosition().

◆ pg_encoding_max_length()

int pg_encoding_max_length ( int  encoding)

Definition at line 2127 of file wchar.c.

2128 {
2129  Assert(PG_VALID_ENCODING(encoding));
2130 
2131  return pg_wchar_table[encoding].maxmblen;
2132 }
#define Assert(condition)
Definition: c.h:858
int maxmblen
Definition: pg_wchar.h:386

References Assert, encoding, pg_wchar_tbl::maxmblen, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by ascii(), chr(), CopyConvertBuf(), pg_encoding_mbcliplen(), pg_verify_mbstr_len(), reportErrorPosition(), and type_maximum_size().

◆ pg_encoding_mblen()

int pg_encoding_mblen ( int  encoding,
const char *  mbstr 
)

Definition at line 2069 of file wchar.c.

2070 {
2071  return (PG_VALID_ENCODING(encoding) ?
2072  pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) :
2073  pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
2074 }

References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), pg_encoding_mblen_bounded(), PQescapeInternal(), PQescapeStringInternal(), PQmblen(), PQmblenBounded(), report_invalid_encoding(), and report_untranslatable_char().

◆ pg_encoding_mblen_bounded()

int pg_encoding_mblen_bounded ( int  encoding,
const char *  mbstr 
)

Definition at line 2081 of file wchar.c.

2082 {
2083  return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr));
2084 }
size_t strnlen(const char *str, size_t maxlen)
Definition: strnlen.c:26
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:2069

References encoding, pg_encoding_mblen(), and strnlen().

◆ pg_encoding_verifymbchar()

int pg_encoding_verifymbchar ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 2103 of file wchar.c.

2104 {
2105  return (PG_VALID_ENCODING(encoding) ?
2106  pg_wchar_table[encoding].mbverifychar((const unsigned char *) mbstr, len) :
2107  pg_wchar_table[PG_SQL_ASCII].mbverifychar((const unsigned char *) mbstr, len));
2108 }

References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by big52euc_tw(), big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), shift_jis_20042euc_jis_2004(), and sjis2euc_jp().

◆ pg_encoding_verifymbstr()

int pg_encoding_verifymbstr ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 2116 of file wchar.c.

2117 {
2118  return (PG_VALID_ENCODING(encoding) ?
2119  pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len) :
2120  pg_wchar_table[PG_SQL_ASCII].mbverifystr((const unsigned char *) mbstr, len));
2121 }

References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by add_file_to_manifest(), CopyConvertBuf(), and test_enc_conversion().

◆ pg_euc2wchar_with_len()

static int pg_euc2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 84 of file wchar.c.

85 {
86  int cnt = 0;
87 
88  while (len > 0 && *from)
89  {
90  if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte
91  * KANA") */
92  {
93  from++;
94  *to = (SS2 << 8) | *from++;
95  len -= 2;
96  }
97  else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
98  {
99  from++;
100  *to = (SS3 << 16) | (*from++ << 8);
101  *to |= *from++;
102  len -= 3;
103  }
104  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
105  {
106  *to = *from++ << 8;
107  *to |= *from++;
108  len -= 2;
109  }
110  else /* must be ASCII */
111  {
112  *to = *from++;
113  len--;
114  }
115  to++;
116  cnt++;
117  }
118  *to = 0;
119  return cnt;
120 }
#define SS2
Definition: pg_wchar.h:38
#define SS3
Definition: pg_wchar.h:39

References IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp2wchar_with_len(), and pg_euckr2wchar_with_len().

◆ pg_euc_dsplen()

static int pg_euc_dsplen ( const unsigned char *  s)
inline static

Definition at line 139 of file wchar.c.

140 {
141  int len;
142 
143  if (*s == SS2)
144  len = 2;
145  else if (*s == SS3)
146  len = 2;
147  else if (IS_HIGHBIT_SET(*s))
148  len = 2;
149  else
150  len = pg_ascii_dsplen(s);
151  return len;
152 }

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

Referenced by pg_euckr_dsplen(), and pg_johab_dsplen().

◆ pg_euc_mblen()

static int pg_euc_mblen ( const unsigned char *  s)
inline static

Definition at line 123 of file wchar.c.

124 {
125  int len;
126 
127  if (*s == SS2)
128  len = 2;
129  else if (*s == SS3)
130  len = 3;
131  else if (IS_HIGHBIT_SET(*s))
132  len = 2;
133  else
134  len = 1;
135  return len;
136 }

References IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp_mblen(), pg_euckr_mblen(), and pg_johab_mblen().

◆ pg_euccn2wchar_with_len()

static int pg_euccn2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 211 of file wchar.c.

212 {
213  int cnt = 0;
214 
215  while (len > 0 && *from)
216  {
217  if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
218  {
219  from++;
220  *to = (SS2 << 16) | (*from++ << 8);
221  *to |= *from++;
222  len -= 3;
223  }
224  else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */
225  {
226  from++;
227  *to = (SS3 << 16) | (*from++ << 8);
228  *to |= *from++;
229  len -= 3;
230  }
231  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
232  {
233  *to = *from++ << 8;
234  *to |= *from++;
235  len -= 2;
236  }
237  else
238  {
239  *to = *from++;
240  len--;
241  }
242  to++;
243  cnt++;
244  }
245  *to = 0;
246  return cnt;
247 }

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euccn_dsplen()

static int pg_euccn_dsplen ( const unsigned char *  s)
static

Definition at line 262 of file wchar.c.

263 {
264  int len;
265 
266  if (IS_HIGHBIT_SET(*s))
267  len = 2;
268  else
269  len = pg_ascii_dsplen(s);
270  return len;
271 }

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_euccn_mblen()

static int pg_euccn_mblen ( const unsigned char *  s)
static

Definition at line 250 of file wchar.c.

251 {
252  int len;
253 
254  if (IS_HIGHBIT_SET(*s))
255  len = 2;
256  else
257  len = 1;
258  return len;
259 }

References IS_HIGHBIT_SET, and len.

◆ pg_eucjp2wchar_with_len()

static int pg_eucjp2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 158 of file wchar.c.

159 {
160  return pg_euc2wchar_with_len(from, to, len);
161 }
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition: wchar.c:84

References len, and pg_euc2wchar_with_len().

◆ pg_eucjp_dsplen()

static int pg_eucjp_dsplen ( const unsigned char *  s)
static

Definition at line 170 of file wchar.c.

171 {
172  int len;
173 
174  if (*s == SS2)
175  len = 1;
176  else if (*s == SS3)
177  len = 2;
178  else if (IS_HIGHBIT_SET(*s))
179  len = 2;
180  else
181  len = pg_ascii_dsplen(s);
182  return len;
183 }

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

◆ pg_eucjp_mblen()

static int pg_eucjp_mblen ( const unsigned char *  s)
static

Definition at line 164 of file wchar.c.

165 {
166  return pg_euc_mblen(s);
167 }
static int pg_euc_mblen(const unsigned char *s)
Definition: wchar.c:123

References pg_euc_mblen().

◆ pg_eucjp_verifychar()

static int pg_eucjp_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1061 of file wchar.c.

1062 {
1063  int l;
1064  unsigned char c1,
1065  c2;
1066 
1067  c1 = *s++;
1068 
1069  switch (c1)
1070  {
1071  case SS2: /* JIS X 0201 */
1072  l = 2;
1073  if (l > len)
1074  return -1;
1075  c2 = *s++;
1076  if (c2 < 0xa1 || c2 > 0xdf)
1077  return -1;
1078  break;
1079 
1080  case SS3: /* JIS X 0212 */
1081  l = 3;
1082  if (l > len)
1083  return -1;
1084  c2 = *s++;
1085  if (!IS_EUC_RANGE_VALID(c2))
1086  return -1;
1087  c2 = *s++;
1088  if (!IS_EUC_RANGE_VALID(c2))
1089  return -1;
1090  break;
1091 
1092  default:
1093  if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1094  {
1095  l = 2;
1096  if (l > len)
1097  return -1;
1098  if (!IS_EUC_RANGE_VALID(c1))
1099  return -1;
1100  c2 = *s++;
1101  if (!IS_EUC_RANGE_VALID(c2))
1102  return -1;
1103  }
1104  else
1105  /* must be ASCII */
1106  {
1107  l = 1;
1108  }
1109  break;
1110  }
1111 
1112  return l;
1113 }
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1058

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp_verifystr().

◆ pg_eucjp_verifystr()

static int pg_eucjp_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1116 of file wchar.c.

1117 {
1118  const unsigned char *start = s;
1119 
1120  while (len > 0)
1121  {
1122  int l;
1123 
1124  /* fast path for ASCII-subset characters */
1125  if (!IS_HIGHBIT_SET(*s))
1126  {
1127  if (*s == '\0')
1128  break;
1129  l = 1;
1130  }
1131  else
1132  {
1133  l = pg_eucjp_verifychar(s, len);
1134  if (l == -1)
1135  break;
1136  }
1137  s += l;
1138  len -= l;
1139  }
1140 
1141  return s - start;
1142 }
static int pg_eucjp_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1061

References IS_HIGHBIT_SET, len, pg_eucjp_verifychar(), and start.

◆ pg_euckr2wchar_with_len()

static int pg_euckr2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 189 of file wchar.c.

190 {
191  return pg_euc2wchar_with_len(from, to, len);
192 }

References len, and pg_euc2wchar_with_len().

◆ pg_euckr_dsplen()

static int pg_euckr_dsplen ( const unsigned char *  s)
static

Definition at line 201 of file wchar.c.

202 {
203  return pg_euc_dsplen(s);
204 }
static int pg_euc_dsplen(const unsigned char *s)
Definition: wchar.c:139

References pg_euc_dsplen().

◆ pg_euckr_mblen()

static int pg_euckr_mblen ( const unsigned char *  s)
static

Definition at line 195 of file wchar.c.

196 {
197  return pg_euc_mblen(s);
198 }

References pg_euc_mblen().

◆ pg_euckr_verifychar()

static int pg_euckr_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1145 of file wchar.c.

1146 {
1147  int l;
1148  unsigned char c1,
1149  c2;
1150 
1151  c1 = *s++;
1152 
1153  if (IS_HIGHBIT_SET(c1))
1154  {
1155  l = 2;
1156  if (l > len)
1157  return -1;
1158  if (!IS_EUC_RANGE_VALID(c1))
1159  return -1;
1160  c2 = *s++;
1161  if (!IS_EUC_RANGE_VALID(c2))
1162  return -1;
1163  }
1164  else
1165  /* must be ASCII */
1166  {
1167  l = 1;
1168  }
1169 
1170  return l;
1171 }

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, and len.

Referenced by pg_euckr_verifystr().

◆ pg_euckr_verifystr()

static int pg_euckr_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1174 of file wchar.c.

1175 {
1176  const unsigned char *start = s;
1177 
1178  while (len > 0)
1179  {
1180  int l;
1181 
1182  /* fast path for ASCII-subset characters */
1183  if (!IS_HIGHBIT_SET(*s))
1184  {
1185  if (*s == '\0')
1186  break;
1187  l = 1;
1188  }
1189  else
1190  {
1191  l = pg_euckr_verifychar(s, len);
1192  if (l == -1)
1193  break;
1194  }
1195  s += l;
1196  len -= l;
1197  }
1198 
1199  return s - start;
1200 }
static int pg_euckr_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1145

References IS_HIGHBIT_SET, len, pg_euckr_verifychar(), and start.

◆ pg_euctw2wchar_with_len()

static int pg_euctw2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 278 of file wchar.c.

279 {
280  int cnt = 0;
281 
282  while (len > 0 && *from)
283  {
284  if (*from == SS2 && len >= 4) /* code set 2 */
285  {
286  from++;
287  *to = (((uint32) SS2) << 24) | (*from++ << 16);
288  *to |= *from++ << 8;
289  *to |= *from++;
290  len -= 4;
291  }
292  else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
293  {
294  from++;
295  *to = (SS3 << 16) | (*from++ << 8);
296  *to |= *from++;
297  len -= 3;
298  }
299  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */
300  {
301  *to = *from++ << 8;
302  *to |= *from++;
303  len -= 2;
304  }
305  else
306  {
307  *to = *from++;
308  len--;
309  }
310  to++;
311  cnt++;
312  }
313  *to = 0;
314  return cnt;
315 }
unsigned int uint32
Definition: c.h:506

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euctw_dsplen()

static int pg_euctw_dsplen ( const unsigned char *  s)
static

Definition at line 334 of file wchar.c.

335 {
336  int len;
337 
338  if (*s == SS2)
339  len = 2;
340  else if (*s == SS3)
341  len = 2;
342  else if (IS_HIGHBIT_SET(*s))
343  len = 2;
344  else
345  len = pg_ascii_dsplen(s);
346  return len;
347 }

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

◆ pg_euctw_mblen()

static int pg_euctw_mblen ( const unsigned char *  s)
static

Definition at line 318 of file wchar.c.

319 {
320  int len;
321 
322  if (*s == SS2)
323  len = 4;
324  else if (*s == SS3)
325  len = 3;
326  else if (IS_HIGHBIT_SET(*s))
327  len = 2;
328  else
329  len = 1;
330  return len;
331 }

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euctw_verifychar()

static int pg_euctw_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1207 of file wchar.c.

1208 {
1209  int l;
1210  unsigned char c1,
1211  c2;
1212 
1213  c1 = *s++;
1214 
1215  switch (c1)
1216  {
1217  case SS2: /* CNS 11643 Plane 1-7 */
1218  l = 4;
1219  if (l > len)
1220  return -1;
1221  c2 = *s++;
1222  if (c2 < 0xa1 || c2 > 0xa7)
1223  return -1;
1224  c2 = *s++;
1225  if (!IS_EUC_RANGE_VALID(c2))
1226  return -1;
1227  c2 = *s++;
1228  if (!IS_EUC_RANGE_VALID(c2))
1229  return -1;
1230  break;
1231 
1232  case SS3: /* unused */
1233  return -1;
1234 
1235  default:
1236  if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */
1237  {
1238  l = 2;
1239  if (l > len)
1240  return -1;
1241  /* no further range check on c1? */
1242  c2 = *s++;
1243  if (!IS_EUC_RANGE_VALID(c2))
1244  return -1;
1245  }
1246  else
1247  /* must be ASCII */
1248  {
1249  l = 1;
1250  }
1251  break;
1252  }
1253  return l;
1254 }

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_euctw_verifystr().

◆ pg_euctw_verifystr()

static int pg_euctw_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1257 of file wchar.c.

1258 {
1259  const unsigned char *start = s;
1260 
1261  while (len > 0)
1262  {
1263  int l;
1264 
1265  /* fast path for ASCII-subset characters */
1266  if (!IS_HIGHBIT_SET(*s))
1267  {
1268  if (*s == '\0')
1269  break;
1270  l = 1;
1271  }
1272  else
1273  {
1274  l = pg_euctw_verifychar(s, len);
1275  if (l == -1)
1276  break;
1277  }
1278  s += l;
1279  len -= l;
1280  }
1281 
1282  return s - start;
1283 }
static int pg_euctw_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1207

References IS_HIGHBIT_SET, len, pg_euctw_verifychar(), and start.

◆ pg_gb18030_dsplen()

static int pg_gb18030_dsplen ( const unsigned char *  s)
static

Definition at line 1008 of file wchar.c.

1009 {
1010  int len;
1011 
1012  if (IS_HIGHBIT_SET(*s))
1013  len = 2;
1014  else
1015  len = pg_ascii_dsplen(s); /* ASCII */
1016  return len;
1017 }

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_gb18030_mblen()

static int pg_gb18030_mblen ( const unsigned char *  s)
static

Definition at line 994 of file wchar.c.

995 {
996  int len;
997 
998  if (!IS_HIGHBIT_SET(*s))
999  len = 1; /* ASCII */
1000  else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1001  len = 4;
1002  else
1003  len = 2;
1004  return len;
1005 }

References IS_HIGHBIT_SET, and len.

◆ pg_gb18030_verifychar()

static int pg_gb18030_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1605 of file wchar.c.

1606 {
1607  int l;
1608 
1609  if (!IS_HIGHBIT_SET(*s))
1610  l = 1; /* ASCII */
1611  else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1612  {
1613  /* Should be 4-byte, validate remaining bytes */
1614  if (*s >= 0x81 && *s <= 0xfe &&
1615  *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
1616  *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
1617  l = 4;
1618  else
1619  l = -1;
1620  }
1621  else if (len >= 2 && *s >= 0x81 && *s <= 0xfe)
1622  {
1623  /* Should be 2-byte, validate */
1624  if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
1625  (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
1626  l = 2;
1627  else
1628  l = -1;
1629  }
1630  else
1631  l = -1;
1632  return l;
1633 }

References IS_HIGHBIT_SET, and len.

Referenced by pg_gb18030_verifystr().

◆ pg_gb18030_verifystr()

static int pg_gb18030_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1636 of file wchar.c.

1637 {
1638  const unsigned char *start = s;
1639 
1640  while (len > 0)
1641  {
1642  int l;
1643 
1644  /* fast path for ASCII-subset characters */
1645  if (!IS_HIGHBIT_SET(*s))
1646  {
1647  if (*s == '\0')
1648  break;
1649  l = 1;
1650  }
1651  else
1652  {
1653  l = pg_gb18030_verifychar(s, len);
1654  if (l == -1)
1655  break;
1656  }
1657  s += l;
1658  len -= l;
1659  }
1660 
1661  return s - start;
1662 }
static int pg_gb18030_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1605

References IS_HIGHBIT_SET, len, pg_gb18030_verifychar(), and start.

◆ pg_gbk_dsplen()

static int pg_gbk_dsplen ( const unsigned char *  s)
static

Definition at line 940 of file wchar.c.

941 {
942  int len;
943 
944  if (IS_HIGHBIT_SET(*s))
945  len = 2; /* kanji? */
946  else
947  len = pg_ascii_dsplen(s); /* should be ASCII */
948  return len;
949 }

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_gbk_mblen()

static int pg_gbk_mblen ( const unsigned char *  s)
static

Definition at line 928 of file wchar.c.

929 {
930  int len;
931 
932  if (IS_HIGHBIT_SET(*s))
933  len = 2; /* kanji? */
934  else
935  len = 1; /* should be ASCII */
936  return len;
937 }

References IS_HIGHBIT_SET, and len.

Referenced by pg_gbk_verifychar().

◆ pg_gbk_verifychar()

static int pg_gbk_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1507 of file wchar.c.

1508 {
1509  int l,
1510  mbl;
1511 
1512  l = mbl = pg_gbk_mblen(s);
1513 
1514  if (len < l)
1515  return -1;
1516 
1517  while (--l > 0)
1518  {
1519  if (*++s == '\0')
1520  return -1;
1521  }
1522 
1523  return mbl;
1524 }
static int pg_gbk_mblen(const unsigned char *s)
Definition: wchar.c:928

References len, and pg_gbk_mblen().

Referenced by pg_gbk_verifystr().

◆ pg_gbk_verifystr()

static int pg_gbk_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1527 of file wchar.c.

1528 {
1529  const unsigned char *start = s;
1530 
1531  while (len > 0)
1532  {
1533  int l;
1534 
1535  /* fast path for ASCII-subset characters */
1536  if (!IS_HIGHBIT_SET(*s))
1537  {
1538  if (*s == '\0')
1539  break;
1540  l = 1;
1541  }
1542  else
1543  {
1544  l = pg_gbk_verifychar(s, len);
1545  if (l == -1)
1546  break;
1547  }
1548  s += l;
1549  len -= l;
1550  }
1551 
1552  return s - start;
1553 }
static int pg_gbk_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1507

References IS_HIGHBIT_SET, len, pg_gbk_verifychar(), and start.

◆ pg_johab_dsplen()

static int pg_johab_dsplen ( const unsigned char *  s)
static

Definition at line 408 of file wchar.c.

409 {
410  return pg_euc_dsplen(s);
411 }

References pg_euc_dsplen().

◆ pg_johab_mblen()

static int pg_johab_mblen ( const unsigned char *  s)
static

Definition at line 402 of file wchar.c.

403 {
404  return pg_euc_mblen(s);
405 }

References pg_euc_mblen().

Referenced by pg_johab_verifychar().

◆ pg_johab_verifychar()

static int pg_johab_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1286 of file wchar.c.

1287 {
1288  int l,
1289  mbl;
1290  unsigned char c;
1291 
1292  l = mbl = pg_johab_mblen(s);
1293 
1294  if (len < l)
1295  return -1;
1296 
1297  if (!IS_HIGHBIT_SET(*s))
1298  return mbl;
1299 
1300  while (--l > 0)
1301  {
1302  c = *++s;
1303  if (!IS_EUC_RANGE_VALID(c))
1304  return -1;
1305  }
1306  return mbl;
1307 }
char * c
static int pg_johab_mblen(const unsigned char *s)
Definition: wchar.c:402

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, and pg_johab_mblen().

Referenced by pg_johab_verifystr().

◆ pg_johab_verifystr()

static int pg_johab_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1310 of file wchar.c.

1311 {
1312  const unsigned char *start = s;
1313 
1314  while (len > 0)
1315  {
1316  int l;
1317 
1318  /* fast path for ASCII-subset characters */
1319  if (!IS_HIGHBIT_SET(*s))
1320  {
1321  if (*s == '\0')
1322  break;
1323  l = 1;
1324  }
1325  else
1326  {
1327  l = pg_johab_verifychar(s, len);
1328  if (l == -1)
1329  break;
1330  }
1331  s += l;
1332  len -= l;
1333  }
1334 
1335  return s - start;
1336 }
static int pg_johab_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1286

References IS_HIGHBIT_SET, len, pg_johab_verifychar(), and start.

◆ pg_latin12wchar_with_len()

static int pg_latin12wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 818 of file wchar.c.

819 {
820  int cnt = 0;
821 
822  while (len > 0 && *from)
823  {
824  *to++ = *from++;
825  len--;
826  cnt++;
827  }
828  *to = 0;
829  return cnt;
830 }

References len.

◆ pg_latin1_dsplen()

static int pg_latin1_dsplen ( const unsigned char *  s)
static

Definition at line 861 of file wchar.c.

862 {
863  return pg_ascii_dsplen(s);
864 }

References pg_ascii_dsplen().

◆ pg_latin1_mblen()

static int pg_latin1_mblen ( const unsigned char *  s)
static

Definition at line 855 of file wchar.c.

856 {
857  return 1;
858 }

◆ pg_latin1_verifychar()

static int pg_latin1_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1389 of file wchar.c.

1390 {
1391  return 1;
1392 }

◆ pg_latin1_verifystr()

static int pg_latin1_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1395 of file wchar.c.

1396 {
1397  const unsigned char *nullpos = memchr(s, 0, len);
1398 
1399  if (nullpos == NULL)
1400  return len;
1401  else
1402  return nullpos - s;
1403 }

References len.

◆ pg_mule2wchar_with_len()

static int pg_mule2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 653 of file wchar.c.

654 {
655  int cnt = 0;
656 
657  while (len > 0 && *from)
658  {
659  if (IS_LC1(*from) && len >= 2)
660  {
661  *to = *from++ << 16;
662  *to |= *from++;
663  len -= 2;
664  }
665  else if (IS_LCPRV1(*from) && len >= 3)
666  {
667  from++;
668  *to = *from++ << 16;
669  *to |= *from++;
670  len -= 3;
671  }
672  else if (IS_LC2(*from) && len >= 3)
673  {
674  *to = *from++ << 16;
675  *to |= *from++ << 8;
676  *to |= *from++;
677  len -= 3;
678  }
679  else if (IS_LCPRV2(*from) && len >= 4)
680  {
681  from++;
682  *to = *from++ << 16;
683  *to |= *from++ << 8;
684  *to |= *from++;
685  len -= 4;
686  }
687  else
688  { /* assume ASCII */
689  *to = (unsigned char) *from++;
690  len--;
691  }
692  to++;
693  cnt++;
694  }
695  *to = 0;
696  return cnt;
697 }
#define IS_LCPRV2(c)
Definition: pg_wchar.h:164
#define IS_LC2(c)
Definition: pg_wchar.h:144
#define IS_LCPRV1(c)
Definition: pg_wchar.h:152
#define IS_LC1(c)
Definition: pg_wchar.h:126

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

◆ pg_mule_dsplen()

static int pg_mule_dsplen ( const unsigned char *  s)
static

Definition at line 790 of file wchar.c.

791 {
792  int len;
793 
794  /*
795  * Note: it's not really appropriate to assume that all multibyte charsets
796  * are double-wide on screen. But this seems an okay approximation for
797  * the MULE charsets we currently support.
798  */
799 
800  if (IS_LC1(*s))
801  len = 1;
802  else if (IS_LCPRV1(*s))
803  len = 1;
804  else if (IS_LC2(*s))
805  len = 2;
806  else if (IS_LCPRV2(*s))
807  len = 2;
808  else
809  len = 1; /* assume ASCII */
810 
811  return len;
812 }

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

◆ pg_mule_mblen()

int pg_mule_mblen ( const unsigned char *  s)

Definition at line 772 of file wchar.c.

773 {
774  int len;
775 
776  if (IS_LC1(*s))
777  len = 2;
778  else if (IS_LCPRV1(*s))
779  len = 3;
780  else if (IS_LC2(*s))
781  len = 3;
782  else if (IS_LCPRV2(*s))
783  len = 4;
784  else
785  len = 1; /* assume ASCII */
786  return len;
787 }

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

Referenced by mic2latin(), mic2latin_with_table(), and pg_mule_verifychar().

◆ pg_mule_verifychar()

static int pg_mule_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1339 of file wchar.c.

1340 {
1341  int l,
1342  mbl;
1343  unsigned char c;
1344 
1345  l = mbl = pg_mule_mblen(s);
1346 
1347  if (len < l)
1348  return -1;
1349 
1350  while (--l > 0)
1351  {
1352  c = *++s;
1353  if (!IS_HIGHBIT_SET(c))
1354  return -1;
1355  }
1356  return mbl;
1357 }
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:772

References IS_HIGHBIT_SET, len, and pg_mule_mblen().

Referenced by pg_mule_verifystr().

◆ pg_mule_verifystr()

static int pg_mule_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1360 of file wchar.c.

1361 {
1362  const unsigned char *start = s;
1363 
1364  while (len > 0)
1365  {
1366  int l;
1367 
1368  /* fast path for ASCII-subset characters */
1369  if (!IS_HIGHBIT_SET(*s))
1370  {
1371  if (*s == '\0')
1372  break;
1373  l = 1;
1374  }
1375  else
1376  {
1377  l = pg_mule_verifychar(s, len);
1378  if (l == -1)
1379  break;
1380  }
1381  s += l;
1382  len -= l;
1383  }
1384 
1385  return s - start;
1386 }
static int pg_mule_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1339

References IS_HIGHBIT_SET, len, pg_mule_verifychar(), and start.

◆ pg_sjis_dsplen()

static int pg_sjis_dsplen ( const unsigned char *  s)
static

Definition at line 884 of file wchar.c.

885 {
886  int len;
887 
888  if (*s >= 0xa1 && *s <= 0xdf)
889  len = 1; /* 1 byte kana? */
890  else if (IS_HIGHBIT_SET(*s))
891  len = 2; /* kanji? */
892  else
893  len = pg_ascii_dsplen(s); /* should be ASCII */
894  return len;
895 }

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_sjis_mblen()

static int pg_sjis_mblen ( const unsigned char *  s)
static

Definition at line 870 of file wchar.c.

871 {
872  int len;
873 
874  if (*s >= 0xa1 && *s <= 0xdf)
875  len = 1; /* 1 byte kana? */
876  else if (IS_HIGHBIT_SET(*s))
877  len = 2; /* kanji? */
878  else
879  len = 1; /* should be ASCII */
880  return len;
881 }

References IS_HIGHBIT_SET, and len.

Referenced by pg_sjis_verifychar().

◆ pg_sjis_verifychar()

static int pg_sjis_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1406 of file wchar.c.

1407 {
1408  int l,
1409  mbl;
1410  unsigned char c1,
1411  c2;
1412 
1413  l = mbl = pg_sjis_mblen(s);
1414 
1415  if (len < l)
1416  return -1;
1417 
1418  if (l == 1) /* pg_sjis_mblen already verified it */
1419  return mbl;
1420 
1421  c1 = *s++;
1422  c2 = *s;
1423  if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2))
1424  return -1;
1425  return mbl;
1426 }
#define ISSJISTAIL(c)
Definition: pg_wchar.h:45
#define ISSJISHEAD(c)
Definition: pg_wchar.h:44
static int pg_sjis_mblen(const unsigned char *s)
Definition: wchar.c:870

References ISSJISHEAD, ISSJISTAIL, len, and pg_sjis_mblen().

Referenced by pg_sjis_verifystr().

◆ pg_sjis_verifystr()

static int pg_sjis_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1429 of file wchar.c.

1430 {
1431  const unsigned char *start = s;
1432 
1433  while (len > 0)
1434  {
1435  int l;
1436 
1437  /* fast path for ASCII-subset characters */
1438  if (!IS_HIGHBIT_SET(*s))
1439  {
1440  if (*s == '\0')
1441  break;
1442  l = 1;
1443  }
1444  else
1445  {
1446  l = pg_sjis_verifychar(s, len);
1447  if (l == -1)
1448  break;
1449  }
1450  s += l;
1451  len -= l;
1452  }
1453 
1454  return s - start;
1455 }
static int pg_sjis_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1406

References IS_HIGHBIT_SET, len, pg_sjis_verifychar(), and start.

◆ pg_uhc_dsplen()

static int pg_uhc_dsplen ( const unsigned char *  s)
static

Definition at line 967 of file wchar.c.

968 {
969  int len;
970 
971  if (IS_HIGHBIT_SET(*s))
972  len = 2; /* 2byte? */
973  else
974  len = pg_ascii_dsplen(s); /* should be ASCII */
975  return len;
976 }

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_uhc_mblen()

static int pg_uhc_mblen ( const unsigned char *  s)
static

Definition at line 955 of file wchar.c.

956 {
957  int len;
958 
959  if (IS_HIGHBIT_SET(*s))
960  len = 2; /* 2byte? */
961  else
962  len = 1; /* should be ASCII */
963  return len;
964 }

References IS_HIGHBIT_SET, and len.

Referenced by pg_uhc_verifychar().

◆ pg_uhc_verifychar()

static int pg_uhc_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1556 of file wchar.c.

1557 {
1558  int l,
1559  mbl;
1560 
1561  l = mbl = pg_uhc_mblen(s);
1562 
1563  if (len < l)
1564  return -1;
1565 
1566  while (--l > 0)
1567  {
1568  if (*++s == '\0')
1569  return -1;
1570  }
1571 
1572  return mbl;
1573 }
static int pg_uhc_mblen(const unsigned char *s)
Definition: wchar.c:955

References len, and pg_uhc_mblen().

Referenced by pg_uhc_verifystr().

◆ pg_uhc_verifystr()

static int pg_uhc_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1576 of file wchar.c.

1577 {
1578  const unsigned char *start = s;
1579 
1580  while (len > 0)
1581  {
1582  int l;
1583 
1584  /* fast path for ASCII-subset characters */
1585  if (!IS_HIGHBIT_SET(*s))
1586  {
1587  if (*s == '\0')
1588  break;
1589  l = 1;
1590  }
1591  else
1592  {
1593  l = pg_uhc_verifychar(s, len);
1594  if (l == -1)
1595  break;
1596  }
1597  s += l;
1598  len -= l;
1599  }
1600 
1601  return s - start;
1602 }
static int pg_uhc_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1556

References IS_HIGHBIT_SET, len, pg_uhc_verifychar(), and start.

◆ pg_utf2wchar_with_len()

static int pg_utf2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 420 of file wchar.c.

421 {
422  int cnt = 0;
423  uint32 c1,
424  c2,
425  c3,
426  c4;
427 
428  while (len > 0 && *from)
429  {
430  if ((*from & 0x80) == 0)
431  {
432  *to = *from++;
433  len--;
434  }
435  else if ((*from & 0xe0) == 0xc0)
436  {
437  if (len < 2)
438  break; /* drop trailing incomplete char */
439  c1 = *from++ & 0x1f;
440  c2 = *from++ & 0x3f;
441  *to = (c1 << 6) | c2;
442  len -= 2;
443  }
444  else if ((*from & 0xf0) == 0xe0)
445  {
446  if (len < 3)
447  break; /* drop trailing incomplete char */
448  c1 = *from++ & 0x0f;
449  c2 = *from++ & 0x3f;
450  c3 = *from++ & 0x3f;
451  *to = (c1 << 12) | (c2 << 6) | c3;
452  len -= 3;
453  }
454  else if ((*from & 0xf8) == 0xf0)
455  {
456  if (len < 4)
457  break; /* drop trailing incomplete char */
458  c1 = *from++ & 0x07;
459  c2 = *from++ & 0x3f;
460  c3 = *from++ & 0x3f;
461  c4 = *from++ & 0x3f;
462  *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
463  len -= 4;
464  }
465  else
466  {
467  /* treat a bogus char as length 1; not ours to raise error */
468  *to = *from++;
469  len--;
470  }
471  to++;
472  cnt++;
473  }
474  *to = 0;
475  return cnt;
476 }

References len.

◆ pg_utf8_islegal()

bool pg_utf8_islegal ( const unsigned char *  source,
int  length 
)

Definition at line 1953 of file wchar.c.

1954 {
1955  unsigned char a;
1956 
1957  switch (length)
1958  {
1959  default:
1960  /* reject lengths 5 and 6 for now */
1961  return false;
1962  case 4:
1963  a = source[3];
1964  if (a < 0x80 || a > 0xBF)
1965  return false;
1966  /* FALL THRU */
1967  case 3:
1968  a = source[2];
1969  if (a < 0x80 || a > 0xBF)
1970  return false;
1971  /* FALL THRU */
1972  case 2:
1973  a = source[1];
1974  switch (*source)
1975  {
1976  case 0xE0:
1977  if (a < 0xA0 || a > 0xBF)
1978  return false;
1979  break;
1980  case 0xED:
1981  if (a < 0x80 || a > 0x9F)
1982  return false;
1983  break;
1984  case 0xF0:
1985  if (a < 0x90 || a > 0xBF)
1986  return false;
1987  break;
1988  case 0xF4:
1989  if (a < 0x80 || a > 0x8F)
1990  return false;
1991  break;
1992  default:
1993  if (a < 0x80 || a > 0xBF)
1994  return false;
1995  break;
1996  }
1997  /* FALL THRU */
1998  case 1:
1999  a = *source;
2000  if (a >= 0x80 && a < 0xC2)
2001  return false;
2002  if (a > 0xF4)
2003  return false;
2004  break;
2005  }
2006  return true;
2007 }
int a
Definition: isn.c:69
static rewind_source * source
Definition: pg_rewind.c:89

References a, and source.

Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifychar(), utf8_to_iso8859_1(), and UtfToLocal().

◆ pg_utf8_verifychar()

static int pg_utf8_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1665 of file wchar.c.

1666 {
1667  int l;
1668 
1669  if ((*s & 0x80) == 0)
1670  {
1671  if (*s == '\0')
1672  return -1;
1673  return 1;
1674  }
1675  else if ((*s & 0xe0) == 0xc0)
1676  l = 2;
1677  else if ((*s & 0xf0) == 0xe0)
1678  l = 3;
1679  else if ((*s & 0xf8) == 0xf0)
1680  l = 4;
1681  else
1682  l = 1;
1683 
1684  if (l > len)
1685  return -1;
1686 
1687  if (!pg_utf8_islegal(s, l))
1688  return -1;
1689 
1690  return l;
1691 }
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1953

References len, and pg_utf8_islegal().

Referenced by pg_utf8_verifystr().

◆ pg_utf8_verifystr()

static int pg_utf8_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1855 of file wchar.c.

1856 {
1857  const unsigned char *start = s;
1858  const int orig_len = len;
1859  uint32 state = BGN;
1860 
1861 /*
1862  * With a stride of two vector widths, gcc will unroll the loop. Even if
1863  * the compiler can unroll a longer loop, it's not worth it because we
1864  * must fall back to the byte-wise algorithm if we find any non-ASCII.
1865  */
1866 #define STRIDE_LENGTH (2 * sizeof(Vector8))
1867 
1868  if (len >= STRIDE_LENGTH)
1869  {
1870  while (len >= STRIDE_LENGTH)
1871  {
1872  /*
1873  * If the chunk is all ASCII, we can skip the full UTF-8 check,
1874  * but we must first check for a non-END state, which means the
1875  * previous chunk ended in the middle of a multibyte sequence.
1876  */
1877  if (state != END || !is_valid_ascii(s, STRIDE_LENGTH))
1878  utf8_advance(s, &state, STRIDE_LENGTH);
1879 
1880  s += STRIDE_LENGTH;
1881  len -= STRIDE_LENGTH;
1882  }
1883 
1884  /* The error state persists, so we only need to check for it here. */
1885  if (state == ERR)
1886  {
1887  /*
1888  * Start over from the beginning with the slow path so we can
1889  * count the valid bytes.
1890  */
1891  len = orig_len;
1892  s = start;
1893  }
1894  else if (state != END)
1895  {
1896  /*
1897  * The fast path exited in the middle of a multibyte sequence.
1898  * Walk backwards to find the leading byte so that the slow path
1899  * can resume checking from there. We must always backtrack at
1900  * least one byte, since the current byte could be e.g. an ASCII
1901  * byte after a 2-byte lead, which is invalid.
1902  */
1903  do
1904  {
1905  Assert(s > start);
1906  s--;
1907  len++;
1908  Assert(IS_HIGHBIT_SET(*s));
1909  } while (pg_utf_mblen(s) <= 1);
1910  }
1911  }
1912 
1913  /* check remaining bytes */
1914  while (len > 0)
1915  {
1916  int l;
1917 
1918  /* fast path for ASCII-subset characters */
1919  if (!IS_HIGHBIT_SET(*s))
1920  {
1921  if (*s == '\0')
1922  break;
1923  l = 1;
1924  }
1925  else
1926  {
1927  l = pg_utf8_verifychar(s, len);
1928  if (l == -1)
1929  break;
1930  }
1931  s += l;
1932  len -= l;
1933  }
1934 
1935  return s - start;
1936 }
static bool is_valid_ascii(const unsigned char *s, int len)
Definition: ascii.h:25
state
Definition: regguts.h:323
#define END
Definition: wchar.c:1756
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:517
#define ERR
Definition: wchar.c:1743
static int pg_utf8_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1665
static void utf8_advance(const unsigned char *s, uint32 *state, int len)
Definition: wchar.c:1837
#define BGN
Definition: wchar.c:1745
#define STRIDE_LENGTH

References Assert, BGN, END, ERR, IS_HIGHBIT_SET, is_valid_ascii(), len, pg_utf8_verifychar(), pg_utf_mblen(), start, STRIDE_LENGTH, and utf8_advance().

◆ pg_utf_dsplen()

static int pg_utf_dsplen ( const unsigned char *  s)
static

Definition at line 641 of file wchar.c.

642 {
643  return ucs_wcwidth(utf8_to_unicode(s));
644 }
static pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: mbprint.c:53
static int ucs_wcwidth(pg_wchar ucs)
Definition: wchar.c:607

References ucs_wcwidth(), and utf8_to_unicode().

◆ pg_utf_mblen()

int pg_utf_mblen ( const unsigned char *  s)

Definition at line 517 of file wchar.c.

518 {
519  int len;
520 
521  if ((*s & 0x80) == 0)
522  len = 1;
523  else if ((*s & 0xe0) == 0xc0)
524  len = 2;
525  else if ((*s & 0xf0) == 0xe0)
526  len = 3;
527  else if ((*s & 0xf8) == 0xf0)
528  len = 4;
529 #ifdef NOT_USED
530  else if ((*s & 0xfc) == 0xf8)
531  len = 5;
532  else if ((*s & 0xfe) == 0xfc)
533  len = 6;
534 #endif
535  else
536  len = 1;
537  return len;
538 }

References len.

Referenced by pg_utf8_verifystr(), and pg_wchar2utf_with_len().

◆ pg_wchar2euc_with_len()

static int pg_wchar2euc_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 356 of file wchar.c.

357 {
358  int cnt = 0;
359 
360  while (len > 0 && *from)
361  {
362  unsigned char c;
363 
364  if ((c = (*from >> 24)))
365  {
366  *to++ = c;
367  *to++ = (*from >> 16) & 0xff;
368  *to++ = (*from >> 8) & 0xff;
369  *to++ = *from & 0xff;
370  cnt += 4;
371  }
372  else if ((c = (*from >> 16)))
373  {
374  *to++ = c;
375  *to++ = (*from >> 8) & 0xff;
376  *to++ = *from & 0xff;
377  cnt += 3;
378  }
379  else if ((c = (*from >> 8)))
380  {
381  *to++ = c;
382  *to++ = *from & 0xff;
383  cnt += 2;
384  }
385  else
386  {
387  *to++ = *from;
388  cnt++;
389  }
390  from++;
391  len--;
392  }
393  *to = 0;
394  return cnt;
395 }

References len.

◆ pg_wchar2mule_with_len()

static int pg_wchar2mule_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 706 of file wchar.c.

707 {
708  int cnt = 0;
709 
710  while (len > 0 && *from)
711  {
712  unsigned char lb;
713 
714  lb = (*from >> 16) & 0xff;
715  if (IS_LC1(lb))
716  {
717  *to++ = lb;
718  *to++ = *from & 0xff;
719  cnt += 2;
720  }
721  else if (IS_LC2(lb))
722  {
723  *to++ = lb;
724  *to++ = (*from >> 8) & 0xff;
725  *to++ = *from & 0xff;
726  cnt += 3;
727  }
728  else if (IS_LCPRV1_A_RANGE(lb))
729  {
730  *to++ = LCPRV1_A;
731  *to++ = lb;
732  *to++ = *from & 0xff;
733  cnt += 3;
734  }
735  else if (IS_LCPRV1_B_RANGE(lb))
736  {
737  *to++ = LCPRV1_B;
738  *to++ = lb;
739  *to++ = *from & 0xff;
740  cnt += 3;
741  }
742  else if (IS_LCPRV2_A_RANGE(lb))
743  {
744  *to++ = LCPRV2_A;
745  *to++ = lb;
746  *to++ = (*from >> 8) & 0xff;
747  *to++ = *from & 0xff;
748  cnt += 4;
749  }
750  else if (IS_LCPRV2_B_RANGE(lb))
751  {
752  *to++ = LCPRV2_B;
753  *to++ = lb;
754  *to++ = (*from >> 8) & 0xff;
755  *to++ = *from & 0xff;
756  cnt += 4;
757  }
758  else
759  {
760  *to++ = *from & 0xff;
761  cnt += 1;
762  }
763  from++;
764  len--;
765  }
766  *to = 0;
767  return cnt;
768 }
#define LCPRV1_A
Definition: pg_wchar.h:150
#define LCPRV1_B
Definition: pg_wchar.h:151
#define LCPRV2_A
Definition: pg_wchar.h:162
#define IS_LCPRV2_B_RANGE(c)
Definition: pg_wchar.h:167
#define IS_LCPRV1_A_RANGE(c)
Definition: pg_wchar.h:153
#define IS_LCPRV1_B_RANGE(c)
Definition: pg_wchar.h:155
#define IS_LCPRV2_A_RANGE(c)
Definition: pg_wchar.h:165
#define LCPRV2_B
Definition: pg_wchar.h:163

References IS_LC1, IS_LC2, IS_LCPRV1_A_RANGE, IS_LCPRV1_B_RANGE, IS_LCPRV2_A_RANGE, IS_LCPRV2_B_RANGE, LCPRV1_A, LCPRV1_B, LCPRV2_A, LCPRV2_B, and len.

◆ pg_wchar2single_with_len()

static int pg_wchar2single_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 840 of file wchar.c.

841 {
842  int cnt = 0;
843 
844  while (len > 0 && *from)
845  {
846  *to++ = *from++;
847  len--;
848  cnt++;
849  }
850  *to = 0;
851  return cnt;
852 }

References len.

◆ pg_wchar2utf_with_len()

static int pg_wchar2utf_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 486 of file wchar.c.

487 {
488  int cnt = 0;
489 
490  while (len > 0 && *from)
491  {
492  int char_len;
493 
494  unicode_to_utf8(*from, to);
495  char_len = pg_utf_mblen(to);
496  cnt += char_len;
497  to += char_len;
498  from++;
499  len--;
500  }
501  *to = 0;
502  return cnt;
503 }
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: pg_wchar.h:575

References len, pg_utf_mblen(), and unicode_to_utf8().

◆ ucs_wcwidth()

static int ucs_wcwidth ( pg_wchar  ucs)
static

Definition at line 607 of file wchar.c.

608 {
611 
612  /* test for 8-bit control characters */
613  if (ucs == 0)
614  return 0;
615 
616  if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
617  return -1;
618 
619  /*
620  * binary search in table of non-spacing characters
621  *
622  * XXX: In the official Unicode sources, it is possible for a character to
623  * be described as both non-spacing and wide at the same time. As of
624  * Unicode 13.0, treating the non-spacing property as the determining
625  * factor for display width leads to the correct behavior, so do that
626  * search first.
627  */
628  if (mbbisearch(ucs, nonspacing,
629  sizeof(nonspacing) / sizeof(struct mbinterval) - 1))
630  return 0;
631 
632  /* binary search in table of wide characters */
633  if (mbbisearch(ucs, east_asian_fw,
634  sizeof(east_asian_fw) / sizeof(struct mbinterval) - 1))
635  return 2;
636 
637  return 1;
638 }
static const struct mbinterval east_asian_fw[]
static const struct mbinterval nonspacing[]
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
Definition: wchar.c:560

References east_asian_fw, mbbisearch(), and nonspacing.

Referenced by pg_utf_dsplen().

◆ utf8_advance()

static void utf8_advance ( const unsigned char *  s,
uint32 *  state,
int  len 
)
static

Definition at line 1837 of file wchar.c.

1838 {
1839  /* Note: We deliberately don't check the state's value here. */
1840  while (len > 0)
1841  {
1842  /*
1843  * It's important that the mask value is 31: In most instruction sets,
1844  * a shift by a 32-bit operand is understood to be a shift by its mod
1845  * 32, so the compiler should elide the mask operation.
1846  */
1847  *state = Utf8Transition[*s++] >> (*state & 31);
1848  len--;
1849  }
1850 
1851  *state &= 31;
1852 }
static const uint32 Utf8Transition[256]
Definition: wchar.c:1779

References len, and Utf8Transition.

Referenced by pg_utf8_verifystr().

Variable Documentation

◆ pg_wchar_table

◆ Utf8Transition

const uint32 Utf8Transition[256]
static

Definition at line 1779 of file wchar.c.

Referenced by utf8_advance().