PostgreSQL Source Code  git master
wchar.c File Reference
Include dependency graph for wchar.c:

Go to the source code of this file.

Data Structures

struct  mbinterval
 

Macros

#define IS_EUC_RANGE_VALID(c)   ((c) >= 0xa1 && (c) <= 0xfe)
 
#define pg_euccn_verifychar   pg_euckr_verifychar
 
#define pg_euccn_verifystr   pg_euckr_verifystr
 
#define ERR   0
 
#define BGN   11
 
#define CS1   16
 
#define CS2   1
 
#define CS3   5
 
#define P3A   6 /* Lead was E0, check for 3-byte overlong */
 
#define P3B   20 /* Lead was ED, check for surrogate */
 
#define P4A   25 /* Lead was F0, check for 4-byte overlong */
 
#define P4B   30 /* Lead was F4, check for too-large */
 
#define END   BGN
 
#define ASC   (END << BGN)
 
#define L2A   (CS1 << BGN)
 
#define L3A   (P3A << BGN)
 
#define L3B   (CS2 << BGN)
 
#define L3C   (P3B << BGN)
 
#define L4A   (P4A << BGN)
 
#define L4B   (CS3 << BGN)
 
#define L4C   (P4B << BGN)
 
#define CR1   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)
 
#define CR2   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)
 
#define CR3   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)
 
#define ILL   ERR
 
#define STRIDE_LENGTH   (2 * sizeof(Vector8))
 

Functions

static int pg_ascii2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_ascii_mblen (const unsigned char *s)
 
static int pg_ascii_dsplen (const unsigned char *s)
 
static int pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euc_mblen (const unsigned char *s)
 
static int pg_euc_dsplen (const unsigned char *s)
 
static int pg_eucjp2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_eucjp_mblen (const unsigned char *s)
 
static int pg_eucjp_dsplen (const unsigned char *s)
 
static int pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euckr_mblen (const unsigned char *s)
 
static int pg_euckr_dsplen (const unsigned char *s)
 
static int pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euccn_mblen (const unsigned char *s)
 
static int pg_euccn_dsplen (const unsigned char *s)
 
static int pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euctw_mblen (const unsigned char *s)
 
static int pg_euctw_dsplen (const unsigned char *s)
 
static int pg_wchar2euc_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_johab_mblen (const unsigned char *s)
 
static int pg_johab_dsplen (const unsigned char *s)
 
static int pg_utf2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
unsigned char * unicode_to_utf8 (pg_wchar c, unsigned char *utf8string)
 
static int pg_wchar2utf_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_utf_mblen (const unsigned char *s)
 
static int mbbisearch (pg_wchar ucs, const struct mbinterval *table, int max)
 
static int ucs_wcwidth (pg_wchar ucs)
 
pg_wchar utf8_to_unicode (const unsigned char *c)
 
static int pg_utf_dsplen (const unsigned char *s)
 
static int pg_mule2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2mule_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_mule_mblen (const unsigned char *s)
 
static int pg_mule_dsplen (const unsigned char *s)
 
static int pg_latin12wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2single_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_latin1_mblen (const unsigned char *s)
 
static int pg_latin1_dsplen (const unsigned char *s)
 
static int pg_sjis_mblen (const unsigned char *s)
 
static int pg_sjis_dsplen (const unsigned char *s)
 
static int pg_big5_mblen (const unsigned char *s)
 
static int pg_big5_dsplen (const unsigned char *s)
 
static int pg_gbk_mblen (const unsigned char *s)
 
static int pg_gbk_dsplen (const unsigned char *s)
 
static int pg_uhc_mblen (const unsigned char *s)
 
static int pg_uhc_dsplen (const unsigned char *s)
 
static int pg_gb18030_mblen (const unsigned char *s)
 
static int pg_gb18030_dsplen (const unsigned char *s)
 
static int pg_ascii_verifychar (const unsigned char *s, int len)
 
static int pg_ascii_verifystr (const unsigned char *s, int len)
 
static int pg_eucjp_verifychar (const unsigned char *s, int len)
 
static int pg_eucjp_verifystr (const unsigned char *s, int len)
 
static int pg_euckr_verifychar (const unsigned char *s, int len)
 
static int pg_euckr_verifystr (const unsigned char *s, int len)
 
static int pg_euctw_verifychar (const unsigned char *s, int len)
 
static int pg_euctw_verifystr (const unsigned char *s, int len)
 
static int pg_johab_verifychar (const unsigned char *s, int len)
 
static int pg_johab_verifystr (const unsigned char *s, int len)
 
static int pg_mule_verifychar (const unsigned char *s, int len)
 
static int pg_mule_verifystr (const unsigned char *s, int len)
 
static int pg_latin1_verifychar (const unsigned char *s, int len)
 
static int pg_latin1_verifystr (const unsigned char *s, int len)
 
static int pg_sjis_verifychar (const unsigned char *s, int len)
 
static int pg_sjis_verifystr (const unsigned char *s, int len)
 
static int pg_big5_verifychar (const unsigned char *s, int len)
 
static int pg_big5_verifystr (const unsigned char *s, int len)
 
static int pg_gbk_verifychar (const unsigned char *s, int len)
 
static int pg_gbk_verifystr (const unsigned char *s, int len)
 
static int pg_uhc_verifychar (const unsigned char *s, int len)
 
static int pg_uhc_verifystr (const unsigned char *s, int len)
 
static int pg_gb18030_verifychar (const unsigned char *s, int len)
 
static int pg_gb18030_verifystr (const unsigned char *s, int len)
 
static int pg_utf8_verifychar (const unsigned char *s, int len)
 
static void utf8_advance (const unsigned char *s, uint32 *state, int len)
 
static int pg_utf8_verifystr (const unsigned char *s, int len)
 
bool pg_utf8_islegal (const unsigned char *source, int length)
 
int pg_encoding_mblen (int encoding, const char *mbstr)
 
int pg_encoding_mblen_bounded (int encoding, const char *mbstr)
 
int pg_encoding_dsplen (int encoding, const char *mbstr)
 
int pg_encoding_verifymbchar (int encoding, const char *mbstr, int len)
 
int pg_encoding_verifymbstr (int encoding, const char *mbstr, int len)
 
int pg_encoding_max_length (int encoding)
 

Variables

static const uint32 Utf8Transition [256]
 
const pg_wchar_tbl pg_wchar_table []
 

Macro Definition Documentation

◆ ASC

#define ASC   (END << BGN)

Definition at line 1822 of file wchar.c.

◆ BGN

#define BGN   11

Definition at line 1806 of file wchar.c.

◆ CR1

#define CR1   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)

Definition at line 1834 of file wchar.c.

◆ CR2

#define CR2   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)

Definition at line 1835 of file wchar.c.

◆ CR3

#define CR3   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)

Definition at line 1836 of file wchar.c.

◆ CS1

#define CS1   16

Definition at line 1808 of file wchar.c.

◆ CS2

#define CS2   1

Definition at line 1809 of file wchar.c.

◆ CS3

#define CS3   5

Definition at line 1810 of file wchar.c.

◆ END

#define END   BGN

Definition at line 1817 of file wchar.c.

◆ ERR

#define ERR   0

Definition at line 1804 of file wchar.c.

◆ ILL

#define ILL   ERR

Definition at line 1838 of file wchar.c.

◆ IS_EUC_RANGE_VALID

#define IS_EUC_RANGE_VALID(c)   ((c) >= 0xa1 && (c) <= 0xfe)

Definition at line 1119 of file wchar.c.

◆ L2A

#define L2A   (CS1 << BGN)

Definition at line 1824 of file wchar.c.

◆ L3A

#define L3A   (P3A << BGN)

Definition at line 1826 of file wchar.c.

◆ L3B

#define L3B   (CS2 << BGN)

Definition at line 1827 of file wchar.c.

◆ L3C

#define L3C   (P3B << BGN)

Definition at line 1828 of file wchar.c.

◆ L4A

#define L4A   (P4A << BGN)

Definition at line 1830 of file wchar.c.

◆ L4B

#define L4B   (CS3 << BGN)

Definition at line 1831 of file wchar.c.

◆ L4C

#define L4C   (P4B << BGN)

Definition at line 1832 of file wchar.c.

◆ P3A

#define P3A   6 /* Lead was E0, check for 3-byte overlong */

Definition at line 1812 of file wchar.c.

◆ P3B

#define P3B   20 /* Lead was ED, check for surrogate */

Definition at line 1813 of file wchar.c.

◆ P4A

#define P4A   25 /* Lead was F0, check for 4-byte overlong */

Definition at line 1814 of file wchar.c.

◆ P4B

#define P4B   30 /* Lead was F4, check for too-large */

Definition at line 1815 of file wchar.c.

◆ pg_euccn_verifychar

#define pg_euccn_verifychar   pg_euckr_verifychar

Definition at line 1264 of file wchar.c.

◆ pg_euccn_verifystr

#define pg_euccn_verifystr   pg_euckr_verifystr

Definition at line 1265 of file wchar.c.

◆ STRIDE_LENGTH

#define STRIDE_LENGTH   (2 * sizeof(Vector8))

Function Documentation

◆ mbbisearch()

static int mbbisearch ( pg_wchar  ucs,
const struct mbinterval *  table,
int  max 
)
static

Definition at line 593 of file wchar.c.

594 {
595  int min = 0;
596  int mid;
597 
598  if (ucs < table[0].first || ucs > table[max].last)
599  return 0;
600  while (max >= min)
601  {
602  mid = (min + max) / 2;
603  if (ucs > table[mid].last)
604  min = mid + 1;
605  else if (ucs < table[mid].first)
606  max = mid - 1;
607  else
608  return 1;
609  }
610 
611  return 0;
612 }

Referenced by ucs_wcwidth().

◆ pg_ascii2wchar_with_len()

static int pg_ascii2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 49 of file wchar.c.

50 {
51  int cnt = 0;
52 
53  while (len > 0 && *from)
54  {
55  *to++ = *from++;
56  len--;
57  cnt++;
58  }
59  *to = 0;
60  return cnt;
61 }
const void size_t len

References len.

◆ pg_ascii_dsplen()

static int pg_ascii_dsplen ( const unsigned char *  s)
static

Definition at line 70 of file wchar.c.

71 {
72  if (*s == '\0')
73  return 0;
74  if (*s < 0x20 || *s == 0x7f)
75  return -1;
76 
77  return 1;
78 }

Referenced by pg_big5_dsplen(), pg_euc_dsplen(), pg_euccn_dsplen(), pg_eucjp_dsplen(), pg_euctw_dsplen(), pg_gb18030_dsplen(), pg_gbk_dsplen(), pg_latin1_dsplen(), pg_sjis_dsplen(), and pg_uhc_dsplen().

◆ pg_ascii_mblen()

static int pg_ascii_mblen ( const unsigned char *  s)
static

Definition at line 64 of file wchar.c.

65 {
66  return 1;
67 }

◆ pg_ascii_verifychar()

static int pg_ascii_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1103 of file wchar.c.

1104 {
1105  return 1;
1106 }

◆ pg_ascii_verifystr()

static int pg_ascii_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1109 of file wchar.c.

1110 {
1111  const unsigned char *nullpos = memchr(s, 0, len);
1112 
1113  if (nullpos == NULL)
1114  return len;
1115  else
1116  return nullpos - s;
1117 }

References len.

◆ pg_big5_dsplen()

static int pg_big5_dsplen ( const unsigned char *  s)
static

Definition at line 974 of file wchar.c.

975 {
976  int len;
977 
978  if (IS_HIGHBIT_SET(*s))
979  len = 2; /* kanji? */
980  else
981  len = pg_ascii_dsplen(s); /* should be ASCII */
982  return len;
983 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1142
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:70

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_big5_mblen()

static int pg_big5_mblen ( const unsigned char *  s)
static

Definition at line 962 of file wchar.c.

963 {
964  int len;
965 
966  if (IS_HIGHBIT_SET(*s))
967  len = 2; /* kanji? */
968  else
969  len = 1; /* should be ASCII */
970  return len;
971 }

References IS_HIGHBIT_SET, and len.

Referenced by pg_big5_verifychar().

◆ pg_big5_verifychar()

static int pg_big5_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1519 of file wchar.c.

1520 {
1521  int l,
1522  mbl;
1523 
1524  l = mbl = pg_big5_mblen(s);
1525 
1526  if (len < l)
1527  return -1;
1528 
1529  while (--l > 0)
1530  {
1531  if (*++s == '\0')
1532  return -1;
1533  }
1534 
1535  return mbl;
1536 }
static int pg_big5_mblen(const unsigned char *s)
Definition: wchar.c:962

References len, and pg_big5_mblen().

Referenced by pg_big5_verifystr().

◆ pg_big5_verifystr()

static int pg_big5_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1539 of file wchar.c.

1540 {
1541  const unsigned char *start = s;
1542 
1543  while (len > 0)
1544  {
1545  int l;
1546 
1547  /* fast path for ASCII-subset characters */
1548  if (!IS_HIGHBIT_SET(*s))
1549  {
1550  if (*s == '\0')
1551  break;
1552  l = 1;
1553  }
1554  else
1555  {
1556  l = pg_big5_verifychar(s, len);
1557  if (l == -1)
1558  break;
1559  }
1560  s += l;
1561  len -= l;
1562  }
1563 
1564  return s - start;
1565 }
static int pg_big5_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1519

References IS_HIGHBIT_SET, len, and pg_big5_verifychar().

◆ pg_encoding_dsplen()

int pg_encoding_dsplen ( int  encoding,
const char *  mbstr 
)

Definition at line 2151 of file wchar.c.

2152 {
2153  return (PG_VALID_ENCODING(encoding) ?
2154  pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
2155  pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
2156 }
int32 encoding
Definition: pg_database.h:41
@ PG_SQL_ASCII
Definition: pg_wchar.h:226
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:287
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:2076

References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by PQdsplen(), and reportErrorPosition().

◆ pg_encoding_max_length()

int pg_encoding_max_length ( int  encoding)

Definition at line 2188 of file wchar.c.

2189 {
2190  Assert(PG_VALID_ENCODING(encoding));
2191 
2192  return pg_wchar_table[encoding].maxmblen;
2193 }
Assert(fmt[strlen(fmt) - 1] !='\n')
int maxmblen
Definition: pg_wchar.h:386

References Assert(), encoding, pg_wchar_tbl::maxmblen, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by ascii(), chr(), CopyConvertBuf(), pg_encoding_mbcliplen(), pg_verify_mbstr_len(), reportErrorPosition(), and type_maximum_size().

◆ pg_encoding_mblen()

int pg_encoding_mblen ( int  encoding,
const char *  mbstr 
)

Definition at line 2130 of file wchar.c.

2131 {
2132  return (PG_VALID_ENCODING(encoding) ?
2133  pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) :
2134  pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
2135 }

References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), pg_encoding_mblen_bounded(), PQescapeInternal(), PQescapeStringInternal(), PQmblen(), PQmblenBounded(), report_invalid_encoding(), and report_untranslatable_char().

◆ pg_encoding_mblen_bounded()

int pg_encoding_mblen_bounded ( int  encoding,
const char *  mbstr 
)

Definition at line 2142 of file wchar.c.

2143 {
2144  return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr));
2145 }
size_t strnlen(const char *str, size_t maxlen)
Definition: strnlen.c:26
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:2130

References encoding, pg_encoding_mblen(), and strnlen().

◆ pg_encoding_verifymbchar()

int pg_encoding_verifymbchar ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 2164 of file wchar.c.

2165 {
2166  return (PG_VALID_ENCODING(encoding) ?
2167  pg_wchar_table[encoding].mbverifychar((const unsigned char *) mbstr, len) :
2168  pg_wchar_table[PG_SQL_ASCII].mbverifychar((const unsigned char *) mbstr, len));
2169 }

References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by big52euc_tw(), big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), shift_jis_20042euc_jis_2004(), and sjis2euc_jp().

◆ pg_encoding_verifymbstr()

int pg_encoding_verifymbstr ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 2177 of file wchar.c.

2178 {
2179  return (PG_VALID_ENCODING(encoding) ?
2180  pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len) :
2181  pg_wchar_table[PG_SQL_ASCII].mbverifystr((const unsigned char *) mbstr, len));
2182 }

References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by add_file_to_manifest(), CopyConvertBuf(), and test_enc_conversion().

◆ pg_euc2wchar_with_len()

static int pg_euc2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 84 of file wchar.c.

85 {
86  int cnt = 0;
87 
88  while (len > 0 && *from)
89  {
90  if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte
91  * KANA") */
92  {
93  from++;
94  *to = (SS2 << 8) | *from++;
95  len -= 2;
96  }
97  else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
98  {
99  from++;
100  *to = (SS3 << 16) | (*from++ << 8);
101  *to |= *from++;
102  len -= 3;
103  }
104  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
105  {
106  *to = *from++ << 8;
107  *to |= *from++;
108  len -= 2;
109  }
110  else /* must be ASCII */
111  {
112  *to = *from++;
113  len--;
114  }
115  to++;
116  cnt++;
117  }
118  *to = 0;
119  return cnt;
120 }
#define SS2
Definition: pg_wchar.h:38
#define SS3
Definition: pg_wchar.h:39

References IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp2wchar_with_len(), and pg_euckr2wchar_with_len().

◆ pg_euc_dsplen()

static int pg_euc_dsplen ( const unsigned char *  s)
inline static

Definition at line 139 of file wchar.c.

140 {
141  int len;
142 
143  if (*s == SS2)
144  len = 2;
145  else if (*s == SS3)
146  len = 2;
147  else if (IS_HIGHBIT_SET(*s))
148  len = 2;
149  else
150  len = pg_ascii_dsplen(s);
151  return len;
152 }

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

Referenced by pg_euckr_dsplen(), and pg_johab_dsplen().

◆ pg_euc_mblen()

static int pg_euc_mblen ( const unsigned char *  s)
inline static

Definition at line 123 of file wchar.c.

124 {
125  int len;
126 
127  if (*s == SS2)
128  len = 2;
129  else if (*s == SS3)
130  len = 3;
131  else if (IS_HIGHBIT_SET(*s))
132  len = 2;
133  else
134  len = 1;
135  return len;
136 }

References IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp_mblen(), pg_euckr_mblen(), and pg_johab_mblen().

◆ pg_euccn2wchar_with_len()

static int pg_euccn2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 211 of file wchar.c.

212 {
213  int cnt = 0;
214 
215  while (len > 0 && *from)
216  {
217  if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
218  {
219  from++;
220  *to = (SS2 << 16) | (*from++ << 8);
221  *to |= *from++;
222  len -= 3;
223  }
224  else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */
225  {
226  from++;
227  *to = (SS3 << 16) | (*from++ << 8);
228  *to |= *from++;
229  len -= 3;
230  }
231  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
232  {
233  *to = *from++ << 8;
234  *to |= *from++;
235  len -= 2;
236  }
237  else
238  {
239  *to = *from++;
240  len--;
241  }
242  to++;
243  cnt++;
244  }
245  *to = 0;
246  return cnt;
247 }

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euccn_dsplen()

static int pg_euccn_dsplen ( const unsigned char *  s)
static

Definition at line 262 of file wchar.c.

263 {
264  int len;
265 
266  if (IS_HIGHBIT_SET(*s))
267  len = 2;
268  else
269  len = pg_ascii_dsplen(s);
270  return len;
271 }

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_euccn_mblen()

static int pg_euccn_mblen ( const unsigned char *  s)
static

Definition at line 250 of file wchar.c.

251 {
252  int len;
253 
254  if (IS_HIGHBIT_SET(*s))
255  len = 2;
256  else
257  len = 1;
258  return len;
259 }

References IS_HIGHBIT_SET, and len.

◆ pg_eucjp2wchar_with_len()

static int pg_eucjp2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 158 of file wchar.c.

159 {
160  return pg_euc2wchar_with_len(from, to, len);
161 }
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition: wchar.c:84

References len, and pg_euc2wchar_with_len().

◆ pg_eucjp_dsplen()

static int pg_eucjp_dsplen ( const unsigned char *  s)
static

Definition at line 170 of file wchar.c.

171 {
172  int len;
173 
174  if (*s == SS2)
175  len = 1;
176  else if (*s == SS3)
177  len = 2;
178  else if (IS_HIGHBIT_SET(*s))
179  len = 2;
180  else
181  len = pg_ascii_dsplen(s);
182  return len;
183 }

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

◆ pg_eucjp_mblen()

static int pg_eucjp_mblen ( const unsigned char *  s)
static

Definition at line 164 of file wchar.c.

165 {
166  return pg_euc_mblen(s);
167 }
static int pg_euc_mblen(const unsigned char *s)
Definition: wchar.c:123

References pg_euc_mblen().

◆ pg_eucjp_verifychar()

static int pg_eucjp_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1122 of file wchar.c.

1123 {
1124  int l;
1125  unsigned char c1,
1126  c2;
1127 
1128  c1 = *s++;
1129 
1130  switch (c1)
1131  {
1132  case SS2: /* JIS X 0201 */
1133  l = 2;
1134  if (l > len)
1135  return -1;
1136  c2 = *s++;
1137  if (c2 < 0xa1 || c2 > 0xdf)
1138  return -1;
1139  break;
1140 
1141  case SS3: /* JIS X 0212 */
1142  l = 3;
1143  if (l > len)
1144  return -1;
1145  c2 = *s++;
1146  if (!IS_EUC_RANGE_VALID(c2))
1147  return -1;
1148  c2 = *s++;
1149  if (!IS_EUC_RANGE_VALID(c2))
1150  return -1;
1151  break;
1152 
1153  default:
1154  if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1155  {
1156  l = 2;
1157  if (l > len)
1158  return -1;
1159  if (!IS_EUC_RANGE_VALID(c1))
1160  return -1;
1161  c2 = *s++;
1162  if (!IS_EUC_RANGE_VALID(c2))
1163  return -1;
1164  }
1165  else
1166  /* must be ASCII */
1167  {
1168  l = 1;
1169  }
1170  break;
1171  }
1172 
1173  return l;
1174 }
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1119

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp_verifystr().

◆ pg_eucjp_verifystr()

static int pg_eucjp_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1177 of file wchar.c.

1178 {
1179  const unsigned char *start = s;
1180 
1181  while (len > 0)
1182  {
1183  int l;
1184 
1185  /* fast path for ASCII-subset characters */
1186  if (!IS_HIGHBIT_SET(*s))
1187  {
1188  if (*s == '\0')
1189  break;
1190  l = 1;
1191  }
1192  else
1193  {
1194  l = pg_eucjp_verifychar(s, len);
1195  if (l == -1)
1196  break;
1197  }
1198  s += l;
1199  len -= l;
1200  }
1201 
1202  return s - start;
1203 }
static int pg_eucjp_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1122

References IS_HIGHBIT_SET, len, and pg_eucjp_verifychar().

◆ pg_euckr2wchar_with_len()

static int pg_euckr2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 189 of file wchar.c.

190 {
191  return pg_euc2wchar_with_len(from, to, len);
192 }

References len, and pg_euc2wchar_with_len().

◆ pg_euckr_dsplen()

static int pg_euckr_dsplen ( const unsigned char *  s)
static

Definition at line 201 of file wchar.c.

202 {
203  return pg_euc_dsplen(s);
204 }
static int pg_euc_dsplen(const unsigned char *s)
Definition: wchar.c:139

References pg_euc_dsplen().

◆ pg_euckr_mblen()

static int pg_euckr_mblen ( const unsigned char *  s)
static

Definition at line 195 of file wchar.c.

196 {
197  return pg_euc_mblen(s);
198 }

References pg_euc_mblen().

◆ pg_euckr_verifychar()

static int pg_euckr_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1206 of file wchar.c.

1207 {
1208  int l;
1209  unsigned char c1,
1210  c2;
1211 
1212  c1 = *s++;
1213 
1214  if (IS_HIGHBIT_SET(c1))
1215  {
1216  l = 2;
1217  if (l > len)
1218  return -1;
1219  if (!IS_EUC_RANGE_VALID(c1))
1220  return -1;
1221  c2 = *s++;
1222  if (!IS_EUC_RANGE_VALID(c2))
1223  return -1;
1224  }
1225  else
1226  /* must be ASCII */
1227  {
1228  l = 1;
1229  }
1230 
1231  return l;
1232 }

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, and len.

Referenced by pg_euckr_verifystr().

◆ pg_euckr_verifystr()

static int pg_euckr_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1235 of file wchar.c.

1236 {
1237  const unsigned char *start = s;
1238 
1239  while (len > 0)
1240  {
1241  int l;
1242 
1243  /* fast path for ASCII-subset characters */
1244  if (!IS_HIGHBIT_SET(*s))
1245  {
1246  if (*s == '\0')
1247  break;
1248  l = 1;
1249  }
1250  else
1251  {
1252  l = pg_euckr_verifychar(s, len);
1253  if (l == -1)
1254  break;
1255  }
1256  s += l;
1257  len -= l;
1258  }
1259 
1260  return s - start;
1261 }
static int pg_euckr_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1206

References IS_HIGHBIT_SET, len, and pg_euckr_verifychar().

◆ pg_euctw2wchar_with_len()

static int pg_euctw2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 278 of file wchar.c.

279 {
280  int cnt = 0;
281 
282  while (len > 0 && *from)
283  {
284  if (*from == SS2 && len >= 4) /* code set 2 */
285  {
286  from++;
287  *to = (((uint32) SS2) << 24) | (*from++ << 16);
288  *to |= *from++ << 8;
289  *to |= *from++;
290  len -= 4;
291  }
292  else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
293  {
294  from++;
295  *to = (SS3 << 16) | (*from++ << 8);
296  *to |= *from++;
297  len -= 3;
298  }
299  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */
300  {
301  *to = *from++ << 8;
302  *to |= *from++;
303  len -= 2;
304  }
305  else
306  {
307  *to = *from++;
308  len--;
309  }
310  to++;
311  cnt++;
312  }
313  *to = 0;
314  return cnt;
315 }
unsigned int uint32
Definition: c.h:493

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euctw_dsplen()

static int pg_euctw_dsplen ( const unsigned char *  s)
static

Definition at line 334 of file wchar.c.

335 {
336  int len;
337 
338  if (*s == SS2)
339  len = 2;
340  else if (*s == SS3)
341  len = 2;
342  else if (IS_HIGHBIT_SET(*s))
343  len = 2;
344  else
345  len = pg_ascii_dsplen(s);
346  return len;
347 }

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

◆ pg_euctw_mblen()

static int pg_euctw_mblen ( const unsigned char *  s)
static

Definition at line 318 of file wchar.c.

319 {
320  int len;
321 
322  if (*s == SS2)
323  len = 4;
324  else if (*s == SS3)
325  len = 3;
326  else if (IS_HIGHBIT_SET(*s))
327  len = 2;
328  else
329  len = 1;
330  return len;
331 }

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euctw_verifychar()

static int pg_euctw_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1268 of file wchar.c.

1269 {
1270  int l;
1271  unsigned char c1,
1272  c2;
1273 
1274  c1 = *s++;
1275 
1276  switch (c1)
1277  {
1278  case SS2: /* CNS 11643 Plane 1-7 */
1279  l = 4;
1280  if (l > len)
1281  return -1;
1282  c2 = *s++;
1283  if (c2 < 0xa1 || c2 > 0xa7)
1284  return -1;
1285  c2 = *s++;
1286  if (!IS_EUC_RANGE_VALID(c2))
1287  return -1;
1288  c2 = *s++;
1289  if (!IS_EUC_RANGE_VALID(c2))
1290  return -1;
1291  break;
1292 
1293  case SS3: /* unused */
1294  return -1;
1295 
1296  default:
1297  if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */
1298  {
1299  l = 2;
1300  if (l > len)
1301  return -1;
1302  /* no further range check on c1? */
1303  c2 = *s++;
1304  if (!IS_EUC_RANGE_VALID(c2))
1305  return -1;
1306  }
1307  else
1308  /* must be ASCII */
1309  {
1310  l = 1;
1311  }
1312  break;
1313  }
1314  return l;
1315 }

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_euctw_verifystr().

◆ pg_euctw_verifystr()

static int pg_euctw_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1318 of file wchar.c.

1319 {
1320  const unsigned char *start = s;
1321 
1322  while (len > 0)
1323  {
1324  int l;
1325 
1326  /* fast path for ASCII-subset characters */
1327  if (!IS_HIGHBIT_SET(*s))
1328  {
1329  if (*s == '\0')
1330  break;
1331  l = 1;
1332  }
1333  else
1334  {
1335  l = pg_euctw_verifychar(s, len);
1336  if (l == -1)
1337  break;
1338  }
1339  s += l;
1340  len -= l;
1341  }
1342 
1343  return s - start;
1344 }
static int pg_euctw_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1268

References IS_HIGHBIT_SET, len, and pg_euctw_verifychar().

◆ pg_gb18030_dsplen()

static int pg_gb18030_dsplen ( const unsigned char *  s)
static

Definition at line 1069 of file wchar.c.

1070 {
1071  int len;
1072 
1073  if (IS_HIGHBIT_SET(*s))
1074  len = 2;
1075  else
1076  len = pg_ascii_dsplen(s); /* ASCII */
1077  return len;
1078 }

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_gb18030_mblen()

static int pg_gb18030_mblen ( const unsigned char *  s)
static

Definition at line 1055 of file wchar.c.

1056 {
1057  int len;
1058 
1059  if (!IS_HIGHBIT_SET(*s))
1060  len = 1; /* ASCII */
1061  else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1062  len = 4;
1063  else
1064  len = 2;
1065  return len;
1066 }

References IS_HIGHBIT_SET, and len.

◆ pg_gb18030_verifychar()

static int pg_gb18030_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1666 of file wchar.c.

1667 {
1668  int l;
1669 
1670  if (!IS_HIGHBIT_SET(*s))
1671  l = 1; /* ASCII */
1672  else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1673  {
1674  /* Should be 4-byte, validate remaining bytes */
1675  if (*s >= 0x81 && *s <= 0xfe &&
1676  *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
1677  *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
1678  l = 4;
1679  else
1680  l = -1;
1681  }
1682  else if (len >= 2 && *s >= 0x81 && *s <= 0xfe)
1683  {
1684  /* Should be 2-byte, validate */
1685  if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
1686  (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
1687  l = 2;
1688  else
1689  l = -1;
1690  }
1691  else
1692  l = -1;
1693  return l;
1694 }

References IS_HIGHBIT_SET, and len.

Referenced by pg_gb18030_verifystr().

◆ pg_gb18030_verifystr()

static int pg_gb18030_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1697 of file wchar.c.

1698 {
1699  const unsigned char *start = s;
1700 
1701  while (len > 0)
1702  {
1703  int l;
1704 
1705  /* fast path for ASCII-subset characters */
1706  if (!IS_HIGHBIT_SET(*s))
1707  {
1708  if (*s == '\0')
1709  break;
1710  l = 1;
1711  }
1712  else
1713  {
1714  l = pg_gb18030_verifychar(s, len);
1715  if (l == -1)
1716  break;
1717  }
1718  s += l;
1719  len -= l;
1720  }
1721 
1722  return s - start;
1723 }
static int pg_gb18030_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1666

References IS_HIGHBIT_SET, len, and pg_gb18030_verifychar().

◆ pg_gbk_dsplen()

static int pg_gbk_dsplen ( const unsigned char *  s)
static

Definition at line 1001 of file wchar.c.

1002 {
1003  int len;
1004 
1005  if (IS_HIGHBIT_SET(*s))
1006  len = 2; /* kanji? */
1007  else
1008  len = pg_ascii_dsplen(s); /* should be ASCII */
1009  return len;
1010 }

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_gbk_mblen()

static int pg_gbk_mblen ( const unsigned char *  s)
static

Definition at line 989 of file wchar.c.

990 {
991  int len;
992 
993  if (IS_HIGHBIT_SET(*s))
994  len = 2; /* kanji? */
995  else
996  len = 1; /* should be ASCII */
997  return len;
998 }

References IS_HIGHBIT_SET, and len.

Referenced by pg_gbk_verifychar().

◆ pg_gbk_verifychar()

static int pg_gbk_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1568 of file wchar.c.

1569 {
1570  int l,
1571  mbl;
1572 
1573  l = mbl = pg_gbk_mblen(s);
1574 
1575  if (len < l)
1576  return -1;
1577 
1578  while (--l > 0)
1579  {
1580  if (*++s == '\0')
1581  return -1;
1582  }
1583 
1584  return mbl;
1585 }
static int pg_gbk_mblen(const unsigned char *s)
Definition: wchar.c:989

References len, and pg_gbk_mblen().

Referenced by pg_gbk_verifystr().

◆ pg_gbk_verifystr()

static int pg_gbk_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1588 of file wchar.c.

1589 {
1590  const unsigned char *start = s;
1591 
1592  while (len > 0)
1593  {
1594  int l;
1595 
1596  /* fast path for ASCII-subset characters */
1597  if (!IS_HIGHBIT_SET(*s))
1598  {
1599  if (*s == '\0')
1600  break;
1601  l = 1;
1602  }
1603  else
1604  {
1605  l = pg_gbk_verifychar(s, len);
1606  if (l == -1)
1607  break;
1608  }
1609  s += l;
1610  len -= l;
1611  }
1612 
1613  return s - start;
1614 }
static int pg_gbk_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1568

References IS_HIGHBIT_SET, len, and pg_gbk_verifychar().

◆ pg_johab_dsplen()

static int pg_johab_dsplen ( const unsigned char *  s)
static

Definition at line 408 of file wchar.c.

409 {
410  return pg_euc_dsplen(s);
411 }

References pg_euc_dsplen().

◆ pg_johab_mblen()

static int pg_johab_mblen ( const unsigned char *  s)
static

Definition at line 402 of file wchar.c.

403 {
404  return pg_euc_mblen(s);
405 }

References pg_euc_mblen().

Referenced by pg_johab_verifychar().

◆ pg_johab_verifychar()

static int pg_johab_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1347 of file wchar.c.

1348 {
1349  int l,
1350  mbl;
1351  unsigned char c;
1352 
1353  l = mbl = pg_johab_mblen(s);
1354 
1355  if (len < l)
1356  return -1;
1357 
1358  if (!IS_HIGHBIT_SET(*s))
1359  return mbl;
1360 
1361  while (--l > 0)
1362  {
1363  c = *++s;
1364  if (!IS_EUC_RANGE_VALID(c))
1365  return -1;
1366  }
1367  return mbl;
1368 }
char * c
static int pg_johab_mblen(const unsigned char *s)
Definition: wchar.c:402

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, and pg_johab_mblen().

Referenced by pg_johab_verifystr().

◆ pg_johab_verifystr()

static int pg_johab_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1371 of file wchar.c.

1372 {
1373  const unsigned char *start = s;
1374 
1375  while (len > 0)
1376  {
1377  int l;
1378 
1379  /* fast path for ASCII-subset characters */
1380  if (!IS_HIGHBIT_SET(*s))
1381  {
1382  if (*s == '\0')
1383  break;
1384  l = 1;
1385  }
1386  else
1387  {
1388  l = pg_johab_verifychar(s, len);
1389  if (l == -1)
1390  break;
1391  }
1392  s += l;
1393  len -= l;
1394  }
1395 
1396  return s - start;
1397 }
static int pg_johab_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1347

References IS_HIGHBIT_SET, len, and pg_johab_verifychar().

◆ pg_latin12wchar_with_len()

static int pg_latin12wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 879 of file wchar.c.

880 {
881  int cnt = 0;
882 
883  while (len > 0 && *from)
884  {
885  *to++ = *from++;
886  len--;
887  cnt++;
888  }
889  *to = 0;
890  return cnt;
891 }

References len.

◆ pg_latin1_dsplen()

static int pg_latin1_dsplen ( const unsigned char *  s)
static

Definition at line 922 of file wchar.c.

923 {
924  return pg_ascii_dsplen(s);
925 }

References pg_ascii_dsplen().

◆ pg_latin1_mblen()

static int pg_latin1_mblen ( const unsigned char *  s)
static

Definition at line 916 of file wchar.c.

917 {
918  return 1;
919 }

◆ pg_latin1_verifychar()

static int pg_latin1_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1450 of file wchar.c.

1451 {
1452  return 1;
1453 }

◆ pg_latin1_verifystr()

static int pg_latin1_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1456 of file wchar.c.

1457 {
1458  const unsigned char *nullpos = memchr(s, 0, len);
1459 
1460  if (nullpos == NULL)
1461  return len;
1462  else
1463  return nullpos - s;
1464 }

References len.

◆ pg_mule2wchar_with_len()

static int pg_mule2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 714 of file wchar.c.

715 {
716  int cnt = 0;
717 
718  while (len > 0 && *from)
719  {
720  if (IS_LC1(*from) && len >= 2)
721  {
722  *to = *from++ << 16;
723  *to |= *from++;
724  len -= 2;
725  }
726  else if (IS_LCPRV1(*from) && len >= 3)
727  {
728  from++;
729  *to = *from++ << 16;
730  *to |= *from++;
731  len -= 3;
732  }
733  else if (IS_LC2(*from) && len >= 3)
734  {
735  *to = *from++ << 16;
736  *to |= *from++ << 8;
737  *to |= *from++;
738  len -= 3;
739  }
740  else if (IS_LCPRV2(*from) && len >= 4)
741  {
742  from++;
743  *to = *from++ << 16;
744  *to |= *from++ << 8;
745  *to |= *from++;
746  len -= 4;
747  }
748  else
749  { /* assume ASCII */
750  *to = (unsigned char) *from++;
751  len--;
752  }
753  to++;
754  cnt++;
755  }
756  *to = 0;
757  return cnt;
758 }
#define IS_LCPRV2(c)
Definition: pg_wchar.h:164
#define IS_LC2(c)
Definition: pg_wchar.h:144
#define IS_LCPRV1(c)
Definition: pg_wchar.h:152
#define IS_LC1(c)
Definition: pg_wchar.h:126

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

◆ pg_mule_dsplen()

static int pg_mule_dsplen ( const unsigned char *  s)
static

Definition at line 851 of file wchar.c.

852 {
853  int len;
854 
855  /*
856  * Note: it's not really appropriate to assume that all multibyte charsets
857  * are double-wide on screen. But this seems an okay approximation for
858  * the MULE charsets we currently support.
859  */
860 
861  if (IS_LC1(*s))
862  len = 1;
863  else if (IS_LCPRV1(*s))
864  len = 1;
865  else if (IS_LC2(*s))
866  len = 2;
867  else if (IS_LCPRV2(*s))
868  len = 2;
869  else
870  len = 1; /* assume ASCII */
871 
872  return len;
873 }

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

◆ pg_mule_mblen()

int pg_mule_mblen ( const unsigned char *  s)

Definition at line 833 of file wchar.c.

834 {
835  int len;
836 
837  if (IS_LC1(*s))
838  len = 2;
839  else if (IS_LCPRV1(*s))
840  len = 3;
841  else if (IS_LC2(*s))
842  len = 3;
843  else if (IS_LCPRV2(*s))
844  len = 4;
845  else
846  len = 1; /* assume ASCII */
847  return len;
848 }

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

Referenced by mic2latin(), mic2latin_with_table(), and pg_mule_verifychar().

◆ pg_mule_verifychar()

static int pg_mule_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1400 of file wchar.c.

1401 {
1402  int l,
1403  mbl;
1404  unsigned char c;
1405 
1406  l = mbl = pg_mule_mblen(s);
1407 
1408  if (len < l)
1409  return -1;
1410 
1411  while (--l > 0)
1412  {
1413  c = *++s;
1414  if (!IS_HIGHBIT_SET(c))
1415  return -1;
1416  }
1417  return mbl;
1418 }
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:833

References IS_HIGHBIT_SET, len, and pg_mule_mblen().

Referenced by pg_mule_verifystr().

◆ pg_mule_verifystr()

static int pg_mule_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1421 of file wchar.c.

1422 {
1423  const unsigned char *start = s;
1424 
1425  while (len > 0)
1426  {
1427  int l;
1428 
1429  /* fast path for ASCII-subset characters */
1430  if (!IS_HIGHBIT_SET(*s))
1431  {
1432  if (*s == '\0')
1433  break;
1434  l = 1;
1435  }
1436  else
1437  {
1438  l = pg_mule_verifychar(s, len);
1439  if (l == -1)
1440  break;
1441  }
1442  s += l;
1443  len -= l;
1444  }
1445 
1446  return s - start;
1447 }
static int pg_mule_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1400

References IS_HIGHBIT_SET, len, and pg_mule_verifychar().

◆ pg_sjis_dsplen()

static int pg_sjis_dsplen ( const unsigned char *  s)
static

Definition at line 945 of file wchar.c.

946 {
947  int len;
948 
949  if (*s >= 0xa1 && *s <= 0xdf)
950  len = 1; /* 1 byte kana? */
951  else if (IS_HIGHBIT_SET(*s))
952  len = 2; /* kanji? */
953  else
954  len = pg_ascii_dsplen(s); /* should be ASCII */
955  return len;
956 }

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_sjis_mblen()

static int pg_sjis_mblen ( const unsigned char *  s)
static

Definition at line 931 of file wchar.c.

932 {
933  int len;
934 
935  if (*s >= 0xa1 && *s <= 0xdf)
936  len = 1; /* 1 byte kana? */
937  else if (IS_HIGHBIT_SET(*s))
938  len = 2; /* kanji? */
939  else
940  len = 1; /* should be ASCII */
941  return len;
942 }

References IS_HIGHBIT_SET, and len.

Referenced by pg_sjis_verifychar().

◆ pg_sjis_verifychar()

static int pg_sjis_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1467 of file wchar.c.

1468 {
1469  int l,
1470  mbl;
1471  unsigned char c1,
1472  c2;
1473 
1474  l = mbl = pg_sjis_mblen(s);
1475 
1476  if (len < l)
1477  return -1;
1478 
1479  if (l == 1) /* pg_sjis_mblen already verified it */
1480  return mbl;
1481 
1482  c1 = *s++;
1483  c2 = *s;
1484  if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2))
1485  return -1;
1486  return mbl;
1487 }
#define ISSJISTAIL(c)
Definition: pg_wchar.h:45
#define ISSJISHEAD(c)
Definition: pg_wchar.h:44
static int pg_sjis_mblen(const unsigned char *s)
Definition: wchar.c:931

References ISSJISHEAD, ISSJISTAIL, len, and pg_sjis_mblen().

Referenced by pg_sjis_verifystr().

◆ pg_sjis_verifystr()

static int pg_sjis_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1490 of file wchar.c.

1491 {
1492  const unsigned char *start = s;
1493 
1494  while (len > 0)
1495  {
1496  int l;
1497 
1498  /* fast path for ASCII-subset characters */
1499  if (!IS_HIGHBIT_SET(*s))
1500  {
1501  if (*s == '\0')
1502  break;
1503  l = 1;
1504  }
1505  else
1506  {
1507  l = pg_sjis_verifychar(s, len);
1508  if (l == -1)
1509  break;
1510  }
1511  s += l;
1512  len -= l;
1513  }
1514 
1515  return s - start;
1516 }
static int pg_sjis_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1467

References IS_HIGHBIT_SET, len, and pg_sjis_verifychar().

◆ pg_uhc_dsplen()

static int pg_uhc_dsplen ( const unsigned char *  s)
static

Definition at line 1028 of file wchar.c.

1029 {
1030  int len;
1031 
1032  if (IS_HIGHBIT_SET(*s))
1033  len = 2; /* 2byte? */
1034  else
1035  len = pg_ascii_dsplen(s); /* should be ASCII */
1036  return len;
1037 }

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_uhc_mblen()

static int pg_uhc_mblen ( const unsigned char *  s)
static

Definition at line 1016 of file wchar.c.

1017 {
1018  int len;
1019 
1020  if (IS_HIGHBIT_SET(*s))
1021  len = 2; /* 2byte? */
1022  else
1023  len = 1; /* should be ASCII */
1024  return len;
1025 }

References IS_HIGHBIT_SET, and len.

Referenced by pg_uhc_verifychar().

◆ pg_uhc_verifychar()

static int pg_uhc_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1617 of file wchar.c.

1618 {
1619  int l,
1620  mbl;
1621 
1622  l = mbl = pg_uhc_mblen(s);
1623 
1624  if (len < l)
1625  return -1;
1626 
1627  while (--l > 0)
1628  {
1629  if (*++s == '\0')
1630  return -1;
1631  }
1632 
1633  return mbl;
1634 }
static int pg_uhc_mblen(const unsigned char *s)
Definition: wchar.c:1016

References len, and pg_uhc_mblen().

Referenced by pg_uhc_verifystr().

◆ pg_uhc_verifystr()

static int pg_uhc_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1637 of file wchar.c.

1638 {
1639  const unsigned char *start = s;
1640 
1641  while (len > 0)
1642  {
1643  int l;
1644 
1645  /* fast path for ASCII-subset characters */
1646  if (!IS_HIGHBIT_SET(*s))
1647  {
1648  if (*s == '\0')
1649  break;
1650  l = 1;
1651  }
1652  else
1653  {
1654  l = pg_uhc_verifychar(s, len);
1655  if (l == -1)
1656  break;
1657  }
1658  s += l;
1659  len -= l;
1660  }
1661 
1662  return s - start;
1663 }
static int pg_uhc_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1617

References IS_HIGHBIT_SET, len, and pg_uhc_verifychar().

◆ pg_utf2wchar_with_len()

static int pg_utf2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 420 of file wchar.c.

421 {
422  int cnt = 0;
423  uint32 c1,
424  c2,
425  c3,
426  c4;
427 
428  while (len > 0 && *from)
429  {
430  if ((*from & 0x80) == 0)
431  {
432  *to = *from++;
433  len--;
434  }
435  else if ((*from & 0xe0) == 0xc0)
436  {
437  if (len < 2)
438  break; /* drop trailing incomplete char */
439  c1 = *from++ & 0x1f;
440  c2 = *from++ & 0x3f;
441  *to = (c1 << 6) | c2;
442  len -= 2;
443  }
444  else if ((*from & 0xf0) == 0xe0)
445  {
446  if (len < 3)
447  break; /* drop trailing incomplete char */
448  c1 = *from++ & 0x0f;
449  c2 = *from++ & 0x3f;
450  c3 = *from++ & 0x3f;
451  *to = (c1 << 12) | (c2 << 6) | c3;
452  len -= 3;
453  }
454  else if ((*from & 0xf8) == 0xf0)
455  {
456  if (len < 4)
457  break; /* drop trailing incomplete char */
458  c1 = *from++ & 0x07;
459  c2 = *from++ & 0x3f;
460  c3 = *from++ & 0x3f;
461  c4 = *from++ & 0x3f;
462  *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
463  len -= 4;
464  }
465  else
466  {
467  /* treat a bogus char as length 1; not ours to raise error */
468  *to = *from++;
469  len--;
470  }
471  to++;
472  cnt++;
473  }
474  *to = 0;
475  return cnt;
476 }

References len.

◆ pg_utf8_islegal()

bool pg_utf8_islegal ( const unsigned char *  source,
int  length 
)

Definition at line 2014 of file wchar.c.

2015 {
2016  unsigned char a;
2017 
2018  switch (length)
2019  {
2020  default:
2021  /* reject lengths 5 and 6 for now */
2022  return false;
2023  case 4:
2024  a = source[3];
2025  if (a < 0x80 || a > 0xBF)
2026  return false;
2027  /* FALL THRU */
2028  case 3:
2029  a = source[2];
2030  if (a < 0x80 || a > 0xBF)
2031  return false;
2032  /* FALL THRU */
2033  case 2:
2034  a = source[1];
2035  switch (*source)
2036  {
2037  case 0xE0:
2038  if (a < 0xA0 || a > 0xBF)
2039  return false;
2040  break;
2041  case 0xED:
2042  if (a < 0x80 || a > 0x9F)
2043  return false;
2044  break;
2045  case 0xF0:
2046  if (a < 0x90 || a > 0xBF)
2047  return false;
2048  break;
2049  case 0xF4:
2050  if (a < 0x80 || a > 0x8F)
2051  return false;
2052  break;
2053  default:
2054  if (a < 0x80 || a > 0xBF)
2055  return false;
2056  break;
2057  }
2058  /* FALL THRU */
2059  case 1:
2060  a = *source;
2061  if (a >= 0x80 && a < 0xC2)
2062  return false;
2063  if (a > 0xF4)
2064  return false;
2065  break;
2066  }
2067  return true;
2068 }
int a
Definition: isn.c:69
static rewind_source * source
Definition: pg_rewind.c:89

References a, and source.

Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifychar(), utf8_to_iso8859_1(), and UtfToLocal().

◆ pg_utf8_verifychar()

static int pg_utf8_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1726 of file wchar.c.

1727 {
1728  int l;
1729 
1730  if ((*s & 0x80) == 0)
1731  {
1732  if (*s == '\0')
1733  return -1;
1734  return 1;
1735  }
1736  else if ((*s & 0xe0) == 0xc0)
1737  l = 2;
1738  else if ((*s & 0xf0) == 0xe0)
1739  l = 3;
1740  else if ((*s & 0xf8) == 0xf0)
1741  l = 4;
1742  else
1743  l = 1;
1744 
1745  if (l > len)
1746  return -1;
1747 
1748  if (!pg_utf8_islegal(s, l))
1749  return -1;
1750 
1751  return l;
1752 }
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:2014

References len, and pg_utf8_islegal().

Referenced by pg_utf8_verifystr().

◆ pg_utf8_verifystr()

static int pg_utf8_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1916 of file wchar.c.

1917 {
1918  const unsigned char *start = s;
1919  const int orig_len = len;
1920  uint32 state = BGN;
1921 
1922 /*
1923  * With a stride of two vector widths, gcc will unroll the loop. Even if
1924  * the compiler can unroll a longer loop, it's not worth it because we
1925  * must fall back to the byte-wise algorithm if we find any non-ASCII.
1926  */
1927 #define STRIDE_LENGTH (2 * sizeof(Vector8))
1928 
1929  if (len >= STRIDE_LENGTH)
1930  {
1931  while (len >= STRIDE_LENGTH)
1932  {
1933  /*
1934  * If the chunk is all ASCII, we can skip the full UTF-8 check,
1935  * but we must first check for a non-END state, which means the
1936  * previous chunk ended in the middle of a multibyte sequence.
1937  */
1938  if (state != END || !is_valid_ascii(s, STRIDE_LENGTH))
1939  utf8_advance(s, &state, STRIDE_LENGTH);
1940 
1941  s += STRIDE_LENGTH;
1942  len -= STRIDE_LENGTH;
1943  }
1944 
1945  /* The error state persists, so we only need to check for it here. */
1946  if (state == ERR)
1947  {
1948  /*
1949  * Start over from the beginning with the slow path so we can
1950  * count the valid bytes.
1951  */
1952  len = orig_len;
1953  s = start;
1954  }
1955  else if (state != END)
1956  {
1957  /*
1958  * The fast path exited in the middle of a multibyte sequence.
1959  * Walk backwards to find the leading byte so that the slow path
1960  * can resume checking from there. We must always backtrack at
1961  * least one byte, since the current byte could be e.g. an ASCII
1962  * byte after a 2-byte lead, which is invalid.
1963  */
1964  do
1965  {
1966  Assert(s > start);
1967  s--;
1968  len++;
1969  Assert(IS_HIGHBIT_SET(*s));
1970  } while (pg_utf_mblen(s) <= 1);
1971  }
1972  }
1973 
1974  /* check remaining bytes */
1975  while (len > 0)
1976  {
1977  int l;
1978 
1979  /* fast path for ASCII-subset characters */
1980  if (!IS_HIGHBIT_SET(*s))
1981  {
1982  if (*s == '\0')
1983  break;
1984  l = 1;
1985  }
1986  else
1987  {
1988  l = pg_utf8_verifychar(s, len);
1989  if (l == -1)
1990  break;
1991  }
1992  s += l;
1993  len -= l;
1994  }
1995 
1996  return s - start;
1997 }
static bool is_valid_ascii(const unsigned char *s, int len)
Definition: ascii.h:25
Definition: regguts.h:323
#define END
Definition: wchar.c:1817
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:550
#define ERR
Definition: wchar.c:1804
static int pg_utf8_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1726
static void utf8_advance(const unsigned char *s, uint32 *state, int len)
Definition: wchar.c:1898
#define BGN
Definition: wchar.c:1806
#define STRIDE_LENGTH

References Assert(), BGN, END, ERR, IS_HIGHBIT_SET, is_valid_ascii(), len, pg_utf8_verifychar(), pg_utf_mblen(), STRIDE_LENGTH, and utf8_advance().

◆ pg_utf_dsplen()

static int pg_utf_dsplen ( const unsigned char *  s)
static

Definition at line 702 of file wchar.c.

703 {
704  return ucs_wcwidth(utf8_to_unicode(s));
705 }
pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: wchar.c:680
static int ucs_wcwidth(pg_wchar ucs)
Definition: wchar.c:640

References ucs_wcwidth(), and utf8_to_unicode().

◆ pg_utf_mblen()

int pg_utf_mblen ( const unsigned char *  s)

Definition at line 550 of file wchar.c.

551 {
552  int len;
553 
554  if ((*s & 0x80) == 0)
555  len = 1;
556  else if ((*s & 0xe0) == 0xc0)
557  len = 2;
558  else if ((*s & 0xf0) == 0xe0)
559  len = 3;
560  else if ((*s & 0xf8) == 0xf0)
561  len = 4;
562 #ifdef NOT_USED
563  else if ((*s & 0xfc) == 0xf8)
564  len = 5;
565  else if ((*s & 0xfe) == 0xfc)
566  len = 6;
567 #endif
568  else
569  len = 1;
570  return len;
571 }

References len.

Referenced by pg_utf8_verifystr(), and pg_wchar2utf_with_len().

◆ pg_wchar2euc_with_len()

static int pg_wchar2euc_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 356 of file wchar.c.

357 {
358  int cnt = 0;
359 
360  while (len > 0 && *from)
361  {
362  unsigned char c;
363 
364  if ((c = (*from >> 24)))
365  {
366  *to++ = c;
367  *to++ = (*from >> 16) & 0xff;
368  *to++ = (*from >> 8) & 0xff;
369  *to++ = *from & 0xff;
370  cnt += 4;
371  }
372  else if ((c = (*from >> 16)))
373  {
374  *to++ = c;
375  *to++ = (*from >> 8) & 0xff;
376  *to++ = *from & 0xff;
377  cnt += 3;
378  }
379  else if ((c = (*from >> 8)))
380  {
381  *to++ = c;
382  *to++ = *from & 0xff;
383  cnt += 2;
384  }
385  else
386  {
387  *to++ = *from;
388  cnt++;
389  }
390  from++;
391  len--;
392  }
393  *to = 0;
394  return cnt;
395 }

References len.

◆ pg_wchar2mule_with_len()

static int pg_wchar2mule_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 767 of file wchar.c.

768 {
769  int cnt = 0;
770 
771  while (len > 0 && *from)
772  {
773  unsigned char lb;
774 
775  lb = (*from >> 16) & 0xff;
776  if (IS_LC1(lb))
777  {
778  *to++ = lb;
779  *to++ = *from & 0xff;
780  cnt += 2;
781  }
782  else if (IS_LC2(lb))
783  {
784  *to++ = lb;
785  *to++ = (*from >> 8) & 0xff;
786  *to++ = *from & 0xff;
787  cnt += 3;
788  }
789  else if (IS_LCPRV1_A_RANGE(lb))
790  {
791  *to++ = LCPRV1_A;
792  *to++ = lb;
793  *to++ = *from & 0xff;
794  cnt += 3;
795  }
796  else if (IS_LCPRV1_B_RANGE(lb))
797  {
798  *to++ = LCPRV1_B;
799  *to++ = lb;
800  *to++ = *from & 0xff;
801  cnt += 3;
802  }
803  else if (IS_LCPRV2_A_RANGE(lb))
804  {
805  *to++ = LCPRV2_A;
806  *to++ = lb;
807  *to++ = (*from >> 8) & 0xff;
808  *to++ = *from & 0xff;
809  cnt += 4;
810  }
811  else if (IS_LCPRV2_B_RANGE(lb))
812  {
813  *to++ = LCPRV2_B;
814  *to++ = lb;
815  *to++ = (*from >> 8) & 0xff;
816  *to++ = *from & 0xff;
817  cnt += 4;
818  }
819  else
820  {
821  *to++ = *from & 0xff;
822  cnt += 1;
823  }
824  from++;
825  len--;
826  }
827  *to = 0;
828  return cnt;
829 }
#define LCPRV1_A
Definition: pg_wchar.h:150
#define LCPRV1_B
Definition: pg_wchar.h:151
#define LCPRV2_A
Definition: pg_wchar.h:162
#define IS_LCPRV2_B_RANGE(c)
Definition: pg_wchar.h:167
#define IS_LCPRV1_A_RANGE(c)
Definition: pg_wchar.h:153
#define IS_LCPRV1_B_RANGE(c)
Definition: pg_wchar.h:155
#define IS_LCPRV2_A_RANGE(c)
Definition: pg_wchar.h:165
#define LCPRV2_B
Definition: pg_wchar.h:163

References IS_LC1, IS_LC2, IS_LCPRV1_A_RANGE, IS_LCPRV1_B_RANGE, IS_LCPRV2_A_RANGE, IS_LCPRV2_B_RANGE, LCPRV1_A, LCPRV1_B, LCPRV2_A, LCPRV2_B, and len.

◆ pg_wchar2single_with_len()

static int pg_wchar2single_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 901 of file wchar.c.

902 {
903  int cnt = 0;
904 
905  while (len > 0 && *from)
906  {
907  *to++ = *from++;
908  len--;
909  cnt++;
910  }
911  *to = 0;
912  return cnt;
913 }

References len.

◆ pg_wchar2utf_with_len()

static int pg_wchar2utf_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 519 of file wchar.c.

520 {
521  int cnt = 0;
522 
523  while (len > 0 && *from)
524  {
525  int char_len;
526 
527  unicode_to_utf8(*from, to);
528  char_len = pg_utf_mblen(to);
529  cnt += char_len;
530  to += char_len;
531  from++;
532  len--;
533  }
534  *to = 0;
535  return cnt;
536 }
unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: wchar.c:484

References len, pg_utf_mblen(), and unicode_to_utf8().

◆ ucs_wcwidth()

static int ucs_wcwidth ( pg_wchar  ucs)
static

Definition at line 640 of file wchar.c.

641 {
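642 #include "common/unicode_nonspacing_table.h"
643 #include "common/unicode_east_asian_fw_table.h"
644 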
645  /* test for 8-bit control characters */
646  if (ucs == 0)
647  return 0;
648 
649  if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
650  return -1;
651 
652  /*
653  * binary search in table of non-spacing characters
654  *
655  * XXX: In the official Unicode sources, it is possible for a character to
656  * be described as both non-spacing and wide at the same time. As of
657  * Unicode 13.0, treating the non-spacing property as the determining
658  * factor for display width leads to the correct behavior, so do that
659  * search first.
660  */
661  if (mbbisearch(ucs, nonspacing,
662  sizeof(nonspacing) / sizeof(struct mbinterval) - 1))
663  return 0;
664 
665  /* binary search in table of wide characters */
666  if (mbbisearch(ucs, east_asian_fw,
667  sizeof(east_asian_fw) / sizeof(struct mbinterval) - 1))
668  return 2;
669 
670  return 1;
671 }
static const struct mbinterval east_asian_fw[]
static const struct mbinterval nonspacing[]
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
Definition: wchar.c:593

References east_asian_fw, mbbisearch(), and nonspacing.

Referenced by pg_utf_dsplen().

◆ unicode_to_utf8()

unsigned char* unicode_to_utf8 ( pg_wchar  c,
unsigned char *  utf8string 
)

Definition at line 484 of file wchar.c.

485 {
486  if (c <= 0x7F)
487  {
488  utf8string[0] = c;
489  }
490  else if (c <= 0x7FF)
491  {
492  utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
493  utf8string[1] = 0x80 | (c & 0x3F);
494  }
495  else if (c <= 0xFFFF)
496  {
497  utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
498  utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
499  utf8string[2] = 0x80 | (c & 0x3F);
500  }
501  else
502  {
503  utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
504  utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
505  utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
506  utf8string[3] = 0x80 | (c & 0x3F);
507  }
508 
509  return utf8string;
510 }

Referenced by convert_case(), json_lex_string(), pg_saslprep(), pg_unicode_to_server(), pg_unicode_to_server_noerror(), pg_wchar2utf_with_len(), and unicode_normalize_func().

◆ utf8_advance()

static void utf8_advance ( const unsigned char *  s,
uint32 *  state,
int  len 
)
static

Definition at line 1898 of file wchar.c.

1899 {
1900  /* Note: We deliberately don't check the state's value here. */
1901  while (len > 0)
1902  {
1903  /*
1904  * It's important that the mask value is 31: In most instruction sets,
1905  * a shift by a 32-bit operand is understood to be a shift by its mod
1906  * 32, so the compiler should elide the mask operation.
1907  */
1908  *state = Utf8Transition[*s++] >> (*state & 31);
1909  len--;
1910  }
1911 
1912  *state &= 31;
1913 }
static const uint32 Utf8Transition[256]
Definition: wchar.c:1840

References len, and Utf8Transition.

Referenced by pg_utf8_verifystr().

◆ utf8_to_unicode()

pg_wchar utf8_to_unicode ( const unsigned char *  c)

Definition at line 680 of file wchar.c.

681 {
682  if ((*c & 0x80) == 0)
683  return (pg_wchar) c[0];
684  else if ((*c & 0xe0) == 0xc0)
685  return (pg_wchar) (((c[0] & 0x1f) << 6) |
686  (c[1] & 0x3f));
687  else if ((*c & 0xf0) == 0xe0)
688  return (pg_wchar) (((c[0] & 0x0f) << 12) |
689  ((c[1] & 0x3f) << 6) |
690  (c[2] & 0x3f));
691  else if ((*c & 0xf8) == 0xf0)
692  return (pg_wchar) (((c[0] & 0x07) << 18) |
693  ((c[1] & 0x3f) << 12) |
694  ((c[2] & 0x3f) << 6) |
695  (c[3] & 0x3f));
696  else
697  /* that is an invalid code on purpose */
698  return 0xffffffff;
699 }
unsigned int pg_wchar
Definition: mbprint.c:31

Referenced by convert_case(), pg_saslprep(), pg_utf_dsplen(), read_char(), unicode_assigned(), unicode_is_normalized(), and unicode_normalize_func().

Variable Documentation

◆ pg_wchar_table

◆ Utf8Transition

const uint32 Utf8Transition[256]
static

Definition at line 1840 of file wchar.c.

Referenced by utf8_advance().