PostgreSQL Source Code  git master
wchar.c File Reference
Include dependency graph for wchar.c:

Go to the source code of this file.

Data Structures

struct  mbinterval
 

Macros

#define IS_EUC_RANGE_VALID(c)   ((c) >= 0xa1 && (c) <= 0xfe)
 
#define pg_euccn_verifychar   pg_euckr_verifychar
 
#define pg_euccn_verifystr   pg_euckr_verifystr
 
#define ERR   0
 
#define BGN   11
 
#define CS1   16
 
#define CS2   1
 
#define CS3   5
 
#define P3A   6 /* Lead was E0, check for 3-byte overlong */
 
#define P3B   20 /* Lead was ED, check for surrogate */
 
#define P4A   25 /* Lead was F0, check for 4-byte overlong */
 
#define P4B   30 /* Lead was F4, check for too-large */
 
#define END   BGN
 
#define ASC   (END << BGN)
 
#define L2A   (CS1 << BGN)
 
#define L3A   (P3A << BGN)
 
#define L3B   (CS2 << BGN)
 
#define L3C   (P3B << BGN)
 
#define L4A   (P4A << BGN)
 
#define L4B   (CS3 << BGN)
 
#define L4C   (P4B << BGN)
 
#define CR1   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)
 
#define CR2   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)
 
#define CR3   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)
 
#define ILL   ERR
 
#define STRIDE_LENGTH   16
 

Functions

static int pg_ascii2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_ascii_mblen (const unsigned char *s)
 
static int pg_ascii_dsplen (const unsigned char *s)
 
static int pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euc_mblen (const unsigned char *s)
 
static int pg_euc_dsplen (const unsigned char *s)
 
static int pg_eucjp2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_eucjp_mblen (const unsigned char *s)
 
static int pg_eucjp_dsplen (const unsigned char *s)
 
static int pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euckr_mblen (const unsigned char *s)
 
static int pg_euckr_dsplen (const unsigned char *s)
 
static int pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euccn_mblen (const unsigned char *s)
 
static int pg_euccn_dsplen (const unsigned char *s)
 
static int pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euctw_mblen (const unsigned char *s)
 
static int pg_euctw_dsplen (const unsigned char *s)
 
static int pg_wchar2euc_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_johab_mblen (const unsigned char *s)
 
static int pg_johab_dsplen (const unsigned char *s)
 
static int pg_utf2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
unsigned char * unicode_to_utf8 (pg_wchar c, unsigned char *utf8string)
 
static int pg_wchar2utf_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_utf_mblen (const unsigned char *s)
 
static int mbbisearch (pg_wchar ucs, const struct mbinterval *table, int max)
 
static int ucs_wcwidth (pg_wchar ucs)
 
pg_wchar utf8_to_unicode (const unsigned char *c)
 
static int pg_utf_dsplen (const unsigned char *s)
 
static int pg_mule2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2mule_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_mule_mblen (const unsigned char *s)
 
static int pg_mule_dsplen (const unsigned char *s)
 
static int pg_latin12wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2single_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_latin1_mblen (const unsigned char *s)
 
static int pg_latin1_dsplen (const unsigned char *s)
 
static int pg_sjis_mblen (const unsigned char *s)
 
static int pg_sjis_dsplen (const unsigned char *s)
 
static int pg_big5_mblen (const unsigned char *s)
 
static int pg_big5_dsplen (const unsigned char *s)
 
static int pg_gbk_mblen (const unsigned char *s)
 
static int pg_gbk_dsplen (const unsigned char *s)
 
static int pg_uhc_mblen (const unsigned char *s)
 
static int pg_uhc_dsplen (const unsigned char *s)
 
static int pg_gb18030_mblen (const unsigned char *s)
 
static int pg_gb18030_dsplen (const unsigned char *s)
 
static int pg_ascii_verifychar (const unsigned char *s, int len)
 
static int pg_ascii_verifystr (const unsigned char *s, int len)
 
static int pg_eucjp_verifychar (const unsigned char *s, int len)
 
static int pg_eucjp_verifystr (const unsigned char *s, int len)
 
static int pg_euckr_verifychar (const unsigned char *s, int len)
 
static int pg_euckr_verifystr (const unsigned char *s, int len)
 
static int pg_euctw_verifychar (const unsigned char *s, int len)
 
static int pg_euctw_verifystr (const unsigned char *s, int len)
 
static int pg_johab_verifychar (const unsigned char *s, int len)
 
static int pg_johab_verifystr (const unsigned char *s, int len)
 
static int pg_mule_verifychar (const unsigned char *s, int len)
 
static int pg_mule_verifystr (const unsigned char *s, int len)
 
static int pg_latin1_verifychar (const unsigned char *s, int len)
 
static int pg_latin1_verifystr (const unsigned char *s, int len)
 
static int pg_sjis_verifychar (const unsigned char *s, int len)
 
static int pg_sjis_verifystr (const unsigned char *s, int len)
 
static int pg_big5_verifychar (const unsigned char *s, int len)
 
static int pg_big5_verifystr (const unsigned char *s, int len)
 
static int pg_gbk_verifychar (const unsigned char *s, int len)
 
static int pg_gbk_verifystr (const unsigned char *s, int len)
 
static int pg_uhc_verifychar (const unsigned char *s, int len)
 
static int pg_uhc_verifystr (const unsigned char *s, int len)
 
static int pg_gb18030_verifychar (const unsigned char *s, int len)
 
static int pg_gb18030_verifystr (const unsigned char *s, int len)
 
static int pg_utf8_verifychar (const unsigned char *s, int len)
 
static void utf8_advance (const unsigned char *s, uint32 *state, int len)
 
static int pg_utf8_verifystr (const unsigned char *s, int len)
 
bool pg_utf8_islegal (const unsigned char *source, int length)
 
int pg_encoding_mblen (int encoding, const char *mbstr)
 
int pg_encoding_mblen_bounded (int encoding, const char *mbstr)
 
int pg_encoding_dsplen (int encoding, const char *mbstr)
 
int pg_encoding_verifymbchar (int encoding, const char *mbstr, int len)
 
int pg_encoding_verifymbstr (int encoding, const char *mbstr, int len)
 
int pg_encoding_max_length (int encoding)
 

Variables

static const uint32 Utf8Transition [256]
 
const pg_wchar_tbl pg_wchar_table []
 

Macro Definition Documentation

◆ ASC

#define ASC   (END << BGN)

Definition at line 1821 of file wchar.c.

◆ BGN

#define BGN   11

Definition at line 1805 of file wchar.c.

◆ CR1

#define CR1   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)

Definition at line 1833 of file wchar.c.

◆ CR2

#define CR2   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)

Definition at line 1834 of file wchar.c.

◆ CR3

#define CR3   (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)

Definition at line 1835 of file wchar.c.

◆ CS1

#define CS1   16

Definition at line 1807 of file wchar.c.

◆ CS2

#define CS2   1

Definition at line 1808 of file wchar.c.

◆ CS3

#define CS3   5

Definition at line 1809 of file wchar.c.

◆ END

#define END   BGN

Definition at line 1816 of file wchar.c.

◆ ERR

#define ERR   0

Definition at line 1803 of file wchar.c.

◆ ILL

#define ILL   ERR

Definition at line 1837 of file wchar.c.

◆ IS_EUC_RANGE_VALID

#define IS_EUC_RANGE_VALID (   c)    ((c) >= 0xa1 && (c) <= 0xfe)

Definition at line 1118 of file wchar.c.

◆ L2A

#define L2A   (CS1 << BGN)

Definition at line 1823 of file wchar.c.

◆ L3A

#define L3A   (P3A << BGN)

Definition at line 1825 of file wchar.c.

◆ L3B

#define L3B   (CS2 << BGN)

Definition at line 1826 of file wchar.c.

◆ L3C

#define L3C   (P3B << BGN)

Definition at line 1827 of file wchar.c.

◆ L4A

#define L4A   (P4A << BGN)

Definition at line 1829 of file wchar.c.

◆ L4B

#define L4B   (CS3 << BGN)

Definition at line 1830 of file wchar.c.

◆ L4C

#define L4C   (P4B << BGN)

Definition at line 1831 of file wchar.c.

◆ P3A

#define P3A   6 /* Lead was E0, check for 3-byte overlong */

Definition at line 1811 of file wchar.c.

◆ P3B

#define P3B   20 /* Lead was ED, check for surrogate */

Definition at line 1812 of file wchar.c.

◆ P4A

#define P4A   25 /* Lead was F0, check for 4-byte overlong */

Definition at line 1813 of file wchar.c.

◆ P4B

#define P4B   30 /* Lead was F4, check for too-large */

Definition at line 1814 of file wchar.c.

◆ pg_euccn_verifychar

#define pg_euccn_verifychar   pg_euckr_verifychar

Definition at line 1263 of file wchar.c.

◆ pg_euccn_verifystr

#define pg_euccn_verifystr   pg_euckr_verifystr

Definition at line 1264 of file wchar.c.

◆ STRIDE_LENGTH

#define STRIDE_LENGTH   16

Function Documentation

◆ mbbisearch()

static int mbbisearch ( pg_wchar  ucs,
const struct mbinterval *  table,
int  max 
)
static

Definition at line 592 of file wchar.c.

593 {
594  int min = 0;
595  int mid;
596 
597  if (ucs < table[0].first || ucs > table[max].last)
598  return 0;
599  while (max >= min)
600  {
601  mid = (min + max) / 2;
602  if (ucs > table[mid].last)
603  min = mid + 1;
604  else if (ucs < table[mid].first)
605  max = mid - 1;
606  else
607  return 1;
608  }
609 
610  return 0;
611 }

Referenced by ucs_wcwidth().

◆ pg_ascii2wchar_with_len()

static int pg_ascii2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 48 of file wchar.c.

49 {
50  int cnt = 0;
51 
52  while (len > 0 && *from)
53  {
54  *to++ = *from++;
55  len--;
56  cnt++;
57  }
58  *to = 0;
59  return cnt;
60 }
const void size_t len

References len.

◆ pg_ascii_dsplen()

static int pg_ascii_dsplen ( const unsigned char *  s)
static

Definition at line 69 of file wchar.c.

70 {
71  if (*s == '\0')
72  return 0;
73  if (*s < 0x20 || *s == 0x7f)
74  return -1;
75 
76  return 1;
77 }

Referenced by pg_big5_dsplen(), pg_euc_dsplen(), pg_euccn_dsplen(), pg_eucjp_dsplen(), pg_euctw_dsplen(), pg_gb18030_dsplen(), pg_gbk_dsplen(), pg_latin1_dsplen(), pg_sjis_dsplen(), and pg_uhc_dsplen().

◆ pg_ascii_mblen()

static int pg_ascii_mblen ( const unsigned char *  s)
static

Definition at line 63 of file wchar.c.

64 {
65  return 1;
66 }

◆ pg_ascii_verifychar()

static int pg_ascii_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1102 of file wchar.c.

1103 {
1104  return 1;
1105 }

◆ pg_ascii_verifystr()

static int pg_ascii_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1108 of file wchar.c.

1109 {
1110  const unsigned char *nullpos = memchr(s, 0, len);
1111 
1112  if (nullpos == NULL)
1113  return len;
1114  else
1115  return nullpos - s;
1116 }

References len.

◆ pg_big5_dsplen()

static int pg_big5_dsplen ( const unsigned char *  s)
static

Definition at line 973 of file wchar.c.

974 {
975  int len;
976 
977  if (IS_HIGHBIT_SET(*s))
978  len = 2; /* kanji? */
979  else
980  len = pg_ascii_dsplen(s); /* should be ASCII */
981  return len;
982 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1153
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:69

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_big5_mblen()

static int pg_big5_mblen ( const unsigned char *  s)
static

Definition at line 961 of file wchar.c.

962 {
963  int len;
964 
965  if (IS_HIGHBIT_SET(*s))
966  len = 2; /* kanji? */
967  else
968  len = 1; /* should be ASCII */
969  return len;
970 }

References IS_HIGHBIT_SET, and len.

Referenced by pg_big5_verifychar().

◆ pg_big5_verifychar()

static int pg_big5_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1518 of file wchar.c.

1519 {
1520  int l,
1521  mbl;
1522 
1523  l = mbl = pg_big5_mblen(s);
1524 
1525  if (len < l)
1526  return -1;
1527 
1528  while (--l > 0)
1529  {
1530  if (*++s == '\0')
1531  return -1;
1532  }
1533 
1534  return mbl;
1535 }
static int pg_big5_mblen(const unsigned char *s)
Definition: wchar.c:961

References len, and pg_big5_mblen().

Referenced by pg_big5_verifystr().

◆ pg_big5_verifystr()

static int pg_big5_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1538 of file wchar.c.

1539 {
1540  const unsigned char *start = s;
1541 
1542  while (len > 0)
1543  {
1544  int l;
1545 
1546  /* fast path for ASCII-subset characters */
1547  if (!IS_HIGHBIT_SET(*s))
1548  {
1549  if (*s == '\0')
1550  break;
1551  l = 1;
1552  }
1553  else
1554  {
1555  l = pg_big5_verifychar(s, len);
1556  if (l == -1)
1557  break;
1558  }
1559  s += l;
1560  len -= l;
1561  }
1562 
1563  return s - start;
1564 }
static int pg_big5_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1518

References IS_HIGHBIT_SET, len, and pg_big5_verifychar().

◆ pg_encoding_dsplen()

int pg_encoding_dsplen ( int  encoding,
const char *  mbstr 
)

Definition at line 2150 of file wchar.c.

2151 {
2152  return (PG_VALID_ENCODING(encoding) ?
2153  pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
2154  pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
2155 }
int32 encoding
Definition: pg_database.h:41
@ PG_SQL_ASCII
Definition: pg_wchar.h:224
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:285
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:2075

References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by PQdsplen(), and reportErrorPosition().

◆ pg_encoding_max_length()

int pg_encoding_max_length ( int  encoding)

Definition at line 2187 of file wchar.c.

2188 {
2189  Assert(PG_VALID_ENCODING(encoding));
2190 
2191  return pg_wchar_table[encoding].maxmblen;
2192 }
Assert(fmt[strlen(fmt) - 1] !='\n')
int maxmblen
Definition: pg_wchar.h:390

References Assert(), encoding, pg_wchar_tbl::maxmblen, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by ascii(), chr(), pg_encoding_mbcliplen(), pg_verify_mbstr_len(), reportErrorPosition(), and type_maximum_size().

◆ pg_encoding_mblen()

int pg_encoding_mblen ( int  encoding,
const char *  mbstr 
)

Definition at line 2129 of file wchar.c.

2130 {
2131  return (PG_VALID_ENCODING(encoding) ?
2132  pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) :
2133  pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
2134 }

References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), pg_encoding_mblen_bounded(), PQescapeInternal(), PQescapeStringInternal(), PQmblen(), PQmblenBounded(), report_invalid_encoding(), and report_untranslatable_char().

◆ pg_encoding_mblen_bounded()

int pg_encoding_mblen_bounded ( int  encoding,
const char *  mbstr 
)

Definition at line 2141 of file wchar.c.

2142 {
2143  return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr));
2144 }
size_t strnlen(const char *str, size_t maxlen)
Definition: strnlen.c:26
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:2129

References encoding, pg_encoding_mblen(), and strnlen().

Referenced by json_lex_string().

◆ pg_encoding_verifymbchar()

int pg_encoding_verifymbchar ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 2163 of file wchar.c.

2164 {
2165  return (PG_VALID_ENCODING(encoding) ?
2166  pg_wchar_table[encoding].mbverifychar((const unsigned char *) mbstr, len) :
2167  pg_wchar_table[PG_SQL_ASCII].mbverifychar((const unsigned char *) mbstr, len));
2168 }

References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by big52euc_tw(), big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), shift_jis_20042euc_jis_2004(), and sjis2euc_jp().

◆ pg_encoding_verifymbstr()

int pg_encoding_verifymbstr ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 2176 of file wchar.c.

2177 {
2178  return (PG_VALID_ENCODING(encoding) ?
2179  pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len) :
2180  pg_wchar_table[PG_SQL_ASCII].mbverifystr((const unsigned char *) mbstr, len));
2181 }

References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by CopyConvertBuf(), and test_enc_conversion().

◆ pg_euc2wchar_with_len()

static int pg_euc2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 83 of file wchar.c.

84 {
85  int cnt = 0;
86 
87  while (len > 0 && *from)
88  {
89  if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte
90  * KANA") */
91  {
92  from++;
93  *to = (SS2 << 8) | *from++;
94  len -= 2;
95  }
96  else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
97  {
98  from++;
99  *to = (SS3 << 16) | (*from++ << 8);
100  *to |= *from++;
101  len -= 3;
102  }
103  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
104  {
105  *to = *from++ << 8;
106  *to |= *from++;
107  len -= 2;
108  }
109  else /* must be ASCII */
110  {
111  *to = *from++;
112  len--;
113  }
114  to++;
115  cnt++;
116  }
117  *to = 0;
118  return cnt;
119 }
#define SS2
Definition: pg_wchar.h:35
#define SS3
Definition: pg_wchar.h:36

References IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp2wchar_with_len(), and pg_euckr2wchar_with_len().

◆ pg_euc_dsplen()

static int pg_euc_dsplen ( const unsigned char *  s)
inline static

Definition at line 138 of file wchar.c.

139 {
140  int len;
141 
142  if (*s == SS2)
143  len = 2;
144  else if (*s == SS3)
145  len = 2;
146  else if (IS_HIGHBIT_SET(*s))
147  len = 2;
148  else
149  len = pg_ascii_dsplen(s);
150  return len;
151 }

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

Referenced by pg_euckr_dsplen(), and pg_johab_dsplen().

◆ pg_euc_mblen()

static int pg_euc_mblen ( const unsigned char *  s)
inline static

Definition at line 122 of file wchar.c.

123 {
124  int len;
125 
126  if (*s == SS2)
127  len = 2;
128  else if (*s == SS3)
129  len = 3;
130  else if (IS_HIGHBIT_SET(*s))
131  len = 2;
132  else
133  len = 1;
134  return len;
135 }

References IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp_mblen(), pg_euckr_mblen(), and pg_johab_mblen().

◆ pg_euccn2wchar_with_len()

static int pg_euccn2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 210 of file wchar.c.

211 {
212  int cnt = 0;
213 
214  while (len > 0 && *from)
215  {
216  if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
217  {
218  from++;
219  *to = (SS2 << 16) | (*from++ << 8);
220  *to |= *from++;
221  len -= 3;
222  }
223  else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */
224  {
225  from++;
226  *to = (SS3 << 16) | (*from++ << 8);
227  *to |= *from++;
228  len -= 3;
229  }
230  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
231  {
232  *to = *from++ << 8;
233  *to |= *from++;
234  len -= 2;
235  }
236  else
237  {
238  *to = *from++;
239  len--;
240  }
241  to++;
242  cnt++;
243  }
244  *to = 0;
245  return cnt;
246 }

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euccn_dsplen()

static int pg_euccn_dsplen ( const unsigned char *  s)
static

Definition at line 261 of file wchar.c.

262 {
263  int len;
264 
265  if (IS_HIGHBIT_SET(*s))
266  len = 2;
267  else
268  len = pg_ascii_dsplen(s);
269  return len;
270 }

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_euccn_mblen()

static int pg_euccn_mblen ( const unsigned char *  s)
static

Definition at line 249 of file wchar.c.

250 {
251  int len;
252 
253  if (IS_HIGHBIT_SET(*s))
254  len = 2;
255  else
256  len = 1;
257  return len;
258 }

References IS_HIGHBIT_SET, and len.

◆ pg_eucjp2wchar_with_len()

static int pg_eucjp2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 157 of file wchar.c.

158 {
159  return pg_euc2wchar_with_len(from, to, len);
160 }
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition: wchar.c:83

References len, and pg_euc2wchar_with_len().

◆ pg_eucjp_dsplen()

static int pg_eucjp_dsplen ( const unsigned char *  s)
static

Definition at line 169 of file wchar.c.

170 {
171  int len;
172 
173  if (*s == SS2)
174  len = 1;
175  else if (*s == SS3)
176  len = 2;
177  else if (IS_HIGHBIT_SET(*s))
178  len = 2;
179  else
180  len = pg_ascii_dsplen(s);
181  return len;
182 }

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

◆ pg_eucjp_mblen()

static int pg_eucjp_mblen ( const unsigned char *  s)
static

Definition at line 163 of file wchar.c.

164 {
165  return pg_euc_mblen(s);
166 }
static int pg_euc_mblen(const unsigned char *s)
Definition: wchar.c:122

References pg_euc_mblen().

◆ pg_eucjp_verifychar()

static int pg_eucjp_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1121 of file wchar.c.

1122 {
1123  int l;
1124  unsigned char c1,
1125  c2;
1126 
1127  c1 = *s++;
1128 
1129  switch (c1)
1130  {
1131  case SS2: /* JIS X 0201 */
1132  l = 2;
1133  if (l > len)
1134  return -1;
1135  c2 = *s++;
1136  if (c2 < 0xa1 || c2 > 0xdf)
1137  return -1;
1138  break;
1139 
1140  case SS3: /* JIS X 0212 */
1141  l = 3;
1142  if (l > len)
1143  return -1;
1144  c2 = *s++;
1145  if (!IS_EUC_RANGE_VALID(c2))
1146  return -1;
1147  c2 = *s++;
1148  if (!IS_EUC_RANGE_VALID(c2))
1149  return -1;
1150  break;
1151 
1152  default:
1153  if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1154  {
1155  l = 2;
1156  if (l > len)
1157  return -1;
1158  if (!IS_EUC_RANGE_VALID(c1))
1159  return -1;
1160  c2 = *s++;
1161  if (!IS_EUC_RANGE_VALID(c2))
1162  return -1;
1163  }
1164  else
1165  /* must be ASCII */
1166  {
1167  l = 1;
1168  }
1169  break;
1170  }
1171 
1172  return l;
1173 }
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1118

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_eucjp_verifystr().

◆ pg_eucjp_verifystr()

static int pg_eucjp_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1176 of file wchar.c.

1177 {
1178  const unsigned char *start = s;
1179 
1180  while (len > 0)
1181  {
1182  int l;
1183 
1184  /* fast path for ASCII-subset characters */
1185  if (!IS_HIGHBIT_SET(*s))
1186  {
1187  if (*s == '\0')
1188  break;
1189  l = 1;
1190  }
1191  else
1192  {
1193  l = pg_eucjp_verifychar(s, len);
1194  if (l == -1)
1195  break;
1196  }
1197  s += l;
1198  len -= l;
1199  }
1200 
1201  return s - start;
1202 }
static int pg_eucjp_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1121

References IS_HIGHBIT_SET, len, and pg_eucjp_verifychar().

◆ pg_euckr2wchar_with_len()

static int pg_euckr2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 188 of file wchar.c.

189 {
190  return pg_euc2wchar_with_len(from, to, len);
191 }

References len, and pg_euc2wchar_with_len().

◆ pg_euckr_dsplen()

static int pg_euckr_dsplen ( const unsigned char *  s)
static

Definition at line 200 of file wchar.c.

201 {
202  return pg_euc_dsplen(s);
203 }
static int pg_euc_dsplen(const unsigned char *s)
Definition: wchar.c:138

References pg_euc_dsplen().

◆ pg_euckr_mblen()

static int pg_euckr_mblen ( const unsigned char *  s)
static

Definition at line 194 of file wchar.c.

195 {
196  return pg_euc_mblen(s);
197 }

References pg_euc_mblen().

◆ pg_euckr_verifychar()

static int pg_euckr_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1205 of file wchar.c.

1206 {
1207  int l;
1208  unsigned char c1,
1209  c2;
1210 
1211  c1 = *s++;
1212 
1213  if (IS_HIGHBIT_SET(c1))
1214  {
1215  l = 2;
1216  if (l > len)
1217  return -1;
1218  if (!IS_EUC_RANGE_VALID(c1))
1219  return -1;
1220  c2 = *s++;
1221  if (!IS_EUC_RANGE_VALID(c2))
1222  return -1;
1223  }
1224  else
1225  /* must be ASCII */
1226  {
1227  l = 1;
1228  }
1229 
1230  return l;
1231 }

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, and len.

Referenced by pg_euckr_verifystr().

◆ pg_euckr_verifystr()

static int pg_euckr_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1234 of file wchar.c.

1235 {
1236  const unsigned char *start = s;
1237 
1238  while (len > 0)
1239  {
1240  int l;
1241 
1242  /* fast path for ASCII-subset characters */
1243  if (!IS_HIGHBIT_SET(*s))
1244  {
1245  if (*s == '\0')
1246  break;
1247  l = 1;
1248  }
1249  else
1250  {
1251  l = pg_euckr_verifychar(s, len);
1252  if (l == -1)
1253  break;
1254  }
1255  s += l;
1256  len -= l;
1257  }
1258 
1259  return s - start;
1260 }
static int pg_euckr_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1205

References IS_HIGHBIT_SET, len, and pg_euckr_verifychar().

◆ pg_euctw2wchar_with_len()

static int pg_euctw2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 277 of file wchar.c.

278 {
279  int cnt = 0;
280 
281  while (len > 0 && *from)
282  {
283  if (*from == SS2 && len >= 4) /* code set 2 */
284  {
285  from++;
286  *to = (((uint32) SS2) << 24) | (*from++ << 16);
287  *to |= *from++ << 8;
288  *to |= *from++;
289  len -= 4;
290  }
291  else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
292  {
293  from++;
294  *to = (SS3 << 16) | (*from++ << 8);
295  *to |= *from++;
296  len -= 3;
297  }
298  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */
299  {
300  *to = *from++ << 8;
301  *to |= *from++;
302  len -= 2;
303  }
304  else
305  {
306  *to = *from++;
307  len--;
308  }
309  to++;
310  cnt++;
311  }
312  *to = 0;
313  return cnt;
314 }
unsigned int uint32
Definition: c.h:441

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euctw_dsplen()

static int pg_euctw_dsplen ( const unsigned char *  s)
static

Definition at line 333 of file wchar.c.

334 {
335  int len;
336 
337  if (*s == SS2)
338  len = 2;
339  else if (*s == SS3)
340  len = 2;
341  else if (IS_HIGHBIT_SET(*s))
342  len = 2;
343  else
344  len = pg_ascii_dsplen(s);
345  return len;
346 }

References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.

◆ pg_euctw_mblen()

static int pg_euctw_mblen ( const unsigned char *  s)
static

Definition at line 317 of file wchar.c.

318 {
319  int len;
320 
321  if (*s == SS2)
322  len = 4;
323  else if (*s == SS3)
324  len = 3;
325  else if (IS_HIGHBIT_SET(*s))
326  len = 2;
327  else
328  len = 1;
329  return len;
330 }

References IS_HIGHBIT_SET, len, SS2, and SS3.

◆ pg_euctw_verifychar()

static int pg_euctw_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1267 of file wchar.c.

1268 {
1269  int l;
1270  unsigned char c1,
1271  c2;
1272 
1273  c1 = *s++;
1274 
1275  switch (c1)
1276  {
1277  case SS2: /* CNS 11643 Plane 1-7 */
1278  l = 4;
1279  if (l > len)
1280  return -1;
1281  c2 = *s++;
1282  if (c2 < 0xa1 || c2 > 0xa7)
1283  return -1;
1284  c2 = *s++;
1285  if (!IS_EUC_RANGE_VALID(c2))
1286  return -1;
1287  c2 = *s++;
1288  if (!IS_EUC_RANGE_VALID(c2))
1289  return -1;
1290  break;
1291 
1292  case SS3: /* unused */
1293  return -1;
1294 
1295  default:
1296  if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */
1297  {
1298  l = 2;
1299  if (l > len)
1300  return -1;
1301  /* no further range check on c1? */
1302  c2 = *s++;
1303  if (!IS_EUC_RANGE_VALID(c2))
1304  return -1;
1305  }
1306  else
1307  /* must be ASCII */
1308  {
1309  l = 1;
1310  }
1311  break;
1312  }
1313  return l;
1314 }

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.

Referenced by pg_euctw_verifystr().

◆ pg_euctw_verifystr()

static int pg_euctw_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1317 of file wchar.c.

1318 {
1319  const unsigned char *start = s;
1320 
1321  while (len > 0)
1322  {
1323  int l;
1324 
1325  /* fast path for ASCII-subset characters */
1326  if (!IS_HIGHBIT_SET(*s))
1327  {
1328  if (*s == '\0')
1329  break;
1330  l = 1;
1331  }
1332  else
1333  {
1334  l = pg_euctw_verifychar(s, len);
1335  if (l == -1)
1336  break;
1337  }
1338  s += l;
1339  len -= l;
1340  }
1341 
1342  return s - start;
1343 }
static int pg_euctw_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1267

References IS_HIGHBIT_SET, len, and pg_euctw_verifychar().

◆ pg_gb18030_dsplen()

static int pg_gb18030_dsplen ( const unsigned char *  s)
static

Definition at line 1068 of file wchar.c.

1069 {
1070  int len;
1071 
1072  if (IS_HIGHBIT_SET(*s))
1073  len = 2;
1074  else
1075  len = pg_ascii_dsplen(s); /* ASCII */
1076  return len;
1077 }

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_gb18030_mblen()

static int pg_gb18030_mblen ( const unsigned char *  s)
static

Definition at line 1054 of file wchar.c.

1055 {
1056  int len;
1057 
1058  if (!IS_HIGHBIT_SET(*s))
1059  len = 1; /* ASCII */
1060  else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1061  len = 4;
1062  else
1063  len = 2;
1064  return len;
1065 }

References IS_HIGHBIT_SET, and len.

◆ pg_gb18030_verifychar()

static int pg_gb18030_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1665 of file wchar.c.

1666 {
1667  int l;
1668 
1669  if (!IS_HIGHBIT_SET(*s))
1670  l = 1; /* ASCII */
1671  else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1672  {
1673  /* Should be 4-byte, validate remaining bytes */
1674  if (*s >= 0x81 && *s <= 0xfe &&
1675  *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
1676  *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
1677  l = 4;
1678  else
1679  l = -1;
1680  }
1681  else if (len >= 2 && *s >= 0x81 && *s <= 0xfe)
1682  {
1683  /* Should be 2-byte, validate */
1684  if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
1685  (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
1686  l = 2;
1687  else
1688  l = -1;
1689  }
1690  else
1691  l = -1;
1692  return l;
1693 }

References IS_HIGHBIT_SET, and len.

Referenced by pg_gb18030_verifystr().

◆ pg_gb18030_verifystr()

static int pg_gb18030_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1696 of file wchar.c.

1697 {
1698  const unsigned char *start = s;
1699 
1700  while (len > 0)
1701  {
1702  int l;
1703 
1704  /* fast path for ASCII-subset characters */
1705  if (!IS_HIGHBIT_SET(*s))
1706  {
1707  if (*s == '\0')
1708  break;
1709  l = 1;
1710  }
1711  else
1712  {
1713  l = pg_gb18030_verifychar(s, len);
1714  if (l == -1)
1715  break;
1716  }
1717  s += l;
1718  len -= l;
1719  }
1720 
1721  return s - start;
1722 }
static int pg_gb18030_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1665

References IS_HIGHBIT_SET, len, and pg_gb18030_verifychar().

◆ pg_gbk_dsplen()

static int pg_gbk_dsplen ( const unsigned char *  s)
static

Definition at line 1000 of file wchar.c.

1001 {
1002  int len;
1003 
1004  if (IS_HIGHBIT_SET(*s))
1005  len = 2; /* kanji? */
1006  else
1007  len = pg_ascii_dsplen(s); /* should be ASCII */
1008  return len;
1009 }

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_gbk_mblen()

static int pg_gbk_mblen ( const unsigned char *  s)
static

Definition at line 988 of file wchar.c.

989 {
990  int len;
991 
992  if (IS_HIGHBIT_SET(*s))
993  len = 2; /* kanji? */
994  else
995  len = 1; /* should be ASCII */
996  return len;
997 }

References IS_HIGHBIT_SET, and len.

Referenced by pg_gbk_verifychar().

◆ pg_gbk_verifychar()

static int pg_gbk_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1567 of file wchar.c.

1568 {
1569  int l,
1570  mbl;
1571 
1572  l = mbl = pg_gbk_mblen(s);
1573 
1574  if (len < l)
1575  return -1;
1576 
1577  while (--l > 0)
1578  {
1579  if (*++s == '\0')
1580  return -1;
1581  }
1582 
1583  return mbl;
1584 }
static int pg_gbk_mblen(const unsigned char *s)
Definition: wchar.c:988

References len, and pg_gbk_mblen().

Referenced by pg_gbk_verifystr().

◆ pg_gbk_verifystr()

static int pg_gbk_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1587 of file wchar.c.

1588 {
1589  const unsigned char *start = s;
1590 
1591  while (len > 0)
1592  {
1593  int l;
1594 
1595  /* fast path for ASCII-subset characters */
1596  if (!IS_HIGHBIT_SET(*s))
1597  {
1598  if (*s == '\0')
1599  break;
1600  l = 1;
1601  }
1602  else
1603  {
1604  l = pg_gbk_verifychar(s, len);
1605  if (l == -1)
1606  break;
1607  }
1608  s += l;
1609  len -= l;
1610  }
1611 
1612  return s - start;
1613 }
static int pg_gbk_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1567

References IS_HIGHBIT_SET, len, and pg_gbk_verifychar().

◆ pg_johab_dsplen()

static int pg_johab_dsplen ( const unsigned char *  s)
static

Definition at line 407 of file wchar.c.

408 {
409  return pg_euc_dsplen(s);
410 }

References pg_euc_dsplen().

◆ pg_johab_mblen()

static int pg_johab_mblen ( const unsigned char *  s)
static

Definition at line 401 of file wchar.c.

402 {
403  return pg_euc_mblen(s);
404 }

References pg_euc_mblen().

Referenced by pg_johab_verifychar().

◆ pg_johab_verifychar()

static int pg_johab_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1346 of file wchar.c.

1347 {
1348  int l,
1349  mbl;
1350  unsigned char c;
1351 
1352  l = mbl = pg_johab_mblen(s);
1353 
1354  if (len < l)
1355  return -1;
1356 
1357  if (!IS_HIGHBIT_SET(*s))
1358  return mbl;
1359 
1360  while (--l > 0)
1361  {
1362  c = *++s;
1363  if (!IS_EUC_RANGE_VALID(c))
1364  return -1;
1365  }
1366  return mbl;
1367 }
char * c
static int pg_johab_mblen(const unsigned char *s)
Definition: wchar.c:401

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, and pg_johab_mblen().

Referenced by pg_johab_verifystr().

◆ pg_johab_verifystr()

static int pg_johab_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1370 of file wchar.c.

1371 {
1372  const unsigned char *start = s;
1373 
1374  while (len > 0)
1375  {
1376  int l;
1377 
1378  /* fast path for ASCII-subset characters */
1379  if (!IS_HIGHBIT_SET(*s))
1380  {
1381  if (*s == '\0')
1382  break;
1383  l = 1;
1384  }
1385  else
1386  {
1387  l = pg_johab_verifychar(s, len);
1388  if (l == -1)
1389  break;
1390  }
1391  s += l;
1392  len -= l;
1393  }
1394 
1395  return s - start;
1396 }
static int pg_johab_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1346

References IS_HIGHBIT_SET, len, and pg_johab_verifychar().

◆ pg_latin12wchar_with_len()

static int pg_latin12wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 878 of file wchar.c.

879 {
880  int cnt = 0;
881 
882  while (len > 0 && *from)
883  {
884  *to++ = *from++;
885  len--;
886  cnt++;
887  }
888  *to = 0;
889  return cnt;
890 }

References len.

◆ pg_latin1_dsplen()

static int pg_latin1_dsplen ( const unsigned char *  s)
static

Definition at line 921 of file wchar.c.

922 {
923  return pg_ascii_dsplen(s);
924 }

References pg_ascii_dsplen().

◆ pg_latin1_mblen()

static int pg_latin1_mblen ( const unsigned char *  s)
static

Definition at line 915 of file wchar.c.

916 {
917  return 1;
918 }

◆ pg_latin1_verifychar()

static int pg_latin1_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1449 of file wchar.c.

1450 {
1451  return 1;
1452 }

◆ pg_latin1_verifystr()

static int pg_latin1_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1455 of file wchar.c.

1456 {
1457  const unsigned char *nullpos = memchr(s, 0, len);
1458 
1459  if (nullpos == NULL)
1460  return len;
1461  else
1462  return nullpos - s;
1463 }

References len.

◆ pg_mule2wchar_with_len()

static int pg_mule2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 713 of file wchar.c.

714 {
715  int cnt = 0;
716 
717  while (len > 0 && *from)
718  {
719  if (IS_LC1(*from) && len >= 2)
720  {
721  *to = *from++ << 16;
722  *to |= *from++;
723  len -= 2;
724  }
725  else if (IS_LCPRV1(*from) && len >= 3)
726  {
727  from++;
728  *to = *from++ << 16;
729  *to |= *from++;
730  len -= 3;
731  }
732  else if (IS_LC2(*from) && len >= 3)
733  {
734  *to = *from++ << 16;
735  *to |= *from++ << 8;
736  *to |= *from++;
737  len -= 3;
738  }
739  else if (IS_LCPRV2(*from) && len >= 4)
740  {
741  from++;
742  *to = *from++ << 16;
743  *to |= *from++ << 8;
744  *to |= *from++;
745  len -= 4;
746  }
747  else
748  { /* assume ASCII */
749  *to = (unsigned char) *from++;
750  len--;
751  }
752  to++;
753  cnt++;
754  }
755  *to = 0;
756  return cnt;
757 }
#define IS_LCPRV2(c)
Definition: pg_wchar.h:161
#define IS_LC2(c)
Definition: pg_wchar.h:141
#define IS_LCPRV1(c)
Definition: pg_wchar.h:149
#define IS_LC1(c)
Definition: pg_wchar.h:123

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

◆ pg_mule_dsplen()

static int pg_mule_dsplen ( const unsigned char *  s)
static

Definition at line 850 of file wchar.c.

851 {
852  int len;
853 
854  /*
855  * Note: it's not really appropriate to assume that all multibyte charsets
856  * are double-wide on screen. But this seems an okay approximation for
857  * the MULE charsets we currently support.
858  */
859 
860  if (IS_LC1(*s))
861  len = 1;
862  else if (IS_LCPRV1(*s))
863  len = 1;
864  else if (IS_LC2(*s))
865  len = 2;
866  else if (IS_LCPRV2(*s))
867  len = 2;
868  else
869  len = 1; /* assume ASCII */
870 
871  return len;
872 }

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

◆ pg_mule_mblen()

int pg_mule_mblen ( const unsigned char *  s)

Definition at line 832 of file wchar.c.

833 {
834  int len;
835 
836  if (IS_LC1(*s))
837  len = 2;
838  else if (IS_LCPRV1(*s))
839  len = 3;
840  else if (IS_LC2(*s))
841  len = 3;
842  else if (IS_LCPRV2(*s))
843  len = 4;
844  else
845  len = 1; /* assume ASCII */
846  return len;
847 }

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

Referenced by mic2latin(), mic2latin_with_table(), and pg_mule_verifychar().

◆ pg_mule_verifychar()

static int pg_mule_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1399 of file wchar.c.

1400 {
1401  int l,
1402  mbl;
1403  unsigned char c;
1404 
1405  l = mbl = pg_mule_mblen(s);
1406 
1407  if (len < l)
1408  return -1;
1409 
1410  while (--l > 0)
1411  {
1412  c = *++s;
1413  if (!IS_HIGHBIT_SET(c))
1414  return -1;
1415  }
1416  return mbl;
1417 }
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:832

References IS_HIGHBIT_SET, len, and pg_mule_mblen().

Referenced by pg_mule_verifystr().

◆ pg_mule_verifystr()

static int pg_mule_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1420 of file wchar.c.

1421 {
1422  const unsigned char *start = s;
1423 
1424  while (len > 0)
1425  {
1426  int l;
1427 
1428  /* fast path for ASCII-subset characters */
1429  if (!IS_HIGHBIT_SET(*s))
1430  {
1431  if (*s == '\0')
1432  break;
1433  l = 1;
1434  }
1435  else
1436  {
1437  l = pg_mule_verifychar(s, len);
1438  if (l == -1)
1439  break;
1440  }
1441  s += l;
1442  len -= l;
1443  }
1444 
1445  return s - start;
1446 }
static int pg_mule_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1399

References IS_HIGHBIT_SET, len, and pg_mule_verifychar().

◆ pg_sjis_dsplen()

static int pg_sjis_dsplen ( const unsigned char *  s)
static

Definition at line 944 of file wchar.c.

945 {
946  int len;
947 
948  if (*s >= 0xa1 && *s <= 0xdf)
949  len = 1; /* 1 byte kana? */
950  else if (IS_HIGHBIT_SET(*s))
951  len = 2; /* kanji? */
952  else
953  len = pg_ascii_dsplen(s); /* should be ASCII */
954  return len;
955 }

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_sjis_mblen()

static int pg_sjis_mblen ( const unsigned char *  s)
static

Definition at line 930 of file wchar.c.

931 {
932  int len;
933 
934  if (*s >= 0xa1 && *s <= 0xdf)
935  len = 1; /* 1 byte kana? */
936  else if (IS_HIGHBIT_SET(*s))
937  len = 2; /* kanji? */
938  else
939  len = 1; /* should be ASCII */
940  return len;
941 }

References IS_HIGHBIT_SET, and len.

Referenced by pg_sjis_verifychar().

◆ pg_sjis_verifychar()

static int pg_sjis_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1466 of file wchar.c.

1467 {
1468  int l,
1469  mbl;
1470  unsigned char c1,
1471  c2;
1472 
1473  l = mbl = pg_sjis_mblen(s);
1474 
1475  if (len < l)
1476  return -1;
1477 
1478  if (l == 1) /* pg_sjis_mblen already verified it */
1479  return mbl;
1480 
1481  c1 = *s++;
1482  c2 = *s;
1483  if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2))
1484  return -1;
1485  return mbl;
1486 }
#define ISSJISTAIL(c)
Definition: pg_wchar.h:42
#define ISSJISHEAD(c)
Definition: pg_wchar.h:41
static int pg_sjis_mblen(const unsigned char *s)
Definition: wchar.c:930

References ISSJISHEAD, ISSJISTAIL, len, and pg_sjis_mblen().

Referenced by pg_sjis_verifystr().

◆ pg_sjis_verifystr()

static int pg_sjis_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1489 of file wchar.c.

1490 {
1491  const unsigned char *start = s;
1492 
1493  while (len > 0)
1494  {
1495  int l;
1496 
1497  /* fast path for ASCII-subset characters */
1498  if (!IS_HIGHBIT_SET(*s))
1499  {
1500  if (*s == '\0')
1501  break;
1502  l = 1;
1503  }
1504  else
1505  {
1506  l = pg_sjis_verifychar(s, len);
1507  if (l == -1)
1508  break;
1509  }
1510  s += l;
1511  len -= l;
1512  }
1513 
1514  return s - start;
1515 }
static int pg_sjis_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1466

References IS_HIGHBIT_SET, len, and pg_sjis_verifychar().

◆ pg_uhc_dsplen()

static int pg_uhc_dsplen ( const unsigned char *  s)
static

Definition at line 1027 of file wchar.c.

1028 {
1029  int len;
1030 
1031  if (IS_HIGHBIT_SET(*s))
1032  len = 2; /* 2byte? */
1033  else
1034  len = pg_ascii_dsplen(s); /* should be ASCII */
1035  return len;
1036 }

References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().

◆ pg_uhc_mblen()

static int pg_uhc_mblen ( const unsigned char *  s)
static

Definition at line 1015 of file wchar.c.

1016 {
1017  int len;
1018 
1019  if (IS_HIGHBIT_SET(*s))
1020  len = 2; /* 2byte? */
1021  else
1022  len = 1; /* should be ASCII */
1023  return len;
1024 }

References IS_HIGHBIT_SET, and len.

Referenced by pg_uhc_verifychar().

◆ pg_uhc_verifychar()

static int pg_uhc_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1616 of file wchar.c.

1617 {
1618  int l,
1619  mbl;
1620 
1621  l = mbl = pg_uhc_mblen(s);
1622 
1623  if (len < l)
1624  return -1;
1625 
1626  while (--l > 0)
1627  {
1628  if (*++s == '\0')
1629  return -1;
1630  }
1631 
1632  return mbl;
1633 }
static int pg_uhc_mblen(const unsigned char *s)
Definition: wchar.c:1015

References len, and pg_uhc_mblen().

Referenced by pg_uhc_verifystr().

◆ pg_uhc_verifystr()

static int pg_uhc_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1636 of file wchar.c.

1637 {
1638  const unsigned char *start = s;
1639 
1640  while (len > 0)
1641  {
1642  int l;
1643 
1644  /* fast path for ASCII-subset characters */
1645  if (!IS_HIGHBIT_SET(*s))
1646  {
1647  if (*s == '\0')
1648  break;
1649  l = 1;
1650  }
1651  else
1652  {
1653  l = pg_uhc_verifychar(s, len);
1654  if (l == -1)
1655  break;
1656  }
1657  s += l;
1658  len -= l;
1659  }
1660 
1661  return s - start;
1662 }
static int pg_uhc_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1616

References IS_HIGHBIT_SET, len, and pg_uhc_verifychar().

◆ pg_utf2wchar_with_len()

static int pg_utf2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 419 of file wchar.c.

420 {
421  int cnt = 0;
422  uint32 c1,
423  c2,
424  c3,
425  c4;
426 
427  while (len > 0 && *from)
428  {
429  if ((*from & 0x80) == 0)
430  {
431  *to = *from++;
432  len--;
433  }
434  else if ((*from & 0xe0) == 0xc0)
435  {
436  if (len < 2)
437  break; /* drop trailing incomplete char */
438  c1 = *from++ & 0x1f;
439  c2 = *from++ & 0x3f;
440  *to = (c1 << 6) | c2;
441  len -= 2;
442  }
443  else if ((*from & 0xf0) == 0xe0)
444  {
445  if (len < 3)
446  break; /* drop trailing incomplete char */
447  c1 = *from++ & 0x0f;
448  c2 = *from++ & 0x3f;
449  c3 = *from++ & 0x3f;
450  *to = (c1 << 12) | (c2 << 6) | c3;
451  len -= 3;
452  }
453  else if ((*from & 0xf8) == 0xf0)
454  {
455  if (len < 4)
456  break; /* drop trailing incomplete char */
457  c1 = *from++ & 0x07;
458  c2 = *from++ & 0x3f;
459  c3 = *from++ & 0x3f;
460  c4 = *from++ & 0x3f;
461  *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
462  len -= 4;
463  }
464  else
465  {
466  /* treat a bogus char as length 1; not ours to raise error */
467  *to = *from++;
468  len--;
469  }
470  to++;
471  cnt++;
472  }
473  *to = 0;
474  return cnt;
475 }

References len.

◆ pg_utf8_islegal()

bool pg_utf8_islegal ( const unsigned char *  source,
int  length 
)

Definition at line 2012 of file wchar.c.

2013 {
2014  unsigned char a;
2015 
2016  switch (length)
2017  {
2018  default:
2019  /* reject lengths 5 and 6 for now */
2020  return false;
2021  case 4:
2022  a = source[3];
2023  if (a < 0x80 || a > 0xBF)
2024  return false;
2025  /* FALL THRU */
2026  case 3:
2027  a = source[2];
2028  if (a < 0x80 || a > 0xBF)
2029  return false;
2030  /* FALL THRU */
2031  case 2:
2032  a = source[1];
2033  switch (*source)
2034  {
2035  case 0xE0:
2036  if (a < 0xA0 || a > 0xBF)
2037  return false;
2038  break;
2039  case 0xED:
2040  if (a < 0x80 || a > 0x9F)
2041  return false;
2042  break;
2043  case 0xF0:
2044  if (a < 0x90 || a > 0xBF)
2045  return false;
2046  break;
2047  case 0xF4:
2048  if (a < 0x80 || a > 0x8F)
2049  return false;
2050  break;
2051  default:
2052  if (a < 0x80 || a > 0xBF)
2053  return false;
2054  break;
2055  }
2056  /* FALL THRU */
2057  case 1:
2058  a = *source;
2059  if (a >= 0x80 && a < 0xC2)
2060  return false;
2061  if (a > 0xF4)
2062  return false;
2063  break;
2064  }
2065  return true;
2066 }
int a
Definition: isn.c:69
static rewind_source * source
Definition: pg_rewind.c:81

References a, and source.

Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifychar(), utf8_to_iso8859_1(), and UtfToLocal().

◆ pg_utf8_verifychar()

static int pg_utf8_verifychar ( const unsigned char *  s,
int  len 
)
static

Definition at line 1725 of file wchar.c.

1726 {
1727  int l;
1728 
1729  if ((*s & 0x80) == 0)
1730  {
1731  if (*s == '\0')
1732  return -1;
1733  return 1;
1734  }
1735  else if ((*s & 0xe0) == 0xc0)
1736  l = 2;
1737  else if ((*s & 0xf0) == 0xe0)
1738  l = 3;
1739  else if ((*s & 0xf8) == 0xf0)
1740  l = 4;
1741  else
1742  l = 1;
1743 
1744  if (l > len)
1745  return -1;
1746 
1747  if (!pg_utf8_islegal(s, l))
1748  return -1;
1749 
1750  return l;
1751 }
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:2012

References len, and pg_utf8_islegal().

Referenced by pg_utf8_verifystr().

◆ pg_utf8_verifystr()

static int pg_utf8_verifystr ( const unsigned char *  s,
int  len 
)
static

Definition at line 1915 of file wchar.c.

1916 {
1917  const unsigned char *start = s;
1918  const int orig_len = len;
1919  uint32 state = BGN;
1920 
1921 /*
1922  * Sixteen seems to give the best balance of performance across different
1923  * byte distributions.
1924  */
1925 #define STRIDE_LENGTH 16
1926 
1927  if (len >= STRIDE_LENGTH)
1928  {
1929  while (len >= STRIDE_LENGTH)
1930  {
1931  /*
1932  * If the chunk is all ASCII, we can skip the full UTF-8 check,
1933  * but we must first check for a non-END state, which means the
1934  * previous chunk ended in the middle of a multibyte sequence.
1935  */
1936  if (state != END || !is_valid_ascii(s, STRIDE_LENGTH))
1937  utf8_advance(s, &state, STRIDE_LENGTH);
1938 
1939  s += STRIDE_LENGTH;
1940  len -= STRIDE_LENGTH;
1941  }
1942 
1943  /* The error state persists, so we only need to check for it here. */
1944  if (state == ERR)
1945  {
1946  /*
1947  * Start over from the beginning with the slow path so we can
1948  * count the valid bytes.
1949  */
1950  len = orig_len;
1951  s = start;
1952  }
1953  else if (state != END)
1954  {
1955  /*
1956  * The fast path exited in the middle of a multibyte sequence.
1957  * Walk backwards to find the leading byte so that the slow path
1958  * can resume checking from there. We must always backtrack at
1959  * least one byte, since the current byte could be e.g. an ASCII
1960  * byte after a 2-byte lead, which is invalid.
1961  */
1962  do
1963  {
1964  Assert(s > start);
1965  s--;
1966  len++;
1967  Assert(IS_HIGHBIT_SET(*s));
1968  } while (pg_utf_mblen(s) <= 1);
1969  }
1970  }
1971 
1972  /* check remaining bytes */
1973  while (len > 0)
1974  {
1975  int l;
1976 
1977  /* fast path for ASCII-subset characters */
1978  if (!IS_HIGHBIT_SET(*s))
1979  {
1980  if (*s == '\0')
1981  break;
1982  l = 1;
1983  }
1984  else
1985  {
1986  l = pg_utf8_verifychar(s, len);
1987  if (l == -1)
1988  break;
1989  }
1990  s += l;
1991  len -= l;
1992  }
1993 
1994  return s - start;
1995 }
static bool is_valid_ascii(const unsigned char *s, int len)
Definition: pg_wchar.h:694
Definition: regguts.h:318
#define END
Definition: wchar.c:1816
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:549
#define ERR
Definition: wchar.c:1803
static int pg_utf8_verifychar(const unsigned char *s, int len)
Definition: wchar.c:1725
static void utf8_advance(const unsigned char *s, uint32 *state, int len)
Definition: wchar.c:1897
#define BGN
Definition: wchar.c:1805
#define STRIDE_LENGTH

References Assert(), BGN, END, ERR, IS_HIGHBIT_SET, is_valid_ascii(), len, pg_utf8_verifychar(), pg_utf_mblen(), STRIDE_LENGTH, and utf8_advance().

◆ pg_utf_dsplen()

static int pg_utf_dsplen ( const unsigned char *  s)
static

Definition at line 701 of file wchar.c.

702 {
703  return ucs_wcwidth(utf8_to_unicode(s));
704 }
pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: wchar.c:679
static int ucs_wcwidth(pg_wchar ucs)
Definition: wchar.c:639

References ucs_wcwidth(), and utf8_to_unicode().

◆ pg_utf_mblen()

int pg_utf_mblen ( const unsigned char *  s)

Definition at line 549 of file wchar.c.

550 {
551  int len;
552 
553  if ((*s & 0x80) == 0)
554  len = 1;
555  else if ((*s & 0xe0) == 0xc0)
556  len = 2;
557  else if ((*s & 0xf0) == 0xe0)
558  len = 3;
559  else if ((*s & 0xf8) == 0xf0)
560  len = 4;
561 #ifdef NOT_USED
562  else if ((*s & 0xfc) == 0xf8)
563  len = 5;
564  else if ((*s & 0xfe) == 0xfc)
565  len = 6;
566 #endif
567  else
568  len = 1;
569  return len;
570 }

References len.

Referenced by json_lex_string(), pg_saslprep(), pg_unicode_to_server(), pg_utf8_string_len(), pg_utf8_verifystr(), pg_wchar2utf_with_len(), unicode_is_normalized(), unicode_normalize_func(), utf8_to_iso8859_1(), and UtfToLocal().

◆ pg_wchar2euc_with_len()

static int pg_wchar2euc_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 355 of file wchar.c.

356 {
357  int cnt = 0;
358 
359  while (len > 0 && *from)
360  {
361  unsigned char c;
362 
363  if ((c = (*from >> 24)))
364  {
365  *to++ = c;
366  *to++ = (*from >> 16) & 0xff;
367  *to++ = (*from >> 8) & 0xff;
368  *to++ = *from & 0xff;
369  cnt += 4;
370  }
371  else if ((c = (*from >> 16)))
372  {
373  *to++ = c;
374  *to++ = (*from >> 8) & 0xff;
375  *to++ = *from & 0xff;
376  cnt += 3;
377  }
378  else if ((c = (*from >> 8)))
379  {
380  *to++ = c;
381  *to++ = *from & 0xff;
382  cnt += 2;
383  }
384  else
385  {
386  *to++ = *from;
387  cnt++;
388  }
389  from++;
390  len--;
391  }
392  *to = 0;
393  return cnt;
394 }

References len.

◆ pg_wchar2mule_with_len()

static int pg_wchar2mule_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 766 of file wchar.c.

767 {
768  int cnt = 0;
769 
770  while (len > 0 && *from)
771  {
772  unsigned char lb;
773 
774  lb = (*from >> 16) & 0xff;
775  if (IS_LC1(lb))
776  {
777  *to++ = lb;
778  *to++ = *from & 0xff;
779  cnt += 2;
780  }
781  else if (IS_LC2(lb))
782  {
783  *to++ = lb;
784  *to++ = (*from >> 8) & 0xff;
785  *to++ = *from & 0xff;
786  cnt += 3;
787  }
788  else if (IS_LCPRV1_A_RANGE(lb))
789  {
790  *to++ = LCPRV1_A;
791  *to++ = lb;
792  *to++ = *from & 0xff;
793  cnt += 3;
794  }
795  else if (IS_LCPRV1_B_RANGE(lb))
796  {
797  *to++ = LCPRV1_B;
798  *to++ = lb;
799  *to++ = *from & 0xff;
800  cnt += 3;
801  }
802  else if (IS_LCPRV2_A_RANGE(lb))
803  {
804  *to++ = LCPRV2_A;
805  *to++ = lb;
806  *to++ = (*from >> 8) & 0xff;
807  *to++ = *from & 0xff;
808  cnt += 4;
809  }
810  else if (IS_LCPRV2_B_RANGE(lb))
811  {
812  *to++ = LCPRV2_B;
813  *to++ = lb;
814  *to++ = (*from >> 8) & 0xff;
815  *to++ = *from & 0xff;
816  cnt += 4;
817  }
818  else
819  {
820  *to++ = *from & 0xff;
821  cnt += 1;
822  }
823  from++;
824  len--;
825  }
826  *to = 0;
827  return cnt;
828 }
#define LCPRV1_A
Definition: pg_wchar.h:147
#define LCPRV1_B
Definition: pg_wchar.h:148
#define LCPRV2_A
Definition: pg_wchar.h:159
#define IS_LCPRV2_B_RANGE(c)
Definition: pg_wchar.h:164
#define IS_LCPRV1_A_RANGE(c)
Definition: pg_wchar.h:150
#define IS_LCPRV1_B_RANGE(c)
Definition: pg_wchar.h:152
#define IS_LCPRV2_A_RANGE(c)
Definition: pg_wchar.h:162
#define LCPRV2_B
Definition: pg_wchar.h:160

References IS_LC1, IS_LC2, IS_LCPRV1_A_RANGE, IS_LCPRV1_B_RANGE, IS_LCPRV2_A_RANGE, IS_LCPRV2_B_RANGE, LCPRV1_A, LCPRV1_B, LCPRV2_A, LCPRV2_B, and len.

◆ pg_wchar2single_with_len()

static int pg_wchar2single_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 900 of file wchar.c.

901 {
902  int cnt = 0;
903 
904  while (len > 0 && *from)
905  {
906  *to++ = *from++;
907  len--;
908  cnt++;
909  }
910  *to = 0;
911  return cnt;
912 }

References len.

◆ pg_wchar2utf_with_len()

static int pg_wchar2utf_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 518 of file wchar.c.

519 {
520  int cnt = 0;
521 
522  while (len > 0 && *from)
523  {
524  int char_len;
525 
526  unicode_to_utf8(*from, to);
527  char_len = pg_utf_mblen(to);
528  cnt += char_len;
529  to += char_len;
530  from++;
531  len--;
532  }
533  *to = 0;
534  return cnt;
535 }
unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: wchar.c:483

References len, pg_utf_mblen(), and unicode_to_utf8().

◆ ucs_wcwidth()

static int ucs_wcwidth ( pg_wchar  ucs)
static

Definition at line 639 of file wchar.c.

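640 {
641 #include "common/unicode_combining_table.h"
642 #include "common/unicode_east_asian_fw_table.h"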
643 
644  /* test for 8-bit control characters */
645  if (ucs == 0)
646  return 0;
647 
648  if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
649  return -1;
650 
651  /*
652  * binary search in table of non-spacing characters
653  *
654  * XXX: In the official Unicode sources, it is possible for a character to
655  * be described as both non-spacing and wide at the same time. As of
656  * Unicode 13.0, treating the non-spacing property as the determining
657  * factor for display width leads to the correct behavior, so do that
658  * search first.
659  */
660  if (mbbisearch(ucs, combining,
661  sizeof(combining) / sizeof(struct mbinterval) - 1))
662  return 0;
663 
664  /* binary search in table of wide characters */
665  if (mbbisearch(ucs, east_asian_fw,
666  sizeof(east_asian_fw) / sizeof(struct mbinterval) - 1))
667  return 2;
668 
669  return 1;
670 }
static const struct mbinterval combining[]
static const struct mbinterval east_asian_fw[]
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
Definition: wchar.c:592

References combining, east_asian_fw, and mbbisearch().

Referenced by pg_utf_dsplen().

◆ unicode_to_utf8()

unsigned char* unicode_to_utf8 ( pg_wchar  c,
unsigned char *  utf8string 
)

Definition at line 483 of file wchar.c.

484 {
485  if (c <= 0x7F)
486  {
487  utf8string[0] = c;
488  }
489  else if (c <= 0x7FF)
490  {
491  utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
492  utf8string[1] = 0x80 | (c & 0x3F);
493  }
494  else if (c <= 0xFFFF)
495  {
496  utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
497  utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
498  utf8string[2] = 0x80 | (c & 0x3F);
499  }
500  else
501  {
502  utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
503  utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
504  utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
505  utf8string[3] = 0x80 | (c & 0x3F);
506  }
507 
508  return utf8string;
509 }

Referenced by json_lex_string(), pg_saslprep(), pg_unicode_to_server(), pg_wchar2utf_with_len(), and unicode_normalize_func().

◆ utf8_advance()

static void utf8_advance ( const unsigned char *  s,
uint32 *  state,
int  len 
)
static

Definition at line 1897 of file wchar.c.

1898 {
1899  /* Note: We deliberately don't check the state's value here. */
1900  while (len > 0)
1901  {
1902  /*
1903  * It's important that the mask value is 31: In most instruction sets,
1904  * a shift by a 32-bit operand is understood to be a shift by its mod
1905  * 32, so the compiler should elide the mask operation.
1906  */
1907  *state = Utf8Transition[*s++] >> (*state & 31);
1908  len--;
1909  }
1910 
1911  *state &= 31;
1912 }
static const uint32 Utf8Transition[256]
Definition: wchar.c:1839

References len, and Utf8Transition.

Referenced by pg_utf8_verifystr().

◆ utf8_to_unicode()

pg_wchar utf8_to_unicode ( const unsigned char *  c)

Definition at line 679 of file wchar.c.

680 {
681  if ((*c & 0x80) == 0)
682  return (pg_wchar) c[0];
683  else if ((*c & 0xe0) == 0xc0)
684  return (pg_wchar) (((c[0] & 0x1f) << 6) |
685  (c[1] & 0x3f));
686  else if ((*c & 0xf0) == 0xe0)
687  return (pg_wchar) (((c[0] & 0x0f) << 12) |
688  ((c[1] & 0x3f) << 6) |
689  (c[2] & 0x3f));
690  else if ((*c & 0xf8) == 0xf0)
691  return (pg_wchar) (((c[0] & 0x07) << 18) |
692  ((c[1] & 0x3f) << 12) |
693  ((c[2] & 0x3f) << 6) |
694  (c[3] & 0x3f));
695  else
696  /* that is an invalid code on purpose */
697  return 0xffffffff;
698 }
unsigned int pg_wchar
Definition: mbprint.c:31

Referenced by pg_saslprep(), pg_utf_dsplen(), unicode_is_normalized(), and unicode_normalize_func().

Variable Documentation

◆ pg_wchar_table

◆ Utf8Transition

const uint32 Utf8Transition[256]
static

Definition at line 1839 of file wchar.c.

Referenced by utf8_advance().