PostgreSQL Source Code  git master
wchar.c File Reference
#include "c.h"
#include "mb/pg_wchar.h"
#include "common/unicode_combining_table.h"
Include dependency graph for wchar.c:

Go to the source code of this file.

Data Structures

struct  mbinterval
 

Macros

#define IS_EUC_RANGE_VALID(c)   ((c) >= 0xa1 && (c) <= 0xfe)
 
#define pg_euccn_verifier   pg_euckr_verifier
 

Functions

static int pg_ascii2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_ascii_mblen (const unsigned char *s)
 
static int pg_ascii_dsplen (const unsigned char *s)
 
static int pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euc_mblen (const unsigned char *s)
 
static int pg_euc_dsplen (const unsigned char *s)
 
static int pg_eucjp2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_eucjp_mblen (const unsigned char *s)
 
static int pg_eucjp_dsplen (const unsigned char *s)
 
static int pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euckr_mblen (const unsigned char *s)
 
static int pg_euckr_dsplen (const unsigned char *s)
 
static int pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euccn_mblen (const unsigned char *s)
 
static int pg_euccn_dsplen (const unsigned char *s)
 
static int pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euctw_mblen (const unsigned char *s)
 
static int pg_euctw_dsplen (const unsigned char *s)
 
static int pg_wchar2euc_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_johab_mblen (const unsigned char *s)
 
static int pg_johab_dsplen (const unsigned char *s)
 
static int pg_utf2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
unsigned char * unicode_to_utf8 (pg_wchar c, unsigned char *utf8string)
 
static int pg_wchar2utf_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_utf_mblen (const unsigned char *s)
 
static int mbbisearch (pg_wchar ucs, const struct mbinterval *table, int max)
 
static int ucs_wcwidth (pg_wchar ucs)
 
pg_wchar utf8_to_unicode (const unsigned char *c)
 
static int pg_utf_dsplen (const unsigned char *s)
 
static int pg_mule2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2mule_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_mule_mblen (const unsigned char *s)
 
static int pg_mule_dsplen (const unsigned char *s)
 
static int pg_latin12wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2single_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_latin1_mblen (const unsigned char *s)
 
static int pg_latin1_dsplen (const unsigned char *s)
 
static int pg_sjis_mblen (const unsigned char *s)
 
static int pg_sjis_dsplen (const unsigned char *s)
 
static int pg_big5_mblen (const unsigned char *s)
 
static int pg_big5_dsplen (const unsigned char *s)
 
static int pg_gbk_mblen (const unsigned char *s)
 
static int pg_gbk_dsplen (const unsigned char *s)
 
static int pg_uhc_mblen (const unsigned char *s)
 
static int pg_uhc_dsplen (const unsigned char *s)
 
static int pg_gb18030_mblen (const unsigned char *s)
 
static int pg_gb18030_dsplen (const unsigned char *s)
 
static int pg_ascii_verifier (const unsigned char *s, int len)
 
static int pg_eucjp_verifier (const unsigned char *s, int len)
 
static int pg_euckr_verifier (const unsigned char *s, int len)
 
static int pg_euctw_verifier (const unsigned char *s, int len)
 
static int pg_johab_verifier (const unsigned char *s, int len)
 
static int pg_mule_verifier (const unsigned char *s, int len)
 
static int pg_latin1_verifier (const unsigned char *s, int len)
 
static int pg_sjis_verifier (const unsigned char *s, int len)
 
static int pg_big5_verifier (const unsigned char *s, int len)
 
static int pg_gbk_verifier (const unsigned char *s, int len)
 
static int pg_uhc_verifier (const unsigned char *s, int len)
 
static int pg_gb18030_verifier (const unsigned char *s, int len)
 
static int pg_utf8_verifier (const unsigned char *s, int len)
 
bool pg_utf8_islegal (const unsigned char *source, int length)
 
int pg_encoding_mblen (int encoding, const char *mbstr)
 
int pg_encoding_dsplen (int encoding, const char *mbstr)
 
int pg_encoding_verifymb (int encoding, const char *mbstr, int len)
 
int pg_encoding_max_length (int encoding)
 

Variables

const pg_wchar_tbl pg_wchar_table []
 

Macro Definition Documentation

◆ IS_EUC_RANGE_VALID

#define IS_EUC_RANGE_VALID (   c)    ((c) >= 0xa1 && (c) <= 0xfe)

◆ pg_euccn_verifier

#define pg_euccn_verifier   pg_euckr_verifier

Definition at line 1196 of file wchar.c.

Function Documentation

◆ mbbisearch()

static int mbbisearch ( pg_wchar  ucs,
const struct mbinterval *  table,
int  max 
)
static

Definition at line 592 of file wchar.c.

References mbinterval::first, and mbinterval::last.

Referenced by ucs_wcwidth().

593 {
594  int min = 0;
595  int mid;
596 
597  if (ucs < table[0].first || ucs > table[max].last)
598  return 0;
599  while (max >= min)
600  {
601  mid = (min + max) / 2;
602  if (ucs > table[mid].last)
603  min = mid + 1;
604  else if (ucs < table[mid].first)
605  max = mid - 1;
606  else
607  return 1;
608  }
609 
610  return 0;
611 }

◆ pg_ascii2wchar_with_len()

static int pg_ascii2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 48 of file wchar.c.

49 {
50  int cnt = 0;
51 
52  while (len > 0 && *from)
53  {
54  *to++ = *from++;
55  len--;
56  cnt++;
57  }
58  *to = 0;
59  return cnt;
60 }

◆ pg_ascii_dsplen()

static int pg_ascii_dsplen ( const unsigned char *  s)
static

Definition at line 69 of file wchar.c.

Referenced by pg_big5_dsplen(), pg_euc_dsplen(), pg_euccn_dsplen(), pg_eucjp_dsplen(), pg_euctw_dsplen(), pg_gb18030_dsplen(), pg_gbk_dsplen(), pg_latin1_dsplen(), pg_sjis_dsplen(), and pg_uhc_dsplen().

/*
 * Display length of the single-byte character at *s.
 *
 * Returns 0 for the terminating NUL, -1 for bytes below 0x20 and for 0x7f
 * (the ASCII control characters), and 1 for every other byte.
 */
static int
pg_ascii_dsplen(const unsigned char *s)
{
	const unsigned char ch = *s;

	if (ch == '\0')
		return 0;

	return (ch < 0x20 || ch == 0x7f) ? -1 : 1;
}

◆ pg_ascii_mblen()

static int pg_ascii_mblen ( const unsigned char *  s)
static

Definition at line 63 of file wchar.c.

/*
 * Byte length of the character at *s: always 1, the input is not examined.
 */
static int
pg_ascii_mblen(const unsigned char *s)
{
	(void) s;					/* intentionally unused */

	return 1;
}

◆ pg_ascii_verifier()

static int pg_ascii_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1104 of file wchar.c.

/*
 * Verifier for the ASCII table entry: accepts any byte unconditionally and
 * reports a length of 1.  Neither argument is examined.
 */
static int
pg_ascii_verifier(const unsigned char *s, int len)
{
	(void) s;					/* intentionally unused */
	(void) len;					/* intentionally unused */

	return 1;
}

◆ pg_big5_dsplen()

static int pg_big5_dsplen ( const unsigned char *  s)
static

Definition at line 980 of file wchar.c.

References IS_HIGHBIT_SET, and pg_ascii_dsplen().

/*
 * Display length of the Big5 character starting at *s.
 *
 * A byte with the high bit set is taken as the start of a two-byte
 * character and counted as 2; anything else is delegated to
 * pg_ascii_dsplen().
 */
static int
pg_big5_dsplen(const unsigned char *s)
{
	if (IS_HIGHBIT_SET(*s))
		return 2;				/* kanji? */

	return pg_ascii_dsplen(s);	/* should be ASCII */
}
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:69
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119

◆ pg_big5_mblen()

static int pg_big5_mblen ( const unsigned char *  s)
static

Definition at line 968 of file wchar.c.

References IS_HIGHBIT_SET.

Referenced by pg_big5_verifier().

/*
 * Byte length of the Big5 character starting at *s: 2 when the first byte
 * has its high bit set, otherwise 1.  Only the first byte is inspected.
 */
static int
pg_big5_mblen(const unsigned char *s)
{
	return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119

◆ pg_big5_verifier()

static int pg_big5_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1323 of file wchar.c.

References pg_big5_mblen().

/*
 * Verify one Big5 character at s, with "len" bytes available.
 *
 * Returns the character's byte length, or -1 if the character would run
 * past "len" or if any trailing byte is a NUL.
 */
static int
pg_big5_verifier(const unsigned char *s, int len)
{
	const int	mbl = pg_big5_mblen(s);
	int			i;

	if (mbl > len)
		return -1;

	/* every byte after the first must be non-zero */
	for (i = 1; i < mbl; i++)
	{
		if (s[i] == '\0')
			return -1;
	}

	return mbl;
}
static int pg_big5_mblen(const unsigned char *s)
Definition: wchar.c:968

◆ pg_encoding_dsplen()

int pg_encoding_dsplen ( int  encoding,
const char *  mbstr 
)

Definition at line 1565 of file wchar.c.

References PG_SQL_ASCII, and PG_VALID_ENCODING.

Referenced by PQdsplen(), reportErrorPosition(), and surrogate_pair_to_codepoint().

1566 {
1567  return (PG_VALID_ENCODING(encoding) ?
1568  pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
1569  pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
1570 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505

◆ pg_encoding_max_length()

int pg_encoding_max_length ( int  encoding)

Definition at line 1589 of file wchar.c.

References Assert, encoding, pg_wchar_tbl::maxmblen, and PG_VALID_ENCODING.

Referenced by ascii(), chr(), pg_encoding_mbcliplen(), pg_verify_mbstr_len(), reportErrorPosition(), surrogate_pair_to_codepoint(), and type_maximum_size().

1590 {
1592 
1594 }
int maxmblen
Definition: pg_wchar.h:385
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
#define Assert(condition)
Definition: c.h:738
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505

◆ pg_encoding_mblen()

int pg_encoding_mblen ( int  encoding,
const char *  mbstr 
)

Definition at line 1554 of file wchar.c.

References PG_SQL_ASCII, and PG_VALID_ENCODING.

Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), CopyReadLineText(), json_lex_string(), PQescapeInternal(), PQescapeStringInternal(), PQmblen(), report_invalid_encoding(), report_untranslatable_char(), reportErrorPosition(), surrogate_pair_to_codepoint(), and xml_is_document().

1555 {
1556  return (PG_VALID_ENCODING(encoding) ?
1557  pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) :
1558  pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
1559 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505

◆ pg_encoding_verifymb()

int pg_encoding_verifymb ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 1578 of file wchar.c.

References PG_SQL_ASCII, and PG_VALID_ENCODING.

Referenced by big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2mic(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), shift_jis_20042euc_jis_2004(), sjis2euc_jp(), and surrogate_pair_to_codepoint().

1579 {
1580  return (PG_VALID_ENCODING(encoding) ?
1581  pg_wchar_table[encoding].mbverify((const unsigned char *) mbstr, len) :
1582  pg_wchar_table[PG_SQL_ASCII].mbverify((const unsigned char *) mbstr, len));
1583 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505

◆ pg_euc2wchar_with_len()

static int pg_euc2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 83 of file wchar.c.

References IS_HIGHBIT_SET, SS2, and SS3.

Referenced by pg_eucjp2wchar_with_len(), and pg_euckr2wchar_with_len().

84 {
85  int cnt = 0;
86 
87  while (len > 0 && *from)
88  {
89  if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte
90  * KANA") */
91  {
92  from++;
93  *to = (SS2 << 8) | *from++;
94  len -= 2;
95  }
96  else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
97  {
98  from++;
99  *to = (SS3 << 16) | (*from++ << 8);
100  *to |= *from++;
101  len -= 3;
102  }
103  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
104  {
105  *to = *from++ << 8;
106  *to |= *from++;
107  len -= 2;
108  }
109  else /* must be ASCII */
110  {
111  *to = *from++;
112  len--;
113  }
114  to++;
115  cnt++;
116  }
117  *to = 0;
118  return cnt;
119 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119
#define SS2
Definition: pg_wchar.h:35

◆ pg_euc_dsplen()

static int pg_euc_dsplen ( const unsigned char *  s)
inline static

Definition at line 138 of file wchar.c.

References IS_HIGHBIT_SET, pg_ascii_dsplen(), SS2, and SS3.

Referenced by pg_euckr_dsplen(), and pg_johab_dsplen().

139 {
140  int len;
141 
142  if (*s == SS2)
143  len = 2;
144  else if (*s == SS3)
145  len = 2;
146  else if (IS_HIGHBIT_SET(*s))
147  len = 2;
148  else
149  len = pg_ascii_dsplen(s);
150  return len;
151 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:69
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119
#define SS2
Definition: pg_wchar.h:35

◆ pg_euc_mblen()

static int pg_euc_mblen ( const unsigned char *  s)
inline static

Definition at line 122 of file wchar.c.

References IS_HIGHBIT_SET, SS2, and SS3.

Referenced by pg_eucjp_mblen(), pg_euckr_mblen(), and pg_johab_mblen().

123 {
124  int len;
125 
126  if (*s == SS2)
127  len = 2;
128  else if (*s == SS3)
129  len = 3;
130  else if (IS_HIGHBIT_SET(*s))
131  len = 2;
132  else
133  len = 1;
134  return len;
135 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119
#define SS2
Definition: pg_wchar.h:35

◆ pg_euccn2wchar_with_len()

static int pg_euccn2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 210 of file wchar.c.

References IS_HIGHBIT_SET, SS2, and SS3.

211 {
212  int cnt = 0;
213 
214  while (len > 0 && *from)
215  {
216  if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
217  {
218  from++;
219  *to = (SS2 << 16) | (*from++ << 8);
220  *to |= *from++;
221  len -= 3;
222  }
223  else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */
224  {
225  from++;
226  *to = (SS3 << 16) | (*from++ << 8);
227  *to |= *from++;
228  len -= 3;
229  }
230  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
231  {
232  *to = *from++ << 8;
233  *to |= *from++;
234  len -= 2;
235  }
236  else
237  {
238  *to = *from++;
239  len--;
240  }
241  to++;
242  cnt++;
243  }
244  *to = 0;
245  return cnt;
246 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119
#define SS2
Definition: pg_wchar.h:35

◆ pg_euccn_dsplen()

static int pg_euccn_dsplen ( const unsigned char *  s)
static

Definition at line 261 of file wchar.c.

References IS_HIGHBIT_SET, and pg_ascii_dsplen().

/*
 * Display length of the EUC_CN character starting at *s: 2 when the first
 * byte has its high bit set, otherwise whatever pg_ascii_dsplen() reports.
 */
static int
pg_euccn_dsplen(const unsigned char *s)
{
	return IS_HIGHBIT_SET(*s) ? 2 : pg_ascii_dsplen(s);
}
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:69
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119

◆ pg_euccn_mblen()

static int pg_euccn_mblen ( const unsigned char *  s)
static

Definition at line 249 of file wchar.c.

References IS_HIGHBIT_SET.

/*
 * Byte length of the EUC_CN character starting at *s: 2 when the first
 * byte has its high bit set, otherwise 1.
 */
static int
pg_euccn_mblen(const unsigned char *s)
{
	if (IS_HIGHBIT_SET(*s))
		return 2;

	return 1;
}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119

◆ pg_eucjp2wchar_with_len()

static int pg_eucjp2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 157 of file wchar.c.

References pg_euc2wchar_with_len().

158 {
159  return pg_euc2wchar_with_len(from, to, len);
160 }
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition: wchar.c:83

◆ pg_eucjp_dsplen()

static int pg_eucjp_dsplen ( const unsigned char *  s)
static

Definition at line 169 of file wchar.c.

References IS_HIGHBIT_SET, pg_ascii_dsplen(), SS2, and SS3.

170 {
171  int len;
172 
173  if (*s == SS2)
174  len = 1;
175  else if (*s == SS3)
176  len = 2;
177  else if (IS_HIGHBIT_SET(*s))
178  len = 2;
179  else
180  len = pg_ascii_dsplen(s);
181  return len;
182 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:69
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119
#define SS2
Definition: pg_wchar.h:35

◆ pg_eucjp_mblen()

static int pg_eucjp_mblen ( const unsigned char *  s)
static

Definition at line 163 of file wchar.c.

References pg_euc_mblen().

/*
 * Byte length of the EUC_JP character at *s; identical to the generic EUC
 * rule implemented by pg_euc_mblen().
 */
static int
pg_eucjp_mblen(const unsigned char *s)
{
	const int	nbytes = pg_euc_mblen(s);

	return nbytes;
}
static int pg_euc_mblen(const unsigned char *s)
Definition: wchar.c:122

◆ pg_eucjp_verifier()

static int pg_eucjp_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1112 of file wchar.c.

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, SS2, and SS3.

1113 {
1114  int l;
1115  unsigned char c1,
1116  c2;
1117 
1118  c1 = *s++;
1119 
1120  switch (c1)
1121  {
1122  case SS2: /* JIS X 0201 */
1123  l = 2;
1124  if (l > len)
1125  return -1;
1126  c2 = *s++;
1127  if (c2 < 0xa1 || c2 > 0xdf)
1128  return -1;
1129  break;
1130 
1131  case SS3: /* JIS X 0212 */
1132  l = 3;
1133  if (l > len)
1134  return -1;
1135  c2 = *s++;
1136  if (!IS_EUC_RANGE_VALID(c2))
1137  return -1;
1138  c2 = *s++;
1139  if (!IS_EUC_RANGE_VALID(c2))
1140  return -1;
1141  break;
1142 
1143  default:
1144  if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1145  {
1146  l = 2;
1147  if (l > len)
1148  return -1;
1149  if (!IS_EUC_RANGE_VALID(c1))
1150  return -1;
1151  c2 = *s++;
1152  if (!IS_EUC_RANGE_VALID(c2))
1153  return -1;
1154  }
1155  else
1156  /* must be ASCII */
1157  {
1158  l = 1;
1159  }
1160  break;
1161  }
1162 
1163  return l;
1164 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1109
#define SS2
Definition: pg_wchar.h:35

◆ pg_euckr2wchar_with_len()

static int pg_euckr2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 188 of file wchar.c.

References pg_euc2wchar_with_len().

189 {
190  return pg_euc2wchar_with_len(from, to, len);
191 }
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition: wchar.c:83

◆ pg_euckr_dsplen()

static int pg_euckr_dsplen ( const unsigned char *  s)
static

Definition at line 200 of file wchar.c.

References pg_euc_dsplen().

/*
 * Display length of the EUC_KR character at *s; identical to the generic
 * EUC rule implemented by pg_euc_dsplen().
 */
static int
pg_euckr_dsplen(const unsigned char *s)
{
	const int	width = pg_euc_dsplen(s);

	return width;
}
static int pg_euc_dsplen(const unsigned char *s)
Definition: wchar.c:138

◆ pg_euckr_mblen()

static int pg_euckr_mblen ( const unsigned char *  s)
static

Definition at line 194 of file wchar.c.

References pg_euc_mblen().

/*
 * Byte length of the EUC_KR character at *s; identical to the generic EUC
 * rule implemented by pg_euc_mblen().
 */
static int
pg_euckr_mblen(const unsigned char *s)
{
	const int	nbytes = pg_euc_mblen(s);

	return nbytes;
}
static int pg_euc_mblen(const unsigned char *s)
Definition: wchar.c:122

◆ pg_euckr_verifier()

static int pg_euckr_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1167 of file wchar.c.

References IS_EUC_RANGE_VALID, and IS_HIGHBIT_SET.

/*
 * Verify one EUC_KR character at s, with "len" bytes available.
 *
 * A byte without the high bit is accepted as a 1-byte character.
 * Otherwise the character is two bytes and both must satisfy
 * IS_EUC_RANGE_VALID.  Returns the byte length, or -1 on a short or
 * out-of-range sequence.  The first byte is read unconditionally.
 */
static int
pg_euckr_verifier(const unsigned char *s, int len)
{
	const unsigned char lead = s[0];

	/* must be ASCII */
	if (!IS_HIGHBIT_SET(lead))
		return 1;

	if (len < 2)
		return -1;
	if (!IS_EUC_RANGE_VALID(lead) || !IS_EUC_RANGE_VALID(s[1]))
		return -1;

	return 2;
}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1109

◆ pg_euctw2wchar_with_len()

static int pg_euctw2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 277 of file wchar.c.

References IS_HIGHBIT_SET, SS2, and SS3.

278 {
279  int cnt = 0;
280 
281  while (len > 0 && *from)
282  {
283  if (*from == SS2 && len >= 4) /* code set 2 */
284  {
285  from++;
286  *to = (((uint32) SS2) << 24) | (*from++ << 16);
287  *to |= *from++ << 8;
288  *to |= *from++;
289  len -= 4;
290  }
291  else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
292  {
293  from++;
294  *to = (SS3 << 16) | (*from++ << 8);
295  *to |= *from++;
296  len -= 3;
297  }
298  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */
299  {
300  *to = *from++ << 8;
301  *to |= *from++;
302  len -= 2;
303  }
304  else
305  {
306  *to = *from++;
307  len--;
308  }
309  to++;
310  cnt++;
311  }
312  *to = 0;
313  return cnt;
314 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119
unsigned int uint32
Definition: c.h:367
#define SS2
Definition: pg_wchar.h:35

◆ pg_euctw_dsplen()

static int pg_euctw_dsplen ( const unsigned char *  s)
static

Definition at line 333 of file wchar.c.

References IS_HIGHBIT_SET, pg_ascii_dsplen(), SS2, and SS3.

334 {
335  int len;
336 
337  if (*s == SS2)
338  len = 2;
339  else if (*s == SS3)
340  len = 2;
341  else if (IS_HIGHBIT_SET(*s))
342  len = 2;
343  else
344  len = pg_ascii_dsplen(s);
345  return len;
346 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:69
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119
#define SS2
Definition: pg_wchar.h:35

◆ pg_euctw_mblen()

static int pg_euctw_mblen ( const unsigned char *  s)
static

Definition at line 317 of file wchar.c.

References IS_HIGHBIT_SET, SS2, and SS3.

318 {
319  int len;
320 
321  if (*s == SS2)
322  len = 4;
323  else if (*s == SS3)
324  len = 3;
325  else if (IS_HIGHBIT_SET(*s))
326  len = 2;
327  else
328  len = 1;
329  return len;
330 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119
#define SS2
Definition: pg_wchar.h:35

◆ pg_euctw_verifier()

static int pg_euctw_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1199 of file wchar.c.

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, SS2, and SS3.

1200 {
1201  int l;
1202  unsigned char c1,
1203  c2;
1204 
1205  c1 = *s++;
1206 
1207  switch (c1)
1208  {
1209  case SS2: /* CNS 11643 Plane 1-7 */
1210  l = 4;
1211  if (l > len)
1212  return -1;
1213  c2 = *s++;
1214  if (c2 < 0xa1 || c2 > 0xa7)
1215  return -1;
1216  c2 = *s++;
1217  if (!IS_EUC_RANGE_VALID(c2))
1218  return -1;
1219  c2 = *s++;
1220  if (!IS_EUC_RANGE_VALID(c2))
1221  return -1;
1222  break;
1223 
1224  case SS3: /* unused */
1225  return -1;
1226 
1227  default:
1228  if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */
1229  {
1230  l = 2;
1231  if (l > len)
1232  return -1;
1233  /* no further range check on c1? */
1234  c2 = *s++;
1235  if (!IS_EUC_RANGE_VALID(c2))
1236  return -1;
1237  }
1238  else
1239  /* must be ASCII */
1240  {
1241  l = 1;
1242  }
1243  break;
1244  }
1245  return l;
1246 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1109
#define SS2
Definition: pg_wchar.h:35

◆ pg_gb18030_dsplen()

static int pg_gb18030_dsplen ( const unsigned char *  s)
static

Definition at line 1075 of file wchar.c.

References IS_HIGHBIT_SET, and pg_ascii_dsplen().

/*
 * Display length of the GB18030 character starting at *s: 2 when the first
 * byte has its high bit set (regardless of whether the character is 2 or 4
 * bytes long), otherwise whatever pg_ascii_dsplen() reports.
 */
static int
pg_gb18030_dsplen(const unsigned char *s)
{
	if (IS_HIGHBIT_SET(*s))
		return 2;

	return pg_ascii_dsplen(s);	/* ASCII */
}
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:69
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119

◆ pg_gb18030_mblen()

static int pg_gb18030_mblen ( const unsigned char *  s)
static

Definition at line 1061 of file wchar.c.

References IS_HIGHBIT_SET.

/*
 * Byte length of the GB18030 character starting at *s.
 *
 * A first byte without the high bit is a 1-byte character.  Otherwise the
 * second byte decides: 0x30..0x39 means a 4-byte character, anything else
 * a 2-byte character.  Note the second byte is read whenever the first has
 * its high bit set.
 */
static int
pg_gb18030_mblen(const unsigned char *s)
{
	if (!IS_HIGHBIT_SET(*s))
		return 1;				/* ASCII */

	return (s[1] >= 0x30 && s[1] <= 0x39) ? 4 : 2;
}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119

◆ pg_gb18030_verifier()

static int pg_gb18030_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1383 of file wchar.c.

References IS_HIGHBIT_SET.

/*
 * Verify one GB18030 character at s, with "len" bytes available.
 *
 * Returns 1 for a byte without the high bit, 4 or 2 for a well-formed
 * multibyte character, and -1 otherwise (including when too few bytes are
 * available for the form the sequence appears to be).
 */
static int
pg_gb18030_verifier(const unsigned char *s, int len)
{
	if (!IS_HIGHBIT_SET(*s))
		return 1;				/* ASCII */

	if (len >= 4 && s[1] >= 0x30 && s[1] <= 0x39)
	{
		/* Should be 4-byte: check bytes 1 and 3 (0x81..0xfe), byte 4 (digit) */
		if (s[0] >= 0x81 && s[0] <= 0xfe &&
			s[2] >= 0x81 && s[2] <= 0xfe &&
			s[3] >= 0x30 && s[3] <= 0x39)
			return 4;
		return -1;
	}

	if (len >= 2 && s[0] >= 0x81 && s[0] <= 0xfe)
	{
		/* Should be 2-byte: second byte in 0x40..0x7e or 0x80..0xfe */
		if ((s[1] >= 0x40 && s[1] <= 0x7e) ||
			(s[1] >= 0x80 && s[1] <= 0xfe))
			return 2;
		return -1;
	}

	return -1;
}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119

◆ pg_gbk_dsplen()

static int pg_gbk_dsplen ( const unsigned char *  s)
static

Definition at line 1007 of file wchar.c.

References IS_HIGHBIT_SET, and pg_ascii_dsplen().

/*
 * Display length of the GBK character starting at *s: 2 when the first
 * byte has its high bit set, otherwise whatever pg_ascii_dsplen() reports.
 */
static int
pg_gbk_dsplen(const unsigned char *s)
{
	/* high bit set: kanji?  otherwise it should be ASCII */
	return IS_HIGHBIT_SET(*s) ? 2 : pg_ascii_dsplen(s);
}
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:69
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119

◆ pg_gbk_mblen()

static int pg_gbk_mblen ( const unsigned char *  s)
static

Definition at line 995 of file wchar.c.

References IS_HIGHBIT_SET.

Referenced by pg_gbk_verifier().

/*
 * Byte length of the GBK character starting at *s: 2 when the first byte
 * has its high bit set, otherwise 1.  Only the first byte is inspected.
 */
static int
pg_gbk_mblen(const unsigned char *s)
{
	if (IS_HIGHBIT_SET(*s))
		return 2;				/* kanji? */

	return 1;					/* should be ASCII */
}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119

◆ pg_gbk_verifier()

static int pg_gbk_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1343 of file wchar.c.

References pg_gbk_mblen().

/*
 * Verify one GBK character at s, with "len" bytes available.
 *
 * Returns the character's byte length, or -1 if the character would run
 * past "len" or if any trailing byte is a NUL.
 */
static int
pg_gbk_verifier(const unsigned char *s, int len)
{
	const int	mbl = pg_gbk_mblen(s);
	int			i;

	if (mbl > len)
		return -1;

	/* every byte after the first must be non-zero */
	for (i = 1; i < mbl; i++)
	{
		if (s[i] == '\0')
			return -1;
	}

	return mbl;
}
static int pg_gbk_mblen(const unsigned char *s)
Definition: wchar.c:995

◆ pg_johab_dsplen()

static int pg_johab_dsplen ( const unsigned char *  s)
static

Definition at line 407 of file wchar.c.

References pg_euc_dsplen().

/*
 * Display length of the JOHAB character at *s; this simply reuses the
 * generic EUC rule implemented by pg_euc_dsplen().
 */
static int
pg_johab_dsplen(const unsigned char *s)
{
	const int	width = pg_euc_dsplen(s);

	return width;
}
static int pg_euc_dsplen(const unsigned char *s)
Definition: wchar.c:138

◆ pg_johab_mblen()

static int pg_johab_mblen ( const unsigned char *  s)
static

Definition at line 401 of file wchar.c.

References pg_euc_mblen().

Referenced by pg_johab_verifier().

/*
 * Byte length of the JOHAB character at *s; this simply reuses the generic
 * EUC rule implemented by pg_euc_mblen().
 */
static int
pg_johab_mblen(const unsigned char *s)
{
	const int	nbytes = pg_euc_mblen(s);

	return nbytes;
}
static int pg_euc_mblen(const unsigned char *s)
Definition: wchar.c:122

◆ pg_johab_verifier()

static int pg_johab_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1249 of file wchar.c.

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, and pg_johab_mblen().

/*
 * Verify one JOHAB character at s, with "len" bytes available.
 *
 * Returns the byte length reported by pg_johab_mblen(), or -1 if the
 * character would run past "len" or, for a character whose first byte has
 * the high bit set, any trailing byte fails IS_EUC_RANGE_VALID.
 */
static int
pg_johab_verifier(const unsigned char *s, int len)
{
	const int	mbl = pg_johab_mblen(s);
	int			i;

	if (mbl > len)
		return -1;

	/* no high bit on the first byte: accepted without further checks */
	if (!IS_HIGHBIT_SET(*s))
		return mbl;

	for (i = 1; i < mbl; i++)
	{
		const unsigned char trail = s[i];

		if (!IS_EUC_RANGE_VALID(trail))
			return -1;
	}

	return mbl;
}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119
char * c
static int pg_johab_mblen(const unsigned char *s)
Definition: wchar.c:401
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1109

◆ pg_latin12wchar_with_len()

static int pg_latin12wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 885 of file wchar.c.

886 {
887  int cnt = 0;
888 
889  while (len > 0 && *from)
890  {
891  *to++ = *from++;
892  len--;
893  cnt++;
894  }
895  *to = 0;
896  return cnt;
897 }

◆ pg_latin1_dsplen()

static int pg_latin1_dsplen ( const unsigned char *  s)
static

Definition at line 928 of file wchar.c.

References pg_ascii_dsplen().

/*
 * Display length of the single-byte character at *s; same rule as
 * pg_ascii_dsplen(), to which this delegates.
 */
static int
pg_latin1_dsplen(const unsigned char *s)
{
	const int	width = pg_ascii_dsplen(s);

	return width;
}
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:69

◆ pg_latin1_mblen()

static int pg_latin1_mblen ( const unsigned char *  s)
static

Definition at line 922 of file wchar.c.

/*
 * Byte length of the character at *s: always 1, the input is not examined.
 */
static int
pg_latin1_mblen(const unsigned char *s)
{
	(void) s;					/* intentionally unused */

	return 1;
}

◆ pg_latin1_verifier()

static int pg_latin1_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1294 of file wchar.c.

/*
 * Verifier for single-byte encodings: accepts any byte unconditionally and
 * reports a length of 1.  Neither argument is examined.
 */
static int
pg_latin1_verifier(const unsigned char *s, int len)
{
	(void) s;					/* intentionally unused */
	(void) len;					/* intentionally unused */

	return 1;
}

◆ pg_mule2wchar_with_len()

static int pg_mule2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 720 of file wchar.c.

References IS_LC1, IS_LC2, IS_LCPRV1, and IS_LCPRV2.

721 {
722  int cnt = 0;
723 
724  while (len > 0 && *from)
725  {
726  if (IS_LC1(*from) && len >= 2)
727  {
728  *to = *from++ << 16;
729  *to |= *from++;
730  len -= 2;
731  }
732  else if (IS_LCPRV1(*from) && len >= 3)
733  {
734  from++;
735  *to = *from++ << 16;
736  *to |= *from++;
737  len -= 3;
738  }
739  else if (IS_LC2(*from) && len >= 3)
740  {
741  *to = *from++ << 16;
742  *to |= *from++ << 8;
743  *to |= *from++;
744  len -= 3;
745  }
746  else if (IS_LCPRV2(*from) && len >= 4)
747  {
748  from++;
749  *to = *from++ << 16;
750  *to |= *from++ << 8;
751  *to |= *from++;
752  len -= 4;
753  }
754  else
755  { /* assume ASCII */
756  *to = (unsigned char) *from++;
757  len--;
758  }
759  to++;
760  cnt++;
761  }
762  *to = 0;
763  return cnt;
764 }
#define IS_LC2(c)
Definition: pg_wchar.h:144
#define IS_LCPRV2(c)
Definition: pg_wchar.h:164
#define IS_LCPRV1(c)
Definition: pg_wchar.h:152
#define IS_LC1(c)
Definition: pg_wchar.h:123

◆ pg_mule_dsplen()

static int pg_mule_dsplen ( const unsigned char *  s)
static

Definition at line 857 of file wchar.c.

References IS_LC1, IS_LC2, IS_LCPRV1, and IS_LCPRV2.

/*
 * Display length of the MULE internal code character starting at *s.
 *
 * IS_LC1 and IS_LCPRV1 lead bytes count as 1, IS_LC2 and IS_LCPRV2 lead
 * bytes count as 2, and anything else is assumed to be ASCII and counts
 * as 1.
 *
 * Treating every multibyte charset as double-wide on screen is not
 * strictly right, but it is considered an acceptable approximation for the
 * MULE charsets currently supported.
 */
static int
pg_mule_dsplen(const unsigned char *s)
{
	/* the single-width classes are tested first, as before */
	if (IS_LC1(*s) || IS_LCPRV1(*s))
		return 1;

	if (IS_LC2(*s) || IS_LCPRV2(*s))
		return 2;

	return 1;					/* assume ASCII */
}
#define IS_LC2(c)
Definition: pg_wchar.h:144
#define IS_LCPRV2(c)
Definition: pg_wchar.h:164
#define IS_LCPRV1(c)
Definition: pg_wchar.h:152
#define IS_LC1(c)
Definition: pg_wchar.h:123

◆ pg_mule_mblen()

int pg_mule_mblen ( const unsigned char *  s)

Definition at line 839 of file wchar.c.

References IS_LC1, IS_LC2, IS_LCPRV1, and IS_LCPRV2.

Referenced by mic2latin(), mic2latin_with_table(), pg_mule_verifier(), and surrogate_pair_to_codepoint().

/*
 * Byte length of the MULE internal code character starting at *s.
 *
 * The lead byte is classified in this order: IS_LC1 -> 2 bytes,
 * IS_LCPRV1 -> 3, IS_LC2 -> 3, IS_LCPRV2 -> 4; anything else is assumed to
 * be ASCII and has length 1.  Only the first byte is inspected.
 */
int
pg_mule_mblen(const unsigned char *s)
{
	if (IS_LC1(*s))
		return 2;
	if (IS_LCPRV1(*s) || IS_LC2(*s))
		return 3;
	if (IS_LCPRV2(*s))
		return 4;

	return 1;					/* assume ASCII */
}
#define IS_LC2(c)
Definition: pg_wchar.h:144
#define IS_LCPRV2(c)
Definition: pg_wchar.h:164
#define IS_LCPRV1(c)
Definition: pg_wchar.h:152
#define IS_LC1(c)
Definition: pg_wchar.h:123

◆ pg_mule_verifier()

static int pg_mule_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1273 of file wchar.c.

References IS_HIGHBIT_SET, and pg_mule_mblen().

/*
 * Verify one MULE internal code character at s, with "len" bytes
 * available.
 *
 * Returns the byte length reported by pg_mule_mblen(), or -1 if the
 * character would run past "len" or any trailing byte lacks the high bit.
 */
static int
pg_mule_verifier(const unsigned char *s, int len)
{
	const int	mbl = pg_mule_mblen(s);
	int			i;

	if (mbl > len)
		return -1;

	for (i = 1; i < mbl; i++)
	{
		const unsigned char trail = s[i];

		if (!IS_HIGHBIT_SET(trail))
			return -1;
	}

	return mbl;
}
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:839
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119
char * c

◆ pg_sjis_dsplen()

static int pg_sjis_dsplen ( const unsigned char *  s)
static

Definition at line 951 of file wchar.c.

References IS_HIGHBIT_SET, and pg_ascii_dsplen().

/*
 * Display length of the Shift JIS character starting at *s.
 *
 * Bytes 0xa1..0xdf count as 1, any other byte with the high bit set counts
 * as 2, and everything else is delegated to pg_ascii_dsplen().
 */
static int
pg_sjis_dsplen(const unsigned char *s)
{
	const unsigned char lead = *s;

	if (lead >= 0xa1 && lead <= 0xdf)
		return 1;				/* 1 byte kana? */
	if (IS_HIGHBIT_SET(lead))
		return 2;				/* kanji? */

	return pg_ascii_dsplen(s);	/* should be ASCII */
}
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:69
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119

◆ pg_sjis_mblen()

static int pg_sjis_mblen ( const unsigned char *  s)
static

Definition at line 937 of file wchar.c.

References IS_HIGHBIT_SET.

Referenced by pg_sjis_verifier().

/*
 * Byte length of the Shift JIS character starting at *s.
 *
 * Bytes 0xa1..0xdf are 1-byte characters; any other byte with the high bit
 * set starts a 2-byte character; everything else has length 1.  Only the
 * first byte is inspected.
 */
static int
pg_sjis_mblen(const unsigned char *s)
{
	const unsigned char lead = *s;

	if (lead >= 0xa1 && lead <= 0xdf)
		return 1;				/* 1 byte kana? */

	/* kanji if the high bit is set, otherwise should be ASCII */
	return IS_HIGHBIT_SET(lead) ? 2 : 1;
}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119

◆ pg_sjis_verifier()

static int pg_sjis_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1300 of file wchar.c.

References ISSJISHEAD, ISSJISTAIL, and pg_sjis_mblen().

/*
 * Verify one character starting at s, with len bytes available.
 *
 * The expected length comes from pg_sjis_mblen(s).  Returns that length on
 * success, or -1 if too few bytes are available.  For a two-byte character
 * it also returns -1 unless the first byte satisfies ISSJISHEAD and the
 * second satisfies ISSJISTAIL.  One-byte characters are accepted without
 * any further test.
 */
1301 {
1302  int l,
1303  mbl;
1304  unsigned char c1,
1305  c2;
1306 
1307  l = mbl = pg_sjis_mblen(s);
1308 
1309  if (len < l)
1310  return -1;
1311 
1312  if (l == 1) /* pg_sjis_mblen already verified it */
1313  return mbl;
1314 
1315  c1 = *s++; /* first byte of the two-byte character */
1316  c2 = *s; /* second byte */
1317  if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2))
1318  return -1;
1319  return mbl;
1320 }
#define ISSJISTAIL(c)
Definition: pg_wchar.h:42
static int pg_sjis_mblen(const unsigned char *s)
Definition: wchar.c:937
#define ISSJISHEAD(c)
Definition: pg_wchar.h:41

◆ pg_uhc_dsplen()

static int pg_uhc_dsplen ( const unsigned char *  s)
static

Definition at line 1034 of file wchar.c.

References IS_HIGHBIT_SET, and pg_ascii_dsplen().

/*
 * Display width of the character whose first byte is *s: 2 when the high
 * bit of *s is set, otherwise whatever pg_ascii_dsplen(s) reports.
 */
1035 {
1036  int len;
1037 
1038  if (IS_HIGHBIT_SET(*s))
1039  len = 2; /* 2byte? */
1040  else
1041  len = pg_ascii_dsplen(s); /* should be ASCII */
1042  return len;
1043 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:69
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119

◆ pg_uhc_mblen()

static int pg_uhc_mblen ( const unsigned char *  s)
static

Definition at line 1022 of file wchar.c.

References IS_HIGHBIT_SET.

Referenced by pg_uhc_verifier().

/*
 * Byte length of the character whose first byte is *s: 2 when the high bit
 * of *s is set, 1 otherwise.  Only the first byte is inspected.
 */
1023 {
1024  int len;
1025 
1026  if (IS_HIGHBIT_SET(*s))
1027  len = 2; /* 2byte? */
1028  else
1029  len = 1; /* should be ASCII */
1030  return len;
1031 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119

◆ pg_uhc_verifier()

static int pg_uhc_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1363 of file wchar.c.

References pg_uhc_mblen().

/*
 * Verify one character starting at s, with len bytes available.
 *
 * The expected length comes from pg_uhc_mblen(s).  Returns that length on
 * success, or -1 if too few bytes are available or if a byte after the
 * first is a NUL.  No other constraint is placed on the trailing byte.
 */
1364 {
1365  int l,
1366  mbl;
1367 
1368  l = mbl = pg_uhc_mblen(s);
1369 
1370  if (len < l)
1371  return -1;
1372 
1373  while (--l > 0) /* visits the mbl - 1 bytes after the first one */
1374  {
1375  if (*++s == '\0')
1376  return -1;
1377  }
1378 
1379  return mbl;
1380 }
static int pg_uhc_mblen(const unsigned char *s)
Definition: wchar.c:1022

◆ pg_utf2wchar_with_len()

static int pg_utf2wchar_with_len ( const unsigned char *  from,
pg_wchar to,
int  len 
)
static

Definition at line 419 of file wchar.c.

/*
 * Decode UTF-8 bytes at `from` into pg_wchar values stored at `to`.
 *
 * Conversion stops when len bytes have been consumed, when a NUL lead byte
 * is reached, or when the remaining bytes are too few for the sequence
 * announced by the current lead byte (that partial sequence is dropped).
 * A terminating 0 is always stored after the last converted value.
 *
 * Returns the number of pg_wchar values stored, not counting the terminator.
 *
 * Continuation bytes are only masked with 0x3f, never validated, and the
 * size of the output array is not checked here: each input byte can yield
 * at most one output value, so `to` needs room for up to len + 1 entries.
 */
420 {
421  int cnt = 0;
422  uint32 c1,
423  c2,
424  c3,
425  c4;
426 
427  while (len > 0 && *from)
428  {
429  if ((*from & 0x80) == 0) /* 0xxxxxxx: single byte */
430  {
431  *to = *from++;
432  len--;
433  }
434  else if ((*from & 0xe0) == 0xc0) /* 110xxxxx: two-byte sequence */
435  {
436  if (len < 2)
437  break; /* drop trailing incomplete char */
438  c1 = *from++ & 0x1f;
439  c2 = *from++ & 0x3f;
440  *to = (c1 << 6) | c2;
441  len -= 2;
442  }
443  else if ((*from & 0xf0) == 0xe0) /* 1110xxxx: three-byte sequence */
444  {
445  if (len < 3)
446  break; /* drop trailing incomplete char */
447  c1 = *from++ & 0x0f;
448  c2 = *from++ & 0x3f;
449  c3 = *from++ & 0x3f;
450  *to = (c1 << 12) | (c2 << 6) | c3;
451  len -= 3;
452  }
453  else if ((*from & 0xf8) == 0xf0) /* 11110xxx: four-byte sequence */
454  {
455  if (len < 4)
456  break; /* drop trailing incomplete char */
457  c1 = *from++ & 0x07;
458  c2 = *from++ & 0x3f;
459  c3 = *from++ & 0x3f;
460  c4 = *from++ & 0x3f;
461  *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
462  len -= 4;
463  }
464  else
465  {
466  /* treat a bogus char as length 1; not ours to raise error */
467  *to = *from++;
468  len--;
469  }
470  to++;
471  cnt++;
472  }
473  *to = 0;
474  return cnt;
475 }
unsigned int uint32
Definition: c.h:367

◆ pg_utf8_islegal()

bool pg_utf8_islegal ( const unsigned char *  source,
int  length 
)

Definition at line 1442 of file wchar.c.

Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifier(), surrogate_pair_to_codepoint(), utf8_to_iso8859_1(), and UtfToLocal().

/*
 * Report whether the `length` bytes at `source` form a legal UTF-8 sequence.
 *
 * The switch is entered at the case matching `length` and falls through
 * toward case 1, so bytes are checked from last to first.  Any length
 * outside 1..4 is rejected by the default label.
 *
 * The second byte gets a narrower range for four particular lead bytes;
 * the reasons follow from the UTF-8 bit layout:
 *   0xE0: below 0xA0 would encode a value under 0x800 (overlong form)
 *   0xED: above 0x9F would encode 0xD800..0xDFFF (surrogate range)
 *   0xF0: below 0x90 would encode a value under 0x10000 (overlong form)
 *   0xF4: above 0x8F would encode a value above 0x10FFFF
 *
 * `length` is not compared against what the lead byte announces; the
 * caller visible alongside this listing (pg_utf8_verifier) passes the
 * result of pg_utf_mblen().
 */
1443 {
1444  unsigned char a;
1445 
1446  switch (length)
1447  {
1448  default:
1449  /* reject lengths 5 and 6 for now */
1450  return false;
1451  case 4:
1452  a = source[3];
1453  if (a < 0x80 || a > 0xBF)
1454  return false;
1455  /* FALL THRU */
1456  case 3:
1457  a = source[2];
1458  if (a < 0x80 || a > 0xBF)
1459  return false;
1460  /* FALL THRU */
1461  case 2:
1462  a = source[1];
1463  switch (*source)
1464  {
1465  case 0xE0:
1466  if (a < 0xA0 || a > 0xBF)
1467  return false;
1468  break;
1469  case 0xED:
1470  if (a < 0x80 || a > 0x9F)
1471  return false;
1472  break;
1473  case 0xF0:
1474  if (a < 0x90 || a > 0xBF)
1475  return false;
1476  break;
1477  case 0xF4:
1478  if (a < 0x80 || a > 0x8F)
1479  return false;
1480  break;
1481  default:
1482  if (a < 0x80 || a > 0xBF)
1483  return false;
1484  break;
1485  }
1486  /* FALL THRU */
1487  case 1:
1488  a = *source;
1489  if (a >= 0x80 && a < 0xC2) /* continuation byte, or lead byte 0xC0/0xC1 */
1490  return false;
1491  if (a > 0xF4) /* lead byte beyond the last one accepted */
1492  return false;
1493  break;
1494  }
1495  return true;
1496 }

◆ pg_utf8_verifier()

static int pg_utf8_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1414 of file wchar.c.

References pg_utf8_islegal(), and pg_utf_mblen().

/*
 * Verify one UTF-8 character starting at s, with len bytes available.
 *
 * The expected length comes from pg_utf_mblen(s).  Returns that length when
 * enough bytes are available and pg_utf8_islegal() accepts them; returns -1
 * otherwise.
 */
1415 {
1416  int l = pg_utf_mblen(s);
1417 
1418  if (len < l)
1419  return -1;
1420 
1421  if (!pg_utf8_islegal(s, l))
1422  return -1;
1423 
1424  return l;
1425 }
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1442
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:549

◆ pg_utf_dsplen()

static int pg_utf_dsplen ( const unsigned char *  s)
static

Definition at line 708 of file wchar.c.

References ucs_wcwidth(), and utf8_to_unicode().

/*
 * Display width of the UTF-8 character at s: decode it with
 * utf8_to_unicode() and return ucs_wcwidth() of the result.
 */
709 {
710  return ucs_wcwidth(utf8_to_unicode(s));
711 }
static int ucs_wcwidth(pg_wchar ucs)
Definition: wchar.c:645
pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: wchar.c:686

◆ pg_utf_mblen()

int pg_utf_mblen ( const unsigned char *  s)

Definition at line 549 of file wchar.c.

Referenced by json_lex_string(), pg_saslprep(), pg_unicode_to_server(), pg_utf8_string_len(), pg_utf8_verifier(), pg_wchar2utf_with_len(), surrogate_pair_to_codepoint(), unicode_is_normalized(), unicode_normalize_func(), utf8_to_iso8859_1(), and UtfToLocal().

/*
 * Byte length of a UTF-8 character, judged from its first byte alone:
 * 1 for 0xxxxxxx, 2 for 110xxxxx, 3 for 1110xxxx, 4 for 11110xxx.
 * Any other first byte yields 1.  The 5- and 6-byte forms are compiled
 * only when NOT_USED is defined.
 */
550 {
551  int len;
552 
553  if ((*s & 0x80) == 0)
554  len = 1;
555  else if ((*s & 0xe0) == 0xc0)
556  len = 2;
557  else if ((*s & 0xf0) == 0xe0)
558  len = 3;
559  else if ((*s & 0xf8) == 0xf0)
560  len = 4;
561 #ifdef NOT_USED
562  else if ((*s & 0xfc) == 0xf8)
563  len = 5;
564  else if ((*s & 0xfe) == 0xfc)
565  len = 6;
566 #endif
567  else
568  len = 1; /* no pattern matched: treated as a single byte */
569  return len;
570 }

◆ pg_wchar2euc_with_len()

static int pg_wchar2euc_with_len ( const pg_wchar from,
unsigned char *  to,
int  len 
)
static

Definition at line 355 of file wchar.c.

/*
 * Serialize pg_wchar values from `from` into bytes at `to`.
 *
 * At most len input values are converted; a zero value ends the input
 * early.  Each value is written most significant byte first, using 4, 3, 2
 * or 1 bytes according to its highest non-zero byte.  A terminating 0 byte
 * is stored after the output.
 *
 * Returns the number of bytes written, not counting the terminator.  The
 * size of the output buffer is not checked here.
 */
356 {
357  int cnt = 0;
358 
359  while (len > 0 && *from)
360  {
361  unsigned char c; /* holds the leading byte; assignment keeps the low 8 bits */
362 
363  if ((c = (*from >> 24))) /* bits 24..31 non-zero: four bytes */
364  {
365  *to++ = c;
366  *to++ = (*from >> 16) & 0xff;
367  *to++ = (*from >> 8) & 0xff;
368  *to++ = *from & 0xff;
369  cnt += 4;
370  }
371  else if ((c = (*from >> 16))) /* bits 16..23 non-zero: three bytes */
372  {
373  *to++ = c;
374  *to++ = (*from >> 8) & 0xff;
375  *to++ = *from & 0xff;
376  cnt += 3;
377  }
378  else if ((c = (*from >> 8))) /* bits 8..15 non-zero: two bytes */
379  {
380  *to++ = c;
381  *to++ = *from & 0xff;
382  cnt += 2;
383  }
384  else
385  {
386  *to++ = *from;
387  cnt++;
388  }
389  from++;
390  len--; /* len counts input values, not output bytes */
391  }
392  *to = 0;
393  return cnt;
394 }
char * c

◆ pg_wchar2mule_with_len()

static int pg_wchar2mule_with_len ( const pg_wchar from,
unsigned char *  to,
int  len 
)
static

Definition at line 773 of file wchar.c.

References IS_LC1, IS_LC2, IS_LCPRV1_A_RANGE, IS_LCPRV1_B_RANGE, IS_LCPRV2_A_RANGE, IS_LCPRV2_B_RANGE, LCPRV1_A, LCPRV1_B, LCPRV2_A, and LCPRV2_B.

/*
 * Serialize pg_wchar values from `from` into bytes at `to`.
 *
 * At most len input values are converted; a zero value ends the input
 * early.  The output form of each value is chosen by lb, bits 16..23 of
 * the value:
 *   IS_LC1(lb)               lb, low byte                        (2 bytes)
 *   IS_LC2(lb)               lb, bits 8..15, low byte            (3 bytes)
 *   IS_LCPRV1_A/B_RANGE(lb)  LCPRV1_A/B, lb, low byte            (3 bytes)
 *   IS_LCPRV2_A/B_RANGE(lb)  LCPRV2_A/B, lb, bits 8..15, low byte (4 bytes)
 *   otherwise                low byte only                       (1 byte)
 * A terminating 0 byte is stored after the output.
 *
 * Returns the number of bytes written, not counting the terminator.  The
 * size of the output buffer is not checked here.
 */
774 {
775  int cnt = 0;
776 
777  while (len > 0 && *from)
778  {
779  unsigned char lb;
780 
781  lb = (*from >> 16) & 0xff;
782  if (IS_LC1(lb))
783  {
784  *to++ = lb;
785  *to++ = *from & 0xff;
786  cnt += 2;
787  }
788  else if (IS_LC2(lb))
789  {
790  *to++ = lb;
791  *to++ = (*from >> 8) & 0xff;
792  *to++ = *from & 0xff;
793  cnt += 3;
794  }
795  else if (IS_LCPRV1_A_RANGE(lb))
796  {
797  *to++ = LCPRV1_A;
798  *to++ = lb;
799  *to++ = *from & 0xff;
800  cnt += 3;
801  }
802  else if (IS_LCPRV1_B_RANGE(lb))
803  {
804  *to++ = LCPRV1_B;
805  *to++ = lb;
806  *to++ = *from & 0xff;
807  cnt += 3;
808  }
809  else if (IS_LCPRV2_A_RANGE(lb))
810  {
811  *to++ = LCPRV2_A;
812  *to++ = lb;
813  *to++ = (*from >> 8) & 0xff;
814  *to++ = *from & 0xff;
815  cnt += 4;
816  }
817  else if (IS_LCPRV2_B_RANGE(lb))
818  {
819  *to++ = LCPRV2_B;
820  *to++ = lb;
821  *to++ = (*from >> 8) & 0xff;
822  *to++ = *from & 0xff;
823  cnt += 4;
824  }
825  else
826  {
827  *to++ = *from & 0xff;
828  cnt += 1;
829  }
830  from++;
831  len--; /* len counts input values, not output bytes */
832  }
833  *to = 0;
834  return cnt;
835 }
#define IS_LC2(c)
Definition: pg_wchar.h:144
#define LCPRV1_A
Definition: pg_wchar.h:150
#define LCPRV1_B
Definition: pg_wchar.h:151
#define IS_LCPRV2_A_RANGE(c)
Definition: pg_wchar.h:165
#define LCPRV2_B
Definition: pg_wchar.h:163
#define LCPRV2_A
Definition: pg_wchar.h:162
#define IS_LCPRV2_B_RANGE(c)
Definition: pg_wchar.h:167
#define IS_LC1(c)
Definition: pg_wchar.h:123
#define IS_LCPRV1_A_RANGE(c)
Definition: pg_wchar.h:153
#define IS_LCPRV1_B_RANGE(c)
Definition: pg_wchar.h:155

◆ pg_wchar2single_with_len()

static int pg_wchar2single_with_len ( const pg_wchar from,
unsigned char *  to,
int  len 
)
static

Definition at line 907 of file wchar.c.

/*
 * Copy pg_wchar values from `from` into single bytes at `to`.
 *
 * At most len values are copied; a zero value ends the input early.  Each
 * value is stored through an unsigned char pointer, so only its low 8 bits
 * survive.  A terminating 0 byte is stored after the output.
 *
 * Returns the number of bytes written, not counting the terminator.
 */
908 {
909  int cnt = 0;
910 
911  while (len > 0 && *from)
912  {
913  *to++ = *from++;
914  len--;
915  cnt++;
916  }
917  *to = 0;
918  return cnt;
919 }

◆ pg_wchar2utf_with_len()

static int pg_wchar2utf_with_len ( const pg_wchar from,
unsigned char *  to,
int  len 
)
static

Definition at line 518 of file wchar.c.

References pg_utf_mblen(), and unicode_to_utf8().

/*
 * Encode pg_wchar values from `from` as UTF-8 bytes at `to`.
 *
 * At most len input values are converted; a zero value ends the input
 * early.  Each value is written with unicode_to_utf8(); the number of
 * bytes it produced is then recovered by calling pg_utf_mblen() on the
 * bytes just written.  A terminating 0 byte is stored after the output.
 *
 * Returns the number of bytes written, not counting the terminator.  The
 * size of the output buffer is not checked here.
 */
519 {
520  int cnt = 0;
521 
522  while (len > 0 && *from)
523  {
524  int char_len;
525 
526  unicode_to_utf8(*from, to);
527  char_len = pg_utf_mblen(to); /* length of what was just encoded */
528  cnt += char_len;
529  to += char_len;
530  from++;
531  len--; /* len counts input values, not output bytes */
532  }
533  *to = 0;
534  return cnt;
535 }
unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: wchar.c:483
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:549

◆ ucs_wcwidth()

static int ucs_wcwidth ( pg_wchar  ucs)
static

Definition at line 645 of file wchar.c.

References combining, and mbbisearch().

Referenced by pg_utf_dsplen().

/*
 * Column width of the code point ucs.  The tests run in this order:
 *    0  when ucs is 0
 *   -1  when ucs is below 0x20, in 0x7f..0x9f, or above 0x10ffff
 *    0  when mbbisearch() finds ucs in the combining table
 *    2  when ucs falls in one of the ranges listed in the final expression
 *    1  otherwise
 *
 * Inside the 0x2e80..0xa4cf range, the mask test (ucs & ~0x0011) != 0x300a
 * excludes 0x300a, 0x300b, 0x301a and 0x301b, and 0x303f is excluded
 * explicitly; those code points get width 1.
 */
646 {
648 
649  /* NUL is given width zero */
650  if (ucs == 0)
651  return 0;
652 
 /* control characters (below 0x20, 0x7f..0x9f) and out-of-range values */
653  if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
654  return -1;
655 
656  /* binary search in table of non-spacing characters */
657  if (mbbisearch(ucs, combining,
658  sizeof(combining) / sizeof(struct mbinterval) - 1))
659  return 0;
660 
661  /*
662  * if we arrive here, ucs is not a combining or C0/C1 control character
663  */
664 
665  return 1 +
666  (ucs >= 0x1100 &&
667  (ucs <= 0x115f || /* Hangul Jamo init. consonants */
668  (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
669  ucs != 0x303f) || /* CJK ... Yi */
670  (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
671  (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility
672  * Ideographs */
673  (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
674  (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
675  (ucs >= 0xffe0 && ucs <= 0xffe6) ||
676  (ucs >= 0x20000 && ucs <= 0x2ffff)));
677 }
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
Definition: wchar.c:592
static const struct mbinterval combining[]

◆ unicode_to_utf8()

unsigned char* unicode_to_utf8 ( pg_wchar  c,
unsigned char *  utf8string 
)

Definition at line 483 of file wchar.c.

Referenced by json_lex_string(), pg_saslprep(), pg_unicode_to_server(), pg_wchar2utf_with_len(), surrogate_pair_to_codepoint(), and unicode_normalize_func().

/*
 * Encode the code point c as UTF-8 into utf8string and return utf8string.
 *
 * Writes 1 byte for c <= 0x7F, 2 for c <= 0x7FF, 3 for c <= 0xFFFF and
 * 4 for anything larger.  No terminating NUL is written, and c is not
 * validated: in the 4-byte form only the low 21 bits of c are used.
 * The caller must supply room for up to 4 bytes.
 */
484 {
485  if (c <= 0x7F)
486  {
487  utf8string[0] = c;
488  }
489  else if (c <= 0x7FF)
490  {
491  utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
492  utf8string[1] = 0x80 | (c & 0x3F);
493  }
494  else if (c <= 0xFFFF)
495  {
496  utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
497  utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
498  utf8string[2] = 0x80 | (c & 0x3F);
499  }
500  else
501  {
502  utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
503  utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
504  utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
505  utf8string[3] = 0x80 | (c & 0x3F);
506  }
507 
508  return utf8string;
509 }
char * c

◆ utf8_to_unicode()

pg_wchar utf8_to_unicode ( const unsigned char *  c)

Definition at line 686 of file wchar.c.

Referenced by pg_saslprep(), pg_utf_dsplen(), surrogate_pair_to_codepoint(), unicode_is_normalized(), and unicode_normalize_func().

/*
 * Decode the UTF-8 sequence starting at c into a code point.
 *
 * The sequence length (1 to 4 bytes) is chosen from the first byte alone.
 * Following bytes are masked with 0x3f but not validated, and this
 * function has no length argument, so the caller must ensure that the
 * bytes the lead byte calls for are readable.  A first byte matching none
 * of the four patterns yields 0xffffffff.
 */
687 {
688  if ((*c & 0x80) == 0)
689  return (pg_wchar) c[0];
690  else if ((*c & 0xe0) == 0xc0)
691  return (pg_wchar) (((c[0] & 0x1f) << 6) |
692  (c[1] & 0x3f));
693  else if ((*c & 0xf0) == 0xe0)
694  return (pg_wchar) (((c[0] & 0x0f) << 12) |
695  ((c[1] & 0x3f) << 6) |
696  (c[2] & 0x3f));
697  else if ((*c & 0xf8) == 0xf0)
698  return (pg_wchar) (((c[0] & 0x07) << 18) |
699  ((c[1] & 0x3f) << 12) |
700  ((c[2] & 0x3f) << 6) |
701  (c[3] & 0x3f));
702  else
703  /* that is an invalid code on purpose */
704  return 0xffffffff;
705 }
char * c
unsigned int pg_wchar
Definition: mbprint.c:31

Variable Documentation

◆ pg_wchar_table