PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
wchar.c File Reference
#include "postgres.h"
#include "mb/pg_wchar.h"
Include dependency graph for wchar.c:

Go to the source code of this file.

Data Structures

struct  mbinterval
 

Macros

#define IS_EUC_RANGE_VALID(c)   ((c) >= 0xa1 && (c) <= 0xfe)
 
#define pg_euccn_verifier   pg_euckr_verifier
 

Functions

static int pg_ascii2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_ascii_mblen (const unsigned char *s)
 
static int pg_ascii_dsplen (const unsigned char *s)
 
static int pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euc_mblen (const unsigned char *s)
 
static int pg_euc_dsplen (const unsigned char *s)
 
static int pg_eucjp2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_eucjp_mblen (const unsigned char *s)
 
static int pg_eucjp_dsplen (const unsigned char *s)
 
static int pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euckr_mblen (const unsigned char *s)
 
static int pg_euckr_dsplen (const unsigned char *s)
 
static int pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euccn_mblen (const unsigned char *s)
 
static int pg_euccn_dsplen (const unsigned char *s)
 
static int pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euctw_mblen (const unsigned char *s)
 
static int pg_euctw_dsplen (const unsigned char *s)
 
static int pg_wchar2euc_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_johab_mblen (const unsigned char *s)
 
static int pg_johab_dsplen (const unsigned char *s)
 
static int pg_utf2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
unsigned char * unicode_to_utf8 (pg_wchar c, unsigned char *utf8string)
 
static int pg_wchar2utf_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_utf_mblen (const unsigned char *s)
 
static int mbbisearch (pg_wchar ucs, const struct mbinterval *table, int max)
 
static int ucs_wcwidth (pg_wchar ucs)
 
pg_wchar utf8_to_unicode (const unsigned char *c)
 
static int pg_utf_dsplen (const unsigned char *s)
 
static int pg_mule2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2mule_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_mule_mblen (const unsigned char *s)
 
static int pg_mule_dsplen (const unsigned char *s)
 
static int pg_latin12wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2single_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_latin1_mblen (const unsigned char *s)
 
static int pg_latin1_dsplen (const unsigned char *s)
 
static int pg_sjis_mblen (const unsigned char *s)
 
static int pg_sjis_dsplen (const unsigned char *s)
 
static int pg_big5_mblen (const unsigned char *s)
 
static int pg_big5_dsplen (const unsigned char *s)
 
static int pg_gbk_mblen (const unsigned char *s)
 
static int pg_gbk_dsplen (const unsigned char *s)
 
static int pg_uhc_mblen (const unsigned char *s)
 
static int pg_uhc_dsplen (const unsigned char *s)
 
static int pg_gb18030_mblen (const unsigned char *s)
 
static int pg_gb18030_dsplen (const unsigned char *s)
 
static int pg_ascii_verifier (const unsigned char *s, int len)
 
static int pg_eucjp_verifier (const unsigned char *s, int len)
 
static int pg_euckr_verifier (const unsigned char *s, int len)
 
static int pg_euctw_verifier (const unsigned char *s, int len)
 
static int pg_johab_verifier (const unsigned char *s, int len)
 
static int pg_mule_verifier (const unsigned char *s, int len)
 
static int pg_latin1_verifier (const unsigned char *s, int len)
 
static int pg_sjis_verifier (const unsigned char *s, int len)
 
static int pg_big5_verifier (const unsigned char *s, int len)
 
static int pg_gbk_verifier (const unsigned char *s, int len)
 
static int pg_uhc_verifier (const unsigned char *s, int len)
 
static int pg_gb18030_verifier (const unsigned char *s, int len)
 
static int pg_utf8_verifier (const unsigned char *s, int len)
 
bool pg_utf8_islegal (const unsigned char *source, int length)
 
static bool pg_generic_charinc (unsigned char *charptr, int len)
 
static bool pg_utf8_increment (unsigned char *charptr, int length)
 
static bool pg_eucjp_increment (unsigned char *charptr, int length)
 
int pg_mic_mblen (const unsigned char *mbstr)
 
int pg_encoding_mblen (int encoding, const char *mbstr)
 
int pg_encoding_dsplen (int encoding, const char *mbstr)
 
int pg_encoding_verifymb (int encoding, const char *mbstr, int len)
 
int pg_encoding_max_length (int encoding)
 
int pg_database_encoding_max_length (void)
 
mbcharacter_incrementer pg_database_encoding_character_incrementer (void)
 
bool pg_verifymbstr (const char *mbstr, int len, bool noError)
 
bool pg_verify_mbstr (int encoding, const char *mbstr, int len, bool noError)
 
int pg_verify_mbstr_len (int encoding, const char *mbstr, int len, bool noError)
 
void check_encoding_conversion_args (int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)
 
void report_invalid_encoding (int encoding, const char *mbstr, int len)
 
void report_untranslatable_char (int src_encoding, int dest_encoding, const char *mbstr, int len)
 

Variables

const pg_wchar_tbl pg_wchar_table []
 

Macro Definition Documentation

#define IS_EUC_RANGE_VALID(c)   ((c) >= 0xa1 && (c) <= 0xfe)

Definition at line 1125 of file wchar.c.

#define pg_euccn_verifier   pg_euckr_verifier

Definition at line 1212 of file wchar.c.

Function Documentation

void check_encoding_conversion_args ( int  src_encoding,
int  dest_encoding,
int  len,
int  expected_src_encoding,
int  expected_dest_encoding 
)

Definition at line 1966 of file wchar.c.

References elog, ERROR, name, pg_enc2name_tbl, and PG_VALID_ENCODING.

1971 {
1972  if (!PG_VALID_ENCODING(src_encoding))
1973  elog(ERROR, "invalid source encoding ID: %d", src_encoding);
1974  if (src_encoding != expected_src_encoding && expected_src_encoding >= 0)
1975  elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
1976  pg_enc2name_tbl[expected_src_encoding].name,
1977  pg_enc2name_tbl[src_encoding].name);
1978  if (!PG_VALID_ENCODING(dest_encoding))
1979  elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
1980  if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= 0)
1981  elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
1982  pg_enc2name_tbl[expected_dest_encoding].name,
1983  pg_enc2name_tbl[dest_encoding].name);
1984  if (len < 0)
1985  elog(ERROR, "encoding conversion length must not be negative");
1986 }
#define ERROR
Definition: elog.h:43
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:299
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:307
const char * name
Definition: encode.c:521
#define elog
Definition: elog.h:219
static int mbbisearch ( pg_wchar  ucs,
const struct mbinterval *  table,
int  max 
)
static

Definition at line 584 of file wchar.c.

Referenced by ucs_wcwidth().

585 {
586  int min = 0;
587  int mid;
588 
589  if (ucs < table[0].first || ucs > table[max].last)
590  return 0;
591  while (max >= min)
592  {
593  mid = (min + max) / 2;
594  if (ucs > table[mid].last)
595  min = mid + 1;
596  else if (ucs < table[mid].first)
597  max = mid - 1;
598  else
599  return 1;
600  }
601 
602  return 0;
603 }
static int pg_ascii2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 40 of file wchar.c.

41 {
42  int cnt = 0;
43 
44  while (len > 0 && *from)
45  {
46  *to++ = *from++;
47  len--;
48  cnt++;
49  }
50  *to = 0;
51  return cnt;
52 }
static int pg_ascii_dsplen ( const unsigned char *  s)
static

Definition at line 61 of file wchar.c.

Referenced by pg_big5_dsplen(), pg_euc_dsplen(), pg_euccn_dsplen(), pg_eucjp_dsplen(), pg_euctw_dsplen(), pg_gb18030_dsplen(), pg_gbk_dsplen(), pg_latin1_dsplen(), pg_sjis_dsplen(), and pg_uhc_dsplen().

62 {
63  if (*s == '\0')
64  return 0;
65  if (*s < 0x20 || *s == 0x7f)
66  return -1;
67 
68  return 1;
69 }
static int pg_ascii_mblen ( const unsigned char *  s)
static

Definition at line 55 of file wchar.c.

56 {
57  return 1;
58 }
static int pg_ascii_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1120 of file wchar.c.

1121 {
1122  return 1;
1123 }
static int pg_big5_dsplen ( const unsigned char *  s)
static

Definition at line 1007 of file wchar.c.

References IS_HIGHBIT_SET, and pg_ascii_dsplen().

1008 {
1009  int len;
1010 
1011  if (IS_HIGHBIT_SET(*s))
1012  len = 2; /* kanji? */
1013  else
1014  len = pg_ascii_dsplen(s); /* should be ASCII */
1015  return len;
1016 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:61
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
static int pg_big5_mblen ( const unsigned char *  s)
static

Definition at line 995 of file wchar.c.

References IS_HIGHBIT_SET.

Referenced by pg_big5_verifier().

996 {
997  int len;
998 
999  if (IS_HIGHBIT_SET(*s))
1000  len = 2; /* kanji? */
1001  else
1002  len = 1; /* should be ASCII */
1003  return len;
1004 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
static int pg_big5_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1339 of file wchar.c.

References pg_big5_mblen().

1340 {
1341  int l,
1342  mbl;
1343 
1344  l = mbl = pg_big5_mblen(s);
1345 
1346  if (len < l)
1347  return -1;
1348 
1349  while (--l > 0)
1350  {
1351  if (*++s == '\0')
1352  return -1;
1353  }
1354 
1355  return mbl;
1356 }
static int pg_big5_mblen(const unsigned char *s)
Definition: wchar.c:995
mbcharacter_incrementer pg_database_encoding_character_incrementer ( void  )

Definition at line 1842 of file wchar.c.

References GetDatabaseEncoding(), PG_EUC_JP, pg_eucjp_increment(), pg_generic_charinc(), PG_UTF8, and pg_utf8_increment().

Referenced by make_greater_string().

1843 {
1844  /*
1845  * Eventually it might be best to add a field to pg_wchar_table[], but for
1846  * now we just use a switch.
1847  */
1848  switch (GetDatabaseEncoding())
1849  {
1850  case PG_UTF8:
1851  return pg_utf8_increment;
1852 
1853  case PG_EUC_JP:
1854  return pg_eucjp_increment;
1855 
1856  default:
1857  return pg_generic_charinc;
1858  }
1859 }
static bool pg_eucjp_increment(unsigned char *charptr, int length)
Definition: wchar.c:1639
static bool pg_generic_charinc(unsigned char *charptr, int len)
Definition: wchar.c:1527
static bool pg_utf8_increment(unsigned char *charptr, int length)
Definition: wchar.c:1561
int GetDatabaseEncoding(void)
Definition: mbutils.c:1015
int pg_encoding_dsplen ( int  encoding,
const char *  mbstr 
)

Definition at line 1796 of file wchar.c.

References PG_SQL_ASCII, and PG_VALID_ENCODING.

Referenced by PQdsplen(), and reportErrorPosition().

1797 {
1798  return (PG_VALID_ENCODING(encoding) ?
1799  ((*pg_wchar_table[encoding].dsplen) ((const unsigned char *) mbstr)) :
1800  ((*pg_wchar_table[PG_SQL_ASCII].dsplen) ((const unsigned char *) mbstr)));
1801 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:299
static char * encoding
Definition: initdb.c:122
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
int pg_encoding_max_length ( int  encoding)

Definition at line 1820 of file wchar.c.

References Assert, encoding, pg_wchar_tbl::maxmblen, and PG_VALID_ENCODING.

Referenced by ascii(), chr(), pg_encoding_mbcliplen(), pg_verify_mbstr_len(), reportErrorPosition(), and type_maximum_size().

1821 {
1822  Assert(PG_VALID_ENCODING(encoding));
1823 
1824  return pg_wchar_table[encoding].maxmblen;
1825 }
int maxmblen
Definition: pg_wchar.h:369
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:299
static char * encoding
Definition: initdb.c:122
#define Assert(condition)
Definition: c.h:675
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
int pg_encoding_mblen ( int  encoding,
const char *  mbstr 
)

Definition at line 1785 of file wchar.c.

References PG_SQL_ASCII, and PG_VALID_ENCODING.

Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), CopyReadLineText(), PQescapeInternal(), PQescapeStringInternal(), PQmblen(), report_invalid_encoding(), report_untranslatable_char(), and reportErrorPosition().

1786 {
1787  return (PG_VALID_ENCODING(encoding) ?
1788  ((*pg_wchar_table[encoding].mblen) ((const unsigned char *) mbstr)) :
1789  ((*pg_wchar_table[PG_SQL_ASCII].mblen) ((const unsigned char *) mbstr)));
1790 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:299
static char * encoding
Definition: initdb.c:122
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
int pg_encoding_verifymb ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 1809 of file wchar.c.

References PG_SQL_ASCII, and PG_VALID_ENCODING.

Referenced by big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2mic(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), shift_jis_20042euc_jis_2004(), and sjis2euc_jp().

1810 {
1811  return (PG_VALID_ENCODING(encoding) ?
1812  ((*pg_wchar_table[encoding].mbverify) ((const unsigned char *) mbstr, len)) :
1813  ((*pg_wchar_table[PG_SQL_ASCII].mbverify) ((const unsigned char *) mbstr, len)));
1814 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:299
static char * encoding
Definition: initdb.c:122
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
static int pg_euc2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 75 of file wchar.c.

References IS_HIGHBIT_SET, SS2, and SS3.

Referenced by pg_eucjp2wchar_with_len(), and pg_euckr2wchar_with_len().

76 {
77  int cnt = 0;
78 
79  while (len > 0 && *from)
80  {
81  if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte
82  * KANA") */
83  {
84  from++;
85  *to = (SS2 << 8) | *from++;
86  len -= 2;
87  }
88  else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
89  {
90  from++;
91  *to = (SS3 << 16) | (*from++ << 8);
92  *to |= *from++;
93  len -= 3;
94  }
95  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
96  {
97  *to = *from++ << 8;
98  *to |= *from++;
99  len -= 2;
100  }
101  else /* must be ASCII */
102  {
103  *to = *from++;
104  len--;
105  }
106  to++;
107  cnt++;
108  }
109  *to = 0;
110  return cnt;
111 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
#define SS2
Definition: pg_wchar.h:35
static int pg_euc_dsplen ( const unsigned char *  s)
inline static

Definition at line 130 of file wchar.c.

References IS_HIGHBIT_SET, pg_ascii_dsplen(), SS2, and SS3.

Referenced by pg_euckr_dsplen(), and pg_johab_dsplen().

131 {
132  int len;
133 
134  if (*s == SS2)
135  len = 2;
136  else if (*s == SS3)
137  len = 2;
138  else if (IS_HIGHBIT_SET(*s))
139  len = 2;
140  else
141  len = pg_ascii_dsplen(s);
142  return len;
143 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:61
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
#define SS2
Definition: pg_wchar.h:35
static int pg_euc_mblen ( const unsigned char *  s)
inline static

Definition at line 114 of file wchar.c.

References IS_HIGHBIT_SET, SS2, and SS3.

Referenced by pg_eucjp_mblen(), pg_euckr_mblen(), and pg_johab_mblen().

115 {
116  int len;
117 
118  if (*s == SS2)
119  len = 2;
120  else if (*s == SS3)
121  len = 3;
122  else if (IS_HIGHBIT_SET(*s))
123  len = 2;
124  else
125  len = 1;
126  return len;
127 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
#define SS2
Definition: pg_wchar.h:35
static int pg_euccn2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 202 of file wchar.c.

References IS_HIGHBIT_SET, SS2, and SS3.

203 {
204  int cnt = 0;
205 
206  while (len > 0 && *from)
207  {
208  if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
209  {
210  from++;
211  *to = (SS2 << 16) | (*from++ << 8);
212  *to |= *from++;
213  len -= 3;
214  }
215  else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */
216  {
217  from++;
218  *to = (SS3 << 16) | (*from++ << 8);
219  *to |= *from++;
220  len -= 3;
221  }
222  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
223  {
224  *to = *from++ << 8;
225  *to |= *from++;
226  len -= 2;
227  }
228  else
229  {
230  *to = *from++;
231  len--;
232  }
233  to++;
234  cnt++;
235  }
236  *to = 0;
237  return cnt;
238 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
#define SS2
Definition: pg_wchar.h:35
static int pg_euccn_dsplen ( const unsigned char *  s)
static

Definition at line 253 of file wchar.c.

References IS_HIGHBIT_SET, and pg_ascii_dsplen().

254 {
255  int len;
256 
257  if (IS_HIGHBIT_SET(*s))
258  len = 2;
259  else
260  len = pg_ascii_dsplen(s);
261  return len;
262 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:61
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
static int pg_euccn_mblen ( const unsigned char *  s)
static

Definition at line 241 of file wchar.c.

References IS_HIGHBIT_SET.

242 {
243  int len;
244 
245  if (IS_HIGHBIT_SET(*s))
246  len = 2;
247  else
248  len = 1;
249  return len;
250 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
static int pg_eucjp2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 149 of file wchar.c.

References pg_euc2wchar_with_len().

150 {
151  return pg_euc2wchar_with_len(from, to, len);
152 }
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition: wchar.c:75
static int pg_eucjp_dsplen ( const unsigned char *  s)
static

Definition at line 161 of file wchar.c.

References IS_HIGHBIT_SET, pg_ascii_dsplen(), SS2, and SS3.

162 {
163  int len;
164 
165  if (*s == SS2)
166  len = 1;
167  else if (*s == SS3)
168  len = 2;
169  else if (IS_HIGHBIT_SET(*s))
170  len = 2;
171  else
172  len = pg_ascii_dsplen(s);
173  return len;
174 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:61
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
#define SS2
Definition: pg_wchar.h:35
static bool pg_eucjp_increment ( unsigned char *  charptr,
int  length 
)
static

Definition at line 1639 of file wchar.c.

References i, IS_HIGHBIT_SET, SS2, and SS3.

Referenced by pg_database_encoding_character_incrementer().

1640 {
1641  unsigned char c1,
1642  c2;
1643  int i;
1644 
1645  c1 = *charptr;
1646 
1647  switch (c1)
1648  {
1649  case SS2: /* JIS X 0201 */
1650  if (length != 2)
1651  return false;
1652 
1653  c2 = charptr[1];
1654 
1655  if (c2 >= 0xdf)
1656  charptr[0] = charptr[1] = 0xa1;
1657  else if (c2 < 0xa1)
1658  charptr[1] = 0xa1;
1659  else
1660  charptr[1]++;
1661  break;
1662 
1663  case SS3: /* JIS X 0212 */
1664  if (length != 3)
1665  return false;
1666 
1667  for (i = 2; i > 0; i--)
1668  {
1669  c2 = charptr[i];
1670  if (c2 < 0xa1)
1671  {
1672  charptr[i] = 0xa1;
1673  return true;
1674  }
1675  else if (c2 < 0xfe)
1676  {
1677  charptr[i]++;
1678  return true;
1679  }
1680  }
1681 
1682  /* Out of 3-byte code region */
1683  return false;
1684 
1685  default:
1686  if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1687  {
1688  if (length != 2)
1689  return false;
1690 
1691  for (i = 1; i >= 0; i--)
1692  {
1693  c2 = charptr[i];
1694  if (c2 < 0xa1)
1695  {
1696  charptr[i] = 0xa1;
1697  return true;
1698  }
1699  else if (c2 < 0xfe)
1700  {
1701  charptr[i]++;
1702  return true;
1703  }
1704  }
1705 
1706  /* Out of 2 byte code region */
1707  return false;
1708  }
1709  else
1710  { /* ASCII, single byte */
1711  if (c1 > 0x7e)
1712  return false;
1713  (*charptr)++;
1714  }
1715  break;
1716  }
1717 
1718  return true;
1719 }
int length(const List *list)
Definition: list.c:1271
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
int i
#define SS2
Definition: pg_wchar.h:35
static int pg_eucjp_mblen ( const unsigned char *  s)
static

Definition at line 155 of file wchar.c.

References pg_euc_mblen().

156 {
157  return pg_euc_mblen(s);
158 }
static int pg_euc_mblen(const unsigned char *s)
Definition: wchar.c:114
static int pg_eucjp_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1128 of file wchar.c.

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, SS2, and SS3.

1129 {
1130  int l;
1131  unsigned char c1,
1132  c2;
1133 
1134  c1 = *s++;
1135 
1136  switch (c1)
1137  {
1138  case SS2: /* JIS X 0201 */
1139  l = 2;
1140  if (l > len)
1141  return -1;
1142  c2 = *s++;
1143  if (c2 < 0xa1 || c2 > 0xdf)
1144  return -1;
1145  break;
1146 
1147  case SS3: /* JIS X 0212 */
1148  l = 3;
1149  if (l > len)
1150  return -1;
1151  c2 = *s++;
1152  if (!IS_EUC_RANGE_VALID(c2))
1153  return -1;
1154  c2 = *s++;
1155  if (!IS_EUC_RANGE_VALID(c2))
1156  return -1;
1157  break;
1158 
1159  default:
1160  if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1161  {
1162  l = 2;
1163  if (l > len)
1164  return -1;
1165  if (!IS_EUC_RANGE_VALID(c1))
1166  return -1;
1167  c2 = *s++;
1168  if (!IS_EUC_RANGE_VALID(c2))
1169  return -1;
1170  }
1171  else
1172  /* must be ASCII */
1173  {
1174  l = 1;
1175  }
1176  break;
1177  }
1178 
1179  return l;
1180 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1125
#define SS2
Definition: pg_wchar.h:35
static int pg_euckr2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 180 of file wchar.c.

References pg_euc2wchar_with_len().

181 {
182  return pg_euc2wchar_with_len(from, to, len);
183 }
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition: wchar.c:75
static int pg_euckr_dsplen ( const unsigned char *  s)
static

Definition at line 192 of file wchar.c.

References pg_euc_dsplen().

193 {
194  return pg_euc_dsplen(s);
195 }
static int pg_euc_dsplen(const unsigned char *s)
Definition: wchar.c:130
static int pg_euckr_mblen ( const unsigned char *  s)
static

Definition at line 186 of file wchar.c.

References pg_euc_mblen().

187 {
188  return pg_euc_mblen(s);
189 }
static int pg_euc_mblen(const unsigned char *s)
Definition: wchar.c:114
static int pg_euckr_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1183 of file wchar.c.

References IS_EUC_RANGE_VALID, and IS_HIGHBIT_SET.

1184 {
1185  int l;
1186  unsigned char c1,
1187  c2;
1188 
1189  c1 = *s++;
1190 
1191  if (IS_HIGHBIT_SET(c1))
1192  {
1193  l = 2;
1194  if (l > len)
1195  return -1;
1196  if (!IS_EUC_RANGE_VALID(c1))
1197  return -1;
1198  c2 = *s++;
1199  if (!IS_EUC_RANGE_VALID(c2))
1200  return -1;
1201  }
1202  else
1203  /* must be ASCII */
1204  {
1205  l = 1;
1206  }
1207 
1208  return l;
1209 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1125
static int pg_euctw2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 269 of file wchar.c.

References IS_HIGHBIT_SET, SS2, and SS3.

270 {
271  int cnt = 0;
272 
273  while (len > 0 && *from)
274  {
275  if (*from == SS2 && len >= 4) /* code set 2 */
276  {
277  from++;
278  *to = (((uint32) SS2) << 24) | (*from++ << 16);
279  *to |= *from++ << 8;
280  *to |= *from++;
281  len -= 4;
282  }
283  else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
284  {
285  from++;
286  *to = (SS3 << 16) | (*from++ << 8);
287  *to |= *from++;
288  len -= 3;
289  }
290  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */
291  {
292  *to = *from++ << 8;
293  *to |= *from++;
294  len -= 2;
295  }
296  else
297  {
298  *to = *from++;
299  len--;
300  }
301  to++;
302  cnt++;
303  }
304  *to = 0;
305  return cnt;
306 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
unsigned int uint32
Definition: c.h:268
#define SS2
Definition: pg_wchar.h:35
static int pg_euctw_dsplen ( const unsigned char *  s)
static

Definition at line 325 of file wchar.c.

References IS_HIGHBIT_SET, pg_ascii_dsplen(), SS2, and SS3.

326 {
327  int len;
328 
329  if (*s == SS2)
330  len = 2;
331  else if (*s == SS3)
332  len = 2;
333  else if (IS_HIGHBIT_SET(*s))
334  len = 2;
335  else
336  len = pg_ascii_dsplen(s);
337  return len;
338 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:61
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
#define SS2
Definition: pg_wchar.h:35
static int pg_euctw_mblen ( const unsigned char *  s)
static

Definition at line 309 of file wchar.c.

References IS_HIGHBIT_SET, SS2, and SS3.

310 {
311  int len;
312 
313  if (*s == SS2)
314  len = 4;
315  else if (*s == SS3)
316  len = 3;
317  else if (IS_HIGHBIT_SET(*s))
318  len = 2;
319  else
320  len = 1;
321  return len;
322 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
#define SS2
Definition: pg_wchar.h:35
static int pg_euctw_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1215 of file wchar.c.

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, SS2, and SS3.

1216 {
1217  int l;
1218  unsigned char c1,
1219  c2;
1220 
1221  c1 = *s++;
1222 
1223  switch (c1)
1224  {
1225  case SS2: /* CNS 11643 Plane 1-7 */
1226  l = 4;
1227  if (l > len)
1228  return -1;
1229  c2 = *s++;
1230  if (c2 < 0xa1 || c2 > 0xa7)
1231  return -1;
1232  c2 = *s++;
1233  if (!IS_EUC_RANGE_VALID(c2))
1234  return -1;
1235  c2 = *s++;
1236  if (!IS_EUC_RANGE_VALID(c2))
1237  return -1;
1238  break;
1239 
1240  case SS3: /* unused */
1241  return -1;
1242 
1243  default:
1244  if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */
1245  {
1246  l = 2;
1247  if (l > len)
1248  return -1;
1249  /* no further range check on c1? */
1250  c2 = *s++;
1251  if (!IS_EUC_RANGE_VALID(c2))
1252  return -1;
1253  }
1254  else
1255  /* must be ASCII */
1256  {
1257  l = 1;
1258  }
1259  break;
1260  }
1261  return l;
1262 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1125
#define SS2
Definition: pg_wchar.h:35
static int pg_gb18030_dsplen ( const unsigned char *  s)
static

Definition at line 1091 of file wchar.c.

References IS_HIGHBIT_SET, and pg_ascii_dsplen().

1092 {
1093  int len;
1094 
1095  if (IS_HIGHBIT_SET(*s))
1096  len = 2;
1097  else
1098  len = pg_ascii_dsplen(s); /* ASCII */
1099  return len;
1100 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:61
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
static int pg_gb18030_mblen ( const unsigned char *  s)
static

Definition at line 1077 of file wchar.c.

References IS_HIGHBIT_SET.

1078 {
1079  int len;
1080 
1081  if (!IS_HIGHBIT_SET(*s))
1082  len = 1; /* ASCII */
1083  else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1084  len = 4;
1085  else
1086  len = 2;
1087  return len;
1088 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
static int pg_gb18030_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1399 of file wchar.c.

References IS_HIGHBIT_SET.

1400 {
1401  int l;
1402 
1403  if (!IS_HIGHBIT_SET(*s))
1404  l = 1; /* ASCII */
1405  else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1406  {
1407  /* Should be 4-byte, validate remaining bytes */
1408  if (*s >= 0x81 && *s <= 0xfe &&
1409  *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
1410  *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
1411  l = 4;
1412  else
1413  l = -1;
1414  }
1415  else if (len >= 2 && *s >= 0x81 && *s <= 0xfe)
1416  {
1417  /* Should be 2-byte, validate */
1418  if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
1419  (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
1420  l = 2;
1421  else
1422  l = -1;
1423  }
1424  else
1425  l = -1;
1426  return l;
1427 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
static int pg_gbk_dsplen ( const unsigned char *  s)
static

Definition at line 1034 of file wchar.c.

References IS_HIGHBIT_SET, and pg_ascii_dsplen().

1035 {
1036  int len;
1037 
1038  if (IS_HIGHBIT_SET(*s))
1039  len = 2; /* kanji? */
1040  else
1041  len = pg_ascii_dsplen(s); /* should be ASCII */
1042  return len;
1043 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:61
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
static int pg_gbk_mblen ( const unsigned char *  s)
static

Definition at line 1022 of file wchar.c.

References IS_HIGHBIT_SET.

Referenced by pg_gbk_verifier().

1023 {
1024  int len;
1025 
1026  if (IS_HIGHBIT_SET(*s))
1027  len = 2; /* kanji? */
1028  else
1029  len = 1; /* should be ASCII */
1030  return len;
1031 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
static int pg_gbk_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1359 of file wchar.c.

References pg_gbk_mblen().

1360 {
1361  int l,
1362  mbl;
1363 
1364  l = mbl = pg_gbk_mblen(s);
1365 
1366  if (len < l)
1367  return -1;
1368 
1369  while (--l > 0)
1370  {
1371  if (*++s == '\0')
1372  return -1;
1373  }
1374 
1375  return mbl;
1376 }
static int pg_gbk_mblen(const unsigned char *s)
Definition: wchar.c:1022
static bool pg_generic_charinc ( unsigned char *  charptr,
int  len 
)
static

Definition at line 1527 of file wchar.c.

References GetDatabaseEncoding(), pg_wchar_tbl::mbverify, and pg_wchar_table.

Referenced by pg_database_encoding_character_incrementer().

1528 {
1529  unsigned char *lastbyte = charptr + len - 1;
1530  mbverifier mbverify;
1531 
1532  /* We can just invoke the character verifier directly. */
1533  mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverify;
1534 
1535  while (*lastbyte < (unsigned char) 255)
1536  {
1537  (*lastbyte)++;
1538  if ((*mbverify) (charptr, len) == len)
1539  return true;
1540  }
1541 
1542  return false;
1543 }
int GetDatabaseEncoding(void)
Definition: mbutils.c:1015
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
int(* mbverifier)(const unsigned char *mbstr, int len)
Definition: pg_wchar.h:358
mbverifier mbverify
Definition: pg_wchar.h:368
static int pg_johab_dsplen ( const unsigned char *  s)
static

Definition at line 399 of file wchar.c.

References pg_euc_dsplen().

400 {
401  return pg_euc_dsplen(s);
402 }
static int pg_euc_dsplen(const unsigned char *s)
Definition: wchar.c:130
static int pg_johab_mblen ( const unsigned char *  s)
static

Definition at line 393 of file wchar.c.

References pg_euc_mblen().

Referenced by pg_johab_verifier().

394 {
395  return pg_euc_mblen(s);
396 }
static int pg_euc_mblen(const unsigned char *s)
Definition: wchar.c:114
static int pg_johab_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1265 of file wchar.c.

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, and pg_johab_mblen().

1266 {
1267  int l,
1268  mbl;
1269  unsigned char c;
1270 
1271  l = mbl = pg_johab_mblen(s);
1272 
1273  if (len < l)
1274  return -1;
1275 
1276  if (!IS_HIGHBIT_SET(*s))
1277  return mbl;
1278 
1279  while (--l > 0)
1280  {
1281  c = *++s;
1282  if (!IS_EUC_RANGE_VALID(c))
1283  return -1;
1284  }
1285  return mbl;
1286 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
char * c
static int pg_johab_mblen(const unsigned char *s)
Definition: wchar.c:393
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1125
static int pg_latin12wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 912 of file wchar.c.

913 {
914  int cnt = 0;
915 
916  while (len > 0 && *from)
917  {
918  *to++ = *from++;
919  len--;
920  cnt++;
921  }
922  *to = 0;
923  return cnt;
924 }
static int pg_latin1_dsplen ( const unsigned char *  s)
static

Definition at line 955 of file wchar.c.

References pg_ascii_dsplen().

956 {
957  return pg_ascii_dsplen(s);
958 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:61
/*
 * pg_latin1_mblen
 *      Byte length of the character starting at s.
 *
 * The argument is never inspected; the result is always 1.
 */
static int
pg_latin1_mblen(const unsigned char *s)
{
    enum
    {
        BYTES_PER_CHAR = 1
    };

    return BYTES_PER_CHAR;
}
/*
 * pg_latin1_verifier
 *      Verifier for single-byte encodings.
 *
 * Neither s nor len is consulted: the function unconditionally reports a
 * valid character of length 1.
 */
static int
pg_latin1_verifier(const unsigned char *s, int len)
{
    const int   accepted_bytes = 1;

    return accepted_bytes;
}
/*
 * pg_mic_mblen
 *      Byte length of the MULE internal code character starting at mbstr.
 *
 * Exported wrapper around pg_mule_mblen(); the page lists mic2latin() and
 * mic2latin_with_table() as its callers.
 */
int
pg_mic_mblen(const unsigned char *mbstr)
{
    const int   nbytes = pg_mule_mblen(mbstr);

    return nbytes;
}
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:866
/*
 * pg_mule2wchar_with_len
 *      Convert a MULE internal code string into an array of pg_wchar.
 *
 * from - source bytes
 * to   - destination array (a pointer: every element is written through it)
 * len  - maximum number of source bytes to consume
 *
 * Each source character is packed into one pg_wchar according to the class
 * of its lead byte:
 *
 *  IS_LC1     (2 bytes): lead byte << 16 | second byte
 *  IS_LCPRV1  (3 bytes): prefix byte dropped, second << 16 | third
 *  IS_LC2     (3 bytes): lead << 16 | second << 8 | third
 *  IS_LCPRV2  (4 bytes): prefix byte dropped, second << 16 | third << 8 | fourth
 *  otherwise  (1 byte) : the byte itself
 *
 * A multibyte form is only taken when len still covers all of its bytes;
 * otherwise the lead byte falls through to the single-byte branch.
 *
 * Conversion stops after len bytes or at a zero byte.  A terminating 0
 * element is always stored.  Returns the number of characters produced.
 */
static int
pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
{
    int         cnt = 0;

    while (len > 0 && *from)
    {
        if (IS_LC1(*from) && len >= 2)
        {
            *to = *from++ << 16;
            *to |= *from++;
            len -= 2;
        }
        else if (IS_LCPRV1(*from) && len >= 3)
        {
            from++;             /* skip the private-charset prefix byte */
            *to = *from++ << 16;
            *to |= *from++;
            len -= 3;
        }
        else if (IS_LC2(*from) && len >= 3)
        {
            *to = *from++ << 16;
            *to |= *from++ << 8;
            *to |= *from++;
            len -= 3;
        }
        else if (IS_LCPRV2(*from) && len >= 4)
        {
            from++;             /* skip the private-charset prefix byte */
            *to = *from++ << 16;
            *to |= *from++ << 8;
            *to |= *from++;
            len -= 4;
        }
        else
        {                       /* assume ASCII */
            *to = (unsigned char) *from++;
            len--;
        }
        to++;
        cnt++;
    }
    *to = 0;
    return cnt;
}
#define IS_LC2(c)
Definition: pg_wchar.h:142
#define IS_LCPRV2(c)
Definition: pg_wchar.h:162
#define IS_LCPRV1(c)
Definition: pg_wchar.h:150
#define IS_LC1(c)
Definition: pg_wchar.h:123
/*
 * pg_mule_dsplen
 *      Display length of the MULE internal code character starting at s.
 *
 * Only the lead byte is examined.  Lead bytes classified as IS_LC1 or
 * IS_LCPRV1 yield 1, IS_LC2 or IS_LCPRV2 yield 2, and anything else is
 * treated as ASCII and yields 1.
 *
 * Note: it's not really appropriate to assume that all multibyte charsets
 * are double-wide on screen.  But this seems an okay approximation for the
 * MULE charsets we currently support.
 */
static int
pg_mule_dsplen(const unsigned char *s)
{
    if (IS_LC1(*s) || IS_LCPRV1(*s))
        return 1;

    if (IS_LC2(*s) || IS_LCPRV2(*s))
        return 2;

    return 1;                   /* assume ASCII */
}
#define IS_LC2(c)
Definition: pg_wchar.h:142
#define IS_LCPRV2(c)
Definition: pg_wchar.h:162
#define IS_LCPRV1(c)
Definition: pg_wchar.h:150
#define IS_LC1(c)
Definition: pg_wchar.h:123
/*
 * pg_mule_mblen
 *      Byte length of the MULE internal code character starting at s.
 *
 * The length is decided by the class of the lead byte alone:
 * IS_LC1 -> 2, IS_LCPRV1 -> 3, IS_LC2 -> 3, IS_LCPRV2 -> 4, and any other
 * byte is assumed to be ASCII -> 1.  The tests are applied in that order.
 */
int
pg_mule_mblen(const unsigned char *s)
{
    if (IS_LC1(*s))
        return 2;
    if (IS_LCPRV1(*s))
        return 3;
    if (IS_LC2(*s))
        return 3;
    if (IS_LCPRV2(*s))
        return 4;

    return 1;                   /* assume ASCII */
}
#define IS_LC2(c)
Definition: pg_wchar.h:142
#define IS_LCPRV2(c)
Definition: pg_wchar.h:162
#define IS_LCPRV1(c)
Definition: pg_wchar.h:150
#define IS_LC1(c)
Definition: pg_wchar.h:123
/*
 * pg_mule_verifier
 *      Validate the single MULE internal code character starting at s.
 *
 * s    - first byte of the character
 * len  - number of bytes available at s
 *
 * Returns the character's byte length (from pg_mule_mblen()), or -1 if the
 * input is too short or any byte after the first lacks the high bit.
 */
static int
pg_mule_verifier(const unsigned char *s, int len)
{
    const int   nbytes = pg_mule_mblen(s);
    int         i;

    if (nbytes > len)
        return -1;

    /* all trailing bytes must have the high bit set */
    for (i = 1; i < nbytes; i++)
    {
        const unsigned char trail = s[i];

        if (!IS_HIGHBIT_SET(trail))
            return -1;
    }

    return nbytes;
}
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:866
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
char * c
/*
 * pg_sjis_dsplen
 *      Display length of the SJIS character starting at s.
 *
 * A lead byte in 0xa1..0xdf counts as width 1, any other byte with the
 * high bit set counts as width 2, and 7-bit bytes are handed to
 * pg_ascii_dsplen().
 */
static int
pg_sjis_dsplen(const unsigned char *s)
{
    const unsigned char lead = *s;

    if (lead >= 0xa1 && lead <= 0xdf)
        return 1;               /* 1 byte kana? */

    if (IS_HIGHBIT_SET(lead))
        return 2;               /* kanji? */

    return pg_ascii_dsplen(s);  /* should be ASCII */
}
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:61
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
/*
 * pg_sjis_mblen
 *      Byte length of the SJIS character starting at s.
 *
 * A lead byte in 0xa1..0xdf is a one-byte character, any other byte with
 * the high bit set starts a two-byte character, and 7-bit bytes are one
 * byte long.
 */
static int
pg_sjis_mblen(const unsigned char *s)
{
    const unsigned char lead = *s;

    if (lead >= 0xa1 && lead <= 0xdf)
        return 1;               /* 1 byte kana? */

    if (IS_HIGHBIT_SET(lead))
        return 2;               /* kanji? */

    return 1;                   /* should be ASCII */
}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
/*
 * pg_sjis_verifier
 *      Validate the single SJIS character starting at s.
 *
 * s    - first byte of the character
 * len  - number of bytes available at s
 *
 * Returns the character's byte length (from pg_sjis_mblen()), or -1 if the
 * input is too short or, for a two-byte character, the first byte fails
 * ISSJISHEAD() or the second fails ISSJISTAIL().
 */
static int
pg_sjis_verifier(const unsigned char *s, int len)
{
    const int   nbytes = pg_sjis_mblen(s);
    unsigned char lead;
    unsigned char trail;

    if (nbytes > len)
        return -1;

    /* one-byte characters need no further checks: pg_sjis_mblen verified it */
    if (nbytes == 1)
        return nbytes;

    lead = s[0];
    trail = s[1];

    if (ISSJISHEAD(lead) && ISSJISTAIL(trail))
        return nbytes;

    return -1;
}
#define ISSJISTAIL(c)
Definition: pg_wchar.h:42
static int pg_sjis_mblen(const unsigned char *s)
Definition: wchar.c:964
#define ISSJISHEAD(c)
Definition: pg_wchar.h:41
/*
 * pg_uhc_dsplen
 *      Display length of the UHC character starting at s.
 *
 * A lead byte with the high bit set counts as width 2; 7-bit bytes are
 * handed to pg_ascii_dsplen().
 */
static int
pg_uhc_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;               /* 2byte? */

    return pg_ascii_dsplen(s);  /* should be ASCII */
}
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:61
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
/*
 * pg_uhc_mblen
 *      Byte length of the UHC character starting at s.
 *
 * Two bytes when the lead byte has the high bit set, otherwise one.
 */
static int
pg_uhc_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s)
        ? 2                     /* 2byte? */
        : 1;                    /* should be ASCII */
}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
/*
 * pg_uhc_verifier
 *      Validate the single UHC character starting at s.
 *
 * s    - first byte of the character
 * len  - number of bytes available at s
 *
 * Returns the character's byte length (from pg_uhc_mblen()), or -1 if the
 * input is too short or any byte after the first is a zero byte.
 */
static int
pg_uhc_verifier(const unsigned char *s, int len)
{
    const int   nbytes = pg_uhc_mblen(s);
    int         i;

    if (nbytes > len)
        return -1;

    /* an embedded NUL inside a multibyte character is never acceptable */
    for (i = 1; i < nbytes; i++)
    {
        if (s[i] == '\0')
            return -1;
    }

    return nbytes;
}
static int pg_uhc_mblen(const unsigned char *s)
Definition: wchar.c:1049
/*
 * pg_utf2wchar_with_len
 *      Decode a UTF-8 string into an array of pg_wchar code points.
 *
 * from - source bytes
 * to   - destination array (a pointer: every element is written through it)
 * len  - maximum number of source bytes to consume
 *
 * The lead byte selects a 1-, 2-, 3- or 4-byte form and the payload bits of
 * each byte are combined into one output element.  If fewer bytes remain
 * than the form requires, decoding stops and the incomplete character is
 * dropped.  A lead byte that matches none of the forms is copied through as
 * a one-byte character; no error is raised here.
 *
 * Decoding also stops at a zero byte.  A terminating 0 element is always
 * stored.  Returns the number of characters produced.
 */
static int
pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
{
    int         cnt = 0;
    uint32      c1,
                c2,
                c3,
                c4;

    while (len > 0 && *from)
    {
        if ((*from & 0x80) == 0)
        {
            /* 0xxxxxxx: plain 7-bit character */
            *to = *from++;
            len--;
        }
        else if ((*from & 0xe0) == 0xc0)
        {
            /* 110xxxxx 10xxxxxx */
            if (len < 2)
                break;          /* drop trailing incomplete char */
            c1 = *from++ & 0x1f;
            c2 = *from++ & 0x3f;
            *to = (c1 << 6) | c2;
            len -= 2;
        }
        else if ((*from & 0xf0) == 0xe0)
        {
            /* 1110xxxx 10xxxxxx 10xxxxxx */
            if (len < 3)
                break;          /* drop trailing incomplete char */
            c1 = *from++ & 0x0f;
            c2 = *from++ & 0x3f;
            c3 = *from++ & 0x3f;
            *to = (c1 << 12) | (c2 << 6) | c3;
            len -= 3;
        }
        else if ((*from & 0xf8) == 0xf0)
        {
            /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
            if (len < 4)
                break;          /* drop trailing incomplete char */
            c1 = *from++ & 0x07;
            c2 = *from++ & 0x3f;
            c3 = *from++ & 0x3f;
            c4 = *from++ & 0x3f;
            *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
            len -= 4;
        }
        else
        {
            /* treat a bogus char as length 1; not ours to raise error */
            *to = *from++;
            len--;
        }
        to++;
        cnt++;
    }
    *to = 0;
    return cnt;
}
unsigned int uint32
Definition: c.h:268
/*
 * pg_utf8_increment
 *      Advance a UTF-8 character, in place, to a later character of the
 *      same byte length.
 *
 * charptr - the character's bytes (modified on success)
 * length  - its byte length
 *
 * Working from the last byte towards the first, the first byte found below
 * its upper limit is bumped by one and the function succeeds.  Bytes three
 * and four are limited to 0xBF.  The second byte is limited to 0x9F after a
 * 0xED lead, 0x8F after a 0xF4 lead, and 0xBF otherwise.  Bytes that are
 * already at their limit are left as they are.
 *
 * If no trailing byte can be bumped, the lead byte is incremented unless it
 * is 0x7F, 0xDF, 0xEF or 0xF4, in which case nothing is modified and false
 * is returned.  Lengths outside 1..4 are rejected with false.
 */
static bool
pg_utf8_increment(unsigned char *charptr, int length)
{
    unsigned char lead;

    /* reject lengths 5 and 6 for now (and anything else outside 1..4) */
    if (length < 1 || length > 4)
        return false;

    if (length >= 4 && charptr[3] < 0xBF)
    {
        charptr[3]++;
        return true;
    }

    if (length >= 3 && charptr[2] < 0xBF)
    {
        charptr[2]++;
        return true;
    }

    if (length >= 2)
    {
        unsigned char second_max;

        if (charptr[0] == 0xED)
            second_max = 0x9F;
        else if (charptr[0] == 0xF4)
            second_max = 0x8F;
        else
            second_max = 0xBF;

        if (charptr[1] < second_max)
        {
            charptr[1]++;
            return true;
        }
    }

    /* only the lead byte is left to try */
    lead = charptr[0];
    if (lead == 0x7F || lead == 0xDF || lead == 0xEF || lead == 0xF4)
        return false;

    charptr[0]++;
    return true;
}
int length(const List *list)
Definition: list.c:1271
/*
 * pg_utf8_islegal
 *      Check whether the `length` bytes at `source` form one legal UTF-8
 *      character.
 *
 * source - first byte of the candidate character
 * length - number of bytes the character claims to occupy
 *
 * Rules applied, from the last byte towards the first:
 *  - lengths outside 1..4 are rejected;
 *  - third and fourth bytes, when present, must be in 0x80..0xBF;
 *  - the second byte must be in 0x80..0xBF, narrowed by the lead byte to
 *    0xA0..0xBF (lead 0xE0), 0x80..0x9F (lead 0xED), 0x90..0xBF (lead 0xF0)
 *    or 0x80..0x8F (lead 0xF4);
 *  - the lead byte must not be in 0x80..0xC1 and must not exceed 0xF4.
 *
 * The function does not check that `length` agrees with the lead byte.
 */
bool
pg_utf8_islegal(const unsigned char *source, int length)
{
    unsigned char lead;
    int         idx;

    /* reject lengths 5 and 6 for now (and anything else outside 1..4) */
    if (length < 1 || length > 4)
        return false;

    /* bytes beyond the second are ordinary continuation bytes */
    for (idx = length - 1; idx >= 2; idx--)
    {
        if (source[idx] < 0x80 || source[idx] > 0xBF)
            return false;
    }

    lead = source[0];

    if (length >= 2)
    {
        const unsigned char second = source[1];
        unsigned char lo = 0x80;
        unsigned char hi = 0xBF;

        /* some lead bytes narrow the range allowed for the second byte */
        switch (lead)
        {
            case 0xE0:
                lo = 0xA0;
                break;
            case 0xED:
                hi = 0x9F;
                break;
            case 0xF0:
                lo = 0x90;
                break;
            case 0xF4:
                hi = 0x8F;
                break;
            default:
                break;
        }

        if (second < lo || second > hi)
            return false;
    }

    if (lead >= 0x80 && lead < 0xC2)
        return false;
    if (lead > 0xF4)
        return false;

    return true;
}
int length(const List *list)
Definition: list.c:1271
/*
 * pg_utf8_verifier
 *      Validate the single UTF-8 character starting at s.
 *
 * s    - first byte of the character
 * len  - number of bytes available at s
 *
 * The expected length comes from pg_utf_mblen().  Returns that length, or
 * -1 if the input is shorter than that or pg_utf8_islegal() rejects the
 * bytes.  The length test is made first, so pg_utf8_islegal() is only
 * called when all the bytes it will read are available.
 */
static int
pg_utf8_verifier(const unsigned char *s, int len)
{
    const int   nbytes = pg_utf_mblen(s);

    if (nbytes > len || !pg_utf8_islegal(s, nbytes))
        return -1;

    return nbytes;
}
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1458
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:541
static int pg_utf_dsplen ( const unsigned char *  s)
static

Definition at line 736 of file wchar.c.

References ucs_wcwidth(), and utf8_to_unicode().

737 {
738  return ucs_wcwidth(utf8_to_unicode(s));
739 }
static int ucs_wcwidth(pg_wchar ucs)
Definition: wchar.c:637
pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: wchar.c:714
/*
 * pg_utf_mblen
 *      Byte length of the UTF-8 character starting at s, judged from its
 *      lead byte only.
 *
 * 0xxxxxxx -> 1, 110xxxxx -> 2, 1110xxxx -> 3, 11110xxx -> 4.  The 5- and
 * 6-byte forms are recognised only when NOT_USED is defined.  Any other
 * lead byte (for example a stray continuation byte) is reported as length 1.
 */
int
pg_utf_mblen(const unsigned char *s)
{
    const unsigned char lead = *s;

    if ((lead & 0x80) == 0)
        return 1;
    if ((lead & 0xe0) == 0xc0)
        return 2;
    if ((lead & 0xf0) == 0xe0)
        return 3;
    if ((lead & 0xf8) == 0xf0)
        return 4;
#ifdef NOT_USED
    if ((lead & 0xfc) == 0xf8)
        return 5;
    if ((lead & 0xfe) == 0xfc)
        return 6;
#endif

    /* not a recognised lead byte: count it as a single byte */
    return 1;
}
/*
 * pg_verify_mbstr
 *      Check that mbstr is valid in the given encoding.
 *
 * encoding - encoding to validate against
 * mbstr    - string to check
 * len      - its length in bytes
 * noError  - passed through to pg_verify_mbstr_len()
 *
 * Thin wrapper: returns true when pg_verify_mbstr_len() yields a
 * non-negative result.
 */
bool
pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
{
    const int   nchars = pg_verify_mbstr_len(encoding, mbstr, len, noError);

    return nchars >= 0;
}
int pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
Definition: wchar.c:1894
static char * encoding
Definition: initdb.c:122
int pg_verify_mbstr_len ( int  encoding,
const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1894 of file wchar.c.

References Assert, encoding, IS_HIGHBIT_SET, pg_wchar_tbl::mbverify, NULL, pg_encoding_max_length(), PG_VALID_ENCODING, and report_invalid_encoding().

Referenced by length_in_encoding(), pg_convert(), pg_verify_mbstr(), pg_verifymbstr(), and read_extension_script_file().

1895 {
1896  mbverifier mbverify;
1897  int mb_len;
1898 
1900 
1901  /*
1902  * In single-byte encodings, we need only reject nulls (\0).
1903  */
1904  if (pg_encoding_max_length(encoding) <= 1)
1905  {
1906  const char *nullpos = memchr(mbstr, 0, len);
1907 
1908  if (nullpos == NULL)
1909  return len;
1910  if (noError)
1911  return -1;
1912  report_invalid_encoding(encoding, nullpos, 1);
1913  }
1914 
1915  /* fetch function pointer just once */
1916  mbverify = pg_wchar_table[encoding].mbverify;
1917 
1918  mb_len = 0;
1919 
1920  while (len > 0)
1921  {
1922  int l;
1923 
1924  /* fast path for ASCII-subset characters */
1925  if (!IS_HIGHBIT_SET(*mbstr))
1926  {
1927  if (*mbstr != '\0')
1928  {
1929  mb_len++;
1930  mbstr++;
1931  len--;
1932  continue;
1933  }
1934  if (noError)
1935  return -1;
1936  report_invalid_encoding(encoding, mbstr, len);
1937  }
1938 
1939  l = (*mbverify) ((const unsigned char *) mbstr, len);
1940 
1941  if (l < 0)
1942  {
1943  if (noError)
1944  return -1;
1945  report_invalid_encoding(encoding, mbstr, len);
1946  }
1947 
1948  mbstr += l;
1949  len -= l;
1950  mb_len++;
1951  }
1952  return mb_len;
1953 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
int pg_encoding_max_length(int encoding)
Definition: wchar.c:1820
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:299
static char * encoding
Definition: initdb.c:122
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
int(* mbverifier)(const unsigned char *mbstr, int len)
Definition: pg_wchar.h:358
mbverifier mbverify
Definition: pg_wchar.h:368
/*
 * pg_verifymbstr
 *      Check that mbstr is valid in the database encoding.
 *
 * mbstr   - string to check
 * len     - its length in bytes
 * noError - passed through to pg_verify_mbstr_len()
 *
 * The encoding is obtained from GetDatabaseEncoding().  Returns true when
 * pg_verify_mbstr_len() yields a non-negative result.
 */
bool
pg_verifymbstr(const char *mbstr, int len, bool noError)
{
    const int   nchars = pg_verify_mbstr_len(GetDatabaseEncoding(),
                                             mbstr, len, noError);

    return nchars >= 0;
}
int pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
Definition: wchar.c:1894
int GetDatabaseEncoding(void)
Definition: mbutils.c:1015
/*
 * pg_wchar2euc_with_len
 *      Serialise an array of pg_wchar into a byte string.
 *
 * from - source array (a pointer: elements are read through it)
 * to   - destination byte buffer
 * len  - maximum number of source elements to consume
 *
 * Each element is written most-significant byte first using as many bytes
 * as its highest non-zero byte requires: four if bits 24-31 are non-zero,
 * else three if bits 16-23 are, else two if bits 8-15 are, else one.
 *
 * Conversion stops after len elements or at a zero element.  A terminating
 * zero byte is always stored.  Returns the number of bytes written, not
 * counting the terminator.
 */
static int
pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
{
    int         cnt = 0;

    while (len > 0 && *from)
    {
        unsigned char c;

        if ((c = (*from >> 24)))
        {
            *to++ = c;
            *to++ = (*from >> 16) & 0xff;
            *to++ = (*from >> 8) & 0xff;
            *to++ = *from & 0xff;
            cnt += 4;
        }
        else if ((c = (*from >> 16)))
        {
            *to++ = c;
            *to++ = (*from >> 8) & 0xff;
            *to++ = *from & 0xff;
            cnt += 3;
        }
        else if ((c = (*from >> 8)))
        {
            *to++ = c;
            *to++ = *from & 0xff;
            cnt += 2;
        }
        else
        {
            *to++ = *from;
            cnt++;
        }
        from++;
        len--;
    }
    *to = 0;
    return cnt;
}
char * c
static int pg_wchar2mule_with_len ( const pg_wchar from,
unsigned char *  to,
int  len 
)
static

Definition at line 801 of file wchar.c.

References IS_LC1, IS_LC2, IS_LCPRV1_A_RANGE, IS_LCPRV1_B_RANGE, IS_LCPRV2_A_RANGE, IS_LCPRV2_B_RANGE, LCPRV1_A, LCPRV1_B, LCPRV2_A, and LCPRV2_B.

802 {
803  int cnt = 0;
804 
805  while (len > 0 && *from)
806  {
807  unsigned char lb;
808 
809  lb = (*from >> 16) & 0xff;
810  if (IS_LC1(lb))
811  {
812  *to++ = lb;
813  *to++ = *from & 0xff;
814  cnt += 2;
815  }
816  else if (IS_LC2(lb))
817  {
818  *to++ = lb;
819  *to++ = (*from >> 8) & 0xff;
820  *to++ = *from & 0xff;
821  cnt += 3;
822  }
823  else if (IS_LCPRV1_A_RANGE(lb))
824  {
825  *to++ = LCPRV1_A;
826  *to++ = lb;
827  *to++ = *from & 0xff;
828  cnt += 3;
829  }
830  else if (IS_LCPRV1_B_RANGE(lb))
831  {
832  *to++ = LCPRV1_B;
833  *to++ = lb;
834  *to++ = *from & 0xff;
835  cnt += 3;
836  }
837  else if (IS_LCPRV2_A_RANGE(lb))
838  {
839  *to++ = LCPRV2_A;
840  *to++ = lb;
841  *to++ = (*from >> 8) & 0xff;
842  *to++ = *from & 0xff;
843  cnt += 4;
844  }
845  else if (IS_LCPRV2_B_RANGE(lb))
846  {
847  *to++ = LCPRV2_B;
848  *to++ = lb;
849  *to++ = (*from >> 8) & 0xff;
850  *to++ = *from & 0xff;
851  cnt += 4;
852  }
853  else
854  {
855  *to++ = *from & 0xff;
856  cnt += 1;
857  }
858  from++;
859  len--;
860  }
861  *to = 0;
862  return cnt;
863 }
#define IS_LC2(c)
Definition: pg_wchar.h:142
#define LCPRV1_A
Definition: pg_wchar.h:148
#define LCPRV1_B
Definition: pg_wchar.h:149
#define IS_LCPRV2_A_RANGE(c)
Definition: pg_wchar.h:163
#define LCPRV2_B
Definition: pg_wchar.h:161
#define LCPRV2_A
Definition: pg_wchar.h:160
#define IS_LCPRV2_B_RANGE(c)
Definition: pg_wchar.h:165
#define IS_LC1(c)
Definition: pg_wchar.h:123
#define IS_LCPRV1_A_RANGE(c)
Definition: pg_wchar.h:151
#define IS_LCPRV1_B_RANGE(c)
Definition: pg_wchar.h:153
/*
 * pg_wchar2single_with_len
 *      Narrow an array of pg_wchar into a single-byte string.
 *
 * from - source array (a pointer: elements are read through it)
 * to   - destination byte buffer
 * len  - maximum number of source elements to consume
 *
 * Each element is stored into one unsigned char, so only its low-order
 * byte survives the assignment.  Conversion stops after len elements or at
 * a zero element.  A terminating zero byte is always stored.  Returns the
 * number of bytes written, not counting the terminator.
 */
static int
pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
{
    int         cnt = 0;

    while (len > 0 && *from)
    {
        *to++ = *from++;
        len--;
        cnt++;
    }
    *to = 0;
    return cnt;
}
/*
 * pg_wchar2utf_with_len
 *      Encode an array of pg_wchar code points as UTF-8.
 *
 * from - source array (a pointer: elements are read through it)
 * to   - destination byte buffer
 * len  - maximum number of source elements to consume
 *
 * Each element is encoded with unicode_to_utf8(); the number of bytes it
 * produced is then recovered by applying pg_utf_mblen() to the lead byte
 * just written.
 *
 * Conversion stops after len elements or at a zero element.  A terminating
 * zero byte is always stored.  Returns the number of bytes written, not
 * counting the terminator.
 */
static int
pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
{
    int         cnt = 0;

    while (len > 0 && *from)
    {
        int         char_len;

        unicode_to_utf8(*from, to);
        char_len = pg_utf_mblen(to);
        cnt += char_len;
        to += char_len;
        from++;
        len--;
    }
    *to = 0;
    return cnt;
}
unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: wchar.c:475
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:541
void report_invalid_encoding ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 1995 of file wchar.c.

References buf, ereport, errcode(), errmsg(), ERROR, Min, name, pg_enc2name_tbl, and pg_encoding_mblen().

Referenced by big52mic(), euc_cn2mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2mic(), iso8859_1_to_utf8(), latin2mic(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_ascii2mic(), pg_verify_mbstr_len(), shift_jis_20042euc_jis_2004(), sjis2euc_jp(), sjis2mic(), utf8_to_iso8859_1(), and UtfToLocal().

1996 {
1997  int l = pg_encoding_mblen(encoding, mbstr);
1998  char buf[8 * 5 + 1];
1999  char *p = buf;
2000  int j,
2001  jlimit;
2002 
2003  jlimit = Min(l, len);
2004  jlimit = Min(jlimit, 8); /* prevent buffer overrun */
2005 
2006  for (j = 0; j < jlimit; j++)
2007  {
2008  p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
2009  if (j < jlimit - 1)
2010  p += sprintf(p, " ");
2011  }
2012 
2013  ereport(ERROR,
2014  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
2015  errmsg("invalid byte sequence for encoding \"%s\": %s",
2017  buf)));
2018 }
#define Min(x, y)
Definition: c.h:806
int errcode(int sqlerrcode)
Definition: elog.c:575
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:66
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:307
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:1785
#define ereport(elevel, rest)
Definition: elog.h:122
static char * encoding
Definition: initdb.c:122
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:797
void report_untranslatable_char ( int  src_encoding,
int  dest_encoding,
const char *  mbstr,
int  len 
)

Definition at line 2027 of file wchar.c.

References buf, ereport, errcode(), errmsg(), ERROR, Min, name, pg_enc2name_tbl, and pg_encoding_mblen().

Referenced by big52mic(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_mic2ascii(), utf8_to_iso8859_1(), and UtfToLocal().

2029 {
2030  int l = pg_encoding_mblen(src_encoding, mbstr);
2031  char buf[8 * 5 + 1];
2032  char *p = buf;
2033  int j,
2034  jlimit;
2035 
2036  jlimit = Min(l, len);
2037  jlimit = Min(jlimit, 8); /* prevent buffer overrun */
2038 
2039  for (j = 0; j < jlimit; j++)
2040  {
2041  p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
2042  if (j < jlimit - 1)
2043  p += sprintf(p, " ");
2044  }
2045 
2046  ereport(ERROR,
2047  (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
2048  errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
2049  buf,
2050  pg_enc2name_tbl[src_encoding].name,
2051  pg_enc2name_tbl[dest_encoding].name)));
2052 }
#define Min(x, y)
Definition: c.h:806
int errcode(int sqlerrcode)
Definition: elog.c:575
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:66
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:307
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:1785
#define ereport(elevel, rest)
Definition: elog.h:122
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:797
/*
 * ucs_wcwidth
 *      Column width of a single code point.
 *
 * Returns
 *   0  for code point 0 and for anything found in the table of
 *      non-spacing characters below;
 *  -1  for values below 0x20, values in 0x7f..0x9f, and values above
 *      0x10ffff;
 *   2  for the wide ranges tested at the end of the function;
 *   1  for everything else.
 */
static int
ucs_wcwidth(pg_wchar ucs)
{
    /* sorted list of non-overlapping intervals of non-spacing characters */
    static const struct mbinterval combining[] = {
        {0x0300, 0x034E}, {0x0360, 0x0362}, {0x0483, 0x0486},
        {0x0488, 0x0489}, {0x0591, 0x05A1}, {0x05A3, 0x05B9},
        {0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
        {0x05C4, 0x05C4}, {0x064B, 0x0655}, {0x0670, 0x0670},
        {0x06D6, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
        {0x070F, 0x070F}, {0x0711, 0x0711}, {0x0730, 0x074A},
        {0x07A6, 0x07B0}, {0x0901, 0x0902}, {0x093C, 0x093C},
        {0x0941, 0x0948}, {0x094D, 0x094D}, {0x0951, 0x0954},
        {0x0962, 0x0963}, {0x0981, 0x0981}, {0x09BC, 0x09BC},
        {0x09C1, 0x09C4}, {0x09CD, 0x09CD}, {0x09E2, 0x09E3},
        {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A41, 0x0A42},
        {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
        {0x0A81, 0x0A82}, {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC5},
        {0x0AC7, 0x0AC8}, {0x0ACD, 0x0ACD}, {0x0B01, 0x0B01},
        {0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B43},
        {0x0B4D, 0x0B4D}, {0x0B56, 0x0B56}, {0x0B82, 0x0B82},
        {0x0BC0, 0x0BC0}, {0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40},
        {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
        {0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD},
        {0x0D41, 0x0D43}, {0x0D4D, 0x0D4D}, {0x0DCA, 0x0DCA},
        {0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6}, {0x0E31, 0x0E31},
        {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1},
        {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD},
        {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},
        {0x0F39, 0x0F39}, {0x0F71, 0x0F7E}, {0x0F80, 0x0F84},
        {0x0F86, 0x0F87}, {0x0F90, 0x0F97}, {0x0F99, 0x0FBC},
        {0x0FC6, 0x0FC6}, {0x102D, 0x1030}, {0x1032, 0x1032},
        {0x1036, 0x1037}, {0x1039, 0x1039}, {0x1058, 0x1059},
        {0x1160, 0x11FF}, {0x17B7, 0x17BD}, {0x17C6, 0x17C6},
        {0x17C9, 0x17D3}, {0x180B, 0x180E}, {0x18A9, 0x18A9},
        {0x200B, 0x200F}, {0x202A, 0x202E}, {0x206A, 0x206F},
        {0x20D0, 0x20E3}, {0x302A, 0x302F}, {0x3099, 0x309A},
        {0xFB1E, 0xFB1E}, {0xFEFF, 0xFEFF},
        {0xFFF9, 0xFFFB}
    };

    /* NUL occupies no columns */
    if (ucs == 0)
        return 0;

    /* test for 8-bit control characters and out-of-range values */
    if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
        return -1;

    /* binary search in table of non-spacing characters */
    if (mbbisearch(ucs, combining,
                   sizeof(combining) / sizeof(struct mbinterval) - 1))
        return 0;

    /*
     * if we arrive here, ucs is not a combining or C0/C1 control character
     */

    /* nothing below U+1100 is wide */
    if (ucs < 0x1100)
        return 1;

    if (ucs <= 0x115f)          /* Hangul Jamo init. consonants */
        return 2;

    if (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
        ucs != 0x303f)          /* CJK ... Yi */
        return 2;

    if (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */
        return 2;

    if (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */
        return 2;

    if (ucs >= 0xfe30 && ucs <= 0xfe6f) /* CJK Compatibility Forms */
        return 2;

    if (ucs >= 0xff00 && ucs <= 0xff5f) /* Fullwidth Forms */
        return 2;

    if (ucs >= 0xffe0 && ucs <= 0xffe6)
        return 2;

    if (ucs >= 0x20000 && ucs <= 0x2ffff)
        return 2;

    return 1;
}
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
Definition: wchar.c:584
/*
 * unicode_to_utf8
 *      Encode one code point as UTF-8.
 *
 * c          - code point to encode
 * utf8string - output buffer; 1 to 4 bytes are written, no terminator
 *
 * Values up to 0x7F use one byte, up to 0x7FF two, up to 0xFFFF three, and
 * everything larger four.  In the four-byte form only the low 21 bits of c
 * are represented.
 *
 * Returns utf8string.
 */
unsigned char *
unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
{
    unsigned char *out = utf8string;

    if (c <= 0x7F)
    {
        *out = c;
    }
    else if (c <= 0x7FF)
    {
        *out++ = 0xC0 | ((c >> 6) & 0x1F);
        *out = 0x80 | (c & 0x3F);
    }
    else if (c <= 0xFFFF)
    {
        *out++ = 0xE0 | ((c >> 12) & 0x0F);
        *out++ = 0x80 | ((c >> 6) & 0x3F);
        *out = 0x80 | (c & 0x3F);
    }
    else
    {
        *out++ = 0xF0 | ((c >> 18) & 0x07);
        *out++ = 0x80 | ((c >> 12) & 0x3F);
        *out++ = 0x80 | ((c >> 6) & 0x3F);
        *out = 0x80 | (c & 0x3F);
    }

    return utf8string;
}
char * c
pg_wchar utf8_to_unicode ( const unsigned char *  c)

Definition at line 714 of file wchar.c.

Referenced by pg_saslprep(), and pg_utf_dsplen().

715 {
716  if ((*c & 0x80) == 0)
717  return (pg_wchar) c[0];
718  else if ((*c & 0xe0) == 0xc0)
719  return (pg_wchar) (((c[0] & 0x1f) << 6) |
720  (c[1] & 0x3f));
721  else if ((*c & 0xf0) == 0xe0)
722  return (pg_wchar) (((c[0] & 0x0f) << 12) |
723  ((c[1] & 0x3f) << 6) |
724  (c[2] & 0x3f));
725  else if ((*c & 0xf8) == 0xf0)
726  return (pg_wchar) (((c[0] & 0x07) << 18) |
727  ((c[1] & 0x3f) << 12) |
728  ((c[2] & 0x3f) << 6) |
729  (c[3] & 0x3f));
730  else
731  /* that is an invalid code on purpose */
732  return 0xffffffff;
733 }
char * c
unsigned int pg_wchar
Definition: mbprint.c:31

Variable Documentation