PostgreSQL Source Code  git master
wchar.c File Reference
#include "postgres.h"
#include "mb/pg_wchar.h"
Include dependency graph for wchar.c:

Go to the source code of this file.

Data Structures

struct  mbinterval
 

Macros

#define IS_EUC_RANGE_VALID(c)   ((c) >= 0xa1 && (c) <= 0xfe)
 
#define pg_euccn_verifier   pg_euckr_verifier
 

Functions

static int pg_ascii2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_ascii_mblen (const unsigned char *s)
 
static int pg_ascii_dsplen (const unsigned char *s)
 
static int pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euc_mblen (const unsigned char *s)
 
static int pg_euc_dsplen (const unsigned char *s)
 
static int pg_eucjp2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_eucjp_mblen (const unsigned char *s)
 
static int pg_eucjp_dsplen (const unsigned char *s)
 
static int pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euckr_mblen (const unsigned char *s)
 
static int pg_euckr_dsplen (const unsigned char *s)
 
static int pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euccn_mblen (const unsigned char *s)
 
static int pg_euccn_dsplen (const unsigned char *s)
 
static int pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_euctw_mblen (const unsigned char *s)
 
static int pg_euctw_dsplen (const unsigned char *s)
 
static int pg_wchar2euc_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_johab_mblen (const unsigned char *s)
 
static int pg_johab_dsplen (const unsigned char *s)
 
static int pg_utf2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
unsigned char * unicode_to_utf8 (pg_wchar c, unsigned char *utf8string)
 
static int pg_wchar2utf_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_utf_mblen (const unsigned char *s)
 
static int mbbisearch (pg_wchar ucs, const struct mbinterval *table, int max)
 
static int ucs_wcwidth (pg_wchar ucs)
 
pg_wchar utf8_to_unicode (const unsigned char *c)
 
static int pg_utf_dsplen (const unsigned char *s)
 
static int pg_mule2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2mule_with_len (const pg_wchar *from, unsigned char *to, int len)
 
int pg_mule_mblen (const unsigned char *s)
 
static int pg_mule_dsplen (const unsigned char *s)
 
static int pg_latin12wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
 
static int pg_wchar2single_with_len (const pg_wchar *from, unsigned char *to, int len)
 
static int pg_latin1_mblen (const unsigned char *s)
 
static int pg_latin1_dsplen (const unsigned char *s)
 
static int pg_sjis_mblen (const unsigned char *s)
 
static int pg_sjis_dsplen (const unsigned char *s)
 
static int pg_big5_mblen (const unsigned char *s)
 
static int pg_big5_dsplen (const unsigned char *s)
 
static int pg_gbk_mblen (const unsigned char *s)
 
static int pg_gbk_dsplen (const unsigned char *s)
 
static int pg_uhc_mblen (const unsigned char *s)
 
static int pg_uhc_dsplen (const unsigned char *s)
 
static int pg_gb18030_mblen (const unsigned char *s)
 
static int pg_gb18030_dsplen (const unsigned char *s)
 
static int pg_ascii_verifier (const unsigned char *s, int len)
 
static int pg_eucjp_verifier (const unsigned char *s, int len)
 
static int pg_euckr_verifier (const unsigned char *s, int len)
 
static int pg_euctw_verifier (const unsigned char *s, int len)
 
static int pg_johab_verifier (const unsigned char *s, int len)
 
static int pg_mule_verifier (const unsigned char *s, int len)
 
static int pg_latin1_verifier (const unsigned char *s, int len)
 
static int pg_sjis_verifier (const unsigned char *s, int len)
 
static int pg_big5_verifier (const unsigned char *s, int len)
 
static int pg_gbk_verifier (const unsigned char *s, int len)
 
static int pg_uhc_verifier (const unsigned char *s, int len)
 
static int pg_gb18030_verifier (const unsigned char *s, int len)
 
static int pg_utf8_verifier (const unsigned char *s, int len)
 
bool pg_utf8_islegal (const unsigned char *source, int length)
 
static bool pg_generic_charinc (unsigned char *charptr, int len)
 
static bool pg_utf8_increment (unsigned char *charptr, int length)
 
static bool pg_eucjp_increment (unsigned char *charptr, int length)
 
int pg_mic_mblen (const unsigned char *mbstr)
 
int pg_encoding_mblen (int encoding, const char *mbstr)
 
int pg_encoding_dsplen (int encoding, const char *mbstr)
 
int pg_encoding_verifymb (int encoding, const char *mbstr, int len)
 
int pg_encoding_max_length (int encoding)
 
int pg_database_encoding_max_length (void)
 
mbcharacter_incrementer pg_database_encoding_character_incrementer (void)
 
bool pg_verifymbstr (const char *mbstr, int len, bool noError)
 
bool pg_verify_mbstr (int encoding, const char *mbstr, int len, bool noError)
 
int pg_verify_mbstr_len (int encoding, const char *mbstr, int len, bool noError)
 
void check_encoding_conversion_args (int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)
 
void report_invalid_encoding (int encoding, const char *mbstr, int len)
 
void report_untranslatable_char (int src_encoding, int dest_encoding, const char *mbstr, int len)
 

Variables

const pg_wchar_tbl pg_wchar_table []
 

Macro Definition Documentation

◆ IS_EUC_RANGE_VALID

#define IS_EUC_RANGE_VALID (   c)    ((c) >= 0xa1 && (c) <= 0xfe)

◆ pg_euccn_verifier

#define pg_euccn_verifier   pg_euckr_verifier

Definition at line 1260 of file wchar.c.

Function Documentation

◆ check_encoding_conversion_args()

void check_encoding_conversion_args ( int  src_encoding,
int  dest_encoding,
int  len,
int  expected_src_encoding,
int  expected_dest_encoding 
)

Definition at line 2014 of file wchar.c.

References elog, ERROR, name, pg_enc2name_tbl, and PG_VALID_ENCODING.

2019 {
2020  if (!PG_VALID_ENCODING(src_encoding))
2021  elog(ERROR, "invalid source encoding ID: %d", src_encoding);
2022  if (src_encoding != expected_src_encoding && expected_src_encoding >= 0)
2023  elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
2024  pg_enc2name_tbl[expected_src_encoding].name,
2025  pg_enc2name_tbl[src_encoding].name);
2026  if (!PG_VALID_ENCODING(dest_encoding))
2027  elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
2028  if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= 0)
2029  elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
2030  pg_enc2name_tbl[expected_dest_encoding].name,
2031  pg_enc2name_tbl[dest_encoding].name);
2032  if (len < 0)
2033  elog(ERROR, "encoding conversion length must not be negative");
2034 }
#define ERROR
Definition: elog.h:43
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:307
const char * name
Definition: encode.c:521
#define elog(elevel,...)
Definition: elog.h:226

◆ mbbisearch()

static int mbbisearch ( pg_wchar  ucs,
const struct mbinterval *  table,
int  max 
)
static

Definition at line 591 of file wchar.c.

References mbinterval::first, and mbinterval::last.

Referenced by ucs_wcwidth().

592 {
593  int min = 0;
594  int mid;
595 
596  if (ucs < table[0].first || ucs > table[max].last)
597  return 0;
598  while (max >= min)
599  {
600  mid = (min + max) / 2;
601  if (ucs > table[mid].last)
602  min = mid + 1;
603  else if (ucs < table[mid].first)
604  max = mid - 1;
605  else
606  return 1;
607  }
608 
609  return 0;
610 }

◆ pg_ascii2wchar_with_len()

static int pg_ascii2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 47 of file wchar.c.

48 {
49  int cnt = 0;
50 
51  while (len > 0 && *from)
52  {
53  *to++ = *from++;
54  len--;
55  cnt++;
56  }
57  *to = 0;
58  return cnt;
59 }

◆ pg_ascii_dsplen()

static int pg_ascii_dsplen ( const unsigned char *  s)
static

Definition at line 68 of file wchar.c.

Referenced by pg_big5_dsplen(), pg_euc_dsplen(), pg_euccn_dsplen(), pg_eucjp_dsplen(), pg_euctw_dsplen(), pg_gb18030_dsplen(), pg_gbk_dsplen(), pg_latin1_dsplen(), pg_sjis_dsplen(), and pg_uhc_dsplen().

69 {
70  if (*s == '\0')
71  return 0;
72  if (*s < 0x20 || *s == 0x7f)
73  return -1;
74 
75  return 1;
76 }

◆ pg_ascii_mblen()

static int pg_ascii_mblen ( const unsigned char *  s)
static

Definition at line 62 of file wchar.c.

63 {
64  return 1;
65 }

◆ pg_ascii_verifier()

static int pg_ascii_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1168 of file wchar.c.

1169 {
1170  return 1;
1171 }

◆ pg_big5_dsplen()

static int pg_big5_dsplen ( const unsigned char *  s)
static

Definition at line 1044 of file wchar.c.

References IS_HIGHBIT_SET, and pg_ascii_dsplen().

1045 {
1046  int len;
1047 
1048  if (IS_HIGHBIT_SET(*s))
1049  len = 2; /* kanji? */
1050  else
1051  len = pg_ascii_dsplen(s); /* should be ASCII */
1052  return len;
1053 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:68
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075

◆ pg_big5_mblen()

static int pg_big5_mblen ( const unsigned char *  s)
static

Definition at line 1032 of file wchar.c.

References IS_HIGHBIT_SET.

Referenced by pg_big5_verifier().

1033 {
1034  int len;
1035 
1036  if (IS_HIGHBIT_SET(*s))
1037  len = 2; /* kanji? */
1038  else
1039  len = 1; /* should be ASCII */
1040  return len;
1041 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075

◆ pg_big5_verifier()

static int pg_big5_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1387 of file wchar.c.

References pg_big5_mblen().

1388 {
1389  int l,
1390  mbl;
1391 
1392  l = mbl = pg_big5_mblen(s);
1393 
1394  if (len < l)
1395  return -1;
1396 
1397  while (--l > 0)
1398  {
1399  if (*++s == '\0')
1400  return -1;
1401  }
1402 
1403  return mbl;
1404 }
static int pg_big5_mblen(const unsigned char *s)
Definition: wchar.c:1032

◆ pg_database_encoding_character_incrementer()

mbcharacter_incrementer pg_database_encoding_character_incrementer ( void  )

Definition at line 1890 of file wchar.c.

References GetDatabaseEncoding(), PG_EUC_JP, pg_eucjp_increment(), pg_generic_charinc(), PG_UTF8, and pg_utf8_increment().

Referenced by make_greater_string().

1891 {
1892  /*
1893  * Eventually it might be best to add a field to pg_wchar_table[], but for
1894  * now we just use a switch.
1895  */
1896  switch (GetDatabaseEncoding())
1897  {
1898  case PG_UTF8:
1899  return pg_utf8_increment;
1900 
1901  case PG_EUC_JP:
1902  return pg_eucjp_increment;
1903 
1904  default:
1905  return pg_generic_charinc;
1906  }
1907 }
static bool pg_eucjp_increment(unsigned char *charptr, int length)
Definition: wchar.c:1687
static bool pg_generic_charinc(unsigned char *charptr, int len)
Definition: wchar.c:1575
static bool pg_utf8_increment(unsigned char *charptr, int length)
Definition: wchar.c:1609
int GetDatabaseEncoding(void)
Definition: mbutils.c:996

◆ pg_database_encoding_max_length()

int pg_database_encoding_max_length ( void  )

◆ pg_encoding_dsplen()

int pg_encoding_dsplen ( int  encoding,
const char *  mbstr 
)

Definition at line 1844 of file wchar.c.

References PG_SQL_ASCII, and PG_VALID_ENCODING.

Referenced by PQdsplen(), and reportErrorPosition().

1845 {
1846  return (PG_VALID_ENCODING(encoding) ?
1847  pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
1848  pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
1849 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1777

◆ pg_encoding_max_length()

int pg_encoding_max_length ( int  encoding)

Definition at line 1868 of file wchar.c.

References Assert, encoding, pg_wchar_tbl::maxmblen, and PG_VALID_ENCODING.

Referenced by ascii(), chr(), pg_encoding_mbcliplen(), pg_verify_mbstr_len(), reportErrorPosition(), and type_maximum_size().

1869 {
1870  Assert(PG_VALID_ENCODING(encoding));
1871 
1872  return pg_wchar_table[encoding].maxmblen;
1873 }
int maxmblen
Definition: pg_wchar.h:382
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
#define Assert(condition)
Definition: c.h:732
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1777

◆ pg_encoding_mblen()

int pg_encoding_mblen ( int  encoding,
const char *  mbstr 
)

Definition at line 1833 of file wchar.c.

References PG_SQL_ASCII, and PG_VALID_ENCODING.

Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), CopyReadLineText(), PQescapeInternal(), PQescapeStringInternal(), PQmblen(), report_invalid_encoding(), report_untranslatable_char(), reportErrorPosition(), and xml_is_document().

1834 {
1835  return (PG_VALID_ENCODING(encoding) ?
1836  pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) :
1837  pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
1838 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1777

◆ pg_encoding_verifymb()

int pg_encoding_verifymb ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 1857 of file wchar.c.

References PG_SQL_ASCII, and PG_VALID_ENCODING.

Referenced by big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2mic(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), shift_jis_20042euc_jis_2004(), and sjis2euc_jp().

1858 {
1859  return (PG_VALID_ENCODING(encoding) ?
1860  pg_wchar_table[encoding].mbverify((const unsigned char *) mbstr, len) :
1861  pg_wchar_table[PG_SQL_ASCII].mbverify((const unsigned char *) mbstr, len));
1862 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1777

◆ pg_euc2wchar_with_len()

static int pg_euc2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 82 of file wchar.c.

References IS_HIGHBIT_SET, SS2, and SS3.

Referenced by pg_eucjp2wchar_with_len(), and pg_euckr2wchar_with_len().

83 {
84  int cnt = 0;
85 
86  while (len > 0 && *from)
87  {
88  if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte
89  * KANA") */
90  {
91  from++;
92  *to = (SS2 << 8) | *from++;
93  len -= 2;
94  }
95  else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
96  {
97  from++;
98  *to = (SS3 << 16) | (*from++ << 8);
99  *to |= *from++;
100  len -= 3;
101  }
102  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
103  {
104  *to = *from++ << 8;
105  *to |= *from++;
106  len -= 2;
107  }
108  else /* must be ASCII */
109  {
110  *to = *from++;
111  len--;
112  }
113  to++;
114  cnt++;
115  }
116  *to = 0;
117  return cnt;
118 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075
#define SS2
Definition: pg_wchar.h:35

◆ pg_euc_dsplen()

static int pg_euc_dsplen ( const unsigned char *  s)
inline static

Definition at line 137 of file wchar.c.

References IS_HIGHBIT_SET, pg_ascii_dsplen(), SS2, and SS3.

Referenced by pg_euckr_dsplen(), and pg_johab_dsplen().

138 {
139  int len;
140 
141  if (*s == SS2)
142  len = 2;
143  else if (*s == SS3)
144  len = 2;
145  else if (IS_HIGHBIT_SET(*s))
146  len = 2;
147  else
148  len = pg_ascii_dsplen(s);
149  return len;
150 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:68
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075
#define SS2
Definition: pg_wchar.h:35

◆ pg_euc_mblen()

static int pg_euc_mblen ( const unsigned char *  s)
inline static

Definition at line 121 of file wchar.c.

References IS_HIGHBIT_SET, SS2, and SS3.

Referenced by pg_eucjp_mblen(), pg_euckr_mblen(), and pg_johab_mblen().

122 {
123  int len;
124 
125  if (*s == SS2)
126  len = 2;
127  else if (*s == SS3)
128  len = 3;
129  else if (IS_HIGHBIT_SET(*s))
130  len = 2;
131  else
132  len = 1;
133  return len;
134 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075
#define SS2
Definition: pg_wchar.h:35

◆ pg_euccn2wchar_with_len()

static int pg_euccn2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 209 of file wchar.c.

References IS_HIGHBIT_SET, SS2, and SS3.

210 {
211  int cnt = 0;
212 
213  while (len > 0 && *from)
214  {
215  if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
216  {
217  from++;
218  *to = (SS2 << 16) | (*from++ << 8);
219  *to |= *from++;
220  len -= 3;
221  }
222  else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */
223  {
224  from++;
225  *to = (SS3 << 16) | (*from++ << 8);
226  *to |= *from++;
227  len -= 3;
228  }
229  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
230  {
231  *to = *from++ << 8;
232  *to |= *from++;
233  len -= 2;
234  }
235  else
236  {
237  *to = *from++;
238  len--;
239  }
240  to++;
241  cnt++;
242  }
243  *to = 0;
244  return cnt;
245 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075
#define SS2
Definition: pg_wchar.h:35

◆ pg_euccn_dsplen()

static int pg_euccn_dsplen ( const unsigned char *  s)
static

Definition at line 260 of file wchar.c.

References IS_HIGHBIT_SET, and pg_ascii_dsplen().

261 {
262  int len;
263 
264  if (IS_HIGHBIT_SET(*s))
265  len = 2;
266  else
267  len = pg_ascii_dsplen(s);
268  return len;
269 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:68
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075

◆ pg_euccn_mblen()

static int pg_euccn_mblen ( const unsigned char *  s)
static

Definition at line 248 of file wchar.c.

References IS_HIGHBIT_SET.

249 {
250  int len;
251 
252  if (IS_HIGHBIT_SET(*s))
253  len = 2;
254  else
255  len = 1;
256  return len;
257 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075

◆ pg_eucjp2wchar_with_len()

static int pg_eucjp2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 156 of file wchar.c.

References pg_euc2wchar_with_len().

157 {
158  return pg_euc2wchar_with_len(from, to, len);
159 }
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition: wchar.c:82

◆ pg_eucjp_dsplen()

static int pg_eucjp_dsplen ( const unsigned char *  s)
static

Definition at line 168 of file wchar.c.

References IS_HIGHBIT_SET, pg_ascii_dsplen(), SS2, and SS3.

169 {
170  int len;
171 
172  if (*s == SS2)
173  len = 1;
174  else if (*s == SS3)
175  len = 2;
176  else if (IS_HIGHBIT_SET(*s))
177  len = 2;
178  else
179  len = pg_ascii_dsplen(s);
180  return len;
181 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:68
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075
#define SS2
Definition: pg_wchar.h:35

◆ pg_eucjp_increment()

static bool pg_eucjp_increment ( unsigned char *  charptr,
int  length 
)
static

Definition at line 1687 of file wchar.c.

References i, IS_HIGHBIT_SET, SS2, and SS3.

Referenced by pg_database_encoding_character_incrementer().

1688 {
1689  unsigned char c1,
1690  c2;
1691  int i;
1692 
1693  c1 = *charptr;
1694 
1695  switch (c1)
1696  {
1697  case SS2: /* JIS X 0201 */
1698  if (length != 2)
1699  return false;
1700 
1701  c2 = charptr[1];
1702 
1703  if (c2 >= 0xdf)
1704  charptr[0] = charptr[1] = 0xa1;
1705  else if (c2 < 0xa1)
1706  charptr[1] = 0xa1;
1707  else
1708  charptr[1]++;
1709  break;
1710 
1711  case SS3: /* JIS X 0212 */
1712  if (length != 3)
1713  return false;
1714 
1715  for (i = 2; i > 0; i--)
1716  {
1717  c2 = charptr[i];
1718  if (c2 < 0xa1)
1719  {
1720  charptr[i] = 0xa1;
1721  return true;
1722  }
1723  else if (c2 < 0xfe)
1724  {
1725  charptr[i]++;
1726  return true;
1727  }
1728  }
1729 
1730  /* Out of 3-byte code region */
1731  return false;
1732 
1733  default:
1734  if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1735  {
1736  if (length != 2)
1737  return false;
1738 
1739  for (i = 1; i >= 0; i--)
1740  {
1741  c2 = charptr[i];
1742  if (c2 < 0xa1)
1743  {
1744  charptr[i] = 0xa1;
1745  return true;
1746  }
1747  else if (c2 < 0xfe)
1748  {
1749  charptr[i]++;
1750  return true;
1751  }
1752  }
1753 
1754  /* Out of 2 byte code region */
1755  return false;
1756  }
1757  else
1758  { /* ASCII, single byte */
1759  if (c1 > 0x7e)
1760  return false;
1761  (*charptr)++;
1762  }
1763  break;
1764  }
1765 
1766  return true;
1767 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075
int i
#define SS2
Definition: pg_wchar.h:35

◆ pg_eucjp_mblen()

static int pg_eucjp_mblen ( const unsigned char *  s)
static

Definition at line 162 of file wchar.c.

References pg_euc_mblen().

163 {
164  return pg_euc_mblen(s);
165 }
static int pg_euc_mblen(const unsigned char *s)
Definition: wchar.c:121

◆ pg_eucjp_verifier()

static int pg_eucjp_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1176 of file wchar.c.

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, SS2, and SS3.

1177 {
1178  int l;
1179  unsigned char c1,
1180  c2;
1181 
1182  c1 = *s++;
1183 
1184  switch (c1)
1185  {
1186  case SS2: /* JIS X 0201 */
1187  l = 2;
1188  if (l > len)
1189  return -1;
1190  c2 = *s++;
1191  if (c2 < 0xa1 || c2 > 0xdf)
1192  return -1;
1193  break;
1194 
1195  case SS3: /* JIS X 0212 */
1196  l = 3;
1197  if (l > len)
1198  return -1;
1199  c2 = *s++;
1200  if (!IS_EUC_RANGE_VALID(c2))
1201  return -1;
1202  c2 = *s++;
1203  if (!IS_EUC_RANGE_VALID(c2))
1204  return -1;
1205  break;
1206 
1207  default:
1208  if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1209  {
1210  l = 2;
1211  if (l > len)
1212  return -1;
1213  if (!IS_EUC_RANGE_VALID(c1))
1214  return -1;
1215  c2 = *s++;
1216  if (!IS_EUC_RANGE_VALID(c2))
1217  return -1;
1218  }
1219  else
1220  /* must be ASCII */
1221  {
1222  l = 1;
1223  }
1224  break;
1225  }
1226 
1227  return l;
1228 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1173
#define SS2
Definition: pg_wchar.h:35

◆ pg_euckr2wchar_with_len()

static int pg_euckr2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 187 of file wchar.c.

References pg_euc2wchar_with_len().

188 {
189  return pg_euc2wchar_with_len(from, to, len);
190 }
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
Definition: wchar.c:82

◆ pg_euckr_dsplen()

static int pg_euckr_dsplen ( const unsigned char *  s)
static

Definition at line 199 of file wchar.c.

References pg_euc_dsplen().

200 {
201  return pg_euc_dsplen(s);
202 }
static int pg_euc_dsplen(const unsigned char *s)
Definition: wchar.c:137

◆ pg_euckr_mblen()

static int pg_euckr_mblen ( const unsigned char *  s)
static

Definition at line 193 of file wchar.c.

References pg_euc_mblen().

194 {
195  return pg_euc_mblen(s);
196 }
static int pg_euc_mblen(const unsigned char *s)
Definition: wchar.c:121

◆ pg_euckr_verifier()

static int pg_euckr_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1231 of file wchar.c.

References IS_EUC_RANGE_VALID, and IS_HIGHBIT_SET.

1232 {
1233  int l;
1234  unsigned char c1,
1235  c2;
1236 
1237  c1 = *s++;
1238 
1239  if (IS_HIGHBIT_SET(c1))
1240  {
1241  l = 2;
1242  if (l > len)
1243  return -1;
1244  if (!IS_EUC_RANGE_VALID(c1))
1245  return -1;
1246  c2 = *s++;
1247  if (!IS_EUC_RANGE_VALID(c2))
1248  return -1;
1249  }
1250  else
1251  /* must be ASCII */
1252  {
1253  l = 1;
1254  }
1255 
1256  return l;
1257 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1173

◆ pg_euctw2wchar_with_len()

static int pg_euctw2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 276 of file wchar.c.

References IS_HIGHBIT_SET, SS2, and SS3.

277 {
278  int cnt = 0;
279 
280  while (len > 0 && *from)
281  {
282  if (*from == SS2 && len >= 4) /* code set 2 */
283  {
284  from++;
285  *to = (((uint32) SS2) << 24) | (*from++ << 16);
286  *to |= *from++ << 8;
287  *to |= *from++;
288  len -= 4;
289  }
290  else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
291  {
292  from++;
293  *to = (SS3 << 16) | (*from++ << 8);
294  *to |= *from++;
295  len -= 3;
296  }
297  else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */
298  {
299  *to = *from++ << 8;
300  *to |= *from++;
301  len -= 2;
302  }
303  else
304  {
305  *to = *from++;
306  len--;
307  }
308  to++;
309  cnt++;
310  }
311  *to = 0;
312  return cnt;
313 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075
unsigned int uint32
Definition: c.h:358
#define SS2
Definition: pg_wchar.h:35

◆ pg_euctw_dsplen()

static int pg_euctw_dsplen ( const unsigned char *  s)
static

Definition at line 332 of file wchar.c.

References IS_HIGHBIT_SET, pg_ascii_dsplen(), SS2, and SS3.

333 {
334  int len;
335 
336  if (*s == SS2)
337  len = 2;
338  else if (*s == SS3)
339  len = 2;
340  else if (IS_HIGHBIT_SET(*s))
341  len = 2;
342  else
343  len = pg_ascii_dsplen(s);
344  return len;
345 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:68
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075
#define SS2
Definition: pg_wchar.h:35

◆ pg_euctw_mblen()

static int pg_euctw_mblen ( const unsigned char *  s)
static

Definition at line 316 of file wchar.c.

References IS_HIGHBIT_SET, SS2, and SS3.

317 {
318  int len;
319 
320  if (*s == SS2)
321  len = 4;
322  else if (*s == SS3)
323  len = 3;
324  else if (IS_HIGHBIT_SET(*s))
325  len = 2;
326  else
327  len = 1;
328  return len;
329 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075
#define SS2
Definition: pg_wchar.h:35

◆ pg_euctw_verifier()

static int pg_euctw_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1263 of file wchar.c.

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, SS2, and SS3.

1264 {
1265  int l;
1266  unsigned char c1,
1267  c2;
1268 
1269  c1 = *s++;
1270 
1271  switch (c1)
1272  {
1273  case SS2: /* CNS 11643 Plane 1-7 */
1274  l = 4;
1275  if (l > len)
1276  return -1;
1277  c2 = *s++;
1278  if (c2 < 0xa1 || c2 > 0xa7)
1279  return -1;
1280  c2 = *s++;
1281  if (!IS_EUC_RANGE_VALID(c2))
1282  return -1;
1283  c2 = *s++;
1284  if (!IS_EUC_RANGE_VALID(c2))
1285  return -1;
1286  break;
1287 
1288  case SS3: /* unused */
1289  return -1;
1290 
1291  default:
1292  if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */
1293  {
1294  l = 2;
1295  if (l > len)
1296  return -1;
1297  /* no further range check on c1? */
1298  c2 = *s++;
1299  if (!IS_EUC_RANGE_VALID(c2))
1300  return -1;
1301  }
1302  else
1303  /* must be ASCII */
1304  {
1305  l = 1;
1306  }
1307  break;
1308  }
1309  return l;
1310 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1173
#define SS2
Definition: pg_wchar.h:35

◆ pg_gb18030_dsplen()

static int pg_gb18030_dsplen ( const unsigned char *  s)
static

Definition at line 1139 of file wchar.c.

References IS_HIGHBIT_SET, and pg_ascii_dsplen().

1140 {
1141  int len;
1142 
1143  if (IS_HIGHBIT_SET(*s))
1144  len = 2;
1145  else
1146  len = pg_ascii_dsplen(s); /* ASCII */
1147  return len;
1148 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:68
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075

◆ pg_gb18030_mblen()

static int pg_gb18030_mblen ( const unsigned char *  s)
static

Definition at line 1125 of file wchar.c.

References IS_HIGHBIT_SET.

1126 {
1127  int len;
1128 
1129  if (!IS_HIGHBIT_SET(*s))
1130  len = 1; /* ASCII */
1131  else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1132  len = 4;
1133  else
1134  len = 2;
1135  return len;
1136 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075

◆ pg_gb18030_verifier()

static int pg_gb18030_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1447 of file wchar.c.

References IS_HIGHBIT_SET.

1448 {
1449  int l;
1450 
1451  if (!IS_HIGHBIT_SET(*s))
1452  l = 1; /* ASCII */
1453  else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1454  {
1455  /* Should be 4-byte, validate remaining bytes */
1456  if (*s >= 0x81 && *s <= 0xfe &&
1457  *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
1458  *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
1459  l = 4;
1460  else
1461  l = -1;
1462  }
1463  else if (len >= 2 && *s >= 0x81 && *s <= 0xfe)
1464  {
1465  /* Should be 2-byte, validate */
1466  if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
1467  (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
1468  l = 2;
1469  else
1470  l = -1;
1471  }
1472  else
1473  l = -1;
1474  return l;
1475 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075

◆ pg_gbk_dsplen()

static int pg_gbk_dsplen ( const unsigned char *  s)
static

Definition at line 1071 of file wchar.c.

References IS_HIGHBIT_SET, and pg_ascii_dsplen().

1072 {
1073  int len;
1074 
1075  if (IS_HIGHBIT_SET(*s))
1076  len = 2; /* kanji? */
1077  else
1078  len = pg_ascii_dsplen(s); /* should be ASCII */
1079  return len;
1080 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:68
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075

◆ pg_gbk_mblen()

static int pg_gbk_mblen ( const unsigned char *  s)
static

Definition at line 1059 of file wchar.c.

References IS_HIGHBIT_SET.

Referenced by pg_gbk_verifier().

1060 {
1061  int len;
1062 
1063  if (IS_HIGHBIT_SET(*s))
1064  len = 2; /* kanji? */
1065  else
1066  len = 1; /* should be ASCII */
1067  return len;
1068 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075

◆ pg_gbk_verifier()

static int pg_gbk_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1407 of file wchar.c.

References pg_gbk_mblen().

1408 {
1409  int l,
1410  mbl;
1411 
1412  l = mbl = pg_gbk_mblen(s);
1413 
1414  if (len < l)
1415  return -1;
1416 
1417  while (--l > 0)
1418  {
1419  if (*++s == '\0')
1420  return -1;
1421  }
1422 
1423  return mbl;
1424 }
static int pg_gbk_mblen(const unsigned char *s)
Definition: wchar.c:1059

◆ pg_generic_charinc()

static bool pg_generic_charinc ( unsigned char *  charptr,
int  len 
)
static

Definition at line 1575 of file wchar.c.

References GetDatabaseEncoding(), pg_wchar_tbl::mbverify, and pg_wchar_table.

Referenced by pg_database_encoding_character_incrementer().

1576 {
1577  unsigned char *lastbyte = charptr + len - 1;
1578  mbverifier mbverify;
1579 
1580  /* We can just invoke the character verifier directly. */
1581  mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverify;
1582 
1583  while (*lastbyte < (unsigned char) 255)
1584  {
1585  (*lastbyte)++;
1586  if ((*mbverify) (charptr, len) == len)
1587  return true;
1588  }
1589 
1590  return false;
1591 }
int(* mbverifier)(const unsigned char *mbstr, int len)
Definition: pg_wchar.h:371
int GetDatabaseEncoding(void)
Definition: mbutils.c:996
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1777
mbverifier mbverify
Definition: pg_wchar.h:381

◆ pg_johab_dsplen()

static int pg_johab_dsplen ( const unsigned char *  s)
static

Definition at line 406 of file wchar.c.

References pg_euc_dsplen().

407 {
408  return pg_euc_dsplen(s);
409 }
static int pg_euc_dsplen(const unsigned char *s)
Definition: wchar.c:137

◆ pg_johab_mblen()

static int pg_johab_mblen ( const unsigned char *  s)
static

Definition at line 400 of file wchar.c.

References pg_euc_mblen().

Referenced by pg_johab_verifier().

401 {
402  return pg_euc_mblen(s);
403 }
static int pg_euc_mblen(const unsigned char *s)
Definition: wchar.c:121

◆ pg_johab_verifier()

static int pg_johab_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1313 of file wchar.c.

References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, and pg_johab_mblen().

1314 {
1315  int l,
1316  mbl;
1317  unsigned char c;
1318 
1319  l = mbl = pg_johab_mblen(s);
1320 
1321  if (len < l)
1322  return -1;
1323 
1324  if (!IS_HIGHBIT_SET(*s))
1325  return mbl;
1326 
1327  while (--l > 0)
1328  {
1329  c = *++s;
1330  if (!IS_EUC_RANGE_VALID(c))
1331  return -1;
1332  }
1333  return mbl;
1334 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075
char * c
static int pg_johab_mblen(const unsigned char *s)
Definition: wchar.c:400
#define IS_EUC_RANGE_VALID(c)
Definition: wchar.c:1173

◆ pg_latin12wchar_with_len()

static int pg_latin12wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 949 of file wchar.c.

950 {
951  int cnt = 0;
952 
953  while (len > 0 && *from)
954  {
955  *to++ = *from++;
956  len--;
957  cnt++;
958  }
959  *to = 0;
960  return cnt;
961 }

◆ pg_latin1_dsplen()

static int pg_latin1_dsplen ( const unsigned char *  s)
static

Definition at line 992 of file wchar.c.

References pg_ascii_dsplen().

993 {
994  return pg_ascii_dsplen(s);
995 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:68

◆ pg_latin1_mblen()

static int pg_latin1_mblen ( const unsigned char *  s)
static

Definition at line 986 of file wchar.c.

987 {
988  return 1;
989 }

◆ pg_latin1_verifier()

static int pg_latin1_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1358 of file wchar.c.

1359 {
1360  return 1;
1361 }

◆ pg_mic_mblen()

int pg_mic_mblen ( const unsigned char *  mbstr)

Definition at line 1824 of file wchar.c.

References pg_mule_mblen().

Referenced by mic2latin(), and mic2latin_with_table().

1825 {
1826  return pg_mule_mblen(mbstr);
1827 }
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:903

◆ pg_mule2wchar_with_len()

static int pg_mule2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 785 of file wchar.c.

References IS_LC1, IS_LC2, IS_LCPRV1, and IS_LCPRV2.

786 {
787  int cnt = 0;
788 
789  while (len > 0 && *from)
790  {
791  if (IS_LC1(*from) && len >= 2)
792  {
793  *to = *from++ << 16;
794  *to |= *from++;
795  len -= 2;
796  }
797  else if (IS_LCPRV1(*from) && len >= 3)
798  {
799  from++;
800  *to = *from++ << 16;
801  *to |= *from++;
802  len -= 3;
803  }
804  else if (IS_LC2(*from) && len >= 3)
805  {
806  *to = *from++ << 16;
807  *to |= *from++ << 8;
808  *to |= *from++;
809  len -= 3;
810  }
811  else if (IS_LCPRV2(*from) && len >= 4)
812  {
813  from++;
814  *to = *from++ << 16;
815  *to |= *from++ << 8;
816  *to |= *from++;
817  len -= 4;
818  }
819  else
820  { /* assume ASCII */
821  *to = (unsigned char) *from++;
822  len--;
823  }
824  to++;
825  cnt++;
826  }
827  *to = 0;
828  return cnt;
829 }
#define IS_LC2(c)
Definition: pg_wchar.h:144
#define IS_LCPRV2(c)
Definition: pg_wchar.h:164
#define IS_LCPRV1(c)
Definition: pg_wchar.h:152
#define IS_LC1(c)
Definition: pg_wchar.h:123

◆ pg_mule_dsplen()

static int pg_mule_dsplen ( const unsigned char *  s)
static

Definition at line 921 of file wchar.c.

References IS_LC1, IS_LC2, IS_LCPRV1, and IS_LCPRV2.

922 {
923  int len;
924 
925  /*
926  * Note: it's not really appropriate to assume that all multibyte charsets
927  * are double-wide on screen. But this seems an okay approximation for
928  * the MULE charsets we currently support.
929  */
930 
931  if (IS_LC1(*s))
932  len = 1;
933  else if (IS_LCPRV1(*s))
934  len = 1;
935  else if (IS_LC2(*s))
936  len = 2;
937  else if (IS_LCPRV2(*s))
938  len = 2;
939  else
940  len = 1; /* assume ASCII */
941 
942  return len;
943 }
#define IS_LC2(c)
Definition: pg_wchar.h:144
#define IS_LCPRV2(c)
Definition: pg_wchar.h:164
#define IS_LCPRV1(c)
Definition: pg_wchar.h:152
#define IS_LC1(c)
Definition: pg_wchar.h:123

◆ pg_mule_mblen()

int pg_mule_mblen ( const unsigned char *  s)

Definition at line 903 of file wchar.c.

References IS_LC1, IS_LC2, IS_LCPRV1, and IS_LCPRV2.

Referenced by pg_mic_mblen(), and pg_mule_verifier().

904 {
905  int len;
906 
907  if (IS_LC1(*s))
908  len = 2;
909  else if (IS_LCPRV1(*s))
910  len = 3;
911  else if (IS_LC2(*s))
912  len = 3;
913  else if (IS_LCPRV2(*s))
914  len = 4;
915  else
916  len = 1; /* assume ASCII */
917  return len;
918 }
#define IS_LC2(c)
Definition: pg_wchar.h:144
#define IS_LCPRV2(c)
Definition: pg_wchar.h:164
#define IS_LCPRV1(c)
Definition: pg_wchar.h:152
#define IS_LC1(c)
Definition: pg_wchar.h:123

◆ pg_mule_verifier()

static int pg_mule_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1337 of file wchar.c.

References IS_HIGHBIT_SET, and pg_mule_mblen().

1338 {
1339  int l,
1340  mbl;
1341  unsigned char c;
1342 
1343  l = mbl = pg_mule_mblen(s);
1344 
1345  if (len < l)
1346  return -1;
1347 
1348  while (--l > 0)
1349  {
1350  c = *++s;
1351  if (!IS_HIGHBIT_SET(c))
1352  return -1;
1353  }
1354  return mbl;
1355 }
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:903
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075
char * c

◆ pg_sjis_dsplen()

static int pg_sjis_dsplen ( const unsigned char *  s)
static

Definition at line 1015 of file wchar.c.

References IS_HIGHBIT_SET, and pg_ascii_dsplen().

1016 {
1017  int len;
1018 
1019  if (*s >= 0xa1 && *s <= 0xdf)
1020  len = 1; /* 1 byte kana? */
1021  else if (IS_HIGHBIT_SET(*s))
1022  len = 2; /* kanji? */
1023  else
1024  len = pg_ascii_dsplen(s); /* should be ASCII */
1025  return len;
1026 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:68
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075

◆ pg_sjis_mblen()

static int pg_sjis_mblen ( const unsigned char *  s)
static

Definition at line 1001 of file wchar.c.

References IS_HIGHBIT_SET.

Referenced by pg_sjis_verifier().

1002 {
1003  int len;
1004 
1005  if (*s >= 0xa1 && *s <= 0xdf)
1006  len = 1; /* 1 byte kana? */
1007  else if (IS_HIGHBIT_SET(*s))
1008  len = 2; /* kanji? */
1009  else
1010  len = 1; /* should be ASCII */
1011  return len;
1012 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075

◆ pg_sjis_verifier()

static int pg_sjis_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1364 of file wchar.c.

References ISSJISHEAD, ISSJISTAIL, and pg_sjis_mblen().

1365 {
1366  int l,
1367  mbl;
1368  unsigned char c1,
1369  c2;
1370 
1371  l = mbl = pg_sjis_mblen(s);
1372 
1373  if (len < l)
1374  return -1;
1375 
1376  if (l == 1) /* pg_sjis_mblen already verified it */
1377  return mbl;
1378 
1379  c1 = *s++;
1380  c2 = *s;
1381  if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2))
1382  return -1;
1383  return mbl;
1384 }
#define ISSJISTAIL(c)
Definition: pg_wchar.h:42
static int pg_sjis_mblen(const unsigned char *s)
Definition: wchar.c:1001
#define ISSJISHEAD(c)
Definition: pg_wchar.h:41

◆ pg_uhc_dsplen()

static int pg_uhc_dsplen ( const unsigned char *  s)
static

Definition at line 1098 of file wchar.c.

References IS_HIGHBIT_SET, and pg_ascii_dsplen().

1099 {
1100  int len;
1101 
1102  if (IS_HIGHBIT_SET(*s))
1103  len = 2; /* 2byte? */
1104  else
1105  len = pg_ascii_dsplen(s); /* should be ASCII */
1106  return len;
1107 }
static int pg_ascii_dsplen(const unsigned char *s)
Definition: wchar.c:68
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075

◆ pg_uhc_mblen()

static int pg_uhc_mblen ( const unsigned char *  s)
static

Definition at line 1086 of file wchar.c.

References IS_HIGHBIT_SET.

Referenced by pg_uhc_verifier().

1087 {
1088  int len;
1089 
1090  if (IS_HIGHBIT_SET(*s))
1091  len = 2; /* 2byte? */
1092  else
1093  len = 1; /* should be ASCII */
1094  return len;
1095 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075

◆ pg_uhc_verifier()

static int pg_uhc_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1427 of file wchar.c.

References pg_uhc_mblen().

1428 {
1429  int l,
1430  mbl;
1431 
1432  l = mbl = pg_uhc_mblen(s);
1433 
1434  if (len < l)
1435  return -1;
1436 
1437  while (--l > 0)
1438  {
1439  if (*++s == '\0')
1440  return -1;
1441  }
1442 
1443  return mbl;
1444 }
static int pg_uhc_mblen(const unsigned char *s)
Definition: wchar.c:1086

◆ pg_utf2wchar_with_len()

static int pg_utf2wchar_with_len ( const unsigned char *  from,
pg_wchar *  to,
int  len 
)
static

Definition at line 418 of file wchar.c.

419 {
420  int cnt = 0;
421  uint32 c1,
422  c2,
423  c3,
424  c4;
425 
426  while (len > 0 && *from)
427  {
428  if ((*from & 0x80) == 0)
429  {
430  *to = *from++;
431  len--;
432  }
433  else if ((*from & 0xe0) == 0xc0)
434  {
435  if (len < 2)
436  break; /* drop trailing incomplete char */
437  c1 = *from++ & 0x1f;
438  c2 = *from++ & 0x3f;
439  *to = (c1 << 6) | c2;
440  len -= 2;
441  }
442  else if ((*from & 0xf0) == 0xe0)
443  {
444  if (len < 3)
445  break; /* drop trailing incomplete char */
446  c1 = *from++ & 0x0f;
447  c2 = *from++ & 0x3f;
448  c3 = *from++ & 0x3f;
449  *to = (c1 << 12) | (c2 << 6) | c3;
450  len -= 3;
451  }
452  else if ((*from & 0xf8) == 0xf0)
453  {
454  if (len < 4)
455  break; /* drop trailing incomplete char */
456  c1 = *from++ & 0x07;
457  c2 = *from++ & 0x3f;
458  c3 = *from++ & 0x3f;
459  c4 = *from++ & 0x3f;
460  *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
461  len -= 4;
462  }
463  else
464  {
465  /* treat a bogus char as length 1; not ours to raise error */
466  *to = *from++;
467  len--;
468  }
469  to++;
470  cnt++;
471  }
472  *to = 0;
473  return cnt;
474 }
unsigned int uint32
Definition: c.h:358

◆ pg_utf8_increment()

static bool pg_utf8_increment ( unsigned char *  charptr,
int  length 
)
static

Definition at line 1609 of file wchar.c.

Referenced by pg_database_encoding_character_incrementer().

1610 {
1611  unsigned char a;
1612  unsigned char limit;
1613 
1614  switch (length)
1615  {
1616  default:
1617  /* reject lengths 5 and 6 for now */
1618  return false;
1619  case 4:
1620  a = charptr[3];
1621  if (a < 0xBF)
1622  {
1623  charptr[3]++;
1624  break;
1625  }
1626  /* FALL THRU */
1627  case 3:
1628  a = charptr[2];
1629  if (a < 0xBF)
1630  {
1631  charptr[2]++;
1632  break;
1633  }
1634  /* FALL THRU */
1635  case 2:
1636  a = charptr[1];
1637  switch (*charptr)
1638  {
1639  case 0xED:
1640  limit = 0x9F;
1641  break;
1642  case 0xF4:
1643  limit = 0x8F;
1644  break;
1645  default:
1646  limit = 0xBF;
1647  break;
1648  }
1649  if (a < limit)
1650  {
1651  charptr[1]++;
1652  break;
1653  }
1654  /* FALL THRU */
1655  case 1:
1656  a = *charptr;
1657  if (a == 0x7F || a == 0xDF || a == 0xEF || a == 0xF4)
1658  return false;
1659  charptr[0]++;
1660  break;
1661  }
1662 
1663  return true;
1664 }

◆ pg_utf8_islegal()

bool pg_utf8_islegal ( const unsigned char *  source,
int  length 
)

Definition at line 1506 of file wchar.c.

Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifier(), utf8_to_iso8859_1(), and UtfToLocal().

1507 {
1508  unsigned char a;
1509 
1510  switch (length)
1511  {
1512  default:
1513  /* reject lengths 5 and 6 for now */
1514  return false;
1515  case 4:
1516  a = source[3];
1517  if (a < 0x80 || a > 0xBF)
1518  return false;
1519  /* FALL THRU */
1520  case 3:
1521  a = source[2];
1522  if (a < 0x80 || a > 0xBF)
1523  return false;
1524  /* FALL THRU */
1525  case 2:
1526  a = source[1];
1527  switch (*source)
1528  {
1529  case 0xE0:
1530  if (a < 0xA0 || a > 0xBF)
1531  return false;
1532  break;
1533  case 0xED:
1534  if (a < 0x80 || a > 0x9F)
1535  return false;
1536  break;
1537  case 0xF0:
1538  if (a < 0x90 || a > 0xBF)
1539  return false;
1540  break;
1541  case 0xF4:
1542  if (a < 0x80 || a > 0x8F)
1543  return false;
1544  break;
1545  default:
1546  if (a < 0x80 || a > 0xBF)
1547  return false;
1548  break;
1549  }
1550  /* FALL THRU */
1551  case 1:
1552  a = *source;
1553  if (a >= 0x80 && a < 0xC2)
1554  return false;
1555  if (a > 0xF4)
1556  return false;
1557  break;
1558  }
1559  return true;
1560 }

◆ pg_utf8_verifier()

static int pg_utf8_verifier ( const unsigned char *  s,
int  len 
)
static

Definition at line 1478 of file wchar.c.

References pg_utf8_islegal(), and pg_utf_mblen().

1479 {
1480  int l = pg_utf_mblen(s);
1481 
1482  if (len < l)
1483  return -1;
1484 
1485  if (!pg_utf8_islegal(s, l))
1486  return -1;
1487 
1488  return l;
1489 }
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1506
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:548

◆ pg_utf_dsplen()

static int pg_utf_dsplen ( const unsigned char *  s)
static

Definition at line 773 of file wchar.c.

References ucs_wcwidth(), and utf8_to_unicode().

774 {
775  return ucs_wcwidth(utf8_to_unicode(s));
776 }
static int ucs_wcwidth(pg_wchar ucs)
Definition: wchar.c:644
pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: wchar.c:751

◆ pg_utf_mblen()

int pg_utf_mblen ( const unsigned char *  s)

Definition at line 548 of file wchar.c.

Referenced by json_lex_string(), pg_saslprep(), pg_utf8_string_len(), pg_utf8_verifier(), pg_wchar2utf_with_len(), utf8_to_iso8859_1(), and UtfToLocal().

549 {
550  int len;
551 
552  if ((*s & 0x80) == 0)
553  len = 1;
554  else if ((*s & 0xe0) == 0xc0)
555  len = 2;
556  else if ((*s & 0xf0) == 0xe0)
557  len = 3;
558  else if ((*s & 0xf8) == 0xf0)
559  len = 4;
560 #ifdef NOT_USED
561  else if ((*s & 0xfc) == 0xf8)
562  len = 5;
563  else if ((*s & 0xfe) == 0xfc)
564  len = 6;
565 #endif
566  else
567  len = 1;
568  return len;
569 }

◆ pg_verify_mbstr()

bool pg_verify_mbstr ( int  encoding,
const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1925 of file wchar.c.

References pg_verify_mbstr_len().

Referenced by LogicalOutputWrite(), pg_any_to_server(), pg_do_encoding_conversion(), pg_server_to_any(), and t_readline().

1926 {
1927  return pg_verify_mbstr_len(encoding, mbstr, len, noError) >= 0;
1928 }
int pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
Definition: wchar.c:1942
int32 encoding
Definition: pg_database.h:41

◆ pg_verify_mbstr_len()

int pg_verify_mbstr_len ( int  encoding,
const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1942 of file wchar.c.

References Assert, encoding, IS_HIGHBIT_SET, pg_wchar_tbl::mbverify, pg_encoding_max_length(), PG_VALID_ENCODING, and report_invalid_encoding().

Referenced by length_in_encoding(), pg_convert(), pg_verify_mbstr(), pg_verifymbstr(), and read_extension_script_file().

1943 {
1944  mbverifier mbverify;
1945  int mb_len;
1946 
1947  Assert(PG_VALID_ENCODING(encoding));
1948 
1949  /*
1950  * In single-byte encodings, we need only reject nulls (\0).
1951  */
1952  if (pg_encoding_max_length(encoding) <= 1)
1953  {
1954  const char *nullpos = memchr(mbstr, 0, len);
1955 
1956  if (nullpos == NULL)
1957  return len;
1958  if (noError)
1959  return -1;
1960  report_invalid_encoding(encoding, nullpos, 1);
1961  }
1962 
1963  /* fetch function pointer just once */
1964  mbverify = pg_wchar_table[encoding].mbverify;
1965 
1966  mb_len = 0;
1967 
1968  while (len > 0)
1969  {
1970  int l;
1971 
1972  /* fast path for ASCII-subset characters */
1973  if (!IS_HIGHBIT_SET(*mbstr))
1974  {
1975  if (*mbstr != '\0')
1976  {
1977  mb_len++;
1978  mbstr++;
1979  len--;
1980  continue;
1981  }
1982  if (noError)
1983  return -1;
1984  report_invalid_encoding(encoding, mbstr, len);
1985  }
1986 
1987  l = (*mbverify) ((const unsigned char *) mbstr, len);
1988 
1989  if (l < 0)
1990  {
1991  if (noError)
1992  return -1;
1993  report_invalid_encoding(encoding, mbstr, len);
1994  }
1995 
1996  mbstr += l;
1997  len -= l;
1998  mb_len++;
1999  }
2000  return mb_len;
2001 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1075
int pg_encoding_max_length(int encoding)
Definition: wchar.c:1868
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
int(* mbverifier)(const unsigned char *mbstr, int len)
Definition: pg_wchar.h:371
#define Assert(condition)
Definition: c.h:732
int32 encoding
Definition: pg_database.h:41
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:2043
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1777
mbverifier mbverify
Definition: pg_wchar.h:381

◆ pg_verifymbstr()

bool pg_verifymbstr ( const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1914 of file wchar.c.

References GetDatabaseEncoding(), and pg_verify_mbstr_len().

Referenced by char2wchar(), CopyReadAttributesText(), plperl_spi_exec(), plperl_spi_prepare(), plperl_spi_query(), PLy_cursor_query(), PLy_output(), PLy_spi_execute_query(), PLy_spi_prepare(), PLyObject_AsString(), read_text_file(), and spg_text_leaf_consistent().

1915 {
1916  return
1917  pg_verify_mbstr_len(GetDatabaseEncoding(), mbstr, len, noError) >= 0;
1918 }
int pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
Definition: wchar.c:1942
int GetDatabaseEncoding(void)
Definition: mbutils.c:996

◆ pg_wchar2euc_with_len()

static int pg_wchar2euc_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 354 of file wchar.c.

355 {
356  int cnt = 0;
357 
358  while (len > 0 && *from)
359  {
360  unsigned char c;
361 
362  if ((c = (*from >> 24)))
363  {
364  *to++ = c;
365  *to++ = (*from >> 16) & 0xff;
366  *to++ = (*from >> 8) & 0xff;
367  *to++ = *from & 0xff;
368  cnt += 4;
369  }
370  else if ((c = (*from >> 16)))
371  {
372  *to++ = c;
373  *to++ = (*from >> 8) & 0xff;
374  *to++ = *from & 0xff;
375  cnt += 3;
376  }
377  else if ((c = (*from >> 8)))
378  {
379  *to++ = c;
380  *to++ = *from & 0xff;
381  cnt += 2;
382  }
383  else
384  {
385  *to++ = *from;
386  cnt++;
387  }
388  from++;
389  len--;
390  }
391  *to = 0;
392  return cnt;
393 }
char * c

◆ pg_wchar2mule_with_len()

static int pg_wchar2mule_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 838 of file wchar.c.

References IS_LC1, IS_LC2, IS_LCPRV1_A_RANGE, IS_LCPRV1_B_RANGE, IS_LCPRV2_A_RANGE, IS_LCPRV2_B_RANGE, LCPRV1_A, LCPRV1_B, LCPRV2_A, and LCPRV2_B.

839 {
840  int cnt = 0;
841 
842  while (len > 0 && *from)
843  {
844  unsigned char lb;
845 
846  lb = (*from >> 16) & 0xff;
847  if (IS_LC1(lb))
848  {
849  *to++ = lb;
850  *to++ = *from & 0xff;
851  cnt += 2;
852  }
853  else if (IS_LC2(lb))
854  {
855  *to++ = lb;
856  *to++ = (*from >> 8) & 0xff;
857  *to++ = *from & 0xff;
858  cnt += 3;
859  }
860  else if (IS_LCPRV1_A_RANGE(lb))
861  {
862  *to++ = LCPRV1_A;
863  *to++ = lb;
864  *to++ = *from & 0xff;
865  cnt += 3;
866  }
867  else if (IS_LCPRV1_B_RANGE(lb))
868  {
869  *to++ = LCPRV1_B;
870  *to++ = lb;
871  *to++ = *from & 0xff;
872  cnt += 3;
873  }
874  else if (IS_LCPRV2_A_RANGE(lb))
875  {
876  *to++ = LCPRV2_A;
877  *to++ = lb;
878  *to++ = (*from >> 8) & 0xff;
879  *to++ = *from & 0xff;
880  cnt += 4;
881  }
882  else if (IS_LCPRV2_B_RANGE(lb))
883  {
884  *to++ = LCPRV2_B;
885  *to++ = lb;
886  *to++ = (*from >> 8) & 0xff;
887  *to++ = *from & 0xff;
888  cnt += 4;
889  }
890  else
891  {
892  *to++ = *from & 0xff;
893  cnt += 1;
894  }
895  from++;
896  len--;
897  }
898  *to = 0;
899  return cnt;
900 }
#define IS_LC2(c)
Definition: pg_wchar.h:144
#define LCPRV1_A
Definition: pg_wchar.h:150
#define LCPRV1_B
Definition: pg_wchar.h:151
#define IS_LCPRV2_A_RANGE(c)
Definition: pg_wchar.h:165
#define LCPRV2_B
Definition: pg_wchar.h:163
#define LCPRV2_A
Definition: pg_wchar.h:162
#define IS_LCPRV2_B_RANGE(c)
Definition: pg_wchar.h:167
#define IS_LC1(c)
Definition: pg_wchar.h:123
#define IS_LCPRV1_A_RANGE(c)
Definition: pg_wchar.h:153
#define IS_LCPRV1_B_RANGE(c)
Definition: pg_wchar.h:155

◆ pg_wchar2single_with_len()

static int pg_wchar2single_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 971 of file wchar.c.

972 {
973  int cnt = 0;
974 
975  while (len > 0 && *from)
976  {
977  *to++ = *from++;
978  len--;
979  cnt++;
980  }
981  *to = 0;
982  return cnt;
983 }

◆ pg_wchar2utf_with_len()

static int pg_wchar2utf_with_len ( const pg_wchar *  from,
unsigned char *  to,
int  len 
)
static

Definition at line 517 of file wchar.c.

References pg_utf_mblen(), and unicode_to_utf8().

518 {
519  int cnt = 0;
520 
521  while (len > 0 && *from)
522  {
523  int char_len;
524 
525  unicode_to_utf8(*from, to);
526  char_len = pg_utf_mblen(to);
527  cnt += char_len;
528  to += char_len;
529  from++;
530  len--;
531  }
532  *to = 0;
533  return cnt;
534 }
unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: wchar.c:482
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:548

◆ report_invalid_encoding()

void report_invalid_encoding ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 2043 of file wchar.c.

References buf, ereport, errcode(), errmsg(), ERROR, Min, name, pg_enc2name_tbl, pg_encoding_mblen(), and sprintf.

Referenced by big52mic(), euc_cn2mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2mic(), iso8859_1_to_utf8(), latin2mic(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_verify_mbstr_len(), shift_jis_20042euc_jis_2004(), sjis2euc_jp(), sjis2mic(), utf8_to_iso8859_1(), and UtfToLocal().

2044 {
2045  int l = pg_encoding_mblen(encoding, mbstr);
2046  char buf[8 * 5 + 1];
2047  char *p = buf;
2048  int j,
2049  jlimit;
2050 
2051  jlimit = Min(l, len);
2052  jlimit = Min(jlimit, 8); /* prevent buffer overrun */
2053 
2054  for (j = 0; j < jlimit; j++)
2055  {
2056  p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
2057  if (j < jlimit - 1)
2058  p += sprintf(p, " ");
2059  }
2060 
2061  ereport(ERROR,
2062  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
2063  errmsg("invalid byte sequence for encoding \"%s\": %s",
2064  pg_enc2name_tbl[encoding].name,
2065  buf)));
2066 }
#define Min(x, y)
Definition: c.h:904
int errcode(int sqlerrcode)
Definition: elog.c:570
#define sprintf
Definition: port.h:194
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:68
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:307
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:1833
#define ereport(elevel, rest)
Definition: elog.h:141
int32 encoding
Definition: pg_database.h:41
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:784

◆ report_untranslatable_char()

void report_untranslatable_char ( int  src_encoding,
int  dest_encoding,
const char *  mbstr,
int  len 
)

Definition at line 2075 of file wchar.c.

References buf, ereport, errcode(), errmsg(), ERROR, Min, name, pg_enc2name_tbl, pg_encoding_mblen(), and sprintf.

Referenced by big52mic(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), utf8_to_iso8859_1(), and UtfToLocal().

2077 {
2078  int l = pg_encoding_mblen(src_encoding, mbstr);
2079  char buf[8 * 5 + 1];
2080  char *p = buf;
2081  int j,
2082  jlimit;
2083 
2084  jlimit = Min(l, len);
2085  jlimit = Min(jlimit, 8); /* prevent buffer overrun */
2086 
2087  for (j = 0; j < jlimit; j++)
2088  {
2089  p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
2090  if (j < jlimit - 1)
2091  p += sprintf(p, " ");
2092  }
2093 
2094  ereport(ERROR,
2095  (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
2096  errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
2097  buf,
2098  pg_enc2name_tbl[src_encoding].name,
2099  pg_enc2name_tbl[dest_encoding].name)));
2100 }
#define Min(x, y)
Definition: c.h:904
int errcode(int sqlerrcode)
Definition: elog.c:570
#define sprintf
Definition: port.h:194
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:68
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:307
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:1833
#define ereport(elevel, rest)
Definition: elog.h:141
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:784

◆ ucs_wcwidth()

static int ucs_wcwidth ( pg_wchar  ucs)
static

Definition at line 644 of file wchar.c.

References mbbisearch().

Referenced by pg_utf_dsplen().

645 {
646  /* sorted list of non-overlapping intervals of non-spacing characters */
647  static const struct mbinterval combining[] = {
648  {0x0300, 0x036F}, {0x0483, 0x0489}, {0x0591, 0x05BD},
649  {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C5},
650  {0x05C7, 0x05C7}, {0x0610, 0x061A}, {0x064B, 0x065F},
651  {0x0670, 0x0670}, {0x06D6, 0x06DC}, {0x06DF, 0x06E4},
652  {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, {0x0711, 0x0711},
653  {0x0730, 0x074A}, {0x07A6, 0x07B0}, {0x07EB, 0x07F3},
654  {0x07FD, 0x07FD}, {0x0816, 0x0819}, {0x081B, 0x0823},
655  {0x0825, 0x0827}, {0x0829, 0x082D}, {0x0859, 0x085B},
656  {0x08D3, 0x08E1}, {0x08E3, 0x0902}, {0x093A, 0x093A},
657  {0x093C, 0x093C}, {0x0941, 0x0948}, {0x094D, 0x094D},
658  {0x0951, 0x0957}, {0x0962, 0x0963}, {0x0981, 0x0981},
659  {0x09BC, 0x09BC}, {0x09C1, 0x09C4}, {0x09CD, 0x09CD},
660  {0x09E2, 0x09E3}, {0x09FE, 0x0A02}, {0x0A3C, 0x0A3C},
661  {0x0A41, 0x0A51}, {0x0A70, 0x0A71}, {0x0A75, 0x0A75},
662  {0x0A81, 0x0A82}, {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC8},
663  {0x0ACD, 0x0ACD}, {0x0AE2, 0x0AE3}, {0x0AFA, 0x0B01},
664  {0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B44},
665  {0x0B4D, 0x0B56}, {0x0B62, 0x0B63}, {0x0B82, 0x0B82},
666  {0x0BC0, 0x0BC0}, {0x0BCD, 0x0BCD}, {0x0C00, 0x0C00},
667  {0x0C04, 0x0C04}, {0x0C3E, 0x0C40}, {0x0C46, 0x0C56},
668  {0x0C62, 0x0C63}, {0x0C81, 0x0C81}, {0x0CBC, 0x0CBC},
669  {0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD},
670  {0x0CE2, 0x0CE3}, {0x0D00, 0x0D01}, {0x0D3B, 0x0D3C},
671  {0x0D41, 0x0D44}, {0x0D4D, 0x0D4D}, {0x0D62, 0x0D63},
672  {0x0DCA, 0x0DCA}, {0x0DD2, 0x0DD6}, {0x0E31, 0x0E31},
673  {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1},
674  {0x0EB4, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19},
675  {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39},
676  {0x0F71, 0x0F7E}, {0x0F80, 0x0F84}, {0x0F86, 0x0F87},
677  {0x0F8D, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102D, 0x1030},
678  {0x1032, 0x1037}, {0x1039, 0x103A}, {0x103D, 0x103E},
679  {0x1058, 0x1059}, {0x105E, 0x1060}, {0x1071, 0x1074},
680  {0x1082, 0x1082}, {0x1085, 0x1086}, {0x108D, 0x108D},
681  {0x109D, 0x109D}, {0x135D, 0x135F}, {0x1712, 0x1714},
682  {0x1732, 0x1734}, {0x1752, 0x1753}, {0x1772, 0x1773},
683  {0x17B4, 0x17B5}, {0x17B7, 0x17BD}, {0x17C6, 0x17C6},
684  {0x17C9, 0x17D3}, {0x17DD, 0x17DD}, {0x180B, 0x180D},
685  {0x1885, 0x1886}, {0x18A9, 0x18A9}, {0x1920, 0x1922},
686  {0x1927, 0x1928}, {0x1932, 0x1932}, {0x1939, 0x193B},
687  {0x1A17, 0x1A18}, {0x1A1B, 0x1A1B}, {0x1A56, 0x1A56},
688  {0x1A58, 0x1A60}, {0x1A62, 0x1A62}, {0x1A65, 0x1A6C},
689  {0x1A73, 0x1A7F}, {0x1AB0, 0x1B03}, {0x1B34, 0x1B34},
690  {0x1B36, 0x1B3A}, {0x1B3C, 0x1B3C}, {0x1B42, 0x1B42},
691  {0x1B6B, 0x1B73}, {0x1B80, 0x1B81}, {0x1BA2, 0x1BA5},
692  {0x1BA8, 0x1BA9}, {0x1BAB, 0x1BAD}, {0x1BE6, 0x1BE6},
693  {0x1BE8, 0x1BE9}, {0x1BED, 0x1BED}, {0x1BEF, 0x1BF1},
694  {0x1C2C, 0x1C33}, {0x1C36, 0x1C37}, {0x1CD0, 0x1CD2},
695  {0x1CD4, 0x1CE0}, {0x1CE2, 0x1CE8}, {0x1CED, 0x1CED},
696  {0x1CF4, 0x1CF4}, {0x1CF8, 0x1CF9}, {0x1DC0, 0x1DFF},
697  {0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F},
698  {0x2DE0, 0x2DFF}, {0x302A, 0x302D}, {0x3099, 0x309A},
699  {0xA66F, 0xA672}, {0xA674, 0xA67D}, {0xA69E, 0xA69F},
700  {0xA6F0, 0xA6F1}, {0xA802, 0xA802}, {0xA806, 0xA806},
701  {0xA80B, 0xA80B}, {0xA825, 0xA826}, {0xA8C4, 0xA8C5},
702  {0xA8E0, 0xA8F1}, {0xA8FF, 0xA8FF}, {0xA926, 0xA92D},
703  {0xA947, 0xA951}, {0xA980, 0xA982}, {0xA9B3, 0xA9B3},
704  {0xA9B6, 0xA9B9}, {0xA9BC, 0xA9BD}, {0xA9E5, 0xA9E5},
705  {0xAA29, 0xAA2E}, {0xAA31, 0xAA32}, {0xAA35, 0xAA36},
706  {0xAA43, 0xAA43}, {0xAA4C, 0xAA4C}, {0xAA7C, 0xAA7C},
707  {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8},
708  {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1}, {0xAAEC, 0xAAED},
709  {0xAAF6, 0xAAF6}, {0xABE5, 0xABE5}, {0xABE8, 0xABE8},
710  {0xABED, 0xABED}, {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F},
711  {0xFE20, 0xFE2F},
712  };
713 
714  /* test for 8-bit control characters */
715  if (ucs == 0)
716  return 0;
717 
718  if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
719  return -1;
720 
721  /* binary search in table of non-spacing characters */
722  if (mbbisearch(ucs, combining,
723  sizeof(combining) / sizeof(struct mbinterval) - 1))
724  return 0;
725 
726  /*
727  * if we arrive here, ucs is not a combining or C0/C1 control character
728  */
729 
730  return 1 +
731  (ucs >= 0x1100 &&
732  (ucs <= 0x115f || /* Hangul Jamo init. consonants */
733  (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
734  ucs != 0x303f) || /* CJK ... Yi */
735  (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
736  (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility
737  * Ideographs */
738  (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
739  (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
740  (ucs >= 0xffe0 && ucs <= 0xffe6) ||
741  (ucs >= 0x20000 && ucs <= 0x2ffff)));
742 }
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
Definition: wchar.c:591

◆ unicode_to_utf8()

unsigned char* unicode_to_utf8 ( pg_wchar  c,
unsigned char *  utf8string 
)

Definition at line 482 of file wchar.c.

Referenced by json_lex_string(), pg_saslprep(), pg_wchar2utf_with_len(), and unicode_to_sqlchar().

483 {
484  if (c <= 0x7F)
485  {
486  utf8string[0] = c;
487  }
488  else if (c <= 0x7FF)
489  {
490  utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
491  utf8string[1] = 0x80 | (c & 0x3F);
492  }
493  else if (c <= 0xFFFF)
494  {
495  utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
496  utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
497  utf8string[2] = 0x80 | (c & 0x3F);
498  }
499  else
500  {
501  utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
502  utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
503  utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
504  utf8string[3] = 0x80 | (c & 0x3F);
505  }
506 
507  return utf8string;
508 }
char * c

◆ utf8_to_unicode()

pg_wchar utf8_to_unicode ( const unsigned char *  c)

Definition at line 751 of file wchar.c.

Referenced by pg_saslprep(), and pg_utf_dsplen().

752 {
753  if ((*c & 0x80) == 0)
754  return (pg_wchar) c[0];
755  else if ((*c & 0xe0) == 0xc0)
756  return (pg_wchar) (((c[0] & 0x1f) << 6) |
757  (c[1] & 0x3f));
758  else if ((*c & 0xf0) == 0xe0)
759  return (pg_wchar) (((c[0] & 0x0f) << 12) |
760  ((c[1] & 0x3f) << 6) |
761  (c[2] & 0x3f));
762  else if ((*c & 0xf8) == 0xf0)
763  return (pg_wchar) (((c[0] & 0x07) << 18) |
764  ((c[1] & 0x3f) << 12) |
765  ((c[2] & 0x3f) << 6) |
766  (c[3] & 0x3f));
767  else
768  /* that is an invalid code on purpose */
769  return 0xffffffff;
770 }
char * c
unsigned int pg_wchar
Definition: mbprint.c:31

Variable Documentation

◆ pg_wchar_table