PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
conv.c File Reference
#include "postgres.h"
#include "mb/pg_wchar.h"
Include dependency graph for conv.c:

Go to the source code of this file.

Functions

void local2local (const unsigned char *l, unsigned char *p, int len, int src_encoding, int dest_encoding, const unsigned char *tab)
 
void latin2mic (const unsigned char *l, unsigned char *p, int len, int lc, int encoding)
 
void mic2latin (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding)
 
void pg_ascii2mic (const unsigned char *l, unsigned char *p, int len)
 
void pg_mic2ascii (const unsigned char *mic, unsigned char *p, int len)
 
void latin2mic_with_table (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab)
 
void mic2latin_with_table (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab)
 
static int compare3 (const void *p1, const void *p2)
 
static int compare4 (const void *p1, const void *p2)
 
static unsigned char * store_coded_char (unsigned char *dest, uint32 code)
 
static uint32 pg_mb_radix_conv (const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)
 
void UtfToLocal (const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding)
 
void LocalToUtf (const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding)
 

Function Documentation

static int compare3 ( const void *  p1,
const void *  p2 
)
static

Definition at line 290 of file conv.c.

References s1, and s2.

Referenced by UtfToLocal().

291 {
292  uint32 s1,
293  s2,
294  d1,
295  d2;
296 
297  s1 = *(const uint32 *) p1;
298  s2 = *((const uint32 *) p1 + 1);
299  d1 = ((const pg_utf_to_local_combined *) p2)->utf1;
300  d2 = ((const pg_utf_to_local_combined *) p2)->utf2;
301  return (s1 > d1 || (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1);
302 }
char * s1
unsigned int uint32
Definition: c.h:268
char * s2
static int compare4 ( const void *  p1,
const void *  p2 
)
static

Definition at line 309 of file conv.c.

Referenced by LocalToUtf().

310 {
311  uint32 v1,
312  v2;
313 
314  v1 = *(const uint32 *) p1;
315  v2 = ((const pg_local_to_utf_combined *) p2)->code;
316  return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
317 }
unsigned int uint32
Definition: c.h:268
void latin2mic ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  lc,
int  encoding 
)

Definition at line 71 of file conv.c.

References IS_HIGHBIT_SET, and report_invalid_encoding().

Referenced by koi8r_to_mic(), latin1_to_mic(), latin2_to_mic(), latin3_to_mic(), and latin4_to_mic().

73 {
74  int c1;
75 
76  while (len > 0)
77  {
78  c1 = *l;
79  if (c1 == 0)
80  report_invalid_encoding(encoding, (const char *) l, len);
81  if (IS_HIGHBIT_SET(c1))
82  *p++ = lc;
83  *p++ = c1;
84  l++;
85  len--;
86  }
87  *p = '\0';
88 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
static char * encoding
Definition: initdb.c:122
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
void latin2mic_with_table ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
const unsigned char *  tab 
)

Definition at line 193 of file conv.c.

References HIGHBIT, IS_HIGHBIT_SET, PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by iso_to_mic(), win1250_to_mic(), win1251_to_mic(), and win866_to_mic().

199 {
200  unsigned char c1,
201  c2;
202 
203  while (len > 0)
204  {
205  c1 = *l;
206  if (c1 == 0)
207  report_invalid_encoding(encoding, (const char *) l, len);
208  if (!IS_HIGHBIT_SET(c1))
209  *p++ = c1;
210  else
211  {
212  c2 = tab[c1 - HIGHBIT];
213  if (c2)
214  {
215  *p++ = lc;
216  *p++ = c2;
217  }
218  else
219  report_untranslatable_char(encoding, PG_MULE_INTERNAL,
220  (const char *) l, len);
221  }
222  l++;
223  len--;
224  }
225  *p = '\0';
226 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
#define HIGHBIT
Definition: c.h:972
static char * encoding
Definition: initdb.c:122
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
void local2local ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  src_encoding,
int  dest_encoding,
const unsigned char *  tab 
)

Definition at line 30 of file conv.c.

References HIGHBIT, IS_HIGHBIT_SET, report_invalid_encoding(), and report_untranslatable_char().

Referenced by iso_to_koi8r(), iso_to_win1251(), iso_to_win866(), koi8r_to_iso(), koi8r_to_win1251(), koi8r_to_win866(), latin2_to_win1250(), win1250_to_latin2(), win1251_to_iso(), win1251_to_koi8r(), win1251_to_win866(), win866_to_iso(), win866_to_koi8r(), and win866_to_win1251().

36 {
37  unsigned char c1,
38  c2;
39 
40  while (len > 0)
41  {
42  c1 = *l;
43  if (c1 == 0)
44  report_invalid_encoding(src_encoding, (const char *) l, len);
45  if (!IS_HIGHBIT_SET(c1))
46  *p++ = c1;
47  else
48  {
49  c2 = tab[c1 - HIGHBIT];
50  if (c2)
51  *p++ = c2;
52  else
53  report_untranslatable_char(src_encoding, dest_encoding,
54  (const char *) l, len);
55  }
56  l++;
57  len--;
58  }
59  *p = '\0';
60 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
#define HIGHBIT
Definition: c.h:972
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
void LocalToUtf ( const unsigned char *  iso,
int  len,
unsigned char *  utf,
const pg_mb_radix_tree *  map,
const pg_local_to_utf_combined *  cmap,
int  cmapsize,
utf_local_conversion_func  conv_func,
int  encoding 
)

Definition at line 666 of file conv.c.

References compare4(), elog, ereport, errcode(), errmsg(), ERROR, IS_HIGHBIT_SET, pg_encoding_verifymb(), pg_mb_radix_conv(), PG_UTF8, PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), store_coded_char(), pg_local_to_utf_combined::utf1, and pg_local_to_utf_combined::utf2.

Referenced by big5_to_utf8(), euc_cn_to_utf8(), euc_jis_2004_to_utf8(), euc_jp_to_utf8(), euc_kr_to_utf8(), euc_tw_to_utf8(), gb18030_to_utf8(), gbk_to_utf8(), iso8859_to_utf8(), johab_to_utf8(), koi8r_to_utf8(), koi8u_to_utf8(), shift_jis_2004_to_utf8(), sjis_to_utf8(), uhc_to_utf8(), and win_to_utf8().

672 {
673  uint32 iiso;
674  int l;
675  const pg_local_to_utf_combined *cp;
676 
677  if (!PG_VALID_ENCODING(encoding))
678  ereport(ERROR,
679  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
680  errmsg("invalid encoding number: %d", encoding)));
681 
682  for (; len > 0; len -= l)
683  {
684  unsigned char b1 = 0;
685  unsigned char b2 = 0;
686  unsigned char b3 = 0;
687  unsigned char b4 = 0;
688 
689  /* "break" cases all represent errors */
690  if (*iso == '\0')
691  break;
692 
693  if (!IS_HIGHBIT_SET(*iso))
694  {
695  /* ASCII case is easy, assume it's one-to-one conversion */
696  *utf++ = *iso++;
697  l = 1;
698  continue;
699  }
700 
701  l = pg_encoding_verifymb(encoding, (const char *) iso, len);
702  if (l < 0)
703  break;
704 
705  /* collect coded char of length l */
706  if (l == 1)
707  b4 = *iso++;
708  else if (l == 2)
709  {
710  b3 = *iso++;
711  b4 = *iso++;
712  }
713  else if (l == 3)
714  {
715  b2 = *iso++;
716  b3 = *iso++;
717  b4 = *iso++;
718  }
719  else if (l == 4)
720  {
721  b1 = *iso++;
722  b2 = *iso++;
723  b3 = *iso++;
724  b4 = *iso++;
725  }
726  else
727  {
728  elog(ERROR, "unsupported character length %d", l);
729  iiso = 0; /* keep compiler quiet */
730  }
731  iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
732 
733  if (map)
734  {
735  uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
736 
737  if (converted)
738  {
739  utf = store_coded_char(utf, converted);
740  continue;
741  }
742 
743  /* If there's a combined character map, try that */
744  if (cmap)
745  {
746  cp = bsearch(&iiso, cmap, cmapsize,
747  sizeof(pg_local_to_utf_combined), compare4);
748 
749  if (cp)
750  {
751  utf = store_coded_char(utf, cp->utf1);
752  utf = store_coded_char(utf, cp->utf2);
753  continue;
754  }
755  }
756  }
757 
758  /* if there's a conversion function, try that */
759  if (conv_func)
760  {
761  uint32 converted = (*conv_func) (iiso);
762 
763  if (converted)
764  {
765  utf = store_coded_char(utf, converted);
766  continue;
767  }
768  }
769 
770  /* failed to translate this character */
771  report_untranslatable_char(encoding, PG_UTF8,
772  (const char *) (iso - l), len);
773  }
774 
775  /* if we broke out of loop early, must be invalid input */
776  if (len > 0)
777  report_invalid_encoding(encoding, (const char *) iso, len);
778 
779  *utf = '\0';
780 }
static int compare4(const void *p1, const void *p2)
Definition: conv.c:309
int errcode(int sqlerrcode)
Definition: elog.c:575
static unsigned char * store_coded_char(unsigned char *dest, uint32 code)
Definition: conv.c:323
int pg_encoding_verifymb(int encoding, const char *mbstr, int len)
Definition: wchar.c:1809
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
#define ERROR
Definition: elog.h:43
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
static uint32 pg_mb_radix_conv(const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)
Definition: conv.c:343
unsigned int uint32
Definition: c.h:268
#define ereport(elevel, rest)
Definition: elog.h:122
static char * encoding
Definition: initdb.c:122
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define elog
Definition: elog.h:219
void mic2latin ( const unsigned char *  mic,
unsigned char *  p,
int  len,
int  lc,
int  encoding 
)

Definition at line 99 of file conv.c.

References IS_HIGHBIT_SET, pg_mic_mblen(), PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by mic_to_koi8r(), mic_to_latin1(), mic_to_latin2(), mic_to_latin3(), and mic_to_latin4().

101 {
102  int c1;
103 
104  while (len > 0)
105  {
106  c1 = *mic;
107  if (c1 == 0)
108  report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
109  if (!IS_HIGHBIT_SET(c1))
110  {
111  /* easy for ASCII */
112  *p++ = c1;
113  mic++;
114  len--;
115  }
116  else
117  {
118  int l = pg_mic_mblen(mic);
119 
120  if (len < l)
121  report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
122  len);
123  if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
124  report_untranslatable_char(PG_MULE_INTERNAL, encoding,
125  (const char *) mic, len);
126  *p++ = mic[1];
127  mic += 2;
128  len -= 2;
129  }
130  }
131  *p = '\0';
132 }
int pg_mic_mblen(const unsigned char *mbstr)
Definition: wchar.c:1776
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
static char * encoding
Definition: initdb.c:122
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
void mic2latin_with_table ( const unsigned char *  mic,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
const unsigned char *  tab 
)

Definition at line 241 of file conv.c.

References HIGHBIT, IS_HIGHBIT_SET, pg_mic_mblen(), PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by mic_to_iso(), mic_to_win1250(), mic_to_win1251(), and mic_to_win866().

247 {
248  unsigned char c1,
249  c2;
250 
251  while (len > 0)
252  {
253  c1 = *mic;
254  if (c1 == 0)
255  report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
256  if (!IS_HIGHBIT_SET(c1))
257  {
258  /* easy for ASCII */
259  *p++ = c1;
260  mic++;
261  len--;
262  }
263  else
264  {
265  int l = pg_mic_mblen(mic);
266 
267  if (len < l)
268  report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
269  len);
270  if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
271  (c2 = tab[mic[1] - HIGHBIT]) == 0)
272  {
273  report_untranslatable_char(PG_MULE_INTERNAL, encoding,
274  (const char *) mic, len);
275  break; /* keep compiler quiet */
276  }
277  *p++ = c2;
278  mic += 2;
279  len -= 2;
280  }
281  }
282  *p = '\0';
283 }
int pg_mic_mblen(const unsigned char *mbstr)
Definition: wchar.c:1776
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
#define HIGHBIT
Definition: c.h:972
static char * encoding
Definition: initdb.c:122
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
void pg_ascii2mic ( const unsigned char *  l,
unsigned char *  p,
int  len 
)

Definition at line 143 of file conv.c.

References IS_HIGHBIT_SET, PG_SQL_ASCII, and report_invalid_encoding().

Referenced by ascii_to_mic(), and ascii_to_utf8().

144 {
145  int c1;
146 
147  while (len > 0)
148  {
149  c1 = *l;
150  if (c1 == 0 || IS_HIGHBIT_SET(c1))
151  report_invalid_encoding(PG_SQL_ASCII, (const char *) l, len);
152  *p++ = c1;
153  l++;
154  len--;
155  }
156  *p = '\0';
157 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
static uint32 pg_mb_radix_conv ( const pg_mb_radix_tree *  rt,
int  l,
unsigned char  b1,
unsigned char  b2,
unsigned char  b3,
unsigned char  b4 
)
inline static

Definition at line 343 of file conv.c.

References pg_mb_radix_tree::b1_lower, pg_mb_radix_tree::b1_upper, pg_mb_radix_tree::b1root, pg_mb_radix_tree::b2_1_lower, pg_mb_radix_tree::b2_1_upper, pg_mb_radix_tree::b2_2_lower, pg_mb_radix_tree::b2_2_upper, pg_mb_radix_tree::b2root, pg_mb_radix_tree::b3_1_lower, pg_mb_radix_tree::b3_1_upper, pg_mb_radix_tree::b3_2_lower, pg_mb_radix_tree::b3_2_upper, pg_mb_radix_tree::b3_3_lower, pg_mb_radix_tree::b3_3_upper, pg_mb_radix_tree::b3root, pg_mb_radix_tree::b4_1_lower, pg_mb_radix_tree::b4_1_upper, pg_mb_radix_tree::b4_2_lower, pg_mb_radix_tree::b4_2_upper, pg_mb_radix_tree::b4_3_lower, pg_mb_radix_tree::b4_3_upper, pg_mb_radix_tree::b4_4_lower, pg_mb_radix_tree::b4_4_upper, pg_mb_radix_tree::b4root, pg_mb_radix_tree::chars16, pg_mb_radix_tree::chars32, and idx().

Referenced by LocalToUtf(), and UtfToLocal().

349 {
350  if (l == 4)
351  {
352  /* 4-byte code */
353 
354  /* check code validity */
355  if (b1 < rt->b4_1_lower || b1 > rt->b4_1_upper ||
356  b2 < rt->b4_2_lower || b2 > rt->b4_2_upper ||
357  b3 < rt->b4_3_lower || b3 > rt->b4_3_upper ||
358  b4 < rt->b4_4_lower || b4 > rt->b4_4_upper)
359  return 0;
360 
361  /* perform lookup */
362  if (rt->chars32)
363  {
364  uint32 idx = rt->b4root;
365 
366  idx = rt->chars32[b1 + idx - rt->b4_1_lower];
367  idx = rt->chars32[b2 + idx - rt->b4_2_lower];
368  idx = rt->chars32[b3 + idx - rt->b4_3_lower];
369  return rt->chars32[b4 + idx - rt->b4_4_lower];
370  }
371  else
372  {
373  uint16 idx = rt->b4root;
374 
375  idx = rt->chars16[b1 + idx - rt->b4_1_lower];
376  idx = rt->chars16[b2 + idx - rt->b4_2_lower];
377  idx = rt->chars16[b3 + idx - rt->b4_3_lower];
378  return rt->chars16[b4 + idx - rt->b4_4_lower];
379  }
380  }
381  else if (l == 3)
382  {
383  /* 3-byte code */
384 
385  /* check code validity */
386  if (b2 < rt->b3_1_lower || b2 > rt->b3_1_upper ||
387  b3 < rt->b3_2_lower || b3 > rt->b3_2_upper ||
388  b4 < rt->b3_3_lower || b4 > rt->b3_3_upper)
389  return 0;
390 
391  /* perform lookup */
392  if (rt->chars32)
393  {
394  uint32 idx = rt->b3root;
395 
396  idx = rt->chars32[b2 + idx - rt->b3_1_lower];
397  idx = rt->chars32[b3 + idx - rt->b3_2_lower];
398  return rt->chars32[b4 + idx - rt->b3_3_lower];
399  }
400  else
401  {
402  uint16 idx = rt->b3root;
403 
404  idx = rt->chars16[b2 + idx - rt->b3_1_lower];
405  idx = rt->chars16[b3 + idx - rt->b3_2_lower];
406  return rt->chars16[b4 + idx - rt->b3_3_lower];
407  }
408  }
409  else if (l == 2)
410  {
411  /* 2-byte code */
412 
413  /* check code validity - first byte */
414  if (b3 < rt->b2_1_lower || b3 > rt->b2_1_upper ||
415  b4 < rt->b2_2_lower || b4 > rt->b2_2_upper)
416  return 0;
417 
418  /* perform lookup */
419  if (rt->chars32)
420  {
421  uint32 idx = rt->b2root;
422 
423  idx = rt->chars32[b3 + idx - rt->b2_1_lower];
424  return rt->chars32[b4 + idx - rt->b2_2_lower];
425  }
426  else
427  {
428  uint16 idx = rt->b2root;
429 
430  idx = rt->chars16[b3 + idx - rt->b2_1_lower];
431  return rt->chars16[b4 + idx - rt->b2_2_lower];
432  }
433  }
434  else if (l == 1)
435  {
436  /* 1-byte code */
437 
438  /* check code validity - first byte */
439  if (b4 < rt->b1_lower || b4 > rt->b1_upper)
440  return 0;
441 
442  /* perform lookup */
443  if (rt->chars32)
444  return rt->chars32[b4 + rt->b1root - rt->b1_lower];
445  else
446  return rt->chars16[b4 + rt->b1root - rt->b1_lower];
447  }
448  return 0; /* shouldn't happen */
449 }
uint8 b2_2_upper
Definition: pg_wchar.h:435
uint8 b4_2_lower
Definition: pg_wchar.h:450
const uint16 * chars16
Definition: pg_wchar.h:422
uint8 b2_1_lower
Definition: pg_wchar.h:432
uint8 b4_4_upper
Definition: pg_wchar.h:455
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:264
const uint32 * chars32
Definition: pg_wchar.h:423
uint8 b4_3_upper
Definition: pg_wchar.h:453
uint8 b4_4_lower
Definition: pg_wchar.h:454
uint8 b3_1_lower
Definition: pg_wchar.h:439
unsigned short uint16
Definition: c.h:267
uint8 b3_1_upper
Definition: pg_wchar.h:440
uint8 b4_2_upper
Definition: pg_wchar.h:451
unsigned int uint32
Definition: c.h:268
uint8 b3_2_upper
Definition: pg_wchar.h:442
uint8 b3_2_lower
Definition: pg_wchar.h:441
uint8 b3_3_upper
Definition: pg_wchar.h:444
uint8 b2_2_lower
Definition: pg_wchar.h:434
uint8 b4_1_upper
Definition: pg_wchar.h:449
uint8 b4_3_lower
Definition: pg_wchar.h:452
uint8 b2_1_upper
Definition: pg_wchar.h:433
uint8 b3_3_lower
Definition: pg_wchar.h:443
uint8 b4_1_lower
Definition: pg_wchar.h:448
void pg_mic2ascii ( const unsigned char *  mic,
unsigned char *  p,
int  len 
)

Definition at line 163 of file conv.c.

References IS_HIGHBIT_SET, PG_MULE_INTERNAL, PG_SQL_ASCII, and report_untranslatable_char().

Referenced by mic_to_ascii(), and utf8_to_ascii().

164 {
165  int c1;
166 
167  while (len > 0)
168  {
169  c1 = *mic;
170  if (c1 == 0 || IS_HIGHBIT_SET(c1))
171  report_untranslatable_char(PG_MULE_INTERNAL, PG_SQL_ASCII,
172  (const char *) mic, len);
173  *p++ = c1;
174  mic++;
175  len--;
176  }
177  *p = '\0';
178 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
static unsigned char* store_coded_char ( unsigned char *  dest,
uint32  code 
)
inline static

Definition at line 323 of file conv.c.

Referenced by LocalToUtf(), and UtfToLocal().

324 {
325  if (code & 0xff000000)
326  *dest++ = code >> 24;
327  if (code & 0x00ff0000)
328  *dest++ = code >> 16;
329  if (code & 0x0000ff00)
330  *dest++ = code >> 8;
331  if (code & 0x000000ff)
332  *dest++ = code;
333  return dest;
334 }
void UtfToLocal ( const unsigned char *  utf,
int  len,
unsigned char *  iso,
const pg_mb_radix_tree *  map,
const pg_utf_to_local_combined *  cmap,
int  cmapsize,
utf_local_conversion_func  conv_func,
int  encoding 
)

Definition at line 474 of file conv.c.

References pg_utf_to_local_combined::code, compare3(), elog, ereport, errcode(), errmsg(), ERROR, pg_mb_radix_conv(), PG_UTF8, pg_utf8_islegal(), pg_utf_mblen(), PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), and store_coded_char().

Referenced by utf8_to_big5(), utf8_to_euc_cn(), utf8_to_euc_jis_2004(), utf8_to_euc_jp(), utf8_to_euc_kr(), utf8_to_euc_tw(), utf8_to_gb18030(), utf8_to_gbk(), utf8_to_iso8859(), utf8_to_johab(), utf8_to_koi8r(), utf8_to_koi8u(), utf8_to_shift_jis_2004(), utf8_to_sjis(), utf8_to_uhc(), and utf8_to_win().

480 {
481  uint32 iutf;
482  int l;
483  const pg_utf_to_local_combined *cp;
484 
485  if (!PG_VALID_ENCODING(encoding))
486  ereport(ERROR,
487  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
488  errmsg("invalid encoding number: %d", encoding)));
489 
490  for (; len > 0; len -= l)
491  {
492  unsigned char b1 = 0;
493  unsigned char b2 = 0;
494  unsigned char b3 = 0;
495  unsigned char b4 = 0;
496 
497  /* "break" cases all represent errors */
498  if (*utf == '\0')
499  break;
500 
501  l = pg_utf_mblen(utf);
502  if (len < l)
503  break;
504 
505  if (!pg_utf8_islegal(utf, l))
506  break;
507 
508  if (l == 1)
509  {
510  /* ASCII case is easy, assume it's one-to-one conversion */
511  *iso++ = *utf++;
512  continue;
513  }
514 
515  /* collect coded char of length l */
516  if (l == 2)
517  {
518  b3 = *utf++;
519  b4 = *utf++;
520  }
521  else if (l == 3)
522  {
523  b2 = *utf++;
524  b3 = *utf++;
525  b4 = *utf++;
526  }
527  else if (l == 4)
528  {
529  b1 = *utf++;
530  b2 = *utf++;
531  b3 = *utf++;
532  b4 = *utf++;
533  }
534  else
535  {
536  elog(ERROR, "unsupported character length %d", l);
537  iutf = 0; /* keep compiler quiet */
538  }
539  iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
540 
541  /* First, try with combined map if possible */
542  if (cmap && len > l)
543  {
544  const unsigned char *utf_save = utf;
545  int len_save = len;
546  int l_save = l;
547 
548  /* collect next character, same as above */
549  len -= l;
550 
551  l = pg_utf_mblen(utf);
552  if (len < l)
553  break;
554 
555  if (!pg_utf8_islegal(utf, l))
556  break;
557 
558  /* We assume ASCII character cannot be in combined map */
559  if (l > 1)
560  {
561  uint32 iutf2;
562  uint32 cutf[2];
563 
564  if (l == 2)
565  {
566  iutf2 = *utf++ << 8;
567  iutf2 |= *utf++;
568  }
569  else if (l == 3)
570  {
571  iutf2 = *utf++ << 16;
572  iutf2 |= *utf++ << 8;
573  iutf2 |= *utf++;
574  }
575  else if (l == 4)
576  {
577  iutf2 = *utf++ << 24;
578  iutf2 |= *utf++ << 16;
579  iutf2 |= *utf++ << 8;
580  iutf2 |= *utf++;
581  }
582  else
583  {
584  elog(ERROR, "unsupported character length %d", l);
585  iutf2 = 0; /* keep compiler quiet */
586  }
587 
588  cutf[0] = iutf;
589  cutf[1] = iutf2;
590 
591  cp = bsearch(cutf, cmap, cmapsize,
592  sizeof(pg_utf_to_local_combined), compare3);
593 
594  if (cp)
595  {
596  iso = store_coded_char(iso, cp->code);
597  continue;
598  }
599  }
600 
601  /* fail, so back up to reprocess second character next time */
602  utf = utf_save;
603  len = len_save;
604  l = l_save;
605  }
606 
607  /* Now check ordinary map */
608  if (map)
609  {
610  uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
611 
612  if (converted)
613  {
614  iso = store_coded_char(iso, converted);
615  continue;
616  }
617  }
618 
619  /* if there's a conversion function, try that */
620  if (conv_func)
621  {
622  uint32 converted = (*conv_func) (iutf);
623 
624  if (converted)
625  {
626  iso = store_coded_char(iso, converted);
627  continue;
628  }
629  }
630 
631  /* failed to translate this character */
632  report_untranslatable_char(PG_UTF8, encoding,
633  (const char *) (utf - l), len);
634  }
635 
636  /* if we broke out of loop early, must be invalid input */
637  if (len > 0)
638  report_invalid_encoding(PG_UTF8, (const char *) utf, len);
639 
640  *iso = '\0';
641 }
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1458
int errcode(int sqlerrcode)
Definition: elog.c:575
static unsigned char * store_coded_char(unsigned char *dest, uint32 code)
Definition: conv.c:323
#define ERROR
Definition: elog.h:43
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
static uint32 pg_mb_radix_conv(const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)
Definition: conv.c:343
unsigned int uint32
Definition: c.h:268
#define ereport(elevel, rest)
Definition: elog.h:122
static int compare3(const void *p1, const void *p2)
Definition: conv.c:290
static char * encoding
Definition: initdb.c:122
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:541
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define elog
Definition: elog.h:219