PostgreSQL Source Code  git master
conv.c File Reference
#include "postgres.h"
#include "mb/pg_wchar.h"
Include dependency graph for conv.c:

Go to the source code of this file.

Functions

void local2local (const unsigned char *l, unsigned char *p, int len, int src_encoding, int dest_encoding, const unsigned char *tab)
 
void latin2mic (const unsigned char *l, unsigned char *p, int len, int lc, int encoding)
 
void mic2latin (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding)
 
void latin2mic_with_table (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab)
 
void mic2latin_with_table (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab)
 
static int compare3 (const void *p1, const void *p2)
 
static int compare4 (const void *p1, const void *p2)
 
static unsigned char * store_coded_char (unsigned char *dest, uint32 code)
 
static uint32 pg_mb_radix_conv (const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)
 
void UtfToLocal (const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding)
 
void LocalToUtf (const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding)
 

Function Documentation

◆ compare3()

static int compare3 ( const void *  p1,
const void *  p2 
)
static

Definition at line 245 of file conv.c.

References s1, and s2.

Referenced by UtfToLocal().

246 {
247  uint32 s1,
248  s2,
249  d1,
250  d2;
251 
252  s1 = *(const uint32 *) p1;
253  s2 = *((const uint32 *) p1 + 1);
254  d1 = ((const pg_utf_to_local_combined *) p2)->utf1;
255  d2 = ((const pg_utf_to_local_combined *) p2)->utf2;
256  return (s1 > d1 || (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1);
257 }
char * s1
unsigned int uint32
Definition: c.h:359
char * s2

◆ compare4()

static int compare4 ( const void *  p1,
const void *  p2 
)
static

Definition at line 264 of file conv.c.

Referenced by LocalToUtf().

265 {
266  uint32 v1,
267  v2;
268 
269  v1 = *(const uint32 *) p1;
270  v2 = ((const pg_local_to_utf_combined *) p2)->code;
271  return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
272 }
unsigned int uint32
Definition: c.h:359

◆ latin2mic()

void latin2mic ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  lc,
int  encoding 
)

Definition at line 71 of file conv.c.

References IS_HIGHBIT_SET, and report_invalid_encoding().

Referenced by koi8r_to_mic(), latin1_to_mic(), latin2_to_mic(), latin3_to_mic(), and latin4_to_mic().

73 {
74  int c1;
75 
76  while (len > 0)
77  {
78  c1 = *l;
79  if (c1 == 0)
80  report_invalid_encoding(encoding, (const char *) l, len);
81  if (IS_HIGHBIT_SET(c1))
82  *p++ = lc;
83  *p++ = c1;
84  l++;
85  len--;
86  }
87  *p = '\0';
88 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1106
int32 encoding
Definition: pg_database.h:41
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:2043

◆ latin2mic_with_table()

void latin2mic_with_table ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
const unsigned char *  tab 
)

Definition at line 148 of file conv.c.

References HIGHBIT, IS_HIGHBIT_SET, PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by iso_to_mic(), win1250_to_mic(), win1251_to_mic(), and win866_to_mic().

154 {
155  unsigned char c1,
156  c2;
157 
158  while (len > 0)
159  {
160  c1 = *l;
161  if (c1 == 0)
162  report_invalid_encoding(encoding, (const char *) l, len);
163  if (!IS_HIGHBIT_SET(c1))
164  *p++ = c1;
165  else
166  {
167  c2 = tab[c1 - HIGHBIT];
168  if (c2)
169  {
170  *p++ = lc;
171  *p++ = c2;
172  }
173  else
174  report_untranslatable_char(encoding, PG_MULE_INTERNAL,
175  (const char *) l, len);
176  }
177  l++;
178  len--;
179  }
180  *p = '\0';
181 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1106
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2075
#define HIGHBIT
Definition: c.h:1105
int32 encoding
Definition: pg_database.h:41
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:2043

◆ local2local()

void local2local ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  src_encoding,
int  dest_encoding,
const unsigned char *  tab 
)

Definition at line 30 of file conv.c.

References HIGHBIT, IS_HIGHBIT_SET, report_invalid_encoding(), and report_untranslatable_char().

Referenced by iso_to_koi8r(), iso_to_win1251(), iso_to_win866(), koi8r_to_iso(), koi8r_to_win1251(), koi8r_to_win866(), latin2_to_win1250(), win1250_to_latin2(), win1251_to_iso(), win1251_to_koi8r(), win1251_to_win866(), win866_to_iso(), win866_to_koi8r(), and win866_to_win1251().

36 {
37  unsigned char c1,
38  c2;
39 
40  while (len > 0)
41  {
42  c1 = *l;
43  if (c1 == 0)
44  report_invalid_encoding(src_encoding, (const char *) l, len);
45  if (!IS_HIGHBIT_SET(c1))
46  *p++ = c1;
47  else
48  {
49  c2 = tab[c1 - HIGHBIT];
50  if (c2)
51  *p++ = c2;
52  else
53  report_untranslatable_char(src_encoding, dest_encoding,
54  (const char *) l, len);
55  }
56  l++;
57  len--;
58  }
59  *p = '\0';
60 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1106
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2075
#define HIGHBIT
Definition: c.h:1105
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:2043

◆ LocalToUtf()

void LocalToUtf ( const unsigned char *  iso,
int  len,
unsigned char *  utf,
const pg_mb_radix_tree *  map,
const pg_local_to_utf_combined *  cmap,
int  cmapsize,
utf_local_conversion_func  conv_func,
int  encoding 
)

Definition at line 621 of file conv.c.

References compare4(), elog, ereport, errcode(), errmsg(), ERROR, IS_HIGHBIT_SET, pg_encoding_verifymb(), pg_mb_radix_conv(), PG_UTF8, PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), store_coded_char(), pg_local_to_utf_combined::utf1, and pg_local_to_utf_combined::utf2.

Referenced by big5_to_utf8(), euc_cn_to_utf8(), euc_jis_2004_to_utf8(), euc_jp_to_utf8(), euc_kr_to_utf8(), euc_tw_to_utf8(), gb18030_to_utf8(), gbk_to_utf8(), iso8859_to_utf8(), johab_to_utf8(), koi8r_to_utf8(), koi8u_to_utf8(), shift_jis_2004_to_utf8(), sjis_to_utf8(), uhc_to_utf8(), and win_to_utf8().

627 {
628  uint32 iiso;
629  int l;
630  const pg_local_to_utf_combined *cp;
631 
632  if (!PG_VALID_ENCODING(encoding))
633  ereport(ERROR,
634  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
635  errmsg("invalid encoding number: %d", encoding)));
636 
637  for (; len > 0; len -= l)
638  {
639  unsigned char b1 = 0;
640  unsigned char b2 = 0;
641  unsigned char b3 = 0;
642  unsigned char b4 = 0;
643 
644  /* "break" cases all represent errors */
645  if (*iso == '\0')
646  break;
647 
648  if (!IS_HIGHBIT_SET(*iso))
649  {
650  /* ASCII case is easy, assume it's one-to-one conversion */
651  *utf++ = *iso++;
652  l = 1;
653  continue;
654  }
655 
656  l = pg_encoding_verifymb(encoding, (const char *) iso, len);
657  if (l < 0)
658  break;
659 
660  /* collect coded char of length l */
661  if (l == 1)
662  b4 = *iso++;
663  else if (l == 2)
664  {
665  b3 = *iso++;
666  b4 = *iso++;
667  }
668  else if (l == 3)
669  {
670  b2 = *iso++;
671  b3 = *iso++;
672  b4 = *iso++;
673  }
674  else if (l == 4)
675  {
676  b1 = *iso++;
677  b2 = *iso++;
678  b3 = *iso++;
679  b4 = *iso++;
680  }
681  else
682  {
683  elog(ERROR, "unsupported character length %d", l);
684  iiso = 0; /* keep compiler quiet */
685  }
686  iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
687 
688  if (map)
689  {
690  uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
691 
692  if (converted)
693  {
694  utf = store_coded_char(utf, converted);
695  continue;
696  }
697 
698  /* If there's a combined character map, try that */
699  if (cmap)
700  {
701  cp = bsearch(&iiso, cmap, cmapsize,
702  sizeof(pg_local_to_utf_combined), compare4);
703 
704  if (cp)
705  {
706  utf = store_coded_char(utf, cp->utf1);
707  utf = store_coded_char(utf, cp->utf2);
708  continue;
709  }
710  }
711  }
712 
713  /* if there's a conversion function, try that */
714  if (conv_func)
715  {
716  uint32 converted = (*conv_func) (iiso);
717 
718  if (converted)
719  {
720  utf = store_coded_char(utf, converted);
721  continue;
722  }
723  }
724 
725  /* failed to translate this character */
726  report_untranslatable_char(encoding, PG_UTF8,
727  (const char *) (iso - l), len);
728  }
729 
730  /* if we broke out of loop early, must be invalid input */
731  if (len > 0)
732  report_invalid_encoding(encoding, (const char *) iso, len);
733 
734  *utf = '\0';
735 }
static int compare4(const void *p1, const void *p2)
Definition: conv.c:264
int errcode(int sqlerrcode)
Definition: elog.c:608
static unsigned char * store_coded_char(unsigned char *dest, uint32 code)
Definition: conv.c:278
int pg_encoding_verifymb(int encoding, const char *mbstr, int len)
Definition: wchar.c:1857
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1106
#define ERROR
Definition: elog.h:43
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2075
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
static uint32 pg_mb_radix_conv(const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)
Definition: conv.c:298
unsigned int uint32
Definition: c.h:359
#define ereport(elevel, rest)
Definition: elog.h:141
int32 encoding
Definition: pg_database.h:41
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:2043
int errmsg(const char *fmt,...)
Definition: elog.c:822
#define elog(elevel,...)
Definition: elog.h:228

◆ mic2latin()

void mic2latin ( const unsigned char *  mic,
unsigned char *  p,
int  len,
int  lc,
int  encoding 
)

Definition at line 99 of file conv.c.

References IS_HIGHBIT_SET, pg_mic_mblen(), PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by mic_to_koi8r(), mic_to_latin1(), mic_to_latin2(), mic_to_latin3(), and mic_to_latin4().

101 {
102  int c1;
103 
104  while (len > 0)
105  {
106  c1 = *mic;
107  if (c1 == 0)
108  report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
109  if (!IS_HIGHBIT_SET(c1))
110  {
111  /* easy for ASCII */
112  *p++ = c1;
113  mic++;
114  len--;
115  }
116  else
117  {
118  int l = pg_mic_mblen(mic);
119 
120  if (len < l)
121  report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
122  len);
123  if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
124  report_untranslatable_char(PG_MULE_INTERNAL, encoding,
125  (const char *) mic, len);
126  *p++ = mic[1];
127  mic += 2;
128  len -= 2;
129  }
130  }
131  *p = '\0';
132 }
int pg_mic_mblen(const unsigned char *mbstr)
Definition: wchar.c:1824
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1106
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2075
int32 encoding
Definition: pg_database.h:41
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:2043

◆ mic2latin_with_table()

void mic2latin_with_table ( const unsigned char *  mic,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
const unsigned char *  tab 
)

Definition at line 196 of file conv.c.

References HIGHBIT, IS_HIGHBIT_SET, pg_mic_mblen(), PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by mic_to_iso(), mic_to_win1250(), mic_to_win1251(), and mic_to_win866().

202 {
203  unsigned char c1,
204  c2;
205 
206  while (len > 0)
207  {
208  c1 = *mic;
209  if (c1 == 0)
210  report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
211  if (!IS_HIGHBIT_SET(c1))
212  {
213  /* easy for ASCII */
214  *p++ = c1;
215  mic++;
216  len--;
217  }
218  else
219  {
220  int l = pg_mic_mblen(mic);
221 
222  if (len < l)
223  report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
224  len);
225  if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
226  (c2 = tab[mic[1] - HIGHBIT]) == 0)
227  {
228  report_untranslatable_char(PG_MULE_INTERNAL, encoding,
229  (const char *) mic, len);
230  break; /* keep compiler quiet */
231  }
232  *p++ = c2;
233  mic += 2;
234  len -= 2;
235  }
236  }
237  *p = '\0';
238 }
int pg_mic_mblen(const unsigned char *mbstr)
Definition: wchar.c:1824
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1106
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2075
#define HIGHBIT
Definition: c.h:1105
int32 encoding
Definition: pg_database.h:41
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:2043

◆ pg_mb_radix_conv()

static uint32 pg_mb_radix_conv ( const pg_mb_radix_tree *  rt,
int  l,
unsigned char  b1,
unsigned char  b2,
unsigned char  b3,
unsigned char  b4 
)
inline static

Definition at line 298 of file conv.c.

References pg_mb_radix_tree::b1_lower, pg_mb_radix_tree::b1_upper, pg_mb_radix_tree::b1root, pg_mb_radix_tree::b2_1_lower, pg_mb_radix_tree::b2_1_upper, pg_mb_radix_tree::b2_2_lower, pg_mb_radix_tree::b2_2_upper, pg_mb_radix_tree::b2root, pg_mb_radix_tree::b3_1_lower, pg_mb_radix_tree::b3_1_upper, pg_mb_radix_tree::b3_2_lower, pg_mb_radix_tree::b3_2_upper, pg_mb_radix_tree::b3_3_lower, pg_mb_radix_tree::b3_3_upper, pg_mb_radix_tree::b3root, pg_mb_radix_tree::b4_1_lower, pg_mb_radix_tree::b4_1_upper, pg_mb_radix_tree::b4_2_lower, pg_mb_radix_tree::b4_2_upper, pg_mb_radix_tree::b4_3_lower, pg_mb_radix_tree::b4_3_upper, pg_mb_radix_tree::b4_4_lower, pg_mb_radix_tree::b4_4_upper, pg_mb_radix_tree::b4root, pg_mb_radix_tree::chars16, pg_mb_radix_tree::chars32, and idx().

Referenced by LocalToUtf(), and UtfToLocal().

304 {
305  if (l == 4)
306  {
307  /* 4-byte code */
308 
309  /* check code validity */
310  if (b1 < rt->b4_1_lower || b1 > rt->b4_1_upper ||
311  b2 < rt->b4_2_lower || b2 > rt->b4_2_upper ||
312  b3 < rt->b4_3_lower || b3 > rt->b4_3_upper ||
313  b4 < rt->b4_4_lower || b4 > rt->b4_4_upper)
314  return 0;
315 
316  /* perform lookup */
317  if (rt->chars32)
318  {
319  uint32 idx = rt->b4root;
320 
321  idx = rt->chars32[b1 + idx - rt->b4_1_lower];
322  idx = rt->chars32[b2 + idx - rt->b4_2_lower];
323  idx = rt->chars32[b3 + idx - rt->b4_3_lower];
324  return rt->chars32[b4 + idx - rt->b4_4_lower];
325  }
326  else
327  {
328  uint16 idx = rt->b4root;
329 
330  idx = rt->chars16[b1 + idx - rt->b4_1_lower];
331  idx = rt->chars16[b2 + idx - rt->b4_2_lower];
332  idx = rt->chars16[b3 + idx - rt->b4_3_lower];
333  return rt->chars16[b4 + idx - rt->b4_4_lower];
334  }
335  }
336  else if (l == 3)
337  {
338  /* 3-byte code */
339 
340  /* check code validity */
341  if (b2 < rt->b3_1_lower || b2 > rt->b3_1_upper ||
342  b3 < rt->b3_2_lower || b3 > rt->b3_2_upper ||
343  b4 < rt->b3_3_lower || b4 > rt->b3_3_upper)
344  return 0;
345 
346  /* perform lookup */
347  if (rt->chars32)
348  {
349  uint32 idx = rt->b3root;
350 
351  idx = rt->chars32[b2 + idx - rt->b3_1_lower];
352  idx = rt->chars32[b3 + idx - rt->b3_2_lower];
353  return rt->chars32[b4 + idx - rt->b3_3_lower];
354  }
355  else
356  {
357  uint16 idx = rt->b3root;
358 
359  idx = rt->chars16[b2 + idx - rt->b3_1_lower];
360  idx = rt->chars16[b3 + idx - rt->b3_2_lower];
361  return rt->chars16[b4 + idx - rt->b3_3_lower];
362  }
363  }
364  else if (l == 2)
365  {
366  /* 2-byte code */
367 
368  /* check code validity - first byte */
369  if (b3 < rt->b2_1_lower || b3 > rt->b2_1_upper ||
370  b4 < rt->b2_2_lower || b4 > rt->b2_2_upper)
371  return 0;
372 
373  /* perform lookup */
374  if (rt->chars32)
375  {
376  uint32 idx = rt->b2root;
377 
378  idx = rt->chars32[b3 + idx - rt->b2_1_lower];
379  return rt->chars32[b4 + idx - rt->b2_2_lower];
380  }
381  else
382  {
383  uint16 idx = rt->b2root;
384 
385  idx = rt->chars16[b3 + idx - rt->b2_1_lower];
386  return rt->chars16[b4 + idx - rt->b2_2_lower];
387  }
388  }
389  else if (l == 1)
390  {
391  /* 1-byte code */
392 
393  /* check code validity - first byte */
394  if (b4 < rt->b1_lower || b4 > rt->b1_upper)
395  return 0;
396 
397  /* perform lookup */
398  if (rt->chars32)
399  return rt->chars32[b4 + rt->b1root - rt->b1_lower];
400  else
401  return rt->chars16[b4 + rt->b1root - rt->b1_lower];
402  }
403  return 0; /* shouldn't happen */
404 }
uint8 b2_2_upper
Definition: pg_wchar.h:446
uint8 b4_2_lower
Definition: pg_wchar.h:461
const uint16 * chars16
Definition: pg_wchar.h:433
uint8 b2_1_lower
Definition: pg_wchar.h:443
uint8 b4_4_upper
Definition: pg_wchar.h:466
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:263
const uint32 * chars32
Definition: pg_wchar.h:434
uint8 b4_3_upper
Definition: pg_wchar.h:464
uint8 b4_4_lower
Definition: pg_wchar.h:465
uint8 b3_1_lower
Definition: pg_wchar.h:450
unsigned short uint16
Definition: c.h:358
uint8 b3_1_upper
Definition: pg_wchar.h:451
uint8 b4_2_upper
Definition: pg_wchar.h:462
unsigned int uint32
Definition: c.h:359
uint8 b3_2_upper
Definition: pg_wchar.h:453
uint8 b3_2_lower
Definition: pg_wchar.h:452
uint8 b3_3_upper
Definition: pg_wchar.h:455
uint8 b2_2_lower
Definition: pg_wchar.h:445
uint8 b4_1_upper
Definition: pg_wchar.h:460
uint8 b4_3_lower
Definition: pg_wchar.h:463
uint8 b2_1_upper
Definition: pg_wchar.h:444
uint8 b3_3_lower
Definition: pg_wchar.h:454
uint8 b4_1_lower
Definition: pg_wchar.h:459

◆ store_coded_char()

static unsigned char* store_coded_char ( unsigned char *  dest,
uint32  code 
)
inline static

Definition at line 278 of file conv.c.

References generate_unaccent_rules::dest.

Referenced by LocalToUtf(), and UtfToLocal().

279 {
280  if (code & 0xff000000)
281  *dest++ = code >> 24;
282  if (code & 0x00ff0000)
283  *dest++ = code >> 16;
284  if (code & 0x0000ff00)
285  *dest++ = code >> 8;
286  if (code & 0x000000ff)
287  *dest++ = code;
288  return dest;
289 }

◆ UtfToLocal()

void UtfToLocal ( const unsigned char *  utf,
int  len,
unsigned char *  iso,
const pg_mb_radix_tree *  map,
const pg_utf_to_local_combined *  cmap,
int  cmapsize,
utf_local_conversion_func  conv_func,
int  encoding 
)

Definition at line 429 of file conv.c.

References pg_utf_to_local_combined::code, compare3(), elog, ereport, errcode(), errmsg(), ERROR, pg_mb_radix_conv(), PG_UTF8, pg_utf8_islegal(), pg_utf_mblen(), PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), and store_coded_char().

Referenced by utf8_to_big5(), utf8_to_euc_cn(), utf8_to_euc_jis_2004(), utf8_to_euc_jp(), utf8_to_euc_kr(), utf8_to_euc_tw(), utf8_to_gb18030(), utf8_to_gbk(), utf8_to_iso8859(), utf8_to_johab(), utf8_to_koi8r(), utf8_to_koi8u(), utf8_to_shift_jis_2004(), utf8_to_sjis(), utf8_to_uhc(), and utf8_to_win().

435 {
436  uint32 iutf;
437  int l;
438  const pg_utf_to_local_combined *cp;
439 
440  if (!PG_VALID_ENCODING(encoding))
441  ereport(ERROR,
442  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
443  errmsg("invalid encoding number: %d", encoding)));
444 
445  for (; len > 0; len -= l)
446  {
447  unsigned char b1 = 0;
448  unsigned char b2 = 0;
449  unsigned char b3 = 0;
450  unsigned char b4 = 0;
451 
452  /* "break" cases all represent errors */
453  if (*utf == '\0')
454  break;
455 
456  l = pg_utf_mblen(utf);
457  if (len < l)
458  break;
459 
460  if (!pg_utf8_islegal(utf, l))
461  break;
462 
463  if (l == 1)
464  {
465  /* ASCII case is easy, assume it's one-to-one conversion */
466  *iso++ = *utf++;
467  continue;
468  }
469 
470  /* collect coded char of length l */
471  if (l == 2)
472  {
473  b3 = *utf++;
474  b4 = *utf++;
475  }
476  else if (l == 3)
477  {
478  b2 = *utf++;
479  b3 = *utf++;
480  b4 = *utf++;
481  }
482  else if (l == 4)
483  {
484  b1 = *utf++;
485  b2 = *utf++;
486  b3 = *utf++;
487  b4 = *utf++;
488  }
489  else
490  {
491  elog(ERROR, "unsupported character length %d", l);
492  iutf = 0; /* keep compiler quiet */
493  }
494  iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
495 
496  /* First, try with combined map if possible */
497  if (cmap && len > l)
498  {
499  const unsigned char *utf_save = utf;
500  int len_save = len;
501  int l_save = l;
502 
503  /* collect next character, same as above */
504  len -= l;
505 
506  l = pg_utf_mblen(utf);
507  if (len < l)
508  break;
509 
510  if (!pg_utf8_islegal(utf, l))
511  break;
512 
513  /* We assume ASCII character cannot be in combined map */
514  if (l > 1)
515  {
516  uint32 iutf2;
517  uint32 cutf[2];
518 
519  if (l == 2)
520  {
521  iutf2 = *utf++ << 8;
522  iutf2 |= *utf++;
523  }
524  else if (l == 3)
525  {
526  iutf2 = *utf++ << 16;
527  iutf2 |= *utf++ << 8;
528  iutf2 |= *utf++;
529  }
530  else if (l == 4)
531  {
532  iutf2 = *utf++ << 24;
533  iutf2 |= *utf++ << 16;
534  iutf2 |= *utf++ << 8;
535  iutf2 |= *utf++;
536  }
537  else
538  {
539  elog(ERROR, "unsupported character length %d", l);
540  iutf2 = 0; /* keep compiler quiet */
541  }
542 
543  cutf[0] = iutf;
544  cutf[1] = iutf2;
545 
546  cp = bsearch(cutf, cmap, cmapsize,
547  sizeof(pg_utf_to_local_combined), compare3);
548 
549  if (cp)
550  {
551  iso = store_coded_char(iso, cp->code);
552  continue;
553  }
554  }
555 
556  /* fail, so back up to reprocess second character next time */
557  utf = utf_save;
558  len = len_save;
559  l = l_save;
560  }
561 
562  /* Now check ordinary map */
563  if (map)
564  {
565  uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
566 
567  if (converted)
568  {
569  iso = store_coded_char(iso, converted);
570  continue;
571  }
572  }
573 
574  /* if there's a conversion function, try that */
575  if (conv_func)
576  {
577  uint32 converted = (*conv_func) (iutf);
578 
579  if (converted)
580  {
581  iso = store_coded_char(iso, converted);
582  continue;
583  }
584  }
585 
586  /* failed to translate this character */
587  report_untranslatable_char(PG_UTF8, encoding,
588  (const char *) (utf - l), len);
589  }
590 
591  /* if we broke out of loop early, must be invalid input */
592  if (len > 0)
593  report_invalid_encoding(PG_UTF8, (const char *) utf, len);
594 
595  *iso = '\0';
596 }
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1506
int errcode(int sqlerrcode)
Definition: elog.c:608
static unsigned char * store_coded_char(unsigned char *dest, uint32 code)
Definition: conv.c:278
#define ERROR
Definition: elog.h:43
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2075
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
static uint32 pg_mb_radix_conv(const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)
Definition: conv.c:298
unsigned int uint32
Definition: c.h:359
#define ereport(elevel, rest)
Definition: elog.h:141
static int compare3(const void *p1, const void *p2)
Definition: conv.c:245
int32 encoding
Definition: pg_database.h:41
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:2043
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:548
int errmsg(const char *fmt,...)
Definition: elog.c:822
#define elog(elevel,...)
Definition: elog.h:228