PostgreSQL Source Code git master
conv.c File Reference
#include "postgres.h"
#include "mb/pg_wchar.h"
Include dependency graph for conv.c:

Go to the source code of this file.

Functions

int local2local (const unsigned char *l, unsigned char *p, int len, int src_encoding, int dest_encoding, const unsigned char *tab, bool noError)
 
int latin2mic (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, bool noError)
 
int mic2latin (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, bool noError)
 
int latin2mic_with_table (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)
 
int mic2latin_with_table (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)
 
static int compare3 (const void *p1, const void *p2)
 
static int compare4 (const void *p1, const void *p2)
 
static unsigned char * store_coded_char (unsigned char *dest, uint32 code)
 
static uint32 pg_mb_radix_conv (const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)
 
int UtfToLocal (const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
 
int LocalToUtf (const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
 

Function Documentation

◆ compare3()

static int compare3 ( const void *  p1,
const void *  p2 
)
static

Definition at line 320 of file conv.c.

321{
322 uint32 s1,
323 s2,
324 d1,
325 d2;
326
327 s1 = *(const uint32 *) p1;
328 s2 = *((const uint32 *) p1 + 1);
329 d1 = ((const pg_utf_to_local_combined *) p2)->utf1;
330 d2 = ((const pg_utf_to_local_combined *) p2)->utf2;
331 return (s1 > d1 || (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1);
332}
uint32_t uint32
Definition: c.h:502
char * s1
char * s2

References s1, and s2.

Referenced by UtfToLocal().

◆ compare4()

static int compare4 ( const void *  p1,
const void *  p2 
)
static

Definition at line 339 of file conv.c.

340{
341 uint32 v1,
342 v2;
343
344 v1 = *(const uint32 *) p1;
345 v2 = ((const pg_local_to_utf_combined *) p2)->code;
346 return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
347}

Referenced by LocalToUtf().

◆ latin2mic()

int latin2mic ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
bool  noError 
)

Definition at line 89 of file conv.c.

/*
 * latin2mic: convert a single-byte encoding to the two-byte "lc + byte"
 * form, without any translation table.
 *
 * l		source string
 * p		destination buffer (sizing is the caller's responsibility)
 * len		number of source bytes
 * lc		leading byte emitted before every high-bit source byte
 * encoding	source encoding ID, used only for error reporting
 * noError	if true, stop at the first bad byte instead of reporting it
 *
 * ASCII bytes are copied as-is.  The output is always NUL-terminated.
 * Returns the number of source bytes consumed.
 */
int
latin2mic(const unsigned char *l, unsigned char *p, int len,
		  int lc, int encoding, bool noError)
{
	const unsigned char *const origin = l;

	for (; len > 0; l++, len--)
	{
		const int	ch = *l;

		/* an embedded NUL byte is treated as invalid input */
		if (ch == 0)
		{
			if (noError)
				break;
			report_invalid_encoding(encoding, (const char *) l, len);
		}

		/* high-bit bytes get the leading byte in front of them */
		if (IS_HIGHBIT_SET(ch))
			*p++ = lc;
		*p++ = ch;
	}
	*p = '\0';

	return l - origin;
}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1126
return str start
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698
const void size_t len
int32 encoding
Definition: pg_database.h:41

References encoding, IS_HIGHBIT_SET, len, report_invalid_encoding(), and start.

Referenced by koi8r_to_mic(), latin1_to_mic(), latin2_to_mic(), latin3_to_mic(), and latin4_to_mic().

◆ latin2mic_with_table()

int latin2mic_with_table ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
const unsigned char *  tab,
bool  noError 
)

Definition at line 194 of file conv.c.

201{
202 const unsigned char *start = l;
203 unsigned char c1,
204 c2;
205
206 while (len > 0)
207 {
208 c1 = *l;
209 if (c1 == 0)
210 {
211 if (noError)
212 break;
213 report_invalid_encoding(encoding, (const char *) l, len);
214 }
215 if (!IS_HIGHBIT_SET(c1))
216 *p++ = c1;
217 else
218 {
219 c2 = tab[c1 - HIGHBIT];
220 if (c2)
221 {
222 *p++ = lc;
223 *p++ = c2;
224 }
225 else
226 {
227 if (noError)
228 break;
230 (const char *) l, len);
231 }
232 }
233 l++;
234 len--;
235 }
236 *p = '\0';
237
238 return l - start;
239}
#define HIGHBIT
Definition: c.h:1125
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1730
@ PG_MULE_INTERNAL
Definition: pg_wchar.h:233

References encoding, HIGHBIT, IS_HIGHBIT_SET, len, PG_MULE_INTERNAL, report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by iso_to_mic(), win1250_to_mic(), win1251_to_mic(), and win866_to_mic().

◆ local2local()

int local2local ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  src_encoding,
int  dest_encoding,
const unsigned char *  tab,
bool  noError 
)

Definition at line 33 of file conv.c.

40{
41 const unsigned char *start = l;
42 unsigned char c1,
43 c2;
44
45 while (len > 0)
46 {
47 c1 = *l;
48 if (c1 == 0)
49 {
50 if (noError)
51 break;
52 report_invalid_encoding(src_encoding, (const char *) l, len);
53 }
54 if (!IS_HIGHBIT_SET(c1))
55 *p++ = c1;
56 else
57 {
58 c2 = tab[c1 - HIGHBIT];
59 if (c2)
60 *p++ = c2;
61 else
62 {
63 if (noError)
64 break;
65 report_untranslatable_char(src_encoding, dest_encoding,
66 (const char *) l, len);
67 }
68 }
69 l++;
70 len--;
71 }
72 *p = '\0';
73
74 return l - start;
75}

References HIGHBIT, IS_HIGHBIT_SET, len, report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by iso_to_koi8r(), iso_to_win1251(), iso_to_win866(), koi8r_to_iso(), koi8r_to_win1251(), koi8r_to_win866(), latin2_to_win1250(), win1250_to_latin2(), win1251_to_iso(), win1251_to_koi8r(), win1251_to_win866(), win866_to_iso(), win866_to_koi8r(), and win866_to_win1251().

◆ LocalToUtf()

int LocalToUtf ( const unsigned char *  iso,
int  len,
unsigned char *  utf,
const pg_mb_radix_tree *  map,
const pg_local_to_utf_combined *  cmap,
int  cmapsize,
utf_local_conversion_func  conv_func,
int  encoding,
bool  noError 
)

Definition at line 717 of file conv.c.

724{
725 uint32 iiso;
726 int l;
727 const pg_local_to_utf_combined *cp;
728 const unsigned char *start = iso;
729
732 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
733 errmsg("invalid encoding number: %d", encoding)));
734
735 for (; len > 0; len -= l)
736 {
737 unsigned char b1 = 0;
738 unsigned char b2 = 0;
739 unsigned char b3 = 0;
740 unsigned char b4 = 0;
741
742 /* "break" cases all represent errors */
743 if (*iso == '\0')
744 break;
745
746 if (!IS_HIGHBIT_SET(*iso))
747 {
748 /* ASCII case is easy, assume it's one-to-one conversion */
749 *utf++ = *iso++;
750 l = 1;
751 continue;
752 }
753
754 l = pg_encoding_verifymbchar(encoding, (const char *) iso, len);
755 if (l < 0)
756 break;
757
758 /* collect coded char of length l */
759 if (l == 1)
760 b4 = *iso++;
761 else if (l == 2)
762 {
763 b3 = *iso++;
764 b4 = *iso++;
765 }
766 else if (l == 3)
767 {
768 b2 = *iso++;
769 b3 = *iso++;
770 b4 = *iso++;
771 }
772 else if (l == 4)
773 {
774 b1 = *iso++;
775 b2 = *iso++;
776 b3 = *iso++;
777 b4 = *iso++;
778 }
779 else
780 {
781 elog(ERROR, "unsupported character length %d", l);
782 iiso = 0; /* keep compiler quiet */
783 }
784 iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
785
786 if (map)
787 {
788 uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
789
790 if (converted)
791 {
792 utf = store_coded_char(utf, converted);
793 continue;
794 }
795
796 /* If there's a combined character map, try that */
797 if (cmap)
798 {
799 cp = bsearch(&iiso, cmap, cmapsize,
801
802 if (cp)
803 {
804 utf = store_coded_char(utf, cp->utf1);
805 utf = store_coded_char(utf, cp->utf2);
806 continue;
807 }
808 }
809 }
810
811 /* if there's a conversion function, try that */
812 if (conv_func)
813 {
814 uint32 converted = (*conv_func) (iiso);
815
816 if (converted)
817 {
818 utf = store_coded_char(utf, converted);
819 continue;
820 }
821 }
822
823 /* failed to translate this character */
824 iso -= l;
825 if (noError)
826 break;
828 (const char *) iso, len);
829 }
830
831 /* if we broke out of loop early, must be invalid input */
832 if (len > 0 && !noError)
833 report_invalid_encoding(encoding, (const char *) iso, len);
834
835 *utf = '\0';
836
837 return iso - start;
838}
static unsigned char * store_coded_char(unsigned char *dest, uint32 code)
Definition: conv.c:353
static uint32 pg_mb_radix_conv(const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)
Definition: conv.c:373
static int compare4(const void *p1, const void *p2)
Definition: conv.c:339
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
@ PG_UTF8
Definition: pg_wchar.h:232
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:287
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition: wchar.c:2150

References compare4(), elog, encoding, ereport, errcode(), errmsg(), ERROR, IS_HIGHBIT_SET, len, pg_encoding_verifymbchar(), pg_mb_radix_conv(), PG_UTF8, PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), start, store_coded_char(), pg_local_to_utf_combined::utf1, and pg_local_to_utf_combined::utf2.

Referenced by big5_to_utf8(), euc_cn_to_utf8(), euc_jis_2004_to_utf8(), euc_jp_to_utf8(), euc_kr_to_utf8(), euc_tw_to_utf8(), gb18030_to_utf8(), gbk_to_utf8(), iso8859_to_utf8(), johab_to_utf8(), koi8r_to_utf8(), koi8u_to_utf8(), shift_jis_2004_to_utf8(), sjis_to_utf8(), uhc_to_utf8(), and win_to_utf8().

◆ mic2latin()

int mic2latin ( const unsigned char *  mic,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
bool  noError 
)

Definition at line 127 of file conv.c.

129{
130 const unsigned char *start = mic;
131 int c1;
132
133 while (len > 0)
134 {
135 c1 = *mic;
136 if (c1 == 0)
137 {
138 if (noError)
139 break;
140 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
141 }
142 if (!IS_HIGHBIT_SET(c1))
143 {
144 /* easy for ASCII */
145 *p++ = c1;
146 mic++;
147 len--;
148 }
149 else
150 {
151 int l = pg_mule_mblen(mic);
152
153 if (len < l)
154 {
155 if (noError)
156 break;
157 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
158 len);
159 }
160 if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
161 {
162 if (noError)
163 break;
165 (const char *) mic, len);
166 }
167 *p++ = mic[1];
168 mic += 2;
169 len -= 2;
170 }
171 }
172 *p = '\0';
173
174 return mic - start;
175}
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:791

References encoding, IS_HIGHBIT_SET, len, PG_MULE_INTERNAL, pg_mule_mblen(), report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by mic_to_koi8r(), mic_to_latin1(), mic_to_latin2(), mic_to_latin3(), and mic_to_latin4().

◆ mic2latin_with_table()

int mic2latin_with_table ( const unsigned char *  mic,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
const unsigned char *  tab,
bool  noError 
)

Definition at line 257 of file conv.c.

264{
265 const unsigned char *start = mic;
266 unsigned char c1,
267 c2;
268
269 while (len > 0)
270 {
271 c1 = *mic;
272 if (c1 == 0)
273 {
274 if (noError)
275 break;
276 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
277 }
278 if (!IS_HIGHBIT_SET(c1))
279 {
280 /* easy for ASCII */
281 *p++ = c1;
282 mic++;
283 len--;
284 }
285 else
286 {
287 int l = pg_mule_mblen(mic);
288
289 if (len < l)
290 {
291 if (noError)
292 break;
293 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
294 len);
295 }
296 if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
297 (c2 = tab[mic[1] - HIGHBIT]) == 0)
298 {
299 if (noError)
300 break;
302 (const char *) mic, len);
303 break; /* keep compiler quiet */
304 }
305 *p++ = c2;
306 mic += 2;
307 len -= 2;
308 }
309 }
310 *p = '\0';
311
312 return mic - start;
313}

References encoding, HIGHBIT, IS_HIGHBIT_SET, len, PG_MULE_INTERNAL, pg_mule_mblen(), report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by mic_to_iso(), mic_to_win1250(), mic_to_win1251(), and mic_to_win866().

◆ pg_mb_radix_conv()

static uint32 pg_mb_radix_conv ( const pg_mb_radix_tree *  rt,
int  l,
unsigned char  b1,
unsigned char  b2,
unsigned char  b3,
unsigned char  b4 
)
inline static

Definition at line 373 of file conv.c.

379{
380 if (l == 4)
381 {
382 /* 4-byte code */
383
384 /* check code validity */
385 if (b1 < rt->b4_1_lower || b1 > rt->b4_1_upper ||
386 b2 < rt->b4_2_lower || b2 > rt->b4_2_upper ||
387 b3 < rt->b4_3_lower || b3 > rt->b4_3_upper ||
388 b4 < rt->b4_4_lower || b4 > rt->b4_4_upper)
389 return 0;
390
391 /* perform lookup */
392 if (rt->chars32)
393 {
394 uint32 idx = rt->b4root;
395
396 idx = rt->chars32[b1 + idx - rt->b4_1_lower];
397 idx = rt->chars32[b2 + idx - rt->b4_2_lower];
398 idx = rt->chars32[b3 + idx - rt->b4_3_lower];
399 return rt->chars32[b4 + idx - rt->b4_4_lower];
400 }
401 else
402 {
403 uint16 idx = rt->b4root;
404
405 idx = rt->chars16[b1 + idx - rt->b4_1_lower];
406 idx = rt->chars16[b2 + idx - rt->b4_2_lower];
407 idx = rt->chars16[b3 + idx - rt->b4_3_lower];
408 return rt->chars16[b4 + idx - rt->b4_4_lower];
409 }
410 }
411 else if (l == 3)
412 {
413 /* 3-byte code */
414
415 /* check code validity */
416 if (b2 < rt->b3_1_lower || b2 > rt->b3_1_upper ||
417 b3 < rt->b3_2_lower || b3 > rt->b3_2_upper ||
418 b4 < rt->b3_3_lower || b4 > rt->b3_3_upper)
419 return 0;
420
421 /* perform lookup */
422 if (rt->chars32)
423 {
424 uint32 idx = rt->b3root;
425
426 idx = rt->chars32[b2 + idx - rt->b3_1_lower];
427 idx = rt->chars32[b3 + idx - rt->b3_2_lower];
428 return rt->chars32[b4 + idx - rt->b3_3_lower];
429 }
430 else
431 {
432 uint16 idx = rt->b3root;
433
434 idx = rt->chars16[b2 + idx - rt->b3_1_lower];
435 idx = rt->chars16[b3 + idx - rt->b3_2_lower];
436 return rt->chars16[b4 + idx - rt->b3_3_lower];
437 }
438 }
439 else if (l == 2)
440 {
441 /* 2-byte code */
442
443 /* check code validity - first byte */
444 if (b3 < rt->b2_1_lower || b3 > rt->b2_1_upper ||
445 b4 < rt->b2_2_lower || b4 > rt->b2_2_upper)
446 return 0;
447
448 /* perform lookup */
449 if (rt->chars32)
450 {
451 uint32 idx = rt->b2root;
452
453 idx = rt->chars32[b3 + idx - rt->b2_1_lower];
454 return rt->chars32[b4 + idx - rt->b2_2_lower];
455 }
456 else
457 {
458 uint16 idx = rt->b2root;
459
460 idx = rt->chars16[b3 + idx - rt->b2_1_lower];
461 return rt->chars16[b4 + idx - rt->b2_2_lower];
462 }
463 }
464 else if (l == 1)
465 {
466 /* 1-byte code */
467
468 /* check code validity - first byte */
469 if (b4 < rt->b1_lower || b4 > rt->b1_upper)
470 return 0;
471
472 /* perform lookup */
473 if (rt->chars32)
474 return rt->chars32[b4 + rt->b1root - rt->b1_lower];
475 else
476 return rt->chars16[b4 + rt->b1root - rt->b1_lower];
477 }
478 return 0; /* shouldn't happen */
479}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
uint16_t uint16
Definition: c.h:501
uint8 b4_3_lower
Definition: pg_wchar.h:467
uint8 b4_1_lower
Definition: pg_wchar.h:463
const uint32 * chars32
Definition: pg_wchar.h:438
uint8 b2_2_lower
Definition: pg_wchar.h:449
uint8 b4_4_upper
Definition: pg_wchar.h:470
uint8 b4_2_lower
Definition: pg_wchar.h:465
uint8 b2_1_upper
Definition: pg_wchar.h:448
uint8 b4_3_upper
Definition: pg_wchar.h:468
uint8 b3_1_lower
Definition: pg_wchar.h:454
uint8 b3_3_lower
Definition: pg_wchar.h:458
uint8 b3_3_upper
Definition: pg_wchar.h:459
uint8 b2_2_upper
Definition: pg_wchar.h:450
uint8 b3_1_upper
Definition: pg_wchar.h:455
uint8 b4_4_lower
Definition: pg_wchar.h:469
const uint16 * chars16
Definition: pg_wchar.h:437
uint8 b4_1_upper
Definition: pg_wchar.h:464
uint8 b2_1_lower
Definition: pg_wchar.h:447
uint8 b4_2_upper
Definition: pg_wchar.h:466
uint8 b3_2_upper
Definition: pg_wchar.h:457
uint8 b3_2_lower
Definition: pg_wchar.h:456

References pg_mb_radix_tree::b1_lower, pg_mb_radix_tree::b1_upper, pg_mb_radix_tree::b1root, pg_mb_radix_tree::b2_1_lower, pg_mb_radix_tree::b2_1_upper, pg_mb_radix_tree::b2_2_lower, pg_mb_radix_tree::b2_2_upper, pg_mb_radix_tree::b2root, pg_mb_radix_tree::b3_1_lower, pg_mb_radix_tree::b3_1_upper, pg_mb_radix_tree::b3_2_lower, pg_mb_radix_tree::b3_2_upper, pg_mb_radix_tree::b3_3_lower, pg_mb_radix_tree::b3_3_upper, pg_mb_radix_tree::b3root, pg_mb_radix_tree::b4_1_lower, pg_mb_radix_tree::b4_1_upper, pg_mb_radix_tree::b4_2_lower, pg_mb_radix_tree::b4_2_upper, pg_mb_radix_tree::b4_3_lower, pg_mb_radix_tree::b4_3_upper, pg_mb_radix_tree::b4_4_lower, pg_mb_radix_tree::b4_4_upper, pg_mb_radix_tree::b4root, pg_mb_radix_tree::chars16, pg_mb_radix_tree::chars32, and idx().

Referenced by LocalToUtf(), and UtfToLocal().

◆ store_coded_char()

static unsigned char * store_coded_char ( unsigned char *  dest,
uint32  code 
)
inline static

Definition at line 353 of file conv.c.

354{
355 if (code & 0xff000000)
356 *dest++ = code >> 24;
357 if (code & 0x00ff0000)
358 *dest++ = code >> 16;
359 if (code & 0x0000ff00)
360 *dest++ = code >> 8;
361 if (code & 0x000000ff)
362 *dest++ = code;
363 return dest;
364}

References generate_unaccent_rules::dest.

Referenced by LocalToUtf(), and UtfToLocal().

◆ UtfToLocal()

int UtfToLocal ( const unsigned char *  utf,
int  len,
unsigned char *  iso,
const pg_mb_radix_tree *  map,
const pg_utf_to_local_combined *  cmap,
int  cmapsize,
utf_local_conversion_func  conv_func,
int  encoding,
bool  noError 
)

Definition at line 507 of file conv.c.

513{
514 uint32 iutf;
515 int l;
516 const pg_utf_to_local_combined *cp;
517 const unsigned char *start = utf;
518
521 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
522 errmsg("invalid encoding number: %d", encoding)));
523
524 for (; len > 0; len -= l)
525 {
526 unsigned char b1 = 0;
527 unsigned char b2 = 0;
528 unsigned char b3 = 0;
529 unsigned char b4 = 0;
530
531 /* "break" cases all represent errors */
532 if (*utf == '\0')
533 break;
534
535 l = pg_utf_mblen(utf);
536 if (len < l)
537 break;
538
539 if (!pg_utf8_islegal(utf, l))
540 break;
541
542 if (l == 1)
543 {
544 /* ASCII case is easy, assume it's one-to-one conversion */
545 *iso++ = *utf++;
546 continue;
547 }
548
549 /* collect coded char of length l */
550 if (l == 2)
551 {
552 b3 = *utf++;
553 b4 = *utf++;
554 }
555 else if (l == 3)
556 {
557 b2 = *utf++;
558 b3 = *utf++;
559 b4 = *utf++;
560 }
561 else if (l == 4)
562 {
563 b1 = *utf++;
564 b2 = *utf++;
565 b3 = *utf++;
566 b4 = *utf++;
567 }
568 else
569 {
570 elog(ERROR, "unsupported character length %d", l);
571 iutf = 0; /* keep compiler quiet */
572 }
573 iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
574
575 /* First, try with combined map if possible */
576 if (cmap && len > l)
577 {
578 const unsigned char *utf_save = utf;
579 int len_save = len;
580 int l_save = l;
581
582 /* collect next character, same as above */
583 len -= l;
584
585 l = pg_utf_mblen(utf);
586 if (len < l)
587 {
588 /* need more data to decide if this is a combined char */
589 utf -= l_save;
590 break;
591 }
592
593 if (!pg_utf8_islegal(utf, l))
594 {
595 if (!noError)
596 report_invalid_encoding(PG_UTF8, (const char *) utf, len);
597 utf -= l_save;
598 break;
599 }
600
601 /* We assume ASCII character cannot be in combined map */
602 if (l > 1)
603 {
604 uint32 iutf2;
605 uint32 cutf[2];
606
607 if (l == 2)
608 {
609 iutf2 = *utf++ << 8;
610 iutf2 |= *utf++;
611 }
612 else if (l == 3)
613 {
614 iutf2 = *utf++ << 16;
615 iutf2 |= *utf++ << 8;
616 iutf2 |= *utf++;
617 }
618 else if (l == 4)
619 {
620 iutf2 = *utf++ << 24;
621 iutf2 |= *utf++ << 16;
622 iutf2 |= *utf++ << 8;
623 iutf2 |= *utf++;
624 }
625 else
626 {
627 elog(ERROR, "unsupported character length %d", l);
628 iutf2 = 0; /* keep compiler quiet */
629 }
630
631 cutf[0] = iutf;
632 cutf[1] = iutf2;
633
634 cp = bsearch(cutf, cmap, cmapsize,
636
637 if (cp)
638 {
639 iso = store_coded_char(iso, cp->code);
640 continue;
641 }
642 }
643
644 /* fail, so back up to reprocess second character next time */
645 utf = utf_save;
646 len = len_save;
647 l = l_save;
648 }
649
650 /* Now check ordinary map */
651 if (map)
652 {
653 uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
654
655 if (converted)
656 {
657 iso = store_coded_char(iso, converted);
658 continue;
659 }
660 }
661
662 /* if there's a conversion function, try that */
663 if (conv_func)
664 {
665 uint32 converted = (*conv_func) (iutf);
666
667 if (converted)
668 {
669 iso = store_coded_char(iso, converted);
670 continue;
671 }
672 }
673
674 /* failed to translate this character */
675 utf -= l;
676 if (noError)
677 break;
679 (const char *) utf, len);
680 }
681
682 /* if we broke out of loop early, must be invalid input */
683 if (len > 0 && !noError)
684 report_invalid_encoding(PG_UTF8, (const char *) utf, len);
685
686 *iso = '\0';
687
688 return utf - start;
689}
static int compare3(const void *p1, const void *p2)
Definition: conv.c:320
#define pg_utf_mblen
Definition: pg_wchar.h:633
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1987

References pg_utf_to_local_combined::code, compare3(), elog, encoding, ereport, errcode(), errmsg(), ERROR, len, pg_mb_radix_conv(), PG_UTF8, pg_utf8_islegal(), pg_utf_mblen, PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), start, and store_coded_char().

Referenced by utf8_to_big5(), utf8_to_euc_cn(), utf8_to_euc_jis_2004(), utf8_to_euc_jp(), utf8_to_euc_kr(), utf8_to_euc_tw(), utf8_to_gb18030(), utf8_to_gbk(), utf8_to_iso8859(), utf8_to_johab(), utf8_to_koi8r(), utf8_to_koi8u(), utf8_to_shift_jis_2004(), utf8_to_sjis(), utf8_to_uhc(), and utf8_to_win().