PostgreSQL Source Code git master
conv.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * Utility functions for conversion procs.
4 *
5 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
6 * Portions Copyright (c) 1994, Regents of the University of California
7 *
8 * IDENTIFICATION
9 * src/backend/utils/mb/conv.c
10 *
11 *-------------------------------------------------------------------------
12 */
13#include "postgres.h"
14#include "mb/pg_wchar.h"
15
16
17/*
18 * local2local: a generic single byte charset encoding
19 * conversion between two ASCII-superset encodings.
20 *
21 * l points to the source string of length len
22 * p is the output area (must be large enough!)
23 * src_encoding is the PG identifier for the source encoding
24 * dest_encoding is the PG identifier for the target encoding
25 * tab holds conversion entries for the source charset
26 * starting from 128 (0x80). each entry in the table holds the corresponding
27 * code point for the target charset, or 0 if there is no equivalent code.
28 *
29 * Returns the number of input bytes consumed. If noError is true, this can
30 * be less than 'len'.
31 */
32int
33local2local(const unsigned char *l,
34 unsigned char *p,
35 int len,
36 int src_encoding,
37 int dest_encoding,
38 const unsigned char *tab,
39 bool noError)
40{
41 const unsigned char *start = l;
42 unsigned char c1,
43 c2;
44
45 while (len > 0)
46 {
47 c1 = *l;
48 if (c1 == 0)
49 {
50 if (noError)
51 break;
52 report_invalid_encoding(src_encoding, (const char *) l, len);
53 }
54 if (!IS_HIGHBIT_SET(c1))
55 *p++ = c1;
56 else
57 {
58 c2 = tab[c1 - HIGHBIT];
59 if (c2)
60 *p++ = c2;
61 else
62 {
63 if (noError)
64 break;
65 report_untranslatable_char(src_encoding, dest_encoding,
66 (const char *) l, len);
67 }
68 }
69 l++;
70 len--;
71 }
72 *p = '\0';
73
74 return l - start;
75}
76
/*
 * LATINn ---> MIC when the charset's local codes map directly to MIC
 *
 * l is the source string, len bytes long
 * p is the output buffer (the caller must make it large enough!)
 * lc is the mule character set id for the local encoding
 * encoding is the PG identifier for the local encoding
 * noError: if true, stop quietly at an invalid byte instead of calling
 *		report_invalid_encoding()
 *
 * ASCII bytes are copied as-is; every high-bit byte is emitted as the
 * two-byte sequence (lc, byte).  The output is always NUL-terminated.
 *
 * Returns the number of input bytes consumed.  If noError is true, this can
 * be less than 'len'.
 */
int
latin2mic(const unsigned char *l, unsigned char *p, int len,
		  int lc, int encoding, bool noError)
{
	const unsigned char *const begin = l;

	for (; len > 0; l++, len--)
	{
		const int	ch = *l;

		/* a zero byte inside the input is treated as invalid */
		if (ch == 0)
		{
			if (noError)
				break;
			report_invalid_encoding(encoding, (const char *) l, len);
		}

		/* non-ASCII bytes are prefixed with the charset id */
		if (IS_HIGHBIT_SET(ch))
			*p++ = lc;
		*p++ = ch;
	}
	*p = '\0';

	return l - begin;
}
114
115/*
116 * MIC ---> LATINn when the charset's local codes map directly to MIC
117 *
118 * mic points to the source string of length len
119 * p is the output area (must be large enough!)
120 * lc is the mule character set id for the local encoding
121 * encoding is the PG identifier for the local encoding
122 *
123 * Returns the number of input bytes consumed. If noError is true, this can
124 * be less than 'len'.
125 */
126int
127mic2latin(const unsigned char *mic, unsigned char *p, int len,
128 int lc, int encoding, bool noError)
129{
130 const unsigned char *start = mic;
131 int c1;
132
133 while (len > 0)
134 {
135 c1 = *mic;
136 if (c1 == 0)
137 {
138 if (noError)
139 break;
140 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
141 }
142 if (!IS_HIGHBIT_SET(c1))
143 {
144 /* easy for ASCII */
145 *p++ = c1;
146 mic++;
147 len--;
148 }
149 else
150 {
151 int l = pg_mule_mblen(mic);
152
153 if (len < l)
154 {
155 if (noError)
156 break;
157 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
158 len);
159 }
160 if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
161 {
162 if (noError)
163 break;
165 (const char *) mic, len);
166 }
167 *p++ = mic[1];
168 mic += 2;
169 len -= 2;
170 }
171 }
172 *p = '\0';
173
174 return mic - start;
175}
176
177
178/*
179 * latin2mic_with_table: a generic single byte charset encoding
180 * conversion from a local charset to the mule internal code.
181 *
182 * l points to the source string of length len
183 * p is the output area (must be large enough!)
184 * lc is the mule character set id for the local encoding
185 * encoding is the PG identifier for the local encoding
186 * tab holds conversion entries for the local charset
187 * starting from 128 (0x80). each entry in the table holds the corresponding
188 * code point for the mule encoding, or 0 if there is no equivalent code.
189 *
190 * Returns the number of input bytes consumed. If noError is true, this can
191 * be less than 'len'.
192 */
193int
194latin2mic_with_table(const unsigned char *l,
195 unsigned char *p,
196 int len,
197 int lc,
198 int encoding,
199 const unsigned char *tab,
200 bool noError)
201{
202 const unsigned char *start = l;
203 unsigned char c1,
204 c2;
205
206 while (len > 0)
207 {
208 c1 = *l;
209 if (c1 == 0)
210 {
211 if (noError)
212 break;
213 report_invalid_encoding(encoding, (const char *) l, len);
214 }
215 if (!IS_HIGHBIT_SET(c1))
216 *p++ = c1;
217 else
218 {
219 c2 = tab[c1 - HIGHBIT];
220 if (c2)
221 {
222 *p++ = lc;
223 *p++ = c2;
224 }
225 else
226 {
227 if (noError)
228 break;
230 (const char *) l, len);
231 }
232 }
233 l++;
234 len--;
235 }
236 *p = '\0';
237
238 return l - start;
239}
240
241/*
242 * mic2latin_with_table: a generic single byte charset encoding
243 * conversion from the mule internal code to a local charset.
244 *
245 * mic points to the source string of length len
246 * p is the output area (must be large enough!)
247 * lc is the mule character set id for the local encoding
248 * encoding is the PG identifier for the local encoding
249 * tab holds conversion entries for the mule internal code's second byte,
250 * starting from 128 (0x80). each entry in the table holds the corresponding
251 * code point for the local charset, or 0 if there is no equivalent code.
252 *
253 * Returns the number of input bytes consumed. If noError is true, this can
254 * be less than 'len'.
255 */
256int
257mic2latin_with_table(const unsigned char *mic,
258 unsigned char *p,
259 int len,
260 int lc,
261 int encoding,
262 const unsigned char *tab,
263 bool noError)
264{
265 const unsigned char *start = mic;
266 unsigned char c1,
267 c2;
268
269 while (len > 0)
270 {
271 c1 = *mic;
272 if (c1 == 0)
273 {
274 if (noError)
275 break;
276 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
277 }
278 if (!IS_HIGHBIT_SET(c1))
279 {
280 /* easy for ASCII */
281 *p++ = c1;
282 mic++;
283 len--;
284 }
285 else
286 {
287 int l = pg_mule_mblen(mic);
288
289 if (len < l)
290 {
291 if (noError)
292 break;
293 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
294 len);
295 }
296 if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
297 (c2 = tab[mic[1] - HIGHBIT]) == 0)
298 {
299 if (noError)
300 break;
302 (const char *) mic, len);
303 break; /* keep compiler quiet */
304 }
305 *p++ = c2;
306 mic += 2;
307 len -= 2;
308 }
309 }
310 *p = '\0';
311
312 return mic - start;
313}
314
315/*
316 * comparison routine for bsearch()
317 * this routine is intended for combined UTF8 -> local code
318 */
319static int
320compare3(const void *p1, const void *p2)
321{
322 uint32 s1,
323 s2,
324 d1,
325 d2;
326
327 s1 = *(const uint32 *) p1;
328 s2 = *((const uint32 *) p1 + 1);
329 d1 = ((const pg_utf_to_local_combined *) p2)->utf1;
330 d2 = ((const pg_utf_to_local_combined *) p2)->utf2;
331 return (s1 > d1 || (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1);
332}
333
334/*
335 * comparison routine for bsearch()
336 * this routine is intended for local code -> combined UTF8
337 */
338static int
339compare4(const void *p1, const void *p2)
340{
341 uint32 v1,
342 v2;
343
344 v1 = *(const uint32 *) p1;
345 v2 = ((const pg_local_to_utf_combined *) p2)->code;
346 return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
347}
348
349/*
350 * store 32bit character representation into multibyte stream
351 */
352static inline unsigned char *
353store_coded_char(unsigned char *dest, uint32 code)
354{
355 if (code & 0xff000000)
356 *dest++ = code >> 24;
357 if (code & 0x00ff0000)
358 *dest++ = code >> 16;
359 if (code & 0x0000ff00)
360 *dest++ = code >> 8;
361 if (code & 0x000000ff)
362 *dest++ = code;
363 return dest;
364}
365
366/*
367 * Convert a character using a conversion radix tree.
368 *
369 * 'l' is the length of the input character in bytes, and b1-b4 are
370 * the input character's bytes.
371 */
372static inline uint32
374 int l,
375 unsigned char b1,
376 unsigned char b2,
377 unsigned char b3,
378 unsigned char b4)
379{
380 if (l == 4)
381 {
382 /* 4-byte code */
383
384 /* check code validity */
385 if (b1 < rt->b4_1_lower || b1 > rt->b4_1_upper ||
386 b2 < rt->b4_2_lower || b2 > rt->b4_2_upper ||
387 b3 < rt->b4_3_lower || b3 > rt->b4_3_upper ||
388 b4 < rt->b4_4_lower || b4 > rt->b4_4_upper)
389 return 0;
390
391 /* perform lookup */
392 if (rt->chars32)
393 {
394 uint32 idx = rt->b4root;
395
396 idx = rt->chars32[b1 + idx - rt->b4_1_lower];
397 idx = rt->chars32[b2 + idx - rt->b4_2_lower];
398 idx = rt->chars32[b3 + idx - rt->b4_3_lower];
399 return rt->chars32[b4 + idx - rt->b4_4_lower];
400 }
401 else
402 {
403 uint16 idx = rt->b4root;
404
405 idx = rt->chars16[b1 + idx - rt->b4_1_lower];
406 idx = rt->chars16[b2 + idx - rt->b4_2_lower];
407 idx = rt->chars16[b3 + idx - rt->b4_3_lower];
408 return rt->chars16[b4 + idx - rt->b4_4_lower];
409 }
410 }
411 else if (l == 3)
412 {
413 /* 3-byte code */
414
415 /* check code validity */
416 if (b2 < rt->b3_1_lower || b2 > rt->b3_1_upper ||
417 b3 < rt->b3_2_lower || b3 > rt->b3_2_upper ||
418 b4 < rt->b3_3_lower || b4 > rt->b3_3_upper)
419 return 0;
420
421 /* perform lookup */
422 if (rt->chars32)
423 {
424 uint32 idx = rt->b3root;
425
426 idx = rt->chars32[b2 + idx - rt->b3_1_lower];
427 idx = rt->chars32[b3 + idx - rt->b3_2_lower];
428 return rt->chars32[b4 + idx - rt->b3_3_lower];
429 }
430 else
431 {
432 uint16 idx = rt->b3root;
433
434 idx = rt->chars16[b2 + idx - rt->b3_1_lower];
435 idx = rt->chars16[b3 + idx - rt->b3_2_lower];
436 return rt->chars16[b4 + idx - rt->b3_3_lower];
437 }
438 }
439 else if (l == 2)
440 {
441 /* 2-byte code */
442
443 /* check code validity - first byte */
444 if (b3 < rt->b2_1_lower || b3 > rt->b2_1_upper ||
445 b4 < rt->b2_2_lower || b4 > rt->b2_2_upper)
446 return 0;
447
448 /* perform lookup */
449 if (rt->chars32)
450 {
451 uint32 idx = rt->b2root;
452
453 idx = rt->chars32[b3 + idx - rt->b2_1_lower];
454 return rt->chars32[b4 + idx - rt->b2_2_lower];
455 }
456 else
457 {
458 uint16 idx = rt->b2root;
459
460 idx = rt->chars16[b3 + idx - rt->b2_1_lower];
461 return rt->chars16[b4 + idx - rt->b2_2_lower];
462 }
463 }
464 else if (l == 1)
465 {
466 /* 1-byte code */
467
468 /* check code validity - first byte */
469 if (b4 < rt->b1_lower || b4 > rt->b1_upper)
470 return 0;
471
472 /* perform lookup */
473 if (rt->chars32)
474 return rt->chars32[b4 + rt->b1root - rt->b1_lower];
475 else
476 return rt->chars16[b4 + rt->b1root - rt->b1_lower];
477 }
478 return 0; /* shouldn't happen */
479}
480
481/*
482 * UTF8 ---> local code
483 *
484 * utf: input string in UTF8 encoding (need not be null-terminated)
485 * len: length of input string (in bytes)
486 * iso: pointer to the output area (must be large enough!)
487 (output string will be null-terminated)
488 * map: conversion map for single characters
489 * cmap: conversion map for combined characters
490 * (optional, pass NULL if none)
491 * cmapsize: number of entries in the conversion map for combined characters
492 * (optional, pass 0 if none)
493 * conv_func: algorithmic encoding conversion function
494 * (optional, pass NULL if none)
495 * encoding: PG identifier for the local encoding
496 *
497 * For each character, the cmap (if provided) is consulted first; if no match,
498 * the map is consulted next; if still no match, the conv_func (if provided)
499 * is applied. An error is raised if no match is found.
500 *
501 * See pg_wchar.h for more details about the data structures used here.
502 *
503 * Returns the number of input bytes consumed. If noError is true, this can
504 * be less than 'len'.
505 */
506int
507UtfToLocal(const unsigned char *utf, int len,
508 unsigned char *iso,
509 const pg_mb_radix_tree *map,
510 const pg_utf_to_local_combined *cmap, int cmapsize,
512 int encoding, bool noError)
513{
514 uint32 iutf;
515 int l;
516 const pg_utf_to_local_combined *cp;
517 const unsigned char *start = utf;
518
521 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
522 errmsg("invalid encoding number: %d", encoding)));
523
524 for (; len > 0; len -= l)
525 {
526 unsigned char b1 = 0;
527 unsigned char b2 = 0;
528 unsigned char b3 = 0;
529 unsigned char b4 = 0;
530
531 /* "break" cases all represent errors */
532 if (*utf == '\0')
533 break;
534
535 l = pg_utf_mblen(utf);
536 if (len < l)
537 break;
538
539 if (!pg_utf8_islegal(utf, l))
540 break;
541
542 if (l == 1)
543 {
544 /* ASCII case is easy, assume it's one-to-one conversion */
545 *iso++ = *utf++;
546 continue;
547 }
548
549 /* collect coded char of length l */
550 if (l == 2)
551 {
552 b3 = *utf++;
553 b4 = *utf++;
554 }
555 else if (l == 3)
556 {
557 b2 = *utf++;
558 b3 = *utf++;
559 b4 = *utf++;
560 }
561 else if (l == 4)
562 {
563 b1 = *utf++;
564 b2 = *utf++;
565 b3 = *utf++;
566 b4 = *utf++;
567 }
568 else
569 {
570 elog(ERROR, "unsupported character length %d", l);
571 iutf = 0; /* keep compiler quiet */
572 }
573 iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
574
575 /* First, try with combined map if possible */
576 if (cmap && len > l)
577 {
578 const unsigned char *utf_save = utf;
579 int len_save = len;
580 int l_save = l;
581
582 /* collect next character, same as above */
583 len -= l;
584
585 l = pg_utf_mblen(utf);
586 if (len < l)
587 {
588 /* need more data to decide if this is a combined char */
589 utf -= l_save;
590 break;
591 }
592
593 if (!pg_utf8_islegal(utf, l))
594 {
595 if (!noError)
596 report_invalid_encoding(PG_UTF8, (const char *) utf, len);
597 utf -= l_save;
598 break;
599 }
600
601 /* We assume ASCII character cannot be in combined map */
602 if (l > 1)
603 {
604 uint32 iutf2;
605 uint32 cutf[2];
606
607 if (l == 2)
608 {
609 iutf2 = *utf++ << 8;
610 iutf2 |= *utf++;
611 }
612 else if (l == 3)
613 {
614 iutf2 = *utf++ << 16;
615 iutf2 |= *utf++ << 8;
616 iutf2 |= *utf++;
617 }
618 else if (l == 4)
619 {
620 iutf2 = *utf++ << 24;
621 iutf2 |= *utf++ << 16;
622 iutf2 |= *utf++ << 8;
623 iutf2 |= *utf++;
624 }
625 else
626 {
627 elog(ERROR, "unsupported character length %d", l);
628 iutf2 = 0; /* keep compiler quiet */
629 }
630
631 cutf[0] = iutf;
632 cutf[1] = iutf2;
633
634 cp = bsearch(cutf, cmap, cmapsize,
636
637 if (cp)
638 {
639 iso = store_coded_char(iso, cp->code);
640 continue;
641 }
642 }
643
644 /* fail, so back up to reprocess second character next time */
645 utf = utf_save;
646 len = len_save;
647 l = l_save;
648 }
649
650 /* Now check ordinary map */
651 if (map)
652 {
653 uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
654
655 if (converted)
656 {
657 iso = store_coded_char(iso, converted);
658 continue;
659 }
660 }
661
662 /* if there's a conversion function, try that */
663 if (conv_func)
664 {
665 uint32 converted = (*conv_func) (iutf);
666
667 if (converted)
668 {
669 iso = store_coded_char(iso, converted);
670 continue;
671 }
672 }
673
674 /* failed to translate this character */
675 utf -= l;
676 if (noError)
677 break;
679 (const char *) utf, len);
680 }
681
682 /* if we broke out of loop early, must be invalid input */
683 if (len > 0 && !noError)
684 report_invalid_encoding(PG_UTF8, (const char *) utf, len);
685
686 *iso = '\0';
687
688 return utf - start;
689}
690
691/*
692 * local code ---> UTF8
693 *
694 * iso: input string in local encoding (need not be null-terminated)
695 * len: length of input string (in bytes)
696 * utf: pointer to the output area (must be large enough!)
697 (output string will be null-terminated)
698 * map: conversion map for single characters
699 * cmap: conversion map for combined characters
700 * (optional, pass NULL if none)
701 * cmapsize: number of entries in the conversion map for combined characters
702 * (optional, pass 0 if none)
703 * conv_func: algorithmic encoding conversion function
704 * (optional, pass NULL if none)
705 * encoding: PG identifier for the local encoding
706 *
707 * For each character, the map is consulted first; if no match, the cmap
708 * (if provided) is consulted next; if still no match, the conv_func
709 * (if provided) is applied. An error is raised if no match is found.
710 *
711 * See pg_wchar.h for more details about the data structures used here.
712 *
713 * Returns the number of input bytes consumed. If noError is true, this can
714 * be less than 'len'.
715 */
716int
717LocalToUtf(const unsigned char *iso, int len,
718 unsigned char *utf,
719 const pg_mb_radix_tree *map,
720 const pg_local_to_utf_combined *cmap, int cmapsize,
722 int encoding,
723 bool noError)
724{
725 uint32 iiso;
726 int l;
727 const pg_local_to_utf_combined *cp;
728 const unsigned char *start = iso;
729
732 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
733 errmsg("invalid encoding number: %d", encoding)));
734
735 for (; len > 0; len -= l)
736 {
737 unsigned char b1 = 0;
738 unsigned char b2 = 0;
739 unsigned char b3 = 0;
740 unsigned char b4 = 0;
741
742 /* "break" cases all represent errors */
743 if (*iso == '\0')
744 break;
745
746 if (!IS_HIGHBIT_SET(*iso))
747 {
748 /* ASCII case is easy, assume it's one-to-one conversion */
749 *utf++ = *iso++;
750 l = 1;
751 continue;
752 }
753
754 l = pg_encoding_verifymbchar(encoding, (const char *) iso, len);
755 if (l < 0)
756 break;
757
758 /* collect coded char of length l */
759 if (l == 1)
760 b4 = *iso++;
761 else if (l == 2)
762 {
763 b3 = *iso++;
764 b4 = *iso++;
765 }
766 else if (l == 3)
767 {
768 b2 = *iso++;
769 b3 = *iso++;
770 b4 = *iso++;
771 }
772 else if (l == 4)
773 {
774 b1 = *iso++;
775 b2 = *iso++;
776 b3 = *iso++;
777 b4 = *iso++;
778 }
779 else
780 {
781 elog(ERROR, "unsupported character length %d", l);
782 iiso = 0; /* keep compiler quiet */
783 }
784 iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
785
786 if (map)
787 {
788 uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
789
790 if (converted)
791 {
792 utf = store_coded_char(utf, converted);
793 continue;
794 }
795
796 /* If there's a combined character map, try that */
797 if (cmap)
798 {
799 cp = bsearch(&iiso, cmap, cmapsize,
801
802 if (cp)
803 {
804 utf = store_coded_char(utf, cp->utf1);
805 utf = store_coded_char(utf, cp->utf2);
806 continue;
807 }
808 }
809 }
810
811 /* if there's a conversion function, try that */
812 if (conv_func)
813 {
814 uint32 converted = (*conv_func) (iiso);
815
816 if (converted)
817 {
818 utf = store_coded_char(utf, converted);
819 continue;
820 }
821 }
822
823 /* failed to translate this character */
824 iso -= l;
825 if (noError)
826 break;
828 (const char *) iso, len);
829 }
830
831 /* if we broke out of loop early, must be invalid input */
832 if (len > 0 && !noError)
833 report_invalid_encoding(encoding, (const char *) iso, len);
834
835 *utf = '\0';
836
837 return iso - start;
838}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1109
uint16_t uint16
Definition: c.h:484
uint32_t uint32
Definition: c.h:485
#define HIGHBIT
Definition: c.h:1108
int mic2latin_with_table(const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)
Definition: conv.c:257
int UtfToLocal(const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
Definition: conv.c:507
static int compare3(const void *p1, const void *p2)
Definition: conv.c:320
int latin2mic_with_table(const unsigned char *l, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)
Definition: conv.c:194
static unsigned char * store_coded_char(unsigned char *dest, uint32 code)
Definition: conv.c:353
int mic2latin(const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, bool noError)
Definition: conv.c:127
static uint32 pg_mb_radix_conv(const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)
Definition: conv.c:373
int LocalToUtf(const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
Definition: conv.c:717
static int compare4(const void *p1, const void *p2)
Definition: conv.c:339
int local2local(const unsigned char *l, unsigned char *p, int len, int src_encoding, int dest_encoding, const unsigned char *tab, bool noError)
Definition: conv.c:33
int latin2mic(const unsigned char *l, unsigned char *p, int len, int lc, int encoding, bool noError)
Definition: conv.c:89
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
return str start
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1730
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698
const void size_t len
int32 encoding
Definition: pg_database.h:41
#define pg_utf_mblen
Definition: pg_wchar.h:633
@ PG_MULE_INTERNAL
Definition: pg_wchar.h:233
@ PG_UTF8
Definition: pg_wchar.h:232
uint32(* utf_local_conversion_func)(uint32 code)
Definition: pg_wchar.h:499
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:287
char * s1
char * s2
uint8 b4_3_lower
Definition: pg_wchar.h:467
uint8 b4_1_lower
Definition: pg_wchar.h:463
const uint32 * chars32
Definition: pg_wchar.h:438
uint8 b2_2_lower
Definition: pg_wchar.h:449
uint8 b4_4_upper
Definition: pg_wchar.h:470
uint8 b4_2_lower
Definition: pg_wchar.h:465
uint8 b2_1_upper
Definition: pg_wchar.h:448
uint8 b4_3_upper
Definition: pg_wchar.h:468
uint8 b3_1_lower
Definition: pg_wchar.h:454
uint8 b3_3_lower
Definition: pg_wchar.h:458
uint8 b3_3_upper
Definition: pg_wchar.h:459
uint8 b2_2_upper
Definition: pg_wchar.h:450
uint8 b3_1_upper
Definition: pg_wchar.h:455
uint8 b4_4_lower
Definition: pg_wchar.h:469
const uint16 * chars16
Definition: pg_wchar.h:437
uint8 b4_1_upper
Definition: pg_wchar.h:464
uint8 b2_1_lower
Definition: pg_wchar.h:447
uint8 b4_2_upper
Definition: pg_wchar.h:466
uint8 b3_2_upper
Definition: pg_wchar.h:457
uint8 b3_2_lower
Definition: pg_wchar.h:456
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1953
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:772
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition: wchar.c:2103