PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
utf8_and_iso8859_1.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * ISO8859_1 <--> UTF8
4 *
5 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
6 * Portions Copyright (c) 1994, Regents of the University of California
7 *
8 * IDENTIFICATION
9 * src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
10 *
11 *-------------------------------------------------------------------------
12 */
13
14#include "postgres.h"
15#include "fmgr.h"
16#include "mb/pg_wchar.h"
17
19
22
23/* ----------
24 * conv_proc(
25 * INTEGER, -- source encoding id
26 * INTEGER, -- destination encoding id
27 * CSTRING, -- source string (null terminated C string)
28 * CSTRING, -- destination string (null terminated C string)
29 * INTEGER, -- source string length
30 * BOOL -- if true, don't throw an error if conversion fails
31 * ) returns INTEGER;
32 *
33 * Returns the number of bytes successfully converted.
34 * ----------
35 */
36
39{
40 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
41 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
42 int len = PG_GETARG_INT32(4);
43 bool noError = PG_GETARG_BOOL(5);
44 unsigned char *start = src;
45 unsigned short c;
46
48
49 while (len > 0)
50 {
51 c = *src;
52 if (c == 0)
53 {
54 if (noError)
55 break;
56 report_invalid_encoding(PG_LATIN1, (const char *) src, len);
57 }
58 if (!IS_HIGHBIT_SET(c))
59 *dest++ = c;
60 else
61 {
62 *dest++ = (c >> 6) | 0xc0;
63 *dest++ = (c & 0x003f) | HIGHBIT;
64 }
65 src++;
66 len--;
67 }
68 *dest = '\0';
69
71}
72
75{
76 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
77 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
78 int len = PG_GETARG_INT32(4);
79 bool noError = PG_GETARG_BOOL(5);
80 unsigned char *start = src;
81 unsigned short c,
82 c1;
83
85
86 while (len > 0)
87 {
88 c = *src;
89 if (c == 0)
90 {
91 if (noError)
92 break;
93 report_invalid_encoding(PG_UTF8, (const char *) src, len);
94 }
95 /* fast path for ASCII-subset characters */
96 if (!IS_HIGHBIT_SET(c))
97 {
98 *dest++ = c;
99 src++;
100 len--;
101 }
102 else
103 {
104 int l = pg_utf_mblen(src);
105
106 if (l > len || !pg_utf8_islegal(src, l))
107 {
108 if (noError)
109 break;
110 report_invalid_encoding(PG_UTF8, (const char *) src, len);
111 }
112 if (l != 2)
113 {
114 if (noError)
115 break;
117 (const char *) src, len);
118 }
119 c1 = src[1] & 0x3f;
120 c = ((c & 0x1f) << 6) | c1;
121 if (c >= 0x80 && c <= 0xff)
122 {
123 *dest++ = (unsigned char) c;
124 src += 2;
125 len -= 2;
126 }
127 else
128 {
129 if (noError)
130 break;
132 (const char *) src, len);
133 }
134 }
135 }
136 *dest = '\0';
137
138 PG_RETURN_INT32(src - start);
139}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1126
#define HIGHBIT
Definition: c.h:1125
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
return str start
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1730
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698
const void size_t len
#define pg_utf_mblen
Definition: pg_wchar.h:633
@ PG_LATIN1
Definition: pg_wchar.h:234
@ PG_UTF8
Definition: pg_wchar.h:232
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:507
uintptr_t Datum
Definition: postgres.h:69
char * c
Datum utf8_to_iso8859_1(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC
PG_FUNCTION_INFO_V1(iso8859_1_to_utf8)
Datum iso8859_1_to_utf8(PG_FUNCTION_ARGS)
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1987