PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
utf8_and_iso8859_1.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 *	  ISO8859_1 <--> UTF8
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
 *
 *-------------------------------------------------------------------------
 */
13
14#include "postgres.h"
15#include "fmgr.h"
16#include "mb/pg_wchar.h"
17
19 .name = "utf8_and_iso8859_1",
20 .version = PG_VERSION
21);
22
25
/* ----------
 * conv_proc(
 *		INTEGER,	-- source encoding id
 *		INTEGER,	-- destination encoding id
 *		CSTRING,	-- source string (null terminated C string)
 *		CSTRING,	-- destination string (null terminated C string)
 *		INTEGER,	-- source string length
 *		BOOL		-- if true, don't throw an error if conversion fails
 * ) returns INTEGER;
 *
 * Returns the number of bytes successfully converted.
 * ----------
 */
39
42{
43 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
44 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
45 int len = PG_GETARG_INT32(4);
46 bool noError = PG_GETARG_BOOL(5);
47 unsigned char *start = src;
48 unsigned short c;
49
51
52 while (len > 0)
53 {
54 c = *src;
55 if (c == 0)
56 {
57 if (noError)
58 break;
59 report_invalid_encoding(PG_LATIN1, (const char *) src, len);
60 }
61 if (!IS_HIGHBIT_SET(c))
62 *dest++ = c;
63 else
64 {
65 *dest++ = (c >> 6) | 0xc0;
66 *dest++ = (c & 0x003f) | HIGHBIT;
67 }
68 src++;
69 len--;
70 }
71 *dest = '\0';
72
74}
75
78{
79 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
80 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
81 int len = PG_GETARG_INT32(4);
82 bool noError = PG_GETARG_BOOL(5);
83 unsigned char *start = src;
84 unsigned short c,
85 c1;
86
88
89 while (len > 0)
90 {
91 c = *src;
92 if (c == 0)
93 {
94 if (noError)
95 break;
96 report_invalid_encoding(PG_UTF8, (const char *) src, len);
97 }
98 /* fast path for ASCII-subset characters */
99 if (!IS_HIGHBIT_SET(c))
100 {
101 *dest++ = c;
102 src++;
103 len--;
104 }
105 else
106 {
107 int l = pg_utf_mblen(src);
108
109 if (l > len || !pg_utf8_islegal(src, l))
110 {
111 if (noError)
112 break;
113 report_invalid_encoding(PG_UTF8, (const char *) src, len);
114 }
115 if (l != 2)
116 {
117 if (noError)
118 break;
120 (const char *) src, len);
121 }
122 c1 = src[1] & 0x3f;
123 c = ((c & 0x1f) << 6) | c1;
124 if (c >= 0x80 && c <= 0xff)
125 {
126 *dest++ = (unsigned char) c;
127 src += 2;
128 len -= 2;
129 }
130 else
131 {
132 if (noError)
133 break;
135 (const char *) src, len);
136 }
137 }
138 }
139 *dest = '\0';
140
141 PG_RETURN_INT32(src - start);
142}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1126
#define HIGHBIT
Definition: c.h:1125
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
return str start
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1730
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698
const void size_t len
#define pg_utf_mblen
Definition: pg_wchar.h:633
@ PG_LATIN1
Definition: pg_wchar.h:234
@ PG_UTF8
Definition: pg_wchar.h:232
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:507
uintptr_t Datum
Definition: postgres.h:69
char * c
Datum utf8_to_iso8859_1(PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1(iso8859_1_to_utf8)
Datum iso8859_1_to_utf8(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC_EXT(.name="utf8_and_iso8859_1",.version=PG_VERSION)
const char * name
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1989