PostgreSQL Source Code git master
utf8_and_iso8859.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * ISO 8859 2-16 <--> UTF8
4 *
5 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
6 * Portions Copyright (c) 1994, Regents of the University of California
7 *
8 * IDENTIFICATION
9 * src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
10 *
11 *-------------------------------------------------------------------------
12 */
13
14#include "postgres.h"
15#include "fmgr.h"
16#include "mb/pg_wchar.h"
17#include "../../Unicode/iso8859_10_to_utf8.map"
18#include "../../Unicode/iso8859_13_to_utf8.map"
19#include "../../Unicode/iso8859_14_to_utf8.map"
20#include "../../Unicode/iso8859_15_to_utf8.map"
21#include "../../Unicode/iso8859_2_to_utf8.map"
22#include "../../Unicode/iso8859_3_to_utf8.map"
23#include "../../Unicode/iso8859_4_to_utf8.map"
24#include "../../Unicode/iso8859_5_to_utf8.map"
25#include "../../Unicode/iso8859_6_to_utf8.map"
26#include "../../Unicode/iso8859_7_to_utf8.map"
27#include "../../Unicode/iso8859_8_to_utf8.map"
28#include "../../Unicode/iso8859_9_to_utf8.map"
29#include "../../Unicode/utf8_to_iso8859_10.map"
30#include "../../Unicode/utf8_to_iso8859_13.map"
31#include "../../Unicode/utf8_to_iso8859_14.map"
32#include "../../Unicode/utf8_to_iso8859_15.map"
33#include "../../Unicode/utf8_to_iso8859_16.map"
34#include "../../Unicode/utf8_to_iso8859_2.map"
35#include "../../Unicode/utf8_to_iso8859_3.map"
36#include "../../Unicode/utf8_to_iso8859_4.map"
37#include "../../Unicode/utf8_to_iso8859_5.map"
38#include "../../Unicode/utf8_to_iso8859_6.map"
39#include "../../Unicode/utf8_to_iso8859_7.map"
40#include "../../Unicode/utf8_to_iso8859_8.map"
41#include "../../Unicode/utf8_to_iso8859_9.map"
42#include "../../Unicode/iso8859_16_to_utf8.map"

/* Module magic block for this conversion-procedure library. */
PG_MODULE_MAGIC;

/* Version-1 calling-convention declarations for the two conversion functions. */
PG_FUNCTION_INFO_V1(iso8859_to_utf8);
PG_FUNCTION_INFO_V1(utf8_to_iso8859);

49/* ----------
50 * conv_proc(
51 * INTEGER, -- source encoding id
52 * INTEGER, -- destination encoding id
53 * CSTRING, -- source string (null terminated C string)
54 * CSTRING, -- destination string (null terminated C string)
55 * INTEGER, -- source string length
56 * BOOL -- if true, don't throw an error if conversion fails
57 * ) returns INTEGER;
58 *
59 * Returns the number of bytes successfully converted.
60 * ----------
61 */
62
63typedef struct
64{
66 const pg_mb_radix_tree *map1; /* to UTF8 map name */
67 const pg_mb_radix_tree *map2; /* from UTF8 map name */
69
70static const pg_conv_map maps[] = {
71 {PG_LATIN2, &iso8859_2_to_unicode_tree,
72 &iso8859_2_from_unicode_tree}, /* ISO-8859-2 Latin 2 */
73 {PG_LATIN3, &iso8859_3_to_unicode_tree,
74 &iso8859_3_from_unicode_tree}, /* ISO-8859-3 Latin 3 */
75 {PG_LATIN4, &iso8859_4_to_unicode_tree,
76 &iso8859_4_from_unicode_tree}, /* ISO-8859-4 Latin 4 */
77 {PG_LATIN5, &iso8859_9_to_unicode_tree,
78 &iso8859_9_from_unicode_tree}, /* ISO-8859-9 Latin 5 */
79 {PG_LATIN6, &iso8859_10_to_unicode_tree,
80 &iso8859_10_from_unicode_tree}, /* ISO-8859-10 Latin 6 */
81 {PG_LATIN7, &iso8859_13_to_unicode_tree,
82 &iso8859_13_from_unicode_tree}, /* ISO-8859-13 Latin 7 */
83 {PG_LATIN8, &iso8859_14_to_unicode_tree,
84 &iso8859_14_from_unicode_tree}, /* ISO-8859-14 Latin 8 */
85 {PG_LATIN9, &iso8859_15_to_unicode_tree,
86 &iso8859_15_from_unicode_tree}, /* ISO-8859-15 Latin 9 */
87 {PG_LATIN10, &iso8859_16_to_unicode_tree,
88 &iso8859_16_from_unicode_tree}, /* ISO-8859-16 Latin 10 */
89 {PG_ISO_8859_5, &iso8859_5_to_unicode_tree,
90 &iso8859_5_from_unicode_tree}, /* ISO-8859-5 */
91 {PG_ISO_8859_6, &iso8859_6_to_unicode_tree,
92 &iso8859_6_from_unicode_tree}, /* ISO-8859-6 */
93 {PG_ISO_8859_7, &iso8859_7_to_unicode_tree,
94 &iso8859_7_from_unicode_tree}, /* ISO-8859-7 */
95 {PG_ISO_8859_8, &iso8859_8_to_unicode_tree,
96 &iso8859_8_from_unicode_tree}, /* ISO-8859-8 */
97};
98
101{
102 int encoding = PG_GETARG_INT32(0);
103 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
104 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
105 int len = PG_GETARG_INT32(4);
106 bool noError = PG_GETARG_BOOL(5);
107 int i;
108
110
111 for (i = 0; i < lengthof(maps); i++)
112 {
113 if (encoding == maps[i].encoding)
114 {
115 int converted;
116
117 converted = LocalToUtf(src, len, dest,
118 maps[i].map1,
119 NULL, 0,
120 NULL,
121 encoding,
122 noError);
123 PG_RETURN_INT32(converted);
124 }
125 }
126
128 (errcode(ERRCODE_INTERNAL_ERROR),
129 errmsg("unexpected encoding ID %d for ISO 8859 character sets",
130 encoding)));
131
133}
134
135Datum
137{
138 int encoding = PG_GETARG_INT32(1);
139 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
140 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
141 int len = PG_GETARG_INT32(4);
142 bool noError = PG_GETARG_BOOL(5);
143 int i;
144
146
147 for (i = 0; i < lengthof(maps); i++)
148 {
149 if (encoding == maps[i].encoding)
150 {
151 int converted;
152
153 converted = UtfToLocal(src, len, dest,
154 maps[i].map2,
155 NULL, 0,
156 NULL,
157 encoding,
158 noError);
159 PG_RETURN_INT32(converted);
160 }
161 }
162
164 (errcode(ERRCODE_INTERNAL_ERROR),
165 errmsg("unexpected encoding ID %d for ISO 8859 character sets",
166 encoding)));
167
169}
#define lengthof(array)
Definition: c.h:759
int UtfToLocal(const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
Definition: conv.c:507
int LocalToUtf(const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
Definition: conv.c:717
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
int i
Definition: isn.c:74
const void size_t len
int32 encoding
Definition: pg_database.h:41
pg_enc
Definition: pg_wchar.h:225
@ PG_LATIN4
Definition: pg_wchar.h:237
@ PG_LATIN9
Definition: pg_wchar.h:242
@ PG_ISO_8859_6
Definition: pg_wchar.h:252
@ PG_LATIN6
Definition: pg_wchar.h:239
@ PG_LATIN5
Definition: pg_wchar.h:238
@ PG_LATIN2
Definition: pg_wchar.h:235
@ PG_ISO_8859_5
Definition: pg_wchar.h:251
@ PG_LATIN10
Definition: pg_wchar.h:243
@ PG_ISO_8859_7
Definition: pg_wchar.h:253
@ PG_LATIN8
Definition: pg_wchar.h:241
@ PG_LATIN3
Definition: pg_wchar.h:236
@ PG_LATIN7
Definition: pg_wchar.h:240
@ PG_UTF8
Definition: pg_wchar.h:232
@ PG_ISO_8859_8
Definition: pg_wchar.h:254
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:507
uintptr_t Datum
Definition: postgres.h:69
const pg_mb_radix_tree * map2
const pg_mb_radix_tree * map1
PG_MODULE_MAGIC
static const pg_conv_map maps[]
Datum utf8_to_iso8859(PG_FUNCTION_ARGS)
Datum iso8859_to_utf8(PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1(iso8859_to_utf8)