PostgreSQL Source Code  git master
utf8_and_iso8859.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * ISO 8859 2-16 <--> UTF8
4  *
5  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
6  * Portions Copyright (c) 1994, Regents of the University of California
7  *
8  * IDENTIFICATION
9  * src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres.h"
15 #include "fmgr.h"
16 #include "mb/pg_wchar.h"
17 #include "../../Unicode/iso8859_10_to_utf8.map"
18 #include "../../Unicode/iso8859_13_to_utf8.map"
19 #include "../../Unicode/iso8859_14_to_utf8.map"
20 #include "../../Unicode/iso8859_15_to_utf8.map"
21 #include "../../Unicode/iso8859_2_to_utf8.map"
22 #include "../../Unicode/iso8859_3_to_utf8.map"
23 #include "../../Unicode/iso8859_4_to_utf8.map"
24 #include "../../Unicode/iso8859_5_to_utf8.map"
25 #include "../../Unicode/iso8859_6_to_utf8.map"
26 #include "../../Unicode/iso8859_7_to_utf8.map"
27 #include "../../Unicode/iso8859_8_to_utf8.map"
28 #include "../../Unicode/iso8859_9_to_utf8.map"
29 #include "../../Unicode/utf8_to_iso8859_10.map"
30 #include "../../Unicode/utf8_to_iso8859_13.map"
31 #include "../../Unicode/utf8_to_iso8859_14.map"
32 #include "../../Unicode/utf8_to_iso8859_15.map"
33 #include "../../Unicode/utf8_to_iso8859_16.map"
34 #include "../../Unicode/utf8_to_iso8859_2.map"
35 #include "../../Unicode/utf8_to_iso8859_3.map"
36 #include "../../Unicode/utf8_to_iso8859_4.map"
37 #include "../../Unicode/utf8_to_iso8859_5.map"
38 #include "../../Unicode/utf8_to_iso8859_6.map"
39 #include "../../Unicode/utf8_to_iso8859_7.map"
40 #include "../../Unicode/utf8_to_iso8859_8.map"
41 #include "../../Unicode/utf8_to_iso8859_9.map"
42 #include "../../Unicode/iso8859_16_to_utf8.map"

/* Mark this shared library as a loadable PostgreSQL module. */
PG_MODULE_MAGIC;

/* Declare both conversion entry points as version-1 fmgr functions. */
PG_FUNCTION_INFO_V1(iso8859_to_utf8);
PG_FUNCTION_INFO_V1(utf8_to_iso8859);

49 /* ----------
50  * conv_proc(
51  * INTEGER, -- source encoding id
52  * INTEGER, -- destination encoding id
53  * CSTRING, -- source string (null terminated C string)
54  * CSTRING, -- destination string (null terminated C string)
55  * INTEGER, -- source string length
56  * BOOL -- if true, don't throw an error if conversion fails
57  * ) returns INTEGER;
58  *
59  * Returns the number of bytes successfully converted.
60  * ----------
61  */
62 
63 typedef struct
64 {
66  const pg_mb_radix_tree *map1; /* to UTF8 map name */
67  const pg_mb_radix_tree *map2; /* from UTF8 map name */
68 } pg_conv_map;
69 
70 static const pg_conv_map maps[] = {
71  {PG_LATIN2, &iso8859_2_to_unicode_tree,
72  &iso8859_2_from_unicode_tree}, /* ISO-8859-2 Latin 2 */
73  {PG_LATIN3, &iso8859_3_to_unicode_tree,
74  &iso8859_3_from_unicode_tree}, /* ISO-8859-3 Latin 3 */
75  {PG_LATIN4, &iso8859_4_to_unicode_tree,
76  &iso8859_4_from_unicode_tree}, /* ISO-8859-4 Latin 4 */
77  {PG_LATIN5, &iso8859_9_to_unicode_tree,
78  &iso8859_9_from_unicode_tree}, /* ISO-8859-9 Latin 5 */
79  {PG_LATIN6, &iso8859_10_to_unicode_tree,
80  &iso8859_10_from_unicode_tree}, /* ISO-8859-10 Latin 6 */
81  {PG_LATIN7, &iso8859_13_to_unicode_tree,
82  &iso8859_13_from_unicode_tree}, /* ISO-8859-13 Latin 7 */
83  {PG_LATIN8, &iso8859_14_to_unicode_tree,
84  &iso8859_14_from_unicode_tree}, /* ISO-8859-14 Latin 8 */
85  {PG_LATIN9, &iso8859_15_to_unicode_tree,
86  &iso8859_15_from_unicode_tree}, /* ISO-8859-15 Latin 9 */
87  {PG_LATIN10, &iso8859_16_to_unicode_tree,
88  &iso8859_16_from_unicode_tree}, /* ISO-8859-16 Latin 10 */
89  {PG_ISO_8859_5, &iso8859_5_to_unicode_tree,
90  &iso8859_5_from_unicode_tree}, /* ISO-8859-5 */
91  {PG_ISO_8859_6, &iso8859_6_to_unicode_tree,
92  &iso8859_6_from_unicode_tree}, /* ISO-8859-6 */
93  {PG_ISO_8859_7, &iso8859_7_to_unicode_tree,
94  &iso8859_7_from_unicode_tree}, /* ISO-8859-7 */
95  {PG_ISO_8859_8, &iso8859_8_to_unicode_tree,
96  &iso8859_8_from_unicode_tree}, /* ISO-8859-8 */
97 };
98 
99 Datum
101 {
102  int encoding = PG_GETARG_INT32(0);
103  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
104  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
105  int len = PG_GETARG_INT32(4);
106  bool noError = PG_GETARG_BOOL(5);
107  int i;
108 
110 
111  for (i = 0; i < lengthof(maps); i++)
112  {
113  if (encoding == maps[i].encoding)
114  {
115  int converted;
116 
117  converted = LocalToUtf(src, len, dest,
118  maps[i].map1,
119  NULL, 0,
120  NULL,
121  encoding,
122  noError);
123  PG_RETURN_INT32(converted);
124  }
125  }
126 
127  ereport(ERROR,
128  (errcode(ERRCODE_INTERNAL_ERROR),
129  errmsg("unexpected encoding ID %d for ISO 8859 character sets",
130  encoding)));
131 
132  PG_RETURN_INT32(0);
133 }
134 
135 Datum
137 {
138  int encoding = PG_GETARG_INT32(1);
139  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
140  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
141  int len = PG_GETARG_INT32(4);
142  bool noError = PG_GETARG_BOOL(5);
143  int i;
144 
146 
147  for (i = 0; i < lengthof(maps); i++)
148  {
149  if (encoding == maps[i].encoding)
150  {
151  int converted;
152 
153  converted = UtfToLocal(src, len, dest,
154  maps[i].map2,
155  NULL, 0,
156  NULL,
157  encoding,
158  noError);
159  PG_RETURN_INT32(converted);
160  }
161  }
162 
163  ereport(ERROR,
164  (errcode(ERRCODE_INTERNAL_ERROR),
165  errmsg("unexpected encoding ID %d for ISO 8859 character sets",
166  encoding)));
167 
168  PG_RETURN_INT32(0);
169 }
#define lengthof(array)
Definition: c.h:775
int UtfToLocal(const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
Definition: conv.c:507
int LocalToUtf(const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
Definition: conv.c:717
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
int i
Definition: isn.c:73
const void size_t len
int32 encoding
Definition: pg_database.h:41
pg_enc
Definition: pg_wchar.h:225
@ PG_LATIN4
Definition: pg_wchar.h:237
@ PG_LATIN9
Definition: pg_wchar.h:242
@ PG_ISO_8859_6
Definition: pg_wchar.h:252
@ PG_LATIN6
Definition: pg_wchar.h:239
@ PG_LATIN5
Definition: pg_wchar.h:238
@ PG_LATIN2
Definition: pg_wchar.h:235
@ PG_ISO_8859_5
Definition: pg_wchar.h:251
@ PG_LATIN10
Definition: pg_wchar.h:243
@ PG_ISO_8859_7
Definition: pg_wchar.h:253
@ PG_LATIN8
Definition: pg_wchar.h:241
@ PG_LATIN3
Definition: pg_wchar.h:236
@ PG_LATIN7
Definition: pg_wchar.h:240
@ PG_UTF8
Definition: pg_wchar.h:232
@ PG_ISO_8859_8
Definition: pg_wchar.h:254
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:507
uintptr_t Datum
Definition: postgres.h:64
const pg_mb_radix_tree * map2
const pg_mb_radix_tree * map1
PG_MODULE_MAGIC
static const pg_conv_map maps[]
Datum utf8_to_iso8859(PG_FUNCTION_ARGS)
Datum iso8859_to_utf8(PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1(iso8859_to_utf8)