PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
utf8_and_iso8859.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * ISO 8859 2-16 <--> UTF8
4  *
5  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
6  * Portions Copyright (c) 1994, Regents of the University of California
7  *
8  * IDENTIFICATION
9  * src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres.h"
15 #include "fmgr.h"
16 #include "mb/pg_wchar.h"
17 #include "../../Unicode/iso8859_10_to_utf8.map"
18 #include "../../Unicode/iso8859_13_to_utf8.map"
19 #include "../../Unicode/iso8859_14_to_utf8.map"
20 #include "../../Unicode/iso8859_15_to_utf8.map"
21 #include "../../Unicode/iso8859_2_to_utf8.map"
22 #include "../../Unicode/iso8859_3_to_utf8.map"
23 #include "../../Unicode/iso8859_4_to_utf8.map"
24 #include "../../Unicode/iso8859_5_to_utf8.map"
25 #include "../../Unicode/iso8859_6_to_utf8.map"
26 #include "../../Unicode/iso8859_7_to_utf8.map"
27 #include "../../Unicode/iso8859_8_to_utf8.map"
28 #include "../../Unicode/iso8859_9_to_utf8.map"
29 #include "../../Unicode/utf8_to_iso8859_10.map"
30 #include "../../Unicode/utf8_to_iso8859_13.map"
31 #include "../../Unicode/utf8_to_iso8859_14.map"
32 #include "../../Unicode/utf8_to_iso8859_15.map"
33 #include "../../Unicode/utf8_to_iso8859_16.map"
34 #include "../../Unicode/utf8_to_iso8859_2.map"
35 #include "../../Unicode/utf8_to_iso8859_3.map"
36 #include "../../Unicode/utf8_to_iso8859_4.map"
37 #include "../../Unicode/utf8_to_iso8859_5.map"
38 #include "../../Unicode/utf8_to_iso8859_6.map"
39 #include "../../Unicode/utf8_to_iso8859_7.map"
40 #include "../../Unicode/utf8_to_iso8859_8.map"
41 #include "../../Unicode/utf8_to_iso8859_9.map"
42 #include "../../Unicode/iso8859_16_to_utf8.map"

/* Marks this shared library as a loadable PostgreSQL module. */
PG_MODULE_MAGIC;

/* Version-1 calling convention records for the two conversion procedures. */
PG_FUNCTION_INFO_V1(iso8859_to_utf8);
PG_FUNCTION_INFO_V1(utf8_to_iso8859);

49 /* ----------
50  * conv_proc(
51  * INTEGER, -- source encoding id
52  * INTEGER, -- destination encoding id
53  * CSTRING, -- source string (null terminated C string)
54  * CSTRING, -- destination string (null terminated C string)
55  * INTEGER -- source string length
56  * ) returns VOID;
57  * ----------
58  */
59 
60 typedef struct
61 {
63  const pg_local_to_utf *map1; /* to UTF8 map name */
64  const pg_utf_to_local *map2; /* from UTF8 map name */
65  int size1; /* size of map1 */
66  int size2; /* size of map2 */
67 } pg_conv_map;
68 
69 static const pg_conv_map maps[] = {
70  {PG_LATIN2, LUmapISO8859_2, ULmapISO8859_2,
71  lengthof(LUmapISO8859_2),
72  lengthof(ULmapISO8859_2)}, /* ISO-8859-2 Latin 2 */
73  {PG_LATIN3, LUmapISO8859_3, ULmapISO8859_3,
74  lengthof(LUmapISO8859_3),
75  lengthof(ULmapISO8859_3)}, /* ISO-8859-3 Latin 3 */
76  {PG_LATIN4, LUmapISO8859_4, ULmapISO8859_4,
77  lengthof(LUmapISO8859_4),
78  lengthof(ULmapISO8859_4)}, /* ISO-8859-4 Latin 4 */
79  {PG_LATIN5, LUmapISO8859_9, ULmapISO8859_9,
80  lengthof(LUmapISO8859_9),
81  lengthof(ULmapISO8859_9)}, /* ISO-8859-9 Latin 5 */
82  {PG_LATIN6, LUmapISO8859_10, ULmapISO8859_10,
83  lengthof(LUmapISO8859_10),
84  lengthof(ULmapISO8859_10)}, /* ISO-8859-10 Latin 6 */
85  {PG_LATIN7, LUmapISO8859_13, ULmapISO8859_13,
86  lengthof(LUmapISO8859_13),
87  lengthof(ULmapISO8859_13)}, /* ISO-8859-13 Latin 7 */
88  {PG_LATIN8, LUmapISO8859_14, ULmapISO8859_14,
89  lengthof(LUmapISO8859_14),
90  lengthof(ULmapISO8859_14)}, /* ISO-8859-14 Latin 8 */
91  {PG_LATIN9, LUmapISO8859_15, ULmapISO8859_15,
92  lengthof(LUmapISO8859_15),
93  lengthof(ULmapISO8859_15)}, /* ISO-8859-15 Latin 9 */
94  {PG_LATIN10, LUmapISO8859_16, ULmapISO8859_16,
95  lengthof(LUmapISO8859_16),
96  lengthof(ULmapISO8859_16)}, /* ISO-8859-16 Latin 10 */
97  {PG_ISO_8859_5, LUmapISO8859_5, ULmapISO8859_5,
98  lengthof(LUmapISO8859_5),
99  lengthof(ULmapISO8859_5)}, /* ISO-8859-5 */
100  {PG_ISO_8859_6, LUmapISO8859_6, ULmapISO8859_6,
101  lengthof(LUmapISO8859_6),
102  lengthof(ULmapISO8859_6)}, /* ISO-8859-6 */
103  {PG_ISO_8859_7, LUmapISO8859_7, ULmapISO8859_7,
104  lengthof(LUmapISO8859_7),
105  lengthof(ULmapISO8859_7)}, /* ISO-8859-7 */
106  {PG_ISO_8859_8, LUmapISO8859_8, ULmapISO8859_8,
107  lengthof(LUmapISO8859_8),
108  lengthof(ULmapISO8859_8)}, /* ISO-8859-8 */
109 };
110 
111 Datum
113 {
114  int encoding = PG_GETARG_INT32(0);
115  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
116  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
117  int len = PG_GETARG_INT32(4);
118  int i;
119 
121 
122  for (i = 0; i < lengthof(maps); i++)
123  {
124  if (encoding == maps[i].encoding)
125  {
126  LocalToUtf(src, len, dest,
127  maps[i].map1, maps[i].size1,
128  NULL, 0,
129  NULL,
130  encoding);
131  PG_RETURN_VOID();
132  }
133  }
134 
135  ereport(ERROR,
136  (errcode(ERRCODE_INTERNAL_ERROR),
137  errmsg("unexpected encoding ID %d for ISO 8859 character sets",
138  encoding)));
139 
140  PG_RETURN_VOID();
141 }
142 
143 Datum
145 {
146  int encoding = PG_GETARG_INT32(1);
147  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
148  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
149  int len = PG_GETARG_INT32(4);
150  int i;
151 
153 
154  for (i = 0; i < lengthof(maps); i++)
155  {
156  if (encoding == maps[i].encoding)
157  {
158  UtfToLocal(src, len, dest,
159  maps[i].map2, maps[i].size2,
160  NULL, 0,
161  NULL,
162  encoding);
163  PG_RETURN_VOID();
164  }
165  }
166 
167  ereport(ERROR,
168  (errcode(ERRCODE_INTERNAL_ERROR),
169  errmsg("unexpected encoding ID %d for ISO 8859 character sets",
170  encoding)));
171 
172  PG_RETURN_VOID();
173 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:225
int errcode(int sqlerrcode)
Definition: elog.c:575
PG_MODULE_MAGIC
#define lengthof(array)
Definition: c.h:557
void UtfToLocal(const unsigned char *utf, int len, unsigned char *iso, const pg_utf_to_local *map, int mapsize, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding)
Definition: conv.c:390
#define ERROR
Definition: elog.h:43
void LocalToUtf(const unsigned char *iso, int len, unsigned char *utf, const pg_local_to_utf *map, int mapsize, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding)
Definition: conv.c:576
PG_FUNCTION_INFO_V1(iso8859_to_utf8)
#define ereport(elevel, rest)
Definition: elog.h:122
Datum iso8859_to_utf8(PG_FUNCTION_ARGS)
uintptr_t Datum
Definition: postgres.h:374
static char * encoding
Definition: initdb.c:121
#define PG_RETURN_VOID()
Definition: fmgr.h:293
#define NULL
Definition: c.h:226
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:428
static const pg_conv_map maps[]
pg_enc
Definition: pg_wchar.h:236
int errmsg(const char *fmt,...)
Definition: elog.c:797
int i
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:233
const pg_local_to_utf * map1
#define PG_FUNCTION_ARGS
Definition: fmgr.h:150
Datum utf8_to_iso8859(PG_FUNCTION_ARGS)
const pg_utf_to_local * map2