PostgreSQL Source Code  git master
utf8_and_iso8859_1.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * ISO8859_1 <--> UTF8
4  *
5  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
6  * Portions Copyright (c) 1994, Regents of the University of California
7  *
8  * IDENTIFICATION
9  * src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres.h"
15 #include "fmgr.h"
16 #include "mb/pg_wchar.h"
17 
19 
22 
23 /* ----------
24  * conv_proc(
25  * INTEGER, -- source encoding id
26  * INTEGER, -- destination encoding id
27  * CSTRING, -- source string (null terminated C string)
28  * CSTRING, -- destination string (null terminated C string)
29  * INTEGER, -- source string length
30  * BOOL -- if true, don't throw an error if conversion fails
31  * ) returns INTEGER;
32  *
33  * Returns the number of bytes successfully converted.
34  * ----------
35  */
36 
37 Datum
39 {
40  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
41  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
42  int len = PG_GETARG_INT32(4);
43  bool noError = PG_GETARG_BOOL(5);
44  unsigned char *start = src;
45  unsigned short c;
46 
48 
49  while (len > 0)
50  {
51  c = *src;
52  if (c == 0)
53  {
54  if (noError)
55  break;
56  report_invalid_encoding(PG_LATIN1, (const char *) src, len);
57  }
58  if (!IS_HIGHBIT_SET(c))
59  *dest++ = c;
60  else
61  {
62  *dest++ = (c >> 6) | 0xc0;
63  *dest++ = (c & 0x003f) | HIGHBIT;
64  }
65  src++;
66  len--;
67  }
68  *dest = '\0';
69 
70  PG_RETURN_INT32(src - start);
71 }
72 
73 Datum
75 {
76  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
77  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
78  int len = PG_GETARG_INT32(4);
79  bool noError = PG_GETARG_BOOL(5);
80  unsigned char *start = src;
81  unsigned short c,
82  c1;
83 
85 
86  while (len > 0)
87  {
88  c = *src;
89  if (c == 0)
90  {
91  if (noError)
92  break;
93  report_invalid_encoding(PG_UTF8, (const char *) src, len);
94  }
95  /* fast path for ASCII-subset characters */
96  if (!IS_HIGHBIT_SET(c))
97  {
98  *dest++ = c;
99  src++;
100  len--;
101  }
102  else
103  {
104  int l = pg_utf_mblen(src);
105 
106  if (l > len || !pg_utf8_islegal(src, l))
107  {
108  if (noError)
109  break;
110  report_invalid_encoding(PG_UTF8, (const char *) src, len);
111  }
112  if (l != 2)
113  {
114  if (noError)
115  break;
117  (const char *) src, len);
118  }
119  c1 = src[1] & 0x3f;
120  c = ((c & 0x1f) << 6) | c1;
121  if (c >= 0x80 && c <= 0xff)
122  {
123  *dest++ = (unsigned char) c;
124  src += 2;
125  len -= 2;
126  }
127  else
128  {
129  if (noError)
130  break;
132  (const char *) src, len);
133  }
134  }
135  }
136  *dest = '\0';
137 
138  PG_RETURN_INT32(src - start);
139 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1158
#define HIGHBIT
Definition: c.h:1157
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
return str start
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1730
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698
const void size_t len
#define pg_utf_mblen
Definition: pg_wchar.h:633
@ PG_LATIN1
Definition: pg_wchar.h:234
@ PG_UTF8
Definition: pg_wchar.h:232
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:507
uintptr_t Datum
Definition: postgres.h:64
char * c
Datum utf8_to_iso8859_1(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC
PG_FUNCTION_INFO_V1(iso8859_1_to_utf8)
Datum iso8859_1_to_utf8(PG_FUNCTION_ARGS)
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1953