PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
utf8_and_iso8859_1.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * ISO8859_1 <--> UTF8
4  *
5  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
6  * Portions Copyright (c) 1994, Regents of the University of California
7  *
8  * IDENTIFICATION
9  * src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres.h"
15 #include "fmgr.h"
16 #include "mb/pg_wchar.h"
17 
/* Marks this shared library as a loadable PostgreSQL module. */
PG_MODULE_MAGIC;

/* Register the two conversion entry points defined in this file. */
PG_FUNCTION_INFO_V1(iso8859_1_to_utf8);
PG_FUNCTION_INFO_V1(utf8_to_iso8859_1);

23 /* ----------
24  * conv_proc(
25  * INTEGER, -- source encoding id
26  * INTEGER, -- destination encoding id
27  * CSTRING, -- source string (null terminated C string)
28  * CSTRING, -- destination string (null terminated C string)
29  * INTEGER -- source string length
30  * ) returns VOID;
31  * ----------
32  */
33 
34 Datum
36 {
37  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
38  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
39  int len = PG_GETARG_INT32(4);
40  unsigned short c;
41 
43 
44  while (len > 0)
45  {
46  c = *src;
47  if (c == 0)
48  report_invalid_encoding(PG_LATIN1, (const char *) src, len);
49  if (!IS_HIGHBIT_SET(c))
50  *dest++ = c;
51  else
52  {
53  *dest++ = (c >> 6) | 0xc0;
54  *dest++ = (c & 0x003f) | HIGHBIT;
55  }
56  src++;
57  len--;
58  }
59  *dest = '\0';
60 
62 }
63 
64 Datum
66 {
67  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
68  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
69  int len = PG_GETARG_INT32(4);
70  unsigned short c,
71  c1;
72 
74 
75  while (len > 0)
76  {
77  c = *src;
78  if (c == 0)
79  report_invalid_encoding(PG_UTF8, (const char *) src, len);
80  /* fast path for ASCII-subset characters */
81  if (!IS_HIGHBIT_SET(c))
82  {
83  *dest++ = c;
84  src++;
85  len--;
86  }
87  else
88  {
89  int l = pg_utf_mblen(src);
90 
91  if (l > len || !pg_utf8_islegal(src, l))
92  report_invalid_encoding(PG_UTF8, (const char *) src, len);
93  if (l != 2)
95  (const char *) src, len);
96  c1 = src[1] & 0x3f;
97  c = ((c & 0x1f) << 6) | c1;
98  if (c >= 0x80 && c <= 0xff)
99  {
100  *dest++ = (unsigned char) c;
101  src += 2;
102  len -= 2;
103  }
104  else
106  (const char *) src, len);
107  }
108  }
109  *dest = '\0';
110 
111  PG_RETURN_VOID();
112 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1458
PG_FUNCTION_INFO_V1(iso8859_1_to_utf8)
Datum iso8859_1_to_utf8(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
char * c
#define HIGHBIT
Definition: c.h:972
uintptr_t Datum
Definition: postgres.h:372
#define PG_RETURN_VOID()
Definition: fmgr.h:309
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:492
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:541
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:242
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
Datum utf8_to_iso8859_1(PG_FUNCTION_ARGS)