PostgreSQL Source Code  git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
utf8_and_iso8859_1.c File Reference
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
Include dependency graph for utf8_and_iso8859_1.c:

Go to the source code of this file.

Functions

 PG_FUNCTION_INFO_V1 (iso8859_1_to_utf8)
 
 PG_FUNCTION_INFO_V1 (utf8_to_iso8859_1)
 
Datum iso8859_1_to_utf8 (PG_FUNCTION_ARGS)
 
Datum utf8_to_iso8859_1 (PG_FUNCTION_ARGS)
 

Variables

 PG_MODULE_MAGIC
 

Function Documentation

◆ iso8859_1_to_utf8()

Datum iso8859_1_to_utf8 ( PG_FUNCTION_ARGS  )

Definition at line 38 of file utf8_and_iso8859_1.c.

39 {
40  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
41  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
42  int len = PG_GETARG_INT32(4);
43  bool noError = PG_GETARG_BOOL(5);
44  unsigned char *start = src;
45  unsigned short c;
46 
47  CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_UTF8);
48 
49  while (len > 0)
50  {
51  c = *src;
52  if (c == 0)
53  {
54  if (noError)
55  break;
56  report_invalid_encoding(PG_LATIN1, (const char *) src, len);
57  }
58  if (!IS_HIGHBIT_SET(c))
59  *dest++ = c;
60  else
61  {
62  *dest++ = (c >> 6) | 0xc0;
63  *dest++ = (c & 0x003f) | HIGHBIT;
64  }
65  src++;
66  len--;
67  }
68  *dest = '\0';
69 
70  PG_RETURN_INT32(src - start);
71 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1134
#define HIGHBIT
Definition: c.h:1133
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
return str start
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698
const void size_t len
@ PG_LATIN1
Definition: pg_wchar.h:234
@ PG_UTF8
Definition: pg_wchar.h:232
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:507
char * c

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, HIGHBIT, IS_HIGHBIT_SET, len, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_LATIN1, PG_RETURN_INT32, PG_UTF8, report_invalid_encoding(), and start.

◆ PG_FUNCTION_INFO_V1() [1/2]

PG_FUNCTION_INFO_V1 ( iso8859_1_to_utf8  )

◆ PG_FUNCTION_INFO_V1() [2/2]

PG_FUNCTION_INFO_V1 ( utf8_to_iso8859_1  )

◆ utf8_to_iso8859_1()

Datum utf8_to_iso8859_1 ( PG_FUNCTION_ARGS  )

Definition at line 74 of file utf8_and_iso8859_1.c.

75 {
76  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
77  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
78  int len = PG_GETARG_INT32(4);
79  bool noError = PG_GETARG_BOOL(5);
80  unsigned char *start = src;
81  unsigned short c,
82  c1;
83 
84  CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_LATIN1);
85 
86  while (len > 0)
87  {
88  c = *src;
89  if (c == 0)
90  {
91  if (noError)
92  break;
93  report_invalid_encoding(PG_UTF8, (const char *) src, len);
94  }
95  /* fast path for ASCII-subset characters */
96  if (!IS_HIGHBIT_SET(c))
97  {
98  *dest++ = c;
99  src++;
100  len--;
101  }
102  else
103  {
104  int l = pg_utf_mblen(src);
105 
106  if (l > len || !pg_utf8_islegal(src, l))
107  {
108  if (noError)
109  break;
110  report_invalid_encoding(PG_UTF8, (const char *) src, len);
111  }
112  if (l != 2)
113  {
114  if (noError)
115  break;
116  report_untranslatable_char(PG_UTF8, PG_LATIN1,
117  (const char *) src, len);
118  }
119  c1 = src[1] & 0x3f;
120  c = ((c & 0x1f) << 6) | c1;
121  if (c >= 0x80 && c <= 0xff)
122  {
123  *dest++ = (unsigned char) c;
124  src += 2;
125  len -= 2;
126  }
127  else
128  {
129  if (noError)
130  break;
131  report_untranslatable_char(PG_UTF8, PG_LATIN1,
132  (const char *) src, len);
133  }
134  }
135  }
136  *dest = '\0';
137 
138  PG_RETURN_INT32(src - start);
139 }
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1730
#define pg_utf_mblen
Definition: pg_wchar.h:633
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1953

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, IS_HIGHBIT_SET, len, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_LATIN1, PG_RETURN_INT32, PG_UTF8, pg_utf8_islegal(), pg_utf_mblen, report_invalid_encoding(), report_untranslatable_char(), and start.

Variable Documentation

◆ PG_MODULE_MAGIC

PG_MODULE_MAGIC

Definition at line 18 of file utf8_and_iso8859_1.c.