PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
utf8_and_gb18030.c File Reference
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/gb18030_to_utf8.map"
#include "../../Unicode/utf8_to_gb18030.map"
Include dependency graph for utf8_and_gb18030.c:

Go to the source code of this file.

Macros

#define conv18030(minunicode, mincode, maxcode)
 
#define convutf8(minunicode, maxunicode, mincode)
 

Functions

 PG_FUNCTION_INFO_V1 (gb18030_to_utf8)
 
 PG_FUNCTION_INFO_V1 (utf8_to_gb18030)
 
static uint32 gb_linear (uint32 gb)
 
static uint32 gb_unlinear (uint32 lin)
 
static uint32 unicode_to_utf8word (uint32 c)
 
static uint32 utf8word_to_unicode (uint32 c)
 
static uint32 conv_18030_to_utf8 (uint32 code)
 
static uint32 conv_utf8_to_18030 (uint32 code)
 
Datum gb18030_to_utf8 (PG_FUNCTION_ARGS)
 
Datum utf8_to_gb18030 (PG_FUNCTION_ARGS)
 

Variables

 PG_MODULE_MAGIC
 

Macro Definition Documentation

◆ conv18030

#define conv18030 (   minunicode,
  mincode,
  maxcode 
)
Value:
if (code >= mincode && code <= maxcode) \
return unicode_to_utf8word(gb_linear(code) - gb_linear(mincode) + minunicode)
static uint32 unicode_to_utf8word(uint32 c)
static uint32 gb_linear(uint32 gb)

◆ convutf8

#define convutf8 (   minunicode,
  maxunicode,
  mincode 
)
Value:
if (ucs >= minunicode && ucs <= maxunicode) \
return gb_unlinear(ucs - minunicode + gb_linear(mincode))
static uint32 gb_unlinear(uint32 lin)

Function Documentation

◆ conv_18030_to_utf8()

static uint32 conv_18030_to_utf8 ( uint32  code)
static

Definition at line 128 of file utf8_and_gb18030.c.

/*
 * Algorithmic mapping of GB18030 four-byte codes to UTF-8.
 *
 * "code" is a four-byte GB18030 character packed into a uint32 (first byte
 * in the most significant position).  Each conv18030() line below covers one
 * contiguous run: GB18030 codes mincode..maxcode correspond, in linear order,
 * to consecutive Unicode code points starting at minunicode.  The matching
 * code point is returned as a packed UTF-8 word (see unicode_to_utf8word()).
 *
 * Returns 0 if "code" falls in none of the listed ranges.
 */
static uint32
conv_18030_to_utf8(uint32 code)
{
	/*
	 * The macro returns from the enclosing function on a match.  Arguments
	 * are parenthesized and the body is wrapped in do/while(0) so that it
	 * behaves as a single statement; it is #undef'd after the last use so it
	 * cannot leak into the rest of the translation unit.
	 */
#define conv18030(minunicode, mincode, maxcode) \
	do { \
		if (code >= (mincode) && code <= (maxcode)) \
			return unicode_to_utf8word(gb_linear(code) - gb_linear(mincode) + (minunicode)); \
	} while (0)

	conv18030(0x0452, 0x8130D330, 0x8136A531);
	conv18030(0x2643, 0x8137A839, 0x8138FD38);
	conv18030(0x361B, 0x8230A633, 0x8230F237);
	conv18030(0x3CE1, 0x8231D438, 0x8232AF32);
	conv18030(0x4160, 0x8232C937, 0x8232F837);
	conv18030(0x44D7, 0x8233A339, 0x8233C931);
	conv18030(0x478E, 0x8233E838, 0x82349638);
	conv18030(0x49B8, 0x8234A131, 0x8234E733);
	conv18030(0x9FA6, 0x82358F33, 0x8336C738);
	conv18030(0xE865, 0x8336D030, 0x84308534);
	conv18030(0xFA2A, 0x84309C38, 0x84318537);
	conv18030(0xFFE6, 0x8431A234, 0x8431A439);
	conv18030(0x10000, 0x90308130, 0xE3329A35);

#undef conv18030

	/* No mapping exists */
	return 0;
}
#define conv18030(minunicode, mincode, maxcode)

References conv18030.

Referenced by gb18030_to_utf8().

◆ conv_utf8_to_18030()

static uint32 conv_utf8_to_18030 ( uint32  code)
static

Definition at line 155 of file utf8_and_gb18030.c.

/*
 * Algorithmic mapping of UTF-8 to GB18030 four-byte codes.
 *
 * "code" is a UTF-8 character packed into a uint32; it is first decoded to a
 * Unicode code point.  Each convutf8() line below covers one contiguous run:
 * code points minunicode..maxunicode correspond, in linear order, to
 * consecutive four-byte GB18030 codes starting at mincode.  The result is the
 * GB18030 code packed into a uint32 (see gb_unlinear()).
 *
 * Returns 0 if the code point falls in none of the listed ranges.
 */
static uint32
conv_utf8_to_18030(uint32 code)
{
	uint32		ucs = utf8word_to_unicode(code);

	/*
	 * The macro returns from the enclosing function on a match.  Arguments
	 * are parenthesized and the body is wrapped in do/while(0) so that it
	 * behaves as a single statement; it is #undef'd after the last use so it
	 * cannot leak into the rest of the translation unit.
	 */
#define convutf8(minunicode, maxunicode, mincode) \
	do { \
		if (ucs >= (minunicode) && ucs <= (maxunicode)) \
			return gb_unlinear(ucs - (minunicode) + gb_linear(mincode)); \
	} while (0)

	convutf8(0x0452, 0x200F, 0x8130D330);
	convutf8(0x2643, 0x2E80, 0x8137A839);
	convutf8(0x361B, 0x3917, 0x8230A633);
	convutf8(0x3CE1, 0x4055, 0x8231D438);
	convutf8(0x4160, 0x4336, 0x8232C937);
	convutf8(0x44D7, 0x464B, 0x8233A339);
	convutf8(0x478E, 0x4946, 0x8233E838);
	convutf8(0x49B8, 0x4C76, 0x8234A131);
	convutf8(0x9FA6, 0xD7FF, 0x82358F33);
	convutf8(0xE865, 0xF92B, 0x8336D030);
	convutf8(0xFA2A, 0xFE2F, 0x84309C38);
	convutf8(0xFFE6, 0xFFFF, 0x8431A234);
	convutf8(0x10000, 0x10FFFF, 0x90308130);

#undef convutf8

	/* No mapping exists */
	return 0;
}
uint32_t uint32
Definition: c.h:488
static uint32 utf8word_to_unicode(uint32 c)
#define convutf8(minunicode, maxunicode, mincode)

References convutf8, and utf8word_to_unicode().

Referenced by utf8_to_gb18030().

◆ gb18030_to_utf8()

Datum gb18030_to_utf8 ( PG_FUNCTION_ARGS  )

Definition at line 194 of file utf8_and_gb18030.c.

195{
196 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
197 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
198 int len = PG_GETARG_INT32(4);
199 bool noError = PG_GETARG_BOOL(5);
200 int converted;
201
203
204 converted = LocalToUtf(src, len, dest,
205 &gb18030_to_unicode_tree,
206 NULL, 0,
209 noError);
210
211 PG_RETURN_INT32(converted);
212}
int LocalToUtf(const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
Definition: conv.c:717
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
const void size_t len
@ PG_GB18030
Definition: pg_wchar.h:268
@ PG_UTF8
Definition: pg_wchar.h:232
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:507
static uint32 conv_18030_to_utf8(uint32 code)

References CHECK_ENCODING_CONVERSION_ARGS, conv_18030_to_utf8(), generate_unaccent_rules::dest, len, LocalToUtf(), PG_GB18030, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_RETURN_INT32, and PG_UTF8.

◆ gb_linear()

static uint32 gb_linear ( uint32  gb)
inline static

Definition at line 32 of file utf8_and_gb18030.c.

/*
 * Convert a four-byte GB18030 code to its position in a linear code space.
 *
 * "gb" holds the four bytes packed into a uint32, first byte in the most
 * significant position.  The first and third bytes act as base-126 digits
 * and the second and fourth bytes as base-10 digits, hence the weights
 * 12600 (= 10 * 126 * 10), 1260 (= 126 * 10), 10 and 1.  The value for
 * 0x81308130 is subtracted so that this code maps to 0.
 *
 * No range checking is done on the individual bytes.
 */
static inline uint32
gb_linear(uint32 gb)
{
	uint32		b0 = (gb & 0xff000000) >> 24;
	uint32		b1 = (gb & 0x00ff0000) >> 16;
	uint32		b2 = (gb & 0x0000ff00) >> 8;
	uint32		b3 = (gb & 0x000000ff);

	return b0 * 12600 + b1 * 1260 + b2 * 10 + b3 -
		(0x81 * 12600 + 0x30 * 1260 + 0x81 * 10 + 0x30);
}

◆ gb_unlinear()

static uint32 gb_unlinear ( uint32  lin)
inline static

Definition at line 44 of file utf8_and_gb18030.c.

/*
 * Convert a linear code-space position back to a four-byte GB18030 code.
 *
 * Inverse of gb_linear(): position 0 yields 0x81308130.  The first and third
 * bytes are offset from 0x81 (the third cycles through 126 values), the
 * second and fourth bytes are offset from 0x30 (each cycles through 10
 * values).  The result is packed into a uint32 with the first byte in the
 * most significant position.
 *
 * No check is made that "lin" is small enough for the first byte to stay
 * within a valid range.
 */
static inline uint32
gb_unlinear(uint32 lin)
{
	uint32		r0 = 0x81 + lin / 12600;
	uint32		r1 = 0x30 + (lin / 1260) % 10;
	uint32		r2 = 0x81 + (lin / 10) % 126;
	uint32		r3 = 0x30 + lin % 10;

	return (r0 << 24) | (r1 << 16) | (r2 << 8) | r3;
}

◆ PG_FUNCTION_INFO_V1() [1/2]

PG_FUNCTION_INFO_V1 ( gb18030_to_utf8  )

◆ PG_FUNCTION_INFO_V1() [2/2]

PG_FUNCTION_INFO_V1 ( utf8_to_gb18030  )

◆ unicode_to_utf8word()

static uint32 unicode_to_utf8word ( uint32  c)
inline static

Definition at line 60 of file utf8_and_gb18030.c.

/*
 * Encode a Unicode code point as UTF-8, packed into a uint32.
 *
 * The UTF-8 bytes are stored with the lead byte in the most significant
 * occupied position and the final continuation byte in the low 8 bits, so a
 * one-byte character occupies 8 bits, a two-byte character 16 bits, and so
 * on up to four bytes.
 *
 * The code point is not validated; anything above 0xFFFF is encoded in the
 * four-byte form using only its low 21 bits.
 */
static inline uint32
unicode_to_utf8word(uint32 c)
{
	uint32		word;

	if (c <= 0x7F)
	{
		word = c;
	}
	else if (c <= 0x7FF)
	{
		word = (0xC0 | ((c >> 6) & 0x1F)) << 8;
		word |= 0x80 | (c & 0x3F);
	}
	else if (c <= 0xFFFF)
	{
		word = (0xE0 | ((c >> 12) & 0x0F)) << 16;
		word |= (0x80 | ((c >> 6) & 0x3F)) << 8;
		word |= 0x80 | (c & 0x3F);
	}
	else
	{
		word = (0xF0 | ((c >> 18) & 0x07)) << 24;
		word |= (0x80 | ((c >> 12) & 0x3F)) << 16;
		word |= (0x80 | ((c >> 6) & 0x3F)) << 8;
		word |= 0x80 | (c & 0x3F);
	}

	return word;
}
char * c
static void word(struct vars *v, int dir, struct state *lp, struct state *rp)
Definition: regcomp.c:1476

References word().

◆ utf8_to_gb18030()

Datum utf8_to_gb18030 ( PG_FUNCTION_ARGS  )

Definition at line 215 of file utf8_and_gb18030.c.

216{
217 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
218 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
219 int len = PG_GETARG_INT32(4);
220 bool noError = PG_GETARG_BOOL(5);
221 int converted;
222
224
225 converted = UtfToLocal(src, len, dest,
226 &gb18030_from_unicode_tree,
227 NULL, 0,
230 noError);
231
232 PG_RETURN_INT32(converted);
233}
int UtfToLocal(const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
Definition: conv.c:507
static uint32 conv_utf8_to_18030(uint32 code)

References CHECK_ENCODING_CONVERSION_ARGS, conv_utf8_to_18030(), generate_unaccent_rules::dest, len, PG_GB18030, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_RETURN_INT32, PG_UTF8, and UtfToLocal().

◆ utf8word_to_unicode()

static uint32 utf8word_to_unicode ( uint32  c)
inline static

Definition at line 91 of file utf8_and_gb18030.c.

/*
 * Decode a UTF-8 character packed into a uint32 to its Unicode code point.
 *
 * Inverse of unicode_to_utf8word().  The byte length is inferred from the
 * magnitude of the packed word (<= 0x7F: one byte, <= 0xFFFF: two bytes,
 * <= 0xFFFFFF: three bytes, otherwise four), and the payload bits of each
 * byte are masked out and reassembled.
 *
 * The lead and continuation marker bits are not checked, so malformed input
 * is decoded without complaint.
 */
static inline uint32
utf8word_to_unicode(uint32 c)
{
	uint32		ucs;

	if (c <= 0x7F)
	{
		ucs = c;
	}
	else if (c <= 0xFFFF)
	{
		ucs = ((c >> 8) & 0x1F) << 6;
		ucs |= c & 0x3F;
	}
	else if (c <= 0xFFFFFF)
	{
		ucs = ((c >> 16) & 0x0F) << 12;
		ucs |= ((c >> 8) & 0x3F) << 6;
		ucs |= c & 0x3F;
	}
	else
	{
		ucs = ((c >> 24) & 0x07) << 18;
		ucs |= ((c >> 16) & 0x3F) << 12;
		ucs |= ((c >> 8) & 0x3F) << 6;
		ucs |= c & 0x3F;
	}

	return ucs;
}

Referenced by conv_utf8_to_18030().

Variable Documentation

◆ PG_MODULE_MAGIC

PG_MODULE_MAGIC

Definition at line 20 of file utf8_and_gb18030.c.