PostgreSQL Source Code  git master
euc_tw_and_big5.c File Reference
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
Include dependency graph for euc_tw_and_big5.c:

Go to the source code of this file.

Macros

#define ENCODING_GROWTH_RATE   4
 

Functions

 PG_FUNCTION_INFO_V1 (euc_tw_to_big5)
 
 PG_FUNCTION_INFO_V1 (big5_to_euc_tw)
 
 PG_FUNCTION_INFO_V1 (euc_tw_to_mic)
 
 PG_FUNCTION_INFO_V1 (mic_to_euc_tw)
 
 PG_FUNCTION_INFO_V1 (big5_to_mic)
 
 PG_FUNCTION_INFO_V1 (mic_to_big5)
 
static void big52mic (const unsigned char *big5, unsigned char *p, int len)
 
static void mic2big5 (const unsigned char *mic, unsigned char *p, int len)
 
static void euc_tw2mic (const unsigned char *euc, unsigned char *p, int len)
 
static void mic2euc_tw (const unsigned char *mic, unsigned char *p, int len)
 
Datum euc_tw_to_big5 (PG_FUNCTION_ARGS)
 
Datum big5_to_euc_tw (PG_FUNCTION_ARGS)
 
Datum euc_tw_to_mic (PG_FUNCTION_ARGS)
 
Datum mic_to_euc_tw (PG_FUNCTION_ARGS)
 
Datum big5_to_mic (PG_FUNCTION_ARGS)
 
Datum mic_to_big5 (PG_FUNCTION_ARGS)
 

Variables

 PG_MODULE_MAGIC
 

Macro Definition Documentation

◆ ENCODING_GROWTH_RATE

#define ENCODING_GROWTH_RATE   4

Definition at line 18 of file euc_tw_and_big5.c.

Referenced by big5_to_euc_tw(), and euc_tw_to_big5().

Function Documentation

◆ big52mic()

static void big52mic ( const unsigned char *  big5,
unsigned char *  p,
int  len 
)
static

Definition at line 253 of file euc_tw_and_big5.c.

References BIG5toCNS(), IS_HIGHBIT_SET, LC_CNS11643_3, LC_CNS11643_4, LCPRV2_B, PG_BIG5, pg_encoding_verifymb(), PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by big5_to_euc_tw(), and big5_to_mic().

254 {
255  unsigned short c1;
256  unsigned short big5buf,
257  cnsBuf;
258  unsigned char lc;
259  int l;
260 
261  while (len > 0)
262  {
263  c1 = *big5;
264  if (!IS_HIGHBIT_SET(c1))
265  {
266  /* ASCII */
267  if (c1 == 0)
268  report_invalid_encoding(PG_BIG5,
269  (const char *) big5, len);
270  *p++ = c1;
271  big5++;
272  len--;
273  continue;
274  }
275  l = pg_encoding_verifymb(PG_BIG5, (const char *) big5, len);
276  if (l < 0)
277  report_invalid_encoding(PG_BIG5,
278  (const char *) big5, len);
279  big5buf = (c1 << 8) | big5[1];
280  cnsBuf = BIG5toCNS(big5buf, &lc);
281  if (lc != 0)
282  {
283  /* Planes 3 and 4 are MULE private charsets */
284  if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
285  *p++ = LCPRV2_B;
286  *p++ = lc; /* Plane No. */
287  *p++ = (cnsBuf >> 8) & 0x00ff;
288  *p++ = cnsBuf & 0x00ff;
289  }
290  else
291  report_untranslatable_char(PG_BIG5, PG_MULE_INTERNAL,
292  (const char *) big5, len);
293  big5 += l;
294  len -= l;
295  }
296  *p = '\0';
297 }
#define LC_CNS11643_4
Definition: pg_wchar.h:205
int pg_encoding_verifymb(int encoding, const char *mbstr, int len)
Definition: wchar.c:1809
#define IS_HIGHBIT_SET(ch)
Definition: c.h:939
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc)
Definition: big5.c:292
#define LCPRV2_B
Definition: pg_wchar.h:163
#define LC_CNS11643_3
Definition: pg_wchar.h:204
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995

◆ big5_to_euc_tw()

Datum big5_to_euc_tw ( PG_FUNCTION_ARGS  )

Definition at line 64 of file euc_tw_and_big5.c.

References big52mic(), buf, CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, ENCODING_GROWTH_RATE, mic2euc_tw(), palloc(), pfree(), PG_BIG5, PG_EUC_TW, PG_GETARG_CSTRING, PG_GETARG_INT32, and PG_RETURN_VOID.

65 {
66  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
67  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
68  int len = PG_GETARG_INT32(4);
69  unsigned char *buf;
70 
71  CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_EUC_TW);
72 
73  buf = palloc(len * ENCODING_GROWTH_RATE + 1);
74  big52mic(src, buf, len);
75  mic2euc_tw(buf, dest, strlen((char *) buf));
76  pfree(buf);
77 
78  PG_RETURN_VOID();
79 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
static void big52mic(const unsigned char *big5, unsigned char *p, int len)
#define ENCODING_GROWTH_RATE
void pfree(void *pointer)
Definition: mcxt.c:949
static char * buf
Definition: pg_test_fsync.c:67
static void mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
#define PG_RETURN_VOID()
Definition: fmgr.h:309
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:503
void * palloc(Size size)
Definition: mcxt.c:848
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:242

◆ big5_to_mic()

Datum big5_to_mic ( PG_FUNCTION_ARGS  )

Definition at line 110 of file euc_tw_and_big5.c.

References big52mic(), CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, PG_BIG5, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_MULE_INTERNAL, and PG_RETURN_VOID.

111 {
112  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
113  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
114  int len = PG_GETARG_INT32(4);
115 
116  CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_MULE_INTERNAL);
117 
118  big52mic(src, dest, len);
119 
120  PG_RETURN_VOID();
121 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
static void big52mic(const unsigned char *big5, unsigned char *p, int len)
#define PG_RETURN_VOID()
Definition: fmgr.h:309
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:503
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:242

◆ euc_tw2mic()

static void euc_tw2mic ( const unsigned char *  euc,
unsigned char *  p,
int  len 
)
static

Definition at line 141 of file euc_tw_and_big5.c.

References IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, LCPRV2_B, pg_encoding_verifymb(), PG_EUC_TW, report_invalid_encoding(), and SS2.

Referenced by euc_tw_to_big5(), and euc_tw_to_mic().

142 {
143  int c1;
144  int l;
145 
146  while (len > 0)
147  {
148  c1 = *euc;
149  if (IS_HIGHBIT_SET(c1))
150  {
151  l = pg_encoding_verifymb(PG_EUC_TW, (const char *) euc, len);
152  if (l < 0)
153  report_invalid_encoding(PG_EUC_TW,
154  (const char *) euc, len);
155  if (c1 == SS2)
156  {
157  c1 = euc[1]; /* plane No. */
158  if (c1 == 0xa1)
159  *p++ = LC_CNS11643_1;
160  else if (c1 == 0xa2)
161  *p++ = LC_CNS11643_2;
162  else
163  {
164  /* other planes are MULE private charsets */
165  *p++ = LCPRV2_B;
166  *p++ = c1 - 0xa3 + LC_CNS11643_3;
167  }
168  *p++ = euc[2];
169  *p++ = euc[3];
170  }
171  else
172  { /* CNS11643-1 */
173  *p++ = LC_CNS11643_1;
174  *p++ = c1;
175  *p++ = euc[1];
176  }
177  euc += l;
178  len -= l;
179  }
180  else
181  { /* should be ASCII */
182  if (c1 == 0)
183  report_invalid_encoding(PG_EUC_TW,
184  (const char *) euc, len);
185  *p++ = c1;
186  euc++;
187  len--;
188  }
189  }
190  *p = '\0';
191 }
#define LC_CNS11643_1
Definition: pg_wchar.h:134
int pg_encoding_verifymb(int encoding, const char *mbstr, int len)
Definition: wchar.c:1809
#define IS_HIGHBIT_SET(ch)
Definition: c.h:939
#define LCPRV2_B
Definition: pg_wchar.h:163
#define LC_CNS11643_2
Definition: pg_wchar.h:135
#define LC_CNS11643_3
Definition: pg_wchar.h:204
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
#define SS2
Definition: pg_wchar.h:35

◆ euc_tw_to_big5()

Datum euc_tw_to_big5 ( PG_FUNCTION_ARGS  )

Definition at line 46 of file euc_tw_and_big5.c.

References buf, CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, ENCODING_GROWTH_RATE, euc_tw2mic(), mic2big5(), palloc(), pfree(), PG_BIG5, PG_EUC_TW, PG_GETARG_CSTRING, PG_GETARG_INT32, and PG_RETURN_VOID.

47 {
48  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
49  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
50  int len = PG_GETARG_INT32(4);
51  unsigned char *buf;
52 
53  CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_BIG5);
54 
55  buf = palloc(len * ENCODING_GROWTH_RATE + 1);
56  euc_tw2mic(src, buf, len);
57  mic2big5(buf, dest, strlen((char *) buf));
58  pfree(buf);
59 
60  PG_RETURN_VOID();
61 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
#define ENCODING_GROWTH_RATE
void pfree(void *pointer)
Definition: mcxt.c:949
static char * buf
Definition: pg_test_fsync.c:67
static void mic2big5(const unsigned char *mic, unsigned char *p, int len)
#define PG_RETURN_VOID()
Definition: fmgr.h:309
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:503
static void euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
void * palloc(Size size)
Definition: mcxt.c:848
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:242

◆ euc_tw_to_mic()

Datum euc_tw_to_mic ( PG_FUNCTION_ARGS  )

Definition at line 82 of file euc_tw_and_big5.c.

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, euc_tw2mic(), PG_EUC_TW, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_MULE_INTERNAL, and PG_RETURN_VOID.

83 {
84  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
85  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
86  int len = PG_GETARG_INT32(4);
87 
88  CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_MULE_INTERNAL);
89 
90  euc_tw2mic(src, dest, len);
91 
92  PG_RETURN_VOID();
93 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
#define PG_RETURN_VOID()
Definition: fmgr.h:309
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:503
static void euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:242

◆ mic2big5()

static void mic2big5 ( const unsigned char *  mic,
unsigned char *  p,
int  len 
)
static

Definition at line 303 of file euc_tw_and_big5.c.

References CNStoBIG5(), IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LCPRV2_B, PG_BIG5, pg_encoding_verifymb(), PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by euc_tw_to_big5(), and mic_to_big5().

304 {
305  unsigned short c1;
306  unsigned short big5buf,
307  cnsBuf;
308  int l;
309 
310  while (len > 0)
311  {
312  c1 = *mic;
313  if (!IS_HIGHBIT_SET(c1))
314  {
315  /* ASCII */
316  if (c1 == 0)
317  report_invalid_encoding(PG_MULE_INTERNAL,
318  (const char *) mic, len);
319  *p++ = c1;
320  mic++;
321  len--;
322  continue;
323  }
324  l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
325  if (l < 0)
326  report_invalid_encoding(PG_MULE_INTERNAL,
327  (const char *) mic, len);
328  if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == LCPRV2_B)
329  {
330  if (c1 == LCPRV2_B)
331  {
332  c1 = mic[1]; /* get plane no. */
333  cnsBuf = (mic[2] << 8) | mic[3];
334  }
335  else
336  {
337  cnsBuf = (mic[1] << 8) | mic[2];
338  }
339  big5buf = CNStoBIG5(cnsBuf, c1);
340  if (big5buf == 0)
341  report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
342  (const char *) mic, len);
343  *p++ = (big5buf >> 8) & 0x00ff;
344  *p++ = big5buf & 0x00ff;
345  }
346  else
347  report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
348  (const char *) mic, len);
349  mic += l;
350  len -= l;
351  }
352  *p = '\0';
353 }
#define LC_CNS11643_1
Definition: pg_wchar.h:134
int pg_encoding_verifymb(int encoding, const char *mbstr, int len)
Definition: wchar.c:1809
#define IS_HIGHBIT_SET(ch)
Definition: c.h:939
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
unsigned short CNStoBIG5(unsigned short cns, unsigned char lc)
Definition: big5.c:345
#define LCPRV2_B
Definition: pg_wchar.h:163
#define LC_CNS11643_2
Definition: pg_wchar.h:135
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995

◆ mic2euc_tw()

static void mic2euc_tw ( const unsigned char *  mic,
unsigned char *  p,
int  len 
)
static

Definition at line 197 of file euc_tw_and_big5.c.

References IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, LC_CNS11643_7, LCPRV2_B, pg_encoding_verifymb(), PG_EUC_TW, PG_MULE_INTERNAL, report_invalid_encoding(), report_untranslatable_char(), and SS2.

Referenced by big5_to_euc_tw(), and mic_to_euc_tw().

198 {
199  int c1;
200  int l;
201 
202  while (len > 0)
203  {
204  c1 = *mic;
205  if (!IS_HIGHBIT_SET(c1))
206  {
207  /* ASCII */
208  if (c1 == 0)
209  report_invalid_encoding(PG_MULE_INTERNAL,
210  (const char *) mic, len);
211  *p++ = c1;
212  mic++;
213  len--;
214  continue;
215  }
216  l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
217  if (l < 0)
218  report_invalid_encoding(PG_MULE_INTERNAL,
219  (const char *) mic, len);
220  if (c1 == LC_CNS11643_1)
221  {
222  *p++ = mic[1];
223  *p++ = mic[2];
224  }
225  else if (c1 == LC_CNS11643_2)
226  {
227  *p++ = SS2;
228  *p++ = 0xa2;
229  *p++ = mic[1];
230  *p++ = mic[2];
231  }
232  else if (c1 == LCPRV2_B &&
233  mic[1] >= LC_CNS11643_3 && mic[1] <= LC_CNS11643_7)
234  {
235  *p++ = SS2;
236  *p++ = mic[1] - LC_CNS11643_3 + 0xa3;
237  *p++ = mic[2];
238  *p++ = mic[3];
239  }
240  else
241  report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_TW,
242  (const char *) mic, len);
243  mic += l;
244  len -= l;
245  }
246  *p = '\0';
247 }
#define LC_CNS11643_1
Definition: pg_wchar.h:134
int pg_encoding_verifymb(int encoding, const char *mbstr, int len)
Definition: wchar.c:1809
#define IS_HIGHBIT_SET(ch)
Definition: c.h:939
#define LC_CNS11643_7
Definition: pg_wchar.h:208
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
#define LCPRV2_B
Definition: pg_wchar.h:163
#define LC_CNS11643_2
Definition: pg_wchar.h:135
#define LC_CNS11643_3
Definition: pg_wchar.h:204
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
#define SS2
Definition: pg_wchar.h:35

◆ mic_to_big5()

Datum mic_to_big5 ( PG_FUNCTION_ARGS  )

Definition at line 124 of file euc_tw_and_big5.c.

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, mic2big5(), PG_BIG5, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_MULE_INTERNAL, and PG_RETURN_VOID.

125 {
126  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
127  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
128  int len = PG_GETARG_INT32(4);
129 
130  CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_BIG5);
131 
132  mic2big5(src, dest, len);
133 
134  PG_RETURN_VOID();
135 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
static void mic2big5(const unsigned char *mic, unsigned char *p, int len)
#define PG_RETURN_VOID()
Definition: fmgr.h:309
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:503
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:242

◆ mic_to_euc_tw()

Datum mic_to_euc_tw ( PG_FUNCTION_ARGS  )

Definition at line 96 of file euc_tw_and_big5.c.

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, mic2euc_tw(), PG_EUC_TW, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_MULE_INTERNAL, and PG_RETURN_VOID.

97 {
98  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
99  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
100  int len = PG_GETARG_INT32(4);
101 
102  CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_TW);
103 
104  mic2euc_tw(src, dest, len);
105 
106  PG_RETURN_VOID();
107 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
static void mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
#define PG_RETURN_VOID()
Definition: fmgr.h:309
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:503
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:242

◆ PG_FUNCTION_INFO_V1() [1/6]

PG_FUNCTION_INFO_V1 ( euc_tw_to_big5  )

◆ PG_FUNCTION_INFO_V1() [2/6]

PG_FUNCTION_INFO_V1 ( big5_to_euc_tw  )

◆ PG_FUNCTION_INFO_V1() [3/6]

PG_FUNCTION_INFO_V1 ( euc_tw_to_mic  )

◆ PG_FUNCTION_INFO_V1() [4/6]

PG_FUNCTION_INFO_V1 ( mic_to_euc_tw  )

◆ PG_FUNCTION_INFO_V1() [5/6]

PG_FUNCTION_INFO_V1 ( big5_to_mic  )

◆ PG_FUNCTION_INFO_V1() [6/6]

PG_FUNCTION_INFO_V1 ( mic_to_big5  )

Variable Documentation

◆ PG_MODULE_MAGIC

PG_MODULE_MAGIC

Definition at line 20 of file euc_tw_and_big5.c.