PostgreSQL Source Code  git master
euc_tw_and_big5.c File Reference
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
Include dependency graph for euc_tw_and_big5.c:

Go to the source code of this file.

Macros

#define ENCODING_GROWTH_RATE   4
 

Functions

 PG_FUNCTION_INFO_V1 (euc_tw_to_big5)
 
 PG_FUNCTION_INFO_V1 (big5_to_euc_tw)
 
 PG_FUNCTION_INFO_V1 (euc_tw_to_mic)
 
 PG_FUNCTION_INFO_V1 (mic_to_euc_tw)
 
 PG_FUNCTION_INFO_V1 (big5_to_mic)
 
 PG_FUNCTION_INFO_V1 (mic_to_big5)
 
static int euc_tw2big5 (const unsigned char *euc, unsigned char *p, int len, bool noError)
 
static int big52euc_tw (const unsigned char *big5, unsigned char *p, int len, bool noError)
 
static int big52mic (const unsigned char *big5, unsigned char *p, int len, bool noError)
 
static int mic2big5 (const unsigned char *mic, unsigned char *p, int len, bool noError)
 
static int euc_tw2mic (const unsigned char *euc, unsigned char *p, int len, bool noError)
 
static int mic2euc_tw (const unsigned char *mic, unsigned char *p, int len, bool noError)
 
Datum euc_tw_to_big5 (PG_FUNCTION_ARGS)
 
Datum big5_to_euc_tw (PG_FUNCTION_ARGS)
 
Datum euc_tw_to_mic (PG_FUNCTION_ARGS)
 
Datum mic_to_euc_tw (PG_FUNCTION_ARGS)
 
Datum big5_to_mic (PG_FUNCTION_ARGS)
 
Datum mic_to_big5 (PG_FUNCTION_ARGS)
 

Variables

 PG_MODULE_MAGIC
 

Macro Definition Documentation

◆ ENCODING_GROWTH_RATE

#define ENCODING_GROWTH_RATE   4

Definition at line 18 of file euc_tw_and_big5.c.

Function Documentation

◆ big52euc_tw()

static int big52euc_tw ( const unsigned char *  big5,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 229 of file euc_tw_and_big5.c.

230 {
231  const unsigned char *start = big5;
232  unsigned short c1;
233  unsigned short big5buf,
234  cnsBuf;
235  unsigned char lc;
236  int l;
237 
238  while (len > 0)
239  {
240  /* Verify and decode the next Big5 input character */
241  c1 = *big5;
242  if (IS_HIGHBIT_SET(c1))
243  {
244  l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
245  if (l < 0)
246  {
247  if (noError)
248  break;
249  report_invalid_encoding(PG_BIG5,
250  (const char *) big5, len);
251  }
252  big5buf = (c1 << 8) | big5[1];
253  cnsBuf = BIG5toCNS(big5buf, &lc);
254 
255  if (lc == LC_CNS11643_1)
256  {
257  *p++ = (cnsBuf >> 8) & 0x00ff;
258  *p++ = cnsBuf & 0x00ff;
259  }
260  else if (lc == LC_CNS11643_2)
261  {
262  *p++ = SS2;
263  *p++ = 0xa2;
264  *p++ = (cnsBuf >> 8) & 0x00ff;
265  *p++ = cnsBuf & 0x00ff;
266  }
267  else if (lc >= LC_CNS11643_3 && lc <= LC_CNS11643_7)
268  {
269  *p++ = SS2;
270  *p++ = lc - LC_CNS11643_3 + 0xa3;
271  *p++ = (cnsBuf >> 8) & 0x00ff;
272  *p++ = cnsBuf & 0x00ff;
273  }
274  else
275  {
276  if (noError)
277  break;
278  report_untranslatable_char(PG_BIG5, PG_EUC_TW,
279  (const char *) big5, len);
280  }
281 
282  big5 += l;
283  len -= l;
284  }
285  else
286  {
287  /* ASCII */
288  if (c1 == 0)
289  {
290  if (noError)
291  break;
292  report_invalid_encoding(PG_BIG5,
293  (const char *) big5, len);
294  }
295  *p++ = c1;
296  big5++;
297  len--;
298  continue;
299  }
300  }
301  *p = '\0';
302 
303  return big5 - start;
304 }
unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc)
Definition: big5.c:292
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1168
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1737
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1705
const void size_t len
#define LC_CNS11643_7
Definition: pg_wchar.h:198
@ PG_EUC_TW
Definition: pg_wchar.h:233
@ PG_BIG5
Definition: pg_wchar.h:268
#define LC_CNS11643_3
Definition: pg_wchar.h:194
#define SS2
Definition: pg_wchar.h:40
#define LC_CNS11643_1
Definition: pg_wchar.h:139
#define LC_CNS11643_2
Definition: pg_wchar.h:140
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition: wchar.c:2164

References BIG5toCNS(), IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, LC_CNS11643_7, len, PG_BIG5, pg_encoding_verifymbchar(), PG_EUC_TW, report_invalid_encoding(), report_untranslatable_char(), and SS2.

Referenced by big5_to_euc_tw().

◆ big52mic()

static int big52mic ( const unsigned char *  big5,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 448 of file euc_tw_and_big5.c.

449 {
450  const unsigned char *start = big5;
451  unsigned short c1;
452  unsigned short big5buf,
453  cnsBuf;
454  unsigned char lc;
455  int l;
456 
457  while (len > 0)
458  {
459  c1 = *big5;
460  if (!IS_HIGHBIT_SET(c1))
461  {
462  /* ASCII */
463  if (c1 == 0)
464  {
465  if (noError)
466  break;
467  report_invalid_encoding(PG_BIG5,
468  (const char *) big5, len);
469  }
470  *p++ = c1;
471  big5++;
472  len--;
473  continue;
474  }
475  l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
476  if (l < 0)
477  {
478  if (noError)
479  break;
480  report_invalid_encoding(PG_BIG5,
481  (const char *) big5, len);
482  }
483  big5buf = (c1 << 8) | big5[1];
484  cnsBuf = BIG5toCNS(big5buf, &lc);
485  if (lc != 0)
486  {
487  /* Planes 3 and 4 are MULE private charsets */
488  if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
489  *p++ = LCPRV2_B;
490  *p++ = lc; /* Plane No. */
491  *p++ = (cnsBuf >> 8) & 0x00ff;
492  *p++ = cnsBuf & 0x00ff;
493  }
494  else
495  {
496  if (noError)
497  break;
498  report_untranslatable_char(PG_BIG5, PG_MULE_INTERNAL,
499  (const char *) big5, len);
500  }
501  big5 += l;
502  len -= l;
503  }
504  *p = '\0';
505 
506  return big5 - start;
507 }
@ PG_MULE_INTERNAL
Definition: pg_wchar.h:236
#define LC_CNS11643_4
Definition: pg_wchar.h:195
#define LCPRV2_B
Definition: pg_wchar.h:165

References BIG5toCNS(), IS_HIGHBIT_SET, LC_CNS11643_3, LC_CNS11643_4, LCPRV2_B, len, PG_BIG5, pg_encoding_verifymbchar(), PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by big5_to_mic().

◆ big5_to_euc_tw()

Datum big5_to_euc_tw ( PG_FUNCTION_ARGS  )

Definition at line 67 of file euc_tw_and_big5.c.

68 {
69  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
70  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
71  int len = PG_GETARG_INT32(4);
72  bool noError = PG_GETARG_BOOL(5);
73  int converted;
74 
75  CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_EUC_TW);
76 
77  converted = big52euc_tw(src, dest, len, noError);
78 
79  PG_RETURN_INT32(converted);
80 }
static int big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError)
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:516

References big52euc_tw(), CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, len, PG_BIG5, PG_EUC_TW, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, and PG_RETURN_INT32.

◆ big5_to_mic()

Datum big5_to_mic ( PG_FUNCTION_ARGS  )

Definition at line 115 of file euc_tw_and_big5.c.

116 {
117  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
118  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
119  int len = PG_GETARG_INT32(4);
120  bool noError = PG_GETARG_BOOL(5);
121  int converted;
122 
123  CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_MULE_INTERNAL);
124 
125  converted = big52mic(src, dest, len, noError);
126 
127  PG_RETURN_INT32(converted);
128 }
static int big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError)

References big52mic(), CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, len, PG_BIG5, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_MULE_INTERNAL, and PG_RETURN_INT32.

◆ euc_tw2big5()

static int euc_tw2big5 ( const unsigned char *  euc,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 151 of file euc_tw_and_big5.c.

152 {
153  const unsigned char *start = euc;
154  unsigned char c1;
155  unsigned short big5buf,
156  cnsBuf;
157  unsigned char lc;
158  int l;
159 
160  while (len > 0)
161  {
162  c1 = *euc;
163  if (IS_HIGHBIT_SET(c1))
164  {
165  /* Verify and decode the next EUC_TW input character */
166  l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
167  if (l < 0)
168  {
169  if (noError)
170  break;
171  report_invalid_encoding(PG_EUC_TW,
172  (const char *) euc, len);
173  }
174  if (c1 == SS2)
175  {
176  c1 = euc[1]; /* plane No. */
177  if (c1 == 0xa1)
178  lc = LC_CNS11643_1;
179  else if (c1 == 0xa2)
180  lc = LC_CNS11643_2;
181  else
182  lc = c1 - 0xa3 + LC_CNS11643_3;
183  cnsBuf = (euc[2] << 8) | euc[3];
184  }
185  else
186  { /* CNS11643-1 */
187  lc = LC_CNS11643_1;
188  cnsBuf = (c1 << 8) | euc[1];
189  }
190 
191  /* Write it out in Big5 */
192  big5buf = CNStoBIG5(cnsBuf, lc);
193  if (big5buf == 0)
194  {
195  if (noError)
196  break;
197  report_untranslatable_char(PG_EUC_TW, PG_BIG5,
198  (const char *) euc, len);
199  }
200  *p++ = (big5buf >> 8) & 0x00ff;
201  *p++ = big5buf & 0x00ff;
202 
203  euc += l;
204  len -= l;
205  }
206  else
207  { /* should be ASCII */
208  if (c1 == 0)
209  {
210  if (noError)
211  break;
212  report_invalid_encoding(PG_EUC_TW,
213  (const char *) euc, len);
214  }
215  *p++ = c1;
216  euc++;
217  len--;
218  }
219  }
220  *p = '\0';
221 
222  return euc - start;
223 }
unsigned short CNStoBIG5(unsigned short cns, unsigned char lc)
Definition: big5.c:345

References CNStoBIG5(), IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, len, PG_BIG5, pg_encoding_verifymbchar(), PG_EUC_TW, report_invalid_encoding(), report_untranslatable_char(), and SS2.

Referenced by euc_tw_to_big5().

◆ euc_tw2mic()

static int euc_tw2mic ( const unsigned char *  euc,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 310 of file euc_tw_and_big5.c.

311 {
312  const unsigned char *start = euc;
313  int c1;
314  int l;
315 
316  while (len > 0)
317  {
318  c1 = *euc;
319  if (IS_HIGHBIT_SET(c1))
320  {
321  l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
322  if (l < 0)
323  {
324  if (noError)
325  break;
326  report_invalid_encoding(PG_EUC_TW,
327  (const char *) euc, len);
328  }
329  if (c1 == SS2)
330  {
331  c1 = euc[1]; /* plane No. */
332  if (c1 == 0xa1)
333  *p++ = LC_CNS11643_1;
334  else if (c1 == 0xa2)
335  *p++ = LC_CNS11643_2;
336  else
337  {
338  /* other planes are MULE private charsets */
339  *p++ = LCPRV2_B;
340  *p++ = c1 - 0xa3 + LC_CNS11643_3;
341  }
342  *p++ = euc[2];
343  *p++ = euc[3];
344  }
345  else
346  { /* CNS11643-1 */
347  *p++ = LC_CNS11643_1;
348  *p++ = c1;
349  *p++ = euc[1];
350  }
351  euc += l;
352  len -= l;
353  }
354  else
355  { /* should be ASCII */
356  if (c1 == 0)
357  {
358  if (noError)
359  break;
360  report_invalid_encoding(PG_EUC_TW,
361  (const char *) euc, len);
362  }
363  *p++ = c1;
364  euc++;
365  len--;
366  }
367  }
368  *p = '\0';
369 
370  return euc - start;
371 }

References IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, LCPRV2_B, len, pg_encoding_verifymbchar(), PG_EUC_TW, report_invalid_encoding(), and SS2.

Referenced by euc_tw_to_mic().

◆ euc_tw_to_big5()

Datum euc_tw_to_big5 ( PG_FUNCTION_ARGS  )

Definition at line 51 of file euc_tw_and_big5.c.

52 {
53  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
54  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
55  int len = PG_GETARG_INT32(4);
56  bool noError = PG_GETARG_BOOL(5);
57  int converted;
58 
59  CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_BIG5);
60 
61  converted = euc_tw2big5(src, dest, len, noError);
62 
63  PG_RETURN_INT32(converted);
64 }
static int euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError)

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, euc_tw2big5(), len, PG_BIG5, PG_EUC_TW, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, and PG_RETURN_INT32.

◆ euc_tw_to_mic()

Datum euc_tw_to_mic ( PG_FUNCTION_ARGS  )

Definition at line 83 of file euc_tw_and_big5.c.

84 {
85  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
86  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
87  int len = PG_GETARG_INT32(4);
88  bool noError = PG_GETARG_BOOL(5);
89  int converted;
90 
91  CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_MULE_INTERNAL);
92 
93  converted = euc_tw2mic(src, dest, len, noError);
94 
95  PG_RETURN_INT32(converted);
96 }
static int euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, euc_tw2mic(), len, PG_EUC_TW, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_MULE_INTERNAL, and PG_RETURN_INT32.

◆ mic2big5()

static int mic2big5 ( const unsigned char *  mic,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 513 of file euc_tw_and_big5.c.

514 {
515  const unsigned char *start = mic;
516  unsigned short c1;
517  unsigned short big5buf,
518  cnsBuf;
519  int l;
520 
521  while (len > 0)
522  {
523  c1 = *mic;
524  if (!IS_HIGHBIT_SET(c1))
525  {
526  /* ASCII */
527  if (c1 == 0)
528  {
529  if (noError)
530  break;
531  report_invalid_encoding(PG_MULE_INTERNAL,
532  (const char *) mic, len);
533  }
534  *p++ = c1;
535  mic++;
536  len--;
537  continue;
538  }
539  l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
540  if (l < 0)
541  {
542  if (noError)
543  break;
544  report_invalid_encoding(PG_MULE_INTERNAL,
545  (const char *) mic, len);
546  }
547  if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == LCPRV2_B)
548  {
549  if (c1 == LCPRV2_B)
550  {
551  c1 = mic[1]; /* get plane no. */
552  cnsBuf = (mic[2] << 8) | mic[3];
553  }
554  else
555  {
556  cnsBuf = (mic[1] << 8) | mic[2];
557  }
558  big5buf = CNStoBIG5(cnsBuf, c1);
559  if (big5buf == 0)
560  {
561  if (noError)
562  break;
563  report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
564  (const char *) mic, len);
565  }
566  *p++ = (big5buf >> 8) & 0x00ff;
567  *p++ = big5buf & 0x00ff;
568  }
569  else
570  {
571  if (noError)
572  break;
573  report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
574  (const char *) mic, len);
575  }
576  mic += l;
577  len -= l;
578  }
579  *p = '\0';
580 
581  return mic - start;
582 }

References CNStoBIG5(), IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LCPRV2_B, len, PG_BIG5, pg_encoding_verifymbchar(), PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by mic_to_big5().

◆ mic2euc_tw()

static int mic2euc_tw ( const unsigned char *  mic,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 377 of file euc_tw_and_big5.c.

378 {
379  const unsigned char *start = mic;
380  int c1;
381  int l;
382 
383  while (len > 0)
384  {
385  c1 = *mic;
386  if (!IS_HIGHBIT_SET(c1))
387  {
388  /* ASCII */
389  if (c1 == 0)
390  {
391  if (noError)
392  break;
393  report_invalid_encoding(PG_MULE_INTERNAL,
394  (const char *) mic, len);
395  }
396  *p++ = c1;
397  mic++;
398  len--;
399  continue;
400  }
401  l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
402  if (l < 0)
403  {
404  if (noError)
405  break;
406  report_invalid_encoding(PG_MULE_INTERNAL,
407  (const char *) mic, len);
408  }
409  if (c1 == LC_CNS11643_1)
410  {
411  *p++ = mic[1];
412  *p++ = mic[2];
413  }
414  else if (c1 == LC_CNS11643_2)
415  {
416  *p++ = SS2;
417  *p++ = 0xa2;
418  *p++ = mic[1];
419  *p++ = mic[2];
420  }
421  else if (c1 == LCPRV2_B &&
422  mic[1] >= LC_CNS11643_3 && mic[1] <= LC_CNS11643_7)
423  {
424  *p++ = SS2;
425  *p++ = mic[1] - LC_CNS11643_3 + 0xa3;
426  *p++ = mic[2];
427  *p++ = mic[3];
428  }
429  else
430  {
431  if (noError)
432  break;
433  report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_TW,
434  (const char *) mic, len);
435  }
436  mic += l;
437  len -= l;
438  }
439  *p = '\0';
440 
441  return mic - start;
442 }

References IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, LC_CNS11643_7, LCPRV2_B, len, pg_encoding_verifymbchar(), PG_EUC_TW, PG_MULE_INTERNAL, report_invalid_encoding(), report_untranslatable_char(), and SS2.

Referenced by mic_to_euc_tw().

◆ mic_to_big5()

Datum mic_to_big5 ( PG_FUNCTION_ARGS  )

Definition at line 131 of file euc_tw_and_big5.c.

132 {
133  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
134  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
135  int len = PG_GETARG_INT32(4);
136  bool noError = PG_GETARG_BOOL(5);
137  int converted;
138 
139  CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_BIG5);
140 
141  converted = mic2big5(src, dest, len, noError);
142 
143  PG_RETURN_INT32(converted);
144 }
static int mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError)

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, len, mic2big5(), PG_BIG5, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_MULE_INTERNAL, and PG_RETURN_INT32.

◆ mic_to_euc_tw()

Datum mic_to_euc_tw ( PG_FUNCTION_ARGS  )

Definition at line 99 of file euc_tw_and_big5.c.

100 {
101  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
102  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
103  int len = PG_GETARG_INT32(4);
104  bool noError = PG_GETARG_BOOL(5);
105  int converted;
106 
107  CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_TW);
108 
109  converted = mic2euc_tw(src, dest, len, noError);
110 
111  PG_RETURN_INT32(converted);
112 }
static int mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError)

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, len, mic2euc_tw(), PG_EUC_TW, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_MULE_INTERNAL, and PG_RETURN_INT32.

◆ PG_FUNCTION_INFO_V1() [1/6]

PG_FUNCTION_INFO_V1 ( big5_to_euc_tw  )

◆ PG_FUNCTION_INFO_V1() [2/6]

PG_FUNCTION_INFO_V1 ( big5_to_mic  )

◆ PG_FUNCTION_INFO_V1() [3/6]

PG_FUNCTION_INFO_V1 ( euc_tw_to_big5  )

◆ PG_FUNCTION_INFO_V1() [4/6]

PG_FUNCTION_INFO_V1 ( euc_tw_to_mic  )

◆ PG_FUNCTION_INFO_V1() [5/6]

PG_FUNCTION_INFO_V1 ( mic_to_big5  )

◆ PG_FUNCTION_INFO_V1() [6/6]

PG_FUNCTION_INFO_V1 ( mic_to_euc_tw  )

Variable Documentation

◆ PG_MODULE_MAGIC

PG_MODULE_MAGIC

Definition at line 20 of file euc_tw_and_big5.c.