PostgreSQL Source Code  git master
euc_tw_and_big5.c File Reference
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
Include dependency graph for euc_tw_and_big5.c:

Go to the source code of this file.

Functions

 PG_FUNCTION_INFO_V1 (euc_tw_to_big5)
 
 PG_FUNCTION_INFO_V1 (big5_to_euc_tw)
 
 PG_FUNCTION_INFO_V1 (euc_tw_to_mic)
 
 PG_FUNCTION_INFO_V1 (mic_to_euc_tw)
 
 PG_FUNCTION_INFO_V1 (big5_to_mic)
 
 PG_FUNCTION_INFO_V1 (mic_to_big5)
 
static int euc_tw2big5 (const unsigned char *euc, unsigned char *p, int len, bool noError)
 
static int big52euc_tw (const unsigned char *big5, unsigned char *p, int len, bool noError)
 
static int big52mic (const unsigned char *big5, unsigned char *p, int len, bool noError)
 
static int mic2big5 (const unsigned char *mic, unsigned char *p, int len, bool noError)
 
static int euc_tw2mic (const unsigned char *euc, unsigned char *p, int len, bool noError)
 
static int mic2euc_tw (const unsigned char *mic, unsigned char *p, int len, bool noError)
 
Datum euc_tw_to_big5 (PG_FUNCTION_ARGS)
 
Datum big5_to_euc_tw (PG_FUNCTION_ARGS)
 
Datum euc_tw_to_mic (PG_FUNCTION_ARGS)
 
Datum mic_to_euc_tw (PG_FUNCTION_ARGS)
 
Datum big5_to_mic (PG_FUNCTION_ARGS)
 
Datum mic_to_big5 (PG_FUNCTION_ARGS)
 

Variables

 PG_MODULE_MAGIC
 

Function Documentation

◆ big52euc_tw()

static int big52euc_tw ( const unsigned char *  big5,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 227 of file euc_tw_and_big5.c.

228 {
229  const unsigned char *start = big5;
230  unsigned short c1;
231  unsigned short big5buf,
232  cnsBuf;
233  unsigned char lc;
234  int l;
235 
236  while (len > 0)
237  {
238  /* Verify and decode the next Big5 input character */
239  c1 = *big5;
240  if (IS_HIGHBIT_SET(c1))
241  {
242  l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
243  if (l < 0)
244  {
245  if (noError)
246  break;
247  report_invalid_encoding(PG_BIG5,
248  (const char *) big5, len);
249  }
250  big5buf = (c1 << 8) | big5[1];
251  cnsBuf = BIG5toCNS(big5buf, &lc);
252 
253  if (lc == LC_CNS11643_1)
254  {
255  *p++ = (cnsBuf >> 8) & 0x00ff;
256  *p++ = cnsBuf & 0x00ff;
257  }
258  else if (lc == LC_CNS11643_2)
259  {
260  *p++ = SS2;
261  *p++ = 0xa2;
262  *p++ = (cnsBuf >> 8) & 0x00ff;
263  *p++ = cnsBuf & 0x00ff;
264  }
265  else if (lc >= LC_CNS11643_3 && lc <= LC_CNS11643_7)
266  {
267  *p++ = SS2;
268  *p++ = lc - LC_CNS11643_3 + 0xa3;
269  *p++ = (cnsBuf >> 8) & 0x00ff;
270  *p++ = cnsBuf & 0x00ff;
271  }
272  else
273  {
274  if (noError)
275  break;
276  report_untranslatable_char(PG_BIG5, PG_EUC_TW,
277  (const char *) big5, len);
278  }
279 
280  big5 += l;
281  len -= l;
282  }
283  else
284  {
285  /* ASCII */
286  if (c1 == 0)
287  {
288  if (noError)
289  break;
290  report_invalid_encoding(PG_BIG5,
291  (const char *) big5, len);
292  }
293  *p++ = c1;
294  big5++;
295  len--;
296  continue;
297  }
298  }
299  *p = '\0';
300 
301  return big5 - start;
302 }
unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc)
Definition: big5.c:292
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1146
return str start
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1730
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698
const void size_t len
#define LC_CNS11643_7
Definition: pg_wchar.h:196
@ PG_EUC_TW
Definition: pg_wchar.h:230
@ PG_BIG5
Definition: pg_wchar.h:265
#define LC_CNS11643_3
Definition: pg_wchar.h:192
#define SS2
Definition: pg_wchar.h:38
#define LC_CNS11643_1
Definition: pg_wchar.h:137
#define LC_CNS11643_2
Definition: pg_wchar.h:138
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition: wchar.c:2103

References BIG5toCNS(), IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, LC_CNS11643_7, len, PG_BIG5, pg_encoding_verifymbchar(), PG_EUC_TW, report_invalid_encoding(), report_untranslatable_char(), SS2, and start.

Referenced by big5_to_euc_tw().

◆ big52mic()

static int big52mic ( const unsigned char *  big5,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 446 of file euc_tw_and_big5.c.

447 {
448  const unsigned char *start = big5;
449  unsigned short c1;
450  unsigned short big5buf,
451  cnsBuf;
452  unsigned char lc;
453  int l;
454 
455  while (len > 0)
456  {
457  c1 = *big5;
458  if (!IS_HIGHBIT_SET(c1))
459  {
460  /* ASCII */
461  if (c1 == 0)
462  {
463  if (noError)
464  break;
465  report_invalid_encoding(PG_BIG5,
466  (const char *) big5, len);
467  }
468  *p++ = c1;
469  big5++;
470  len--;
471  continue;
472  }
473  l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
474  if (l < 0)
475  {
476  if (noError)
477  break;
478  report_invalid_encoding(PG_BIG5,
479  (const char *) big5, len);
480  }
481  big5buf = (c1 << 8) | big5[1];
482  cnsBuf = BIG5toCNS(big5buf, &lc);
483  if (lc != 0)
484  {
485  /* Planes 3 and 4 are MULE private charsets */
486  if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
487  *p++ = LCPRV2_B;
488  *p++ = lc; /* Plane No. */
489  *p++ = (cnsBuf >> 8) & 0x00ff;
490  *p++ = cnsBuf & 0x00ff;
491  }
492  else
493  {
494  if (noError)
495  break;
496  report_untranslatable_char(PG_BIG5, PG_MULE_INTERNAL,
497  (const char *) big5, len);
498  }
499  big5 += l;
500  len -= l;
501  }
502  *p = '\0';
503 
504  return big5 - start;
505 }
@ PG_MULE_INTERNAL
Definition: pg_wchar.h:233
#define LC_CNS11643_4
Definition: pg_wchar.h:193
#define LCPRV2_B
Definition: pg_wchar.h:163

References BIG5toCNS(), IS_HIGHBIT_SET, LC_CNS11643_3, LC_CNS11643_4, LCPRV2_B, len, PG_BIG5, pg_encoding_verifymbchar(), PG_MULE_INTERNAL, report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by big5_to_mic().

◆ big5_to_euc_tw()

Datum big5_to_euc_tw ( PG_FUNCTION_ARGS  )

Definition at line 65 of file euc_tw_and_big5.c.

66 {
67  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
68  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
69  int len = PG_GETARG_INT32(4);
70  bool noError = PG_GETARG_BOOL(5);
71  int converted;
72 
73  CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_EUC_TW);
74 
75  converted = big52euc_tw(src, dest, len, noError);
76 
77  PG_RETURN_INT32(converted);
78 }
static int big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError)
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:507

References big52euc_tw(), CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, len, PG_BIG5, PG_EUC_TW, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, and PG_RETURN_INT32.

◆ big5_to_mic()

Datum big5_to_mic ( PG_FUNCTION_ARGS  )

Definition at line 113 of file euc_tw_and_big5.c.

114 {
115  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
116  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
117  int len = PG_GETARG_INT32(4);
118  bool noError = PG_GETARG_BOOL(5);
119  int converted;
120 
121  CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_MULE_INTERNAL);
122 
123  converted = big52mic(src, dest, len, noError);
124 
125  PG_RETURN_INT32(converted);
126 }
static int big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError)

References big52mic(), CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, len, PG_BIG5, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_MULE_INTERNAL, and PG_RETURN_INT32.

◆ euc_tw2big5()

static int euc_tw2big5 ( const unsigned char *  euc,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 149 of file euc_tw_and_big5.c.

150 {
151  const unsigned char *start = euc;
152  unsigned char c1;
153  unsigned short big5buf,
154  cnsBuf;
155  unsigned char lc;
156  int l;
157 
158  while (len > 0)
159  {
160  c1 = *euc;
161  if (IS_HIGHBIT_SET(c1))
162  {
163  /* Verify and decode the next EUC_TW input character */
164  l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
165  if (l < 0)
166  {
167  if (noError)
168  break;
169  report_invalid_encoding(PG_EUC_TW,
170  (const char *) euc, len);
171  }
172  if (c1 == SS2)
173  {
174  c1 = euc[1]; /* plane No. */
175  if (c1 == 0xa1)
176  lc = LC_CNS11643_1;
177  else if (c1 == 0xa2)
178  lc = LC_CNS11643_2;
179  else
180  lc = c1 - 0xa3 + LC_CNS11643_3;
181  cnsBuf = (euc[2] << 8) | euc[3];
182  }
183  else
184  { /* CNS11643-1 */
185  lc = LC_CNS11643_1;
186  cnsBuf = (c1 << 8) | euc[1];
187  }
188 
189  /* Write it out in Big5 */
190  big5buf = CNStoBIG5(cnsBuf, lc);
191  if (big5buf == 0)
192  {
193  if (noError)
194  break;
195  report_untranslatable_char(PG_EUC_TW, PG_BIG5,
196  (const char *) euc, len);
197  }
198  *p++ = (big5buf >> 8) & 0x00ff;
199  *p++ = big5buf & 0x00ff;
200 
201  euc += l;
202  len -= l;
203  }
204  else
205  { /* should be ASCII */
206  if (c1 == 0)
207  {
208  if (noError)
209  break;
210  report_invalid_encoding(PG_EUC_TW,
211  (const char *) euc, len);
212  }
213  *p++ = c1;
214  euc++;
215  len--;
216  }
217  }
218  *p = '\0';
219 
220  return euc - start;
221 }
unsigned short CNStoBIG5(unsigned short cns, unsigned char lc)
Definition: big5.c:345

References CNStoBIG5(), IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, len, PG_BIG5, pg_encoding_verifymbchar(), PG_EUC_TW, report_invalid_encoding(), report_untranslatable_char(), SS2, and start.

Referenced by euc_tw_to_big5().

◆ euc_tw2mic()

static int euc_tw2mic ( const unsigned char *  euc,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 308 of file euc_tw_and_big5.c.

309 {
310  const unsigned char *start = euc;
311  int c1;
312  int l;
313 
314  while (len > 0)
315  {
316  c1 = *euc;
317  if (IS_HIGHBIT_SET(c1))
318  {
319  l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
320  if (l < 0)
321  {
322  if (noError)
323  break;
324  report_invalid_encoding(PG_EUC_TW,
325  (const char *) euc, len);
326  }
327  if (c1 == SS2)
328  {
329  c1 = euc[1]; /* plane No. */
330  if (c1 == 0xa1)
331  *p++ = LC_CNS11643_1;
332  else if (c1 == 0xa2)
333  *p++ = LC_CNS11643_2;
334  else
335  {
336  /* other planes are MULE private charsets */
337  *p++ = LCPRV2_B;
338  *p++ = c1 - 0xa3 + LC_CNS11643_3;
339  }
340  *p++ = euc[2];
341  *p++ = euc[3];
342  }
343  else
344  { /* CNS11643-1 */
345  *p++ = LC_CNS11643_1;
346  *p++ = c1;
347  *p++ = euc[1];
348  }
349  euc += l;
350  len -= l;
351  }
352  else
353  { /* should be ASCII */
354  if (c1 == 0)
355  {
356  if (noError)
357  break;
358  report_invalid_encoding(PG_EUC_TW,
359  (const char *) euc, len);
360  }
361  *p++ = c1;
362  euc++;
363  len--;
364  }
365  }
366  *p = '\0';
367 
368  return euc - start;
369 }

References IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, LCPRV2_B, len, pg_encoding_verifymbchar(), PG_EUC_TW, report_invalid_encoding(), SS2, and start.

Referenced by euc_tw_to_mic().

◆ euc_tw_to_big5()

Datum euc_tw_to_big5 ( PG_FUNCTION_ARGS  )

Definition at line 49 of file euc_tw_and_big5.c.

50 {
51  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
52  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
53  int len = PG_GETARG_INT32(4);
54  bool noError = PG_GETARG_BOOL(5);
55  int converted;
56 
57  CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_BIG5);
58 
59  converted = euc_tw2big5(src, dest, len, noError);
60 
61  PG_RETURN_INT32(converted);
62 }
static int euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError)

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, euc_tw2big5(), len, PG_BIG5, PG_EUC_TW, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, and PG_RETURN_INT32.

◆ euc_tw_to_mic()

Datum euc_tw_to_mic ( PG_FUNCTION_ARGS  )

Definition at line 81 of file euc_tw_and_big5.c.

82 {
83  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
84  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
85  int len = PG_GETARG_INT32(4);
86  bool noError = PG_GETARG_BOOL(5);
87  int converted;
88 
89  CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_MULE_INTERNAL);
90 
91  converted = euc_tw2mic(src, dest, len, noError);
92 
93  PG_RETURN_INT32(converted);
94 }
static int euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, euc_tw2mic(), len, PG_EUC_TW, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_MULE_INTERNAL, and PG_RETURN_INT32.

◆ mic2big5()

static int mic2big5 ( const unsigned char *  mic,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 511 of file euc_tw_and_big5.c.

512 {
513  const unsigned char *start = mic;
514  unsigned short c1;
515  unsigned short big5buf,
516  cnsBuf;
517  int l;
518 
519  while (len > 0)
520  {
521  c1 = *mic;
522  if (!IS_HIGHBIT_SET(c1))
523  {
524  /* ASCII */
525  if (c1 == 0)
526  {
527  if (noError)
528  break;
529  report_invalid_encoding(PG_MULE_INTERNAL,
530  (const char *) mic, len);
531  }
532  *p++ = c1;
533  mic++;
534  len--;
535  continue;
536  }
537  l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
538  if (l < 0)
539  {
540  if (noError)
541  break;
542  report_invalid_encoding(PG_MULE_INTERNAL,
543  (const char *) mic, len);
544  }
545  if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == LCPRV2_B)
546  {
547  if (c1 == LCPRV2_B)
548  {
549  c1 = mic[1]; /* get plane no. */
550  cnsBuf = (mic[2] << 8) | mic[3];
551  }
552  else
553  {
554  cnsBuf = (mic[1] << 8) | mic[2];
555  }
556  big5buf = CNStoBIG5(cnsBuf, c1);
557  if (big5buf == 0)
558  {
559  if (noError)
560  break;
561  report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
562  (const char *) mic, len);
563  }
564  *p++ = (big5buf >> 8) & 0x00ff;
565  *p++ = big5buf & 0x00ff;
566  }
567  else
568  {
569  if (noError)
570  break;
571  report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
572  (const char *) mic, len);
573  }
574  mic += l;
575  len -= l;
576  }
577  *p = '\0';
578 
579  return mic - start;
580 }

References CNStoBIG5(), IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LCPRV2_B, len, PG_BIG5, pg_encoding_verifymbchar(), PG_MULE_INTERNAL, report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by mic_to_big5().

◆ mic2euc_tw()

static int mic2euc_tw ( const unsigned char *  mic,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 375 of file euc_tw_and_big5.c.

376 {
377  const unsigned char *start = mic;
378  int c1;
379  int l;
380 
381  while (len > 0)
382  {
383  c1 = *mic;
384  if (!IS_HIGHBIT_SET(c1))
385  {
386  /* ASCII */
387  if (c1 == 0)
388  {
389  if (noError)
390  break;
391  report_invalid_encoding(PG_MULE_INTERNAL,
392  (const char *) mic, len);
393  }
394  *p++ = c1;
395  mic++;
396  len--;
397  continue;
398  }
399  l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
400  if (l < 0)
401  {
402  if (noError)
403  break;
404  report_invalid_encoding(PG_MULE_INTERNAL,
405  (const char *) mic, len);
406  }
407  if (c1 == LC_CNS11643_1)
408  {
409  *p++ = mic[1];
410  *p++ = mic[2];
411  }
412  else if (c1 == LC_CNS11643_2)
413  {
414  *p++ = SS2;
415  *p++ = 0xa2;
416  *p++ = mic[1];
417  *p++ = mic[2];
418  }
419  else if (c1 == LCPRV2_B &&
420  mic[1] >= LC_CNS11643_3 && mic[1] <= LC_CNS11643_7)
421  {
422  *p++ = SS2;
423  *p++ = mic[1] - LC_CNS11643_3 + 0xa3;
424  *p++ = mic[2];
425  *p++ = mic[3];
426  }
427  else
428  {
429  if (noError)
430  break;
431  report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_TW,
432  (const char *) mic, len);
433  }
434  mic += l;
435  len -= l;
436  }
437  *p = '\0';
438 
439  return mic - start;
440 }

References IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, LC_CNS11643_7, LCPRV2_B, len, pg_encoding_verifymbchar(), PG_EUC_TW, PG_MULE_INTERNAL, report_invalid_encoding(), report_untranslatable_char(), SS2, and start.

Referenced by mic_to_euc_tw().

◆ mic_to_big5()

Datum mic_to_big5 ( PG_FUNCTION_ARGS  )

Definition at line 129 of file euc_tw_and_big5.c.

130 {
131  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
132  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
133  int len = PG_GETARG_INT32(4);
134  bool noError = PG_GETARG_BOOL(5);
135  int converted;
136 
137  CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_BIG5);
138 
139  converted = mic2big5(src, dest, len, noError);
140 
141  PG_RETURN_INT32(converted);
142 }
static int mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError)

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, len, mic2big5(), PG_BIG5, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_MULE_INTERNAL, and PG_RETURN_INT32.

◆ mic_to_euc_tw()

Datum mic_to_euc_tw ( PG_FUNCTION_ARGS  )

Definition at line 97 of file euc_tw_and_big5.c.

98 {
99  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
100  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
101  int len = PG_GETARG_INT32(4);
102  bool noError = PG_GETARG_BOOL(5);
103  int converted;
104 
105  CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_TW);
106 
107  converted = mic2euc_tw(src, dest, len, noError);
108 
109  PG_RETURN_INT32(converted);
110 }
static int mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError)

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, len, mic2euc_tw(), PG_EUC_TW, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_MULE_INTERNAL, and PG_RETURN_INT32.

◆ PG_FUNCTION_INFO_V1() [1/6]

PG_FUNCTION_INFO_V1 ( big5_to_euc_tw  )

◆ PG_FUNCTION_INFO_V1() [2/6]

PG_FUNCTION_INFO_V1 ( big5_to_mic  )

◆ PG_FUNCTION_INFO_V1() [3/6]

PG_FUNCTION_INFO_V1 ( euc_tw_to_big5  )

◆ PG_FUNCTION_INFO_V1() [4/6]

PG_FUNCTION_INFO_V1 ( euc_tw_to_mic  )

◆ PG_FUNCTION_INFO_V1() [5/6]

PG_FUNCTION_INFO_V1 ( mic_to_big5  )

◆ PG_FUNCTION_INFO_V1() [6/6]

PG_FUNCTION_INFO_V1 ( mic_to_euc_tw  )

Variable Documentation

◆ PG_MODULE_MAGIC

PG_MODULE_MAGIC

Definition at line 18 of file euc_tw_and_big5.c.