PostgreSQL Source Code  git master
euc_tw_and_big5.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * EUC_TW, BIG5 and MULE_INTERNAL
4  *
5  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
6  * Portions Copyright (c) 1994, Regents of the University of California
7  *
8  * IDENTIFICATION
9  * src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres.h"
15 #include "fmgr.h"
16 #include "mb/pg_wchar.h"
17 
19 
26 
27 /* ----------
28  * conv_proc(
29  * INTEGER, -- source encoding id
30  * INTEGER, -- destination encoding id
31  * CSTRING, -- source string (null terminated C string)
32  * CSTRING, -- destination string (null terminated C string)
33  * INTEGER, -- source string length
34  * BOOL -- if true, don't throw an error if conversion fails
35  * ) returns INTEGER;
36  *
37  * Returns the number of bytes successfully converted.
38  * ----------
39  */
40 
/*
 * Forward declarations of the per-direction conversion workers defined
 * further down in this file.  Each one takes the source bytes, the output
 * buffer, the source length in bytes and the noError flag, and returns the
 * number of source bytes it consumed.
 */
static int	euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError);
static int	big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError);
static int	big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError);
static int	mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError);
static int	euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
static int	mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError);
47 
48 Datum
50 {
51  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
52  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
53  int len = PG_GETARG_INT32(4);
54  bool noError = PG_GETARG_BOOL(5);
55  int converted;
56 
58 
59  converted = euc_tw2big5(src, dest, len, noError);
60 
61  PG_RETURN_INT32(converted);
62 }
63 
64 Datum
66 {
67  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
68  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
69  int len = PG_GETARG_INT32(4);
70  bool noError = PG_GETARG_BOOL(5);
71  int converted;
72 
74 
75  converted = big52euc_tw(src, dest, len, noError);
76 
77  PG_RETURN_INT32(converted);
78 }
79 
80 Datum
82 {
83  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
84  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
85  int len = PG_GETARG_INT32(4);
86  bool noError = PG_GETARG_BOOL(5);
87  int converted;
88 
90 
91  converted = euc_tw2mic(src, dest, len, noError);
92 
93  PG_RETURN_INT32(converted);
94 }
95 
96 Datum
98 {
99  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
100  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
101  int len = PG_GETARG_INT32(4);
102  bool noError = PG_GETARG_BOOL(5);
103  int converted;
104 
106 
107  converted = mic2euc_tw(src, dest, len, noError);
108 
109  PG_RETURN_INT32(converted);
110 }
111 
112 Datum
114 {
115  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
116  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
117  int len = PG_GETARG_INT32(4);
118  bool noError = PG_GETARG_BOOL(5);
119  int converted;
120 
122 
123  converted = big52mic(src, dest, len, noError);
124 
125  PG_RETURN_INT32(converted);
126 }
127 
128 Datum
130 {
131  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
132  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
133  int len = PG_GETARG_INT32(4);
134  bool noError = PG_GETARG_BOOL(5);
135  int converted;
136 
138 
139  converted = mic2big5(src, dest, len, noError);
140 
141  PG_RETURN_INT32(converted);
142 }
143 
144 
145 /*
146  * EUC_TW ---> Big5
147  */
148 static int
149 euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError)
150 {
151  const unsigned char *start = euc;
152  unsigned char c1;
153  unsigned short big5buf,
154  cnsBuf;
155  unsigned char lc;
156  int l;
157 
158  while (len > 0)
159  {
160  c1 = *euc;
161  if (IS_HIGHBIT_SET(c1))
162  {
163  /* Verify and decode the next EUC_TW input character */
164  l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
165  if (l < 0)
166  {
167  if (noError)
168  break;
170  (const char *) euc, len);
171  }
172  if (c1 == SS2)
173  {
174  c1 = euc[1]; /* plane No. */
175  if (c1 == 0xa1)
176  lc = LC_CNS11643_1;
177  else if (c1 == 0xa2)
178  lc = LC_CNS11643_2;
179  else
180  lc = c1 - 0xa3 + LC_CNS11643_3;
181  cnsBuf = (euc[2] << 8) | euc[3];
182  }
183  else
184  { /* CNS11643-1 */
185  lc = LC_CNS11643_1;
186  cnsBuf = (c1 << 8) | euc[1];
187  }
188 
189  /* Write it out in Big5 */
190  big5buf = CNStoBIG5(cnsBuf, lc);
191  if (big5buf == 0)
192  {
193  if (noError)
194  break;
196  (const char *) euc, len);
197  }
198  *p++ = (big5buf >> 8) & 0x00ff;
199  *p++ = big5buf & 0x00ff;
200 
201  euc += l;
202  len -= l;
203  }
204  else
205  { /* should be ASCII */
206  if (c1 == 0)
207  {
208  if (noError)
209  break;
211  (const char *) euc, len);
212  }
213  *p++ = c1;
214  euc++;
215  len--;
216  }
217  }
218  *p = '\0';
219 
220  return euc - start;
221 }
222 
223 /*
224  * Big5 ---> EUC_TW
225  */
226 static int
227 big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError)
228 {
229  const unsigned char *start = big5;
230  unsigned short c1;
231  unsigned short big5buf,
232  cnsBuf;
233  unsigned char lc;
234  int l;
235 
236  while (len > 0)
237  {
238  /* Verify and decode the next Big5 input character */
239  c1 = *big5;
240  if (IS_HIGHBIT_SET(c1))
241  {
242  l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
243  if (l < 0)
244  {
245  if (noError)
246  break;
248  (const char *) big5, len);
249  }
250  big5buf = (c1 << 8) | big5[1];
251  cnsBuf = BIG5toCNS(big5buf, &lc);
252 
253  if (lc == LC_CNS11643_1)
254  {
255  *p++ = (cnsBuf >> 8) & 0x00ff;
256  *p++ = cnsBuf & 0x00ff;
257  }
258  else if (lc == LC_CNS11643_2)
259  {
260  *p++ = SS2;
261  *p++ = 0xa2;
262  *p++ = (cnsBuf >> 8) & 0x00ff;
263  *p++ = cnsBuf & 0x00ff;
264  }
265  else if (lc >= LC_CNS11643_3 && lc <= LC_CNS11643_7)
266  {
267  *p++ = SS2;
268  *p++ = lc - LC_CNS11643_3 + 0xa3;
269  *p++ = (cnsBuf >> 8) & 0x00ff;
270  *p++ = cnsBuf & 0x00ff;
271  }
272  else
273  {
274  if (noError)
275  break;
277  (const char *) big5, len);
278  }
279 
280  big5 += l;
281  len -= l;
282  }
283  else
284  {
285  /* ASCII */
286  if (c1 == 0)
287  {
288  if (noError)
289  break;
291  (const char *) big5, len);
292  }
293  *p++ = c1;
294  big5++;
295  len--;
296  continue;
297  }
298  }
299  *p = '\0';
300 
301  return big5 - start;
302 }
303 
304 /*
305  * EUC_TW ---> MIC
306  */
307 static int
308 euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
309 {
310  const unsigned char *start = euc;
311  int c1;
312  int l;
313 
314  while (len > 0)
315  {
316  c1 = *euc;
317  if (IS_HIGHBIT_SET(c1))
318  {
319  l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
320  if (l < 0)
321  {
322  if (noError)
323  break;
325  (const char *) euc, len);
326  }
327  if (c1 == SS2)
328  {
329  c1 = euc[1]; /* plane No. */
330  if (c1 == 0xa1)
331  *p++ = LC_CNS11643_1;
332  else if (c1 == 0xa2)
333  *p++ = LC_CNS11643_2;
334  else
335  {
336  /* other planes are MULE private charsets */
337  *p++ = LCPRV2_B;
338  *p++ = c1 - 0xa3 + LC_CNS11643_3;
339  }
340  *p++ = euc[2];
341  *p++ = euc[3];
342  }
343  else
344  { /* CNS11643-1 */
345  *p++ = LC_CNS11643_1;
346  *p++ = c1;
347  *p++ = euc[1];
348  }
349  euc += l;
350  len -= l;
351  }
352  else
353  { /* should be ASCII */
354  if (c1 == 0)
355  {
356  if (noError)
357  break;
359  (const char *) euc, len);
360  }
361  *p++ = c1;
362  euc++;
363  len--;
364  }
365  }
366  *p = '\0';
367 
368  return euc - start;
369 }
370 
371 /*
372  * MIC ---> EUC_TW
373  */
374 static int
375 mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError)
376 {
377  const unsigned char *start = mic;
378  int c1;
379  int l;
380 
381  while (len > 0)
382  {
383  c1 = *mic;
384  if (!IS_HIGHBIT_SET(c1))
385  {
386  /* ASCII */
387  if (c1 == 0)
388  {
389  if (noError)
390  break;
392  (const char *) mic, len);
393  }
394  *p++ = c1;
395  mic++;
396  len--;
397  continue;
398  }
399  l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
400  if (l < 0)
401  {
402  if (noError)
403  break;
405  (const char *) mic, len);
406  }
407  if (c1 == LC_CNS11643_1)
408  {
409  *p++ = mic[1];
410  *p++ = mic[2];
411  }
412  else if (c1 == LC_CNS11643_2)
413  {
414  *p++ = SS2;
415  *p++ = 0xa2;
416  *p++ = mic[1];
417  *p++ = mic[2];
418  }
419  else if (c1 == LCPRV2_B &&
420  mic[1] >= LC_CNS11643_3 && mic[1] <= LC_CNS11643_7)
421  {
422  *p++ = SS2;
423  *p++ = mic[1] - LC_CNS11643_3 + 0xa3;
424  *p++ = mic[2];
425  *p++ = mic[3];
426  }
427  else
428  {
429  if (noError)
430  break;
432  (const char *) mic, len);
433  }
434  mic += l;
435  len -= l;
436  }
437  *p = '\0';
438 
439  return mic - start;
440 }
441 
442 /*
443  * Big5 ---> MIC
444  */
445 static int
446 big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError)
447 {
448  const unsigned char *start = big5;
449  unsigned short c1;
450  unsigned short big5buf,
451  cnsBuf;
452  unsigned char lc;
453  int l;
454 
455  while (len > 0)
456  {
457  c1 = *big5;
458  if (!IS_HIGHBIT_SET(c1))
459  {
460  /* ASCII */
461  if (c1 == 0)
462  {
463  if (noError)
464  break;
466  (const char *) big5, len);
467  }
468  *p++ = c1;
469  big5++;
470  len--;
471  continue;
472  }
473  l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
474  if (l < 0)
475  {
476  if (noError)
477  break;
479  (const char *) big5, len);
480  }
481  big5buf = (c1 << 8) | big5[1];
482  cnsBuf = BIG5toCNS(big5buf, &lc);
483  if (lc != 0)
484  {
485  /* Planes 3 and 4 are MULE private charsets */
486  if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
487  *p++ = LCPRV2_B;
488  *p++ = lc; /* Plane No. */
489  *p++ = (cnsBuf >> 8) & 0x00ff;
490  *p++ = cnsBuf & 0x00ff;
491  }
492  else
493  {
494  if (noError)
495  break;
497  (const char *) big5, len);
498  }
499  big5 += l;
500  len -= l;
501  }
502  *p = '\0';
503 
504  return big5 - start;
505 }
506 
507 /*
508  * MIC ---> Big5
509  */
510 static int
511 mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError)
512 {
513  const unsigned char *start = mic;
514  unsigned short c1;
515  unsigned short big5buf,
516  cnsBuf;
517  int l;
518 
519  while (len > 0)
520  {
521  c1 = *mic;
522  if (!IS_HIGHBIT_SET(c1))
523  {
524  /* ASCII */
525  if (c1 == 0)
526  {
527  if (noError)
528  break;
530  (const char *) mic, len);
531  }
532  *p++ = c1;
533  mic++;
534  len--;
535  continue;
536  }
537  l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
538  if (l < 0)
539  {
540  if (noError)
541  break;
543  (const char *) mic, len);
544  }
545  if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == LCPRV2_B)
546  {
547  if (c1 == LCPRV2_B)
548  {
549  c1 = mic[1]; /* get plane no. */
550  cnsBuf = (mic[2] << 8) | mic[3];
551  }
552  else
553  {
554  cnsBuf = (mic[1] << 8) | mic[2];
555  }
556  big5buf = CNStoBIG5(cnsBuf, c1);
557  if (big5buf == 0)
558  {
559  if (noError)
560  break;
562  (const char *) mic, len);
563  }
564  *p++ = (big5buf >> 8) & 0x00ff;
565  *p++ = big5buf & 0x00ff;
566  }
567  else
568  {
569  if (noError)
570  break;
572  (const char *) mic, len);
573  }
574  mic += l;
575  len -= l;
576  }
577  *p = '\0';
578 
579  return mic - start;
580 }
unsigned short CNStoBIG5(unsigned short cns, unsigned char lc)
Definition: big5.c:345
unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc)
Definition: big5.c:292
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1155
static int euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
static int big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError)
Datum euc_tw_to_mic(PG_FUNCTION_ARGS)
Datum big5_to_euc_tw(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC
PG_FUNCTION_INFO_V1(euc_tw_to_big5)
Datum mic_to_big5(PG_FUNCTION_ARGS)
Datum big5_to_mic(PG_FUNCTION_ARGS)
Datum euc_tw_to_big5(PG_FUNCTION_ARGS)
static int mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError)
Datum mic_to_euc_tw(PG_FUNCTION_ARGS)
static int mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError)
static int euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError)
static int big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError)
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
return str start
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1730
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698
const void size_t len
#define LC_CNS11643_7
Definition: pg_wchar.h:196
@ PG_MULE_INTERNAL
Definition: pg_wchar.h:233
@ PG_EUC_TW
Definition: pg_wchar.h:230
@ PG_BIG5
Definition: pg_wchar.h:265
#define LC_CNS11643_3
Definition: pg_wchar.h:192
#define SS2
Definition: pg_wchar.h:38
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:507
#define LC_CNS11643_1
Definition: pg_wchar.h:137
#define LC_CNS11643_4
Definition: pg_wchar.h:193
#define LC_CNS11643_2
Definition: pg_wchar.h:138
#define LCPRV2_B
Definition: pg_wchar.h:163
uintptr_t Datum
Definition: postgres.h:64
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition: wchar.c:2103