PostgreSQL Source Code  git master
euc_tw_and_big5.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 *	  EUC_TW, BIG5 and MULE_INTERNAL
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c
 *
 *-------------------------------------------------------------------------
 */
13 
14 #include "postgres.h"
15 #include "fmgr.h"
16 #include "mb/pg_wchar.h"
17 
18 #define ENCODING_GROWTH_RATE 4
19 
21 
28 
/* ----------
 * conv_proc(
 *		INTEGER,	-- source encoding id
 *		INTEGER,	-- destination encoding id
 *		CSTRING,	-- source string (null terminated C string)
 *		CSTRING,	-- destination string (null terminated C string)
 *		INTEGER,	-- source string length
 *		BOOL		-- if true, don't throw an error if conversion fails
 * ) returns INTEGER;
 *
 * Returns the number of bytes successfully converted.
 * ----------
 */
42 
/*
 * Internal converters.  Each reads up to "len" bytes from its first
 * argument, writes the converted, NUL-terminated result to "p", and returns
 * the number of source bytes consumed.
 */
static int	euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError);
static int	big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError);
static int	big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError);
static int	mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError);
static int	euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
static int	mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError);
49 
50 Datum
52 {
53  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
54  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
55  int len = PG_GETARG_INT32(4);
56  bool noError = PG_GETARG_BOOL(5);
57  int converted;
58 
60 
61  converted = euc_tw2big5(src, dest, len, noError);
62 
63  PG_RETURN_INT32(converted);
64 }
65 
66 Datum
68 {
69  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
70  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
71  int len = PG_GETARG_INT32(4);
72  bool noError = PG_GETARG_BOOL(5);
73  int converted;
74 
76 
77  converted = big52euc_tw(src, dest, len, noError);
78 
79  PG_RETURN_INT32(converted);
80 }
81 
82 Datum
84 {
85  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
86  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
87  int len = PG_GETARG_INT32(4);
88  bool noError = PG_GETARG_BOOL(5);
89  int converted;
90 
92 
93  converted = euc_tw2mic(src, dest, len, noError);
94 
95  PG_RETURN_INT32(converted);
96 }
97 
98 Datum
100 {
101  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
102  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
103  int len = PG_GETARG_INT32(4);
104  bool noError = PG_GETARG_BOOL(5);
105  int converted;
106 
108 
109  converted = mic2euc_tw(src, dest, len, noError);
110 
111  PG_RETURN_INT32(converted);
112 }
113 
114 Datum
116 {
117  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
118  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
119  int len = PG_GETARG_INT32(4);
120  bool noError = PG_GETARG_BOOL(5);
121  int converted;
122 
124 
125  converted = big52mic(src, dest, len, noError);
126 
127  PG_RETURN_INT32(converted);
128 }
129 
130 Datum
132 {
133  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
134  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
135  int len = PG_GETARG_INT32(4);
136  bool noError = PG_GETARG_BOOL(5);
137  int converted;
138 
140 
141  converted = mic2big5(src, dest, len, noError);
142 
143  PG_RETURN_INT32(converted);
144 }
145 
146 
147 /*
148  * EUC_TW ---> Big5
149  */
150 static int
151 euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError)
152 {
153  const unsigned char *start = euc;
154  unsigned char c1;
155  unsigned short big5buf,
156  cnsBuf;
157  unsigned char lc;
158  int l;
159 
160  while (len > 0)
161  {
162  c1 = *euc;
163  if (IS_HIGHBIT_SET(c1))
164  {
165  /* Verify and decode the next EUC_TW input character */
166  l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
167  if (l < 0)
168  {
169  if (noError)
170  break;
172  (const char *) euc, len);
173  }
174  if (c1 == SS2)
175  {
176  c1 = euc[1]; /* plane No. */
177  if (c1 == 0xa1)
178  lc = LC_CNS11643_1;
179  else if (c1 == 0xa2)
180  lc = LC_CNS11643_2;
181  else
182  lc = c1 - 0xa3 + LC_CNS11643_3;
183  cnsBuf = (euc[2] << 8) | euc[3];
184  }
185  else
186  { /* CNS11643-1 */
187  lc = LC_CNS11643_1;
188  cnsBuf = (c1 << 8) | euc[1];
189  }
190 
191  /* Write it out in Big5 */
192  big5buf = CNStoBIG5(cnsBuf, lc);
193  if (big5buf == 0)
194  {
195  if (noError)
196  break;
198  (const char *) euc, len);
199  }
200  *p++ = (big5buf >> 8) & 0x00ff;
201  *p++ = big5buf & 0x00ff;
202 
203  euc += l;
204  len -= l;
205  }
206  else
207  { /* should be ASCII */
208  if (c1 == 0)
209  {
210  if (noError)
211  break;
213  (const char *) euc, len);
214  }
215  *p++ = c1;
216  euc++;
217  len--;
218  }
219  }
220  *p = '\0';
221 
222  return euc - start;
223 }
224 
225 /*
226  * Big5 ---> EUC_TW
227  */
228 static int
229 big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError)
230 {
231  const unsigned char *start = big5;
232  unsigned short c1;
233  unsigned short big5buf,
234  cnsBuf;
235  unsigned char lc;
236  int l;
237 
238  while (len > 0)
239  {
240  /* Verify and decode the next Big5 input character */
241  c1 = *big5;
242  if (IS_HIGHBIT_SET(c1))
243  {
244  l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
245  if (l < 0)
246  {
247  if (noError)
248  break;
250  (const char *) big5, len);
251  }
252  big5buf = (c1 << 8) | big5[1];
253  cnsBuf = BIG5toCNS(big5buf, &lc);
254 
255  if (lc == LC_CNS11643_1)
256  {
257  *p++ = (cnsBuf >> 8) & 0x00ff;
258  *p++ = cnsBuf & 0x00ff;
259  }
260  else if (lc == LC_CNS11643_2)
261  {
262  *p++ = SS2;
263  *p++ = 0xa2;
264  *p++ = (cnsBuf >> 8) & 0x00ff;
265  *p++ = cnsBuf & 0x00ff;
266  }
267  else if (lc >= LC_CNS11643_3 && lc <= LC_CNS11643_7)
268  {
269  *p++ = SS2;
270  *p++ = lc - LC_CNS11643_3 + 0xa3;
271  *p++ = (cnsBuf >> 8) & 0x00ff;
272  *p++ = cnsBuf & 0x00ff;
273  }
274  else
275  {
276  if (noError)
277  break;
279  (const char *) big5, len);
280  }
281 
282  big5 += l;
283  len -= l;
284  }
285  else
286  {
287  /* ASCII */
288  if (c1 == 0)
290  (const char *) big5, len);
291  *p++ = c1;
292  big5++;
293  len--;
294  continue;
295  }
296  }
297  *p = '\0';
298 
299  return big5 - start;
300 }
301 
302 /*
303  * EUC_TW ---> MIC
304  */
305 static int
306 euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
307 {
308  const unsigned char *start = euc;
309  int c1;
310  int l;
311 
312  while (len > 0)
313  {
314  c1 = *euc;
315  if (IS_HIGHBIT_SET(c1))
316  {
317  l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
318  if (l < 0)
319  {
320  if (noError)
321  break;
323  (const char *) euc, len);
324  }
325  if (c1 == SS2)
326  {
327  c1 = euc[1]; /* plane No. */
328  if (c1 == 0xa1)
329  *p++ = LC_CNS11643_1;
330  else if (c1 == 0xa2)
331  *p++ = LC_CNS11643_2;
332  else
333  {
334  /* other planes are MULE private charsets */
335  *p++ = LCPRV2_B;
336  *p++ = c1 - 0xa3 + LC_CNS11643_3;
337  }
338  *p++ = euc[2];
339  *p++ = euc[3];
340  }
341  else
342  { /* CNS11643-1 */
343  *p++ = LC_CNS11643_1;
344  *p++ = c1;
345  *p++ = euc[1];
346  }
347  euc += l;
348  len -= l;
349  }
350  else
351  { /* should be ASCII */
352  if (c1 == 0)
353  {
354  if (noError)
355  break;
357  (const char *) euc, len);
358  }
359  *p++ = c1;
360  euc++;
361  len--;
362  }
363  }
364  *p = '\0';
365 
366  return euc - start;
367 }
368 
369 /*
370  * MIC ---> EUC_TW
371  */
372 static int
373 mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError)
374 {
375  const unsigned char *start = mic;
376  int c1;
377  int l;
378 
379  while (len > 0)
380  {
381  c1 = *mic;
382  if (!IS_HIGHBIT_SET(c1))
383  {
384  /* ASCII */
385  if (c1 == 0)
386  {
387  if (noError)
388  break;
390  (const char *) mic, len);
391  }
392  *p++ = c1;
393  mic++;
394  len--;
395  continue;
396  }
397  l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
398  if (l < 0)
399  {
400  if (noError)
401  break;
403  (const char *) mic, len);
404  }
405  if (c1 == LC_CNS11643_1)
406  {
407  *p++ = mic[1];
408  *p++ = mic[2];
409  }
410  else if (c1 == LC_CNS11643_2)
411  {
412  *p++ = SS2;
413  *p++ = 0xa2;
414  *p++ = mic[1];
415  *p++ = mic[2];
416  }
417  else if (c1 == LCPRV2_B &&
418  mic[1] >= LC_CNS11643_3 && mic[1] <= LC_CNS11643_7)
419  {
420  *p++ = SS2;
421  *p++ = mic[1] - LC_CNS11643_3 + 0xa3;
422  *p++ = mic[2];
423  *p++ = mic[3];
424  }
425  else
426  {
427  if (noError)
428  break;
430  (const char *) mic, len);
431  }
432  mic += l;
433  len -= l;
434  }
435  *p = '\0';
436 
437  return mic - start;
438 }
439 
440 /*
441  * Big5 ---> MIC
442  */
443 static int
444 big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError)
445 {
446  const unsigned char *start = big5;
447  unsigned short c1;
448  unsigned short big5buf,
449  cnsBuf;
450  unsigned char lc;
451  int l;
452 
453  while (len > 0)
454  {
455  c1 = *big5;
456  if (!IS_HIGHBIT_SET(c1))
457  {
458  /* ASCII */
459  if (c1 == 0)
460  {
461  if (noError)
462  break;
464  (const char *) big5, len);
465  }
466  *p++ = c1;
467  big5++;
468  len--;
469  continue;
470  }
471  l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
472  if (l < 0)
473  {
474  if (noError)
475  break;
477  (const char *) big5, len);
478  }
479  big5buf = (c1 << 8) | big5[1];
480  cnsBuf = BIG5toCNS(big5buf, &lc);
481  if (lc != 0)
482  {
483  /* Planes 3 and 4 are MULE private charsets */
484  if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
485  *p++ = LCPRV2_B;
486  *p++ = lc; /* Plane No. */
487  *p++ = (cnsBuf >> 8) & 0x00ff;
488  *p++ = cnsBuf & 0x00ff;
489  }
490  else
491  {
492  if (noError)
493  break;
495  (const char *) big5, len);
496  }
497  big5 += l;
498  len -= l;
499  }
500  *p = '\0';
501 
502  return big5 - start;
503 }
504 
505 /*
506  * MIC ---> Big5
507  */
508 static int
509 mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError)
510 {
511  const unsigned char *start = mic;
512  unsigned short c1;
513  unsigned short big5buf,
514  cnsBuf;
515  int l;
516 
517  while (len > 0)
518  {
519  c1 = *mic;
520  if (!IS_HIGHBIT_SET(c1))
521  {
522  /* ASCII */
523  if (c1 == 0)
524  {
525  if (noError)
526  break;
528  (const char *) mic, len);
529  }
530  *p++ = c1;
531  mic++;
532  len--;
533  continue;
534  }
535  l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
536  if (l < 0)
537  {
538  if (noError)
539  break;
541  (const char *) mic, len);
542  }
543  if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == LCPRV2_B)
544  {
545  if (c1 == LCPRV2_B)
546  {
547  c1 = mic[1]; /* get plane no. */
548  cnsBuf = (mic[2] << 8) | mic[3];
549  }
550  else
551  {
552  cnsBuf = (mic[1] << 8) | mic[2];
553  }
554  big5buf = CNStoBIG5(cnsBuf, c1);
555  if (big5buf == 0)
556  {
557  if (noError)
558  break;
560  (const char *) mic, len);
561  }
562  *p++ = (big5buf >> 8) & 0x00ff;
563  *p++ = big5buf & 0x00ff;
564  }
565  else
566  {
567  if (noError)
568  break;
570  (const char *) mic, len);
571  }
572  mic += l;
573  len -= l;
574  }
575  *p = '\0';
576 
577  return mic - start;
578 }
Datum mic_to_euc_tw(PG_FUNCTION_ARGS)
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define LC_CNS11643_1
Definition: pg_wchar.h:134
static int mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError)
#define LC_CNS11643_4
Definition: pg_wchar.h:205
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1647
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
Datum euc_tw_to_big5(PG_FUNCTION_ARGS)
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
static int big52euc_tw(const unsigned char *euc, unsigned char *p, int len, bool noError)
PG_FUNCTION_INFO_V1(euc_tw_to_big5)
Datum big5_to_euc_tw(PG_FUNCTION_ARGS)
static int euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError)
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1156
#define LC_CNS11643_7
Definition: pg_wchar.h:208
unsigned short CNStoBIG5(unsigned short cns, unsigned char lc)
Definition: big5.c:345
static int euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
uintptr_t Datum
Definition: postgres.h:411
static int mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError)
unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc)
Definition: big5.c:292
#define LCPRV2_B
Definition: pg_wchar.h:163
#define LC_CNS11643_2
Definition: pg_wchar.h:135
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:527
static int big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError)
#define LC_CNS11643_3
Definition: pg_wchar.h:204
PG_MODULE_MAGIC
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1679
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition: wchar.c:1940
Datum big5_to_mic(PG_FUNCTION_ARGS)
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
Datum euc_tw_to_mic(PG_FUNCTION_ARGS)
Datum mic_to_big5(PG_FUNCTION_ARGS)
#define SS2
Definition: pg_wchar.h:35