PostgreSQL Source Code git master
euc_tw_and_big5.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * EUC_TW, BIG5 and MULE_INTERNAL
4 *
5 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
6 * Portions Copyright (c) 1994, Regents of the University of California
7 *
8 * IDENTIFICATION
9 * src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c
10 *
11 *-------------------------------------------------------------------------
12 */
13
14#include "postgres.h"
15#include "fmgr.h"
16#include "mb/pg_wchar.h"
17

/* Marks this shared library as a loadable PostgreSQL module. */
PG_MODULE_MAGIC;

/* Register each SQL-callable conversion procedure defined in this file. */
PG_FUNCTION_INFO_V1(euc_tw_to_big5);
PG_FUNCTION_INFO_V1(big5_to_euc_tw);
PG_FUNCTION_INFO_V1(euc_tw_to_mic);
PG_FUNCTION_INFO_V1(mic_to_euc_tw);
PG_FUNCTION_INFO_V1(big5_to_mic);
PG_FUNCTION_INFO_V1(mic_to_big5);

27/* ----------
28 * conv_proc(
29 * INTEGER, -- source encoding id
30 * INTEGER, -- destination encoding id
31 * CSTRING, -- source string (null terminated C string)
32 * CSTRING, -- destination string (null terminated C string)
33 * INTEGER, -- source string length
34 * BOOL -- if true, don't throw an error if conversion fails
35 * ) returns INTEGER;
36 *
37 * Returns the number of bytes successfully converted.
38 * ----------
39 */
40
/*
 * Internal converters.  Each one reads "len" bytes from its first argument,
 * writes the converted, NUL-terminated result to "p", and returns the number
 * of source bytes it consumed.
 */
static int euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError);
static int big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError);
static int big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError);
static int mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError);
static int euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
static int mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError);
47
50{
51 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
52 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
53 int len = PG_GETARG_INT32(4);
54 bool noError = PG_GETARG_BOOL(5);
55 int converted;
56
58
59 converted = euc_tw2big5(src, dest, len, noError);
60
61 PG_RETURN_INT32(converted);
62}
63
66{
67 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
68 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
69 int len = PG_GETARG_INT32(4);
70 bool noError = PG_GETARG_BOOL(5);
71 int converted;
72
74
75 converted = big52euc_tw(src, dest, len, noError);
76
77 PG_RETURN_INT32(converted);
78}
79
82{
83 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
84 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
85 int len = PG_GETARG_INT32(4);
86 bool noError = PG_GETARG_BOOL(5);
87 int converted;
88
90
91 converted = euc_tw2mic(src, dest, len, noError);
92
93 PG_RETURN_INT32(converted);
94}
95
98{
99 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
100 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
101 int len = PG_GETARG_INT32(4);
102 bool noError = PG_GETARG_BOOL(5);
103 int converted;
104
106
107 converted = mic2euc_tw(src, dest, len, noError);
108
109 PG_RETURN_INT32(converted);
110}
111
112Datum
114{
115 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
116 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
117 int len = PG_GETARG_INT32(4);
118 bool noError = PG_GETARG_BOOL(5);
119 int converted;
120
122
123 converted = big52mic(src, dest, len, noError);
124
125 PG_RETURN_INT32(converted);
126}
127
128Datum
130{
131 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
132 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
133 int len = PG_GETARG_INT32(4);
134 bool noError = PG_GETARG_BOOL(5);
135 int converted;
136
138
139 converted = mic2big5(src, dest, len, noError);
140
141 PG_RETURN_INT32(converted);
142}
143
144
145/*
146 * EUC_TW ---> Big5
147 */
148static int
149euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError)
150{
151 const unsigned char *start = euc;
152 unsigned char c1;
153 unsigned short big5buf,
154 cnsBuf;
155 unsigned char lc;
156 int l;
157
158 while (len > 0)
159 {
160 c1 = *euc;
161 if (IS_HIGHBIT_SET(c1))
162 {
163 /* Verify and decode the next EUC_TW input character */
164 l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
165 if (l < 0)
166 {
167 if (noError)
168 break;
170 (const char *) euc, len);
171 }
172 if (c1 == SS2)
173 {
174 c1 = euc[1]; /* plane No. */
175 if (c1 == 0xa1)
176 lc = LC_CNS11643_1;
177 else if (c1 == 0xa2)
178 lc = LC_CNS11643_2;
179 else
180 lc = c1 - 0xa3 + LC_CNS11643_3;
181 cnsBuf = (euc[2] << 8) | euc[3];
182 }
183 else
184 { /* CNS11643-1 */
185 lc = LC_CNS11643_1;
186 cnsBuf = (c1 << 8) | euc[1];
187 }
188
189 /* Write it out in Big5 */
190 big5buf = CNStoBIG5(cnsBuf, lc);
191 if (big5buf == 0)
192 {
193 if (noError)
194 break;
196 (const char *) euc, len);
197 }
198 *p++ = (big5buf >> 8) & 0x00ff;
199 *p++ = big5buf & 0x00ff;
200
201 euc += l;
202 len -= l;
203 }
204 else
205 { /* should be ASCII */
206 if (c1 == 0)
207 {
208 if (noError)
209 break;
211 (const char *) euc, len);
212 }
213 *p++ = c1;
214 euc++;
215 len--;
216 }
217 }
218 *p = '\0';
219
220 return euc - start;
221}
222
223/*
224 * Big5 ---> EUC_TW
225 */
226static int
227big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError)
228{
229 const unsigned char *start = big5;
230 unsigned short c1;
231 unsigned short big5buf,
232 cnsBuf;
233 unsigned char lc;
234 int l;
235
236 while (len > 0)
237 {
238 /* Verify and decode the next Big5 input character */
239 c1 = *big5;
240 if (IS_HIGHBIT_SET(c1))
241 {
242 l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
243 if (l < 0)
244 {
245 if (noError)
246 break;
248 (const char *) big5, len);
249 }
250 big5buf = (c1 << 8) | big5[1];
251 cnsBuf = BIG5toCNS(big5buf, &lc);
252
253 if (lc == LC_CNS11643_1)
254 {
255 *p++ = (cnsBuf >> 8) & 0x00ff;
256 *p++ = cnsBuf & 0x00ff;
257 }
258 else if (lc == LC_CNS11643_2)
259 {
260 *p++ = SS2;
261 *p++ = 0xa2;
262 *p++ = (cnsBuf >> 8) & 0x00ff;
263 *p++ = cnsBuf & 0x00ff;
264 }
265 else if (lc >= LC_CNS11643_3 && lc <= LC_CNS11643_7)
266 {
267 *p++ = SS2;
268 *p++ = lc - LC_CNS11643_3 + 0xa3;
269 *p++ = (cnsBuf >> 8) & 0x00ff;
270 *p++ = cnsBuf & 0x00ff;
271 }
272 else
273 {
274 if (noError)
275 break;
277 (const char *) big5, len);
278 }
279
280 big5 += l;
281 len -= l;
282 }
283 else
284 {
285 /* ASCII */
286 if (c1 == 0)
287 {
288 if (noError)
289 break;
291 (const char *) big5, len);
292 }
293 *p++ = c1;
294 big5++;
295 len--;
296 continue;
297 }
298 }
299 *p = '\0';
300
301 return big5 - start;
302}
303
304/*
305 * EUC_TW ---> MIC
306 */
307static int
308euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
309{
310 const unsigned char *start = euc;
311 int c1;
312 int l;
313
314 while (len > 0)
315 {
316 c1 = *euc;
317 if (IS_HIGHBIT_SET(c1))
318 {
319 l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
320 if (l < 0)
321 {
322 if (noError)
323 break;
325 (const char *) euc, len);
326 }
327 if (c1 == SS2)
328 {
329 c1 = euc[1]; /* plane No. */
330 if (c1 == 0xa1)
331 *p++ = LC_CNS11643_1;
332 else if (c1 == 0xa2)
333 *p++ = LC_CNS11643_2;
334 else
335 {
336 /* other planes are MULE private charsets */
337 *p++ = LCPRV2_B;
338 *p++ = c1 - 0xa3 + LC_CNS11643_3;
339 }
340 *p++ = euc[2];
341 *p++ = euc[3];
342 }
343 else
344 { /* CNS11643-1 */
345 *p++ = LC_CNS11643_1;
346 *p++ = c1;
347 *p++ = euc[1];
348 }
349 euc += l;
350 len -= l;
351 }
352 else
353 { /* should be ASCII */
354 if (c1 == 0)
355 {
356 if (noError)
357 break;
359 (const char *) euc, len);
360 }
361 *p++ = c1;
362 euc++;
363 len--;
364 }
365 }
366 *p = '\0';
367
368 return euc - start;
369}
370
371/*
372 * MIC ---> EUC_TW
373 */
374static int
375mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError)
376{
377 const unsigned char *start = mic;
378 int c1;
379 int l;
380
381 while (len > 0)
382 {
383 c1 = *mic;
384 if (!IS_HIGHBIT_SET(c1))
385 {
386 /* ASCII */
387 if (c1 == 0)
388 {
389 if (noError)
390 break;
392 (const char *) mic, len);
393 }
394 *p++ = c1;
395 mic++;
396 len--;
397 continue;
398 }
399 l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
400 if (l < 0)
401 {
402 if (noError)
403 break;
405 (const char *) mic, len);
406 }
407 if (c1 == LC_CNS11643_1)
408 {
409 *p++ = mic[1];
410 *p++ = mic[2];
411 }
412 else if (c1 == LC_CNS11643_2)
413 {
414 *p++ = SS2;
415 *p++ = 0xa2;
416 *p++ = mic[1];
417 *p++ = mic[2];
418 }
419 else if (c1 == LCPRV2_B &&
420 mic[1] >= LC_CNS11643_3 && mic[1] <= LC_CNS11643_7)
421 {
422 *p++ = SS2;
423 *p++ = mic[1] - LC_CNS11643_3 + 0xa3;
424 *p++ = mic[2];
425 *p++ = mic[3];
426 }
427 else
428 {
429 if (noError)
430 break;
432 (const char *) mic, len);
433 }
434 mic += l;
435 len -= l;
436 }
437 *p = '\0';
438
439 return mic - start;
440}
441
442/*
443 * Big5 ---> MIC
444 */
445static int
446big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError)
447{
448 const unsigned char *start = big5;
449 unsigned short c1;
450 unsigned short big5buf,
451 cnsBuf;
452 unsigned char lc;
453 int l;
454
455 while (len > 0)
456 {
457 c1 = *big5;
458 if (!IS_HIGHBIT_SET(c1))
459 {
460 /* ASCII */
461 if (c1 == 0)
462 {
463 if (noError)
464 break;
466 (const char *) big5, len);
467 }
468 *p++ = c1;
469 big5++;
470 len--;
471 continue;
472 }
473 l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
474 if (l < 0)
475 {
476 if (noError)
477 break;
479 (const char *) big5, len);
480 }
481 big5buf = (c1 << 8) | big5[1];
482 cnsBuf = BIG5toCNS(big5buf, &lc);
483 if (lc != 0)
484 {
485 /* Planes 3 and 4 are MULE private charsets */
486 if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
487 *p++ = LCPRV2_B;
488 *p++ = lc; /* Plane No. */
489 *p++ = (cnsBuf >> 8) & 0x00ff;
490 *p++ = cnsBuf & 0x00ff;
491 }
492 else
493 {
494 if (noError)
495 break;
497 (const char *) big5, len);
498 }
499 big5 += l;
500 len -= l;
501 }
502 *p = '\0';
503
504 return big5 - start;
505}
506
507/*
508 * MIC ---> Big5
509 */
510static int
511mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError)
512{
513 const unsigned char *start = mic;
514 unsigned short c1;
515 unsigned short big5buf,
516 cnsBuf;
517 int l;
518
519 while (len > 0)
520 {
521 c1 = *mic;
522 if (!IS_HIGHBIT_SET(c1))
523 {
524 /* ASCII */
525 if (c1 == 0)
526 {
527 if (noError)
528 break;
530 (const char *) mic, len);
531 }
532 *p++ = c1;
533 mic++;
534 len--;
535 continue;
536 }
537 l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
538 if (l < 0)
539 {
540 if (noError)
541 break;
543 (const char *) mic, len);
544 }
545 if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == LCPRV2_B)
546 {
547 if (c1 == LCPRV2_B)
548 {
549 c1 = mic[1]; /* get plane no. */
550 cnsBuf = (mic[2] << 8) | mic[3];
551 }
552 else
553 {
554 cnsBuf = (mic[1] << 8) | mic[2];
555 }
556 big5buf = CNStoBIG5(cnsBuf, c1);
557 if (big5buf == 0)
558 {
559 if (noError)
560 break;
562 (const char *) mic, len);
563 }
564 *p++ = (big5buf >> 8) & 0x00ff;
565 *p++ = big5buf & 0x00ff;
566 }
567 else
568 {
569 if (noError)
570 break;
572 (const char *) mic, len);
573 }
574 mic += l;
575 len -= l;
576 }
577 *p = '\0';
578
579 return mic - start;
580}
unsigned short CNStoBIG5(unsigned short cns, unsigned char lc)
Definition: big5.c:345
unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc)
Definition: big5.c:292
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1112
static int euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
static int big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError)
Datum euc_tw_to_mic(PG_FUNCTION_ARGS)
Datum big5_to_euc_tw(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC
PG_FUNCTION_INFO_V1(euc_tw_to_big5)
Datum mic_to_big5(PG_FUNCTION_ARGS)
Datum big5_to_mic(PG_FUNCTION_ARGS)
Datum euc_tw_to_big5(PG_FUNCTION_ARGS)
static int mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError)
Datum mic_to_euc_tw(PG_FUNCTION_ARGS)
static int mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError)
static int euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError)
static int big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError)
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
return str start
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1730
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698
const void size_t len
#define LC_CNS11643_7
Definition: pg_wchar.h:196
@ PG_MULE_INTERNAL
Definition: pg_wchar.h:233
@ PG_EUC_TW
Definition: pg_wchar.h:230
@ PG_BIG5
Definition: pg_wchar.h:265
#define LC_CNS11643_3
Definition: pg_wchar.h:192
#define SS2
Definition: pg_wchar.h:38
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:507
#define LC_CNS11643_1
Definition: pg_wchar.h:137
#define LC_CNS11643_4
Definition: pg_wchar.h:193
#define LC_CNS11643_2
Definition: pg_wchar.h:138
#define LCPRV2_B
Definition: pg_wchar.h:163
uintptr_t Datum
Definition: postgres.h:69
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition: wchar.c:2103