PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
euc_tw_and_big5.c File Reference
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
Include dependency graph for euc_tw_and_big5.c:

Go to the source code of this file.

Functions

 PG_MODULE_MAGIC_EXT (.name="euc_tw_and_big5",.version=PG_VERSION)
 
 PG_FUNCTION_INFO_V1 (euc_tw_to_big5)
 
 PG_FUNCTION_INFO_V1 (big5_to_euc_tw)
 
 PG_FUNCTION_INFO_V1 (euc_tw_to_mic)
 
 PG_FUNCTION_INFO_V1 (mic_to_euc_tw)
 
 PG_FUNCTION_INFO_V1 (big5_to_mic)
 
 PG_FUNCTION_INFO_V1 (mic_to_big5)
 
static int euc_tw2big5 (const unsigned char *euc, unsigned char *p, int len, bool noError)
 
static int big52euc_tw (const unsigned char *big5, unsigned char *p, int len, bool noError)
 
static int big52mic (const unsigned char *big5, unsigned char *p, int len, bool noError)
 
static int mic2big5 (const unsigned char *mic, unsigned char *p, int len, bool noError)
 
static int euc_tw2mic (const unsigned char *euc, unsigned char *p, int len, bool noError)
 
static int mic2euc_tw (const unsigned char *mic, unsigned char *p, int len, bool noError)
 
Datum euc_tw_to_big5 (PG_FUNCTION_ARGS)
 
Datum big5_to_euc_tw (PG_FUNCTION_ARGS)
 
Datum euc_tw_to_mic (PG_FUNCTION_ARGS)
 
Datum mic_to_euc_tw (PG_FUNCTION_ARGS)
 
Datum big5_to_mic (PG_FUNCTION_ARGS)
 
Datum mic_to_big5 (PG_FUNCTION_ARGS)
 

Function Documentation

◆ big52euc_tw()

static int big52euc_tw ( const unsigned char *  big5,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 230 of file euc_tw_and_big5.c.

231{
232 const unsigned char *start = big5;
233 unsigned short c1;
234 unsigned short big5buf,
235 cnsBuf;
236 unsigned char lc;
237 int l;
238
239 while (len > 0)
240 {
241 /* Verify and decode the next Big5 input character */
242 c1 = *big5;
243 if (IS_HIGHBIT_SET(c1))
244 {
245 l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
246 if (l < 0)
247 {
248 if (noError)
249 break;
250 report_invalid_encoding(PG_BIG5,
251 (const char *) big5, len);
252 }
253 big5buf = (c1 << 8) | big5[1];
254 cnsBuf = BIG5toCNS(big5buf, &lc);
255
256 if (lc == LC_CNS11643_1)
257 {
258 *p++ = (cnsBuf >> 8) & 0x00ff;
259 *p++ = cnsBuf & 0x00ff;
260 }
261 else if (lc == LC_CNS11643_2)
262 {
263 *p++ = SS2;
264 *p++ = 0xa2;
265 *p++ = (cnsBuf >> 8) & 0x00ff;
266 *p++ = cnsBuf & 0x00ff;
267 }
268 else if (lc >= LC_CNS11643_3 && lc <= LC_CNS11643_7)
269 {
270 *p++ = SS2;
271 *p++ = lc - LC_CNS11643_3 + 0xa3;
272 *p++ = (cnsBuf >> 8) & 0x00ff;
273 *p++ = cnsBuf & 0x00ff;
274 }
275 else
276 {
277 if (noError)
278 break;
279 report_untranslatable_char(PG_BIG5, PG_EUC_TW,
280 (const char *) big5, len);
281 }
282
283 big5 += l;
284 len -= l;
285 }
286 else
287 {
288 /* ASCII */
289 if (c1 == 0)
290 {
291 if (noError)
292 break;
293 report_invalid_encoding(PG_BIG5,
294 (const char *) big5, len);
295 }
296 *p++ = c1;
297 big5++;
298 len--;
299 continue;
300 }
301 }
302 *p = '\0';
303
304 return big5 - start;
305}
unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc)
Definition: big5.c:292
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1126
return str start
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1731
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1699
const void size_t len
#define LC_CNS11643_7
Definition: pg_wchar.h:196
@ PG_EUC_TW
Definition: pg_wchar.h:230
@ PG_BIG5
Definition: pg_wchar.h:265
#define LC_CNS11643_3
Definition: pg_wchar.h:192
#define SS2
Definition: pg_wchar.h:38
#define LC_CNS11643_1
Definition: pg_wchar.h:137
#define LC_CNS11643_2
Definition: pg_wchar.h:138
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition: wchar.c:2189

References BIG5toCNS(), IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, LC_CNS11643_7, len, PG_BIG5, pg_encoding_verifymbchar(), PG_EUC_TW, report_invalid_encoding(), report_untranslatable_char(), SS2, and start.

Referenced by big5_to_euc_tw().

◆ big52mic()

static int big52mic ( const unsigned char *  big5,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 449 of file euc_tw_and_big5.c.

450{
451 const unsigned char *start = big5;
452 unsigned short c1;
453 unsigned short big5buf,
454 cnsBuf;
455 unsigned char lc;
456 int l;
457
458 while (len > 0)
459 {
460 c1 = *big5;
461 if (!IS_HIGHBIT_SET(c1))
462 {
463 /* ASCII */
464 if (c1 == 0)
465 {
466 if (noError)
467 break;
468 report_invalid_encoding(PG_BIG5,
469 (const char *) big5, len);
470 }
471 *p++ = c1;
472 big5++;
473 len--;
474 continue;
475 }
476 l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
477 if (l < 0)
478 {
479 if (noError)
480 break;
481 report_invalid_encoding(PG_BIG5,
482 (const char *) big5, len);
483 }
484 big5buf = (c1 << 8) | big5[1];
485 cnsBuf = BIG5toCNS(big5buf, &lc);
486 if (lc != 0)
487 {
488 /* Planes 3 and 4 are MULE private charsets */
489 if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
490 *p++ = LCPRV2_B;
491 *p++ = lc; /* Plane No. */
492 *p++ = (cnsBuf >> 8) & 0x00ff;
493 *p++ = cnsBuf & 0x00ff;
494 }
495 else
496 {
497 if (noError)
498 break;
499 report_untranslatable_char(PG_BIG5, PG_MULE_INTERNAL,
500 (const char *) big5, len);
501 }
502 big5 += l;
503 len -= l;
504 }
505 *p = '\0';
506
507 return big5 - start;
508}
@ PG_MULE_INTERNAL
Definition: pg_wchar.h:233
#define LC_CNS11643_4
Definition: pg_wchar.h:193
#define LCPRV2_B
Definition: pg_wchar.h:163

References BIG5toCNS(), IS_HIGHBIT_SET, LC_CNS11643_3, LC_CNS11643_4, LCPRV2_B, len, PG_BIG5, pg_encoding_verifymbchar(), PG_MULE_INTERNAL, report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by big5_to_mic().

◆ big5_to_euc_tw()

Datum big5_to_euc_tw ( PG_FUNCTION_ARGS  )

Definition at line 68 of file euc_tw_and_big5.c.

69{
70 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
71 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
72 int len = PG_GETARG_INT32(4);
73 bool noError = PG_GETARG_BOOL(5);
74 int converted;
75
76 CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_EUC_TW);
77
78 converted = big52euc_tw(src, dest, len, noError);
79
80 PG_RETURN_INT32(converted);
81}
static int big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError)
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:507

References big52euc_tw(), CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, len, PG_BIG5, PG_EUC_TW, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, and PG_RETURN_INT32.

◆ big5_to_mic()

Datum big5_to_mic ( PG_FUNCTION_ARGS  )

Definition at line 116 of file euc_tw_and_big5.c.

117{
118 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
119 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
120 int len = PG_GETARG_INT32(4);
121 bool noError = PG_GETARG_BOOL(5);
122 int converted;
123
124 CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_MULE_INTERNAL);
125
126 converted = big52mic(src, dest, len, noError);
127
128 PG_RETURN_INT32(converted);
129}
static int big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError)

References big52mic(), CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, len, PG_BIG5, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_MULE_INTERNAL, and PG_RETURN_INT32.

◆ euc_tw2big5()

static int euc_tw2big5 ( const unsigned char *  euc,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 152 of file euc_tw_and_big5.c.

153{
154 const unsigned char *start = euc;
155 unsigned char c1;
156 unsigned short big5buf,
157 cnsBuf;
158 unsigned char lc;
159 int l;
160
161 while (len > 0)
162 {
163 c1 = *euc;
164 if (IS_HIGHBIT_SET(c1))
165 {
166 /* Verify and decode the next EUC_TW input character */
167 l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
168 if (l < 0)
169 {
170 if (noError)
171 break;
172 report_invalid_encoding(PG_EUC_TW,
173 (const char *) euc, len);
174 }
175 if (c1 == SS2)
176 {
177 c1 = euc[1]; /* plane No. */
178 if (c1 == 0xa1)
179 lc = LC_CNS11643_1;
180 else if (c1 == 0xa2)
181 lc = LC_CNS11643_2;
182 else
183 lc = c1 - 0xa3 + LC_CNS11643_3;
184 cnsBuf = (euc[2] << 8) | euc[3];
185 }
186 else
187 { /* CNS11643-1 */
188 lc = LC_CNS11643_1;
189 cnsBuf = (c1 << 8) | euc[1];
190 }
191
192 /* Write it out in Big5 */
193 big5buf = CNStoBIG5(cnsBuf, lc);
194 if (big5buf == 0)
195 {
196 if (noError)
197 break;
198 report_untranslatable_char(PG_EUC_TW, PG_BIG5,
199 (const char *) euc, len);
200 }
201 *p++ = (big5buf >> 8) & 0x00ff;
202 *p++ = big5buf & 0x00ff;
203
204 euc += l;
205 len -= l;
206 }
207 else
208 { /* should be ASCII */
209 if (c1 == 0)
210 {
211 if (noError)
212 break;
213 report_invalid_encoding(PG_EUC_TW,
214 (const char *) euc, len);
215 }
216 *p++ = c1;
217 euc++;
218 len--;
219 }
220 }
221 *p = '\0';
222
223 return euc - start;
224}
unsigned short CNStoBIG5(unsigned short cns, unsigned char lc)
Definition: big5.c:345

References CNStoBIG5(), IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, len, PG_BIG5, pg_encoding_verifymbchar(), PG_EUC_TW, report_invalid_encoding(), report_untranslatable_char(), SS2, and start.

Referenced by euc_tw_to_big5().

◆ euc_tw2mic()

static int euc_tw2mic ( const unsigned char *  euc,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 311 of file euc_tw_and_big5.c.

312{
313 const unsigned char *start = euc;
314 int c1;
315 int l;
316
317 while (len > 0)
318 {
319 c1 = *euc;
320 if (IS_HIGHBIT_SET(c1))
321 {
322 l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
323 if (l < 0)
324 {
325 if (noError)
326 break;
327 report_invalid_encoding(PG_EUC_TW,
328 (const char *) euc, len);
329 }
330 if (c1 == SS2)
331 {
332 c1 = euc[1]; /* plane No. */
333 if (c1 == 0xa1)
334 *p++ = LC_CNS11643_1;
335 else if (c1 == 0xa2)
336 *p++ = LC_CNS11643_2;
337 else
338 {
339 /* other planes are MULE private charsets */
340 *p++ = LCPRV2_B;
341 *p++ = c1 - 0xa3 + LC_CNS11643_3;
342 }
343 *p++ = euc[2];
344 *p++ = euc[3];
345 }
346 else
347 { /* CNS11643-1 */
348 *p++ = LC_CNS11643_1;
349 *p++ = c1;
350 *p++ = euc[1];
351 }
352 euc += l;
353 len -= l;
354 }
355 else
356 { /* should be ASCII */
357 if (c1 == 0)
358 {
359 if (noError)
360 break;
361 report_invalid_encoding(PG_EUC_TW,
362 (const char *) euc, len);
363 }
364 *p++ = c1;
365 euc++;
366 len--;
367 }
368 }
369 *p = '\0';
370
371 return euc - start;
372}

References IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, LCPRV2_B, len, pg_encoding_verifymbchar(), PG_EUC_TW, report_invalid_encoding(), SS2, and start.

Referenced by euc_tw_to_mic().

◆ euc_tw_to_big5()

Datum euc_tw_to_big5 ( PG_FUNCTION_ARGS  )

Definition at line 52 of file euc_tw_and_big5.c.

53{
54 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
55 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
56 int len = PG_GETARG_INT32(4);
57 bool noError = PG_GETARG_BOOL(5);
58 int converted;
59
60 CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_BIG5);
61
62 converted = euc_tw2big5(src, dest, len, noError);
63
64 PG_RETURN_INT32(converted);
65}
static int euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError)

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, euc_tw2big5(), len, PG_BIG5, PG_EUC_TW, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, and PG_RETURN_INT32.

◆ euc_tw_to_mic()

Datum euc_tw_to_mic ( PG_FUNCTION_ARGS  )

Definition at line 84 of file euc_tw_and_big5.c.

85{
86 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
87 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
88 int len = PG_GETARG_INT32(4);
89 bool noError = PG_GETARG_BOOL(5);
90 int converted;
91
92 CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_MULE_INTERNAL);
93
94 converted = euc_tw2mic(src, dest, len, noError);
95
96 PG_RETURN_INT32(converted);
97}
static int euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, euc_tw2mic(), len, PG_EUC_TW, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_MULE_INTERNAL, and PG_RETURN_INT32.

◆ mic2big5()

static int mic2big5 ( const unsigned char *  mic,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 514 of file euc_tw_and_big5.c.

515{
516 const unsigned char *start = mic;
517 unsigned short c1;
518 unsigned short big5buf,
519 cnsBuf;
520 int l;
521
522 while (len > 0)
523 {
524 c1 = *mic;
525 if (!IS_HIGHBIT_SET(c1))
526 {
527 /* ASCII */
528 if (c1 == 0)
529 {
530 if (noError)
531 break;
532 report_invalid_encoding(PG_MULE_INTERNAL,
533 (const char *) mic, len);
534 }
535 *p++ = c1;
536 mic++;
537 len--;
538 continue;
539 }
540 l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
541 if (l < 0)
542 {
543 if (noError)
544 break;
545 report_invalid_encoding(PG_MULE_INTERNAL,
546 (const char *) mic, len);
547 }
548 if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == LCPRV2_B)
549 {
550 if (c1 == LCPRV2_B)
551 {
552 c1 = mic[1]; /* get plane no. */
553 cnsBuf = (mic[2] << 8) | mic[3];
554 }
555 else
556 {
557 cnsBuf = (mic[1] << 8) | mic[2];
558 }
559 big5buf = CNStoBIG5(cnsBuf, c1);
560 if (big5buf == 0)
561 {
562 if (noError)
563 break;
564 report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
565 (const char *) mic, len);
566 }
567 *p++ = (big5buf >> 8) & 0x00ff;
568 *p++ = big5buf & 0x00ff;
569 }
570 else
571 {
572 if (noError)
573 break;
574 report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
575 (const char *) mic, len);
576 }
577 mic += l;
578 len -= l;
579 }
580 *p = '\0';
581
582 return mic - start;
583}

References CNStoBIG5(), IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LCPRV2_B, len, PG_BIG5, pg_encoding_verifymbchar(), PG_MULE_INTERNAL, report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by mic_to_big5().

◆ mic2euc_tw()

static int mic2euc_tw ( const unsigned char *  mic,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 378 of file euc_tw_and_big5.c.

379{
380 const unsigned char *start = mic;
381 int c1;
382 int l;
383
384 while (len > 0)
385 {
386 c1 = *mic;
387 if (!IS_HIGHBIT_SET(c1))
388 {
389 /* ASCII */
390 if (c1 == 0)
391 {
392 if (noError)
393 break;
394 report_invalid_encoding(PG_MULE_INTERNAL,
395 (const char *) mic, len);
396 }
397 *p++ = c1;
398 mic++;
399 len--;
400 continue;
401 }
402 l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
403 if (l < 0)
404 {
405 if (noError)
406 break;
407 report_invalid_encoding(PG_MULE_INTERNAL,
408 (const char *) mic, len);
409 }
410 if (c1 == LC_CNS11643_1)
411 {
412 *p++ = mic[1];
413 *p++ = mic[2];
414 }
415 else if (c1 == LC_CNS11643_2)
416 {
417 *p++ = SS2;
418 *p++ = 0xa2;
419 *p++ = mic[1];
420 *p++ = mic[2];
421 }
422 else if (c1 == LCPRV2_B &&
423 mic[1] >= LC_CNS11643_3 && mic[1] <= LC_CNS11643_7)
424 {
425 *p++ = SS2;
426 *p++ = mic[1] - LC_CNS11643_3 + 0xa3;
427 *p++ = mic[2];
428 *p++ = mic[3];
429 }
430 else
431 {
432 if (noError)
433 break;
434 report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_TW,
435 (const char *) mic, len);
436 }
437 mic += l;
438 len -= l;
439 }
440 *p = '\0';
441
442 return mic - start;
443}

References IS_HIGHBIT_SET, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, LC_CNS11643_7, LCPRV2_B, len, pg_encoding_verifymbchar(), PG_EUC_TW, PG_MULE_INTERNAL, report_invalid_encoding(), report_untranslatable_char(), SS2, and start.

Referenced by mic_to_euc_tw().

◆ mic_to_big5()

Datum mic_to_big5 ( PG_FUNCTION_ARGS  )

Definition at line 132 of file euc_tw_and_big5.c.

133{
134 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
135 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
136 int len = PG_GETARG_INT32(4);
137 bool noError = PG_GETARG_BOOL(5);
138 int converted;
139
140 CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_BIG5);
141
142 converted = mic2big5(src, dest, len, noError);
143
144 PG_RETURN_INT32(converted);
145}
static int mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError)

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, len, mic2big5(), PG_BIG5, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_MULE_INTERNAL, and PG_RETURN_INT32.

◆ mic_to_euc_tw()

Datum mic_to_euc_tw ( PG_FUNCTION_ARGS  )

Definition at line 100 of file euc_tw_and_big5.c.

101{
102 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
103 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
104 int len = PG_GETARG_INT32(4);
105 bool noError = PG_GETARG_BOOL(5);
106 int converted;
107
108 CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_TW);
109
110 converted = mic2euc_tw(src, dest, len, noError);
111
112 PG_RETURN_INT32(converted);
113}
static int mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError)

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, len, mic2euc_tw(), PG_EUC_TW, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_MULE_INTERNAL, and PG_RETURN_INT32.

◆ PG_FUNCTION_INFO_V1() [1/6]

PG_FUNCTION_INFO_V1 ( big5_to_euc_tw  )

◆ PG_FUNCTION_INFO_V1() [2/6]

PG_FUNCTION_INFO_V1 ( big5_to_mic  )

◆ PG_FUNCTION_INFO_V1() [3/6]

PG_FUNCTION_INFO_V1 ( euc_tw_to_big5  )

◆ PG_FUNCTION_INFO_V1() [4/6]

PG_FUNCTION_INFO_V1 ( euc_tw_to_mic  )

◆ PG_FUNCTION_INFO_V1() [5/6]

PG_FUNCTION_INFO_V1 ( mic_to_big5  )

◆ PG_FUNCTION_INFO_V1() [6/6]

PG_FUNCTION_INFO_V1 ( mic_to_euc_tw  )

◆ PG_MODULE_MAGIC_EXT()

PG_MODULE_MAGIC_EXT ( name = "euc_tw_and_big5",
version = PG_VERSION 
)