PostgreSQL Source Code git master
Loading...
Searching...
No Matches
big5.c
Go to the documentation of this file.
/*
 * BIG5 support functions (CNS 11643-1992 plane 1 and plane 2).
 * This program is partially copied from lv (Multilingual file viewer)
 * and slightly modified. lv is written and copyrighted by NARITA Tomio
 * (nrt@web.ad.jp).
 *
 * 1999/1/15 Tatsuo Ishii
 *
 * src/backend/utils/mb/conversion_procs/euc_tw_and_big5/big5.c
 */
11
12/* can be used in either frontend or backend */
13#include "postgres_fe.h"
14
15#include "mb/pg_wchar.h"
16
/*
 * One entry of a range-mapping table.
 *
 * "code" is the first code point of a run in the source character set and
 * "peer" is the code point that "code" itself maps to in the target
 * character set.  BinarySearchRange() treats a peer of 0 as "this run has no
 * mapping".
 */
typedef struct
{
	unsigned short code,
				peer;
} codes_t;
22
23/* map Big5 Level 1 to CNS 11643-1992 Plane 1 */
24static const codes_t big5Level1ToCnsPlane1[25] = { /* range */
25 {0xA140, 0x2121},
26 {0xA1F6, 0x2258},
27 {0xA1F7, 0x2257},
28 {0xA1F8, 0x2259},
29 {0xA2AF, 0x2421},
30 {0xA3C0, 0x4221},
31 {0xa3e1, 0x0000},
32 {0xA440, 0x4421},
33 {0xACFE, 0x5753},
34 {0xacff, 0x0000},
35 {0xAD40, 0x5323},
36 {0xAFD0, 0x5754},
37 {0xBBC8, 0x6B51},
38 {0xBE52, 0x6B50},
39 {0xBE53, 0x6F5C},
40 {0xC1AB, 0x7536},
41 {0xC2CB, 0x7535},
42 {0xC2CC, 0x7737},
43 {0xC361, 0x782E},
44 {0xC3B9, 0x7865},
45 {0xC3BA, 0x7864},
46 {0xC3BB, 0x7866},
47 {0xC456, 0x782D},
48 {0xC457, 0x7962},
49 {0xc67f, 0x0000}
50};
51
52/* map CNS 11643-1992 Plane 1 to Big5 Level 1 */
53static const codes_t cnsPlane1ToBig5Level1[26] = { /* range */
54 {0x2121, 0xA140},
55 {0x2257, 0xA1F7},
56 {0x2258, 0xA1F6},
57 {0x2259, 0xA1F8},
58 {0x234f, 0x0000},
59 {0x2421, 0xA2AF},
60 {0x2571, 0x0000},
61 {0x4221, 0xA3C0},
62 {0x4242, 0x0000},
63 {0x4421, 0xA440},
64 {0x5323, 0xAD40},
65 {0x5753, 0xACFE},
66 {0x5754, 0xAFD0},
67 {0x6B50, 0xBE52},
68 {0x6B51, 0xBBC8},
69 {0x6F5C, 0xBE53},
70 {0x7535, 0xC2CB},
71 {0x7536, 0xC1AB},
72 {0x7737, 0xC2CC},
73 {0x782D, 0xC456},
74 {0x782E, 0xC361},
75 {0x7864, 0xC3BA},
76 {0x7865, 0xC3B9},
77 {0x7866, 0xC3BB},
78 {0x7962, 0xC457},
79 {0x7d4c, 0x0000}
80};
81
82/* map Big5 Level 2 to CNS 11643-1992 Plane 2 */
83static const codes_t big5Level2ToCnsPlane2[48] = { /* range */
84 {0xC940, 0x2121},
85 {0xc94a, 0x0000},
86 {0xC94B, 0x212B},
87 {0xC96C, 0x214D},
88 {0xC9BE, 0x214C},
89 {0xC9BF, 0x217D},
90 {0xC9ED, 0x224E},
91 {0xCAF7, 0x224D},
92 {0xCAF8, 0x2439},
93 {0xD77A, 0x3F6A},
94 {0xD77B, 0x387E},
95 {0xDBA7, 0x3F6B},
96 {0xDDFC, 0x4176},
97 {0xDDFD, 0x4424},
98 {0xE8A3, 0x554C},
99 {0xE976, 0x5723},
100 {0xEB5B, 0x5A29},
101 {0xEBF1, 0x554B},
102 {0xEBF2, 0x5B3F},
103 {0xECDE, 0x5722},
104 {0xECDF, 0x5C6A},
105 {0xEDAA, 0x5D75},
106 {0xEEEB, 0x642F},
107 {0xEEEC, 0x6039},
108 {0xF056, 0x5D74},
109 {0xF057, 0x6243},
110 {0xF0CB, 0x5A28},
111 {0xF0CC, 0x6337},
112 {0xF163, 0x6430},
113 {0xF16B, 0x6761},
114 {0xF16C, 0x6438},
115 {0xF268, 0x6934},
116 {0xF269, 0x6573},
117 {0xF2C3, 0x664E},
118 {0xF375, 0x6762},
119 {0xF466, 0x6935},
120 {0xF4B5, 0x664D},
121 {0xF4B6, 0x6962},
122 {0xF4FD, 0x6A4C},
123 {0xF663, 0x6A4B},
124 {0xF664, 0x6C52},
125 {0xF977, 0x7167},
126 {0xF9C4, 0x7166},
127 {0xF9C5, 0x7234},
128 {0xF9C6, 0x7240},
129 {0xF9C7, 0x7235},
130 {0xF9D2, 0x7241},
131 {0xf9d6, 0x0000}
132};
133
134/* map CNS 11643-1992 Plane 2 to Big5 Level 2 */
135static const codes_t cnsPlane2ToBig5Level2[49] = { /* range */
136 {0x2121, 0xC940},
137 {0x212B, 0xC94B},
138 {0x214C, 0xC9BE},
139 {0x214D, 0xC96C},
140 {0x217D, 0xC9BF},
141 {0x224D, 0xCAF7},
142 {0x224E, 0xC9ED},
143 {0x2439, 0xCAF8},
144 {0x387E, 0xD77B},
145 {0x3F6A, 0xD77A},
146 {0x3F6B, 0xDBA7},
147 {0x4424, 0x0000},
148 {0x4176, 0xDDFC},
149 {0x4177, 0x0000},
150 {0x4424, 0xDDFD},
151 {0x554B, 0xEBF1},
152 {0x554C, 0xE8A3},
153 {0x5722, 0xECDE},
154 {0x5723, 0xE976},
155 {0x5A28, 0xF0CB},
156 {0x5A29, 0xEB5B},
157 {0x5B3F, 0xEBF2},
158 {0x5C6A, 0xECDF},
159 {0x5D74, 0xF056},
160 {0x5D75, 0xEDAA},
161 {0x6039, 0xEEEC},
162 {0x6243, 0xF057},
163 {0x6337, 0xF0CC},
164 {0x642F, 0xEEEB},
165 {0x6430, 0xF163},
166 {0x6438, 0xF16C},
167 {0x6573, 0xF269},
168 {0x664D, 0xF4B5},
169 {0x664E, 0xF2C3},
170 {0x6761, 0xF16B},
171 {0x6762, 0xF375},
172 {0x6934, 0xF268},
173 {0x6935, 0xF466},
174 {0x6962, 0xF4B6},
175 {0x6A4B, 0xF663},
176 {0x6A4C, 0xF4FD},
177 {0x6C52, 0xF664},
178 {0x7166, 0xF9C4},
179 {0x7167, 0xF977},
180 {0x7234, 0xF9C5},
181 {0x7235, 0xF9C7},
182 {0x7240, 0xF9C6},
183 {0x7241, 0xF9D2},
184 {0x7245, 0x0000}
185};
186
/*
 * Big Five Level 1 Correspondence to CNS 11643-1992 Plane 4
 *
 * Exception list scanned linearly before the range tables are consulted.
 * Each row is {Big5 code, CNS plane 4 code with the high bits clear}.
 */
static const unsigned short b1c4[][2] = {
	{0xC879, 0x2123},
	{0xC87B, 0x2124},
	{0xC87D, 0x212A},
	{0xC8A2, 0x2152}
};
194
/*
 * Big Five Level 2 Correspondence to CNS 11643-1992 Plane 3
 *
 * Exception list scanned linearly before the range tables are consulted.
 * Each row is {Big5 code, CNS plane 3 code with the high bits clear}.
 */
static const unsigned short b2c3[][2] = {
	{0xF9D6, 0x4337},
	{0xF9D7, 0x4F50},
	{0xF9D8, 0x444E},
	{0xF9D9, 0x504A},
	{0xF9DA, 0x2C5D},
	{0xF9DB, 0x3D7E},
	{0xF9DC, 0x4B5C}
};
205
206static unsigned short BinarySearchRange
207 (const codes_t *array, int high, unsigned short code)
208{
209 int low,
210 mid,
211 distance,
212 tmp;
213
214 low = 0;
215 mid = high >> 1;
216
217 for (; low <= high; mid = (low + high) >> 1)
218 {
219 if ((array[mid].code <= code) && (array[mid + 1].code > code))
220 {
221 if (0 == array[mid].peer)
222 return 0;
223 if (code >= 0xa140U)
224 {
225 /* big5 to cns */
226 tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8;
227 high = code & 0x00ff;
228 low = array[mid].code & 0x00ff;
229
230 /*
231 * NOTE: big5 high_byte: 0xa1-0xfe, low_byte: 0x40-0x7e,
232 * 0xa1-0xfe (radicals: 0x00-0x3e, 0x3f-0x9c) big5 radix is
233 * 0x9d. [region_low, region_high] We
234 * should remember big5 has two different regions (above).
235 * There is a bias for the distance between these regions.
236 * 0xa1 - 0x7e + bias = 1 (Distance between 0xa1 and 0x7e is
237 * 1.) bias = - 0x22.
238 */
239 distance = tmp * 0x9d + high - low +
240 (high >= 0xa1 ? (low >= 0xa1 ? 0 : -0x22)
241 : (low >= 0xa1 ? +0x22 : 0));
242
243 /*
244 * NOTE: we have to convert the distance into a code point.
245 * The code point's low_byte is 0x21 plus mod_0x5e. In the
246 * first, we extract the mod_0x5e of the starting code point,
247 * subtracting 0x21, and add distance to it. Then we calculate
248 * again mod_0x5e of them, and restore the final codepoint,
249 * adding 0x21.
250 */
251 tmp = (array[mid].peer & 0x00ff) + distance - 0x21;
252 tmp = (array[mid].peer & 0xff00) + ((tmp / 0x5e) << 8)
253 + 0x21 + tmp % 0x5e;
254 return tmp;
255 }
256 else
257 {
258 /* cns to big5 */
259 tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8;
260
261 /*
262 * NOTE: ISO charsets ranges between 0x21-0xfe (94charset).
263 * Its radix is 0x5e. But there is no distance bias like big5.
264 */
265 distance = tmp * 0x5e
266 + ((int) (code & 0x00ff) - (int) (array[mid].code & 0x00ff));
267
268 /*
269 * NOTE: Similar to big5 to cns conversion, we extract
270 * mod_0x9d and restore mod_0x9d into a code point.
271 */
272 low = array[mid].peer & 0x00ff;
273 tmp = low + distance - (low >= 0xa1 ? 0x62 : 0x40);
274 low = tmp % 0x9d;
275 tmp = (array[mid].peer & 0xff00) + ((tmp / 0x9d) << 8)
276 + (low > 0x3e ? 0x62 : 0x40) + low;
277 return tmp;
278 }
279 }
280 else if (array[mid].code > code)
281 high = mid - 1;
282 else
283 low = mid + 1;
284 }
285
286 return 0;
287}
288
289
290unsigned short
291BIG5toCNS(unsigned short big5, unsigned char *lc)
292{
293 unsigned short cns = 0;
294 int i;
295
296 if (big5 < 0xc940U)
297 {
298 /* level 1 */
299
300 for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
301 {
302 if (b1c4[i][0] == big5)
303 {
304 *lc = LC_CNS11643_4;
305 return (b1c4[i][1] | 0x8080U);
306 }
307 }
308
310 *lc = LC_CNS11643_1;
311 }
312 else if (big5 == 0xc94aU)
313 {
314 /* level 2 */
315 *lc = LC_CNS11643_1;
316 cns = 0x4442;
317 }
318 else
319 {
320 /* level 2 */
321 for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
322 {
323 if (b2c3[i][0] == big5)
324 {
325 *lc = LC_CNS11643_3;
326 return (b2c3[i][1] | 0x8080U);
327 }
328 }
329
331 *lc = LC_CNS11643_2;
332 }
333
334 if (0 == cns)
335 { /* no mapping Big5 to CNS 11643-1992 */
336 *lc = 0;
337 return (unsigned short) '?';
338 }
339
340 return cns | 0x8080;
341}
342
343unsigned short
344CNStoBIG5(unsigned short cns, unsigned char lc)
345{
346 int i;
347 unsigned int big5 = 0;
348
349 cns &= 0x7f7f;
350
351 switch (lc)
352 {
353 case LC_CNS11643_1:
355 break;
356 case LC_CNS11643_2:
358 break;
359 case LC_CNS11643_3:
360 for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
361 {
362 if (b2c3[i][1] == cns)
363 return b2c3[i][0];
364 }
365 break;
366 case LC_CNS11643_4:
367 for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
368 {
369 if (b1c4[i][1] == cns)
370 return b1c4[i][0];
371 }
372 break;
373 default:
374 break;
375 }
376 return big5;
377}
static const codes_t big5Level1ToCnsPlane1[25]
Definition big5.c:24
static const codes_t big5Level2ToCnsPlane2[48]
Definition big5.c:83
static unsigned short BinarySearchRange(const codes_t *array, int high, unsigned short code)
Definition big5.c:207
unsigned short CNStoBIG5(unsigned short cns, unsigned char lc)
Definition big5.c:344
static const unsigned short b2c3[][2]
Definition big5.c:196
static const codes_t cnsPlane2ToBig5Level2[49]
Definition big5.c:135
unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc)
Definition big5.c:291
static const codes_t cnsPlane1ToBig5Level1[26]
Definition big5.c:53
static const unsigned short b1c4[][2]
Definition big5.c:188
int i
Definition isn.c:77
#define LC_CNS11643_3
Definition pg_wchar.h:46
#define LC_CNS11643_1
Definition pg_wchar.h:44
#define LC_CNS11643_4
Definition pg_wchar.h:47
#define LC_CNS11643_2
Definition pg_wchar.h:45
static int fb(int x)
Definition big5.c:18
unsigned short code
Definition big5.c:19
unsigned short peer
Definition big5.c:20