PostgreSQL Source Code  git master
euc2004_sjis2004.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * EUC_JIS_2004, SHIFT_JIS_2004
4  *
5  * Copyright (c) 2007-2017, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  * src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c
9  *
10  *-------------------------------------------------------------------------
11  */
12 
13 #include "postgres.h"
14 #include "fmgr.h"
15 #include "mb/pg_wchar.h"
16 
/*
 * Loadable-module boilerplate.  PG_MODULE_MAGIC marks this file as a
 * PostgreSQL loadable module; PG_FUNCTION_INFO_V1 declares each
 * SQL-callable entry point defined below as a version-1 function.
 */
PG_MODULE_MAGIC;

PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);

/* Internal byte-level converters; definitions follow the entry points. */
static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
24 
25 /* ----------
26  * conv_proc(
27  * INTEGER, -- source encoding id
28  * INTEGER, -- destination encoding id
29  * CSTRING, -- source string (null terminated C string)
30  * CSTRING, -- destination string (null terminated C string)
31  * INTEGER -- source string length
32  * ) returns VOID;
33  * ----------
34  */
35 
36 Datum
38 {
39  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
40  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
41  int len = PG_GETARG_INT32(4);
42 
44 
45  euc_jis_20042shift_jis_2004(src, dest, len);
46 
48 }
49 
50 Datum
52 {
53  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
54  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
55  int len = PG_GETARG_INT32(4);
56 
58 
59  shift_jis_20042euc_jis_2004(src, dest, len);
60 
62 }
63 
64 /*
65  * EUC_JIS_2004 -> SHIFT_JIS_2004
66  */
67 static void
68 euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
69 {
70  int c1,
71  ku,
72  ten;
73  int l;
74 
75  while (len > 0)
76  {
77  c1 = *euc;
78  if (!IS_HIGHBIT_SET(c1))
79  {
80  /* ASCII */
81  if (c1 == 0)
83  (const char *) euc, len);
84  *p++ = c1;
85  euc++;
86  len--;
87  continue;
88  }
89 
90  l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
91 
92  if (l < 0)
94  (const char *) euc, len);
95 
96  if (c1 == SS2 && l == 2) /* JIS X 0201 kana? */
97  {
98  *p++ = euc[1];
99  }
100  else if (c1 == SS3 && l == 3) /* JIS X 0213 plane 2? */
101  {
102  ku = euc[1] - 0xa0;
103  ten = euc[2] - 0xa0;
104 
105  switch (ku)
106  {
107  case 1:
108  case 3:
109  case 4:
110  case 5:
111  case 8:
112  case 12:
113  case 13:
114  case 14:
115  case 15:
116  *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
117  break;
118  default:
119  if (ku >= 78 && ku <= 94)
120  {
121  *p++ = (ku + 0x19b) >> 1;
122  }
123  else
125  (const char *) euc, len);
126  }
127 
128  if (ku % 2)
129  {
130  if (ten >= 1 && ten <= 63)
131  *p++ = ten + 0x3f;
132  else if (ten >= 64 && ten <= 94)
133  *p++ = ten + 0x40;
134  else
136  (const char *) euc, len);
137  }
138  else
139  *p++ = ten + 0x9e;
140  }
141 
142  else if (l == 2) /* JIS X 0213 plane 1? */
143  {
144  ku = c1 - 0xa0;
145  ten = euc[1] - 0xa0;
146 
147  if (ku >= 1 && ku <= 62)
148  *p++ = (ku + 0x101) >> 1;
149  else if (ku >= 63 && ku <= 94)
150  *p++ = (ku + 0x181) >> 1;
151  else
153  (const char *) euc, len);
154 
155  if (ku % 2)
156  {
157  if (ten >= 1 && ten <= 63)
158  *p++ = ten + 0x3f;
159  else if (ten >= 64 && ten <= 94)
160  *p++ = ten + 0x40;
161  else
163  (const char *) euc, len);
164  }
165  else
166  *p++ = ten + 0x9e;
167  }
168  else
170  (const char *) euc, len);
171 
172  euc += l;
173  len -= l;
174  }
175  *p = '\0';
176 }
177 
/*
 * Decode the second byte of a two-byte SHIFT_JIS_2004 character.
 *
 * Returns the "ten" number (1..94) encoded by byte b, or -1 if b is not a
 * valid second byte.  The parity of the "ku" is reported through *ku:
 *		*ku = 1: "ku" is odd
 *		*ku = 0: "ku" is even (also stored when -1 is returned)
 */
static int
get_ten(int b, int *ku)
{
	/* valid second bytes are 0x40..0xfc, with a hole at 0x7f */
	if (b < 0x40 || b > 0xfc || b == 0x7f)
	{
		*ku = 0;				/* keep compiler quiet */
		return -1;				/* error */
	}

	/* upper range: even "ku" */
	if (b >= 0x9f)
	{
		*ku = 0;
		return b - 0x9e;
	}

	/* lower two ranges: odd "ku"; the offset differs on each side of 0x7f */
	*ku = 1;
	return (b < 0x7f) ? (b - 0x3f) : (b - 0x40);
}
210 
211 /*
212  * SHIFT_JIS_2004 ---> EUC_JIS_2004
213  */
214 
215 static void
216 shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
217 {
218  int c1;
219  int ku,
220  ten,
221  kubun;
222  int plane;
223  int l;
224 
225  while (len > 0)
226  {
227  c1 = *sjis;
228 
229  if (!IS_HIGHBIT_SET(c1))
230  {
231  /* ASCII */
232  if (c1 == 0)
234  (const char *) sjis, len);
235  *p++ = c1;
236  sjis++;
237  len--;
238  continue;
239  }
240 
241  l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
242 
243  if (l < 0 || l > len)
245  (const char *) sjis, len);
246 
247  if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
248  {
249  /* JIS X0201 (1 byte kana) */
250  *p++ = SS2;
251  *p++ = c1;
252  }
253  else if (l == 2)
254  {
255  int c2 = sjis[1];
256 
257  plane = 1;
258  ku = 1;
259  ten = 1;
260 
261  /*
262  * JIS X 0213
263  */
264  if (c1 >= 0x81 && c1 <= 0x9f) /* plane 1 1ku-62ku */
265  {
266  ku = (c1 << 1) - 0x100;
267  ten = get_ten(c2, &kubun);
268  if (ten < 0)
270  (const char *) sjis, len);
271  ku -= kubun;
272  }
273  else if (c1 >= 0xe0 && c1 <= 0xef) /* plane 1 62ku-94ku */
274  {
275  ku = (c1 << 1) - 0x180;
276  ten = get_ten(c2, &kubun);
277  if (ten < 0)
279 
280  (const char *) sjis, len);
281  ku -= kubun;
282  }
283  else if (c1 >= 0xf0 && c1 <= 0xf3) /* plane 2
284  * 1,3,4,5,8,12,13,14,15 ku */
285  {
286  plane = 2;
287  ten = get_ten(c2, &kubun);
288  if (ten < 0)
290  (const char *) sjis, len);
291  switch (c1)
292  {
293  case 0xf0:
294  ku = kubun == 0 ? 8 : 1;
295  break;
296  case 0xf1:
297  ku = kubun == 0 ? 4 : 3;
298  break;
299  case 0xf2:
300  ku = kubun == 0 ? 12 : 5;
301  break;
302  default:
303  ku = kubun == 0 ? 14 : 13;
304  break;
305  }
306  }
307  else if (c1 >= 0xf4 && c1 <= 0xfc) /* plane 2 78-94ku */
308  {
309  plane = 2;
310  ten = get_ten(c2, &kubun);
311  if (ten < 0)
313  (const char *) sjis, len);
314  if (c1 == 0xf4 && kubun == 1)
315  ku = 15;
316  else
317  ku = (c1 << 1) - 0x19a - kubun;
318  }
319  else
321  (const char *) sjis, len);
322 
323  if (plane == 2)
324  *p++ = SS3;
325 
326  *p++ = ku + 0xa0;
327  *p++ = ten + 0xa0;
328  }
329  sjis += l;
330  len -= l;
331  }
332  *p = '\0';
333 }
static int get_ten(int b, int *ku)
static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
Datum euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC
#define SS3
Definition: pg_wchar.h:36
int pg_encoding_verifymb(int encoding, const char *mbstr, int len)
Definition: wchar.c:1809
#define IS_HIGHBIT_SET(ch)
Definition: c.h:949
Datum shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
uintptr_t Datum
Definition: postgres.h:372
#define PG_RETURN_VOID()
Definition: fmgr.h:309
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:503
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004)
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:242
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
#define SS2
Definition: pg_wchar.h:35