PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
euc2004_sjis2004.c File Reference
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
Include dependency graph for euc2004_sjis2004.c:

Go to the source code of this file.

Functions

 PG_FUNCTION_INFO_V1 (euc_jis_2004_to_shift_jis_2004)
 
 PG_FUNCTION_INFO_V1 (shift_jis_2004_to_euc_jis_2004)
 
static int euc_jis_20042shift_jis_2004 (const unsigned char *euc, unsigned char *p, int len, bool noError)
 
static int shift_jis_20042euc_jis_2004 (const unsigned char *sjis, unsigned char *p, int len, bool noError)
 
Datum euc_jis_2004_to_shift_jis_2004 (PG_FUNCTION_ARGS)
 
Datum shift_jis_2004_to_euc_jis_2004 (PG_FUNCTION_ARGS)
 
static int get_ten (int b, int *ku)
 

Variables

 PG_MODULE_MAGIC
 

Function Documentation

◆ euc_jis_20042shift_jis_2004()

static int euc_jis_20042shift_jis_2004 ( const unsigned char *  euc,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 75 of file euc2004_sjis2004.c.

76{
77 const unsigned char *start = euc;
78 int c1,
79 ku,
80 ten;
81 int l;
82
83 while (len > 0)
84 {
85 c1 = *euc;
86 if (!IS_HIGHBIT_SET(c1))
87 {
88 /* ASCII */
89 if (c1 == 0)
90 {
91 if (noError)
92 break;
94 (const char *) euc, len);
95 }
96 *p++ = c1;
97 euc++;
98 len--;
99 continue;
100 }
101
102 l = pg_encoding_verifymbchar(PG_EUC_JIS_2004, (const char *) euc, len);
103
104 if (l < 0)
105 {
106 if (noError)
107 break;
109 (const char *) euc, len);
110 }
111
112 if (c1 == SS2 && l == 2) /* JIS X 0201 kana? */
113 {
114 *p++ = euc[1];
115 }
116 else if (c1 == SS3 && l == 3) /* JIS X 0213 plane 2? */
117 {
118 ku = euc[1] - 0xa0;
119 ten = euc[2] - 0xa0;
120
121 switch (ku)
122 {
123 case 1:
124 case 3:
125 case 4:
126 case 5:
127 case 8:
128 case 12:
129 case 13:
130 case 14:
131 case 15:
132 *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
133 break;
134 default:
135 if (ku >= 78 && ku <= 94)
136 {
137 *p++ = (ku + 0x19b) >> 1;
138 }
139 else
140 {
141 if (noError)
142 break;
144 (const char *) euc, len);
145 }
146 }
147
148 if (ku % 2)
149 {
150 if (ten >= 1 && ten <= 63)
151 *p++ = ten + 0x3f;
152 else if (ten >= 64 && ten <= 94)
153 *p++ = ten + 0x40;
154 else
155 {
156 if (noError)
157 break;
159 (const char *) euc, len);
160 }
161 }
162 else
163 *p++ = ten + 0x9e;
164 }
165
166 else if (l == 2) /* JIS X 0213 plane 1? */
167 {
168 ku = c1 - 0xa0;
169 ten = euc[1] - 0xa0;
170
171 if (ku >= 1 && ku <= 62)
172 *p++ = (ku + 0x101) >> 1;
173 else if (ku >= 63 && ku <= 94)
174 *p++ = (ku + 0x181) >> 1;
175 else
176 {
177 if (noError)
178 break;
180 (const char *) euc, len);
181 }
182
183 if (ku % 2)
184 {
185 if (ten >= 1 && ten <= 63)
186 *p++ = ten + 0x3f;
187 else if (ten >= 64 && ten <= 94)
188 *p++ = ten + 0x40;
189 else
190 {
191 if (noError)
192 break;
194 (const char *) euc, len);
195 }
196 }
197 else
198 *p++ = ten + 0x9e;
199 }
200 else
201 {
202 if (noError)
203 break;
205 (const char *) euc, len);
206 }
207
208 euc += l;
209 len -= l;
210 }
211 *p = '\0';
212
213 return euc - start;
214}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1112
return str start
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698
const void size_t len
@ PG_EUC_JIS_2004
Definition: pg_wchar.h:231
#define SS2
Definition: pg_wchar.h:38
#define SS3
Definition: pg_wchar.h:39
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition: wchar.c:2103

References IS_HIGHBIT_SET, len, pg_encoding_verifymbchar(), PG_EUC_JIS_2004, report_invalid_encoding(), SS2, SS3, and start.

Referenced by euc_jis_2004_to_shift_jis_2004().

◆ euc_jis_2004_to_shift_jis_2004()

Datum euc_jis_2004_to_shift_jis_2004 ( PG_FUNCTION_ARGS  )

Definition at line 40 of file euc2004_sjis2004.c.

41{
42 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
43 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
44 int len = PG_GETARG_INT32(4);
45 bool noError = PG_GETARG_BOOL(5);
46 int converted;
47
49
50 converted = euc_jis_20042shift_jis_2004(src, dest, len, noError);
51
52 PG_RETURN_INT32(converted);
53}
static int euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError)
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
@ PG_SHIFT_JIS_2004
Definition: pg_wchar.h:270
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:507

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, euc_jis_20042shift_jis_2004(), len, PG_EUC_JIS_2004, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_RETURN_INT32, and PG_SHIFT_JIS_2004.

◆ get_ten()

static int get_ten ( int  b,
int *  ku 
)
static

Definition at line 222 of file euc2004_sjis2004.c.

/*
 * Map the second byte 'b' of a double-byte Shift_JIS-2004 character to its
 * "ten" value.
 *
 * Returns the ten value (1..94), or -1 if 'b' is outside every accepted
 * range (this includes 0x7f and anything above 0xfc).
 *
 * *ku is always assigned: 1 when 'b' lies in 0x40..0x7e or 0x80..0x9e, and
 * 0 when it lies in 0x9f..0xfc.  On error it is set to 0 only so that the
 * caller never sees an uninitialized value.
 */
static int
get_ten(int b, int *ku)
{
	int			ten;

	if (b >= 0x40 && b <= 0x7e)
	{
		ten = b - 0x3f;
		*ku = 1;
	}
	else if (b >= 0x80 && b <= 0x9e)
	{
		ten = b - 0x40;
		*ku = 1;
	}
	else if (b >= 0x9f && b <= 0xfc)
	{
		ten = b - 0x9e;
		*ku = 0;
	}
	else
	{
		ten = -1;				/* error */
		*ku = 0;				/* keep compiler quiet */
	}
	return ten;
}
int b
Definition: isn.c:69

References b.

Referenced by shift_jis_20042euc_jis_2004().

◆ PG_FUNCTION_INFO_V1() [1/2]

PG_FUNCTION_INFO_V1 ( euc_jis_2004_to_shift_jis_2004  )

◆ PG_FUNCTION_INFO_V1() [2/2]

PG_FUNCTION_INFO_V1 ( shift_jis_2004_to_euc_jis_2004  )

◆ shift_jis_20042euc_jis_2004()

static int shift_jis_20042euc_jis_2004 ( const unsigned char *  sjis,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 254 of file euc2004_sjis2004.c.

255{
256 const unsigned char *start = sjis;
257 int c1;
258 int ku,
259 ten,
260 kubun;
261 int plane;
262 int l;
263
264 while (len > 0)
265 {
266 c1 = *sjis;
267
268 if (!IS_HIGHBIT_SET(c1))
269 {
270 /* ASCII */
271 if (c1 == 0)
272 {
273 if (noError)
274 break;
276 (const char *) sjis, len);
277 }
278 *p++ = c1;
279 sjis++;
280 len--;
281 continue;
282 }
283
284 l = pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (const char *) sjis, len);
285
286 if (l < 0 || l > len)
287 {
288 if (noError)
289 break;
291 (const char *) sjis, len);
292 }
293
294 if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
295 {
296 /* JIS X0201 (1 byte kana) */
297 *p++ = SS2;
298 *p++ = c1;
299 }
300 else if (l == 2)
301 {
302 int c2 = sjis[1];
303
304 plane = 1;
305 ku = 1;
306 ten = 1;
307
308 /*
309 * JIS X 0213
310 */
311 if (c1 >= 0x81 && c1 <= 0x9f) /* plane 1 1ku-62ku */
312 {
313 ku = (c1 << 1) - 0x100;
314 ten = get_ten(c2, &kubun);
315 if (ten < 0)
316 {
317 if (noError)
318 break;
320 (const char *) sjis, len);
321 }
322 ku -= kubun;
323 }
324 else if (c1 >= 0xe0 && c1 <= 0xef) /* plane 1 62ku-94ku */
325 {
326 ku = (c1 << 1) - 0x180;
327 ten = get_ten(c2, &kubun);
328 if (ten < 0)
329 {
330 if (noError)
331 break;
333 (const char *) sjis, len);
334 }
335 ku -= kubun;
336 }
337 else if (c1 >= 0xf0 && c1 <= 0xf3) /* plane 2
338 * 1,3,4,5,8,12,13,14,15 ku */
339 {
340 plane = 2;
341 ten = get_ten(c2, &kubun);
342 if (ten < 0)
343 {
344 if (noError)
345 break;
347 (const char *) sjis, len);
348 }
349 switch (c1)
350 {
351 case 0xf0:
352 ku = kubun == 0 ? 8 : 1;
353 break;
354 case 0xf1:
355 ku = kubun == 0 ? 4 : 3;
356 break;
357 case 0xf2:
358 ku = kubun == 0 ? 12 : 5;
359 break;
360 default:
361 ku = kubun == 0 ? 14 : 13;
362 break;
363 }
364 }
365 else if (c1 >= 0xf4 && c1 <= 0xfc) /* plane 2 78-94ku */
366 {
367 plane = 2;
368 ten = get_ten(c2, &kubun);
369 if (ten < 0)
370 {
371 if (noError)
372 break;
374 (const char *) sjis, len);
375 }
376 if (c1 == 0xf4 && kubun == 1)
377 ku = 15;
378 else
379 ku = (c1 << 1) - 0x19a - kubun;
380 }
381 else
382 {
383 if (noError)
384 break;
386 (const char *) sjis, len);
387 }
388
389 if (plane == 2)
390 *p++ = SS3;
391
392 *p++ = ku + 0xa0;
393 *p++ = ten + 0xa0;
394 }
395 sjis += l;
396 len -= l;
397 }
398 *p = '\0';
399
400 return sjis - start;
401}
static int get_ten(int b, int *ku)

References get_ten(), IS_HIGHBIT_SET, len, pg_encoding_verifymbchar(), PG_SHIFT_JIS_2004, report_invalid_encoding(), SS2, SS3, and start.

Referenced by shift_jis_2004_to_euc_jis_2004().

◆ shift_jis_2004_to_euc_jis_2004()

Datum shift_jis_2004_to_euc_jis_2004 ( PG_FUNCTION_ARGS  )

Definition at line 56 of file euc2004_sjis2004.c.

57{
58 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
59 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
60 int len = PG_GETARG_INT32(4);
61 bool noError = PG_GETARG_BOOL(5);
62 int converted;
63
65
66 converted = shift_jis_20042euc_jis_2004(src, dest, len, noError);
67
68 PG_RETURN_INT32(converted);
69}
static int shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError)

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, len, PG_EUC_JIS_2004, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_RETURN_INT32, PG_SHIFT_JIS_2004, and shift_jis_20042euc_jis_2004().

Variable Documentation

◆ PG_MODULE_MAGIC

PG_MODULE_MAGIC

Definition at line 17 of file euc2004_sjis2004.c.