PostgreSQL Source Code  git master
euc2004_sjis2004.c File Reference
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
Include dependency graph for euc2004_sjis2004.c:

Go to the source code of this file.

Functions

 PG_FUNCTION_INFO_V1 (euc_jis_2004_to_shift_jis_2004)
 
 PG_FUNCTION_INFO_V1 (shift_jis_2004_to_euc_jis_2004)
 
static int euc_jis_20042shift_jis_2004 (const unsigned char *euc, unsigned char *p, int len, bool noError)
 
static int shift_jis_20042euc_jis_2004 (const unsigned char *sjis, unsigned char *p, int len, bool noError)
 
Datum euc_jis_2004_to_shift_jis_2004 (PG_FUNCTION_ARGS)
 
Datum shift_jis_2004_to_euc_jis_2004 (PG_FUNCTION_ARGS)
 
static int get_ten (int b, int *ku)
 

Variables

 PG_MODULE_MAGIC
 

Function Documentation

◆ euc_jis_20042shift_jis_2004()

static int euc_jis_20042shift_jis_2004 ( const unsigned char *  euc,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 75 of file euc2004_sjis2004.c.

References IS_HIGHBIT_SET, pg_encoding_verifymbchar(), PG_EUC_JIS_2004, report_invalid_encoding(), SS2, and SS3.

Referenced by euc_jis_2004_to_shift_jis_2004().

76 {
77  const unsigned char *start = euc;
78  int c1,
79  ku,
80  ten;
81  int l;
82 
83  while (len > 0)
84  {
85  c1 = *euc;
86  if (!IS_HIGHBIT_SET(c1))
87  {
88  /* ASCII */
89  if (c1 == 0)
90  {
91  if (noError)
92  break;
94  (const char *) euc, len);
95  }
96  *p++ = c1;
97  euc++;
98  len--;
99  continue;
100  }
101 
102  l = pg_encoding_verifymbchar(PG_EUC_JIS_2004, (const char *) euc, len);
103 
104  if (l < 0)
105  {
106  if (noError)
107  break;
109  (const char *) euc, len);
110  }
111 
112  if (c1 == SS2 && l == 2) /* JIS X 0201 kana? */
113  {
114  *p++ = euc[1];
115  }
116  else if (c1 == SS3 && l == 3) /* JIS X 0213 plane 2? */
117  {
118  ku = euc[1] - 0xa0;
119  ten = euc[2] - 0xa0;
120 
121  switch (ku)
122  {
123  case 1:
124  case 3:
125  case 4:
126  case 5:
127  case 8:
128  case 12:
129  case 13:
130  case 14:
131  case 15:
132  *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
133  break;
134  default:
135  if (ku >= 78 && ku <= 94)
136  {
137  *p++ = (ku + 0x19b) >> 1;
138  }
139  else
140  {
141  if (noError)
142  break;
144  (const char *) euc, len);
145  }
146  }
147 
148  if (ku % 2)
149  {
150  if (ten >= 1 && ten <= 63)
151  *p++ = ten + 0x3f;
152  else if (ten >= 64 && ten <= 94)
153  *p++ = ten + 0x40;
154  else
155  {
156  if (noError)
157  break;
159  (const char *) euc, len);
160  }
161  }
162  else
163  *p++ = ten + 0x9e;
164  }
165 
166  else if (l == 2) /* JIS X 0213 plane 1? */
167  {
168  ku = c1 - 0xa0;
169  ten = euc[1] - 0xa0;
170 
171  if (ku >= 1 && ku <= 62)
172  *p++ = (ku + 0x101) >> 1;
173  else if (ku >= 63 && ku <= 94)
174  *p++ = (ku + 0x181) >> 1;
175  else
176  {
177  if (noError)
178  break;
180  (const char *) euc, len);
181  }
182 
183  if (ku % 2)
184  {
185  if (ten >= 1 && ten <= 63)
186  *p++ = ten + 0x3f;
187  else if (ten >= 64 && ten <= 94)
188  *p++ = ten + 0x40;
189  else
190  {
191  if (noError)
192  break;
194  (const char *) euc, len);
195  }
196  }
197  else
198  *p++ = ten + 0x9e;
199  }
200  else
201  {
202  if (noError)
203  break;
205  (const char *) euc, len);
206  }
207 
208  euc += l;
209  len -= l;
210  }
211  *p = '\0';
212 
213  return euc - start;
214 }
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1647
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1156
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition: wchar.c:1940
#define SS2
Definition: pg_wchar.h:35

◆ euc_jis_2004_to_shift_jis_2004()

Datum euc_jis_2004_to_shift_jis_2004 ( PG_FUNCTION_ARGS  )

Definition at line 40 of file euc2004_sjis2004.c.

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, euc_jis_20042shift_jis_2004(), PG_EUC_JIS_2004, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_RETURN_INT32, and PG_SHIFT_JIS_2004.

41 {
42  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
43  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
44  int len = PG_GETARG_INT32(4);
45  bool noError = PG_GETARG_BOOL(5);
46  int converted;
47 
49 
50  converted = euc_jis_20042shift_jis_2004(src, dest, len, noError);
51 
52  PG_RETURN_INT32(converted);
53 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
static int euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError)
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:527
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277

◆ get_ten()

static int get_ten ( int  b,
int *  ku 
)
static

Definition at line 222 of file euc2004_sjis2004.c.

Referenced by shift_jis_20042euc_jis_2004().

/*
 * Map the second byte 'b' of a two-byte SHIFT_JIS_2004 sequence to its
 * "ten" value.
 *
 * Returns 1..94 for bytes in 0x40-0x7e, 0x80-0x9e and 0x9f-0xfc, or -1 for
 * any other byte.
 *
 * '*ku' is set to 1 when 'b' lies in 0x40-0x7e or 0x80-0x9e and to 0 when
 * it lies in 0x9f-0xfc; the caller uses this flag to pick between the two
 * rows that share one lead byte.  On error '*ku' is set to 0.
 */
static int
get_ten(int b, int *ku)
{
	int			ten;

	if (b >= 0x40 && b <= 0x7e)
	{
		ten = b - 0x3f;
		*ku = 1;
	}
	else if (b >= 0x80 && b <= 0x9e)
	{
		ten = b - 0x40;
		*ku = 1;
	}
	else if (b >= 0x9f && b <= 0xfc)
	{
		ten = b - 0x9e;
		*ku = 0;
	}
	else
	{
		ten = -1;				/* error */
		*ku = 0;				/* keep compiler quiet */
	}
	return ten;
}

◆ PG_FUNCTION_INFO_V1() [1/2]

PG_FUNCTION_INFO_V1 ( euc_jis_2004_to_shift_jis_2004  )

◆ PG_FUNCTION_INFO_V1() [2/2]

PG_FUNCTION_INFO_V1 ( shift_jis_2004_to_euc_jis_2004  )

◆ shift_jis_20042euc_jis_2004()

static int shift_jis_20042euc_jis_2004 ( const unsigned char *  sjis,
unsigned char *  p,
int  len,
bool  noError 
)
static

Definition at line 254 of file euc2004_sjis2004.c.

References get_ten(), IS_HIGHBIT_SET, pg_encoding_verifymbchar(), PG_SHIFT_JIS_2004, report_invalid_encoding(), SS2, and SS3.

Referenced by shift_jis_2004_to_euc_jis_2004().

255 {
256  const unsigned char *start = sjis;
257  int c1;
258  int ku,
259  ten,
260  kubun;
261  int plane;
262  int l;
263 
264  while (len > 0)
265  {
266  c1 = *sjis;
267 
268  if (!IS_HIGHBIT_SET(c1))
269  {
270  /* ASCII */
271  if (c1 == 0)
272  {
273  if (noError)
274  break;
276  (const char *) sjis, len);
277  }
278  *p++ = c1;
279  sjis++;
280  len--;
281  continue;
282  }
283 
284  l = pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (const char *) sjis, len);
285 
286  if (l < 0 || l > len)
287  {
288  if (noError)
289  break;
291  (const char *) sjis, len);
292  }
293 
294  if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
295  {
296  /* JIS X0201 (1 byte kana) */
297  *p++ = SS2;
298  *p++ = c1;
299  }
300  else if (l == 2)
301  {
302  int c2 = sjis[1];
303 
304  plane = 1;
305  ku = 1;
306  ten = 1;
307 
308  /*
309  * JIS X 0213
310  */
311  if (c1 >= 0x81 && c1 <= 0x9f) /* plane 1 1ku-62ku */
312  {
313  ku = (c1 << 1) - 0x100;
314  ten = get_ten(c2, &kubun);
315  if (ten < 0)
316  {
317  if (noError)
318  break;
320  (const char *) sjis, len);
321  }
322  ku -= kubun;
323  }
324  else if (c1 >= 0xe0 && c1 <= 0xef) /* plane 1 62ku-94ku */
325  {
326  ku = (c1 << 1) - 0x180;
327  ten = get_ten(c2, &kubun);
328  if (ten < 0)
329  {
330  if (noError)
331  break;
333  (const char *) sjis, len);
334  }
335  ku -= kubun;
336  }
337  else if (c1 >= 0xf0 && c1 <= 0xf3) /* plane 2
338  * 1,3,4,5,8,12,13,14,15 ku */
339  {
340  plane = 2;
341  ten = get_ten(c2, &kubun);
342  if (ten < 0)
343  {
344  if (noError)
345  break;
347  (const char *) sjis, len);
348  }
349  switch (c1)
350  {
351  case 0xf0:
352  ku = kubun == 0 ? 8 : 1;
353  break;
354  case 0xf1:
355  ku = kubun == 0 ? 4 : 3;
356  break;
357  case 0xf2:
358  ku = kubun == 0 ? 12 : 5;
359  break;
360  default:
361  ku = kubun == 0 ? 14 : 13;
362  break;
363  }
364  }
365  else if (c1 >= 0xf4 && c1 <= 0xfc) /* plane 2 78-94ku */
366  {
367  plane = 2;
368  ten = get_ten(c2, &kubun);
369  if (ten < 0)
370  {
371  if (noError)
372  break;
374  (const char *) sjis, len);
375  }
376  if (c1 == 0xf4 && kubun == 1)
377  ku = 15;
378  else
379  ku = (c1 << 1) - 0x19a - kubun;
380  }
381  else
382  {
383  if (noError)
384  break;
386  (const char *) sjis, len);
387  }
388 
389  if (plane == 2)
390  *p++ = SS3;
391 
392  *p++ = ku + 0xa0;
393  *p++ = ten + 0xa0;
394  }
395  sjis += l;
396  len -= l;
397  }
398  *p = '\0';
399 
400  return sjis - start;
401 }
static int get_ten(int b, int *ku)
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1647
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1156
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition: wchar.c:1940
#define SS2
Definition: pg_wchar.h:35

◆ shift_jis_2004_to_euc_jis_2004()

Datum shift_jis_2004_to_euc_jis_2004 ( PG_FUNCTION_ARGS  )

Definition at line 56 of file euc2004_sjis2004.c.

References CHECK_ENCODING_CONVERSION_ARGS, generate_unaccent_rules::dest, PG_EUC_JIS_2004, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_RETURN_INT32, PG_SHIFT_JIS_2004, and shift_jis_20042euc_jis_2004().

57 {
58  unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
59  unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
60  int len = PG_GETARG_INT32(4);
61  bool noError = PG_GETARG_BOOL(5);
62  int converted;
63 
65 
66  converted = shift_jis_20042euc_jis_2004(src, dest, len, noError);
67 
68  PG_RETURN_INT32(converted);
69 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
static int shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError)
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:527
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277

Variable Documentation

◆ PG_MODULE_MAGIC

PG_MODULE_MAGIC

Definition at line 17 of file euc2004_sjis2004.c.