PostgreSQL Source Code git master
Loading...
Searching...
No Matches
euc_jp_and_sjis.c File Reference
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "sjis.map"
Include dependency graph for euc_jp_and_sjis.c:

Go to the source code of this file.

Macros

#define PGSJISALTCODE   0x81ac
 
#define PGEUCALTCODE   0xa2ae
 

Functions

 PG_MODULE_MAGIC_EXT (.name="euc_jp_and_sjis",.version=PG_VERSION)
 
 PG_FUNCTION_INFO_V1 (euc_jp_to_sjis)
 
 PG_FUNCTION_INFO_V1 (sjis_to_euc_jp)
 
static int euc_jp2sjis (const unsigned char *euc, unsigned char *p, int len, bool noError)
 
static int sjis2euc_jp (const unsigned char *sjis, unsigned char *p, int len, bool noError)
 
Datum euc_jp_to_sjis (PG_FUNCTION_ARGS)
 
Datum sjis_to_euc_jp (PG_FUNCTION_ARGS)
 

Macro Definition Documentation

◆ PGEUCALTCODE

#define PGEUCALTCODE   0xa2ae

Definition at line 23 of file euc_jp_and_sjis.c.

◆ PGSJISALTCODE

#define PGSJISALTCODE   0x81ac

Definition at line 22 of file euc_jp_and_sjis.c.

Function Documentation

◆ euc_jp2sjis()

static int euc_jp2sjis ( const unsigned char * euc,
unsigned char * p,
int  len,
bool  noError 
)
static

Definition at line 91 of file euc_jp_and_sjis.c.

92{
93 const unsigned char *start = euc;
94 int c1,
95 c2,
96 k;
97 int l;
98
99 while (len > 0)
100 {
101 c1 = *euc;
102 if (!IS_HIGHBIT_SET(c1))
103 {
104 /* ASCII */
105 if (c1 == 0)
106 {
107 if (noError)
108 break;
109 report_invalid_encoding(PG_EUC_JP,
110 (const char *) euc, len);
111 }
112 *p++ = c1;
113 euc++;
114 len--;
115 continue;
116 }
117 l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
118 if (l < 0)
119 {
120 if (noError)
121 break;
122 report_invalid_encoding(PG_EUC_JP,
123 (const char *) euc, len);
124 }
125 if (c1 == SS2)
126 {
127 /* hankaku kana? */
128 *p++ = euc[1];
129 }
130 else if (c1 == SS3)
131 {
132 /* JIS X0212 kanji? */
133 c1 = euc[1];
134 c2 = euc[2];
135 k = c1 << 8 | c2;
136 if (k >= 0xf5a1)
137 {
138 /* UDC2 */
139 c1 -= 0x54;
140 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
141 *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
142 }
143 else
144 {
145 int i,
146 k2;
147
148 /* IBM kanji */
149 for (i = 0;; i++)
150 {
151 k2 = ibmkanji[i].euc & 0xffff;
152 if (k2 == 0xffff)
153 {
154 *p++ = PGSJISALTCODE >> 8;
155 *p++ = PGSJISALTCODE & 0xff;
156 break;
157 }
158 if (k2 == k)
159 {
160 k = ibmkanji[i].sjis;
161 *p++ = k >> 8;
162 *p++ = k & 0xff;
163 break;
164 }
165 }
166 }
167 }
168 else
169 {
170 /* JIS X0208 kanji? */
171 c2 = euc[1];
172 k = (c1 << 8) | (c2 & 0xff);
173 if (k >= 0xf5a1)
174 {
175 /* UDC1 */
176 c1 -= 0x54;
177 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
178 }
179 else
180 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
181 *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
182 }
183 euc += l;
184 len -= l;
185 }
186 *p = '\0';
187
188 return euc - start;
189}
#define IS_HIGHBIT_SET(ch)
Definition c.h:1244
#define PGSJISALTCODE
return str start
int i
Definition isn.c:77
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition mbutils.c:1824
const void size_t len
@ PG_EUC_JP
Definition pg_wchar.h:77
#define SS2
Definition pg_wchar.h:38
#define SS3
Definition pg_wchar.h:39
static int fb(int x)
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition wchar.c:1988

References fb(), i, IS_HIGHBIT_SET, len, pg_encoding_verifymbchar(), PG_EUC_JP, PGSJISALTCODE, report_invalid_encoding(), SS2, SS3, and start.

Referenced by euc_jp_to_sjis().

◆ euc_jp_to_sjis()

Datum euc_jp_to_sjis ( PG_FUNCTION_ARGS  )

Definition at line 56 of file euc_jp_and_sjis.c.

57{
58 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
59 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
60 int len = PG_GETARG_INT32(4);
61 bool noError = PG_GETARG_BOOL(5);
62 int converted;
63
64 CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_SJIS);
65
66 converted = euc_jp2sjis(src, dest, len, noError);
67
68 PG_RETURN_INT32(converted);
69}
static int euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len, bool noError)
#define PG_GETARG_CSTRING(n)
Definition fmgr.h:278
#define PG_RETURN_INT32(x)
Definition fmgr.h:355
#define PG_GETARG_INT32(n)
Definition fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition fmgr.h:274
@ PG_SJIS
Definition pg_wchar.h:114
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition pg_wchar.h:360

References CHECK_ENCODING_CONVERSION_ARGS, euc_jp2sjis(), fb(), len, PG_EUC_JP, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_RETURN_INT32, and PG_SJIS.

◆ PG_FUNCTION_INFO_V1() [1/2]

PG_FUNCTION_INFO_V1 ( euc_jp_to_sjis  )

◆ PG_FUNCTION_INFO_V1() [2/2]

PG_FUNCTION_INFO_V1 ( sjis_to_euc_jp  )

◆ PG_MODULE_MAGIC_EXT()

PG_MODULE_MAGIC_EXT ( name = "euc_jp_and_sjis",
version = PG_VERSION 
)

◆ sjis2euc_jp()

static int sjis2euc_jp ( const unsigned char * sjis,
unsigned char * p,
int  len,
bool  noError 
)
static

Definition at line 195 of file euc_jp_and_sjis.c.

196{
197 const unsigned char *start = sjis;
198 int c1,
199 c2,
200 i,
201 k,
202 k2;
203 int l;
204
205 while (len > 0)
206 {
207 c1 = *sjis;
208 if (!IS_HIGHBIT_SET(c1))
209 {
210 /* ASCII */
211 if (c1 == 0)
212 {
213 if (noError)
214 break;
215 report_invalid_encoding(PG_SJIS,
216 (const char *) sjis, len);
217 }
218 *p++ = c1;
219 sjis++;
220 len--;
221 continue;
222 }
223 l = pg_encoding_verifymbchar(PG_SJIS, (const char *) sjis, len);
224 if (l < 0)
225 {
226 if (noError)
227 break;
228 report_invalid_encoding(PG_SJIS,
229 (const char *) sjis, len);
230 }
231 if (c1 >= 0xa1 && c1 <= 0xdf)
232 {
233 /* JIS X0201 (1 byte kana) */
234 *p++ = SS2;
235 *p++ = c1;
236 }
237 else
238 {
239 /*
240 * JIS X0208, X0212, user defined extended characters
241 */
242 c2 = sjis[1];
243 k = (c1 << 8) + c2;
244 if (k >= 0xed40 && k < 0xf040)
245 {
246 /* NEC selection IBM kanji */
247 for (i = 0;; i++)
248 {
249 k2 = ibmkanji[i].nec;
250 if (k2 == 0xffff)
251 break;
252 if (k2 == k)
253 {
254 k = ibmkanji[i].sjis;
255 c1 = (k >> 8) & 0xff;
256 c2 = k & 0xff;
257 }
258 }
259 }
260
261 if (k < 0xeb3f)
262 {
263 /* JIS X0208 */
264 *p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
265 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
266 }
267 else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
268 {
269 /* NEC selection IBM kanji - Other undecided justice */
270 *p++ = PGEUCALTCODE >> 8;
271 *p++ = PGEUCALTCODE & 0xff;
272 }
273 else if (k >= 0xf040 && k < 0xf540)
274 {
275 /*
276 * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
277 * 0x7e7e EUC 0xf5a1 - 0xfefe
278 */
279 c1 -= 0x6f;
280 *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
281 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
282 }
283 else if (k >= 0xf540 && k < 0xfa40)
284 {
285 /*
286 * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
287 * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
288 */
289 *p++ = SS3;
290 c1 -= 0x74;
291 *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
292 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
293 }
294 else if (k >= 0xfa40)
295 {
296 /*
297 * mapping IBM kanji to X0208 and X0212
298 *
299 */
300 for (i = 0;; i++)
301 {
302 k2 = ibmkanji[i].sjis;
303 if (k2 == 0xffff)
304 break;
305 if (k2 == k)
306 {
307 k = ibmkanji[i].euc;
308 if (k >= 0x8f0000)
309 {
310 *p++ = SS3;
311 *p++ = 0x80 | ((k & 0xff00) >> 8);
312 *p++ = 0x80 | (k & 0xff);
313 }
314 else
315 {
316 *p++ = 0x80 | (k >> 8);
317 *p++ = 0x80 | (k & 0xff);
318 }
319 }
320 }
321 }
322 }
323 sjis += l;
324 len -= l;
325 }
326 *p = '\0';
327
328 return sjis - start;
329}
#define PGEUCALTCODE

References fb(), i, IS_HIGHBIT_SET, len, pg_encoding_verifymbchar(), PG_SJIS, PGEUCALTCODE, report_invalid_encoding(), SS2, SS3, and start.

Referenced by sjis_to_euc_jp().

◆ sjis_to_euc_jp()

Datum sjis_to_euc_jp ( PG_FUNCTION_ARGS  )

Definition at line 72 of file euc_jp_and_sjis.c.

73{
74 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
75 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
76 int len = PG_GETARG_INT32(4);
77 bool noError = PG_GETARG_BOOL(5);
78 int converted;
79
80 CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_EUC_JP);
81
82 converted = sjis2euc_jp(src, dest, len, noError);
83
84 PG_RETURN_INT32(converted);
85}
static int sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len, bool noError)

References CHECK_ENCODING_CONVERSION_ARGS, fb(), len, PG_EUC_JP, PG_GETARG_BOOL, PG_GETARG_CSTRING, PG_GETARG_INT32, PG_RETURN_INT32, PG_SJIS, and sjis2euc_jp().