PostgreSQL Source Code
git master
|
#include "c.h"
#include "mb/pg_wchar.h"
#include "common/unicode_nonspacing_table.h"
#include "common/unicode_east_asian_fw_table.h"
Go to the source code of this file.
Data Structures | |
struct | mbinterval |
Macros | |
#define | IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe) |
#define | pg_euccn_verifychar pg_euckr_verifychar |
#define | pg_euccn_verifystr pg_euckr_verifystr |
#define | ERR 0 |
#define | BGN 11 |
#define | CS1 16 |
#define | CS2 1 |
#define | CS3 5 |
#define | P3A 6 /* Lead was E0, check for 3-byte overlong */ |
#define | P3B 20 /* Lead was ED, check for surrogate */ |
#define | P4A 25 /* Lead was F0, check for 4-byte overlong */ |
#define | P4B 30 /* Lead was F4, check for too-large */ |
#define | END BGN |
#define | ASC (END << BGN) |
#define | L2A (CS1 << BGN) |
#define | L3A (P3A << BGN) |
#define | L3B (CS2 << BGN) |
#define | L3C (P3B << BGN) |
#define | L4A (P4A << BGN) |
#define | L4B (CS3 << BGN) |
#define | L4C (P4B << BGN) |
#define | CR1 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B) |
#define | CR2 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A) |
#define | CR3 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A) |
#define | ILL ERR |
#define | STRIDE_LENGTH (2 * sizeof(Vector8)) |
Functions | |
static int | pg_ascii2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_ascii_mblen (const unsigned char *s) |
static int | pg_ascii_dsplen (const unsigned char *s) |
static int | pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_euc_mblen (const unsigned char *s) |
static int | pg_euc_dsplen (const unsigned char *s) |
static int | pg_eucjp2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_eucjp_mblen (const unsigned char *s) |
static int | pg_eucjp_dsplen (const unsigned char *s) |
static int | pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_euckr_mblen (const unsigned char *s) |
static int | pg_euckr_dsplen (const unsigned char *s) |
static int | pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_euccn_mblen (const unsigned char *s) |
static int | pg_euccn_dsplen (const unsigned char *s) |
static int | pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_euctw_mblen (const unsigned char *s) |
static int | pg_euctw_dsplen (const unsigned char *s) |
static int | pg_wchar2euc_with_len (const pg_wchar *from, unsigned char *to, int len) |
static int | pg_johab_mblen (const unsigned char *s) |
static int | pg_johab_dsplen (const unsigned char *s) |
static int | pg_utf2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
unsigned char * | unicode_to_utf8 (pg_wchar c, unsigned char *utf8string) |
static int | pg_wchar2utf_with_len (const pg_wchar *from, unsigned char *to, int len) |
int | pg_utf_mblen (const unsigned char *s) |
static int | mbbisearch (pg_wchar ucs, const struct mbinterval *table, int max) |
static int | ucs_wcwidth (pg_wchar ucs) |
pg_wchar | utf8_to_unicode (const unsigned char *c) |
static int | pg_utf_dsplen (const unsigned char *s) |
static int | pg_mule2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_wchar2mule_with_len (const pg_wchar *from, unsigned char *to, int len) |
int | pg_mule_mblen (const unsigned char *s) |
static int | pg_mule_dsplen (const unsigned char *s) |
static int | pg_latin12wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_wchar2single_with_len (const pg_wchar *from, unsigned char *to, int len) |
static int | pg_latin1_mblen (const unsigned char *s) |
static int | pg_latin1_dsplen (const unsigned char *s) |
static int | pg_sjis_mblen (const unsigned char *s) |
static int | pg_sjis_dsplen (const unsigned char *s) |
static int | pg_big5_mblen (const unsigned char *s) |
static int | pg_big5_dsplen (const unsigned char *s) |
static int | pg_gbk_mblen (const unsigned char *s) |
static int | pg_gbk_dsplen (const unsigned char *s) |
static int | pg_uhc_mblen (const unsigned char *s) |
static int | pg_uhc_dsplen (const unsigned char *s) |
static int | pg_gb18030_mblen (const unsigned char *s) |
static int | pg_gb18030_dsplen (const unsigned char *s) |
static int | pg_ascii_verifychar (const unsigned char *s, int len) |
static int | pg_ascii_verifystr (const unsigned char *s, int len) |
static int | pg_eucjp_verifychar (const unsigned char *s, int len) |
static int | pg_eucjp_verifystr (const unsigned char *s, int len) |
static int | pg_euckr_verifychar (const unsigned char *s, int len) |
static int | pg_euckr_verifystr (const unsigned char *s, int len) |
static int | pg_euctw_verifychar (const unsigned char *s, int len) |
static int | pg_euctw_verifystr (const unsigned char *s, int len) |
static int | pg_johab_verifychar (const unsigned char *s, int len) |
static int | pg_johab_verifystr (const unsigned char *s, int len) |
static int | pg_mule_verifychar (const unsigned char *s, int len) |
static int | pg_mule_verifystr (const unsigned char *s, int len) |
static int | pg_latin1_verifychar (const unsigned char *s, int len) |
static int | pg_latin1_verifystr (const unsigned char *s, int len) |
static int | pg_sjis_verifychar (const unsigned char *s, int len) |
static int | pg_sjis_verifystr (const unsigned char *s, int len) |
static int | pg_big5_verifychar (const unsigned char *s, int len) |
static int | pg_big5_verifystr (const unsigned char *s, int len) |
static int | pg_gbk_verifychar (const unsigned char *s, int len) |
static int | pg_gbk_verifystr (const unsigned char *s, int len) |
static int | pg_uhc_verifychar (const unsigned char *s, int len) |
static int | pg_uhc_verifystr (const unsigned char *s, int len) |
static int | pg_gb18030_verifychar (const unsigned char *s, int len) |
static int | pg_gb18030_verifystr (const unsigned char *s, int len) |
static int | pg_utf8_verifychar (const unsigned char *s, int len) |
static void | utf8_advance (const unsigned char *s, uint32 *state, int len) |
static int | pg_utf8_verifystr (const unsigned char *s, int len) |
bool | pg_utf8_islegal (const unsigned char *source, int length) |
int | pg_encoding_mblen (int encoding, const char *mbstr) |
int | pg_encoding_mblen_bounded (int encoding, const char *mbstr) |
int | pg_encoding_dsplen (int encoding, const char *mbstr) |
int | pg_encoding_verifymbchar (int encoding, const char *mbstr, int len) |
int | pg_encoding_verifymbstr (int encoding, const char *mbstr, int len) |
int | pg_encoding_max_length (int encoding) |
Variables | |
static const uint32 | Utf8Transition [256] |
const pg_wchar_tbl | pg_wchar_table [] |
#define P4A 25 /* Lead was F0, check for 4-byte overlong */ |
#define pg_euccn_verifychar pg_euckr_verifychar |
#define pg_euccn_verifystr pg_euckr_verifystr |
#define STRIDE_LENGTH (2 * sizeof(Vector8)) |
|
static |
|
static |
|
static |
Definition at line 69 of file wchar.c.
Referenced by pg_big5_dsplen(), pg_euc_dsplen(), pg_euccn_dsplen(), pg_eucjp_dsplen(), pg_euctw_dsplen(), pg_gb18030_dsplen(), pg_gbk_dsplen(), pg_latin1_dsplen(), pg_sjis_dsplen(), and pg_uhc_dsplen().
|
static |
|
static |
|
static |
|
static |
|
static |
Definition at line 961 of file wchar.c.
References IS_HIGHBIT_SET, and len.
Referenced by pg_big5_verifychar().
|
static |
Definition at line 1518 of file wchar.c.
References len, and pg_big5_mblen().
Referenced by pg_big5_verifystr().
|
static |
Definition at line 1538 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_big5_verifychar().
int pg_encoding_dsplen | ( | int | encoding, |
const char * | mbstr | ||
) |
Definition at line 2151 of file wchar.c.
References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by PQdsplen(), and reportErrorPosition().
int pg_encoding_max_length | ( | int | encoding | ) |
Definition at line 2188 of file wchar.c.
References Assert(), encoding, pg_wchar_tbl::maxmblen, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by ascii(), chr(), CopyConvertBuf(), pg_encoding_mbcliplen(), pg_verify_mbstr_len(), reportErrorPosition(), and type_maximum_size().
int pg_encoding_mblen | ( | int | encoding, |
const char * | mbstr | ||
) |
Definition at line 2130 of file wchar.c.
References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), pg_encoding_mblen_bounded(), PQescapeInternal(), PQescapeStringInternal(), PQmblen(), PQmblenBounded(), report_invalid_encoding(), and report_untranslatable_char().
int pg_encoding_mblen_bounded | ( | int | encoding, |
const char * | mbstr | ||
) |
int pg_encoding_verifymbchar | ( | int | encoding, |
const char * | mbstr, | ||
int | len | ||
) |
Definition at line 2164 of file wchar.c.
References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by big52euc_tw(), big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), shift_jis_20042euc_jis_2004(), and sjis2euc_jp().
int pg_encoding_verifymbstr | ( | int | encoding, |
const char * | mbstr, | ||
int | len | ||
) |
Definition at line 2177 of file wchar.c.
References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by CopyConvertBuf(), and test_enc_conversion().
|
static |
Definition at line 83 of file wchar.c.
References IS_HIGHBIT_SET, len, SS2, and SS3.
Referenced by pg_eucjp2wchar_with_len(), and pg_euckr2wchar_with_len().
|
inline static
Definition at line 138 of file wchar.c.
References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.
Referenced by pg_euckr_dsplen(), and pg_johab_dsplen().
|
inline static
Definition at line 122 of file wchar.c.
References IS_HIGHBIT_SET, len, SS2, and SS3.
Referenced by pg_eucjp_mblen(), pg_euckr_mblen(), and pg_johab_mblen().
|
static |
|
static |
|
static |
|
static |
Definition at line 157 of file wchar.c.
References len, and pg_euc2wchar_with_len().
|
static |
Definition at line 169 of file wchar.c.
References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.
|
static |
|
static |
Definition at line 1121 of file wchar.c.
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.
Referenced by pg_eucjp_verifystr().
|
static |
Definition at line 1176 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_eucjp_verifychar().
|
static |
Definition at line 188 of file wchar.c.
References len, and pg_euc2wchar_with_len().
|
static |
|
static |
Definition at line 194 of file wchar.c.
References pg_euc_mblen().
|
static |
Definition at line 1205 of file wchar.c.
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, and len.
Referenced by pg_euckr_verifystr().
|
static |
Definition at line 1234 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_euckr_verifychar().
|
static |
|
static |
Definition at line 333 of file wchar.c.
References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.
|
static |
|
static |
Definition at line 1267 of file wchar.c.
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.
Referenced by pg_euctw_verifystr().
|
static |
Definition at line 1317 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_euctw_verifychar().
|
static |
|
static |
|
static |
Definition at line 1665 of file wchar.c.
References IS_HIGHBIT_SET, and len.
Referenced by pg_gb18030_verifystr().
|
static |
Definition at line 1696 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_gb18030_verifychar().
|
static |
|
static |
Definition at line 988 of file wchar.c.
References IS_HIGHBIT_SET, and len.
Referenced by pg_gbk_verifychar().
|
static |
Definition at line 1567 of file wchar.c.
References len, and pg_gbk_mblen().
Referenced by pg_gbk_verifystr().
|
static |
Definition at line 1587 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_gbk_verifychar().
|
static |
Definition at line 407 of file wchar.c.
References pg_euc_dsplen().
|
static |
Definition at line 401 of file wchar.c.
References pg_euc_mblen().
Referenced by pg_johab_verifychar().
|
static |
Definition at line 1346 of file wchar.c.
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, and pg_johab_mblen().
Referenced by pg_johab_verifystr().
|
static |
Definition at line 1370 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_johab_verifychar().
|
static |
|
static |
Definition at line 921 of file wchar.c.
References pg_ascii_dsplen().
|
static |
|
static |
|
static |
|
static |
int pg_mule_mblen | ( | const unsigned char * | s | ) |
Definition at line 832 of file wchar.c.
References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.
Referenced by mic2latin(), mic2latin_with_table(), and pg_mule_verifychar().
|
static |
Definition at line 1399 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_mule_mblen().
Referenced by pg_mule_verifystr().
|
static |
Definition at line 1420 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_mule_verifychar().
|
static |
|
static |
Definition at line 930 of file wchar.c.
References IS_HIGHBIT_SET, and len.
Referenced by pg_sjis_verifychar().
|
static |
Definition at line 1466 of file wchar.c.
References ISSJISHEAD, ISSJISTAIL, len, and pg_sjis_mblen().
Referenced by pg_sjis_verifystr().
|
static |
Definition at line 1489 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_sjis_verifychar().
|
static |
|
static |
Definition at line 1015 of file wchar.c.
References IS_HIGHBIT_SET, and len.
Referenced by pg_uhc_verifychar().
|
static |
Definition at line 1616 of file wchar.c.
References len, and pg_uhc_mblen().
Referenced by pg_uhc_verifystr().
|
static |
Definition at line 1636 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_uhc_verifychar().
|
static |
bool pg_utf8_islegal | ( | const unsigned char * | source, |
int | length | ||
) |
Definition at line 2013 of file wchar.c.
Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifychar(), utf8_to_iso8859_1(), and UtfToLocal().
|
static |
Definition at line 1725 of file wchar.c.
References len, and pg_utf8_islegal().
Referenced by pg_utf8_verifystr().
|
static |
Definition at line 1915 of file wchar.c.
References Assert(), BGN, END, ERR, IS_HIGHBIT_SET, is_valid_ascii(), len, pg_utf8_verifychar(), pg_utf_mblen(), STRIDE_LENGTH, and utf8_advance().
|
static |
int pg_utf_mblen | ( | const unsigned char * | s | ) |
Definition at line 549 of file wchar.c.
References len.
Referenced by json_lex_string(), pg_saslprep(), pg_unicode_to_server(), pg_unicode_to_server_noerror(), pg_utf8_string_len(), pg_utf8_verifystr(), pg_wchar2utf_with_len(), read_char(), unicode_is_normalized(), unicode_normalize_func(), utf8_to_iso8859_1(), and UtfToLocal().
|
static |
|
static |
Definition at line 766 of file wchar.c.
References IS_LC1, IS_LC2, IS_LCPRV1_A_RANGE, IS_LCPRV1_B_RANGE, IS_LCPRV2_A_RANGE, IS_LCPRV2_B_RANGE, LCPRV1_A, LCPRV1_B, LCPRV2_A, LCPRV2_B, and len.
|
static |
|
static |
Definition at line 518 of file wchar.c.
References len, pg_utf_mblen(), and unicode_to_utf8().
|
static |
Definition at line 639 of file wchar.c.
References east_asian_fw, mbbisearch(), and nonspacing.
Referenced by pg_utf_dsplen().
unsigned char* unicode_to_utf8 | ( | pg_wchar | c, |
unsigned char * | utf8string | ||
) |
Definition at line 483 of file wchar.c.
Referenced by json_lex_string(), pg_saslprep(), pg_unicode_to_server(), pg_unicode_to_server_noerror(), pg_wchar2utf_with_len(), and unicode_normalize_func().
|
static |
Definition at line 1897 of file wchar.c.
References len, and Utf8Transition.
Referenced by pg_utf8_verifystr().
pg_wchar utf8_to_unicode | ( | const unsigned char * | c | ) |
Definition at line 679 of file wchar.c.
Referenced by pg_saslprep(), pg_utf_dsplen(), read_char(), unicode_is_normalized(), and unicode_normalize_func().
const pg_wchar_tbl pg_wchar_table[] |
Definition at line 2076 of file wchar.c.
Referenced by pg_database_encoding_max_length(), pg_dsplen(), pg_encoding_dsplen(), pg_encoding_max_length(), pg_encoding_max_length_sql(), pg_encoding_mb2wchar_with_len(), pg_encoding_mbcliplen(), pg_encoding_mblen(), pg_encoding_verifymbchar(), pg_encoding_verifymbstr(), pg_encoding_wchar2mb_with_len(), pg_generic_charinc(), pg_mb2wchar(), pg_mb2wchar_with_len(), pg_mblen(), pg_verify_mbstr(), pg_verify_mbstr_len(), pg_wchar2mb(), and pg_wchar2mb_with_len().
|
static |
Definition at line 1839 of file wchar.c.
Referenced by utf8_advance().