PostgreSQL Source Code git master
|
#include "c.h"
#include "mb/pg_wchar.h"
#include "utils/ascii.h"
#include "common/unicode_nonspacing_table.h"
#include "common/unicode_east_asian_fw_table.h"
Go to the source code of this file.
Data Structures | |
struct | mbinterval |
Macros | |
#define | NONUTF8_INVALID_BYTE0 (0x8d) |
#define | NONUTF8_INVALID_BYTE1 (' ') |
#define | IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe) |
#define | pg_euccn_verifychar pg_euckr_verifychar |
#define | pg_euccn_verifystr pg_euckr_verifystr |
#define | ERR 0 |
#define | BGN 11 |
#define | CS1 16 |
#define | CS2 1 |
#define | CS3 5 |
#define | P3A 6 /* Lead was E0, check for 3-byte overlong */ |
#define | P3B 20 /* Lead was ED, check for surrogate */ |
#define | P4A 25 /* Lead was F0, check for 4-byte overlong */ |
#define | P4B 30 /* Lead was F4, check for too-large */ |
#define | END BGN |
#define | ASC (END << BGN) |
#define | L2A (CS1 << BGN) |
#define | L3A (P3A << BGN) |
#define | L3B (CS2 << BGN) |
#define | L3C (P3B << BGN) |
#define | L4A (P4A << BGN) |
#define | L4B (CS3 << BGN) |
#define | L4C (P4B << BGN) |
#define | CR1 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B) |
#define | CR2 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A) |
#define | CR3 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A) |
#define | ILL ERR |
#define | STRIDE_LENGTH (2 * sizeof(Vector8)) |
Functions | |
static int | pg_ascii2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_ascii_mblen (const unsigned char *s) |
static int | pg_ascii_dsplen (const unsigned char *s) |
static int | pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_euc_mblen (const unsigned char *s) |
static int | pg_euc_dsplen (const unsigned char *s) |
static int | pg_eucjp2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_eucjp_mblen (const unsigned char *s) |
static int | pg_eucjp_dsplen (const unsigned char *s) |
static int | pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_euckr_mblen (const unsigned char *s) |
static int | pg_euckr_dsplen (const unsigned char *s) |
static int | pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_euccn_mblen (const unsigned char *s) |
static int | pg_euccn_dsplen (const unsigned char *s) |
static int | pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_euctw_mblen (const unsigned char *s) |
static int | pg_euctw_dsplen (const unsigned char *s) |
static int | pg_wchar2euc_with_len (const pg_wchar *from, unsigned char *to, int len) |
static int | pg_johab_mblen (const unsigned char *s) |
static int | pg_johab_dsplen (const unsigned char *s) |
static int | pg_utf2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_wchar2utf_with_len (const pg_wchar *from, unsigned char *to, int len) |
int | pg_utf_mblen (const unsigned char *s) |
static int | mbbisearch (pg_wchar ucs, const struct mbinterval *table, int max) |
static int | ucs_wcwidth (pg_wchar ucs) |
static int | pg_utf_dsplen (const unsigned char *s) |
static int | pg_mule2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_wchar2mule_with_len (const pg_wchar *from, unsigned char *to, int len) |
int | pg_mule_mblen (const unsigned char *s) |
static int | pg_mule_dsplen (const unsigned char *s) |
static int | pg_latin12wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_wchar2single_with_len (const pg_wchar *from, unsigned char *to, int len) |
static int | pg_latin1_mblen (const unsigned char *s) |
static int | pg_latin1_dsplen (const unsigned char *s) |
static int | pg_sjis_mblen (const unsigned char *s) |
static int | pg_sjis_dsplen (const unsigned char *s) |
static int | pg_big5_mblen (const unsigned char *s) |
static int | pg_big5_dsplen (const unsigned char *s) |
static int | pg_gbk_mblen (const unsigned char *s) |
static int | pg_gbk_dsplen (const unsigned char *s) |
static int | pg_uhc_mblen (const unsigned char *s) |
static int | pg_uhc_dsplen (const unsigned char *s) |
static int | pg_gb18030_mblen (const unsigned char *s) |
static int | pg_gb18030_dsplen (const unsigned char *s) |
static int | pg_ascii_verifychar (const unsigned char *s, int len) |
static int | pg_ascii_verifystr (const unsigned char *s, int len) |
static int | pg_eucjp_verifychar (const unsigned char *s, int len) |
static int | pg_eucjp_verifystr (const unsigned char *s, int len) |
static int | pg_euckr_verifychar (const unsigned char *s, int len) |
static int | pg_euckr_verifystr (const unsigned char *s, int len) |
static int | pg_euctw_verifychar (const unsigned char *s, int len) |
static int | pg_euctw_verifystr (const unsigned char *s, int len) |
static int | pg_johab_verifychar (const unsigned char *s, int len) |
static int | pg_johab_verifystr (const unsigned char *s, int len) |
static int | pg_mule_verifychar (const unsigned char *s, int len) |
static int | pg_mule_verifystr (const unsigned char *s, int len) |
static int | pg_latin1_verifychar (const unsigned char *s, int len) |
static int | pg_latin1_verifystr (const unsigned char *s, int len) |
static int | pg_sjis_verifychar (const unsigned char *s, int len) |
static int | pg_sjis_verifystr (const unsigned char *s, int len) |
static int | pg_big5_verifychar (const unsigned char *s, int len) |
static int | pg_big5_verifystr (const unsigned char *s, int len) |
static int | pg_gbk_verifychar (const unsigned char *s, int len) |
static int | pg_gbk_verifystr (const unsigned char *s, int len) |
static int | pg_uhc_verifychar (const unsigned char *s, int len) |
static int | pg_uhc_verifystr (const unsigned char *s, int len) |
static int | pg_gb18030_verifychar (const unsigned char *s, int len) |
static int | pg_gb18030_verifystr (const unsigned char *s, int len) |
static int | pg_utf8_verifychar (const unsigned char *s, int len) |
static void | utf8_advance (const unsigned char *s, uint32 *state, int len) |
static int | pg_utf8_verifystr (const unsigned char *s, int len) |
bool | pg_utf8_islegal (const unsigned char *source, int length) |
void | pg_encoding_set_invalid (int encoding, char *dst) |
int | pg_encoding_mblen (int encoding, const char *mbstr) |
int | pg_encoding_mblen_bounded (int encoding, const char *mbstr) |
int | pg_encoding_dsplen (int encoding, const char *mbstr) |
int | pg_encoding_verifymbchar (int encoding, const char *mbstr, int len) |
int | pg_encoding_verifymbstr (int encoding, const char *mbstr, int len) |
int | pg_encoding_max_length (int encoding) |
Variables | |
static const uint32 | Utf8Transition [256] |
const pg_wchar_tbl | pg_wchar_table [] |
#define pg_euccn_verifychar pg_euckr_verifychar |
#define pg_euccn_verifystr pg_euckr_verifystr |
#define STRIDE_LENGTH (2 * sizeof(Vector8)) |
|
static int mbbisearch (pg_wchar ucs, const struct mbinterval *table, int max)
Definition at line 579 of file wchar.c.
Referenced by ucs_wcwidth().
|
static int pg_ascii2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
|
static int pg_ascii_dsplen (const unsigned char *s)
Definition at line 89 of file wchar.c.
Referenced by pg_big5_dsplen(), pg_euc_dsplen(), pg_euccn_dsplen(), pg_eucjp_dsplen(), pg_euctw_dsplen(), pg_gb18030_dsplen(), pg_gbk_dsplen(), pg_latin1_dsplen(), pg_sjis_dsplen(), and pg_uhc_dsplen().
|
static int pg_ascii_mblen (const unsigned char *s)
|
static int pg_ascii_verifychar (const unsigned char *s, int len)
|
static int pg_ascii_verifystr (const unsigned char *s, int len)
|
static int pg_big5_dsplen (const unsigned char *s)
Definition at line 932 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().
|
static int pg_big5_mblen (const unsigned char *s)
Definition at line 920 of file wchar.c.
References IS_HIGHBIT_SET, and len.
Referenced by pg_big5_verifychar().
|
static int pg_big5_verifychar (const unsigned char *s, int len)
Definition at line 1477 of file wchar.c.
References len, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, and pg_big5_mblen().
Referenced by pg_big5_verifystr().
|
static int pg_big5_verifystr (const unsigned char *s, int len)
Definition at line 1502 of file wchar.c.
References IS_HIGHBIT_SET, len, pg_big5_verifychar(), and start.
int pg_encoding_dsplen | ( | int | encoding, |
const char * | mbstr | ||
) |
Definition at line 2137 of file wchar.c.
References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by PQdsplen(), and reportErrorPosition().
int pg_encoding_max_length | ( | int | encoding | ) |
Definition at line 2174 of file wchar.c.
References Assert(), encoding, pg_wchar_tbl::maxmblen, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by ascii(), chr(), CopyConvertBuf(), pg_encoding_mbcliplen(), pg_encoding_set_invalid(), pg_verify_mbstr_len(), reportErrorPosition(), test_enc_setup(), and type_maximum_size().
int pg_encoding_mblen | ( | int | encoding, |
const char * | mbstr | ||
) |
Definition at line 2116 of file wchar.c.
References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), fmtIdEnc(), pg_encoding_mblen_bounded(), PQescapeInternal(), PQescapeStringInternal(), PQmblen(), PQmblenBounded(), report_invalid_encoding(), report_untranslatable_char(), and test_enc_setup().
int pg_encoding_mblen_bounded | ( | int | encoding, |
const char * | mbstr | ||
) |
Definition at line 2128 of file wchar.c.
References encoding, pg_encoding_mblen(), and strnlen().
void pg_encoding_set_invalid | ( | int | encoding, |
char * | dst | ||
) |
Definition at line 2049 of file wchar.c.
References Assert(), encoding, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, pg_encoding_max_length(), and PG_UTF8.
Referenced by appendStringLiteral(), fmtIdEnc(), PQescapeStringInternal(), and test_enc_setup().
int pg_encoding_verifymbchar | ( | int | encoding, |
const char * | mbstr, | ||
int | len | ||
) |
Definition at line 2150 of file wchar.c.
References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by appendStringLiteral(), big52euc_tw(), big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), fmtIdEnc(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), PQescapeStringInternal(), shift_jis_20042euc_jis_2004(), and sjis2euc_jp().
int pg_encoding_verifymbstr | ( | int | encoding, |
const char * | mbstr, | ||
int | len | ||
) |
Definition at line 2163 of file wchar.c.
References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by add_file_to_manifest(), CopyConvertBuf(), handle_oauth_sasl_error(), parse_oauth_json(), PQescapeInternal(), test_enc_conversion(), test_enc_setup(), and test_one_vector_escape().
|
static int pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
Definition at line 103 of file wchar.c.
References IS_HIGHBIT_SET, len, SS2, and SS3.
Referenced by pg_eucjp2wchar_with_len(), and pg_euckr2wchar_with_len().
|
static inline int pg_euc_dsplen (const unsigned char *s)
Definition at line 158 of file wchar.c.
References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.
Referenced by pg_euckr_dsplen(), and pg_johab_dsplen().
|
static inline int pg_euc_mblen (const unsigned char *s)
Definition at line 142 of file wchar.c.
References IS_HIGHBIT_SET, len, SS2, and SS3.
Referenced by pg_eucjp_mblen(), pg_euckr_mblen(), and pg_johab_mblen().
|
static int pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
Definition at line 230 of file wchar.c.
References IS_HIGHBIT_SET, len, SS2, and SS3.
|
static int pg_euccn_dsplen (const unsigned char *s)
Definition at line 281 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().
|
static int pg_euccn_mblen (const unsigned char *s)
|
static int pg_eucjp2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
Definition at line 177 of file wchar.c.
References len, and pg_euc2wchar_with_len().
|
static int pg_eucjp_dsplen (const unsigned char *s)
Definition at line 189 of file wchar.c.
References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.
|
static int pg_eucjp_mblen (const unsigned char *s)
|
static int pg_eucjp_verifychar (const unsigned char *s, int len)
Definition at line 1080 of file wchar.c.
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.
Referenced by pg_eucjp_verifystr().
|
static int pg_eucjp_verifystr (const unsigned char *s, int len)
Definition at line 1135 of file wchar.c.
References IS_HIGHBIT_SET, len, pg_eucjp_verifychar(), and start.
|
static int pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
Definition at line 208 of file wchar.c.
References len, and pg_euc2wchar_with_len().
|
static int pg_euckr_dsplen (const unsigned char *s)
|
static int pg_euckr_mblen (const unsigned char *s)
Definition at line 214 of file wchar.c.
References pg_euc_mblen().
|
static int pg_euckr_verifychar (const unsigned char *s, int len)
Definition at line 1164 of file wchar.c.
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, and len.
Referenced by pg_euckr_verifystr().
|
static int pg_euckr_verifystr (const unsigned char *s, int len)
Definition at line 1193 of file wchar.c.
References IS_HIGHBIT_SET, len, pg_euckr_verifychar(), and start.
|
static int pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
Definition at line 297 of file wchar.c.
References IS_HIGHBIT_SET, len, SS2, and SS3.
|
static int pg_euctw_dsplen (const unsigned char *s)
Definition at line 353 of file wchar.c.
References IS_HIGHBIT_SET, len, pg_ascii_dsplen(), SS2, and SS3.
|
static int pg_euctw_mblen (const unsigned char *s)
|
static int pg_euctw_verifychar (const unsigned char *s, int len)
Definition at line 1226 of file wchar.c.
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.
Referenced by pg_euctw_verifystr().
|
static int pg_euctw_verifystr (const unsigned char *s, int len)
Definition at line 1276 of file wchar.c.
References IS_HIGHBIT_SET, len, pg_euctw_verifychar(), and start.
|
static int pg_gb18030_dsplen (const unsigned char *s)
Definition at line 1027 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().
|
static int pg_gb18030_mblen (const unsigned char *s)
|
static int pg_gb18030_verifychar (const unsigned char *s, int len)
Definition at line 1639 of file wchar.c.
References IS_HIGHBIT_SET, and len.
Referenced by pg_gb18030_verifystr().
|
static int pg_gb18030_verifystr (const unsigned char *s, int len)
Definition at line 1670 of file wchar.c.
References IS_HIGHBIT_SET, len, pg_gb18030_verifychar(), and start.
|
static int pg_gbk_dsplen (const unsigned char *s)
Definition at line 959 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().
|
static int pg_gbk_mblen (const unsigned char *s)
Definition at line 947 of file wchar.c.
References IS_HIGHBIT_SET, and len.
Referenced by pg_gbk_verifychar().
|
static int pg_gbk_verifychar (const unsigned char *s, int len)
Definition at line 1531 of file wchar.c.
References len, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, and pg_gbk_mblen().
Referenced by pg_gbk_verifystr().
|
static int pg_gbk_verifystr (const unsigned char *s, int len)
Definition at line 1556 of file wchar.c.
References IS_HIGHBIT_SET, len, pg_gbk_verifychar(), and start.
|
static int pg_johab_dsplen (const unsigned char *s)
Definition at line 427 of file wchar.c.
References pg_euc_dsplen().
|
static int pg_johab_mblen (const unsigned char *s)
Definition at line 421 of file wchar.c.
References pg_euc_mblen().
Referenced by pg_johab_verifychar().
|
static int pg_johab_verifychar (const unsigned char *s, int len)
Definition at line 1305 of file wchar.c.
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, and pg_johab_mblen().
Referenced by pg_johab_verifystr().
|
static int pg_johab_verifystr (const unsigned char *s, int len)
Definition at line 1329 of file wchar.c.
References IS_HIGHBIT_SET, len, pg_johab_verifychar(), and start.
|
static int pg_latin12wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
|
static int pg_latin1_dsplen (const unsigned char *s)
Definition at line 880 of file wchar.c.
References pg_ascii_dsplen().
|
static int pg_latin1_mblen (const unsigned char *s)
|
static int pg_latin1_verifychar (const unsigned char *s, int len)
|
static int pg_latin1_verifystr (const unsigned char *s, int len)
|
static int pg_mule2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
Definition at line 672 of file wchar.c.
|
static int pg_mule_dsplen (const unsigned char *s)
int pg_mule_mblen | ( | const unsigned char * | s | ) |
Definition at line 791 of file wchar.c.
References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.
Referenced by mic2latin(), mic2latin_with_table(), and pg_mule_verifychar().
|
static int pg_mule_verifychar (const unsigned char *s, int len)
Definition at line 1358 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_mule_mblen().
Referenced by pg_mule_verifystr().
|
static int pg_mule_verifystr (const unsigned char *s, int len)
Definition at line 1379 of file wchar.c.
References IS_HIGHBIT_SET, len, pg_mule_verifychar(), and start.
|
static int pg_sjis_dsplen (const unsigned char *s)
Definition at line 903 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().
|
static int pg_sjis_mblen (const unsigned char *s)
Definition at line 889 of file wchar.c.
References IS_HIGHBIT_SET, and len.
Referenced by pg_sjis_verifychar().
|
static int pg_sjis_verifychar (const unsigned char *s, int len)
Definition at line 1425 of file wchar.c.
References ISSJISHEAD, ISSJISTAIL, len, and pg_sjis_mblen().
Referenced by pg_sjis_verifystr().
|
static int pg_sjis_verifystr (const unsigned char *s, int len)
Definition at line 1448 of file wchar.c.
References IS_HIGHBIT_SET, len, pg_sjis_verifychar(), and start.
|
static int pg_uhc_dsplen (const unsigned char *s)
Definition at line 986 of file wchar.c.
References IS_HIGHBIT_SET, len, and pg_ascii_dsplen().
|
static int pg_uhc_mblen (const unsigned char *s)
Definition at line 974 of file wchar.c.
References IS_HIGHBIT_SET, and len.
Referenced by pg_uhc_verifychar().
|
static int pg_uhc_verifychar (const unsigned char *s, int len)
Definition at line 1585 of file wchar.c.
References len, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, and pg_uhc_mblen().
Referenced by pg_uhc_verifystr().
|
static int pg_uhc_verifystr (const unsigned char *s, int len)
Definition at line 1610 of file wchar.c.
References IS_HIGHBIT_SET, len, pg_uhc_verifychar(), and start.
|
static int pg_utf2wchar_with_len (const unsigned char *from, pg_wchar *to, int len)
Definition at line 439 of file wchar.c.
References len.
bool pg_utf8_islegal | ( | const unsigned char * | source, |
int | length | ||
) |
Definition at line 1987 of file wchar.c.
Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifychar(), utf8_to_iso8859_1(), and UtfToLocal().
|
static int pg_utf8_verifychar (const unsigned char *s, int len)
Definition at line 1699 of file wchar.c.
References len, and pg_utf8_islegal().
Referenced by pg_utf8_verifystr().
|
static int pg_utf8_verifystr (const unsigned char *s, int len)
Definition at line 1889 of file wchar.c.
References Assert(), BGN, END, ERR, IS_HIGHBIT_SET, is_valid_ascii(), len, pg_utf8_verifychar(), pg_utf_mblen(), start, STRIDE_LENGTH, and utf8_advance().
|
static int pg_utf_dsplen (const unsigned char *s)
int pg_utf_mblen | ( | const unsigned char * | s | ) |
Definition at line 536 of file wchar.c.
References len.
Referenced by pg_utf8_verifystr(), and pg_wchar2utf_with_len().
|
static int pg_wchar2euc_with_len (const pg_wchar *from, unsigned char *to, int len)
Definition at line 375 of file wchar.c.
References len.
|
static int pg_wchar2mule_with_len (const pg_wchar *from, unsigned char *to, int len)
Definition at line 725 of file wchar.c.
References IS_LC1, IS_LC2, IS_LCPRV1_A_RANGE, IS_LCPRV1_B_RANGE, IS_LCPRV2_A_RANGE, IS_LCPRV2_B_RANGE, LCPRV1_A, LCPRV1_B, LCPRV2_A, LCPRV2_B, and len.
|
static int pg_wchar2single_with_len (const pg_wchar *from, unsigned char *to, int len)
|
static int pg_wchar2utf_with_len (const pg_wchar *from, unsigned char *to, int len)
Definition at line 505 of file wchar.c.
References len, pg_utf_mblen(), and unicode_to_utf8().
|
static int ucs_wcwidth (pg_wchar ucs)
Definition at line 626 of file wchar.c.
References east_asian_fw, mbbisearch(), and nonspacing.
Referenced by pg_utf_dsplen().
|
static void utf8_advance (const unsigned char *s, uint32 *state, int len)
Definition at line 1871 of file wchar.c.
References len, and Utf8Transition.
Referenced by pg_utf8_verifystr().
const pg_wchar_tbl pg_wchar_table[] |
Definition at line 2062 of file wchar.c.
Referenced by pg_database_encoding_max_length(), pg_dsplen(), pg_encoding_dsplen(), pg_encoding_max_length(), pg_encoding_max_length_sql(), pg_encoding_mb2wchar_with_len(), pg_encoding_mbcliplen(), pg_encoding_mblen(), pg_encoding_verifymbchar(), pg_encoding_verifymbstr(), pg_encoding_wchar2mb_with_len(), pg_generic_charinc(), pg_mb2wchar(), pg_mb2wchar_with_len(), pg_mblen(), pg_verify_mbstr(), pg_verify_mbstr_len(), pg_wchar2mb(), and pg_wchar2mb_with_len().
|
static const uint32 Utf8Transition [256]
Definition at line 1813 of file wchar.c.
Referenced by utf8_advance().