/*
 * Size bound with the value 4.  Going by its name this is the largest
 * number of bytes a single multibyte character may occupy; the code that
 * relies on the bound is outside this view -- TODO confirm which encodings
 * it is meant to cover.
 */
33 #define MAX_MULTIBYTE_CHAR_LEN 4
/*
 * Byte classification tests (presumably for Shift-JIS lead and trail
 * bytes, going by the names -- confirm against the callers).
 *
 * ISSJISHEAD(c) is true when c is in 0x81..0x9f or in 0xe0..0xfc.
 * ISSJISTAIL(c) is true when c is in 0x40..0x7e or in 0x80..0xfc.
 *
 * NOTE(review): neither macro casts its argument to unsigned char, so a
 * plain char argument on a platform where char is signed would compare as
 * a negative number for bytes >= 0x80 and never match the upper ranges.
 * Callers appear to be expected to pass an unsigned value -- confirm.
 *
 * The argument may be evaluated up to four times; do not pass an
 * expression with side effects.
 */
44 #define ISSJISHEAD(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))
45 #define ISSJISTAIL(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
/*
 * One-byte codes 0x81..0x8e, each assigned to a named character set.
 * (The LC_ prefix presumably means "leading character", i.e. the byte
 * that tags which character set the following byte belongs to -- confirm.)
 */
105 #define LC_ISO8859_1 0x81
106 #define LC_ISO8859_2 0x82
107 #define LC_ISO8859_3 0x83
108 #define LC_ISO8859_4 0x84
109 #define LC_TIS620 0x85
110 #define LC_ISO8859_7 0x86
111 #define LC_ISO8859_6 0x87
112 #define LC_ISO8859_8 0x88
113 #define LC_JISX0201K 0x89
114 #define LC_JISX0201R 0x8a
119 #define LC_KOI8_R 0x8b
120 #define LC_ISO8859_5 0x8c
121 #define LC_ISO8859_9 0x8d
122 #define LC_ISO8859_15 0x8e
/*
 * IS_LC1(c): true when c, converted to unsigned char, is in 0x81..0x8d.
 * The argument is evaluated twice.
 *
 * NOTE(review): LC_ISO8859_15 (0x8e) is defined above but lies outside
 * the range tested here, so IS_LC1(LC_ISO8859_15) is false -- confirm
 * that the exclusion is intentional.
 */
126 #define IS_LC1(c) ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8d)
/*
 * One-byte codes 0x90..0x99, each assigned to a named character set.
 * (Presumably the multibyte counterparts of the 0x81..0x8e codes --
 * confirm.)
 */
132 #define LC_JISX0208_1978 0x90
133 #define LC_GB2312_80 0x91
134 #define LC_JISX0208 0x92
135 #define LC_KS5601 0x93
136 #define LC_JISX0212 0x94
137 #define LC_CNS11643_1 0x95
138 #define LC_CNS11643_2 0x96
139 #define LC_JISX0213_1 0x97
141 #define LC_BIG5_1 0x98
143 #define LC_BIG5_2 0x99
/*
 * IS_LC2(c): true when c, converted to unsigned char, is in 0x90..0x99,
 * which covers every code defined in this group.  The argument is
 * evaluated twice.
 */
147 #define IS_LC2(c) ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)
/*
 * Marker bytes 0x9a and 0x9b, plus tests on them.
 *
 * IS_LCPRV1(c)         - c (as unsigned char) equals LCPRV1_A or LCPRV1_B.
 * IS_LCPRV1_A_RANGE(c) - c (as unsigned char) is in 0xa0..0xdf.
 * IS_LCPRV1_B_RANGE(c) - c (as unsigned char) is in 0xe0..0xef.
 *
 * Presumably the _A_RANGE / _B_RANGE tests apply to the byte that follows
 * an LCPRV1_A / LCPRV1_B marker respectively; that pairing is not
 * established by anything visible here -- confirm against the callers.
 *
 * Each macro evaluates its argument twice.
 */
153 #define LCPRV1_A 0x9a
154 #define LCPRV1_B 0x9b
155 #define IS_LCPRV1(c) ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B)
156 #define IS_LCPRV1_A_RANGE(c) \
157 ((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)
158 #define IS_LCPRV1_B_RANGE(c) \
159 ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)
/*
 * Marker bytes 0x9c and 0x9d, plus tests on them.
 *
 * IS_LCPRV2(c)         - c (as unsigned char) equals LCPRV2_A or LCPRV2_B.
 * IS_LCPRV2_A_RANGE(c) - c (as unsigned char) is in 0xf0..0xf4.
 * IS_LCPRV2_B_RANGE(c) - c (as unsigned char) is in 0xf5..0xfe.
 *
 * Presumably the _A_RANGE / _B_RANGE tests apply to the byte that follows
 * an LCPRV2_A / LCPRV2_B marker respectively; that pairing is not
 * established by anything visible here -- confirm against the callers.
 *
 * Each macro evaluates its argument twice.
 */
165 #define LCPRV2_A 0x9c
166 #define LCPRV2_B 0x9d
167 #define IS_LCPRV2(c) ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B)
168 #define IS_LCPRV2_A_RANGE(c) \
169 ((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)
170 #define IS_LCPRV2_B_RANGE(c) \
171 ((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)
/*
 * Character-set codes in the 0xa0..0xa8 area.  All of these values fall
 * inside 0xa0..0xdf, the interval tested by IS_LCPRV1_A_RANGE.
 * 0xa1 and 0xa7 have no name assigned in the lines visible here.
 */
176 #define LC_SISHENG 0xa0
180 #define LC_VISCII_LOWER 0xa2
182 #define LC_VISCII_UPPER 0xa3
184 #define LC_ARABIC_DIGIT 0xa4
185 #define LC_ARABIC_1_COLUMN 0xa5
186 #define LC_ASCII_RIGHT_TO_LEFT 0xa6
191 #define LC_ARABIC_2_COLUMN 0xa8
/*
 * Character-set codes 0xf0..0xfc, assigned consecutively.
 * 0xf0..0xf4 fall inside the interval tested by IS_LCPRV2_A_RANGE and
 * 0xf5..0xfc inside the interval tested by IS_LCPRV2_B_RANGE
 * (0xf5..0xfe); 0xfd and 0xfe have no name assigned in the lines
 * visible here.
 */
196 #define LC_INDIAN_1_COLUMN 0xf0
198 #define LC_TIBETAN_1_COLUMN 0xf1
200 #define LC_UNICODE_SUBSET_2 0xf2
202 #define LC_UNICODE_SUBSET_3 0xf3
204 #define LC_UNICODE_SUBSET 0xf4
206 #define LC_ETHIOPIC 0xf5
207 #define LC_CNS11643_3 0xf6
208 #define LC_CNS11643_4 0xf7
209 #define LC_CNS11643_5 0xf8
210 #define LC_CNS11643_6 0xf9
211 #define LC_CNS11643_7 0xfa
212 #define LC_INDIAN_2_COLUMN 0xfb
214 #define LC_TIBETAN 0xfc
/*
 * Range checks on encoding identifiers.  PG_KOI8U and _PG_LAST_ENCODING_
 * are defined outside this view (presumably members of the encoding
 * enum -- confirm).
 *
 * PG_ENCODING_BE_LAST            - alias for PG_KOI8U; the upper bound
 *                                  used by the two macros below.
 * PG_VALID_BE_ENCODING(e)        - 0 <= e <= PG_ENCODING_BE_LAST.
 * PG_ENCODING_IS_CLIENT_ONLY(e)  - PG_ENCODING_BE_LAST < e <
 *                                  _PG_LAST_ENCODING_.
 * PG_VALID_ENCODING(e)           - 0 <= e < _PG_LAST_ENCODING_.
 * PG_VALID_FE_ENCODING(e)        - identical to PG_VALID_ENCODING(e).
 *
 * Every function-like macro here evaluates its argument twice.
 */
291 #define PG_ENCODING_BE_LAST PG_KOI8U
297 #define PG_VALID_BE_ENCODING(_enc) \
298 ((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST)
300 #define PG_ENCODING_IS_CLIENT_ONLY(_enc) \
301 ((_enc) > PG_ENCODING_BE_LAST && (_enc) < _PG_LAST_ENCODING_)
303 #define PG_VALID_ENCODING(_enc) \
304 ((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_)
307 #define PG_VALID_FE_ENCODING(_enc) PG_VALID_ENCODING(_enc)
/*
 * Sizing constants for encoding conversion.  Their users are outside this
 * view, so the meanings below are inferred from the names only.
 *
 * MAX_CONVERSION_GROWTH (4)          - presumably the worst-case factor by
 *                                      which a string's byte length can
 *                                      grow during conversion -- confirm.
 * MAX_CONVERSION_INPUT_LENGTH (16)   - presumably the longest input, in
 *                                      bytes, that a conversion treats as
 *                                      one unit -- confirm.
 * MAX_UNICODE_EQUIVALENT_STRING (16) - presumably a buffer bound for the
 *                                      server-encoding form of one Unicode
 *                                      code point -- confirm.
 */
318 #define MAX_CONVERSION_GROWTH 4
336 #define MAX_CONVERSION_INPUT_LENGTH 16
345 #define MAX_UNICODE_EQUIVALENT_STRING 16
523 #define CHECK_ENCODING_CONVERSION_ARGS(srcencoding,destencoding) \
524 check_encoding_conversion_args(PG_GETARG_INT32(0), \
525 PG_GETARG_INT32(1), \
526 PG_GETARG_INT32(4), \
537 return (
c > 0 &&
c <= 0x10FFFF);
543 return (
c >= 0xD800 &&
c <= 0xDBFF);
549 return (
c >= 0xDC00 &&
c <= 0xDFFF);
555 return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF);
567 if ((*
c & 0x80) == 0)
569 else if ((*
c & 0xe0) == 0xc0)
570 return (
pg_wchar) (((
c[0] & 0x1f) << 6) |
572 else if ((*
c & 0xf0) == 0xe0)
573 return (
pg_wchar) (((
c[0] & 0x0f) << 12) |
574 ((
c[1] & 0x3f) << 6) |
576 else if ((*
c & 0xf8) == 0xf0)
577 return (
pg_wchar) (((
c[0] & 0x07) << 18) |
578 ((
c[1] & 0x3f) << 12) |
579 ((
c[2] & 0x3f) << 6) |
590 static inline unsigned char *
599 utf8string[0] = 0xC0 | ((
c >> 6) & 0x1F);
600 utf8string[1] = 0x80 | (
c & 0x3F);
602 else if (
c <= 0xFFFF)
604 utf8string[0] = 0xE0 | ((
c >> 12) & 0x0F);
605 utf8string[1] = 0x80 | ((
c >> 6) & 0x3F);
606 utf8string[2] = 0x80 | (
c & 0x3F);
610 utf8string[0] = 0xF0 | ((
c >> 18) & 0x07);
611 utf8string[1] = 0x80 | ((
c >> 12) & 0x3F);
612 utf8string[2] = 0x80 | ((
c >> 6) & 0x3F);
613 utf8string[3] = 0x80 | (
c & 0x3F);
629 else if (
c <= 0xFFFF)
/*
 * When USE_PRIVATE_ENCODING_FUNCS is defined, or FRONTEND is not defined,
 * rename five encoding functions to their "_private"-suffixed forms, so
 * every later use of the plain name in the translation unit refers to the
 * suffixed symbol.
 *
 * NOTE(review): the matching #endif is not visible in this view.
 */
644 #if defined(USE_PRIVATE_ENCODING_FUNCS) || !defined(FRONTEND)
645 #define pg_char_to_encoding pg_char_to_encoding_private
646 #define pg_encoding_to_char pg_encoding_to_char_private
647 #define pg_valid_server_encoding pg_valid_server_encoding_private
648 #define pg_valid_server_encoding_id pg_valid_server_encoding_id_private
649 #define pg_utf_mblen pg_utf_mblen_private
/*
 * Declaration only; the definition is outside this view.
 * Takes a read-only character pointer and returns int.
 * Presumably the byte length of the multibyte character that starts at
 * mbstr -- confirm against the definition.
 */
697 extern int pg_mblen(
const char *mbstr);
/*
 * Declaration only; the definition is outside this view.
 * Takes a read-only message-domain name and returns int; the meaning of
 * the return value is not visible here -- TODO confirm.
 */
721 extern int pg_bind_textdomain_codeset(
const char *domainname);
730 unsigned char *src,
int srclen,
731 unsigned char *
dest,
int destlen,
/*
 * Declaration only; the definition is outside this view.
 * Takes a 16-bit code (big5) and a non-const byte pointer (lc), and
 * returns a 16-bit code.  Presumably converts a Big5 code to the
 * corresponding CNS code and reports the charset byte through lc --
 * confirm against the definition.
 */
742 extern unsigned short BIG5toCNS(
unsigned short big5,
unsigned char *lc);
/*
 * Declaration only; the definition is outside this view.
 * Takes a 16-bit code (cns) and a charset byte passed by value (lc), and
 * returns a 16-bit code.  Presumably the inverse of BIG5toCNS -- confirm
 * against the definition.
 */
743 extern unsigned short CNStoBIG5(
unsigned short cns,
unsigned char lc);
767 int expected_src_encoding,
768 int expected_dest_encoding);
/*
 * Declaration only; the definition is outside this view.
 *
 * l             - read-only input bytes
 * p             - writable output buffer
 * len           - int length (presumably of l, in bytes -- confirm)
 * src_encoding  - source encoding identifier
 * dest_encoding - destination encoding identifier
 * tab           - read-only byte table (presumably a per-byte translation
 *                 table -- confirm)
 * noError       - flag; presumably selects reporting failure through the
 *                 return value instead of raising an error -- confirm
 *
 * Returns int; its meaning is not visible here -- TODO confirm.
 */
774 extern int local2local(
const unsigned char *l,
unsigned char *p,
int len,
775 int src_encoding,
int dest_encoding,
776 const unsigned char *tab,
bool noError);
/*
 * Declaration only; the definition is outside this view.
 *
 * l        - read-only input bytes
 * p        - writable output buffer
 * len      - int length (presumably of l, in bytes -- confirm)
 * lc       - int charset code (presumably one of the LC_* values --
 *            confirm)
 * encoding - encoding identifier
 * noError  - flag; presumably selects reporting failure through the
 *            return value instead of raising an error -- confirm
 *
 * Returns int; its meaning is not visible here -- TODO confirm.
 */
777 extern int latin2mic(
const unsigned char *l,
unsigned char *p,
int len,
778 int lc,
int encoding,
bool noError);
/*
 * Declaration only; the definition is outside this view.
 * Same parameter list as latin2mic, with the input named mic; presumably
 * the reverse conversion -- confirm against the definition.
 *
 * mic      - read-only input bytes
 * p        - writable output buffer
 * len      - int length (presumably of mic, in bytes -- confirm)
 * lc       - int charset code (presumably one of the LC_* values --
 *            confirm)
 * encoding - encoding identifier
 * noError  - flag; presumably selects reporting failure through the
 *            return value instead of raising an error -- confirm
 *
 * Returns int; its meaning is not visible here -- TODO confirm.
 */
779 extern int mic2latin(
const unsigned char *mic,
unsigned char *p,
int len,
780 int lc,
int encoding,
bool noError);
783 const unsigned char *tab,
bool noError);
786 const unsigned char *tab,
bool noError);
/*
 * Declaration only; the definition is outside this view.
 *
 * str      - read-only input characters
 * len      - int length (presumably of str, in bytes -- confirm)
 * utf16len - int pointer; presumably receives the length of the result --
 *            confirm
 *
 * Returns a WCHAR pointer.  Who owns and frees the returned buffer is not
 * visible here -- TODO confirm.  WCHAR suggests this is only declared on
 * Windows builds; any enclosing conditional is outside this view.
 */
789 extern WCHAR *pgwin32_message_to_UTF16(
const char *
str,
int len,
int *utf16len);
#define pg_attribute_noreturn()
static rewind_source * source
const char * pg_get_client_encoding_name(void)
int pg_encoding_mblen_bounded(int encoding, const char *mbstr)
unsigned char * pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding)
int mic2latin_with_table(const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)
bool pg_utf8_islegal(const unsigned char *source, int length)
bool pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s)
char * pg_any_to_server(const char *s, int len, int encoding)
int GetDatabaseEncoding(void)
int pg_encoding_wchar2mb_with_len(int encoding, const pg_wchar *from, char *to, int len)
int UtfToLocal(const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
const char * get_encoding_name_for_icu(int encoding)
uint32(* utf_local_conversion_func)(uint32 code)
int(* wchar2mb_with_len_converter)(const pg_wchar *from, unsigned char *to, int len)
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
int pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
void InitializeClientEncoding(void)
int pg_dsplen(const char *mbstr)
int pg_mbstrlen_with_len(const char *mbstr, int limit)
mbcharacter_incrementer pg_database_encoding_character_incrementer(void)
char * pg_client_to_server(const char *s, int len)
int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
int latin2mic_with_table(const unsigned char *l, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)
int pg_mb2wchar(const char *from, pg_wchar *to)
bool(* mbcharacter_incrementer)(unsigned char *mbstr, int len)
int(* mbchar_verifier)(const unsigned char *mbstr, int len)
int pg_mbcharcliplen(const char *mbstr, int len, int limit)
int mic2latin(const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, bool noError)
struct pg_enc2name pg_enc2name
unsigned short CNStoBIG5(unsigned short cns, unsigned char lc)
size_t pg_wchar_strlen(const pg_wchar *str)
int pg_wchar2mb(const pg_wchar *from, char *to)
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len) pg_attribute_noreturn()
int pg_mule_mblen(const unsigned char *s)
PGDLLIMPORT const pg_enc2name pg_enc2name_tbl[]
int pg_mbstrlen(const char *mbstr)
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
PGDLLIMPORT const char * pg_enc2gettext_tbl[]
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
int LocalToUtf(const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
int pg_mbcliplen(const char *mbstr, int len, int limit)
int GetMessageEncoding(void)
static pg_wchar utf8_to_unicode(const unsigned char *c)
int pg_do_encoding_conversion_buf(Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError)
int SetClientEncoding(int encoding)
void SetMessageEncoding(int encoding)
void pg_unicode_to_server(pg_wchar c, unsigned char *s)
static bool is_valid_unicode_codepoint(pg_wchar c)
void check_encoding_conversion_args(int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)
int pg_database_encoding_max_length(void)
int PrepareClientEncoding(int encoding)
static int unicode_utf8len(pg_wchar c)
int pg_valid_client_encoding(const char *name)
int pg_encoding_dsplen(int encoding, const char *mbstr)
const char * GetDatabaseEncodingName(void)
void report_invalid_encoding(int encoding, const char *mbstr, int len) pg_attribute_noreturn()
int pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2)
char * pg_server_to_client(const char *s, int len)
unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc)
#define pg_encoding_to_char
#define pg_valid_server_encoding_id
int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)
int pg_get_client_encoding(void)
#define pg_valid_server_encoding
bool is_encoding_supported_by_icu(int encoding)
int(* mb2wchar_with_len_converter)(const unsigned char *from, pg_wchar *to, int len)
#define pg_char_to_encoding
int(* mbstr_verifier)(const unsigned char *mbstr, int len)
int local2local(const unsigned char *l, unsigned char *p, int len, int src_encoding, int dest_encoding, const unsigned char *tab, bool noError)
int pg_encoding_max_length(int encoding)
int pg_encoding_mblen(int encoding, const char *mbstr)
PGDLLIMPORT const pg_wchar_tbl pg_wchar_table[]
static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
int pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n)
static bool is_utf16_surrogate_first(pg_wchar c)
void SetDatabaseEncoding(int encoding)
int latin2mic(const unsigned char *l, unsigned char *p, int len, int lc, int encoding, bool noError)
int pg_encoding_mbcliplen(int encoding, const char *mbstr, int len, int limit)
int pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n)
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
int pg_encoding_mb2wchar_with_len(int encoding, const char *from, pg_wchar *to, int len)
char * pg_server_to_any(const char *s, int len, int encoding)
int(* mbdisplaylen_converter)(const unsigned char *mbstr)
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
static bool is_utf16_surrogate_second(pg_wchar c)
int pg_mblen(const char *mbstr)
int(* mblen_converter)(const unsigned char *mbstr)