PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
pg_wchar.h File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  pg_enc2name
 
struct  pg_enc2gettext
 
struct  pg_wchar_tbl
 
struct  pg_mb_radix_tree
 
struct  pg_utf_to_local_combined
 
struct  pg_local_to_utf_combined
 

Macros

#define MAX_MULTIBYTE_CHAR_LEN   4
 
#define SS2   0x8e /* single shift 2 (JIS0201) */
 
#define SS3   0x8f /* single shift 3 (JIS0212) */
 
#define ISSJISHEAD(c)   (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))
 
#define ISSJISTAIL(c)   (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
 
#define LC_ISO8859_1   0x81 /* ISO8859 Latin 1 */
 
#define LC_ISO8859_2   0x82 /* ISO8859 Latin 2 */
 
#define LC_ISO8859_3   0x83 /* ISO8859 Latin 3 */
 
#define LC_ISO8859_4   0x84 /* ISO8859 Latin 4 */
 
#define LC_TIS620   0x85 /* Thai (not supported yet) */
 
#define LC_ISO8859_7   0x86 /* Greek (not supported yet) */
 
#define LC_ISO8859_6   0x87 /* Arabic (not supported yet) */
 
#define LC_ISO8859_8   0x88 /* Hebrew (not supported yet) */
 
#define LC_JISX0201K   0x89 /* Japanese 1 byte kana */
 
#define LC_JISX0201R   0x8a /* Japanese 1 byte Roman */
 
#define LC_KOI8_R   0x8b /* Cyrillic KOI8-R */
 
#define LC_ISO8859_5   0x8c /* ISO8859 Cyrillic */
 
#define LC_ISO8859_9   0x8d /* ISO8859 Latin 5 (not supported yet) */
 
#define LC_ISO8859_15   0x8e /* ISO8859 Latin 9 (not supported yet) */
 
#define IS_LC1(c)   ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8d)
 
#define LC_JISX0208_1978   0x90 /* Japanese Kanji, old JIS (not supported) */
 
#define LC_GB2312_80   0x91 /* Chinese */
 
#define LC_JISX0208   0x92 /* Japanese Kanji (JIS X 0208) */
 
#define LC_KS5601   0x93 /* Korean */
 
#define LC_JISX0212   0x94 /* Japanese Kanji (JIS X 0212) */
 
#define LC_CNS11643_1   0x95 /* CNS 11643-1992 Plane 1 */
 
#define LC_CNS11643_2   0x96 /* CNS 11643-1992 Plane 2 */
 
#define LC_JISX0213_1
 
#define LC_BIG5_1
 
#define LC_BIG5_2
 
#define IS_LC2(c)   ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)
 
#define LCPRV1_A   0x9a
 
#define LCPRV1_B   0x9b
 
#define IS_LCPRV1(c)   ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B)
 
#define IS_LCPRV1_A_RANGE(c)   ((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)
 
#define IS_LCPRV1_B_RANGE(c)   ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)
 
#define LCPRV2_A   0x9c
 
#define LCPRV2_B   0x9d
 
#define IS_LCPRV2(c)   ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B)
 
#define IS_LCPRV2_A_RANGE(c)   ((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)
 
#define IS_LCPRV2_B_RANGE(c)   ((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)
 
#define LC_SISHENG
 
#define LC_IPA
 
#define LC_VISCII_LOWER
 
#define LC_VISCII_UPPER
 
#define LC_ARABIC_DIGIT   0xa4 /* Arabic digit (not supported) */
 
#define LC_ARABIC_1_COLUMN   0xa5 /* Arabic 1-column (not supported) */
 
#define LC_ASCII_RIGHT_TO_LEFT
 
#define LC_LAO
 
#define LC_ARABIC_2_COLUMN   0xa8 /* Arabic 2-column (not supported) */
 
#define LC_INDIAN_1_COLUMN
 
#define LC_TIBETAN_1_COLUMN
 
#define LC_UNICODE_SUBSET_2
 
#define LC_UNICODE_SUBSET_3
 
#define LC_UNICODE_SUBSET
 
#define LC_ETHIOPIC   0xf5 /* Ethiopic characters (not supported) */
 
#define LC_CNS11643_3   0xf6 /* CNS 11643-1992 Plane 3 */
 
#define LC_CNS11643_4   0xf7 /* CNS 11643-1992 Plane 4 */
 
#define LC_CNS11643_5   0xf8 /* CNS 11643-1992 Plane 5 */
 
#define LC_CNS11643_6   0xf9 /* CNS 11643-1992 Plane 6 */
 
#define LC_CNS11643_7   0xfa /* CNS 11643-1992 Plane 7 */
 
#define LC_INDIAN_2_COLUMN
 
#define LC_TIBETAN   0xfc /* Tibetan (not supported) */
 
#define PG_ENCODING_BE_LAST   PG_KOI8U
 
#define PG_VALID_BE_ENCODING(_enc)   ((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST)
 
#define PG_ENCODING_IS_CLIENT_ONLY(_enc)   ((_enc) > PG_ENCODING_BE_LAST && (_enc) < _PG_LAST_ENCODING_)
 
#define PG_VALID_ENCODING(_enc)   ((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_)
 
#define PG_VALID_FE_ENCODING(_enc)   PG_VALID_ENCODING(_enc)
 
#define MAX_CONVERSION_GROWTH   4
 
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
 

Typedefs

typedef unsigned int pg_wchar
 
typedef enum pg_enc pg_enc
 
typedef struct pg_enc2name pg_enc2name
 
typedef struct pg_enc2gettext pg_enc2gettext
 
typedef int(* mb2wchar_with_len_converter )(const unsigned char *from, pg_wchar *to, int len)
 
typedef int(* wchar2mb_with_len_converter )(const pg_wchar *from, unsigned char *to, int len)
 
typedef int(* mblen_converter )(const unsigned char *mbstr)
 
typedef int(* mbdisplaylen_converter )(const unsigned char *mbstr)
 
typedef bool(* mbcharacter_incrementer )(unsigned char *mbstr, int len)
 
typedef int(* mbverifier )(const unsigned char *mbstr, int len)
 
typedef uint32(* utf_local_conversion_func )(uint32 code)
 

Enumerations

enum  pg_enc {
  PG_SQL_ASCII = 0, PG_EUC_JP, PG_EUC_CN, PG_EUC_KR,
  PG_EUC_TW, PG_EUC_JIS_2004, PG_UTF8, PG_MULE_INTERNAL,
  PG_LATIN1, PG_LATIN2, PG_LATIN3, PG_LATIN4,
  PG_LATIN5, PG_LATIN6, PG_LATIN7, PG_LATIN8,
  PG_LATIN9, PG_LATIN10, PG_WIN1256, PG_WIN1258,
  PG_WIN866, PG_WIN874, PG_KOI8R, PG_WIN1251,
  PG_WIN1252, PG_ISO_8859_5, PG_ISO_8859_6, PG_ISO_8859_7,
  PG_ISO_8859_8, PG_WIN1250, PG_WIN1253, PG_WIN1254,
  PG_WIN1255, PG_WIN1257, PG_KOI8U, PG_SJIS,
  PG_BIG5, PG_GBK, PG_UHC, PG_GB18030,
  PG_JOHAB, PG_SHIFT_JIS_2004, _PG_LAST_ENCODING_
}
 

Functions

bool is_encoding_supported_by_icu (int encoding)
 
const char * get_encoding_name_for_icu (int encoding)
 
int pg_char_to_encoding (const char *name)
 
const char * pg_encoding_to_char (int encoding)
 
int pg_valid_server_encoding_id (int encoding)
 
int pg_mb2wchar (const char *from, pg_wchar *to)
 
int pg_mb2wchar_with_len (const char *from, pg_wchar *to, int len)
 
int pg_encoding_mb2wchar_with_len (int encoding, const char *from, pg_wchar *to, int len)
 
int pg_wchar2mb (const pg_wchar *from, char *to)
 
int pg_wchar2mb_with_len (const pg_wchar *from, char *to, int len)
 
int pg_encoding_wchar2mb_with_len (int encoding, const pg_wchar *from, char *to, int len)
 
int pg_char_and_wchar_strcmp (const char *s1, const pg_wchar *s2)
 
int pg_wchar_strncmp (const pg_wchar *s1, const pg_wchar *s2, size_t n)
 
int pg_char_and_wchar_strncmp (const char *s1, const pg_wchar *s2, size_t n)
 
size_t pg_wchar_strlen (const pg_wchar *wstr)
 
int pg_mblen (const char *mbstr)
 
int pg_dsplen (const char *mbstr)
 
int pg_encoding_mblen (int encoding, const char *mbstr)
 
int pg_encoding_dsplen (int encoding, const char *mbstr)
 
int pg_encoding_verifymb (int encoding, const char *mbstr, int len)
 
int pg_mule_mblen (const unsigned char *mbstr)
 
int pg_mic_mblen (const unsigned char *mbstr)
 
int pg_mbstrlen (const char *mbstr)
 
int pg_mbstrlen_with_len (const char *mbstr, int len)
 
int pg_mbcliplen (const char *mbstr, int len, int limit)
 
int pg_encoding_mbcliplen (int encoding, const char *mbstr, int len, int limit)
 
int pg_mbcharcliplen (const char *mbstr, int len, int limit)
 
int pg_encoding_max_length (int encoding)
 
int pg_database_encoding_max_length (void)
 
mbcharacter_incrementer pg_database_encoding_character_incrementer (void)
 
int PrepareClientEncoding (int encoding)
 
int SetClientEncoding (int encoding)
 
void InitializeClientEncoding (void)
 
int pg_get_client_encoding (void)
 
const char * pg_get_client_encoding_name (void)
 
void SetDatabaseEncoding (int encoding)
 
int GetDatabaseEncoding (void)
 
const char * GetDatabaseEncodingName (void)
 
void SetMessageEncoding (int encoding)
 
int GetMessageEncoding (void)
 
int pg_valid_client_encoding (const char *name)
 
int pg_valid_server_encoding (const char *name)
 
unsigned char * unicode_to_utf8 (pg_wchar c, unsigned char *utf8string)
 
pg_wchar utf8_to_unicode (const unsigned char *c)
 
int pg_utf_mblen (const unsigned char *)
 
unsigned char * pg_do_encoding_conversion (unsigned char *src, int len, int src_encoding, int dest_encoding)
 
char * pg_client_to_server (const char *s, int len)
 
char * pg_server_to_client (const char *s, int len)
 
char * pg_any_to_server (const char *s, int len, int encoding)
 
char * pg_server_to_any (const char *s, int len, int encoding)
 
unsigned short BIG5toCNS (unsigned short big5, unsigned char *lc)
 
unsigned short CNStoBIG5 (unsigned short cns, unsigned char lc)
 
void UtfToLocal (const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding)
 
void LocalToUtf (const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding)
 
bool pg_verifymbstr (const char *mbstr, int len, bool noError)
 
bool pg_verify_mbstr (int encoding, const char *mbstr, int len, bool noError)
 
int pg_verify_mbstr_len (int encoding, const char *mbstr, int len, bool noError)
 
void check_encoding_conversion_args (int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)
 
void report_invalid_encoding (int encoding, const char *mbstr, int len) pg_attribute_noreturn()
 
void report_untranslatable_char (int src_encoding, int dest_encoding, const char *mbstr, int len) pg_attribute_noreturn()
 
void local2local (const unsigned char *l, unsigned char *p, int len, int src_encoding, int dest_encoding, const unsigned char *tab)
 
void pg_ascii2mic (const unsigned char *l, unsigned char *p, int len)
 
void pg_mic2ascii (const unsigned char *mic, unsigned char *p, int len)
 
void latin2mic (const unsigned char *l, unsigned char *p, int len, int lc, int encoding)
 
void mic2latin (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding)
 
void latin2mic_with_table (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab)
 
void mic2latin_with_table (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab)
 
bool pg_utf8_islegal (const unsigned char *source, int length)
 

Variables

const pg_enc2name pg_enc2name_tbl []
 
const pg_enc2gettext pg_enc2gettext_tbl []
 
const pg_wchar_tbl pg_wchar_table []
 

Macro Definition Documentation

#define CHECK_ENCODING_CONVERSION_ARGS (   srcencoding,
  destencoding 
)
Value:
check_encoding_conversion_args(PG_GETARG_INT32(0), \
PG_GETARG_INT32(1), \
PG_GETARG_INT32(4), \
(srcencoding), \
(destencoding))
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
void check_encoding_conversion_args(int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)
Definition: wchar.c:1966

Definition at line 503 of file pg_wchar.h.

Referenced by ascii_to_mic(), ascii_to_utf8(), big5_to_euc_tw(), big5_to_mic(), big5_to_utf8(), euc_cn_to_mic(), euc_cn_to_utf8(), euc_jis_2004_to_shift_jis_2004(), euc_jis_2004_to_utf8(), euc_jp_to_mic(), euc_jp_to_sjis(), euc_jp_to_utf8(), euc_kr_to_mic(), euc_kr_to_utf8(), euc_tw_to_big5(), euc_tw_to_mic(), euc_tw_to_utf8(), gb18030_to_utf8(), gbk_to_utf8(), iso8859_1_to_utf8(), iso8859_to_utf8(), iso_to_koi8r(), iso_to_mic(), iso_to_win1251(), iso_to_win866(), johab_to_utf8(), koi8r_to_iso(), koi8r_to_mic(), koi8r_to_utf8(), koi8r_to_win1251(), koi8r_to_win866(), koi8u_to_utf8(), latin1_to_mic(), latin2_to_mic(), latin2_to_win1250(), latin3_to_mic(), latin4_to_mic(), mic_to_ascii(), mic_to_big5(), mic_to_euc_cn(), mic_to_euc_jp(), mic_to_euc_kr(), mic_to_euc_tw(), mic_to_iso(), mic_to_koi8r(), mic_to_latin1(), mic_to_latin2(), mic_to_latin3(), mic_to_latin4(), mic_to_sjis(), mic_to_win1250(), mic_to_win1251(), mic_to_win866(), shift_jis_2004_to_euc_jis_2004(), shift_jis_2004_to_utf8(), sjis_to_euc_jp(), sjis_to_mic(), sjis_to_utf8(), uhc_to_utf8(), utf8_to_ascii(), utf8_to_big5(), utf8_to_euc_cn(), utf8_to_euc_jis_2004(), utf8_to_euc_jp(), utf8_to_euc_kr(), utf8_to_euc_tw(), utf8_to_gb18030(), utf8_to_gbk(), utf8_to_iso8859(), utf8_to_iso8859_1(), utf8_to_johab(), utf8_to_koi8r(), utf8_to_koi8u(), utf8_to_shift_jis_2004(), utf8_to_sjis(), utf8_to_uhc(), utf8_to_win(), win1250_to_latin2(), win1250_to_mic(), win1251_to_iso(), win1251_to_koi8r(), win1251_to_mic(), win1251_to_win866(), win866_to_iso(), win866_to_koi8r(), win866_to_mic(), win866_to_win1251(), and win_to_utf8().

#define IS_LC1 (   c)    ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8d)
#define IS_LC2 (   c)    ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)
#define IS_LCPRV1 (   c)    ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B)

Definition at line 152 of file pg_wchar.h.

Referenced by pg_mule2wchar_with_len(), pg_mule_dsplen(), and pg_mule_mblen().

#define IS_LCPRV1_A_RANGE (   c)    ((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)

Definition at line 153 of file pg_wchar.h.

Referenced by pg_wchar2mule_with_len().

#define IS_LCPRV1_B_RANGE (   c)    ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)

Definition at line 155 of file pg_wchar.h.

Referenced by pg_wchar2mule_with_len().

#define IS_LCPRV2 (   c)    ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B)

Definition at line 164 of file pg_wchar.h.

Referenced by pg_mule2wchar_with_len(), pg_mule_dsplen(), and pg_mule_mblen().

#define IS_LCPRV2_A_RANGE (   c)    ((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)

Definition at line 165 of file pg_wchar.h.

Referenced by pg_wchar2mule_with_len().

#define IS_LCPRV2_B_RANGE (   c)    ((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)

Definition at line 167 of file pg_wchar.h.

Referenced by pg_wchar2mule_with_len().

#define ISSJISHEAD (   c)    (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))

Definition at line 41 of file pg_wchar.h.

Referenced by pg_sjis_verifier(), and sjis2mic().

#define ISSJISTAIL (   c)    (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))

Definition at line 42 of file pg_wchar.h.

Referenced by pg_sjis_verifier(), and sjis2mic().

#define LC_ARABIC_1_COLUMN   0xa5 /* Arabic 1-column (not supported) */

Definition at line 182 of file pg_wchar.h.

#define LC_ARABIC_2_COLUMN   0xa8 /* Arabic 2-column (not supported) */

Definition at line 188 of file pg_wchar.h.

#define LC_ARABIC_DIGIT   0xa4 /* Arabic digit (not supported) */

Definition at line 181 of file pg_wchar.h.

#define LC_ASCII_RIGHT_TO_LEFT
Value:
0xa6 /* ASCII (left half of ISO8859-1) with
* right-to-left direction (not
* supported) */

Definition at line 183 of file pg_wchar.h.

#define LC_BIG5_1
Value:
0x98 /* Plane 1 Chinese traditional (not
* supported) */

Definition at line 138 of file pg_wchar.h.

#define LC_BIG5_2
Value:
0x99 /* Plane 2 Chinese traditional (not
* supported) */

Definition at line 140 of file pg_wchar.h.

#define LC_CNS11643_1   0x95 /* CNS 11643-1992 Plane 1 */

Definition at line 134 of file pg_wchar.h.

Referenced by BIG5toCNS(), CNStoBIG5(), euc_tw2mic(), mic2big5(), and mic2euc_tw().

#define LC_CNS11643_2   0x96 /* CNS 11643-1992 Plane 2 */

Definition at line 135 of file pg_wchar.h.

Referenced by BIG5toCNS(), CNStoBIG5(), euc_tw2mic(), mic2big5(), and mic2euc_tw().

#define LC_CNS11643_3   0xf6 /* CNS 11643-1992 Plane 3 */

Definition at line 204 of file pg_wchar.h.

Referenced by big52mic(), BIG5toCNS(), CNStoBIG5(), euc_tw2mic(), and mic2euc_tw().

#define LC_CNS11643_4   0xf7 /* CNS 11643-1992 Plane 4 */

Definition at line 205 of file pg_wchar.h.

Referenced by big52mic(), BIG5toCNS(), and CNStoBIG5().

#define LC_CNS11643_5   0xf8 /* CNS 11643-1992 Plane 5 */

Definition at line 206 of file pg_wchar.h.

#define LC_CNS11643_6   0xf9 /* CNS 11643-1992 Plane 6 */

Definition at line 207 of file pg_wchar.h.

#define LC_CNS11643_7   0xfa /* CNS 11643-1992 Plane 7 */

Definition at line 208 of file pg_wchar.h.

Referenced by mic2euc_tw().

#define LC_ETHIOPIC   0xf5 /* Ethiopic characters (not supported) */

Definition at line 203 of file pg_wchar.h.

#define LC_GB2312_80   0x91 /* Chinese */

Definition at line 130 of file pg_wchar.h.

Referenced by euc_cn2mic(), and mic2euc_cn().

#define LC_INDIAN_1_COLUMN
Value:
0xf0 /* Indian charset for 1-column width
* glyphs (not supported) */

Definition at line 193 of file pg_wchar.h.

#define LC_INDIAN_2_COLUMN
Value:
0xfb /* Indian charset for 2-column width
* glyphs (not supported) */

Definition at line 209 of file pg_wchar.h.

#define LC_IPA
Value:
0xa1 /* IPA (International Phonetic
* Association) (not supported) */

Definition at line 175 of file pg_wchar.h.

#define LC_ISO8859_1   0x81 /* ISO8859 Latin 1 */

Definition at line 102 of file pg_wchar.h.

Referenced by latin1_to_mic(), and mic_to_latin1().

#define LC_ISO8859_15   0x8e /* ISO8859 Latin 9 (not supported yet) */

Definition at line 119 of file pg_wchar.h.

#define LC_ISO8859_2   0x82 /* ISO8859 Latin 2 */

Definition at line 103 of file pg_wchar.h.

Referenced by latin2_to_mic(), mic_to_latin2(), mic_to_win1250(), and win1250_to_mic().

#define LC_ISO8859_3   0x83 /* ISO8859 Latin 3 */

Definition at line 104 of file pg_wchar.h.

Referenced by latin3_to_mic(), and mic_to_latin3().

#define LC_ISO8859_4   0x84 /* ISO8859 Latin 4 */

Definition at line 105 of file pg_wchar.h.

Referenced by latin4_to_mic(), and mic_to_latin4().

#define LC_ISO8859_5   0x8c /* ISO8859 Cyrillic */

Definition at line 117 of file pg_wchar.h.

#define LC_ISO8859_6   0x87 /* Arabic (not supported yet) */

Definition at line 108 of file pg_wchar.h.

#define LC_ISO8859_7   0x86 /* Greek (not supported yet) */

Definition at line 107 of file pg_wchar.h.

#define LC_ISO8859_8   0x88 /* Hebrew (not supported yet) */

Definition at line 109 of file pg_wchar.h.

#define LC_ISO8859_9   0x8d /* ISO8859 Latin 5 (not supported yet) */

Definition at line 118 of file pg_wchar.h.

#define LC_JISX0201K   0x89 /* Japanese 1 byte kana */

Definition at line 110 of file pg_wchar.h.

Referenced by euc_jp2mic(), mic2euc_jp(), mic2sjis(), and sjis2mic().

#define LC_JISX0201R   0x8a /* Japanese 1 byte Roman */

Definition at line 111 of file pg_wchar.h.

#define LC_JISX0208   0x92 /* Japanese Kanji (JIS X 0208) */

Definition at line 131 of file pg_wchar.h.

Referenced by euc_jp2mic(), mic2euc_jp(), mic2sjis(), and sjis2mic().

#define LC_JISX0208_1978   0x90 /* Japanese Kanji, old JIS (not supported) */

Definition at line 129 of file pg_wchar.h.

#define LC_JISX0212   0x94 /* Japanese Kanji (JIS X 0212) */

Definition at line 133 of file pg_wchar.h.

Referenced by euc_jp2mic(), mic2euc_jp(), mic2sjis(), and sjis2mic().

#define LC_JISX0213_1
Value:
0x97 /* Japanese Kanji (JIS X 0213 Plane 1)
* (not supported) */

Definition at line 136 of file pg_wchar.h.

#define LC_KOI8_R   0x8b /* Cyrillic KOI8-R */
#define LC_KS5601   0x93 /* Korean */

Definition at line 132 of file pg_wchar.h.

Referenced by euc_kr2mic(), and mic2euc_kr().

#define LC_LAO
Value:
0xa7 /* Lao characters (ISO10646 0E80..0EDF)
* (not supported) */

Definition at line 186 of file pg_wchar.h.

#define LC_SISHENG
Value:
0xa0 /* Chinese SiSheng characters for
* PinYin/ZhuYin (not supported) */

Definition at line 173 of file pg_wchar.h.

#define LC_TIBETAN   0xfc /* Tibetan (not supported) */

Definition at line 211 of file pg_wchar.h.

#define LC_TIBETAN_1_COLUMN
Value:
0xf1 /* Tibetan 1-column width glyphs (not
* supported) */

Definition at line 195 of file pg_wchar.h.

#define LC_TIS620   0x85 /* Thai (not supported yet) */

Definition at line 106 of file pg_wchar.h.

#define LC_UNICODE_SUBSET
Value:
0xf4 /* Unicode characters of the range
* U+0100..U+24FF. (not supported) */

Definition at line 201 of file pg_wchar.h.

#define LC_UNICODE_SUBSET_2
Value:
0xf2 /* Unicode characters of the range
* U+2500..U+33FF. (not supported) */

Definition at line 197 of file pg_wchar.h.

#define LC_UNICODE_SUBSET_3
Value:
0xf3 /* Unicode characters of the range
* U+E000..U+FFFF. (not supported) */

Definition at line 199 of file pg_wchar.h.

#define LC_VISCII_LOWER
Value:
0xa2 /* Vietnamese VISCII1.1 lower-case (not
* supported) */

Definition at line 177 of file pg_wchar.h.

#define LC_VISCII_UPPER
Value:
0xa3 /* Vietnamese VISCII1.1 upper-case (not
* supported) */

Definition at line 179 of file pg_wchar.h.

#define LCPRV1_A   0x9a

Definition at line 150 of file pg_wchar.h.

Referenced by pg_wchar2mule_with_len().

#define LCPRV1_B   0x9b

Definition at line 151 of file pg_wchar.h.

Referenced by pg_wchar2mule_with_len().

#define LCPRV2_A   0x9c

Definition at line 162 of file pg_wchar.h.

Referenced by pg_wchar2mule_with_len().

#define LCPRV2_B   0x9d

Definition at line 163 of file pg_wchar.h.

Referenced by big52mic(), euc_tw2mic(), mic2big5(), mic2euc_tw(), and pg_wchar2mule_with_len().

#define MAX_CONVERSION_GROWTH   4
#define MAX_MULTIBYTE_CHAR_LEN   4

Definition at line 30 of file pg_wchar.h.

Referenced by convertPgWchar(), and fillTrgm().

#define PG_ENCODING_BE_LAST   PG_KOI8U

Definition at line 289 of file pg_wchar.h.

Referenced by get_encoding_name_for_icu().

#define PG_ENCODING_IS_CLIENT_ONLY (   _enc)    ((_enc) > PG_ENCODING_BE_LAST && (_enc) < _PG_LAST_ENCODING_)

Definition at line 298 of file pg_wchar.h.

Referenced by BeginCopy().

#define PG_VALID_BE_ENCODING (   _enc)    ((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST)
#define PG_VALID_FE_ENCODING (   _enc)    PG_VALID_ENCODING(_enc)

Definition at line 305 of file pg_wchar.h.

Referenced by pg_valid_client_encoding(), PrepareClientEncoding(), and SetClientEncoding().

Typedef Documentation

typedef int(* mb2wchar_with_len_converter)(const unsigned char *from, pg_wchar *to, int len)

Definition at line 357 of file pg_wchar.h.

typedef bool(* mbcharacter_incrementer)(unsigned char *mbstr, int len)

Definition at line 369 of file pg_wchar.h.

typedef int(* mbdisplaylen_converter)(const unsigned char *mbstr)

Definition at line 367 of file pg_wchar.h.

typedef int(* mblen_converter)(const unsigned char *mbstr)

Definition at line 365 of file pg_wchar.h.

typedef int(* mbverifier)(const unsigned char *mbstr, int len)

Definition at line 371 of file pg_wchar.h.

typedef unsigned int pg_wchar

Definition at line 25 of file pg_wchar.h.

typedef uint32(* utf_local_conversion_func)(uint32 code)

Definition at line 495 of file pg_wchar.h.

typedef int(* wchar2mb_with_len_converter)(const pg_wchar *from, unsigned char *to, int len)

Definition at line 361 of file pg_wchar.h.

Enumeration Type Documentation

enum pg_enc
Enumerator
PG_SQL_ASCII 
PG_EUC_JP 
PG_EUC_CN 
PG_EUC_KR 
PG_EUC_TW 
PG_EUC_JIS_2004 
PG_UTF8 
PG_MULE_INTERNAL 
PG_LATIN1 
PG_LATIN2 
PG_LATIN3 
PG_LATIN4 
PG_LATIN5 
PG_LATIN6 
PG_LATIN7 
PG_LATIN8 
PG_LATIN9 
PG_LATIN10 
PG_WIN1256 
PG_WIN1258 
PG_WIN866 
PG_WIN874 
PG_KOI8R 
PG_WIN1251 
PG_WIN1252 
PG_ISO_8859_5 
PG_ISO_8859_6 
PG_ISO_8859_7 
PG_ISO_8859_8 
PG_WIN1250 
PG_WIN1253 
PG_WIN1254 
PG_WIN1255 
PG_WIN1257 
PG_KOI8U 
PG_SJIS 
PG_BIG5 
PG_GBK 
PG_UHC 
PG_GB18030 
PG_JOHAB 
PG_SHIFT_JIS_2004 
_PG_LAST_ENCODING_ 

Definition at line 238 of file pg_wchar.h.

239 {
240  PG_SQL_ASCII = 0, /* SQL/ASCII */
241  PG_EUC_JP, /* EUC for Japanese */
242  PG_EUC_CN, /* EUC for Chinese */
243  PG_EUC_KR, /* EUC for Korean */
244  PG_EUC_TW, /* EUC for Taiwan */
245  PG_EUC_JIS_2004, /* EUC-JIS-2004 */
246  PG_UTF8, /* Unicode UTF8 */
247  PG_MULE_INTERNAL, /* Mule internal code */
248  PG_LATIN1, /* ISO-8859-1 Latin 1 */
249  PG_LATIN2, /* ISO-8859-2 Latin 2 */
250  PG_LATIN3, /* ISO-8859-3 Latin 3 */
251  PG_LATIN4, /* ISO-8859-4 Latin 4 */
252  PG_LATIN5, /* ISO-8859-9 Latin 5 */
253  PG_LATIN6, /* ISO-8859-10 Latin6 */
254  PG_LATIN7, /* ISO-8859-13 Latin7 */
255  PG_LATIN8, /* ISO-8859-14 Latin8 */
256  PG_LATIN9, /* ISO-8859-15 Latin9 */
257  PG_LATIN10, /* ISO-8859-16 Latin10 */
258  PG_WIN1256, /* windows-1256 */
259  PG_WIN1258, /* Windows-1258 */
260  PG_WIN866, /* (MS-DOS CP866) */
261  PG_WIN874, /* windows-874 */
262  PG_KOI8R, /* KOI8-R */
263  PG_WIN1251, /* windows-1251 */
264  PG_WIN1252, /* windows-1252 */
265  PG_ISO_8859_5, /* ISO-8859-5 */
266  PG_ISO_8859_6, /* ISO-8859-6 */
267  PG_ISO_8859_7, /* ISO-8859-7 */
268  PG_ISO_8859_8, /* ISO-8859-8 */
269  PG_WIN1250, /* windows-1250 */
270  PG_WIN1253, /* windows-1253 */
271  PG_WIN1254, /* windows-1254 */
272  PG_WIN1255, /* windows-1255 */
273  PG_WIN1257, /* windows-1257 */
274  PG_KOI8U, /* KOI8-U */
275  /* PG_ENCODING_BE_LAST points to the above entry */
276 
277  /* the following are for client encodings only */
278  PG_SJIS, /* Shift JIS (Windows-932) */
279  PG_BIG5, /* Big5 (Windows-950) */
280  PG_GBK, /* GBK (Windows-936) */
281  PG_UHC, /* UHC (Windows-949) */
282  PG_GB18030, /* GB18030 */
283  PG_JOHAB, /* EUC for Korean JOHAB */
284  PG_SHIFT_JIS_2004, /* Shift-JIS-2004 */
285  _PG_LAST_ENCODING_ /* mark only */
286 
287 } pg_enc;
pg_enc
Definition: pg_wchar.h:238

Function Documentation

unsigned short BIG5toCNS ( unsigned short  big5,
unsigned char *  lc 
)

Definition at line 292 of file big5.c.

References b1c4, b2c3, BinarySearchRange(), i, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, and LC_CNS11643_4.

Referenced by big52mic().

293 {
294  unsigned short cns = 0;
295  int i;
296 
297  if (big5 < 0xc940U)
298  {
299  /* level 1 */
300 
301  for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
302  {
303  if (b1c4[i][0] == big5)
304  {
305  *lc = LC_CNS11643_4;
306  return (b1c4[i][1] | 0x8080U);
307  }
308  }
309 
310  if (0 < (cns = BinarySearchRange(big5Level1ToCnsPlane1, 23, big5)))
311  *lc = LC_CNS11643_1;
312  }
313  else if (big5 == 0xc94aU)
314  {
315  /* level 2 */
316  *lc = LC_CNS11643_1;
317  cns = 0x4442;
318  }
319  else
320  {
321  /* level 2 */
322  for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
323  {
324  if (b2c3[i][0] == big5)
325  {
326  *lc = LC_CNS11643_3;
327  return (b2c3[i][1] | 0x8080U);
328  }
329  }
330 
331  if (0 < (cns = BinarySearchRange(big5Level2ToCnsPlane2, 46, big5)))
332  *lc = LC_CNS11643_2;
333  }
334 
335  if (0 == cns)
336  { /* no mapping Big5 to CNS 11643-1992 */
337  *lc = 0;
338  return (unsigned short) '?';
339  }
340 
341  return cns | 0x8080;
342 }
#define LC_CNS11643_1
Definition: pg_wchar.h:134
static unsigned short BinarySearchRange(const codes_t *array, int high, unsigned short code)
Definition: big5.c:208
static const codes_t big5Level2ToCnsPlane2[48]
Definition: big5.c:84
#define LC_CNS11643_4
Definition: pg_wchar.h:205
static const codes_t big5Level1ToCnsPlane1[25]
Definition: big5.c:25
#define LC_CNS11643_2
Definition: pg_wchar.h:135
#define LC_CNS11643_3
Definition: pg_wchar.h:204
static const unsigned short b1c4[][2]
Definition: big5.c:189
int i
static const unsigned short b2c3[][2]
Definition: big5.c:197
void check_encoding_conversion_args ( int  src_encoding,
int  dest_encoding,
int  len,
int  expected_src_encoding,
int  expected_dest_encoding 
)

Definition at line 1966 of file wchar.c.

References elog, ERROR, name, pg_enc2name_tbl, and PG_VALID_ENCODING.

1971 {
1972  if (!PG_VALID_ENCODING(src_encoding))
1973  elog(ERROR, "invalid source encoding ID: %d", src_encoding);
1974  if (src_encoding != expected_src_encoding && expected_src_encoding >= 0)
1975  elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
1976  pg_enc2name_tbl[expected_src_encoding].name,
1977  pg_enc2name_tbl[src_encoding].name);
1978  if (!PG_VALID_ENCODING(dest_encoding))
1979  elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
1980  if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= 0)
1981  elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
1982  pg_enc2name_tbl[expected_dest_encoding].name,
1983  pg_enc2name_tbl[dest_encoding].name);
1984  if (len < 0)
1985  elog(ERROR, "encoding conversion length must not be negative");
1986 }
#define ERROR
Definition: elog.h:43
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:307
const char * name
Definition: encode.c:521
#define elog
Definition: elog.h:219
unsigned short CNStoBIG5 ( unsigned short  cns,
unsigned char  lc 
)

Definition at line 345 of file big5.c.

References b1c4, b2c3, BinarySearchRange(), i, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, and LC_CNS11643_4.

Referenced by mic2big5().

346 {
347  int i;
348  unsigned int big5 = 0;
349 
350  cns &= 0x7f7f;
351 
352  switch (lc)
353  {
354  case LC_CNS11643_1:
355  big5 = BinarySearchRange(cnsPlane1ToBig5Level1, 24, cns);
356  break;
357  case LC_CNS11643_2:
358  big5 = BinarySearchRange(cnsPlane2ToBig5Level2, 47, cns);
359  break;
360  case LC_CNS11643_3:
361  for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
362  {
363  if (b2c3[i][1] == cns)
364  return b2c3[i][0];
365  }
366  break;
367  case LC_CNS11643_4:
368  for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
369  {
370  if (b1c4[i][1] == cns)
371  return b1c4[i][0];
372  }
373  default:
374  break;
375  }
376  return big5;
377 }
#define LC_CNS11643_1
Definition: pg_wchar.h:134
static unsigned short BinarySearchRange(const codes_t *array, int high, unsigned short code)
Definition: big5.c:208
#define LC_CNS11643_4
Definition: pg_wchar.h:205
static const codes_t cnsPlane2ToBig5Level2[49]
Definition: big5.c:136
static const codes_t cnsPlane1ToBig5Level1[26]
Definition: big5.c:54
#define LC_CNS11643_2
Definition: pg_wchar.h:135
#define LC_CNS11643_3
Definition: pg_wchar.h:204
static const unsigned short b1c4[][2]
Definition: big5.c:189
int i
static const unsigned short b2c3[][2]
Definition: big5.c:197
const char* get_encoding_name_for_icu ( int  encoding)

Definition at line 461 of file encnames.c.

References encoding, ereport, errcode(), errmsg(), ERROR, lengthof, pg_enc2icu_tbl, PG_ENCODING_BE_LAST, pg_encoding_to_char(), and StaticAssertStmt.

462 {
463  const char *icu_encoding_name;
464 
465  StaticAssertStmt(lengthof(pg_enc2icu_tbl) == PG_ENCODING_BE_LAST + 1,
466  "pg_enc2icu_tbl incomplete");
467 
468  icu_encoding_name = pg_enc2icu_tbl[encoding];
469 
470  if (!icu_encoding_name)
471  ereport(ERROR,
472  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
473  errmsg("encoding \"%s\" not supported by ICU",
474  pg_encoding_to_char(encoding))));
475 
476  return icu_encoding_name;
477 }
int errcode(int sqlerrcode)
Definition: elog.c:575
#define lengthof(array)
Definition: c.h:556
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:763
#define ERROR
Definition: elog.h:43
#define ereport(elevel, rest)
Definition: elog.h:122
#define PG_ENCODING_BE_LAST
Definition: pg_wchar.h:289
static const char *const pg_enc2icu_tbl[]
Definition: encnames.c:415
static char * encoding
Definition: initdb.c:123
const char * pg_encoding_to_char(int encoding)
Definition: encnames.c:607
int errmsg(const char *fmt,...)
Definition: elog.c:797
const char* GetDatabaseEncodingName ( void  )
int GetMessageEncoding ( void  )

Definition at line 1034 of file mbutils.c.

References pg_enc2name::encoding.

1035 {
1036  return MessageEncoding->encoding;
1037 }
pg_enc encoding
Definition: pg_wchar.h:329
static const pg_enc2name * MessageEncoding
Definition: mbutils.c:75
void InitializeClientEncoding ( void  )

Definition at line 282 of file mbutils.c.

References Assert, backend_startup_complete, ereport, errcode(), errmsg(), FATAL, GetDatabaseEncodingName(), name, pending_client_encoding, pg_enc2name_tbl, PrepareClientEncoding(), and SetClientEncoding().

Referenced by InitPostgres().

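283 {
284  Assert(!backend_startup_complete);
285  backend_startup_complete = true;
286 
287  if (PrepareClientEncoding(pending_client_encoding) < 0 ||
288  SetClientEncoding(pending_client_encoding) < 0)
289  {
290  /*
291  * Oops, the requested conversion is not available. We couldn't fail
292  * before, but we can now.
293  */
294  ereport(FATAL,
295  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
296  errmsg("conversion between %s and %s is not supported",
297  pg_enc2name_tbl[pending_client_encoding].name,
298  GetDatabaseEncodingName())));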
299  }
300 }
int PrepareClientEncoding(int encoding)
Definition: mbutils.c:103
static bool backend_startup_complete
Definition: mbutils.c:83
static int pending_client_encoding
Definition: mbutils.c:84
int errcode(int sqlerrcode)
Definition: elog.c:575
#define FATAL
Definition: elog.h:52
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:307
int SetClientEncoding(int encoding)
Definition: mbutils.c:201
#define ereport(elevel, rest)
Definition: elog.h:122
#define Assert(condition)
Definition: c.h:681
const char * GetDatabaseEncodingName(void)
Definition: mbutils.c:1010
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:797
bool is_encoding_supported_by_icu ( int  encoding)

Definition at line 455 of file encnames.c.

References pg_enc2icu_tbl.

Referenced by lookup_collation().

456 {
457  return (pg_enc2icu_tbl[encoding] != NULL);
458 }
static const char *const pg_enc2icu_tbl[]
Definition: encnames.c:415
static char * encoding
Definition: initdb.c:123
void latin2mic ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  lc,
int  encoding 
)

Definition at line 71 of file conv.c.

References IS_HIGHBIT_SET, and report_invalid_encoding().

Referenced by koi8r_to_mic(), latin1_to_mic(), latin2_to_mic(), latin3_to_mic(), and latin4_to_mic().

73 {
74  int c1;
75 
76  while (len > 0)
77  {
78  c1 = *l;
79  if (c1 == 0)
80  report_invalid_encoding(encoding, (const char *) l, len);
81  if (IS_HIGHBIT_SET(c1))
82  *p++ = lc;
83  *p++ = c1;
84  l++;
85  len--;
86  }
87  *p = '\0';
88 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:979
static char * encoding
Definition: initdb.c:123
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
void latin2mic_with_table ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
const unsigned char *  tab 
)

Definition at line 193 of file conv.c.

References HIGHBIT, IS_HIGHBIT_SET, PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by iso_to_mic(), win1250_to_mic(), win1251_to_mic(), and win866_to_mic().

199 {
200  unsigned char c1,
201  c2;
202 
203  while (len > 0)
204  {
205  c1 = *l;
206  if (c1 == 0)
207  report_invalid_encoding(encoding, (const char *) l, len);
208  if (!IS_HIGHBIT_SET(c1))
209  *p++ = c1;
210  else
211  {
212  c2 = tab[c1 - HIGHBIT];
213  if (c2)
214  {
215  *p++ = lc;
216  *p++ = c2;
217  }
218  else
219  report_untranslatable_char(encoding, PG_MULE_INTERNAL,
220  (const char *) l, len);
221  }
222  l++;
223  len--;
224  }
225  *p = '\0';
226 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:979
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
#define HIGHBIT
Definition: c.h:978
static char * encoding
Definition: initdb.c:123
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
void local2local ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  src_encoding,
int  dest_encoding,
const unsigned char *  tab 
)

Definition at line 30 of file conv.c.

References HIGHBIT, IS_HIGHBIT_SET, report_invalid_encoding(), and report_untranslatable_char().

Referenced by iso_to_koi8r(), iso_to_win1251(), iso_to_win866(), koi8r_to_iso(), koi8r_to_win1251(), koi8r_to_win866(), latin2_to_win1250(), win1250_to_latin2(), win1251_to_iso(), win1251_to_koi8r(), win1251_to_win866(), win866_to_iso(), win866_to_koi8r(), and win866_to_win1251().

36 {
37  unsigned char c1,
38  c2;
39 
40  while (len > 0)
41  {
42  c1 = *l;
43  if (c1 == 0)
44  report_invalid_encoding(src_encoding, (const char *) l, len);
45  if (!IS_HIGHBIT_SET(c1))
46  *p++ = c1;
47  else
48  {
49  c2 = tab[c1 - HIGHBIT];
50  if (c2)
51  *p++ = c2;
52  else
53  report_untranslatable_char(src_encoding, dest_encoding,
54  (const char *) l, len);
55  }
56  l++;
57  len--;
58  }
59  *p = '\0';
60 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:979
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
#define HIGHBIT
Definition: c.h:978
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
void LocalToUtf ( const unsigned char *  iso,
int  len,
unsigned char *  utf,
const pg_mb_radix_tree map,
const pg_local_to_utf_combined cmap,
int  cmapsize,
utf_local_conversion_func  conv_func,
int  encoding 
)

Definition at line 666 of file conv.c.

References compare4(), elog, ereport, errcode(), errmsg(), ERROR, IS_HIGHBIT_SET, pg_encoding_verifymb(), pg_mb_radix_conv(), PG_UTF8, PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), store_coded_char(), pg_local_to_utf_combined::utf1, and pg_local_to_utf_combined::utf2.

Referenced by big5_to_utf8(), euc_cn_to_utf8(), euc_jis_2004_to_utf8(), euc_jp_to_utf8(), euc_kr_to_utf8(), euc_tw_to_utf8(), gb18030_to_utf8(), gbk_to_utf8(), iso8859_to_utf8(), johab_to_utf8(), koi8r_to_utf8(), koi8u_to_utf8(), shift_jis_2004_to_utf8(), sjis_to_utf8(), uhc_to_utf8(), and win_to_utf8().

672 {
673  uint32 iiso;
674  int l;
675  const pg_local_to_utf_combined *cp;
676 
677  if (!PG_VALID_ENCODING(encoding))
678  ereport(ERROR,
679  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
680  errmsg("invalid encoding number: %d", encoding)));
681 
682  for (; len > 0; len -= l)
683  {
684  unsigned char b1 = 0;
685  unsigned char b2 = 0;
686  unsigned char b3 = 0;
687  unsigned char b4 = 0;
688 
689  /* "break" cases all represent errors */
690  if (*iso == '\0')
691  break;
692 
693  if (!IS_HIGHBIT_SET(*iso))
694  {
695  /* ASCII case is easy, assume it's one-to-one conversion */
696  *utf++ = *iso++;
697  l = 1;
698  continue;
699  }
700 
701  l = pg_encoding_verifymb(encoding, (const char *) iso, len);
702  if (l < 0)
703  break;
704 
705  /* collect coded char of length l */
706  if (l == 1)
707  b4 = *iso++;
708  else if (l == 2)
709  {
710  b3 = *iso++;
711  b4 = *iso++;
712  }
713  else if (l == 3)
714  {
715  b2 = *iso++;
716  b3 = *iso++;
717  b4 = *iso++;
718  }
719  else if (l == 4)
720  {
721  b1 = *iso++;
722  b2 = *iso++;
723  b3 = *iso++;
724  b4 = *iso++;
725  }
726  else
727  {
728  elog(ERROR, "unsupported character length %d", l);
729  iiso = 0; /* keep compiler quiet */
730  }
731  iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
732 
733  if (map)
734  {
735  uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
736 
737  if (converted)
738  {
739  utf = store_coded_char(utf, converted);
740  continue;
741  }
742 
743  /* If there's a combined character map, try that */
744  if (cmap)
745  {
746  cp = bsearch(&iiso, cmap, cmapsize,
747  sizeof(pg_local_to_utf_combined), compare4);
748 
749  if (cp)
750  {
751  utf = store_coded_char(utf, cp->utf1);
752  utf = store_coded_char(utf, cp->utf2);
753  continue;
754  }
755  }
756  }
757 
758  /* if there's a conversion function, try that */
759  if (conv_func)
760  {
761  uint32 converted = (*conv_func) (iiso);
762 
763  if (converted)
764  {
765  utf = store_coded_char(utf, converted);
766  continue;
767  }
768  }
769 
770  /* failed to translate this character */
771  report_untranslatable_char(encoding, PG_UTF8,
772  (const char *) (iso - l), len);
773  }
774 
775  /* if we broke out of loop early, must be invalid input */
776  if (len > 0)
777  report_invalid_encoding(encoding, (const char *) iso, len);
778 
779  *utf = '\0';
780 }
static int compare4(const void *p1, const void *p2)
Definition: conv.c:309
int errcode(int sqlerrcode)
Definition: elog.c:575
static unsigned char * store_coded_char(unsigned char *dest, uint32 code)
Definition: conv.c:323
int pg_encoding_verifymb(int encoding, const char *mbstr, int len)
Definition: wchar.c:1809
#define IS_HIGHBIT_SET(ch)
Definition: c.h:979
#define ERROR
Definition: elog.h:43
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
static uint32 pg_mb_radix_conv(const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)
Definition: conv.c:343
unsigned int uint32
Definition: c.h:258
#define ereport(elevel, rest)
Definition: elog.h:122
static char * encoding
Definition: initdb.c:123
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define elog
Definition: elog.h:219
void mic2latin ( const unsigned char *  mic,
unsigned char *  p,
int  len,
int  lc,
int  encoding 
)

Definition at line 99 of file conv.c.

References IS_HIGHBIT_SET, pg_mic_mblen(), PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by mic_to_koi8r(), mic_to_latin1(), mic_to_latin2(), mic_to_latin3(), and mic_to_latin4().

101 {
102  int c1;
103 
104  while (len > 0)
105  {
106  c1 = *mic;
107  if (c1 == 0)
108  report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
109  if (!IS_HIGHBIT_SET(c1))
110  {
111  /* easy for ASCII */
112  *p++ = c1;
113  mic++;
114  len--;
115  }
116  else
117  {
118  int l = pg_mic_mblen(mic);
119 
120  if (len < l)
121  report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
122  len);
123  if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
124  report_untranslatable_char(PG_MULE_INTERNAL, encoding,
125  (const char *) mic, len);
126  *p++ = mic[1];
127  mic += 2;
128  len -= 2;
129  }
130  }
131  *p = '\0';
132 }
int pg_mic_mblen(const unsigned char *mbstr)
Definition: wchar.c:1776
#define IS_HIGHBIT_SET(ch)
Definition: c.h:979
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
static char * encoding
Definition: initdb.c:123
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
void mic2latin_with_table ( const unsigned char *  mic,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
const unsigned char *  tab 
)

Definition at line 241 of file conv.c.

References HIGHBIT, IS_HIGHBIT_SET, pg_mic_mblen(), PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by mic_to_iso(), mic_to_win1250(), mic_to_win1251(), and mic_to_win866().

247 {
248  unsigned char c1,
249  c2;
250 
251  while (len > 0)
252  {
253  c1 = *mic;
254  if (c1 == 0)
255  report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
256  if (!IS_HIGHBIT_SET(c1))
257  {
258  /* easy for ASCII */
259  *p++ = c1;
260  mic++;
261  len--;
262  }
263  else
264  {
265  int l = pg_mic_mblen(mic);
266 
267  if (len < l)
268  report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
269  len);
270  if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
271  (c2 = tab[mic[1] - HIGHBIT]) == 0)
272  {
273  report_untranslatable_char(PG_MULE_INTERNAL, encoding,
274  (const char *) mic, len);
275  break; /* keep compiler quiet */
276  }
277  *p++ = c2;
278  mic += 2;
279  len -= 2;
280  }
281  }
282  *p = '\0';
283 }
int pg_mic_mblen(const unsigned char *mbstr)
Definition: wchar.c:1776
#define IS_HIGHBIT_SET(ch)
Definition: c.h:979
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
#define HIGHBIT
Definition: c.h:978
static char * encoding
Definition: initdb.c:123
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
char* pg_any_to_server ( const char *  s,
int  len,
int  encoding 
)

Definition at line 561 of file mbutils.c.

References pg_enc2name::encoding, ereport, errcode(), errmsg(), ERROR, i, IS_HIGHBIT_SET, name, perform_default_encoding_conversion(), pg_do_encoding_conversion(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_BE_ENCODING, and pg_verify_mbstr().

Referenced by ASN1_STRING_to_text(), CopyReadLine(), db_encoding_convert(), dsnowball_lexize(), pg_client_to_server(), pg_stat_statements_internal(), pgp_armor_headers(), PLyUnicode_Bytes(), read_extension_script_file(), t_readline(), unicode_to_sqlchar(), utf_u2e(), X509_NAME_to_cstring(), X509_NAME_to_text(), and xml_recv().

562 {
563  if (len <= 0)
564  return (char *) s; /* empty string is always valid */
565 
566  if (encoding == DatabaseEncoding->encoding ||
567  encoding == PG_SQL_ASCII)
568  {
569  /*
570  * No conversion is needed, but we must still validate the data.
571  */
572  (void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
573  return (char *) s;
574  }
575 
576  if (DatabaseEncoding->encoding == PG_SQL_ASCII)
577  {
578  /*
579  * No conversion is possible, but we must still validate the data,
580  * because the client-side code might have done string escaping using
581  * the selected client_encoding. If the client encoding is ASCII-safe
582  * then we just do a straight validation under that encoding. For an
583  * ASCII-unsafe encoding we have a problem: we dare not pass such data
584  * to the parser but we have no way to convert it. We compromise by
585  * rejecting the data if it contains any non-ASCII characters.
586  */
587  if (PG_VALID_BE_ENCODING(encoding))
588  (void) pg_verify_mbstr(encoding, s, len, false);
589  else
590  {
591  int i;
592 
593  for (i = 0; i < len; i++)
594  {
595  if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
596  ereport(ERROR,
597  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
598  errmsg("invalid byte value for encoding \"%s\": 0x%02x",
599  pg_enc2name_tbl[PG_SQL_ASCII].name,
600  (unsigned char) s[i])));
601  }
602  }
603  return (char *) s;
604  }
605 
606  /* Fast path if we can use cached conversion function */
607  if (encoding == ClientEncoding->encoding)
608  return perform_default_encoding_conversion(s, len, true);
609 
610  /* General case ... will not work outside transactions */
611  return (char *) pg_do_encoding_conversion((unsigned char *) s,
612  len,
613  encoding,
614  DatabaseEncoding->encoding);
615 }
static char * perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)
Definition: mbutils.c:668
int errcode(int sqlerrcode)
Definition: elog.c:575
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:73
unsigned char * pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding)
Definition: mbutils.c:326
#define IS_HIGHBIT_SET(ch)
Definition: c.h:979
#define ERROR
Definition: elog.h:43
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:307
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: wchar.c:1877
#define ereport(elevel, rest)
Definition: elog.h:122
pg_enc encoding
Definition: pg_wchar.h:329
static char * encoding
Definition: initdb.c:123
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:295
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:797
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:74
int i
void pg_ascii2mic ( const unsigned char *  l,
unsigned char *  p,
int  len 
)

Definition at line 143 of file conv.c.

References IS_HIGHBIT_SET, PG_SQL_ASCII, and report_invalid_encoding().

Referenced by ascii_to_mic(), and ascii_to_utf8().

144 {
145  int c1;
146 
147  while (len > 0)
148  {
149  c1 = *l;
150  if (c1 == 0 || IS_HIGHBIT_SET(c1))
151  report_invalid_encoding(PG_SQL_ASCII, (const char *) l, len);
152  *p++ = c1;
153  l++;
154  len--;
155  }
156  *p = '\0';
157 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:979
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
int pg_char_and_wchar_strcmp ( const char *  s1,
const pg_wchar s2 
)

Definition at line 41 of file wstrcmp.c.

42 {
43  while ((pg_wchar) *s1 == *s2++)
44  if (*s1++ == 0)
45  return 0;
46  return *(const unsigned char *) s1 - *(const pg_wchar *) (s2 - 1);
47 }
char * s1
unsigned int pg_wchar
Definition: mbprint.c:31
char * s2
int pg_char_and_wchar_strncmp ( const char *  s1,
const pg_wchar s2,
size_t  n 
)

Definition at line 55 of file wstrncmp.c.

Referenced by cclass(), and element().

56 {
57  if (n == 0)
58  return 0;
59  do
60  {
61  if ((pg_wchar) ((unsigned char) *s1) != *s2++)
62  return ((pg_wchar) ((unsigned char) *s1) - *(s2 - 1));
63  if (*s1++ == 0)
64  break;
65  } while (--n != 0);
66  return 0;
67 }
char * s1
unsigned int pg_wchar
Definition: mbprint.c:31
char * s2
int pg_char_to_encoding ( const char *  name)

Definition at line 551 of file encnames.c.

552 {
553  unsigned int nel = lengthof(pg_encname_tbl);
554  const pg_encname *base = pg_encname_tbl,
555  *last = base + nel - 1,
556  *position;
557  int result;
558  char buff[NAMEDATALEN],
559  *key;
560 
561  if (name == NULL || *name == '\0')
562  return -1;
563 
564  if (strlen(name) >= NAMEDATALEN)
565  {
566 #ifdef FRONTEND
567  fprintf(stderr, "encoding name too long\n");
568  return -1;
569 #else
570  ereport(ERROR,
571  (errcode(ERRCODE_NAME_TOO_LONG),
572  errmsg("encoding name too long")));
573 #endif
574  }
575  key = clean_encoding_name(name, buff);
576 
577  while (last >= base)
578  {
579  position = base + ((last - base) >> 1);
580  result = key[0] - position->name[0];
581 
582  if (result == 0)
583  {
584  result = strcmp(key, position->name);
585  if (result == 0)
586  return position->encoding;
587  }
588  if (result < 0)
589  last = position - 1;
590  else
591  base = position + 1;
592  }
593  return -1;
594 }
int errcode(int sqlerrcode)
Definition: elog.c:575
#define lengthof(array)
Definition: c.h:556
#define NAMEDATALEN
#define ERROR
Definition: elog.h:43
static const pg_encname pg_encname_tbl[]
Definition: encnames.c:38
#define ereport(elevel, rest)
Definition: elog.h:122
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:797
static char * clean_encoding_name(const char *key, char *newkey)
Definition: encnames.c:525
char* pg_client_to_server ( const char *  s,
int  len 
)

Definition at line 545 of file mbutils.c.

References pg_enc2name::encoding, and pg_any_to_server().

Referenced by exec_bind_message(), parse_fcall_arguments(), pq_getmsgstring(), and pq_getmsgtext().

546 {
547  return pg_any_to_server(s, len, ClientEncoding->encoding);
548 }
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:73
pg_enc encoding
Definition: pg_wchar.h:329
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:561
mbcharacter_incrementer pg_database_encoding_character_incrementer ( void  )

Definition at line 1842 of file wchar.c.

References GetDatabaseEncoding(), PG_EUC_JP, pg_eucjp_increment(), pg_generic_charinc(), PG_UTF8, and pg_utf8_increment().

Referenced by make_greater_string().

1843 {
1844  /*
1845  * Eventually it might be best to add a field to pg_wchar_table[], but for
1846  * now we just use a switch.
1847  */
1848  switch (GetDatabaseEncoding())
1849  {
1850  case PG_UTF8:
1851  return pg_utf8_increment;
1852 
1853  case PG_EUC_JP:
1854  return pg_eucjp_increment;
1855 
1856  default:
1857  return pg_generic_charinc;
1858  }
1859 }
static bool pg_eucjp_increment(unsigned char *charptr, int length)
Definition: wchar.c:1639
static bool pg_generic_charinc(unsigned char *charptr, int len)
Definition: wchar.c:1527
static bool pg_utf8_increment(unsigned char *charptr, int length)
Definition: wchar.c:1561
int GetDatabaseEncoding(void)
Definition: mbutils.c:1004
unsigned char* pg_do_encoding_conversion ( unsigned char *  src,
int  len,
int  src_encoding,
int  dest_encoding 
)

Definition at line 326 of file mbutils.c.

References CStringGetDatum, elog, ereport, errcode(), errdetail(), errmsg(), ERROR, FindDefaultConversionProc(), Int32GetDatum, IsTransactionState(), MAX_CONVERSION_GROWTH, MaxAllocSize, OidFunctionCall5, OidIsValid, palloc(), pg_encoding_to_char(), PG_SQL_ASCII, and pg_verify_mbstr().

Referenced by convert_charset(), pg_any_to_server(), pg_convert(), and pg_server_to_any().

328 {
329  unsigned char *result;
330  Oid proc;
331 
332  if (len <= 0)
333  return src; /* empty string is always valid */
334 
335  if (src_encoding == dest_encoding)
336  return src; /* no conversion required, assume valid */
337 
338  if (dest_encoding == PG_SQL_ASCII)
339  return src; /* any string is valid in SQL_ASCII */
340 
341  if (src_encoding == PG_SQL_ASCII)
342  {
343  /* No conversion is possible, but we must validate the result */
344  (void) pg_verify_mbstr(dest_encoding, (const char *) src, len, false);
345  return src;
346  }
347 
348  if (!IsTransactionState()) /* shouldn't happen */
349  elog(ERROR, "cannot perform encoding conversion outside a transaction");
350 
351  proc = FindDefaultConversionProc(src_encoding, dest_encoding);
352  if (!OidIsValid(proc))
353  ereport(ERROR,
354  (errcode(ERRCODE_UNDEFINED_FUNCTION),
355  errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
356  pg_encoding_to_char(src_encoding),
357  pg_encoding_to_char(dest_encoding))));
358 
359  /*
360  * Allocate space for conversion result, being wary of integer overflow
361  */
362  if ((Size) len >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH))
363  ereport(ERROR,
364  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
365  errmsg("out of memory"),
366  errdetail("String of %d bytes is too long for encoding conversion.",
367  len)));
368 
369  result = palloc(len * MAX_CONVERSION_GROWTH + 1);
370 
371  OidFunctionCall5(proc,
372  Int32GetDatum(src_encoding),
373  Int32GetDatum(dest_encoding),
374  CStringGetDatum(src),
375  CStringGetDatum(result),
376  Int32GetDatum(len));
377  return result;
378 }
int errcode(int sqlerrcode)
Definition: elog.c:575
unsigned int Oid
Definition: postgres_ext.h:31
#define OidIsValid(objectId)
Definition: c.h:532
#define ERROR
Definition: elog.h:43
int errdetail(const char *fmt,...)
Definition: elog.c:873
#define CStringGetDatum(X)
Definition: postgres.h:584
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: wchar.c:1877
#define ereport(elevel, rest)
Definition: elog.h:122
#define MaxAllocSize
Definition: memutils.h:40
const char * pg_encoding_to_char(int encoding)
Definition: encnames.c:607
size_t Size
Definition: c.h:350
Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding)
Definition: namespace.c:3602
bool IsTransactionState(void)
Definition: xact.c:351
#define MAX_CONVERSION_GROWTH
Definition: pg_wchar.h:316
#define Int32GetDatum(X)
Definition: postgres.h:485
void * palloc(Size size)
Definition: mcxt.c:848
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define elog
Definition: elog.h:219
#define OidFunctionCall5(functionId, arg1, arg2, arg3, arg4, arg5)
Definition: fmgr.h:631
int pg_dsplen ( const char *  mbstr)

Definition at line 767 of file mbutils.c.

References pg_wchar_tbl::dsplen, pg_enc2name::encoding, and pg_wchar_table.

Referenced by p_isspecial().

768 {
769  return pg_wchar_table[DatabaseEncoding->encoding].dsplen((const unsigned char *) mbstr);
770 }
mbdisplaylen_converter dsplen
Definition: pg_wchar.h:380
pg_enc encoding
Definition: pg_wchar.h:329
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:74
int pg_encoding_dsplen ( int  encoding,
const char *  mbstr 
)

Definition at line 1796 of file wchar.c.

References PG_SQL_ASCII, and PG_VALID_ENCODING.

Referenced by PQdsplen(), and reportErrorPosition().

1797 {
1798  return (PG_VALID_ENCODING(encoding) ?
1799  pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
1800  pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
1801 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
static char * encoding
Definition: initdb.c:123
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
int pg_encoding_max_length ( int  encoding)

Definition at line 1820 of file wchar.c.

References Assert, encoding, pg_wchar_tbl::maxmblen, and PG_VALID_ENCODING.

Referenced by ascii(), chr(), pg_encoding_mbcliplen(), pg_verify_mbstr_len(), reportErrorPosition(), and type_maximum_size().

1821 {
1822  Assert(PG_VALID_ENCODING(encoding));
1823 
1824  return pg_wchar_table[encoding].maxmblen;
1825 }
int maxmblen
Definition: pg_wchar.h:382
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
static char * encoding
Definition: initdb.c:123
#define Assert(condition)
Definition: c.h:681
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
int pg_encoding_mb2wchar_with_len ( int  encoding,
const char *  from,
pg_wchar to,
int  len 
)

Definition at line 730 of file mbutils.c.

References encoding, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

732 {
733  return pg_wchar_table[encoding].mb2wchar_with_len((const unsigned char *) from, to, len);
734 }
static char * encoding
Definition: initdb.c:123
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
mb2wchar_with_len_converter mb2wchar_with_len
Definition: pg_wchar.h:375
int pg_encoding_mbcliplen ( int  encoding,
const char *  mbstr,
int  len,
int  limit 
)

Definition at line 830 of file mbutils.c.

References cliplen(), encoding, pg_wchar_tbl::mblen, pg_encoding_max_length(), and pg_wchar_table.

Referenced by pg_mbcliplen().

832 {
833  mblen_converter mblen_fn;
834  int clen = 0;
835  int l;
836 
837  /* optimization for single byte encoding */
838  if (pg_encoding_max_length(encoding) == 1)
839  return cliplen(mbstr, len, limit);
840 
841  mblen_fn = pg_wchar_table[encoding].mblen;
842 
843  while (len > 0 && *mbstr)
844  {
845  l = (*mblen_fn) ((const unsigned char *) mbstr);
846  if ((clen + l) > limit)
847  break;
848  clen += l;
849  if (clen == limit)
850  break;
851  len -= l;
852  mbstr += l;
853  }
854  return clen;
855 }
int pg_encoding_max_length(int encoding)
Definition: wchar.c:1820
static char * encoding
Definition: initdb.c:123
static int cliplen(const char *str, int len, int limit)
Definition: mbutils.c:887
int(* mblen_converter)(const unsigned char *mbstr)
Definition: pg_wchar.h:365
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
mblen_converter mblen
Definition: pg_wchar.h:379
int pg_encoding_mblen ( int  encoding,
const char *  mbstr 
)

Definition at line 1785 of file wchar.c.

References PG_SQL_ASCII, and PG_VALID_ENCODING.

Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), CopyReadLineText(), PQescapeInternal(), PQescapeStringInternal(), PQmblen(), report_invalid_encoding(), report_untranslatable_char(), and reportErrorPosition().

1786 {
1787  return (PG_VALID_ENCODING(encoding) ?
1788  pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) :
1789  pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
1790 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
static char * encoding
Definition: initdb.c:123
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
const char* pg_encoding_to_char ( int  encoding)

Definition at line 607 of file encnames.c.

608 {
609  if (PG_VALID_ENCODING(encoding))
610  {
611  const pg_enc2name *p = &pg_enc2name_tbl[encoding];
612 
613  Assert(encoding == p->encoding);
614  return p->name;
615  }
616  return "";
617 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:307
const char * name
Definition: pg_wchar.h:328
pg_enc encoding
Definition: pg_wchar.h:329
static char * encoding
Definition: initdb.c:123
#define Assert(condition)
Definition: c.h:681
int pg_encoding_verifymb ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 1809 of file wchar.c.

References PG_SQL_ASCII, and PG_VALID_ENCODING.

Referenced by big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2mic(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), shift_jis_20042euc_jis_2004(), and sjis2euc_jp().

1810 {
1811  return (PG_VALID_ENCODING(encoding) ?
1812  pg_wchar_table[encoding].mbverify((const unsigned char *) mbstr, len) :
1813  pg_wchar_table[PG_SQL_ASCII].mbverify((const unsigned char *) mbstr, len));
1814 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
static char * encoding
Definition: initdb.c:123
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
int pg_encoding_wchar2mb_with_len ( int  encoding,
const pg_wchar from,
char *  to,
int  len 
)

Definition at line 752 of file mbutils.c.

References encoding, pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

754 {
755  return pg_wchar_table[encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
756 }
wchar2mb_with_len_converter wchar2mb_with_len
Definition: pg_wchar.h:377
static char * encoding
Definition: initdb.c:123
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
int pg_get_client_encoding ( void  )

Definition at line 306 of file mbutils.c.

References pg_enc2name::encoding.

Referenced by BeginCopy(), and xml_send().

307 {
308  return ClientEncoding->encoding;
309 }
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:73
pg_enc encoding
Definition: pg_wchar.h:329
const char* pg_get_client_encoding_name ( void  )

Definition at line 315 of file mbutils.c.

References pg_enc2name::name.

316 {
317  return ClientEncoding->name;
318 }
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:73
const char * name
Definition: pg_wchar.h:328
int pg_mb2wchar ( const char *  from,
pg_wchar to 
)

Definition at line 716 of file mbutils.c.

References pg_enc2name::encoding, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

717 {
718  return pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len((const unsigned char *) from, to, strlen(from));
719 }
pg_enc encoding
Definition: pg_wchar.h:329
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:74
mb2wchar_with_len_converter mb2wchar_with_len
Definition: pg_wchar.h:375
int pg_mb2wchar_with_len ( const char *  from,
pg_wchar to,
int  len 
)

Definition at line 723 of file mbutils.c.

References pg_enc2name::encoding, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

Referenced by check_ident_usermap(), CheckAffix(), NIAddAffix(), parse_ident_line(), RE_compile(), RE_compile_and_cache(), RE_execute(), replace_text_regexp(), setup_regexp_matches(), text_position_setup(), and TParserInit().

724 {
725  return pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len((const unsigned char *) from, to, len);
726 }
pg_enc encoding
Definition: pg_wchar.h:329
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:74
mb2wchar_with_len_converter mb2wchar_with_len
Definition: pg_wchar.h:375
int pg_mbcharcliplen ( const char *  mbstr,
int  len,
int  limit 
)

Definition at line 862 of file mbutils.c.

References cliplen(), pg_database_encoding_max_length(), and pg_mblen().

Referenced by bpchar(), bpchar_input(), text_left(), text_right(), varchar(), and varchar_input().

863 {
864  int clen = 0;
865  int nch = 0;
866  int l;
867 
868  /* optimization for single byte encoding */
869  if (pg_database_encoding_max_length() == 1)
870  return cliplen(mbstr, len, limit);
871 
872  while (len > 0 && *mbstr)
873  {
874  l = pg_mblen(mbstr);
875  nch++;
876  if (nch > limit)
877  break;
878  clen += l;
879  len -= l;
880  mbstr += l;
881  }
882  return clen;
883 }
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
static int cliplen(const char *str, int len, int limit)
Definition: mbutils.c:887
int pg_mblen(const char *mbstr)
Definition: mbutils.c:760
int pg_mbcliplen ( const char *  mbstr,
int  len,
int  limit 
)
int pg_mblen ( const char *  mbstr)

Definition at line 760 of file mbutils.c.

References pg_enc2name::encoding, pg_wchar_tbl::mblen, and pg_wchar_table.

Referenced by addCompoundAffixFlagValue(), appendStringInfoRegexpSubstr(), charlen_to_bytelen(), check_replace_text_has_escape_char(), dotrim(), extract_mb_char(), find_word(), findchar(), findchar2(), findwrd(), gbt_var_node_cp_len(), get_modifiers(), get_nextfield(), get_wildcard_part(), getlexeme(), getNextFlagFromString(), gettoken_query(), gettoken_tsvector(), infix(), initTrie(), json_lex_string(), lpad(), lquery_in(), ltree_in(), make_trigrams(), map_sql_identifier_to_xml_name(), map_xml_name_to_sql_identifier(), match_prosrc_to_literal(), mb_strchr(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), parse_affentry(), pg_mbcharcliplen(), pg_mbstrlen(), pg_mbstrlen_with_len(), readstoplist(), report_json_context(), rpad(), RS_compile(), RS_execute(), RS_isRegis(), similar_escape(), t_isalpha(), t_isdigit(), t_isprint(), t_isspace(), text_reverse(), text_substring(), text_to_array_internal(), thesaurusRead(), TParserGet(), translate(), ts_stat_sql(), tsvectorout(), unaccent_lexize(), varstr_levenshtein(), and wchareq().

761 {
762  return pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
763 }
pg_enc encoding
Definition: pg_wchar.h:329
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:74
mblen_converter mblen
Definition: pg_wchar.h:379
int pg_mbstrlen ( const char *  mbstr)

Definition at line 774 of file mbutils.c.

References pg_database_encoding_max_length(), and pg_mblen().

Referenced by text_format_append_string().

775 {
776  int len = 0;
777 
778  /* optimization for single byte encoding */
779  if (pg_database_encoding_max_length() == 1)
780  return strlen(mbstr);
781 
782  while (*mbstr)
783  {
784  mbstr += pg_mblen(mbstr);
785  len++;
786  }
787  return len;
788 }
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
int pg_mblen(const char *mbstr)
Definition: mbutils.c:760
int pg_mbstrlen_with_len ( const char *  mbstr,
int  len 
)

Definition at line 794 of file mbutils.c.

References pg_database_encoding_max_length(), and pg_mblen().

Referenced by bpchar(), bpchar_input(), bpcharlen(), executor_errposition(), lpad(), match_prosrc_to_query(), parser_errposition(), plpgsql_scanner_errposition(), rpad(), similar_escape(), text_left(), text_length(), text_right(), text_substring(), and varstr_levenshtein().

795 {
796  int len = 0;
797 
798  /* optimization for single byte encoding */
799  if (pg_database_encoding_max_length() == 1)
800  return limit;
801 
802  while (limit > 0 && *mbstr)
803  {
804  int l = pg_mblen(mbstr);
805 
806  limit -= l;
807  mbstr += l;
808  len++;
809  }
810  return len;
811 }
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
int pg_mblen(const char *mbstr)
Definition: mbutils.c:760
void pg_mic2ascii ( const unsigned char *  mic,
unsigned char *  p,
int  len 
)

Definition at line 163 of file conv.c.

References IS_HIGHBIT_SET, PG_MULE_INTERNAL, PG_SQL_ASCII, and report_untranslatable_char().

Referenced by mic_to_ascii(), and utf8_to_ascii().

164 {
165  int c1;
166 
167  while (len > 0)
168  {
169  c1 = *mic;
170  if (c1 == 0 || IS_HIGHBIT_SET(c1))
171  report_untranslatable_char(PG_MULE_INTERNAL, PG_SQL_ASCII,
172  (const char *) mic, len);
173  *p++ = c1;
174  mic++;
175  len--;
176  }
177  *p = '\0';
178 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:979
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
int pg_mic_mblen ( const unsigned char *  mbstr)

Definition at line 1776 of file wchar.c.

References pg_mule_mblen().

Referenced by mic2latin(), and mic2latin_with_table().

1777 {
1778  return pg_mule_mblen(mbstr);
1779 }
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:866
int pg_mule_mblen ( const unsigned char *  mbstr)

Definition at line 866 of file wchar.c.

References IS_LC1, IS_LC2, IS_LCPRV1, and IS_LCPRV2.

Referenced by pg_mic_mblen(), and pg_mule_verifier().

867 {
868  int len;
869 
870  if (IS_LC1(*s))
871  len = 2;
872  else if (IS_LCPRV1(*s))
873  len = 3;
874  else if (IS_LC2(*s))
875  len = 3;
876  else if (IS_LCPRV2(*s))
877  len = 4;
878  else
879  len = 1; /* assume ASCII */
880  return len;
881 }
#define IS_LC2(c)
Definition: pg_wchar.h:144
#define IS_LCPRV2(c)
Definition: pg_wchar.h:164
#define IS_LCPRV1(c)
Definition: pg_wchar.h:152
#define IS_LC1(c)
Definition: pg_wchar.h:123
char* pg_server_to_any ( const char *  s,
int  len,
int  encoding 
)

Definition at line 634 of file mbutils.c.

References pg_enc2name::encoding, perform_default_encoding_conversion(), pg_do_encoding_conversion(), PG_SQL_ASCII, and pg_verify_mbstr().

Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), CopyTo(), dsnowball_lexize(), hv_fetch_string(), hv_store_string(), pg_server_to_client(), and utf_e2u().

635 {
636  if (len <= 0)
637  return (char *) s; /* empty string is always valid */
638 
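639  if (encoding == DatabaseEncoding->encoding ||
640  encoding == PG_SQL_ASCII)
641  return (char *) s; /* assume data is valid */
642 
643  if (DatabaseEncoding->encoding == PG_SQL_ASCII)
644  {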
645  /* No conversion is possible, but we must validate the result */
646  (void) pg_verify_mbstr(encoding, s, len, false);
647  return (char *) s;
648  }
649 
650  /* Fast path if we can use cached conversion function */
651  if (encoding == ClientEncoding->encoding)
652  return perform_default_encoding_conversion(s, len, false);
653 
654  /* General case ... will not work outside transactions */
655  return (char *) pg_do_encoding_conversion((unsigned char *) s,
656  len,
657  DatabaseEncoding->encoding,
658  encoding);
659 }
static char * perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)
Definition: mbutils.c:668
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:73
unsigned char * pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding)
Definition: mbutils.c:326
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: wchar.c:1877
pg_enc encoding
Definition: pg_wchar.h:329
static char * encoding
Definition: initdb.c:123
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:74
char* pg_server_to_client ( const char *  s,
int  len 
)

Definition at line 623 of file mbutils.c.

References pg_enc2name::encoding, and pg_server_to_any().

Referenced by pq_puttextmessage(), pq_sendcountedtext(), pq_sendstring(), pq_sendtext(), and pq_writestring().

624 {
625  return pg_server_to_any(s, len, ClientEncoding->encoding);
626 }
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:73
char * pg_server_to_any(const char *s, int len, int encoding)
Definition: mbutils.c:634
pg_enc encoding
Definition: pg_wchar.h:329
bool pg_utf8_islegal ( const unsigned char *  source,
int  length 
)

Definition at line 1458 of file wchar.c.

Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifier(), utf8_to_iso8859_1(), and UtfToLocal().

1459 {
1460  unsigned char a;
1461 
1462  switch (length)
1463  {
1464  default:
1465  /* reject lengths 5 and 6 for now */
1466  return false;
1467  case 4:
1468  a = source[3];
1469  if (a < 0x80 || a > 0xBF)
1470  return false;
1471  /* FALL THRU */
1472  case 3:
1473  a = source[2];
1474  if (a < 0x80 || a > 0xBF)
1475  return false;
1476  /* FALL THRU */
1477  case 2:
1478  a = source[1];
1479  switch (*source)
1480  {
1481  case 0xE0:
1482  if (a < 0xA0 || a > 0xBF)
1483  return false;
1484  break;
1485  case 0xED:
1486  if (a < 0x80 || a > 0x9F)
1487  return false;
1488  break;
1489  case 0xF0:
1490  if (a < 0x90 || a > 0xBF)
1491  return false;
1492  break;
1493  case 0xF4:
1494  if (a < 0x80 || a > 0x8F)
1495  return false;
1496  break;
1497  default:
1498  if (a < 0x80 || a > 0xBF)
1499  return false;
1500  break;
1501  }
1502  /* FALL THRU */
1503  case 1:
1504  a = *source;
1505  if (a >= 0x80 && a < 0xC2)
1506  return false;
1507  if (a > 0xF4)
1508  return false;
1509  break;
1510  }
1511  return true;
1512 }
int length(const List *list)
Definition: list.c:1271
int pg_utf_mblen ( const unsigned char *  )

Definition at line 541 of file wchar.c.

Referenced by json_lex_string(), pg_saslprep(), pg_utf8_string_len(), pg_utf8_verifier(), pg_wchar2utf_with_len(), utf8_to_iso8859_1(), and UtfToLocal().

542 {
543  int len;
544 
545  if ((*s & 0x80) == 0)
546  len = 1;
547  else if ((*s & 0xe0) == 0xc0)
548  len = 2;
549  else if ((*s & 0xf0) == 0xe0)
550  len = 3;
551  else if ((*s & 0xf8) == 0xf0)
552  len = 4;
553 #ifdef NOT_USED
554  else if ((*s & 0xfc) == 0xf8)
555  len = 5;
556  else if ((*s & 0xfe) == 0xfc)
557  len = 6;
558 #endif
559  else
560  len = 1;
561  return len;
562 }
int pg_valid_client_encoding ( const char *  name)

Definition at line 487 of file encnames.c.

References enc, pg_char_to_encoding(), and PG_VALID_FE_ENCODING.

Referenced by check_client_encoding().

488 {
489  int enc;
490 
491  if ((enc = pg_char_to_encoding(name)) < 0)
492  return -1;
493 
494  if (!PG_VALID_FE_ENCODING(enc))
495  return -1;
496 
497  return enc;
498 }
int pg_char_to_encoding(const char *name)
Definition: encnames.c:551
struct pg_encoding enc
Definition: encode.c:522
#define PG_VALID_FE_ENCODING(_enc)
Definition: pg_wchar.h:305
const char * name
Definition: encode.c:521
int pg_valid_server_encoding ( const char *  name)

Definition at line 501 of file encnames.c.

References enc, pg_char_to_encoding(), and PG_VALID_BE_ENCODING.

Referenced by createdb(), get_encoding_id(), and parse_extension_control_file().

502 {
503  int enc;
504 
505  if ((enc = pg_char_to_encoding(name)) < 0)
506  return -1;
507 
508  if (!PG_VALID_BE_ENCODING(enc))
509  return -1;
510 
511  return enc;
512 }
int pg_char_to_encoding(const char *name)
Definition: encnames.c:551
struct pg_encoding enc
Definition: encode.c:522
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:295
const char * name
Definition: encode.c:521
int pg_valid_server_encoding_id ( int  encoding)

Definition at line 515 of file encnames.c.

516 {
517  return PG_VALID_BE_ENCODING(encoding);
518 }
static char * encoding
Definition: initdb.c:123
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:295
bool pg_verify_mbstr ( int  encoding,
const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1877 of file wchar.c.

References pg_verify_mbstr_len().

Referenced by LogicalOutputWrite(), pg_any_to_server(), pg_do_encoding_conversion(), pg_server_to_any(), and t_readline().

1878 {
1879  return pg_verify_mbstr_len(encoding, mbstr, len, noError) >= 0;
1880 }
int pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
Definition: wchar.c:1894
static char * encoding
Definition: initdb.c:123
int pg_verify_mbstr_len ( int  encoding,
const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1894 of file wchar.c.

References Assert, encoding, IS_HIGHBIT_SET, pg_wchar_tbl::mbverify, pg_encoding_max_length(), PG_VALID_ENCODING, and report_invalid_encoding().

Referenced by length_in_encoding(), pg_convert(), pg_verify_mbstr(), pg_verifymbstr(), and read_extension_script_file().

1895 {
1896  mbverifier mbverify;
1897  int mb_len;
1898 
1899  Assert(PG_VALID_ENCODING(encoding));
1900 
1901  /*
1902  * In single-byte encodings, we need only reject nulls (\0).
1903  */
1904  if (pg_encoding_max_length(encoding) <= 1)
1905  {
1906  const char *nullpos = memchr(mbstr, 0, len);
1907 
1908  if (nullpos == NULL)
1909  return len;
1910  if (noError)
1911  return -1;
1912  report_invalid_encoding(encoding, nullpos, 1);
1913  }
1914 
1915  /* fetch function pointer just once */
1916  mbverify = pg_wchar_table[encoding].mbverify;
1917 
1918  mb_len = 0;
1919 
1920  while (len > 0)
1921  {
1922  int l;
1923 
1924  /* fast path for ASCII-subset characters */
1925  if (!IS_HIGHBIT_SET(*mbstr))
1926  {
1927  if (*mbstr != '\0')
1928  {
1929  mb_len++;
1930  mbstr++;
1931  len--;
1932  continue;
1933  }
1934  if (noError)
1935  return -1;
1936  report_invalid_encoding(encoding, mbstr, len);
1937  }
1938 
1939  l = (*mbverify) ((const unsigned char *) mbstr, len);
1940 
1941  if (l < 0)
1942  {
1943  if (noError)
1944  return -1;
1945  report_invalid_encoding(encoding, mbstr, len);
1946  }
1947 
1948  mbstr += l;
1949  len -= l;
1950  mb_len++;
1951  }
1952  return mb_len;
1953 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:979
int pg_encoding_max_length(int encoding)
Definition: wchar.c:1820
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
static char * encoding
Definition: initdb.c:123
#define Assert(condition)
Definition: c.h:681
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
int(* mbverifier)(const unsigned char *mbstr, int len)
Definition: pg_wchar.h:371
mbverifier mbverify
Definition: pg_wchar.h:381
bool pg_verifymbstr ( const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1866 of file wchar.c.

References GetDatabaseEncoding(), and pg_verify_mbstr_len().

Referenced by char2wchar(), CopyReadAttributesText(), plperl_spi_exec(), plperl_spi_prepare(), plperl_spi_query(), PLy_cursor_query(), PLy_output(), PLy_spi_execute_query(), PLy_spi_prepare(), PLyObject_AsString(), read_text_file(), and spg_text_leaf_consistent().

1867 {
1868  return
1869  pg_verify_mbstr_len(GetDatabaseEncoding(), mbstr, len, noError) >= 0;
1870 }
int pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
Definition: wchar.c:1894
int GetDatabaseEncoding(void)
Definition: mbutils.c:1004
int pg_wchar2mb ( const pg_wchar from,
char *  to 
)

Definition at line 738 of file mbutils.c.

References pg_enc2name::encoding, pg_wchar_strlen(), pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

739 {
740  return pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len(from, (unsigned char *) to, pg_wchar_strlen(from));
741 }
size_t pg_wchar_strlen(const pg_wchar *str)
Definition: wstrncmp.c:70
wchar2mb_with_len_converter wchar2mb_with_len
Definition: pg_wchar.h:377
pg_enc encoding
Definition: pg_wchar.h:329
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:74
int pg_wchar2mb_with_len ( const pg_wchar from,
char *  to,
int  len 
)

Definition at line 745 of file mbutils.c.

References pg_enc2name::encoding, pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

Referenced by convertPgWchar(), and regexp_fixed_prefix().

746 {
747  return pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
748 }
wchar2mb_with_len_converter wchar2mb_with_len
Definition: pg_wchar.h:377
pg_enc encoding
Definition: pg_wchar.h:329
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1729
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:74
size_t pg_wchar_strlen ( const pg_wchar wstr)

Definition at line 70 of file wstrncmp.c.

Referenced by pg_wchar2mb().

71 {
72  const pg_wchar *s;
73 
74  for (s = str; *s; ++s)
75  ;
76  return (s - str);
77 }
unsigned int pg_wchar
Definition: mbprint.c:31
int pg_wchar_strncmp ( const pg_wchar s1,
const pg_wchar s2,
size_t  n 
)

Definition at line 40 of file wstrncmp.c.

41 {
42  if (n == 0)
43  return 0;
44  do
45  {
46  if (*s1 != *s2++)
47  return (*s1 - *(s2 - 1));
48  if (*s1++ == 0)
49  break;
50  } while (--n != 0);
51  return 0;
52 }
char * s1
char * s2
int PrepareClientEncoding ( int  encoding)

Definition at line 103 of file mbutils.c.

References backend_startup_complete, ConvProcInfo::c_encoding, encoding, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), IsTransactionState(), lcons(), lfirst, MemoryContextAlloc(), MemoryContextSwitchTo(), OidIsValid, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, ConvProcInfo::to_server_info, and TopMemoryContext.

Referenced by check_client_encoding(), and InitializeClientEncoding().

104 {
105  int current_server_encoding;
106  ListCell *lc;
107 
108  if (!PG_VALID_FE_ENCODING(encoding))
109  return -1;
110 
111  /* Can't do anything during startup, per notes above */
112  if (!backend_startup_complete)
113  return 0;
114 
115  current_server_encoding = GetDatabaseEncoding();
116 
117  /*
118  * Check for cases that require no conversion function.
119  */
120  if (current_server_encoding == encoding ||
121  current_server_encoding == PG_SQL_ASCII ||
122  encoding == PG_SQL_ASCII)
123  return 0;
124 
125  if (IsTransactionState())
126  {
127  /*
128  * If we're in a live transaction, it's safe to access the catalogs,
129  * so look up the functions. We repeat the lookup even if the info is
130  * already cached, so that we can react to changes in the contents of
131  * pg_conversion.
132  */
133  Oid to_server_proc,
134  to_client_proc;
135  ConvProcInfo *convinfo;
136  MemoryContext oldcontext;
137 
138  to_server_proc = FindDefaultConversionProc(encoding,
139  current_server_encoding);
140  if (!OidIsValid(to_server_proc))
141  return -1;
142  to_client_proc = FindDefaultConversionProc(current_server_encoding,
143  encoding);
144  if (!OidIsValid(to_client_proc))
145  return -1;
146 
147  /*
148  * Load the fmgr info into TopMemoryContext (could still fail here)
149  */
150  convinfo = (ConvProcInfo *) MemoryContextAlloc(TopMemoryContext,
151  sizeof(ConvProcInfo));
152  convinfo->s_encoding = current_server_encoding;
153  convinfo->c_encoding = encoding;
154  fmgr_info_cxt(to_server_proc, &convinfo->to_server_info,
155  TopMemoryContext);
156  fmgr_info_cxt(to_client_proc, &convinfo->to_client_info,
157  TopMemoryContext);
158 
159  /* Attach new info to head of list */
160  oldcontext = MemoryContextSwitchTo(TopMemoryContext);
161  ConvProcList = lcons(convinfo, ConvProcList);
162  MemoryContextSwitchTo(oldcontext);
163 
164  /*
165  * We cannot yet remove any older entry for the same encoding pair,
166  * since it could still be in use. SetClientEncoding will clean up.
167  */
168 
169  return 0; /* success */
170  }
171  else
172  {
173  /*
174  * If we're not in a live transaction, the only thing we can do is
175  * restore a previous setting using the cache. This covers all
176  * transaction-rollback cases. The only case it might not work for is
177  * trying to change client_encoding on the fly by editing
178  * postgresql.conf and SIGHUP'ing. Which would probably be a stupid
179  * thing to do anyway.
180  */
181  foreach(lc, ConvProcList)
182  {
183  ConvProcInfo *oldinfo = (ConvProcInfo *) lfirst(lc);
184 
185  if (oldinfo->s_encoding == current_server_encoding &&
186  oldinfo->c_encoding == encoding)
187  return 0;
188  }
189 
190  return -1; /* it's not cached, so fail */
191  }
192 }
FmgrInfo to_server_info
Definition: mbutils.c:57
FmgrInfo to_client_info
Definition: mbutils.c:58
static bool backend_startup_complete
Definition: mbutils.c:83
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
unsigned int Oid
Definition: postgres_ext.h:31
#define OidIsValid(objectId)
Definition: c.h:532
int s_encoding
Definition: mbutils.c:55
static List * ConvProcList
Definition: mbutils.c:61
#define PG_VALID_FE_ENCODING(_enc)
Definition: pg_wchar.h:305
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:132
MemoryContext TopMemoryContext
Definition: mcxt.c:43
int GetDatabaseEncoding(void)
Definition: mbutils.c:1004
static char * encoding
Definition: initdb.c:123
List * lcons(void *datum, List *list)
Definition: list.c:259
#define lfirst(lc)
Definition: pg_list.h:106
Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding)
Definition: namespace.c:3602
bool IsTransactionState(void)
Definition: xact.c:351
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:706
int c_encoding
Definition: mbutils.c:56
void report_invalid_encoding ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 1995 of file wchar.c.

References buf, ereport, errcode(), errmsg(), ERROR, Min, name, pg_enc2name_tbl, and pg_encoding_mblen().

Referenced by big52mic(), euc_cn2mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2mic(), iso8859_1_to_utf8(), latin2mic(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_ascii2mic(), pg_verify_mbstr_len(), shift_jis_20042euc_jis_2004(), sjis2euc_jp(), sjis2mic(), utf8_to_iso8859_1(), and UtfToLocal().

1996 {
1997  int l = pg_encoding_mblen(encoding, mbstr);
1998  char buf[8 * 5 + 1];
1999  char *p = buf;
2000  int j,
2001  jlimit;
2002 
2003  jlimit = Min(l, len);
2004  jlimit = Min(jlimit, 8); /* prevent buffer overrun */
2005 
2006  for (j = 0; j < jlimit; j++)
2007  {
2008  p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
2009  if (j < jlimit - 1)
2010  p += sprintf(p, " ");
2011  }
2012 
2013  ereport(ERROR,
2014  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
2015  errmsg("invalid byte sequence for encoding \"%s\": %s",
2016  pg_enc2name_tbl[encoding].name,
2017  buf)));
2018 }
#define Min(x, y)
Definition: c.h:812
int errcode(int sqlerrcode)
Definition: elog.c:575
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:67
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:307
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:1785
#define ereport(elevel, rest)
Definition: elog.h:122
static char * encoding
Definition: initdb.c:123
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:797
void report_untranslatable_char ( int  src_encoding,
int  dest_encoding,
const char *  mbstr,
int  len 
)

Definition at line 2027 of file wchar.c.

References buf, ereport, errcode(), errmsg(), ERROR, Min, name, pg_enc2name_tbl, and pg_encoding_mblen().

Referenced by big52mic(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_mic2ascii(), utf8_to_iso8859_1(), and UtfToLocal().

2029 {
2030  int l = pg_encoding_mblen(src_encoding, mbstr);
2031  char buf[8 * 5 + 1];
2032  char *p = buf;
2033  int j,
2034  jlimit;
2035 
2036  jlimit = Min(l, len);
2037  jlimit = Min(jlimit, 8); /* prevent buffer overrun */
2038 
2039  for (j = 0; j < jlimit; j++)
2040  {
2041  p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
2042  if (j < jlimit - 1)
2043  p += sprintf(p, " ");
2044  }
2045 
2046  ereport(ERROR,
2047  (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
2048  errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
2049  buf,
2050  pg_enc2name_tbl[src_encoding].name,
2051  pg_enc2name_tbl[dest_encoding].name)));
2052 }
#define Min(x, y)
Definition: c.h:812
int errcode(int sqlerrcode)
Definition: elog.c:575
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:67
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:307
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:1785
#define ereport(elevel, rest)
Definition: elog.h:122
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:797
int SetClientEncoding ( int  encoding)

Definition at line 201 of file mbutils.c.

References backend_startup_complete, ConvProcInfo::c_encoding, encoding, GetDatabaseEncoding(), lfirst, list_delete_cell(), list_head(), lnext, next, pending_client_encoding, pfree(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, and ConvProcInfo::to_server_info.

Referenced by assign_client_encoding(), InitializeClientEncoding(), and ParallelWorkerMain().

202 {
203  int current_server_encoding;
204  bool found;
205  ListCell *lc;
206  ListCell *prev;
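207  ListCell *next;
208 
209  if (!PG_VALID_FE_ENCODING(encoding))
210  return -1;
211 
212  /* Can't do anything during startup, per notes above */
213  if (!backend_startup_complete)
214  {
215  pending_client_encoding = encoding;
216  return 0;
217  }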
218 
219  current_server_encoding = GetDatabaseEncoding();
220 
221  /*
222  * Check for cases that require no conversion function.
223  */
224  if (current_server_encoding == encoding ||
225  current_server_encoding == PG_SQL_ASCII ||
226  encoding == PG_SQL_ASCII)
227  {
228  ClientEncoding = &pg_enc2name_tbl[encoding];
229  ToServerConvProc = NULL;
230  ToClientConvProc = NULL;
231  return 0;
232  }
233 
234  /*
235  * Search the cache for the entry previously prepared by
236  * PrepareClientEncoding; if there isn't one, we lose. While at it,
237  * release any duplicate entries so that repeated Prepare/Set cycles don't
238  * leak memory.
239  */
240  found = false;
241  prev = NULL;
242  for (lc = list_head(ConvProcList); lc; lc = next)
243  {
244  ConvProcInfo *convinfo = (ConvProcInfo *) lfirst(lc);
245 
246  next = lnext(lc);
247 
248  if (convinfo->s_encoding == current_server_encoding &&
249  convinfo->c_encoding == encoding)
250  {
251  if (!found)
252  {
253  /* Found newest entry, so set up */
254  ClientEncoding = &pg_enc2name_tbl[encoding];
255  ToServerConvProc = &convinfo->to_server_info;
256  ToClientConvProc = &convinfo->to_client_info;
257  found = true;
258  }
259  else
260  {
261  /* Duplicate entry, release it */
262  ConvProcList = list_delete_cell(ConvProcList, lc, prev);
263  pfree(convinfo);
264  continue; /* prev mustn't advance */
265  }
266  }
267 
268  prev = lc;
269  }
270 
271  if (found)
272  return 0; /* success */
273  else
274  return -1; /* it's not cached, so fail */
275 }
static int32 next
Definition: blutils.c:210
FmgrInfo to_server_info
Definition: mbutils.c:57
FmgrInfo to_client_info
Definition: mbutils.c:58
static bool backend_startup_complete
Definition: mbutils.c:83
static int pending_client_encoding
Definition: mbutils.c:84
static FmgrInfo * ToServerConvProc
Definition: mbutils.c:67
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:73
int s_encoding
Definition: mbutils.c:55
void pfree(void *pointer)
Definition: mcxt.c:949
static List * ConvProcList
Definition: mbutils.c:61
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:307
static ListCell * list_head(const List *l)
Definition: pg_list.h:77
#define PG_VALID_FE_ENCODING(_enc)
Definition: pg_wchar.h:305
#define lnext(lc)
Definition: pg_list.h:105
List * list_delete_cell(List *list, ListCell *cell, ListCell *prev)
Definition: list.c:528
int GetDatabaseEncoding(void)
Definition: mbutils.c:1004
static char * encoding
Definition: initdb.c:123
#define lfirst(lc)
Definition: pg_list.h:106
static FmgrInfo * ToClientConvProc
Definition: mbutils.c:68
int c_encoding
Definition: mbutils.c:56
void SetDatabaseEncoding ( int  encoding)

Definition at line 898 of file mbutils.c.

References Assert, elog, encoding, pg_enc2name::encoding, ERROR, pg_enc2name_tbl, and PG_VALID_BE_ENCODING.

Referenced by CheckMyDatabase().

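899 {
900  if (!PG_VALID_BE_ENCODING(encoding))
901  elog(ERROR, "invalid database encoding: %d", encoding);
902 
903  DatabaseEncoding = &pg_enc2name_tbl[encoding];
904  Assert(DatabaseEncoding->encoding == encoding);
905 }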
#define ERROR
Definition: elog.h:43
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:307
pg_enc encoding
Definition: pg_wchar.h:329
static char * encoding
Definition: initdb.c:123
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:295
#define Assert(condition)
Definition: c.h:681
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:74
#define elog
Definition: elog.h:219
void SetMessageEncoding ( int  encoding)

Definition at line 908 of file mbutils.c.

References Assert, encoding, pg_enc2name::encoding, pg_enc2name_tbl, and PG_VALID_ENCODING.

Referenced by pg_perm_setlocale().

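909 {
910  /* Some calls happen before we can elog()! */
911  Assert(PG_VALID_ENCODING(encoding));
912 
913  MessageEncoding = &pg_enc2name_tbl[encoding];
914  Assert(MessageEncoding->encoding == encoding);
915 }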
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:307
pg_enc encoding
Definition: pg_wchar.h:329
static char * encoding
Definition: initdb.c:123
#define Assert(condition)
Definition: c.h:681
static const pg_enc2name * MessageEncoding
Definition: mbutils.c:75
unsigned char* unicode_to_utf8 ( pg_wchar  c,
unsigned char *  utf8string 
)

Definition at line 475 of file wchar.c.

Referenced by json_lex_string(), pg_saslprep(), pg_wchar2utf_with_len(), and unicode_to_sqlchar().

476 {
477  if (c <= 0x7F)
478  {
479  utf8string[0] = c;
480  }
481  else if (c <= 0x7FF)
482  {
483  utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
484  utf8string[1] = 0x80 | (c & 0x3F);
485  }
486  else if (c <= 0xFFFF)
487  {
488  utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
489  utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
490  utf8string[2] = 0x80 | (c & 0x3F);
491  }
492  else
493  {
494  utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
495  utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
496  utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
497  utf8string[3] = 0x80 | (c & 0x3F);
498  }
499 
500  return utf8string;
501 }
char * c
pg_wchar utf8_to_unicode ( const unsigned char *  c)

Definition at line 714 of file wchar.c.

Referenced by pg_saslprep(), and pg_utf_dsplen().

715 {
716  if ((*c & 0x80) == 0)
717  return (pg_wchar) c[0];
718  else if ((*c & 0xe0) == 0xc0)
719  return (pg_wchar) (((c[0] & 0x1f) << 6) |
720  (c[1] & 0x3f));
721  else if ((*c & 0xf0) == 0xe0)
722  return (pg_wchar) (((c[0] & 0x0f) << 12) |
723  ((c[1] & 0x3f) << 6) |
724  (c[2] & 0x3f));
725  else if ((*c & 0xf8) == 0xf0)
726  return (pg_wchar) (((c[0] & 0x07) << 18) |
727  ((c[1] & 0x3f) << 12) |
728  ((c[2] & 0x3f) << 6) |
729  (c[3] & 0x3f));
730  else
731  /* that is an invalid code on purpose */
732  return 0xffffffff;
733 }
char * c
unsigned int pg_wchar
Definition: mbprint.c:31
void UtfToLocal ( const unsigned char *  utf,
int  len,
unsigned char *  iso,
const pg_mb_radix_tree map,
const pg_utf_to_local_combined cmap,
int  cmapsize,
utf_local_conversion_func  conv_func,
int  encoding 
)

Definition at line 474 of file conv.c.

References pg_utf_to_local_combined::code, compare3(), elog, ereport, errcode(), errmsg(), ERROR, pg_mb_radix_conv(), PG_UTF8, pg_utf8_islegal(), pg_utf_mblen(), PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), and store_coded_char().

Referenced by utf8_to_big5(), utf8_to_euc_cn(), utf8_to_euc_jis_2004(), utf8_to_euc_jp(), utf8_to_euc_kr(), utf8_to_euc_tw(), utf8_to_gb18030(), utf8_to_gbk(), utf8_to_iso8859(), utf8_to_johab(), utf8_to_koi8r(), utf8_to_koi8u(), utf8_to_shift_jis_2004(), utf8_to_sjis(), utf8_to_uhc(), and utf8_to_win().

480 {
481  uint32 iutf;
482  int l;
483  const pg_utf_to_local_combined *cp;
484 
485  if (!PG_VALID_ENCODING(encoding))
486  ereport(ERROR,
487  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
488  errmsg("invalid encoding number: %d", encoding)));
489 
490  for (; len > 0; len -= l)
491  {
492  unsigned char b1 = 0;
493  unsigned char b2 = 0;
494  unsigned char b3 = 0;
495  unsigned char b4 = 0;
496 
497  /* "break" cases all represent errors */
498  if (*utf == '\0')
499  break;
500 
501  l = pg_utf_mblen(utf);
502  if (len < l)
503  break;
504 
505  if (!pg_utf8_islegal(utf, l))
506  break;
507 
508  if (l == 1)
509  {
510  /* ASCII case is easy, assume it's one-to-one conversion */
511  *iso++ = *utf++;
512  continue;
513  }
514 
515  /* collect coded char of length l */
516  if (l == 2)
517  {
518  b3 = *utf++;
519  b4 = *utf++;
520  }
521  else if (l == 3)
522  {
523  b2 = *utf++;
524  b3 = *utf++;
525  b4 = *utf++;
526  }
527  else if (l == 4)
528  {
529  b1 = *utf++;
530  b2 = *utf++;
531  b3 = *utf++;
532  b4 = *utf++;
533  }
534  else
535  {
536  elog(ERROR, "unsupported character length %d", l);
537  iutf = 0; /* keep compiler quiet */
538  }
539  iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
540 
541  /* First, try with combined map if possible */
542  if (cmap && len > l)
543  {
544  const unsigned char *utf_save = utf;
545  int len_save = len;
546  int l_save = l;
547 
548  /* collect next character, same as above */
549  len -= l;
550 
551  l = pg_utf_mblen(utf);
552  if (len < l)
553  break;
554 
555  if (!pg_utf8_islegal(utf, l))
556  break;
557 
558  /* We assume ASCII character cannot be in combined map */
559  if (l > 1)
560  {
561  uint32 iutf2;
562  uint32 cutf[2];
563 
564  if (l == 2)
565  {
566  iutf2 = *utf++ << 8;
567  iutf2 |= *utf++;
568  }
569  else if (l == 3)
570  {
571  iutf2 = *utf++ << 16;
572  iutf2 |= *utf++ << 8;
573  iutf2 |= *utf++;
574  }
575  else if (l == 4)
576  {
577  iutf2 = *utf++ << 24;
578  iutf2 |= *utf++ << 16;
579  iutf2 |= *utf++ << 8;
580  iutf2 |= *utf++;
581  }
582  else
583  {
584  elog(ERROR, "unsupported character length %d", l);
585  iutf2 = 0; /* keep compiler quiet */
586  }
587 
588  cutf[0] = iutf;
589  cutf[1] = iutf2;
590 
591  cp = bsearch(cutf, cmap, cmapsize,
592  sizeof(pg_utf_to_local_combined), compare3);
593 
594  if (cp)
595  {
596  iso = store_coded_char(iso, cp->code);
597  continue;
598  }
599  }
600 
601  /* fail, so back up to reprocess second character next time */
602  utf = utf_save;
603  len = len_save;
604  l = l_save;
605  }
606 
607  /* Now check ordinary map */
608  if (map)
609  {
610  uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
611 
612  if (converted)
613  {
614  iso = store_coded_char(iso, converted);
615  continue;
616  }
617  }
618 
619  /* if there's a conversion function, try that */
620  if (conv_func)
621  {
622  uint32 converted = (*conv_func) (iutf);
623 
624  if (converted)
625  {
626  iso = store_coded_char(iso, converted);
627  continue;
628  }
629  }
630 
631  /* failed to translate this character */
632  report_untranslatable_char(PG_UTF8, encoding,
633  (const char *) (utf - l), len);
634  }
635 
636  /* if we broke out of loop early, must be invalid input */
637  if (len > 0)
638  report_invalid_encoding(PG_UTF8, (const char *) utf, len);
639 
640  *iso = '\0';
641 }
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1458
int errcode(int sqlerrcode)
Definition: elog.c:575
static unsigned char * store_coded_char(unsigned char *dest, uint32 code)
Definition: conv.c:323
#define ERROR
Definition: elog.h:43
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: wchar.c:2027
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
static uint32 pg_mb_radix_conv(const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)
Definition: conv.c:343
unsigned int uint32
Definition: c.h:258
#define ereport(elevel, rest)
Definition: elog.h:122
static int compare3(const void *p1, const void *p2)
Definition: conv.c:290
static char * encoding
Definition: initdb.c:123
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: wchar.c:1995
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:541
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define elog
Definition: elog.h:219

Variable Documentation

const pg_enc2gettext pg_enc2gettext_tbl[]

Definition at line 359 of file encnames.c.