This graph shows which files directly or indirectly include this file:

Data Structures
struct	pg_enc2name

struct	pg_wchar_tbl

struct	pg_mb_radix_tree

struct	pg_utf_to_local_combined

struct	pg_local_to_utf_combined

Macros
#define	MAX_MULTIBYTE_CHAR_LEN 4

#define	SS2 0x8e /* single shift 2 (JIS0201) */

#define	SS3 0x8f /* single shift 3 (JIS0212) */

#define	ISSJISHEAD(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))

#define	ISSJISTAIL(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))

#define	LC_ISO8859_1 0x81 /* ISO8859 Latin 1 */

#define	LC_ISO8859_2 0x82 /* ISO8859 Latin 2 */

#define	LC_ISO8859_3 0x83 /* ISO8859 Latin 3 */

#define	LC_ISO8859_4 0x84 /* ISO8859 Latin 4 */

#define	LC_TIS620 0x85 /* Thai (not supported yet) */

#define	LC_ISO8859_7 0x86 /* Greek (not supported yet) */

#define	LC_ISO8859_6 0x87 /* Arabic (not supported yet) */

#define	LC_ISO8859_8 0x88 /* Hebrew (not supported yet) */

#define	LC_JISX0201K 0x89 /* Japanese 1 byte kana */

#define	LC_JISX0201R 0x8a /* Japanese 1 byte Roman */

#define	LC_KOI8_R 0x8b /* Cyrillic KOI8-R */

#define	LC_ISO8859_5 0x8c /* ISO8859 Cyrillic */

#define	LC_ISO8859_9 0x8d /* ISO8859 Latin 5 (not supported yet) */

#define	LC_ISO8859_15 0x8e /* ISO8859 Latin 15 (not supported yet) */

#define	IS_LC1(c) ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8d)

#define	LC_JISX0208_1978 0x90 /* Japanese Kanji, old JIS (not supported) */

#define	LC_GB2312_80 0x91 /* Chinese */

#define	LC_JISX0208 0x92 /* Japanese Kanji (JIS X 0208) */

#define	LC_KS5601 0x93 /* Korean */

#define	LC_JISX0212 0x94 /* Japanese Kanji (JIS X 0212) */

#define	LC_CNS11643_1 0x95 /* CNS 11643-1992 Plane 1 */

#define	LC_CNS11643_2 0x96 /* CNS 11643-1992 Plane 2 */

#define	LC_JISX0213_1

#define	LC_BIG5_1

#define	LC_BIG5_2

#define	IS_LC2(c) ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)

#define	LCPRV1_A 0x9a

#define	LCPRV1_B 0x9b

#define	IS_LCPRV1(c) ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B)

#define	IS_LCPRV1_A_RANGE(c) ((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)

#define	IS_LCPRV1_B_RANGE(c) ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)

#define	LCPRV2_A 0x9c

#define	LCPRV2_B 0x9d

#define	IS_LCPRV2(c) ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B)

#define	IS_LCPRV2_A_RANGE(c) ((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)

#define	IS_LCPRV2_B_RANGE(c) ((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)

#define	LC_SISHENG

#define	LC_IPA

#define	LC_VISCII_LOWER

#define	LC_VISCII_UPPER

#define	LC_ARABIC_DIGIT 0xa4 /* Arabic digit (not supported) */

#define	LC_ARABIC_1_COLUMN 0xa5 /* Arabic 1-column (not supported) */

#define	LC_ASCII_RIGHT_TO_LEFT

#define	LC_LAO

#define	LC_ARABIC_2_COLUMN 0xa8 /* Arabic 2-column (not supported) */

#define	LC_INDIAN_1_COLUMN

#define	LC_TIBETAN_1_COLUMN

#define	LC_UNICODE_SUBSET_2

#define	LC_UNICODE_SUBSET_3

#define	LC_UNICODE_SUBSET

#define	LC_ETHIOPIC 0xf5 /* Ethiopic characters (not supported) */

#define	LC_CNS11643_3 0xf6 /* CNS 11643-1992 Plane 3 */

#define	LC_CNS11643_4 0xf7 /* CNS 11643-1992 Plane 4 */

#define	LC_CNS11643_5 0xf8 /* CNS 11643-1992 Plane 5 */

#define	LC_CNS11643_6 0xf9 /* CNS 11643-1992 Plane 6 */

#define	LC_CNS11643_7 0xfa /* CNS 11643-1992 Plane 7 */

#define	LC_INDIAN_2_COLUMN

#define	LC_TIBETAN 0xfc /* Tibetan (not supported) */

#define	PG_ENCODING_BE_LAST PG_KOI8U

#define	PG_VALID_BE_ENCODING(_enc) ((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST)

#define	PG_ENCODING_IS_CLIENT_ONLY(_enc) ((_enc) > PG_ENCODING_BE_LAST && (_enc) < _PG_LAST_ENCODING_)

#define	PG_VALID_ENCODING(_enc) ((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_)

#define	PG_VALID_FE_ENCODING(_enc) PG_VALID_ENCODING(_enc)

#define	MAX_CONVERSION_GROWTH 4

#define	MAX_CONVERSION_INPUT_LENGTH 16

#define	MAX_UNICODE_EQUIVALENT_STRING 16

#define	CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)

#define	pg_char_to_encoding pg_char_to_encoding_private

#define	pg_encoding_to_char pg_encoding_to_char_private

#define	pg_valid_server_encoding pg_valid_server_encoding_private

#define	pg_valid_server_encoding_id pg_valid_server_encoding_id_private

#define	pg_utf_mblen pg_utf_mblen_private

Typedefs
typedef unsigned int	pg_wchar

typedef enum pg_enc	pg_enc

typedef struct pg_enc2name	pg_enc2name

typedef int(*	mb2wchar_with_len_converter) (const unsigned char *from, pg_wchar *to, int len)

typedef int(*	wchar2mb_with_len_converter) (const pg_wchar *from, unsigned char *to, int len)

typedef int(*	mblen_converter) (const unsigned char *mbstr)

typedef int(*	mbdisplaylen_converter) (const unsigned char *mbstr)

typedef bool(*	mbcharacter_incrementer) (unsigned char *mbstr, int len)

typedef int(*	mbchar_verifier) (const unsigned char *mbstr, int len)

typedef int(*	mbstr_verifier) (const unsigned char *mbstr, int len)

typedef uint32(*	utf_local_conversion_func) (uint32 code)

Enumerations
enum	pg_enc { PG_SQL_ASCII = 0 , PG_EUC_JP , PG_EUC_CN , PG_EUC_KR , PG_EUC_TW , PG_EUC_JIS_2004 , PG_UTF8 , PG_MULE_INTERNAL , PG_LATIN1 , PG_LATIN2 , PG_LATIN3 , PG_LATIN4 , PG_LATIN5 , PG_LATIN6 , PG_LATIN7 , PG_LATIN8 , PG_LATIN9 , PG_LATIN10 , PG_WIN1256 , PG_WIN1258 , PG_WIN866 , PG_WIN874 , PG_KOI8R , PG_WIN1251 , PG_WIN1252 , PG_ISO_8859_5 , PG_ISO_8859_6 , PG_ISO_8859_7 , PG_ISO_8859_8 , PG_WIN1250 , PG_WIN1253 , PG_WIN1254 , PG_WIN1255 , PG_WIN1257 , PG_KOI8U , PG_SJIS , PG_BIG5 , PG_GBK , PG_UHC , PG_GB18030 , PG_JOHAB , PG_SHIFT_JIS_2004 , _PG_LAST_ENCODING_ }

Functions
static bool	is_valid_unicode_codepoint (pg_wchar c)

static bool	is_utf16_surrogate_first (pg_wchar c)

static bool	is_utf16_surrogate_second (pg_wchar c)

static pg_wchar	surrogate_pair_to_codepoint (pg_wchar first, pg_wchar second)

static pg_wchar	utf8_to_unicode (const unsigned char *c)

static unsigned char *	unicode_to_utf8 (pg_wchar c, unsigned char *utf8string)

static int	unicode_utf8len (pg_wchar c)

int	pg_char_to_encoding (const char *name)

const char *	pg_encoding_to_char (int encoding)

int	pg_valid_server_encoding_id (int encoding)

void	pg_encoding_set_invalid (int encoding, char *dst)

int	pg_encoding_mblen (int encoding, const char *mbstr)

int	pg_encoding_mblen_or_incomplete (int encoding, const char *mbstr, size_t remaining)

int	pg_encoding_mblen_bounded (int encoding, const char *mbstr)

int	pg_encoding_dsplen (int encoding, const char *mbstr)

int	pg_encoding_verifymbchar (int encoding, const char *mbstr, int len)

int	pg_encoding_verifymbstr (int encoding, const char *mbstr, int len)

int	pg_encoding_max_length (int encoding)

int	pg_valid_client_encoding (const char *name)

int	pg_valid_server_encoding (const char *name)

bool	is_encoding_supported_by_icu (int encoding)

const char *	get_encoding_name_for_icu (int encoding)

bool	pg_utf8_islegal (const unsigned char *source, int length)

int	pg_utf_mblen (const unsigned char *s)

int	pg_mule_mblen (const unsigned char *s)

int	pg_mb2wchar (const char *from, pg_wchar *to)

int	pg_mb2wchar_with_len (const char *from, pg_wchar *to, int len)

int	pg_encoding_mb2wchar_with_len (int encoding, const char *from, pg_wchar *to, int len)

int	pg_wchar2mb (const pg_wchar *from, char *to)

int	pg_wchar2mb_with_len (const pg_wchar *from, char *to, int len)

int	pg_encoding_wchar2mb_with_len (int encoding, const pg_wchar *from, char *to, int len)

int	pg_char_and_wchar_strcmp (const char *s1, const pg_wchar *s2)

int	pg_wchar_strncmp (const pg_wchar *s1, const pg_wchar *s2, size_t n)

int	pg_char_and_wchar_strncmp (const char *s1, const pg_wchar *s2, size_t n)

size_t	pg_wchar_strlen (const pg_wchar *str)

int	pg_mblen (const char *mbstr)

int	pg_dsplen (const char *mbstr)

int	pg_mbstrlen (const char *mbstr)

int	pg_mbstrlen_with_len (const char *mbstr, int limit)

int	pg_mbcliplen (const char *mbstr, int len, int limit)

int	pg_encoding_mbcliplen (int encoding, const char *mbstr, int len, int limit)

int	pg_mbcharcliplen (const char *mbstr, int len, int limit)

int	pg_database_encoding_max_length (void)

mbcharacter_incrementer	pg_database_encoding_character_incrementer (void)

int	PrepareClientEncoding (int encoding)

int	SetClientEncoding (int encoding)

void	InitializeClientEncoding (void)

int	pg_get_client_encoding (void)

const char *	pg_get_client_encoding_name (void)

void	SetDatabaseEncoding (int encoding)

int	GetDatabaseEncoding (void)

const char *	GetDatabaseEncodingName (void)

void	SetMessageEncoding (int encoding)

int	GetMessageEncoding (void)

unsigned char *	pg_do_encoding_conversion (unsigned char *src, int len, int src_encoding, int dest_encoding)

int	pg_do_encoding_conversion_buf (Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError)

char *	pg_client_to_server (const char *s, int len)

char *	pg_server_to_client (const char *s, int len)

char *	pg_any_to_server (const char *s, int len, int encoding)

char *	pg_server_to_any (const char *s, int len, int encoding)

void	pg_unicode_to_server (pg_wchar c, unsigned char *s)

bool	pg_unicode_to_server_noerror (pg_wchar c, unsigned char *s)

unsigned short	BIG5toCNS (unsigned short big5, unsigned char *lc)

unsigned short	CNStoBIG5 (unsigned short cns, unsigned char lc)

int	UtfToLocal (const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)

int	LocalToUtf (const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)

bool	pg_verifymbstr (const char *mbstr, int len, bool noError)

bool	pg_verify_mbstr (int encoding, const char *mbstr, int len, bool noError)

int	pg_verify_mbstr_len (int encoding, const char *mbstr, int len, bool noError)

void	check_encoding_conversion_args (int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)

pg_noreturn void	report_invalid_encoding (int encoding, const char *mbstr, int len)

pg_noreturn void	report_untranslatable_char (int src_encoding, int dest_encoding, const char *mbstr, int len)

int	local2local (const unsigned char *l, unsigned char *p, int len, int src_encoding, int dest_encoding, const unsigned char *tab, bool noError)

int	latin2mic (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, bool noError)

int	mic2latin (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, bool noError)

int	latin2mic_with_table (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)

int	mic2latin_with_table (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)

Variables
PGDLLIMPORT const pg_enc2name	pg_enc2name_tbl []

PGDLLIMPORT const char *	pg_enc2gettext_tbl []

PGDLLIMPORT const pg_wchar_tbl	pg_wchar_table []

Macro Definition Documentation

◆ CHECK_ENCODING_CONVERSION_ARGS

#define CHECK_ENCODING_CONVERSION_ARGS	(	srcencoding,
		destencoding
	)

Value:

    check_encoding_conversion_args(PG_GETARG_INT32(0), \
                                   PG_GETARG_INT32(1), \
                                   PG_GETARG_INT32(4), \
                                   (srcencoding), \
                                   (destencoding))

Definition at line 507 of file pg_wchar.h.

◆ IS_LC1

#define IS_LC1 ( c ) ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8d)

Definition at line 126 of file pg_wchar.h.

◆ IS_LC2

#define IS_LC2 ( c ) ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)

Definition at line 144 of file pg_wchar.h.

◆ IS_LCPRV1

#define IS_LCPRV1 ( c ) ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B)

Definition at line 152 of file pg_wchar.h.

◆ IS_LCPRV1_A_RANGE

#define IS_LCPRV1_A_RANGE ( c ) ((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)

Definition at line 153 of file pg_wchar.h.

◆ IS_LCPRV1_B_RANGE

#define IS_LCPRV1_B_RANGE ( c ) ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)

Definition at line 155 of file pg_wchar.h.

◆ IS_LCPRV2

#define IS_LCPRV2 ( c ) ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B)

Definition at line 164 of file pg_wchar.h.

◆ IS_LCPRV2_A_RANGE

#define IS_LCPRV2_A_RANGE ( c ) ((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)

Definition at line 165 of file pg_wchar.h.

◆ IS_LCPRV2_B_RANGE

#define IS_LCPRV2_B_RANGE ( c ) ((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)

Definition at line 167 of file pg_wchar.h.

◆ ISSJISHEAD

#define ISSJISHEAD ( c ) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))

Definition at line 44 of file pg_wchar.h.

◆ ISSJISTAIL

#define ISSJISTAIL ( c ) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))

Definition at line 45 of file pg_wchar.h.

◆ LC_ARABIC_1_COLUMN

#define LC_ARABIC_1_COLUMN 0xa5 /* Arabic 1-column (not supported) */

Definition at line 178 of file pg_wchar.h.

◆ LC_ARABIC_2_COLUMN

#define LC_ARABIC_2_COLUMN 0xa8 /* Arabic 2-column (not supported) */

Definition at line 181 of file pg_wchar.h.

◆ LC_ARABIC_DIGIT

#define LC_ARABIC_DIGIT 0xa4 /* Arabic digit (not supported) */

Definition at line 177 of file pg_wchar.h.

◆ LC_ASCII_RIGHT_TO_LEFT

#define LC_ASCII_RIGHT_TO_LEFT

Value:

                                         0xa6   /* ASCII (left half of ISO8859-1) with
                                         * right-to-left direction (not
                                         * supported) */

Definition at line 179 of file pg_wchar.h.

◆ LC_BIG5_1

#define LC_BIG5_1

Value:

0x98 /* Plane 1 Chinese traditional (not

* supported) */

Definition at line 140 of file pg_wchar.h.

◆ LC_BIG5_2

#define LC_BIG5_2

Value:

0x99 /* Plane 2 Chinese traditional (not

* supported) */

Definition at line 141 of file pg_wchar.h.

◆ LC_CNS11643_1

#define LC_CNS11643_1 0x95 /* CNS 11643-1992 Plane 1 */

Definition at line 137 of file pg_wchar.h.

◆ LC_CNS11643_2

#define LC_CNS11643_2 0x96 /* CNS 11643-1992 Plane 2 */

Definition at line 138 of file pg_wchar.h.

◆ LC_CNS11643_3

#define LC_CNS11643_3 0xf6 /* CNS 11643-1992 Plane 3 */

Definition at line 192 of file pg_wchar.h.

◆ LC_CNS11643_4

#define LC_CNS11643_4 0xf7 /* CNS 11643-1992 Plane 4 */

Definition at line 193 of file pg_wchar.h.

◆ LC_CNS11643_5

#define LC_CNS11643_5 0xf8 /* CNS 11643-1992 Plane 5 */

Definition at line 194 of file pg_wchar.h.

◆ LC_CNS11643_6

#define LC_CNS11643_6 0xf9 /* CNS 11643-1992 Plane 6 */

Definition at line 195 of file pg_wchar.h.

◆ LC_CNS11643_7

#define LC_CNS11643_7 0xfa /* CNS 11643-1992 Plane 7 */

Definition at line 196 of file pg_wchar.h.

◆ LC_ETHIOPIC

#define LC_ETHIOPIC 0xf5 /* Ethiopic characters (not supported) */

Definition at line 191 of file pg_wchar.h.

◆ LC_GB2312_80

#define LC_GB2312_80 0x91 /* Chinese */

Definition at line 133 of file pg_wchar.h.

◆ LC_INDIAN_1_COLUMN

#define LC_INDIAN_1_COLUMN

Value:

0xf0 /* Indian charset for 1-column width

* glyphs (not supported) */

Definition at line 186 of file pg_wchar.h.

◆ LC_INDIAN_2_COLUMN

#define LC_INDIAN_2_COLUMN

Value:

0xfb /* Indian charset for 2-column width

* glyphs (not supported) */

Definition at line 197 of file pg_wchar.h.

◆ LC_IPA

#define LC_IPA

Value:

0xa1 /* IPA (International Phonetic

* Association) (not supported) */

Definition at line 174 of file pg_wchar.h.

◆ LC_ISO8859_1

#define LC_ISO8859_1 0x81 /* ISO8859 Latin 1 */

Definition at line 105 of file pg_wchar.h.

◆ LC_ISO8859_15

#define LC_ISO8859_15 0x8e /* ISO8859 Latin 15 (not supported yet) */

Definition at line 122 of file pg_wchar.h.

◆ LC_ISO8859_2

#define LC_ISO8859_2 0x82 /* ISO8859 Latin 2 */

Definition at line 106 of file pg_wchar.h.

◆ LC_ISO8859_3

#define LC_ISO8859_3 0x83 /* ISO8859 Latin 3 */

Definition at line 107 of file pg_wchar.h.

◆ LC_ISO8859_4

#define LC_ISO8859_4 0x84 /* ISO8859 Latin 4 */

Definition at line 108 of file pg_wchar.h.

◆ LC_ISO8859_5

#define LC_ISO8859_5 0x8c /* ISO8859 Cyrillic */

Definition at line 120 of file pg_wchar.h.

◆ LC_ISO8859_6

#define LC_ISO8859_6 0x87 /* Arabic (not supported yet) */

Definition at line 111 of file pg_wchar.h.

◆ LC_ISO8859_7

#define LC_ISO8859_7 0x86 /* Greek (not supported yet) */

Definition at line 110 of file pg_wchar.h.

◆ LC_ISO8859_8

#define LC_ISO8859_8 0x88 /* Hebrew (not supported yet) */

Definition at line 112 of file pg_wchar.h.

◆ LC_ISO8859_9

#define LC_ISO8859_9 0x8d /* ISO8859 Latin 5 (not supported yet) */

Definition at line 121 of file pg_wchar.h.

◆ LC_JISX0201K

#define LC_JISX0201K 0x89 /* Japanese 1 byte kana */

Definition at line 113 of file pg_wchar.h.

◆ LC_JISX0201R

#define LC_JISX0201R 0x8a /* Japanese 1 byte Roman */

Definition at line 114 of file pg_wchar.h.

◆ LC_JISX0208

#define LC_JISX0208 0x92 /* Japanese Kanji (JIS X 0208) */

Definition at line 134 of file pg_wchar.h.

◆ LC_JISX0208_1978

#define LC_JISX0208_1978 0x90 /* Japanese Kanji, old JIS (not supported) */

Definition at line 132 of file pg_wchar.h.

◆ LC_JISX0212

#define LC_JISX0212 0x94 /* Japanese Kanji (JIS X 0212) */

Definition at line 136 of file pg_wchar.h.

◆ LC_JISX0213_1

#define LC_JISX0213_1

Value:

0x97 /* Japanese Kanji (JIS X 0213 Plane 1)

* (not supported) */

Definition at line 139 of file pg_wchar.h.

◆ LC_KOI8_R

#define LC_KOI8_R 0x8b /* Cyrillic KOI8-R */

Definition at line 119 of file pg_wchar.h.

◆ LC_KS5601

#define LC_KS5601 0x93 /* Korean */

Definition at line 135 of file pg_wchar.h.

◆ LC_LAO

#define LC_LAO

Value:

0xa7 /* Lao characters (ISO10646 0E80..0EDF)

* (not supported) */

Definition at line 180 of file pg_wchar.h.

◆ LC_SISHENG

#define LC_SISHENG

Value:

0xa0 /* Chinese SiSheng characters for

* PinYin/ZhuYin (not supported) */

Definition at line 173 of file pg_wchar.h.

◆ LC_TIBETAN

#define LC_TIBETAN 0xfc /* Tibetan (not supported) */

Definition at line 198 of file pg_wchar.h.

◆ LC_TIBETAN_1_COLUMN

#define LC_TIBETAN_1_COLUMN

Value:

0xf1 /* Tibetan 1-column width glyphs (not

* supported) */

Definition at line 187 of file pg_wchar.h.

◆ LC_TIS620

#define LC_TIS620 0x85 /* Thai (not supported yet) */

Definition at line 109 of file pg_wchar.h.

◆ LC_UNICODE_SUBSET

#define LC_UNICODE_SUBSET

Value:

0xf4 /* Unicode characters of the range

* U+0100..U+24FF. (not supported) */

Definition at line 190 of file pg_wchar.h.

◆ LC_UNICODE_SUBSET_2

#define LC_UNICODE_SUBSET_2

Value:

0xf2 /* Unicode characters of the range

* U+2500..U+33FF. (not supported) */

Definition at line 188 of file pg_wchar.h.

◆ LC_UNICODE_SUBSET_3

#define LC_UNICODE_SUBSET_3

Value:

0xf3 /* Unicode characters of the range

* U+E000..U+FFFF. (not supported) */

Definition at line 189 of file pg_wchar.h.

◆ LC_VISCII_LOWER

#define LC_VISCII_LOWER

Value:

0xa2 /* Vietnamese VISCII1.1 lower-case (not

* supported) */

Definition at line 175 of file pg_wchar.h.

◆ LC_VISCII_UPPER

#define LC_VISCII_UPPER

Value:

0xa3 /* Vietnamese VISCII1.1 upper-case (not

* supported) */

Definition at line 176 of file pg_wchar.h.

◆ LCPRV1_A

#define LCPRV1_A 0x9a

Definition at line 150 of file pg_wchar.h.

◆ LCPRV1_B

#define LCPRV1_B 0x9b

Definition at line 151 of file pg_wchar.h.

◆ LCPRV2_A

#define LCPRV2_A 0x9c

Definition at line 162 of file pg_wchar.h.

◆ LCPRV2_B

#define LCPRV2_B 0x9d

Definition at line 163 of file pg_wchar.h.

◆ MAX_CONVERSION_GROWTH

#define MAX_CONVERSION_GROWTH 4

Definition at line 302 of file pg_wchar.h.

◆ MAX_CONVERSION_INPUT_LENGTH

#define MAX_CONVERSION_INPUT_LENGTH 16

Definition at line 320 of file pg_wchar.h.

◆ MAX_MULTIBYTE_CHAR_LEN

#define MAX_MULTIBYTE_CHAR_LEN 4

Definition at line 33 of file pg_wchar.h.

◆ MAX_UNICODE_EQUIVALENT_STRING

#define MAX_UNICODE_EQUIVALENT_STRING 16

Definition at line 329 of file pg_wchar.h.

◆ pg_char_to_encoding

#define pg_char_to_encoding pg_char_to_encoding_private

Definition at line 629 of file pg_wchar.h.

◆ PG_ENCODING_BE_LAST

#define PG_ENCODING_BE_LAST PG_KOI8U

Definition at line 275 of file pg_wchar.h.

◆ PG_ENCODING_IS_CLIENT_ONLY

#define PG_ENCODING_IS_CLIENT_ONLY ( _enc ) ((_enc) > PG_ENCODING_BE_LAST && (_enc) < _PG_LAST_ENCODING_)

Definition at line 284 of file pg_wchar.h.

◆ pg_encoding_to_char

#define pg_encoding_to_char pg_encoding_to_char_private

Definition at line 630 of file pg_wchar.h.

◆ pg_utf_mblen

#define pg_utf_mblen pg_utf_mblen_private

Definition at line 633 of file pg_wchar.h.

◆ PG_VALID_BE_ENCODING

#define PG_VALID_BE_ENCODING ( _enc ) ((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST)

Definition at line 281 of file pg_wchar.h.

◆ PG_VALID_ENCODING

#define PG_VALID_ENCODING ( _enc ) ((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_)

Definition at line 287 of file pg_wchar.h.

◆ PG_VALID_FE_ENCODING

#define PG_VALID_FE_ENCODING ( _enc ) PG_VALID_ENCODING(_enc)

Definition at line 291 of file pg_wchar.h.

◆ pg_valid_server_encoding

#define pg_valid_server_encoding pg_valid_server_encoding_private

Definition at line 631 of file pg_wchar.h.

◆ pg_valid_server_encoding_id

#define pg_valid_server_encoding_id pg_valid_server_encoding_id_private

Definition at line 632 of file pg_wchar.h.

◆ SS2

#define SS2 0x8e /* single shift 2 (JIS0201) */

Definition at line 38 of file pg_wchar.h.

◆ SS3

#define SS3 0x8f /* single shift 3 (JIS0212) */

Definition at line 39 of file pg_wchar.h.

Typedef Documentation

◆ mb2wchar_with_len_converter

typedef int(* mb2wchar_with_len_converter) (const unsigned char *from, pg_wchar *to, int len)

Definition at line 358 of file pg_wchar.h.

◆ mbchar_verifier

typedef int(* mbchar_verifier) (const unsigned char *mbstr, int len)

Definition at line 372 of file pg_wchar.h.

◆ mbcharacter_incrementer

typedef bool(* mbcharacter_incrementer) (unsigned char *mbstr, int len)

Definition at line 370 of file pg_wchar.h.

◆ mbdisplaylen_converter

typedef int(* mbdisplaylen_converter) (const unsigned char *mbstr)

Definition at line 368 of file pg_wchar.h.

◆ mblen_converter

typedef int(* mblen_converter) (const unsigned char *mbstr)

Definition at line 366 of file pg_wchar.h.

◆ mbstr_verifier

typedef int(* mbstr_verifier) (const unsigned char *mbstr, int len)

Definition at line 374 of file pg_wchar.h.

◆ pg_enc

typedef enum pg_enc pg_enc

◆ pg_enc2name

typedef struct pg_enc2name pg_enc2name

◆ pg_wchar

typedef unsigned int pg_wchar

Definition at line 28 of file pg_wchar.h.

◆ utf_local_conversion_func

typedef uint32(* utf_local_conversion_func) (uint32 code)

Definition at line 499 of file pg_wchar.h.

◆ wchar2mb_with_len_converter

typedef int(* wchar2mb_with_len_converter) (const pg_wchar *from, unsigned char *to, int len)

Definition at line 362 of file pg_wchar.h.

Enumeration Type Documentation

◆ pg_enc

enum pg_enc

Enumerator
PG_SQL_ASCII
PG_EUC_JP
PG_EUC_CN
PG_EUC_KR
PG_EUC_TW
PG_EUC_JIS_2004
PG_UTF8
PG_MULE_INTERNAL
PG_LATIN1
PG_LATIN2
PG_LATIN3
PG_LATIN4
PG_LATIN5
PG_LATIN6
PG_LATIN7
PG_LATIN8
PG_LATIN9
PG_LATIN10
PG_WIN1256
PG_WIN1258
PG_WIN866
PG_WIN874
PG_KOI8R
PG_WIN1251
PG_WIN1252
PG_ISO_8859_5
PG_ISO_8859_6
PG_ISO_8859_7
PG_ISO_8859_8
PG_WIN1250
PG_WIN1253
PG_WIN1254
PG_WIN1255
PG_WIN1257
PG_KOI8U
PG_SJIS
PG_BIG5
PG_GBK
PG_UHC
PG_GB18030
PG_JOHAB
PG_SHIFT_JIS_2004
_PG_LAST_ENCODING_

Definition at line 224 of file pg_wchar.h.

/*
 * WARNING: If you add some encoding don't forget to update
 *          the pg_enc2name_tbl[] array (in src/common/encnames.c),
 *          the pg_enc2gettext_tbl[] array (in src/common/encnames.c) and
 *          the pg_wchar_table[] array (in src/common/wchar.c) and to check
 *          PG_ENCODING_BE_LAST macro.
 *
 * PG_SQL_ASCII is default encoding and must be = 0.
 *
 * XXX  We must avoid renumbering any backend encoding until libpq's major
 * version number is increased beyond 5; it turns out that the backend
 * encoding IDs are effectively part of libpq's ABI as far as 8.2 initdb and
 * psql are concerned.
 */
typedef enum pg_enc
{
    PG_SQL_ASCII = 0,           /* SQL/ASCII */
    PG_EUC_JP,                  /* EUC for Japanese */
    PG_EUC_CN,                  /* EUC for Chinese */
    PG_EUC_KR,                  /* EUC for Korean */
    PG_EUC_TW,                  /* EUC for Taiwan */
    PG_EUC_JIS_2004,            /* EUC-JIS-2004 */
    PG_UTF8,                    /* Unicode UTF8 */
    PG_MULE_INTERNAL,           /* Mule internal code */
    PG_LATIN1,                  /* ISO-8859-1 Latin 1 */
    PG_LATIN2,                  /* ISO-8859-2 Latin 2 */
    PG_LATIN3,                  /* ISO-8859-3 Latin 3 */
    PG_LATIN4,                  /* ISO-8859-4 Latin 4 */
    PG_LATIN5,                  /* ISO-8859-9 Latin 5 */
    PG_LATIN6,                  /* ISO-8859-10 Latin6 */
    PG_LATIN7,                  /* ISO-8859-13 Latin7 */
    PG_LATIN8,                  /* ISO-8859-14 Latin8 */
    PG_LATIN9,                  /* ISO-8859-15 Latin9 */
    PG_LATIN10,                 /* ISO-8859-16 Latin10 */
    PG_WIN1256,                 /* windows-1256 */
    PG_WIN1258,                 /* Windows-1258 */
    PG_WIN866,                  /* (MS-DOS CP866) */
    PG_WIN874,                  /* windows-874 */
    PG_KOI8R,                   /* KOI8-R */
    PG_WIN1251,                 /* windows-1251 */
    PG_WIN1252,                 /* windows-1252 */
    PG_ISO_8859_5,              /* ISO-8859-5 */
    PG_ISO_8859_6,              /* ISO-8859-6 */
    PG_ISO_8859_7,              /* ISO-8859-7 */
    PG_ISO_8859_8,              /* ISO-8859-8 */
    PG_WIN1250,                 /* windows-1250 */
    PG_WIN1253,                 /* windows-1253 */
    PG_WIN1254,                 /* windows-1254 */

Function Documentation

◆ BIG5toCNS()

unsigned short BIG5toCNS	(	unsigned short	big5,
		unsigned char *	lc
	)

Definition at line 292 of file big5.c.

{
    /*
     * Convert a Big5 code to a CNS 11643-1992 code.
     *
     * On success, *lc receives the LC_CNS11643_* identifier of the plane the
     * result belongs to and the return value is the CNS code with 0x8080
     * OR'ed in.  If no mapping is found, *lc is set to 0 and '?' is
     * returned.
     */
    unsigned short cns = 0;
    int         i;
 
    if (big5 < 0xc940U)
    {
        /* level 1 */
 
        /*
         * Codes listed in the b1c4 exception table map to plane 4; check
         * those first and return immediately on a hit.
         */
        for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
        {
            if (b1c4[i][0] == big5)
            {
                *lc = LC_CNS11643_4;
                return (b1c4[i][1] | 0x8080U);
            }
        }
 
        /* *lc is only set when the range search yields a nonzero code */
        if (0 < (cns = BinarySearchRange(big5Level1ToCnsPlane1, 23, big5)))
            *lc = LC_CNS11643_1;
    }
    else if (big5 == 0xc94aU)
    {
        /* level 2 */
        /* this single code is special-cased: it maps to plane 1, 0x4442 */
        *lc = LC_CNS11643_1;
        cns = 0x4442;
    }
    else
    {
        /* level 2 */
        /*
         * Codes listed in the b2c3 exception table map to plane 3; check
         * those first and return immediately on a hit.
         */
        for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
        {
            if (b2c3[i][0] == big5)
            {
                *lc = LC_CNS11643_3;
                return (b2c3[i][1] | 0x8080U);
            }
        }
 
        /* *lc is only set when the range search yields a nonzero code */
        if (0 < (cns = BinarySearchRange(big5Level2ToCnsPlane2, 46, big5)))
            *lc = LC_CNS11643_2;
    }
 
    /* cns is still 0 if neither the exception table nor the search matched */
    if (0 == cns)
    {                           /* no mapping Big5 to CNS 11643-1992 */
        *lc = 0;
        return (unsigned short) '?';
    }
 
    return cns | 0x8080;
}

References b1c4, b2c3, big5Level1ToCnsPlane1, big5Level2ToCnsPlane2, BinarySearchRange(), i, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, and LC_CNS11643_4.

Referenced by big52euc_tw(), and big52mic().

◆ check_encoding_conversion_args()

void check_encoding_conversion_args	(	int	src_encoding,
		int	dest_encoding,
		int	len,
		int	expected_src_encoding,
		int	expected_dest_encoding
	)

Definition at line 1670 of file mbutils.c.

{
    /*
     * Sanity-check the arguments of an encoding conversion call.  Reports
     * an error via elog(ERROR, ...) if either encoding ID is out of range,
     * if an encoding differs from the expected one, or if len is negative.
     * A negative expected_*_encoding disables the corresponding match check.
     */

    /* validate the ID before it is used to index pg_enc2name_tbl below */
    if (!PG_VALID_ENCODING(src_encoding))
        elog(ERROR, "invalid source encoding ID: %d", src_encoding);
    if (src_encoding != expected_src_encoding && expected_src_encoding >= 0)
        elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
             pg_enc2name_tbl[expected_src_encoding].name,
             pg_enc2name_tbl[src_encoding].name);
    /* same pair of checks for the destination encoding */
    if (!PG_VALID_ENCODING(dest_encoding))
        elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
    if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= 0)
        elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
             pg_enc2name_tbl[expected_dest_encoding].name,
             pg_enc2name_tbl[dest_encoding].name);
    if (len < 0)
        elog(ERROR, "encoding conversion length must not be negative");
}

References elog, ERROR, len, name, pg_enc2name_tbl, and PG_VALID_ENCODING.

◆ CNStoBIG5()

unsigned short CNStoBIG5	(	unsigned short	cns,
		unsigned char	lc
	)

Definition at line 345 of file big5.c.

{
    /*
     * Convert a CNS 11643-1992 code in the plane identified by lc to a Big5
     * code.  Returns 0 (the initial value of big5) when lc is not one of the
     * handled planes or when the plane 3/4 table lookup finds no entry.
     */
    int         i;
    unsigned int big5 = 0;
 
    /* keep only the low 7 bits of each byte before looking the code up */
    cns &= 0x7f7f;
 
    switch (lc)
    {
        case LC_CNS11643_1:
            big5 = BinarySearchRange(cnsPlane1ToBig5Level1, 24, cns);
            break;
        case LC_CNS11643_2:
            big5 = BinarySearchRange(cnsPlane2ToBig5Level2, 47, cns);
            break;
        case LC_CNS11643_3:
            /* linear search of the exception table, matching on the CNS column */
            for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
            {
                if (b2c3[i][1] == cns)
                    return b2c3[i][0];
            }
            break;
        case LC_CNS11643_4:
            /* linear search of the exception table, matching on the CNS column */
            for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
            {
                if (b1c4[i][1] == cns)
                    return b1c4[i][0];
            }
            /* no break here: falls through to default, which only breaks */
        default:
            break;
    }
    /* big5 is unsigned int; it is converted to the unsigned short return type */
    return big5;
}

References b1c4, b2c3, BinarySearchRange(), cnsPlane1ToBig5Level1, cnsPlane2ToBig5Level2, i, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, and LC_CNS11643_4.

Referenced by euc_tw2big5(), and mic2big5().

◆ get_encoding_name_for_icu()

const char * get_encoding_name_for_icu ( int encoding )

Definition at line 472 of file encnames.c.

{
    /*
     * Look up the pg_enc2icu_tbl entry for the given encoding ID.  Returns
     * NULL when the ID fails PG_VALID_BE_ENCODING, so the table is never
     * indexed with an ID outside the backend encoding range.
     */
    if (!PG_VALID_BE_ENCODING(encoding))
        return NULL;
    return pg_enc2icu_tbl[encoding];
}

References encoding, pg_enc2icu_tbl, and PG_VALID_BE_ENCODING.

◆ GetDatabaseEncoding()

int GetDatabaseEncoding ( void )

Definition at line 1262 of file mbutils.c.

{
    /* return the encoding ID stored in the current DatabaseEncoding entry */
    return DatabaseEncoding->encoding;
}

References DatabaseEncoding, and pg_enc2name::encoding.

Referenced by ascii(), BeginCopyFrom(), BeginCopyTo(), char2wchar(), chr(), CollationCreate(), CollationGetCollid(), compareStrings(), convert_from_utf8(), convert_to_utf8(), CopyConversionError(), CopyConvertBuf(), create_pg_locale_builtin(), create_pg_locale_icu(), create_pg_locale_libc(), cstr2sv(), dblink_connect(), dblink_get_conn(), DefineCollation(), Generic_Text_IC_like(), GenericMatchText(), get_collation_oid(), get_json_object_as_hash(), InitializeClientEncoding(), IsThereCollationInNamespace(), json_recv(), jsonb_from_cstring(), locate_stem_module(), LogicalOutputWrite(), makeJsonLexContext(), p_isspecial(), ParallelWorkerMain(), pg_database_encoding_character_incrementer(), pg_database_encoding_max_length(), pg_generic_charinc(), pg_perm_setlocale(), pg_set_regex_collation(), pg_unicode_to_server(), pg_unicode_to_server_noerror(), pg_verifymbstr(), pgss_store(), PLyUnicode_Bytes(), populate_array_json(), PrepareClientEncoding(), read_extension_script_file(), SetClientEncoding(), str_casefold(), sv2cstr(), text_position_setup(), to_ascii_default(), type_maximum_size(), unicode_assigned(), unicode_norm_form_from_string(), wchar2char(), xml_in(), xml_is_document(), xmlparse(), and xmltotext_with_options().

◆ GetDatabaseEncodingName()

const char * GetDatabaseEncodingName ( void )

Definition at line 1268 of file mbutils.c.

{
    /* return the name stored in the current DatabaseEncoding entry */
    return DatabaseEncoding->name;
}

References DatabaseEncoding, and pg_enc2name::name.

Referenced by check_client_encoding(), CheckMyDatabase(), connect_pg_server(), dblink_connect(), dblink_get_conn(), get_collation_oid(), InitializeClientEncoding(), IsThereCollationInNamespace(), json_errdetail(), libpqrcv_connect(), locate_stem_module(), pg_unicode_to_server(), ProcessConfigFileInternal(), and regcollationin().

◆ GetMessageEncoding()

int GetMessageEncoding ( void )

Definition at line 1309 of file mbutils.c.

{
    /* Numeric ID stored in the entry that MessageEncoding points at. */
    int         msg_encoding = MessageEncoding->encoding;

    return msg_encoding;
}

References pg_enc2name::encoding, and MessageEncoding.

◆ InitializeClientEncoding()

void InitializeClientEncoding ( void )

Definition at line 282 of file mbutils.c.

{
    /*
     * Apply the pending client encoding and mark backend startup complete.
     * Raises FATAL if the pending client encoding cannot be prepared or set.
     * Also caches the UTF8-to-server conversion function in
     * Utf8ToServerConvProc when the server encoding is neither UTF8 nor
     * SQL_ASCII and a default conversion exists.
     */
    int         current_server_encoding;
 
    /* A second call would trip this assertion, since the flag is set here. */
    Assert(!backend_startup_complete);
    backend_startup_complete = true;
 
    if (PrepareClientEncoding(pending_client_encoding) < 0 ||
        SetClientEncoding(pending_client_encoding) < 0)
    {
        /*
         * Oops, the requested conversion is not available. We couldn't fail
         * before, but we can now.
         */
        ereport(FATAL,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("conversion between %s and %s is not supported",
                        pg_enc2name_tbl[pending_client_encoding].name,
                        GetDatabaseEncodingName())));
    }
 
    /*
     * Also look up the UTF8-to-server conversion function if needed.  Since
     * the server encoding is fixed within any one backend process, we don't
     * have to do this more than once.
     */
    current_server_encoding = GetDatabaseEncoding();
    if (current_server_encoding != PG_UTF8 &&
        current_server_encoding != PG_SQL_ASCII)
    {
        Oid         utf8_to_server_proc;
 
        AssertCouldGetRelation();
        utf8_to_server_proc =
            FindDefaultConversionProc(PG_UTF8,
                                      current_server_encoding);
        /* If there's no such conversion, just leave the pointer as NULL */
        if (OidIsValid(utf8_to_server_proc))
        {
            FmgrInfo   *finfo;
 
            /* Allocate in TopMemoryContext, as is the fmgr lookup data. */
            finfo = (FmgrInfo *) MemoryContextAlloc(TopMemoryContext,
                                                    sizeof(FmgrInfo));
            fmgr_info_cxt(utf8_to_server_proc, finfo,
                          TopMemoryContext);
            /* Set Utf8ToServerConvProc only after data is fully valid */
            Utf8ToServerConvProc = finfo;
        }
    }
}

References Assert(), AssertCouldGetRelation(), backend_startup_complete, ereport, errcode(), errmsg(), FATAL, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), GetDatabaseEncodingName(), MemoryContextAlloc(), name, OidIsValid, pending_client_encoding, pg_enc2name_tbl, PG_SQL_ASCII, PG_UTF8, PrepareClientEncoding(), SetClientEncoding(), TopMemoryContext, and Utf8ToServerConvProc.

Referenced by InitPostgres().

◆ is_encoding_supported_by_icu()

bool is_encoding_supported_by_icu ( int encoding )

Definition at line 461 of file encnames.c.

{
    /*
     * The table is consulted only for valid backend encodings; any other
     * value is reported as unsupported without indexing the table.
     */
    return PG_VALID_BE_ENCODING(encoding) &&
        pg_enc2icu_tbl[encoding] != NULL;
}

References encoding, pg_enc2icu_tbl, and PG_VALID_BE_ENCODING.

Referenced by check_icu_locale_encoding(), createdb(), DefineCollation(), and lookup_collation().

◆ is_utf16_surrogate_first()

static bool is_utf16_surrogate_first ( pg_wchar c )

inline static

Definition at line 525 of file pg_wchar.h.

{
    return (c >= 0xD800 && c <= 0xDBFF);
}

Referenced by addUnicode(), json_lex_string(), str_udeescape(), and unistr().

◆ is_utf16_surrogate_second()

static bool is_utf16_surrogate_second ( pg_wchar c )

inline static

Definition at line 531 of file pg_wchar.h.

{
    return (c >= 0xDC00 && c <= 0xDFFF);
}

Referenced by addUnicode(), json_lex_string(), str_udeescape(), and unistr().

◆ is_valid_unicode_codepoint()

static bool is_valid_unicode_codepoint ( pg_wchar c )

inline static

Definition at line 519 of file pg_wchar.h.

{
    return (c > 0 && c <= 0x10FFFF);
}

Referenced by addunicode(), check_unicode_value(), pg_unicode_to_server(), pg_unicode_to_server_noerror(), and unistr().

◆ latin2mic()

int latin2mic	(	const unsigned char *	l,
		unsigned char *	p,
		int	len,
		int	lc,
		int	encoding,
		bool	noError
	)

Definition at line 89 of file conv.c.

{
    /*
     * Copy up to 'len' bytes from 'l' to 'p', emitting the byte 'lc' in
     * front of every byte that has its high bit set.  The output is
     * NUL-terminated and the number of source bytes consumed is returned.
     * A zero byte in the input stops the loop when noError is set and is
     * reported via report_invalid_encoding() otherwise.
     */
    const unsigned char *src = l;

    for (; len > 0; src++, len--)
    {
        int         ch = *src;

        if (ch == 0)
        {
            if (noError)
                break;
            report_invalid_encoding(encoding, (const char *) src, len);
        }

        /* non-ASCII bytes get the leading-character prefix */
        if (IS_HIGHBIT_SET(ch))
            *p++ = lc;
        *p++ = ch;
    }
    *p = '\0';

    return src - l;
}

References encoding, IS_HIGHBIT_SET, len, report_invalid_encoding(), and start.

Referenced by koi8r_to_mic(), latin1_to_mic(), latin2_to_mic(), latin3_to_mic(), and latin4_to_mic().

◆ latin2mic_with_table()

int latin2mic_with_table	(	const unsigned char *	l,
		unsigned char *	p,
		int	len,
		int	lc,
		int	encoding,
		const unsigned char *	tab,
		bool	noError
	)

Definition at line 194 of file conv.c.

{
    /*
     * Like a plain byte copy, except that each high-bit byte is first
     * mapped through 'tab' (indexed by byte - HIGHBIT) and emitted as the
     * pair 'lc', mapped byte.  A zero table entry means the byte cannot be
     * translated.  Returns the number of source bytes consumed; the output
     * is NUL-terminated.
     */
    const unsigned char *src = l;

    for (; len > 0; src++, len--)
    {
        unsigned char ch = *src;

        if (ch == 0)
        {
            if (noError)
                break;
            report_invalid_encoding(encoding, (const char *) src, len);
        }

        if (IS_HIGHBIT_SET(ch))
        {
            unsigned char mapped = tab[ch - HIGHBIT];

            if (mapped == 0)
            {
                if (noError)
                    break;
                report_untranslatable_char(encoding, PG_MULE_INTERNAL,
                                           (const char *) src, len);
            }
            else
            {
                *p++ = lc;
                *p++ = mapped;
            }
        }
        else
            *p++ = ch;          /* ASCII passes through unchanged */
    }
    *p = '\0';

    return src - l;
}

References encoding, HIGHBIT, IS_HIGHBIT_SET, len, PG_MULE_INTERNAL, report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by iso_to_mic(), win1250_to_mic(), win1251_to_mic(), and win866_to_mic().

◆ local2local()

int local2local	(	const unsigned char *	l,
		unsigned char *	p,
		int	len,
		int	src_encoding,
		int	dest_encoding,
		const unsigned char *	tab,
		bool	noError
	)

Definition at line 33 of file conv.c.

{
    /*
     * Translate up to 'len' bytes from 'l' into 'p'.  ASCII bytes are
     * copied as-is; each high-bit byte is replaced by tab[byte - HIGHBIT],
     * where a zero entry means the byte has no equivalent.  Returns the
     * number of source bytes consumed; the output is NUL-terminated.
     */
    const unsigned char *src = l;

    for (; len > 0; src++, len--)
    {
        unsigned char ch = *src;

        if (ch == 0)
        {
            if (noError)
                break;
            report_invalid_encoding(src_encoding, (const char *) src, len);
        }

        if (IS_HIGHBIT_SET(ch))
        {
            unsigned char mapped = tab[ch - HIGHBIT];

            if (mapped == 0)
            {
                if (noError)
                    break;
                report_untranslatable_char(src_encoding, dest_encoding,
                                           (const char *) src, len);
            }
            else
                *p++ = mapped;
        }
        else
            *p++ = ch;
    }
    *p = '\0';

    return src - l;
}

References HIGHBIT, IS_HIGHBIT_SET, len, report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by iso_to_koi8r(), iso_to_win1251(), iso_to_win866(), koi8r_to_iso(), koi8r_to_win1251(), koi8r_to_win866(), latin2_to_win1250(), win1250_to_latin2(), win1251_to_iso(), win1251_to_koi8r(), win1251_to_win866(), win866_to_iso(), win866_to_koi8r(), and win866_to_win1251().

◆ LocalToUtf()

int LocalToUtf	(	const unsigned char *	iso,
		int	len,
		unsigned char *	utf,
		const pg_mb_radix_tree *	map,
		const pg_local_to_utf_combined *	cmap,
		int	cmapsize,
		utf_local_conversion_func	conv_func,
		int	encoding,
		bool	noError
	)

Definition at line 717 of file conv.c.

{
    /*
     * Convert 'len' bytes at 'iso', encoded in 'encoding', to UTF-8 stored
     * at 'utf' (NUL-terminated on return).
     *
     * Each non-ASCII character is verified with pg_encoding_verifymbchar()
     * and then looked up, in order, in the radix tree 'map', in the
     * combined-character table 'cmap' (consulted only when 'map' is given;
     * 'cmapsize' is its entry count), and finally through 'conv_func'.  Any
     * of these may be NULL.
     *
     * Returns the number of source bytes consumed.  With noError set, the
     * loop stops at the first invalid or untranslatable character;
     * otherwise such input is reported through report_invalid_encoding() or
     * report_untranslatable_char().
     */
    uint32      iiso;
    int         l;
    const pg_local_to_utf_combined *cp;
    const unsigned char *start = iso;
 
    if (!PG_VALID_ENCODING(encoding))
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("invalid encoding number: %d", encoding)));
 
    for (; len > 0; len -= l)
    {
        unsigned char b1 = 0;
        unsigned char b2 = 0;
        unsigned char b3 = 0;
        unsigned char b4 = 0;
 
        /* "break" cases all represent errors */
        if (*iso == '\0')
            break;
 
        if (!IS_HIGHBIT_SET(*iso))
        {
            /* ASCII case is easy, assume it's one-to-one conversion */
            *utf++ = *iso++;
            l = 1;
            continue;
        }
 
        l = pg_encoding_verifymbchar(encoding, (const char *) iso, len);
        if (l < 0)
            break;
 
        /* collect coded char of length l */
        if (l == 1)
            b4 = *iso++;
        else if (l == 2)
        {
            b3 = *iso++;
            b4 = *iso++;
        }
        else if (l == 3)
        {
            b2 = *iso++;
            b3 = *iso++;
            b4 = *iso++;
        }
        else if (l == 4)
        {
            b1 = *iso++;
            b2 = *iso++;
            b3 = *iso++;
            b4 = *iso++;
        }
        else
        {
            elog(ERROR, "unsupported character length %d", l);
            iiso = 0;           /* keep compiler quiet */
        }

        /*
         * Widen to uint32 before shifting.  The bytes would otherwise be
         * promoted to signed int, and shifting a high-bit lead byte left by
         * 24 overflows int, which is undefined behavior.
         */
        iiso = ((uint32) b1 << 24) | ((uint32) b2 << 16) |
            ((uint32) b3 << 8) | (uint32) b4;
 
        if (map)
        {
            uint32      converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
 
            if (converted)
            {
                utf = store_coded_char(utf, converted);
                continue;
            }
 
            /* If there's a combined character map, try that */
            if (cmap)
            {
                cp = bsearch(&iiso, cmap, cmapsize,
                             sizeof(pg_local_to_utf_combined), compare4);
 
                if (cp)
                {
                    /* one local character maps to two UTF-8 characters */
                    utf = store_coded_char(utf, cp->utf1);
                    utf = store_coded_char(utf, cp->utf2);
                    continue;
                }
            }
        }
 
        /* if there's a conversion function, try that */
        if (conv_func)
        {
            uint32      converted = (*conv_func) (iiso);
 
            if (converted)
            {
                utf = store_coded_char(utf, converted);
                continue;
            }
        }
 
        /* failed to translate this character; back up to its first byte */
        iso -= l;
        if (noError)
            break;
        report_untranslatable_char(encoding, PG_UTF8,
                                   (const char *) iso, len);
    }
 
    /* if we broke out of loop early, must be invalid input */
    if (len > 0 && !noError)
        report_invalid_encoding(encoding, (const char *) iso, len);
 
    *utf = '\0';
 
    return iso - start;
}

References compare4(), elog, encoding, ereport, errcode(), errmsg(), ERROR, IS_HIGHBIT_SET, len, pg_encoding_verifymbchar(), pg_mb_radix_conv(), PG_UTF8, PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), start, store_coded_char(), pg_local_to_utf_combined::utf1, and pg_local_to_utf_combined::utf2.

Referenced by big5_to_utf8(), euc_cn_to_utf8(), euc_jis_2004_to_utf8(), euc_jp_to_utf8(), euc_kr_to_utf8(), euc_tw_to_utf8(), gb18030_to_utf8(), gbk_to_utf8(), iso8859_to_utf8(), johab_to_utf8(), koi8r_to_utf8(), koi8u_to_utf8(), shift_jis_2004_to_utf8(), sjis_to_utf8(), uhc_to_utf8(), and win_to_utf8().

◆ mic2latin()

int mic2latin	(	const unsigned char *	mic,
		unsigned char *	p,
		int	len,
		int	lc,
		int	encoding,
		bool	noError
	)

Definition at line 127 of file conv.c.

{
    /*
     * Convert up to 'len' bytes of MULE_INTERNAL text at 'mic' into the
     * single-byte encoding 'encoding', writing to 'p'.  ASCII is copied
     * unchanged; any other character must be a two-byte sequence whose
     * first byte is 'lc' and whose second byte has the high bit set, in
     * which case the second byte is emitted.  Returns the number of source
     * bytes consumed; the output is NUL-terminated.
     */
    const unsigned char *src = mic;

    while (len > 0)
    {
        int         ch = *src;
        int         mclen;

        if (ch == 0)
        {
            if (noError)
                break;
            report_invalid_encoding(PG_MULE_INTERNAL, (const char *) src, len);
        }

        if (!IS_HIGHBIT_SET(ch))
        {
            /* easy for ASCII */
            *p++ = ch;
            src++;
            len--;
            continue;
        }

        mclen = pg_mule_mblen(src);

        /* character runs past the end of the input */
        if (len < mclen)
        {
            if (noError)
                break;
            report_invalid_encoding(PG_MULE_INTERNAL, (const char *) src,
                                    len);
        }

        /* anything but "lc + high-bit byte" has no equivalent */
        if (mclen != 2 || ch != lc || !IS_HIGHBIT_SET(src[1]))
        {
            if (noError)
                break;
            report_untranslatable_char(PG_MULE_INTERNAL, encoding,
                                       (const char *) src, len);
        }

        *p++ = src[1];
        src += 2;
        len -= 2;
    }
    *p = '\0';

    return src - mic;
}

References encoding, IS_HIGHBIT_SET, len, PG_MULE_INTERNAL, pg_mule_mblen(), report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by mic_to_koi8r(), mic_to_latin1(), mic_to_latin2(), mic_to_latin3(), and mic_to_latin4().

◆ mic2latin_with_table()

int mic2latin_with_table	(	const unsigned char *	mic,
		unsigned char *	p,
		int	len,
		int	lc,
		int	encoding,
		const unsigned char *	tab,
		bool	noError
	)

Definition at line 257 of file conv.c.

{
    /*
     * Convert up to 'len' bytes of MULE_INTERNAL text at 'mic' into the
     * single-byte encoding 'encoding', writing to 'p'.  ASCII is copied
     * unchanged.  Any other character must be a two-byte sequence starting
     * with 'lc'; its second byte is mapped through 'tab' (indexed by
     * byte - HIGHBIT), where a zero entry means "no equivalent".  Returns
     * the number of source bytes consumed; the output is NUL-terminated.
     */
    const unsigned char *start = mic;
    unsigned char c1,
                c2;
 
    while (len > 0)
    {
        c1 = *mic;
        if (c1 == 0)
        {
            /* embedded NUL: stop quietly or report, depending on noError */
            if (noError)
                break;
            report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
        }
        if (!IS_HIGHBIT_SET(c1))
        {
            /* easy for ASCII */
            *p++ = c1;
            mic++;
            len--;
        }
        else
        {
            int         l = pg_mule_mblen(mic);
 
            /* character runs past the end of the input */
            if (len < l)
            {
                if (noError)
                    break;
                report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
                                        len);
            }
            /* c2 is assigned only when the three earlier tests all pass */
            if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
                (c2 = tab[mic[1] - HIGHBIT]) == 0)
            {
                if (noError)
                    break;
                report_untranslatable_char(PG_MULE_INTERNAL, encoding,
                                           (const char *) mic, len);
                break;          /* keep compiler quiet */
            }
            *p++ = c2;
            mic += 2;
            len -= 2;
        }
    }
    *p = '\0';
 
    return mic - start;
}

References encoding, HIGHBIT, IS_HIGHBIT_SET, len, PG_MULE_INTERNAL, pg_mule_mblen(), report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by mic_to_iso(), mic_to_win1250(), mic_to_win1251(), and mic_to_win866().

◆ pg_any_to_server()

char * pg_any_to_server	(	const char *	s,
		int	len,
		int	encoding
	)

Definition at line 677 of file mbutils.c.

{
    /*
     * Convert 's' (of 'len' bytes, in 'encoding') to the database encoding.
     *
     * Returns 's' itself, after validation, when no conversion is needed or
     * possible; otherwise returns whatever the conversion routine produces.
     */
    if (len <= 0)
        return unconstify(char *, s);   /* empty string is always valid */
 
    if (encoding == DatabaseEncoding->encoding ||
        encoding == PG_SQL_ASCII)
    {
        /*
         * No conversion is needed, but we must still validate the data.
         */
        (void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
        return unconstify(char *, s);
    }
 
    if (DatabaseEncoding->encoding == PG_SQL_ASCII)
    {
        /*
         * No conversion is possible, but we must still validate the data,
         * because the client-side code might have done string escaping using
         * the selected client_encoding.  If the client encoding is ASCII-safe
         * then we just do a straight validation under that encoding.  For an
         * ASCII-unsafe encoding we have a problem: we dare not pass such data
         * to the parser but we have no way to convert it.  We compromise by
         * rejecting the data if it contains any non-ASCII characters.
         */
        if (PG_VALID_BE_ENCODING(encoding))
            (void) pg_verify_mbstr(encoding, s, len, false);
        else
        {
            int         i;
 
            /* reject NUL bytes as well as any byte with the high bit set */
            for (i = 0; i < len; i++)
            {
                if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
                    ereport(ERROR,
                            (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
                             errmsg("invalid byte value for encoding \"%s\": 0x%02x",
                                    pg_enc2name_tbl[PG_SQL_ASCII].name,
                                    (unsigned char) s[i])));
            }
        }
        return unconstify(char *, s);
    }
 
    /* Fast path if we can use cached conversion function */
    if (encoding == ClientEncoding->encoding)
        return perform_default_encoding_conversion(s, len, true);
 
    /* General case ... will not work outside transactions */
    return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
                                              len,
                                              encoding,
                                              DatabaseEncoding->encoding);
}

References ClientEncoding, DatabaseEncoding, encoding, pg_enc2name::encoding, ereport, errcode(), errmsg(), ERROR, i, IS_HIGHBIT_SET, len, name, perform_default_encoding_conversion(), pg_do_encoding_conversion(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_BE_ENCODING, pg_verify_mbstr(), and unconstify.

Referenced by ASN1_STRING_to_text(), cache_single_string(), db_encoding_convert(), dsnowball_lexize(), pg_client_to_server(), pg_stat_statements_internal(), pgp_armor_headers(), PLyUnicode_Bytes(), read_extension_script_file(), tsearch_readline(), utf_u2e(), X509_NAME_to_cstring(), and xml_recv().

◆ pg_char_and_wchar_strcmp()

int pg_char_and_wchar_strcmp	(	const char *	s1,
		const pg_wchar *	s2
	)

Definition at line 41 of file wstrcmp.c.

{
    /*
     * Compare the NUL-terminated char string s1 with the NUL-terminated
     * pg_wchar string s2, strcmp-style: returns 0 when they are equal,
     * otherwise the difference between the first pair of differing
     * elements.
     *
     * Each byte of s1 is read as unsigned char before being widened.  A
     * plain char may be signed, and would then sign-extend so that a
     * high-bit byte never matched the equal pg_wchar value.  This also
     * keeps the loop test in step with the return expression below.
     */
    while ((pg_wchar) ((unsigned char) *s1) == *s2++)
        if (*s1++ == 0)
            return 0;
    return *(const unsigned char *) s1 - *(const pg_wchar *) (s2 - 1);
}

References s1, and s2.

◆ pg_char_and_wchar_strncmp()

int pg_char_and_wchar_strncmp	(	const char *	s1,
		const pg_wchar *	s2,
		size_t	n
	)

Definition at line 55 of file wstrncmp.c.

{
    /*
     * Compare at most n elements of the char string s1 with the pg_wchar
     * string s2.  Bytes of s1 are read as unsigned char.  Returns 0 when
     * the compared prefixes match (or n is 0), otherwise the difference
     * between the first pair of differing elements.
     */
    for (; n != 0; n--)
    {
        pg_wchar    lhs = (pg_wchar) ((unsigned char) *s1);
        pg_wchar    rhs = *s2++;

        if (lhs != rhs)
            return (lhs - rhs);

        /* both strings ended together */
        if (*s1++ == 0)
            break;
    }
    return 0;
}

References s1, and s2.

Referenced by element(), and lookupcclass().

◆ pg_char_to_encoding()

int pg_char_to_encoding ( const char * name )

Definition at line 549 of file encnames.c.

{
    /*
     * Look up an encoding by name.  The name is normalized with
     * clean_encoding_name() and then binary-searched in pg_encname_tbl.
     * Returns the matching entry's encoding ID, or -1 if the name is NULL,
     * empty, too long, or not found.
     */
    const pg_encname *lo = pg_encname_tbl;
    const pg_encname *hi = pg_encname_tbl + lengthof(pg_encname_tbl) - 1;
    char        cleaned[NAMEDATALEN];
    char       *key;

    if (name == NULL || *name == '\0')
        return -1;

    /* a name this long is certainly not in the table */
    if (strlen(name) >= NAMEDATALEN)
        return -1;

    key = clean_encoding_name(name, cleaned);

    while (lo <= hi)
    {
        const pg_encname *mid = lo + ((hi - lo) >> 1);
        int         cmp = key[0] - mid->name[0];

        /* first characters agree, so compare the full strings */
        if (cmp == 0)
            cmp = strcmp(key, mid->name);

        if (cmp == 0)
            return mid->encoding;

        if (cmp < 0)
            hi = mid - 1;
        else
            lo = mid + 1;
    }

    return -1;
}

◆ pg_client_to_server()

char * pg_client_to_server	(	const char *	s,
		int	len
	)

Definition at line 661 of file mbutils.c.

{
    /* Delegate to pg_any_to_server() using the current client encoding. */
    int         client_enc = ClientEncoding->encoding;

    return pg_any_to_server(s, len, client_enc);
}

References ClientEncoding, pg_enc2name::encoding, len, and pg_any_to_server().

Referenced by exec_bind_message(), parse_fcall_arguments(), pq_getmsgstring(), and pq_getmsgtext().

◆ pg_database_encoding_character_incrementer()

mbcharacter_incrementer pg_database_encoding_character_incrementer ( void )

Definition at line 1524 of file mbutils.c.

{
    /*
     * Pick the character-increment routine for the database encoding.  A
     * field in pg_wchar_table[] might eventually be the better home for
     * this, but for now the choice is made explicitly here.
     */
    int         db_enc = GetDatabaseEncoding();

    if (db_enc == PG_UTF8)
        return pg_utf8_increment;

    if (db_enc == PG_EUC_JP)
        return pg_eucjp_increment;

    /* every other encoding uses the generic incrementer */
    return pg_generic_charinc;
}

References GetDatabaseEncoding(), PG_EUC_JP, pg_eucjp_increment(), pg_generic_charinc(), PG_UTF8, and pg_utf8_increment().

Referenced by make_greater_string().

◆ pg_database_encoding_max_length()

int pg_database_encoding_max_length ( void )

Definition at line 1547 of file mbutils.c.

{
    /* maxmblen of the pg_wchar_table entry for the database encoding */
    int         db_enc = GetDatabaseEncoding();

    return pg_wchar_table[db_enc].maxmblen;
}

References GetDatabaseEncoding(), pg_wchar_tbl::maxmblen, and pg_wchar_table.

Referenced by bpcharlen(), charlen_to_bytelen(), dotrim(), downcase_identifier(), gbt_bpchar_consistent(), gbt_text_compress(), gbt_text_consistent(), generate_trgm_only(), Generic_Text_IC_like(), GenericMatchText(), infix(), init_tsvector_parser(), like_escape(), like_fixed_prefix(), lpad(), pg_mbcharcliplen(), pg_mbstrlen(), pg_mbstrlen_with_len(), protect_out_of_mem(), regexp_fixed_prefix(), rpad(), setup_regexp_matches(), setup_test_matches(), show_trgm(), strlower_libc(), strlower_libc_mb(), strtitle_libc(), strtitle_libc_mb(), strupper_libc(), strupper_libc_mb(), text_length(), text_position_setup(), text_reverse(), text_substring(), TParserInit(), translate(), and tsvectorout().

◆ pg_do_encoding_conversion()

unsigned char * pg_do_encoding_conversion	(	unsigned char *	src,
		int	len,
		int	src_encoding,
		int	dest_encoding
	)

Definition at line 357 of file mbutils.c.

{
    /*
     * Convert 'len' bytes at 'src' from src_encoding to dest_encoding.
     *
     * Returns 'src' itself when the input is empty, the encodings are
     * equal, or either side is SQL_ASCII.  Otherwise returns a buffer
     * allocated in CurrentMemoryContext that the default conversion
     * procedure has filled in.  Raises ERROR if called outside a
     * transaction, if no default conversion exists, or if the input or
     * result is too large.
     */
    unsigned char *result;
    Oid         proc;
 
    if (len <= 0)
        return src;             /* empty string is always valid */
 
    if (src_encoding == dest_encoding)
        return src;             /* no conversion required, assume valid */
 
    if (dest_encoding == PG_SQL_ASCII)
        return src;             /* any string is valid in SQL_ASCII */
 
    if (src_encoding == PG_SQL_ASCII)
    {
        /* No conversion is possible, but we must validate the result */
        (void) pg_verify_mbstr(dest_encoding, (const char *) src, len, false);
        return src;
    }
 
    if (!IsTransactionState())  /* shouldn't happen */
        elog(ERROR, "cannot perform encoding conversion outside a transaction");
 
    proc = FindDefaultConversionProc(src_encoding, dest_encoding);
    if (!OidIsValid(proc))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_FUNCTION),
                 errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
                        pg_encoding_to_char(src_encoding),
                        pg_encoding_to_char(dest_encoding))));
 
    /*
     * Allocate space for conversion result, being wary of integer overflow.
     *
     * len * MAX_CONVERSION_GROWTH is typically a vast overestimate of the
     * required space, so it might exceed MaxAllocSize even though the result
     * would actually fit.  We do not want to hand back a result string that
     * exceeds MaxAllocSize, because callers might not cope gracefully --- but
     * if we just allocate more than that, and don't use it, that's fine.
     */
    if ((Size) len >= (MaxAllocHugeSize / (Size) MAX_CONVERSION_GROWTH))
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("out of memory"),
                 errdetail("String of %d bytes is too long for encoding conversion.",
                           len)));
 
    /* worst-case output size plus one byte for the terminating NUL */
    result = (unsigned char *)
        MemoryContextAllocHuge(CurrentMemoryContext,
                               (Size) len * MAX_CONVERSION_GROWTH + 1);
 
    /* final argument is the noError flag: false here, so failures raise */
    (void) OidFunctionCall6(proc,
                            Int32GetDatum(src_encoding),
                            Int32GetDatum(dest_encoding),
                            CStringGetDatum((char *) src),
                            CStringGetDatum((char *) result),
                            Int32GetDatum(len),
                            BoolGetDatum(false));
 
    /*
     * If the result is large, it's worth repalloc'ing to release any extra
     * space we asked for.  The cutoff here is somewhat arbitrary, but we
     * *must* check when len * MAX_CONVERSION_GROWTH exceeds MaxAllocSize.
     */
    if (len > 1000000)
    {
        Size        resultlen = strlen((char *) result);
 
        if (resultlen >= MaxAllocSize)
            ereport(ERROR,
                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                     errmsg("out of memory"),
                     errdetail("String of %d bytes is too long for encoding conversion.",
                               len)));
 
        result = (unsigned char *) repalloc(result, resultlen + 1);
    }
 
    return result;
}

References BoolGetDatum(), CStringGetDatum(), CurrentMemoryContext, elog, ereport, errcode(), errdetail(), errmsg(), ERROR, FindDefaultConversionProc(), Int32GetDatum(), IsTransactionState(), len, MAX_CONVERSION_GROWTH, MaxAllocHugeSize, MaxAllocSize, MemoryContextAllocHuge(), OidFunctionCall6, OidIsValid, pg_encoding_to_char, PG_SQL_ASCII, pg_verify_mbstr(), and repalloc().

Referenced by convert_charset(), pg_any_to_server(), pg_convert(), and pg_server_to_any().

◆ pg_do_encoding_conversion_buf()

int pg_do_encoding_conversion_buf	(	Oid	proc,
		int	src_encoding,
		int	dest_encoding,
		unsigned char *	src,
		int	srclen,
		unsigned char *	dest,
		int	destlen,
		bool	noError
	)

Definition at line 470 of file mbutils.c.

{
    /*
     * Run conversion procedure 'proc' on 'src', writing into the
     * caller-supplied buffer 'dest' of size 'destlen', and return the
     * procedure's int32 result.
     *
     * The source length handed to the procedure is capped so that even
     * worst-case growth, plus one reserved byte, fits in the destination.
     */
    Size        max_srclen = (destlen - 1) / (Size) MAX_CONVERSION_GROWTH;

    if ((Size) srclen >= max_srclen)
        srclen = max_srclen;

    return DatumGetInt32(OidFunctionCall6(proc,
                                          Int32GetDatum(src_encoding),
                                          Int32GetDatum(dest_encoding),
                                          CStringGetDatum((char *) src),
                                          CStringGetDatum((char *) dest),
                                          Int32GetDatum(srclen),
                                          BoolGetDatum(noError)));
}

References BoolGetDatum(), CStringGetDatum(), DatumGetInt32(), generate_unaccent_rules::dest, Int32GetDatum(), MAX_CONVERSION_GROWTH, and OidFunctionCall6.

Referenced by CopyConversionError(), CopyConvertBuf(), and test_enc_conversion().

◆ pg_dsplen()

int pg_dsplen ( const char * mbstr )

Definition at line 1031 of file mbutils.c.

{
    /* Dispatch to the dsplen routine registered for the database encoding. */
    const pg_wchar_tbl *entry = &pg_wchar_table[DatabaseEncoding->encoding];

    return entry->dsplen((const unsigned char *) mbstr);
}

References DatabaseEncoding, pg_wchar_tbl::dsplen, pg_enc2name::encoding, and pg_wchar_table.

Referenced by p_isspecial().

◆ pg_encoding_dsplen()

int pg_encoding_dsplen	(	int	encoding,
		const char *	mbstr
	)

Definition at line 2176 of file wchar.c.

{
    /* An invalid encoding ID falls back to the SQL_ASCII table entry. */
    int         idx = PG_VALID_ENCODING(encoding) ? encoding : PG_SQL_ASCII;

    return pg_wchar_table[idx].dsplen((const unsigned char *) mbstr);
}

References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by PQdsplen(), and reportErrorPosition().

◆ pg_encoding_max_length()

int pg_encoding_max_length ( int encoding )

Definition at line 2213 of file wchar.c.

{
    Assert(PG_VALID_ENCODING(encoding));

    /*
     * The validity test is repeated despite the assert above, because some
     * mingw versions otherwise issue bogus warnings.  An invalid ID falls
     * back to the SQL_ASCII entry.
     */
    if (!PG_VALID_ENCODING(encoding))
        return pg_wchar_table[PG_SQL_ASCII].maxmblen;

    return pg_wchar_table[encoding].maxmblen;
}

References Assert(), encoding, pg_wchar_tbl::maxmblen, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by ascii(), chr(), CopyConvertBuf(), pg_encoding_mbcliplen(), pg_encoding_set_invalid(), pg_verify_mbstr_len(), reportErrorPosition(), test_enc_setup(), and type_maximum_size().

◆ pg_encoding_mb2wchar_with_len()

int pg_encoding_mb2wchar_with_len	(	int	encoding,
		const char *	from,
		pg_wchar *	to,
		int	len
	)

Definition at line 994 of file mbutils.c.

{
    /* Dispatch to the mb2wchar_with_len routine of the given encoding. */
    const unsigned char *src = (const unsigned char *) from;

    return pg_wchar_table[encoding].mb2wchar_with_len(src, to, len);
}

References encoding, len, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

◆ pg_encoding_mbcliplen()

int pg_encoding_mbcliplen	(	int	encoding,
		const char *	mbstr,
		int	len,
		int	limit
	)

Definition at line 1094 of file mbutils.c.

{
    /*
     * Return the byte length of the longest prefix of 'mbstr' that is made
     * of whole characters and does not exceed 'limit' bytes.  Scanning
     * stops after 'len' bytes or at a NUL byte.
     */
    mblen_converter char_len_fn;
    int         clipped = 0;

    /* optimization for single byte encoding */
    if (pg_encoding_max_length(encoding) == 1)
        return cliplen(mbstr, len, limit);

    char_len_fn = pg_wchar_table[encoding].mblen;

    while (len > 0 && *mbstr)
    {
        int         chlen = (*char_len_fn) ((const unsigned char *) mbstr);
        int         next = clipped + chlen;

        /* this character would push us past the limit */
        if (next > limit)
            break;

        clipped = next;
        if (clipped == limit)
            break;

        mbstr += chlen;
        len -= chlen;
    }

    return clipped;
}

References cliplen(), encoding, len, pg_wchar_tbl::mblen, pg_encoding_max_length(), and pg_wchar_table.

Referenced by pg_mbcliplen().

◆ pg_encoding_mblen()

int pg_encoding_mblen	(	int	encoding,
		const char *	mbstr
	)

Definition at line 2135 of file wchar.c.

{
    /* An invalid encoding ID falls back to the SQL_ASCII table entry. */
    int         idx = PG_VALID_ENCODING(encoding) ? encoding : PG_SQL_ASCII;

    return pg_wchar_table[idx].mblen((const unsigned char *) mbstr);
}

References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), fmtIdEnc(), pg_encoding_mblen_bounded(), pg_encoding_mblen_or_incomplete(), PQescapeInternal(), PQmblen(), PQmblenBounded(), and test_enc_setup().

◆ pg_encoding_mblen_bounded()

int pg_encoding_mblen_bounded	(	int	encoding,
		const char *	mbstr
	)

Definition at line 2167 of file wchar.c.

{
    /*
     * Length reported by pg_encoding_mblen(), bounded by strnlen() so that
     * the result does not extend beyond a NUL byte in mbstr.
     */
    int         full_len = pg_encoding_mblen(encoding, mbstr);

    return strnlen(mbstr, full_len);
}

References encoding, pg_encoding_mblen(), and strnlen().

◆ pg_encoding_mblen_or_incomplete()

int pg_encoding_mblen_or_incomplete	(	int	encoding,
		const char *	mbstr,
		size_t	remaining
	)

Definition at line 2147 of file wchar.c.

{
    /*
     * Zero remaining bytes counts as too few, even for single-byte
     * encodings.  pg_gb18030_mblen() reads one or two bytes; single-byte
     * encodings read zero; others read one.  INT_MAX signals that there are
     * not enough bytes to determine the length.
     */
    if (remaining < 1)
        return INT_MAX;

    /* GB18030 needs a second byte to size a non-ASCII character */
    if (encoding == PG_GB18030 && IS_HIGHBIT_SET(*mbstr) && remaining < 2)
        return INT_MAX;

    return pg_encoding_mblen(encoding, mbstr);
}

References encoding, IS_HIGHBIT_SET, pg_encoding_mblen(), PG_GB18030, and remaining.

Referenced by PQescapeInternal(), PQescapeStringInternal(), report_invalid_encoding(), and report_untranslatable_char().

◆ pg_encoding_set_invalid()

void pg_encoding_set_invalid	(	int	encoding,
		char *	dst
	)

Definition at line 2051 of file wchar.c.

{
    /*
     * Store a two-byte sequence at dst.  The first byte is 0xc0 for UTF8
     * and NONUTF8_INVALID_BYTE0 for every other encoding; the second is
     * always NONUTF8_INVALID_BYTE1.  Only multibyte encodings are expected.
     */
    Assert(pg_encoding_max_length(encoding) > 1);

    if (encoding == PG_UTF8)
        dst[0] = 0xc0;
    else
        dst[0] = NONUTF8_INVALID_BYTE0;

    dst[1] = NONUTF8_INVALID_BYTE1;
}

References Assert(), encoding, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, pg_encoding_max_length(), and PG_UTF8.

Referenced by appendStringLiteral(), fmtIdEnc(), PQescapeStringInternal(), and test_enc_setup().

◆ pg_encoding_to_char()

const char * pg_encoding_to_char ( int encoding )

Definition at line 587 of file encnames.c.

{
    /*
     * Map an encoding ID to its name.  An invalid ID yields the empty
     * string rather than NULL.
     */
    const pg_enc2name *entry;

    if (!PG_VALID_ENCODING(encoding))
        return "";

    entry = &pg_enc2name_tbl[encoding];

    /* the table must be ordered so that entry N describes encoding N */
    Assert(encoding == entry->encoding);
    return entry->name;
}

◆ pg_encoding_verifymbchar()

int pg_encoding_verifymbchar	(	int	encoding,
		const char *	mbstr,
		int	len
	)

Definition at line 2189 of file wchar.c.

{
    /* An invalid encoding ID falls back to the SQL_ASCII table entry. */
    int         idx = PG_VALID_ENCODING(encoding) ? encoding : PG_SQL_ASCII;

    return pg_wchar_table[idx].mbverifychar((const unsigned char *) mbstr, len);
}

References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by appendStringLiteral(), big52euc_tw(), big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), fmtIdEnc(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), PQescapeStringInternal(), shift_jis_20042euc_jis_2004(), and sjis2euc_jp().

◆ pg_encoding_verifymbstr()

int pg_encoding_verifymbstr	(	int	encoding,
		const char *	mbstr,
		int	len
	)

Definition at line 2202 of file wchar.c.

{
    /* Invalid encoding IDs are handled with the PG_SQL_ASCII verifier. */
    int         table_idx = PG_VALID_ENCODING(encoding) ? encoding : PG_SQL_ASCII;

    return pg_wchar_table[table_idx].mbverifystr((const unsigned char *) mbstr, len);
}

References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by add_file_to_manifest(), CopyConvertBuf(), handle_oauth_sasl_error(), parse_oauth_json(), PQescapeInternal(), test_enc_conversion(), test_enc_setup(), and test_one_vector_escape().

◆ pg_encoding_wchar2mb_with_len()

int pg_encoding_wchar2mb_with_len	(	int	encoding,
		const pg_wchar *	from,
		char *	to,
		int	len
	)

Definition at line 1016 of file mbutils.c.

{
    unsigned char *out = (unsigned char *) to;

    /* Dispatch through the per-encoding table; encoding is used as the index as-is. */
    return pg_wchar_table[encoding].wchar2mb_with_len(from, out, len);
}

References encoding, len, pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

◆ pg_get_client_encoding()

int pg_get_client_encoding ( void )

Definition at line 337 of file mbutils.c.

{
    /* Report the encoding ID stored in the current ClientEncoding entry. */
    int         client_enc_id = ClientEncoding->encoding;

    return client_enc_id;
}

References ClientEncoding, and pg_enc2name::encoding.

Referenced by BeginCopyFrom(), BeginCopyTo(), and xml_send().

◆ pg_get_client_encoding_name()

const char * pg_get_client_encoding_name ( void )

Definition at line 346 of file mbutils.c.

{
    /* Report the name stored in the current ClientEncoding entry. */
    const char *client_enc_name = ClientEncoding->name;

    return client_enc_name;
}

References ClientEncoding, and pg_enc2name::name.

◆ pg_mb2wchar()

int pg_mb2wchar	(	const char *	from,
		pg_wchar *	to
	)

Definition at line 980 of file mbutils.c.

{
    const unsigned char *src = (const unsigned char *) from;
    int         db_enc = DatabaseEncoding->encoding;

    /* Convert the whole NUL-terminated input using the database encoding's routine. */
    return pg_wchar_table[db_enc].mb2wchar_with_len(src, to, strlen(from));
}

References DatabaseEncoding, pg_enc2name::encoding, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

◆ pg_mb2wchar_with_len()

int pg_mb2wchar_with_len	(	const char *	from,
		pg_wchar *	to,
		int	len
	)

Definition at line 987 of file mbutils.c.

{
    const unsigned char *src = (const unsigned char *) from;
    int         db_enc = DatabaseEncoding->encoding;

    /* Convert len bytes of input using the database encoding's routine. */
    return pg_wchar_table[db_enc].mb2wchar_with_len(src, to, len);
}

References DatabaseEncoding, pg_enc2name::encoding, len, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

Referenced by CheckAffix(), NIAddAffix(), RE_compile(), RE_compile_and_cache(), RE_execute(), regcomp_auth_token(), regexec_auth_token(), replace_text_regexp(), setup_regexp_matches(), setup_test_matches(), test_re_compile(), and TParserInit().

◆ pg_mbcharcliplen()

int pg_mbcharcliplen	(	const char *	mbstr,
		int	len,
		int	limit
	)

Definition at line 1126 of file mbutils.c.

{
    int         bytes_kept = 0;
    int         chars_seen;

    /* Single-byte encodings are delegated to cliplen() */
    if (pg_database_encoding_max_length() == 1)
        return cliplen(mbstr, len, limit);

    /*
     * Walk character by character; chars_seen is the 1-based ordinal of the
     * character currently under inspection.
     */
    for (chars_seen = 1; len > 0 && *mbstr; chars_seen++)
    {
        int         charlen = pg_mblen(mbstr);

        /* Stop before adding a character beyond the limit */
        if (chars_seen > limit)
            break;

        bytes_kept += charlen;
        len -= charlen;
        mbstr += charlen;
    }

    return bytes_kept;
}

References cliplen(), len, pg_database_encoding_max_length(), and pg_mblen().

Referenced by bpchar(), bpchar_input(), text_left(), text_right(), varchar(), and varchar_input().

◆ pg_mbcliplen()

int pg_mbcliplen	(	const char *	mbstr,
		int	len,
		int	limit
	)

Definition at line 1084 of file mbutils.c.

{
    int         db_enc = DatabaseEncoding->encoding;

    /* Same as pg_encoding_mbcliplen(), using the database encoding. */
    return pg_encoding_mbcliplen(db_enc, mbstr, len, limit);
}

References DatabaseEncoding, pg_enc2name::encoding, len, and pg_encoding_mbcliplen().

Referenced by appendStringInfoStringQuoted(), bpchar_name(), ChooseIndexColumnNames(), CopyLimitPrintoutLength(), ExecBuildSlotPartitionKeyDescription(), ExecBuildSlotValueDescription(), make_colname_unique(), make_greater_string(), makeMultirangeTypeName(), makeObjectName(), MemoryContextStatsPrint(), nameconcatoid(), namein(), pgstat_clip_activity(), pgstat_report_appname(), PutMemoryContextsStatsTupleStore(), set_rtable_names(), text_name(), text_to_cstring_buffer(), and truncate_identifier().

◆ pg_mblen()

int pg_mblen ( const char * mbstr )

Definition at line 1024 of file mbutils.c.

{
    const unsigned char *bytes = (const unsigned char *) mbstr;
    int         db_enc = DatabaseEncoding->encoding;

    /* Ask the database encoding's mblen routine for the character length. */
    return pg_wchar_table[db_enc].mblen(bytes);
}

References DatabaseEncoding, pg_enc2name::encoding, pg_wchar_tbl::mblen, and pg_wchar_table.

Referenced by addCompoundAffixFlagValue(), bit_in(), charlen_to_bytelen(), DCH_from_char(), dotrim(), find_word(), findchar(), findchar2(), findwrd(), gbt_var_node_cp_len(), get_modifiers(), get_nextfield(), get_wildcard_part(), getlexeme(), getNextFlagFromString(), gettoken_query(), gettoken_query_standard(), gettoken_query_websearch(), gettoken_tsvector(), hex_decode_safe(), infix(), initTrie(), lpad(), make_trigrams(), map_sql_identifier_to_xml_name(), map_xml_name_to_sql_identifier(), match_prosrc_to_literal(), mb_strchr(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), NUM_eat_non_data_chars(), NUM_processor(), parse_affentry(), parse_format(), parse_lquery(), parse_ltree(), parse_or_operator(), parse_re_flags(), parse_test_flags(), pg_base64_decode(), pg_mbcharcliplen(), pg_mbstrlen(), pg_mbstrlen_with_len(), prssyntaxerror(), px_crypt_shacrypt(), readstoplist(), report_json_context(), rpad(), RS_compile(), RS_execute(), RS_isRegis(), similar_escape_internal(), split_text(), t_isalnum(), t_isalpha(), text_format(), text_position_next(), text_position_next_internal(), text_reverse(), text_substring(), text_to_bits(), textregexreplace(), thesaurusRead(), TParserGet(), translate(), ts_stat_sql(), tsvectorout(), unaccent_lexize(), varbit_in(), varstr_levenshtein(), and wchareq().

◆ pg_mbstrlen()

int pg_mbstrlen ( const char * mbstr )

Definition at line 1038 of file mbutils.c.

{
    int         nchars;

    /* In a single-byte encoding the character count equals strlen() */
    if (pg_database_encoding_max_length() == 1)
        return strlen(mbstr);

    /* Otherwise step over one character per iteration until the NUL */
    for (nchars = 0; *mbstr != '\0'; nchars++)
        mbstr += pg_mblen(mbstr);

    return nchars;
}

References len, pg_database_encoding_max_length(), and pg_mblen().

Referenced by NUM_processor(), and text_format_append_string().

◆ pg_mbstrlen_with_len()

int pg_mbstrlen_with_len	(	const char *	mbstr,
		int	limit
	)

Definition at line 1058 of file mbutils.c.

{
    int         nchars = 0;

    /* In a single-byte encoding the byte limit is returned unchanged */
    if (pg_database_encoding_max_length() == 1)
        return limit;

    /* Count characters until the byte budget is used up or a NUL is hit */
    for (; limit > 0 && *mbstr != '\0'; nchars++)
    {
        int         charlen = pg_mblen(mbstr);

        mbstr += charlen;
        limit -= charlen;
    }

    return nchars;
}

References len, pg_database_encoding_max_length(), and pg_mblen().

Referenced by bpchar(), bpchar_input(), bpcharlen(), executor_errposition(), lpad(), match_prosrc_to_query(), parser_errposition(), plpgsql_scanner_errposition(), rpad(), scanner_errposition(), similar_escape_internal(), text_left(), text_length(), text_position_get_match_pos(), text_right(), text_substring(), unicode_assigned(), unicode_is_normalized(), unicode_normalize_func(), and varstr_levenshtein().

◆ pg_mule_mblen()

int pg_mule_mblen ( const unsigned char * s )

Definition at line 793 of file wchar.c.

{
    /* The length is decided solely by the class of the leading byte. */
    if (IS_LC1(*s))
        return 2;

    if (IS_LCPRV1(*s) || IS_LC2(*s))
        return 3;

    if (IS_LCPRV2(*s))
        return 4;

    return 1;                   /* assume ASCII */
}

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

Referenced by mic2latin(), mic2latin_with_table(), and pg_mule_verifychar().

◆ pg_server_to_any()

char * pg_server_to_any	(	const char *	s,
		int	len,
		int	encoding
	)

Definition at line 750 of file mbutils.c.

{
    char       *same = unconstify(char *, s);
    int         server_enc;

    /* empty string is always valid */
    if (len <= 0)
        return same;

    server_enc = DatabaseEncoding->encoding;

    /* Target equals the server encoding, or is SQL_ASCII: assume data is valid */
    if (encoding == server_enc || encoding == PG_SQL_ASCII)
        return same;

    /* No conversion is possible from SQL_ASCII, but we must validate the result */
    if (server_enc == PG_SQL_ASCII)
    {
        (void) pg_verify_mbstr(encoding, s, len, false);
        return same;
    }

    /* Fast path if we can use cached conversion function */
    if (encoding == ClientEncoding->encoding)
        return perform_default_encoding_conversion(s, len, false);

    /* General case ... will not work outside transactions */
    return (char *) pg_do_encoding_conversion((unsigned char *) same,
                                              len,
                                              server_enc,
                                              encoding);
}

References ClientEncoding, DatabaseEncoding, encoding, pg_enc2name::encoding, len, perform_default_encoding_conversion(), pg_do_encoding_conversion(), PG_SQL_ASCII, pg_verify_mbstr(), and unconstify.

Referenced by compareStrings(), CopyAttributeOutCSV(), CopyAttributeOutText(), CopyToTextLikeStart(), daitch_mokotoff(), dsnowball_lexize(), hv_fetch_string(), hv_store_string(), pg_server_to_client(), PLyUnicode_FromStringAndSize(), and utf_e2u().

◆ pg_server_to_client()

char * pg_server_to_client	(	const char *	s,
		int	len
	)

Definition at line 739 of file mbutils.c.

{
    int         client_enc = ClientEncoding->encoding;

    /* Same as pg_server_to_any(), targeting the current client encoding. */
    return pg_server_to_any(s, len, client_enc);
}

References ClientEncoding, pg_enc2name::encoding, len, and pg_server_to_any().

Referenced by pq_puttextmessage(), pq_sendcountedtext(), pq_sendstring(), pq_sendtext(), and pq_writestring().

◆ pg_unicode_to_server()

void pg_unicode_to_server	(	pg_wchar	c,
		unsigned char *	s
	)

Definition at line 865 of file mbutils.c.

{
    /*
     * Convert the Unicode code point c into the server (database) encoding,
     * storing the result in s.  Errors are raised with ereport(ERROR) when c
     * is not a valid code point or when no UTF-8-to-server conversion
     * function is available.
     *
     * NOTE(review): the required size of s is not visible here; the ASCII
     * and UTF-8 paths below write the character followed by a NUL byte --
     * confirm the buffer contract against the callers.
     */
    unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
    int         c_as_utf8_len;
    int         server_encoding;
 
    /*
     * Complain if invalid Unicode code point.  The choice of errcode here is
     * debatable, but really our caller should have checked this anyway.
     */
    if (!is_valid_unicode_codepoint(c))
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("invalid Unicode code point")));
 
    /* Otherwise, if it's in ASCII range, conversion is trivial */
    if (c <= 0x7F)
    {
        s[0] = (unsigned char) c;
        s[1] = '\0';
        return;
    }
 
    /* If the server encoding is UTF-8, we just need to reformat the code */
    server_encoding = GetDatabaseEncoding();
    if (server_encoding == PG_UTF8)
    {
        unicode_to_utf8(c, s);
        /* NUL-terminate right after the bytes just written */
        s[pg_utf_mblen(s)] = '\0';
        return;
    }
 
    /* For all other cases, we must have a conversion function available */
    if (Utf8ToServerConvProc == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("conversion between %s and %s is not supported",
                        pg_enc2name_tbl[PG_UTF8].name,
                        GetDatabaseEncodingName())));
 
    /* Construct UTF-8 source string */
    unicode_to_utf8(c, c_as_utf8);
    c_as_utf8_len = pg_utf_mblen(c_as_utf8);
    c_as_utf8[c_as_utf8_len] = '\0';
 
    /*
     * Convert, or throw error if we can't.  The final argument is false here,
     * whereas pg_unicode_to_server_noerror() passes true.
     */
    FunctionCall6(Utf8ToServerConvProc,
                  Int32GetDatum(PG_UTF8),
                  Int32GetDatum(server_encoding),
                  CStringGetDatum((char *) c_as_utf8),
                  CStringGetDatum((char *) s),
                  Int32GetDatum(c_as_utf8_len),
                  BoolGetDatum(false));
}

References BoolGetDatum(), CStringGetDatum(), ereport, errcode(), errmsg(), ERROR, FunctionCall6, GetDatabaseEncoding(), GetDatabaseEncodingName(), Int32GetDatum(), is_valid_unicode_codepoint(), MAX_MULTIBYTE_CHAR_LEN, name, pg_enc2name_tbl, PG_UTF8, pg_utf_mblen, unicode_to_utf8(), and Utf8ToServerConvProc.

Referenced by addunicode(), addUnicodeChar(), map_xml_name_to_sql_identifier(), str_udeescape(), and unistr().

◆ pg_unicode_to_server_noerror()

bool pg_unicode_to_server_noerror	(	pg_wchar	c,
		unsigned char *	s
	)

Definition at line 927 of file mbutils.c.

{
    /*
     * Variant of pg_unicode_to_server() that reports failure through its
     * return value: returns true when c was converted into s, and false when
     * c is not a valid code point, no UTF-8-to-server conversion function is
     * available, or the conversion did not consume the whole input.
     *
     * NOTE(review): the required size of s is not visible here -- confirm
     * the buffer contract against the callers.
     */
    unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
    int         c_as_utf8_len;
    int         converted_len;
    int         server_encoding;
 
    /* Fail if invalid Unicode code point */
    if (!is_valid_unicode_codepoint(c))
        return false;
 
    /* Otherwise, if it's in ASCII range, conversion is trivial */
    if (c <= 0x7F)
    {
        s[0] = (unsigned char) c;
        s[1] = '\0';
        return true;
    }
 
    /* If the server encoding is UTF-8, we just need to reformat the code */
    server_encoding = GetDatabaseEncoding();
    if (server_encoding == PG_UTF8)
    {
        unicode_to_utf8(c, s);
        /* NUL-terminate right after the bytes just written */
        s[pg_utf_mblen(s)] = '\0';
        return true;
    }
 
    /* For all other cases, we must have a conversion function available */
    if (Utf8ToServerConvProc == NULL)
        return false;
 
    /* Construct UTF-8 source string */
    unicode_to_utf8(c, c_as_utf8);
    c_as_utf8_len = pg_utf_mblen(c_as_utf8);
    c_as_utf8[c_as_utf8_len] = '\0';
 
    /*
     * Convert, but without throwing error if we can't.  The call's result is
     * read as an int32 and compared with the input length below.
     */
    converted_len = DatumGetInt32(FunctionCall6(Utf8ToServerConvProc,
                                                Int32GetDatum(PG_UTF8),
                                                Int32GetDatum(server_encoding),
                                                CStringGetDatum((char *) c_as_utf8),
                                                CStringGetDatum((char *) s),
                                                Int32GetDatum(c_as_utf8_len),
                                                BoolGetDatum(true)));
 
    /* Conversion was successful iff it consumed the whole input */
    return (converted_len == c_as_utf8_len);
}

References BoolGetDatum(), CStringGetDatum(), DatumGetInt32(), FunctionCall6, GetDatabaseEncoding(), Int32GetDatum(), is_valid_unicode_codepoint(), MAX_MULTIBYTE_CHAR_LEN, PG_UTF8, pg_utf_mblen, unicode_to_utf8(), and Utf8ToServerConvProc.

Referenced by addUnicodeChar(), and json_lex_string().

◆ pg_utf8_islegal()

bool pg_utf8_islegal	(	const unsigned char *	source,
		int	length
	)

Definition at line 1989 of file wchar.c.

{
    /*
     * Check whether the "length" bytes at "source" form a legal UTF-8
     * character.  Returns true if so, false otherwise.  The switch enters at
     * the case for the claimed length and deliberately falls through, so the
     * bytes are checked from last to first.
     */
    unsigned char a;
 
    switch (length)
    {
        default:
            /* reject lengths 5 and 6 for now */
            return false;
        case 4:
            /* fourth byte must lie in 0x80..0xBF */
            a = source[3];
            if (a < 0x80 || a > 0xBF)
                return false;
            /* FALL THRU */
        case 3:
            /* third byte must lie in 0x80..0xBF */
            a = source[2];
            if (a < 0x80 || a > 0xBF)
                return false;
            /* FALL THRU */
        case 2:
            /*
             * The allowed range of the second byte depends on the first byte.
             * Per the UTF-8 definition (RFC 3629), the narrowed ranges exclude
             * overlong forms (0xE0, 0xF0), surrogate code points (0xED) and
             * values above U+10FFFF (0xF4).
             */
            a = source[1];
            switch (*source)
            {
                case 0xE0:
                    if (a < 0xA0 || a > 0xBF)
                        return false;
                    break;
                case 0xED:
                    if (a < 0x80 || a > 0x9F)
                        return false;
                    break;
                case 0xF0:
                    if (a < 0x90 || a > 0xBF)
                        return false;
                    break;
                case 0xF4:
                    if (a < 0x80 || a > 0x8F)
                        return false;
                    break;
                default:
                    if (a < 0x80 || a > 0xBF)
                        return false;
                    break;
            }
            /* FALL THRU */
        case 1:
            /* first byte: reject 0x80..0xC1 and anything above 0xF4 */
            a = *source;
            if (a >= 0x80 && a < 0xC2)
                return false;
            if (a > 0xF4)
                return false;
            break;
    }
    return true;
}

References a, and source.

Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifychar(), utf8_to_iso8859_1(), and UtfToLocal().

◆ pg_utf_mblen()

int pg_utf_mblen ( const unsigned char * s )

Definition at line 538 of file wchar.c.

{
    const unsigned char lead = *s;

    /* Decide the length from the high-order bit pattern of the first byte. */
    if ((lead & 0x80) == 0)
        return 1;
    if ((lead & 0xe0) == 0xc0)
        return 2;
    if ((lead & 0xf0) == 0xe0)
        return 3;
    if ((lead & 0xf8) == 0xf0)
        return 4;
#ifdef NOT_USED
    if ((lead & 0xfc) == 0xf8)
        return 5;
    if ((lead & 0xfe) == 0xfc)
        return 6;
#endif

    /* Any other leading byte is reported as length 1. */
    return 1;
}

References len.

Referenced by pg_utf8_verifystr(), and pg_wchar2utf_with_len().

◆ pg_valid_client_encoding()

int pg_valid_client_encoding ( const char * name )

Definition at line 485 of file encnames.c.

{
    int         enc = pg_char_to_encoding(name);

    /* Unknown name, or an encoding not valid as a frontend encoding: -1 */
    if (enc < 0 || !PG_VALID_FE_ENCODING(enc))
        return -1;

    return enc;
}

References enc, name, pg_char_to_encoding(), and PG_VALID_FE_ENCODING.

Referenced by check_client_encoding().

◆ pg_valid_server_encoding()

int pg_valid_server_encoding ( const char * name )

Definition at line 499 of file encnames.c.

{
    int         enc = pg_char_to_encoding(name);

    /* Unknown name, or an encoding not valid as a backend encoding: -1 */
    if (enc < 0 || !PG_VALID_BE_ENCODING(enc))
        return -1;

    return enc;
}

References enc, name, pg_char_to_encoding(), and PG_VALID_BE_ENCODING.

◆ pg_valid_server_encoding_id()

int pg_valid_server_encoding_id ( int encoding )

Definition at line 513 of file encnames.c.

{
    /* Result of the PG_VALID_BE_ENCODING test for this encoding ID. */
    int         is_valid = PG_VALID_BE_ENCODING(encoding);

    return is_valid;
}

◆ pg_verify_mbstr()

bool pg_verify_mbstr	(	int	encoding,
		const char *	mbstr,
		int	len,
		bool	noError
	)

Definition at line 1567 of file mbutils.c.

{
    int         valid_bytes;

    Assert(PG_VALID_ENCODING(encoding));

    /* The verifier reports how many leading bytes it accepted. */
    valid_bytes = pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len);

    /* Everything accepted: the string is valid. */
    if (valid_bytes == len)
        return true;

    /* Caller asked for a boolean answer instead of an error. */
    if (noError)
        return false;

    /* Report the problem starting at the first rejected byte. */
    report_invalid_encoding(encoding, mbstr + valid_bytes, len - valid_bytes);
    return true;
}

References Assert(), encoding, len, pg_wchar_tbl::mbverifystr, PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().

Referenced by AddFileToBackupManifest(), LogicalOutputWrite(), pg_any_to_server(), pg_convert(), pg_do_encoding_conversion(), pg_server_to_any(), pg_verifymbstr(), and read_extension_script_file().

◆ pg_verify_mbstr_len()

int pg_verify_mbstr_len	(	int	encoding,
		const char *	mbstr,
		int	len,
		bool	noError
	)

Definition at line 1598 of file mbutils.c.

{
    /*
     * Verify the len bytes at mbstr in the given encoding and count its
     * characters.  Returns the number of characters on success.  On invalid
     * input (including an embedded NUL byte) it returns -1 when noError is
     * true, and otherwise calls report_invalid_encoding().
     */
    mbchar_verifier mbverifychar;
    int         mb_len;
 
    Assert(PG_VALID_ENCODING(encoding));
 
    /*
     * In single-byte encodings, we need only reject nulls (\0).
     */
    if (pg_encoding_max_length(encoding) <= 1)
    {
        const char *nullpos = memchr(mbstr, 0, len);
 
        /* no NUL found: every byte is one character */
        if (nullpos == NULL)
            return len;
        if (noError)
            return -1;
        report_invalid_encoding(encoding, nullpos, 1);
    }
 
    /* fetch function pointer just once */
    mbverifychar = pg_wchar_table[encoding].mbverifychar;
 
    mb_len = 0;
 
    while (len > 0)
    {
        int         l;
 
        /* fast path for ASCII-subset characters */
        if (!IS_HIGHBIT_SET(*mbstr))
        {
            if (*mbstr != '\0')
            {
                mb_len++;
                mbstr++;
                len--;
                continue;
            }
            /* a NUL byte inside the string is treated as invalid */
            if (noError)
                return -1;
            report_invalid_encoding(encoding, mbstr, len);
        }
 
        /* verify one character; a negative result means it is invalid */
        l = (*mbverifychar) ((const unsigned char *) mbstr, len);
 
        if (l < 0)
        {
            if (noError)
                return -1;
            report_invalid_encoding(encoding, mbstr, len);
        }
 
        /* step over the verified character */
        mbstr += l;
        len -= l;
        mb_len++;
    }
    return mb_len;
}

References Assert(), encoding, IS_HIGHBIT_SET, len, pg_wchar_tbl::mbverifychar, pg_encoding_max_length(), PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().

Referenced by length_in_encoding().

◆ pg_verifymbstr()

bool pg_verifymbstr	(	const char *	mbstr,
		int	len,
		bool	noError
	)

Definition at line 1557 of file mbutils.c.

{
    int         db_enc = GetDatabaseEncoding();

    /* Same as pg_verify_mbstr(), using the database encoding. */
    return pg_verify_mbstr(db_enc, mbstr, len, noError);
}

References GetDatabaseEncoding(), len, and pg_verify_mbstr().

Referenced by char2wchar(), CopyReadAttributesText(), plperl_spi_exec(), plperl_spi_prepare(), plperl_spi_query(), PLy_cursor_query(), PLy_output(), PLy_spi_execute_query(), PLy_spi_prepare(), PLyObject_AsString(), read_text_file(), and spg_text_leaf_consistent().

◆ pg_wchar2mb()

int pg_wchar2mb	(	const pg_wchar *	from,
		char *	to
	)

Definition at line 1002 of file mbutils.c.

{
    unsigned char *out = (unsigned char *) to;
    int         db_enc = DatabaseEncoding->encoding;

    /* Convert the whole zero-terminated wide string; its length comes from pg_wchar_strlen(). */
    return pg_wchar_table[db_enc].wchar2mb_with_len(from, out, pg_wchar_strlen(from));
}

References DatabaseEncoding, pg_enc2name::encoding, pg_wchar_strlen(), pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

◆ pg_wchar2mb_with_len()

int pg_wchar2mb_with_len	(	const pg_wchar *	from,
		char *	to,
		int	len
	)

Definition at line 1009 of file mbutils.c.

{
    unsigned char *out = (unsigned char *) to;
    int         db_enc = DatabaseEncoding->encoding;

    /* Convert len wide characters using the database encoding's routine. */
    return pg_wchar_table[db_enc].wchar2mb_with_len(from, out, len);
}

References DatabaseEncoding, pg_enc2name::encoding, len, pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

Referenced by build_regexp_match_result(), build_regexp_split_result(), build_test_match_result(), convertPgWchar(), and regexp_fixed_prefix().

◆ pg_wchar_strlen()

size_t pg_wchar_strlen ( const pg_wchar * str )

Definition at line 70 of file wstrncmp.c.

{
    const pg_wchar *end = str;

    /* Advance to the terminating zero element. */
    while (*end != 0)
        end++;

    /* The element count is the distance from the start to the terminator. */
    return (end - str);
}

References str.

Referenced by pg_wchar2mb().

◆ pg_wchar_strncmp()

int pg_wchar_strncmp	(	const pg_wchar *	s1,
		const pg_wchar *	s2,
		size_t	n
	)

Definition at line 40 of file wstrncmp.c.

{
    size_t      i;

    /* Compare at most n elements; n == 0 compares nothing and yields 0. */
    for (i = 0; i < n; i++)
    {
        /* First difference decides the result. */
        if (s1[i] != s2[i])
            return (s1[i] - s2[i]);

        /* Both strings ended at the same position: equal. */
        if (s1[i] == 0)
            break;
    }
    return 0;
}

References s1, and s2.

◆ PrepareClientEncoding()

int PrepareClientEncoding ( int encoding )

Definition at line 111 of file mbutils.c.

{
    /*
     * Check that the given client encoding can be used and, when inside a
     * transaction, look up and cache the conversion functions for it.
     * Returns 0 on success and -1 on failure.  This function does not itself
     * change ClientEncoding.
     */
    int         current_server_encoding;
    ListCell   *lc;
 
    if (!PG_VALID_FE_ENCODING(encoding))
        return -1;
 
    /* Can't do anything during startup, per notes above */
    if (!backend_startup_complete)
        return 0;
 
    current_server_encoding = GetDatabaseEncoding();
 
    /*
     * Check for cases that require no conversion function.
     */
    if (current_server_encoding == encoding ||
        current_server_encoding == PG_SQL_ASCII ||
        encoding == PG_SQL_ASCII)
        return 0;
 
    if (IsTransactionState())
    {
        /*
         * If we're in a live transaction, it's safe to access the catalogs,
         * so look up the functions.  We repeat the lookup even if the info is
         * already cached, so that we can react to changes in the contents of
         * pg_conversion.
         */
        Oid         to_server_proc,
                    to_client_proc;
        ConvProcInfo *convinfo;
        MemoryContext oldcontext;
 
        /* Both conversion directions must exist, else fail */
        to_server_proc = FindDefaultConversionProc(encoding,
                                                   current_server_encoding);
        if (!OidIsValid(to_server_proc))
            return -1;
        to_client_proc = FindDefaultConversionProc(current_server_encoding,
                                                   encoding);
        if (!OidIsValid(to_client_proc))
            return -1;
 
        /*
         * Load the fmgr info into TopMemoryContext (could still fail here)
         */
        convinfo = (ConvProcInfo *) MemoryContextAlloc(TopMemoryContext,
                                                       sizeof(ConvProcInfo));
        convinfo->s_encoding = current_server_encoding;
        convinfo->c_encoding = encoding;
        fmgr_info_cxt(to_server_proc, &convinfo->to_server_info,
                      TopMemoryContext);
        fmgr_info_cxt(to_client_proc, &convinfo->to_client_info,
                      TopMemoryContext);
 
        /* Attach new info to head of list */
        oldcontext = MemoryContextSwitchTo(TopMemoryContext);
        ConvProcList = lcons(convinfo, ConvProcList);
        MemoryContextSwitchTo(oldcontext);
 
        /*
         * We cannot yet remove any older entry for the same encoding pair,
         * since it could still be in use.  SetClientEncoding will clean up.
         */
 
        return 0;               /* success */
    }
    else
    {
        /*
         * If we're not in a live transaction, the only thing we can do is
         * restore a previous setting using the cache.  This covers all
         * transaction-rollback cases.  The only case it might not work for is
         * trying to change client_encoding on the fly by editing
         * postgresql.conf and SIGHUP'ing.  Which would probably be a stupid
         * thing to do anyway.
         */
        foreach(lc, ConvProcList)
        {
            ConvProcInfo *oldinfo = (ConvProcInfo *) lfirst(lc);
 
            /* a cached entry for this server/client pair is good enough */
            if (oldinfo->s_encoding == current_server_encoding &&
                oldinfo->c_encoding == encoding)
                return 0;
        }
 
        return -1;              /* it's not cached, so fail */
    }
}

References backend_startup_complete, ConvProcInfo::c_encoding, ConvProcList, encoding, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), IsTransactionState(), lcons(), lfirst, MemoryContextAlloc(), MemoryContextSwitchTo(), OidIsValid, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, ConvProcInfo::to_server_info, and TopMemoryContext.

Referenced by check_client_encoding(), and InitializeClientEncoding().

◆ report_invalid_encoding()

pg_noreturn void report_invalid_encoding	(	int	encoding,
		const char *	mbstr,
		int	len
	)

Definition at line 1699 of file mbutils.c.

{
    /*
     * Raise an ERROR describing an invalid byte sequence found at mbstr in
     * the given encoding.  Up to 8 bytes (bounded by the apparent character
     * length and by len) are shown in hexadecimal.  Does not return.
     */
    int         l = pg_encoding_mblen_or_incomplete(encoding, mbstr, len);
    char        buf[8 * 5 + 1];
    char       *p = buf;
    int         j,
                jlimit;

    /*
     * Start from an empty string.  If jlimit comes out <= 0 (for instance
     * when len <= 0) the loop below writes nothing, and buf must still be a
     * valid C string for the %s in the message.
     */
    buf[0] = '\0';

    jlimit = Min(l, len);
    jlimit = Min(jlimit, 8);    /* prevent buffer overrun */

    /* Format each byte as "0xNN", separated by single spaces */
    for (j = 0; j < jlimit; j++)
    {
        p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
        if (j < jlimit - 1)
            p += sprintf(p, " ");
    }

    ereport(ERROR,
            (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
             errmsg("invalid byte sequence for encoding \"%s\": %s",
                    pg_enc2name_tbl[encoding].name,
                    buf)));
}

References buf, encoding, ereport, errcode(), errmsg(), ERROR, j, len, Min, name, pg_enc2name_tbl, pg_encoding_mblen_or_incomplete(), and sprintf.

Referenced by big52euc_tw(), big52mic(), CopyConversionError(), euc_cn2mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), iso8859_1_to_utf8(), latin2mic(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_verify_mbstr(), pg_verify_mbstr_len(), shift_jis_20042euc_jis_2004(), sjis2euc_jp(), sjis2mic(), test_enc_conversion(), utf8_to_iso8859_1(), and UtfToLocal().

◆ report_untranslatable_char()

pg_noreturn void report_untranslatable_char	(	int	src_encoding,
		int	dest_encoding,
		const char *	mbstr,
		int	len
	)

Definition at line 1731 of file mbutils.c.

{
    /*
     * Raise an ERROR saying that the character at mbstr in src_encoding has
     * no equivalent in dest_encoding.  Up to 8 bytes (bounded by the apparent
     * character length and by len) are shown in hexadecimal.  Does not
     * return.
     */
    int         l;
    char        buf[8 * 5 + 1];
    char       *p = buf;
    int         j,
                jlimit;

    /*
     * Start from an empty string.  If jlimit comes out <= 0 (for instance
     * when len <= 0) the loop below writes nothing, and buf must still be a
     * valid C string for the %s in the message.
     */
    buf[0] = '\0';

    /*
     * We probably could use plain pg_encoding_mblen(), because
     * gb18030_to_utf8() verifies before it converts.  All conversions should.
     * For src_encoding!=GB18030, len>0 meets pg_encoding_mblen() needs.  Even
     * so, be defensive, since a buggy conversion might pass invalid data.
     * This is not a performance-critical path.
     */
    l = pg_encoding_mblen_or_incomplete(src_encoding, mbstr, len);
    jlimit = Min(l, len);
    jlimit = Min(jlimit, 8);    /* prevent buffer overrun */

    /* Format each byte as "0xNN", separated by single spaces */
    for (j = 0; j < jlimit; j++)
    {
        p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
        if (j < jlimit - 1)
            p += sprintf(p, " ");
    }

    ereport(ERROR,
            (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
             errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
                    buf,
                    pg_enc2name_tbl[src_encoding].name,
                    pg_enc2name_tbl[dest_encoding].name)));
}

References buf, ereport, errcode(), errmsg(), ERROR, j, len, Min, name, pg_enc2name_tbl, pg_encoding_mblen_or_incomplete(), and sprintf.

Referenced by big52euc_tw(), big52mic(), euc_tw2big5(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), utf8_to_iso8859_1(), and UtfToLocal().

◆ SetClientEncoding()

int SetClientEncoding ( int encoding )

Definition at line 209 of file mbutils.c.

{
    /*
     * Make the given encoding the current client encoding.  Returns 0 on
     * success and -1 on failure.  When a conversion is needed, this relies on
     * an entry already present in ConvProcList for the server/client
     * encoding pair.
     */
    int         current_server_encoding;
    bool        found;
    ListCell   *lc;
 
    if (!PG_VALID_FE_ENCODING(encoding))
        return -1;
 
    /* Can't do anything during startup, per notes above */
    if (!backend_startup_complete)
    {
        /* just remember the request in pending_client_encoding */
        pending_client_encoding = encoding;
        return 0;
    }
 
    current_server_encoding = GetDatabaseEncoding();
 
    /*
     * Check for cases that require no conversion function.
     */
    if (current_server_encoding == encoding ||
        current_server_encoding == PG_SQL_ASCII ||
        encoding == PG_SQL_ASCII)
    {
        ClientEncoding = &pg_enc2name_tbl[encoding];
        ToServerConvProc = NULL;
        ToClientConvProc = NULL;
        return 0;
    }
 
    /*
     * Search the cache for the entry previously prepared by
     * PrepareClientEncoding; if there isn't one, we lose.  While at it,
     * release any duplicate entries so that repeated Prepare/Set cycles don't
     * leak memory.
     */
    found = false;
    foreach(lc, ConvProcList)
    {
        ConvProcInfo *convinfo = (ConvProcInfo *) lfirst(lc);
 
        if (convinfo->s_encoding == current_server_encoding &&
            convinfo->c_encoding == encoding)
        {
            if (!found)
            {
                /* Found newest entry, so set up */
                ClientEncoding = &pg_enc2name_tbl[encoding];
                ToServerConvProc = &convinfo->to_server_info;
                ToClientConvProc = &convinfo->to_client_info;
                found = true;
            }
            else
            {
                /* Duplicate entry, release it */
                ConvProcList = foreach_delete_current(ConvProcList, lc);
                pfree(convinfo);
            }
        }
    }
 
    if (found)
        return 0;               /* success */
    else
        return -1;              /* it's not cached, so fail */
}

References backend_startup_complete, ConvProcInfo::c_encoding, ClientEncoding, ConvProcList, encoding, foreach_delete_current, GetDatabaseEncoding(), lfirst, pending_client_encoding, pfree(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, ConvProcInfo::to_server_info, ToClientConvProc, and ToServerConvProc.

Referenced by assign_client_encoding(), InitializeClientEncoding(), and ParallelWorkerMain().

◆ SetDatabaseEncoding()

void SetDatabaseEncoding ( int encoding )

Definition at line 1162 of file mbutils.c.

{
    const pg_enc2name *entry;

    /* Refuse anything that is not a valid backend encoding. */
    if (!PG_VALID_BE_ENCODING(encoding))
        elog(ERROR, "invalid database encoding: %d", encoding);

    /* Point DatabaseEncoding at the matching name-table entry. */
    entry = &pg_enc2name_tbl[encoding];
    DatabaseEncoding = entry;
    Assert(entry->encoding == encoding);
}

References Assert(), DatabaseEncoding, elog, encoding, pg_enc2name::encoding, ERROR, pg_enc2name_tbl, and PG_VALID_BE_ENCODING.

Referenced by CheckMyDatabase().

◆ SetMessageEncoding()

void SetMessageEncoding ( int encoding )

Definition at line 1172 of file mbutils.c.

{
    const pg_enc2name *entry;

    /*
     * Validation is assert-only here: some callers run before elog() can be
     * used, so a regular error report is not an option.
     */
    Assert(PG_VALID_ENCODING(encoding));

    /* Point MessageEncoding at the pg_enc2name_tbl slot for this ID. */
    entry = &pg_enc2name_tbl[encoding];
    MessageEncoding = entry;

    /* Sanity check: the slot at index "encoding" must describe that ID. */
    Assert(entry->encoding == encoding);
}

References Assert(), encoding, pg_enc2name::encoding, MessageEncoding, pg_enc2name_tbl, and PG_VALID_ENCODING.

Referenced by pg_perm_setlocale().

◆ surrogate_pair_to_codepoint()

static pg_wchar surrogate_pair_to_codepoint	(	pg_wchar	first,
		pg_wchar	second
	)

inline static

Definition at line 537 of file pg_wchar.h.

{

Referenced by addUnicode(), json_lex_string(), str_udeescape(), and unistr().

◆ unicode_to_utf8()

unsigned char * unicode_to_utf8	(	pg_wchar	c,
		unsigned char *	utf8string
	)

inline static

Definition at line 575 of file pg_wchar.h.

{
    if (c <= 0x7F)
    {
        utf8string[0] = c;
    }
    else if (c <= 0x7FF)
    {
        utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
        utf8string[1] = 0x80 | (c & 0x3F);
    }

Referenced by convert_case(), json_lex_string(), pg_saslprep(), pg_unicode_to_server(), pg_unicode_to_server_noerror(), pg_wchar2utf_with_len(), and unicode_normalize_func().

◆ unicode_utf8len()

static int unicode_utf8len ( pg_wchar c )

inline static

Definition at line 607 of file pg_wchar.h.

    {
        utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
        utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
        utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
        utf8string[3] = 0x80 | (c & 0x3F);
    }
 
    return utf8string;
}

Referenced by convert_case(), and initcap_wbnext().

◆ utf8_to_unicode()

pg_wchar utf8_to_unicode ( const unsigned char * c )

inline static

Definition at line 549 of file pg_wchar.h.

{
    return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF);
}
 
/*
 * Convert a UTF-8 character to a Unicode code point.
 * This is a one-character version of pg_utf2wchar_with_len.
 *
 * No error checks here, c must point to a long-enough string.
 */
static inline pg_wchar
utf8_to_unicode(const unsigned char *c)
{
    if ((*c & 0x80) == 0)
        return (pg_wchar) c[0];

◆ UtfToLocal()

int UtfToLocal	(	const unsigned char *	utf,
		int	len,
		unsigned char *	iso,
		const pg_mb_radix_tree *	map,
		const pg_utf_to_local_combined *	cmap,
		int	cmapsize,
		utf_local_conversion_func	conv_func,
		int	encoding,
		bool	noError
	)

Definition at line 507 of file conv.c.

{
    /*
     * Convert the UTF-8 input "utf" ("len" bytes) into the local encoding,
     * writing the result to "iso" and terminating it with a NUL byte.
     *
     * Each non-ASCII character is looked up, in this order:
     *   1. together with the following character in the combined map "cmap"
     *      ("cmapsize" entries, searched with bsearch/compare3), if cmap is
     *      non-NULL and more input follows;
     *   2. in the radix tree "map", if non-NULL;
     *   3. through the callback "conv_func", if non-NULL.
     * A zero result from steps 2 and 3 is treated as "no mapping".
     *
     * "encoding" is validated up front and is otherwise only passed to
     * report_untranslatable_char() as the target encoding.
     *
     * If "noError" is true, conversion stops quietly at the first invalid or
     * untranslatable character; otherwise the problem is reported through
     * report_invalid_encoding() / report_untranslatable_char().
     *
     * Returns the number of input bytes consumed (utf - start).
     *
     * NOTE(review): this function itself performs no bounds checking on
     * "iso"; presumably the caller sizes the output buffer -- confirm.
     */
    uint32      iutf;
    int         l;
    const pg_utf_to_local_combined *cp;
    const unsigned char *start = utf;   /* remembered to compute bytes consumed */

    if (!PG_VALID_ENCODING(encoding))
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("invalid encoding number: %d", encoding)));

    /*
     * Each iteration consumes one character (or one combined pair).  The
     * loop increment "len -= l" accounts for the character whose length is
     * currently in "l"; every "continue" below relies on that.
     */
    for (; len > 0; len -= l)
    {
        /*
         * Bytes of the current character, right-aligned: a 2-byte character
         * fills b3/b4, a 3-byte one b2..b4, a 4-byte one b1..b4.
         */
        unsigned char b1 = 0;
        unsigned char b2 = 0;
        unsigned char b3 = 0;
        unsigned char b4 = 0;

        /* "break" cases all represent errors */
        if (*utf == '\0')
            break;

        l = pg_utf_mblen(utf);
        if (len < l)
            break;              /* character is cut off by end of input */

        if (!pg_utf8_islegal(utf, l))
            break;

        if (l == 1)
        {
            /* ASCII case is easy, assume it's one-to-one conversion */
            *iso++ = *utf++;
            continue;
        }

        /* collect coded char of length l */
        if (l == 2)
        {
            b3 = *utf++;
            b4 = *utf++;
        }
        else if (l == 3)
        {
            b2 = *utf++;
            b3 = *utf++;
            b4 = *utf++;
        }
        else if (l == 4)
        {
            b1 = *utf++;
            b2 = *utf++;
            b3 = *utf++;
            b4 = *utf++;
        }
        else
        {
            elog(ERROR, "unsupported character length %d", l);
            iutf = 0;           /* keep compiler quiet */
        }
        /* pack the bytes into one 32-bit key, first byte most significant */
        iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);

        /* First, try with combined map if possible */
        if (cmap && len > l)
        {
            /*
             * Snapshot the state just after the first character, so it can
             * be restored if the pair is not found in the combined map.
             */
            const unsigned char *utf_save = utf;
            int         len_save = len;
            int         l_save = l;

            /* collect next character, same as above */
            len -= l;

            l = pg_utf_mblen(utf);
            if (len < l)
            {
                /* need more data to decide if this is a combined char */
                utf -= l_save;  /* back up to the start of the first character */
                break;
            }

            if (!pg_utf8_islegal(utf, l))
            {
                /* report while utf still points at the offending character */
                if (!noError)
                    report_invalid_encoding(PG_UTF8, (const char *) utf, len);
                utf -= l_save;  /* back up to the start of the first character */
                break;
            }

            /* We assume ASCII character cannot be in combined map */
            if (l > 1)
            {
                uint32      iutf2;  /* packed bytes of the second character */
                uint32      cutf[2];    /* search key: {first, second} */

                if (l == 2)
                {
                    iutf2 = *utf++ << 8;
                    iutf2 |= *utf++;
                }
                else if (l == 3)
                {
                    iutf2 = *utf++ << 16;
                    iutf2 |= *utf++ << 8;
                    iutf2 |= *utf++;
                }
                else if (l == 4)
                {
                    iutf2 = *utf++ << 24;
                    iutf2 |= *utf++ << 16;
                    iutf2 |= *utf++ << 8;
                    iutf2 |= *utf++;
                }
                else
                {
                    elog(ERROR, "unsupported character length %d", l);
                    iutf2 = 0;  /* keep compiler quiet */
                }

                cutf[0] = iutf;
                cutf[1] = iutf2;

                cp = bsearch(cutf, cmap, cmapsize,
                             sizeof(pg_utf_to_local_combined), compare3);

                if (cp)
                {
                    iso = store_coded_char(iso, cp->code);

                    /*
                     * Both characters are consumed: len was already reduced
                     * by the first one above, and the loop increment
                     * subtracts l, which now holds the second one's length.
                     */
                    continue;
                }
            }

            /* fail, so back up to reprocess second character next time */
            utf = utf_save;
            len = len_save;
            l = l_save;
        }

        /* Now check ordinary map */
        if (map)
        {
            uint32      converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);

            if (converted)
            {
                iso = store_coded_char(iso, converted);
                continue;
            }
        }

        /* if there's a conversion function, try that */
        if (conv_func)
        {
            uint32      converted = (*conv_func) (iutf);

            if (converted)
            {
                iso = store_coded_char(iso, converted);
                continue;
            }
        }

        /* failed to translate this character */
        utf -= l;               /* back up to the start of the failed character */
        if (noError)
            break;
        /* NOTE(review): presumably does not return -- confirm */
        report_untranslatable_char(PG_UTF8, encoding,
                                   (const char *) utf, len);
    }

    /* if we broke out of loop early, must be invalid input */
    if (len > 0 && !noError)
        report_invalid_encoding(PG_UTF8, (const char *) utf, len);

    *iso = '\0';

    /* number of input bytes consumed */
    return utf - start;
}

References pg_utf_to_local_combined::code, compare3(), elog, encoding, ereport, errcode(), errmsg(), ERROR, len, pg_mb_radix_conv(), PG_UTF8, pg_utf8_islegal(), pg_utf_mblen, PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), start, and store_coded_char().

Referenced by utf8_to_big5(), utf8_to_euc_cn(), utf8_to_euc_jis_2004(), utf8_to_euc_jp(), utf8_to_euc_kr(), utf8_to_euc_tw(), utf8_to_gb18030(), utf8_to_gbk(), utf8_to_iso8859(), utf8_to_johab(), utf8_to_koi8r(), utf8_to_koi8u(), utf8_to_shift_jis_2004(), utf8_to_sjis(), utf8_to_uhc(), and utf8_to_win().

Variable Documentation

◆ pg_enc2gettext_tbl

PGDLLIMPORT const char* pg_enc2gettext_tbl[]

extern

Definition at line 360 of file encnames.c.

◆ pg_enc2name_tbl

PGDLLIMPORT const pg_enc2name pg_enc2name_tbl[]

extern

Definition at line 308 of file encnames.c.

Referenced by check_encoding_conversion_args(), InitializeClientEncoding(), pg_any_to_server(), pg_encoding_to_char(), pg_unicode_to_server(), report_invalid_encoding(), report_untranslatable_char(), SetClientEncoding(), SetDatabaseEncoding(), SetMessageEncoding(), and test_enc_setup().

◆ pg_wchar_table

PGDLLIMPORT const pg_wchar_tbl pg_wchar_table[]

extern

Data Structures

Macros

Typedefs

Enumerations

Functions

Variables

Macro Definition Documentation

◆ CHECK_ENCODING_CONVERSION_ARGS

◆ IS_LC1

◆ IS_LC2

◆ IS_LCPRV1

◆ IS_LCPRV1_A_RANGE

◆ IS_LCPRV1_B_RANGE

◆ IS_LCPRV2

◆ IS_LCPRV2_A_RANGE

◆ IS_LCPRV2_B_RANGE

◆ ISSJISHEAD

◆ ISSJISTAIL

◆ LC_ARABIC_1_COLUMN

◆ LC_ARABIC_2_COLUMN

◆ LC_ARABIC_DIGIT

◆ LC_ASCII_RIGHT_TO_LEFT

◆ LC_BIG5_1

◆ LC_BIG5_2

◆ LC_CNS11643_1

◆ LC_CNS11643_2

◆ LC_CNS11643_3

◆ LC_CNS11643_4

◆ LC_CNS11643_5

◆ LC_CNS11643_6

◆ LC_CNS11643_7

◆ LC_ETHIOPIC

◆ LC_GB2312_80

◆ LC_INDIAN_1_COLUMN

◆ LC_INDIAN_2_COLUMN

◆ LC_IPA

◆ LC_ISO8859_1

◆ LC_ISO8859_15

◆ LC_ISO8859_2

◆ LC_ISO8859_3

◆ LC_ISO8859_4

◆ LC_ISO8859_5

◆ LC_ISO8859_6

◆ LC_ISO8859_7

◆ LC_ISO8859_8

◆ LC_ISO8859_9

◆ LC_JISX0201K

◆ LC_JISX0201R

◆ LC_JISX0208

◆ LC_JISX0208_1978

◆ LC_JISX0212

◆ LC_JISX0213_1

◆ LC_KOI8_R

◆ LC_KS5601

◆ LC_LAO

◆ LC_SISHENG

◆ LC_TIBETAN

◆ LC_TIBETAN_1_COLUMN

◆ LC_TIS620

◆ LC_UNICODE_SUBSET

◆ LC_UNICODE_SUBSET_2

◆ LC_UNICODE_SUBSET_3

◆ LC_VISCII_LOWER

◆ LC_VISCII_UPPER

◆ LCPRV1_A

◆ LCPRV1_B

◆ LCPRV2_A

◆ LCPRV2_B

◆ MAX_CONVERSION_GROWTH

◆ MAX_CONVERSION_INPUT_LENGTH

◆ MAX_MULTIBYTE_CHAR_LEN

◆ MAX_UNICODE_EQUIVALENT_STRING

◆ pg_char_to_encoding

◆ PG_ENCODING_BE_LAST

◆ PG_ENCODING_IS_CLIENT_ONLY

◆ pg_encoding_to_char

◆ pg_utf_mblen

◆ PG_VALID_BE_ENCODING

◆ PG_VALID_ENCODING

◆ PG_VALID_FE_ENCODING