PostgreSQL Source Code  git master
pg_wchar.h File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  pg_enc2name
 
struct  pg_enc2gettext
 
struct  pg_wchar_tbl
 
struct  pg_mb_radix_tree
 
struct  pg_utf_to_local_combined
 
struct  pg_local_to_utf_combined
 

Macros

#define MAX_MULTIBYTE_CHAR_LEN   4
 
#define SS2   0x8e /* single shift 2 (JIS0201) */
 
#define SS3   0x8f /* single shift 3 (JIS0212) */
 
#define ISSJISHEAD(c)   (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))
 
#define ISSJISTAIL(c)   (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
 
#define LC_ISO8859_1   0x81 /* ISO8859 Latin 1 */
 
#define LC_ISO8859_2   0x82 /* ISO8859 Latin 2 */
 
#define LC_ISO8859_3   0x83 /* ISO8859 Latin 3 */
 
#define LC_ISO8859_4   0x84 /* ISO8859 Latin 4 */
 
#define LC_TIS620   0x85 /* Thai (not supported yet) */
 
#define LC_ISO8859_7   0x86 /* Greek (not supported yet) */
 
#define LC_ISO8859_6   0x87 /* Arabic (not supported yet) */
 
#define LC_ISO8859_8   0x88 /* Hebrew (not supported yet) */
 
#define LC_JISX0201K   0x89 /* Japanese 1 byte kana */
 
#define LC_JISX0201R   0x8a /* Japanese 1 byte Roman */
 
#define LC_KOI8_R   0x8b /* Cyrillic KOI8-R */
 
#define LC_ISO8859_5   0x8c /* ISO8859 Cyrillic */
 
#define LC_ISO8859_9   0x8d /* ISO8859 Latin 5 (not supported yet) */
 
#define LC_ISO8859_15   0x8e /* ISO8859 Latin 15 (not supported yet) */
 
#define IS_LC1(c)   ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8d)
 
#define LC_JISX0208_1978   0x90 /* Japanese Kanji, old JIS (not supported) */
 
#define LC_GB2312_80   0x91 /* Chinese */
 
#define LC_JISX0208   0x92 /* Japanese Kanji (JIS X 0208) */
 
#define LC_KS5601   0x93 /* Korean */
 
#define LC_JISX0212   0x94 /* Japanese Kanji (JIS X 0212) */
 
#define LC_CNS11643_1   0x95 /* CNS 11643-1992 Plane 1 */
 
#define LC_CNS11643_2   0x96 /* CNS 11643-1992 Plane 2 */
 
#define LC_JISX0213_1
 
#define LC_BIG5_1
 
#define LC_BIG5_2
 
#define IS_LC2(c)   ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)
 
#define LCPRV1_A   0x9a
 
#define LCPRV1_B   0x9b
 
#define IS_LCPRV1(c)   ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B)
 
#define IS_LCPRV1_A_RANGE(c)   ((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)
 
#define IS_LCPRV1_B_RANGE(c)   ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)
 
#define LCPRV2_A   0x9c
 
#define LCPRV2_B   0x9d
 
#define IS_LCPRV2(c)   ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B)
 
#define IS_LCPRV2_A_RANGE(c)   ((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)
 
#define IS_LCPRV2_B_RANGE(c)   ((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)
 
#define LC_SISHENG
 
#define LC_IPA
 
#define LC_VISCII_LOWER
 
#define LC_VISCII_UPPER
 
#define LC_ARABIC_DIGIT   0xa4 /* Arabic digit (not supported) */
 
#define LC_ARABIC_1_COLUMN   0xa5 /* Arabic 1-column (not supported) */
 
#define LC_ASCII_RIGHT_TO_LEFT
 
#define LC_LAO
 
#define LC_ARABIC_2_COLUMN   0xa8 /* Arabic 2-column (not supported) */
 
#define LC_INDIAN_1_COLUMN
 
#define LC_TIBETAN_1_COLUMN
 
#define LC_UNICODE_SUBSET_2
 
#define LC_UNICODE_SUBSET_3
 
#define LC_UNICODE_SUBSET
 
#define LC_ETHIOPIC   0xf5 /* Ethiopic characters (not supported) */
 
#define LC_CNS11643_3   0xf6 /* CNS 11643-1992 Plane 3 */
 
#define LC_CNS11643_4   0xf7 /* CNS 11643-1992 Plane 4 */
 
#define LC_CNS11643_5   0xf8 /* CNS 11643-1992 Plane 5 */
 
#define LC_CNS11643_6   0xf9 /* CNS 11643-1992 Plane 6 */
 
#define LC_CNS11643_7   0xfa /* CNS 11643-1992 Plane 7 */
 
#define LC_INDIAN_2_COLUMN
 
#define LC_TIBETAN   0xfc /* Tibetan (not supported) */
 
#define PG_ENCODING_BE_LAST   PG_KOI8U
 
#define PG_VALID_BE_ENCODING(_enc)   ((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST)
 
#define PG_ENCODING_IS_CLIENT_ONLY(_enc)   ((_enc) > PG_ENCODING_BE_LAST && (_enc) < _PG_LAST_ENCODING_)
 
#define PG_VALID_ENCODING(_enc)   ((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_)
 
#define PG_VALID_FE_ENCODING(_enc)   PG_VALID_ENCODING(_enc)
 
#define MAX_CONVERSION_GROWTH   4
 
#define MAX_CONVERSION_INPUT_LENGTH   16
 
#define MAX_UNICODE_EQUIVALENT_STRING   16
 
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
 

Typedefs

typedef unsigned int pg_wchar
 
typedef enum pg_enc pg_enc
 
typedef struct pg_enc2name pg_enc2name
 
typedef struct pg_enc2gettext pg_enc2gettext
 
typedef int(* mb2wchar_with_len_converter) (const unsigned char *from, pg_wchar *to, int len)
 
typedef int(* wchar2mb_with_len_converter) (const pg_wchar *from, unsigned char *to, int len)
 
typedef int(* mblen_converter) (const unsigned char *mbstr)
 
typedef int(* mbdisplaylen_converter) (const unsigned char *mbstr)
 
typedef bool(* mbcharacter_incrementer) (unsigned char *mbstr, int len)
 
typedef int(* mbchar_verifier) (const unsigned char *mbstr, int len)
 
typedef int(* mbstr_verifier) (const unsigned char *mbstr, int len)
 
typedef uint32(* utf_local_conversion_func) (uint32 code)
 

Enumerations

enum  pg_enc {
  PG_SQL_ASCII = 0, PG_EUC_JP, PG_EUC_CN, PG_EUC_KR,
  PG_EUC_TW, PG_EUC_JIS_2004, PG_UTF8, PG_MULE_INTERNAL,
  PG_LATIN1, PG_LATIN2, PG_LATIN3, PG_LATIN4,
  PG_LATIN5, PG_LATIN6, PG_LATIN7, PG_LATIN8,
  PG_LATIN9, PG_LATIN10, PG_WIN1256, PG_WIN1258,
  PG_WIN866, PG_WIN874, PG_KOI8R, PG_WIN1251,
  PG_WIN1252, PG_ISO_8859_5, PG_ISO_8859_6, PG_ISO_8859_7,
  PG_ISO_8859_8, PG_WIN1250, PG_WIN1253, PG_WIN1254,
  PG_WIN1255, PG_WIN1257, PG_KOI8U, PG_SJIS,
  PG_BIG5, PG_GBK, PG_UHC, PG_GB18030,
  PG_JOHAB, PG_SHIFT_JIS_2004, _PG_LAST_ENCODING_
}
 

Functions

static bool is_valid_unicode_codepoint (pg_wchar c)
 
static bool is_utf16_surrogate_first (pg_wchar c)
 
static bool is_utf16_surrogate_second (pg_wchar c)
 
static pg_wchar surrogate_pair_to_codepoint (pg_wchar first, pg_wchar second)
 
int pg_char_to_encoding (const char *name)
 
const char * pg_encoding_to_char (int encoding)
 
int pg_valid_server_encoding_id (int encoding)
 
int pg_encoding_mblen (int encoding, const char *mbstr)
 
int pg_encoding_mblen_bounded (int encoding, const char *mbstr)
 
int pg_encoding_dsplen (int encoding, const char *mbstr)
 
int pg_encoding_verifymbchar (int encoding, const char *mbstr, int len)
 
int pg_encoding_verifymbstr (int encoding, const char *mbstr, int len)
 
int pg_encoding_max_length (int encoding)
 
int pg_valid_client_encoding (const char *name)
 
int pg_valid_server_encoding (const char *name)
 
bool is_encoding_supported_by_icu (int encoding)
 
const char * get_encoding_name_for_icu (int encoding)
 
unsigned char * unicode_to_utf8 (pg_wchar c, unsigned char *utf8string)
 
pg_wchar utf8_to_unicode (const unsigned char *c)
 
bool pg_utf8_islegal (const unsigned char *source, int length)
 
int pg_utf_mblen (const unsigned char *s)
 
int pg_mule_mblen (const unsigned char *s)
 
int pg_mb2wchar (const char *from, pg_wchar *to)
 
int pg_mb2wchar_with_len (const char *from, pg_wchar *to, int len)
 
int pg_encoding_mb2wchar_with_len (int encoding, const char *from, pg_wchar *to, int len)
 
int pg_wchar2mb (const pg_wchar *from, char *to)
 
int pg_wchar2mb_with_len (const pg_wchar *from, char *to, int len)
 
int pg_encoding_wchar2mb_with_len (int encoding, const pg_wchar *from, char *to, int len)
 
int pg_char_and_wchar_strcmp (const char *s1, const pg_wchar *s2)
 
int pg_wchar_strncmp (const pg_wchar *s1, const pg_wchar *s2, size_t n)
 
int pg_char_and_wchar_strncmp (const char *s1, const pg_wchar *s2, size_t n)
 
size_t pg_wchar_strlen (const pg_wchar *wstr)
 
int pg_mblen (const char *mbstr)
 
int pg_dsplen (const char *mbstr)
 
int pg_mbstrlen (const char *mbstr)
 
int pg_mbstrlen_with_len (const char *mbstr, int len)
 
int pg_mbcliplen (const char *mbstr, int len, int limit)
 
int pg_encoding_mbcliplen (int encoding, const char *mbstr, int len, int limit)
 
int pg_mbcharcliplen (const char *mbstr, int len, int limit)
 
int pg_database_encoding_max_length (void)
 
mbcharacter_incrementer pg_database_encoding_character_incrementer (void)
 
int PrepareClientEncoding (int encoding)
 
int SetClientEncoding (int encoding)
 
void InitializeClientEncoding (void)
 
int pg_get_client_encoding (void)
 
const char * pg_get_client_encoding_name (void)
 
void SetDatabaseEncoding (int encoding)
 
int GetDatabaseEncoding (void)
 
const char * GetDatabaseEncodingName (void)
 
void SetMessageEncoding (int encoding)
 
int GetMessageEncoding (void)
 
unsigned char * pg_do_encoding_conversion (unsigned char *src, int len, int src_encoding, int dest_encoding)
 
int pg_do_encoding_conversion_buf (Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dst, int dstlen, bool noError)
 
char * pg_client_to_server (const char *s, int len)
 
char * pg_server_to_client (const char *s, int len)
 
char * pg_any_to_server (const char *s, int len, int encoding)
 
char * pg_server_to_any (const char *s, int len, int encoding)
 
void pg_unicode_to_server (pg_wchar c, unsigned char *s)
 
unsigned short BIG5toCNS (unsigned short big5, unsigned char *lc)
 
unsigned short CNStoBIG5 (unsigned short cns, unsigned char lc)
 
int UtfToLocal (const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
 
int LocalToUtf (const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
 
bool pg_verifymbstr (const char *mbstr, int len, bool noError)
 
bool pg_verify_mbstr (int encoding, const char *mbstr, int len, bool noError)
 
int pg_verify_mbstr_len (int encoding, const char *mbstr, int len, bool noError)
 
void check_encoding_conversion_args (int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)
 
void report_invalid_encoding (int encoding, const char *mbstr, int len) pg_attribute_noreturn()
 
void report_untranslatable_char (int src_encoding, int dest_encoding, const char *mbstr, int len) pg_attribute_noreturn()
 
int local2local (const unsigned char *l, unsigned char *p, int len, int src_encoding, int dest_encoding, const unsigned char *tab, bool noError)
 
int latin2mic (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, bool noError)
 
int mic2latin (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, bool noError)
 
int latin2mic_with_table (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)
 
int mic2latin_with_table (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)
 

Variables

const pg_enc2name pg_enc2name_tbl []
 
const pg_enc2gettext pg_enc2gettext_tbl []
 
const pg_wchar_tbl pg_wchar_table []
 

Macro Definition Documentation

◆ CHECK_ENCODING_CONVERSION_ARGS

#define CHECK_ENCODING_CONVERSION_ARGS (   srcencoding,
  destencoding 
)
Value:
check_encoding_conversion_args(PG_GETARG_INT32(0), \
PG_GETARG_INT32(1), \
PG_GETARG_INT32(4), \
(srcencoding), \
(destencoding))
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
void check_encoding_conversion_args(int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)
Definition: mbutils.c:1618

Definition at line 527 of file pg_wchar.h.

Referenced by big5_to_euc_tw(), big5_to_mic(), big5_to_utf8(), euc_cn_to_mic(), euc_cn_to_utf8(), euc_jis_2004_to_shift_jis_2004(), euc_jis_2004_to_utf8(), euc_jp_to_mic(), euc_jp_to_sjis(), euc_jp_to_utf8(), euc_kr_to_mic(), euc_kr_to_utf8(), euc_tw_to_big5(), euc_tw_to_mic(), euc_tw_to_utf8(), gb18030_to_utf8(), gbk_to_utf8(), iso8859_1_to_utf8(), iso8859_to_utf8(), iso_to_koi8r(), iso_to_mic(), iso_to_win1251(), iso_to_win866(), johab_to_utf8(), koi8r_to_iso(), koi8r_to_mic(), koi8r_to_utf8(), koi8r_to_win1251(), koi8r_to_win866(), koi8u_to_utf8(), latin1_to_mic(), latin2_to_mic(), latin2_to_win1250(), latin3_to_mic(), latin4_to_mic(), mic_to_big5(), mic_to_euc_cn(), mic_to_euc_jp(), mic_to_euc_kr(), mic_to_euc_tw(), mic_to_iso(), mic_to_koi8r(), mic_to_latin1(), mic_to_latin2(), mic_to_latin3(), mic_to_latin4(), mic_to_sjis(), mic_to_win1250(), mic_to_win1251(), mic_to_win866(), shift_jis_2004_to_euc_jis_2004(), shift_jis_2004_to_utf8(), sjis_to_euc_jp(), sjis_to_mic(), sjis_to_utf8(), uhc_to_utf8(), utf8_to_big5(), utf8_to_euc_cn(), utf8_to_euc_jis_2004(), utf8_to_euc_jp(), utf8_to_euc_kr(), utf8_to_euc_tw(), utf8_to_gb18030(), utf8_to_gbk(), utf8_to_iso8859(), utf8_to_iso8859_1(), utf8_to_johab(), utf8_to_koi8r(), utf8_to_koi8u(), utf8_to_shift_jis_2004(), utf8_to_sjis(), utf8_to_uhc(), utf8_to_win(), win1250_to_latin2(), win1250_to_mic(), win1251_to_iso(), win1251_to_koi8r(), win1251_to_mic(), win1251_to_win866(), win866_to_iso(), win866_to_koi8r(), win866_to_mic(), win866_to_win1251(), and win_to_utf8().

◆ IS_LC1

#define IS_LC1 (   c)    ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8d)

◆ IS_LC2

#define IS_LC2 (   c)    ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)

◆ IS_LCPRV1

#define IS_LCPRV1 (   c)    ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B)

Definition at line 152 of file pg_wchar.h.

Referenced by pg_mule2wchar_with_len(), pg_mule_dsplen(), and pg_mule_mblen().

◆ IS_LCPRV1_A_RANGE

#define IS_LCPRV1_A_RANGE (   c)    ((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)

Definition at line 153 of file pg_wchar.h.

Referenced by pg_wchar2mule_with_len().

◆ IS_LCPRV1_B_RANGE

#define IS_LCPRV1_B_RANGE (   c)    ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)

Definition at line 155 of file pg_wchar.h.

Referenced by pg_wchar2mule_with_len().

◆ IS_LCPRV2

#define IS_LCPRV2 (   c)    ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B)

Definition at line 164 of file pg_wchar.h.

Referenced by pg_mule2wchar_with_len(), pg_mule_dsplen(), and pg_mule_mblen().

◆ IS_LCPRV2_A_RANGE

#define IS_LCPRV2_A_RANGE (   c)    ((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)

Definition at line 165 of file pg_wchar.h.

Referenced by pg_wchar2mule_with_len().

◆ IS_LCPRV2_B_RANGE

#define IS_LCPRV2_B_RANGE (   c)    ((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)

Definition at line 167 of file pg_wchar.h.

Referenced by pg_wchar2mule_with_len().

◆ ISSJISHEAD

#define ISSJISHEAD (   c)    (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))

Definition at line 41 of file pg_wchar.h.

Referenced by pg_sjis_verifychar(), and sjis2mic().

◆ ISSJISTAIL

#define ISSJISTAIL (   c)    (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))

Definition at line 42 of file pg_wchar.h.

Referenced by pg_sjis_verifychar(), and sjis2mic().

◆ LC_ARABIC_1_COLUMN

#define LC_ARABIC_1_COLUMN   0xa5 /* Arabic 1-column (not supported) */

Definition at line 182 of file pg_wchar.h.

◆ LC_ARABIC_2_COLUMN

#define LC_ARABIC_2_COLUMN   0xa8 /* Arabic 2-column (not supported) */

Definition at line 188 of file pg_wchar.h.

◆ LC_ARABIC_DIGIT

#define LC_ARABIC_DIGIT   0xa4 /* Arabic digit (not supported) */

Definition at line 181 of file pg_wchar.h.

◆ LC_ASCII_RIGHT_TO_LEFT

#define LC_ASCII_RIGHT_TO_LEFT
Value:
0xa6 /* ASCII (left half of ISO8859-1) with
* right-to-left direction (not
* supported) */

Definition at line 183 of file pg_wchar.h.

◆ LC_BIG5_1

#define LC_BIG5_1
Value:
0x98 /* Plane 1 Chinese traditional (not
* supported) */

Definition at line 138 of file pg_wchar.h.

◆ LC_BIG5_2

#define LC_BIG5_2
Value:
0x99 /* Plane 2 Chinese traditional (not
* supported) */

Definition at line 140 of file pg_wchar.h.

◆ LC_CNS11643_1

#define LC_CNS11643_1   0x95 /* CNS 11643-1992 Plane 1 */

◆ LC_CNS11643_2

#define LC_CNS11643_2   0x96 /* CNS 11643-1992 Plane 2 */

◆ LC_CNS11643_3

#define LC_CNS11643_3   0xf6 /* CNS 11643-1992 Plane 3 */

◆ LC_CNS11643_4

#define LC_CNS11643_4   0xf7 /* CNS 11643-1992 Plane 4 */

Definition at line 205 of file pg_wchar.h.

Referenced by big52mic(), BIG5toCNS(), and CNStoBIG5().

◆ LC_CNS11643_5

#define LC_CNS11643_5   0xf8 /* CNS 11643-1992 Plane 5 */

Definition at line 206 of file pg_wchar.h.

◆ LC_CNS11643_6

#define LC_CNS11643_6   0xf9 /* CNS 11643-1992 Plane 6 */

Definition at line 207 of file pg_wchar.h.

◆ LC_CNS11643_7

#define LC_CNS11643_7   0xfa /* CNS 11643-1992 Plane 7 */

Definition at line 208 of file pg_wchar.h.

Referenced by big52euc_tw(), and mic2euc_tw().

◆ LC_ETHIOPIC

#define LC_ETHIOPIC   0xf5 /* Ethiopic characters (not supported) */

Definition at line 203 of file pg_wchar.h.

◆ LC_GB2312_80

#define LC_GB2312_80   0x91 /* Chinese */

Definition at line 130 of file pg_wchar.h.

Referenced by euc_cn2mic(), and mic2euc_cn().

◆ LC_INDIAN_1_COLUMN

#define LC_INDIAN_1_COLUMN
Value:
0xf0 /* Indian charset for 1-column width
* glyphs (not supported) */

Definition at line 193 of file pg_wchar.h.

◆ LC_INDIAN_2_COLUMN

#define LC_INDIAN_2_COLUMN
Value:
0xfb /* Indian charset for 2-column width
* glyphs (not supported) */

Definition at line 209 of file pg_wchar.h.

◆ LC_IPA

#define LC_IPA
Value:
0xa1 /* IPA (International Phonetic
* Association) (not supported) */

Definition at line 175 of file pg_wchar.h.

◆ LC_ISO8859_1

#define LC_ISO8859_1   0x81 /* ISO8859 Latin 1 */

Definition at line 102 of file pg_wchar.h.

Referenced by latin1_to_mic(), and mic_to_latin1().

◆ LC_ISO8859_15

#define LC_ISO8859_15   0x8e /* ISO8859 Latin 15 (not supported yet) */

Definition at line 119 of file pg_wchar.h.

◆ LC_ISO8859_2

#define LC_ISO8859_2   0x82 /* ISO8859 Latin 2 */

Definition at line 103 of file pg_wchar.h.

Referenced by latin2_to_mic(), mic_to_latin2(), mic_to_win1250(), and win1250_to_mic().

◆ LC_ISO8859_3

#define LC_ISO8859_3   0x83 /* ISO8859 Latin 3 */

Definition at line 104 of file pg_wchar.h.

Referenced by latin3_to_mic(), and mic_to_latin3().

◆ LC_ISO8859_4

#define LC_ISO8859_4   0x84 /* ISO8859 Latin 4 */

Definition at line 105 of file pg_wchar.h.

Referenced by latin4_to_mic(), and mic_to_latin4().

◆ LC_ISO8859_5

#define LC_ISO8859_5   0x8c /* ISO8859 Cyrillic */

Definition at line 117 of file pg_wchar.h.

◆ LC_ISO8859_6

#define LC_ISO8859_6   0x87 /* Arabic (not supported yet) */

Definition at line 108 of file pg_wchar.h.

◆ LC_ISO8859_7

#define LC_ISO8859_7   0x86 /* Greek (not supported yet) */

Definition at line 107 of file pg_wchar.h.

◆ LC_ISO8859_8

#define LC_ISO8859_8   0x88 /* Hebrew (not supported yet) */

Definition at line 109 of file pg_wchar.h.

◆ LC_ISO8859_9

#define LC_ISO8859_9   0x8d /* ISO8859 Latin 5 (not supported yet) */

Definition at line 118 of file pg_wchar.h.

◆ LC_JISX0201K

#define LC_JISX0201K   0x89 /* Japanese 1 byte kana */

Definition at line 110 of file pg_wchar.h.

Referenced by euc_jp2mic(), mic2euc_jp(), mic2sjis(), and sjis2mic().

◆ LC_JISX0201R

#define LC_JISX0201R   0x8a /* Japanese 1 byte Roman */

Definition at line 111 of file pg_wchar.h.

◆ LC_JISX0208

#define LC_JISX0208   0x92 /* Japanese Kanji (JIS X 0208) */

Definition at line 131 of file pg_wchar.h.

Referenced by euc_jp2mic(), mic2euc_jp(), mic2sjis(), and sjis2mic().

◆ LC_JISX0208_1978

#define LC_JISX0208_1978   0x90 /* Japanese Kanji, old JIS (not supported) */

Definition at line 129 of file pg_wchar.h.

◆ LC_JISX0212

#define LC_JISX0212   0x94 /* Japanese Kanji (JIS X 0212) */

Definition at line 133 of file pg_wchar.h.

Referenced by euc_jp2mic(), mic2euc_jp(), mic2sjis(), and sjis2mic().

◆ LC_JISX0213_1

#define LC_JISX0213_1
Value:
0x97 /* Japanese Kanji (JIS X 0213 Plane 1)
* (not supported) */

Definition at line 136 of file pg_wchar.h.

◆ LC_KOI8_R

#define LC_KOI8_R   0x8b /* Cyrillic KOI8-R */

◆ LC_KS5601

#define LC_KS5601   0x93 /* Korean */

Definition at line 132 of file pg_wchar.h.

Referenced by euc_kr2mic(), and mic2euc_kr().

◆ LC_LAO

#define LC_LAO
Value:
0xa7 /* Lao characters (ISO10646 0E80..0EDF)
* (not supported) */

Definition at line 186 of file pg_wchar.h.

◆ LC_SISHENG

#define LC_SISHENG
Value:
0xa0 /* Chinese SiSheng characters for
* PinYin/ZhuYin (not supported) */

Definition at line 173 of file pg_wchar.h.

◆ LC_TIBETAN

#define LC_TIBETAN   0xfc /* Tibetan (not supported) */

Definition at line 211 of file pg_wchar.h.

◆ LC_TIBETAN_1_COLUMN

#define LC_TIBETAN_1_COLUMN
Value:
0xf1 /* Tibetan 1-column width glyphs (not
* supported) */

Definition at line 195 of file pg_wchar.h.

◆ LC_TIS620

#define LC_TIS620   0x85 /* Thai (not supported yet) */

Definition at line 106 of file pg_wchar.h.

◆ LC_UNICODE_SUBSET

#define LC_UNICODE_SUBSET
Value:
0xf4 /* Unicode characters of the range
* U+0100..U+24FF. (not supported) */

Definition at line 201 of file pg_wchar.h.

◆ LC_UNICODE_SUBSET_2

#define LC_UNICODE_SUBSET_2
Value:
0xf2 /* Unicode characters of the range
* U+2500..U+33FF. (not supported) */

Definition at line 197 of file pg_wchar.h.

◆ LC_UNICODE_SUBSET_3

#define LC_UNICODE_SUBSET_3
Value:
0xf3 /* Unicode characters of the range
* U+E000..U+FFFF. (not supported) */

Definition at line 199 of file pg_wchar.h.

◆ LC_VISCII_LOWER

#define LC_VISCII_LOWER
Value:
0xa2 /* Vietnamese VISCII1.1 lower-case (not
* supported) */

Definition at line 177 of file pg_wchar.h.

◆ LC_VISCII_UPPER

#define LC_VISCII_UPPER
Value:
0xa3 /* Vietnamese VISCII1.1 upper-case (not
* supported) */

Definition at line 179 of file pg_wchar.h.

◆ LCPRV1_A

#define LCPRV1_A   0x9a

Definition at line 150 of file pg_wchar.h.

Referenced by pg_wchar2mule_with_len().

◆ LCPRV1_B

#define LCPRV1_B   0x9b

Definition at line 151 of file pg_wchar.h.

Referenced by pg_wchar2mule_with_len().

◆ LCPRV2_A

#define LCPRV2_A   0x9c

Definition at line 162 of file pg_wchar.h.

Referenced by pg_wchar2mule_with_len().

◆ LCPRV2_B

#define LCPRV2_B   0x9d

Definition at line 163 of file pg_wchar.h.

Referenced by big52mic(), euc_tw2mic(), mic2big5(), mic2euc_tw(), and pg_wchar2mule_with_len().

◆ MAX_CONVERSION_GROWTH

#define MAX_CONVERSION_GROWTH   4

◆ MAX_CONVERSION_INPUT_LENGTH

#define MAX_CONVERSION_INPUT_LENGTH   16

Definition at line 334 of file pg_wchar.h.

Referenced by CopyConvertBuf().

◆ MAX_MULTIBYTE_CHAR_LEN

#define MAX_MULTIBYTE_CHAR_LEN   4

◆ MAX_UNICODE_EQUIVALENT_STRING

#define MAX_UNICODE_EQUIVALENT_STRING   16

Definition at line 343 of file pg_wchar.h.

Referenced by json_lex_string(), map_xml_name_to_sql_identifier(), str_udeescape(), and unistr().

◆ PG_ENCODING_BE_LAST

#define PG_ENCODING_BE_LAST   PG_KOI8U

Definition at line 289 of file pg_wchar.h.

Referenced by get_encoding_name_for_icu().

◆ PG_ENCODING_IS_CLIENT_ONLY

#define PG_ENCODING_IS_CLIENT_ONLY (   _enc)    ((_enc) > PG_ENCODING_BE_LAST && (_enc) < _PG_LAST_ENCODING_)

Definition at line 298 of file pg_wchar.h.

Referenced by BeginCopyTo().

◆ PG_VALID_BE_ENCODING

#define PG_VALID_BE_ENCODING (   _enc)    ((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST)

◆ PG_VALID_ENCODING

#define PG_VALID_ENCODING (   _enc)    ((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_)

◆ PG_VALID_FE_ENCODING

#define PG_VALID_FE_ENCODING (   _enc)    PG_VALID_ENCODING(_enc)

Definition at line 305 of file pg_wchar.h.

Referenced by pg_valid_client_encoding(), PrepareClientEncoding(), and SetClientEncoding().

◆ SS2

#define SS2   0x8e /* single shift 2 (JIS0201) */

◆ SS3

#define SS3   0x8f /* single shift 3 (JIS0212) */

Typedef Documentation

◆ mb2wchar_with_len_converter

typedef int(* mb2wchar_with_len_converter) (const unsigned char *from, pg_wchar *to, int len)

Definition at line 378 of file pg_wchar.h.

◆ mbchar_verifier

typedef int(* mbchar_verifier) (const unsigned char *mbstr, int len)

Definition at line 392 of file pg_wchar.h.

◆ mbcharacter_incrementer

typedef bool(* mbcharacter_incrementer) (unsigned char *mbstr, int len)

Definition at line 390 of file pg_wchar.h.

◆ mbdisplaylen_converter

typedef int(* mbdisplaylen_converter) (const unsigned char *mbstr)

Definition at line 388 of file pg_wchar.h.

◆ mblen_converter

typedef int(* mblen_converter) (const unsigned char *mbstr)

Definition at line 386 of file pg_wchar.h.

◆ mbstr_verifier

typedef int(* mbstr_verifier) (const unsigned char *mbstr, int len)

Definition at line 394 of file pg_wchar.h.

◆ pg_enc

typedef enum pg_enc pg_enc

◆ pg_enc2gettext

typedef struct pg_enc2gettext pg_enc2gettext

◆ pg_enc2name

typedef struct pg_enc2name pg_enc2name

◆ pg_wchar

typedef unsigned int pg_wchar

Definition at line 25 of file pg_wchar.h.

◆ utf_local_conversion_func

typedef uint32(* utf_local_conversion_func) (uint32 code)

Definition at line 519 of file pg_wchar.h.

◆ wchar2mb_with_len_converter

typedef int(* wchar2mb_with_len_converter) (const pg_wchar *from, unsigned char *to, int len)

Definition at line 382 of file pg_wchar.h.

Enumeration Type Documentation

◆ pg_enc

enum pg_enc
Enumerator
PG_SQL_ASCII 
PG_EUC_JP 
PG_EUC_CN 
PG_EUC_KR 
PG_EUC_TW 
PG_EUC_JIS_2004 
PG_UTF8 
PG_MULE_INTERNAL 
PG_LATIN1 
PG_LATIN2 
PG_LATIN3 
PG_LATIN4 
PG_LATIN5 
PG_LATIN6 
PG_LATIN7 
PG_LATIN8 
PG_LATIN9 
PG_LATIN10 
PG_WIN1256 
PG_WIN1258 
PG_WIN866 
PG_WIN874 
PG_KOI8R 
PG_WIN1251 
PG_WIN1252 
PG_ISO_8859_5 
PG_ISO_8859_6 
PG_ISO_8859_7 
PG_ISO_8859_8 
PG_WIN1250 
PG_WIN1253 
PG_WIN1254 
PG_WIN1255 
PG_WIN1257 
PG_KOI8U 
PG_SJIS 
PG_BIG5 
PG_GBK 
PG_UHC 
PG_GB18030 
PG_JOHAB 
PG_SHIFT_JIS_2004 
_PG_LAST_ENCODING_ 

Definition at line 238 of file pg_wchar.h.

239 {
240  PG_SQL_ASCII = 0, /* SQL/ASCII */
241  PG_EUC_JP, /* EUC for Japanese */
242  PG_EUC_CN, /* EUC for Chinese */
243  PG_EUC_KR, /* EUC for Korean */
244  PG_EUC_TW, /* EUC for Taiwan */
245  PG_EUC_JIS_2004, /* EUC-JIS-2004 */
246  PG_UTF8, /* Unicode UTF8 */
247  PG_MULE_INTERNAL, /* Mule internal code */
248  PG_LATIN1, /* ISO-8859-1 Latin 1 */
249  PG_LATIN2, /* ISO-8859-2 Latin 2 */
250  PG_LATIN3, /* ISO-8859-3 Latin 3 */
251  PG_LATIN4, /* ISO-8859-4 Latin 4 */
252  PG_LATIN5, /* ISO-8859-9 Latin 5 */
253  PG_LATIN6, /* ISO-8859-10 Latin6 */
254  PG_LATIN7, /* ISO-8859-13 Latin7 */
255  PG_LATIN8, /* ISO-8859-14 Latin8 */
256  PG_LATIN9, /* ISO-8859-15 Latin9 */
257  PG_LATIN10, /* ISO-8859-16 Latin10 */
258  PG_WIN1256, /* windows-1256 */
259  PG_WIN1258, /* Windows-1258 */
260  PG_WIN866, /* (MS-DOS CP866) */
261  PG_WIN874, /* windows-874 */
262  PG_KOI8R, /* KOI8-R */
263  PG_WIN1251, /* windows-1251 */
264  PG_WIN1252, /* windows-1252 */
265  PG_ISO_8859_5, /* ISO-8859-5 */
266  PG_ISO_8859_6, /* ISO-8859-6 */
267  PG_ISO_8859_7, /* ISO-8859-7 */
268  PG_ISO_8859_8, /* ISO-8859-8 */
269  PG_WIN1250, /* windows-1250 */
270  PG_WIN1253, /* windows-1253 */
271  PG_WIN1254, /* windows-1254 */
272  PG_WIN1255, /* windows-1255 */
273  PG_WIN1257, /* windows-1257 */
274  PG_KOI8U, /* KOI8-U */
275  /* PG_ENCODING_BE_LAST points to the above entry */
276 
277  /* the following are for client encodings only */
278  PG_SJIS, /* Shift JIS (Windows-932) */
279  PG_BIG5, /* Big5 (Windows-950) */
280  PG_GBK, /* GBK (Windows-936) */
281  PG_UHC, /* UHC (Windows-949) */
282  PG_GB18030, /* GB18030 */
283  PG_JOHAB, /* EUC for Korean JOHAB */
284  PG_SHIFT_JIS_2004, /* Shift-JIS-2004 */
285  _PG_LAST_ENCODING_ /* mark only */
286 
287 } pg_enc;
pg_enc
Definition: pg_wchar.h:238

Function Documentation

◆ BIG5toCNS()

unsigned short BIG5toCNS ( unsigned short  big5,
unsigned char *  lc 
)

Definition at line 292 of file big5.c.

References b1c4, b2c3, BinarySearchRange(), i, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, and LC_CNS11643_4.

Referenced by big52euc_tw(), big52mic(), and surrogate_pair_to_codepoint().

293 {
294  unsigned short cns = 0;
295  int i;
296 
297  if (big5 < 0xc940U)
298  {
299  /* level 1 */
300 
301  for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
302  {
303  if (b1c4[i][0] == big5)
304  {
305  *lc = LC_CNS11643_4;
306  return (b1c4[i][1] | 0x8080U);
307  }
308  }
309 
310  if (0 < (cns = BinarySearchRange(big5Level1ToCnsPlane1, 23, big5)))
311  *lc = LC_CNS11643_1;
312  }
313  else if (big5 == 0xc94aU)
314  {
315  /* level 2 */
316  *lc = LC_CNS11643_1;
317  cns = 0x4442;
318  }
319  else
320  {
321  /* level 2 */
322  for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
323  {
324  if (b2c3[i][0] == big5)
325  {
326  *lc = LC_CNS11643_3;
327  return (b2c3[i][1] | 0x8080U);
328  }
329  }
330 
331  if (0 < (cns = BinarySearchRange(big5Level2ToCnsPlane2, 46, big5)))
332  *lc = LC_CNS11643_2;
333  }
334 
335  if (0 == cns)
336  { /* no mapping Big5 to CNS 11643-1992 */
337  *lc = 0;
338  return (unsigned short) '?';
339  }
340 
341  return cns | 0x8080;
342 }
#define LC_CNS11643_1
Definition: pg_wchar.h:134
static unsigned short BinarySearchRange(const codes_t *array, int high, unsigned short code)
Definition: big5.c:208
static const codes_t big5Level2ToCnsPlane2[48]
Definition: big5.c:84
#define LC_CNS11643_4
Definition: pg_wchar.h:205
static const codes_t big5Level1ToCnsPlane1[25]
Definition: big5.c:25
#define LC_CNS11643_2
Definition: pg_wchar.h:135
#define LC_CNS11643_3
Definition: pg_wchar.h:204
static const unsigned short b1c4[][2]
Definition: big5.c:189
int i
static const unsigned short b2c3[][2]
Definition: big5.c:197

◆ check_encoding_conversion_args()

void check_encoding_conversion_args ( int  src_encoding,
int  dest_encoding,
int  len,
int  expected_src_encoding,
int  expected_dest_encoding 
)

Definition at line 1618 of file mbutils.c.

References elog, ERROR, name, pg_enc2name_tbl, and PG_VALID_ENCODING.

Referenced by surrogate_pair_to_codepoint().

1623 {
1624  if (!PG_VALID_ENCODING(src_encoding))
1625  elog(ERROR, "invalid source encoding ID: %d", src_encoding);
1626  if (src_encoding != expected_src_encoding && expected_src_encoding >= 0)
1627  elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
1628  pg_enc2name_tbl[expected_src_encoding].name,
1629  pg_enc2name_tbl[src_encoding].name);
1630  if (!PG_VALID_ENCODING(dest_encoding))
1631  elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
1632  if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= 0)
1633  elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
1634  pg_enc2name_tbl[expected_dest_encoding].name,
1635  pg_enc2name_tbl[dest_encoding].name);
1636  if (len < 0)
1637  elog(ERROR, "encoding conversion length must not be negative");
1638 }
#define ERROR
Definition: elog.h:46
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
const char * name
Definition: encode.c:515
#define elog(elevel,...)
Definition: elog.h:232

◆ CNStoBIG5()

unsigned short CNStoBIG5 ( unsigned short  cns,
unsigned char  lc 
)

Definition at line 345 of file big5.c.

References b1c4, b2c3, BinarySearchRange(), i, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, and LC_CNS11643_4.

Referenced by euc_tw2big5(), mic2big5(), and surrogate_pair_to_codepoint().

346 {
347  int i;
348  unsigned int big5 = 0;
349 
350  cns &= 0x7f7f;
351 
352  switch (lc)
353  {
354  case LC_CNS11643_1:
355  big5 = BinarySearchRange(cnsPlane1ToBig5Level1, 24, cns);
356  break;
357  case LC_CNS11643_2:
358  big5 = BinarySearchRange(cnsPlane2ToBig5Level2, 47, cns);
359  break;
360  case LC_CNS11643_3:
361  for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
362  {
363  if (b2c3[i][1] == cns)
364  return b2c3[i][0];
365  }
366  break;
367  case LC_CNS11643_4:
368  for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
369  {
370  if (b1c4[i][1] == cns)
371  return b1c4[i][0];
372  }
373  default:
374  break;
375  }
376  return big5;
377 }
#define LC_CNS11643_1
Definition: pg_wchar.h:134
static unsigned short BinarySearchRange(const codes_t *array, int high, unsigned short code)
Definition: big5.c:208
#define LC_CNS11643_4
Definition: pg_wchar.h:205
static const codes_t cnsPlane2ToBig5Level2[49]
Definition: big5.c:136
static const codes_t cnsPlane1ToBig5Level1[26]
Definition: big5.c:54
#define LC_CNS11643_2
Definition: pg_wchar.h:135
#define LC_CNS11643_3
Definition: pg_wchar.h:204
static const unsigned short b1c4[][2]
Definition: big5.c:189
int i
static const unsigned short b2c3[][2]
Definition: big5.c:197

◆ get_encoding_name_for_icu()

const char* get_encoding_name_for_icu ( int  encoding)

Definition at line 470 of file encnames.c.

References pg_encname::encoding, lengthof, pg_enc2icu_tbl, PG_ENCODING_BE_LAST, PG_VALID_BE_ENCODING, and StaticAssertStmt.

Referenced by get_collation_actual_version().

471 {
472  StaticAssertStmt(lengthof(pg_enc2icu_tbl) == PG_ENCODING_BE_LAST + 1,
473  "pg_enc2icu_tbl incomplete");
474 
475  if (!PG_VALID_BE_ENCODING(encoding))
476  return NULL;
477  return pg_enc2icu_tbl[encoding];
478 }
#define lengthof(array)
Definition: c.h:734
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:918
#define PG_ENCODING_BE_LAST
Definition: pg_wchar.h:289
static const char *const pg_enc2icu_tbl[]
Definition: encnames.c:415
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:295
int32 encoding
Definition: pg_database.h:41

◆ GetDatabaseEncoding()

int GetDatabaseEncoding ( void  )

Definition at line 1210 of file mbutils.c.

References pg_enc2name::encoding.

Referenced by ascii(), BeginCopyFrom(), BeginCopyTo(), char2wchar(), chr(), CollationCreate(), CollationGetCollid(), compareStrings(), convert_from_utf8(), convert_to_utf8(), CopyConversionError(), CopyConvertBuf(), cstr2sv(), dblink_connect(), dblink_get_conn(), DefineCollation(), Generic_Text_IC_like(), GenericMatchText(), get_collation_actual_version(), get_collation_oid(), get_json_object_as_hash(), InitializeClientEncoding(), IsThereCollationInNamespace(), json_recv(), jsonb_from_cstring(), locate_stem_module(), LogicalOutputWrite(), makeJsonLexContext(), p_isspecial(), ParallelWorkerMain(), pg_database_encoding_character_incrementer(), pg_database_encoding_max_length(), pg_generic_charinc(), pg_perm_setlocale(), pg_set_regex_collation(), pg_unicode_to_server(), pg_verifymbstr(), pgss_store(), PLyUnicode_Bytes(), populate_array_json(), PrepareClientEncoding(), read_extension_script_file(), SetClientEncoding(), SetMessageEncoding(), SPI_sql_row_to_xmlelement(), sv2cstr(), text_position_setup(), to_ascii_default(), type_maximum_size(), unicode_norm_form_from_string(), varstr_abbrev_convert(), varstr_cmp(), varstr_sortsupport(), varstrfastcmp_locale(), wchar2char(), xml_in(), xml_is_document(), xmlparse(), and xpath_exists().

1211 {
1212  return DatabaseEncoding->encoding;
1213 }
pg_enc encoding
Definition: pg_wchar.h:356
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81

◆ GetDatabaseEncodingName()

const char* GetDatabaseEncodingName ( void  )

Definition at line 1216 of file mbutils.c.

◆ GetMessageEncoding()

int GetMessageEncoding ( void  )

Definition at line 1257 of file mbutils.c.

References pg_enc2name::encoding.

Referenced by DebugFileOpen(), report_untranslatable_char(), and SetMessageEncoding().

1258 {
1259  return MessageEncoding->encoding;
1260 }
pg_enc encoding
Definition: pg_wchar.h:356
static const pg_enc2name * MessageEncoding
Definition: mbutils.c:82

◆ InitializeClientEncoding()

void InitializeClientEncoding ( void  )

Definition at line 281 of file mbutils.c.

References Assert, backend_startup_complete, ereport, errcode(), errmsg(), FATAL, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), GetDatabaseEncodingName(), IsTransactionState(), MemoryContextAlloc(), name, OidIsValid, pending_client_encoding, pg_enc2name_tbl, PG_SQL_ASCII, PG_UTF8, PrepareClientEncoding(), SetClientEncoding(), and TopMemoryContext.

Referenced by InitPostgres().

282 {
283  int current_server_encoding;
284 
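285  Assert(!backend_startup_complete);
286  backend_startup_complete = true;
287 
288  if (PrepareClientEncoding(pending_client_encoding) < 0 ||
289  SetClientEncoding(pending_client_encoding) < 0)
290  {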
291  /*
292  * Oops, the requested conversion is not available. We couldn't fail
293  * before, but we can now.
294  */
295  ereport(FATAL,
296  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
297  errmsg("conversion between %s and %s is not supported",
298  pg_enc2name_tbl[pending_client_encoding].name,
299  GetDatabaseEncodingName())));
300  }
301 
302  /*
303  * Also look up the UTF8-to-server conversion function if needed. Since
304  * the server encoding is fixed within any one backend process, we don't
305  * have to do this more than once.
306  */
307  current_server_encoding = GetDatabaseEncoding();
308  if (current_server_encoding != PG_UTF8 &&
309  current_server_encoding != PG_SQL_ASCII)
310  {
311  Oid utf8_to_server_proc;
312 
313  Assert(IsTransactionState());
314  utf8_to_server_proc =
315  FindDefaultConversionProc(PG_UTF8,
316  current_server_encoding);
317  /* If there's no such conversion, just leave the pointer as NULL */
318  if (OidIsValid(utf8_to_server_proc))
319  {
320  FmgrInfo *finfo;
321 
322  finfo = (FmgrInfo *) MemoryContextAlloc(TopMemoryContext,
323  sizeof(FmgrInfo));
324  fmgr_info_cxt(utf8_to_server_proc, finfo,
325  TopMemoryContext);
326  /* Set Utf8ToServerConvProc only after data is fully valid */
327  Utf8ToServerConvProc = finfo;
328  }
329  }
330 }
FmgrInfo
Definition: fmgr.h:56
static FmgrInfo * Utf8ToServerConvProc
Definition: mbutils.c:75
int PrepareClientEncoding(int encoding)
Definition: mbutils.c:110
static bool backend_startup_complete
Definition: mbutils.c:90
static int pending_client_encoding
Definition: mbutils.c:91
int errcode(int sqlerrcode)
Definition: elog.c:698
unsigned int Oid
Definition: postgres_ext.h:31
#define OidIsValid(objectId)
Definition: c.h:710
#define FATAL
Definition: elog.h:49
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
int SetClientEncoding(int encoding)
Definition: mbutils.c:208
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:136
MemoryContext TopMemoryContext
Definition: mcxt.c:48
int GetDatabaseEncoding(void)
Definition: mbutils.c:1210
#define ereport(elevel,...)
Definition: elog.h:157
#define Assert(condition)
Definition: c.h:804
const char * GetDatabaseEncodingName(void)
Definition: mbutils.c:1216
Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding)
Definition: namespace.c:3755
bool IsTransactionState(void)
Definition: xact.c:371
const char * name
Definition: encode.c:515
int errmsg(const char *fmt,...)
Definition: elog.c:909
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:863

◆ is_encoding_supported_by_icu()

bool is_encoding_supported_by_icu ( int  encoding)

Definition at line 459 of file encnames.c.

References pg_enc2icu_tbl, and PG_VALID_BE_ENCODING.

Referenced by lookup_collation().

460 {
461  if (!PG_VALID_BE_ENCODING(encoding))
462  return false;
463  return (pg_enc2icu_tbl[encoding] != NULL);
464 }
static const char *const pg_enc2icu_tbl[]
Definition: encnames.c:415
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:295
int32 encoding
Definition: pg_database.h:41

◆ is_utf16_surrogate_first()

static bool is_utf16_surrogate_first ( pg_wchar  c)
inline static

Definition at line 545 of file pg_wchar.h.

Referenced by json_lex_string(), str_udeescape(), and unistr().

546 {
547  return (c >= 0xD800 && c <= 0xDBFF);
548 }
char * c

◆ is_utf16_surrogate_second()

static bool is_utf16_surrogate_second ( pg_wchar  c)
inline static

Definition at line 551 of file pg_wchar.h.

Referenced by json_lex_string(), str_udeescape(), and unistr().

552 {
553  return (c >= 0xDC00 && c <= 0xDFFF);
554 }
char * c

◆ is_valid_unicode_codepoint()

static bool is_valid_unicode_codepoint ( pg_wchar  c)
inline static

Definition at line 539 of file pg_wchar.h.

Referenced by check_unicode_value(), pg_unicode_to_server(), and unistr().

540 {
541  return (c > 0 && c <= 0x10FFFF);
542 }
char * c

◆ latin2mic()

int latin2mic ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
bool  noError 
)

Definition at line 89 of file conv.c.

References IS_HIGHBIT_SET, and report_invalid_encoding().

Referenced by koi8r_to_mic(), latin1_to_mic(), latin2_to_mic(), latin3_to_mic(), and latin4_to_mic().

91 {
92  const unsigned char *start = l;
93  int c1;
94 
95  while (len > 0)
96  {
97  c1 = *l;
98  if (c1 == 0)
99  {
100  if (noError)
101  break;
102  report_invalid_encoding(encoding, (const char *) l, len);
103  }
104  if (IS_HIGHBIT_SET(c1))
105  *p++ = lc;
106  *p++ = c1;
107  l++;
108  len--;
109  }
110  *p = '\0';
111 
112  return l - start;
113 }
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1647
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1156
int32 encoding
Definition: pg_database.h:41

◆ latin2mic_with_table()

int latin2mic_with_table ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
const unsigned char *  tab,
bool  noError 
)

Definition at line 194 of file conv.c.

References HIGHBIT, IS_HIGHBIT_SET, PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by iso_to_mic(), win1250_to_mic(), win1251_to_mic(), and win866_to_mic().

201 {
202  const unsigned char *start = l;
203  unsigned char c1,
204  c2;
205 
206  while (len > 0)
207  {
208  c1 = *l;
209  if (c1 == 0)
210  {
211  if (noError)
212  break;
213  report_invalid_encoding(encoding, (const char *) l, len);
214  }
215  if (!IS_HIGHBIT_SET(c1))
216  *p++ = c1;
217  else
218  {
219  c2 = tab[c1 - HIGHBIT];
220  if (c2)
221  {
222  *p++ = lc;
223  *p++ = c2;
224  }
225  else
226  {
227  if (noError)
228  break;
229  report_untranslatable_char(encoding, PG_MULE_INTERNAL,
230  (const char *) l, len);
231  }
232  }
233  l++;
234  len--;
235  }
236  *p = '\0';
237 
238  return l - start;
239 }
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1647
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1156
#define HIGHBIT
Definition: c.h:1155
int32 encoding
Definition: pg_database.h:41
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1679

◆ local2local()

int local2local ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  src_encoding,
int  dest_encoding,
const unsigned char *  tab,
bool  noError 
)

Definition at line 33 of file conv.c.

References HIGHBIT, IS_HIGHBIT_SET, report_invalid_encoding(), and report_untranslatable_char().

Referenced by iso_to_koi8r(), iso_to_win1251(), iso_to_win866(), koi8r_to_iso(), koi8r_to_win1251(), koi8r_to_win866(), latin2_to_win1250(), win1250_to_latin2(), win1251_to_iso(), win1251_to_koi8r(), win1251_to_win866(), win866_to_iso(), win866_to_koi8r(), and win866_to_win1251().

40 {
41  const unsigned char *start = l;
42  unsigned char c1,
43  c2;
44 
45  while (len > 0)
46  {
47  c1 = *l;
48  if (c1 == 0)
49  {
50  if (noError)
51  break;
52  report_invalid_encoding(src_encoding, (const char *) l, len);
53  }
54  if (!IS_HIGHBIT_SET(c1))
55  *p++ = c1;
56  else
57  {
58  c2 = tab[c1 - HIGHBIT];
59  if (c2)
60  *p++ = c2;
61  else
62  {
63  if (noError)
64  break;
65  report_untranslatable_char(src_encoding, dest_encoding,
66  (const char *) l, len);
67  }
68  }
69  l++;
70  len--;
71  }
72  *p = '\0';
73 
74  return l - start;
75 }
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1647
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1156
#define HIGHBIT
Definition: c.h:1155
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1679

◆ LocalToUtf()

int LocalToUtf ( const unsigned char *  iso,
int  len,
unsigned char *  utf,
const pg_mb_radix_tree *  map,
const pg_local_to_utf_combined *  cmap,
int  cmapsize,
utf_local_conversion_func  conv_func,
int  encoding,
bool  noError 
)

Definition at line 717 of file conv.c.

References compare4(), elog, ereport, errcode(), errmsg(), ERROR, IS_HIGHBIT_SET, pg_encoding_verifymbchar(), pg_mb_radix_conv(), PG_UTF8, PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), store_coded_char(), pg_local_to_utf_combined::utf1, and pg_local_to_utf_combined::utf2.

Referenced by big5_to_utf8(), euc_cn_to_utf8(), euc_jis_2004_to_utf8(), euc_jp_to_utf8(), euc_kr_to_utf8(), euc_tw_to_utf8(), gb18030_to_utf8(), gbk_to_utf8(), iso8859_to_utf8(), johab_to_utf8(), koi8r_to_utf8(), koi8u_to_utf8(), shift_jis_2004_to_utf8(), sjis_to_utf8(), uhc_to_utf8(), and win_to_utf8().

724 {
725  uint32 iiso;
726  int l;
727  const pg_local_to_utf_combined *cp;
728  const unsigned char *start = iso;
729 
730  if (!PG_VALID_ENCODING(encoding))
731  ereport(ERROR,
732  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
733  errmsg("invalid encoding number: %d", encoding)));
734 
735  for (; len > 0; len -= l)
736  {
737  unsigned char b1 = 0;
738  unsigned char b2 = 0;
739  unsigned char b3 = 0;
740  unsigned char b4 = 0;
741 
742  /* "break" cases all represent errors */
743  if (*iso == '\0')
744  break;
745 
746  if (!IS_HIGHBIT_SET(*iso))
747  {
748  /* ASCII case is easy, assume it's one-to-one conversion */
749  *utf++ = *iso++;
750  l = 1;
751  continue;
752  }
753 
754  l = pg_encoding_verifymbchar(encoding, (const char *) iso, len);
755  if (l < 0)
756  break;
757 
758  /* collect coded char of length l */
759  if (l == 1)
760  b4 = *iso++;
761  else if (l == 2)
762  {
763  b3 = *iso++;
764  b4 = *iso++;
765  }
766  else if (l == 3)
767  {
768  b2 = *iso++;
769  b3 = *iso++;
770  b4 = *iso++;
771  }
772  else if (l == 4)
773  {
774  b1 = *iso++;
775  b2 = *iso++;
776  b3 = *iso++;
777  b4 = *iso++;
778  }
779  else
780  {
781  elog(ERROR, "unsupported character length %d", l);
782  iiso = 0; /* keep compiler quiet */
783  }
784  iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
785 
786  if (map)
787  {
788  uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
789 
790  if (converted)
791  {
792  utf = store_coded_char(utf, converted);
793  continue;
794  }
795 
796  /* If there's a combined character map, try that */
797  if (cmap)
798  {
799  cp = bsearch(&iiso, cmap, cmapsize,
800  sizeof(pg_local_to_utf_combined), compare4);
801 
802  if (cp)
803  {
804  utf = store_coded_char(utf, cp->utf1);
805  utf = store_coded_char(utf, cp->utf2);
806  continue;
807  }
808  }
809  }
810 
811  /* if there's a conversion function, try that */
812  if (conv_func)
813  {
814  uint32 converted = (*conv_func) (iiso);
815 
816  if (converted)
817  {
818  utf = store_coded_char(utf, converted);
819  continue;
820  }
821  }
822 
823  /* failed to translate this character */
824  iso -= l;
825  if (noError)
826  break;
827  report_untranslatable_char(encoding, PG_UTF8,
828  (const char *) iso, len);
829  }
830 
831  /* if we broke out of loop early, must be invalid input */
832  if (len > 0 && !noError)
833  report_invalid_encoding(encoding, (const char *) iso, len);
834 
835  *utf = '\0';
836 
837  return iso - start;
838 }
static int compare4(const void *p1, const void *p2)
Definition: conv.c:339
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1647
int errcode(int sqlerrcode)
Definition: elog.c:698
static unsigned char * store_coded_char(unsigned char *dest, uint32 code)
Definition: conv.c:353
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1156
#define ERROR
Definition: elog.h:46
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
static uint32 pg_mb_radix_conv(const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)
Definition: conv.c:373
unsigned int uint32
Definition: c.h:441
#define ereport(elevel,...)
Definition: elog.h:157
int32 encoding
Definition: pg_database.h:41
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1679
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition: wchar.c:1955
int errmsg(const char *fmt,...)
Definition: elog.c:909
#define elog(elevel,...)
Definition: elog.h:232

◆ mic2latin()

int mic2latin ( const unsigned char *  mic,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
bool  noError 
)

Definition at line 127 of file conv.c.

References IS_HIGHBIT_SET, PG_MULE_INTERNAL, pg_mule_mblen(), report_invalid_encoding(), and report_untranslatable_char().

Referenced by mic_to_koi8r(), mic_to_latin1(), mic_to_latin2(), mic_to_latin3(), and mic_to_latin4().

129 {
130  const unsigned char *start = mic;
131  int c1;
132 
133  while (len > 0)
134  {
135  c1 = *mic;
136  if (c1 == 0)
137  {
138  if (noError)
139  break;
140  report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
141  }
142  if (!IS_HIGHBIT_SET(c1))
143  {
144  /* easy for ASCII */
145  *p++ = c1;
146  mic++;
147  len--;
148  }
149  else
150  {
151  int l = pg_mule_mblen(mic);
152 
153  if (len < l)
154  {
155  if (noError)
156  break;
157  report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
158  len);
159  }
160  if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
161  {
162  if (noError)
163  break;
164  report_untranslatable_char(PG_MULE_INTERNAL, encoding,
165  (const char *) mic, len);
166  }
167  *p++ = mic[1];
168  mic += 2;
169  len -= 2;
170  }
171  }
172  *p = '\0';
173 
174  return mic - start;
175 }
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:839
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1647
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1156
int32 encoding
Definition: pg_database.h:41
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1679

◆ mic2latin_with_table()

int mic2latin_with_table ( const unsigned char *  mic,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
const unsigned char *  tab,
bool  noError 
)

Definition at line 257 of file conv.c.

References HIGHBIT, IS_HIGHBIT_SET, PG_MULE_INTERNAL, pg_mule_mblen(), report_invalid_encoding(), and report_untranslatable_char().

Referenced by mic_to_iso(), mic_to_win1250(), mic_to_win1251(), and mic_to_win866().

264 {
265  const unsigned char *start = mic;
266  unsigned char c1,
267  c2;
268 
269  while (len > 0)
270  {
271  c1 = *mic;
272  if (c1 == 0)
273  {
274  if (noError)
275  break;
276  report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
277  }
278  if (!IS_HIGHBIT_SET(c1))
279  {
280  /* easy for ASCII */
281  *p++ = c1;
282  mic++;
283  len--;
284  }
285  else
286  {
287  int l = pg_mule_mblen(mic);
288 
289  if (len < l)
290  {
291  if (noError)
292  break;
293  report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
294  len);
295  }
296  if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
297  (c2 = tab[mic[1] - HIGHBIT]) == 0)
298  {
299  if (noError)
300  break;
301  report_untranslatable_char(PG_MULE_INTERNAL, encoding,
302  (const char *) mic, len);
303  break; /* keep compiler quiet */
304  }
305  *p++ = c2;
306  mic += 2;
307  len -= 2;
308  }
309  }
310  *p = '\0';
311 
312  return mic - start;
313 }
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:839
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1647
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1156
#define HIGHBIT
Definition: c.h:1155
int32 encoding
Definition: pg_database.h:41
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1679

◆ pg_any_to_server()

char* pg_any_to_server ( const char *  s,
int  len,
int  encoding 
)

Definition at line 676 of file mbutils.c.

References pg_enc2name::encoding, ereport, errcode(), errmsg(), ERROR, i, IS_HIGHBIT_SET, name, perform_default_encoding_conversion(), pg_do_encoding_conversion(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_BE_ENCODING, pg_verify_mbstr(), and unconstify.

Referenced by ASN1_STRING_to_text(), cache_single_string(), db_encoding_convert(), dsnowball_lexize(), pg_client_to_server(), pg_stat_statements_internal(), pgp_armor_headers(), PLyUnicode_Bytes(), read_extension_script_file(), tsearch_readline(), utf_u2e(), X509_NAME_to_cstring(), and xml_recv().

677 {
678  if (len <= 0)
679  return unconstify(char *, s); /* empty string is always valid */
680 
681  if (encoding == DatabaseEncoding->encoding ||
682  encoding == PG_SQL_ASCII)
683  {
684  /*
685  * No conversion is needed, but we must still validate the data.
686  */
687  (void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
688  return unconstify(char *, s);
689  }
690 
691  if (DatabaseEncoding->encoding == PG_SQL_ASCII)
692  {
693  /*
694  * No conversion is possible, but we must still validate the data,
695  * because the client-side code might have done string escaping using
696  * the selected client_encoding. If the client encoding is ASCII-safe
697  * then we just do a straight validation under that encoding. For an
698  * ASCII-unsafe encoding we have a problem: we dare not pass such data
699  * to the parser but we have no way to convert it. We compromise by
700  * rejecting the data if it contains any non-ASCII characters.
701  */
702  if (PG_VALID_BE_ENCODING(encoding))
703  (void) pg_verify_mbstr(encoding, s, len, false);
704  else
705  {
706  int i;
707 
708  for (i = 0; i < len; i++)
709  {
710  if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
711  ereport(ERROR,
712  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
713  errmsg("invalid byte value for encoding \"%s\": 0x%02x",
714  pg_enc2name_tbl[PG_SQL_ASCII].name,
715  (unsigned char) s[i])));
716  }
717  }
718  return unconstify(char *, s);
719  }
720 
721  /* Fast path if we can use cached conversion function */
722  if (encoding == ClientEncoding->encoding)
723  return perform_default_encoding_conversion(s, len, true);
724 
725  /* General case ... will not work outside transactions */
726  return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
727  len,
728  encoding,
729  DatabaseEncoding->encoding);
730 }
static char * perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)
Definition: mbutils.c:783
int errcode(int sqlerrcode)
Definition: elog.c:698
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
unsigned char * pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding)
Definition: mbutils.c:356
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1515
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1156
#define ERROR
Definition: elog.h:46
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
#define unconstify(underlying_type, expr)
Definition: c.h:1243
pg_enc encoding
Definition: pg_wchar.h:356
#define ereport(elevel,...)
Definition: elog.h:157
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:295
int32 encoding
Definition: pg_database.h:41
const char * name
Definition: encode.c:515
int errmsg(const char *fmt,...)
Definition: elog.c:909
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81
int i

◆ pg_char_and_wchar_strcmp()

int pg_char_and_wchar_strcmp ( const char *  s1,
const pg_wchar *  s2 
)

Definition at line 41 of file wstrcmp.c.

Referenced by surrogate_pair_to_codepoint().

42 {
43  while ((pg_wchar) *s1 == *s2++)
44  if (*s1++ == 0)
45  return 0;
46  return *(const unsigned char *) s1 - *(const pg_wchar *) (s2 - 1);
47 }
char * s1
unsigned int pg_wchar
Definition: mbprint.c:31
char * s2

◆ pg_char_and_wchar_strncmp()

int pg_char_and_wchar_strncmp ( const char *  s1,
const pg_wchar *  s2,
size_t  n 
)

Definition at line 55 of file wstrncmp.c.

Referenced by element() and lookupcclass().

56 {
57  if (n == 0)
58  return 0;
59  do
60  {
61  if ((pg_wchar) ((unsigned char) *s1) != *s2++)
62  return ((pg_wchar) ((unsigned char) *s1) - *(s2 - 1));
63  if (*s1++ == 0)
64  break;
65  } while (--n != 0);
66  return 0;
67 }
char * s1
unsigned int pg_wchar
Definition: mbprint.c:31
char * s2

◆ pg_char_to_encoding()

int pg_char_to_encoding ( const char *  name)

Definition at line 550 of file encnames.c.

Referenced by surrogate_pair_to_codepoint().

551 {
552  unsigned int nel = lengthof(pg_encname_tbl);
553  const pg_encname *base = pg_encname_tbl,
554  *last = base + nel - 1,
555  *position;
556  int result;
557  char buff[NAMEDATALEN],
558  *key;
559 
560  if (name == NULL || *name == '\0')
561  return -1;
562 
563  if (strlen(name) >= NAMEDATALEN)
564  return -1; /* it's certainly not in the table */
565 
566  key = clean_encoding_name(name, buff);
567 
568  while (last >= base)
569  {
570  position = base + ((last - base) >> 1);
571  result = key[0] - position->name[0];
572 
573  if (result == 0)
574  {
575  result = strcmp(key, position->name);
576  if (result == 0)
577  return position->encoding;
578  }
579  if (result < 0)
580  last = position - 1;
581  else
582  base = position + 1;
583  }
584  return -1;
585 }
#define lengthof(array)
Definition: c.h:734
#define NAMEDATALEN
static const pg_encname pg_encname_tbl[]
Definition: encnames.c:39
const char * name
Definition: encode.c:515
static char * clean_encoding_name(const char *key, char *newkey)
Definition: encnames.c:525

◆ pg_client_to_server()

char* pg_client_to_server ( const char *  s,
int  len 
)

Definition at line 660 of file mbutils.c.

References pg_enc2name::encoding, and pg_any_to_server().

Referenced by exec_bind_message(), parse_fcall_arguments(), pq_getmsgstring(), and pq_getmsgtext().

661 {
662  return pg_any_to_server(s, len, ClientEncoding->encoding);
663 }
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
pg_enc encoding
Definition: pg_wchar.h:356
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:676

◆ pg_database_encoding_character_incrementer()

mbcharacter_incrementer pg_database_encoding_character_incrementer ( void  )

Definition at line 1472 of file mbutils.c.

References GetDatabaseEncoding(), PG_EUC_JP, pg_eucjp_increment(), pg_generic_charinc(), PG_UTF8, and pg_utf8_increment().

Referenced by make_greater_string().

1473 {
1474  /*
1475  * Eventually it might be best to add a field to pg_wchar_table[], but for
1476  * now we just use a switch.
1477  */
1478  switch (GetDatabaseEncoding())
1479  {
1480  case PG_UTF8:
1481  return pg_utf8_increment;
1482 
1483  case PG_EUC_JP:
1484  return pg_eucjp_increment;
1485 
1486  default:
1487  return pg_generic_charinc;
1488  }
1489 }
static bool pg_eucjp_increment(unsigned char *charptr, int length)
Definition: mbutils.c:1386
static bool pg_utf8_increment(unsigned char *charptr, int length)
Definition: mbutils.c:1308
static bool pg_generic_charinc(unsigned char *charptr, int len)
Definition: mbutils.c:1274
int GetDatabaseEncoding(void)
Definition: mbutils.c:1210

◆ pg_database_encoding_max_length()

◆ pg_do_encoding_conversion()

unsigned char* pg_do_encoding_conversion ( unsigned char *  src,
int  len,
int  src_encoding,
int  dest_encoding 
)

Definition at line 356 of file mbutils.c.

References BoolGetDatum, CStringGetDatum, CurrentMemoryContext, elog, ereport, errcode(), errdetail(), errmsg(), ERROR, FindDefaultConversionProc(), Int32GetDatum, IsTransactionState(), MAX_CONVERSION_GROWTH, MaxAllocHugeSize, MaxAllocSize, MemoryContextAllocHuge(), OidFunctionCall6, OidIsValid, pg_encoding_to_char(), PG_SQL_ASCII, pg_verify_mbstr(), and repalloc().

Referenced by convert_charset(), pg_any_to_server(), pg_convert(), pg_server_to_any(), report_untranslatable_char(), and xml_is_document().

358 {
359  unsigned char *result;
360  Oid proc;
361 
362  if (len <= 0)
363  return src; /* empty string is always valid */
364 
365  if (src_encoding == dest_encoding)
366  return src; /* no conversion required, assume valid */
367 
368  if (dest_encoding == PG_SQL_ASCII)
369  return src; /* any string is valid in SQL_ASCII */
370 
371  if (src_encoding == PG_SQL_ASCII)
372  {
373  /* No conversion is possible, but we must validate the result */
374  (void) pg_verify_mbstr(dest_encoding, (const char *) src, len, false);
375  return src;
376  }
377 
378  if (!IsTransactionState()) /* shouldn't happen */
379  elog(ERROR, "cannot perform encoding conversion outside a transaction");
380 
381  proc = FindDefaultConversionProc(src_encoding, dest_encoding);
382  if (!OidIsValid(proc))
383  ereport(ERROR,
384  (errcode(ERRCODE_UNDEFINED_FUNCTION),
385  errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
386  pg_encoding_to_char(src_encoding),
387  pg_encoding_to_char(dest_encoding))));
388 
389  /*
390  * Allocate space for conversion result, being wary of integer overflow.
391  *
392  * len * MAX_CONVERSION_GROWTH is typically a vast overestimate of the
393  * required space, so it might exceed MaxAllocSize even though the result
394  * would actually fit. We do not want to hand back a result string that
395  * exceeds MaxAllocSize, because callers might not cope gracefully --- but
396  * if we just allocate more than that, and don't use it, that's fine.
397  */
398  if ((Size) len >= (MaxAllocHugeSize / (Size) MAX_CONVERSION_GROWTH))
399  ereport(ERROR,
400  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
401  errmsg("out of memory"),
402  errdetail("String of %d bytes is too long for encoding conversion.",
403  len)));
404 
405  result = (unsigned char *)
406  MemoryContextAllocHuge(CurrentMemoryContext,
407  (Size) len * MAX_CONVERSION_GROWTH + 1);
408 
409  (void) OidFunctionCall6(proc,
410  Int32GetDatum(src_encoding),
411  Int32GetDatum(dest_encoding),
412  CStringGetDatum(src),
413  CStringGetDatum(result),
414  Int32GetDatum(len),
415  BoolGetDatum(false));
416 
417  /*
418  * If the result is large, it's worth repalloc'ing to release any extra
419  * space we asked for. The cutoff here is somewhat arbitrary, but we
420  * *must* check when len * MAX_CONVERSION_GROWTH exceeds MaxAllocSize.
421  */
422  if (len > 1000000)
423  {
424  Size resultlen = strlen((char *) result);
425 
426  if (resultlen >= MaxAllocSize)
427  ereport(ERROR,
428  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
429  errmsg("out of memory"),
430  errdetail("String of %d bytes is too long for encoding conversion.",
431  len)));
432 
433  result = (unsigned char *) repalloc(result, resultlen + 1);
434  }
435 
436  return result;
437 }
#define OidFunctionCall6(functionId, arg1, arg2, arg3, arg4, arg5, arg6)
Definition: fmgr.h:674
int errcode(int sqlerrcode)
Definition: elog.c:698
unsigned int Oid
Definition: postgres_ext.h:31
#define OidIsValid(objectId)
Definition: c.h:710
#define MaxAllocHugeSize
Definition: memutils.h:44
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1515
#define ERROR
Definition: elog.h:46
void * MemoryContextAllocHuge(MemoryContext context, Size size)
Definition: mcxt.c:1218
int errdetail(const char *fmt,...)
Definition: elog.c:1042
#define CStringGetDatum(X)
Definition: postgres.h:622
MemoryContext CurrentMemoryContext
Definition: mcxt.c:42
#define MaxAllocSize
Definition: memutils.h:40
#define BoolGetDatum(X)
Definition: postgres.h:446
#define ereport(elevel,...)
Definition: elog.h:157
const char * pg_encoding_to_char(int encoding)
Definition: encnames.c:588
size_t Size
Definition: c.h:540
Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding)
Definition: namespace.c:3755
bool IsTransactionState(void)
Definition: xact.c:371
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1182
#define MAX_CONVERSION_GROWTH
Definition: pg_wchar.h:316
#define Int32GetDatum(X)
Definition: postgres.h:523
int errmsg(const char *fmt,...)
Definition: elog.c:909
#define elog(elevel,...)
Definition: elog.h:232

◆ pg_do_encoding_conversion_buf()

int pg_do_encoding_conversion_buf ( Oid  proc,
int  src_encoding,
int  dest_encoding,
unsigned char *  src,
int  srclen,
unsigned char *  dst,
int  dstlen,
bool  noError 
)

Definition at line 469 of file mbutils.c.

References BoolGetDatum, CStringGetDatum, DatumGetInt32, Int32GetDatum, MAX_CONVERSION_GROWTH, and OidFunctionCall6.

Referenced by CopyConversionError(), CopyConvertBuf(), and test_enc_conversion().

475 {
476  Datum result;
477 
478  /*
479  * If the destination buffer is not large enough to hold the result in the
480  * worst case, limit the input size passed to the conversion function.
481  */
482  if ((Size) srclen >= ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH))
483  srclen = ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH);
484 
485  result = OidFunctionCall6(proc,
486  Int32GetDatum(src_encoding),
487  Int32GetDatum(dest_encoding),
488  CStringGetDatum(src),
489  CStringGetDatum(dest),
490  Int32GetDatum(srclen),
491  BoolGetDatum(noError));
492  return DatumGetInt32(result);
493 }
#define OidFunctionCall6(functionId, arg1, arg2, arg3, arg4, arg5, arg6)
Definition: fmgr.h:674
#define DatumGetInt32(X)
Definition: postgres.h:516
#define CStringGetDatum(X)
Definition: postgres.h:622
uintptr_t Datum
Definition: postgres.h:411
#define BoolGetDatum(X)
Definition: postgres.h:446
size_t Size
Definition: c.h:540
#define MAX_CONVERSION_GROWTH
Definition: pg_wchar.h:316
#define Int32GetDatum(X)
Definition: postgres.h:523

◆ pg_dsplen()

int pg_dsplen ( const char *  mbstr)

Definition at line 973 of file mbutils.c.

References pg_wchar_tbl::dsplen, pg_enc2name::encoding, and pg_wchar_table.

Referenced by p_isspecial(), and surrogate_pair_to_codepoint().

974 {
975  return pg_wchar_table[DatabaseEncoding->encoding].dsplen((const unsigned char *) mbstr);
976 }
mbdisplaylen_converter dsplen
Definition: pg_wchar.h:403
pg_enc encoding
Definition: pg_wchar.h:356
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1867
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81

◆ pg_encoding_dsplen()

int pg_encoding_dsplen ( int  encoding,
const char *  mbstr 
)

Definition at line 1942 of file wchar.c.

References PG_SQL_ASCII, and PG_VALID_ENCODING.

Referenced by PQdsplen(), reportErrorPosition(), and surrogate_pair_to_codepoint().

1943 {
1944  return (PG_VALID_ENCODING(encoding) ?
1945  pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
1946  pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
1947 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1867

◆ pg_encoding_max_length()

int pg_encoding_max_length ( int  encoding)

Definition at line 1979 of file wchar.c.

References Assert, encoding, pg_wchar_tbl::maxmblen, and PG_VALID_ENCODING.

Referenced by ascii(), chr(), pg_encoding_mbcliplen(), pg_verify_mbstr_len(), reportErrorPosition(), surrogate_pair_to_codepoint(), and type_maximum_size().

1980 {
1981  Assert(PG_VALID_ENCODING(encoding));
1982 
1983  return pg_wchar_table[encoding].maxmblen;
1984 }
int maxmblen
Definition: pg_wchar.h:406
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
#define Assert(condition)
Definition: c.h:804
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1867

◆ pg_encoding_mb2wchar_with_len()

int pg_encoding_mb2wchar_with_len ( int  encoding,
const char *  from,
pg_wchar to,
int  len 
)

Definition at line 936 of file mbutils.c.

References encoding, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

Referenced by surrogate_pair_to_codepoint(), and xml_is_document().

938 {
939  return pg_wchar_table[encoding].mb2wchar_with_len((const unsigned char *) from, to, len);
940 }
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1867
mb2wchar_with_len_converter mb2wchar_with_len
Definition: pg_wchar.h:398

◆ pg_encoding_mbcliplen()

int pg_encoding_mbcliplen ( int  encoding,
const char *  mbstr,
int  len,
int  limit 
)

Definition at line 1036 of file mbutils.c.

References cliplen(), encoding, pg_wchar_tbl::mblen, pg_encoding_max_length(), and pg_wchar_table.

Referenced by pg_mbcliplen(), and surrogate_pair_to_codepoint().

1038 {
1039  mblen_converter mblen_fn;
1040  int clen = 0;
1041  int l;
1042 
1043  /* optimization for single byte encoding */
1044  if (pg_encoding_max_length(encoding) == 1)
1045  return cliplen(mbstr, len, limit);
1046 
1047  mblen_fn = pg_wchar_table[encoding].mblen;
1048 
1049  while (len > 0 && *mbstr)
1050  {
1051  l = (*mblen_fn) ((const unsigned char *) mbstr);
1052  if ((clen + l) > limit)
1053  break;
1054  clen += l;
1055  if (clen == limit)
1056  break;
1057  len -= l;
1058  mbstr += l;
1059  }
1060  return clen;
1061 }
int(* mblen_converter)(const unsigned char *mbstr)
Definition: pg_wchar.h:386
int pg_encoding_max_length(int encoding)
Definition: wchar.c:1979
static int cliplen(const char *str, int len, int limit)
Definition: mbutils.c:1093
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1867
mblen_converter mblen
Definition: pg_wchar.h:402

◆ pg_encoding_mblen()

int pg_encoding_mblen ( int  encoding,
const char *  mbstr 
)

Definition at line 1921 of file wchar.c.

References PG_SQL_ASCII, and PG_VALID_ENCODING.

Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), pg_encoding_mblen_bounded(), PQescapeInternal(), PQescapeStringInternal(), PQmblen(), PQmblenBounded(), report_invalid_encoding(), report_untranslatable_char(), surrogate_pair_to_codepoint(), and xml_is_document().

1922 {
1923  return (PG_VALID_ENCODING(encoding) ?
1924  pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) :
1925  pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
1926 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1867

◆ pg_encoding_mblen_bounded()

int pg_encoding_mblen_bounded ( int  encoding,
const char *  mbstr 
)

Definition at line 1933 of file wchar.c.

References pg_encoding_mblen(), and strnlen().

Referenced by json_lex_string(), and surrogate_pair_to_codepoint().

1934 {
1935  return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr));
1936 }
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:1921
size_t strnlen(const char *str, size_t maxlen)
Definition: strnlen.c:26
int32 encoding
Definition: pg_database.h:41

◆ pg_encoding_to_char()

const char* pg_encoding_to_char ( int  encoding)

Definition at line 588 of file encnames.c.

Referenced by surrogate_pair_to_codepoint().

589 {
590  if (PG_VALID_ENCODING(encoding))
591  {
592  const pg_enc2name *p = &pg_enc2name_tbl[encoding];
593 
594  Assert(encoding == p->encoding);
595  return p->name;
596  }
597  return "";
598 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
const char * name
Definition: pg_wchar.h:355
pg_enc encoding
Definition: pg_wchar.h:356
#define Assert(condition)
Definition: c.h:804
int32 encoding
Definition: pg_database.h:41

◆ pg_encoding_verifymbchar()

int pg_encoding_verifymbchar ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 1955 of file wchar.c.

References PG_SQL_ASCII, and PG_VALID_ENCODING.

Referenced by big52euc_tw(), big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), shift_jis_20042euc_jis_2004(), sjis2euc_jp(), and surrogate_pair_to_codepoint().

1956 {
1957  return (PG_VALID_ENCODING(encoding) ?
1958  pg_wchar_table[encoding].mbverifychar((const unsigned char *) mbstr, len) :
1959  pg_wchar_table[PG_SQL_ASCII].mbverifychar((const unsigned char *) mbstr, len));
1960 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1867

◆ pg_encoding_verifymbstr()

int pg_encoding_verifymbstr ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 1968 of file wchar.c.

References PG_SQL_ASCII, and PG_VALID_ENCODING.

Referenced by CopyConvertBuf(), surrogate_pair_to_codepoint(), and test_enc_conversion().

1969 {
1970  return (PG_VALID_ENCODING(encoding) ?
1971  pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len) :
1972  pg_wchar_table[PG_SQL_ASCII].mbverifystr((const unsigned char *) mbstr, len));
1973 }
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1867

◆ pg_encoding_wchar2mb_with_len()

int pg_encoding_wchar2mb_with_len ( int  encoding,
const pg_wchar from,
char *  to,
int  len 
)

Definition at line 958 of file mbutils.c.

References encoding, pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

Referenced by surrogate_pair_to_codepoint().

960 {
961  return pg_wchar_table[encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
962 }
wchar2mb_with_len_converter wchar2mb_with_len
Definition: pg_wchar.h:400
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1867

◆ pg_get_client_encoding()

int pg_get_client_encoding ( void  )

Definition at line 336 of file mbutils.c.

References pg_enc2name::encoding.

Referenced by BeginCopyFrom(), BeginCopyTo(), surrogate_pair_to_codepoint(), and xml_send().

337 {
338  return ClientEncoding->encoding;
339 }
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
pg_enc encoding
Definition: pg_wchar.h:356

◆ pg_get_client_encoding_name()

const char* pg_get_client_encoding_name ( void  )

Definition at line 345 of file mbutils.c.

References pg_enc2name::name.

Referenced by surrogate_pair_to_codepoint().

346 {
347  return ClientEncoding->name;
348 }
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
const char * name
Definition: pg_wchar.h:355

◆ pg_mb2wchar()

int pg_mb2wchar ( const char *  from,
pg_wchar to 
)

Definition at line 922 of file mbutils.c.

References pg_enc2name::encoding, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

Referenced by surrogate_pair_to_codepoint().

923 {
924  return pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len((const unsigned char *) from, to, strlen(from));
925 }
pg_enc encoding
Definition: pg_wchar.h:356
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1867
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81
mb2wchar_with_len_converter mb2wchar_with_len
Definition: pg_wchar.h:398

◆ pg_mb2wchar_with_len()

int pg_mb2wchar_with_len ( const char *  from,
pg_wchar to,
int  len 
)

Definition at line 929 of file mbutils.c.

References pg_enc2name::encoding, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

Referenced by check_ident_usermap(), CheckAffix(), NIAddAffix(), parse_ident_line(), RE_compile(), RE_compile_and_cache(), RE_execute(), replace_text_regexp(), setup_regexp_matches(), setup_test_matches(), surrogate_pair_to_codepoint(), test_re_compile(), and TParserInit().

930 {
931  return pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len((const unsigned char *) from, to, len);
932 }
pg_enc encoding
Definition: pg_wchar.h:356
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1867
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81
mb2wchar_with_len_converter mb2wchar_with_len
Definition: pg_wchar.h:398

◆ pg_mbcharcliplen()

int pg_mbcharcliplen ( const char *  mbstr,
int  len,
int  limit 
)

Definition at line 1068 of file mbutils.c.

References cliplen(), pg_database_encoding_max_length(), and pg_mblen().

Referenced by bpchar(), bpchar_input(), surrogate_pair_to_codepoint(), text_left(), text_right(), varchar(), and varchar_input().

1069 {
1070  int clen = 0;
1071  int nch = 0;
1072  int l;
1073 
1074  /* optimization for single byte encoding */
1075  if (pg_database_encoding_max_length() == 1)
1076  return cliplen(mbstr, len, limit);
1077 
1078  while (len > 0 && *mbstr)
1079  {
1080  l = pg_mblen(mbstr);
1081  nch++;
1082  if (nch > limit)
1083  break;
1084  clen += l;
1085  len -= l;
1086  mbstr += l;
1087  }
1088  return clen;
1089 }
static int cliplen(const char *str, int len, int limit)
Definition: mbutils.c:1093
int pg_mblen(const char *mbstr)
Definition: mbutils.c:966
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1495

◆ pg_mbcliplen()

◆ pg_mblen()

int pg_mblen ( const char *  mbstr)

Definition at line 966 of file mbutils.c.

References pg_enc2name::encoding, pg_wchar_tbl::mblen, and pg_wchar_table.

Referenced by addCompoundAffixFlagValue(), appendStringInfoRegexpSubstr(), bit_in(), charlen_to_bytelen(), check_replace_text_has_escape_char(), DCH_from_char(), dotrim(), find_word(), findchar(), findchar2(), findwrd(), gbt_var_node_cp_len(), get_hex(), get_modifiers(), get_nextfield(), get_val(), get_wildcard_part(), getlexeme(), getNextFlagFromString(), gettoken_query(), gettoken_query_standard(), gettoken_query_websearch(), gettoken_tsvector(), infix(), initTrie(), lpad(), make_trigrams(), map_sql_identifier_to_xml_name(), map_xml_name_to_sql_identifier(), match_prosrc_to_literal(), mb_strchr(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), NUM_eat_non_data_chars(), NUM_processor(), parse_affentry(), parse_format(), parse_hstore(), parse_lquery(), parse_ltree(), parse_or_operator(), parse_re_flags(), parse_test_flags(), pg_base64_decode(), pg_mbcharcliplen(), pg_mbstrlen(), pg_mbstrlen_with_len(), readstoplist(), report_json_context(), rpad(), RS_compile(), RS_execute(), RS_isRegis(), similar_escape_internal(), split_text(), surrogate_pair_to_codepoint(), t_isalpha(), t_isdigit(), t_isprint(), t_isspace(), text_format(), text_position_get_match_pos(), text_position_next(), text_reverse(), text_substring(), text_to_bits(), textregexreplace(), thesaurusRead(), TParserGet(), translate(), ts_stat_sql(), tsvectorout(), unaccent_lexize(), varbit_in(), varstr_levenshtein(), wchareq(), and xml_is_document().

967 {
968  return pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
969 }
pg_enc encoding
Definition: pg_wchar.h:356
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1867
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81
mblen_converter mblen
Definition: pg_wchar.h:402

◆ pg_mbstrlen()

int pg_mbstrlen ( const char *  mbstr)

Definition at line 980 of file mbutils.c.

References pg_database_encoding_max_length(), and pg_mblen().

Referenced by cache_locale_time(), NUM_processor(), surrogate_pair_to_codepoint(), and text_format_append_string().

981 {
982  int len = 0;
983 
984  /* optimization for single byte encoding */
985  if (pg_database_encoding_max_length() == 1)
986  return strlen(mbstr);
987 
988  while (*mbstr)
989  {
990  mbstr += pg_mblen(mbstr);
991  len++;
992  }
993  return len;
994 }
int pg_mblen(const char *mbstr)
Definition: mbutils.c:966
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1495

◆ pg_mbstrlen_with_len()

int pg_mbstrlen_with_len ( const char *  mbstr,
int  len 
)

Definition at line 1000 of file mbutils.c.

References pg_database_encoding_max_length(), and pg_mblen().

Referenced by bpchar(), bpchar_input(), bpcharlen(), executor_errposition(), lpad(), match_prosrc_to_query(), parser_errposition(), plpgsql_scanner_errposition(), rpad(), similar_escape_internal(), surrogate_pair_to_codepoint(), text_left(), text_length(), text_right(), text_substring(), unicode_is_normalized(), unicode_normalize_func(), and varstr_levenshtein().

1001 {
1002  int len = 0;
1003 
1004  /* optimization for single byte encoding */
1005  if (pg_database_encoding_max_length() == 1)
1006  return limit;
1007 
1008  while (limit > 0 && *mbstr)
1009  {
1010  int l = pg_mblen(mbstr);
1011 
1012  limit -= l;
1013  mbstr += l;
1014  len++;
1015  }
1016  return len;
1017 }
int pg_mblen(const char *mbstr)
Definition: mbutils.c:966
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1495

◆ pg_mule_mblen()

int pg_mule_mblen ( const unsigned char *  s)

Definition at line 839 of file wchar.c.

References IS_LC1, IS_LC2, IS_LCPRV1, and IS_LCPRV2.

Referenced by mic2latin(), mic2latin_with_table(), pg_mule_verifychar(), and surrogate_pair_to_codepoint().

840 {
841  int len;
842 
843  if (IS_LC1(*s))
844  len = 2;
845  else if (IS_LCPRV1(*s))
846  len = 3;
847  else if (IS_LC2(*s))
848  len = 3;
849  else if (IS_LCPRV2(*s))
850  len = 4;
851  else
852  len = 1; /* assume ASCII */
853  return len;
854 }
#define IS_LC2(c)
Definition: pg_wchar.h:144
#define IS_LCPRV2(c)
Definition: pg_wchar.h:164
#define IS_LCPRV1(c)
Definition: pg_wchar.h:152
#define IS_LC1(c)
Definition: pg_wchar.h:123

◆ pg_server_to_any()

char* pg_server_to_any ( const char *  s,
int  len,
int  encoding 
)

Definition at line 749 of file mbutils.c.

References pg_enc2name::encoding, perform_default_encoding_conversion(), pg_do_encoding_conversion(), PG_SQL_ASCII, pg_verify_mbstr(), and unconstify.

Referenced by compareStrings(), CopyAttributeOutCSV(), CopyAttributeOutText(), DoCopyTo(), dsnowball_lexize(), hv_fetch_string(), hv_store_string(), pg_server_to_client(), PLyUnicode_AsString(), surrogate_pair_to_codepoint(), utf_e2u(), and xml_is_document().

750 {
751  if (len <= 0)
752  return unconstify(char *, s); /* empty string is always valid */
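753 
754  if (encoding == DatabaseEncoding->encoding ||
755  encoding == PG_SQL_ASCII)
756  return unconstify(char *, s); /* assume data is valid */
757 
758  if (DatabaseEncoding->encoding == PG_SQL_ASCII)
759  {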
760  /* No conversion is possible, but we must validate the result */
761  (void) pg_verify_mbstr(encoding, s, len, false);
762  return unconstify(char *, s);
763  }
764 
765  /* Fast path if we can use cached conversion function */
766  if (encoding == ClientEncoding->encoding)
767  return perform_default_encoding_conversion(s, len, false);
768 
769  /* General case ... will not work outside transactions */
770  return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
771  len,
772  DatabaseEncoding->encoding,
773  encoding);
774 }
static char * perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)
Definition: mbutils.c:783
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
unsigned char * pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding)
Definition: mbutils.c:356
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1515
#define unconstify(underlying_type, expr)
Definition: c.h:1243
pg_enc encoding
Definition: pg_wchar.h:356
int32 encoding
Definition: pg_database.h:41
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81

◆ pg_server_to_client()

char* pg_server_to_client ( const char *  s,
int  len 
)

Definition at line 738 of file mbutils.c.

References pg_enc2name::encoding, and pg_server_to_any().

Referenced by pq_puttextmessage(), pq_sendcountedtext(), pq_sendstring(), pq_sendtext(), pq_writestring(), and surrogate_pair_to_codepoint().

739 {
740  return pg_server_to_any(s, len, ClientEncoding->encoding);
741 }
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
char * pg_server_to_any(const char *s, int len, int encoding)
Definition: mbutils.c:749
pg_enc encoding
Definition: pg_wchar.h:356

◆ pg_unicode_to_server()

void pg_unicode_to_server ( pg_wchar  c,
unsigned char *  s 
)

Definition at line 864 of file mbutils.c.

References BoolGetDatum, CStringGetDatum, ereport, errcode(), errmsg(), ERROR, FunctionCall6, GetDatabaseEncoding(), GetDatabaseEncodingName(), Int32GetDatum, is_valid_unicode_codepoint(), MAX_MULTIBYTE_CHAR_LEN, name, pg_enc2name_tbl, PG_UTF8, pg_utf_mblen(), and unicode_to_utf8().

Referenced by json_lex_string(), map_xml_name_to_sql_identifier(), str_udeescape(), surrogate_pair_to_codepoint(), and unistr().

865 {
866  unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
867  int c_as_utf8_len;
868  int server_encoding;
869 
870  /*
871  * Complain if invalid Unicode code point. The choice of errcode here is
872  * debatable, but really our caller should have checked this anyway.
873  */
874  if (!is_valid_unicode_codepoint(c))
875  ereport(ERROR,
876  (errcode(ERRCODE_SYNTAX_ERROR),
877  errmsg("invalid Unicode code point")));
878 
879  /* Otherwise, if it's in ASCII range, conversion is trivial */
880  if (c <= 0x7F)
881  {
882  s[0] = (unsigned char) c;
883  s[1] = '\0';
884  return;
885  }
886 
887  /* If the server encoding is UTF-8, we just need to reformat the code */
888  server_encoding = GetDatabaseEncoding();
889  if (server_encoding == PG_UTF8)
890  {
891  unicode_to_utf8(c, s);
892  s[pg_utf_mblen(s)] = '\0';
893  return;
894  }
895 
896  /* For all other cases, we must have a conversion function available */
897  if (Utf8ToServerConvProc == NULL)
898  ereport(ERROR,
899  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
900  errmsg("conversion between %s and %s is not supported",
901  pg_enc2name_tbl[PG_UTF8].name,
902  GetDatabaseEncodingName())));
903 
904  /* Construct UTF-8 source string */
905  unicode_to_utf8(c, c_as_utf8);
906  c_as_utf8_len = pg_utf_mblen(c_as_utf8);
907  c_as_utf8[c_as_utf8_len] = '\0';
908 
909  /* Convert, or throw error if we can't */
910  FunctionCall6(Utf8ToServerConvProc,
911  Int32GetDatum(PG_UTF8),
912  Int32GetDatum(server_encoding),
913  CStringGetDatum(c_as_utf8),
914  CStringGetDatum(s),
915  Int32GetDatum(c_as_utf8_len),
916  BoolGetDatum(false));
917 }
static FmgrInfo * Utf8ToServerConvProc
Definition: mbutils.c:75
unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: wchar.c:483
static bool is_valid_unicode_codepoint(pg_wchar c)
Definition: pg_wchar.h:539
int errcode(int sqlerrcode)
Definition: elog.c:698
#define ERROR
Definition: elog.h:46
char * c
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
#define CStringGetDatum(X)
Definition: postgres.h:622
#define MAX_MULTIBYTE_CHAR_LEN
Definition: pg_wchar.h:30
int GetDatabaseEncoding(void)
Definition: mbutils.c:1210
#define BoolGetDatum(X)
Definition: postgres.h:446
#define ereport(elevel,...)
Definition: elog.h:157
const char * GetDatabaseEncodingName(void)
Definition: mbutils.c:1216
#define FunctionCall6(flinfo, arg1, arg2, arg3, arg4, arg5, arg6)
Definition: fmgr.h:654
const char * name
Definition: encode.c:515
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:549
#define Int32GetDatum(X)
Definition: postgres.h:523
int errmsg(const char *fmt,...)
Definition: elog.c:909

◆ pg_utf8_islegal()

bool pg_utf8_islegal ( const unsigned char *  source,
int  length 
)

Definition at line 1804 of file wchar.c.

References source.

Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifychar(), surrogate_pair_to_codepoint(), utf8_to_iso8859_1(), and UtfToLocal().

1805 {
1806  unsigned char a;
1807 
1808  switch (length)
1809  {
1810  default:
1811  /* reject lengths 5 and 6 for now */
1812  return false;
1813  case 4:
1814  a = source[3];
1815  if (a < 0x80 || a > 0xBF)
1816  return false;
1817  /* FALL THRU */
1818  case 3:
1819  a = source[2];
1820  if (a < 0x80 || a > 0xBF)
1821  return false;
1822  /* FALL THRU */
1823  case 2:
1824  a = source[1];
1825  switch (*source)
1826  {
1827  case 0xE0:
1828  if (a < 0xA0 || a > 0xBF)
1829  return false;
1830  break;
1831  case 0xED:
1832  if (a < 0x80 || a > 0x9F)
1833  return false;
1834  break;
1835  case 0xF0:
1836  if (a < 0x90 || a > 0xBF)
1837  return false;
1838  break;
1839  case 0xF4:
1840  if (a < 0x80 || a > 0x8F)
1841  return false;
1842  break;
1843  default:
1844  if (a < 0x80 || a > 0xBF)
1845  return false;
1846  break;
1847  }
1848  /* FALL THRU */
1849  case 1:
1850  a = *source;
1851  if (a >= 0x80 && a < 0xC2)
1852  return false;
1853  if (a > 0xF4)
1854  return false;
1855  break;
1856  }
1857  return true;
1858 }
static rewind_source * source
Definition: pg_rewind.c:79

◆ pg_utf_mblen()

int pg_utf_mblen ( const unsigned char *  s)

Definition at line 549 of file wchar.c.

Referenced by json_lex_string(), pg_saslprep(), pg_unicode_to_server(), pg_utf8_string_len(), pg_wchar2utf_with_len(), surrogate_pair_to_codepoint(), unicode_is_normalized(), unicode_normalize_func(), utf8_to_iso8859_1(), and UtfToLocal().

550 {
551  int len;
552 
553  if ((*s & 0x80) == 0)
554  len = 1;
555  else if ((*s & 0xe0) == 0xc0)
556  len = 2;
557  else if ((*s & 0xf0) == 0xe0)
558  len = 3;
559  else if ((*s & 0xf8) == 0xf0)
560  len = 4;
561 #ifdef NOT_USED
562  else if ((*s & 0xfc) == 0xf8)
563  len = 5;
564  else if ((*s & 0xfe) == 0xfc)
565  len = 6;
566 #endif
567  else
568  len = 1;
569  return len;
570 }

◆ pg_valid_client_encoding()

int pg_valid_client_encoding ( const char *  name)

Definition at line 486 of file encnames.c.

References enc, pg_char_to_encoding(), and PG_VALID_FE_ENCODING.

Referenced by check_client_encoding(), and surrogate_pair_to_codepoint().

487 {
488  int enc;
489 
490  if ((enc = pg_char_to_encoding(name)) < 0)
491  return -1;
492 
493  if (!PG_VALID_FE_ENCODING(enc))
494  return -1;
495 
496  return enc;
497 }
int pg_char_to_encoding(const char *name)
Definition: encnames.c:550
struct pg_encoding enc
Definition: encode.c:516
#define PG_VALID_FE_ENCODING(_enc)
Definition: pg_wchar.h:305
const char * name
Definition: encode.c:515

◆ pg_valid_server_encoding()

int pg_valid_server_encoding ( const char *  name)

Definition at line 500 of file encnames.c.

References enc, pg_char_to_encoding(), and PG_VALID_BE_ENCODING.

Referenced by createdb(), get_encoding_id(), parse_extension_control_file(), and surrogate_pair_to_codepoint().

501 {
502  int enc;
503 
504  if ((enc = pg_char_to_encoding(name)) < 0)
505  return -1;
506 
507  if (!PG_VALID_BE_ENCODING(enc))
508  return -1;
509 
510  return enc;
511 }
int pg_char_to_encoding(const char *name)
Definition: encnames.c:550
struct pg_encoding enc
Definition: encode.c:516
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:295
const char * name
Definition: encode.c:515

◆ pg_valid_server_encoding_id()

int pg_valid_server_encoding_id ( int  encoding)

Definition at line 514 of file encnames.c.

Referenced by surrogate_pair_to_codepoint().

515 {
516  return PG_VALID_BE_ENCODING(encoding);
517 }
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:295
int32 encoding
Definition: pg_database.h:41

◆ pg_verify_mbstr()

bool pg_verify_mbstr ( int  encoding,
const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1515 of file mbutils.c.

References Assert, encoding, pg_wchar_tbl::mbverifystr, PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().

Referenced by AddFileToBackupManifest(), LogicalOutputWrite(), pg_any_to_server(), pg_convert(), pg_do_encoding_conversion(), pg_server_to_any(), pg_verifymbstr(), read_extension_script_file(), and surrogate_pair_to_codepoint().

1516 {
1517  int oklen;
1518 
1519  Assert(PG_VALID_ENCODING(encoding));
1520 
1521  oklen = pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len);
1522  if (oklen != len)
1523  {
1524  if (noError)
1525  return false;
1526  report_invalid_encoding(encoding, mbstr + oklen, len - oklen);
1527  }
1528  return true;
1529 }
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1647
mbstr_verifier mbverifystr
Definition: pg_wchar.h:405
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
#define Assert(condition)
Definition: c.h:804
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1867

◆ pg_verify_mbstr_len()

int pg_verify_mbstr_len ( int  encoding,
const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1546 of file mbutils.c.

References Assert, encoding, IS_HIGHBIT_SET, pg_wchar_tbl::mbverifychar, pg_encoding_max_length(), PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().

Referenced by length_in_encoding(), and surrogate_pair_to_codepoint().

1547 {
1548  mbchar_verifier mbverifychar;
1549  int mb_len;
1550 
1551  Assert(PG_VALID_ENCODING(encoding));
1552 
1553  /*
1554  * In single-byte encodings, we need only reject nulls (\0).
1555  */
1556  if (pg_encoding_max_length(encoding) <= 1)
1557  {
1558  const char *nullpos = memchr(mbstr, 0, len);
1559 
1560  if (nullpos == NULL)
1561  return len;
1562  if (noError)
1563  return -1;
1564  report_invalid_encoding(encoding, nullpos, 1);
1565  }
1566 
1567  /* fetch function pointer just once */
1568  mbverifychar = pg_wchar_table[encoding].mbverifychar;
1569 
1570  mb_len = 0;
1571 
1572  while (len > 0)
1573  {
1574  int l;
1575 
1576  /* fast path for ASCII-subset characters */
1577  if (!IS_HIGHBIT_SET(*mbstr))
1578  {
1579  if (*mbstr != '\0')
1580  {
1581  mb_len++;
1582  mbstr++;
1583  len--;
1584  continue;
1585  }
1586  if (noError)
1587  return -1;
1588  report_invalid_encoding(encoding, mbstr, len);
1589  }
1590 
1591  l = (*mbverifychar) ((const unsigned char *) mbstr, len);
1592 
1593  if (l < 0)
1594  {
1595  if (noError)
1596  return -1;
1597  report_invalid_encoding(encoding, mbstr, len);
1598  }
1599 
1600  mbstr += l;
1601  len -= l;
1602  mb_len++;
1603  }
1604  return mb_len;
1605 }
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1647
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1156
int(* mbchar_verifier)(const unsigned char *mbstr, int len)
Definition: pg_wchar.h:392
int pg_encoding_max_length(int encoding)
Definition: wchar.c:1979
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
mbchar_verifier mbverifychar
Definition: pg_wchar.h:404
#define Assert(condition)
Definition: c.h:804
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1867

◆ pg_verifymbstr()

bool pg_verifymbstr ( const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1505 of file mbutils.c.

References GetDatabaseEncoding(), and pg_verify_mbstr().

Referenced by char2wchar(), CopyReadAttributesText(), plperl_spi_exec(), plperl_spi_prepare(), plperl_spi_query(), PLy_cursor_query(), PLy_output(), PLy_spi_execute_query(), PLy_spi_prepare(), PLyObject_AsString(), read_text_file(), spg_text_leaf_consistent(), and surrogate_pair_to_codepoint().

1506 {
1507  return pg_verify_mbstr(GetDatabaseEncoding(), mbstr, len, noError);
1508 }
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1515
int GetDatabaseEncoding(void)
Definition: mbutils.c:1210

◆ pg_wchar2mb()

int pg_wchar2mb ( const pg_wchar from,
char *  to 
)

Definition at line 944 of file mbutils.c.

References pg_enc2name::encoding, pg_wchar_strlen(), pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

Referenced by surrogate_pair_to_codepoint().

945 {
946  return pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len(from, (unsigned char *) to, pg_wchar_strlen(from));
947 }
size_t pg_wchar_strlen(const pg_wchar *str)
Definition: wstrncmp.c:70
wchar2mb_with_len_converter wchar2mb_with_len
Definition: pg_wchar.h:400
pg_enc encoding
Definition: pg_wchar.h:356
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1867
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81

◆ pg_wchar2mb_with_len()

int pg_wchar2mb_with_len ( const pg_wchar from,
char *  to,
int  len 
)

Definition at line 951 of file mbutils.c.

References pg_enc2name::encoding, pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

Referenced by build_regexp_match_result(), build_regexp_split_result(), build_test_match_result(), convertPgWchar(), regexp_fixed_prefix(), and surrogate_pair_to_codepoint().

952 {
953  return pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
954 }
wchar2mb_with_len_converter wchar2mb_with_len
Definition: pg_wchar.h:400
pg_enc encoding
Definition: pg_wchar.h:356
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1867
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81

◆ pg_wchar_strlen()

size_t pg_wchar_strlen ( const pg_wchar wstr)

Definition at line 70 of file wstrncmp.c.

Referenced by pg_wchar2mb(), and surrogate_pair_to_codepoint().

71 {
72  const pg_wchar *s;
73 
74  for (s = str; *s; ++s)
75  ;
76  return (s - str);
77 }
unsigned int pg_wchar
Definition: mbprint.c:31

◆ pg_wchar_strncmp()

int pg_wchar_strncmp ( const pg_wchar s1,
const pg_wchar s2,
size_t  n 
)

Definition at line 40 of file wstrncmp.c.

Referenced by surrogate_pair_to_codepoint().

41 {
42  if (n == 0)
43  return 0;
44  do
45  {
46  if (*s1 != *s2++)
47  return (*s1 - *(s2 - 1));
48  if (*s1++ == 0)
49  break;
50  } while (--n != 0);
51  return 0;
52 }
char * s1
char * s2

◆ PrepareClientEncoding()

int PrepareClientEncoding ( int  encoding)

Definition at line 110 of file mbutils.c.

References backend_startup_complete, ConvProcInfo::c_encoding, encoding, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), IsTransactionState(), lcons(), lfirst, MemoryContextAlloc(), MemoryContextSwitchTo(), OidIsValid, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, ConvProcInfo::to_server_info, and TopMemoryContext.

Referenced by check_client_encoding(), InitializeClientEncoding(), and surrogate_pair_to_codepoint().

111 {
112  int current_server_encoding;
113  ListCell *lc;
114 
115  if (!PG_VALID_FE_ENCODING(encoding))
116  return -1;
117 
118  /* Can't do anything during startup, per notes above */
119  if (!backend_startup_complete)
120  return 0;
121 
122  current_server_encoding = GetDatabaseEncoding();
123 
124  /*
125  * Check for cases that require no conversion function.
126  */
127  if (current_server_encoding == encoding ||
128  current_server_encoding == PG_SQL_ASCII ||
129  encoding == PG_SQL_ASCII)
130  return 0;
131 
132  if (IsTransactionState())
133  {
134  /*
135  * If we're in a live transaction, it's safe to access the catalogs,
136  * so look up the functions. We repeat the lookup even if the info is
137  * already cached, so that we can react to changes in the contents of
138  * pg_conversion.
139  */
140  Oid to_server_proc,
141  to_client_proc;
142  ConvProcInfo *convinfo;
143  MemoryContext oldcontext;
144 
145  to_server_proc = FindDefaultConversionProc(encoding,
146  current_server_encoding);
147  if (!OidIsValid(to_server_proc))
148  return -1;
149  to_client_proc = FindDefaultConversionProc(current_server_encoding,
150  encoding);
151  if (!OidIsValid(to_client_proc))
152  return -1;
153 
154  /*
155  * Load the fmgr info into TopMemoryContext (could still fail here)
156  */
157  convinfo = (ConvProcInfo *) MemoryContextAlloc(TopMemoryContext,
158  sizeof(ConvProcInfo));
159  convinfo->s_encoding = current_server_encoding;
160  convinfo->c_encoding = encoding;
161  fmgr_info_cxt(to_server_proc, &convinfo->to_server_info,
162  TopMemoryContext);
163  fmgr_info_cxt(to_client_proc, &convinfo->to_client_info,
164  TopMemoryContext);
165 
166  /* Attach new info to head of list */
167  oldcontext = MemoryContextSwitchTo(TopMemoryContext);
168  ConvProcList = lcons(convinfo, ConvProcList);
169  MemoryContextSwitchTo(oldcontext);
170 
171  /*
172  * We cannot yet remove any older entry for the same encoding pair,
173  * since it could still be in use. SetClientEncoding will clean up.
174  */
175 
176  return 0; /* success */
177  }
178  else
179  {
180  /*
181  * If we're not in a live transaction, the only thing we can do is
182  * restore a previous setting using the cache. This covers all
183  * transaction-rollback cases. The only case it might not work for is
184  * trying to change client_encoding on the fly by editing
185  * postgresql.conf and SIGHUP'ing. Which would probably be a stupid
186  * thing to do anyway.
187  */
188  foreach(lc, ConvProcList)
189  {
190  ConvProcInfo *oldinfo = (ConvProcInfo *) lfirst(lc);
191 
192  if (oldinfo->s_encoding == current_server_encoding &&
193  oldinfo->c_encoding == encoding)
194  return 0;
195  }
196 
197  return -1; /* it's not cached, so fail */
198  }
199 }
FmgrInfo to_server_info
Definition: mbutils.c:57
FmgrInfo to_client_info
Definition: mbutils.c:58
static bool backend_startup_complete
Definition: mbutils.c:90
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
unsigned int Oid
Definition: postgres_ext.h:31
#define OidIsValid(objectId)
Definition: c.h:710
int s_encoding
Definition: mbutils.c:55
static List * ConvProcList
Definition: mbutils.c:61
#define PG_VALID_FE_ENCODING(_enc)
Definition: pg_wchar.h:305
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:136
MemoryContext TopMemoryContext
Definition: mcxt.c:48
int GetDatabaseEncoding(void)
Definition: mbutils.c:1210
List * lcons(void *datum, List *list)
Definition: list.c:468
#define lfirst(lc)
Definition: pg_list.h:169
Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding)
Definition: namespace.c:3755
bool IsTransactionState(void)
Definition: xact.c:371
int32 encoding
Definition: pg_database.h:41
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:863
int c_encoding
Definition: mbutils.c:56

◆ report_invalid_encoding()

void report_invalid_encoding ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 1647 of file mbutils.c.

References buf, ereport, errcode(), errmsg(), ERROR, Min, name, pg_enc2name_tbl, pg_encoding_mblen(), and sprintf.

Referenced by big52euc_tw(), big52mic(), CopyConversionError(), euc_cn2mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), iso8859_1_to_utf8(), latin2mic(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_verify_mbstr(), pg_verify_mbstr_len(), shift_jis_20042euc_jis_2004(), sjis2euc_jp(), sjis2mic(), surrogate_pair_to_codepoint(), test_enc_conversion(), utf8_to_iso8859_1(), and UtfToLocal().

1648 {
1649  int l = pg_encoding_mblen(encoding, mbstr);
1650  char buf[8 * 5 + 1];
1651  char *p = buf;
1652  int j,
1653  jlimit;
1654 
1655  jlimit = Min(l, len);
1656  jlimit = Min(jlimit, 8); /* prevent buffer overrun */
1657 
1658  for (j = 0; j < jlimit; j++)
1659  {
1660  p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
1661  if (j < jlimit - 1)
1662  p += sprintf(p, " ");
1663  }
1664 
1665  ereport(ERROR,
1666  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1667  errmsg("invalid byte sequence for encoding \"%s\": %s",
1668  pg_enc2name_tbl[encoding].name,
1669  buf)));
1670 }
#define Min(x, y)
Definition: c.h:986
int errcode(int sqlerrcode)
Definition: elog.c:698
#define sprintf
Definition: port.h:218
#define ERROR
Definition: elog.h:46
static char * buf
Definition: pg_test_fsync.c:68
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:1921
#define ereport(elevel,...)
Definition: elog.h:157
int32 encoding
Definition: pg_database.h:41
const char * name
Definition: encode.c:515
int errmsg(const char *fmt,...)
Definition: elog.c:909

◆ report_untranslatable_char()

void report_untranslatable_char ( int  src_encoding,
int  dest_encoding,
const char *  mbstr,
int  len 
)

Definition at line 1679 of file mbutils.c.

References buf, ereport, errcode(), errmsg(), ERROR, GetMessageEncoding(), IsTransactionState(), Min, name, palloc(), pfree(), pg_do_encoding_conversion(), pg_enc2name_tbl, pg_encoding_mblen(), PG_SQL_ASCII, PG_UTF8, sprintf, and generate_unaccent_rules::str.

Referenced by big52euc_tw(), big52mic(), euc_tw2big5(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), surrogate_pair_to_codepoint(), utf8_to_iso8859_1(), and UtfToLocal().

1681 {
1682  int l = pg_encoding_mblen(src_encoding, mbstr);
1683  char buf[8 * 5 + 1];
1684  char *p = buf;
1685  int j,
1686  jlimit;
1687 
1688  jlimit = Min(l, len);
1689  jlimit = Min(jlimit, 8); /* prevent buffer overrun */
1690 
1691  for (j = 0; j < jlimit; j++)
1692  {
1693  p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
1694  if (j < jlimit - 1)
1695  p += sprintf(p, " ");
1696  }
1697 
1698  ereport(ERROR,
1699  (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
1700  errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
1701  buf,
1702  pg_enc2name_tbl[src_encoding].name,
1703  pg_enc2name_tbl[dest_encoding].name)));
1704 }
#define Min(x, y)
Definition: c.h:986
int errcode(int sqlerrcode)
Definition: elog.c:698
#define sprintf
Definition: port.h:218
#define ERROR
Definition: elog.h:46
static char * buf
Definition: pg_test_fsync.c:68
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:1921
#define ereport(elevel,...)
Definition: elog.h:157
const char * name
Definition: encode.c:515
int errmsg(const char *fmt,...)
Definition: elog.c:909

◆ SetClientEncoding()

int SetClientEncoding ( int  encoding)

Definition at line 208 of file mbutils.c.

References backend_startup_complete, ConvProcInfo::c_encoding, encoding, foreach_delete_current, GetDatabaseEncoding(), lfirst, pending_client_encoding, pfree(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, and ConvProcInfo::to_server_info.

Referenced by assign_client_encoding(), InitializeClientEncoding(), ParallelWorkerMain(), and surrogate_pair_to_codepoint().

209 {
210  int current_server_encoding;
211  bool found;
212  ListCell *lc;
213 
214  if (!PG_VALID_FE_ENCODING(encoding))
215  return -1;
216 
217  /* Can't do anything during startup, per notes above */
218  if (!backend_startup_complete)
219  {
220  pending_client_encoding = encoding;
221  return 0;
222  }
223 
224  current_server_encoding = GetDatabaseEncoding();
225 
226  /*
227  * Check for cases that require no conversion function.
228  */
229  if (current_server_encoding == encoding ||
230  current_server_encoding == PG_SQL_ASCII ||
231  encoding == PG_SQL_ASCII)
232  {
233  ClientEncoding = &pg_enc2name_tbl[encoding];
234  ToServerConvProc = NULL;
235  ToClientConvProc = NULL;
236  return 0;
237  }
238 
239  /*
240  * Search the cache for the entry previously prepared by
241  * PrepareClientEncoding; if there isn't one, we lose. While at it,
242  * release any duplicate entries so that repeated Prepare/Set cycles don't
243  * leak memory.
244  */
245  found = false;
246  foreach(lc, ConvProcList)
247  {
248  ConvProcInfo *convinfo = (ConvProcInfo *) lfirst(lc);
249 
250  if (convinfo->s_encoding == current_server_encoding &&
251  convinfo->c_encoding == encoding)
252  {
253  if (!found)
254  {
255  /* Found newest entry, so set up */
256  ClientEncoding = &pg_enc2name_tbl[encoding];
257  ToServerConvProc = &convinfo->to_server_info;
258  ToClientConvProc = &convinfo->to_client_info;
259  found = true;
260  }
261  else
262  {
263  /* Duplicate entry, release it */
264  ConvProcList = foreach_delete_current(ConvProcList, lc);
265  pfree(convinfo);
266  }
267  }
268  }
269 
270  if (found)
271  return 0; /* success */
272  else
273  return -1; /* it's not cached, so fail */
274 }
FmgrInfo to_server_info
Definition: mbutils.c:57
FmgrInfo to_client_info
Definition: mbutils.c:58
static bool backend_startup_complete
Definition: mbutils.c:90
static int pending_client_encoding
Definition: mbutils.c:91
static FmgrInfo * ToServerConvProc
Definition: mbutils.c:67
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
#define foreach_delete_current(lst, cell)
Definition: pg_list.h:369
int s_encoding
Definition: mbutils.c:55
void pfree(void *pointer)
Definition: mcxt.c:1169
static List * ConvProcList
Definition: mbutils.c:61
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
#define PG_VALID_FE_ENCODING(_enc)
Definition: pg_wchar.h:305
int GetDatabaseEncoding(void)
Definition: mbutils.c:1210
#define lfirst(lc)
Definition: pg_list.h:169
int32 encoding
Definition: pg_database.h:41
static FmgrInfo * ToClientConvProc
Definition: mbutils.c:68
int c_encoding
Definition: mbutils.c:56

◆ SetDatabaseEncoding()

void SetDatabaseEncoding ( int  encoding)

Definition at line 1104 of file mbutils.c.

References Assert, elog, encoding, pg_enc2name::encoding, ERROR, pg_enc2name_tbl, and PG_VALID_BE_ENCODING.

Referenced by CheckMyDatabase(), and surrogate_pair_to_codepoint().

1105 {
1106  if (!PG_VALID_BE_ENCODING(encoding))
1107  elog(ERROR, "invalid database encoding: %d", encoding);
1108 
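1109  DatabaseEncoding = &pg_enc2name_tbl[encoding];
1110  Assert(DatabaseEncoding->encoding == encoding);
1111 }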
#define ERROR
Definition: elog.h:46
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
pg_enc encoding
Definition: pg_wchar.h:356
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:295
#define Assert(condition)
Definition: c.h:804
int32 encoding
Definition: pg_database.h:41
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81
#define elog(elevel,...)
Definition: elog.h:232

◆ SetMessageEncoding()

void SetMessageEncoding ( int  encoding)

Definition at line 1114 of file mbutils.c.

References Assert, CurrentMemoryContext, elog, encoding, pg_enc2name::encoding, GetDatabaseEncoding(), GetMessageEncoding(), i, LOG, pg_enc2gettext::name, name, pg_enc2gettext_tbl, pg_enc2name_tbl, pg_get_encoding_from_locale(), PG_SQL_ASCII, pg_strcasecmp(), PG_VALID_ENCODING, setlocale, and write_stderr.

Referenced by pg_perm_setlocale(), and surrogate_pair_to_codepoint().

1115 {
1116  /* Some calls happen before we can elog()! */
1117  Assert(PG_VALID_ENCODING(encoding));
1118 
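1119  MessageEncoding = &pg_enc2name_tbl[encoding];
1120  Assert(MessageEncoding->encoding == encoding);
1121 }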
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
pg_enc encoding
Definition: pg_wchar.h:356
#define Assert(condition)
Definition: c.h:804
int32 encoding
Definition: pg_database.h:41
static const pg_enc2name * MessageEncoding
Definition: mbutils.c:82

◆ surrogate_pair_to_codepoint()

static pg_wchar surrogate_pair_to_codepoint ( pg_wchar  first,
pg_wchar  second 
)
inline static

Definition at line 557 of file pg_wchar.h.

References BIG5toCNS(), check_encoding_conversion_args(), CNStoBIG5(), pg_enc2name::encoding, get_encoding_name_for_icu(), GetDatabaseEncoding(), GetDatabaseEncodingName(), GetMessageEncoding(), InitializeClientEncoding(), is_encoding_supported_by_icu(), latin2mic(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2latin(), mic2latin_with_table(), pg_enc2name::name, pg_any_to_server(), pg_attribute_noreturn, pg_char_and_wchar_strcmp(), pg_char_and_wchar_strncmp(), pg_char_to_encoding(), pg_client_to_server(), pg_database_encoding_character_incrementer(), pg_database_encoding_max_length(), pg_do_encoding_conversion(), pg_do_encoding_conversion_buf(), pg_dsplen(), pg_encoding_dsplen(), pg_encoding_max_length(), pg_encoding_mb2wchar_with_len(), pg_encoding_mbcliplen(), pg_encoding_mblen(), pg_encoding_mblen_bounded(), pg_encoding_to_char(), pg_encoding_verifymbchar(), pg_encoding_verifymbstr(), pg_encoding_wchar2mb_with_len(), pg_get_client_encoding(), pg_get_client_encoding_name(), pg_mb2wchar(), pg_mb2wchar_with_len(), pg_mbcharcliplen(), pg_mbcliplen(), pg_mblen(), pg_mbstrlen(), pg_mbstrlen_with_len(), pg_mule_mblen(), pg_server_to_any(), pg_server_to_client(), pg_unicode_to_server(), pg_utf8_islegal(), pg_utf_mblen(), pg_valid_client_encoding(), pg_valid_server_encoding(), pg_valid_server_encoding_id(), pg_verify_mbstr(), pg_verify_mbstr_len(), pg_verifymbstr(), pg_wchar2mb(), pg_wchar2mb_with_len(), pg_wchar_strlen(), pg_wchar_strncmp(), PrepareClientEncoding(), report_invalid_encoding(), report_untranslatable_char(), s1, s2, SetClientEncoding(), SetDatabaseEncoding(), SetMessageEncoding(), source, generate_unaccent_rules::str, unicode_to_utf8(), utf8_to_unicode(), and UtfToLocal().

Referenced by json_lex_string(), str_udeescape(), and unistr().

558 {
559  return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF);
560 }

◆ unicode_to_utf8()

unsigned char* unicode_to_utf8 ( pg_wchar  c,
unsigned char *  utf8string 
)

Definition at line 483 of file wchar.c.

Referenced by json_lex_string(), pg_saslprep(), pg_unicode_to_server(), pg_wchar2utf_with_len(), surrogate_pair_to_codepoint(), and unicode_normalize_func().

484 {
485  if (c <= 0x7F)
486  {
487  utf8string[0] = c;
488  }
489  else if (c <= 0x7FF)
490  {
491  utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
492  utf8string[1] = 0x80 | (c & 0x3F);
493  }
494  else if (c <= 0xFFFF)
495  {
496  utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
497  utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
498  utf8string[2] = 0x80 | (c & 0x3F);
499  }
500  else
501  {
502  utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
503  utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
504  utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
505  utf8string[3] = 0x80 | (c & 0x3F);
506  }
507 
508  return utf8string;
509 }
char * c

◆ utf8_to_unicode()

pg_wchar utf8_to_unicode ( const unsigned char *  c)

Definition at line 686 of file wchar.c.

Referenced by pg_saslprep(), pg_utf_dsplen(), surrogate_pair_to_codepoint(), unicode_is_normalized(), and unicode_normalize_func().

687 {
688  if ((*c & 0x80) == 0)
689  return (pg_wchar) c[0];
690  else if ((*c & 0xe0) == 0xc0)
691  return (pg_wchar) (((c[0] & 0x1f) << 6) |
692  (c[1] & 0x3f));
693  else if ((*c & 0xf0) == 0xe0)
694  return (pg_wchar) (((c[0] & 0x0f) << 12) |
695  ((c[1] & 0x3f) << 6) |
696  (c[2] & 0x3f));
697  else if ((*c & 0xf8) == 0xf0)
698  return (pg_wchar) (((c[0] & 0x07) << 18) |
699  ((c[1] & 0x3f) << 12) |
700  ((c[2] & 0x3f) << 6) |
701  (c[3] & 0x3f));
702  else
703  /* that is an invalid code on purpose */
704  return 0xffffffff;
705 }
char * c
unsigned int pg_wchar
Definition: mbprint.c:31

◆ UtfToLocal()

int UtfToLocal ( const unsigned char *  utf,
int  len,
unsigned char *  iso,
const pg_mb_radix_tree *  map,
const pg_utf_to_local_combined *  cmap,
int  cmapsize,
utf_local_conversion_func  conv_func,
int  encoding,
bool  noError 
)

Definition at line 507 of file conv.c.

References pg_utf_to_local_combined::code, compare3(), elog, ereport, errcode(), errmsg(), ERROR, pg_mb_radix_conv(), PG_UTF8, pg_utf8_islegal(), pg_utf_mblen(), PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), and store_coded_char().

Referenced by surrogate_pair_to_codepoint(), utf8_to_big5(), utf8_to_euc_cn(), utf8_to_euc_jis_2004(), utf8_to_euc_jp(), utf8_to_euc_kr(), utf8_to_euc_tw(), utf8_to_gb18030(), utf8_to_gbk(), utf8_to_iso8859(), utf8_to_johab(), utf8_to_koi8r(), utf8_to_koi8u(), utf8_to_shift_jis_2004(), utf8_to_sjis(), utf8_to_uhc(), and utf8_to_win().

513 {
514  uint32 iutf;
515  int l;
516  const pg_utf_to_local_combined *cp;
517  const unsigned char *start = utf;
518 
519  if (!PG_VALID_ENCODING(encoding))
520  ereport(ERROR,
521  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
522  errmsg("invalid encoding number: %d", encoding)));
523 
524  for (; len > 0; len -= l)
525  {
526  unsigned char b1 = 0;
527  unsigned char b2 = 0;
528  unsigned char b3 = 0;
529  unsigned char b4 = 0;
530 
531  /* "break" cases all represent errors */
532  if (*utf == '\0')
533  break;
534 
535  l = pg_utf_mblen(utf);
536  if (len < l)
537  break;
538 
539  if (!pg_utf8_islegal(utf, l))
540  break;
541 
542  if (l == 1)
543  {
544  /* ASCII case is easy, assume it's one-to-one conversion */
545  *iso++ = *utf++;
546  continue;
547  }
548 
549  /* collect coded char of length l */
550  if (l == 2)
551  {
552  b3 = *utf++;
553  b4 = *utf++;
554  }
555  else if (l == 3)
556  {
557  b2 = *utf++;
558  b3 = *utf++;
559  b4 = *utf++;
560  }
561  else if (l == 4)
562  {
563  b1 = *utf++;
564  b2 = *utf++;
565  b3 = *utf++;
566  b4 = *utf++;
567  }
568  else
569  {
570  elog(ERROR, "unsupported character length %d", l);
571  iutf = 0; /* keep compiler quiet */
572  }
573  iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
574 
575  /* First, try with combined map if possible */
576  if (cmap && len > l)
577  {
578  const unsigned char *utf_save = utf;
579  int len_save = len;
580  int l_save = l;
581 
582  /* collect next character, same as above */
583  len -= l;
584 
585  l = pg_utf_mblen(utf);
586  if (len < l)
587  {
588  /* need more data to decide if this is a combined char */
589  utf -= l_save;
590  break;
591  }
592 
593  if (!pg_utf8_islegal(utf, l))
594  {
595  if (!noError)
596  report_invalid_encoding(PG_UTF8, (const char *) utf, len);
597  utf -= l_save;
598  break;
599  }
600 
601  /* We assume ASCII character cannot be in combined map */
602  if (l > 1)
603  {
604  uint32 iutf2;
605  uint32 cutf[2];
606 
607  if (l == 2)
608  {
609  iutf2 = *utf++ << 8;
610  iutf2 |= *utf++;
611  }
612  else if (l == 3)
613  {
614  iutf2 = *utf++ << 16;
615  iutf2 |= *utf++ << 8;
616  iutf2 |= *utf++;
617  }
618  else if (l == 4)
619  {
620  iutf2 = *utf++ << 24;
621  iutf2 |= *utf++ << 16;
622  iutf2 |= *utf++ << 8;
623  iutf2 |= *utf++;
624  }
625  else
626  {
627  elog(ERROR, "unsupported character length %d", l);
628  iutf2 = 0; /* keep compiler quiet */
629  }
630 
631  cutf[0] = iutf;
632  cutf[1] = iutf2;
633 
634  cp = bsearch(cutf, cmap, cmapsize,
635  sizeof(pg_utf_to_local_combined), compare3);
636 
637  if (cp)
638  {
639  iso = store_coded_char(iso, cp->code);
640  continue;
641  }
642  }
643 
644  /* fail, so back up to reprocess second character next time */
645  utf = utf_save;
646  len = len_save;
647  l = l_save;
648  }
649 
650  /* Now check ordinary map */
651  if (map)
652  {
653  uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
654 
655  if (converted)
656  {
657  iso = store_coded_char(iso, converted);
658  continue;
659  }
660  }
661 
662  /* if there's a conversion function, try that */
663  if (conv_func)
664  {
665  uint32 converted = (*conv_func) (iutf);
666 
667  if (converted)
668  {
669  iso = store_coded_char(iso, converted);
670  continue;
671  }
672  }
673 
674  /* failed to translate this character */
675  utf -= l;
676  if (noError)
677  break;
678  report_untranslatable_char(PG_UTF8, encoding,
679  (const char *) utf, len);
680  }
681 
682  /* if we broke out of loop early, must be invalid input */
683  if (len > 0 && !noError)
684  report_invalid_encoding(PG_UTF8, (const char *) utf, len);
685 
686  *iso = '\0';
687 
688  return utf - start;
689 }
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1804
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1647
int errcode(int sqlerrcode)
Definition: elog.c:698
static unsigned char * store_coded_char(unsigned char *dest, uint32 code)
Definition: conv.c:353
#define ERROR
Definition: elog.h:46
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
static uint32 pg_mb_radix_conv(const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)
Definition: conv.c:373
unsigned int uint32
Definition: c.h:441
#define ereport(elevel,...)
Definition: elog.h:157
static int compare3(const void *p1, const void *p2)
Definition: conv.c:320
int32 encoding
Definition: pg_database.h:41
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1679
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:549
int errmsg(const char *fmt,...)
Definition: elog.c:909
#define elog(elevel,...)
Definition: elog.h:232

Variable Documentation

◆ pg_enc2gettext_tbl

const pg_enc2gettext pg_enc2gettext_tbl[]

Definition at line 361 of file encnames.c.

Referenced by SetMessageEncoding().

◆ pg_enc2name_tbl

◆ pg_wchar_table