PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
pg_wchar.h File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  pg_enc2name
 
struct  pg_wchar_tbl
 
struct  pg_mb_radix_tree
 
struct  pg_utf_to_local_combined
 
struct  pg_local_to_utf_combined
 

Macros

#define MAX_MULTIBYTE_CHAR_LEN   4
 
#define SS2   0x8e /* single shift 2 (JIS0201) */
 
#define SS3   0x8f /* single shift 3 (JIS0212) */
 
#define ISSJISHEAD(c)   (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))
 
#define ISSJISTAIL(c)   (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
 
#define LC_ISO8859_1   0x81 /* ISO8859 Latin 1 */
 
#define LC_ISO8859_2   0x82 /* ISO8859 Latin 2 */
 
#define LC_ISO8859_3   0x83 /* ISO8859 Latin 3 */
 
#define LC_ISO8859_4   0x84 /* ISO8859 Latin 4 */
 
#define LC_TIS620   0x85 /* Thai (not supported yet) */
 
#define LC_ISO8859_7   0x86 /* Greek (not supported yet) */
 
#define LC_ISO8859_6   0x87 /* Arabic (not supported yet) */
 
#define LC_ISO8859_8   0x88 /* Hebrew (not supported yet) */
 
#define LC_JISX0201K   0x89 /* Japanese 1 byte kana */
 
#define LC_JISX0201R   0x8a /* Japanese 1 byte Roman */
 
#define LC_KOI8_R   0x8b /* Cyrillic KOI8-R */
 
#define LC_ISO8859_5   0x8c /* ISO8859 Cyrillic */
 
#define LC_ISO8859_9   0x8d /* ISO8859 Latin 5 (not supported yet) */
 
#define LC_ISO8859_15   0x8e /* ISO8859-15 Latin 9 (not supported yet) */
 
#define IS_LC1(c)   ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8d)
 
#define LC_JISX0208_1978   0x90 /* Japanese Kanji, old JIS (not supported) */
 
#define LC_GB2312_80   0x91 /* Chinese */
 
#define LC_JISX0208   0x92 /* Japanese Kanji (JIS X 0208) */
 
#define LC_KS5601   0x93 /* Korean */
 
#define LC_JISX0212   0x94 /* Japanese Kanji (JIS X 0212) */
 
#define LC_CNS11643_1   0x95 /* CNS 11643-1992 Plane 1 */
 
#define LC_CNS11643_2   0x96 /* CNS 11643-1992 Plane 2 */
 
#define LC_JISX0213_1
 
#define LC_BIG5_1
 
#define LC_BIG5_2
 
#define IS_LC2(c)   ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)
 
#define LCPRV1_A   0x9a
 
#define LCPRV1_B   0x9b
 
#define IS_LCPRV1(c)   ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B)
 
#define IS_LCPRV1_A_RANGE(c)    ((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)
 
#define IS_LCPRV1_B_RANGE(c)    ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)
 
#define LCPRV2_A   0x9c
 
#define LCPRV2_B   0x9d
 
#define IS_LCPRV2(c)   ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B)
 
#define IS_LCPRV2_A_RANGE(c)    ((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)
 
#define IS_LCPRV2_B_RANGE(c)    ((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)
 
#define LC_SISHENG
 
#define LC_IPA
 
#define LC_VISCII_LOWER
 
#define LC_VISCII_UPPER
 
#define LC_ARABIC_DIGIT   0xa4 /* Arabic digit (not supported) */
 
#define LC_ARABIC_1_COLUMN   0xa5 /* Arabic 1-column (not supported) */
 
#define LC_ASCII_RIGHT_TO_LEFT
 
#define LC_LAO
 
#define LC_ARABIC_2_COLUMN   0xa8 /* Arabic 2-column (not supported) */
 
#define LC_INDIAN_1_COLUMN
 
#define LC_TIBETAN_1_COLUMN
 
#define LC_UNICODE_SUBSET_2
 
#define LC_UNICODE_SUBSET_3
 
#define LC_UNICODE_SUBSET
 
#define LC_ETHIOPIC   0xf5 /* Ethiopic characters (not supported) */
 
#define LC_CNS11643_3   0xf6 /* CNS 11643-1992 Plane 3 */
 
#define LC_CNS11643_4   0xf7 /* CNS 11643-1992 Plane 4 */
 
#define LC_CNS11643_5   0xf8 /* CNS 11643-1992 Plane 5 */
 
#define LC_CNS11643_6   0xf9 /* CNS 11643-1992 Plane 6 */
 
#define LC_CNS11643_7   0xfa /* CNS 11643-1992 Plane 7 */
 
#define LC_INDIAN_2_COLUMN
 
#define LC_TIBETAN   0xfc /* Tibetan (not supported) */
 
#define PG_ENCODING_BE_LAST   PG_KOI8U
 
#define PG_VALID_BE_ENCODING(_enc)    ((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST)
 
#define PG_ENCODING_IS_CLIENT_ONLY(_enc)    ((_enc) > PG_ENCODING_BE_LAST && (_enc) < _PG_LAST_ENCODING_)
 
#define PG_VALID_ENCODING(_enc)    ((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_)
 
#define PG_VALID_FE_ENCODING(_enc)   PG_VALID_ENCODING(_enc)
 
#define MAX_CONVERSION_GROWTH   4
 
#define MAX_CONVERSION_INPUT_LENGTH   16
 
#define MAX_UNICODE_EQUIVALENT_STRING   16
 
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
 
#define pg_char_to_encoding   pg_char_to_encoding_private
 
#define pg_encoding_to_char   pg_encoding_to_char_private
 
#define pg_valid_server_encoding   pg_valid_server_encoding_private
 
#define pg_valid_server_encoding_id   pg_valid_server_encoding_id_private
 
#define pg_utf_mblen   pg_utf_mblen_private
 

Typedefs

typedef unsigned int pg_wchar
 
typedef enum pg_enc pg_enc
 
typedef struct pg_enc2name pg_enc2name
 
typedef int(* mb2wchar_with_len_converter) (const unsigned char *from, pg_wchar *to, int len)
 
typedef int(* wchar2mb_with_len_converter) (const pg_wchar *from, unsigned char *to, int len)
 
typedef int(* mblen_converter) (const unsigned char *mbstr)
 
typedef int(* mbdisplaylen_converter) (const unsigned char *mbstr)
 
typedef bool(* mbcharacter_incrementer) (unsigned char *mbstr, int len)
 
typedef int(* mbchar_verifier) (const unsigned char *mbstr, int len)
 
typedef int(* mbstr_verifier) (const unsigned char *mbstr, int len)
 
typedef uint32(* utf_local_conversion_func) (uint32 code)
 

Enumerations

enum  pg_enc {
  PG_SQL_ASCII = 0 , PG_EUC_JP , PG_EUC_CN , PG_EUC_KR ,
  PG_EUC_TW , PG_EUC_JIS_2004 , PG_UTF8 , PG_MULE_INTERNAL ,
  PG_LATIN1 , PG_LATIN2 , PG_LATIN3 , PG_LATIN4 ,
  PG_LATIN5 , PG_LATIN6 , PG_LATIN7 , PG_LATIN8 ,
  PG_LATIN9 , PG_LATIN10 , PG_WIN1256 , PG_WIN1258 ,
  PG_WIN866 , PG_WIN874 , PG_KOI8R , PG_WIN1251 ,
  PG_WIN1252 , PG_ISO_8859_5 , PG_ISO_8859_6 , PG_ISO_8859_7 ,
  PG_ISO_8859_8 , PG_WIN1250 , PG_WIN1253 , PG_WIN1254 ,
  PG_WIN1255 , PG_WIN1257 , PG_KOI8U , PG_SJIS ,
  PG_BIG5 , PG_GBK , PG_UHC , PG_GB18030 ,
  PG_JOHAB , PG_SHIFT_JIS_2004 , _PG_LAST_ENCODING_
}
 

Functions

static bool is_valid_unicode_codepoint (pg_wchar c)
 
static bool is_utf16_surrogate_first (pg_wchar c)
 
static bool is_utf16_surrogate_second (pg_wchar c)
 
static pg_wchar surrogate_pair_to_codepoint (pg_wchar first, pg_wchar second)
 
static pg_wchar utf8_to_unicode (const unsigned char *c)
 
static unsigned char * unicode_to_utf8 (pg_wchar c, unsigned char *utf8string)
 
static int unicode_utf8len (pg_wchar c)
 
int pg_char_to_encoding (const char *name)
 
const char * pg_encoding_to_char (int encoding)
 
int pg_valid_server_encoding_id (int encoding)
 
void pg_encoding_set_invalid (int encoding, char *dst)
 
int pg_encoding_mblen (int encoding, const char *mbstr)
 
int pg_encoding_mblen_bounded (int encoding, const char *mbstr)
 
int pg_encoding_dsplen (int encoding, const char *mbstr)
 
int pg_encoding_verifymbchar (int encoding, const char *mbstr, int len)
 
int pg_encoding_verifymbstr (int encoding, const char *mbstr, int len)
 
int pg_encoding_max_length (int encoding)
 
int pg_valid_client_encoding (const char *name)
 
int pg_valid_server_encoding (const char *name)
 
bool is_encoding_supported_by_icu (int encoding)
 
const char * get_encoding_name_for_icu (int encoding)
 
bool pg_utf8_islegal (const unsigned char *source, int length)
 
int pg_utf_mblen (const unsigned char *s)
 
int pg_mule_mblen (const unsigned char *s)
 
int pg_mb2wchar (const char *from, pg_wchar *to)
 
int pg_mb2wchar_with_len (const char *from, pg_wchar *to, int len)
 
int pg_encoding_mb2wchar_with_len (int encoding, const char *from, pg_wchar *to, int len)
 
int pg_wchar2mb (const pg_wchar *from, char *to)
 
int pg_wchar2mb_with_len (const pg_wchar *from, char *to, int len)
 
int pg_encoding_wchar2mb_with_len (int encoding, const pg_wchar *from, char *to, int len)
 
int pg_char_and_wchar_strcmp (const char *s1, const pg_wchar *s2)
 
int pg_wchar_strncmp (const pg_wchar *s1, const pg_wchar *s2, size_t n)
 
int pg_char_and_wchar_strncmp (const char *s1, const pg_wchar *s2, size_t n)
 
size_t pg_wchar_strlen (const pg_wchar *str)
 
int pg_mblen (const char *mbstr)
 
int pg_dsplen (const char *mbstr)
 
int pg_mbstrlen (const char *mbstr)
 
int pg_mbstrlen_with_len (const char *mbstr, int limit)
 
int pg_mbcliplen (const char *mbstr, int len, int limit)
 
int pg_encoding_mbcliplen (int encoding, const char *mbstr, int len, int limit)
 
int pg_mbcharcliplen (const char *mbstr, int len, int limit)
 
int pg_database_encoding_max_length (void)
 
mbcharacter_incrementer pg_database_encoding_character_incrementer (void)
 
int PrepareClientEncoding (int encoding)
 
int SetClientEncoding (int encoding)
 
void InitializeClientEncoding (void)
 
int pg_get_client_encoding (void)
 
const char * pg_get_client_encoding_name (void)
 
void SetDatabaseEncoding (int encoding)
 
int GetDatabaseEncoding (void)
 
const char * GetDatabaseEncodingName (void)
 
void SetMessageEncoding (int encoding)
 
int GetMessageEncoding (void)
 
unsigned char * pg_do_encoding_conversion (unsigned char *src, int len, int src_encoding, int dest_encoding)
 
int pg_do_encoding_conversion_buf (Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError)
 
char * pg_client_to_server (const char *s, int len)
 
char * pg_server_to_client (const char *s, int len)
 
char * pg_any_to_server (const char *s, int len, int encoding)
 
char * pg_server_to_any (const char *s, int len, int encoding)
 
void pg_unicode_to_server (pg_wchar c, unsigned char *s)
 
bool pg_unicode_to_server_noerror (pg_wchar c, unsigned char *s)
 
unsigned short BIG5toCNS (unsigned short big5, unsigned char *lc)
 
unsigned short CNStoBIG5 (unsigned short cns, unsigned char lc)
 
int UtfToLocal (const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
 
int LocalToUtf (const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
 
bool pg_verifymbstr (const char *mbstr, int len, bool noError)
 
bool pg_verify_mbstr (int encoding, const char *mbstr, int len, bool noError)
 
int pg_verify_mbstr_len (int encoding, const char *mbstr, int len, bool noError)
 
void check_encoding_conversion_args (int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)
 
pg_noreturn void report_invalid_encoding (int encoding, const char *mbstr, int len)
 
pg_noreturn void report_untranslatable_char (int src_encoding, int dest_encoding, const char *mbstr, int len)
 
int local2local (const unsigned char *l, unsigned char *p, int len, int src_encoding, int dest_encoding, const unsigned char *tab, bool noError)
 
int latin2mic (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, bool noError)
 
int mic2latin (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, bool noError)
 
int latin2mic_with_table (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)
 
int mic2latin_with_table (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)
 

Variables

PGDLLIMPORT const pg_enc2name pg_enc2name_tbl []
 
PGDLLIMPORT const char * pg_enc2gettext_tbl []
 
PGDLLIMPORT const pg_wchar_tbl pg_wchar_table []
 

Macro Definition Documentation

◆ CHECK_ENCODING_CONVERSION_ARGS

#define CHECK_ENCODING_CONVERSION_ARGS (   srcencoding,
  destencoding 
)
Value:
check_encoding_conversion_args(PG_GETARG_INT32(0), \
PG_GETARG_INT32(1), \
PG_GETARG_INT32(4), \
(srcencoding), \
(destencoding))
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
void check_encoding_conversion_args(int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)
Definition: mbutils.c:1669

Definition at line 507 of file pg_wchar.h.

◆ IS_LC1

#define IS_LC1 (   c)    ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8d)

Definition at line 126 of file pg_wchar.h.

◆ IS_LC2

#define IS_LC2 (   c)    ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)

Definition at line 144 of file pg_wchar.h.

◆ IS_LCPRV1

#define IS_LCPRV1 (   c)    ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B)

Definition at line 152 of file pg_wchar.h.

◆ IS_LCPRV1_A_RANGE

#define IS_LCPRV1_A_RANGE (   c)     ((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)

Definition at line 153 of file pg_wchar.h.

◆ IS_LCPRV1_B_RANGE

#define IS_LCPRV1_B_RANGE (   c)     ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)

Definition at line 155 of file pg_wchar.h.

◆ IS_LCPRV2

#define IS_LCPRV2 (   c)    ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B)

Definition at line 164 of file pg_wchar.h.

◆ IS_LCPRV2_A_RANGE

#define IS_LCPRV2_A_RANGE (   c)     ((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)

Definition at line 165 of file pg_wchar.h.

◆ IS_LCPRV2_B_RANGE

#define IS_LCPRV2_B_RANGE (   c)     ((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)

Definition at line 167 of file pg_wchar.h.

◆ ISSJISHEAD

#define ISSJISHEAD (   c)    (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))

Definition at line 44 of file pg_wchar.h.

◆ ISSJISTAIL

#define ISSJISTAIL (   c)    (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))

Definition at line 45 of file pg_wchar.h.

◆ LC_ARABIC_1_COLUMN

#define LC_ARABIC_1_COLUMN   0xa5 /* Arabic 1-column (not supported) */

Definition at line 178 of file pg_wchar.h.

◆ LC_ARABIC_2_COLUMN

#define LC_ARABIC_2_COLUMN   0xa8 /* Arabic 2-column (not supported) */

Definition at line 181 of file pg_wchar.h.

◆ LC_ARABIC_DIGIT

#define LC_ARABIC_DIGIT   0xa4 /* Arabic digit (not supported) */

Definition at line 177 of file pg_wchar.h.

◆ LC_ASCII_RIGHT_TO_LEFT

#define LC_ASCII_RIGHT_TO_LEFT
Value:
0xa6 /* ASCII (left half of ISO8859-1) with
* right-to-left direction (not
* supported) */

Definition at line 179 of file pg_wchar.h.

◆ LC_BIG5_1

#define LC_BIG5_1
Value:
0x98 /* Plane 1 Chinese traditional (not
* supported) */

Definition at line 140 of file pg_wchar.h.

◆ LC_BIG5_2

#define LC_BIG5_2
Value:
0x99 /* Plane 2 Chinese traditional (not
* supported) */

Definition at line 141 of file pg_wchar.h.

◆ LC_CNS11643_1

#define LC_CNS11643_1   0x95 /* CNS 11643-1992 Plane 1 */

Definition at line 137 of file pg_wchar.h.

◆ LC_CNS11643_2

#define LC_CNS11643_2   0x96 /* CNS 11643-1992 Plane 2 */

Definition at line 138 of file pg_wchar.h.

◆ LC_CNS11643_3

#define LC_CNS11643_3   0xf6 /* CNS 11643-1992 Plane 3 */

Definition at line 192 of file pg_wchar.h.

◆ LC_CNS11643_4

#define LC_CNS11643_4   0xf7 /* CNS 11643-1992 Plane 4 */

Definition at line 193 of file pg_wchar.h.

◆ LC_CNS11643_5

#define LC_CNS11643_5   0xf8 /* CNS 11643-1992 Plane 5 */

Definition at line 194 of file pg_wchar.h.

◆ LC_CNS11643_6

#define LC_CNS11643_6   0xf9 /* CNS 11643-1992 Plane 6 */

Definition at line 195 of file pg_wchar.h.

◆ LC_CNS11643_7

#define LC_CNS11643_7   0xfa /* CNS 11643-1992 Plane 7 */

Definition at line 196 of file pg_wchar.h.

◆ LC_ETHIOPIC

#define LC_ETHIOPIC   0xf5 /* Ethiopic characters (not supported) */

Definition at line 191 of file pg_wchar.h.

◆ LC_GB2312_80

#define LC_GB2312_80   0x91 /* Chinese */

Definition at line 133 of file pg_wchar.h.

◆ LC_INDIAN_1_COLUMN

#define LC_INDIAN_1_COLUMN
Value:
0xf0 /* Indian charset for 1-column width
* glyphs (not supported) */

Definition at line 186 of file pg_wchar.h.

◆ LC_INDIAN_2_COLUMN

#define LC_INDIAN_2_COLUMN
Value:
0xfb /* Indian charset for 2-column width
* glyphs (not supported) */

Definition at line 197 of file pg_wchar.h.

◆ LC_IPA

#define LC_IPA
Value:
0xa1 /* IPA (International Phonetic
* Association) (not supported) */

Definition at line 174 of file pg_wchar.h.

◆ LC_ISO8859_1

#define LC_ISO8859_1   0x81 /* ISO8859 Latin 1 */

Definition at line 105 of file pg_wchar.h.

◆ LC_ISO8859_15

#define LC_ISO8859_15   0x8e /* ISO8859-15 Latin 9 (not supported yet) */

Definition at line 122 of file pg_wchar.h.

◆ LC_ISO8859_2

#define LC_ISO8859_2   0x82 /* ISO8859 Latin 2 */

Definition at line 106 of file pg_wchar.h.

◆ LC_ISO8859_3

#define LC_ISO8859_3   0x83 /* ISO8859 Latin 3 */

Definition at line 107 of file pg_wchar.h.

◆ LC_ISO8859_4

#define LC_ISO8859_4   0x84 /* ISO8859 Latin 4 */

Definition at line 108 of file pg_wchar.h.

◆ LC_ISO8859_5

#define LC_ISO8859_5   0x8c /* ISO8859 Cyrillic */

Definition at line 120 of file pg_wchar.h.

◆ LC_ISO8859_6

#define LC_ISO8859_6   0x87 /* Arabic (not supported yet) */

Definition at line 111 of file pg_wchar.h.

◆ LC_ISO8859_7

#define LC_ISO8859_7   0x86 /* Greek (not supported yet) */

Definition at line 110 of file pg_wchar.h.

◆ LC_ISO8859_8

#define LC_ISO8859_8   0x88 /* Hebrew (not supported yet) */

Definition at line 112 of file pg_wchar.h.

◆ LC_ISO8859_9

#define LC_ISO8859_9   0x8d /* ISO8859 Latin 5 (not supported yet) */

Definition at line 121 of file pg_wchar.h.

◆ LC_JISX0201K

#define LC_JISX0201K   0x89 /* Japanese 1 byte kana */

Definition at line 113 of file pg_wchar.h.

◆ LC_JISX0201R

#define LC_JISX0201R   0x8a /* Japanese 1 byte Roman */

Definition at line 114 of file pg_wchar.h.

◆ LC_JISX0208

#define LC_JISX0208   0x92 /* Japanese Kanji (JIS X 0208) */

Definition at line 134 of file pg_wchar.h.

◆ LC_JISX0208_1978

#define LC_JISX0208_1978   0x90 /* Japanese Kanji, old JIS (not supported) */

Definition at line 132 of file pg_wchar.h.

◆ LC_JISX0212

#define LC_JISX0212   0x94 /* Japanese Kanji (JIS X 0212) */

Definition at line 136 of file pg_wchar.h.

◆ LC_JISX0213_1

#define LC_JISX0213_1
Value:
0x97 /* Japanese Kanji (JIS X 0213 Plane 1)
* (not supported) */

Definition at line 139 of file pg_wchar.h.

◆ LC_KOI8_R

#define LC_KOI8_R   0x8b /* Cyrillic KOI8-R */

Definition at line 119 of file pg_wchar.h.

◆ LC_KS5601

#define LC_KS5601   0x93 /* Korean */

Definition at line 135 of file pg_wchar.h.

◆ LC_LAO

#define LC_LAO
Value:
0xa7 /* Lao characters (ISO10646 0E80..0EDF)
* (not supported) */

Definition at line 180 of file pg_wchar.h.

◆ LC_SISHENG

#define LC_SISHENG
Value:
0xa0 /* Chinese SiSheng characters for
* PinYin/ZhuYin (not supported) */

Definition at line 173 of file pg_wchar.h.

◆ LC_TIBETAN

#define LC_TIBETAN   0xfc /* Tibetan (not supported) */

Definition at line 198 of file pg_wchar.h.

◆ LC_TIBETAN_1_COLUMN

#define LC_TIBETAN_1_COLUMN
Value:
0xf1 /* Tibetan 1-column width glyphs (not
* supported) */

Definition at line 187 of file pg_wchar.h.

◆ LC_TIS620

#define LC_TIS620   0x85 /* Thai (not supported yet) */

Definition at line 109 of file pg_wchar.h.

◆ LC_UNICODE_SUBSET

#define LC_UNICODE_SUBSET
Value:
0xf4 /* Unicode characters of the range
* U+0100..U+24FF. (not supported) */

Definition at line 190 of file pg_wchar.h.

◆ LC_UNICODE_SUBSET_2

#define LC_UNICODE_SUBSET_2
Value:
0xf2 /* Unicode characters of the range
* U+2500..U+33FF. (not supported) */

Definition at line 188 of file pg_wchar.h.

◆ LC_UNICODE_SUBSET_3

#define LC_UNICODE_SUBSET_3
Value:
0xf3 /* Unicode characters of the range
* U+E000..U+FFFF. (not supported) */

Definition at line 189 of file pg_wchar.h.

◆ LC_VISCII_LOWER

#define LC_VISCII_LOWER
Value:
0xa2 /* Vietnamese VISCII1.1 lower-case (not
* supported) */

Definition at line 175 of file pg_wchar.h.

◆ LC_VISCII_UPPER

#define LC_VISCII_UPPER
Value:
0xa3 /* Vietnamese VISCII1.1 upper-case (not
* supported) */

Definition at line 176 of file pg_wchar.h.

◆ LCPRV1_A

#define LCPRV1_A   0x9a

Definition at line 150 of file pg_wchar.h.

◆ LCPRV1_B

#define LCPRV1_B   0x9b

Definition at line 151 of file pg_wchar.h.

◆ LCPRV2_A

#define LCPRV2_A   0x9c

Definition at line 162 of file pg_wchar.h.

◆ LCPRV2_B

#define LCPRV2_B   0x9d

Definition at line 163 of file pg_wchar.h.

◆ MAX_CONVERSION_GROWTH

#define MAX_CONVERSION_GROWTH   4

Definition at line 302 of file pg_wchar.h.

◆ MAX_CONVERSION_INPUT_LENGTH

#define MAX_CONVERSION_INPUT_LENGTH   16

Definition at line 320 of file pg_wchar.h.

◆ MAX_MULTIBYTE_CHAR_LEN

#define MAX_MULTIBYTE_CHAR_LEN   4

Definition at line 33 of file pg_wchar.h.

◆ MAX_UNICODE_EQUIVALENT_STRING

#define MAX_UNICODE_EQUIVALENT_STRING   16

Definition at line 329 of file pg_wchar.h.

◆ pg_char_to_encoding

#define pg_char_to_encoding   pg_char_to_encoding_private

Definition at line 629 of file pg_wchar.h.

◆ PG_ENCODING_BE_LAST

#define PG_ENCODING_BE_LAST   PG_KOI8U

Definition at line 275 of file pg_wchar.h.

◆ PG_ENCODING_IS_CLIENT_ONLY

#define PG_ENCODING_IS_CLIENT_ONLY (   _enc)     ((_enc) > PG_ENCODING_BE_LAST && (_enc) < _PG_LAST_ENCODING_)

Definition at line 284 of file pg_wchar.h.

◆ pg_encoding_to_char

#define pg_encoding_to_char   pg_encoding_to_char_private

Definition at line 630 of file pg_wchar.h.

◆ pg_utf_mblen

#define pg_utf_mblen   pg_utf_mblen_private

Definition at line 633 of file pg_wchar.h.

◆ PG_VALID_BE_ENCODING

#define PG_VALID_BE_ENCODING (   _enc)     ((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST)

Definition at line 281 of file pg_wchar.h.

◆ PG_VALID_ENCODING

#define PG_VALID_ENCODING (   _enc)     ((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_)

Definition at line 287 of file pg_wchar.h.

◆ PG_VALID_FE_ENCODING

#define PG_VALID_FE_ENCODING (   _enc)    PG_VALID_ENCODING(_enc)

Definition at line 291 of file pg_wchar.h.

◆ pg_valid_server_encoding

#define pg_valid_server_encoding   pg_valid_server_encoding_private

Definition at line 631 of file pg_wchar.h.

◆ pg_valid_server_encoding_id

#define pg_valid_server_encoding_id   pg_valid_server_encoding_id_private

Definition at line 632 of file pg_wchar.h.

◆ SS2

#define SS2   0x8e /* single shift 2 (JIS0201) */

Definition at line 38 of file pg_wchar.h.

◆ SS3

#define SS3   0x8f /* single shift 3 (JIS0212) */

Definition at line 39 of file pg_wchar.h.

Typedef Documentation

◆ mb2wchar_with_len_converter

typedef int(* mb2wchar_with_len_converter) (const unsigned char *from, pg_wchar *to, int len)

Definition at line 358 of file pg_wchar.h.

◆ mbchar_verifier

typedef int(* mbchar_verifier) (const unsigned char *mbstr, int len)

Definition at line 372 of file pg_wchar.h.

◆ mbcharacter_incrementer

typedef bool(* mbcharacter_incrementer) (unsigned char *mbstr, int len)

Definition at line 370 of file pg_wchar.h.

◆ mbdisplaylen_converter

typedef int(* mbdisplaylen_converter) (const unsigned char *mbstr)

Definition at line 368 of file pg_wchar.h.

◆ mblen_converter

typedef int(* mblen_converter) (const unsigned char *mbstr)

Definition at line 366 of file pg_wchar.h.

◆ mbstr_verifier

typedef int(* mbstr_verifier) (const unsigned char *mbstr, int len)

Definition at line 374 of file pg_wchar.h.

◆ pg_enc

typedef enum pg_enc pg_enc

◆ pg_enc2name

typedef struct pg_enc2name pg_enc2name

◆ pg_wchar

typedef unsigned int pg_wchar

Definition at line 28 of file pg_wchar.h.

◆ utf_local_conversion_func

typedef uint32(* utf_local_conversion_func) (uint32 code)

Definition at line 499 of file pg_wchar.h.

◆ wchar2mb_with_len_converter

typedef int(* wchar2mb_with_len_converter) (const pg_wchar *from, unsigned char *to, int len)

Definition at line 362 of file pg_wchar.h.

Enumeration Type Documentation

◆ pg_enc

enum pg_enc
Enumerator
PG_SQL_ASCII 
PG_EUC_JP 
PG_EUC_CN 
PG_EUC_KR 
PG_EUC_TW 
PG_EUC_JIS_2004 
PG_UTF8 
PG_MULE_INTERNAL 
PG_LATIN1 
PG_LATIN2 
PG_LATIN3 
PG_LATIN4 
PG_LATIN5 
PG_LATIN6 
PG_LATIN7 
PG_LATIN8 
PG_LATIN9 
PG_LATIN10 
PG_WIN1256 
PG_WIN1258 
PG_WIN866 
PG_WIN874 
PG_KOI8R 
PG_WIN1251 
PG_WIN1252 
PG_ISO_8859_5 
PG_ISO_8859_6 
PG_ISO_8859_7 
PG_ISO_8859_8 
PG_WIN1250 
PG_WIN1253 
PG_WIN1254 
PG_WIN1255 
PG_WIN1257 
PG_KOI8U 
PG_SJIS 
PG_BIG5 
PG_GBK 
PG_UHC 
PG_GB18030 
PG_JOHAB 
PG_SHIFT_JIS_2004 
_PG_LAST_ENCODING_ 

Definition at line 224 of file pg_wchar.h.

227 * WARNING: If you add some encoding don't forget to update
228 * the pg_enc2name_tbl[] array (in src/common/encnames.c),
229 * the pg_enc2gettext_tbl[] array (in src/common/encnames.c) and
230 * the pg_wchar_table[] array (in src/common/wchar.c) and to check
231 * PG_ENCODING_BE_LAST macro.
232 *
233 * PG_SQL_ASCII is default encoding and must be = 0.
234 *
235 * XXX We must avoid renumbering any backend encoding until libpq's major
236 * version number is increased beyond 5; it turns out that the backend
237 * encoding IDs are effectively part of libpq's ABI as far as 8.2 initdb and
238 * psql are concerned.
239 */
240typedef enum pg_enc
241{
242 PG_SQL_ASCII = 0, /* SQL/ASCII */
243 PG_EUC_JP, /* EUC for Japanese */
244 PG_EUC_CN, /* EUC for Chinese */
245 PG_EUC_KR, /* EUC for Korean */
246 PG_EUC_TW, /* EUC for Taiwan */
247 PG_EUC_JIS_2004, /* EUC-JIS-2004 */
248 PG_UTF8, /* Unicode UTF8 */
249 PG_MULE_INTERNAL, /* Mule internal code */
250 PG_LATIN1, /* ISO-8859-1 Latin 1 */
251 PG_LATIN2, /* ISO-8859-2 Latin 2 */
252 PG_LATIN3, /* ISO-8859-3 Latin 3 */
253 PG_LATIN4, /* ISO-8859-4 Latin 4 */
254 PG_LATIN5, /* ISO-8859-9 Latin 5 */
255 PG_LATIN6, /* ISO-8859-10 Latin6 */
256 PG_LATIN7, /* ISO-8859-13 Latin7 */
257 PG_LATIN8, /* ISO-8859-14 Latin8 */
258 PG_LATIN9, /* ISO-8859-15 Latin9 */
259 PG_LATIN10, /* ISO-8859-16 Latin10 */
260 PG_WIN1256, /* windows-1256 */
261 PG_WIN1258, /* Windows-1258 */
262 PG_WIN866, /* (MS-DOS CP866) */
263 PG_WIN874, /* windows-874 */
264 PG_KOI8R, /* KOI8-R */
265 PG_WIN1251, /* windows-1251 */
266 PG_WIN1252, /* windows-1252 */
267 PG_ISO_8859_5, /* ISO-8859-5 */
268 PG_ISO_8859_6, /* ISO-8859-6 */
269 PG_ISO_8859_7, /* ISO-8859-7 */
270 PG_ISO_8859_8, /* ISO-8859-8 */
271 PG_WIN1250, /* windows-1250 */
272 PG_WIN1253, /* windows-1253 */
273 PG_WIN1254, /* windows-1254 */
int32 encoding
Definition: pg_database.h:41

Function Documentation

◆ BIG5toCNS()

unsigned short BIG5toCNS ( unsigned short  big5,
unsigned char *  lc 
)

Definition at line 292 of file big5.c.

293{
294 unsigned short cns = 0;
295 int i;
296
297 if (big5 < 0xc940U)
298 {
299 /* level 1 */
300
301 for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
302 {
303 if (b1c4[i][0] == big5)
304 {
305 *lc = LC_CNS11643_4;
306 return (b1c4[i][1] | 0x8080U);
307 }
308 }
309
310 if (0 < (cns = BinarySearchRange(big5Level1ToCnsPlane1, 23, big5)))
311 *lc = LC_CNS11643_1;
312 }
313 else if (big5 == 0xc94aU)
314 {
315 /* level 2 */
316 *lc = LC_CNS11643_1;
317 cns = 0x4442;
318 }
319 else
320 {
321 /* level 2 */
322 for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
323 {
324 if (b2c3[i][0] == big5)
325 {
326 *lc = LC_CNS11643_3;
327 return (b2c3[i][1] | 0x8080U);
328 }
329 }
330
331 if (0 < (cns = BinarySearchRange(big5Level2ToCnsPlane2, 46, big5)))
332 *lc = LC_CNS11643_2;
333 }
334
335 if (0 == cns)
336 { /* no mapping Big5 to CNS 11643-1992 */
337 *lc = 0;
338 return (unsigned short) '?';
339 }
340
341 return cns | 0x8080;
342}
static const codes_t big5Level1ToCnsPlane1[25]
Definition: big5.c:25
static const codes_t big5Level2ToCnsPlane2[48]
Definition: big5.c:84
static unsigned short BinarySearchRange(const codes_t *array, int high, unsigned short code)
Definition: big5.c:208
static const unsigned short b2c3[][2]
Definition: big5.c:197
static const unsigned short b1c4[][2]
Definition: big5.c:189
int i
Definition: isn.c:77
#define LC_CNS11643_3
Definition: pg_wchar.h:192
#define LC_CNS11643_1
Definition: pg_wchar.h:137
#define LC_CNS11643_4
Definition: pg_wchar.h:193
#define LC_CNS11643_2
Definition: pg_wchar.h:138

References b1c4, b2c3, big5Level1ToCnsPlane1, big5Level2ToCnsPlane2, BinarySearchRange(), i, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, and LC_CNS11643_4.

Referenced by big52euc_tw(), and big52mic().

◆ check_encoding_conversion_args()

void check_encoding_conversion_args ( int  src_encoding,
int  dest_encoding,
int  len,
int  expected_src_encoding,
int  expected_dest_encoding 
)

Definition at line 1669 of file mbutils.c.

1674{
1675 if (!PG_VALID_ENCODING(src_encoding))
1676 elog(ERROR, "invalid source encoding ID: %d", src_encoding);
1677 if (src_encoding != expected_src_encoding && expected_src_encoding >= 0)
1678 elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
1679 pg_enc2name_tbl[expected_src_encoding].name,
1680 pg_enc2name_tbl[src_encoding].name);
1681 if (!PG_VALID_ENCODING(dest_encoding))
1682 elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
1683 if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= 0)
1684 elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
1685 pg_enc2name_tbl[expected_dest_encoding].name,
1686 pg_enc2name_tbl[dest_encoding].name);
1687 if (len < 0)
1688 elog(ERROR, "encoding conversion length must not be negative");
1689}
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:308
const void size_t len
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:287
const char * name

References elog, ERROR, len, name, pg_enc2name_tbl, and PG_VALID_ENCODING.

◆ CNStoBIG5()

unsigned short CNStoBIG5 ( unsigned short  cns,
unsigned char  lc 
)

Definition at line 345 of file big5.c.

346{
347 int i;
348 unsigned int big5 = 0;
349
350 cns &= 0x7f7f;
351
352 switch (lc)
353 {
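354 case LC_CNS11643_1:
355 big5 = BinarySearchRange(cnsPlane1ToBig5Level1, 24, cns);
356 break;
357 case LC_CNS11643_2:
358 big5 = BinarySearchRange(cnsPlane2ToBig5Level2, 47, cns);
359 break;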
360 case LC_CNS11643_3:
361 for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
362 {
363 if (b2c3[i][1] == cns)
364 return b2c3[i][0];
365 }
366 break;
367 case LC_CNS11643_4:
368 for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
369 {
370 if (b1c4[i][1] == cns)
371 return b1c4[i][0];
372 }
373 default:
374 break;
375 }
376 return big5;
377}
static const codes_t cnsPlane2ToBig5Level2[49]
Definition: big5.c:136
static const codes_t cnsPlane1ToBig5Level1[26]
Definition: big5.c:54

References b1c4, b2c3, BinarySearchRange(), cnsPlane1ToBig5Level1, cnsPlane2ToBig5Level2, i, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, and LC_CNS11643_4.

Referenced by euc_tw2big5(), and mic2big5().

◆ get_encoding_name_for_icu()

const char * get_encoding_name_for_icu ( int  encoding)

Definition at line 472 of file encnames.c.

473{
474 if (!PG_VALID_BE_ENCODING(encoding))
475 return NULL;
476 return pg_enc2icu_tbl[encoding];
477}
static const char *const pg_enc2icu_tbl[]
Definition: encnames.c:414
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:281

References encoding, pg_enc2icu_tbl, and PG_VALID_BE_ENCODING.

◆ GetDatabaseEncoding()

int GetDatabaseEncoding ( void  )

Definition at line 1261 of file mbutils.c.

1262{
1263 return DatabaseEncoding->encoding;
1264}
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81
pg_enc encoding
Definition: pg_wchar.h:342

References DatabaseEncoding, and pg_enc2name::encoding.

Referenced by ascii(), BeginCopyFrom(), BeginCopyTo(), char2wchar(), chr(), CollationCreate(), CollationGetCollid(), compareStrings(), convert_from_utf8(), convert_to_utf8(), CopyConversionError(), CopyConvertBuf(), create_pg_locale_builtin(), create_pg_locale_icu(), create_pg_locale_libc(), cstr2sv(), dblink_connect(), dblink_get_conn(), DefineCollation(), Generic_Text_IC_like(), GenericMatchText(), get_collation_oid(), get_json_object_as_hash(), InitializeClientEncoding(), IsThereCollationInNamespace(), json_recv(), jsonb_from_cstring(), locate_stem_module(), LogicalOutputWrite(), makeJsonLexContext(), p_isspecial(), ParallelWorkerMain(), pg_database_encoding_character_incrementer(), pg_database_encoding_max_length(), pg_generic_charinc(), pg_perm_setlocale(), pg_set_regex_collation(), pg_unicode_to_server(), pg_unicode_to_server_noerror(), pg_verifymbstr(), pgss_store(), PLyUnicode_Bytes(), populate_array_json(), PrepareClientEncoding(), read_extension_script_file(), SetClientEncoding(), str_casefold(), sv2cstr(), text_position_setup(), to_ascii_default(), type_maximum_size(), unicode_assigned(), unicode_norm_form_from_string(), wchar2char(), xml_in(), xml_is_document(), xmlparse(), and xmltotext_with_options().

◆ GetDatabaseEncodingName()

◆ GetMessageEncoding()

int GetMessageEncoding ( void  )

Definition at line 1308 of file mbutils.c.

1309{
1310 return MessageEncoding->encoding;
1311}
static const pg_enc2name * MessageEncoding
Definition: mbutils.c:82

References pg_enc2name::encoding, and MessageEncoding.

◆ InitializeClientEncoding()

void InitializeClientEncoding ( void  )

Definition at line 281 of file mbutils.c.

282{
283 int current_server_encoding;
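284
285 Assert(!backend_startup_complete);
286 backend_startup_complete = true;
287
288 if (PrepareClientEncoding(pending_client_encoding) < 0 ||
289 SetClientEncoding(pending_client_encoding) < 0)
290 {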
291 /*
292 * Oops, the requested conversion is not available. We couldn't fail
293 * before, but we can now.
294 */
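295 ereport(FATAL,
296 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
297 errmsg("conversion between %s and %s is not supported",
298 pg_enc2name_tbl[pending_client_encoding].name,
299 GetDatabaseEncodingName())));
300 }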
301
302 /*
303 * Also look up the UTF8-to-server conversion function if needed. Since
304 * the server encoding is fixed within any one backend process, we don't
305 * have to do this more than once.
306 */
307 current_server_encoding = GetDatabaseEncoding();
308 if (current_server_encoding != PG_UTF8 &&
309 current_server_encoding != PG_SQL_ASCII)
310 {
311 Oid utf8_to_server_proc;
312
313 Assert(IsTransactionState());
314 utf8_to_server_proc =
315 FindDefaultConversionProc(PG_UTF8,
316 current_server_encoding);
317 /* If there's no such conversion, just leave the pointer as NULL */
318 if (OidIsValid(utf8_to_server_proc))
319 {
320 FmgrInfo *finfo;
321
322 finfo = (FmgrInfo *) MemoryContextAlloc(TopMemoryContext,
323 sizeof(FmgrInfo));
324 fmgr_info_cxt(utf8_to_server_proc, finfo,
325 TopMemoryContext);
326 /* Set Utf8ToServerConvProc only after data is fully valid */
327 Utf8ToServerConvProc = finfo;
328 }
329 }
330}
#define OidIsValid(objectId)
Definition: c.h:746
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define FATAL
Definition: elog.h:41
#define ereport(elevel,...)
Definition: elog.h:149
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:137
Assert(PointerIsAligned(start, uint64))
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
static FmgrInfo * Utf8ToServerConvProc
Definition: mbutils.c:75
const char * GetDatabaseEncodingName(void)
Definition: mbutils.c:1267
int SetClientEncoding(int encoding)
Definition: mbutils.c:208
int PrepareClientEncoding(int encoding)
Definition: mbutils.c:110
static bool backend_startup_complete
Definition: mbutils.c:90
static int pending_client_encoding
Definition: mbutils.c:91
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1256
MemoryContext TopMemoryContext
Definition: mcxt.c:165
Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding)
Definition: namespace.c:4080
@ PG_SQL_ASCII
Definition: pg_wchar.h:226
@ PG_UTF8
Definition: pg_wchar.h:232
unsigned int Oid
Definition: postgres_ext.h:30
Definition: fmgr.h:57
bool IsTransactionState(void)
Definition: xact.c:387

References Assert(), backend_startup_complete, ereport, errcode(), errmsg(), FATAL, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), GetDatabaseEncodingName(), IsTransactionState(), MemoryContextAlloc(), name, OidIsValid, pending_client_encoding, pg_enc2name_tbl, PG_SQL_ASCII, PG_UTF8, PrepareClientEncoding(), SetClientEncoding(), TopMemoryContext, and Utf8ToServerConvProc.

Referenced by InitPostgres().

◆ is_encoding_supported_by_icu()

bool is_encoding_supported_by_icu ( int  encoding)

Definition at line 461 of file encnames.c.

462{
463 if (!PG_VALID_BE_ENCODING(encoding))
464 return false;
465 return (pg_enc2icu_tbl[encoding] != NULL);
466}

References encoding, pg_enc2icu_tbl, and PG_VALID_BE_ENCODING.

Referenced by check_icu_locale_encoding(), createdb(), DefineCollation(), and lookup_collation().

◆ is_utf16_surrogate_first()

static bool is_utf16_surrogate_first ( pg_wchar  c)
inline static

Definition at line 525 of file pg_wchar.h.

536{

Referenced by addUnicode(), json_lex_string(), str_udeescape(), and unistr().

◆ is_utf16_surrogate_second()

static bool is_utf16_surrogate_second ( pg_wchar  c)
inline static

Definition at line 531 of file pg_wchar.h.

536{

Referenced by addUnicode(), json_lex_string(), str_udeescape(), and unistr().

◆ is_valid_unicode_codepoint()

static bool is_valid_unicode_codepoint ( pg_wchar  c)
inline static

◆ latin2mic()

int latin2mic ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
bool  noError 
)

Definition at line 89 of file conv.c.

91{
92 const unsigned char *start = l;
93 int c1;
94
95 while (len > 0)
96 {
97 c1 = *l;
98 if (c1 == 0)
99 {
100 if (noError)
101 break;
102 report_invalid_encoding(encoding, (const char *) l, len);
103 }
104 if (IS_HIGHBIT_SET(c1))
105 *p++ = lc;
106 *p++ = c1;
107 l++;
108 len--;
109 }
110 *p = '\0';
111
112 return l - start;
113}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1126
return str start
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698

References encoding, IS_HIGHBIT_SET, len, report_invalid_encoding(), and start.

Referenced by koi8r_to_mic(), latin1_to_mic(), latin2_to_mic(), latin3_to_mic(), and latin4_to_mic().

◆ latin2mic_with_table()

int latin2mic_with_table ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
const unsigned char *  tab,
bool  noError 
)

Definition at line 194 of file conv.c.

201{
202 const unsigned char *start = l;
203 unsigned char c1,
204 c2;
205
206 while (len > 0)
207 {
208 c1 = *l;
209 if (c1 == 0)
210 {
211 if (noError)
212 break;
213 report_invalid_encoding(encoding, (const char *) l, len);
214 }
215 if (!IS_HIGHBIT_SET(c1))
216 *p++ = c1;
217 else
218 {
219 c2 = tab[c1 - HIGHBIT];
220 if (c2)
221 {
222 *p++ = lc;
223 *p++ = c2;
224 }
225 else
226 {
227 if (noError)
228 break;
229 report_untranslatable_char(encoding, PG_MULE_INTERNAL,
230 (const char *) l, len);
231 }
232 }
233 l++;
234 len--;
235 }
236 *p = '\0';
237
238 return l - start;
239}
#define HIGHBIT
Definition: c.h:1125
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1730
@ PG_MULE_INTERNAL
Definition: pg_wchar.h:233

References encoding, HIGHBIT, IS_HIGHBIT_SET, len, PG_MULE_INTERNAL, report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by iso_to_mic(), win1250_to_mic(), win1251_to_mic(), and win866_to_mic().

◆ local2local()

int local2local ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  src_encoding,
int  dest_encoding,
const unsigned char *  tab,
bool  noError 
)

Definition at line 33 of file conv.c.

40{
41 const unsigned char *start = l;
42 unsigned char c1,
43 c2;
44
45 while (len > 0)
46 {
47 c1 = *l;
48 if (c1 == 0)
49 {
50 if (noError)
51 break;
52 report_invalid_encoding(src_encoding, (const char *) l, len);
53 }
54 if (!IS_HIGHBIT_SET(c1))
55 *p++ = c1;
56 else
57 {
58 c2 = tab[c1 - HIGHBIT];
59 if (c2)
60 *p++ = c2;
61 else
62 {
63 if (noError)
64 break;
65 report_untranslatable_char(src_encoding, dest_encoding,
66 (const char *) l, len);
67 }
68 }
69 l++;
70 len--;
71 }
72 *p = '\0';
73
74 return l - start;
75}

References HIGHBIT, IS_HIGHBIT_SET, len, report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by iso_to_koi8r(), iso_to_win1251(), iso_to_win866(), koi8r_to_iso(), koi8r_to_win1251(), koi8r_to_win866(), latin2_to_win1250(), win1250_to_latin2(), win1251_to_iso(), win1251_to_koi8r(), win1251_to_win866(), win866_to_iso(), win866_to_koi8r(), and win866_to_win1251().

◆ LocalToUtf()

int LocalToUtf ( const unsigned char *  iso,
int  len,
unsigned char *  utf,
const pg_mb_radix_tree map,
const pg_local_to_utf_combined cmap,
int  cmapsize,
utf_local_conversion_func  conv_func,
int  encoding,
bool  noError 
)

Definition at line 717 of file conv.c.

724{
725 uint32 iiso;
726 int l;
727 const pg_local_to_utf_combined *cp;
728 const unsigned char *start = iso;
729
730 if (!PG_VALID_ENCODING(encoding))
731 ereport(ERROR,
732 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
733 errmsg("invalid encoding number: %d", encoding)));
734
735 for (; len > 0; len -= l)
736 {
737 unsigned char b1 = 0;
738 unsigned char b2 = 0;
739 unsigned char b3 = 0;
740 unsigned char b4 = 0;
741
742 /* "break" cases all represent errors */
743 if (*iso == '\0')
744 break;
745
746 if (!IS_HIGHBIT_SET(*iso))
747 {
748 /* ASCII case is easy, assume it's one-to-one conversion */
749 *utf++ = *iso++;
750 l = 1;
751 continue;
752 }
753
754 l = pg_encoding_verifymbchar(encoding, (const char *) iso, len);
755 if (l < 0)
756 break;
757
758 /* collect coded char of length l */
759 if (l == 1)
760 b4 = *iso++;
761 else if (l == 2)
762 {
763 b3 = *iso++;
764 b4 = *iso++;
765 }
766 else if (l == 3)
767 {
768 b2 = *iso++;
769 b3 = *iso++;
770 b4 = *iso++;
771 }
772 else if (l == 4)
773 {
774 b1 = *iso++;
775 b2 = *iso++;
776 b3 = *iso++;
777 b4 = *iso++;
778 }
779 else
780 {
781 elog(ERROR, "unsupported character length %d", l);
782 iiso = 0; /* keep compiler quiet */
783 }
784 iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
785
786 if (map)
787 {
788 uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
789
790 if (converted)
791 {
792 utf = store_coded_char(utf, converted);
793 continue;
794 }
795
796 /* If there's a combined character map, try that */
797 if (cmap)
798 {
799 cp = bsearch(&iiso, cmap, cmapsize,
800 sizeof(pg_local_to_utf_combined), compare4);
801
802 if (cp)
803 {
804 utf = store_coded_char(utf, cp->utf1);
805 utf = store_coded_char(utf, cp->utf2);
806 continue;
807 }
808 }
809 }
810
811 /* if there's a conversion function, try that */
812 if (conv_func)
813 {
814 uint32 converted = (*conv_func) (iiso);
815
816 if (converted)
817 {
818 utf = store_coded_char(utf, converted);
819 continue;
820 }
821 }
822
823 /* failed to translate this character */
824 iso -= l;
825 if (noError)
826 break;
827 report_untranslatable_char(encoding, PG_UTF8,
828 (const char *) iso, len);
829 }
830
831 /* if we broke out of loop early, must be invalid input */
832 if (len > 0 && !noError)
833 report_invalid_encoding(encoding, (const char *) iso, len);
834
835 *utf = '\0';
836
837 return iso - start;
838}
uint32_t uint32
Definition: c.h:502
static unsigned char * store_coded_char(unsigned char *dest, uint32 code)
Definition: conv.c:353
static uint32 pg_mb_radix_conv(const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)
Definition: conv.c:373
static int compare4(const void *p1, const void *p2)
Definition: conv.c:339
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition: wchar.c:2150

References compare4(), elog, encoding, ereport, errcode(), errmsg(), ERROR, IS_HIGHBIT_SET, len, pg_encoding_verifymbchar(), pg_mb_radix_conv(), PG_UTF8, PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), start, store_coded_char(), pg_local_to_utf_combined::utf1, and pg_local_to_utf_combined::utf2.

Referenced by big5_to_utf8(), euc_cn_to_utf8(), euc_jis_2004_to_utf8(), euc_jp_to_utf8(), euc_kr_to_utf8(), euc_tw_to_utf8(), gb18030_to_utf8(), gbk_to_utf8(), iso8859_to_utf8(), johab_to_utf8(), koi8r_to_utf8(), koi8u_to_utf8(), shift_jis_2004_to_utf8(), sjis_to_utf8(), uhc_to_utf8(), and win_to_utf8().

◆ mic2latin()

int mic2latin ( const unsigned char *  mic,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
bool  noError 
)

Definition at line 127 of file conv.c.

129{
130 const unsigned char *start = mic;
131 int c1;
132
133 while (len > 0)
134 {
135 c1 = *mic;
136 if (c1 == 0)
137 {
138 if (noError)
139 break;
140 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
141 }
142 if (!IS_HIGHBIT_SET(c1))
143 {
144 /* easy for ASCII */
145 *p++ = c1;
146 mic++;
147 len--;
148 }
149 else
150 {
151 int l = pg_mule_mblen(mic);
152
153 if (len < l)
154 {
155 if (noError)
156 break;
157 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
158 len);
159 }
160 if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
161 {
162 if (noError)
163 break;
164 report_untranslatable_char(PG_MULE_INTERNAL, encoding,
165 (const char *) mic, len);
166 }
167 *p++ = mic[1];
168 mic += 2;
169 len -= 2;
170 }
171 }
172 *p = '\0';
173
174 return mic - start;
175}
int pg_mule_mblen(const unsigned char *s)
Definition: wchar.c:791

References encoding, IS_HIGHBIT_SET, len, PG_MULE_INTERNAL, pg_mule_mblen(), report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by mic_to_koi8r(), mic_to_latin1(), mic_to_latin2(), mic_to_latin3(), and mic_to_latin4().

◆ mic2latin_with_table()

int mic2latin_with_table ( const unsigned char *  mic,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
const unsigned char *  tab,
bool  noError 
)

Definition at line 257 of file conv.c.

264{
265 const unsigned char *start = mic;
266 unsigned char c1,
267 c2;
268
269 while (len > 0)
270 {
271 c1 = *mic;
272 if (c1 == 0)
273 {
274 if (noError)
275 break;
276 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
277 }
278 if (!IS_HIGHBIT_SET(c1))
279 {
280 /* easy for ASCII */
281 *p++ = c1;
282 mic++;
283 len--;
284 }
285 else
286 {
287 int l = pg_mule_mblen(mic);
288
289 if (len < l)
290 {
291 if (noError)
292 break;
293 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
294 len);
295 }
296 if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
297 (c2 = tab[mic[1] - HIGHBIT]) == 0)
298 {
299 if (noError)
300 break;
301 report_untranslatable_char(PG_MULE_INTERNAL, encoding,
302 (const char *) mic, len);
303 break; /* keep compiler quiet */
304 }
305 *p++ = c2;
306 mic += 2;
307 len -= 2;
308 }
309 }
310 *p = '\0';
311
312 return mic - start;
313}

References encoding, HIGHBIT, IS_HIGHBIT_SET, len, PG_MULE_INTERNAL, pg_mule_mblen(), report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by mic_to_iso(), mic_to_win1250(), mic_to_win1251(), and mic_to_win866().

◆ pg_any_to_server()

char * pg_any_to_server ( const char *  s,
int  len,
int  encoding 
)

Definition at line 676 of file mbutils.c.

677{
678 if (len <= 0)
679 return unconstify(char *, s); /* empty string is always valid */
680
681 if (encoding == DatabaseEncoding->encoding ||
682 encoding == PG_SQL_ASCII)
683 {
684 /*
685 * No conversion is needed, but we must still validate the data.
686 */
687 (void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
688 return unconstify(char *, s);
689 }
690
691 if (DatabaseEncoding->encoding == PG_SQL_ASCII)
692 {
693 /*
694 * No conversion is possible, but we must still validate the data,
695 * because the client-side code might have done string escaping using
696 * the selected client_encoding. If the client encoding is ASCII-safe
697 * then we just do a straight validation under that encoding. For an
698 * ASCII-unsafe encoding we have a problem: we dare not pass such data
699 * to the parser but we have no way to convert it. We compromise by
700 * rejecting the data if it contains any non-ASCII characters.
701 */
702 if (PG_VALID_BE_ENCODING(encoding))
703 (void) pg_verify_mbstr(encoding, s, len, false);
704 else
705 {
706 int i;
707
708 for (i = 0; i < len; i++)
709 {
710 if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
711 ereport(ERROR,
712 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
713 errmsg("invalid byte value for encoding \"%s\": 0x%02x",
714 pg_enc2name_tbl[PG_SQL_ASCII].name,
715 (unsigned char) s[i])));
716 }
717 }
718 return unconstify(char *, s);
719 }
720
721 /* Fast path if we can use cached conversion function */
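722 if (encoding == ClientEncoding->encoding)
723 return perform_default_encoding_conversion(s, len, true);
724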
725 /* General case ... will not work outside transactions */
726 return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
727 len,
728 encoding,
729 DatabaseEncoding->encoding);
730}
#define unconstify(underlying_type, expr)
Definition: c.h:1216
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
unsigned char * pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding)
Definition: mbutils.c:356
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1566
static char * perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)
Definition: mbutils.c:783

References ClientEncoding, DatabaseEncoding, encoding, pg_enc2name::encoding, ereport, errcode(), errmsg(), ERROR, i, IS_HIGHBIT_SET, len, name, perform_default_encoding_conversion(), pg_do_encoding_conversion(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_BE_ENCODING, pg_verify_mbstr(), and unconstify.

Referenced by ASN1_STRING_to_text(), cache_single_string(), db_encoding_convert(), dsnowball_lexize(), pg_client_to_server(), pg_stat_statements_internal(), pgp_armor_headers(), PLyUnicode_Bytes(), read_extension_script_file(), tsearch_readline(), utf_u2e(), X509_NAME_to_cstring(), and xml_recv().

◆ pg_char_and_wchar_strcmp()

int pg_char_and_wchar_strcmp ( const char *  s1,
const pg_wchar s2 
)

Definition at line 41 of file wstrcmp.c.

42{
43 while ((pg_wchar) *s1 == *s2++)
44 if (*s1++ == 0)
45 return 0;
46 return *(const unsigned char *) s1 - *(const pg_wchar *) (s2 - 1);
47}
unsigned int pg_wchar
Definition: mbprint.c:31
char * s1
char * s2

References s1, and s2.

◆ pg_char_and_wchar_strncmp()

int pg_char_and_wchar_strncmp ( const char *  s1,
const pg_wchar s2,
size_t  n 
)

Definition at line 55 of file wstrncmp.c.

56{
57 if (n == 0)
58 return 0;
59 do
60 {
61 if ((pg_wchar) ((unsigned char) *s1) != *s2++)
62 return ((pg_wchar) ((unsigned char) *s1) - *(s2 - 1));
63 if (*s1++ == 0)
64 break;
65 } while (--n != 0);
66 return 0;
67}

References s1, and s2.

Referenced by element(), and lookupcclass().

◆ pg_char_to_encoding()

int pg_char_to_encoding ( const char *  name)

Definition at line 549 of file encnames.c.

550{
551 unsigned int nel = lengthof(pg_encname_tbl);
552 const pg_encname *base = pg_encname_tbl,
553 *last = base + nel - 1,
554 *position;
555 int result;
556 char buff[NAMEDATALEN],
557 *key;
558
559 if (name == NULL || *name == '\0')
560 return -1;
561
562 if (strlen(name) >= NAMEDATALEN)
563 return -1; /* it's certainly not in the table */
564
565 key = clean_encoding_name(name, buff);
566
567 while (last >= base)
568 {
569 position = base + ((last - base) >> 1);
570 result = key[0] - position->name[0];
571
572 if (result == 0)
573 {
574 result = strcmp(key, position->name);
575 if (result == 0)
576 return position->encoding;
577 }
578 if (result < 0)
579 last = position - 1;
580 else
581 base = position + 1;
582 }
583 return -1;
584}
#define lengthof(array)
Definition: c.h:759
static char * clean_encoding_name(const char *key, char *newkey)
Definition: encnames.c:524
static const pg_encname pg_encname_tbl[]
Definition: encnames.c:39
#define NAMEDATALEN

◆ pg_client_to_server()

char * pg_client_to_server ( const char *  s,
int  len 
)

Definition at line 660 of file mbutils.c.

661{
662 return pg_any_to_server(s, len, ClientEncoding->encoding);
663}
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:676

References ClientEncoding, pg_enc2name::encoding, len, and pg_any_to_server().

Referenced by exec_bind_message(), parse_fcall_arguments(), pq_getmsgstring(), and pq_getmsgtext().

◆ pg_database_encoding_character_incrementer()

mbcharacter_incrementer pg_database_encoding_character_incrementer ( void  )

Definition at line 1523 of file mbutils.c.

1524{
1525 /*
1526 * Eventually it might be best to add a field to pg_wchar_table[], but for
1527 * now we just use a switch.
1528 */
1529 switch (GetDatabaseEncoding())
1530 {
1531 case PG_UTF8:
1532 return pg_utf8_increment;
1533
1534 case PG_EUC_JP:
1535 return pg_eucjp_increment;
1536
1537 default:
1538 return pg_generic_charinc;
1539 }
1540}
static bool pg_generic_charinc(unsigned char *charptr, int len)
Definition: mbutils.c:1325
static bool pg_utf8_increment(unsigned char *charptr, int length)
Definition: mbutils.c:1359
static bool pg_eucjp_increment(unsigned char *charptr, int length)
Definition: mbutils.c:1437
@ PG_EUC_JP
Definition: pg_wchar.h:227

References GetDatabaseEncoding(), PG_EUC_JP, pg_eucjp_increment(), pg_generic_charinc(), PG_UTF8, and pg_utf8_increment().

Referenced by make_greater_string().

◆ pg_database_encoding_max_length()

◆ pg_do_encoding_conversion()

unsigned char * pg_do_encoding_conversion ( unsigned char *  src,
int  len,
int  src_encoding,
int  dest_encoding 
)

Definition at line 356 of file mbutils.c.

358{
359 unsigned char *result;
360 Oid proc;
361
362 if (len <= 0)
363 return src; /* empty string is always valid */
364
365 if (src_encoding == dest_encoding)
366 return src; /* no conversion required, assume valid */
367
368 if (dest_encoding == PG_SQL_ASCII)
369 return src; /* any string is valid in SQL_ASCII */
370
371 if (src_encoding == PG_SQL_ASCII)
372 {
373 /* No conversion is possible, but we must validate the result */
374 (void) pg_verify_mbstr(dest_encoding, (const char *) src, len, false);
375 return src;
376 }
377
378 if (!IsTransactionState()) /* shouldn't happen */
379 elog(ERROR, "cannot perform encoding conversion outside a transaction");
380
381 proc = FindDefaultConversionProc(src_encoding, dest_encoding);
382 if (!OidIsValid(proc))
383 ereport(ERROR,
384 (errcode(ERRCODE_UNDEFINED_FUNCTION),
385 errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
386 pg_encoding_to_char(src_encoding),
387 pg_encoding_to_char(dest_encoding))));
388
389 /*
390 * Allocate space for conversion result, being wary of integer overflow.
391 *
392 * len * MAX_CONVERSION_GROWTH is typically a vast overestimate of the
393 * required space, so it might exceed MaxAllocSize even though the result
394 * would actually fit. We do not want to hand back a result string that
395 * exceeds MaxAllocSize, because callers might not cope gracefully --- but
396 * if we just allocate more than that, and don't use it, that's fine.
397 */
398 if ((Size) len >= (MaxAllocHugeSize / (Size) MAX_CONVERSION_GROWTH))
399 ereport(ERROR,
400 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
401 errmsg("out of memory"),
402 errdetail("String of %d bytes is too long for encoding conversion.",
403 len)));
404
405 result = (unsigned char *)
406 MemoryContextAllocHuge(CurrentMemoryContext,
407 (Size) len * MAX_CONVERSION_GROWTH + 1);
408
409 (void) OidFunctionCall6(proc,
410 Int32GetDatum(src_encoding),
411 Int32GetDatum(dest_encoding),
412 CStringGetDatum((char *) src),
413 CStringGetDatum((char *) result),
414 Int32GetDatum(len),
415 BoolGetDatum(false));
416
417 /*
418 * If the result is large, it's worth repalloc'ing to release any extra
419 * space we asked for. The cutoff here is somewhat arbitrary, but we
420 * *must* check when len * MAX_CONVERSION_GROWTH exceeds MaxAllocSize.
421 */
422 if (len > 1000000)
423 {
424 Size resultlen = strlen((char *) result);
425
426 if (resultlen >= MaxAllocSize)
427 ereport(ERROR,
428 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
429 errmsg("out of memory"),
430 errdetail("String of %d bytes is too long for encoding conversion.",
431 len)));
432
433 result = (unsigned char *) repalloc(result, resultlen + 1);
434 }
435
436 return result;
437}
size_t Size
Definition: c.h:576
int errdetail(const char *fmt,...)
Definition: elog.c:1204
#define MaxAllocSize
Definition: fe_memutils.h:22
#define OidFunctionCall6(functionId, arg1, arg2, arg3, arg4, arg5, arg6)
Definition: fmgr.h:730
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:2167
MemoryContext CurrentMemoryContext
Definition: mcxt.c:159
void * MemoryContextAllocHuge(MemoryContext context, Size size)
Definition: mcxt.c:2266
#define MaxAllocHugeSize
Definition: memutils.h:48
#define MAX_CONVERSION_GROWTH
Definition: pg_wchar.h:302
#define pg_encoding_to_char
Definition: pg_wchar.h:630
static Datum BoolGetDatum(bool X)
Definition: postgres.h:107
static Datum CStringGetDatum(const char *X)
Definition: postgres.h:355
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:217

References BoolGetDatum(), CStringGetDatum(), CurrentMemoryContext, elog, ereport, errcode(), errdetail(), errmsg(), ERROR, FindDefaultConversionProc(), Int32GetDatum(), IsTransactionState(), len, MAX_CONVERSION_GROWTH, MaxAllocHugeSize, MaxAllocSize, MemoryContextAllocHuge(), OidFunctionCall6, OidIsValid, pg_encoding_to_char, PG_SQL_ASCII, pg_verify_mbstr(), and repalloc().

Referenced by convert_charset(), pg_any_to_server(), pg_convert(), and pg_server_to_any().

◆ pg_do_encoding_conversion_buf()

int pg_do_encoding_conversion_buf ( Oid  proc,
int  src_encoding,
int  dest_encoding,
unsigned char *  src,
int  srclen,
unsigned char *  dest,
int  destlen,
bool  noError 
)

Definition at line 469 of file mbutils.c.

475{
476 Datum result;
477
478 /*
479 * If the destination buffer is not large enough to hold the result in the
480 * worst case, limit the input size passed to the conversion function.
481 */
482 if ((Size) srclen >= ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH))
483 srclen = ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH);
484
485 result = OidFunctionCall6(proc,
486 Int32GetDatum(src_encoding),
487 Int32GetDatum(dest_encoding),
488 CStringGetDatum((char *) src),
489 CStringGetDatum((char *) dest),
490 Int32GetDatum(srclen),
491 BoolGetDatum(noError));
492 return DatumGetInt32(result);
493}
uintptr_t Datum
Definition: postgres.h:69
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:207

References BoolGetDatum(), CStringGetDatum(), DatumGetInt32(), generate_unaccent_rules::dest, Int32GetDatum(), MAX_CONVERSION_GROWTH, and OidFunctionCall6.

Referenced by CopyConversionError(), CopyConvertBuf(), and test_enc_conversion().

◆ pg_dsplen()

int pg_dsplen ( const char *  mbstr)

Definition at line 1030 of file mbutils.c.

1031{
1032 return pg_wchar_table[DatabaseEncoding->encoding].dsplen((const unsigned char *) mbstr);
1033}
mbdisplaylen_converter dsplen
Definition: pg_wchar.h:383

References DatabaseEncoding, pg_wchar_tbl::dsplen, pg_enc2name::encoding, and pg_wchar_table.

Referenced by p_isspecial().

◆ pg_encoding_dsplen()

int pg_encoding_dsplen ( int  encoding,
const char *  mbstr 
)

Definition at line 2137 of file wchar.c.

2138{
2139 return (PG_VALID_ENCODING(encoding) ?
2140 pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
2141 pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
2142}

References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by PQdsplen(), and reportErrorPosition().

◆ pg_encoding_max_length()

int pg_encoding_max_length ( int  encoding)

Definition at line 2174 of file wchar.c.

2175{
2176 Assert(PG_VALID_ENCODING(encoding));
2177
2178 /*
2179 * Check for the encoding despite the assert, due to some mingw versions
2180 * otherwise issuing bogus warnings.
2181 */
2182 return PG_VALID_ENCODING(encoding) ?
2183 pg_wchar_table[encoding].maxmblen :
2184 pg_wchar_table[PG_SQL_ASCII].maxmblen;
2185}

References Assert(), encoding, pg_wchar_tbl::maxmblen, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by ascii(), chr(), CopyConvertBuf(), pg_encoding_mbcliplen(), pg_encoding_set_invalid(), pg_verify_mbstr_len(), reportErrorPosition(), test_enc_setup(), and type_maximum_size().

◆ pg_encoding_mb2wchar_with_len()

int pg_encoding_mb2wchar_with_len ( int  encoding,
const char *  from,
pg_wchar to,
int  len 
)

Definition at line 993 of file mbutils.c.

995{
996 return pg_wchar_table[encoding].mb2wchar_with_len((const unsigned char *) from, to, len);
997}
mb2wchar_with_len_converter mb2wchar_with_len
Definition: pg_wchar.h:378

References encoding, len, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

◆ pg_encoding_mbcliplen()

int pg_encoding_mbcliplen ( int  encoding,
const char *  mbstr,
int  len,
int  limit 
)

Definition at line 1093 of file mbutils.c.

1095{
1096 mblen_converter mblen_fn;
1097 int clen = 0;
1098 int l;
1099
1100 /* optimization for single byte encoding */
1101 if (pg_encoding_max_length(encoding) == 1)
1102 return cliplen(mbstr, len, limit);
1103
1104 mblen_fn = pg_wchar_table[encoding].mblen;
1105
1106 while (len > 0 && *mbstr)
1107 {
1108 l = (*mblen_fn) ((const unsigned char *) mbstr);
1109 if ((clen + l) > limit)
1110 break;
1111 clen += l;
1112 if (clen == limit)
1113 break;
1114 len -= l;
1115 mbstr += l;
1116 }
1117 return clen;
1118}
static int cliplen(const char *str, int len, int limit)
Definition: mbutils.c:1150
int(* mblen_converter)(const unsigned char *mbstr)
Definition: pg_wchar.h:366
mblen_converter mblen
Definition: pg_wchar.h:382
int pg_encoding_max_length(int encoding)
Definition: wchar.c:2174

References cliplen(), encoding, len, pg_wchar_tbl::mblen, pg_encoding_max_length(), and pg_wchar_table.

Referenced by pg_mbcliplen().

◆ pg_encoding_mblen()

int pg_encoding_mblen ( int  encoding,
const char *  mbstr 
)

◆ pg_encoding_mblen_bounded()

int pg_encoding_mblen_bounded ( int  encoding,
const char *  mbstr 
)

Definition at line 2128 of file wchar.c.

2129{
2130 return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr));
2131}
size_t strnlen(const char *str, size_t maxlen)
Definition: strnlen.c:26
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:2116

References encoding, pg_encoding_mblen(), and strnlen().

◆ pg_encoding_set_invalid()

void pg_encoding_set_invalid ( int  encoding,
char *  dst 
)

Definition at line 2049 of file wchar.c.

2050{
2051 Assert(pg_encoding_max_length(encoding) > 1);
2052
2053 dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0);
2054 dst[1] = NONUTF8_INVALID_BYTE1;
2055}
#define NONUTF8_INVALID_BYTE0
Definition: wchar.c:34
#define NONUTF8_INVALID_BYTE1
Definition: wchar.c:35

References Assert(), encoding, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, pg_encoding_max_length(), and PG_UTF8.

Referenced by appendStringLiteral(), fmtIdEnc(), PQescapeStringInternal(), and test_enc_setup().

◆ pg_encoding_to_char()

const char * pg_encoding_to_char ( int  encoding)

Definition at line 587 of file encnames.c.

588{
589 if (PG_VALID_ENCODING(encoding))
590 {
591 const pg_enc2name *p = &pg_enc2name_tbl[encoding];
592
593 Assert(encoding == p->encoding);
594 return p->name;
595 }
596 return "";
597}

◆ pg_encoding_verifymbchar()

int pg_encoding_verifymbchar ( int  encoding,
const char *  mbstr,
int  len 
)

◆ pg_encoding_verifymbstr()

int pg_encoding_verifymbstr ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 2163 of file wchar.c.

2164{
2165 return (PG_VALID_ENCODING(encoding) ?
2166 pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len) :
2167 pg_wchar_table[PG_SQL_ASCII].mbverifystr((const unsigned char *) mbstr, len));
2168}

References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by add_file_to_manifest(), CopyConvertBuf(), handle_oauth_sasl_error(), parse_oauth_json(), PQescapeInternal(), test_enc_conversion(), test_enc_setup(), and test_one_vector_escape().

◆ pg_encoding_wchar2mb_with_len()

int pg_encoding_wchar2mb_with_len ( int  encoding,
const pg_wchar from,
char *  to,
int  len 
)

Definition at line 1015 of file mbutils.c.

1017{
1018 return pg_wchar_table[encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
1019}
wchar2mb_with_len_converter wchar2mb_with_len
Definition: pg_wchar.h:380

References encoding, len, pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

◆ pg_get_client_encoding()

int pg_get_client_encoding ( void  )

Definition at line 336 of file mbutils.c.

337{
338 return ClientEncoding->encoding;
339}

References ClientEncoding, and pg_enc2name::encoding.

Referenced by BeginCopyFrom(), BeginCopyTo(), and xml_send().

◆ pg_get_client_encoding_name()

const char * pg_get_client_encoding_name ( void  )

Definition at line 345 of file mbutils.c.

346{
347 return ClientEncoding->name;
348}

References ClientEncoding, and pg_enc2name::name.

◆ pg_mb2wchar()

int pg_mb2wchar ( const char *  from,
pg_wchar to 
)

Definition at line 979 of file mbutils.c.

980{
981 return pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len((const unsigned char *) from, to, strlen(from));
982}

References DatabaseEncoding, pg_enc2name::encoding, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

◆ pg_mb2wchar_with_len()

◆ pg_mbcharcliplen()

int pg_mbcharcliplen ( const char *  mbstr,
int  len,
int  limit 
)

Definition at line 1125 of file mbutils.c.

1126{
1127 int clen = 0;
1128 int nch = 0;
1129 int l;
1130
1131 /* optimization for single byte encoding */
1132 if (pg_database_encoding_max_length() == 1)
1133 return cliplen(mbstr, len, limit);
1134
1135 while (len > 0 && *mbstr)
1136 {
1137 l = pg_mblen(mbstr);
1138 nch++;
1139 if (nch > limit)
1140 break;
1141 clen += l;
1142 len -= l;
1143 mbstr += l;
1144 }
1145 return clen;
1146}
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1546
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023

References cliplen(), len, pg_database_encoding_max_length(), and pg_mblen().

Referenced by bpchar(), bpchar_input(), text_left(), text_right(), varchar(), and varchar_input().

◆ pg_mbcliplen()

◆ pg_mblen()

int pg_mblen ( const char *  mbstr)

Definition at line 1023 of file mbutils.c.

1024{
1025 return pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
1026}

References DatabaseEncoding, pg_enc2name::encoding, pg_wchar_tbl::mblen, and pg_wchar_table.

Referenced by addCompoundAffixFlagValue(), bit_in(), charlen_to_bytelen(), DCH_from_char(), dotrim(), find_word(), findchar(), findchar2(), findwrd(), gbt_var_node_cp_len(), get_modifiers(), get_nextfield(), get_wildcard_part(), getlexeme(), getNextFlagFromString(), gettoken_query(), gettoken_query_standard(), gettoken_query_websearch(), gettoken_tsvector(), hex_decode_safe(), infix(), initTrie(), lpad(), make_trigrams(), map_sql_identifier_to_xml_name(), map_xml_name_to_sql_identifier(), match_prosrc_to_literal(), mb_strchr(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), NUM_eat_non_data_chars(), NUM_processor(), parse_affentry(), parse_format(), parse_lquery(), parse_ltree(), parse_or_operator(), parse_re_flags(), parse_test_flags(), pg_base64_decode(), pg_mbcharcliplen(), pg_mbstrlen(), pg_mbstrlen_with_len(), prssyntaxerror(), px_crypt_shacrypt(), readstoplist(), report_json_context(), rpad(), RS_compile(), RS_execute(), RS_isRegis(), similar_escape_internal(), split_text(), t_isalnum(), t_isalpha(), text_format(), text_position_next(), text_position_next_internal(), text_reverse(), text_substring(), text_to_bits(), textregexreplace(), thesaurusRead(), TParserGet(), translate(), ts_stat_sql(), tsvectorout(), unaccent_lexize(), varbit_in(), varstr_levenshtein(), and wchareq().

◆ pg_mbstrlen()

int pg_mbstrlen ( const char *  mbstr)

Definition at line 1037 of file mbutils.c.

1038{
1039 int len = 0;
1040
1041 /* optimization for single byte encoding */
1043 return strlen(mbstr);
1044
1045 while (*mbstr)
1046 {
1047 mbstr += pg_mblen(mbstr);
1048 len++;
1049 }
1050 return len;
1051}

References len, pg_database_encoding_max_length(), and pg_mblen().

Referenced by NUM_processor(), and text_format_append_string().

◆ pg_mbstrlen_with_len()

int pg_mbstrlen_with_len ( const char *  mbstr,
int  limit 
)

Definition at line 1057 of file mbutils.c.

1058{
1059 int len = 0;
1060
1061 /* optimization for single byte encoding */
1063 return limit;
1064
1065 while (limit > 0 && *mbstr)
1066 {
1067 int l = pg_mblen(mbstr);
1068
1069 limit -= l;
1070 mbstr += l;
1071 len++;
1072 }
1073 return len;
1074}

References len, pg_database_encoding_max_length(), and pg_mblen().

Referenced by bpchar(), bpchar_input(), bpcharlen(), executor_errposition(), lpad(), match_prosrc_to_query(), parser_errposition(), plpgsql_scanner_errposition(), rpad(), scanner_errposition(), similar_escape_internal(), text_left(), text_length(), text_position_get_match_pos(), text_right(), text_substring(), unicode_assigned(), unicode_is_normalized(), unicode_normalize_func(), and varstr_levenshtein().

◆ pg_mule_mblen()

int pg_mule_mblen ( const unsigned char *  s)

Definition at line 791 of file wchar.c.

792{
793 int len;
794
795 if (IS_LC1(*s))
796 len = 2;
797 else if (IS_LCPRV1(*s))
798 len = 3;
799 else if (IS_LC2(*s))
800 len = 3;
801 else if (IS_LCPRV2(*s))
802 len = 4;
803 else
804 len = 1; /* assume ASCII */
805 return len;
806}
#define IS_LCPRV2(c)
Definition: pg_wchar.h:164
#define IS_LC2(c)
Definition: pg_wchar.h:144
#define IS_LCPRV1(c)
Definition: pg_wchar.h:152
#define IS_LC1(c)
Definition: pg_wchar.h:126

References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.

Referenced by mic2latin(), mic2latin_with_table(), and pg_mule_verifychar().

◆ pg_server_to_any()

char * pg_server_to_any ( const char *  s,
int  len,
int  encoding 
)

Definition at line 749 of file mbutils.c.

750{
751 if (len <= 0)
752 return unconstify(char *, s); /* empty string is always valid */
753
756 return unconstify(char *, s); /* assume data is valid */
757
759 {
760 /* No conversion is possible, but we must validate the result */
761 (void) pg_verify_mbstr(encoding, s, len, false);
762 return unconstify(char *, s);
763 }
764
765 /* Fast path if we can use cached conversion function */
768
769 /* General case ... will not work outside transactions */
770 return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
771 len,
773 encoding);
774}

References ClientEncoding, DatabaseEncoding, encoding, pg_enc2name::encoding, len, perform_default_encoding_conversion(), pg_do_encoding_conversion(), PG_SQL_ASCII, pg_verify_mbstr(), and unconstify.

Referenced by compareStrings(), CopyAttributeOutCSV(), CopyAttributeOutText(), CopyToTextLikeStart(), daitch_mokotoff(), dsnowball_lexize(), hv_fetch_string(), hv_store_string(), pg_server_to_client(), PLyUnicode_FromStringAndSize(), and utf_e2u().

◆ pg_server_to_client()

char * pg_server_to_client ( const char *  s,
int  len 
)

Definition at line 738 of file mbutils.c.

739{
741}
char * pg_server_to_any(const char *s, int len, int encoding)
Definition: mbutils.c:749

References ClientEncoding, pg_enc2name::encoding, len, and pg_server_to_any().

Referenced by pq_puttextmessage(), pq_sendcountedtext(), pq_sendstring(), pq_sendtext(), and pq_writestring().

◆ pg_unicode_to_server()

void pg_unicode_to_server ( pg_wchar  c,
unsigned char *  s 
)

Definition at line 864 of file mbutils.c.

865{
866 unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
867 int c_as_utf8_len;
868 int server_encoding;
869
870 /*
871 * Complain if invalid Unicode code point. The choice of errcode here is
872 * debatable, but really our caller should have checked this anyway.
873 */
876 (errcode(ERRCODE_SYNTAX_ERROR),
877 errmsg("invalid Unicode code point")));
878
879 /* Otherwise, if it's in ASCII range, conversion is trivial */
880 if (c <= 0x7F)
881 {
882 s[0] = (unsigned char) c;
883 s[1] = '\0';
884 return;
885 }
886
887 /* If the server encoding is UTF-8, we just need to reformat the code */
888 server_encoding = GetDatabaseEncoding();
889 if (server_encoding == PG_UTF8)
890 {
891 unicode_to_utf8(c, s);
892 s[pg_utf_mblen(s)] = '\0';
893 return;
894 }
895
896 /* For all other cases, we must have a conversion function available */
897 if (Utf8ToServerConvProc == NULL)
899 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
900 errmsg("conversion between %s and %s is not supported",
903
904 /* Construct UTF-8 source string */
905 unicode_to_utf8(c, c_as_utf8);
906 c_as_utf8_len = pg_utf_mblen(c_as_utf8);
907 c_as_utf8[c_as_utf8_len] = '\0';
908
909 /* Convert, or throw error if we can't */
912 Int32GetDatum(server_encoding),
913 CStringGetDatum((char *) c_as_utf8),
914 CStringGetDatum((char *) s),
915 Int32GetDatum(c_as_utf8_len),
916 BoolGetDatum(false));
917}
#define FunctionCall6(flinfo, arg1, arg2, arg3, arg4, arg5, arg6)
Definition: fmgr.h:710
#define MAX_MULTIBYTE_CHAR_LEN
Definition: pg_wchar.h:33
#define pg_utf_mblen
Definition: pg_wchar.h:633
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: pg_wchar.h:575
static bool is_valid_unicode_codepoint(pg_wchar c)
Definition: pg_wchar.h:519
char * c

References BoolGetDatum(), CStringGetDatum(), ereport, errcode(), errmsg(), ERROR, FunctionCall6, GetDatabaseEncoding(), GetDatabaseEncodingName(), Int32GetDatum(), is_valid_unicode_codepoint(), MAX_MULTIBYTE_CHAR_LEN, name, pg_enc2name_tbl, PG_UTF8, pg_utf_mblen, unicode_to_utf8(), and Utf8ToServerConvProc.

Referenced by addunicode(), addUnicodeChar(), map_xml_name_to_sql_identifier(), str_udeescape(), and unistr().

◆ pg_unicode_to_server_noerror()

bool pg_unicode_to_server_noerror ( pg_wchar  c,
unsigned char *  s 
)

Definition at line 926 of file mbutils.c.

927{
928 unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
929 int c_as_utf8_len;
930 int converted_len;
931 int server_encoding;
932
933 /* Fail if invalid Unicode code point */
935 return false;
936
937 /* Otherwise, if it's in ASCII range, conversion is trivial */
938 if (c <= 0x7F)
939 {
940 s[0] = (unsigned char) c;
941 s[1] = '\0';
942 return true;
943 }
944
945 /* If the server encoding is UTF-8, we just need to reformat the code */
946 server_encoding = GetDatabaseEncoding();
947 if (server_encoding == PG_UTF8)
948 {
949 unicode_to_utf8(c, s);
950 s[pg_utf_mblen(s)] = '\0';
951 return true;
952 }
953
954 /* For all other cases, we must have a conversion function available */
955 if (Utf8ToServerConvProc == NULL)
956 return false;
957
958 /* Construct UTF-8 source string */
959 unicode_to_utf8(c, c_as_utf8);
960 c_as_utf8_len = pg_utf_mblen(c_as_utf8);
961 c_as_utf8[c_as_utf8_len] = '\0';
962
963 /* Convert, but without throwing error if we can't */
966 Int32GetDatum(server_encoding),
967 CStringGetDatum((char *) c_as_utf8),
968 CStringGetDatum((char *) s),
969 Int32GetDatum(c_as_utf8_len),
970 BoolGetDatum(true)));
971
972 /* Conversion was successful iff it consumed the whole input */
973 return (converted_len == c_as_utf8_len);
974}

References BoolGetDatum(), CStringGetDatum(), DatumGetInt32(), FunctionCall6, GetDatabaseEncoding(), Int32GetDatum(), is_valid_unicode_codepoint(), MAX_MULTIBYTE_CHAR_LEN, PG_UTF8, pg_utf_mblen, unicode_to_utf8(), and Utf8ToServerConvProc.

Referenced by addUnicodeChar(), and json_lex_string().

◆ pg_utf8_islegal()

bool pg_utf8_islegal ( const unsigned char *  source,
int  length 
)

Definition at line 1987 of file wchar.c.

1988{
1989 unsigned char a;
1990
1991 switch (length)
1992 {
1993 default:
1994 /* reject lengths 5 and 6 for now */
1995 return false;
1996 case 4:
1997 a = source[3];
1998 if (a < 0x80 || a > 0xBF)
1999 return false;
2000 /* FALL THRU */
2001 case 3:
2002 a = source[2];
2003 if (a < 0x80 || a > 0xBF)
2004 return false;
2005 /* FALL THRU */
2006 case 2:
2007 a = source[1];
2008 switch (*source)
2009 {
2010 case 0xE0:
2011 if (a < 0xA0 || a > 0xBF)
2012 return false;
2013 break;
2014 case 0xED:
2015 if (a < 0x80 || a > 0x9F)
2016 return false;
2017 break;
2018 case 0xF0:
2019 if (a < 0x90 || a > 0xBF)
2020 return false;
2021 break;
2022 case 0xF4:
2023 if (a < 0x80 || a > 0x8F)
2024 return false;
2025 break;
2026 default:
2027 if (a < 0x80 || a > 0xBF)
2028 return false;
2029 break;
2030 }
2031 /* FALL THRU */
2032 case 1:
2033 a = *source;
2034 if (a >= 0x80 && a < 0xC2)
2035 return false;
2036 if (a > 0xF4)
2037 return false;
2038 break;
2039 }
2040 return true;
2041}
int a
Definition: isn.c:73
static rewind_source * source
Definition: pg_rewind.c:89

References a, and source.

Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifychar(), utf8_to_iso8859_1(), and UtfToLocal().

◆ pg_utf_mblen()

int pg_utf_mblen ( const unsigned char *  s)

Definition at line 536 of file wchar.c.

537{
538 int len;
539
540 if ((*s & 0x80) == 0)
541 len = 1;
542 else if ((*s & 0xe0) == 0xc0)
543 len = 2;
544 else if ((*s & 0xf0) == 0xe0)
545 len = 3;
546 else if ((*s & 0xf8) == 0xf0)
547 len = 4;
548#ifdef NOT_USED
549 else if ((*s & 0xfc) == 0xf8)
550 len = 5;
551 else if ((*s & 0xfe) == 0xfc)
552 len = 6;
553#endif
554 else
555 len = 1;
556 return len;
557}

References len.

Referenced by pg_utf8_verifystr(), and pg_wchar2utf_with_len().

◆ pg_valid_client_encoding()

int pg_valid_client_encoding ( const char *  name)

Definition at line 485 of file encnames.c.

486{
487 int enc;
488
489 if ((enc = pg_char_to_encoding(name)) < 0)
490 return -1;
491
493 return -1;
494
495 return enc;
496}
enc
int pg_char_to_encoding(const char *name)
Definition: encnames.c:549
#define PG_VALID_FE_ENCODING(_enc)
Definition: pg_wchar.h:291

References enc, name, pg_char_to_encoding(), and PG_VALID_FE_ENCODING.

Referenced by check_client_encoding().

◆ pg_valid_server_encoding()

int pg_valid_server_encoding ( const char *  name)

Definition at line 499 of file encnames.c.

500{
501 int enc;
502
503 if ((enc = pg_char_to_encoding(name)) < 0)
504 return -1;
505
507 return -1;
508
509 return enc;
510}

References enc, name, pg_char_to_encoding(), and PG_VALID_BE_ENCODING.

◆ pg_valid_server_encoding_id()

int pg_valid_server_encoding_id ( int  encoding)

Definition at line 513 of file encnames.c.

514{
516}

◆ pg_verify_mbstr()

bool pg_verify_mbstr ( int  encoding,
const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1566 of file mbutils.c.

1567{
1568 int oklen;
1569
1571
1572 oklen = pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len);
1573 if (oklen != len)
1574 {
1575 if (noError)
1576 return false;
1577 report_invalid_encoding(encoding, mbstr + oklen, len - oklen);
1578 }
1579 return true;
1580}
mbstr_verifier mbverifystr
Definition: pg_wchar.h:385

References Assert(), encoding, len, pg_wchar_tbl::mbverifystr, PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().

Referenced by AddFileToBackupManifest(), LogicalOutputWrite(), pg_any_to_server(), pg_convert(), pg_do_encoding_conversion(), pg_server_to_any(), pg_verifymbstr(), and read_extension_script_file().

◆ pg_verify_mbstr_len()

int pg_verify_mbstr_len ( int  encoding,
const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1597 of file mbutils.c.

1598{
1599 mbchar_verifier mbverifychar;
1600 int mb_len;
1601
1603
1604 /*
1605 * In single-byte encodings, we need only reject nulls (\0).
1606 */
1608 {
1609 const char *nullpos = memchr(mbstr, 0, len);
1610
1611 if (nullpos == NULL)
1612 return len;
1613 if (noError)
1614 return -1;
1615 report_invalid_encoding(encoding, nullpos, 1);
1616 }
1617
1618 /* fetch function pointer just once */
1619 mbverifychar = pg_wchar_table[encoding].mbverifychar;
1620
1621 mb_len = 0;
1622
1623 while (len > 0)
1624 {
1625 int l;
1626
1627 /* fast path for ASCII-subset characters */
1628 if (!IS_HIGHBIT_SET(*mbstr))
1629 {
1630 if (*mbstr != '\0')
1631 {
1632 mb_len++;
1633 mbstr++;
1634 len--;
1635 continue;
1636 }
1637 if (noError)
1638 return -1;
1640 }
1641
1642 l = (*mbverifychar) ((const unsigned char *) mbstr, len);
1643
1644 if (l < 0)
1645 {
1646 if (noError)
1647 return -1;
1649 }
1650
1651 mbstr += l;
1652 len -= l;
1653 mb_len++;
1654 }
1655 return mb_len;
1656}
int(* mbchar_verifier)(const unsigned char *mbstr, int len)
Definition: pg_wchar.h:372
mbchar_verifier mbverifychar
Definition: pg_wchar.h:384

References Assert(), encoding, IS_HIGHBIT_SET, len, pg_wchar_tbl::mbverifychar, pg_encoding_max_length(), PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().

Referenced by length_in_encoding().

◆ pg_verifymbstr()

bool pg_verifymbstr ( const char *  mbstr,
int  len,
bool  noError 
)

◆ pg_wchar2mb()

int pg_wchar2mb ( const pg_wchar from,
char *  to 
)

Definition at line 1001 of file mbutils.c.

1002{
1003 return pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len(from, (unsigned char *) to, pg_wchar_strlen(from));
1004}
size_t pg_wchar_strlen(const pg_wchar *str)
Definition: wstrncmp.c:70

References DatabaseEncoding, pg_enc2name::encoding, pg_wchar_strlen(), pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

◆ pg_wchar2mb_with_len()

int pg_wchar2mb_with_len ( const pg_wchar from,
char *  to,
int  len 
)

◆ pg_wchar_strlen()

size_t pg_wchar_strlen ( const pg_wchar str)

Definition at line 70 of file wstrncmp.c.

71{
72 const pg_wchar *s;
73
74 for (s = str; *s; ++s)
75 ;
76 return (s - str);
77}
const char * str

References str.

Referenced by pg_wchar2mb().

◆ pg_wchar_strncmp()

int pg_wchar_strncmp ( const pg_wchar s1,
const pg_wchar s2,
size_t  n 
)

Definition at line 40 of file wstrncmp.c.

41{
42 if (n == 0)
43 return 0;
44 do
45 {
46 if (*s1 != *s2++)
47 return (*s1 - *(s2 - 1));
48 if (*s1++ == 0)
49 break;
50 } while (--n != 0);
51 return 0;
52}

References s1, and s2.

◆ PrepareClientEncoding()

int PrepareClientEncoding ( int  encoding)

Definition at line 110 of file mbutils.c.

111{
112 int current_server_encoding;
113 ListCell *lc;
114
116 return -1;
117
118 /* Can't do anything during startup, per notes above */
120 return 0;
121
122 current_server_encoding = GetDatabaseEncoding();
123
124 /*
125 * Check for cases that require no conversion function.
126 */
127 if (current_server_encoding == encoding ||
128 current_server_encoding == PG_SQL_ASCII ||
130 return 0;
131
132 if (IsTransactionState())
133 {
134 /*
135 * If we're in a live transaction, it's safe to access the catalogs,
136 * so look up the functions. We repeat the lookup even if the info is
137 * already cached, so that we can react to changes in the contents of
138 * pg_conversion.
139 */
140 Oid to_server_proc,
141 to_client_proc;
142 ConvProcInfo *convinfo;
143 MemoryContext oldcontext;
144
145 to_server_proc = FindDefaultConversionProc(encoding,
146 current_server_encoding);
147 if (!OidIsValid(to_server_proc))
148 return -1;
149 to_client_proc = FindDefaultConversionProc(current_server_encoding,
150 encoding);
151 if (!OidIsValid(to_client_proc))
152 return -1;
153
154 /*
155 * Load the fmgr info into TopMemoryContext (could still fail here)
156 */
158 sizeof(ConvProcInfo));
159 convinfo->s_encoding = current_server_encoding;
160 convinfo->c_encoding = encoding;
161 fmgr_info_cxt(to_server_proc, &convinfo->to_server_info,
163 fmgr_info_cxt(to_client_proc, &convinfo->to_client_info,
165
166 /* Attach new info to head of list */
168 ConvProcList = lcons(convinfo, ConvProcList);
169 MemoryContextSwitchTo(oldcontext);
170
171 /*
172 * We cannot yet remove any older entry for the same encoding pair,
173 * since it could still be in use. SetClientEncoding will clean up.
174 */
175
176 return 0; /* success */
177 }
178 else
179 {
180 /*
181 * If we're not in a live transaction, the only thing we can do is
182 * restore a previous setting using the cache. This covers all
183 * transaction-rollback cases. The only case it might not work for is
184 * trying to change client_encoding on the fly by editing
185 * postgresql.conf and SIGHUP'ing. Which would probably be a stupid
186 * thing to do anyway.
187 */
188 foreach(lc, ConvProcList)
189 {
190 ConvProcInfo *oldinfo = (ConvProcInfo *) lfirst(lc);
191
192 if (oldinfo->s_encoding == current_server_encoding &&
193 oldinfo->c_encoding == encoding)
194 return 0;
195 }
196
197 return -1; /* it's not cached, so fail */
198 }
199}
List * lcons(void *datum, List *list)
Definition: list.c:495
static List * ConvProcList
Definition: mbutils.c:61
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
#define lfirst(lc)
Definition: pg_list.h:172
int s_encoding
Definition: mbutils.c:55
FmgrInfo to_client_info
Definition: mbutils.c:58
int c_encoding
Definition: mbutils.c:56
FmgrInfo to_server_info
Definition: mbutils.c:57

References backend_startup_complete, ConvProcInfo::c_encoding, ConvProcList, encoding, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), IsTransactionState(), lcons(), lfirst, MemoryContextAlloc(), MemoryContextSwitchTo(), OidIsValid, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, ConvProcInfo::to_server_info, and TopMemoryContext.

Referenced by check_client_encoding(), and InitializeClientEncoding().

◆ report_invalid_encoding()

pg_noreturn void report_invalid_encoding ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 1698 of file mbutils.c.

1699{
1700 int l = pg_encoding_mblen(encoding, mbstr);
1701 char buf[8 * 5 + 1];
1702 char *p = buf;
1703 int j,
1704 jlimit;
1705
1706 jlimit = Min(l, len);
1707 jlimit = Min(jlimit, 8); /* prevent buffer overrun */
1708
1709 for (j = 0; j < jlimit; j++)
1710 {
1711 p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
1712 if (j < jlimit - 1)
1713 p += sprintf(p, " ");
1714 }
1715
1716 ereport(ERROR,
1717 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1718 errmsg("invalid byte sequence for encoding \"%s\": %s",
1720 buf)));
1721}
#define Min(x, y)
Definition: c.h:975
int j
Definition: isn.c:78
static char * buf
Definition: pg_test_fsync.c:72
#define sprintf
Definition: port.h:241

References buf, encoding, ereport, errcode(), errmsg(), ERROR, j, len, Min, name, pg_enc2name_tbl, pg_encoding_mblen(), and sprintf.

Referenced by big52euc_tw(), big52mic(), CopyConversionError(), euc_cn2mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), iso8859_1_to_utf8(), latin2mic(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_verify_mbstr(), pg_verify_mbstr_len(), shift_jis_20042euc_jis_2004(), sjis2euc_jp(), sjis2mic(), test_enc_conversion(), utf8_to_iso8859_1(), and UtfToLocal().

◆ report_untranslatable_char()

pg_noreturn void report_untranslatable_char ( int  src_encoding,
int  dest_encoding,
const char *  mbstr,
int  len 
)

Definition at line 1730 of file mbutils.c.

1732{
1733 int l = pg_encoding_mblen(src_encoding, mbstr);
1734 char buf[8 * 5 + 1];
1735 char *p = buf;
1736 int j,
1737 jlimit;
1738
1739 jlimit = Min(l, len);
1740 jlimit = Min(jlimit, 8); /* prevent buffer overrun */
1741
1742 for (j = 0; j < jlimit; j++)
1743 {
1744 p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
1745 if (j < jlimit - 1)
1746 p += sprintf(p, " ");
1747 }
1748
1749 ereport(ERROR,
1750 (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
1751 errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
1752 buf,
1753 pg_enc2name_tbl[src_encoding].name,
1754 pg_enc2name_tbl[dest_encoding].name)));
1755}

References buf, ereport, errcode(), errmsg(), ERROR, j, len, Min, name, pg_enc2name_tbl, pg_encoding_mblen(), and sprintf.

Referenced by big52euc_tw(), big52mic(), euc_tw2big5(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), utf8_to_iso8859_1(), and UtfToLocal().

◆ SetClientEncoding()

int SetClientEncoding ( int  encoding)

Definition at line 208 of file mbutils.c.

209{
210 int current_server_encoding;
211 bool found;
212 ListCell *lc;
213
215 return -1;
216
217 /* Can't do anything during startup, per notes above */
219 {
221 return 0;
222 }
223
224 current_server_encoding = GetDatabaseEncoding();
225
226 /*
227 * Check for cases that require no conversion function.
228 */
229 if (current_server_encoding == encoding ||
230 current_server_encoding == PG_SQL_ASCII ||
232 {
234 ToServerConvProc = NULL;
235 ToClientConvProc = NULL;
236 return 0;
237 }
238
239 /*
240 * Search the cache for the entry previously prepared by
241 * PrepareClientEncoding; if there isn't one, we lose. While at it,
242 * release any duplicate entries so that repeated Prepare/Set cycles don't
243 * leak memory.
244 */
245 found = false;
246 foreach(lc, ConvProcList)
247 {
248 ConvProcInfo *convinfo = (ConvProcInfo *) lfirst(lc);
249
250 if (convinfo->s_encoding == current_server_encoding &&
251 convinfo->c_encoding == encoding)
252 {
253 if (!found)
254 {
255 /* Found newest entry, so set up */
257 ToServerConvProc = &convinfo->to_server_info;
258 ToClientConvProc = &convinfo->to_client_info;
259 found = true;
260 }
261 else
262 {
263 /* Duplicate entry, release it */
265 pfree(convinfo);
266 }
267 }
268 }
269
270 if (found)
271 return 0; /* success */
272 else
273 return -1; /* it's not cached, so fail */
274}
static FmgrInfo * ToServerConvProc
Definition: mbutils.c:67
static FmgrInfo * ToClientConvProc
Definition: mbutils.c:68
void pfree(void *pointer)
Definition: mcxt.c:2147
#define foreach_delete_current(lst, var_or_cell)
Definition: pg_list.h:391

References backend_startup_complete, ConvProcInfo::c_encoding, ClientEncoding, ConvProcList, encoding, foreach_delete_current, GetDatabaseEncoding(), lfirst, pending_client_encoding, pfree(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, ConvProcInfo::to_server_info, ToClientConvProc, and ToServerConvProc.

Referenced by assign_client_encoding(), InitializeClientEncoding(), and ParallelWorkerMain().

◆ SetDatabaseEncoding()

void SetDatabaseEncoding ( int  encoding)

Definition at line 1161 of file mbutils.c.

1162{
1164 elog(ERROR, "invalid database encoding: %d", encoding);
1165
1168}

References Assert(), DatabaseEncoding, elog, encoding, pg_enc2name::encoding, ERROR, pg_enc2name_tbl, and PG_VALID_BE_ENCODING.

Referenced by CheckMyDatabase().

◆ SetMessageEncoding()

void SetMessageEncoding ( int  encoding)

Definition at line 1171 of file mbutils.c.

1172{
1173 /* Some calls happen before we can elog()! */
1175
1178}

References Assert(), encoding, pg_enc2name::encoding, MessageEncoding, pg_enc2name_tbl, and PG_VALID_ENCODING.

Referenced by pg_perm_setlocale().

◆ surrogate_pair_to_codepoint()

static pg_wchar surrogate_pair_to_codepoint ( pg_wchar  first,
pg_wchar  second 
)
inlinestatic

Definition at line 537 of file pg_wchar.h.

542{

Referenced by addUnicode(), json_lex_string(), str_udeescape(), and unistr().

◆ unicode_to_utf8()

unsigned char * unicode_to_utf8 ( pg_wchar  c,
unsigned char *  utf8string 
)
inlinestatic

Definition at line 575 of file pg_wchar.h.

592{
593 if (c <= 0x7F)
594 {
595 utf8string[0] = c;
596 }
597 else if (c <= 0x7FF)
598 {
599 utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
600 utf8string[1] = 0x80 | (c & 0x3F);
601 }

Referenced by convert_case(), json_lex_string(), pg_saslprep(), pg_unicode_to_server(), pg_unicode_to_server_noerror(), pg_wchar2utf_with_len(), and unicode_normalize_func().

◆ unicode_utf8len()

static int unicode_utf8len ( pg_wchar  c)
inlinestatic

Definition at line 607 of file pg_wchar.h.

609 {
610 utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
611 utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
612 utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
613 utf8string[3] = 0x80 | (c & 0x3F);
614 }
615
616 return utf8string;
617}

Referenced by convert_case(), and initcap_wbnext().

◆ utf8_to_unicode()

pg_wchar utf8_to_unicode ( const unsigned char *  c)
inlinestatic

Definition at line 549 of file pg_wchar.h.

554{
555 return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF);
556}
557
558/*
559 * Convert a UTF-8 character to a Unicode code point.
560 * This is a one-character version of pg_utf2wchar_with_len.
561 *
562 * No error checks here, c must point to a long-enough string.
563 */
564static inline pg_wchar
565utf8_to_unicode(const unsigned char *c)
566{
567 if ((*c & 0x80) == 0)
568 return (pg_wchar) c[0];
static pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: pg_wchar.h:549

◆ UtfToLocal()

int UtfToLocal ( const unsigned char *  utf,
int  len,
unsigned char *  iso,
const pg_mb_radix_tree map,
const pg_utf_to_local_combined cmap,
int  cmapsize,
utf_local_conversion_func  conv_func,
int  encoding,
bool  noError 
)

Definition at line 507 of file conv.c.

513{
514 uint32 iutf;
515 int l;
516 const pg_utf_to_local_combined *cp;
517 const unsigned char *start = utf;
518
521 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
522 errmsg("invalid encoding number: %d", encoding)));
523
524 for (; len > 0; len -= l)
525 {
526 unsigned char b1 = 0;
527 unsigned char b2 = 0;
528 unsigned char b3 = 0;
529 unsigned char b4 = 0;
530
531 /* "break" cases all represent errors */
532 if (*utf == '\0')
533 break;
534
535 l = pg_utf_mblen(utf);
536 if (len < l)
537 break;
538
539 if (!pg_utf8_islegal(utf, l))
540 break;
541
542 if (l == 1)
543 {
544 /* ASCII case is easy, assume it's one-to-one conversion */
545 *iso++ = *utf++;
546 continue;
547 }
548
549 /* collect coded char of length l */
550 if (l == 2)
551 {
552 b3 = *utf++;
553 b4 = *utf++;
554 }
555 else if (l == 3)
556 {
557 b2 = *utf++;
558 b3 = *utf++;
559 b4 = *utf++;
560 }
561 else if (l == 4)
562 {
563 b1 = *utf++;
564 b2 = *utf++;
565 b3 = *utf++;
566 b4 = *utf++;
567 }
568 else
569 {
570 elog(ERROR, "unsupported character length %d", l);
571 iutf = 0; /* keep compiler quiet */
572 }
573 iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
574
575 /* First, try with combined map if possible */
576 if (cmap && len > l)
577 {
578 const unsigned char *utf_save = utf;
579 int len_save = len;
580 int l_save = l;
581
582 /* collect next character, same as above */
583 len -= l;
584
585 l = pg_utf_mblen(utf);
586 if (len < l)
587 {
588 /* need more data to decide if this is a combined char */
589 utf -= l_save;
590 break;
591 }
592
593 if (!pg_utf8_islegal(utf, l))
594 {
595 if (!noError)
596 report_invalid_encoding(PG_UTF8, (const char *) utf, len);
597 utf -= l_save;
598 break;
599 }
600
601 /* We assume ASCII character cannot be in combined map */
602 if (l > 1)
603 {
604 uint32 iutf2;
605 uint32 cutf[2];
606
607 if (l == 2)
608 {
609 iutf2 = *utf++ << 8;
610 iutf2 |= *utf++;
611 }
612 else if (l == 3)
613 {
614 iutf2 = *utf++ << 16;
615 iutf2 |= *utf++ << 8;
616 iutf2 |= *utf++;
617 }
618 else if (l == 4)
619 {
620 iutf2 = *utf++ << 24;
621 iutf2 |= *utf++ << 16;
622 iutf2 |= *utf++ << 8;
623 iutf2 |= *utf++;
624 }
625 else
626 {
627 elog(ERROR, "unsupported character length %d", l);
628 iutf2 = 0; /* keep compiler quiet */
629 }
630
631 cutf[0] = iutf;
632 cutf[1] = iutf2;
633
634 cp = bsearch(cutf, cmap, cmapsize,
636
637 if (cp)
638 {
639 iso = store_coded_char(iso, cp->code);
640 continue;
641 }
642 }
643
644 /* fail, so back up to reprocess second character next time */
645 utf = utf_save;
646 len = len_save;
647 l = l_save;
648 }
649
650 /* Now check ordinary map */
651 if (map)
652 {
653 uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
654
655 if (converted)
656 {
657 iso = store_coded_char(iso, converted);
658 continue;
659 }
660 }
661
662 /* if there's a conversion function, try that */
663 if (conv_func)
664 {
665 uint32 converted = (*conv_func) (iutf);
666
667 if (converted)
668 {
669 iso = store_coded_char(iso, converted);
670 continue;
671 }
672 }
673
674 /* failed to translate this character */
675 utf -= l;
676 if (noError)
677 break;
679 (const char *) utf, len);
680 }
681
682 /* if we broke out of loop early, must be invalid input */
683 if (len > 0 && !noError)
684 report_invalid_encoding(PG_UTF8, (const char *) utf, len);
685
686 *iso = '\0';
687
688 return utf - start;
689}
static int compare3(const void *p1, const void *p2)
Definition: conv.c:320
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1987

References pg_utf_to_local_combined::code, compare3(), elog, encoding, ereport, errcode(), errmsg(), ERROR, len, pg_mb_radix_conv(), PG_UTF8, pg_utf8_islegal(), pg_utf_mblen, PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), start, and store_coded_char().

Referenced by utf8_to_big5(), utf8_to_euc_cn(), utf8_to_euc_jis_2004(), utf8_to_euc_jp(), utf8_to_euc_kr(), utf8_to_euc_tw(), utf8_to_gb18030(), utf8_to_gbk(), utf8_to_iso8859(), utf8_to_johab(), utf8_to_koi8r(), utf8_to_koi8u(), utf8_to_shift_jis_2004(), utf8_to_sjis(), utf8_to_uhc(), and utf8_to_win().

Variable Documentation

◆ pg_enc2gettext_tbl

PGDLLIMPORT const char* pg_enc2gettext_tbl[]
extern

Definition at line 360 of file encnames.c.

◆ pg_enc2name_tbl

◆ pg_wchar_table