PostgreSQL Source Code git master
mbutils.c File Reference
#include "postgres.h"
#include "access/xact.h"
#include "catalog/namespace.h"
#include "mb/pg_wchar.h"
#include "utils/fmgrprotos.h"
#include "utils/memutils.h"
#include "utils/relcache.h"
#include "varatt.h"
Include dependency graph for mbutils.c:

Go to the source code of this file.

Data Structures

struct  ConvProcInfo
 

Typedefs

typedef struct ConvProcInfo ConvProcInfo
 

Functions

static char * perform_default_encoding_conversion (const char *src, int len, bool is_client_to_server)
 
static int cliplen (const char *str, int len, int limit)
 
int PrepareClientEncoding (int encoding)
 
int SetClientEncoding (int encoding)
 
void InitializeClientEncoding (void)
 
int pg_get_client_encoding (void)
 
const char * pg_get_client_encoding_name (void)
 
unsigned char * pg_do_encoding_conversion (unsigned char *src, int len, int src_encoding, int dest_encoding)
 
int pg_do_encoding_conversion_buf (Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError)
 
Datum pg_convert_to (PG_FUNCTION_ARGS)
 
Datum pg_convert_from (PG_FUNCTION_ARGS)
 
Datum pg_convert (PG_FUNCTION_ARGS)
 
Datum length_in_encoding (PG_FUNCTION_ARGS)
 
Datum pg_encoding_max_length_sql (PG_FUNCTION_ARGS)
 
char * pg_client_to_server (const char *s, int len)
 
char * pg_any_to_server (const char *s, int len, int encoding)
 
char * pg_server_to_client (const char *s, int len)
 
char * pg_server_to_any (const char *s, int len, int encoding)
 
void pg_unicode_to_server (char32_t c, unsigned char *s)
 
bool pg_unicode_to_server_noerror (char32_t c, unsigned char *s)
 
int pg_mb2wchar (const char *from, pg_wchar *to)
 
int pg_mb2wchar_with_len (const char *from, pg_wchar *to, int len)
 
int pg_encoding_mb2wchar_with_len (int encoding, const char *from, pg_wchar *to, int len)
 
int pg_wchar2mb (const pg_wchar *from, char *to)
 
int pg_wchar2mb_with_len (const pg_wchar *from, char *to, int len)
 
int pg_encoding_wchar2mb_with_len (int encoding, const pg_wchar *from, char *to, int len)
 
int pg_mblen (const char *mbstr)
 
int pg_dsplen (const char *mbstr)
 
int pg_mbstrlen (const char *mbstr)
 
int pg_mbstrlen_with_len (const char *mbstr, int limit)
 
int pg_mbcliplen (const char *mbstr, int len, int limit)
 
int pg_encoding_mbcliplen (int encoding, const char *mbstr, int len, int limit)
 
int pg_mbcharcliplen (const char *mbstr, int len, int limit)
 
void SetDatabaseEncoding (int encoding)
 
void SetMessageEncoding (int encoding)
 
int GetDatabaseEncoding (void)
 
const char * GetDatabaseEncodingName (void)
 
Datum getdatabaseencoding (PG_FUNCTION_ARGS)
 
Datum pg_client_encoding (PG_FUNCTION_ARGS)
 
Datum PG_char_to_encoding (PG_FUNCTION_ARGS)
 
Datum PG_encoding_to_char (PG_FUNCTION_ARGS)
 
int GetMessageEncoding (void)
 
static bool pg_generic_charinc (unsigned char *charptr, int len)
 
static bool pg_utf8_increment (unsigned char *charptr, int length)
 
static bool pg_eucjp_increment (unsigned char *charptr, int length)
 
mbcharacter_incrementer pg_database_encoding_character_incrementer (void)
 
int pg_database_encoding_max_length (void)
 
bool pg_verifymbstr (const char *mbstr, int len, bool noError)
 
bool pg_verify_mbstr (int encoding, const char *mbstr, int len, bool noError)
 
int pg_verify_mbstr_len (int encoding, const char *mbstr, int len, bool noError)
 
void check_encoding_conversion_args (int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)
 
void report_invalid_encoding (int encoding, const char *mbstr, int len)
 
void report_untranslatable_char (int src_encoding, int dest_encoding, const char *mbstr, int len)
 

Variables

static List * ConvProcList = NIL
 
static FmgrInfo * ToServerConvProc = NULL
 
static FmgrInfo * ToClientConvProc = NULL
 
static FmgrInfo * Utf8ToServerConvProc = NULL
 
static const pg_enc2name * ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
 
static const pg_enc2name * DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
 
static const pg_enc2name * MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
 
static bool backend_startup_complete = false
 
static int pending_client_encoding = PG_SQL_ASCII
 

Typedef Documentation

◆ ConvProcInfo

typedef struct ConvProcInfo ConvProcInfo

Function Documentation

◆ check_encoding_conversion_args()

void check_encoding_conversion_args ( int  src_encoding,
int  dest_encoding,
int  len,
int  expected_src_encoding,
int  expected_dest_encoding 
)

Definition at line 1672 of file mbutils.c.

1677{
1678 if (!PG_VALID_ENCODING(src_encoding))
1679 elog(ERROR, "invalid source encoding ID: %d", src_encoding);
1680 if (src_encoding != expected_src_encoding && expected_src_encoding >= 0)
1681 elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
1682 pg_enc2name_tbl[expected_src_encoding].name,
1683 pg_enc2name_tbl[src_encoding].name);
1684 if (!PG_VALID_ENCODING(dest_encoding))
1685 elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
1686 if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= 0)
1687 elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
1688 pg_enc2name_tbl[expected_dest_encoding].name,
1689 pg_enc2name_tbl[dest_encoding].name);
1690 if (len < 0)
1691 elog(ERROR, "encoding conversion length must not be negative");
1692}
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:308
const void size_t len
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:287
const char * name

References elog, ERROR, len, name, pg_enc2name_tbl, and PG_VALID_ENCODING.

◆ cliplen()

static int cliplen ( const char *  str,
int  len,
int  limit 
)
static

Definition at line 1153 of file mbutils.c.

1154{
1155 int l = 0;
1156
1157 len = Min(len, limit);
1158 while (l < len && str[l])
1159 l++;
1160 return l;
1161}
#define Min(x, y)
Definition: c.h:1016
const char * str

References len, Min, and str.

Referenced by pg_encoding_mbcliplen(), pg_mbcharcliplen(), and pgstat_clip_activity().

◆ getdatabaseencoding()

Datum getdatabaseencoding ( PG_FUNCTION_ARGS  )

Definition at line 1276 of file mbutils.c.

1277{
1278 return DirectFunctionCall1(namein, CStringGetDatum(DatabaseEncoding->name));
1279}
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:682
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:82
Datum namein(PG_FUNCTION_ARGS)
Definition: name.c:48
static Datum CStringGetDatum(const char *X)
Definition: postgres.h:360
const char * name
Definition: pg_wchar.h:341

References CStringGetDatum(), DatabaseEncoding, DirectFunctionCall1, pg_enc2name::name, and namein().

◆ GetDatabaseEncoding()

int GetDatabaseEncoding ( void  )

Definition at line 1264 of file mbutils.c.

1265{
1266 return DatabaseEncoding->encoding;
1267}
pg_enc encoding
Definition: pg_wchar.h:342

References DatabaseEncoding, and pg_enc2name::encoding.

Referenced by ascii(), BeginCopyFrom(), BeginCopyTo(), char2wchar(), chr(), CollationCreate(), CollationGetCollid(), compareStrings(), convert_from_utf8(), convert_to_utf8(), CopyConversionError(), CopyConvertBuf(), create_pg_locale_builtin(), create_pg_locale_icu(), create_pg_locale_libc(), cstr2sv(), dblink_connect(), dblink_get_conn(), DefineCollation(), Generic_Text_IC_like(), GenericMatchText(), get_collation_oid(), get_json_object_as_hash(), InitializeClientEncoding(), IsThereCollationInNamespace(), json_recv(), jsonb_from_cstring(), locate_stem_module(), LogicalOutputWrite(), makeJsonLexContext(), p_isspecial(), ParallelWorkerMain(), pg_database_encoding_character_incrementer(), pg_database_encoding_max_length(), pg_generic_charinc(), pg_perm_setlocale(), pg_unicode_to_server(), pg_unicode_to_server_noerror(), pg_verifymbstr(), pgss_store(), PLyUnicode_Bytes(), populate_array_json(), PrepareClientEncoding(), read_extension_script_file(), regc_ctype_get_cache(), SetClientEncoding(), str_casefold(), sv2cstr(), text_position_setup(), to_ascii_default(), to_char32(), to_pg_wchar(), tolower_libc_mb(), tolower_libc_sb(), toupper_libc_mb(), toupper_libc_sb(), type_maximum_size(), unicode_assigned(), unicode_norm_form_from_string(), wchar2char(), xml_in(), xml_is_document(), xmlparse(), and xmltotext_with_options().

◆ GetDatabaseEncodingName()

◆ GetMessageEncoding()

int GetMessageEncoding ( void  )

Definition at line 1311 of file mbutils.c.

1312{
1313 return MessageEncoding->encoding;
1314}
static const pg_enc2name * MessageEncoding
Definition: mbutils.c:83

References pg_enc2name::encoding, and MessageEncoding.

◆ InitializeClientEncoding()

void InitializeClientEncoding ( void  )

Definition at line 282 of file mbutils.c.

283{
284 int current_server_encoding;
285
288
291 {
292 /*
293 * Oops, the requested conversion is not available. We couldn't fail
294 * before, but we can now.
295 */
297 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
298 errmsg("conversion between %s and %s is not supported",
301 }
302
303 /*
304 * Also look up the UTF8-to-server conversion function if needed. Since
305 * the server encoding is fixed within any one backend process, we don't
306 * have to do this more than once.
307 */
308 current_server_encoding = GetDatabaseEncoding();
309 if (current_server_encoding != PG_UTF8 &&
310 current_server_encoding != PG_SQL_ASCII)
311 {
312 Oid utf8_to_server_proc;
313
315 utf8_to_server_proc =
317 current_server_encoding);
318 /* If there's no such conversion, just leave the pointer as NULL */
319 if (OidIsValid(utf8_to_server_proc))
320 {
321 FmgrInfo *finfo;
322
324 sizeof(FmgrInfo));
325 fmgr_info_cxt(utf8_to_server_proc, finfo,
327 /* Set Utf8ToServerConvProc only after data is fully valid */
328 Utf8ToServerConvProc = finfo;
329 }
330 }
331}
#define OidIsValid(objectId)
Definition: c.h:788
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define FATAL
Definition: elog.h:41
#define ereport(elevel,...)
Definition: elog.h:150
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:138
Assert(PointerIsAligned(start, uint64))
int GetDatabaseEncoding(void)
Definition: mbutils.c:1264
static FmgrInfo * Utf8ToServerConvProc
Definition: mbutils.c:76
const char * GetDatabaseEncodingName(void)
Definition: mbutils.c:1270
int SetClientEncoding(int encoding)
Definition: mbutils.c:209
int PrepareClientEncoding(int encoding)
Definition: mbutils.c:111
static bool backend_startup_complete
Definition: mbutils.c:91
static int pending_client_encoding
Definition: mbutils.c:92
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1229
MemoryContext TopMemoryContext
Definition: mcxt.c:166
Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding)
Definition: namespace.c:4150
@ PG_SQL_ASCII
Definition: pg_wchar.h:226
@ PG_UTF8
Definition: pg_wchar.h:232
unsigned int Oid
Definition: postgres_ext.h:32
static void AssertCouldGetRelation(void)
Definition: relcache.h:44
Definition: fmgr.h:57

References Assert(), AssertCouldGetRelation(), backend_startup_complete, ereport, errcode(), errmsg(), FATAL, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), GetDatabaseEncodingName(), MemoryContextAlloc(), name, OidIsValid, pending_client_encoding, pg_enc2name_tbl, PG_SQL_ASCII, PG_UTF8, PrepareClientEncoding(), SetClientEncoding(), TopMemoryContext, and Utf8ToServerConvProc.

Referenced by InitPostgres().

◆ length_in_encoding()

Datum length_in_encoding ( PG_FUNCTION_ARGS  )

Definition at line 618 of file mbutils.c.

619{
620 bytea *string = PG_GETARG_BYTEA_PP(0);
621 char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
622 int src_encoding = pg_char_to_encoding(src_encoding_name);
623 const char *src_str;
624 int len;
625 int retval;
626
627 if (src_encoding < 0)
629 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
630 errmsg("invalid encoding name \"%s\"",
631 src_encoding_name)));
632
633 len = VARSIZE_ANY_EXHDR(string);
634 src_str = VARDATA_ANY(string);
635
636 retval = pg_verify_mbstr_len(src_encoding, src_str, len, false);
637
638 PG_RETURN_INT32(retval);
639}
#define NameStr(name)
Definition: c.h:765
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
#define PG_GETARG_NAME(n)
Definition: fmgr.h:278
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
int pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1600
#define pg_char_to_encoding
Definition: pg_wchar.h:629
Definition: c.h:706
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition: varatt.h:472
static char * VARDATA_ANY(const void *PTR)
Definition: varatt.h:486

References ereport, errcode(), errmsg(), ERROR, len, NameStr, pg_char_to_encoding, PG_GETARG_BYTEA_PP, PG_GETARG_NAME, PG_RETURN_INT32, pg_verify_mbstr_len(), VARDATA_ANY(), and VARSIZE_ANY_EXHDR().

◆ perform_default_encoding_conversion()

static char * perform_default_encoding_conversion ( const char *  src,
int  len,
bool  is_client_to_server 
)
static

Definition at line 786 of file mbutils.c.

788{
789 char *result;
790 int src_encoding,
791 dest_encoding;
792 FmgrInfo *flinfo;
793
794 if (is_client_to_server)
795 {
796 src_encoding = ClientEncoding->encoding;
797 dest_encoding = DatabaseEncoding->encoding;
798 flinfo = ToServerConvProc;
799 }
800 else
801 {
802 src_encoding = DatabaseEncoding->encoding;
803 dest_encoding = ClientEncoding->encoding;
804 flinfo = ToClientConvProc;
805 }
806
807 if (flinfo == NULL)
808 return unconstify(char *, src);
809
810 /*
811 * Allocate space for conversion result, being wary of integer overflow.
812 * See comments in pg_do_encoding_conversion.
813 */
816 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
817 errmsg("out of memory"),
818 errdetail("String of %d bytes is too long for encoding conversion.",
819 len)));
820
821 result = (char *)
824
825 FunctionCall6(flinfo,
826 Int32GetDatum(src_encoding),
827 Int32GetDatum(dest_encoding),
828 CStringGetDatum(src),
829 CStringGetDatum(result),
831 BoolGetDatum(false));
832
833 /*
834 * Release extra space if there might be a lot --- see comments in
835 * pg_do_encoding_conversion.
836 */
837 if (len > 1000000)
838 {
839 Size resultlen = strlen(result);
840
841 if (resultlen >= MaxAllocSize)
843 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
844 errmsg("out of memory"),
845 errdetail("String of %d bytes is too long for encoding conversion.",
846 len)));
847
848 result = (char *) repalloc(result, resultlen + 1);
849 }
850
851 return result;
852}
#define unconstify(underlying_type, expr)
Definition: c.h:1243
size_t Size
Definition: c.h:624
int errdetail(const char *fmt,...)
Definition: elog.c:1216
#define MaxAllocSize
Definition: fe_memutils.h:22
#define FunctionCall6(flinfo, arg1, arg2, arg3, arg4, arg5, arg6)
Definition: fmgr.h:710
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:81
static FmgrInfo * ToServerConvProc
Definition: mbutils.c:68
static FmgrInfo * ToClientConvProc
Definition: mbutils.c:69
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1610
MemoryContext CurrentMemoryContext
Definition: mcxt.c:160
void * MemoryContextAllocHuge(MemoryContext context, Size size)
Definition: mcxt.c:1703
#define MaxAllocHugeSize
Definition: memutils.h:45
#define MAX_CONVERSION_GROWTH
Definition: pg_wchar.h:302
static Datum BoolGetDatum(bool X)
Definition: postgres.h:112
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:222

References BoolGetDatum(), ClientEncoding, CStringGetDatum(), CurrentMemoryContext, DatabaseEncoding, pg_enc2name::encoding, ereport, errcode(), errdetail(), errmsg(), ERROR, FunctionCall6, Int32GetDatum(), len, MAX_CONVERSION_GROWTH, MaxAllocHugeSize, MaxAllocSize, MemoryContextAllocHuge(), repalloc(), ToClientConvProc, ToServerConvProc, and unconstify.

Referenced by pg_any_to_server(), and pg_server_to_any().

◆ pg_any_to_server()

char * pg_any_to_server ( const char *  s,
int  len,
int  encoding 
)

Definition at line 679 of file mbutils.c.

680{
681 if (len <= 0)
682 return unconstify(char *, s); /* empty string is always valid */
683
686 {
687 /*
688 * No conversion is needed, but we must still validate the data.
689 */
690 (void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
691 return unconstify(char *, s);
692 }
693
695 {
696 /*
697 * No conversion is possible, but we must still validate the data,
698 * because the client-side code might have done string escaping using
699 * the selected client_encoding. If the client encoding is ASCII-safe
700 * then we just do a straight validation under that encoding. For an
701 * ASCII-unsafe encoding we have a problem: we dare not pass such data
702 * to the parser but we have no way to convert it. We compromise by
703 * rejecting the data if it contains any non-ASCII characters.
704 */
706 (void) pg_verify_mbstr(encoding, s, len, false);
707 else
708 {
709 int i;
710
711 for (i = 0; i < len; i++)
712 {
713 if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
715 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
716 errmsg("invalid byte value for encoding \"%s\": 0x%02x",
718 (unsigned char) s[i])));
719 }
720 }
721 return unconstify(char *, s);
722 }
723
724 /* Fast path if we can use cached conversion function */
727
728 /* General case ... will not work outside transactions */
729 return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
730 len,
731 encoding,
733}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1153
int i
Definition: isn.c:77
unsigned char * pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding)
Definition: mbutils.c:357
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1569
static char * perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)
Definition: mbutils.c:786
int32 encoding
Definition: pg_database.h:41
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:281

References ClientEncoding, DatabaseEncoding, encoding, pg_enc2name::encoding, ereport, errcode(), errmsg(), ERROR, i, IS_HIGHBIT_SET, len, name, perform_default_encoding_conversion(), pg_do_encoding_conversion(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_BE_ENCODING, pg_verify_mbstr(), and unconstify.

Referenced by ASN1_STRING_to_text(), cache_single_string(), db_encoding_convert(), dsnowball_lexize(), pg_client_to_server(), pg_stat_statements_internal(), pgp_armor_headers(), PLyUnicode_Bytes(), read_extension_script_file(), tsearch_readline(), utf_u2e(), X509_NAME_to_cstring(), and xml_recv().

◆ PG_char_to_encoding()

Datum PG_char_to_encoding ( PG_FUNCTION_ARGS  )

Definition at line 1288 of file mbutils.c.

1289{
1290 Name s = PG_GETARG_NAME(0);
1291
1292 PG_RETURN_INT32(pg_char_to_encoding(NameStr(*s)));
1293}
Definition: c.h:760

References NameStr, pg_char_to_encoding, PG_GETARG_NAME, and PG_RETURN_INT32.

◆ pg_client_encoding()

Datum pg_client_encoding ( PG_FUNCTION_ARGS  )

◆ pg_client_to_server()

char * pg_client_to_server ( const char *  s,
int  len 
)

Definition at line 663 of file mbutils.c.

664{
665 return pg_any_to_server(s, len, ClientEncoding->encoding);
666}
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:679

References ClientEncoding, pg_enc2name::encoding, len, and pg_any_to_server().

Referenced by exec_bind_message(), parse_fcall_arguments(), pq_getmsgstring(), and pq_getmsgtext().

◆ pg_convert()

Datum pg_convert ( PG_FUNCTION_ARGS  )

Definition at line 556 of file mbutils.c.

557{
558 bytea *string = PG_GETARG_BYTEA_PP(0);
559 char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
560 int src_encoding = pg_char_to_encoding(src_encoding_name);
561 char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
562 int dest_encoding = pg_char_to_encoding(dest_encoding_name);
563 const char *src_str;
564 char *dest_str;
565 bytea *retval;
566 int len;
567
568 if (src_encoding < 0)
570 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
571 errmsg("invalid source encoding name \"%s\"",
572 src_encoding_name)));
573 if (dest_encoding < 0)
575 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
576 errmsg("invalid destination encoding name \"%s\"",
577 dest_encoding_name)));
578
579 /* make sure that source string is valid */
580 len = VARSIZE_ANY_EXHDR(string);
581 src_str = VARDATA_ANY(string);
582 (void) pg_verify_mbstr(src_encoding, src_str, len, false);
583
584 /* perform conversion */
585 dest_str = (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, src_str),
586 len,
587 src_encoding,
588 dest_encoding);
589
590
591 /* return source string if no conversion happened */
592 if (dest_str == src_str)
593 PG_RETURN_BYTEA_P(string);
594
595 /*
596 * build bytea data type structure.
597 */
598 len = strlen(dest_str);
599 retval = (bytea *) palloc(len + VARHDRSZ);
600 SET_VARSIZE(retval, len + VARHDRSZ);
601 memcpy(VARDATA(retval), dest_str, len);
602 pfree(dest_str);
603
604 /* free memory if allocated by the toaster */
605 PG_FREE_IF_COPY(string, 0);
606
607 PG_RETURN_BYTEA_P(retval);
608}
#define VARHDRSZ
Definition: c.h:711
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc(Size size)
Definition: mcxt.c:1365
static char * VARDATA(const void *PTR)
Definition: varatt.h:305
static void SET_VARSIZE(void *PTR, Size len)
Definition: varatt.h:432

References ereport, errcode(), errmsg(), ERROR, len, NameStr, palloc(), pfree(), pg_char_to_encoding, pg_do_encoding_conversion(), PG_FREE_IF_COPY, PG_GETARG_BYTEA_PP, PG_GETARG_NAME, PG_RETURN_BYTEA_P, pg_verify_mbstr(), SET_VARSIZE(), unconstify, VARDATA(), VARDATA_ANY(), VARHDRSZ, and VARSIZE_ANY_EXHDR().

Referenced by pg_convert_from(), and pg_convert_to().

◆ pg_convert_from()

Datum pg_convert_from ( PG_FUNCTION_ARGS  )

Definition at line 529 of file mbutils.c.

530{
531 Datum string = PG_GETARG_DATUM(0);
532 Datum src_encoding_name = PG_GETARG_DATUM(1);
533 Datum dest_encoding_name = DirectFunctionCall1(namein,
534 CStringGetDatum(DatabaseEncoding->name));
535 Datum result;
536
537 result = DirectFunctionCall3(pg_convert, string,
538 src_encoding_name, dest_encoding_name);
539
540 /*
541 * pg_convert returns a bytea, which we in turn return as text, relying on
542 * the fact that they are both in fact varlena types, and thus
543 * structurally identical. Although not all bytea values are valid text,
544 * in this case it will be because we've told pg_convert to return one
545 * that is valid as text in the current database encoding.
546 */
547 PG_RETURN_DATUM(result);
548}
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:686
Datum pg_convert(PG_FUNCTION_ARGS)
Definition: mbutils.c:556
uint64_t Datum
Definition: postgres.h:70

References CStringGetDatum(), DatabaseEncoding, DirectFunctionCall1, DirectFunctionCall3, pg_enc2name::name, namein(), pg_convert(), PG_GETARG_DATUM, and PG_RETURN_DATUM.

◆ pg_convert_to()

Datum pg_convert_to ( PG_FUNCTION_ARGS  )

Definition at line 503 of file mbutils.c.

504{
505 Datum string = PG_GETARG_DATUM(0);
506 Datum dest_encoding_name = PG_GETARG_DATUM(1);
507 Datum src_encoding_name = DirectFunctionCall1(namein,
508 CStringGetDatum(DatabaseEncoding->name));
509 Datum result;
510
511 /*
512 * pg_convert expects a bytea as its first argument. We're passing it a
513 * text argument here, relying on the fact that they are both in fact
514 * varlena types, and thus structurally identical.
515 */
516 result = DirectFunctionCall3(pg_convert, string,
517 src_encoding_name, dest_encoding_name);
518
519 PG_RETURN_DATUM(result);
520}

References CStringGetDatum(), DatabaseEncoding, DirectFunctionCall1, DirectFunctionCall3, pg_enc2name::name, namein(), pg_convert(), PG_GETARG_DATUM, and PG_RETURN_DATUM.

◆ pg_database_encoding_character_incrementer()

mbcharacter_incrementer pg_database_encoding_character_incrementer ( void  )

Definition at line 1526 of file mbutils.c.

1527{
1528 /*
1529 * Eventually it might be best to add a field to pg_wchar_table[], but for
1530 * now we just use a switch.
1531 */
1532 switch (GetDatabaseEncoding())
1533 {
1534 case PG_UTF8:
1535 return pg_utf8_increment;
1536
1537 case PG_EUC_JP:
1538 return pg_eucjp_increment;
1539
1540 default:
1541 return pg_generic_charinc;
1542 }
1543}
static bool pg_generic_charinc(unsigned char *charptr, int len)
Definition: mbutils.c:1328
static bool pg_utf8_increment(unsigned char *charptr, int length)
Definition: mbutils.c:1362
static bool pg_eucjp_increment(unsigned char *charptr, int length)
Definition: mbutils.c:1440
@ PG_EUC_JP
Definition: pg_wchar.h:227

References GetDatabaseEncoding(), PG_EUC_JP, pg_eucjp_increment(), pg_generic_charinc(), PG_UTF8, and pg_utf8_increment().

Referenced by make_greater_string().

◆ pg_database_encoding_max_length()

◆ pg_do_encoding_conversion()

unsigned char * pg_do_encoding_conversion ( unsigned char *  src,
int  len,
int  src_encoding,
int  dest_encoding 
)

Definition at line 357 of file mbutils.c.

359{
360 unsigned char *result;
361 Oid proc;
362
363 if (len <= 0)
364 return src; /* empty string is always valid */
365
366 if (src_encoding == dest_encoding)
367 return src; /* no conversion required, assume valid */
368
369 if (dest_encoding == PG_SQL_ASCII)
370 return src; /* any string is valid in SQL_ASCII */
371
372 if (src_encoding == PG_SQL_ASCII)
373 {
374 /* No conversion is possible, but we must validate the result */
375 (void) pg_verify_mbstr(dest_encoding, (const char *) src, len, false);
376 return src;
377 }
378
379 if (!IsTransactionState()) /* shouldn't happen */
380 elog(ERROR, "cannot perform encoding conversion outside a transaction");
381
382 proc = FindDefaultConversionProc(src_encoding, dest_encoding);
383 if (!OidIsValid(proc))
385 (errcode(ERRCODE_UNDEFINED_FUNCTION),
386 errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
387 pg_encoding_to_char(src_encoding),
388 pg_encoding_to_char(dest_encoding))));
389
390 /*
391 * Allocate space for conversion result, being wary of integer overflow.
392 *
393 * len * MAX_CONVERSION_GROWTH is typically a vast overestimate of the
394 * required space, so it might exceed MaxAllocSize even though the result
395 * would actually fit. We do not want to hand back a result string that
396 * exceeds MaxAllocSize, because callers might not cope gracefully --- but
397 * if we just allocate more than that, and don't use it, that's fine.
398 */
401 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
402 errmsg("out of memory"),
403 errdetail("String of %d bytes is too long for encoding conversion.",
404 len)));
405
406 result = (unsigned char *)
409
410 (void) OidFunctionCall6(proc,
411 Int32GetDatum(src_encoding),
412 Int32GetDatum(dest_encoding),
413 CStringGetDatum((char *) src),
414 CStringGetDatum((char *) result),
416 BoolGetDatum(false));
417
418 /*
419 * If the result is large, it's worth repalloc'ing to release any extra
420 * space we asked for. The cutoff here is somewhat arbitrary, but we
421 * *must* check when len * MAX_CONVERSION_GROWTH exceeds MaxAllocSize.
422 */
423 if (len > 1000000)
424 {
425 Size resultlen = strlen((char *) result);
426
427 if (resultlen >= MaxAllocSize)
429 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
430 errmsg("out of memory"),
431 errdetail("String of %d bytes is too long for encoding conversion.",
432 len)));
433
434 result = (unsigned char *) repalloc(result, resultlen + 1);
435 }
436
437 return result;
438}
#define OidFunctionCall6(functionId, arg1, arg2, arg3, arg4, arg5, arg6)
Definition: fmgr.h:730
#define pg_encoding_to_char
Definition: pg_wchar.h:630
bool IsTransactionState(void)
Definition: xact.c:388

References BoolGetDatum(), CStringGetDatum(), CurrentMemoryContext, elog, ereport, errcode(), errdetail(), errmsg(), ERROR, FindDefaultConversionProc(), Int32GetDatum(), IsTransactionState(), len, MAX_CONVERSION_GROWTH, MaxAllocHugeSize, MaxAllocSize, MemoryContextAllocHuge(), OidFunctionCall6, OidIsValid, pg_encoding_to_char, PG_SQL_ASCII, pg_verify_mbstr(), and repalloc().

Referenced by convert_charset(), pg_any_to_server(), pg_convert(), and pg_server_to_any().

◆ pg_do_encoding_conversion_buf()

int pg_do_encoding_conversion_buf ( Oid  proc,
int  src_encoding,
int  dest_encoding,
unsigned char *  src,
int  srclen,
unsigned char *  dest,
int  destlen,
bool  noError 
)

Definition at line 470 of file mbutils.c.

476{
477 Datum result;
478
479 /*
480 * If the destination buffer is not large enough to hold the result in the
481 * worst case, limit the input size passed to the conversion function.
482 */
483 if ((Size) srclen >= ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH))
484 srclen = ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH);
485
486 result = OidFunctionCall6(proc,
487 Int32GetDatum(src_encoding),
488 Int32GetDatum(dest_encoding),
489 CStringGetDatum((char *) src),
490 CStringGetDatum((char *) dest),
491 Int32GetDatum(srclen),
492 BoolGetDatum(noError));
493 return DatumGetInt32(result);
494}
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:212

References BoolGetDatum(), CStringGetDatum(), DatumGetInt32(), generate_unaccent_rules::dest, Int32GetDatum(), MAX_CONVERSION_GROWTH, and OidFunctionCall6.

Referenced by CopyConversionError(), CopyConvertBuf(), and test_enc_conversion().

◆ pg_dsplen()

int pg_dsplen ( const char *  mbstr)

Definition at line 1033 of file mbutils.c.

1034{
1035 return pg_wchar_table[DatabaseEncoding->encoding].dsplen((const unsigned char *) mbstr);
1036}
mbdisplaylen_converter dsplen
Definition: pg_wchar.h:383

References DatabaseEncoding, pg_wchar_tbl::dsplen, pg_enc2name::encoding, and pg_wchar_table.

Referenced by p_isspecial().

◆ pg_encoding_max_length_sql()

Datum pg_encoding_max_length_sql ( PG_FUNCTION_ARGS  )

Definition at line 647 of file mbutils.c.

648{
649 int encoding = PG_GETARG_INT32(0);
650
653 else
655}
#define PG_RETURN_NULL()
Definition: fmgr.h:345
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269

References encoding, PG_GETARG_INT32, PG_RETURN_INT32, PG_RETURN_NULL, PG_VALID_ENCODING, and pg_wchar_table.

◆ pg_encoding_mb2wchar_with_len()

int pg_encoding_mb2wchar_with_len ( int  encoding,
const char *  from,
pg_wchar *  to,
int  len 
)

Definition at line 996 of file mbutils.c.

998{
999 return pg_wchar_table[encoding].mb2wchar_with_len((const unsigned char *) from, to, len);
1000}
mb2wchar_with_len_converter mb2wchar_with_len
Definition: pg_wchar.h:378

References encoding, len, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

◆ pg_encoding_mbcliplen()

int pg_encoding_mbcliplen ( int  encoding,
const char *  mbstr,
int  len,
int  limit 
)

Definition at line 1096 of file mbutils.c.

1098{
1099 mblen_converter mblen_fn;
1100 int clen = 0;
1101 int l;
1102
1103 /* optimization for single byte encoding */
1105 return cliplen(mbstr, len, limit);
1106
1107 mblen_fn = pg_wchar_table[encoding].mblen;
1108
1109 while (len > 0 && *mbstr)
1110 {
1111 l = (*mblen_fn) ((const unsigned char *) mbstr);
1112 if ((clen + l) > limit)
1113 break;
1114 clen += l;
1115 if (clen == limit)
1116 break;
1117 len -= l;
1118 mbstr += l;
1119 }
1120 return clen;
1121}
static int cliplen(const char *str, int len, int limit)
Definition: mbutils.c:1153
int(* mblen_converter)(const unsigned char *mbstr)
Definition: pg_wchar.h:366
mblen_converter mblen
Definition: pg_wchar.h:382
int pg_encoding_max_length(int encoding)
Definition: wchar.c:2213

References cliplen(), encoding, len, pg_wchar_tbl::mblen, pg_encoding_max_length(), and pg_wchar_table.

Referenced by pg_mbcliplen().

◆ PG_encoding_to_char()

Datum PG_encoding_to_char ( PG_FUNCTION_ARGS  )

Definition at line 1296 of file mbutils.c.

1297{
1298 int32 encoding = PG_GETARG_INT32(0);
1299 const char *encoding_name = pg_encoding_to_char(encoding);
1300
1301 return DirectFunctionCall1(namein, CStringGetDatum(encoding_name));
1302}
int32_t int32
Definition: c.h:548

References CStringGetDatum(), DirectFunctionCall1, encoding, namein(), pg_encoding_to_char, and PG_GETARG_INT32.

◆ pg_encoding_wchar2mb_with_len()

int pg_encoding_wchar2mb_with_len ( int  encoding,
const pg_wchar *  from,
char *  to,
int  len 
)

Definition at line 1018 of file mbutils.c.

1020{
1021 return pg_wchar_table[encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
1022}
wchar2mb_with_len_converter wchar2mb_with_len
Definition: pg_wchar.h:380

References encoding, len, pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

◆ pg_eucjp_increment()

static bool pg_eucjp_increment ( unsigned char *  charptr,
int  length 
)
static

Definition at line 1440 of file mbutils.c.

1441{
1442 unsigned char c1,
1443 c2;
1444 int i;
1445
1446 c1 = *charptr;
1447
1448 switch (c1)
1449 {
1450 case SS2: /* JIS X 0201 */
1451 if (length != 2)
1452 return false;
1453
1454 c2 = charptr[1];
1455
1456 if (c2 >= 0xdf)
1457 charptr[0] = charptr[1] = 0xa1;
1458 else if (c2 < 0xa1)
1459 charptr[1] = 0xa1;
1460 else
1461 charptr[1]++;
1462 break;
1463
1464 case SS3: /* JIS X 0212 */
1465 if (length != 3)
1466 return false;
1467
1468 for (i = 2; i > 0; i--)
1469 {
1470 c2 = charptr[i];
1471 if (c2 < 0xa1)
1472 {
1473 charptr[i] = 0xa1;
1474 return true;
1475 }
1476 else if (c2 < 0xfe)
1477 {
1478 charptr[i]++;
1479 return true;
1480 }
1481 }
1482
1483 /* Out of 3-byte code region */
1484 return false;
1485
1486 default:
1487 if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1488 {
1489 if (length != 2)
1490 return false;
1491
1492 for (i = 1; i >= 0; i--)
1493 {
1494 c2 = charptr[i];
1495 if (c2 < 0xa1)
1496 {
1497 charptr[i] = 0xa1;
1498 return true;
1499 }
1500 else if (c2 < 0xfe)
1501 {
1502 charptr[i]++;
1503 return true;
1504 }
1505 }
1506
1507 /* Out of 2 byte code region */
1508 return false;
1509 }
1510 else
1511 { /* ASCII, single byte */
1512 if (c1 > 0x7e)
1513 return false;
1514 (*charptr)++;
1515 }
1516 break;
1517 }
1518
1519 return true;
1520}
#define SS2
Definition: pg_wchar.h:38
#define SS3
Definition: pg_wchar.h:39

References i, IS_HIGHBIT_SET, SS2, and SS3.

Referenced by pg_database_encoding_character_incrementer().

◆ pg_generic_charinc()

static bool pg_generic_charinc ( unsigned char *  charptr,
int  len 
)
static

Definition at line 1328 of file mbutils.c.

1329{
1330 unsigned char *lastbyte = charptr + len - 1;
1331 mbchar_verifier mbverify;
1332
1333 /* We can just invoke the character verifier directly. */
1334 mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverifychar;
1335
1336 while (*lastbyte < (unsigned char) 255)
1337 {
1338 (*lastbyte)++;
1339 if ((*mbverify) (charptr, len) == len)
1340 return true;
1341 }
1342
1343 return false;
1344}
int(* mbchar_verifier)(const unsigned char *mbstr, int len)
Definition: pg_wchar.h:372
mbchar_verifier mbverifychar
Definition: pg_wchar.h:384

References GetDatabaseEncoding(), len, pg_wchar_tbl::mbverifychar, and pg_wchar_table.

Referenced by pg_database_encoding_character_incrementer().

◆ pg_get_client_encoding()

int pg_get_client_encoding ( void  )

Definition at line 337 of file mbutils.c.

338{
339 return ClientEncoding->encoding;
340}

References ClientEncoding, and pg_enc2name::encoding.

Referenced by BeginCopyFrom(), BeginCopyTo(), and xml_send().

◆ pg_get_client_encoding_name()

const char * pg_get_client_encoding_name ( void  )

Definition at line 346 of file mbutils.c.

347{
348 return ClientEncoding->name;
349}

References ClientEncoding, and pg_enc2name::name.

◆ pg_mb2wchar()

int pg_mb2wchar ( const char *  from,
pg_wchar *  to 
)

Definition at line 982 of file mbutils.c.

983{
984 return pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len((const unsigned char *) from, to, strlen(from));
985}

References DatabaseEncoding, pg_enc2name::encoding, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

◆ pg_mb2wchar_with_len()

◆ pg_mbcharcliplen()

int pg_mbcharcliplen ( const char *  mbstr,
int  len,
int  limit 
)

Definition at line 1128 of file mbutils.c.

1129{
1130 int clen = 0;
1131 int nch = 0;
1132 int l;
1133
1134 /* optimization for single byte encoding */
1135 if (pg_database_encoding_max_length() == 1)
1136 return cliplen(mbstr, len, limit);
1137
1138 while (len > 0 && *mbstr)
1139 {
1140 l = pg_mblen(mbstr);
1141 nch++;
1142 if (nch > limit)
1143 break;
1144 clen += l;
1145 len -= l;
1146 mbstr += l;
1147 }
1148 return clen;
1149}
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1549
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1026

References cliplen(), len, pg_database_encoding_max_length(), and pg_mblen().

Referenced by bpchar(), bpchar_input(), text_left(), text_right(), varchar(), and varchar_input().

◆ pg_mbcliplen()

◆ pg_mblen()

int pg_mblen ( const char *  mbstr)

Definition at line 1026 of file mbutils.c.

1027{
1028 return pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
1029}

References DatabaseEncoding, pg_enc2name::encoding, pg_wchar_tbl::mblen, and pg_wchar_table.

Referenced by addCompoundAffixFlagValue(), bit_in(), charlen_to_bytelen(), DCH_from_char(), dotrim(), find_word(), findchar(), findchar2(), findwrd(), gbt_var_node_cp_len(), get_modifiers(), get_nextfield(), get_wildcard_part(), getlexeme(), getNextFlagFromString(), gettoken_query(), gettoken_query_standard(), gettoken_query_websearch(), gettoken_tsvector(), hex_decode_safe_scalar(), infix(), initTrie(), lpad(), make_trigrams(), map_sql_identifier_to_xml_name(), map_xml_name_to_sql_identifier(), match_prosrc_to_literal(), mb_strchr(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), NUM_eat_non_data_chars(), NUM_processor(), parse_affentry(), parse_format(), parse_lquery(), parse_ltree(), parse_or_operator(), parse_re_flags(), parse_test_flags(), pg_base64_decode_internal(), pg_mbcharcliplen(), pg_mbstrlen(), pg_mbstrlen_with_len(), prssyntaxerror(), px_crypt_shacrypt(), readstoplist(), report_json_context(), rpad(), RS_compile(), RS_execute(), RS_isRegis(), similar_escape_internal(), split_text(), t_isalnum(), t_isalpha(), text_format(), text_position_next(), text_position_next_internal(), text_reverse(), text_substring(), text_to_bits(), textregexreplace(), thesaurusRead(), TParserGet(), translate(), ts_stat_sql(), tsvectorout(), unaccent_lexize(), varbit_in(), varstr_levenshtein(), and wchareq().

◆ pg_mbstrlen()

int pg_mbstrlen ( const char *  mbstr)

Definition at line 1040 of file mbutils.c.

1041{
1042 int len = 0;
1043
1044 /* optimization for single byte encoding */
1045 if (pg_database_encoding_max_length() == 1)
1046 return strlen(mbstr);
1047
1048 while (*mbstr)
1049 {
1050 mbstr += pg_mblen(mbstr);
1051 len++;
1052 }
1053 return len;
1054}

References len, pg_database_encoding_max_length(), and pg_mblen().

Referenced by NUM_processor(), and text_format_append_string().

◆ pg_mbstrlen_with_len()

int pg_mbstrlen_with_len ( const char *  mbstr,
int  limit 
)

Definition at line 1060 of file mbutils.c.

1061{
1062 int len = 0;
1063
1064 /* optimization for single byte encoding */
1065 if (pg_database_encoding_max_length() == 1)
1066 return limit;
1067
1068 while (limit > 0 && *mbstr)
1069 {
1070 int l = pg_mblen(mbstr);
1071
1072 limit -= l;
1073 mbstr += l;
1074 len++;
1075 }
1076 return len;
1077}

References len, pg_database_encoding_max_length(), and pg_mblen().

Referenced by bpchar(), bpchar_input(), bpcharlen(), executor_errposition(), lpad(), match_prosrc_to_query(), parser_errposition(), plpgsql_scanner_errposition(), rpad(), scanner_errposition(), similar_escape_internal(), text_left(), text_length(), text_position_get_match_pos(), text_right(), text_substring(), unicode_assigned(), unicode_is_normalized(), unicode_normalize_func(), and varstr_levenshtein().

◆ pg_server_to_any()

char * pg_server_to_any ( const char *  s,
int  len,
int  encoding 
)

Definition at line 752 of file mbutils.c.

753{
754 if (len <= 0)
755 return unconstify(char *, s); /* empty string is always valid */
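756
757 if (encoding == DatabaseEncoding->encoding ||
758 encoding == PG_SQL_ASCII)
759 return unconstify(char *, s); /* assume data is valid */
760
761 if (DatabaseEncoding->encoding == PG_SQL_ASCII)
762 {
763 /* No conversion is possible, but we must validate the result */
764 (void) pg_verify_mbstr(encoding, s, len, false);
765 return unconstify(char *, s);
766 }
767
768 /* Fast path if we can use cached conversion function */
769 if (encoding == ClientEncoding->encoding)
770 return perform_default_encoding_conversion(s, len, false);
771
772 /* General case ... will not work outside transactions */
773 return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
774 len,
775 DatabaseEncoding->encoding,
776 encoding);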
777}

References ClientEncoding, DatabaseEncoding, encoding, pg_enc2name::encoding, len, perform_default_encoding_conversion(), pg_do_encoding_conversion(), PG_SQL_ASCII, pg_verify_mbstr(), and unconstify.

Referenced by compareStrings(), CopyAttributeOutCSV(), CopyAttributeOutText(), CopyToTextLikeStart(), daitch_mokotoff(), dsnowball_lexize(), hv_fetch_string(), hv_store_string(), pg_server_to_client(), PLyUnicode_FromStringAndSize(), and utf_e2u().

◆ pg_server_to_client()

char * pg_server_to_client ( const char *  s,
int  len 
)

Definition at line 741 of file mbutils.c.

742{
743 return pg_server_to_any(s, len, ClientEncoding->encoding);
744}
char * pg_server_to_any(const char *s, int len, int encoding)
Definition: mbutils.c:752

References ClientEncoding, pg_enc2name::encoding, len, and pg_server_to_any().

Referenced by pq_puttextmessage(), pq_sendcountedtext(), pq_sendstring(), pq_sendtext(), and pq_writestring().

◆ pg_unicode_to_server()

void pg_unicode_to_server ( char32_t  c,
unsigned char *  s 
)

Definition at line 867 of file mbutils.c.

868{
869 unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
870 int c_as_utf8_len;
871 int server_encoding;
872
873 /*
874 * Complain if invalid Unicode code point. The choice of errcode here is
875 * debatable, but really our caller should have checked this anyway.
876 */
877 if (!is_valid_unicode_codepoint(c))
878 ereport(ERROR,
879 (errcode(ERRCODE_SYNTAX_ERROR),
880 errmsg("invalid Unicode code point")));
881
882 /* Otherwise, if it's in ASCII range, conversion is trivial */
883 if (c <= 0x7F)
884 {
885 s[0] = (unsigned char) c;
886 s[1] = '\0';
887 return;
888 }
889
890 /* If the server encoding is UTF-8, we just need to reformat the code */
891 server_encoding = GetDatabaseEncoding();
892 if (server_encoding == PG_UTF8)
893 {
894 unicode_to_utf8(c, s);
895 s[pg_utf_mblen(s)] = '\0';
896 return;
897 }
898
899 /* For all other cases, we must have a conversion function available */
900 if (Utf8ToServerConvProc == NULL)
901 ereport(ERROR,
902 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
903 errmsg("conversion between %s and %s is not supported",
904 pg_enc2name_tbl[PG_UTF8].name,
905 GetDatabaseEncodingName())));
906
907 /* Construct UTF-8 source string */
908 unicode_to_utf8(c, c_as_utf8);
909 c_as_utf8_len = pg_utf_mblen(c_as_utf8);
910 c_as_utf8[c_as_utf8_len] = '\0';
911
912 /* Convert, or throw error if we can't */
913 FunctionCall6(Utf8ToServerConvProc,
914 Int32GetDatum(PG_UTF8),
915 Int32GetDatum(server_encoding),
916 CStringGetDatum((char *) c_as_utf8),
917 CStringGetDatum((char *) s),
918 Int32GetDatum(c_as_utf8_len),
919 BoolGetDatum(false));
920}
#define MAX_MULTIBYTE_CHAR_LEN
Definition: pg_wchar.h:33
#define pg_utf_mblen
Definition: pg_wchar.h:633
static unsigned char * unicode_to_utf8(char32_t c, unsigned char *utf8string)
Definition: pg_wchar.h:575
static bool is_valid_unicode_codepoint(char32_t c)
Definition: pg_wchar.h:519
char * c

References BoolGetDatum(), CStringGetDatum(), ereport, errcode(), errmsg(), ERROR, FunctionCall6, GetDatabaseEncoding(), GetDatabaseEncodingName(), Int32GetDatum(), is_valid_unicode_codepoint(), MAX_MULTIBYTE_CHAR_LEN, name, pg_enc2name_tbl, PG_UTF8, pg_utf_mblen, unicode_to_utf8(), and Utf8ToServerConvProc.

Referenced by addunicode(), addUnicodeChar(), map_xml_name_to_sql_identifier(), str_udeescape(), and unistr().

◆ pg_unicode_to_server_noerror()

bool pg_unicode_to_server_noerror ( char32_t  c,
unsigned char *  s 
)

Definition at line 929 of file mbutils.c.

930{
931 unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
932 int c_as_utf8_len;
933 int converted_len;
934 int server_encoding;
935
936 /* Fail if invalid Unicode code point */
937 if (!is_valid_unicode_codepoint(c))
938 return false;
939
940 /* Otherwise, if it's in ASCII range, conversion is trivial */
941 if (c <= 0x7F)
942 {
943 s[0] = (unsigned char) c;
944 s[1] = '\0';
945 return true;
946 }
947
948 /* If the server encoding is UTF-8, we just need to reformat the code */
949 server_encoding = GetDatabaseEncoding();
950 if (server_encoding == PG_UTF8)
951 {
952 unicode_to_utf8(c, s);
953 s[pg_utf_mblen(s)] = '\0';
954 return true;
955 }
956
957 /* For all other cases, we must have a conversion function available */
958 if (Utf8ToServerConvProc == NULL)
959 return false;
960
961 /* Construct UTF-8 source string */
962 unicode_to_utf8(c, c_as_utf8);
963 c_as_utf8_len = pg_utf_mblen(c_as_utf8);
964 c_as_utf8[c_as_utf8_len] = '\0';
965
966 /* Convert, but without throwing error if we can't */
967 converted_len = DatumGetInt32(FunctionCall6(Utf8ToServerConvProc,
968 Int32GetDatum(PG_UTF8),
969 Int32GetDatum(server_encoding),
970 CStringGetDatum((char *) c_as_utf8),
971 CStringGetDatum((char *) s),
972 Int32GetDatum(c_as_utf8_len),
973 BoolGetDatum(true)));
974
975 /* Conversion was successful iff it consumed the whole input */
976 return (converted_len == c_as_utf8_len);
977}

References BoolGetDatum(), CStringGetDatum(), DatumGetInt32(), FunctionCall6, GetDatabaseEncoding(), Int32GetDatum(), is_valid_unicode_codepoint(), MAX_MULTIBYTE_CHAR_LEN, PG_UTF8, pg_utf_mblen, unicode_to_utf8(), and Utf8ToServerConvProc.

Referenced by addUnicodeChar(), and json_lex_string().

◆ pg_utf8_increment()

static bool pg_utf8_increment ( unsigned char *  charptr,
int  length 
)
static

Definition at line 1362 of file mbutils.c.

1363{
1364 unsigned char a;
1365 unsigned char limit;
1366
1367 switch (length)
1368 {
1369 default:
1370 /* reject lengths 5 and 6 for now */
1371 return false;
1372 case 4:
1373 a = charptr[3];
1374 if (a < 0xBF)
1375 {
1376 charptr[3]++;
1377 break;
1378 }
1379 /* FALL THRU */
1380 case 3:
1381 a = charptr[2];
1382 if (a < 0xBF)
1383 {
1384 charptr[2]++;
1385 break;
1386 }
1387 /* FALL THRU */
1388 case 2:
1389 a = charptr[1];
1390 switch (*charptr)
1391 {
1392 case 0xED:
1393 limit = 0x9F;
1394 break;
1395 case 0xF4:
1396 limit = 0x8F;
1397 break;
1398 default:
1399 limit = 0xBF;
1400 break;
1401 }
1402 if (a < limit)
1403 {
1404 charptr[1]++;
1405 break;
1406 }
1407 /* FALL THRU */
1408 case 1:
1409 a = *charptr;
1410 if (a == 0x7F || a == 0xDF || a == 0xEF || a == 0xF4)
1411 return false;
1412 charptr[0]++;
1413 break;
1414 }
1415
1416 return true;
1417}
int a
Definition: isn.c:73

References a.

Referenced by pg_database_encoding_character_incrementer().

◆ pg_verify_mbstr()

bool pg_verify_mbstr ( int  encoding,
const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1569 of file mbutils.c.

1570{
1571 int oklen;
1572
1573 Assert(PG_VALID_ENCODING(encoding));
1574
1575 oklen = pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len);
1576 if (oklen != len)
1577 {
1578 if (noError)
1579 return false;
1580 report_invalid_encoding(encoding, mbstr + oklen, len - oklen);
1581 }
1582 return true;
1583}
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1701
mbstr_verifier mbverifystr
Definition: pg_wchar.h:385

References Assert(), encoding, len, pg_wchar_tbl::mbverifystr, PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().

Referenced by AddFileToBackupManifest(), LogicalOutputWrite(), pg_any_to_server(), pg_convert(), pg_do_encoding_conversion(), pg_server_to_any(), pg_verifymbstr(), and read_extension_script_file().

◆ pg_verify_mbstr_len()

int pg_verify_mbstr_len ( int  encoding,
const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1600 of file mbutils.c.

1601{
1602 mbchar_verifier mbverifychar;
1603 int mb_len;
1604
1605 Assert(PG_VALID_ENCODING(encoding));
1606
1607 /*
1608 * In single-byte encodings, we need only reject nulls (\0).
1609 */
1610 if (pg_encoding_max_length(encoding) <= 1)
1611 {
1612 const char *nullpos = memchr(mbstr, 0, len);
1613
1614 if (nullpos == NULL)
1615 return len;
1616 if (noError)
1617 return -1;
1618 report_invalid_encoding(encoding, nullpos, 1);
1619 }
1620
1621 /* fetch function pointer just once */
1622 mbverifychar = pg_wchar_table[encoding].mbverifychar;
1623
1624 mb_len = 0;
1625
1626 while (len > 0)
1627 {
1628 int l;
1629
1630 /* fast path for ASCII-subset characters */
1631 if (!IS_HIGHBIT_SET(*mbstr))
1632 {
1633 if (*mbstr != '\0')
1634 {
1635 mb_len++;
1636 mbstr++;
1637 len--;
1638 continue;
1639 }
1640 if (noError)
1641 return -1;
1642 report_invalid_encoding(encoding, mbstr, len);
1643 }
1644
1645 l = (*mbverifychar) ((const unsigned char *) mbstr, len);
1646
1647 if (l < 0)
1648 {
1649 if (noError)
1650 return -1;
1651 report_invalid_encoding(encoding, mbstr, len);
1652 }
1653
1654 mbstr += l;
1655 len -= l;
1656 mb_len++;
1657 }
1658 return mb_len;
1659}

References Assert(), encoding, IS_HIGHBIT_SET, len, pg_wchar_tbl::mbverifychar, pg_encoding_max_length(), PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().

Referenced by length_in_encoding().

◆ pg_verifymbstr()

bool pg_verifymbstr ( const char *  mbstr,
int  len,
bool  noError 
)

◆ pg_wchar2mb()

int pg_wchar2mb ( const pg_wchar from,
char *  to 
)

Definition at line 1004 of file mbutils.c.

1005{
1006 return pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len(from, (unsigned char *) to, pg_wchar_strlen(from));
1007}
size_t pg_wchar_strlen(const pg_wchar *str)
Definition: wstrncmp.c:70

References DatabaseEncoding, pg_enc2name::encoding, pg_wchar_strlen(), pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

◆ pg_wchar2mb_with_len()

int pg_wchar2mb_with_len ( const pg_wchar from,
char *  to,
int  len 
)

◆ PrepareClientEncoding()

int PrepareClientEncoding ( int  encoding)

Definition at line 111 of file mbutils.c.

112{
113 int current_server_encoding;
114 ListCell *lc;
115
116 if (!PG_VALID_FE_ENCODING(encoding))
117 return -1;
118
119 /* Can't do anything during startup, per notes above */
120 if (!backend_startup_complete)
121 return 0;
122
123 current_server_encoding = GetDatabaseEncoding();
124
125 /*
126 * Check for cases that require no conversion function.
127 */
128 if (current_server_encoding == encoding ||
129 current_server_encoding == PG_SQL_ASCII ||
130 encoding == PG_SQL_ASCII)
131 return 0;
132
133 if (IsTransactionState())
134 {
135 /*
136 * If we're in a live transaction, it's safe to access the catalogs,
137 * so look up the functions. We repeat the lookup even if the info is
138 * already cached, so that we can react to changes in the contents of
139 * pg_conversion.
140 */
141 Oid to_server_proc,
142 to_client_proc;
143 ConvProcInfo *convinfo;
144 MemoryContext oldcontext;
145
146 to_server_proc = FindDefaultConversionProc(encoding,
147 current_server_encoding);
148 if (!OidIsValid(to_server_proc))
149 return -1;
150 to_client_proc = FindDefaultConversionProc(current_server_encoding,
151 encoding);
152 if (!OidIsValid(to_client_proc))
153 return -1;
154
155 /*
156 * Load the fmgr info into TopMemoryContext (could still fail here)
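157 */
158 convinfo = (ConvProcInfo *) MemoryContextAlloc(TopMemoryContext,
159 sizeof(ConvProcInfo));
160 convinfo->s_encoding = current_server_encoding;
161 convinfo->c_encoding = encoding;
162 fmgr_info_cxt(to_server_proc, &convinfo->to_server_info,
163 TopMemoryContext);
164 fmgr_info_cxt(to_client_proc, &convinfo->to_client_info,
165 TopMemoryContext);
166
167 /* Attach new info to head of list */
168 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
169 ConvProcList = lcons(convinfo, ConvProcList);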
170 MemoryContextSwitchTo(oldcontext);
171
172 /*
173 * We cannot yet remove any older entry for the same encoding pair,
174 * since it could still be in use. SetClientEncoding will clean up.
175 */
176
177 return 0; /* success */
178 }
179 else
180 {
181 /*
182 * If we're not in a live transaction, the only thing we can do is
183 * restore a previous setting using the cache. This covers all
184 * transaction-rollback cases. The only case it might not work for is
185 * trying to change client_encoding on the fly by editing
186 * postgresql.conf and SIGHUP'ing. Which would probably be a stupid
187 * thing to do anyway.
188 */
189 foreach(lc, ConvProcList)
190 {
191 ConvProcInfo *oldinfo = (ConvProcInfo *) lfirst(lc);
192
193 if (oldinfo->s_encoding == current_server_encoding &&
194 oldinfo->c_encoding == encoding)
195 return 0;
196 }
197
198 return -1; /* it's not cached, so fail */
199 }
200}
List * lcons(void *datum, List *list)
Definition: list.c:495
static List * ConvProcList
Definition: mbutils.c:62
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
#define lfirst(lc)
Definition: pg_list.h:172
#define PG_VALID_FE_ENCODING(_enc)
Definition: pg_wchar.h:291
int s_encoding
Definition: mbutils.c:56
FmgrInfo to_client_info
Definition: mbutils.c:59
int c_encoding
Definition: mbutils.c:57
FmgrInfo to_server_info
Definition: mbutils.c:58

References backend_startup_complete, ConvProcInfo::c_encoding, ConvProcList, encoding, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), IsTransactionState(), lcons(), lfirst, MemoryContextAlloc(), MemoryContextSwitchTo(), OidIsValid, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, ConvProcInfo::to_server_info, and TopMemoryContext.

Referenced by check_client_encoding(), and InitializeClientEncoding().

◆ report_invalid_encoding()

void report_invalid_encoding ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 1701 of file mbutils.c.

1702{
1703 int l = pg_encoding_mblen_or_incomplete(encoding, mbstr, len);
1704 char buf[8 * 5 + 1];
1705 char *p = buf;
1706 int j,
1707 jlimit;
1708
1709 jlimit = Min(l, len);
1710 jlimit = Min(jlimit, 8); /* prevent buffer overrun */
1711
1712 for (j = 0; j < jlimit; j++)
1713 {
1714 p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
1715 if (j < jlimit - 1)
1716 p += sprintf(p, " ");
1717 }
1718
1719 ereport(ERROR,
1720 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1721 errmsg("invalid byte sequence for encoding \"%s\": %s",
1722 pg_enc2name_tbl[encoding].name,
1723 buf)));
1724}
int j
Definition: isn.c:78
static char buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:71
#define sprintf
Definition: port.h:262
int pg_encoding_mblen_or_incomplete(int encoding, const char *mbstr, size_t remaining)
Definition: wchar.c:2147

References buf, encoding, ereport, errcode(), errmsg(), ERROR, j, len, Min, name, pg_enc2name_tbl, pg_encoding_mblen_or_incomplete(), and sprintf.

Referenced by big52euc_tw(), big52mic(), CopyConversionError(), euc_cn2mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), iso8859_1_to_utf8(), latin2mic(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_verify_mbstr(), pg_verify_mbstr_len(), shift_jis_20042euc_jis_2004(), sjis2euc_jp(), sjis2mic(), test_enc_conversion(), utf8_to_iso8859_1(), and UtfToLocal().

◆ report_untranslatable_char()

void report_untranslatable_char ( int  src_encoding,
int  dest_encoding,
const char *  mbstr,
int  len 
)

Definition at line 1733 of file mbutils.c.

1735{
1736 int l;
1737 char buf[8 * 5 + 1];
1738 char *p = buf;
1739 int j,
1740 jlimit;
1741
1742 /*
1743 * We probably could use plain pg_encoding_mblen(), because
1744 * gb18030_to_utf8() verifies before it converts. All conversions should.
1745 * For src_encoding!=GB18030, len>0 meets pg_encoding_mblen() needs. Even
1746 * so, be defensive, since a buggy conversion might pass invalid data.
1747 * This is not a performance-critical path.
1748 */
1749 l = pg_encoding_mblen_or_incomplete(src_encoding, mbstr, len);
1750 jlimit = Min(l, len);
1751 jlimit = Min(jlimit, 8); /* prevent buffer overrun */
1752
1753 for (j = 0; j < jlimit; j++)
1754 {
1755 p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
1756 if (j < jlimit - 1)
1757 p += sprintf(p, " ");
1758 }
1759
1760 ereport(ERROR,
1761 (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
1762 errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
1763 buf,
1764 pg_enc2name_tbl[src_encoding].name,
1765 pg_enc2name_tbl[dest_encoding].name)));
1766}

References buf, ereport, errcode(), errmsg(), ERROR, j, len, Min, name, pg_enc2name_tbl, pg_encoding_mblen_or_incomplete(), and sprintf.

Referenced by big52euc_tw(), big52mic(), euc_tw2big5(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), utf8_to_iso8859_1(), and UtfToLocal().

◆ SetClientEncoding()

int SetClientEncoding ( int  encoding)

Definition at line 209 of file mbutils.c.

210{
211 int current_server_encoding;
212 bool found;
213 ListCell *lc;
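214
215 if (!PG_VALID_FE_ENCODING(encoding))
216 return -1;
217
218 /* Can't do anything during startup, per notes above */
219 if (!backend_startup_complete)
220 {
221 pending_client_encoding = encoding;
222 return 0;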
223 }
224
225 current_server_encoding = GetDatabaseEncoding();
226
227 /*
228 * Check for cases that require no conversion function.
229 */
230 if (current_server_encoding == encoding ||
231 current_server_encoding == PG_SQL_ASCII ||
232 encoding == PG_SQL_ASCII)
233 {
234 ClientEncoding = &pg_enc2name_tbl[encoding];
235 ToServerConvProc = NULL;
236 ToClientConvProc = NULL;
237 return 0;
238 }
239
240 /*
241 * Search the cache for the entry previously prepared by
242 * PrepareClientEncoding; if there isn't one, we lose. While at it,
243 * release any duplicate entries so that repeated Prepare/Set cycles don't
244 * leak memory.
245 */
246 found = false;
247 foreach(lc, ConvProcList)
248 {
249 ConvProcInfo *convinfo = (ConvProcInfo *) lfirst(lc);
250
251 if (convinfo->s_encoding == current_server_encoding &&
252 convinfo->c_encoding == encoding)
253 {
254 if (!found)
255 {
256 /* Found newest entry, so set up */
257 ClientEncoding = &pg_enc2name_tbl[encoding];
258 ToServerConvProc = &convinfo->to_server_info;
259 ToClientConvProc = &convinfo->to_client_info;
260 found = true;
261 }
262 else
263 {
264 /* Duplicate entry, release it */
265 ConvProcList = foreach_delete_current(ConvProcList, lc);
266 pfree(convinfo);
267 }
268 }
269 }
270
271 if (found)
272 return 0; /* success */
273 else
274 return -1; /* it's not cached, so fail */
275}
#define foreach_delete_current(lst, var_or_cell)
Definition: pg_list.h:391

References backend_startup_complete, ConvProcInfo::c_encoding, ClientEncoding, ConvProcList, encoding, foreach_delete_current, GetDatabaseEncoding(), lfirst, pending_client_encoding, pfree(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, ConvProcInfo::to_server_info, ToClientConvProc, and ToServerConvProc.

Referenced by assign_client_encoding(), InitializeClientEncoding(), and ParallelWorkerMain().

◆ SetDatabaseEncoding()

void SetDatabaseEncoding ( int  encoding)

Definition at line 1164 of file mbutils.c.

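1165{
1166 if (!PG_VALID_BE_ENCODING(encoding))
1167 elog(ERROR, "invalid database encoding: %d", encoding);
1168
1169 DatabaseEncoding = &pg_enc2name_tbl[encoding];
1170 Assert(DatabaseEncoding->encoding == encoding);
1171}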

References Assert(), DatabaseEncoding, elog, encoding, pg_enc2name::encoding, ERROR, pg_enc2name_tbl, and PG_VALID_BE_ENCODING.

Referenced by CheckMyDatabase().

◆ SetMessageEncoding()

void SetMessageEncoding ( int  encoding)

Definition at line 1174 of file mbutils.c.

1175{
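1176 /* Some calls happen before we can elog()! */
1177 Assert(PG_VALID_ENCODING(encoding));
1178
1179 MessageEncoding = &pg_enc2name_tbl[encoding];
1180 Assert(MessageEncoding->encoding == encoding);
1181}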

References Assert(), encoding, pg_enc2name::encoding, MessageEncoding, pg_enc2name_tbl, and PG_VALID_ENCODING.

Referenced by pg_perm_setlocale().

Variable Documentation

◆ backend_startup_complete

bool backend_startup_complete = false
static

Definition at line 91 of file mbutils.c.

Referenced by InitializeClientEncoding(), PrepareClientEncoding(), and SetClientEncoding().

◆ ClientEncoding

◆ ConvProcList

List* ConvProcList = NIL
static

Definition at line 62 of file mbutils.c.

Referenced by PrepareClientEncoding(), and SetClientEncoding().

◆ DatabaseEncoding

◆ MessageEncoding

const pg_enc2name* MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
static

Definition at line 83 of file mbutils.c.

Referenced by GetMessageEncoding(), and SetMessageEncoding().

◆ pending_client_encoding

int pending_client_encoding = PG_SQL_ASCII
static

Definition at line 92 of file mbutils.c.

Referenced by InitializeClientEncoding(), and SetClientEncoding().

◆ ToClientConvProc

FmgrInfo* ToClientConvProc = NULL
static

Definition at line 69 of file mbutils.c.

Referenced by perform_default_encoding_conversion(), and SetClientEncoding().

◆ ToServerConvProc

FmgrInfo* ToServerConvProc = NULL
static

Definition at line 68 of file mbutils.c.

Referenced by perform_default_encoding_conversion(), and SetClientEncoding().

◆ Utf8ToServerConvProc

FmgrInfo* Utf8ToServerConvProc = NULL
static