PostgreSQL Source Code git master
Loading...
Searching...
No Matches
mbutils.c File Reference
#include "postgres.h"
#include "access/xact.h"
#include "catalog/namespace.h"
#include "mb/pg_wchar.h"
#include "utils/fmgrprotos.h"
#include "utils/memdebug.h"
#include "utils/memutils.h"
#include "utils/relcache.h"
#include "varatt.h"
Include dependency graph for mbutils.c:

Go to the source code of this file.

Data Structures

struct  ConvProcInfo
 

Typedefs

typedef struct ConvProcInfo ConvProcInfo
 

Functions

static char * perform_default_encoding_conversion (const char *src, int len, bool is_client_to_server)
 
static int cliplen (const char *str, int len, int limit)
 
static pg_noreturn void report_invalid_encoding_int (int encoding, const char *mbstr, int mblen, int len)
 
static pg_noreturn void report_invalid_encoding_db (const char *mbstr, int mblen, int len)
 
int PrepareClientEncoding (int encoding)
 
int SetClientEncoding (int encoding)
 
void InitializeClientEncoding (void)
 
int pg_get_client_encoding (void)
 
const char * pg_get_client_encoding_name (void)
 
unsigned char * pg_do_encoding_conversion (unsigned char *src, int len, int src_encoding, int dest_encoding)
 
int pg_do_encoding_conversion_buf (Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError)
 
Datum pg_convert_to (PG_FUNCTION_ARGS)
 
Datum pg_convert_from (PG_FUNCTION_ARGS)
 
Datum pg_convert (PG_FUNCTION_ARGS)
 
Datum length_in_encoding (PG_FUNCTION_ARGS)
 
Datum pg_encoding_max_length_sql (PG_FUNCTION_ARGS)
 
char * pg_client_to_server (const char *s, int len)
 
char * pg_any_to_server (const char *s, int len, int encoding)
 
char * pg_server_to_client (const char *s, int len)
 
char * pg_server_to_any (const char *s, int len, int encoding)
 
void pg_unicode_to_server (char32_t c, unsigned char *s)
 
bool pg_unicode_to_server_noerror (char32_t c, unsigned char *s)
 
int pg_mb2wchar (const char *from, pg_wchar *to)
 
int pg_mb2wchar_with_len (const char *from, pg_wchar *to, int len)
 
int pg_encoding_mb2wchar_with_len (int encoding, const char *from, pg_wchar *to, int len)
 
int pg_wchar2mb (const pg_wchar *from, char *to)
 
int pg_wchar2mb_with_len (const pg_wchar *from, char *to, int len)
 
int pg_encoding_wchar2mb_with_len (int encoding, const pg_wchar *from, char *to, int len)
 
int pg_mblen_cstr (const char *mbstr)
 
int pg_mblen_range (const char *mbstr, const char *end)
 
int pg_mblen_with_len (const char *mbstr, int limit)
 
int pg_mblen_unbounded (const char *mbstr)
 
int pg_mblen (const char *mbstr)
 
int pg_dsplen (const char *mbstr)
 
int pg_mbstrlen (const char *mbstr)
 
int pg_mbstrlen_with_len (const char *mbstr, int limit)
 
int pg_mbcliplen (const char *mbstr, int len, int limit)
 
int pg_encoding_mbcliplen (int encoding, const char *mbstr, int len, int limit)
 
int pg_mbcharcliplen (const char *mbstr, int len, int limit)
 
void SetDatabaseEncoding (int encoding)
 
void SetMessageEncoding (int encoding)
 
int GetDatabaseEncoding (void)
 
const char * GetDatabaseEncodingName (void)
 
Datum getdatabaseencoding (PG_FUNCTION_ARGS)
 
Datum pg_client_encoding (PG_FUNCTION_ARGS)
 
Datum PG_char_to_encoding (PG_FUNCTION_ARGS)
 
Datum PG_encoding_to_char (PG_FUNCTION_ARGS)
 
int GetMessageEncoding (void)
 
static bool pg_generic_charinc (unsigned char *charptr, int len)
 
static bool pg_utf8_increment (unsigned char *charptr, int length)
 
static bool pg_eucjp_increment (unsigned char *charptr, int length)
 
mbcharacter_incrementer pg_database_encoding_character_incrementer (void)
 
int pg_database_encoding_max_length (void)
 
bool pg_verifymbstr (const char *mbstr, int len, bool noError)
 
bool pg_verify_mbstr (int encoding, const char *mbstr, int len, bool noError)
 
int pg_verify_mbstr_len (int encoding, const char *mbstr, int len, bool noError)
 
void check_encoding_conversion_args (int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)
 
void report_invalid_encoding (int encoding, const char *mbstr, int len)
 
void report_untranslatable_char (int src_encoding, int dest_encoding, const char *mbstr, int len)
 

Variables

static List * ConvProcList = NIL
 
static FmgrInfo * ToServerConvProc = NULL
 
static FmgrInfo * ToClientConvProc = NULL
 
static FmgrInfo * Utf8ToServerConvProc = NULL
 
static const pg_enc2name * ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
 
static const pg_enc2name * DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
 
static const pg_enc2name * MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
 
static bool backend_startup_complete = false
 
static int pending_client_encoding = PG_SQL_ASCII
 

Typedef Documentation

◆ ConvProcInfo

Function Documentation

◆ check_encoding_conversion_args()

void check_encoding_conversion_args ( int  src_encoding,
int  dest_encoding,
int  len,
int  expected_src_encoding,
int  expected_dest_encoding 
)

Definition at line 1795 of file mbutils.c.

1800{
1802 elog(ERROR, "invalid source encoding ID: %d", src_encoding);
1804 elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
1808 elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
1810 elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
1813 if (len < 0)
1814 elog(ERROR, "encoding conversion length must not be negative");
1815}
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
const pg_enc2name pg_enc2name_tbl[]
Definition encnames.c:308
const void size_t len
#define PG_VALID_ENCODING(_enc)
Definition pg_wchar.h:287
static int fb(int x)
const char * name

References elog, ERROR, fb(), len, name, pg_enc2name_tbl, and PG_VALID_ENCODING.

◆ cliplen()

static int cliplen ( const char * str,
int  len,
int  limit 
)
static

Definition at line 1276 of file mbutils.c.

1277{
1278 int l = 0;
1279
1280 len = Min(len, limit);
1281 while (l < len && str[l])
1282 l++;
1283 return l;
1284}
#define Min(x, y)
Definition c.h:997
const char * str

References len, Min, and str.

Referenced by pg_encoding_mbcliplen(), pg_mbcharcliplen(), and pgstat_clip_activity().

◆ getdatabaseencoding()

Datum getdatabaseencoding ( PG_FUNCTION_ARGS  )

Definition at line 1399 of file mbutils.c.

1400{
1402}
#define DirectFunctionCall1(func, arg1)
Definition fmgr.h:684
static const pg_enc2name * DatabaseEncoding
Definition mbutils.c:83
Datum namein(PG_FUNCTION_ARGS)
Definition name.c:48
static Datum CStringGetDatum(const char *X)
Definition postgres.h:380
const char * name
Definition pg_wchar.h:341

References CStringGetDatum(), DatabaseEncoding, DirectFunctionCall1, pg_enc2name::name, and namein().

◆ GetDatabaseEncoding()

int GetDatabaseEncoding ( void  )

Definition at line 1387 of file mbutils.c.

1388{
1389 return DatabaseEncoding->encoding;
1390}
pg_enc encoding
Definition pg_wchar.h:342

References DatabaseEncoding, and pg_enc2name::encoding.

Referenced by ascii(), BeginCopyFrom(), BeginCopyTo(), char2wchar(), chr(), CollationCreate(), CollationGetCollid(), compareStrings(), convert_from_utf8(), convert_to_utf8(), CopyConversionError(), CopyConvertBuf(), create_pg_locale_builtin(), create_pg_locale_icu(), create_pg_locale_libc(), cstr2sv(), dblink_connect(), dblink_get_conn(), DefineCollation(), Generic_Text_IC_like(), GenericMatchText(), get_collation_oid(), get_json_object_as_hash(), InitializeClientEncoding(), IsThereCollationInNamespace(), json_recv(), jsonb_from_cstring(), locate_stem_module(), LogicalOutputWrite(), make_trigrams(), makeJsonLexContext(), p_isspecial(), ParallelWorkerMain(), pg_database_encoding_character_incrementer(), pg_database_encoding_max_length(), pg_generic_charinc(), pg_perm_setlocale(), pg_unicode_to_server(), pg_unicode_to_server_noerror(), pg_verifymbstr(), pgss_store(), PLyUnicode_Bytes(), populate_array_json(), PrepareClientEncoding(), read_extension_script_file(), regc_ctype_get_cache(), report_invalid_encoding_db(), SetClientEncoding(), str_casefold(), sv2cstr(), text_position_setup(), to_ascii_default(), to_char32(), to_pg_wchar(), tolower_libc_mb(), tolower_libc_sb(), toupper_libc_mb(), toupper_libc_sb(), type_maximum_size(), unicode_assigned(), unicode_norm_form_from_string(), wchar2char(), xml_in(), xml_is_document(), xmlparse(), and xmltotext_with_options().

◆ GetDatabaseEncodingName()

◆ GetMessageEncoding()

int GetMessageEncoding ( void  )

Definition at line 1434 of file mbutils.c.

1435{
1436 return MessageEncoding->encoding;
1437}
static const pg_enc2name * MessageEncoding
Definition mbutils.c:84

References pg_enc2name::encoding, and MessageEncoding.

◆ InitializeClientEncoding()

void InitializeClientEncoding ( void  )

Definition at line 290 of file mbutils.c.

291{
293
296
299 {
300 /*
301 * Oops, the requested conversion is not available. We couldn't fail
302 * before, but we can now.
303 */
306 errmsg("conversion between %s and %s is not supported",
309 }
310
311 /*
312 * Also look up the UTF8-to-server conversion function if needed. Since
313 * the server encoding is fixed within any one backend process, we don't
314 * have to do this more than once.
315 */
319 {
321
326 /* If there's no such conversion, just leave the pointer as NULL */
328 {
329 FmgrInfo *finfo;
330
332 sizeof(FmgrInfo));
335 /* Set Utf8ToServerConvProc only after data is fully valid */
336 Utf8ToServerConvProc = finfo;
337 }
338 }
339}
#define Assert(condition)
Definition c.h:873
#define OidIsValid(objectId)
Definition c.h:788
int errcode(int sqlerrcode)
Definition elog.c:864
int errmsg(const char *fmt,...)
Definition elog.c:1081
#define FATAL
Definition elog.h:41
#define ereport(elevel,...)
Definition elog.h:150
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition fmgr.c:138
#define PG_UTF8
Definition mbprint.c:43
int GetDatabaseEncoding(void)
Definition mbutils.c:1387
static FmgrInfo * Utf8ToServerConvProc
Definition mbutils.c:77
const char * GetDatabaseEncodingName(void)
Definition mbutils.c:1393
int SetClientEncoding(int encoding)
Definition mbutils.c:217
int PrepareClientEncoding(int encoding)
Definition mbutils.c:119
static bool backend_startup_complete
Definition mbutils.c:92
static int pending_client_encoding
Definition mbutils.c:93
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition mcxt.c:1232
MemoryContext TopMemoryContext
Definition mcxt.c:166
Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding)
Definition namespace.c:4150
@ PG_SQL_ASCII
Definition pg_wchar.h:226
unsigned int Oid
static void AssertCouldGetRelation(void)
Definition relcache.h:44

References Assert, AssertCouldGetRelation(), backend_startup_complete, ereport, errcode(), errmsg(), FATAL, fb(), FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), GetDatabaseEncodingName(), MemoryContextAlloc(), name, OidIsValid, pending_client_encoding, pg_enc2name_tbl, PG_SQL_ASCII, PG_UTF8, PrepareClientEncoding(), SetClientEncoding(), TopMemoryContext, and Utf8ToServerConvProc.

Referenced by InitPostgres().

◆ length_in_encoding()

Datum length_in_encoding ( PG_FUNCTION_ARGS  )

Definition at line 626 of file mbutils.c.

627{
628 bytea *string = PG_GETARG_BYTEA_PP(0);
631 const char *src_str;
632 int len;
633 int retval;
634
635 if (src_encoding < 0)
638 errmsg("invalid encoding name \"%s\"",
640
641 len = VARSIZE_ANY_EXHDR(string);
642 src_str = VARDATA_ANY(string);
643
644 retval = pg_verify_mbstr_len(src_encoding, src_str, len, false);
645
646 PG_RETURN_INT32(retval);
647}
#define NameStr(name)
Definition c.h:765
#define PG_GETARG_BYTEA_PP(n)
Definition fmgr.h:309
#define PG_GETARG_NAME(n)
Definition fmgr.h:279
#define PG_RETURN_INT32(x)
Definition fmgr.h:355
int pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
Definition mbutils.c:1723
#define pg_char_to_encoding
Definition pg_wchar.h:629
Definition c.h:706
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition varatt.h:472
static char * VARDATA_ANY(const void *PTR)
Definition varatt.h:486

References ereport, errcode(), errmsg(), ERROR, fb(), len, NameStr, pg_char_to_encoding, PG_GETARG_BYTEA_PP, PG_GETARG_NAME, PG_RETURN_INT32, pg_verify_mbstr_len(), VARDATA_ANY(), and VARSIZE_ANY_EXHDR().

◆ perform_default_encoding_conversion()

static char * perform_default_encoding_conversion ( const char * src,
int  len,
bool  is_client_to_server 
)
static

Definition at line 794 of file mbutils.c.

796{
797 char *result;
798 int src_encoding,
800 FmgrInfo *flinfo;
801
803 {
806 flinfo = ToServerConvProc;
807 }
808 else
809 {
812 flinfo = ToClientConvProc;
813 }
814
815 if (flinfo == NULL)
816 return unconstify(char *, src);
817
818 /*
819 * Allocate space for conversion result, being wary of integer overflow.
820 * See comments in pg_do_encoding_conversion.
821 */
825 errmsg("out of memory"),
826 errdetail("String of %d bytes is too long for encoding conversion.",
827 len)));
828
829 result = (char *)
832
833 FunctionCall6(flinfo,
836 CStringGetDatum(src),
837 CStringGetDatum(result),
839 BoolGetDatum(false));
840
841 /*
842 * Release extra space if there might be a lot --- see comments in
843 * pg_do_encoding_conversion.
844 */
845 if (len > 1000000)
846 {
847 Size resultlen = strlen(result);
848
849 if (resultlen >= MaxAllocSize)
852 errmsg("out of memory"),
853 errdetail("String of %d bytes is too long for encoding conversion.",
854 len)));
855
856 result = (char *) repalloc(result, resultlen + 1);
857 }
858
859 return result;
860}
#define unconstify(underlying_type, expr)
Definition c.h:1240
size_t Size
Definition c.h:619
int errdetail(const char *fmt,...)
Definition elog.c:1217
#define MaxAllocSize
Definition fe_memutils.h:22
#define FunctionCall6(flinfo, arg1, arg2, arg3, arg4, arg5, arg6)
Definition fmgr.h:712
static const pg_enc2name * ClientEncoding
Definition mbutils.c:82
static FmgrInfo * ToServerConvProc
Definition mbutils.c:69
static FmgrInfo * ToClientConvProc
Definition mbutils.c:70
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
void * MemoryContextAllocHuge(MemoryContext context, Size size)
Definition mcxt.c:1725
#define MaxAllocHugeSize
Definition memutils.h:45
#define MAX_CONVERSION_GROWTH
Definition pg_wchar.h:302
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
static Datum Int32GetDatum(int32 X)
Definition postgres.h:222

References BoolGetDatum(), ClientEncoding, CStringGetDatum(), CurrentMemoryContext, DatabaseEncoding, pg_enc2name::encoding, ereport, errcode(), errdetail(), errmsg(), ERROR, fb(), FunctionCall6, Int32GetDatum(), len, MAX_CONVERSION_GROWTH, MaxAllocHugeSize, MaxAllocSize, MemoryContextAllocHuge(), repalloc(), ToClientConvProc, ToServerConvProc, and unconstify.

Referenced by pg_any_to_server(), and pg_server_to_any().

◆ pg_any_to_server()

char * pg_any_to_server ( const char * s,
int  len,
int  encoding 
)

Definition at line 687 of file mbutils.c.

688{
689 if (len <= 0)
690 return unconstify(char *, s); /* empty string is always valid */
691
694 {
695 /*
696 * No conversion is needed, but we must still validate the data.
697 */
699 return unconstify(char *, s);
700 }
701
703 {
704 /*
705 * No conversion is possible, but we must still validate the data,
706 * because the client-side code might have done string escaping using
707 * the selected client_encoding. If the client encoding is ASCII-safe
708 * then we just do a straight validation under that encoding. For an
709 * ASCII-unsafe encoding we have a problem: we dare not pass such data
710 * to the parser but we have no way to convert it. We compromise by
711 * rejecting the data if it contains any non-ASCII characters.
712 */
714 (void) pg_verify_mbstr(encoding, s, len, false);
715 else
716 {
717 int i;
718
719 for (i = 0; i < len; i++)
720 {
721 if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
724 errmsg("invalid byte value for encoding \"%s\": 0x%02x",
726 (unsigned char) s[i])));
727 }
728 }
729 return unconstify(char *, s);
730 }
731
732 /* Fast path if we can use cached conversion function */
735
736 /* General case ... will not work outside transactions */
737 return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
738 len,
739 encoding,
741}
#define IS_HIGHBIT_SET(ch)
Definition c.h:1150
static char * encoding
Definition initdb.c:139
int i
Definition isn.c:77
unsigned char * pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding)
Definition mbutils.c:365
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition mbutils.c:1692
static char * perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)
Definition mbutils.c:794
#define PG_VALID_BE_ENCODING(_enc)
Definition pg_wchar.h:281

References ClientEncoding, DatabaseEncoding, encoding, pg_enc2name::encoding, ereport, errcode(), errmsg(), ERROR, fb(), i, IS_HIGHBIT_SET, len, name, perform_default_encoding_conversion(), pg_do_encoding_conversion(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_BE_ENCODING, pg_verify_mbstr(), and unconstify.

Referenced by ASN1_STRING_to_text(), cache_single_string(), db_encoding_convert(), dsnowball_lexize(), pg_client_to_server(), pg_stat_statements_internal(), pgp_armor_headers(), PLyUnicode_Bytes(), read_extension_script_file(), tsearch_readline(), utf_u2e(), utf_u2e(), X509_NAME_to_cstring(), and xml_recv().

◆ PG_char_to_encoding()

Datum PG_char_to_encoding ( PG_FUNCTION_ARGS  )

Definition at line 1411 of file mbutils.c.

1412{
1413 Name s = PG_GETARG_NAME(0);
1414
1416}
Definition c.h:760

References NameStr, pg_char_to_encoding, PG_GETARG_NAME, and PG_RETURN_INT32.

◆ pg_client_encoding()

Datum pg_client_encoding ( PG_FUNCTION_ARGS  )

◆ pg_client_to_server()

char * pg_client_to_server ( const char * s,
int  len 
)

Definition at line 671 of file mbutils.c.

672{
674}
char * pg_any_to_server(const char *s, int len, int encoding)
Definition mbutils.c:687

References ClientEncoding, pg_enc2name::encoding, len, and pg_any_to_server().

Referenced by exec_bind_message(), parse_fcall_arguments(), pq_getmsgstring(), and pq_getmsgtext().

◆ pg_convert()

Datum pg_convert ( PG_FUNCTION_ARGS  )

Definition at line 564 of file mbutils.c.

565{
566 bytea *string = PG_GETARG_BYTEA_PP(0);
571 const char *src_str;
572 char *dest_str;
573 bytea *retval;
574 int len;
575
576 if (src_encoding < 0)
579 errmsg("invalid source encoding name \"%s\"",
581 if (dest_encoding < 0)
584 errmsg("invalid destination encoding name \"%s\"",
586
587 /* make sure that source string is valid */
588 len = VARSIZE_ANY_EXHDR(string);
589 src_str = VARDATA_ANY(string);
591
592 /* perform conversion */
593 dest_str = (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, src_str),
594 len,
597
598
599 /* return source string if no conversion happened */
600 if (dest_str == src_str)
601 PG_RETURN_BYTEA_P(string);
602
603 /*
604 * build bytea data type structure.
605 */
607 retval = (bytea *) palloc(len + VARHDRSZ);
608 SET_VARSIZE(retval, len + VARHDRSZ);
609 memcpy(VARDATA(retval), dest_str, len);
611
612 /* free memory if allocated by the toaster */
613 PG_FREE_IF_COPY(string, 0);
614
615 PG_RETURN_BYTEA_P(retval);
616}
#define VARHDRSZ
Definition c.h:711
#define PG_FREE_IF_COPY(ptr, n)
Definition fmgr.h:260
#define PG_RETURN_BYTEA_P(x)
Definition fmgr.h:373
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
static char * VARDATA(const void *PTR)
Definition varatt.h:305
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432

References ereport, errcode(), errmsg(), ERROR, fb(), len, NameStr, palloc(), pfree(), pg_char_to_encoding, pg_do_encoding_conversion(), PG_FREE_IF_COPY, PG_GETARG_BYTEA_PP, PG_GETARG_NAME, PG_RETURN_BYTEA_P, pg_verify_mbstr(), SET_VARSIZE(), unconstify, VARDATA(), VARDATA_ANY(), VARHDRSZ, and VARSIZE_ANY_EXHDR().

Referenced by pg_convert_from(), and pg_convert_to().

◆ pg_convert_from()

Datum pg_convert_from ( PG_FUNCTION_ARGS  )

Definition at line 537 of file mbutils.c.

538{
539 Datum string = PG_GETARG_DATUM(0);
543 Datum result;
544
545 result = DirectFunctionCall3(pg_convert, string,
547
548 /*
549 * pg_convert returns a bytea, which we in turn return as text, relying on
550 * the fact that they are both in fact varlena types, and thus
551 * structurally identical. Although not all bytea values are valid text,
552 * in this case it will be because we've told pg_convert to return one
553 * that is valid as text in the current database encoding.
554 */
555 PG_RETURN_DATUM(result);
556}
#define PG_GETARG_DATUM(n)
Definition fmgr.h:268
#define PG_RETURN_DATUM(x)
Definition fmgr.h:354
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition fmgr.h:688
Datum pg_convert(PG_FUNCTION_ARGS)
Definition mbutils.c:564
uint64_t Datum
Definition postgres.h:70

References CStringGetDatum(), DatabaseEncoding, DirectFunctionCall1, DirectFunctionCall3, fb(), pg_enc2name::name, namein(), pg_convert(), PG_GETARG_DATUM, and PG_RETURN_DATUM.

◆ pg_convert_to()

Datum pg_convert_to ( PG_FUNCTION_ARGS  )

Definition at line 511 of file mbutils.c.

512{
513 Datum string = PG_GETARG_DATUM(0);
517 Datum result;
518
519 /*
520 * pg_convert expects a bytea as its first argument. We're passing it a
521 * text argument here, relying on the fact that they are both in fact
522 * varlena types, and thus structurally identical.
523 */
524 result = DirectFunctionCall3(pg_convert, string,
526
527 PG_RETURN_DATUM(result);
528}

References CStringGetDatum(), DatabaseEncoding, DirectFunctionCall1, DirectFunctionCall3, fb(), pg_enc2name::name, namein(), pg_convert(), PG_GETARG_DATUM, and PG_RETURN_DATUM.

◆ pg_database_encoding_character_incrementer()

mbcharacter_incrementer pg_database_encoding_character_incrementer ( void  )

Definition at line 1649 of file mbutils.c.

1650{
1651 /*
1652 * Eventually it might be best to add a field to pg_wchar_table[], but for
1653 * now we just use a switch.
1654 */
1655 switch (GetDatabaseEncoding())
1656 {
1657 case PG_UTF8:
1658 return pg_utf8_increment;
1659
1660 case PG_EUC_JP:
1661 return pg_eucjp_increment;
1662
1663 default:
1664 return pg_generic_charinc;
1665 }
1666}
static bool pg_generic_charinc(unsigned char *charptr, int len)
Definition mbutils.c:1451
static bool pg_utf8_increment(unsigned char *charptr, int length)
Definition mbutils.c:1485
static bool pg_eucjp_increment(unsigned char *charptr, int length)
Definition mbutils.c:1563
@ PG_EUC_JP
Definition pg_wchar.h:227

References GetDatabaseEncoding(), PG_EUC_JP, pg_eucjp_increment(), pg_generic_charinc(), PG_UTF8, and pg_utf8_increment().

Referenced by make_greater_string().

◆ pg_database_encoding_max_length()

◆ pg_do_encoding_conversion()

unsigned char * pg_do_encoding_conversion ( unsigned char * src,
int  len,
int  src_encoding,
int  dest_encoding 
)

Definition at line 365 of file mbutils.c.

367{
368 unsigned char *result;
369 Oid proc;
370
371 if (len <= 0)
372 return src; /* empty string is always valid */
373
375 return src; /* no conversion required, assume valid */
376
378 return src; /* any string is valid in SQL_ASCII */
379
381 {
382 /* No conversion is possible, but we must validate the result */
383 (void) pg_verify_mbstr(dest_encoding, (const char *) src, len, false);
384 return src;
385 }
386
387 if (!IsTransactionState()) /* shouldn't happen */
388 elog(ERROR, "cannot perform encoding conversion outside a transaction");
389
391 if (!OidIsValid(proc))
394 errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
397
398 /*
399 * Allocate space for conversion result, being wary of integer overflow.
400 *
401 * len * MAX_CONVERSION_GROWTH is typically a vast overestimate of the
402 * required space, so it might exceed MaxAllocSize even though the result
403 * would actually fit. We do not want to hand back a result string that
404 * exceeds MaxAllocSize, because callers might not cope gracefully --- but
405 * if we just allocate more than that, and don't use it, that's fine.
406 */
410 errmsg("out of memory"),
411 errdetail("String of %d bytes is too long for encoding conversion.",
412 len)));
413
414 result = (unsigned char *)
417
418 (void) OidFunctionCall6(proc,
421 CStringGetDatum((char *) src),
422 CStringGetDatum((char *) result),
424 BoolGetDatum(false));
425
426 /*
427 * If the result is large, it's worth repalloc'ing to release any extra
428 * space we asked for. The cutoff here is somewhat arbitrary, but we
429 * *must* check when len * MAX_CONVERSION_GROWTH exceeds MaxAllocSize.
430 */
431 if (len > 1000000)
432 {
433 Size resultlen = strlen((char *) result);
434
435 if (resultlen >= MaxAllocSize)
438 errmsg("out of memory"),
439 errdetail("String of %d bytes is too long for encoding conversion.",
440 len)));
441
442 result = (unsigned char *) repalloc(result, resultlen + 1);
443 }
444
445 return result;
446}
#define OidFunctionCall6(functionId, arg1, arg2, arg3, arg4, arg5, arg6)
Definition fmgr.h:732
#define pg_encoding_to_char
Definition pg_wchar.h:630
bool IsTransactionState(void)
Definition xact.c:388

References BoolGetDatum(), CStringGetDatum(), CurrentMemoryContext, elog, ereport, errcode(), errdetail(), errmsg(), ERROR, fb(), FindDefaultConversionProc(), Int32GetDatum(), IsTransactionState(), len, MAX_CONVERSION_GROWTH, MaxAllocHugeSize, MaxAllocSize, MemoryContextAllocHuge(), OidFunctionCall6, OidIsValid, pg_encoding_to_char, PG_SQL_ASCII, pg_verify_mbstr(), and repalloc().

Referenced by convert_charset(), pg_any_to_server(), pg_convert(), and pg_server_to_any().

◆ pg_do_encoding_conversion_buf()

int pg_do_encoding_conversion_buf ( Oid  proc,
int  src_encoding,
int  dest_encoding,
unsigned char * src,
int  srclen,
unsigned char * dest,
int  destlen,
bool  noError 
)

Definition at line 478 of file mbutils.c.

484{
485 Datum result;
486
487 /*
488 * If the destination buffer is not large enough to hold the result in the
489 * worst case, limit the input size passed to the conversion function.
490 */
491 if ((Size) srclen >= ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH))
493
494 result = OidFunctionCall6(proc,
497 CStringGetDatum((char *) src),
498 CStringGetDatum((char *) dest),
501 return DatumGetInt32(result);
502}
static int32 DatumGetInt32(Datum X)
Definition postgres.h:212

References BoolGetDatum(), CStringGetDatum(), DatumGetInt32(), fb(), Int32GetDatum(), MAX_CONVERSION_GROWTH, and OidFunctionCall6.

Referenced by CopyConversionError(), CopyConvertBuf(), and test_enc_conversion().

◆ pg_dsplen()

int pg_dsplen ( const char * mbstr)

Definition at line 1156 of file mbutils.c.

1157{
1158 return pg_wchar_table[DatabaseEncoding->encoding].dsplen((const unsigned char *) mbstr);
1159}
mbdisplaylen_converter dsplen
Definition pg_wchar.h:383

References DatabaseEncoding, pg_wchar_tbl::dsplen, pg_enc2name::encoding, fb(), and pg_wchar_table.

Referenced by p_isspecial().

◆ pg_encoding_max_length_sql()

Datum pg_encoding_max_length_sql ( PG_FUNCTION_ARGS  )

Definition at line 655 of file mbutils.c.

656{
657 int encoding = PG_GETARG_INT32(0);
658
661 else
663}
#define PG_RETURN_NULL()
Definition fmgr.h:346
#define PG_GETARG_INT32(n)
Definition fmgr.h:269

References encoding, PG_GETARG_INT32, PG_RETURN_INT32, PG_RETURN_NULL, PG_VALID_ENCODING, and pg_wchar_table.

◆ pg_encoding_mb2wchar_with_len()

int pg_encoding_mb2wchar_with_len ( int  encoding,
const char * from,
pg_wchar * to,
int  len 
)

Definition at line 1004 of file mbutils.c.

1006{
1007 return pg_wchar_table[encoding].mb2wchar_with_len((const unsigned char *) from, to, len);
1008}
mb2wchar_with_len_converter mb2wchar_with_len
Definition pg_wchar.h:378

References encoding, len, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

Referenced by test_text_to_wchars().

◆ pg_encoding_mbcliplen()

int pg_encoding_mbcliplen ( int  encoding,
const char * mbstr,
int  len,
int  limit 
)

Definition at line 1219 of file mbutils.c.

1221{
1223 int clen = 0;
1224 int l;
1225
1226 /* optimization for single byte encoding */
1228 return cliplen(mbstr, len, limit);
1229
1231
1232 while (len > 0 && *mbstr)
1233 {
1234 l = (*mblen_fn) ((const unsigned char *) mbstr);
1235 if ((clen + l) > limit)
1236 break;
1237 clen += l;
1238 if (clen == limit)
1239 break;
1240 len -= l;
1241 mbstr += l;
1242 }
1243 return clen;
1244}
static int cliplen(const char *str, int len, int limit)
Definition mbutils.c:1276
int(* mblen_converter)(const unsigned char *mbstr)
Definition pg_wchar.h:366
mblen_converter mblen
Definition pg_wchar.h:382
int pg_encoding_max_length(int encoding)
Definition wchar.c:2235

References cliplen(), encoding, fb(), len, pg_wchar_tbl::mblen, pg_encoding_max_length(), and pg_wchar_table.

Referenced by pg_mbcliplen().

◆ PG_encoding_to_char()

Datum PG_encoding_to_char ( PG_FUNCTION_ARGS  )

Definition at line 1419 of file mbutils.c.

1420{
1423
1425}
int32_t int32
Definition c.h:542

References CStringGetDatum(), DirectFunctionCall1, encoding, fb(), namein(), pg_encoding_to_char, and PG_GETARG_INT32.

◆ pg_encoding_wchar2mb_with_len()

int pg_encoding_wchar2mb_with_len ( int  encoding,
const pg_wchar * from,
char * to,
int  len 
)

Definition at line 1026 of file mbutils.c.

1028{
1029 return pg_wchar_table[encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
1030}
wchar2mb_with_len_converter wchar2mb_with_len
Definition pg_wchar.h:380

References encoding, len, pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

Referenced by test_wchars_to_text().

◆ pg_eucjp_increment()

static bool pg_eucjp_increment ( unsigned char * charptr,
int  length 
)
static

Definition at line 1563 of file mbutils.c.

1564{
1565 unsigned char c1,
1566 c2;
1567 int i;
1568
1569 c1 = *charptr;
1570
1571 switch (c1)
1572 {
1573 case SS2: /* JIS X 0201 */
1574 if (length != 2)
1575 return false;
1576
1577 c2 = charptr[1];
1578
1579 if (c2 >= 0xdf)
1580 charptr[0] = charptr[1] = 0xa1;
1581 else if (c2 < 0xa1)
1582 charptr[1] = 0xa1;
1583 else
1584 charptr[1]++;
1585 break;
1586
1587 case SS3: /* JIS X 0212 */
1588 if (length != 3)
1589 return false;
1590
1591 for (i = 2; i > 0; i--)
1592 {
1593 c2 = charptr[i];
1594 if (c2 < 0xa1)
1595 {
1596 charptr[i] = 0xa1;
1597 return true;
1598 }
1599 else if (c2 < 0xfe)
1600 {
1601 charptr[i]++;
1602 return true;
1603 }
1604 }
1605
1606 /* Out of 3-byte code region */
1607 return false;
1608
1609 default:
1610 if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1611 {
1612 if (length != 2)
1613 return false;
1614
1615 for (i = 1; i >= 0; i--)
1616 {
1617 c2 = charptr[i];
1618 if (c2 < 0xa1)
1619 {
1620 charptr[i] = 0xa1;
1621 return true;
1622 }
1623 else if (c2 < 0xfe)
1624 {
1625 charptr[i]++;
1626 return true;
1627 }
1628 }
1629
1630 /* Out of 2 byte code region */
1631 return false;
1632 }
1633 else
1634 { /* ASCII, single byte */
1635 if (c1 > 0x7e)
1636 return false;
1637 (*charptr)++;
1638 }
1639 break;
1640 }
1641
1642 return true;
1643}
#define SS2
Definition pg_wchar.h:38
#define SS3
Definition pg_wchar.h:39

References fb(), i, IS_HIGHBIT_SET, SS2, and SS3.

Referenced by pg_database_encoding_character_incrementer().

◆ pg_generic_charinc()

static bool pg_generic_charinc ( unsigned char * charptr,
int  len 
)
static

Definition at line 1451 of file mbutils.c.

1452{
1453 unsigned char *lastbyte = charptr + len - 1;
1455
1456 /* We can just invoke the character verifier directly. */
1458
1459 while (*lastbyte < (unsigned char) 255)
1460 {
1461 (*lastbyte)++;
1462 if ((*mbverify) (charptr, len) == len)
1463 return true;
1464 }
1465
1466 return false;
1467}
int(* mbchar_verifier)(const unsigned char *mbstr, int len)
Definition pg_wchar.h:372
mbchar_verifier mbverifychar
Definition pg_wchar.h:384

References fb(), GetDatabaseEncoding(), len, pg_wchar_tbl::mbverifychar, and pg_wchar_table.

Referenced by pg_database_encoding_character_incrementer().

◆ pg_get_client_encoding()

int pg_get_client_encoding ( void  )

Definition at line 345 of file mbutils.c.

346{
347 return ClientEncoding->encoding;
348}

References ClientEncoding, and pg_enc2name::encoding.

Referenced by BeginCopyFrom(), BeginCopyTo(), and xml_send().

◆ pg_get_client_encoding_name()

const char * pg_get_client_encoding_name ( void  )

Definition at line 354 of file mbutils.c.

355{
356 return ClientEncoding->name;
357}

References ClientEncoding, and pg_enc2name::name.

◆ pg_mb2wchar()

int pg_mb2wchar ( const char * from,
pg_wchar * to 
)

Definition at line 990 of file mbutils.c.

991{
992 return pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len((const unsigned char *) from, to, strlen(from));
993}

References DatabaseEncoding, pg_enc2name::encoding, fb(), pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

◆ pg_mb2wchar_with_len()

◆ pg_mbcharcliplen()

int pg_mbcharcliplen ( const char * mbstr,
int  len,
int  limit 
)

Definition at line 1251 of file mbutils.c.

/*
 * pg_mbcharcliplen: return the number of leading bytes of mbstr that make up
 * no more than `limit` characters.  Scanning stops after len bytes or at a
 * NUL byte, whichever comes first.
 *
 * NOTE(review): listing lines 1258 and 1263 are absent from this extract:
 * the condition guarding the single-byte shortcut, and the statement that
 * assigns `l` inside the loop.  The References list names
 * pg_database_encoding_max_length() and pg_mblen_with_len(), which
 * presumably supply them -- confirm against the real source file.
 */
1252{
1253 int clen = 0;
1254 int nch = 0;
1255 int l;
1256
1257 /* optimization for single byte encoding */
1259 return cliplen(mbstr, len, limit);
1260
1261 while (len > 0 && *mbstr)
1262 {
/* count the character first; if it would exceed limit, stop without adding its bytes */
1264 nch++;
1265 if (nch > limit)
1266 break;
1267 clen += l;
1268 len -= l;
1269 mbstr += l;
1270 }
1271 return clen;
1272}
int pg_mblen_with_len(const char *mbstr, int limit)
Definition mbutils.c:1107
int pg_database_encoding_max_length(void)
Definition mbutils.c:1672

References cliplen(), fb(), len, pg_database_encoding_max_length(), and pg_mblen_with_len().

Referenced by bpchar(), bpchar_input(), text_left(), text_right(), varchar(), and varchar_input().

◆ pg_mbcliplen()

◆ pg_mblen()

int pg_mblen ( const char * mbstr)

Definition at line 1149 of file mbutils.c.

/*
 * pg_mblen
 *		Byte length of the multibyte character at mbstr.  Simply forwards to
 *		pg_mblen_unbounded() and hands back its result.
 */
int
pg_mblen(const char *mbstr)
{
	int			nbytes = pg_mblen_unbounded(mbstr);

	return nbytes;
}
int pg_mblen_unbounded(const char *mbstr)
Definition mbutils.c:1135

References fb(), and pg_mblen_unbounded().

◆ pg_mblen_cstr()

int pg_mblen_cstr ( const char * mbstr)

Definition at line 1045 of file mbutils.c.

/*
 * pg_mblen_cstr: return the byte length of the character at mbstr, as
 * reported by the database encoding's mblen routine, for a string that is
 * expected to be NUL-terminated.  Bytes 1 .. length-1 of the character are
 * checked for an embedded NUL before the length is returned.
 *
 * NOTE(review): listing lines 1056, 1069 and 1071 are absent from this
 * extract: the statement executed when an embedded NUL is found, and both
 * arms of the VALGRIND_EXPENSIVE conditional.  The References list names
 * report_invalid_encoding_db() and VALGRIND_CHECK_MEM_IS_DEFINED, which
 * presumably appear on those lines -- confirm against the real source file.
 */
1046{
1047 int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
1048
1049 /*
1050 * The .mblen functions return 1 when given a pointer to a terminator.
1051 * Some callers depend on that, so we tolerate it for now. Well-behaved
1052 * callers check the leading byte for a terminator *before* calling.
1053 */
1054 for (int i = 1; i < length; ++i)
1055 if (unlikely(mbstr[i] == 0))
1057
1058 /*
1059 * String should be NUL-terminated, but checking that would make typical
1060 * callers O(N^2), tripling Valgrind check-world time. Unless
1061 * VALGRIND_EXPENSIVE, check 1 byte after each actual character. (If we
1062 * found a character, not a terminator, the next byte must be a terminator
1063 * or the start of the next character.) If the caller iterates the whole
1064 * string, the last call will diagnose a missing terminator.
1065 */
1066 if (mbstr[0] != '\0')
1067 {
1068#ifdef VALGRIND_EXPENSIVE
1070#else
1072#endif
1073 }
1074
1075 return length;
1076}
#define unlikely(x)
Definition c.h:412
static pg_noreturn void report_invalid_encoding_db(const char *mbstr, int mblen, int len)
Definition mbutils.c:1857
#define VALGRIND_CHECK_MEM_IS_DEFINED(addr, size)
Definition memdebug.h:23

References DatabaseEncoding, pg_enc2name::encoding, fb(), i, pg_wchar_tbl::mblen, pg_wchar_table, report_invalid_encoding_db(), unlikely, and VALGRIND_CHECK_MEM_IS_DEFINED.

Referenced by addCompoundAffixFlagValue(), bit_in(), DCH_from_char(), find_word(), findchar(), findchar2(), findwrd(), get_modifiers(), get_nextfield(), getNextFlagFromString(), gettoken_query(), gettoken_query_standard(), gettoken_query_websearch(), gettoken_tsvector(), initTrie(), map_sql_identifier_to_xml_name(), map_xml_name_to_sql_identifier(), match_prosrc_to_literal(), mb_strchr(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), parse_affentry(), parse_format(), parse_lquery(), parse_ltree(), parse_or_operator(), pg_mbstrlen(), prssyntaxerror(), px_crypt_shacrypt(), readstoplist(), RS_compile(), RS_execute(), RS_isRegis(), test_mblen_func(), text_to_bits(), thesaurusRead(), ts_copychar_cstr(), and varbit_in().

◆ pg_mblen_range()

◆ pg_mblen_unbounded()

int pg_mblen_unbounded ( const char * mbstr)

Definition at line 1135 of file mbutils.c.

/*
 * pg_mblen_unbounded: return the byte length of the character at mbstr as
 * reported by the database encoding's mblen routine.  The caller supplies no
 * length bound, and none is enforced in the lines visible here.
 *
 * NOTE(review): listing line 1139 is absent from this extract.  The
 * References list names VALGRIND_CHECK_MEM_IS_DEFINED, which is presumably
 * invoked there -- confirm against the real source file.
 */
1136{
1137 int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
1138
1140
1141 return length;
1142}

References DatabaseEncoding, pg_enc2name::encoding, fb(), pg_wchar_tbl::mblen, pg_wchar_table, and VALGRIND_CHECK_MEM_IS_DEFINED.

Referenced by charlen_to_bytelen(), lpad(), make_trigrams(), pg_mblen(), rpad(), test_mblen_func(), and text_substring().

◆ pg_mblen_with_len()

int pg_mblen_with_len ( const char * mbstr,
int  limit 
)

Definition at line 1107 of file mbutils.c.

/*
 * pg_mblen_with_len: return the byte length of the character at mbstr as
 * reported by the database encoding's mblen routine.  If that length exceeds
 * `limit`, control passes to report_invalid_encoding_db() (declared
 * pg_noreturn) instead of returning.  limit must be at least 1 (asserted).
 *
 * NOTE(review): listing lines 1113 and 1115 -- the two arms of the
 * VALGRIND_EXPENSIVE conditional -- are absent from this extract.  The
 * References list names VALGRIND_CHECK_MEM_IS_DEFINED, presumably used
 * there -- confirm against the real source file.
 */
1108{
1109 int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
1110
1111 Assert(limit >= 1);
1112#ifdef VALGRIND_EXPENSIVE
1114#else
1116#endif
1117
1118 if (unlikely(length > limit))
1119 report_invalid_encoding_db(mbstr, length, limit);
1120
1121 return length;
1122}

References Assert, DatabaseEncoding, pg_enc2name::encoding, fb(), pg_wchar_tbl::mblen, pg_wchar_table, report_invalid_encoding_db(), unlikely, and VALGRIND_CHECK_MEM_IS_DEFINED.

Referenced by pg_mbcharcliplen(), pg_mbstrlen_with_len(), test_mblen_func(), and wchareq().

◆ pg_mbstrlen()

int pg_mbstrlen ( const char * mbstr)

Definition at line 1163 of file mbutils.c.

/*
 * pg_mbstrlen: count the characters in the NUL-terminated string mbstr.
 * A shortcut path returns strlen(mbstr) directly; otherwise one is added to
 * the count per loop iteration until a NUL byte is reached.
 *
 * NOTE(review): listing lines 1168 and 1173 are absent from this extract:
 * the condition guarding the strlen() shortcut, and the statement that
 * advances mbstr inside the loop (as shown, the loop never moves mbstr).
 * The References list names pg_database_encoding_max_length() and
 * pg_mblen_cstr(), which presumably supply them -- confirm against the real
 * source file.
 */
1164{
1165 int len = 0;
1166
1167 /* optimization for single byte encoding */
1169 return strlen(mbstr);
1170
1171 while (*mbstr)
1172 {
1174 len++;
1175 }
1176 return len;
1177}
int pg_mblen_cstr(const char *mbstr)
Definition mbutils.c:1045

References fb(), len, pg_database_encoding_max_length(), and pg_mblen_cstr().

Referenced by NUM_processor(), and text_format_append_string().

◆ pg_mbstrlen_with_len()

int pg_mbstrlen_with_len ( const char * mbstr,
int  limit 
)

Definition at line 1183 of file mbutils.c.

/*
 * pg_mbstrlen_with_len: count the characters in the first `limit` bytes of
 * mbstr, stopping early at a NUL byte.  A shortcut path returns `limit`
 * itself (one character per byte).
 *
 * NOTE(review): listing line 1188, the condition guarding the shortcut, is
 * absent from this extract.  The References list names
 * pg_database_encoding_max_length(), which presumably appears there --
 * confirm against the real source file.
 */
1184{
1185 int len = 0;
1186
1187 /* optimization for single byte encoding */
1189 return limit;
1190
1191 while (limit > 0 && *mbstr)
1192 {
/* pg_mblen_with_len() raises an error if the character would run past limit */
1193 int l = pg_mblen_with_len(mbstr, limit);
1194
1195 limit -= l;
1196 mbstr += l;
1197 len++;
1198 }
1199 return len;
1200}

References fb(), len, pg_database_encoding_max_length(), and pg_mblen_with_len().

Referenced by bpchar(), bpchar_input(), bpcharlen(), executor_errposition(), lpad(), match_prosrc_to_query(), parser_errposition(), plpgsql_scanner_errposition(), rpad(), scanner_errposition(), similar_escape_internal(), text_left(), text_length(), text_position_get_match_pos(), text_right(), text_substring(), unicode_assigned(), unicode_is_normalized(), unicode_normalize_func(), and varstr_levenshtein().

◆ pg_server_to_any()

char * pg_server_to_any ( const char * s,
int  len,
int  encoding 
)

Definition at line 760 of file mbutils.c.

/*
 * pg_server_to_any: convert the len-byte string s to `encoding`.
 *
 * Several paths return s itself (with const cast away) rather than a new
 * buffer: when len <= 0, when the data is assumed valid, and when no
 * conversion is possible.  In the last case the string is first validated
 * with pg_verify_mbstr() in erroring mode (noError = false).  Otherwise the
 * result comes from a conversion routine.
 *
 * NOTE(review): listing lines 765-766, 769, 777-778 and 783 are absent from
 * this extract: the conditions selecting each early-return path, the
 * cached-conversion fast path, and one argument of the final call.  The
 * References list names DatabaseEncoding, ClientEncoding, PG_SQL_ASCII and
 * perform_default_encoding_conversion(), which presumably appear there --
 * confirm against the real source file.
 */
761{
762 if (len <= 0)
763 return unconstify(char *, s); /* empty string is always valid */
764
767 return unconstify(char *, s); /* assume data is valid */
768
770 {
771 /* No conversion is possible, but we must validate the result */
772 (void) pg_verify_mbstr(encoding, s, len, false);
773 return unconstify(char *, s);
774 }
775
776 /* Fast path if we can use cached conversion function */
779
780 /* General case ... will not work outside transactions */
781 return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
782 len,
784 encoding);
785}

References ClientEncoding, DatabaseEncoding, encoding, pg_enc2name::encoding, fb(), len, perform_default_encoding_conversion(), pg_do_encoding_conversion(), PG_SQL_ASCII, pg_verify_mbstr(), and unconstify.

Referenced by compareStrings(), CopyAttributeOutCSV(), CopyAttributeOutText(), CopyToTextLikeStart(), daitch_mokotoff(), dsnowball_lexize(), hv_fetch_string(), hv_store_string(), pg_server_to_client(), PLyUnicode_FromStringAndSize(), utf_e2u(), and utf_e2u().

◆ pg_server_to_client()

char * pg_server_to_client ( const char * s,
int  len 
)

Definition at line 749 of file mbutils.c.

/*
 * pg_server_to_client
 *
 * NOTE(review): the only statement of this function (listing line 751) is
 * absent from this extract.  The References list names ClientEncoding,
 * pg_enc2name::encoding, len and pg_server_to_any(), so it presumably
 * forwards to pg_server_to_any() with the client encoding -- confirm against
 * the real source file.
 */
750{
752}
char * pg_server_to_any(const char *s, int len, int encoding)
Definition mbutils.c:760

References ClientEncoding, pg_enc2name::encoding, len, and pg_server_to_any().

Referenced by pq_puttextmessage(), pq_sendcountedtext(), pq_sendstring(), pq_sendtext(), and pq_writestring().

◆ pg_unicode_to_server()

void pg_unicode_to_server ( char32_t  c,
unsigned char * s 
)

Definition at line 875 of file mbutils.c.

/*
 * pg_unicode_to_server: write the code point c into s as a NUL-terminated
 * string.
 *
 * Visible paths: c <= 0x7F is stored directly as a single byte; on the
 * UTF-8 path the bytes come from unicode_to_utf8() and the terminator is
 * placed at s[pg_utf_mblen(s)].  The remaining path builds a UTF-8 source
 * string in c_as_utf8 and hands it to a conversion call with its final
 * argument false; the comment there says an error is thrown on failure.
 * The error messages "invalid Unicode code point" and "conversion between
 * %s and %s is not supported" are raised from this function.
 *
 * NOTE(review): listing lines 885-887, 899-900, 908-910, 912-913, 916-917,
 * 921-923 and 926 are absent from this extract: the conditions and ereport
 * headers for both errors, the server-encoding test, the construction of
 * c_as_utf8, and most of the conversion call.  The References list names
 * is_valid_unicode_codepoint(), GetDatabaseEncoding(), PG_UTF8,
 * Utf8ToServerConvProc and FunctionCall6, which presumably appear there --
 * confirm against the real source file.  The size s must have is not
 * visible here.
 */
876{
877 unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
878 int c_as_utf8_len;
879 int server_encoding;
880
881 /*
882 * Complain if invalid Unicode code point. The choice of errcode here is
883 * debatable, but really our caller should have checked this anyway.
884 */
888 errmsg("invalid Unicode code point")));
889
890 /* Otherwise, if it's in ASCII range, conversion is trivial */
891 if (c <= 0x7F)
892 {
893 s[0] = (unsigned char) c;
894 s[1] = '\0';
895 return;
896 }
897
898 /* If the server encoding is UTF-8, we just need to reformat the code */
901 {
902 unicode_to_utf8(c, s);
903 s[pg_utf_mblen(s)] = '\0';
904 return;
905 }
906
907 /* For all other cases, we must have a conversion function available */
911 errmsg("conversion between %s and %s is not supported",
914
915 /* Construct UTF-8 source string */
918 c_as_utf8[c_as_utf8_len] = '\0';
919
920 /* Convert, or throw error if we can't */
924 CStringGetDatum((char *) c_as_utf8),
925 CStringGetDatum((char *) s),
927 BoolGetDatum(false));
928}
#define MAX_MULTIBYTE_CHAR_LEN
Definition pg_wchar.h:33
#define pg_utf_mblen
Definition pg_wchar.h:633
static unsigned char * unicode_to_utf8(char32_t c, unsigned char *utf8string)
Definition pg_wchar.h:575
static bool is_valid_unicode_codepoint(char32_t c)
Definition pg_wchar.h:519
char * c

References BoolGetDatum(), CStringGetDatum(), ereport, errcode(), errmsg(), ERROR, fb(), FunctionCall6, GetDatabaseEncoding(), GetDatabaseEncodingName(), Int32GetDatum(), is_valid_unicode_codepoint(), MAX_MULTIBYTE_CHAR_LEN, name, pg_enc2name_tbl, PG_UTF8, pg_utf_mblen, unicode_to_utf8(), and Utf8ToServerConvProc.

Referenced by addunicode(), addUnicodeChar(), map_xml_name_to_sql_identifier(), str_udeescape(), and unistr().

◆ pg_unicode_to_server_noerror()

bool pg_unicode_to_server_noerror ( char32_t  c,
unsigned char * s 
)

Definition at line 937 of file mbutils.c.

/*
 * pg_unicode_to_server_noerror: like pg_unicode_to_server, but reports
 * failure through the return value.
 *
 * Returns true when the code point was written to s as a NUL-terminated
 * string.  The ASCII and UTF-8 paths always succeed; the conversion path
 * succeeds only if the converted length equals the length of the UTF-8
 * source string.  Returns false on the two early failure paths and when
 * the conversion did not consume the whole input.  The conversion call's
 * final argument is true.
 *
 * NOTE(review): listing lines 945, 957-958, 966, 970-971, 975-977 and 980
 * are absent from this extract: the conditions for both early "return
 * false" paths, the server-encoding test, the construction of c_as_utf8,
 * and most of the conversion call.  The References list names
 * is_valid_unicode_codepoint(), GetDatabaseEncoding(), PG_UTF8,
 * Utf8ToServerConvProc, FunctionCall6 and DatumGetInt32(), which presumably
 * appear there -- confirm against the real source file.
 */
938{
939 unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
940 int c_as_utf8_len;
941 int converted_len;
942 int server_encoding;
943
944 /* Fail if invalid Unicode code point */
946 return false;
947
948 /* Otherwise, if it's in ASCII range, conversion is trivial */
949 if (c <= 0x7F)
950 {
951 s[0] = (unsigned char) c;
952 s[1] = '\0';
953 return true;
954 }
955
956 /* If the server encoding is UTF-8, we just need to reformat the code */
959 {
960 unicode_to_utf8(c, s);
961 s[pg_utf_mblen(s)] = '\0';
962 return true;
963 }
964
965 /* For all other cases, we must have a conversion function available */
967 return false;
968
969 /* Construct UTF-8 source string */
972 c_as_utf8[c_as_utf8_len] = '\0';
973
974 /* Convert, but without throwing error if we can't */
978 CStringGetDatum((char *) c_as_utf8),
979 CStringGetDatum((char *) s),
981 BoolGetDatum(true)));
982
983 /* Conversion was successful iff it consumed the whole input */
984 return (converted_len == c_as_utf8_len);
985}

References BoolGetDatum(), CStringGetDatum(), DatumGetInt32(), fb(), FunctionCall6, GetDatabaseEncoding(), Int32GetDatum(), is_valid_unicode_codepoint(), MAX_MULTIBYTE_CHAR_LEN, PG_UTF8, pg_utf_mblen, unicode_to_utf8(), and Utf8ToServerConvProc.

Referenced by addUnicodeChar(), and json_lex_string().

◆ pg_utf8_increment()

static bool pg_utf8_increment ( unsigned char * charptr,
int  length 
)
static

Definition at line 1485 of file mbutils.c.

/*
 * pg_utf8_increment
 *		Advance the UTF-8 character at charptr (length bytes long) to a
 *		larger character of the same byte length, in place.
 *
 * The last byte that is below its maximum is incremented; bytes after it
 * are left as they are (they are not reset to their minimums).  The second
 * byte's maximum depends on the lead byte: 0x9F after 0xED (to stay below
 * the surrogate range), 0x8F after 0xF4 (to stay at or below U+10FFFF),
 * and 0xBF otherwise.
 *
 * If the carry reaches the lead byte and stepping it lands on 0xED or 0xF4,
 * the second byte is clamped to that lead byte's maximum.  Without the
 * clamp, EC BF BF would become ED BF BF (a surrogate) and F3 BF BF BF would
 * become F4 BF BF BF (beyond U+10FFFF), neither of which is valid UTF-8.
 * The clamped result still compares greater than the input because the
 * lead byte grew.
 *
 * Returns true on success.  Returns false, leaving the bytes untouched,
 * when length is not 1..4 or when the lead byte is already the largest for
 * its length (0x7F, 0xDF, 0xEF, 0xF4).
 */
static bool
pg_utf8_increment(unsigned char *charptr, int length)
{
	unsigned char a;
	unsigned char limit;

	switch (length)
	{
		default:
			/* reject lengths 5 and 6 for now */
			return false;
		case 4:
			a = charptr[3];
			if (a < 0xBF)
			{
				charptr[3]++;
				break;
			}
			/* FALL THRU */
		case 3:
			a = charptr[2];
			if (a < 0xBF)
			{
				charptr[2]++;
				break;
			}
			/* FALL THRU */
		case 2:
			a = charptr[1];
			switch (*charptr)
			{
				case 0xED:
					limit = 0x9F;
					break;
				case 0xF4:
					limit = 0x8F;
					break;
				default:
					limit = 0xBF;
					break;
			}
			if (a < limit)
			{
				charptr[1]++;
				break;
			}
			/* FALL THRU */
		case 1:
			a = *charptr;
			if (a == 0x7F || a == 0xDF || a == 0xEF || a == 0xF4)
				return false;
			charptr[0]++;

			/*
			 * The new lead byte may allow a narrower range for the second
			 * byte than the old one did; pull the second byte back inside
			 * that range so the result stays valid.
			 */
			if (length == 3 && charptr[0] == 0xED && charptr[1] > 0x9F)
				charptr[1] = 0x9F;
			else if (length == 4 && charptr[0] == 0xF4 && charptr[1] > 0x8F)
				charptr[1] = 0x8F;
			break;
	}

	return true;
}
int a
Definition isn.c:73

References a, and fb().

Referenced by pg_database_encoding_character_incrementer().

◆ pg_verify_mbstr()

bool pg_verify_mbstr ( int  encoding,
const char * mbstr,
int  len,
bool  noError 
)

Definition at line 1692 of file mbutils.c.

/*
 * pg_verify_mbstr: check that the len-byte string mbstr is valid in
 * `encoding`, using that encoding's mbverifystr routine.
 *
 * Returns true if the routine accepts all len bytes.  When it does not,
 * returns false if noError is set.
 *
 * NOTE(review): listing lines 1696 and 1703 are absent from this extract.
 * The References list names Assert, PG_VALID_ENCODING and
 * report_invalid_encoding(); presumably the first is an assertion on
 * `encoding` and the second reports the error when noError is false --
 * confirm against the real source file.
 */
1693{
1694 int oklen;
1695
1697
1698 oklen = pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len);
1699 if (oklen != len)
1700 {
1701 if (noError)
1702 return false;
1704 }
1705 return true;
1706}
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition mbutils.c:1824
mbstr_verifier mbverifystr
Definition pg_wchar.h:385

References Assert, encoding, fb(), len, pg_wchar_tbl::mbverifystr, PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().

Referenced by AddFileToBackupManifest(), LogicalOutputWrite(), pg_any_to_server(), pg_convert(), pg_do_encoding_conversion(), pg_server_to_any(), pg_verifymbstr(), and read_extension_script_file().

◆ pg_verify_mbstr_len()

int pg_verify_mbstr_len ( int  encoding,
const char * mbstr,
int  len,
bool  noError 
)

Definition at line 1723 of file mbutils.c.

/*
 * pg_verify_mbstr_len: verify the len-byte string mbstr against `encoding`
 * and return its length in characters.
 *
 * Returns the character count on success (for the single-byte path that is
 * simply len).  When a NUL byte or an invalid character is found, returns
 * -1 if noError is set.
 *
 * NOTE(review): listing lines 1728, 1733, 1741, 1765 and 1774 are absent
 * from this extract: an initial statement, the condition selecting the
 * single-byte path, and the statement executed at each of the three failure
 * points when noError is false.  The References list names Assert,
 * PG_VALID_ENCODING, pg_encoding_max_length() and
 * report_invalid_encoding(), which presumably appear there -- confirm
 * against the real source file.
 */
1724{
1725 mbchar_verifier mbverifychar;
1726 int mb_len;
1727
1729
1730 /*
1731 * In single-byte encodings, we need only reject nulls (\0).
1732 */
1734 {
1735 const char *nullpos = memchr(mbstr, 0, len);
1736
1737 if (nullpos == NULL)
1738 return len;
1739 if (noError)
1740 return -1;
1742 }
1743
1744 /* fetch function pointer just once */
1745 mbverifychar = pg_wchar_table[encoding].mbverifychar;
1746
1747 mb_len = 0;
1748
1749 while (len > 0)
1750 {
1751 int l;
1752
1753 /* fast path for ASCII-subset characters */
1754 if (!IS_HIGHBIT_SET(*mbstr))
1755 {
1756 if (*mbstr != '\0')
1757 {
1758 mb_len++;
1759 mbstr++;
1760 len--;
1761 continue;
1762 }
1763 if (noError)
1764 return -1;
1766 }
1767
1768 l = (*mbverifychar) ((const unsigned char *) mbstr, len);
1769
/* a negative result from the verifier marks an invalid character */
1770 if (l < 0)
1771 {
1772 if (noError)
1773 return -1;
1775 }
1776
1777 mbstr += l;
1778 len -= l;
1779 mb_len++;
1780 }
1781 return mb_len;
1782}

References Assert, encoding, fb(), IS_HIGHBIT_SET, len, pg_wchar_tbl::mbverifychar, pg_encoding_max_length(), PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().

Referenced by length_in_encoding().

◆ pg_verifymbstr()

◆ pg_wchar2mb()

int pg_wchar2mb ( const pg_wchar * from,
char * to 
)

Definition at line 1012 of file mbutils.c.

1013{
1014 return pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len(from, (unsigned char *) to, pg_wchar_strlen(from));
1015}
size_t pg_wchar_strlen(const pg_wchar *str)
Definition wstrncmp.c:70

References DatabaseEncoding, pg_enc2name::encoding, pg_wchar_strlen(), pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

◆ pg_wchar2mb_with_len()

◆ PrepareClientEncoding()

int PrepareClientEncoding ( int  encoding)

Definition at line 119 of file mbutils.c.

/*
 * PrepareClientEncoding: make ready for a later switch of the client
 * encoding to `encoding`.  Returns 0 on success and -1 on failure.
 *
 * Inside a live transaction, a new ConvProcInfo is filled in with the
 * server/client encoding pair and the fmgr info for both conversion
 * directions.  Per the comments it is allocated in TopMemoryContext and
 * attached to the head of the cache list.  Older entries for the same pair
 * are deliberately left in place for SetClientEncoding to clean up.
 *
 * Outside a transaction nothing is looked up: the call succeeds only if
 * ConvProcList already holds an entry for the same encoding pair.
 *
 * NOTE(review): listing lines 121, 124, 128, 131, 136-138, 149-151,
 * 154-156, 158, 160, 166, 171, 173, 176-177 and 199 are absent from this
 * extract.  That covers the declarations of current_server_encoding,
 * convinfo, oldinfo, to_server_proc and to_client_proc, the conditions
 * guarding each early return, the conversion-procedure lookups, the
 * allocation call, and the list insertion.  The References list names
 * PG_VALID_FE_ENCODING, backend_startup_complete, GetDatabaseEncoding(),
 * PG_SQL_ASCII, FindDefaultConversionProc(), OidIsValid,
 * MemoryContextAlloc(), TopMemoryContext, lcons() and lfirst, which
 * presumably appear there -- confirm against the real source file.
 */
120{
122 ListCell *lc;
123
125 return -1;
126
127 /* Can't do anything during startup, per notes above */
129 return 0;
130
132
133 /*
134 * Check for cases that require no conversion function.
135 */
139 return 0;
140
141 if (IsTransactionState())
142 {
143 /*
144 * If we're in a live transaction, it's safe to access the catalogs,
145 * so look up the functions. We repeat the lookup even if the info is
146 * already cached, so that we can react to changes in the contents of
147 * pg_conversion.
148 */
152 MemoryContext oldcontext;
153
157 return -1;
159 encoding);
161 return -1;
162
163 /*
164 * Load the fmgr info into TopMemoryContext (could still fail here)
165 */
167 sizeof(ConvProcInfo));
168 convinfo->s_encoding = current_server_encoding;
169 convinfo->c_encoding = encoding;
170 fmgr_info_cxt(to_server_proc, &convinfo->to_server_info,
172 fmgr_info_cxt(to_client_proc, &convinfo->to_client_info,
174
175 /* Attach new info to head of list */
178 MemoryContextSwitchTo(oldcontext);
179
180 /*
181 * We cannot yet remove any older entry for the same encoding pair,
182 * since it could still be in use. SetClientEncoding will clean up.
183 */
184
185 return 0; /* success */
186 }
187 else
188 {
189 /*
190 * If we're not in a live transaction, the only thing we can do is
191 * restore a previous setting using the cache. This covers all
192 * transaction-rollback cases. The only case it might not work for is
193 * trying to change client_encoding on the fly by editing
194 * postgresql.conf and SIGHUP'ing. Which would probably be a stupid
195 * thing to do anyway.
196 */
197 foreach(lc, ConvProcList)
198 {
200
201 if (oldinfo->s_encoding == current_server_encoding &&
202 oldinfo->c_encoding == encoding)
203 return 0;
204 }
205
206 return -1; /* it's not cached, so fail */
207 }
208}
List * lcons(void *datum, List *list)
Definition list.c:495
static List * ConvProcList
Definition mbutils.c:63
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
#define lfirst(lc)
Definition pg_list.h:172
#define PG_VALID_FE_ENCODING(_enc)
Definition pg_wchar.h:291

References backend_startup_complete, ConvProcList, encoding, fb(), FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), IsTransactionState(), lcons(), lfirst, MemoryContextAlloc(), MemoryContextSwitchTo(), OidIsValid, PG_SQL_ASCII, PG_VALID_FE_ENCODING, and TopMemoryContext.

Referenced by check_client_encoding(), and InitializeClientEncoding().

◆ report_invalid_encoding()

◆ report_invalid_encoding_db()

static void report_invalid_encoding_db ( const char * mbstr,
int  mblen,
int  len 
)
static

◆ report_invalid_encoding_int()

static void report_invalid_encoding_int ( int  encoding,
const char * mbstr,
int  mblen,
int  len 
)
static

Definition at line 1832 of file mbutils.c.

/*
 * report_invalid_encoding_int: raise the "invalid byte sequence for
 * encoding" error, quoting the offending bytes in hex.
 *
 * At most min(mblen, len, 8) bytes of mbstr are formatted as "0x%02x",
 * separated by single spaces.  That is at most 8*4 + 7 characters plus the
 * terminator, which fits in the 8*5+1 byte buffer.  The function ends in
 * ereport(ERROR, ...).
 *
 * NOTE(review): listing lines 1850 and 1852 are absent from this extract:
 * the errcode() argument and the argument supplying the encoding name for
 * the first %s.  The References list names errcode() and pg_enc2name_tbl,
 * which presumably appear there -- confirm against the real source file.
 * If jlimit is 0 the loop never writes to buf, so it reaches the message
 * uninitialized; whether callers can pass mblen or len <= 0 is not visible
 * here.
 */
1833{
1834 char buf[8 * 5 + 1];
1835 char *p = buf;
1836 int j,
1837 jlimit;
1838
1839 jlimit = Min(mblen, len);
1840 jlimit = Min(jlimit, 8); /* prevent buffer overrun */
1841
1842 for (j = 0; j < jlimit; j++)
1843 {
1844 p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
1845 if (j < jlimit - 1)
1846 p += sprintf(p, " ");
1847 }
1848
1849 ereport(ERROR,
1851 errmsg("invalid byte sequence for encoding \"%s\": %s",
1853 buf)));
1854}
int j
Definition isn.c:78
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define sprintf
Definition port.h:262

References buf, encoding, ereport, errcode(), errmsg(), ERROR, fb(), j, len, Min, name, pg_enc2name_tbl, and sprintf.

Referenced by report_invalid_encoding(), and report_invalid_encoding_db().

◆ report_untranslatable_char()

void report_untranslatable_char ( int  src_encoding,
int  dest_encoding,
const char * mbstr,
int  len 
)

Definition at line 1869 of file mbutils.c.

/*
 * report_untranslatable_char: raise the "character with byte sequence ...
 * has no equivalent in encoding ..." error for the character at mbstr.
 *
 * At most min(l, len, 8) bytes of mbstr are formatted as "0x%02x",
 * separated by single spaces, into an 8*5+1 byte buffer.  The function
 * ends in ereport(ERROR, ...).
 *
 * NOTE(review): listing lines 1885, 1897 and 1900-1901 are absent from this
 * extract: the statement that assigns `l`, the errcode() argument, and the
 * arguments supplying the two encoding names.  The References list names
 * pg_encoding_mblen_or_incomplete(), errcode() and pg_enc2name_tbl, which
 * presumably appear there -- confirm against the real source file.
 */
1871{
1872 int l;
1873 char buf[8 * 5 + 1];
1874 char *p = buf;
1875 int j,
1876 jlimit;
1877
1878 /*
1879 * We probably could use plain pg_encoding_mblen(), because
1880 * gb18030_to_utf8() verifies before it converts. All conversions should.
1881 * For src_encoding!=GB18030, len>0 meets pg_encoding_mblen() needs. Even
1882 * so, be defensive, since a buggy conversion might pass invalid data.
1883 * This is not a performance-critical path.
1884 */
1886 jlimit = Min(l, len);
1887 jlimit = Min(jlimit, 8); /* prevent buffer overrun */
1888
1889 for (j = 0; j < jlimit; j++)
1890 {
1891 p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
1892 if (j < jlimit - 1)
1893 p += sprintf(p, " ");
1894 }
1895
1896 ereport(ERROR,
1898 errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
1899 buf,
1902}

References buf, ereport, errcode(), errmsg(), ERROR, fb(), j, len, Min, name, pg_enc2name_tbl, pg_encoding_mblen_or_incomplete(), and sprintf.

Referenced by big52euc_tw(), big52mic(), euc_tw2big5(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), utf8_to_iso8859_1(), and UtfToLocal().

◆ SetClientEncoding()

int SetClientEncoding ( int  encoding)

Definition at line 217 of file mbutils.c.

/*
 * SetClientEncoding: switch the client encoding to `encoding`.  Returns 0
 * on success and -1 on failure.
 *
 * In the general case ConvProcList is searched for entries matching the
 * current server encoding and `encoding`.  The first match found is
 * installed: ToServerConvProc and ToClientConvProc are pointed at its fmgr
 * info.  Any later matches are treated as duplicates and released.  If no
 * entry matches, the call fails.
 *
 * NOTE(review): listing lines 219, 223, 227, 229, 233, 238-240, 242-244,
 * 257, 265 and 273-274 are absent from this extract.  That covers the
 * declaration of current_server_encoding, the conditions guarding the
 * early returns and what they record, the declaration of convinfo, the
 * ClientEncoding update, and the statements that release a duplicate.  The
 * References list names PG_VALID_FE_ENCODING, backend_startup_complete,
 * pending_client_encoding, GetDatabaseEncoding(), PG_SQL_ASCII,
 * ClientEncoding, pg_enc2name_tbl, lfirst, foreach_delete_current and
 * pfree(), which presumably appear there -- confirm against the real
 * source file.
 */
218{
220 bool found;
221 ListCell *lc;
222
224 return -1;
225
226 /* Can't do anything during startup, per notes above */
228 {
230 return 0;
231 }
232
234
235 /*
236 * Check for cases that require no conversion function.
237 */
241 {
245 return 0;
246 }
247
248 /*
249 * Search the cache for the entry previously prepared by
250 * PrepareClientEncoding; if there isn't one, we lose. While at it,
251 * release any duplicate entries so that repeated Prepare/Set cycles don't
252 * leak memory.
253 */
254 found = false;
255 foreach(lc, ConvProcList)
256 {
258
259 if (convinfo->s_encoding == current_server_encoding &&
260 convinfo->c_encoding == encoding)
261 {
262 if (!found)
263 {
264 /* Found newest entry, so set up */
266 ToServerConvProc = &convinfo->to_server_info;
267 ToClientConvProc = &convinfo->to_client_info;
268 found = true;
269 }
270 else
271 {
272 /* Duplicate entry, release it */
275 }
276 }
277 }
278
279 if (found)
280 return 0; /* success */
281 else
282 return -1; /* it's not cached, so fail */
283}
#define foreach_delete_current(lst, var_or_cell)
Definition pg_list.h:391

References backend_startup_complete, ClientEncoding, ConvProcList, encoding, fb(), foreach_delete_current, GetDatabaseEncoding(), lfirst, pending_client_encoding, pfree(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ToClientConvProc, and ToServerConvProc.

Referenced by assign_client_encoding(), InitializeClientEncoding(), and ParallelWorkerMain().

◆ SetDatabaseEncoding()

void SetDatabaseEncoding ( int  encoding)

Definition at line 1287 of file mbutils.c.

/*
 * SetDatabaseEncoding: record `encoding` as the database encoding, raising
 * elog(ERROR) with "invalid database encoding: %d" on a rejected value.
 *
 * NOTE(review): listing lines 1289 and 1292-1293 are absent from this
 * extract: the condition guarding the elog() and the statements that
 * follow it.  The References list names PG_VALID_BE_ENCODING,
 * DatabaseEncoding, pg_enc2name_tbl and Assert; presumably the encoding is
 * checked with PG_VALID_BE_ENCODING and DatabaseEncoding is then pointed at
 * the matching pg_enc2name_tbl entry -- confirm against the real source
 * file.
 */
1288{
1290 elog(ERROR, "invalid database encoding: %d", encoding);
1291
1294}

References Assert, DatabaseEncoding, elog, encoding, pg_enc2name::encoding, ERROR, pg_enc2name_tbl, and PG_VALID_BE_ENCODING.

Referenced by CheckMyDatabase().

◆ SetMessageEncoding()

void SetMessageEncoding ( int  encoding)

Definition at line 1297 of file mbutils.c.

/*
 * SetMessageEncoding
 *
 * NOTE(review): listing lines 1300 and 1302-1303 -- every statement of this
 * function -- are absent from this extract; only the comment below
 * survives.  The References list names Assert, PG_VALID_ENCODING,
 * MessageEncoding and pg_enc2name_tbl; presumably the encoding is checked
 * with an assertion rather than elog(), as the comment suggests, and
 * MessageEncoding is then pointed at the matching pg_enc2name_tbl entry --
 * confirm against the real source file.
 */
1298{
1299 /* Some calls happen before we can elog()! */
1301
1304}

References Assert, encoding, pg_enc2name::encoding, MessageEncoding, pg_enc2name_tbl, and PG_VALID_ENCODING.

Referenced by pg_perm_setlocale().

Variable Documentation

◆ backend_startup_complete

bool backend_startup_complete = false
static

Definition at line 92 of file mbutils.c.

Referenced by InitializeClientEncoding(), PrepareClientEncoding(), and SetClientEncoding().

◆ ClientEncoding

◆ ConvProcList

List* ConvProcList = NIL
static

Definition at line 63 of file mbutils.c.

Referenced by PrepareClientEncoding(), and SetClientEncoding().

◆ DatabaseEncoding

◆ MessageEncoding

const pg_enc2name* MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
static

Definition at line 84 of file mbutils.c.

Referenced by GetMessageEncoding(), and SetMessageEncoding().

◆ pending_client_encoding

int pending_client_encoding = PG_SQL_ASCII
static

Definition at line 93 of file mbutils.c.

Referenced by InitializeClientEncoding(), and SetClientEncoding().

◆ ToClientConvProc

FmgrInfo* ToClientConvProc = NULL
static

Definition at line 70 of file mbutils.c.

Referenced by perform_default_encoding_conversion(), and SetClientEncoding().

◆ ToServerConvProc

FmgrInfo* ToServerConvProc = NULL
static

Definition at line 69 of file mbutils.c.

Referenced by perform_default_encoding_conversion(), and SetClientEncoding().

◆ Utf8ToServerConvProc

FmgrInfo* Utf8ToServerConvProc = NULL
static