PostgreSQL Source Code  git master
mbutils.c File Reference
#include "postgres.h"
#include "access/xact.h"
#include "catalog/namespace.h"
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/syscache.h"
Include dependency graph for mbutils.c:

Go to the source code of this file.

Data Structures

struct  ConvProcInfo
 

Typedefs

typedef struct ConvProcInfo ConvProcInfo
 

Functions

static char * perform_default_encoding_conversion (const char *src, int len, bool is_client_to_server)
 
static int cliplen (const char *str, int len, int limit)
 
int PrepareClientEncoding (int encoding)
 
int SetClientEncoding (int encoding)
 
void InitializeClientEncoding (void)
 
int pg_get_client_encoding (void)
 
const char * pg_get_client_encoding_name (void)
 
unsigned char * pg_do_encoding_conversion (unsigned char *src, int len, int src_encoding, int dest_encoding)
 
Datum pg_convert_to (PG_FUNCTION_ARGS)
 
Datum pg_convert_from (PG_FUNCTION_ARGS)
 
Datum pg_convert (PG_FUNCTION_ARGS)
 
Datum length_in_encoding (PG_FUNCTION_ARGS)
 
Datum pg_encoding_max_length_sql (PG_FUNCTION_ARGS)
 
char * pg_client_to_server (const char *s, int len)
 
char * pg_any_to_server (const char *s, int len, int encoding)
 
char * pg_server_to_client (const char *s, int len)
 
char * pg_server_to_any (const char *s, int len, int encoding)
 
void pg_unicode_to_server (pg_wchar c, unsigned char *s)
 
int pg_mb2wchar (const char *from, pg_wchar *to)
 
int pg_mb2wchar_with_len (const char *from, pg_wchar *to, int len)
 
int pg_encoding_mb2wchar_with_len (int encoding, const char *from, pg_wchar *to, int len)
 
int pg_wchar2mb (const pg_wchar *from, char *to)
 
int pg_wchar2mb_with_len (const pg_wchar *from, char *to, int len)
 
int pg_encoding_wchar2mb_with_len (int encoding, const pg_wchar *from, char *to, int len)
 
int pg_mblen (const char *mbstr)
 
int pg_dsplen (const char *mbstr)
 
int pg_mbstrlen (const char *mbstr)
 
int pg_mbstrlen_with_len (const char *mbstr, int limit)
 
int pg_mbcliplen (const char *mbstr, int len, int limit)
 
int pg_encoding_mbcliplen (int encoding, const char *mbstr, int len, int limit)
 
int pg_mbcharcliplen (const char *mbstr, int len, int limit)
 
void SetDatabaseEncoding (int encoding)
 
void SetMessageEncoding (int encoding)
 
int GetDatabaseEncoding (void)
 
const char * GetDatabaseEncodingName (void)
 
Datum getdatabaseencoding (PG_FUNCTION_ARGS)
 
Datum pg_client_encoding (PG_FUNCTION_ARGS)
 
Datum PG_char_to_encoding (PG_FUNCTION_ARGS)
 
Datum PG_encoding_to_char (PG_FUNCTION_ARGS)
 
int GetMessageEncoding (void)
 
static bool pg_generic_charinc (unsigned char *charptr, int len)
 
static bool pg_utf8_increment (unsigned char *charptr, int length)
 
static bool pg_eucjp_increment (unsigned char *charptr, int length)
 
mbcharacter_incrementer pg_database_encoding_character_incrementer (void)
 
int pg_database_encoding_max_length (void)
 
bool pg_verifymbstr (const char *mbstr, int len, bool noError)
 
bool pg_verify_mbstr (int encoding, const char *mbstr, int len, bool noError)
 
int pg_verify_mbstr_len (int encoding, const char *mbstr, int len, bool noError)
 
void check_encoding_conversion_args (int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)
 
void report_invalid_encoding (int encoding, const char *mbstr, int len)
 
void report_untranslatable_char (int src_encoding, int dest_encoding, const char *mbstr, int len)
 

Variables

static List * ConvProcList = NIL
 
static FmgrInfo * ToServerConvProc = NULL
 
static FmgrInfo * ToClientConvProc = NULL
 
static FmgrInfo * Utf8ToServerConvProc = NULL
 
static const pg_enc2name * ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
 
static const pg_enc2name * DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
 
static const pg_enc2name * MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
 
static bool backend_startup_complete = false
 
static int pending_client_encoding = PG_SQL_ASCII
 

Typedef Documentation

◆ ConvProcInfo

typedef struct ConvProcInfo ConvProcInfo

Function Documentation

◆ check_encoding_conversion_args()

void check_encoding_conversion_args ( int  src_encoding,
int  dest_encoding,
int  len,
int  expected_src_encoding,
int  expected_dest_encoding 
)

Definition at line 1546 of file mbutils.c.

References elog, ERROR, name, pg_enc2name_tbl, and PG_VALID_ENCODING.

Referenced by surrogate_pair_to_codepoint().

1551 {
1552  if (!PG_VALID_ENCODING(src_encoding))
1553  elog(ERROR, "invalid source encoding ID: %d", src_encoding);
1554  if (src_encoding != expected_src_encoding && expected_src_encoding >= 0)
1555  elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
1556  pg_enc2name_tbl[expected_src_encoding].name,
1557  pg_enc2name_tbl[src_encoding].name);
1558  if (!PG_VALID_ENCODING(dest_encoding))
1559  elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
1560  if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= 0)
1561  elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
1562  pg_enc2name_tbl[expected_dest_encoding].name,
1563  pg_enc2name_tbl[dest_encoding].name);
1564  if (len < 0)
1565  elog(ERROR, "encoding conversion length must not be negative");
1566 }
#define ERROR
Definition: elog.h:43
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
const char * name
Definition: encode.c:521
#define elog(elevel,...)
Definition: elog.h:214

◆ cliplen()

static int cliplen ( const char *  str,
int  len,
int  limit 
)
static

Definition at line 1034 of file mbutils.c.

References Min.

Referenced by pg_encoding_mbcliplen(), pg_mbcharcliplen(), and pgstat_clip_activity().

1035 {
1036  int l = 0;
1037 
1038  len = Min(len, limit);
1039  while (l < len && str[l])
1040  l++;
1041  return l;
1042 }
#define Min(x, y)
Definition: c.h:920

◆ GetDatabaseEncoding()

int GetDatabaseEncoding ( void  )

◆ getdatabaseencoding()

Datum getdatabaseencoding ( PG_FUNCTION_ARGS  )

Definition at line 1163 of file mbutils.c.

References CStringGetDatum, DirectFunctionCall1, pg_enc2name::name, and namein().

1164 {
1165  return DirectFunctionCall1(namein, CStringGetDatum(DatabaseEncoding->name));
1166 }
Datum namein(PG_FUNCTION_ARGS)
Definition: name.c:48
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:615
#define CStringGetDatum(X)
Definition: postgres.h:578
const char * name
Definition: pg_wchar.h:337
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81

◆ GetDatabaseEncodingName()

const char* GetDatabaseEncodingName ( void  )

◆ GetMessageEncoding()

int GetMessageEncoding ( void  )

Definition at line 1198 of file mbutils.c.

References pg_enc2name::encoding.

Referenced by DebugFileOpen(), report_untranslatable_char(), SetMessageEncoding(), and surrogate_pair_to_codepoint().

1199 {
1200  return MessageEncoding->encoding;
1201 }
pg_enc encoding
Definition: pg_wchar.h:338
static const pg_enc2name * MessageEncoding
Definition: mbutils.c:82

◆ InitializeClientEncoding()

void InitializeClientEncoding ( void  )

Definition at line 281 of file mbutils.c.

References Assert, backend_startup_complete, ereport, errcode(), errmsg(), FATAL, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), GetDatabaseEncodingName(), IsTransactionState(), MemoryContextAlloc(), name, OidIsValid, pending_client_encoding, pg_enc2name_tbl, PG_SQL_ASCII, PG_UTF8, PrepareClientEncoding(), SetClientEncoding(), and TopMemoryContext.

Referenced by InitPostgres(), and surrogate_pair_to_codepoint().

282 {
283  int current_server_encoding;
284 
287 
290  {
291  /*
292  * Oops, the requested conversion is not available. We couldn't fail
293  * before, but we can now.
294  */
295  ereport(FATAL,
296  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
297  errmsg("conversion between %s and %s is not supported",
300  }
301 
302  /*
303  * Also look up the UTF8-to-server conversion function if needed. Since
304  * the server encoding is fixed within any one backend process, we don't
305  * have to do this more than once.
306  */
307  current_server_encoding = GetDatabaseEncoding();
308  if (current_server_encoding != PG_UTF8 &&
309  current_server_encoding != PG_SQL_ASCII)
310  {
311  Oid utf8_to_server_proc;
312 
314  utf8_to_server_proc =
316  current_server_encoding);
317  /* If there's no such conversion, just leave the pointer as NULL */
318  if (OidIsValid(utf8_to_server_proc))
319  {
320  FmgrInfo *finfo;
321 
323  sizeof(FmgrInfo));
324  fmgr_info_cxt(utf8_to_server_proc, finfo,
326  /* Set Utf8ToServerConvProc only after data is fully valid */
327  Utf8ToServerConvProc = finfo;
328  }
329  }
330 }
Definition: fmgr.h:56
static FmgrInfo * Utf8ToServerConvProc
Definition: mbutils.c:75
int PrepareClientEncoding(int encoding)
Definition: mbutils.c:110
static bool backend_startup_complete
Definition: mbutils.c:90
static int pending_client_encoding
Definition: mbutils.c:91
int errcode(int sqlerrcode)
Definition: elog.c:610
unsigned int Oid
Definition: postgres_ext.h:31
#define OidIsValid(objectId)
Definition: c.h:644
#define FATAL
Definition: elog.h:52
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
int SetClientEncoding(int encoding)
Definition: mbutils.c:208
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:134
MemoryContext TopMemoryContext
Definition: mcxt.c:44
int GetDatabaseEncoding(void)
Definition: mbutils.c:1151
#define ereport(elevel,...)
Definition: elog.h:144
#define Assert(condition)
Definition: c.h:738
const char * GetDatabaseEncodingName(void)
Definition: mbutils.c:1157
Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding)
Definition: namespace.c:3710
bool IsTransactionState(void)
Definition: xact.c:355
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:824
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:796

◆ length_in_encoding()

Datum length_in_encoding ( PG_FUNCTION_ARGS  )

Definition at line 558 of file mbutils.c.

References ereport, errcode(), errmsg(), ERROR, NameStr, pg_char_to_encoding(), PG_GETARG_BYTEA_PP, PG_GETARG_NAME, PG_RETURN_INT32, pg_verify_mbstr_len(), VARDATA_ANY, and VARSIZE_ANY_EXHDR.

559 {
560  bytea *string = PG_GETARG_BYTEA_PP(0);
561  char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
562  int src_encoding = pg_char_to_encoding(src_encoding_name);
563  const char *src_str;
564  int len;
565  int retval;
566 
567  if (src_encoding < 0)
568  ereport(ERROR,
569  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
570  errmsg("invalid encoding name \"%s\"",
571  src_encoding_name)));
572 
573  len = VARSIZE_ANY_EXHDR(string);
574  src_str = VARDATA_ANY(string);
575 
576  retval = pg_verify_mbstr_len(src_encoding, src_str, len, false);
577 
578  PG_RETURN_INT32(retval);
579 }
int pg_char_to_encoding(const char *name)
Definition: encnames.c:550
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define PG_RETURN_INT32(x)
Definition: fmgr.h:344
int errcode(int sqlerrcode)
Definition: elog.c:610
#define ERROR
Definition: elog.h:43
#define ereport(elevel,...)
Definition: elog.h:144
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:302
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
int errmsg(const char *fmt,...)
Definition: elog.c:824
#define NameStr(name)
Definition: c.h:615
Definition: c.h:555
int pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1474
#define PG_GETARG_NAME(n)
Definition: fmgr.h:273

◆ perform_default_encoding_conversion()

static char * perform_default_encoding_conversion ( const char *  src,
int  len,
bool  is_client_to_server 
)
static

Definition at line 726 of file mbutils.c.

References CStringGetDatum, CurrentMemoryContext, pg_enc2name::encoding, ereport, errcode(), errdetail(), errmsg(), ERROR, FunctionCall5, Int32GetDatum, MAX_CONVERSION_GROWTH, MaxAllocHugeSize, MaxAllocSize, MemoryContextAllocHuge(), repalloc(), ToClientConvProc, ToServerConvProc, and unconstify.

Referenced by pg_any_to_server(), and pg_server_to_any().

728 {
729  char *result;
730  int src_encoding,
731  dest_encoding;
732  FmgrInfo *flinfo;
733 
734  if (is_client_to_server)
735  {
736  src_encoding = ClientEncoding->encoding;
737  dest_encoding = DatabaseEncoding->encoding;
738  flinfo = ToServerConvProc;
739  }
740  else
741  {
742  src_encoding = DatabaseEncoding->encoding;
743  dest_encoding = ClientEncoding->encoding;
744  flinfo = ToClientConvProc;
745  }
746 
747  if (flinfo == NULL)
748  return unconstify(char *, src);
749 
750  /*
751  * Allocate space for conversion result, being wary of integer overflow.
752  * See comments in pg_do_encoding_conversion.
753  */
754  if ((Size) len >= (MaxAllocHugeSize / (Size) MAX_CONVERSION_GROWTH))
755  ereport(ERROR,
756  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
757  errmsg("out of memory"),
758  errdetail("String of %d bytes is too long for encoding conversion.",
759  len)));
760 
761  result = (char *)
762  MemoryContextAllocHuge(CurrentMemoryContext,
763  (Size) len * MAX_CONVERSION_GROWTH + 1);
764 
765  FunctionCall5(flinfo,
766  Int32GetDatum(src_encoding),
767  Int32GetDatum(dest_encoding),
768  CStringGetDatum(src),
769  CStringGetDatum(result),
770  Int32GetDatum(len));
771 
772  /*
773  * Release extra space if there might be a lot --- see comments in
774  * pg_do_encoding_conversion.
775  */
776  if (len > 1000000)
777  {
778  Size resultlen = strlen(result);
779 
780  if (resultlen >= MaxAllocSize)
781  ereport(ERROR,
782  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
783  errmsg("out of memory"),
784  errdetail("String of %d bytes is too long for encoding conversion.",
785  len)));
786 
787  result = (char *) repalloc(result, resultlen + 1);
788  }
789 
790  return result;
791 }
Definition: fmgr.h:56
static FmgrInfo * ToServerConvProc
Definition: mbutils.c:67
int errcode(int sqlerrcode)
Definition: elog.c:610
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
#define MaxAllocHugeSize
Definition: memutils.h:44
#define ERROR
Definition: elog.h:43
#define FunctionCall5(flinfo, arg1, arg2, arg3, arg4, arg5)
Definition: fmgr.h:641
void * MemoryContextAllocHuge(MemoryContext context, Size size)
Definition: mcxt.c:1105
int errdetail(const char *fmt,...)
Definition: elog.c:957
#define CStringGetDatum(X)
Definition: postgres.h:578
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
#define MaxAllocSize
Definition: memutils.h:40
#define unconstify(underlying_type, expr)
Definition: c.h:1206
pg_enc encoding
Definition: pg_wchar.h:338
#define ereport(elevel,...)
Definition: elog.h:144
size_t Size
Definition: c.h:466
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1069
#define MAX_CONVERSION_GROWTH
Definition: pg_wchar.h:316
#define Int32GetDatum(X)
Definition: postgres.h:479
static FmgrInfo * ToClientConvProc
Definition: mbutils.c:68
int errmsg(const char *fmt,...)
Definition: elog.c:824
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81

◆ pg_any_to_server()

char* pg_any_to_server ( const char *  s,
int  len,
int  encoding 
)

Definition at line 619 of file mbutils.c.

References pg_enc2name::encoding, ereport, errcode(), errmsg(), ERROR, i, IS_HIGHBIT_SET, name, perform_default_encoding_conversion(), pg_do_encoding_conversion(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_BE_ENCODING, pg_verify_mbstr(), and unconstify.

Referenced by ASN1_STRING_to_text(), cache_single_string(), CopyReadLine(), db_encoding_convert(), dsnowball_lexize(), pg_client_to_server(), pg_stat_statements_internal(), pgp_armor_headers(), PLyUnicode_Bytes(), read_extension_script_file(), surrogate_pair_to_codepoint(), t_readline(), utf_u2e(), X509_NAME_to_cstring(), X509_NAME_to_text(), and xml_recv().

620 {
621  if (len <= 0)
622  return unconstify(char *, s); /* empty string is always valid */
623 
626  {
627  /*
628  * No conversion is needed, but we must still validate the data.
629  */
630  (void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
631  return unconstify(char *, s);
632  }
633 
635  {
636  /*
637  * No conversion is possible, but we must still validate the data,
638  * because the client-side code might have done string escaping using
639  * the selected client_encoding. If the client encoding is ASCII-safe
640  * then we just do a straight validation under that encoding. For an
641  * ASCII-unsafe encoding we have a problem: we dare not pass such data
642  * to the parser but we have no way to convert it. We compromise by
643  * rejecting the data if it contains any non-ASCII characters.
644  */
646  (void) pg_verify_mbstr(encoding, s, len, false);
647  else
648  {
649  int i;
650 
651  for (i = 0; i < len; i++)
652  {
653  if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
654  ereport(ERROR,
655  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
656  errmsg("invalid byte value for encoding \"%s\": 0x%02x",
658  (unsigned char) s[i])));
659  }
660  }
661  return unconstify(char *, s);
662  }
663 
664  /* Fast path if we can use cached conversion function */
666  return perform_default_encoding_conversion(s, len, true);
667 
668  /* General case ... will not work outside transactions */
669  return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
670  len,
671  encoding,
673 }
static char * perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)
Definition: mbutils.c:726
int errcode(int sqlerrcode)
Definition: elog.c:610
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
unsigned char * pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding)
Definition: mbutils.c:356
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1457
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119
#define ERROR
Definition: elog.h:43
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
#define unconstify(underlying_type, expr)
Definition: c.h:1206
pg_enc encoding
Definition: pg_wchar.h:338
#define ereport(elevel,...)
Definition: elog.h:144
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:295
int32 encoding
Definition: pg_database.h:41
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:824
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81
int i

◆ PG_char_to_encoding()

Datum PG_char_to_encoding ( PG_FUNCTION_ARGS  )

Definition at line 1175 of file mbutils.c.

References NameStr, pg_char_to_encoding(), PG_GETARG_NAME, and PG_RETURN_INT32.

1176 {
1177  Name s = PG_GETARG_NAME(0);
1178 
1179  PG_RETURN_INT32(pg_char_to_encoding(NameStr(*s)));
1180 }
int pg_char_to_encoding(const char *name)
Definition: encnames.c:550
#define PG_RETURN_INT32(x)
Definition: fmgr.h:344
Definition: c.h:609
#define NameStr(name)
Definition: c.h:615
#define PG_GETARG_NAME(n)
Definition: fmgr.h:273

◆ pg_client_encoding()

Datum pg_client_encoding ( PG_FUNCTION_ARGS  )

Definition at line 1169 of file mbutils.c.

References CStringGetDatum, DirectFunctionCall1, pg_enc2name::name, and namein().

1170 {
1171  return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));
1172 }
Datum namein(PG_FUNCTION_ARGS)
Definition: name.c:48
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:615
#define CStringGetDatum(X)
Definition: postgres.h:578
const char * name
Definition: pg_wchar.h:337

◆ pg_client_to_server()

char* pg_client_to_server ( const char *  s,
int  len 
)

Definition at line 603 of file mbutils.c.

References pg_enc2name::encoding, and pg_any_to_server().

Referenced by exec_bind_message(), parse_fcall_arguments(), pq_getmsgstring(), pq_getmsgtext(), and surrogate_pair_to_codepoint().

604 {
605  return pg_any_to_server(s, len, ClientEncoding->encoding);
606 }
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
pg_enc encoding
Definition: pg_wchar.h:338
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:619

◆ pg_convert()

Datum pg_convert ( PG_FUNCTION_ARGS  )

Definition at line 496 of file mbutils.c.

References ereport, errcode(), errmsg(), ERROR, NameStr, palloc(), pfree(), pg_char_to_encoding(), pg_do_encoding_conversion(), PG_FREE_IF_COPY, PG_GETARG_BYTEA_PP, PG_GETARG_NAME, PG_RETURN_BYTEA_P, pg_verify_mbstr_len(), SET_VARSIZE, unconstify, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY_EXHDR.

Referenced by pg_convert_from(), and pg_convert_to().

497 {
498  bytea *string = PG_GETARG_BYTEA_PP(0);
499  char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
500  int src_encoding = pg_char_to_encoding(src_encoding_name);
501  char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
502  int dest_encoding = pg_char_to_encoding(dest_encoding_name);
503  const char *src_str;
504  char *dest_str;
505  bytea *retval;
506  int len;
507 
508  if (src_encoding < 0)
509  ereport(ERROR,
510  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
511  errmsg("invalid source encoding name \"%s\"",
512  src_encoding_name)));
513  if (dest_encoding < 0)
514  ereport(ERROR,
515  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
516  errmsg("invalid destination encoding name \"%s\"",
517  dest_encoding_name)));
518 
519  /* make sure that source string is valid */
520  len = VARSIZE_ANY_EXHDR(string);
521  src_str = VARDATA_ANY(string);
522  pg_verify_mbstr_len(src_encoding, src_str, len, false);
523 
524  /* perform conversion */
525  dest_str = (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, src_str),
526  len,
527  src_encoding,
528  dest_encoding);
529 
530  /* update len if conversion actually happened */
531  if (dest_str != src_str)
532  len = strlen(dest_str);
533 
534  /*
535  * build bytea data type structure.
536  */
537  retval = (bytea *) palloc(len + VARHDRSZ);
538  SET_VARSIZE(retval, len + VARHDRSZ);
539  memcpy(VARDATA(retval), dest_str, len);
540 
541  if (dest_str != src_str)
542  pfree(dest_str);
543 
544  /* free memory if allocated by the toaster */
545  PG_FREE_IF_COPY(string, 0);
546 
547  PG_RETURN_BYTEA_P(retval);
548 }
int pg_char_to_encoding(const char *name)
Definition: encnames.c:550
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define VARDATA(PTR)
Definition: postgres.h:302
#define VARHDRSZ
Definition: c.h:561
int errcode(int sqlerrcode)
Definition: elog.c:610
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:360
unsigned char * pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding)
Definition: mbutils.c:356
void pfree(void *pointer)
Definition: mcxt.c:1056
#define ERROR
Definition: elog.h:43
#define unconstify(underlying_type, expr)
Definition: c.h:1206
#define ereport(elevel,...)
Definition: elog.h:144
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:302
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:255
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:824
#define NameStr(name)
Definition: c.h:615
Definition: c.h:555
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
int pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1474
#define PG_GETARG_NAME(n)
Definition: fmgr.h:273

◆ pg_convert_from()

Datum pg_convert_from ( PG_FUNCTION_ARGS  )

Definition at line 469 of file mbutils.c.

References CStringGetDatum, DirectFunctionCall1, DirectFunctionCall3, pg_enc2name::name, namein(), pg_convert(), PG_GETARG_DATUM, and PG_RETURN_DATUM.

470 {
471  Datum string = PG_GETARG_DATUM(0);
472  Datum src_encoding_name = PG_GETARG_DATUM(1);
473  Datum dest_encoding_name = DirectFunctionCall1(namein,
474  CStringGetDatum(DatabaseEncoding->name));
475  Datum result;
476 
477  result = DirectFunctionCall3(pg_convert, string,
478  src_encoding_name, dest_encoding_name);
479 
480  /*
481  * pg_convert returns a bytea, which we in turn return as text, relying on
482  * the fact that they are both in fact varlena types, and thus
483  * structurally identical. Although not all bytea values are valid text,
484  * in this case it will be because we've told pg_convert to return one
485  * that is valid as text in the current database encoding.
486  */
487  PG_RETURN_DATUM(result);
488 }
Datum namein(PG_FUNCTION_ARGS)
Definition: name.c:48
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:263
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:615
#define CStringGetDatum(X)
Definition: postgres.h:578
const char * name
Definition: pg_wchar.h:337
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:619
uintptr_t Datum
Definition: postgres.h:367
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:343
Datum pg_convert(PG_FUNCTION_ARGS)
Definition: mbutils.c:496
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81

◆ pg_convert_to()

Datum pg_convert_to ( PG_FUNCTION_ARGS  )

Definition at line 444 of file mbutils.c.

References CStringGetDatum, DirectFunctionCall1, DirectFunctionCall3, pg_enc2name::name, namein(), pg_convert(), PG_GETARG_DATUM, and PG_RETURN_DATUM.

445 {
446  Datum string = PG_GETARG_DATUM(0);
447  Datum dest_encoding_name = PG_GETARG_DATUM(1);
448  Datum src_encoding_name = DirectFunctionCall1(namein,
449  CStringGetDatum(DatabaseEncoding->name));
450  Datum result;
451 
452  /*
453  * pg_convert expects a bytea as its first argument. We're passing it a
454  * text argument here, relying on the fact that they are both in fact
455  * varlena types, and thus structurally identical.
456  */
457  result = DirectFunctionCall3(pg_convert, string,
458  src_encoding_name, dest_encoding_name);
459 
460  PG_RETURN_DATUM(result);
461 }
Datum namein(PG_FUNCTION_ARGS)
Definition: name.c:48
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:263
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:615
#define CStringGetDatum(X)
Definition: postgres.h:578
const char * name
Definition: pg_wchar.h:337
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:619
uintptr_t Datum
Definition: postgres.h:367
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:343
Datum pg_convert(PG_FUNCTION_ARGS)
Definition: mbutils.c:496
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81

◆ pg_database_encoding_character_incrementer()

mbcharacter_incrementer pg_database_encoding_character_incrementer ( void  )

Definition at line 1413 of file mbutils.c.

References GetDatabaseEncoding(), PG_EUC_JP, pg_eucjp_increment(), pg_generic_charinc(), PG_UTF8, and pg_utf8_increment().

Referenced by make_greater_string(), and surrogate_pair_to_codepoint().

1414 {
1415  /*
1416  * Eventually it might be best to add a field to pg_wchar_table[], but for
1417  * now we just use a switch.
1418  */
1419  switch (GetDatabaseEncoding())
1420  {
1421  case PG_UTF8:
1422  return pg_utf8_increment;
1423 
1424  case PG_EUC_JP:
1425  return pg_eucjp_increment;
1426 
1427  default:
1428  return pg_generic_charinc;
1429  }
1430 }
static bool pg_eucjp_increment(unsigned char *charptr, int length)
Definition: mbutils.c:1327
static bool pg_utf8_increment(unsigned char *charptr, int length)
Definition: mbutils.c:1249
static bool pg_generic_charinc(unsigned char *charptr, int len)
Definition: mbutils.c:1215
int GetDatabaseEncoding(void)
Definition: mbutils.c:1151

◆ pg_database_encoding_max_length()

◆ pg_do_encoding_conversion()

unsigned char* pg_do_encoding_conversion ( unsigned char *  src,
int  len,
int  src_encoding,
int  dest_encoding 
)

Definition at line 356 of file mbutils.c.

References CStringGetDatum, CurrentMemoryContext, elog, ereport, errcode(), errdetail(), errmsg(), ERROR, FindDefaultConversionProc(), Int32GetDatum, IsTransactionState(), MAX_CONVERSION_GROWTH, MaxAllocHugeSize, MaxAllocSize, MemoryContextAllocHuge(), OidFunctionCall5, OidIsValid, pg_encoding_to_char(), PG_SQL_ASCII, pg_verify_mbstr(), and repalloc().

Referenced by convert_charset(), pg_any_to_server(), pg_convert(), pg_server_to_any(), report_untranslatable_char(), surrogate_pair_to_codepoint(), and xml_is_document().

358 {
359  unsigned char *result;
360  Oid proc;
361 
362  if (len <= 0)
363  return src; /* empty string is always valid */
364 
365  if (src_encoding == dest_encoding)
366  return src; /* no conversion required, assume valid */
367 
368  if (dest_encoding == PG_SQL_ASCII)
369  return src; /* any string is valid in SQL_ASCII */
370 
371  if (src_encoding == PG_SQL_ASCII)
372  {
373  /* No conversion is possible, but we must validate the result */
374  (void) pg_verify_mbstr(dest_encoding, (const char *) src, len, false);
375  return src;
376  }
377 
378  if (!IsTransactionState()) /* shouldn't happen */
379  elog(ERROR, "cannot perform encoding conversion outside a transaction");
380 
381  proc = FindDefaultConversionProc(src_encoding, dest_encoding);
382  if (!OidIsValid(proc))
383  ereport(ERROR,
384  (errcode(ERRCODE_UNDEFINED_FUNCTION),
385  errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
386  pg_encoding_to_char(src_encoding),
387  pg_encoding_to_char(dest_encoding))));
388 
389  /*
390  * Allocate space for conversion result, being wary of integer overflow.
391  *
392  * len * MAX_CONVERSION_GROWTH is typically a vast overestimate of the
393  * required space, so it might exceed MaxAllocSize even though the result
394  * would actually fit. We do not want to hand back a result string that
395  * exceeds MaxAllocSize, because callers might not cope gracefully --- but
396  * if we just allocate more than that, and don't use it, that's fine.
397  */
398  if ((Size) len >= (MaxAllocHugeSize / (Size) MAX_CONVERSION_GROWTH))
399  ereport(ERROR,
400  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
401  errmsg("out of memory"),
402  errdetail("String of %d bytes is too long for encoding conversion.",
403  len)));
404 
405  result = (unsigned char *)
406  MemoryContextAllocHuge(CurrentMemoryContext,
407  (Size) len * MAX_CONVERSION_GROWTH + 1);
408 
409  OidFunctionCall5(proc,
410  Int32GetDatum(src_encoding),
411  Int32GetDatum(dest_encoding),
412  CStringGetDatum(src),
413  CStringGetDatum(result),
414  Int32GetDatum(len));
415 
416  /*
417  * If the result is large, it's worth repalloc'ing to release any extra
418  * space we asked for. The cutoff here is somewhat arbitrary, but we
419  * *must* check when len * MAX_CONVERSION_GROWTH exceeds MaxAllocSize.
420  */
421  if (len > 1000000)
422  {
423  Size resultlen = strlen((char *) result);
424 
425  if (resultlen >= MaxAllocSize)
426  ereport(ERROR,
427  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
428  errmsg("out of memory"),
429  errdetail("String of %d bytes is too long for encoding conversion.",
430  len)));
431 
432  result = (unsigned char *) repalloc(result, resultlen + 1);
433  }
434 
435  return result;
436 }
int errcode(int sqlerrcode)
Definition: elog.c:610
unsigned int Oid
Definition: postgres_ext.h:31
#define OidIsValid(objectId)
Definition: c.h:644
#define MaxAllocHugeSize
Definition: memutils.h:44
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1457
#define ERROR
Definition: elog.h:43
void * MemoryContextAllocHuge(MemoryContext context, Size size)
Definition: mcxt.c:1105
int errdetail(const char *fmt,...)
Definition: elog.c:957
#define CStringGetDatum(X)
Definition: postgres.h:578
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
#define MaxAllocSize
Definition: memutils.h:40
#define ereport(elevel,...)
Definition: elog.h:144
const char * pg_encoding_to_char(int encoding)
Definition: encnames.c:588
size_t Size
Definition: c.h:466
Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding)
Definition: namespace.c:3710
bool IsTransactionState(void)
Definition: xact.c:355
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1069
#define MAX_CONVERSION_GROWTH
Definition: pg_wchar.h:316
#define Int32GetDatum(X)
Definition: postgres.h:479
int errmsg(const char *fmt,...)
Definition: elog.c:824
#define elog(elevel,...)
Definition: elog.h:214
#define OidFunctionCall5(functionId, arg1, arg2, arg3, arg4, arg5)
Definition: fmgr.h:661

◆ pg_dsplen()

int pg_dsplen ( const char *  mbstr)

Definition at line 914 of file mbutils.c.

References pg_wchar_tbl::dsplen, pg_enc2name::encoding, and pg_wchar_table.

Referenced by p_isspecial(), and surrogate_pair_to_codepoint().

915 {
916  return pg_wchar_table[DatabaseEncoding->encoding].dsplen((const unsigned char *) mbstr);
917 }
mbdisplaylen_converter dsplen
Definition: pg_wchar.h:383
pg_enc encoding
Definition: pg_wchar.h:338
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81

◆ pg_encoding_max_length_sql()

Datum pg_encoding_max_length_sql ( PG_FUNCTION_ARGS  )

Definition at line 587 of file mbutils.c.

References encoding, PG_GETARG_INT32, PG_RETURN_INT32, PG_RETURN_NULL, PG_VALID_ENCODING, and pg_wchar_table.

588 {
589  int encoding = PG_GETARG_INT32(0);
590 
591  if (PG_VALID_ENCODING(encoding))
592  PG_RETURN_INT32(pg_wchar_table[encoding].maxmblen);
593  else
594  PG_RETURN_NULL();
595 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:264
#define PG_RETURN_INT32(x)
Definition: fmgr.h:344
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505
#define PG_RETURN_NULL()
Definition: fmgr.h:335

◆ pg_encoding_mb2wchar_with_len()

int pg_encoding_mb2wchar_with_len ( int  encoding,
const char *  from,
pg_wchar to,
int  len 
)

Definition at line 877 of file mbutils.c.

References encoding, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

Referenced by surrogate_pair_to_codepoint(), and xml_is_document().

879 {
880  return pg_wchar_table[encoding].mb2wchar_with_len((const unsigned char *) from, to, len);
881 }
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505
mb2wchar_with_len_converter mb2wchar_with_len
Definition: pg_wchar.h:378

◆ pg_encoding_mbcliplen()

int pg_encoding_mbcliplen ( int  encoding,
const char *  mbstr,
int  len,
int  limit 
)

Definition at line 977 of file mbutils.c.

References cliplen(), encoding, pg_wchar_tbl::mblen, pg_encoding_max_length(), and pg_wchar_table.

Referenced by pg_mbcliplen(), and surrogate_pair_to_codepoint().

979 {
980  mblen_converter mblen_fn;
981  int clen = 0;
982  int l;
983 
984  /* optimization for single byte encoding */
985  if (pg_encoding_max_length(encoding) == 1)
986  return cliplen(mbstr, len, limit);
987 
988  mblen_fn = pg_wchar_table[encoding].mblen;
989 
990  while (len > 0 && *mbstr)
991  {
992  l = (*mblen_fn) ((const unsigned char *) mbstr);
993  if ((clen + l) > limit)
994  break;
995  clen += l;
996  if (clen == limit)
997  break;
998  len -= l;
999  mbstr += l;
1000  }
1001  return clen;
1002 }
int(* mblen_converter)(const unsigned char *mbstr)
Definition: pg_wchar.h:368
int pg_encoding_max_length(int encoding)
Definition: wchar.c:1589
static int cliplen(const char *str, int len, int limit)
Definition: mbutils.c:1034
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505
mblen_converter mblen
Definition: pg_wchar.h:382

◆ PG_encoding_to_char()

Datum PG_encoding_to_char ( PG_FUNCTION_ARGS  )

Definition at line 1183 of file mbutils.c.

References CStringGetDatum, DirectFunctionCall1, encoding, namein(), pg_encoding_to_char(), and PG_GETARG_INT32.

1184 {
1185  int32 encoding = PG_GETARG_INT32(0);
1186  const char *encoding_name = pg_encoding_to_char(encoding);
1187 
1188  return DirectFunctionCall1(namein, CStringGetDatum(encoding_name));
1189 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:264
Datum namein(PG_FUNCTION_ARGS)
Definition: name.c:48
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:615
signed int int32
Definition: c.h:355
#define CStringGetDatum(X)
Definition: postgres.h:578
const char * pg_encoding_to_char(int encoding)
Definition: encnames.c:588
int32 encoding
Definition: pg_database.h:41

◆ pg_encoding_wchar2mb_with_len()

int pg_encoding_wchar2mb_with_len ( int  encoding,
const pg_wchar from,
char *  to,
int  len 
)

Definition at line 899 of file mbutils.c.

References encoding, pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

Referenced by surrogate_pair_to_codepoint().

901 {
902  return pg_wchar_table[encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
903 }
wchar2mb_with_len_converter wchar2mb_with_len
Definition: pg_wchar.h:380
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505

◆ pg_eucjp_increment()

static bool pg_eucjp_increment ( unsigned char *  charptr,
int  length 
)
static

Definition at line 1327 of file mbutils.c.

References i, IS_HIGHBIT_SET, SS2, and SS3.

Referenced by pg_database_encoding_character_incrementer().

1328 {
1329  unsigned char c1,
1330  c2;
1331  int i;
1332 
1333  c1 = *charptr;
1334 
1335  switch (c1)
1336  {
1337  case SS2: /* JIS X 0201 */
1338  if (length != 2)
1339  return false;
1340 
1341  c2 = charptr[1];
1342 
1343  if (c2 >= 0xdf)
1344  charptr[0] = charptr[1] = 0xa1;
1345  else if (c2 < 0xa1)
1346  charptr[1] = 0xa1;
1347  else
1348  charptr[1]++;
1349  break;
1350 
1351  case SS3: /* JIS X 0212 */
1352  if (length != 3)
1353  return false;
1354 
1355  for (i = 2; i > 0; i--)
1356  {
1357  c2 = charptr[i];
1358  if (c2 < 0xa1)
1359  {
1360  charptr[i] = 0xa1;
1361  return true;
1362  }
1363  else if (c2 < 0xfe)
1364  {
1365  charptr[i]++;
1366  return true;
1367  }
1368  }
1369 
1370  /* Out of 3-byte code region */
1371  return false;
1372 
1373  default:
1374  if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1375  {
1376  if (length != 2)
1377  return false;
1378 
1379  for (i = 1; i >= 0; i--)
1380  {
1381  c2 = charptr[i];
1382  if (c2 < 0xa1)
1383  {
1384  charptr[i] = 0xa1;
1385  return true;
1386  }
1387  else if (c2 < 0xfe)
1388  {
1389  charptr[i]++;
1390  return true;
1391  }
1392  }
1393 
1394  /* Out of 2 byte code region */
1395  return false;
1396  }
1397  else
1398  { /* ASCII, single byte */
1399  if (c1 > 0x7e)
1400  return false;
1401  (*charptr)++;
1402  }
1403  break;
1404  }
1405 
1406  return true;
1407 }
#define SS3
Definition: pg_wchar.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119
int i
#define SS2
Definition: pg_wchar.h:35

◆ pg_generic_charinc()

static bool pg_generic_charinc ( unsigned char *  charptr,
int  len 
)
static

Definition at line 1215 of file mbutils.c.

References GetDatabaseEncoding(), pg_wchar_tbl::mbverify, and pg_wchar_table.

Referenced by pg_database_encoding_character_incrementer().

1216 {
1217  unsigned char *lastbyte = charptr + len - 1;
1218  mbverifier mbverify;
1219 
1220  /* We can just invoke the character verifier directly. */
1221  mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverify;
1222 
1223  while (*lastbyte < (unsigned char) 255)
1224  {
1225  (*lastbyte)++;
1226  if ((*mbverify) (charptr, len) == len)
1227  return true;
1228  }
1229 
1230  return false;
1231 }
int(* mbverifier)(const unsigned char *mbstr, int len)
Definition: pg_wchar.h:374
int GetDatabaseEncoding(void)
Definition: mbutils.c:1151
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505
mbverifier mbverify
Definition: pg_wchar.h:384

◆ pg_get_client_encoding()

int pg_get_client_encoding ( void  )

Definition at line 336 of file mbutils.c.

References pg_enc2name::encoding.

Referenced by BeginCopy(), surrogate_pair_to_codepoint(), and xml_send().

337 {
338  return ClientEncoding->encoding;
339 }
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
pg_enc encoding
Definition: pg_wchar.h:338

◆ pg_get_client_encoding_name()

const char* pg_get_client_encoding_name ( void  )

Definition at line 345 of file mbutils.c.

References pg_enc2name::name.

Referenced by surrogate_pair_to_codepoint().

346 {
347  return ClientEncoding->name;
348 }
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
const char * name
Definition: pg_wchar.h:337

◆ pg_mb2wchar()

int pg_mb2wchar ( const char *  from,
pg_wchar to 
)

Definition at line 863 of file mbutils.c.

References pg_enc2name::encoding, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

Referenced by surrogate_pair_to_codepoint().

864 {
865  return pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len((const unsigned char *) from, to, strlen(from));
866 }
pg_enc encoding
Definition: pg_wchar.h:338
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81
mb2wchar_with_len_converter mb2wchar_with_len
Definition: pg_wchar.h:378

◆ pg_mb2wchar_with_len()

int pg_mb2wchar_with_len ( const char *  from,
pg_wchar to,
int  len 
)

Definition at line 870 of file mbutils.c.

References pg_enc2name::encoding, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

Referenced by check_ident_usermap(), CheckAffix(), NIAddAffix(), parse_ident_line(), RE_compile(), RE_compile_and_cache(), RE_execute(), replace_text_regexp(), setup_regexp_matches(), surrogate_pair_to_codepoint(), and TParserInit().

871 {
872  return pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len((const unsigned char *) from, to, len);
873 }
pg_enc encoding
Definition: pg_wchar.h:338
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81
mb2wchar_with_len_converter mb2wchar_with_len
Definition: pg_wchar.h:378

◆ pg_mbcharcliplen()

int pg_mbcharcliplen ( const char *  mbstr,
int  len,
int  limit 
)

Definition at line 1009 of file mbutils.c.

References cliplen(), pg_database_encoding_max_length(), and pg_mblen().

Referenced by bpchar(), bpchar_input(), surrogate_pair_to_codepoint(), text_left(), text_right(), varchar(), and varchar_input().

1010 {
1011  int clen = 0;
1012  int nch = 0;
1013  int l;
1014 
1015  /* optimization for single byte encoding */
1016  if (pg_database_encoding_max_length() == 1)
1017  return cliplen(mbstr, len, limit);
1018 
1019  while (len > 0 && *mbstr)
1020  {
1021  l = pg_mblen(mbstr);
1022  nch++;
1023  if (nch > limit)
1024  break;
1025  clen += l;
1026  len -= l;
1027  mbstr += l;
1028  }
1029  return clen;
1030 }
static int cliplen(const char *str, int len, int limit)
Definition: mbutils.c:1034
int pg_mblen(const char *mbstr)
Definition: mbutils.c:907
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1436

◆ pg_mbcliplen()

◆ pg_mblen()

int pg_mblen ( const char *  mbstr)

Definition at line 907 of file mbutils.c.

References pg_enc2name::encoding, pg_wchar_tbl::mblen, and pg_wchar_table.

Referenced by addCompoundAffixFlagValue(), appendStringInfoRegexpSubstr(), charlen_to_bytelen(), check_replace_text_has_escape_char(), DCH_from_char(), dotrim(), find_word(), findchar(), findchar2(), findwrd(), gbt_var_node_cp_len(), get_modifiers(), get_nextfield(), get_wildcard_part(), getlexeme(), getNextFlagFromString(), gettoken_query(), gettoken_query_standard(), gettoken_query_websearch(), gettoken_tsvector(), infix(), initTrie(), lpad(), lquery_in(), ltree_in(), make_trigrams(), map_sql_identifier_to_xml_name(), map_xml_name_to_sql_identifier(), match_prosrc_to_literal(), mb_strchr(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), NUM_eat_non_data_chars(), NUM_processor(), parse_affentry(), parse_format(), parse_or_operator(), pg_mbcharcliplen(), pg_mbstrlen(), pg_mbstrlen_with_len(), readstoplist(), report_json_context(), rpad(), RS_compile(), RS_execute(), RS_isRegis(), similar_escape_internal(), surrogate_pair_to_codepoint(), t_isalpha(), t_isdigit(), t_isprint(), t_isspace(), text_position_get_match_pos(), text_position_next(), text_reverse(), text_substring(), text_to_array_internal(), thesaurusRead(), TParserGet(), translate(), ts_stat_sql(), tsvectorout(), unaccent_lexize(), varstr_levenshtein(), wchareq(), and xml_is_document().

908 {
909  return pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
910 }
pg_enc encoding
Definition: pg_wchar.h:338
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81
mblen_converter mblen
Definition: pg_wchar.h:382

◆ pg_mbstrlen()

int pg_mbstrlen ( const char *  mbstr)

Definition at line 921 of file mbutils.c.

References pg_database_encoding_max_length(), and pg_mblen().

Referenced by NUM_processor(), surrogate_pair_to_codepoint(), and text_format_append_string().

922 {
923  int len = 0;
924 
925  /* optimization for single byte encoding */
926  if (pg_database_encoding_max_length() == 1)
927  return strlen(mbstr);
928 
929  while (*mbstr)
930  {
931  mbstr += pg_mblen(mbstr);
932  len++;
933  }
934  return len;
935 }
int pg_mblen(const char *mbstr)
Definition: mbutils.c:907
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1436

◆ pg_mbstrlen_with_len()

int pg_mbstrlen_with_len ( const char *  mbstr,
int  limit 
)

Definition at line 941 of file mbutils.c.

References pg_database_encoding_max_length(), and pg_mblen().

Referenced by bpchar(), bpchar_input(), bpcharlen(), executor_errposition(), lpad(), match_prosrc_to_query(), parser_errposition(), plpgsql_scanner_errposition(), rpad(), similar_escape_internal(), surrogate_pair_to_codepoint(), text_left(), text_length(), text_right(), text_substring(), and varstr_levenshtein().

942 {
943  int len = 0;
944 
945  /* optimization for single byte encoding */
946  if (pg_database_encoding_max_length() == 1)
947  return limit;
948 
949  while (limit > 0 && *mbstr)
950  {
951  int l = pg_mblen(mbstr);
952 
953  limit -= l;
954  mbstr += l;
955  len++;
956  }
957  return len;
958 }
int pg_mblen(const char *mbstr)
Definition: mbutils.c:907
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1436

◆ pg_server_to_any()

char* pg_server_to_any ( const char *  s,
int  len,
int  encoding 
)

Definition at line 692 of file mbutils.c.

References pg_enc2name::encoding, perform_default_encoding_conversion(), pg_do_encoding_conversion(), PG_SQL_ASCII, pg_verify_mbstr(), and unconstify.

Referenced by compareStrings(), CopyAttributeOutCSV(), CopyAttributeOutText(), CopyTo(), dsnowball_lexize(), hv_fetch_string(), hv_store_string(), pg_server_to_client(), PLyUnicode_AsString(), surrogate_pair_to_codepoint(), utf_e2u(), and xml_is_document().

693 {
694  if (len <= 0)
695  return unconstify(char *, s); /* empty string is always valid */
696 
697  if (encoding == DatabaseEncoding->encoding ||
698  encoding == PG_SQL_ASCII)
699  return unconstify(char *, s); /* assume data is valid */
700 
701  if (DatabaseEncoding->encoding == PG_SQL_ASCII)
702  {
703  /* No conversion is possible, but we must validate the result */
704  (void) pg_verify_mbstr(encoding, s, len, false);
705  return unconstify(char *, s);
706  }
707 
708  /* Fast path if we can use cached conversion function */
709  if (encoding == ClientEncoding->encoding)
710  return perform_default_encoding_conversion(s, len, false);
711 
712  /* General case ... will not work outside transactions */
713  return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
714  len,
715  DatabaseEncoding->encoding,
716  encoding);
717 }
static char * perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)
Definition: mbutils.c:726
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
unsigned char * pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding)
Definition: mbutils.c:356
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1457
#define unconstify(underlying_type, expr)
Definition: c.h:1206
pg_enc encoding
Definition: pg_wchar.h:338
int32 encoding
Definition: pg_database.h:41
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81

◆ pg_server_to_client()

char* pg_server_to_client ( const char *  s,
int  len 
)

Definition at line 681 of file mbutils.c.

References pg_enc2name::encoding, and pg_server_to_any().

Referenced by pq_puttextmessage(), pq_sendcountedtext(), pq_sendstring(), pq_sendtext(), pq_writestring(), and surrogate_pair_to_codepoint().

682 {
683  return pg_server_to_any(s, len, ClientEncoding->encoding);
684 }
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
char * pg_server_to_any(const char *s, int len, int encoding)
Definition: mbutils.c:692
pg_enc encoding
Definition: pg_wchar.h:338

◆ pg_unicode_to_server()

void pg_unicode_to_server ( pg_wchar  c,
unsigned char *  s 
)

Definition at line 806 of file mbutils.c.

References CStringGetDatum, ereport, errcode(), errmsg(), ERROR, FunctionCall5, GetDatabaseEncoding(), GetDatabaseEncodingName(), Int32GetDatum, is_valid_unicode_codepoint(), MAX_MULTIBYTE_CHAR_LEN, name, pg_enc2name_tbl, PG_UTF8, pg_utf_mblen(), and unicode_to_utf8().

Referenced by json_lex_string(), map_xml_name_to_sql_identifier(), str_udeescape(), and surrogate_pair_to_codepoint().

807 {
808  unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
809  int c_as_utf8_len;
810  int server_encoding;
811 
812  /*
813  * Complain if invalid Unicode code point. The choice of errcode here is
814  * debatable, but really our caller should have checked this anyway.
815  */
816  if (!is_valid_unicode_codepoint(c))
817  ereport(ERROR,
818  (errcode(ERRCODE_SYNTAX_ERROR),
819  errmsg("invalid Unicode code point")));
820 
821  /* Otherwise, if it's in ASCII range, conversion is trivial */
822  if (c <= 0x7F)
823  {
824  s[0] = (unsigned char) c;
825  s[1] = '\0';
826  return;
827  }
828 
829  /* If the server encoding is UTF-8, we just need to reformat the code */
830  server_encoding = GetDatabaseEncoding();
831  if (server_encoding == PG_UTF8)
832  {
833  unicode_to_utf8(c, s);
834  s[pg_utf_mblen(s)] = '\0';
835  return;
836  }
837 
838  /* For all other cases, we must have a conversion function available */
839  if (Utf8ToServerConvProc == NULL)
840  ereport(ERROR,
841  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
842  errmsg("conversion between %s and %s is not supported",
843  pg_enc2name_tbl[PG_UTF8].name,
844  GetDatabaseEncodingName())));
845 
846  /* Construct UTF-8 source string */
847  unicode_to_utf8(c, c_as_utf8);
848  c_as_utf8_len = pg_utf_mblen(c_as_utf8);
849  c_as_utf8[c_as_utf8_len] = '\0';
850 
851  /* Convert, or throw error if we can't */
852  FunctionCall5(Utf8ToServerConvProc,
853  Int32GetDatum(PG_UTF8),
854  Int32GetDatum(server_encoding),
855  CStringGetDatum(c_as_utf8),
856  CStringGetDatum(s),
857  Int32GetDatum(c_as_utf8_len));
858 }
static FmgrInfo * Utf8ToServerConvProc
Definition: mbutils.c:75
unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: wchar.c:483
static bool is_valid_unicode_codepoint(pg_wchar c)
Definition: pg_wchar.h:518
int errcode(int sqlerrcode)
Definition: elog.c:610
#define ERROR
Definition: elog.h:43
#define FunctionCall5(flinfo, arg1, arg2, arg3, arg4, arg5)
Definition: fmgr.h:641
char * c
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
#define CStringGetDatum(X)
Definition: postgres.h:578
#define MAX_MULTIBYTE_CHAR_LEN
Definition: pg_wchar.h:30
int GetDatabaseEncoding(void)
Definition: mbutils.c:1151
#define ereport(elevel,...)
Definition: elog.h:144
const char * GetDatabaseEncodingName(void)
Definition: mbutils.c:1157
const char * name
Definition: encode.c:521
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:549
#define Int32GetDatum(X)
Definition: postgres.h:479
int errmsg(const char *fmt,...)
Definition: elog.c:824

◆ pg_utf8_increment()

static bool pg_utf8_increment ( unsigned char *  charptr,
int  length 
)
static

Definition at line 1249 of file mbutils.c.

Referenced by pg_database_encoding_character_incrementer().

1250 {
1251  unsigned char a;
1252  unsigned char limit;
1253 
1254  switch (length)
1255  {
1256  default:
1257  /* reject lengths 5 and 6 for now */
1258  return false;
1259  case 4:
1260  a = charptr[3];
1261  if (a < 0xBF)
1262  {
1263  charptr[3]++;
1264  break;
1265  }
1266  /* FALL THRU */
1267  case 3:
1268  a = charptr[2];
1269  if (a < 0xBF)
1270  {
1271  charptr[2]++;
1272  break;
1273  }
1274  /* FALL THRU */
1275  case 2:
1276  a = charptr[1];
1277  switch (*charptr)
1278  {
1279  case 0xED:
1280  limit = 0x9F;
1281  break;
1282  case 0xF4:
1283  limit = 0x8F;
1284  break;
1285  default:
1286  limit = 0xBF;
1287  break;
1288  }
1289  if (a < limit)
1290  {
1291  charptr[1]++;
1292  break;
1293  }
1294  /* FALL THRU */
1295  case 1:
1296  a = *charptr;
1297  if (a == 0x7F || a == 0xDF || a == 0xEF || a == 0xF4)
1298  return false;
1299  charptr[0]++;
1300  break;
1301  }
1302 
1303  return true;
1304 }

◆ pg_verify_mbstr()

bool pg_verify_mbstr ( int  encoding,
const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1457 of file mbutils.c.

References pg_verify_mbstr_len().

Referenced by LogicalOutputWrite(), pg_any_to_server(), pg_do_encoding_conversion(), pg_server_to_any(), surrogate_pair_to_codepoint(), and t_readline().

1458 {
1459  return pg_verify_mbstr_len(encoding, mbstr, len, noError) >= 0;
1460 }
int32 encoding
Definition: pg_database.h:41
int pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1474

◆ pg_verify_mbstr_len()

int pg_verify_mbstr_len ( int  encoding,
const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1474 of file mbutils.c.

References Assert, encoding, IS_HIGHBIT_SET, pg_wchar_tbl::mbverify, pg_encoding_max_length(), PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().

Referenced by length_in_encoding(), pg_convert(), pg_verify_mbstr(), pg_verifymbstr(), read_extension_script_file(), and surrogate_pair_to_codepoint().

1475 {
1476  mbverifier mbverify;
1477  int mb_len;
1478 
1479  Assert(PG_VALID_ENCODING(encoding));
1480 
1481  /*
1482  * In single-byte encodings, we need only reject nulls (\0).
1483  */
1484  if (pg_encoding_max_length(encoding) <= 1)
1485  {
1486  const char *nullpos = memchr(mbstr, 0, len);
1487 
1488  if (nullpos == NULL)
1489  return len;
1490  if (noError)
1491  return -1;
1492  report_invalid_encoding(encoding, nullpos, 1);
1493  }
1494 
1495  /* fetch function pointer just once */
1496  mbverify = pg_wchar_table[encoding].mbverify;
1497 
1498  mb_len = 0;
1499 
1500  while (len > 0)
1501  {
1502  int l;
1503 
1504  /* fast path for ASCII-subset characters */
1505  if (!IS_HIGHBIT_SET(*mbstr))
1506  {
1507  if (*mbstr != '\0')
1508  {
1509  mb_len++;
1510  mbstr++;
1511  len--;
1512  continue;
1513  }
1514  if (noError)
1515  return -1;
1516  report_invalid_encoding(encoding, mbstr, len);
1517  }
1518 
1519  l = (*mbverify) ((const unsigned char *) mbstr, len);
1520 
1521  if (l < 0)
1522  {
1523  if (noError)
1524  return -1;
1525  report_invalid_encoding(encoding, mbstr, len);
1526  }
1527 
1528  mbstr += l;
1529  len -= l;
1530  mb_len++;
1531  }
1532  return mb_len;
1533 }
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1575
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1119
int pg_encoding_max_length(int encoding)
Definition: wchar.c:1589
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
int(* mbverifier)(const unsigned char *mbstr, int len)
Definition: pg_wchar.h:374
#define Assert(condition)
Definition: c.h:738
int32 encoding
Definition: pg_database.h:41
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505
mbverifier mbverify
Definition: pg_wchar.h:384

◆ pg_verifymbstr()

bool pg_verifymbstr ( const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1446 of file mbutils.c.

References GetDatabaseEncoding(), and pg_verify_mbstr_len().

Referenced by char2wchar(), CopyReadAttributesText(), plperl_spi_exec(), plperl_spi_prepare(), plperl_spi_query(), PLy_cursor_query(), PLy_output(), PLy_spi_execute_query(), PLy_spi_prepare(), PLyObject_AsString(), read_text_file(), spg_text_leaf_consistent(), and surrogate_pair_to_codepoint().

1447 {
1448  return
1449  pg_verify_mbstr_len(GetDatabaseEncoding(), mbstr, len, noError) >= 0;
1450 }
int GetDatabaseEncoding(void)
Definition: mbutils.c:1151
int pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1474

◆ pg_wchar2mb()

int pg_wchar2mb ( const pg_wchar from,
char *  to 
)

Definition at line 885 of file mbutils.c.

References pg_enc2name::encoding, pg_wchar_strlen(), pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

Referenced by surrogate_pair_to_codepoint().

886 {
887  return pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len(from, (unsigned char *) to, pg_wchar_strlen(from));
888 }
size_t pg_wchar_strlen(const pg_wchar *str)
Definition: wstrncmp.c:70
wchar2mb_with_len_converter wchar2mb_with_len
Definition: pg_wchar.h:380
pg_enc encoding
Definition: pg_wchar.h:338
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81

◆ pg_wchar2mb_with_len()

int pg_wchar2mb_with_len ( const pg_wchar from,
char *  to,
int  len 
)

Definition at line 892 of file mbutils.c.

References pg_enc2name::encoding, pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

Referenced by build_regexp_match_result(), build_regexp_split_result(), convertPgWchar(), regexp_fixed_prefix(), and surrogate_pair_to_codepoint().

893 {
894  return pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
895 }
wchar2mb_with_len_converter wchar2mb_with_len
Definition: pg_wchar.h:380
pg_enc encoding
Definition: pg_wchar.h:338
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81

◆ PrepareClientEncoding()

int PrepareClientEncoding ( int  encoding)

Definition at line 110 of file mbutils.c.

References backend_startup_complete, ConvProcInfo::c_encoding, encoding, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), IsTransactionState(), lcons(), lfirst, MemoryContextAlloc(), MemoryContextSwitchTo(), OidIsValid, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, ConvProcInfo::to_server_info, and TopMemoryContext.

Referenced by check_client_encoding(), InitializeClientEncoding(), and surrogate_pair_to_codepoint().

111 {
112  int current_server_encoding;
113  ListCell *lc;
114 
115  if (!PG_VALID_FE_ENCODING(encoding))
116  return -1;
117 
118  /* Can't do anything during startup, per notes above */
119  if (!backend_startup_complete)
120  return 0;
121 
122  current_server_encoding = GetDatabaseEncoding();
123 
124  /*
125  * Check for cases that require no conversion function.
126  */
127  if (current_server_encoding == encoding ||
128  current_server_encoding == PG_SQL_ASCII ||
129  encoding == PG_SQL_ASCII)
130  return 0;
131 
132  if (IsTransactionState())
133  {
134  /*
135  * If we're in a live transaction, it's safe to access the catalogs,
136  * so look up the functions. We repeat the lookup even if the info is
137  * already cached, so that we can react to changes in the contents of
138  * pg_conversion.
139  */
140  Oid to_server_proc,
141  to_client_proc;
142  ConvProcInfo *convinfo;
143  MemoryContext oldcontext;
144 
145  to_server_proc = FindDefaultConversionProc(encoding,
146  current_server_encoding);
147  if (!OidIsValid(to_server_proc))
148  return -1;
149  to_client_proc = FindDefaultConversionProc(current_server_encoding,
150  encoding);
151  if (!OidIsValid(to_client_proc))
152  return -1;
153 
154  /*
155  * Load the fmgr info into TopMemoryContext (could still fail here)
156  */
157  convinfo = (ConvProcInfo *) MemoryContextAlloc(TopMemoryContext,
158  sizeof(ConvProcInfo));
159  convinfo->s_encoding = current_server_encoding;
160  convinfo->c_encoding = encoding;
161  fmgr_info_cxt(to_server_proc, &convinfo->to_server_info,
162  TopMemoryContext);
163  fmgr_info_cxt(to_client_proc, &convinfo->to_client_info,
164  TopMemoryContext);
165 
166  /* Attach new info to head of list */
167  oldcontext = MemoryContextSwitchTo(TopMemoryContext);
168  ConvProcList = lcons(convinfo, ConvProcList);
169  MemoryContextSwitchTo(oldcontext);
170 
171  /*
172  * We cannot yet remove any older entry for the same encoding pair,
173  * since it could still be in use. SetClientEncoding will clean up.
174  */
175 
176  return 0; /* success */
177  }
178  else
179  {
180  /*
181  * If we're not in a live transaction, the only thing we can do is
182  * restore a previous setting using the cache. This covers all
183  * transaction-rollback cases. The only case it might not work for is
184  * trying to change client_encoding on the fly by editing
185  * postgresql.conf and SIGHUP'ing. Which would probably be a stupid
186  * thing to do anyway.
187  */
188  foreach(lc, ConvProcList)
189  {
190  ConvProcInfo *oldinfo = (ConvProcInfo *) lfirst(lc);
191 
192  if (oldinfo->s_encoding == current_server_encoding &&
193  oldinfo->c_encoding == encoding)
194  return 0;
195  }
196 
197  return -1; /* it's not cached, so fail */
198  }
199 }
FmgrInfo to_server_info
Definition: mbutils.c:57
FmgrInfo to_client_info
Definition: mbutils.c:58
static bool backend_startup_complete
Definition: mbutils.c:90
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
unsigned int Oid
Definition: postgres_ext.h:31
#define OidIsValid(objectId)
Definition: c.h:644
int s_encoding
Definition: mbutils.c:55
static List * ConvProcList
Definition: mbutils.c:61
#define PG_VALID_FE_ENCODING(_enc)
Definition: pg_wchar.h:305
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:134
MemoryContext TopMemoryContext
Definition: mcxt.c:44
int GetDatabaseEncoding(void)
Definition: mbutils.c:1151
List * lcons(void *datum, List *list)
Definition: list.c:454
#define lfirst(lc)
Definition: pg_list.h:190
Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding)
Definition: namespace.c:3710
bool IsTransactionState(void)
Definition: xact.c:355
int32 encoding
Definition: pg_database.h:41
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:796
int c_encoding
Definition: mbutils.c:56

◆ report_invalid_encoding()

void report_invalid_encoding ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 1575 of file mbutils.c.

References buf, ereport, errcode(), errmsg(), ERROR, Min, name, pg_enc2name_tbl, pg_encoding_mblen(), and sprintf.

Referenced by big52mic(), euc_cn2mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2mic(), iso8859_1_to_utf8(), latin2mic(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_verify_mbstr_len(), shift_jis_20042euc_jis_2004(), sjis2euc_jp(), sjis2mic(), surrogate_pair_to_codepoint(), utf8_to_iso8859_1(), and UtfToLocal().

1576 {
1577  int l = pg_encoding_mblen(encoding, mbstr);
1578  char buf[8 * 5 + 1];
1579  char *p = buf;
1580  int j,
1581  jlimit;
1582 
1583  jlimit = Min(l, len);
1584  jlimit = Min(jlimit, 8); /* prevent buffer overrun */
1585 
1586  for (j = 0; j < jlimit; j++)
1587  {
1588  p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
1589  if (j < jlimit - 1)
1590  p += sprintf(p, " ");
1591  }
1592 
1593  ereport(ERROR,
1594  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1595  errmsg("invalid byte sequence for encoding \"%s\": %s",
1596  pg_enc2name_tbl[encoding].name,
1597  buf)));
1598 }
#define Min(x, y)
Definition: c.h:920
int errcode(int sqlerrcode)
Definition: elog.c:610
#define sprintf
Definition: port.h:194
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:67
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:1554
#define ereport(elevel,...)
Definition: elog.h:144
int32 encoding
Definition: pg_database.h:41
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:824

◆ report_untranslatable_char()

void report_untranslatable_char ( int  src_encoding,
int  dest_encoding,
const char *  mbstr,
int  len 
)

Definition at line 1607 of file mbutils.c.

References buf, ereport, errcode(), errmsg(), ERROR, GetMessageEncoding(), IsTransactionState(), Min, name, palloc(), pfree(), pg_do_encoding_conversion(), pg_enc2name_tbl, pg_encoding_mblen(), PG_SQL_ASCII, PG_UTF8, sprintf, and generate_unaccent_rules::str.

Referenced by big52mic(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), surrogate_pair_to_codepoint(), utf8_to_iso8859_1(), and UtfToLocal().

1609 {
1610  int l = pg_encoding_mblen(src_encoding, mbstr);
1611  char buf[8 * 5 + 1];
1612  char *p = buf;
1613  int j,
1614  jlimit;
1615 
1616  jlimit = Min(l, len);
1617  jlimit = Min(jlimit, 8); /* prevent buffer overrun */
1618 
1619  for (j = 0; j < jlimit; j++)
1620  {
1621  p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
1622  if (j < jlimit - 1)
1623  p += sprintf(p, " ");
1624  }
1625 
1626  ereport(ERROR,
1627  (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
1628  errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
1629  buf,
1630  pg_enc2name_tbl[src_encoding].name,
1631  pg_enc2name_tbl[dest_encoding].name)));
1632 }
#define Min(x, y)
Definition: c.h:920
int errcode(int sqlerrcode)
Definition: elog.c:610
#define sprintf
Definition: port.h:194
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:67
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:1554
#define ereport(elevel,...)
Definition: elog.h:144
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:824

◆ SetClientEncoding()

int SetClientEncoding ( int  encoding)

Definition at line 208 of file mbutils.c.

References backend_startup_complete, ConvProcInfo::c_encoding, encoding, foreach_delete_current, GetDatabaseEncoding(), lfirst, pending_client_encoding, pfree(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, and ConvProcInfo::to_server_info.

Referenced by assign_client_encoding(), InitializeClientEncoding(), ParallelWorkerMain(), and surrogate_pair_to_codepoint().

209 {
210  int current_server_encoding;
211  bool found;
212  ListCell *lc;
213 
214  if (!PG_VALID_FE_ENCODING(encoding))
215  return -1;
216 
217  /* Can't do anything during startup, per notes above */
218  if (!backend_startup_complete)
219  {
220  pending_client_encoding = encoding;
221  return 0;
222  }
223 
224  current_server_encoding = GetDatabaseEncoding();
225 
226  /*
227  * Check for cases that require no conversion function.
228  */
229  if (current_server_encoding == encoding ||
230  current_server_encoding == PG_SQL_ASCII ||
231  encoding == PG_SQL_ASCII)
232  {
233  ClientEncoding = &pg_enc2name_tbl[encoding];
234  ToServerConvProc = NULL;
235  ToClientConvProc = NULL;
236  return 0;
237  }
238 
239  /*
240  * Search the cache for the entry previously prepared by
241  * PrepareClientEncoding; if there isn't one, we lose. While at it,
242  * release any duplicate entries so that repeated Prepare/Set cycles don't
243  * leak memory.
244  */
245  found = false;
246  foreach(lc, ConvProcList)
247  {
248  ConvProcInfo *convinfo = (ConvProcInfo *) lfirst(lc);
249 
250  if (convinfo->s_encoding == current_server_encoding &&
251  convinfo->c_encoding == encoding)
252  {
253  if (!found)
254  {
255  /* Found newest entry, so set up */
256  ClientEncoding = &pg_enc2name_tbl[encoding];
257  ToServerConvProc = &convinfo->to_server_info;
258  ToClientConvProc = &convinfo->to_client_info;
259  found = true;
260  }
261  else
262  {
263  /* Duplicate entry, release it */
264  ConvProcList = foreach_delete_current(ConvProcList, lc);
265  pfree(convinfo);
266  }
267  }
268  }
269 
270  if (found)
271  return 0; /* success */
272  else
273  return -1; /* it's not cached, so fail */
274 }
FmgrInfo to_server_info
Definition: mbutils.c:57
FmgrInfo to_client_info
Definition: mbutils.c:58
static bool backend_startup_complete
Definition: mbutils.c:90
static int pending_client_encoding
Definition: mbutils.c:91
static FmgrInfo * ToServerConvProc
Definition: mbutils.c:67
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:80
#define foreach_delete_current(lst, cell)
Definition: pg_list.h:368
int s_encoding
Definition: mbutils.c:55
void pfree(void *pointer)
Definition: mcxt.c:1056
static List * ConvProcList
Definition: mbutils.c:61
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
#define PG_VALID_FE_ENCODING(_enc)
Definition: pg_wchar.h:305
int GetDatabaseEncoding(void)
Definition: mbutils.c:1151
#define lfirst(lc)
Definition: pg_list.h:190
int32 encoding
Definition: pg_database.h:41
static FmgrInfo * ToClientConvProc
Definition: mbutils.c:68
int c_encoding
Definition: mbutils.c:56

◆ SetDatabaseEncoding()

void SetDatabaseEncoding ( int  encoding)

Definition at line 1045 of file mbutils.c.

References Assert, elog, encoding, pg_enc2name::encoding, ERROR, pg_enc2name_tbl, and PG_VALID_BE_ENCODING.

Referenced by CheckMyDatabase(), and surrogate_pair_to_codepoint().

1046 {
1047  if (!PG_VALID_BE_ENCODING(encoding))
1048  elog(ERROR, "invalid database encoding: %d", encoding);
1049 
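1050  DatabaseEncoding = &pg_enc2name_tbl[encoding];
1051  Assert(DatabaseEncoding->encoding == encoding);
1052 }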
#define ERROR
Definition: elog.h:43
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
pg_enc encoding
Definition: pg_wchar.h:338
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:295
#define Assert(condition)
Definition: c.h:738
int32 encoding
Definition: pg_database.h:41
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:81
#define elog(elevel,...)
Definition: elog.h:214

◆ SetMessageEncoding()

void SetMessageEncoding ( int  encoding)

Definition at line 1055 of file mbutils.c.

References Assert, CurrentMemoryContext, elog, encoding, pg_enc2name::encoding, GetDatabaseEncoding(), GetMessageEncoding(), i, LOG, pg_enc2gettext::name, name, pg_enc2gettext_tbl, pg_enc2name_tbl, pg_get_encoding_from_locale(), PG_SQL_ASCII, pg_strcasecmp(), PG_VALID_ENCODING, setlocale, and write_stderr.

Referenced by pg_perm_setlocale(), and surrogate_pair_to_codepoint().

1056 {
1057  /* Some calls happen before we can elog()! */
1058  Assert(PG_VALID_ENCODING(encoding));
1059 
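1060  MessageEncoding = &pg_enc2name_tbl[encoding];
1061  Assert(MessageEncoding->encoding == encoding);
1062 }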
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
pg_enc encoding
Definition: pg_wchar.h:338
#define Assert(condition)
Definition: c.h:738
int32 encoding
Definition: pg_database.h:41
static const pg_enc2name * MessageEncoding
Definition: mbutils.c:82

Variable Documentation

◆ backend_startup_complete

bool backend_startup_complete = false
static

Definition at line 90 of file mbutils.c.

Referenced by InitializeClientEncoding(), PrepareClientEncoding(), and SetClientEncoding().

◆ ClientEncoding

const pg_enc2name* ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
static

Definition at line 80 of file mbutils.c.

◆ ConvProcList

List* ConvProcList = NIL
static

Definition at line 61 of file mbutils.c.

◆ DatabaseEncoding

const pg_enc2name* DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
static

Definition at line 81 of file mbutils.c.

◆ MessageEncoding

const pg_enc2name* MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
static

Definition at line 82 of file mbutils.c.

◆ pending_client_encoding

int pending_client_encoding = PG_SQL_ASCII
static

Definition at line 91 of file mbutils.c.

Referenced by InitializeClientEncoding(), and SetClientEncoding().

◆ ToClientConvProc

FmgrInfo* ToClientConvProc = NULL
static

Definition at line 68 of file mbutils.c.

Referenced by perform_default_encoding_conversion().

◆ ToServerConvProc

FmgrInfo* ToServerConvProc = NULL
static

Definition at line 67 of file mbutils.c.

Referenced by perform_default_encoding_conversion().

◆ Utf8ToServerConvProc

FmgrInfo* Utf8ToServerConvProc = NULL
static

Definition at line 75 of file mbutils.c.