PostgreSQL Source Code  git master
mbutils.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * mbutils.c
4  * This file contains functions for encoding conversion.
5  *
6  * The string-conversion functions in this file share some API quirks.
7  * Note the following:
8  *
9  * The functions return a palloc'd, null-terminated string if conversion
10  * is required. However, if no conversion is performed, the given source
11  * string pointer is returned as-is.
12  *
13  * Although the presence of a length argument means that callers can pass
14  * non-null-terminated strings, care is required because the same string
15  * will be passed back if no conversion occurs. Such callers *must* check
16  * whether result == src and handle that case differently.
17  *
18  * If the source and destination encodings are the same, the source string
19  * is returned without any verification; it's assumed to be valid data.
20  * If that might not be the case, the caller is responsible for validating
21  * the string using a separate call to pg_verify_mbstr(). Whenever the
22  * source and destination encodings are different, the functions ensure that
23  * the result is validly encoded according to the destination encoding.
24  *
25  *
26  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
27  * Portions Copyright (c) 1994, Regents of the University of California
28  *
29  *
30  * IDENTIFICATION
31  * src/backend/utils/mb/mbutils.c
32  *
33  *-------------------------------------------------------------------------
34  */
35 #include "postgres.h"
36 
37 #include "access/xact.h"
38 #include "catalog/namespace.h"
39 #include "mb/pg_wchar.h"
40 #include "utils/builtins.h"
41 #include "utils/memutils.h"
42 #include "utils/syscache.h"
43 
44 /*
45  * We maintain a simple linked list caching the fmgr lookup info for the
46  * currently selected conversion functions, as well as any that have been
47  * selected previously in the current session. (We remember previous
48  * settings because we must be able to restore a previous setting during
49  * transaction rollback, without doing any fresh catalog accesses.)
50  *
51  * Since we'll never release this data, we just keep it in TopMemoryContext.
52  */
53 typedef struct ConvProcInfo
54 {
55  int s_encoding; /* server and client encoding IDs */
57  FmgrInfo to_server_info; /* lookup info for conversion procs */
59 } ConvProcInfo;
60 
61 static List *ConvProcList = NIL; /* List of ConvProcInfo */
62 
63 /*
64  * These variables point to the currently active conversion functions,
65  * or are NULL when no conversion is needed.
66  */
67 static FmgrInfo *ToServerConvProc = NULL;
68 static FmgrInfo *ToClientConvProc = NULL;
69 
70 /*
71  * These variables track the currently-selected encodings.
72  */
76 
77 /*
78  * During backend startup we can't set client encoding because we (a)
79  * can't look up the conversion functions, and (b) may not know the database
80  * encoding yet either. So SetClientEncoding() just accepts anything and
81  * remembers it for InitializeClientEncoding() to apply later.
82  */
83 static bool backend_startup_complete = false;
85 
86 
87 /* Internal functions */
88 static char *perform_default_encoding_conversion(const char *src,
89  int len, bool is_client_to_server);
90 static int cliplen(const char *str, int len, int limit);
91 
92 
93 /*
94  * Prepare for a future call to SetClientEncoding. Success should mean
95  * that SetClientEncoding is guaranteed to succeed for this encoding request.
96  *
97  * (But note that success before backend_startup_complete does not guarantee
98  * success after ...)
99  *
100  * Returns 0 if okay, -1 if not (bad encoding or can't support conversion)
101  */
102 int
104 {
105  int current_server_encoding;
106  ListCell *lc;
107 
108  if (!PG_VALID_FE_ENCODING(encoding))
109  return -1;
110 
111  /* Can't do anything during startup, per notes above */
113  return 0;
114 
115  current_server_encoding = GetDatabaseEncoding();
116 
117  /*
118  * Check for cases that require no conversion function.
119  */
120  if (current_server_encoding == encoding ||
121  current_server_encoding == PG_SQL_ASCII ||
122  encoding == PG_SQL_ASCII)
123  return 0;
124 
125  if (IsTransactionState())
126  {
127  /*
128  * If we're in a live transaction, it's safe to access the catalogs,
129  * so look up the functions. We repeat the lookup even if the info is
130  * already cached, so that we can react to changes in the contents of
131  * pg_conversion.
132  */
133  Oid to_server_proc,
134  to_client_proc;
135  ConvProcInfo *convinfo;
136  MemoryContext oldcontext;
137 
138  to_server_proc = FindDefaultConversionProc(encoding,
139  current_server_encoding);
140  if (!OidIsValid(to_server_proc))
141  return -1;
142  to_client_proc = FindDefaultConversionProc(current_server_encoding,
143  encoding);
144  if (!OidIsValid(to_client_proc))
145  return -1;
146 
147  /*
148  * Load the fmgr info into TopMemoryContext (could still fail here)
149  */
151  sizeof(ConvProcInfo));
152  convinfo->s_encoding = current_server_encoding;
153  convinfo->c_encoding = encoding;
154  fmgr_info_cxt(to_server_proc, &convinfo->to_server_info,
156  fmgr_info_cxt(to_client_proc, &convinfo->to_client_info,
158 
159  /* Attach new info to head of list */
161  ConvProcList = lcons(convinfo, ConvProcList);
162  MemoryContextSwitchTo(oldcontext);
163 
164  /*
165  * We cannot yet remove any older entry for the same encoding pair,
166  * since it could still be in use. SetClientEncoding will clean up.
167  */
168 
169  return 0; /* success */
170  }
171  else
172  {
173  /*
174  * If we're not in a live transaction, the only thing we can do is
175  * restore a previous setting using the cache. This covers all
176  * transaction-rollback cases. The only case it might not work for is
177  * trying to change client_encoding on the fly by editing
178  * postgresql.conf and SIGHUP'ing. Which would probably be a stupid
179  * thing to do anyway.
180  */
181  foreach(lc, ConvProcList)
182  {
183  ConvProcInfo *oldinfo = (ConvProcInfo *) lfirst(lc);
184 
185  if (oldinfo->s_encoding == current_server_encoding &&
186  oldinfo->c_encoding == encoding)
187  return 0;
188  }
189 
190  return -1; /* it's not cached, so fail */
191  }
192 }
193 
194 /*
195  * Set the active client encoding and set up the conversion-function pointers.
196  * PrepareClientEncoding should have been called previously for this encoding.
197  *
198  * Returns 0 if okay, -1 if not (bad encoding or can't support conversion)
199  */
200 int
202 {
203  int current_server_encoding;
204  bool found;
205  ListCell *lc;
206 
207  if (!PG_VALID_FE_ENCODING(encoding))
208  return -1;
209 
210  /* Can't do anything during startup, per notes above */
212  {
214  return 0;
215  }
216 
217  current_server_encoding = GetDatabaseEncoding();
218 
219  /*
220  * Check for cases that require no conversion function.
221  */
222  if (current_server_encoding == encoding ||
223  current_server_encoding == PG_SQL_ASCII ||
224  encoding == PG_SQL_ASCII)
225  {
226  ClientEncoding = &pg_enc2name_tbl[encoding];
227  ToServerConvProc = NULL;
228  ToClientConvProc = NULL;
229  return 0;
230  }
231 
232  /*
233  * Search the cache for the entry previously prepared by
234  * PrepareClientEncoding; if there isn't one, we lose. While at it,
235  * release any duplicate entries so that repeated Prepare/Set cycles don't
236  * leak memory.
237  */
238  found = false;
239  foreach(lc, ConvProcList)
240  {
241  ConvProcInfo *convinfo = (ConvProcInfo *) lfirst(lc);
242 
243  if (convinfo->s_encoding == current_server_encoding &&
244  convinfo->c_encoding == encoding)
245  {
246  if (!found)
247  {
248  /* Found newest entry, so set up */
249  ClientEncoding = &pg_enc2name_tbl[encoding];
250  ToServerConvProc = &convinfo->to_server_info;
251  ToClientConvProc = &convinfo->to_client_info;
252  found = true;
253  }
254  else
255  {
256  /* Duplicate entry, release it */
257  ConvProcList = foreach_delete_current(ConvProcList, lc);
258  pfree(convinfo);
259  }
260  }
261  }
262 
263  if (found)
264  return 0; /* success */
265  else
266  return -1; /* it's not cached, so fail */
267 }
268 
269 /*
270  * Initialize client encoding conversions.
271  * Called from InitPostgres() once during backend startup.
272  */
273 void
275 {
278 
281  {
282  /*
283  * Oops, the requested conversion is not available. We couldn't fail
284  * before, but we can now.
285  */
286  ereport(FATAL,
287  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
288  errmsg("conversion between %s and %s is not supported",
291  }
292 }
293 
294 /*
295  * returns the current client encoding
296  */
297 int
299 {
300  return ClientEncoding->encoding;
301 }
302 
303 /*
304  * returns the current client encoding name
305  */
306 const char *
308 {
309  return ClientEncoding->name;
310 }
311 
312 /*
313  * Convert src string to another encoding (general case).
314  *
315  * See the notes about string conversion functions at the top of this file.
316  */
317 unsigned char *
318 pg_do_encoding_conversion(unsigned char *src, int len,
319  int src_encoding, int dest_encoding)
320 {
321  unsigned char *result;
322  Oid proc;
323 
324  if (len <= 0)
325  return src; /* empty string is always valid */
326 
327  if (src_encoding == dest_encoding)
328  return src; /* no conversion required, assume valid */
329 
330  if (dest_encoding == PG_SQL_ASCII)
331  return src; /* any string is valid in SQL_ASCII */
332 
333  if (src_encoding == PG_SQL_ASCII)
334  {
335  /* No conversion is possible, but we must validate the result */
336  (void) pg_verify_mbstr(dest_encoding, (const char *) src, len, false);
337  return src;
338  }
339 
340  if (!IsTransactionState()) /* shouldn't happen */
341  elog(ERROR, "cannot perform encoding conversion outside a transaction");
342 
343  proc = FindDefaultConversionProc(src_encoding, dest_encoding);
344  if (!OidIsValid(proc))
345  ereport(ERROR,
346  (errcode(ERRCODE_UNDEFINED_FUNCTION),
347  errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
348  pg_encoding_to_char(src_encoding),
349  pg_encoding_to_char(dest_encoding))));
350 
351  /*
352  * Allocate space for conversion result, being wary of integer overflow.
353  *
354  * len * MAX_CONVERSION_GROWTH is typically a vast overestimate of the
355  * required space, so it might exceed MaxAllocSize even though the result
356  * would actually fit. We do not want to hand back a result string that
357  * exceeds MaxAllocSize, because callers might not cope gracefully --- but
358  * if we just allocate more than that, and don't use it, that's fine.
359  */
360  if ((Size) len >= (MaxAllocHugeSize / (Size) MAX_CONVERSION_GROWTH))
361  ereport(ERROR,
362  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
363  errmsg("out of memory"),
364  errdetail("String of %d bytes is too long for encoding conversion.",
365  len)));
366 
367  result = (unsigned char *)
369  (Size) len * MAX_CONVERSION_GROWTH + 1);
370 
371  OidFunctionCall5(proc,
372  Int32GetDatum(src_encoding),
373  Int32GetDatum(dest_encoding),
374  CStringGetDatum(src),
375  CStringGetDatum(result),
376  Int32GetDatum(len));
377 
378  /*
379  * If the result is large, it's worth repalloc'ing to release any extra
380  * space we asked for. The cutoff here is somewhat arbitrary, but we
381  * *must* check when len * MAX_CONVERSION_GROWTH exceeds MaxAllocSize.
382  */
383  if (len > 1000000)
384  {
385  Size resultlen = strlen((char *) result);
386 
387  if (resultlen >= MaxAllocSize)
388  ereport(ERROR,
389  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
390  errmsg("out of memory"),
391  errdetail("String of %d bytes is too long for encoding conversion.",
392  len)));
393 
394  result = (unsigned char *) repalloc(result, resultlen + 1);
395  }
396 
397  return result;
398 }
399 
400 /*
401  * Convert string to encoding encoding_name. The source
402  * encoding is the DB encoding.
403  *
404  * BYTEA convert_to(TEXT string, NAME encoding_name) */
405 Datum
407 {
408  Datum string = PG_GETARG_DATUM(0);
409  Datum dest_encoding_name = PG_GETARG_DATUM(1);
410  Datum src_encoding_name = DirectFunctionCall1(namein,
411  CStringGetDatum(DatabaseEncoding->name));
412  Datum result;
413 
414  /*
415  * pg_convert expects a bytea as its first argument. We're passing it a
416  * text argument here, relying on the fact that they are both in fact
417  * varlena types, and thus structurally identical.
418  */
419  result = DirectFunctionCall3(pg_convert, string,
420  src_encoding_name, dest_encoding_name);
421 
422  PG_RETURN_DATUM(result);
423 }
424 
425 /*
426  * Convert string from encoding encoding_name. The destination
427  * encoding is the DB encoding.
428  *
429  * TEXT convert_from(BYTEA string, NAME encoding_name) */
430 Datum
432 {
433  Datum string = PG_GETARG_DATUM(0);
434  Datum src_encoding_name = PG_GETARG_DATUM(1);
435  Datum dest_encoding_name = DirectFunctionCall1(namein,
436  CStringGetDatum(DatabaseEncoding->name));
437  Datum result;
438 
439  result = DirectFunctionCall3(pg_convert, string,
440  src_encoding_name, dest_encoding_name);
441 
442  /*
443  * pg_convert returns a bytea, which we in turn return as text, relying on
444  * the fact that they are both in fact varlena types, and thus
445  * structurally identical. Although not all bytea values are valid text,
446  * in this case it will be because we've told pg_convert to return one
447  * that is valid as text in the current database encoding.
448  */
449  PG_RETURN_DATUM(result);
450 }
451 
452 /*
453  * Convert string between two arbitrary encodings.
454  *
455  * BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
456  */
457 Datum
459 {
460  bytea *string = PG_GETARG_BYTEA_PP(0);
461  char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
462  int src_encoding = pg_char_to_encoding(src_encoding_name);
463  char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
464  int dest_encoding = pg_char_to_encoding(dest_encoding_name);
465  const char *src_str;
466  char *dest_str;
467  bytea *retval;
468  int len;
469 
470  if (src_encoding < 0)
471  ereport(ERROR,
472  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
473  errmsg("invalid source encoding name \"%s\"",
474  src_encoding_name)));
475  if (dest_encoding < 0)
476  ereport(ERROR,
477  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
478  errmsg("invalid destination encoding name \"%s\"",
479  dest_encoding_name)));
480 
481  /* make sure that source string is valid */
482  len = VARSIZE_ANY_EXHDR(string);
483  src_str = VARDATA_ANY(string);
484  pg_verify_mbstr_len(src_encoding, src_str, len, false);
485 
486  /* perform conversion */
487  dest_str = (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, src_str),
488  len,
489  src_encoding,
490  dest_encoding);
491 
492  /* update len if conversion actually happened */
493  if (dest_str != src_str)
494  len = strlen(dest_str);
495 
496  /*
497  * build bytea data type structure.
498  */
499  retval = (bytea *) palloc(len + VARHDRSZ);
500  SET_VARSIZE(retval, len + VARHDRSZ);
501  memcpy(VARDATA(retval), dest_str, len);
502 
503  if (dest_str != src_str)
504  pfree(dest_str);
505 
506  /* free memory if allocated by the toaster */
507  PG_FREE_IF_COPY(string, 0);
508 
509  PG_RETURN_BYTEA_P(retval);
510 }
511 
512 /*
513  * get the length of the string considered as text in the specified
514  * encoding. Raises an error if the data is not valid in that
515  * encoding.
516  *
517  * INT4 length (BYTEA string, NAME src_encoding_name)
518  */
519 Datum
521 {
522  bytea *string = PG_GETARG_BYTEA_PP(0);
523  char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
524  int src_encoding = pg_char_to_encoding(src_encoding_name);
525  const char *src_str;
526  int len;
527  int retval;
528 
529  if (src_encoding < 0)
530  ereport(ERROR,
531  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
532  errmsg("invalid encoding name \"%s\"",
533  src_encoding_name)));
534 
535  len = VARSIZE_ANY_EXHDR(string);
536  src_str = VARDATA_ANY(string);
537 
538  retval = pg_verify_mbstr_len(src_encoding, src_str, len, false);
539 
540  PG_RETURN_INT32(retval);
541 }
542 
543 /*
544  * Get maximum multibyte character length in the specified encoding.
545  *
546  * Note encoding is specified numerically, not by name as above.
547  */
548 Datum
550 {
551  int encoding = PG_GETARG_INT32(0);
552 
553  if (PG_VALID_ENCODING(encoding))
554  PG_RETURN_INT32(pg_wchar_table[encoding].maxmblen);
555  else
556  PG_RETURN_NULL();
557 }
558 
559 /*
560  * Convert client encoding to server encoding.
561  *
562  * See the notes about string conversion functions at the top of this file.
563  */
564 char *
565 pg_client_to_server(const char *s, int len)
566 {
567  return pg_any_to_server(s, len, ClientEncoding->encoding);
568 }
569 
570 /*
571  * Convert any encoding to server encoding.
572  *
573  * See the notes about string conversion functions at the top of this file.
574  *
575  * Unlike the other string conversion functions, this will apply validation
576  * even if encoding == DatabaseEncoding->encoding. This is because this is
577  * used to process data coming in from outside the database, and we never
578  * want to just assume validity.
579  */
580 char *
581 pg_any_to_server(const char *s, int len, int encoding)
582 {
583  if (len <= 0)
584  return unconstify(char *, s); /* empty string is always valid */
585 
586  if (encoding == DatabaseEncoding->encoding ||
587  encoding == PG_SQL_ASCII)
588  {
589  /*
590  * No conversion is needed, but we must still validate the data.
591  */
592  (void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
593  return unconstify(char *, s);
594  }
595 
596  if (DatabaseEncoding->encoding == PG_SQL_ASCII)
597  {
598  /*
599  * No conversion is possible, but we must still validate the data,
600  * because the client-side code might have done string escaping using
601  * the selected client_encoding. If the client encoding is ASCII-safe
602  * then we just do a straight validation under that encoding. For an
603  * ASCII-unsafe encoding we have a problem: we dare not pass such data
604  * to the parser but we have no way to convert it. We compromise by
605  * rejecting the data if it contains any non-ASCII characters.
606  */
607  if (PG_VALID_BE_ENCODING(encoding))
608  (void) pg_verify_mbstr(encoding, s, len, false);
609  else
610  {
611  int i;
612 
613  for (i = 0; i < len; i++)
614  {
615  if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
616  ereport(ERROR,
617  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
618  errmsg("invalid byte value for encoding \"%s\": 0x%02x",
620  (unsigned char) s[i])));
621  }
622  }
623  return unconstify(char *, s);
624  }
625 
626  /* Fast path if we can use cached conversion function */
627  if (encoding == ClientEncoding->encoding)
628  return perform_default_encoding_conversion(s, len, true);
629 
630  /* General case ... will not work outside transactions */
631  return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
632  len,
633  encoding,
634  DatabaseEncoding->encoding);
635 }
636 
637 /*
638  * Convert server encoding to client encoding.
639  *
640  * See the notes about string conversion functions at the top of this file.
641  */
642 char *
643 pg_server_to_client(const char *s, int len)
644 {
645  return pg_server_to_any(s, len, ClientEncoding->encoding);
646 }
647 
648 /*
649  * Convert server encoding to any encoding.
650  *
651  * See the notes about string conversion functions at the top of this file.
652  */
653 char *
654 pg_server_to_any(const char *s, int len, int encoding)
655 {
656  if (len <= 0)
657  return unconstify(char *, s); /* empty string is always valid */
658 
659  if (encoding == DatabaseEncoding->encoding ||
660  encoding == PG_SQL_ASCII)
661  return unconstify(char *, s); /* assume data is valid */
662 
663  if (DatabaseEncoding->encoding == PG_SQL_ASCII)
664  {
665  /* No conversion is possible, but we must validate the result */
666  (void) pg_verify_mbstr(encoding, s, len, false);
667  return unconstify(char *, s);
668  }
669 
670  /* Fast path if we can use cached conversion function */
671  if (encoding == ClientEncoding->encoding)
672  return perform_default_encoding_conversion(s, len, false);
673 
674  /* General case ... will not work outside transactions */
675  return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
676  len,
677  DatabaseEncoding->encoding,
678  encoding);
679 }
680 
681 /*
682  * Perform default encoding conversion using cached FmgrInfo. Since
683  * this function does not access database at all, it is safe to call
684  * outside transactions. If the conversion has not been set up by
685  * SetClientEncoding(), no conversion is performed.
686  */
687 static char *
688 perform_default_encoding_conversion(const char *src, int len,
689  bool is_client_to_server)
690 {
691  char *result;
692  int src_encoding,
693  dest_encoding;
694  FmgrInfo *flinfo;
695 
696  if (is_client_to_server)
697  {
698  src_encoding = ClientEncoding->encoding;
699  dest_encoding = DatabaseEncoding->encoding;
700  flinfo = ToServerConvProc;
701  }
702  else
703  {
704  src_encoding = DatabaseEncoding->encoding;
705  dest_encoding = ClientEncoding->encoding;
706  flinfo = ToClientConvProc;
707  }
708 
709  if (flinfo == NULL)
710  return unconstify(char *, src);
711 
712  /*
713  * Allocate space for conversion result, being wary of integer overflow.
714  * See comments in pg_do_encoding_conversion.
715  */
716  if ((Size) len >= (MaxAllocHugeSize / (Size) MAX_CONVERSION_GROWTH))
717  ereport(ERROR,
718  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
719  errmsg("out of memory"),
720  errdetail("String of %d bytes is too long for encoding conversion.",
721  len)));
722 
723  result = (char *)
725  (Size) len * MAX_CONVERSION_GROWTH + 1);
726 
727  FunctionCall5(flinfo,
728  Int32GetDatum(src_encoding),
729  Int32GetDatum(dest_encoding),
730  CStringGetDatum(src),
731  CStringGetDatum(result),
732  Int32GetDatum(len));
733 
734  /*
735  * Release extra space if there might be a lot --- see comments in
736  * pg_do_encoding_conversion.
737  */
738  if (len > 1000000)
739  {
740  Size resultlen = strlen(result);
741 
742  if (resultlen >= MaxAllocSize)
743  ereport(ERROR,
744  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
745  errmsg("out of memory"),
746  errdetail("String of %d bytes is too long for encoding conversion.",
747  len)));
748 
749  result = (char *) repalloc(result, resultlen + 1);
750  }
751 
752  return result;
753 }
754 
755 
756 /* convert a multibyte string to a wchar */
757 int
758 pg_mb2wchar(const char *from, pg_wchar *to)
759 {
760  return pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len((const unsigned char *) from, to, strlen(from));
761 }
762 
763 /* convert a multibyte string to a wchar with a limited length */
764 int
765 pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
766 {
767  return pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len((const unsigned char *) from, to, len);
768 }
769 
770 /* same, with any encoding */
771 int
773  const char *from, pg_wchar *to, int len)
774 {
775  return pg_wchar_table[encoding].mb2wchar_with_len((const unsigned char *) from, to, len);
776 }
777 
778 /* convert a wchar string to a multibyte */
779 int
780 pg_wchar2mb(const pg_wchar *from, char *to)
781 {
782  return pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len(from, (unsigned char *) to, pg_wchar_strlen(from));
783 }
784 
785 /* convert a wchar string to a multibyte with a limited length */
786 int
787 pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
788 {
789  return pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
790 }
791 
792 /* same, with any encoding */
793 int
795  const pg_wchar *from, char *to, int len)
796 {
797  return pg_wchar_table[encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
798 }
799 
800 /* returns the byte length of a multibyte character */
801 int
802 pg_mblen(const char *mbstr)
803 {
804  return pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
805 }
806 
807 /* returns the display length of a multibyte character */
808 int
809 pg_dsplen(const char *mbstr)
810 {
811  return pg_wchar_table[DatabaseEncoding->encoding].dsplen((const unsigned char *) mbstr);
812 }
813 
814 /* returns the length (counted in wchars) of a multibyte string */
815 int
816 pg_mbstrlen(const char *mbstr)
817 {
818  int len = 0;
819 
820  /* optimization for single byte encoding */
822  return strlen(mbstr);
823 
824  while (*mbstr)
825  {
826  mbstr += pg_mblen(mbstr);
827  len++;
828  }
829  return len;
830 }
831 
832 /* returns the length (counted in wchars) of a multibyte string
833  * (not necessarily NULL terminated)
834  */
835 int
836 pg_mbstrlen_with_len(const char *mbstr, int limit)
837 {
838  int len = 0;
839 
840  /* optimization for single byte encoding */
842  return limit;
843 
844  while (limit > 0 && *mbstr)
845  {
846  int l = pg_mblen(mbstr);
847 
848  limit -= l;
849  mbstr += l;
850  len++;
851  }
852  return len;
853 }
854 
855 /*
856  * returns the byte length of a multibyte string
857  * (not necessarily NULL terminated)
858  * that is no longer than limit.
859  * this function does not break multibyte character boundary.
860  */
861 int
862 pg_mbcliplen(const char *mbstr, int len, int limit)
863 {
864  return pg_encoding_mbcliplen(DatabaseEncoding->encoding, mbstr,
865  len, limit);
866 }
867 
868 /*
869  * pg_mbcliplen with specified encoding
870  */
871 int
872 pg_encoding_mbcliplen(int encoding, const char *mbstr,
873  int len, int limit)
874 {
875  mblen_converter mblen_fn;
876  int clen = 0;
877  int l;
878 
879  /* optimization for single byte encoding */
880  if (pg_encoding_max_length(encoding) == 1)
881  return cliplen(mbstr, len, limit);
882 
883  mblen_fn = pg_wchar_table[encoding].mblen;
884 
885  while (len > 0 && *mbstr)
886  {
887  l = (*mblen_fn) ((const unsigned char *) mbstr);
888  if ((clen + l) > limit)
889  break;
890  clen += l;
891  if (clen == limit)
892  break;
893  len -= l;
894  mbstr += l;
895  }
896  return clen;
897 }
898 
899 /*
900  * Similar to pg_mbcliplen except the limit parameter specifies the
901  * character length, not the byte length.
902  */
903 int
904 pg_mbcharcliplen(const char *mbstr, int len, int limit)
905 {
906  int clen = 0;
907  int nch = 0;
908  int l;
909 
910  /* optimization for single byte encoding */
912  return cliplen(mbstr, len, limit);
913 
914  while (len > 0 && *mbstr)
915  {
916  l = pg_mblen(mbstr);
917  nch++;
918  if (nch > limit)
919  break;
920  clen += l;
921  len -= l;
922  mbstr += l;
923  }
924  return clen;
925 }
926 
/*
 * mbcliplen for any single-byte encoding: count bytes up to the smaller of
 * len and limit, stopping early at a null byte.
 */
static int
cliplen(const char *str, int len, int limit)
{
	int			maxbytes = (len < limit) ? len : limit;
	int			nbytes;

	for (nbytes = 0; nbytes < maxbytes; nbytes++)
	{
		if (str[nbytes] == '\0')
			break;
	}

	return nbytes;
}
938 
939 void
941 {
942  if (!PG_VALID_BE_ENCODING(encoding))
943  elog(ERROR, "invalid database encoding: %d", encoding);
944 
945  DatabaseEncoding = &pg_enc2name_tbl[encoding];
946  Assert(DatabaseEncoding->encoding == encoding);
947 }
948 
949 void
951 {
952  /* Some calls happen before we can elog()! */
953  Assert(PG_VALID_ENCODING(encoding));
954 
955  MessageEncoding = &pg_enc2name_tbl[encoding];
956  Assert(MessageEncoding->encoding == encoding);
957 }
958 
959 #ifdef ENABLE_NLS
960 /*
961  * Make one bind_textdomain_codeset() call, translating a pg_enc to a gettext
962  * codeset. Fails for MULE_INTERNAL, an encoding unknown to gettext; can also
963  * fail for gettext-internal causes like out-of-memory.
964  */
965 static bool
966 raw_pg_bind_textdomain_codeset(const char *domainname, int encoding)
967 {
968  bool elog_ok = (CurrentMemoryContext != NULL);
969  int i;
970 
971  for (i = 0; pg_enc2gettext_tbl[i].name != NULL; i++)
972  {
973  if (pg_enc2gettext_tbl[i].encoding == encoding)
974  {
975  if (bind_textdomain_codeset(domainname,
976  pg_enc2gettext_tbl[i].name) != NULL)
977  return true;
978 
979  if (elog_ok)
980  elog(LOG, "bind_textdomain_codeset failed");
981  else
982  write_stderr("bind_textdomain_codeset failed");
983 
984  break;
985  }
986  }
987 
988  return false;
989 }
990 
991 /*
992  * Bind a gettext message domain to the codeset corresponding to the database
993  * encoding. For SQL_ASCII, instead bind to the codeset implied by LC_CTYPE.
994  * Return the MessageEncoding implied by the new settings.
995  *
996  * On most platforms, gettext defaults to the codeset implied by LC_CTYPE.
997  * When that matches the database encoding, we don't need to do anything. In
998  * CREATE DATABASE, we enforce or trust that the locale's codeset matches the
999  * database encoding, except for the C locale. (On Windows, we also permit a
1000  * discrepancy under the UTF8 encoding.) For the C locale, explicitly bind
1001  * gettext to the right codeset.
1002  *
1003  * On Windows, gettext defaults to the Windows ANSI code page. This is a
1004  * convenient departure for software that passes the strings to Windows ANSI
1005  * APIs, but we don't do that. Compel gettext to use database encoding or,
1006  * failing that, the LC_CTYPE encoding as it would on other platforms.
1007  *
1008  * This function is called before elog() and palloc() are usable.
1009  */
1010 int
1011 pg_bind_textdomain_codeset(const char *domainname)
1012 {
1013  bool elog_ok = (CurrentMemoryContext != NULL);
1014  int encoding = GetDatabaseEncoding();
1015  int new_msgenc;
1016 
1017 #ifndef WIN32
1018  const char *ctype = setlocale(LC_CTYPE, NULL);
1019 
1020  if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0)
1021 #endif
1022  if (encoding != PG_SQL_ASCII &&
1023  raw_pg_bind_textdomain_codeset(domainname, encoding))
1024  return encoding;
1025 
1026  new_msgenc = pg_get_encoding_from_locale(NULL, elog_ok);
1027  if (new_msgenc < 0)
1028  new_msgenc = PG_SQL_ASCII;
1029 
1030 #ifdef WIN32
1031  if (!raw_pg_bind_textdomain_codeset(domainname, new_msgenc))
1032  /* On failure, the old message encoding remains valid. */
1033  return GetMessageEncoding();
1034 #endif
1035 
1036  return new_msgenc;
1037 }
1038 #endif
1039 
1040 /*
1041  * The database encoding, also called the server encoding, represents the
1042  * encoding of data stored in text-like data types. Affected types include
1043  * cstring, text, varchar, name, xml, and json.
1044  */
1045 int
1047 {
1048  return DatabaseEncoding->encoding;
1049 }
1050 
1051 const char *
1053 {
1054  return DatabaseEncoding->name;
1055 }
1056 
1057 Datum
1059 {
1060  return DirectFunctionCall1(namein, CStringGetDatum(DatabaseEncoding->name));
1061 }
1062 
1063 Datum
1065 {
1066  return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));
1067 }
1068 
1069 Datum
1071 {
1072  Name s = PG_GETARG_NAME(0);
1073 
1075 }
1076 
1077 Datum
1079 {
1081  const char *encoding_name = pg_encoding_to_char(encoding);
1082 
1083  return DirectFunctionCall1(namein, CStringGetDatum(encoding_name));
1084 }
1085 
1086 /*
1087  * gettext() returns messages in this encoding. This often matches the
1088  * database encoding, but it differs for SQL_ASCII databases, for processes
1089  * not attached to a database, and under a database encoding lacking iconv
1090  * support (MULE_INTERNAL).
1091  */
1092 int
1094 {
1095  return MessageEncoding->encoding;
1096 }
1097 
1098 
1099 /*
1100  * Generic character incrementer function.
1101  *
1102  * Not knowing anything about the properties of the encoding in use, we just
1103  * keep incrementing the last byte until we get a validly-encoded result,
1104  * or we run out of values to try. We don't bother to try incrementing
1105  * higher-order bytes, so there's no growth in runtime for wider characters.
1106  * (If we did try to do that, we'd need to consider the likelihood that 255
1107  * is not a valid final byte in the encoding.)
1108  */
1109 static bool
1110 pg_generic_charinc(unsigned char *charptr, int len)
1111 {
1112  unsigned char *lastbyte = charptr + len - 1;
1113  mbverifier mbverify;
1114 
1115  /* We can just invoke the character verifier directly. */
1117 
1118  while (*lastbyte < (unsigned char) 255)
1119  {
1120  (*lastbyte)++;
1121  if ((*mbverify) (charptr, len) == len)
1122  return true;
1123  }
1124 
1125  return false;
1126 }
1127 
/*
 * UTF-8 character incrementer function.
 *
 * For a one-byte character less than 0x7F, we just increment the byte.
 *
 * For a multibyte character, every byte but the first must fall between 0x80
 * and 0xBF; and the first byte must be between 0xC0 and 0xF4.  We increment
 * the last byte that's not already at its maximum value.  If we can't find a
 * byte that's less than the maximum allowable value, we simply fail.  We also
 * need some special-case logic to skip regions used for surrogate pair
 * handling, as those should not occur in valid UTF-8.
 *
 * Note that we don't reset lower-order bytes back to their minimums, since
 * we can't afford to make an exhaustive search (see make_greater_string).
 * The one exception is described below: when the first byte is bumped to
 * 0xED or 0xF4, the second byte is clamped down to the tighter maximum that
 * applies after those lead bytes, because leaving it at 0xBF would yield a
 * surrogate (0xED 0xA0..0xBF) or a code point above U+10FFFF
 * (0xF4 0x90..0xBF).  The result is still greater than the input because
 * the first byte increased.
 *
 * charptr points at the character, which is modified in place; length is
 * its length in bytes.  Returns true if the character was incremented, false
 * if no byte could be incremented or the length is not 1..4.
 */
static bool
pg_utf8_increment(unsigned char *charptr, int length)
{
	unsigned char a;
	unsigned char limit;

	switch (length)
	{
		default:
			/* reject lengths 5 and 6 for now */
			return false;
		case 4:
			a = charptr[3];
			if (a < 0xBF)
			{
				charptr[3]++;
				break;
			}
			/* FALL THRU */
		case 3:
			a = charptr[2];
			if (a < 0xBF)
			{
				charptr[2]++;
				break;
			}
			/* FALL THRU */
		case 2:
			a = charptr[1];
			/* The second byte's maximum depends on the lead byte. */
			switch (*charptr)
			{
				case 0xED:
					limit = 0x9F;	/* 0xED 0xA0.. would be a surrogate */
					break;
				case 0xF4:
					limit = 0x8F;	/* 0xF4 0x90.. exceeds U+10FFFF */
					break;
				default:
					limit = 0xBF;
					break;
			}
			if (a < limit)
			{
				charptr[1]++;
				break;
			}
			/* FALL THRU */
		case 1:
			a = *charptr;
			/* Last lead byte of each length class: nothing left to try. */
			if (a == 0x7F || a == 0xDF || a == 0xEF || a == 0xF4)
				return false;
			charptr[0]++;

			/*
			 * If we just moved onto a lead byte with a restricted second-byte
			 * range, pull the second byte back inside that range.
			 */
			if (length > 1)
			{
				if (charptr[0] == 0xED && charptr[1] > 0x9F)
					charptr[1] = 0x9F;
				else if (charptr[0] == 0xF4 && charptr[1] > 0x8F)
					charptr[1] = 0x8F;
			}
			break;
	}

	return true;
}
1200 
1201 /*
1202  * EUC-JP character incrementer function.
1203  *
1204  * If the sequence starts with SS2 (0x8e), it must be a two-byte sequence
1205  * representing JIS X 0201 characters with the second byte ranging between
1206  * 0xa1 and 0xdf. We just increment the last byte if it's less than 0xdf,
1207  * and otherwise rewrite the whole sequence to 0xa1 0xa1.
1208  *
1209  * If the sequence starts with SS3 (0x8f), it must be a three-byte sequence
1210  * in which the last two bytes range between 0xa1 and 0xfe. The last byte
1211  * is incremented if possible, otherwise the second-to-last byte.
1212  *
1213  * If the sequence starts with a value other than the above and its MSB
1214  * is set, it must be a two-byte sequence representing JIS X 0208 characters
1215  * with both bytes ranging between 0xa1 and 0xfe. The last byte is
1216  * incremented if possible, otherwise the second-to-last byte.
1217  *
1218  * Otherwise, the sequence is a single-byte ASCII character. It is
1219  * incremented up to 0x7f.
1220  */
1221 static bool
1222 pg_eucjp_increment(unsigned char *charptr, int length)
1223 {
1224  unsigned char c1,
1225  c2;
1226  int i;
1227 
1228  c1 = *charptr;
1229 
1230  switch (c1)
1231  {
1232  case SS2: /* JIS X 0201 */
1233  if (length != 2)
1234  return false;
1235 
1236  c2 = charptr[1];
1237 
1238  if (c2 >= 0xdf)
1239  charptr[0] = charptr[1] = 0xa1;
1240  else if (c2 < 0xa1)
1241  charptr[1] = 0xa1;
1242  else
1243  charptr[1]++;
1244  break;
1245 
1246  case SS3: /* JIS X 0212 */
1247  if (length != 3)
1248  return false;
1249 
1250  for (i = 2; i > 0; i--)
1251  {
1252  c2 = charptr[i];
1253  if (c2 < 0xa1)
1254  {
1255  charptr[i] = 0xa1;
1256  return true;
1257  }
1258  else if (c2 < 0xfe)
1259  {
1260  charptr[i]++;
1261  return true;
1262  }
1263  }
1264 
1265  /* Out of 3-byte code region */
1266  return false;
1267 
1268  default:
1269  if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1270  {
1271  if (length != 2)
1272  return false;
1273 
1274  for (i = 1; i >= 0; i--)
1275  {
1276  c2 = charptr[i];
1277  if (c2 < 0xa1)
1278  {
1279  charptr[i] = 0xa1;
1280  return true;
1281  }
1282  else if (c2 < 0xfe)
1283  {
1284  charptr[i]++;
1285  return true;
1286  }
1287  }
1288 
1289  /* Out of 2 byte code region */
1290  return false;
1291  }
1292  else
1293  { /* ASCII, single byte */
1294  if (c1 > 0x7e)
1295  return false;
1296  (*charptr)++;
1297  }
1298  break;
1299  }
1300 
1301  return true;
1302 }
1303 
1304 /*
1305  * get the character incrementer for the encoding for the current database
1306  */
1309 {
1310  /*
1311  * Eventually it might be best to add a field to pg_wchar_table[], but for
1312  * now we just use a switch.
1313  */
1314  switch (GetDatabaseEncoding())
1315  {
1316  case PG_UTF8:
1317  return pg_utf8_increment;
1318 
1319  case PG_EUC_JP:
1320  return pg_eucjp_increment;
1321 
1322  default:
1323  return pg_generic_charinc;
1324  }
1325 }
1326 
1327 /*
1328  * fetch maximum length of the encoding for the current database
1329  */
1330 int
1332 {
1334 }
1335 
/*
 * Check that mbstr (len bytes) is validly encoded in the current database
 * encoding.  This is pg_verify_mbstr() with the encoding argument filled in
 * from GetDatabaseEncoding().
 *
 * Returns true when pg_verify_mbstr_len() reports a non-negative length,
 * false otherwise.
 */
bool
pg_verifymbstr(const char *mbstr, int len, bool noError)
{
	int			nchars;

	nchars = pg_verify_mbstr_len(GetDatabaseEncoding(), mbstr, len, noError);

	return nchars >= 0;
}
1346 
/*
 * Check that mbstr (len bytes) is validly encoded in the specified
 * encoding.
 *
 * Returns true when pg_verify_mbstr_len() reports a non-negative length,
 * false otherwise.
 */
bool
pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
{
	int			nchars;

	nchars = pg_verify_mbstr_len(encoding, mbstr, len, noError);

	return nchars >= 0;
}
1356 
1357 /*
1358  * Verify mbstr to make sure that it is validly encoded in the specified
1359  * encoding.
1360  *
1361  * mbstr is not necessarily zero terminated; length of mbstr is
1362  * specified by len.
1363  *
1364  * If OK, return length of string in the encoding.
1365  * If a problem is found, return -1 when noError is
1366  * true; when noError is false, ereport() a descriptive message.
1367  */
1368 int
1369 pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
1370 {
1371  mbverifier mbverify;
1372  int mb_len;
1373 
1374  Assert(PG_VALID_ENCODING(encoding));
1375 
1376  /*
1377  * In single-byte encodings, we need only reject nulls (\0).
1378  */
1379  if (pg_encoding_max_length(encoding) <= 1)
1380  {
1381  const char *nullpos = memchr(mbstr, 0, len);
1382 
1383  if (nullpos == NULL)
1384  return len;
1385  if (noError)
1386  return -1;
1387  report_invalid_encoding(encoding, nullpos, 1);
1388  }
1389 
1390  /* fetch function pointer just once */
1391  mbverify = pg_wchar_table[encoding].mbverify;
1392 
1393  mb_len = 0;
1394 
1395  while (len > 0)
1396  {
1397  int l;
1398 
1399  /* fast path for ASCII-subset characters */
1400  if (!IS_HIGHBIT_SET(*mbstr))
1401  {
1402  if (*mbstr != '\0')
1403  {
1404  mb_len++;
1405  mbstr++;
1406  len--;
1407  continue;
1408  }
1409  if (noError)
1410  return -1;
1411  report_invalid_encoding(encoding, mbstr, len);
1412  }
1413 
1414  l = (*mbverify) ((const unsigned char *) mbstr, len);
1415 
1416  if (l < 0)
1417  {
1418  if (noError)
1419  return -1;
1420  report_invalid_encoding(encoding, mbstr, len);
1421  }
1422 
1423  mbstr += l;
1424  len -= l;
1425  mb_len++;
1426  }
1427  return mb_len;
1428 }
1429 
1430 /*
1431  * check_encoding_conversion_args: check arguments of a conversion function
1432  *
1433  * "expected" arguments can be either an encoding ID or -1 to indicate that
1434  * the caller will check whether it accepts the ID.
1435  *
1436  * Note: the errors here are not really user-facing, so elog instead of
1437  * ereport seems sufficient. Also, we trust that the "expected" encoding
1438  * arguments are valid encoding IDs, but we don't trust the actuals.
1439  */
1440 void
1442  int dest_encoding,
1443  int len,
1444  int expected_src_encoding,
1445  int expected_dest_encoding)
1446 {
1447  if (!PG_VALID_ENCODING(src_encoding))
1448  elog(ERROR, "invalid source encoding ID: %d", src_encoding);
1449  if (src_encoding != expected_src_encoding && expected_src_encoding >= 0)
1450  elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
1451  pg_enc2name_tbl[expected_src_encoding].name,
1452  pg_enc2name_tbl[src_encoding].name);
1453  if (!PG_VALID_ENCODING(dest_encoding))
1454  elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
1455  if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= 0)
1456  elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
1457  pg_enc2name_tbl[expected_dest_encoding].name,
1458  pg_enc2name_tbl[dest_encoding].name);
1459  if (len < 0)
1460  elog(ERROR, "encoding conversion length must not be negative");
1461 }
1462 
1463 /*
1464  * report_invalid_encoding: complain about invalid multibyte character
1465  *
1466  * note: len is remaining length of string, not length of character;
1467  * len must be greater than zero, as we always examine the first byte.
1468  */
1469 void
1470 report_invalid_encoding(int encoding, const char *mbstr, int len)
1471 {
1472  int l = pg_encoding_mblen(encoding, mbstr);
1473  char buf[8 * 5 + 1];
1474  char *p = buf;
1475  int j,
1476  jlimit;
1477 
1478  jlimit = Min(l, len);
1479  jlimit = Min(jlimit, 8); /* prevent buffer overrun */
1480 
1481  for (j = 0; j < jlimit; j++)
1482  {
1483  p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
1484  if (j < jlimit - 1)
1485  p += sprintf(p, " ");
1486  }
1487 
1488  ereport(ERROR,
1489  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1490  errmsg("invalid byte sequence for encoding \"%s\": %s",
1491  pg_enc2name_tbl[encoding].name,
1492  buf)));
1493 }
1494 
1495 /*
1496  * report_untranslatable_char: complain about untranslatable character
1497  *
1498  * note: len is remaining length of string, not length of character;
1499  * len must be greater than zero, as we always examine the first byte.
1500  */
1501 void
1502 report_untranslatable_char(int src_encoding, int dest_encoding,
1503  const char *mbstr, int len)
1504 {
1505  int l = pg_encoding_mblen(src_encoding, mbstr);
1506  char buf[8 * 5 + 1];
1507  char *p = buf;
1508  int j,
1509  jlimit;
1510 
1511  jlimit = Min(l, len);
1512  jlimit = Min(jlimit, 8); /* prevent buffer overrun */
1513 
1514  for (j = 0; j < jlimit; j++)
1515  {
1516  p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
1517  if (j < jlimit - 1)
1518  p += sprintf(p, " ");
1519  }
1520 
1521  ereport(ERROR,
1522  (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
1523  errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
1524  buf,
1525  pg_enc2name_tbl[src_encoding].name,
1526  pg_enc2name_tbl[dest_encoding].name)));
1527 }
1528 
1529 
#ifdef WIN32
/*
 * Convert from MessageEncoding to a palloc'ed, null-terminated utf16
 * string.  The character length is also passed to utf16len if not
 * null.  Returns NULL iff failed.  Before MessageEncoding initialization, "str"
 * should be ASCII-only; this will function as though MessageEncoding is UTF8.
 */
WCHAR *
pgwin32_message_to_UTF16(const char *str, int len, int *utf16len)
{
	int			msgenc = GetMessageEncoding();
	UINT		codepage;
	WCHAR	   *wide;
	int			widelen;

	/* No conversion is possible, and SQL_ASCII is never utf16. */
	if (msgenc == PG_SQL_ASCII)
		return NULL;

	codepage = pg_enc2name_tbl[msgenc].codepage;

	/*
	 * Use MultiByteToWideChar directly if there is a corresponding codepage,
	 * or double conversion through UTF8 if not.  Double conversion is needed,
	 * for example, in an ENCODING=LATIN8, LC_CTYPE=C database.
	 */
	if (codepage == 0)
	{
		char	   *utf8 = (char *) str;

		/*
		 * XXX pg_do_encoding_conversion() requires a transaction.  In the
		 * absence of one, hope for the input to be valid UTF8.
		 */
		if (IsTransactionState())
		{
			utf8 = (char *) pg_do_encoding_conversion((unsigned char *) str,
													  len,
													  msgenc,
													  PG_UTF8);
			/* a fresh buffer means the byte length may have changed */
			if (utf8 != str)
				len = strlen(utf8);
		}

		wide = (WCHAR *) palloc(sizeof(WCHAR) * (len + 1));
		widelen = MultiByteToWideChar(CP_UTF8, 0, utf8, len, wide, len);
		wide[widelen] = (WCHAR) 0;

		/* release the intermediate copy, if the conversion made one */
		if (utf8 != str)
			pfree(utf8);
	}
	else
	{
		wide = (WCHAR *) palloc(sizeof(WCHAR) * (len + 1));
		widelen = MultiByteToWideChar(codepage, 0, str, len, wide, len);
		wide[widelen] = (WCHAR) 0;
	}

	/* zero output for non-empty input is treated as a conversion error */
	if (widelen == 0 && len > 0)
	{
		pfree(wide);
		return NULL;			/* error */
	}

	if (utf16len)
		*utf16len = widelen;
	return wide;
}

#endif							/* WIN32 */
#define NIL
Definition: pg_list.h:65
#define PG_GETARG_INT32(n)
Definition: fmgr.h:264
Definition: fmgr.h:56
int pg_mbcharcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:904
int(* mblen_converter)(const unsigned char *mbstr)
Definition: pg_wchar.h:359
Datum namein(PG_FUNCTION_ARGS)
Definition: name.c:48
void SetMessageEncoding(int encoding)
Definition: mbutils.c:950
int pg_char_to_encoding(const char *name)
Definition: encnames.c:550
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define VARDATA(PTR)
Definition: postgres.h:302
int PrepareClientEncoding(int encoding)
Definition: mbutils.c:103
#define setlocale(a, b)
Definition: win32_port.h:408
Datum getdatabaseencoding(PG_FUNCTION_ARGS)
Definition: mbutils.c:1058
int pg_encoding_mb2wchar_with_len(int encoding, const char *from, pg_wchar *to, int len)
Definition: mbutils.c:772
char * pg_server_to_client(const char *s, int len)
Definition: mbutils.c:643
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:263
#define VARHDRSZ
Definition: c.h:562
FmgrInfo to_server_info
Definition: mbutils.c:57
FmgrInfo to_client_info
Definition: mbutils.c:58
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1470
static bool backend_startup_complete
Definition: mbutils.c:83
#define Min(x, y)
Definition: c.h:911
Datum PG_char_to_encoding(PG_FUNCTION_ARGS)
Definition: mbutils.c:1070
static int pending_client_encoding
Definition: mbutils.c:84
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define PG_RETURN_INT32(x)
Definition: fmgr.h:344
static FmgrInfo * ToServerConvProc
Definition: mbutils.c:67
#define write_stderr(str)
Definition: parallel.c:181
static char * perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)
Definition: mbutils.c:688
int errcode(int sqlerrcode)
Definition: elog.c:608
const char * name
Definition: pg_wchar.h:343
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:73
size_t pg_wchar_strlen(const pg_wchar *str)
Definition: wstrncmp.c:70
mbcharacter_incrementer pg_database_encoding_character_incrementer(void)
Definition: mbutils.c:1308
#define SS3
Definition: pg_wchar.h:36
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:615
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:360
#define LOG
Definition: elog.h:26
unsigned int Oid
Definition: postgres_ext.h:31
unsigned char * pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding)
Definition: mbutils.c:318
#define OidIsValid(objectId)
Definition: c.h:645
void check_encoding_conversion_args(int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)
Definition: mbutils.c:1441
int maxmblen
Definition: pg_wchar.h:376
#define MaxAllocHugeSize
Definition: memutils.h:44
char * pg_server_to_any(const char *s, int len, int encoding)
Definition: mbutils.c:654
signed int int32
Definition: c.h:347
int pg_wchar2mb(const pg_wchar *from, char *to)
Definition: mbutils.c:780
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1352
const pg_enc2gettext pg_enc2gettext_tbl[]
Definition: encnames.c:361
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:836
#define foreach_delete_current(lst, cell)
Definition: pg_list.h:368
mbdisplaylen_converter dsplen
Definition: pg_wchar.h:374
static bool pg_eucjp_increment(unsigned char *charptr, int length)
Definition: mbutils.c:1222
int s_encoding
Definition: mbutils.c:55
#define sprintf
Definition: port.h:194
Datum pg_encoding_max_length_sql(PG_FUNCTION_ARGS)
Definition: mbutils.c:549
void pfree(void *pointer)
Definition: mcxt.c:1056
char * pg_client_to_server(const char *s, int len)
Definition: mbutils.c:565
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1106
#define ERROR
Definition: elog.h:43
#define FunctionCall5(flinfo, arg1, arg2, arg3, arg4, arg5)
Definition: fmgr.h:641
static bool pg_utf8_increment(unsigned char *charptr, int length)
Definition: mbutils.c:1144
#define FATAL
Definition: elog.h:52
Datum pg_client_encoding(PG_FUNCTION_ARGS)
Definition: mbutils.c:1064
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:862
static List * ConvProcList
Definition: mbutils.c:61
Definition: c.h:610
static bool pg_generic_charinc(unsigned char *charptr, int len)
Definition: mbutils.c:1110
int pg_encoding_mbcliplen(int encoding, const char *mbstr, int len, int limit)
Definition: mbutils.c:872
int pg_encoding_max_length(int encoding)
Definition: wchar.c:1589
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
static char * buf
Definition: pg_test_fsync.c:67
void * MemoryContextAllocHuge(MemoryContext context, Size size)
Definition: mcxt.c:1105
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:309
int(* mbverifier)(const unsigned char *mbstr, int len)
Definition: pg_wchar.h:365
int errdetail(const char *fmt,...)
Definition: elog.c:955
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:1554
#define CStringGetDatum(X)
Definition: postgres.h:578
int SetClientEncoding(int encoding)
Definition: mbutils.c:201
const char * name
Definition: pg_wchar.h:328
struct ConvProcInfo ConvProcInfo
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
#define PG_VALID_FE_ENCODING(_enc)
Definition: pg_wchar.h:305
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:134
int pg_mb2wchar(const char *from, pg_wchar *to)
Definition: mbutils.c:758
#define ereport(elevel, rest)
Definition: elog.h:141
unsigned int pg_wchar
Definition: mbprint.c:31
MemoryContext TopMemoryContext
Definition: mcxt.c:44
int pg_encoding_wchar2mb_with_len(int encoding, const pg_wchar *from, char *to, int len)
Definition: mbutils.c:794
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:619
#define MaxAllocSize
Definition: memutils.h:40
#define unconstify(underlying_type, expr)
Definition: c.h:1193
Datum length_in_encoding(PG_FUNCTION_ARGS)
Definition: mbutils.c:520
uintptr_t Datum
Definition: postgres.h:367
void SetDatabaseEncoding(int encoding)
Definition: mbutils.c:940
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:343
int GetDatabaseEncoding(void)
Definition: mbutils.c:1046
int pg_get_client_encoding(void)
Definition: mbutils.c:298
int pg_mbstrlen(const char *mbstr)
Definition: mbutils.c:816
int pg_get_encoding_from_locale(const char *ctype, bool write_message)
Definition: chklocale.c:433
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition: mbutils.c:1341
wchar2mb_with_len_converter wchar2mb_with_len
Definition: pg_wchar.h:371
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:765
pg_enc encoding
Definition: pg_wchar.h:329
int pg_dsplen(const char *mbstr)
Definition: mbutils.c:809
const char * pg_encoding_to_char(int encoding)
Definition: encnames.c:588
List * lcons(void *datum, List *list)
Definition: list.c:454
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:295
#define Assert(condition)
Definition: c.h:739
#define lfirst(lc)
Definition: pg_list.h:190
const char * GetDatabaseEncodingName(void)
Definition: mbutils.c:1052
static int cliplen(const char *str, int len, int limit)
Definition: mbutils.c:929
Datum pg_convert(PG_FUNCTION_ARGS)
Definition: mbutils.c:458
size_t Size
Definition: c.h:467
const char * pg_get_client_encoding_name(void)
Definition: mbutils.c:307
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:302
Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding)
Definition: namespace.c:3673
bool IsTransactionState(void)
Definition: xact.c:355
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:255
Datum pg_convert_from(PG_FUNCTION_ARGS)
Definition: mbutils.c:431
int pg_mblen(const char *mbstr)
Definition: mbutils.c:802
int32 encoding
Definition: pg_database.h:41
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1069
static const pg_enc2name * MessageEncoding
Definition: mbutils.c:75
void InitializeClientEncoding(void)
Definition: mbutils.c:274
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1502
const char * name
Definition: encode.c:521
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1331
int GetMessageEncoding(void)
Definition: mbutils.c:1093
#define MAX_CONVERSION_GROWTH
Definition: pg_wchar.h:316
bool(* mbcharacter_incrementer)(unsigned char *mbstr, int len)
Definition: pg_wchar.h:363
#define Int32GetDatum(X)
Definition: postgres.h:479
static FmgrInfo * ToClientConvProc
Definition: mbutils.c:68
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:822
const pg_wchar_tbl pg_wchar_table[]
Definition: wchar.c:1505
Datum PG_encoding_to_char(PG_FUNCTION_ARGS)
Definition: mbutils.c:1078
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:796
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:74
#define elog(elevel,...)
Definition: elog.h:228
int i
#define NameStr(name)
Definition: c.h:616
Definition: c.h:556
#define PG_FUNCTION_ARGS
Definition: fmgr.h:188
mb2wchar_with_len_converter mb2wchar_with_len
Definition: pg_wchar.h:369
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
mbverifier mbverify
Definition: pg_wchar.h:375
int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
Definition: mbutils.c:787
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:581
mblen_converter mblen
Definition: pg_wchar.h:373
int pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1369
int c_encoding
Definition: mbutils.c:56
Definition: pg_list.h:50
Datum pg_convert_to(PG_FUNCTION_ARGS)
Definition: mbutils.c:406
#define PG_RETURN_NULL()
Definition: fmgr.h:335
#define PG_GETARG_NAME(n)
Definition: fmgr.h:273
#define SS2
Definition: pg_wchar.h:35
#define OidFunctionCall5(functionId, arg1, arg2, arg3, arg4, arg5)
Definition: fmgr.h:661