PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
ascii.c
Go to the documentation of this file.
1 /*-----------------------------------------------------------------------
2  * ascii.c
3  * The PostgreSQL routine for string to ascii conversion.
4  *
5  * Portions Copyright (c) 1999-2017, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  * src/backend/utils/adt/ascii.c
9  *
10  *-----------------------------------------------------------------------
11  */
12 #include "postgres.h"
13 
14 #include "mb/pg_wchar.h"
15 #include "utils/ascii.h"
16 #include "utils/builtins.h"
17 
18 static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
19  unsigned char *dest, int enc);
20 static text *encode_to_ascii(text *data, int enc);
21 
22 
23 /* ----------
24  * to_ascii
25  * ----------
26  */
27 static void
28 pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
29 {
30  unsigned char *x;
31  const unsigned char *ascii;
32  int range;
33 
34  /*
35  * relevant start for an encoding
36  */
37 #define RANGE_128 128
38 #define RANGE_160 160
39 
40  if (enc == PG_LATIN1)
41  {
42  /*
43  * ISO-8859-1 <range: 160 -- 255>
44  */
45  ascii = (const unsigned char *) " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
46  range = RANGE_160;
47  }
48  else if (enc == PG_LATIN2)
49  {
50  /*
51  * ISO-8859-2 <range: 160 -- 255>
52  */
53  ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
54  range = RANGE_160;
55  }
56  else if (enc == PG_LATIN9)
57  {
58  /*
59  * ISO-8859-15 <range: 160 -- 255>
60  */
61  ascii = (const unsigned char *) " cL YS sCa -R Zu .z EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
62  range = RANGE_160;
63  }
64  else if (enc == PG_WIN1250)
65  {
66  /*
67  * Window CP1250 <range: 128 -- 255>
68  */
69  ascii = (const unsigned char *) " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
70  range = RANGE_128;
71  }
72  else
73  {
74  ereport(ERROR,
75  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
76  errmsg("encoding conversion from %s to ASCII not supported",
77  pg_encoding_to_char(enc))));
78  return; /* keep compiler quiet */
79  }
80 
81  /*
82  * Encode
83  */
84  for (x = src; x < src_end; x++)
85  {
86  if (*x < 128)
87  *dest++ = *x;
88  else if (*x < range)
89  *dest++ = ' '; /* bogus 128 to 'range' */
90  else
91  *dest++ = ascii[*x - range];
92  }
93 }
94 
95 /* ----------
96  * encode text
97  *
98  * The text datum is overwritten in-place, therefore this coding method
99  * cannot support conversions that change the string length!
100  * ----------
101  */
102 static text *
104 {
105  pg_to_ascii((unsigned char *) VARDATA(data), /* src */
106  (unsigned char *) (data) + VARSIZE(data), /* src end */
107  (unsigned char *) VARDATA(data), /* dest */
108  enc); /* encoding */
109 
110  return data;
111 }
112 
113 /* ----------
114  * convert to ASCII - enc is set as 'name' arg.
115  * ----------
116  */
117 Datum
119 {
120  text *data = PG_GETARG_TEXT_P_COPY(0);
121  char *encname = NameStr(*PG_GETARG_NAME(1));
122  int enc = pg_char_to_encoding(encname);
123 
124  if (enc < 0)
125  ereport(ERROR,
126  (errcode(ERRCODE_UNDEFINED_OBJECT),
127  errmsg("%s is not a valid encoding name", encname)));
128 
129  PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
130 }
131 
132 /* ----------
133  * convert to ASCII - enc is set as int4
134  * ----------
135  */
136 Datum
138 {
139  text *data = PG_GETARG_TEXT_P_COPY(0);
140  int enc = PG_GETARG_INT32(1);
141 
142  if (!PG_VALID_ENCODING(enc))
143  ereport(ERROR,
144  (errcode(ERRCODE_UNDEFINED_OBJECT),
145  errmsg("%d is not a valid encoding code", enc)));
146 
147  PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
148 }
149 
150 /* ----------
151  * convert to ASCII - current enc is DatabaseEncoding
152  * ----------
153  */
154 Datum
156 {
157  text *data = PG_GETARG_TEXT_P_COPY(0);
158  int enc = GetDatabaseEncoding();
159 
160  PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
161 }
162 
/* ----------
 * Copy a string in an arbitrary backend-safe encoding, converting it to a
 * valid ASCII string by replacing non-ASCII bytes with '?'.  Otherwise the
 * behavior is identical to strlcpy(), except that we don't bother with a
 * return value.
 *
 * At most destsiz - 1 bytes are copied and dest is always nul-terminated,
 * unless destsiz is 0, in which case dest is not touched at all.  Printable
 * ASCII (32..126) plus newline, carriage return and tab are kept; every
 * other byte, including the remaining control characters and DEL (127), is
 * replaced with '?'.
 *
 * This must not trigger ereport(ERROR), as it is called in postmaster.
 * ----------
 */
void
ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
{
	if (destsiz == 0)			/* corner case: no room for trailing nul */
		return;

	while (--destsiz > 0)
	{
		/* use unsigned char here to avoid compiler warning */
		unsigned char ch = *src++;

		if (ch == '\0')
			break;
		/* Keep printable ASCII characters; 127 (DEL) is a control character */
		if (32 <= ch && ch <= 126)
			*dest = ch;
		/* White-space is also OK */
		else if (ch == '\n' || ch == '\r' || ch == '\t')
			*dest = ch;
		/* Everything else is replaced with '?' */
		else
			*dest = '?';
		dest++;
	}

	*dest = '\0';
}
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
static void pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
Definition: ascii.c:28
int pg_char_to_encoding(const char *name)
Definition: encnames.c:551
#define RANGE_128
#define VARDATA(PTR)
Definition: postgres.h:303
#define VARSIZE(PTR)
Definition: postgres.h:304
Datum to_ascii_encname(PG_FUNCTION_ARGS)
Definition: ascii.c:118
#define RANGE_160
Datum ascii(PG_FUNCTION_ARGS)
int errcode(int sqlerrcode)
Definition: elog.c:575
void ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
Definition: ascii.c:173
Datum to_ascii_enc(PG_FUNCTION_ARGS)
Definition: ascii.c:137
#define PG_GETARG_TEXT_P_COPY(n)
Definition: fmgr.h:279
Datum to_ascii_default(PG_FUNCTION_ARGS)
Definition: ascii.c:155
#define ERROR
Definition: elog.h:43
struct pg_encoding enc
Definition: encode.c:522
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:301
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:416
#define ereport(elevel, rest)
Definition: elog.h:122
uintptr_t Datum
Definition: postgres.h:372
int GetDatabaseEncoding(void)
Definition: mbutils.c:1015
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:331
const char * pg_encoding_to_char(int encoding)
Definition: encnames.c:607
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define NameStr(name)
Definition: c.h:493
Definition: c.h:433
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
static text * encode_to_ascii(text *data, int enc)
Definition: ascii.c:103
#define PG_GETARG_NAME(n)
Definition: fmgr.h:243