PostgreSQL Source Code  git master
ascii.c
Go to the documentation of this file.
1 /*-----------------------------------------------------------------------
2  * ascii.c
3  * The PostgreSQL routine for string to ascii conversion.
4  *
5  * Portions Copyright (c) 1999-2024, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  * src/backend/utils/adt/ascii.c
9  *
10  *-----------------------------------------------------------------------
11  */
12 #include "postgres.h"
13 
14 #include "mb/pg_wchar.h"
15 #include "utils/ascii.h"
16 #include "utils/fmgrprotos.h"
17 #include "varatt.h"
18 
19 static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
20  unsigned char *dest, int enc);
21 static text *encode_to_ascii(text *data, int enc);
22 
23 
24 /* ----------
25  * to_ascii
26  * ----------
27  */
28 static void
29 pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
30 {
31  unsigned char *x;
32  const unsigned char *ascii;
33  int range;
34 
35  /*
36  * relevant start for an encoding
37  */
38 #define RANGE_128 128
39 #define RANGE_160 160
40 
41  if (enc == PG_LATIN1)
42  {
43  /*
44  * ISO-8859-1 <range: 160 -- 255>
45  */
46  ascii = (const unsigned char *) " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
47  range = RANGE_160;
48  }
49  else if (enc == PG_LATIN2)
50  {
51  /*
52  * ISO-8859-2 <range: 160 -- 255>
53  */
54  ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
55  range = RANGE_160;
56  }
57  else if (enc == PG_LATIN9)
58  {
59  /*
60  * ISO-8859-15 <range: 160 -- 255>
61  */
62  ascii = (const unsigned char *) " cL YS sCa -R Zu .z EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
63  range = RANGE_160;
64  }
65  else if (enc == PG_WIN1250)
66  {
67  /*
68  * Window CP1250 <range: 128 -- 255>
69  */
70  ascii = (const unsigned char *) " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
71  range = RANGE_128;
72  }
73  else
74  {
75  ereport(ERROR,
76  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
77  errmsg("encoding conversion from %s to ASCII not supported",
79  return; /* keep compiler quiet */
80  }
81 
82  /*
83  * Encode
84  */
85  for (x = src; x < src_end; x++)
86  {
87  if (*x < 128)
88  *dest++ = *x;
89  else if (*x < range)
90  *dest++ = ' '; /* bogus 128 to 'range' */
91  else
92  *dest++ = ascii[*x - range];
93  }
94 }
95 
96 /* ----------
97  * encode text
98  *
99  * The text datum is overwritten in-place, therefore this coding method
100  * cannot support conversions that change the string length!
101  * ----------
102  */
103 static text *
105 {
106  pg_to_ascii((unsigned char *) VARDATA(data), /* src */
107  (unsigned char *) (data) + VARSIZE(data), /* src end */
108  (unsigned char *) VARDATA(data), /* dest */
109  enc); /* encoding */
110 
111  return data;
112 }
113 
114 /* ----------
115  * convert to ASCII - enc is set as 'name' arg.
116  * ----------
117  */
118 Datum
120 {
122  char *encname = NameStr(*PG_GETARG_NAME(1));
123  int enc = pg_char_to_encoding(encname);
124 
125  if (enc < 0)
126  ereport(ERROR,
127  (errcode(ERRCODE_UNDEFINED_OBJECT),
128  errmsg("%s is not a valid encoding name", encname)));
129 
131 }
132 
133 /* ----------
134  * convert to ASCII - enc is set as int4
135  * ----------
136  */
137 Datum
139 {
141  int enc = PG_GETARG_INT32(1);
142 
143  if (!PG_VALID_ENCODING(enc))
144  ereport(ERROR,
145  (errcode(ERRCODE_UNDEFINED_OBJECT),
146  errmsg("%d is not a valid encoding code", enc)));
147 
149 }
150 
151 /* ----------
152  * convert to ASCII - current enc is DatabaseEncoding
153  * ----------
154  */
155 Datum
157 {
159  int enc = GetDatabaseEncoding();
160 
162 }
163 
/* ----------
 * Copy a string in an arbitrary backend-safe encoding, converting it to a
 * string of printable ASCII by replacing every byte that is neither a
 * printable ASCII character (space through '~') nor one of newline,
 * carriage return or tab with '?'.  That covers non-ASCII bytes as well as
 * ASCII control characters, including DEL (0x7F).
 *
 * Otherwise the behavior is identical to strlcpy(), except that we don't
 * bother with a return value: at most destsiz - 1 bytes are copied and the
 * result is always nul-terminated, unless destsiz is 0, in which case dest
 * is not touched at all.
 *
 * This must not trigger ereport(ERROR), as it is called in postmaster.
 * ----------
 */
void
ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
{
	if (destsiz == 0)			/* corner case: no room for trailing nul */
		return;

	/* pre-decrement reserves one byte of dest for the terminator */
	while (--destsiz > 0)
	{
		/* use unsigned char here to avoid compiler warning */
		unsigned char ch = *src++;

		if (ch == '\0')
			break;
		/* Keep printable ASCII characters; 127 (DEL) is a control code */
		if (32 <= ch && ch <= 126)
			*dest = ch;
		/* White-space is also OK */
		else if (ch == '\n' || ch == '\r' || ch == '\t')
			*dest = ch;
		/* Everything else is replaced with '?' */
		else
			*dest = '?';
		dest++;
	}

	*dest = '\0';
}
void ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
Definition: ascii.c:174
#define RANGE_160
Datum to_ascii_enc(PG_FUNCTION_ARGS)
Definition: ascii.c:138
Datum to_ascii_default(PG_FUNCTION_ARGS)
Definition: ascii.c:156
Datum to_ascii_encname(PG_FUNCTION_ARGS)
Definition: ascii.c:119
static void pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
Definition: ascii.c:29
#define RANGE_128
static text * encode_to_ascii(text *data, int enc)
Definition: ascii.c:104
#define NameStr(name)
Definition: c.h:733
enc
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_GETARG_NAME(n)
Definition: fmgr.h:278
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_TEXT_P_COPY(n)
Definition: fmgr.h:315
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
int x
Definition: isn.c:71
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
Datum ascii(PG_FUNCTION_ARGS)
const void * data
@ PG_LATIN9
Definition: pg_wchar.h:242
@ PG_LATIN2
Definition: pg_wchar.h:235
@ PG_WIN1250
Definition: pg_wchar.h:255
@ PG_LATIN1
Definition: pg_wchar.h:234
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:287
#define pg_encoding_to_char
Definition: pg_wchar.h:569
#define pg_char_to_encoding
Definition: pg_wchar.h:568
uintptr_t Datum
Definition: postgres.h:64
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
Definition: c.h:674
#define VARDATA(PTR)
Definition: varatt.h:278
#define VARSIZE(PTR)
Definition: varatt.h:279