PostgreSQL Source Code git master
ascii.c
Go to the documentation of this file.
/*-----------------------------------------------------------------------
 * ascii.c
 *	 The PostgreSQL routine for string to ascii conversion.
 *
 *	 Portions Copyright (c) 1999-2025, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/ascii.c
 *
 *-----------------------------------------------------------------------
 */
12#include "postgres.h"
13
14#include "mb/pg_wchar.h"
15#include "utils/ascii.h"
16#include "utils/fmgrprotos.h"
17#include "varatt.h"
18
19static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
20 unsigned char *dest, int enc);
21static text *encode_to_ascii(text *data, int enc);
22
23
24/* ----------
25 * to_ascii
26 * ----------
27 */
28static void
29pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
30{
31 unsigned char *x;
32 const unsigned char *ascii;
33 int range;
34
35 /*
36 * relevant start for an encoding
37 */
38#define RANGE_128 128
39#define RANGE_160 160
40
41 if (enc == PG_LATIN1)
42 {
43 /*
44 * ISO-8859-1 <range: 160 -- 255>
45 */
46 ascii = (const unsigned char *) " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
48 }
49 else if (enc == PG_LATIN2)
50 {
51 /*
52 * ISO-8859-2 <range: 160 -- 255>
53 */
54 ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
56 }
57 else if (enc == PG_LATIN9)
58 {
59 /*
60 * ISO-8859-15 <range: 160 -- 255>
61 */
62 ascii = (const unsigned char *) " cL YS sCa -R Zu .z EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
64 }
65 else if (enc == PG_WIN1250)
66 {
67 /*
68 * Window CP1250 <range: 128 -- 255>
69 */
70 ascii = (const unsigned char *) " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
72 }
73 else
74 {
76 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
77 errmsg("encoding conversion from %s to ASCII not supported",
79 return; /* keep compiler quiet */
80 }
81
82 /*
83 * Encode
84 */
85 for (x = src; x < src_end; x++)
86 {
87 if (*x < 128)
88 *dest++ = *x;
89 else if (*x < range)
90 *dest++ = ' '; /* bogus 128 to 'range' */
91 else
92 *dest++ = ascii[*x - range];
93 }
94}
95
96/* ----------
97 * encode text
98 *
99 * The text datum is overwritten in-place, therefore this coding method
100 * cannot support conversions that change the string length!
101 * ----------
102 */
103static text *
105{
106 pg_to_ascii((unsigned char *) VARDATA(data), /* src */
107 (unsigned char *) (data) + VARSIZE(data), /* src end */
108 (unsigned char *) VARDATA(data), /* dest */
109 enc); /* encoding */
110
111 return data;
112}
113
114/* ----------
115 * convert to ASCII - enc is set as 'name' arg.
116 * ----------
117 */
118Datum
120{
122 char *encname = NameStr(*PG_GETARG_NAME(1));
123 int enc = pg_char_to_encoding(encname);
124
125 if (enc < 0)
127 (errcode(ERRCODE_UNDEFINED_OBJECT),
128 errmsg("%s is not a valid encoding name", encname)));
129
131}
132
133/* ----------
134 * convert to ASCII - enc is set as int4
135 * ----------
136 */
137Datum
139{
141 int enc = PG_GETARG_INT32(1);
142
145 (errcode(ERRCODE_UNDEFINED_OBJECT),
146 errmsg("%d is not a valid encoding code", enc)));
147
149}
150
151/* ----------
152 * convert to ASCII - current enc is DatabaseEncoding
153 * ----------
154 */
155Datum
157{
159 int enc = GetDatabaseEncoding();
160
162}
163
/* ----------
 * Copy a string in an arbitrary backend-safe encoding, converting it to a
 * valid ASCII string by replacing non-ASCII bytes with '?'.  Otherwise the
 * behavior is identical to strlcpy(), except that we don't bother with a
 * return value.
 *
 * At most destsiz - 1 bytes are copied from 'src' and 'dest' is always
 * nul-terminated, unless destsiz is 0, in which case 'dest' is not touched.
 * Bytes 32..127 and the whitespace characters '\n', '\r' and '\t' are
 * copied unchanged; every other byte is written as '?'.
 *
 * This must not trigger ereport(ERROR), as it is called in postmaster.
 * ----------
 */
void
ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
{
	if (destsiz == 0)			/* corner case: no room for trailing nul */
		return;

	/* pre-decrement reserves one slot for the terminating nul */
	while (--destsiz > 0)
	{
		/* use unsigned char here to avoid compiler warning */
		unsigned char ch = *src++;

		if (ch == '\0')
			break;
		/* Keep bytes 32..127: printable ASCII, and also DEL (127) */
		if (32 <= ch && ch <= 127)
			*dest = ch;
		/* White-space is also OK */
		else if (ch == '\n' || ch == '\r' || ch == '\t')
			*dest = ch;
		/* Everything else is replaced with '?' */
		else
			*dest = '?';
		dest++;
	}

	*dest = '\0';
}
void ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
Definition: ascii.c:174
#define RANGE_160
Datum to_ascii_enc(PG_FUNCTION_ARGS)
Definition: ascii.c:138
Datum to_ascii_default(PG_FUNCTION_ARGS)
Definition: ascii.c:156
Datum to_ascii_encname(PG_FUNCTION_ARGS)
Definition: ascii.c:119
static void pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
Definition: ascii.c:29
#define RANGE_128
static text * encode_to_ascii(text *data, int enc)
Definition: ascii.c:104
#define NameStr(name)
Definition: c.h:703
enc
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_GETARG_NAME(n)
Definition: fmgr.h:278
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_TEXT_P_COPY(n)
Definition: fmgr.h:315
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
int x
Definition: isn.c:70
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
Datum ascii(PG_FUNCTION_ARGS)
const void * data
@ PG_LATIN9
Definition: pg_wchar.h:242
@ PG_LATIN2
Definition: pg_wchar.h:235
@ PG_WIN1250
Definition: pg_wchar.h:255
@ PG_LATIN1
Definition: pg_wchar.h:234
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:287
#define pg_encoding_to_char
Definition: pg_wchar.h:630
#define pg_char_to_encoding
Definition: pg_wchar.h:629
uintptr_t Datum
Definition: postgres.h:69
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
Definition: c.h:644
#define VARDATA(PTR)
Definition: varatt.h:278
#define VARSIZE(PTR)
Definition: varatt.h:279