PostgreSQL Source Code  git master
stringutils.c
Go to the documentation of this file.
1 /*
2  * psql - the PostgreSQL interactive terminal
3  *
4  * Copyright (c) 2000-2024, PostgreSQL Global Development Group
5  *
6  * src/bin/psql/stringutils.c
7  */
8 #include "postgres_fe.h"
9 
10 #include <ctype.h>
11 
12 #include "common.h"
13 #include "stringutils.h"
14 
15 
/*
 * terminate_token
 *
 * Helper for strtokx(): end the current token at 'p' and return the position
 * at which the next strtokx() call should resume scanning.
 *
 * If 'p' is already at the end of the string there is nothing to write and
 * 'p' itself is the resume point.  Otherwise a null is stored at 'p'.  When
 * the character at 'p' is in 'whitespace' it can simply be overwritten;
 * when it is not, the remainder of the string (terminator included) is first
 * moved up one byte so that the character is preserved for the next token.
 * The buffer must have a spare byte for each such move; strtokx() allocates
 * twice the input length for that reason.
 */
static char *
terminate_token(char *p, const char *whitespace)
{
    if (*p == '\0')
        return p;               /* at end of string, so no extra work */

    if (strchr(whitespace, *p) == NULL)
        memmove(p + 1, p, strlen(p) + 1);
    *p = '\0';

    return p + 1;
}

/*
 * Replacement for strtok() (a.k.a. poor man's flex)
 *
 * Splits a string into tokens, returning one token per call, then NULL
 * when no more tokens exist in the given string.
 *
 * The calling convention is similar to that of strtok, but with more
 * frammishes.
 *
 * s -          string to parse, if NULL continue parsing the last string
 * whitespace - set of whitespace characters that separate tokens
 * delim -      set of non-whitespace separator characters (or NULL)
 * quote -      set of characters that can quote a token (NULL if none)
 * escape -     character that can quote quotes (0 if none)
 * e_strings -  if true, treat E'...' syntax as a valid token
 * del_quotes - if true, strip quotes from the returned token, else return
 *              it exactly as found in the string
 * encoding -   the active character-set encoding
 *
 * Characters in 'delim', if any, will be returned as single-character
 * tokens unless part of a quoted token.
 *
 * Double occurrences of the quoting character are always taken to represent
 * a single quote character in the data.  If escape isn't 0, then escape
 * followed by anything (except \0) is a data character too.
 *
 * The combination of e_strings and del_quotes both true is not currently
 * handled.  This could be fixed but it's not needed anywhere at the moment.
 *
 * Note that the string s is _not_ overwritten in this implementation; the
 * function works on a private copy held in static storage.  The returned
 * token points into that copy, which is freed when a new string is supplied
 * or when the end of the current string is reached.
 *
 * NB: it's okay to vary delim, quote, and escape from one call to the
 * next on a single source string, but changing whitespace is a bad idea
 * since you might lose data.
 */
char *
strtokx(const char *s,
        const char *whitespace,
        const char *delim,
        const char *quote,
        char escape,
        bool e_strings,
        bool del_quotes,
        int encoding)
{
    static char *storage = NULL;    /* private copy of the caller's string */
    static char *string = NULL;     /* where in storage to continue on the
                                     * next call */

    char       *start;
    char       *p;
    size_t      toklen;

    if (s)
    {
        free(storage);

        /*
         * We may need extra space to insert delimiter nulls for adjacent
         * tokens.  2X the space is a gross overestimate, but it's unlikely
         * that this code will be used on huge strings anyway.
         */
        storage = pg_malloc(2 * strlen(s) + 1);
        strcpy(storage, s);
        string = storage;
    }

    if (!storage)
        return NULL;

    /* skip leading whitespace */
    start = string + strspn(string, whitespace);

    /* end of string reached? */
    if (*start == '\0')
    {
        /* technically we don't need to free here, but we're nice */
        free(storage);
        storage = NULL;
        string = NULL;
        return NULL;
    }

    /* a delimiter character is returned as a one-character token */
    if (delim && strchr(delim, *start))
    {
        string = terminate_token(start + 1, whitespace);
        return start;
    }

    /* check for E string */
    p = start;
    if (e_strings &&
        (*p == 'E' || *p == 'e') &&
        p[1] == '\'')
    {
        quote = "'";
        escape = '\\';          /* if std strings before, not any more */
        p++;
    }

    /* test if quoting character */
    if (quote && strchr(quote, *p))
    {
        /* okay, we have a quoted token, now scan for the closer */
        char        thisquote = *p++;

        for (; *p; p += PQmblenBounded(p, encoding))
        {
            if (*p == escape && p[1] != '\0')
                p++;            /* process escaped anything */
            else if (*p == thisquote && p[1] == thisquote)
                p++;            /* process doubled quote */
            else if (*p == thisquote)
            {
                p++;            /* skip trailing quote */
                break;
            }
        }

        string = terminate_token(p, whitespace);

        /* Clean up the token if caller wants that */
        if (del_quotes)
            strip_quotes(start, thisquote, escape, encoding);

        return start;
    }

    /*
     * Otherwise no quoting character.  Scan till next whitespace, delimiter
     * or quote.  NB: at this point, *start is known not to be '\0',
     * whitespace, delim, or quote, so we will consume at least one character.
     */
    toklen = strcspn(start, whitespace);

    if (delim)
    {
        size_t      n = strcspn(start, delim);

        if (n < toklen)
            toklen = n;
    }

    if (quote)
    {
        size_t      n = strcspn(start, quote);

        if (n < toklen)
            toklen = n;
    }

    string = terminate_token(start + toklen, whitespace);

    return start;
}
227 
228 
/*
 * strip_quotes
 *
 * Undo quoting on the string at 'source', rewriting it in place.
 *
 * A leading 'quote' character and a 'quote' that is the final character
 * are dropped.  Inside the string, a doubled 'quote' collapses to a single
 * one, and (when 'escape' is not 0) an 'escape' followed by another
 * character is dropped so that the following character is copied literally.
 * Characters are copied in units of PQmblenBounded() bytes for the given
 * 'encoding'.
 */
void
strip_quotes(char *source, char quote, char escape, int encoding)
{
    char       *in;
    char       *out;

    Assert(source != NULL);
    Assert(quote != '\0');

    in = source;
    out = source;

    /* skip leading quote */
    if (*in != '\0' && *in == quote)
        in++;

    while (*in != '\0')
    {
        char        cur = *in;
        char        next = in[1];
        int         len;

        /* a quote as the very last character is the closer: drop it */
        if (cur == quote && next == '\0')
            break;

        /*
         * For a doubled quote or an escaped character, step over the first
         * byte so that only the second one is copied below.
         */
        if ((cur == quote && next == quote) ||
            (cur == escape && next != '\0'))
            in++;

        for (len = PQmblenBounded(in, encoding); len != 0; len--)
            *out++ = *in++;
    }

    *out = '\0';
}
272 
273 
/*
 * quote_if_needed
 *
 * Opposite of strip_quotes().  Returns NULL when "source" already denotes
 * itself literally, i.e. needs no quoting or escaping and force_quote is
 * false.  Otherwise returns a malloc'd, quoted and escaped copy.
 *
 * source -        string to parse
 * entails_quote - if any of these characters is present, outer quotes are
 *                 needed
 * quote -         doubled within string, affixed to both ends
 * escape -        doubled within string
 * force_quote -   if true, quote the output even if it doesn't "need" it
 * encoding -      the active character-set encoding
 *
 * Do not use this as a substitute for PQescapeStringConn().  Use it for
 * strings to be parsed by strtokx() or psql_scan_slash_option().
 */
char *
quote_if_needed(const char *source, const char *entails_quote,
                char quote, char escape, bool force_quote,
                int encoding)
{
    const char *in;
    char       *result;
    char       *out;
    bool        must_quote = force_quote;

    Assert(source != NULL);
    Assert(quote != '\0');

    /* worst case: every byte doubled, plus two quotes and the terminator */
    result = pg_malloc(2 * strlen(source) + 3);
    out = result;

    *out++ = quote;

    for (in = source; *in != '\0';)
    {
        char        ch = *in;
        int         len;

        if (ch == quote || ch == escape)
        {
            /* emit an extra copy so the character ends up doubled */
            must_quote = true;
            *out++ = ch;
        }
        else if (strchr(entails_quote, ch) != NULL)
            must_quote = true;

        /* copy the (possibly multibyte) character itself */
        for (len = PQmblenBounded(in, encoding); len != 0; len--)
            *out++ = *in++;
    }

    *out++ = quote;
    *out = '\0';

    if (must_quote)
        return result;

    /* nothing required quoting: discard the copy and report that */
    free(result);
    return NULL;
}
#define Assert(condition)
Definition: c.h:861
int PQmblenBounded(const char *s, int encoding)
Definition: fe-misc.c:1234
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
return str start
#define free(a)
Definition: header.h:65
#define storage
Definition: indent_codes.h:68
int i
Definition: isn.c:72
int32 encoding
Definition: pg_database.h:41
static rewind_source * source
Definition: pg_rewind.c:89
char * c
char * quote_if_needed(const char *source, const char *entails_quote, char quote, char escape, bool force_quote, int encoding)
Definition: stringutils.c:292
char * strtokx(const char *s, const char *whitespace, const char *delim, const char *quote, char escape, bool e_strings, bool del_quotes, int encoding)
Definition: stringutils.c:52
void strip_quotes(char *source, char quote, char escape, int encoding)
Definition: stringutils.c:240