PostgreSQL Source Code  git master
read.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * read.c
4  * routines to convert a string (legal ascii representation of node) back
5  * to nodes
6  *
7  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  *
11  * IDENTIFICATION
12  * src/backend/nodes/read.c
13  *
14  * HISTORY
15  * AUTHOR DATE MAJOR EVENT
16  * Andrew Yu Nov 2, 1994 file creation
17  *
18  *-------------------------------------------------------------------------
19  */
20 #include "postgres.h"
21 
22 #include <ctype.h>
23 
24 #include "common/string.h"
25 #include "nodes/pg_list.h"
26 #include "nodes/readfuncs.h"
27 #include "nodes/value.h"
28 
29 
/*
 * Static state for pg_strtok: the next unread character of the string
 * currently being parsed.  stringToNode() points it at its input (saving
 * and restoring the previous value), and pg_strtok() advances it past each
 * token it returns.
 */
static char *pg_strtok_ptr = NULL;
32 
33 
34 /*
35  * stringToNode -
36  * returns a Node with a given legal ASCII representation
37  */
38 void *
40 {
41  char *save_strtok;
42  void *retval;
43 
44  /*
45  * We save and restore the pre-existing state of pg_strtok. This makes the
46  * world safe for re-entrant invocation of stringToNode, without incurring
47  * a lot of notational overhead by having to pass the next-character
48  * pointer around through all the readfuncs.c code.
49  */
50  save_strtok = pg_strtok_ptr;
51 
52  pg_strtok_ptr = str; /* point pg_strtok at the string to read */
53 
54  retval = nodeRead(NULL, 0); /* do the reading */
55 
56  pg_strtok_ptr = save_strtok;
57 
58  return retval;
59 }
60 
61 /*****************************************************************************
62  *
63  * the lisp token parser
64  *
65  *****************************************************************************/
66 
67 /*
68  * pg_strtok --- retrieve next "token" from a string.
69  *
70  * Works kinda like strtok, except it never modifies the source string.
71  * (Instead of storing nulls into the string, the length of the token
72  * is returned to the caller.)
73  * Also, the rules about what is a token are hard-wired rather than being
74  * configured by passing a set of terminating characters.
75  *
76  * The string is assumed to have been initialized already by stringToNode.
77  *
78  * The rules for tokens are:
79  * * Whitespace (space, tab, newline) always separates tokens.
80  * * The characters '(', ')', '{', '}' form individual tokens even
81  * without any whitespace around them.
82  * * Otherwise, a token is all the characters up to the next whitespace
83  * or occurrence of one of the four special characters.
84  * * A backslash '\' can be used to quote whitespace or one of the four
85  * special characters, so that it is treated as a plain token character.
86  * Backslashes themselves must also be backslashed for consistency.
87  * Any other character can be, but need not be, backslashed as well.
88  * * If the resulting token is '<>' (with no backslash), it is returned
89  * as a non-NULL pointer to the token but with length == 0. Note that
90  * there is no other way to get a zero-length token.
91  *
92  * Returns a pointer to the start of the next token, and the length of the
93  * token (including any embedded backslashes!) in *length. If there are
94  * no more tokens, NULL and 0 are returned.
95  *
96  * NOTE: this routine doesn't remove backslashes; the caller must do so
97  * if necessary (see "debackslash").
98  *
99  * NOTE: prior to release 7.0, this routine also had a special case to treat
100  * a token starting with '"' as extending to the next '"'. This code was
101  * broken, however, since it would fail to cope with a string containing an
102  * embedded '"'. I have therefore removed this special case, and instead
103  * introduced rules for using backslashes to quote characters. Higher-level
104  * code should add backslashes to a string constant to ensure it is treated
105  * as a single token.
106  */
107 char *
109 {
110  char *local_str; /* working pointer to string */
111  char *ret_str; /* start of token to return */
112 
113  local_str = pg_strtok_ptr;
114 
115  while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t')
116  local_str++;
117 
118  if (*local_str == '\0')
119  {
120  *length = 0;
121  pg_strtok_ptr = local_str;
122  return NULL; /* no more tokens */
123  }
124 
125  /*
126  * Now pointing at start of next token.
127  */
128  ret_str = local_str;
129 
130  if (*local_str == '(' || *local_str == ')' ||
131  *local_str == '{' || *local_str == '}')
132  {
133  /* special 1-character token */
134  local_str++;
135  }
136  else
137  {
138  /* Normal token, possibly containing backslashes */
139  while (*local_str != '\0' &&
140  *local_str != ' ' && *local_str != '\n' &&
141  *local_str != '\t' &&
142  *local_str != '(' && *local_str != ')' &&
143  *local_str != '{' && *local_str != '}')
144  {
145  if (*local_str == '\\' && local_str[1] != '\0')
146  local_str += 2;
147  else
148  local_str++;
149  }
150  }
151 
152  *length = local_str - ret_str;
153 
154  /* Recognize special case for "empty" token */
155  if (*length == 2 && ret_str[0] == '<' && ret_str[1] == '>')
156  *length = 0;
157 
158  pg_strtok_ptr = local_str;
159 
160  return ret_str;
161 }
162 
/*
 * debackslash -
 *    create a palloc'd string holding the given token.
 *    any protective backslashes in the token are removed.
 *
 * 'token' points at the first character to copy and 'length' is the number
 * of source characters to consume; the source need not be NUL-terminated
 * at that point.  The result is always NUL-terminated.  Its buffer is sized
 * for length + 1 bytes, which is enough because removing backslashes can
 * only make the text shorter.
 */
char *
debackslash(char *token, int length)
{
    char       *result = palloc(length + 1);
    char       *ptr = result;

    while (length > 0)
    {
        /*
         * Drop a backslash and copy the character it protects.  A backslash
         * that is the very last source character has nothing to protect and
         * is therefore copied as-is.
         */
        if (*token == '\\' && length > 1)
        {
            token++;
            length--;
        }
        *ptr++ = *token++;
        length--;
    }
    *ptr = '\0';
    return result;
}
184 
/*
 * Private token classes returned by nodeTokenType() in addition to the
 * real NodeTag values it can return.
 * NOTE(review): the 1000000 offset is presumably there to keep these clear
 * of every genuine NodeTag value -- confirm against nodes.h.
 */
#define RIGHT_PAREN (1000000 + 1)
#define LEFT_PAREN  (1000000 + 2)
#define LEFT_BRACE  (1000000 + 3)
#define OTHER_TOKEN (1000000 + 4)
189 
190 /*
191  * nodeTokenType -
192  * returns the type of the node token contained in token.
193  * It returns one of the following valid NodeTags:
194  * T_Integer, T_Float, T_String, T_BitString
195  * and some of its own:
196  * RIGHT_PAREN, LEFT_PAREN, LEFT_BRACE, OTHER_TOKEN
197  *
198  * Assumption: the ascii representation is legal
199  */
200 static NodeTag
201 nodeTokenType(char *token, int length)
202 {
203  NodeTag retval;
204  char *numptr;
205  int numlen;
206 
207  /*
208  * Check if the token is a number
209  */
210  numptr = token;
211  numlen = length;
212  if (*numptr == '+' || *numptr == '-')
213  numptr++, numlen--;
214  if ((numlen > 0 && isdigit((unsigned char) *numptr)) ||
215  (numlen > 1 && *numptr == '.' && isdigit((unsigned char) numptr[1])))
216  {
217  /*
218  * Yes. Figure out whether it is integral or float; this requires
219  * both a syntax check and a range check. strtoint() can do both for
220  * us. We know the token will end at a character that strtoint will
221  * stop at, so we do not need to modify the string.
222  */
223  char *endptr;
224 
225  errno = 0;
226  (void) strtoint(token, &endptr, 10);
227  if (endptr != token + length || errno == ERANGE)
228  return T_Float;
229  return T_Integer;
230  }
231 
232  /*
233  * these three cases do not need length checks, since pg_strtok() will
234  * always treat them as single-byte tokens
235  */
236  else if (*token == '(')
237  retval = LEFT_PAREN;
238  else if (*token == ')')
239  retval = RIGHT_PAREN;
240  else if (*token == '{')
241  retval = LEFT_BRACE;
242  else if (*token == '"' && length > 1 && token[length - 1] == '"')
243  retval = T_String;
244  else if (*token == 'b')
245  retval = T_BitString;
246  else
247  retval = OTHER_TOKEN;
248  return retval;
249 }
250 
251 /*
252  * nodeRead -
253  * Slightly higher-level reader.
254  *
255  * This routine applies some semantic knowledge on top of the purely
256  * lexical tokenizer pg_strtok(). It can read
257  * * Value token nodes (integers, floats, or strings);
258  * * General nodes (via parseNodeString() from readfuncs.c);
259  * * Lists of the above;
260  * * Lists of integers or OIDs.
261  * The return value is declared void *, not Node *, to avoid having to
262  * cast it explicitly in callers that assign to fields of different types.
263  *
264  * External callers should always pass NULL/0 for the arguments. Internally
265  * a non-NULL token may be passed when the upper recursion level has already
266  * scanned the first token of a node's representation.
267  *
268  * We assume pg_strtok is already initialized with a string to read (hence
269  * this should only be invoked from within a stringToNode operation).
270  */
271 void *
272 nodeRead(char *token, int tok_len)
273 {
274  Node *result;
275  NodeTag type;
276 
277  if (token == NULL) /* need to read a token? */
278  {
279  token = pg_strtok(&tok_len);
280 
281  if (token == NULL) /* end of input */
282  return NULL;
283  }
284 
285  type = nodeTokenType(token, tok_len);
286 
287  switch ((int) type)
288  {
289  case LEFT_BRACE:
290  result = parseNodeString();
291  token = pg_strtok(&tok_len);
292  if (token == NULL || token[0] != '}')
293  elog(ERROR, "did not find '}' at end of input node");
294  break;
295  case LEFT_PAREN:
296  {
297  List *l = NIL;
298 
299  /*----------
300  * Could be an integer list: (i int int ...)
301  * or an OID list: (o int int ...)
302  * or a list of nodes/values: (node node ...)
303  *----------
304  */
305  token = pg_strtok(&tok_len);
306  if (token == NULL)
307  elog(ERROR, "unterminated List structure");
308  if (tok_len == 1 && token[0] == 'i')
309  {
310  /* List of integers */
311  for (;;)
312  {
313  int val;
314  char *endptr;
315 
316  token = pg_strtok(&tok_len);
317  if (token == NULL)
318  elog(ERROR, "unterminated List structure");
319  if (token[0] == ')')
320  break;
321  val = (int) strtol(token, &endptr, 10);
322  if (endptr != token + tok_len)
323  elog(ERROR, "unrecognized integer: \"%.*s\"",
324  tok_len, token);
325  l = lappend_int(l, val);
326  }
327  }
328  else if (tok_len == 1 && token[0] == 'o')
329  {
330  /* List of OIDs */
331  for (;;)
332  {
333  Oid val;
334  char *endptr;
335 
336  token = pg_strtok(&tok_len);
337  if (token == NULL)
338  elog(ERROR, "unterminated List structure");
339  if (token[0] == ')')
340  break;
341  val = (Oid) strtoul(token, &endptr, 10);
342  if (endptr != token + tok_len)
343  elog(ERROR, "unrecognized OID: \"%.*s\"",
344  tok_len, token);
345  l = lappend_oid(l, val);
346  }
347  }
348  else
349  {
350  /* List of other node types */
351  for (;;)
352  {
353  /* We have already scanned next token... */
354  if (token[0] == ')')
355  break;
356  l = lappend(l, nodeRead(token, tok_len));
357  token = pg_strtok(&tok_len);
358  if (token == NULL)
359  elog(ERROR, "unterminated List structure");
360  }
361  }
362  result = (Node *) l;
363  break;
364  }
365  case RIGHT_PAREN:
366  elog(ERROR, "unexpected right parenthesis");
367  result = NULL; /* keep compiler happy */
368  break;
369  case OTHER_TOKEN:
370  if (tok_len == 0)
371  {
372  /* must be "<>" --- represents a null pointer */
373  result = NULL;
374  }
375  else
376  {
377  elog(ERROR, "unrecognized token: \"%.*s\"", tok_len, token);
378  result = NULL; /* keep compiler happy */
379  }
380  break;
381  case T_Integer:
382 
383  /*
384  * we know that the token terminates on a char atoi will stop at
385  */
386  result = (Node *) makeInteger(atoi(token));
387  break;
388  case T_Float:
389  {
390  char *fval = (char *) palloc(tok_len + 1);
391 
392  memcpy(fval, token, tok_len);
393  fval[tok_len] = '\0';
394  result = (Node *) makeFloat(fval);
395  }
396  break;
397  case T_String:
398  /* need to remove leading and trailing quotes, and backslashes */
399  result = (Node *) makeString(debackslash(token + 1, tok_len - 2));
400  break;
401  case T_BitString:
402  {
403  char *val = palloc(tok_len);
404 
405  /* skip leading 'b' */
406  memcpy(val, token + 1, tok_len - 1);
407  val[tok_len - 1] = '\0';
408  result = (Node *) makeBitString(val);
409  break;
410  }
411  default:
412  elog(ERROR, "unrecognized node type: %d", (int) type);
413  result = NULL; /* keep compiler happy */
414  break;
415  }
416 
417  return (void *) result;
418 }
Value * makeString(char *str)
Definition: value.c:53
#define NIL
Definition: pg_list.h:69
int length(const List *list)
Definition: list.c:1333
void * stringToNode(char *str)
Definition: read.c:39
char * pg_strtok(int *length)
Definition: read.c:108
Definition: nodes.h:517
void * nodeRead(char *token, int tok_len)
Definition: read.c:272
unsigned int Oid
Definition: postgres_ext.h:31
NodeTag
Definition: nodes.h:26
List * lappend_oid(List *list, Oid datum)
Definition: list.c:164
static NodeTag nodeTokenType(char *token, int length)
Definition: read.c:201
#define ERROR
Definition: elog.h:43
#define RIGHT_PAREN
Definition: read.c:185
Value * makeBitString(char *str)
Definition: value.c:68
int strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base)
Definition: string.c:50
char * debackslash(char *token, int length)
Definition: read.c:169
List * lappend_int(List *list, int datum)
Definition: list.c:146
List * lappend(List *list, void *datum)
Definition: list.c:128
Value * makeInteger(int i)
Definition: value.c:23
Value * makeFloat(char *numericStr)
Definition: value.c:38
#define LEFT_BRACE
Definition: read.c:187
static char * pg_strtok_ptr
Definition: read.c:31
Node * parseNodeString(void)
Definition: readfuncs.c:2494
Definition: nodes.h:288
void * palloc(Size size)
Definition: mcxt.c:924
#define elog
Definition: elog.h:219
#define OTHER_TOKEN
Definition: read.c:188
Definition: pg_list.h:45
long val
Definition: informix.c:689
#define LEFT_PAREN
Definition: read.c:186