PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
parser.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * parser.c
4  * Main entry point/driver for PostgreSQL grammar
5  *
6  * Note that the grammar is not allowed to perform any table access
7  * (since we need to be able to do basic parsing even while inside an
8  * aborted transaction). Therefore, the data structures returned by
9  * the grammar are "raw" parsetrees that still need to be analyzed by
10  * analyze.c and related files.
11  *
12  *
13  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
14  * Portions Copyright (c) 1994, Regents of the University of California
15  *
16  * IDENTIFICATION
17  * src/backend/parser/parser.c
18  *
19  *-------------------------------------------------------------------------
20  */
21 
22 #include "postgres.h"
23 
24 #include "parser/gramparse.h"
25 #include "parser/parser.h"
26 
27 
28 /*
29  * raw_parser
30  * Given a query in string form, do lexical and grammatical analysis.
31  *
32  * Returns a list of raw (un-analyzed) parse trees. The immediate elements
33  * of the list are always RawStmt nodes.
34  */
35 List *
36 raw_parser(const char *str)
37 {
39  base_yy_extra_type yyextra;
40  int yyresult;
41 
42  /* initialize the flex scanner */
43  yyscanner = scanner_init(str, &yyextra.core_yy_extra,
45 
46  /* base_yylex() only needs this much initialization */
47  yyextra.have_lookahead = false;
48 
49  /* initialize the bison parser */
50  parser_init(&yyextra);
51 
52  /* Parse! */
53  yyresult = base_yyparse(yyscanner);
54 
55  /* Clean up (release memory) */
56  scanner_finish(yyscanner);
57 
58  if (yyresult) /* error */
59  return NIL;
60 
61  return yyextra.parsetree;
62 }
63 
64 
65 /*
66  * Intermediate filter between parser and core lexer (core_yylex in scan.l).
67  *
68  * This filter is needed because in some cases the standard SQL grammar
69  * requires more than one token lookahead. We reduce these cases to one-token
70  * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
71  *
72  * Using a filter is simpler than trying to recognize multiword tokens
73  * directly in scan.l, because we'd have to allow for comments between the
74  * words. Furthermore it's not clear how to do that without re-introducing
75  * scanner backtrack, which would cost more performance than this filter
76  * layer does.
77  *
78  * The filter also provides a convenient place to translate between
79  * the core_YYSTYPE and YYSTYPE representations (which are really the
80  * same thing anyway, but notationally they're different).
81  */
82 int
83 base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
84 {
85  base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
86  int cur_token;
87  int next_token;
88  int cur_token_length;
89  YYLTYPE cur_yylloc;
90 
91  /* Get next token --- we might already have it */
92  if (yyextra->have_lookahead)
93  {
94  cur_token = yyextra->lookahead_token;
95  lvalp->core_yystype = yyextra->lookahead_yylval;
96  *llocp = yyextra->lookahead_yylloc;
97  *(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
98  yyextra->have_lookahead = false;
99  }
100  else
101  cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
102 
103  /*
104  * If this token isn't one that requires lookahead, just return it. If it
105  * does, determine the token length. (We could get that via strlen(), but
106  * since we have such a small set of possibilities, hardwiring seems
107  * feasible and more efficient.)
108  */
109  switch (cur_token)
110  {
111  case NOT:
112  cur_token_length = 3;
113  break;
114  case NULLS_P:
115  cur_token_length = 5;
116  break;
117  case WITH:
118  cur_token_length = 4;
119  break;
120  default:
121  return cur_token;
122  }
123 
124  /*
125  * Identify end+1 of current token. core_yylex() has temporarily stored a
126  * '\0' here, and will undo that when we call it again. We need to redo
127  * it to fully revert the lookahead call for error reporting purposes.
128  */
129  yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf +
130  *llocp + cur_token_length;
131  Assert(*(yyextra->lookahead_end) == '\0');
132 
133  /*
134  * Save and restore *llocp around the call. It might look like we could
135  * avoid this by just passing &lookahead_yylloc to core_yylex(), but that
136  * does not work because flex actually holds onto the last-passed pointer
137  * internally, and will use that for error reporting. We need any error
138  * reports to point to the current token, not the next one.
139  */
140  cur_yylloc = *llocp;
141 
142  /* Get next token, saving outputs into lookahead variables */
143  next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner);
144  yyextra->lookahead_token = next_token;
145  yyextra->lookahead_yylloc = *llocp;
146 
147  *llocp = cur_yylloc;
148 
149  /* Now revert the un-truncation of the current token */
150  yyextra->lookahead_hold_char = *(yyextra->lookahead_end);
151  *(yyextra->lookahead_end) = '\0';
152 
153  yyextra->have_lookahead = true;
154 
155  /* Replace cur_token if needed, based on lookahead */
156  switch (cur_token)
157  {
158  case NOT:
159  /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
160  switch (next_token)
161  {
162  case BETWEEN:
163  case IN_P:
164  case LIKE:
165  case ILIKE:
166  case SIMILAR:
167  cur_token = NOT_LA;
168  break;
169  }
170  break;
171 
172  case NULLS_P:
173  /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
174  switch (next_token)
175  {
176  case FIRST_P:
177  case LAST_P:
178  cur_token = NULLS_LA;
179  break;
180  }
181  break;
182 
183  case WITH:
184  /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
185  switch (next_token)
186  {
187  case TIME:
188  case ORDINALITY:
189  cur_token = WITH_LA;
190  break;
191  }
192  break;
193  }
194 
195  return cur_token;
196 }
#define NIL
Definition: pg_list.h:69
char * lookahead_end
Definition: gramparse.h:49
List * raw_parser(const char *str)
Definition: parser.c:36
int base_yyparse(core_yyscan_t yyscanner)
void * core_yyscan_t
Definition: scanner.h:116
core_YYSTYPE lookahead_yylval
Definition: gramparse.h:47
const int NumScanKeywords
Definition: keywords.c:45
core_yy_extra_type core_yy_extra
Definition: gramparse.h:40
static bool next_token(char **lineptr, char *buf, int bufsz, bool *initial_quote, bool *terminating_comma, int elevel, char **err_msg)
Definition: hba.c:195
char * scanbuf
Definition: scanner.h:72
const ScanKeyword ScanKeywords[]
Definition: keywords.c:41
void parser_init(base_yy_extra_type *yyext)
#define YYLTYPE
Definition: scanner.h:44
int base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
Definition: parser.c:83
int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
#define pg_yyget_extra(yyscanner)
Definition: gramparse.h:64
#define Assert(condition)
Definition: c.h:675
YYLTYPE lookahead_yylloc
Definition: gramparse.h:48
core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, const ScanKeyword *keywords, int num_keywords)
char lookahead_hold_char
Definition: gramparse.h:50
static core_yyscan_t yyscanner
Definition: pl_scanner.c:210
void scanner_finish(core_yyscan_t yyscanner)
Definition: pg_list.h:45