PostgreSQL Source Code git master
exprscan.l
Go to the documentation of this file.
1%top{
2/*-------------------------------------------------------------------------
3 *
4 * exprscan.l
5 * lexical scanner for pgbench backslash commands
6 *
7 * This lexer supports two operating modes:
8 *
9 * In INITIAL state, just parse off whitespace-separated words (this mode
10 * is basically equivalent to strtok(), which is what we used to use).
11 *
12 * In EXPR state, lex for the simple expression syntax of exprparse.y.
13 *
14 * In either mode, stop upon hitting newline or end of string.
15 *
16 * Note that this lexer operates within the framework created by psqlscan.l.
17 *
18 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
19 * Portions Copyright (c) 1994, Regents of the University of California
20 *
21 * src/bin/pgbench/exprscan.l
22 *
23 *-------------------------------------------------------------------------
24 */
25#include "postgres_fe.h"
26
27/*
28 * NB: include exprparse.h only AFTER including pgbench.h, because pgbench.h
29 * contains definitions needed for YYSTYPE. Likewise, pgbench.h must come after
30 * psqlscan_int.h for yyscan_t.
31 */
33#include "pgbench.h"
34#include "exprparse.h"
35}
36
37%{
38/* context information for reporting errors in expressions */
39static const char *expr_source = NULL;
40static int expr_lineno = 0;
41static int expr_start_offset = 0;
42static const char *expr_command = NULL;
43
44/* indicates whether last yylex() call read a newline */
45static bool last_was_newline = false;
46
47/* LCOV_EXCL_START */
48
static int expr_lineno
Definition: exprscan.l:40
static const char * expr_source
Definition: exprscan.l:39
static bool last_was_newline
Definition: exprscan.l:45
static int expr_start_offset
Definition: exprscan.l:41
static const char * expr_command
Definition: exprscan.l:42
49%}
50
51/* Except for the prefix, these options should match psqlscan.l */
52%option reentrant
53%option bison-bridge
54%option 8bit
55%option never-interactive
56%option nodefault
57%option noinput
58%option nounput
59%option noyywrap
60%option warn
61%option prefix="expr_yy"
62
63/* Character classes */
64alpha [a-zA-Z\200-\377_]
65digit [0-9]
66alnum [A-Za-z\200-\377_0-9]
67/* {space} + {nonspace} + {newline} should cover all characters */
68space [ \t\r\f\v]
69nonspace [^ \t\r\f\v\n]
70newline [\n]
71
72/* Line continuation marker */
73continuation \\\r?{newline}
74
75/* case insensitive keywords */
76and [Aa][Nn][Dd]
77or [Oo][Rr]
78not [Nn][Oo][Tt]
79case [Cc][Aa][Ss][Ee]
80when [Ww][Hh][Ee][Nn]
81then [Tt][Hh][Ee][Nn]
82else [Ee][Ll][Ss][Ee]
83end [Ee][Nn][Dd]
84true [Tt][Rr][Uu][Ee]
85false [Ff][Aa][Ll][Ss][Ee]
86null [Nn][Uu][Ll][Ll]
87is [Ii][Ss]
88isnull [Ii][Ss][Nn][Uu][Ll][Ll]
89notnull [Nn][Oo][Tt][Nn][Uu][Ll][Ll]
90
91/* Exclusive states */
92%x EXPR
93
94%%
95
96%{
97 /* Declare some local variables inside yylex(), for convenience */
98 PsqlScanState cur_state = yyextra;
#define yyextra
Definition: scan.l:1118
99
100 /*
101 * Force flex into the state indicated by start_state. This has a
102 * couple of purposes: it lets some of the functions below set a new
103 * starting state without ugly direct access to flex variables, and it
104 * allows us to transition from one flex lexer to another so that we
105 * can lex different parts of the source string using separate lexers.
106 */
107 BEGIN(cur_state->start_state);
108
109 /* Reset was-newline flag */
110 last_was_newline = false;
111%}
112
113 /* INITIAL state */
114
115{nonspace}+ {
116 /* Found a word, emit and return it */
117 psqlscan_emit(cur_state, yytext, yyleng);
118 return 1;
119 }
void psqlscan_emit(PsqlScanState state, const char *txt, int len)
Definition: psqlscan.l:1463
#define yyleng
Definition: scan.l:1124
120
121 /*
122 * We need this rule to avoid returning "word\" instead of recognizing
123 * a continuation marker just after a word:
124 */
125{nonspace}+{continuation} {
126 /* Found "word\\\r?\n", emit and return just "word" */
127 int wordlen = yyleng - 2;
128 if (yytext[wordlen] == '\r')
129 wordlen--;
130 Assert(yytext[wordlen] == '\\');
131 psqlscan_emit(cur_state, yytext, wordlen);
132 return 1;
133 }
#define Assert(condition)
Definition: c.h:815
134
135{space}+ { /* ignore */ }
136
137{continuation} { /* ignore */ }
138
139{newline} {
140 /* report end of command */
141 last_was_newline = true;
142 return 0;
143 }
144
145 /* EXPR state */
146
147<EXPR>{
148
149"+" { return '+'; }
150"-" { return '-'; }
151"*" { return '*'; }
152"/" { return '/'; }
153"%" { return '%'; } /* C version, also in Pg SQL */
154"=" { return '='; }
155"<>" { return NE_OP; }
156"!=" { return NE_OP; } /* C version, also in Pg SQL */
157"<=" { return LE_OP; }
158">=" { return GE_OP; }
159"<<" { return LS_OP; }
160">>" { return RS_OP; }
161"<" { return '<'; }
162">" { return '>'; }
163"|" { return '|'; }
164"&" { return '&'; }
165"#" { return '#'; }
166"~" { return '~'; }
167
168"(" { return '('; }
169")" { return ')'; }
170"," { return ','; }
171
172{and} { return AND_OP; }
173{or} { return OR_OP; }
174{not} { return NOT_OP; }
175{is} { return IS_OP; }
176{isnull} { return ISNULL_OP; }
177{notnull} { return NOTNULL_OP; }
178
179{case} { return CASE_KW; }
180{when} { return WHEN_KW; }
181{then} { return THEN_KW; }
182{else} { return ELSE_KW; }
183{end} { return END_KW; }
184
185:{alnum}+ {
186 yylval->str = pg_strdup(yytext + 1);
187 return VARIABLE;
188 }
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
189
190{null} { return NULL_CONST; }
191{true} {
192 yylval->bval = true;
193 return BOOLEAN_CONST;
194 }
195{false} {
196 yylval->bval = false;
197 return BOOLEAN_CONST;
198 }
199"9223372036854775808" {
200 /*
201 * Special handling for PG_INT64_MIN, which can't
202 * accurately be represented here, as the minus sign is
203 * lexed separately and INT64_MIN can't be represented as
204 * a positive integer.
205 */
206 return MAXINT_PLUS_ONE_CONST;
207 }
{digit}+		{
					/*
					 * Integer literal.  If it does not fit in an int64,
					 * report the error with a private copy of the token
					 * text: expr_yyerror_more() keeps lexing to end of line,
					 * which overwrites yytext.  Copy with pg_strdup(), as the
					 * other rules in this file do, instead of bare strdup(),
					 * whose NULL result on out-of-memory was never checked.
					 */
					if (!strtoint64(yytext, true, &yylval->ival))
						expr_yyerror_more(yyscanner, "bigint constant overflow",
										  pg_strdup(yytext));
					return INTEGER_CONST;
				}
void expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
Definition: exprscan.l:271
bool strtoint64(const char *str, bool errorOK, int64 *result)
Definition: pgbench.c:988
{digit}+(\.{digit}*)?([eE][-+]?{digit}+)?	{
					/*
					 * Floating-point literal with leading digits, optional
					 * fraction and optional exponent.  On conversion failure,
					 * hand expr_yyerror_more() its own copy of the token,
					 * because it continues lexing and thereby overwrites
					 * yytext.  pg_strdup() is used, matching the other rules
					 * in this file, rather than an unchecked strdup().
					 */
					if (!strtodouble(yytext, true, &yylval->dval))
						expr_yyerror_more(yyscanner, "double constant overflow",
										  pg_strdup(yytext));
					return DOUBLE_CONST;
				}
bool strtodouble(const char *str, bool errorOK, double *dv)
Definition: pgbench.c:1059
\.{digit}+([eE][-+]?{digit}+)?	{
					/*
					 * Floating-point literal that starts with the decimal
					 * point.  On conversion failure, pass expr_yyerror_more()
					 * a private copy of the token, since it goes on lexing
					 * and overwrites yytext.  pg_strdup() is used, matching
					 * the other rules in this file, rather than an unchecked
					 * strdup().
					 */
					if (!strtodouble(yytext, true, &yylval->dval))
						expr_yyerror_more(yyscanner, "double constant overflow",
										  pg_strdup(yytext));
					return DOUBLE_CONST;
				}
226{alpha}{alnum}* {
227 yylval->str = pg_strdup(yytext);
228 return FUNCTION;
229 }
230
231{space}+ { /* ignore */ }
232
233{continuation} { /* ignore */ }
234
235{newline} {
236 /* report end of command */
237 last_was_newline = true;
238 return 0;
239 }
240
241. {
242 /*
243 * must strdup yytext so that expr_yyerror_more doesn't
244 * change it while finding end of line
245 */
246 expr_yyerror_more(yyscanner, "unexpected character",
247 pg_strdup(yytext));
248 /* NOTREACHED, syntax_error calls exit() */
249 return 0;
250 }
251
252}
253
254<<EOF>> {
255 if (cur_state->buffer_stack == NULL)
256 return 0; /* end of input reached */
257
258 /*
259 * We were expanding a variable, so pop the inclusion
260 * stack and keep lexing
261 */
262 psqlscan_pop_buffer_stack(cur_state);
264 }
void psqlscan_select_top_buffer(PsqlScanState state)
Definition: psqlscan.l:1377
void psqlscan_pop_buffer_stack(PsqlScanState state)
Definition: psqlscan.l:1359
265
266%%
267
268/* LCOV_EXCL_STOP */
269
270void
271expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
272{
273 PsqlScanState state = yyget_extra(yyscanner);
274 int error_detection_offset = expr_scanner_offset(state) - 1;
275 YYSTYPE lval;
276 char *full_line;
277
278 /*
279 * While parsing an expression, we may not have collected the whole line
280 * yet from the input source. Lex till EOL so we can report whole line.
281 * (If we're at EOF, it's okay to call yylex() an extra time.)
282 */
283 if (!last_was_newline)
284 {
285 while (yylex(&lval, yyscanner))
286 /* skip */ ;
287 }
288
289 /* Extract the line, trimming trailing newline if any */
293 true);
294
296 message, more, error_detection_offset - expr_start_offset);
297}
298
299/*
300 * (The first argument is enforced by Bison to match the first argument of
301 * yyparse(), but it is not used here.)
302 */
303void
304expr_yyerror(PgBenchExpr **expr_parse_result_p, yyscan_t yyscanner, const char *message)
305{
306 expr_yyerror_more(yyscanner, message, NULL);
307}
308
309/*
310 * Collect a space-separated word from a backslash command and return it
311 * in word_buf, along with its starting string offset in *offset.
312 * Returns true if successful, false if at end of command.
313 */
314bool
316{
317 int lexresult;
318 YYSTYPE lval;
319
320 /* Must be scanning already */
321 Assert(state->scanbufhandle != NULL);
322
323 /* Set current output target */
324 state->output_buf = word_buf;
325 resetPQExpBuffer(word_buf);
326
327 /* Set input source */
328 if (state->buffer_stack != NULL)
329 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
330 else
331 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
332
333 /* Set start state */
334 state->start_state = INITIAL;
335
336 /* And lex. */
337 lexresult = yylex(&lval, state->scanner);
338
339 /*
340 * Save start offset of word, if any. We could do this more efficiently,
341 * but for now this seems fine.
342 */
343 if (lexresult)
344 *offset = expr_scanner_offset(state) - word_buf->len;
345 else
346 *offset = -1;
347
348 /*
349 * In case the caller returns to using the regular SQL lexer, reselect the
350 * appropriate initial state.
351 */
353
354 return (bool) lexresult;
355}
356
357/*
358 * Prepare to lex an expression via expr_yyparse().
359 *
360 * Returns the yyscan_t that is to be passed to expr_yyparse().
361 * (This is just state->scanner, but callers don't need to know that.)
362 */
365 const char *source, int lineno, int start_offset,
366 const char *command)
367{
368 /* Save error context info */
370 expr_lineno = lineno;
371 expr_start_offset = start_offset;
372 expr_command = command;
373
374 /* Must be scanning already */
375 Assert(state->scanbufhandle != NULL);
376
377 /* Set current output target */
378 state->output_buf = NULL;
379
380 /* Set input source */
381 if (state->buffer_stack != NULL)
382 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
383 else
384 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
385
386 /* Set start state */
387 state->start_state = EXPR;
388
389 return state->scanner;
390}
391
392/*
393 * Finish lexing an expression.
394 */
395void
397{
398 PsqlScanState state = yyget_extra(yyscanner);
399
400 /*
401 * Reselect appropriate initial state for SQL lexer.
402 */
404}
405
406/*
407 * Get offset from start of string to end of current lexer token.
408 *
409 * We rely on the knowledge that flex modifies the scan buffer by storing
410 * a NUL at the end of the current token (yytext). Note that this might
411 * not work quite right if we were parsing a sub-buffer, but since pgbench
412 * never invokes that functionality, it doesn't matter.
413 */
414int
416{
417 return strlen(state->scanbuf);
418}
419
420/*
421 * Get a malloc'd copy of the lexer input string from start_offset
422 * to just before end_offset. If chomp is true, drop any trailing
423 * newline(s).
424 */
425char *
427 int start_offset, int end_offset,
428 bool chomp)
429{
430 char *result;
431 const char *scanptr = state->scanbuf + start_offset;
432 int slen = end_offset - start_offset;
433
434 Assert(slen >= 0);
435 Assert(end_offset <= strlen(state->scanbuf));
436
437 if (chomp)
438 {
439 while (slen > 0 &&
440 (scanptr[slen - 1] == '\n' || scanptr[slen - 1] == '\r'))
441 slen--;
442 }
443
444 result = (char *) pg_malloc(slen + 1);
445 memcpy(result, scanptr, slen);
446 result[slen] = '\0';
447
448 return result;
449}
450
451/*
452 * Get the line number associated with the given string offset
453 * (which must not be past the end of where we've lexed to).
454 */
455int
457{
458 int lineno = 1;
459 const char *p = state->scanbuf;
460
461 while (*p && offset > 0)
462 {
463 if (*p == '\n')
464 lineno++;
465 p++, offset--;
466 }
467 return lineno;
468}
void * yyscan_t
Definition: cubedata.h:67
int expr_scanner_offset(PsqlScanState state)
Definition: exprscan.l:415
char * expr_scanner_get_substring(PsqlScanState state, int start_offset, int end_offset, bool chomp)
Definition: exprscan.l:426
bool expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
Definition: exprscan.l:315
int expr_scanner_get_lineno(PsqlScanState state, int offset)
Definition: exprscan.l:456
void expr_yyerror(PgBenchExpr **expr_parse_result_p, yyscan_t yyscanner, const char *message)
Definition: exprscan.l:304
int yylex(YYSTYPE *yylval_param, yyscan_t yyscanner)
Definition: exprscan.l:94
void expr_scanner_finish(yyscan_t yyscanner)
Definition: exprscan.l:396
yyscan_t expr_scanner_init(PsqlScanState state, const char *source, int lineno, int start_offset, const char *command)
Definition: exprscan.l:364
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
static rewind_source * source
Definition: pg_rewind.c:89
void syntax_error(const char *source, int lineno, const char *line, const char *command, const char *msg, const char *more, int column)
Definition: pgbench.c:5501
void resetPQExpBuffer(PQExpBuffer str)
Definition: pqexpbuffer.c:146
void psql_scan_reselect_sql_lexer(PsqlScanState state)
Definition: psqlscan.l:1295
int YYSTYPE
Definition: psqlscanslash.l:39
Definition: regguts.h:323