PostgreSQL Source Code git master
scan.l
Go to the documentation of this file.
1%top{
2/*-------------------------------------------------------------------------
3 *
4 * scan.l
5 * lexical scanner for PostgreSQL
6 *
7 * NOTE NOTE NOTE:
8 *
9 * The rules in this file must be kept in sync with src/fe_utils/psqlscan.l
10 * and src/interfaces/ecpg/preproc/pgc.l!
11 *
12 * The rules are designed so that the scanner never has to backtrack,
13 * in the sense that there is always a rule that can match the input
14 * consumed so far (the rule action may internally throw back some input
15 * with yyless(), however). As explained in the flex manual, this makes
16 * for a useful speed increase --- several percent faster when measuring
17 * raw parsing (Flex + Bison). The extra complexity is mostly in the rules
18 * for handling float numbers and continued string literals. If you change
19 * the lexical rules, verify that you haven't broken the no-backtrack
20 * property by running flex with the "-b" option and checking that the
21 * resulting "lex.backup" file says that no backing up is needed. (As of
22 * Postgres 9.2, this check is made automatically by the Makefile.)
23 *
24 *
25 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
26 * Portions Copyright (c) 1994, Regents of the University of California
27 *
28 * IDENTIFICATION
29 * src/backend/parser/scan.l
30 *
31 *-------------------------------------------------------------------------
32 */
33#include "postgres.h"
34
35#include <ctype.h>
36#include <unistd.h>
37
38#include "common/string.h"
39#include "gramparse.h"
40#include "nodes/miscnodes.h"
41#include "parser/parser.h" /* only needed for GUC variables */
42#include "parser/scansup.h"
43#include "port/pg_bitutils.h"
44#include "mb/pg_wchar.h"
45#include "utils/builtins.h"
46}
47
48%{
49
50/* LCOV_EXCL_START */
51
52/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
53#undef fprintf
54#define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg)
55
56static void
57fprintf_to_ereport(const char *fmt, const char *msg) /* stand-in for fprintf(), see the macro above; fmt is never used */
58{
59 ereport(ERROR, (errmsg_internal("%s", msg))); /* report msg verbatim as an ERROR */
60}
61
62/*
63 * GUC variables. This is a DIRECT violation of the warning given at the
64 * head of gram.y, ie flex/bison code must not depend on any GUC variables;
65 * as such, changing their values can induce very unintuitive behavior.
66 * But we shall have to live with it until we can remove these variables.
67 */
71
72/*
73 * Constant data exported from this file. This array maps from the
74 * zero-based keyword numbers returned by ScanKeywordLookup to the
75 * Bison token numbers needed by gram.y. This is exported because
76 * callers need to pass it to scanner_init, if they are using the
77 * standard keyword list ScanKeywords.
78 */
79#define PG_KEYWORD(kwname, value, category, collabel) value,
80
82#include "parser/kwlist.h"
83};
84
85#undef PG_KEYWORD
86
87/*
88 * Set the type of YYSTYPE.
89 */
90#define YYSTYPE core_YYSTYPE
91
92/*
93 * Each call to yylex must set yylloc to the location of the found token
94 * (expressed as a byte offset from the start of the input text).
95 * When we parse a token that requires multiple lexer rules to process,
96 * this should be done in the first such rule, else yylloc will point
97 * into the middle of the token.
98 */
99#define SET_YYLLOC() (*(yylloc) = yytext - yyextra->scanbuf)
100
101/*
102 * Advance yylloc by the given number of bytes.
103 */
104#define ADVANCE_YYLLOC(delta) ( *(yylloc) += (delta) )
105
106/*
107 * Sometimes, we do want yylloc to point into the middle of a token; this is
108 * useful for instance to throw an error about an escape sequence within a
109 * string literal. But if we find no error there, we want to revert yylloc
110 * to the token start, so that that's the location reported to the parser.
111 * Use PUSH_YYLLOC/POP_YYLLOC to save/restore yylloc around such code.
112 * (Currently the implied "stack" is just one location, but someday we might
113 * need to nest these.)
114 */
115#define PUSH_YYLLOC() (yyextra->save_yylloc = *(yylloc))
116#define POP_YYLLOC() (*(yylloc) = yyextra->save_yylloc)
117
118#define startlit() ( yyextra->literallen = 0 )
119static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner);
120static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
121static char *litbufdup(core_yyscan_t yyscanner);
122static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner);
123static int process_integer_literal(const char *token, YYSTYPE *lval, int base);
124static void addunicode(pg_wchar c, yyscan_t yyscanner);
125
126#define yyerror(msg) scanner_yyerror(msg, yyscanner)
127
128#define lexer_errposition() scanner_errposition(*(yylloc), yyscanner)
129
130static void check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner);
131static void check_escape_warning(core_yyscan_t yyscanner);
132
uint16_t uint16
Definition: c.h:487
void * yyscan_t
Definition: cubedata.h:67
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define token
Definition: indent_globs.h:126
static void const char * fmt
unsigned int pg_wchar
Definition: mbprint.c:31
@ BACKSLASH_QUOTE_SAFE_ENCODING
Definition: parser.h:52
char * c
int YYSTYPE
Definition: psqlscanslash.l:39
static void check_escape_warning(core_yyscan_t yyscanner)
Definition: scan.l:1451
bool escape_string_warning
Definition: scan.l:69
static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner)
Definition: scan.l:1327
static int process_integer_literal(const char *token, YYSTYPE *lval, int base)
Definition: scan.l:1362
static char * litbufdup(core_yyscan_t yyscanner)
Definition: scan.l:1346
static void fprintf_to_ereport(const char *fmt, const char *msg)
Definition: scan.l:57
int backslash_quote
Definition: scan.l:68
static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner)
Definition: scan.l:1311
static void addunicode(pg_wchar c, yyscan_t yyscanner)
Definition: scan.l:1379
bool standard_conforming_strings
Definition: scan.l:70
static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner)
Definition: scan.l:1398
static void check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner)
Definition: scan.l:1424
const uint16 ScanKeywordTokens[]
Definition: scan.l:81
void * core_yyscan_t
Definition: scanner.h:121
133%}
134
135%option reentrant
136%option bison-bridge
137%option bison-locations
138%option 8bit
139%option never-interactive
140%option nodefault
141%option noinput
142%option nounput
143%option noyywrap
144%option noyyalloc
145%option noyyrealloc
146%option noyyfree
147%option warn
148%option prefix="core_yy"
149%option extra-type="core_yy_extra_type *"
150
151/*
152 * OK, here is a short description of lex/flex rules behavior.
153 * The longest pattern which matches an input string is always chosen.
154 * For equal-length patterns, the first occurring in the rules list is chosen.
155 * INITIAL is the starting state, to which all non-conditional rules apply.
156 * Exclusive states change parsing rules while the state is active. When in
157 * an exclusive state, only those rules defined for that state apply.
158 *
159 * We use exclusive states for quoted strings, extended comments,
160 * and to eliminate parsing troubles for numeric strings.
161 * Exclusive states:
162 * <xb> bit string literal
163 * <xc> extended C-style comments
164 * <xd> delimited identifiers (double-quoted identifiers)
165 * <xh> hexadecimal byte string
166 * <xq> standard quoted strings
167 * <xqs> quote stop (detect continued strings)
168 * <xe> extended quoted strings (support backslash escape sequences)
169 * <xdolq> $foo$ quoted strings
170 * <xui> quoted identifier with Unicode escapes
171 * <xus> quoted string with Unicode escapes
172 * <xeu> Unicode surrogate pair in extended quoted string
173 *
174 * Remember to add an <<EOF>> case whenever you add a new exclusive state!
175 * The default one is probably not the right thing.
176 */
177
178%x xb
179%x xc
180%x xd
181%x xh
182%x xq
183%x xqs
184%x xe
185%x xdolq
186%x xui
187%x xus
188%x xeu
189
190/*
191 * In order to make the world safe for Windows and Mac clients as well as
192 * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
193 * sequence will be seen as two successive newlines, but that doesn't cause
194 * any problems. Comments that start with -- and extend to the next
195 * newline are treated as equivalent to a single whitespace character.
196 *
197 * NOTE a fine point: if there is no newline following --, we will absorb
198 * everything to the end of the input as a comment. This is correct. Older
199 * versions of Postgres failed to recognize -- as a comment if the input
200 * did not end with a newline.
201 *
202 * non_newline_space tracks all the other space characters except newlines.
203 *
204 * XXX if you change the set of whitespace characters, fix scanner_isspace()
205 * to agree.
206 */
207
208space [ \t\n\r\f\v]
209non_newline_space [ \t\f\v]
210newline [\n\r]
211non_newline [^\n\r]
212
213comment ("--"{non_newline}*)
214
215whitespace ({space}+|{comment})
216
217/*
218 * SQL requires at least one newline in the whitespace separating
219 * string literals that are to be concatenated. Silly, but who are we
220 * to argue? Note that {whitespace_with_newline} should not have * after
221 * it, whereas {whitespace} should generally have a * after it...
222 */
223
224special_whitespace ({space}+|{comment}{newline})
225non_newline_whitespace ({non_newline_space}|{comment})
226whitespace_with_newline ({non_newline_whitespace}*{newline}{special_whitespace}*)
227
228quote '
229/* If we see {quote} then {quotecontinue}, the quoted string continues */
230quotecontinue {whitespace_with_newline}{quote}
231
232/*
233 * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
234 * {quotecontinue}. It might seem that this could just be {whitespace}*,
235 * but if there's a dash after {whitespace_with_newline}, it must be consumed
236 * to see if there's another dash --- which would start a {comment} and thus
237 * allow continuation of the {quotecontinue} token.
238 */
239quotecontinuefail {whitespace}*"-"?
240
241/* Bit string
242 * It is tempting to scan the string for only those characters
243 * which are allowed. However, this leads to silently swallowed
244 * characters if illegal characters are included in the string.
245 * For example, if xbinside is [01] then B'ABCD' is interpreted
246 * as a zero-length string, and the ABCD' is lost!
247 * Better to pass the string forward and let the input routines
248 * validate the contents.
249 */
250xbstart [bB]{quote}
251xbinside [^']*
252
253/* Hexadecimal byte string */
254xhstart [xX]{quote}
255xhinside [^']*
256
257/* National character */
258xnstart [nN]{quote}
259
260/* Quoted string that allows backslash escapes */
261xestart [eE]{quote}
262xeinside [^\\']+
263xeescape [\\][^0-7]
264xeoctesc [\\][0-7]{1,3}
265xehexesc [\\]x[0-9A-Fa-f]{1,2}
266xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
267xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
268
269/* Extended quote
270 * xqdouble implements embedded quote, ''''
271 */
272xqstart {quote}
273xqdouble {quote}{quote}
274xqinside [^']+
275
276/* $foo$ style quotes ("dollar quoting")
277 * The quoted string starts with $foo$ where "foo" is an optional string
278 * in the form of an identifier, except that it may not contain "$",
279 * and extends to the first occurrence of an identical string.
280 * There is *no* processing of the quoted text.
281 *
282 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
283 * fails to match its trailing "$".
284 */
285dolq_start [A-Za-z\200-\377_]
286dolq_cont [A-Za-z\200-\377_0-9]
287dolqdelim \$({dolq_start}{dolq_cont}*)?\$
288dolqfailed \${dolq_start}{dolq_cont}*
289dolqinside [^$]+
290
291/* Double quote
292 * Allows embedded spaces and other special characters into identifiers.
293 */
294dquote \"
295xdstart {dquote}
296xdstop {dquote}
297xddouble {dquote}{dquote}
298xdinside [^"]+
299
300/* Quoted identifier with Unicode escapes */
301xuistart [uU]&{dquote}
302
303/* Quoted string with Unicode escapes */
304xusstart [uU]&{quote}
305
306/* error rule to avoid backup */
307xufailed [uU]&
308
309
310/* C-style comments
311 *
312 * The "extended comment" syntax closely resembles allowable operator syntax.
313 * The tricky part here is to get lex to recognize a string starting with
314 * slash-star as a comment, when interpreting it as an operator would produce
315 * a longer match --- remember lex will prefer a longer match! Also, if we
316 * have something like plus-slash-star, lex will think this is a 3-character
317 * operator whereas we want to see it as a + operator and a comment start.
318 * The solution is two-fold:
319 * 1. append {op_chars}* to xcstart so that it matches as much text as
320 * {operator} would. Then the tie-breaker (first matching rule of same
321 * length) ensures xcstart wins. We put back the extra stuff with yyless()
322 * in case it contains a star-slash that should terminate the comment.
323 * 2. In the operator rule, check for slash-star within the operator, and
324 * if found throw it back with yyless(). This handles the plus-slash-star
325 * problem.
326 * Dash-dash comments have similar interactions with the operator rule.
327 */
328xcstart \/\*{op_chars}*
329xcstop \*+\/
330xcinside [^*/]+
331
332ident_start [A-Za-z\200-\377_]
333ident_cont [A-Za-z\200-\377_0-9\$]
334
335identifier {ident_start}{ident_cont}*
336
337/* Assorted special-case operators and operator-like tokens */
338typecast "::"
339dot_dot \.\.
340colon_equals ":="
341
342/*
343 * These operator-like tokens (unlike the above ones) also match the {operator}
344 * rule, which means that they might be overridden by a longer match if they
345 * are followed by a comment start or a + or - character. Accordingly, if you
346 * add to this list, you must also add corresponding code to the {operator}
347 * block to return the correct token in such cases. (This is not needed in
348 * psqlscan.l since the token value is ignored there.)
349 */
350equals_greater "=>"
351less_equals "<="
352greater_equals ">="
353less_greater "<>"
354not_equals "!="
355
356/*
357 * "self" is the set of chars that should be returned as single-character
358 * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
359 * which can be one or more characters long (but if a single-char token
360 * appears in the "self" set, it is not to be returned as an Op). Note
361 * that the sets overlap, but each has some chars that are not in the other.
362 *
363 * If you change either set, adjust the character lists appearing in the
364 * rule for "operator"!
365 */
366self [,()\[\].;\:\+\-\*\/\%\^<>\=]
367op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%<>\=]
368operator {op_chars}+
369
370/*
371 * Numbers
372 *
373 * Unary minus is not part of a number here. Instead we pass it separately to
374 * the parser, and there it gets coerced via doNegate().
375 *
376 * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
377 *
378 * {realfail} is added to prevent the need for scanner
379 * backup when the {real} rule fails to match completely.
380 */
381decdigit [0-9]
382hexdigit [0-9A-Fa-f]
383octdigit [0-7]
384bindigit [0-1]
385
386decinteger {decdigit}(_?{decdigit})*
387hexinteger 0[xX](_?{hexdigit})+
388octinteger 0[oO](_?{octdigit})+
389bininteger 0[bB](_?{bindigit})+
390
391hexfail 0[xX]_?
392octfail 0[oO]_?
393binfail 0[bB]_?
394
395numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
396numericfail {decinteger}\.\.
397
398real ({decinteger}|{numeric})[Ee][-+]?{decinteger}
399realfail ({decinteger}|{numeric})[Ee][-+]
400
401/* Positional parameters don't accept underscores. */
402param \${decdigit}+
403
404/*
405 * An identifier immediately following an integer literal is disallowed because
406 * in some cases it's ambiguous what is meant: for example, 0x1234 could be
407 * either a hexinteger or a decinteger "0" and an identifier "x1234". We can
408 * detect such problems by seeing if integer_junk matches a longer substring
409 * than any of the XXXinteger patterns (decinteger, hexinteger, octinteger,
410 * bininteger). One "junk" pattern is sufficient because
411 * {decinteger}{identifier} will match all the same strings we'd match with
412 * {hexinteger}{identifier} etc.
413 *
414 * Note that the rule for integer_junk must appear after the ones for
415 * XXXinteger to make this work correctly: 0x1234 will match both hexinteger
416 * and integer_junk, and we need hexinteger to be chosen in that case.
417 *
418 * Also disallow strings matched by numeric_junk, real_junk and param_junk
419 * for consistency.
420 */
421integer_junk {decinteger}{identifier}
422numeric_junk {numeric}{identifier}
423real_junk {real}{identifier}
424param_junk \${decdigit}+{identifier}
425
426other .
427
428/*
429 * Dollar quoted strings are totally opaque, and no escaping is done on them.
430 * Other quoted strings must allow some special characters such as single-quote
431 * and newline.
432 * Embedded single-quotes are implemented both in the SQL standard
433 * style of two adjacent single quotes "''" and in the Postgres/Java style
434 * of escaped-quote "\'".
435 * Other embedded escaped characters are matched explicitly and the leading
436 * backslash is dropped from the string.
437 * Note that xcstart must appear before operator, as explained above!
438 * Also whitespace (comment) must appear before operator.
439 */
440
442
443{whitespace} {
444 /* ignore: whitespace and dash-dash comments produce no token */
445 }
446
447{xcstart} {
448 /* Set location in case of syntax error in comment */
449 SET_YYLLOC();
450 yyextra->xcdepth = 0; /* outermost comment: no nested comments open yet */
451 BEGIN(xc); /* enter the exclusive C-style-comment state */
452 /* Put back any characters past slash-star; see above */
453 yyless(2);
454 }
#define yyextra
Definition: scan.l:1118
#define SET_YYLLOC()
Definition: scan.l:99
455
456<xc>{
457{xcstart} {
458 (yyextra->xcdepth)++; /* a nested comment starts: one more level to close */
459 /* Put back any characters past slash-star; see above */
460 yyless(2);
461 }
462
463{xcstop} {
464 if (yyextra->xcdepth <= 0)
465 BEGIN(INITIAL); /* outermost comment closed: resume normal scanning */
466 else
467 (yyextra->xcdepth)--; /* only a nested comment closed: stay in xc */
468 }
469
470{xcinside} {
471 /* ignore: comment text containing neither star nor slash */
472 }
473
474{op_chars} {
475 /* ignore: a single operator character inside the comment */
476 }
477
478\*+ {
479 /* ignore: a run of stars that does not terminate the comment */
480 }
481
482<<EOF>> {
483 yyerror("unterminated /* comment");
484 }
#define yyerror(msg)
Definition: scan.l:126
485} /* <xc> */
486
487{xbstart} {
488 /* Binary bit type.
489 * At some point we should simply pass the string
490 * forward to the parser and label it there.
491 * In the meantime, place a leading "b" on the string
492 * to mark it for the input routine as a binary string.
493 */
494 SET_YYLLOC();
495 BEGIN(xb); /* the contents are collected by the xb-state rules */
496 startlit(); /* reset the literal buffer length to zero */
497 addlitchar('b', yyscanner);
498 }
#define startlit()
Definition: scan.l:118
499<xh>{xhinside} |
500<xb>{xbinside} {
501 addlit(yytext, yyleng, yyscanner); /* pass the text on as-is; validating it is left to the input routines */
502 }
#define yyleng
Definition: scan.l:1124
503<xb><<EOF>> { yyerror("unterminated bit string literal"); }
504
505{xhstart} {
506 /* Hexadecimal bit type.
507 * At some point we should simply pass the string
508 * forward to the parser and label it there.
509 * In the meantime, place a leading "x" on the string
510 * to mark it for the input routine as a hex string.
511 */
512 SET_YYLLOC();
513 BEGIN(xh); /* the contents are collected by the xh-state rules */
514 startlit(); /* reset the literal buffer length to zero */
515 addlitchar('x', yyscanner);
516 }
517<xh><<EOF>> { yyerror("unterminated hexadecimal string literal"); }
518
519{xnstart} {
520 /* National character.
521 * We will pass this along as a normal character string,
522 * but preceded with an internally-generated "NCHAR".
523 */
524 int kwnum;
525
526 SET_YYLLOC();
527 yyless(1); /* eat only 'n' this time; the quote stays in the input and starts the string token on the next call */
528
529 kwnum = ScanKeywordLookup("nchar",
530 yyextra->keywordlist);
531 if (kwnum >= 0) /* "nchar" is present in the active keyword list */
532 {
533 yylval->keyword = GetScanKeyword(kwnum,
534 yyextra->keywordlist);
535 return yyextra->keyword_tokens[kwnum];
536 }
537 else
538 {
539 /* If NCHAR isn't a keyword, just return "n" */
540 yylval->str = pstrdup("n");
541 return IDENT;
542 }
543 }
int ScanKeywordLookup(const char *str, const ScanKeywordList *keywords)
Definition: kwlookup.c:38
static const char * GetScanKeyword(int n, const ScanKeywordList *keywords)
Definition: kwlookup.h:39
char * pstrdup(const char *in)
Definition: mcxt.c:1696
544
545{xqstart} {
546 yyextra->warn_on_first_escape = true;
547 yyextra->saw_non_ascii = false;
548 SET_YYLLOC();
549 if (yyextra->standard_conforming_strings)
550 BEGIN(xq); /* standard string: backslashes are ordinary characters */
551 else
552 BEGIN(xe); /* backslash escapes are processed, as for an E-prefixed string */
553 startlit();
554 }
555{xestart} {
556 yyextra->warn_on_first_escape = false; /* unlike the plain-quote rule, which sets this to true */
557 yyextra->saw_non_ascii = false;
558 SET_YYLLOC();
559 BEGIN(xe); /* an E-prefixed string always uses the escape-processing state */
560 startlit();
561 }
562{xusstart} {
563 SET_YYLLOC();
564 if (!yyextra->standard_conforming_strings)
566 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
567 errmsg("unsafe use of string constant with Unicode escapes"),
568 errdetail("String constants with Unicode escapes cannot be used when \"standard_conforming_strings\" is off."),
570 BEGIN(xus);
571 startlit();
572 }
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define lexer_errposition()
Definition: scan.l:128
573
574<xb,xh,xq,xe,xus>{quote} {
575 /*
576 * When we are scanning a quoted string and see an end
577 * quote, we must look ahead for a possible continuation.
578 * If we don't see one, we know the end quote was in fact
579 * the end of the string. To reduce the lexer table size,
580 * we use a single "xqs" state to do the lookahead for all
581 * types of strings.
582 */
583 yyextra->state_before_str_stop = YYSTATE;
584 BEGIN(xqs);
585 }
586<xqs>{quotecontinue} {
587 /*
588 * Found a quote continuation, so return to the in-quote
589 * state and continue scanning the literal. Nothing is
590 * added to the literal's contents.
591 */
592 BEGIN(yyextra->state_before_str_stop);
593 }
594<xqs>{quotecontinuefail} |
595<xqs>{other} |
596<xqs><<EOF>> {
597 /*
598 * Failed to see a quote continuation. Throw back
599 * everything after the end quote, and handle the string
600 * according to the state we were in previously.
601 */
602 yyless(0); /* nothing is consumed: the lookahead text is rescanned in INITIAL state */
603 BEGIN(INITIAL);
604
605 switch (yyextra->state_before_str_stop)
606 {
607 case xb: /* bit string literal */
608 yylval->str = litbufdup(yyscanner);
609 return BCONST;
610 case xh: /* hexadecimal byte string */
611 yylval->str = litbufdup(yyscanner);
612 return XCONST;
613 case xq:
614 case xe:
615 /*
616 * Check that the data remains valid, if it might
617 * have been made invalid by unescaping any chars.
618 */
619 if (yyextra->saw_non_ascii)
620 pg_verifymbstr(yyextra->literalbuf,
621 yyextra->literallen,
622 false);
623 yylval->str = litbufdup(yyscanner);
624 return SCONST;
625 case xus: /* string with Unicode escapes; the scanner rules for this state leave the escapes unprocessed */
626 yylval->str = litbufdup(yyscanner);
627 return USCONST;
628 default: /* should not happen: no other state is expected to be saved before entering xqs */
629 yyerror("unhandled previous state in xqs");
630 }
631 }
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition: mbutils.c:1556
632
633<xq,xe,xus>{xqdouble} {
634 addlitchar('\'', yyscanner);
635 }
636<xq,xus>{xqinside} {
637 addlit(yytext, yyleng, yyscanner);
638 }
639<xe>{xeinside} {
640 addlit(yytext, yyleng, yyscanner);
641 }
642<xe>{xeunicode} {
643 pg_wchar c = strtoul(yytext + 2, NULL, 16);
644
645 /*
646 * For consistency with other productions, issue any
647 * escape warning with cursor pointing to start of string.
648 * We might want to change that, someday.
649 */
650 check_escape_warning(yyscanner);
651
652 /* Remember start of overall string token ... */
653 PUSH_YYLLOC();
654 /* ... and set the error cursor to point at this esc seq */
655 SET_YYLLOC();
656
658 {
659 yyextra->utf16_first_part = c;
660 BEGIN(xeu);
661 }
663 yyerror("invalid Unicode surrogate pair");
664 else
665 addunicode(c, yyscanner);
666
667 /* Restore yylloc to be start of string token */
668 POP_YYLLOC();
669 }
static bool is_utf16_surrogate_first(pg_wchar c)
Definition: pg_wchar.h:525
static bool is_utf16_surrogate_second(pg_wchar c)
Definition: pg_wchar.h:531
#define PUSH_YYLLOC()
Definition: scan.l:115
#define POP_YYLLOC()
Definition: scan.l:116
670<xeu>{xeunicode} {
671 pg_wchar c = strtoul(yytext + 2, NULL, 16);
672
673 /* Remember start of overall string token ... */
674 PUSH_YYLLOC();
675 /* ... and set the error cursor to point at this esc seq */
676 SET_YYLLOC();
677
679 yyerror("invalid Unicode surrogate pair");
680
681 c = surrogate_pair_to_codepoint(yyextra->utf16_first_part, c);
682
683 addunicode(c, yyscanner);
684
685 /* Restore yylloc to be start of string token */
686 POP_YYLLOC();
687
688 BEGIN(xe);
689 }
static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
Definition: pg_wchar.h:537
690<xeu>. |
691<xeu>\n |
692<xeu><<EOF>> {
693 /* Set the error cursor to point at missing esc seq */
694 SET_YYLLOC();
695 yyerror("invalid Unicode surrogate pair");
696 }
697<xe,xeu>{xeunicodefail} {
698 /* Set the error cursor to point at malformed esc seq */
699 SET_YYLLOC();
701 (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
702 errmsg("invalid Unicode escape"),
703 errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."),
705 }
int errhint(const char *fmt,...)
Definition: elog.c:1317
706<xe>{xeescape} {
707 if (yytext[1] == '\'')
708 {
709 if (yyextra->backslash_quote == BACKSLASH_QUOTE_OFF ||
710 (yyextra->backslash_quote == BACKSLASH_QUOTE_SAFE_ENCODING &&
713 (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
714 errmsg("unsafe use of \\' in a string literal"),
715 errhint("Use '' to write quotes in strings. \\' is insecure in client-only encodings."),
717 }
718 check_string_escape_warning(yytext[1], yyscanner);
719 addlitchar(unescape_single_char(yytext[1], yyscanner),
720 yyscanner);
721 }
int pg_get_client_encoding(void)
Definition: mbutils.c:336
@ BACKSLASH_QUOTE_OFF
Definition: parser.h:50
#define PG_ENCODING_IS_CLIENT_ONLY(_enc)
Definition: pg_wchar.h:284
722<xe>{xeoctesc} {
723 unsigned char c = strtoul(yytext + 1, NULL, 8); /* skip the backslash; three-digit values above 0377 are truncated to one byte */
724
725 check_escape_warning(yyscanner);
726 addlitchar(c, yyscanner);
727 if (c == '\0' || IS_HIGHBIT_SET(c))
728 yyextra->saw_non_ascii = true; /* makes the end-of-string code run pg_verifymbstr on the literal */
729 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1112
730<xe>{xehexesc} {
731 unsigned char c = strtoul(yytext + 2, NULL, 16); /* skip the backslash and the x; one or two hex digits always fit in a byte */
732
733 check_escape_warning(yyscanner);
734 addlitchar(c, yyscanner);
735 if (c == '\0' || IS_HIGHBIT_SET(c))
736 yyextra->saw_non_ascii = true; /* makes the end-of-string code run pg_verifymbstr on the literal */
737 }
738<xe>. {
739 /* This is only needed for \ just before EOF; a backslash followed by any character is matched by one of the escape rules */
740 addlitchar(yytext[0], yyscanner);
741 }
742<xq,xe,xus><<EOF>> { yyerror("unterminated quoted string"); }
743
744{dolqdelim} {
745 SET_YYLLOC();
746 yyextra->dolqstart = pstrdup(yytext); /* keep the opening delimiter to compare against candidate closers */
747 BEGIN(xdolq);
748 startlit();
749 }
750{dolqfailed} {
751 SET_YYLLOC();
752 /* throw back all but the initial "$" */
753 yyless(1);
754 /* and treat it as {other} */
755 return yytext[0];
756 }
757<xdolq>{dolqdelim} {
758 if (strcmp(yytext, yyextra->dolqstart) == 0) /* identical to the opening delimiter: the string is complete */
759 {
760 pfree(yyextra->dolqstart);
761 yyextra->dolqstart = NULL;
762 BEGIN(INITIAL);
763 yylval->str = litbufdup(yyscanner);
764 return SCONST;
765 }
766 else
767 {
768 /*
769 * When we fail to match $...$ to dolqstart, transfer
770 * the $... part to the output, but put back the final
771 * $ for rescanning. Consider $delim$...$junk$delim$
772 */
773 addlit(yytext, yyleng - 1, yyscanner); /* yyleng - 1 leaves out the trailing dollar sign */
774 yyless(yyleng - 1);
775 }
776 }
void pfree(void *pointer)
Definition: mcxt.c:1521
777<xdolq>{dolqinside} {
778 addlit(yytext, yyleng, yyscanner);
779 }
780<xdolq>{dolqfailed} {
781 addlit(yytext, yyleng, yyscanner);
782 }
783<xdolq>. {
784 /* This is only needed for $ inside the quoted text */
785 addlitchar(yytext[0], yyscanner);
786 }
787<xdolq><<EOF>> { yyerror("unterminated dollar-quoted string"); }
788
789{xdstart} {
790 SET_YYLLOC();
791 BEGIN(xd);
792 startlit();
793 }
794{xuistart} {
795 SET_YYLLOC();
796 BEGIN(xui);
797 startlit();
798 }
799<xd>{xdstop} {
800 char *ident;
801
802 BEGIN(INITIAL);
803 if (yyextra->literallen == 0)
804 yyerror("zero-length delimited identifier");
805 ident = litbufdup(yyscanner); /* no case folding is applied to a double-quoted identifier here */
806 if (yyextra->literallen >= NAMEDATALEN)
807 truncate_identifier(ident, yyextra->literallen, true); /* third argument is the warn flag */
808 yylval->str = ident;
809 return IDENT;
810 }
#define ident
Definition: indent_codes.h:47
#define NAMEDATALEN
void truncate_identifier(char *ident, int len, bool warn)
Definition: scansup.c:93
811<xui>{dquote} {
812 BEGIN(INITIAL);
813 if (yyextra->literallen == 0)
814 yyerror("zero-length delimited identifier");
815 /* can't truncate till after we de-escape the ident */
816 yylval->str = litbufdup(yyscanner);
817 return UIDENT;
818 }
819<xd,xui>{xddouble} {
820 addlitchar('"', yyscanner);
821 }
822<xd,xui>{xdinside} {
823 addlit(yytext, yyleng, yyscanner);
824 }
825<xd,xui><<EOF>> { yyerror("unterminated quoted identifier"); }
826
827{xufailed} {
828 char *ident;
829
830 SET_YYLLOC();
831 /* throw back all but the initial u/U */
832 yyless(1);
833 /* and treat it as {identifier} */
835 yylval->str = ident;
836 return IDENT;
837 }
char * downcase_truncate_identifier(const char *ident, int len, bool warn)
Definition: scansup.c:37
838
839{typecast} {
840 SET_YYLLOC();
841 return TYPECAST;
842 }
843
844{dot_dot} {
845 SET_YYLLOC();
846 return DOT_DOT;
847 }
848
849{colon_equals} {
850 SET_YYLLOC();
851 return COLON_EQUALS;
852 }
853
854{equals_greater} {
855 SET_YYLLOC();
856 return EQUALS_GREATER;
857 }
858
859{less_equals} {
860 SET_YYLLOC();
861 return LESS_EQUALS;
862 }
863
864{greater_equals} {
865 SET_YYLLOC();
866 return GREATER_EQUALS;
867 }
868
869{less_greater} {
870 /* We accept both "<>" and "!=" as meaning NOT_EQUALS */
871 SET_YYLLOC();
872 return NOT_EQUALS;
873 }
874
875{not_equals} {
876 /* We accept both "<>" and "!=" as meaning NOT_EQUALS */
877 SET_YYLLOC();
878 return NOT_EQUALS;
879 }
880
881{self} {
882 SET_YYLLOC();
883 return yytext[0];
884 }
885
886{operator} {
887 /*
888 * Check for embedded slash-star or dash-dash; those
889 * are comment starts, so operator must stop there.
890 * Note that slash-star or dash-dash at the first
891 * character will match a prior rule, not this one.
892 */
893 int nchars = yyleng;
894 char *slashstar = strstr(yytext, "/*");
895 char *dashdash = strstr(yytext, "--");
896
897 if (slashstar && dashdash)
898 {
899 /* if both appear, take the first one */
900 if (slashstar > dashdash)
901 slashstar = dashdash;
902 }
903 else if (!slashstar)
904 slashstar = dashdash;
905 if (slashstar)
906 nchars = slashstar - yytext;
907
908 /*
909 * For SQL compatibility, '+' and '-' cannot be the
910 * last char of a multi-char operator unless the operator
911 * contains chars that are not in SQL operators.
912 * The idea is to lex '=-' as two operators, but not
913 * to forbid operator names like '?-' that could not be
914 * sequences of SQL operators.
915 */
916 if (nchars > 1 &&
917 (yytext[nchars - 1] == '+' ||
918 yytext[nchars - 1] == '-'))
919 {
920 int ic;
921
922 for (ic = nchars - 2; ic >= 0; ic--)
923 {
924 char c = yytext[ic];
925 if (c == '~' || c == '!' || c == '@' ||
926 c == '#' || c == '^' || c == '&' ||
927 c == '|' || c == '`' || c == '?' ||
928 c == '%')
929 break;
930 }
931 if (ic < 0)
932 {
933 /*
934 * didn't find a qualifying character, so remove
935 * all trailing [+-]
936 */
937 do {
938 nchars--;
939 } while (nchars > 1 &&
940 (yytext[nchars - 1] == '+' ||
941 yytext[nchars - 1] == '-'));
942 }
943 }
944
945 SET_YYLLOC();
946
947 if (nchars < yyleng) /* one of the checks above shortened the operator */
948 {
949 /* Strip the unwanted chars from the token */
950 yyless(nchars);
951 /*
952 * If what we have left is only one char, and it's
953 * one of the characters matching "self", then
954 * return it as a character token the same way
955 * that the "self" rule would have.
956 */
957 if (nchars == 1 &&
958 strchr(",()[].;:+-*/%^<>=", yytext[0]))
959 return yytext[0];
960 /*
961 * Likewise, if what we have left is two chars, and
962 * those match the tokens ">=", "<=", "=>", "<>" or
963 * "!=", then we must return the appropriate token
964 * rather than the generic Op.
965 */
966 if (nchars == 2)
967 {
968 if (yytext[0] == '=' && yytext[1] == '>')
969 return EQUALS_GREATER;
970 if (yytext[0] == '>' && yytext[1] == '=')
971 return GREATER_EQUALS;
972 if (yytext[0] == '<' && yytext[1] == '=')
973 return LESS_EQUALS;
974 if (yytext[0] == '<' && yytext[1] == '>')
975 return NOT_EQUALS;
976 if (yytext[0] == '!' && yytext[1] == '=')
977 return NOT_EQUALS;
978 }
979 }
980
981 /*
982 * Complain if operator is too long. Unlike the case
983 * for identifiers, we make this an error not a notice-
984 * and-truncate, because the odds are we are looking at
985 * a syntactic mistake anyway.
986 */
987 if (nchars >= NAMEDATALEN)
988 yyerror("operator too long");
989
990 yylval->str = pstrdup(yytext); /* after any yyless() above, yytext holds just the nchars characters kept */
991 return Op;
992 }
993
994{param} {
995 ErrorSaveContext escontext = {T_ErrorSaveContext};
996 int32 val;
997
998 SET_YYLLOC();
999 val = pg_strtoint32_safe(yytext + 1, (Node *) &escontext); /* skip the leading dollar sign; a conversion failure is reported through escontext */
1000 if (escontext.error_occurred)
1001 yyerror("parameter number too large");
1002 yylval->ival = val;
1003 return PARAM;
1004 }
int32_t int32
Definition: c.h:484
long val
Definition: informix.c:689
int32 pg_strtoint32_safe(const char *s, Node *escontext)
Definition: numutils.c:389
bool error_occurred
Definition: miscnodes.h:47
Definition: nodes.h:129
1005{param_junk} {
	/* Anything matching {param_junk} is rejected outright; SET_YYLLOC() first so the error cursor is on this token */
1006 SET_YYLLOC();
1007 yyerror("trailing junk after parameter");
1008 }
1009
1010{decinteger} {
	/*
	 * The four integer rules differ only in the base (10, 16, 8, 2)
	 * passed as the last argument.  process_integer_literal() returns
	 * ICONST when the text fits in an int32 and FCONST otherwise.
	 */
1011 SET_YYLLOC();
1012 return process_integer_literal(yytext, yylval, 10);
1013 }
1014{hexinteger} {
1015 SET_YYLLOC();
1016 return process_integer_literal(yytext, yylval, 16);
1017 }
1018{octinteger} {
1019 SET_YYLLOC();
1020 return process_integer_literal(yytext, yylval, 8);
1021 }
1022{bininteger} {
1023 SET_YYLLOC();
1024 return process_integer_literal(yytext, yylval, 2);
1025 }
1026{hexfail} {
	/* The *fail rules turn a malformed non-decimal literal into a base-specific error */
1027 SET_YYLLOC();
1028 yyerror("invalid hexadecimal integer");
1029 }
1030{octfail} {
1031 SET_YYLLOC();
1032 yyerror("invalid octal integer");
1033 }
1034{binfail} {
1035 SET_YYLLOC();
1036 yyerror("invalid binary integer");
1037 }
1038{numeric} {
	/* Non-integer literal: passed to the grammar as FCONST with a palloc'd copy of its text */
1039 SET_YYLLOC();
1040 yylval->str = pstrdup(yytext);
1041 return FCONST;
1042 }
1043{numericfail} {
1044 /* throw back the .., and treat as integer */
	/* yyless() trims the last two characters, so yytext holds only the digits when it is processed below */
1045 yyless(yyleng - 2);
1046 SET_YYLLOC();
1047 return process_integer_literal(yytext, yylval, 10);
1048 }
1049{real} {
	/* Same handling as {numeric}: the text is kept as a string and returned as FCONST */
1050 SET_YYLLOC();
1051 yylval->str = pstrdup(yytext);
1052 return FCONST;
1053 }
1054{realfail} {
	/* This rule and the three *_junk rules below all raise the same error */
1055 SET_YYLLOC();
1056 yyerror("trailing junk after numeric literal");
1057 }
1058{integer_junk} {
1059 SET_YYLLOC();
1060 yyerror("trailing junk after numeric literal");
1061 }
1062{numeric_junk} {
1063 SET_YYLLOC();
1064 yyerror("trailing junk after numeric literal");
1065 }
1066{real_junk} {
1067 SET_YYLLOC();
1068 yyerror("trailing junk after numeric literal");
1069 }
1070
1071
1072{identifier} {
	/*
	 * Identifier or keyword.  Keywords are returned with their own
	 * token code; everything else becomes IDENT with a case-folded,
	 * possibly truncated copy of the text in yylval->str.
	 */
1073 int kwnum;
1074 char *ident;
1075
1076 SET_YYLLOC();
1077
1078 /* Is it a keyword? */
	/* kwnum is negative when yytext is not in yyextra->keywordlist */
1079 kwnum = ScanKeywordLookup(yytext,
1080 yyextra->keywordlist);
1081 if (kwnum >= 0)
1082 {
	/* kwnum indexes both the keyword list and the parallel keyword_tokens array */
1083 yylval->keyword = GetScanKeyword(kwnum,
1084 yyextra->keywordlist);
1085 return yyextra->keyword_tokens[kwnum];
1086 }
1087
1088 /*
1089 * No. Convert the identifier to lower case, and truncate
1090 * if necessary.
1091 */
	/* NOTE(review): the trailing "true" presumably requests a notice on truncation; confirm in scansup */
1092 ident = downcase_truncate_identifier(yytext, yyleng, true);
1093 yylval->str = ident;
1094 return IDENT;
1095 }
1096
1097{other} {
	/* Fallback rule: the first byte of the match is itself the token code handed to the grammar */
1098 SET_YYLLOC();
1099 return yytext[0];
1100 }
1101
1102<<EOF>> {
	/* Record the end-of-input position before terminating, so a later yyerror() points at the end of the buffer */
1103 SET_YYLLOC();
1104 yyterminate();
1105 }
1106
1107%%
1108
1109/* LCOV_EXCL_STOP */
1110
1111/*
1112 * Arrange access to yyextra for subroutines of the main yylex() function.
1113 * We expect each subroutine to have a yyscanner parameter. Rather than
1114 * use the yyget_xxx functions, which might or might not get inlined by the
1115 * compiler, we cheat just a bit and cast yyscanner to the right type.
1116 */
/* Every macro below expands in terms of a variable named yyscanner, so it is usable only where one is in scope. */
1117#undef yyextra
1118#define yyextra (((struct yyguts_t *) yyscanner)->yyextra_r)
1119
1120/* Likewise for a couple of other things we need. */
/* yylloc yields the yylloc_r member itself; the functions below dereference it (*yylloc) to read the location. */
1121#undef yylloc
1122#define yylloc (((struct yyguts_t *) yyscanner)->yylloc_r)
1123#undef yyleng
1124#define yyleng (((struct yyguts_t *) yyscanner)->yyleng_r)
1125
1126
1127/*
1128 * scanner_errposition
1129 * Report a lexer or grammar error cursor position, if possible.
1130 *
1131 * This is expected to be used within an ereport() call, or via an error
1132 * callback such as setup_scanner_errposition_callback(). The return value
1133 * is a dummy (always 0, in fact).
1134 *
1135 * Note that this can only be used for messages emitted during raw parsing
1136 * (essentially, scan.l, parser.c, and gram.y), since it requires the
1137 * yyscanner struct to still be available.
1138 */
1139int
1140scanner_errposition(int location, core_yyscan_t yyscanner)
1141{
1142 int pos;
1143
1144 if (location < 0)
1145 return 0; /* no-op if location is unknown */
1146
1147 /* Convert byte offset to character number */
1148 pos = pg_mbstrlen_with_len(yyextra->scanbuf, location) + 1;
1149 /* And pass it to the ereport mechanism */
1150 return errposition(pos);
1151}
1152
1153/*
1154 * Error context callback for inserting scanner error location.
1155 *
1156 * Note that this will be called for *any* error occurring while the
1157 * callback is installed. We avoid inserting an irrelevant error location
1158 * if the error is a query cancel --- are there any other important cases?
1159 */
1160static void
1162{
1164
1165 if (geterrcode() != ERRCODE_QUERY_CANCELED)
1166 (void) scanner_errposition(scbstate->location, scbstate->yyscanner);
1167}
1168
1169/*
1170 * setup_scanner_errposition_callback
1171 * Arrange for non-scanner errors to report an error position
1172 *
1173 * Sometimes the scanner calls functions that aren't part of the scanner
1174 * subsystem and can't reasonably be passed the yyscanner pointer; yet
1175 * we would like any errors thrown in those functions to be tagged with an
1176 * error location. Use this function to set up an error context stack
1177 * entry that will accomplish that. Usage pattern:
1178 *
1179 * declare a local variable "ScannerCallbackState scbstate"
1180 * ...
1181 * setup_scanner_errposition_callback(&scbstate, yyscanner, location);
1182 * call function that might throw error;
1183 * cancel_scanner_errposition_callback(&scbstate);
1184 */
1185void
1187 core_yyscan_t yyscanner,
1188 int location)
1189{
1190 /* Setup error traceback support for ereport() */
1191 scbstate->yyscanner = yyscanner;
1192 scbstate->location = location;
1194 scbstate->errcallback.arg = scbstate;
1196 error_context_stack = &scbstate->errcallback;
1197}
1198
1199/*
1200 * Cancel a previously-set-up errposition callback.
1201 */
1202void
1204{
1205 /* Pop the error context stack */
1207}
1208
1209/*
1210 * scanner_yyerror
1211 * Report a lexer or grammar error.
1212 *
1213 * The message's cursor position is whatever YYLLOC was last set to,
1214 * ie, the start of the current token if called within yylex(), or the
1215 * most recently lexed token if called from the grammar.
1216 * This is OK for syntax error messages from the Bison parser, because Bison
1217 * parsers report error as soon as the first unparsable token is reached.
1218 * Beware of using yyerror for other purposes, as the cursor position might
1219 * be misleading!
1220 */
1221void
1222scanner_yyerror(const char *message, core_yyscan_t yyscanner)
1223{
1224 const char *loc = yyextra->scanbuf + *yylloc;
1225
1226 if (*loc == YY_END_OF_BUFFER_CHAR)
1227 {
1228 ereport(ERROR,
1229 (errcode(ERRCODE_SYNTAX_ERROR),
1230 /* translator: %s is typically the translation of "syntax error" */
1231 errmsg("%s at end of input", _(message)),
1233 }
1234 else
1235 {
1236 ereport(ERROR,
1237 (errcode(ERRCODE_SYNTAX_ERROR),
1238 /* translator: first %s is typically the translation of "syntax error" */
1239 errmsg("%s at or near \"%s\"", _(message), loc),
1241 }
1242}
1243
1244
1245/*
1246 * Called before any actual parsing is done
1247 */
1249scanner_init(const char *str,
1250 core_yy_extra_type *yyext,
1251 const ScanKeywordList *keywordlist,
1252 const uint16 *keyword_tokens)
1253{
1254 Size slen = strlen(str);
1255 yyscan_t scanner;
1256
1257 if (yylex_init(&scanner) != 0)
1258 elog(ERROR, "yylex_init() failed: %m");
1259
1260 core_yyset_extra(yyext, scanner);
1261
1262 yyext->keywordlist = keywordlist;
1263 yyext->keyword_tokens = keyword_tokens;
1264
1268
1269 /*
1270 * Make a scan buffer with special termination needed by flex.
1271 */
1272 yyext->scanbuf = (char *) palloc(slen + 2);
1273 yyext->scanbuflen = slen;
1274 memcpy(yyext->scanbuf, str, slen);
1275 yyext->scanbuf[slen] = yyext->scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
1276 yy_scan_buffer(yyext->scanbuf, slen + 2, scanner);
1277
1278 /* initialize literal buffer to a reasonable but expansible size */
1279 yyext->literalalloc = 1024;
1280 yyext->literalbuf = (char *) palloc(yyext->literalalloc);
1281 yyext->literallen = 0;
1282
1283 return scanner;
1284}
1285
1286
1287/*
1288 * Called after parsing is done to clean up after scanner_init()
1289 */
1290void
1292{
1293 /*
1294 * We don't bother to call yylex_destroy(), because all it would do is
1295 * pfree a small amount of control storage. It's cheaper to leak the
1296 * storage until the parsing context is destroyed. The amount of space
1297 * involved is usually negligible compared to the output parse tree
1298 * anyway.
1299 *
1300 * We do bother to pfree the scanbuf and literal buffer, but only if they
1301 * represent a nontrivial amount of space. The 8K cutoff is arbitrary.
1302 */
1303 if (yyextra->scanbuflen >= 8192)
1304 pfree(yyextra->scanbuf);
1305 if (yyextra->literalalloc >= 8192)
1306 pfree(yyextra->literalbuf);
1307}
1308
1309
1310static void
1311addlit(char *ytext, int yleng, core_yyscan_t yyscanner)
1312{
1313 /* enlarge buffer if needed */
1314 if ((yyextra->literallen + yleng) >= yyextra->literalalloc)
1315 {
1316 yyextra->literalalloc = pg_nextpower2_32(yyextra->literallen + yleng + 1);
1317 yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf,
1318 yyextra->literalalloc);
1319 }
1320 /* append new data */
1321 memcpy(yyextra->literalbuf + yyextra->literallen, ytext, yleng);
1322 yyextra->literallen += yleng;
1323}
1324
1325
1326static void
1327addlitchar(unsigned char ychar, core_yyscan_t yyscanner)
1328{
1329 /* enlarge buffer if needed */
1330 if ((yyextra->literallen + 1) >= yyextra->literalalloc)
1331 {
1332 yyextra->literalalloc *= 2;
1333 yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf,
1334 yyextra->literalalloc);
1335 }
1336 /* append new data */
1337 yyextra->literalbuf[yyextra->literallen] = ychar;
1338 yyextra->literallen += 1;
1339}
1340
1341
1342/*
1343 * Create a palloc'd copy of literalbuf, adding a trailing null.
1344 */
1345static char *
1347{
1348 int llen = yyextra->literallen;
1349 char *new;
1350
1351 new = palloc(llen + 1);
1352 memcpy(new, yyextra->literalbuf, llen);
1353 new[llen] = '\0';
1354 return new;
1355}
1356
1357/*
1358 * Process {decinteger}, {hexinteger}, etc. Note this will also do the right
1359 * thing with {numeric}, ie digits and a decimal point.
1360 */
1361static int
1362process_integer_literal(const char *token, YYSTYPE *lval, int base)
1363{
1364 ErrorSaveContext escontext = {T_ErrorSaveContext};
1365 int32 val;
1366
1367 val = pg_strtoint32_safe(token, (Node *) &escontext);
1368 if (escontext.error_occurred)
1369 {
1370 /* integer too large (or contains decimal pt), treat it as a float */
1371 lval->str = pstrdup(token);
1372 return FCONST;
1373 }
1374 lval->ival = val;
1375 return ICONST;
1376}
1377
1378static void
1380{
1381 ScannerCallbackState scbstate;
1383
1385 yyerror("invalid Unicode escape value");
1386
1387 /*
1388 * We expect that pg_unicode_to_server() will complain about any
1389 * unconvertible code point, so we don't have to set saw_non_ascii.
1390 */
1391 setup_scanner_errposition_callback(&scbstate, yyscanner, *(yylloc));
1392 pg_unicode_to_server(c, (unsigned char *) buf);
1394 addlit(buf, strlen(buf), yyscanner);
1395}
1396
1397static unsigned char
1398unescape_single_char(unsigned char c, core_yyscan_t yyscanner)
1399{
1400 switch (c)
1401 {
1402 case 'b':
1403 return '\b';
1404 case 'f':
1405 return '\f';
1406 case 'n':
1407 return '\n';
1408 case 'r':
1409 return '\r';
1410 case 't':
1411 return '\t';
1412 case 'v':
1413 return '\v';
1414 default:
1415 /* check for backslash followed by non-7-bit-ASCII */
1416 if (c == '\0' || IS_HIGHBIT_SET(c))
1417 yyextra->saw_non_ascii = true;
1418
1419 return c;
1420 }
1421}
1422
1423static void
1424check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner)
1425{
1426 if (ychar == '\'')
1427 {
1428 if (yyextra->warn_on_first_escape && yyextra->escape_string_warning)
1430 (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1431 errmsg("nonstandard use of \\' in a string literal"),
1432 errhint("Use '' to write quotes in strings, or use the escape string syntax (E'...')."),
1434 yyextra->warn_on_first_escape = false; /* warn only once per string */
1435 }
1436 else if (ychar == '\\')
1437 {
1438 if (yyextra->warn_on_first_escape && yyextra->escape_string_warning)
1440 (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1441 errmsg("nonstandard use of \\\\ in a string literal"),
1442 errhint("Use the escape string syntax for backslashes, e.g., E'\\\\'."),
1444 yyextra->warn_on_first_escape = false; /* warn only once per string */
1445 }
1446 else
1447 check_escape_warning(yyscanner);
1448}
1449
1450static void
1452{
1453 if (yyextra->warn_on_first_escape && yyextra->escape_string_warning)
1455 (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1456 errmsg("nonstandard use of escape in a string literal"),
1457 errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."),
1459 yyextra->warn_on_first_escape = false; /* warn only once per string */
1460}
1461
1462/*
1463 * Interface functions to make flex use palloc() instead of malloc().
1464 * It'd be better to make these static, but flex insists otherwise.
1465 */
1466
1467void *
1468core_yyalloc(yy_size_t bytes, core_yyscan_t yyscanner)
1469{
1470 return palloc(bytes);
1471}
1472
1473void *
1474core_yyrealloc(void *ptr, yy_size_t bytes, core_yyscan_t yyscanner)
1475{
1476 if (ptr)
1477 return repalloc(ptr, bytes);
1478 else
1479 return palloc(bytes);
1480}
1481
1482void
1483core_yyfree(void *ptr, core_yyscan_t yyscanner)
1484{
1485 if (ptr)
1486 pfree(ptr);
1487}
size_t Size
Definition: c.h:562
int geterrcode(void)
Definition: elog.c:1561
ErrorContextCallback * error_context_stack
Definition: elog.c:94
int errposition(int cursorpos)
Definition: elog.c:1446
#define _(x)
Definition: elog.c:90
#define WARNING
Definition: elog.h:36
#define elog(elevel,...)
Definition: elog.h:225
const char * str
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:1057
void pg_unicode_to_server(pg_wchar c, unsigned char *s)
Definition: mbutils.c:864
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
void * arg
static uint32 pg_nextpower2_32(uint32 num)
Definition: pg_bitutils.h:189
static char * buf
Definition: pg_test_fsync.c:72
#define MAX_UNICODE_EQUIVALENT_STRING
Definition: pg_wchar.h:329
static bool is_valid_unicode_codepoint(pg_wchar c)
Definition: pg_wchar.h:519
int scanner_errposition(int location, core_yyscan_t yyscanner)
Definition: scan.l:1140
core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, const ScanKeywordList *keywordlist, const uint16 *keyword_tokens)
Definition: scan.l:1249
#define yylloc
Definition: scan.l:1122
void * core_yyrealloc(void *ptr, yy_size_t bytes, core_yyscan_t yyscanner)
Definition: scan.l:1474
static void scb_error_callback(void *arg)
Definition: scan.l:1161
void setup_scanner_errposition_callback(ScannerCallbackState *scbstate, core_yyscan_t yyscanner, int location)
Definition: scan.l:1186
void scanner_finish(core_yyscan_t yyscanner)
Definition: scan.l:1291
void cancel_scanner_errposition_callback(ScannerCallbackState *scbstate)
Definition: scan.l:1203
void * core_yyalloc(yy_size_t bytes, core_yyscan_t yyscanner)
Definition: scan.l:1468
void core_yyfree(void *ptr, core_yyscan_t yyscanner)
Definition: scan.l:1483
void scanner_yyerror(const char *message, core_yyscan_t yyscanner)
Definition: scan.l:1222
struct ErrorContextCallback * previous
Definition: elog.h:296
void(* callback)(void *arg)
Definition: elog.h:297
ErrorContextCallback errcallback
Definition: scanner.h:128
core_yyscan_t yyscanner
Definition: scanner.h:126
bool escape_string_warning
Definition: scanner.h:88
char * literalbuf
Definition: scanner.h:98
bool standard_conforming_strings
Definition: scanner.h:89
const ScanKeywordList * keywordlist
Definition: scanner.h:78
char * scanbuf
Definition: scanner.h:72
const uint16 * keyword_tokens
Definition: scanner.h:79