PostgreSQL Source Code  git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
scanner.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * scanner.h
4  * API for the core scanner (flex machine)
5  *
6  * The core scanner is also used by PL/pgSQL, so we provide a public API
7  * for it. However, the rest of the backend is only expected to use the
8  * higher-level API provided by parser.h.
9  *
10  *
11  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
12  * Portions Copyright (c) 1994, Regents of the University of California
13  *
14  * src/include/parser/scanner.h
15  *
16  *-------------------------------------------------------------------------
17  */
18 
19 #ifndef SCANNER_H
20 #define SCANNER_H
21 
22 #include "common/keywords.h"
23 
/*
 * The scanner returns extra data about scanned tokens in this union type.
 * Note that this is a subset of the fields used in YYSTYPE of the bison
 * parsers built atop the scanner.
 */
typedef union core_YYSTYPE
{
	int			ival;			/* for integer literals */
	char	   *str;			/* for identifiers and non-integer literals */
	const char *keyword;		/* canonical spelling of keywords */
} core_YYSTYPE;
35 
/*
 * We track token locations in terms of byte offsets from the start of the
 * source string, not the column number/line number representation that
 * bison uses by default.  Also, to minimize overhead we track only one
 * location (usually the first token location) for each construct, not
 * the beginning and ending locations as bison does by default.  It's
 * therefore sufficient to make YYLTYPE an int.
 */
#define YYLTYPE  int
45 
46 /*
47  * Another important component of the scanner's API is the token code numbers.
48  * However, those are not defined in this file, because bison insists on
49  * defining them for itself. The token codes used by the core scanner are
50  * the ASCII characters plus these:
51  * %token <str> IDENT UIDENT FCONST SCONST USCONST BCONST XCONST Op
52  * %token <ival> ICONST PARAM
53  * %token TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
54  * %token LESS_EQUALS GREATER_EQUALS NOT_EQUALS
55  * The above token definitions *must* be the first ones declared in any
56  * bison parser built atop this scanner, so that they will have consistent
57  * numbers assigned to them (specifically, IDENT = 258 and so on).
58  */
59 
60 /*
61  * The YY_EXTRA data that a flex scanner allows us to pass around.
62  * Private state needed by the core scanner goes here. Note that the actual
63  * yy_extra struct may be larger and have this as its first component, thus
64  * allowing the calling parser to keep some fields of its own in YY_EXTRA.
65  */
66 typedef struct core_yy_extra_type
67 {
68  /*
69  * The string the scanner is physically scanning. We keep this mainly so
70  * that we can cheaply compute the offset of the current token (yytext).
71  */
72  char *scanbuf;
74 
75  /*
76  * The keyword list to use, and the associated grammar token codes.
77  */
80 
81  /*
82  * Scanner settings to use. These are initialized from the corresponding
83  * GUC variables by scanner_init(). Callers can modify them after
84  * scanner_init() if they don't want the scanner's behavior to follow the
85  * prevailing GUC settings.
86  */
90 
91  /*
92  * literalbuf is used to accumulate literal values when multiple rules are
93  * needed to parse a single literal. Call startlit() to reset buffer to
94  * empty, addlit() to add text. NOTE: the string in literalbuf is NOT
95  * necessarily null-terminated, but there always IS room to add a trailing
96  * null at offset literallen. We store a null only when we need it.
97  */
98  char *literalbuf; /* palloc'd expandable buffer */
99  int literallen; /* actual current string length */
100  int literalalloc; /* current allocated buffer size */
101 
102  /*
103  * Random assorted scanner state.
104  */
105  int state_before_str_stop; /* start cond. before end quote */
106  int xcdepth; /* depth of nesting in slash-star comments */
107  char *dolqstart; /* current $foo$ quote start string */
108  YYLTYPE save_yylloc; /* one-element stack for PUSH_YYLLOC() */
109 
110  /* first part of UTF16 surrogate pair for Unicode escapes */
112 
113  /* state variables for literal-lexing warnings */
117 
/*
 * The type of yyscanner is opaque outside scan.l.
 */
typedef void *core_yyscan_t;
122 
123 /* Support for scanner_errposition_callback function */
124 typedef struct ScannerCallbackState
125 {
127  int location;
130 
131 
132 /* Constant data exported from parser/scan.l */
133 extern PGDLLIMPORT const uint16 ScanKeywordTokens[];
134 
135 /* Entry points in parser/scan.l */
136 extern core_yyscan_t scanner_init(const char *str,
137  core_yy_extra_type *yyext,
138  const ScanKeywordList *keywordlist,
139  const uint16 *keyword_tokens);
141 extern int core_yylex(core_YYSTYPE *yylval_param, YYLTYPE *yylloc_param,
143 extern int scanner_errposition(int location, core_yyscan_t yyscanner);
146  int location);
148 extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner) pg_attribute_noreturn();
149 
150 #endif /* SCANNER_H */
unsigned short uint16
Definition: c.h:517
#define PGDLLIMPORT
Definition: c.h:1321
signed int int32
Definition: c.h:508
#define pg_attribute_noreturn()
Definition: c.h:232
size_t Size
Definition: c.h:610
const char * str
static core_yyscan_t yyscanner
Definition: pl_scanner.c:106
#define YYLTYPE
Definition: scanner.h:44
int scanner_errposition(int location, core_yyscan_t yyscanner)
core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, const ScanKeywordList *keywordlist, const uint16 *keyword_tokens)
void setup_scanner_errposition_callback(ScannerCallbackState *scbstate, core_yyscan_t yyscanner, int location)
void scanner_finish(core_yyscan_t yyscanner)
PGDLLIMPORT const uint16 ScanKeywordTokens[]
void cancel_scanner_errposition_callback(ScannerCallbackState *scbstate)
union core_YYSTYPE core_YYSTYPE
struct core_yy_extra_type core_yy_extra_type
void * core_yyscan_t
Definition: scanner.h:121
struct ScannerCallbackState ScannerCallbackState
int core_yylex(core_YYSTYPE *yylval_param, YYLTYPE *yylloc_param, core_yyscan_t yyscanner)
void scanner_yyerror(const char *message, core_yyscan_t yyscanner) pg_attribute_noreturn()
ErrorContextCallback errcallback
Definition: scanner.h:128
core_yyscan_t yyscanner
Definition: scanner.h:126
bool escape_string_warning
Definition: scanner.h:88
int state_before_str_stop
Definition: scanner.h:105
YYLTYPE save_yylloc
Definition: scanner.h:108
char * literalbuf
Definition: scanner.h:98
bool warn_on_first_escape
Definition: scanner.h:114
bool standard_conforming_strings
Definition: scanner.h:89
char * dolqstart
Definition: scanner.h:107
const ScanKeywordList * keywordlist
Definition: scanner.h:78
char * scanbuf
Definition: scanner.h:72
const uint16 * keyword_tokens
Definition: scanner.h:79
int32 utf16_first_part
Definition: scanner.h:111
int ival
Definition: scanner.h:31
const char * keyword
Definition: scanner.h:33
char * str
Definition: scanner.h:32