PostgreSQL Source Code  git master
psqlscan_int.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * psqlscan_int.h
4  * lexical scanner internal declarations
5  *
6  * This file declares the PsqlScanStateData structure used by psqlscan.l
7  * and shared by other lexers compatible with it, such as psqlscanslash.l.
8  *
9  * One difficult aspect of this code is that we need to work in multibyte
10  * encodings that are not ASCII-safe. A "safe" encoding is one in which each
11  * byte of a multibyte character has the high bit set (it's >= 0x80). Since
12  * all our lexing rules treat all high-bit-set characters alike, we don't
13  * really need to care whether such a byte is part of a sequence or not.
14  * In an "unsafe" encoding, we still expect the first byte of a multibyte
15  * sequence to be >= 0x80, but later bytes might not be. If we scan such
16  * a sequence as-is, the lexing rules could easily be fooled into matching
17  * such bytes to ordinary ASCII characters. Our solution for this is to
18  * substitute 0xFF for each non-first byte within the data presented to flex.
19  * The flex rules will then pass the FF's through unmolested. The
20  * psqlscan_emit() subroutine is responsible for looking back to the original
21  * string and replacing FF's with the corresponding original bytes.
22  *
23  * Another interesting thing we do here is scan different parts of the same
24  * input with physically separate flex lexers (ie, lexers written in separate
25  * .l files). We can get away with this because the only part of the
26  * persistent state of a flex lexer that depends on its parsing rule tables
27  * is the start state number, which is easy enough to manage --- usually,
28  * in fact, we just need to set it to INITIAL when changing lexers. But to
29  * make that work at all, we must use re-entrant lexers, so that all the
30  * relevant state is in the yyscan_t attached to the PsqlScanState;
31  * if we were using lexers with separate static state we would soon end up
32  * with dangling buffer pointers in one or the other. Also note that this
33  * is unlikely to work very nicely if the lexers aren't all built with the
34  * same flex version, or if they don't use the same flex options.
35  *
36  *
37  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
38  * Portions Copyright (c) 1994, Regents of the University of California
39  *
40  * src/include/fe_utils/psqlscan_int.h
41  *
42  *-------------------------------------------------------------------------
43  */
44 #ifndef PSQLSCAN_INT_H
45 #define PSQLSCAN_INT_H
46 
47 #include "fe_utils/psqlscan.h"
48 
/*
 * Fallback declarations of the flex handle types used below.
 *
 * These are just to allow this file to be compilable standalone for header
 * validity checking; in actual use, this file should always be included
 * from the body of a flex file, where these symbols are already defined.
 * Each typedef is skipped when its YY_TYPEDEF_* guard macro is already
 * defined, so the declarations here never collide with an earlier one.
 */
#ifndef YY_TYPEDEF_YY_BUFFER_STATE
#define YY_TYPEDEF_YY_BUFFER_STATE
typedef struct yy_buffer_state *YY_BUFFER_STATE;
#endif
#ifndef YY_TYPEDEF_YY_SCANNER_T
#define YY_TYPEDEF_YY_SCANNER_T
typedef void *yyscan_t;
#endif

63 /*
64  * We use a stack of flex buffers to handle substitution of psql variables.
65  * Each stacked buffer contains the as-yet-unread text from one psql variable.
66  * When we pop the stack all the way, we resume reading from the outer buffer
67  * identified by scanbufhandle.
68  */
69 typedef struct StackElem
70 {
71  YY_BUFFER_STATE buf; /* flex input control structure */
72  char *bufstring; /* data actually being scanned by flex */
73  char *origstring; /* copy of original data, if needed */
74  char *varname; /* name of variable providing data, or NULL */
75  struct StackElem *next;
77 
78 /*
79  * All working state of the lexer must be stored in PsqlScanStateData
80  * between calls. This allows us to have multiple open lexer operations,
81  * which is needed for nested include files. The lexer itself is not
82  * recursive, but it must be re-entrant.
83  */
84 typedef struct PsqlScanStateData
85 {
86  yyscan_t scanner; /* Flex's state for this PsqlScanState */
87 
88  PQExpBuffer output_buf; /* current output buffer */
89 
90  StackElem *buffer_stack; /* stack of variable expansion buffers */
91 
92  /*
93  * These variables always refer to the outer buffer, never to any stacked
94  * variable-expansion buffer.
95  */
97  char *scanbuf; /* start of outer-level input buffer */
98  const char *scanline; /* current input line at outer level */
99 
100  /* safe_encoding, curline, refline are used by emit() to replace FFs */
101  int encoding; /* encoding being used now */
102  bool safe_encoding; /* is current encoding "safe"? */
103  bool std_strings; /* are string literals standard? */
104  const char *curline; /* actual flex input string for cur buf */
105  const char *refline; /* original data for cur buffer */
106 
107  /*
108  * All this state lives across successive input lines, until explicitly
109  * reset by psql_scan_reset. start_state is adopted by yylex() on entry,
110  * and updated with its finishing state on exit.
111  */
112  int start_state; /* yylex's starting/finishing state */
113  int state_before_str_stop; /* start cond. before end quote */
114  int paren_depth; /* depth of nesting in parentheses */
115  int xcdepth; /* depth of nesting in slash-star comments */
116  char *dolqstart; /* current $foo$ quote start string */
117 
118  /*
119  * State to track boundaries of BEGIN ... END blocks in function
120  * definitions, so that semicolons do not send query too early.
121  */
122  int identifier_count; /* identifiers since start of statement */
123  char identifiers[4]; /* records the first few identifiers */
124  int begin_depth; /* depth of begin/end pairs */
125 
126  /*
127  * Callback functions provided by the program making use of the lexer,
128  * plus a void* callback passthrough argument.
129  */
133 
134 
135 /*
136  * Functions exported by psqlscan.l, but only meant for use within
137  * compatible lexers.
138  */
140  const char *newstr, const char *varname);
144  const char *varname);
146  const char *txt, int len,
147  char **txtcopy);
148 extern void psqlscan_emit(PsqlScanState state, const char *txt, int len);
150  const char *txt, int len);
152  const char *txt, int len,
153  PsqlScanQuoteType quote);
155  const char *txt, int len);
156 
157 #endif /* PSQLSCAN_INT_H */
const void size_t len
PsqlScanQuoteType
Definition: psqlscan.h:53
void * yyscan_t
Definition: psqlscan_int.h:60
void psqlscan_escape_variable(PsqlScanState state, const char *txt, int len, PsqlScanQuoteType quote)
struct StackElem StackElem
YY_BUFFER_STATE psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len, char **txtcopy)
void psqlscan_select_top_buffer(PsqlScanState state)
struct PsqlScanStateData PsqlScanStateData
void psqlscan_emit(PsqlScanState state, const char *txt, int len)
struct yy_buffer_state * YY_BUFFER_STATE
Definition: psqlscan_int.h:56
void psqlscan_push_new_buffer(PsqlScanState state, const char *newstr, const char *varname)
void psqlscan_pop_buffer_stack(PsqlScanState state)
void psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
char * psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
bool psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
const char * refline
Definition: psqlscan_int.h:105
StackElem * buffer_stack
Definition: psqlscan_int.h:90
const char * scanline
Definition: psqlscan_int.h:98
PQExpBuffer output_buf
Definition: psqlscan_int.h:88
const PsqlScanCallbacks * callbacks
Definition: psqlscan_int.h:130
YY_BUFFER_STATE scanbufhandle
Definition: psqlscan_int.h:96
const char * curline
Definition: psqlscan_int.h:104
char * origstring
Definition: psqlscan_int.h:73
char * varname
Definition: psqlscan_int.h:74
YY_BUFFER_STATE buf
Definition: psqlscan_int.h:71
char * bufstring
Definition: psqlscan_int.h:72
struct StackElem * next
Definition: psqlscan_int.h:75
Definition: regguts.h:323