PostgreSQL Source Code  git master
pl_scanner.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pl_scanner.c
4  * lexical scanning for PL/pgSQL
5  *
6  *
7  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  *
11  * IDENTIFICATION
12  * src/pl/plpgsql/src/pl_scanner.c
13  *
14  *-------------------------------------------------------------------------
15  */
16 #include "postgres.h"
17 
18 #include "mb/pg_wchar.h"
19 #include "parser/scanner.h"
20 
21 #include "plpgsql.h"
22 #include "pl_gram.h" /* must be after parser/scanner.h */
23 
24 
25 #define PG_KEYWORD(a,b,c) {a,b,c},
26 
27 
28 /* Klugy flag to tell scanner how to look up identifiers */
30 
31 /*
32  * A word about keywords:
33  *
34  * We keep reserved and unreserved keywords in separate arrays. The
35  * reserved keywords are passed to the core scanner, so they will be
36  * recognized before (and instead of) any variable name. Unreserved words
37  * are checked for separately, usually after determining that the identifier
38  * isn't a known variable name. If plpgsql_IdentifierLookup is DECLARE then
39  * no variable names will be recognized, so the unreserved words always work.
40  * (Note in particular that this helps us avoid reserving keywords that are
41  * only needed in DECLARE sections.)
42  *
43  * In certain contexts it is desirable to prefer recognizing an unreserved
44  * keyword over recognizing a variable name. In particular, at the start
45  * of a statement we should prefer unreserved keywords unless the statement
46  * looks like an assignment (i.e., first token is followed by ':=' or '[').
47  * This rule allows most statement-introducing keywords to be kept unreserved.
48  * (We still have to reserve initial keywords that might follow a block
49  * label, unfortunately, since the method used to determine if we are at
50  * start of statement doesn't recognize such cases. We'd also have to
51  * reserve any keyword that could legitimately be followed by ':=' or '['.)
52  * Some additional cases are handled in pl_gram.y using tok_is_keyword().
53  *
54  * We try to avoid reserving more keywords than we have to; but there's
55  * little point in not reserving a word if it's reserved in the core grammar.
56  * Currently, the following words are reserved here but not in the core:
57  * BEGIN BY DECLARE EXECUTE FOREACH IF LOOP STRICT WHILE
58  */
59 
60 /*
61  * Lists of keyword (name, token-value, category) entries.
62  *
63  * !!WARNING!!: These lists must be sorted by ASCII name, because binary
64  * search is used to locate entries.
65  *
66  * Be careful not to put the same word in both lists. Also be sure that
67  * pl_gram.y's unreserved_keyword production agrees with the second list.
68  */
69 
70 static const ScanKeyword reserved_keywords[] = {
71  PG_KEYWORD("all", K_ALL, RESERVED_KEYWORD)
72  PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)
73  PG_KEYWORD("by", K_BY, RESERVED_KEYWORD)
74  PG_KEYWORD("case", K_CASE, RESERVED_KEYWORD)
75  PG_KEYWORD("declare", K_DECLARE, RESERVED_KEYWORD)
76  PG_KEYWORD("else", K_ELSE, RESERVED_KEYWORD)
77  PG_KEYWORD("end", K_END, RESERVED_KEYWORD)
78  PG_KEYWORD("execute", K_EXECUTE, RESERVED_KEYWORD)
79  PG_KEYWORD("for", K_FOR, RESERVED_KEYWORD)
80  PG_KEYWORD("foreach", K_FOREACH, RESERVED_KEYWORD)
81  PG_KEYWORD("from", K_FROM, RESERVED_KEYWORD)
82  PG_KEYWORD("if", K_IF, RESERVED_KEYWORD)
83  PG_KEYWORD("in", K_IN, RESERVED_KEYWORD)
84  PG_KEYWORD("into", K_INTO, RESERVED_KEYWORD)
85  PG_KEYWORD("loop", K_LOOP, RESERVED_KEYWORD)
86  PG_KEYWORD("not", K_NOT, RESERVED_KEYWORD)
87  PG_KEYWORD("null", K_NULL, RESERVED_KEYWORD)
88  PG_KEYWORD("or", K_OR, RESERVED_KEYWORD)
89  PG_KEYWORD("strict", K_STRICT, RESERVED_KEYWORD)
90  PG_KEYWORD("then", K_THEN, RESERVED_KEYWORD)
91  PG_KEYWORD("to", K_TO, RESERVED_KEYWORD)
92  PG_KEYWORD("using", K_USING, RESERVED_KEYWORD)
93  PG_KEYWORD("when", K_WHEN, RESERVED_KEYWORD)
94  PG_KEYWORD("while", K_WHILE, RESERVED_KEYWORD)
95 };
96 
97 static const int num_reserved_keywords = lengthof(reserved_keywords);
98 
99 static const ScanKeyword unreserved_keywords[] = {
100  PG_KEYWORD("absolute", K_ABSOLUTE, UNRESERVED_KEYWORD)
101  PG_KEYWORD("alias", K_ALIAS, UNRESERVED_KEYWORD)
102  PG_KEYWORD("array", K_ARRAY, UNRESERVED_KEYWORD)
103  PG_KEYWORD("assert", K_ASSERT, UNRESERVED_KEYWORD)
104  PG_KEYWORD("backward", K_BACKWARD, UNRESERVED_KEYWORD)
105  PG_KEYWORD("call", K_CALL, UNRESERVED_KEYWORD)
106  PG_KEYWORD("close", K_CLOSE, UNRESERVED_KEYWORD)
107  PG_KEYWORD("collate", K_COLLATE, UNRESERVED_KEYWORD)
108  PG_KEYWORD("column", K_COLUMN, UNRESERVED_KEYWORD)
109  PG_KEYWORD("column_name", K_COLUMN_NAME, UNRESERVED_KEYWORD)
110  PG_KEYWORD("commit", K_COMMIT, UNRESERVED_KEYWORD)
111  PG_KEYWORD("constant", K_CONSTANT, UNRESERVED_KEYWORD)
112  PG_KEYWORD("constraint", K_CONSTRAINT, UNRESERVED_KEYWORD)
113  PG_KEYWORD("constraint_name", K_CONSTRAINT_NAME, UNRESERVED_KEYWORD)
114  PG_KEYWORD("continue", K_CONTINUE, UNRESERVED_KEYWORD)
115  PG_KEYWORD("current", K_CURRENT, UNRESERVED_KEYWORD)
116  PG_KEYWORD("cursor", K_CURSOR, UNRESERVED_KEYWORD)
117  PG_KEYWORD("datatype", K_DATATYPE, UNRESERVED_KEYWORD)
118  PG_KEYWORD("debug", K_DEBUG, UNRESERVED_KEYWORD)
119  PG_KEYWORD("default", K_DEFAULT, UNRESERVED_KEYWORD)
120  PG_KEYWORD("detail", K_DETAIL, UNRESERVED_KEYWORD)
121  PG_KEYWORD("diagnostics", K_DIAGNOSTICS, UNRESERVED_KEYWORD)
122  PG_KEYWORD("do", K_DO, UNRESERVED_KEYWORD)
123  PG_KEYWORD("dump", K_DUMP, UNRESERVED_KEYWORD)
124  PG_KEYWORD("elseif", K_ELSIF, UNRESERVED_KEYWORD)
125  PG_KEYWORD("elsif", K_ELSIF, UNRESERVED_KEYWORD)
126  PG_KEYWORD("errcode", K_ERRCODE, UNRESERVED_KEYWORD)
127  PG_KEYWORD("error", K_ERROR, UNRESERVED_KEYWORD)
128  PG_KEYWORD("exception", K_EXCEPTION, UNRESERVED_KEYWORD)
129  PG_KEYWORD("exit", K_EXIT, UNRESERVED_KEYWORD)
130  PG_KEYWORD("fetch", K_FETCH, UNRESERVED_KEYWORD)
131  PG_KEYWORD("first", K_FIRST, UNRESERVED_KEYWORD)
132  PG_KEYWORD("forward", K_FORWARD, UNRESERVED_KEYWORD)
133  PG_KEYWORD("get", K_GET, UNRESERVED_KEYWORD)
134  PG_KEYWORD("hint", K_HINT, UNRESERVED_KEYWORD)
135  PG_KEYWORD("import", K_IMPORT, UNRESERVED_KEYWORD)
136  PG_KEYWORD("info", K_INFO, UNRESERVED_KEYWORD)
137  PG_KEYWORD("insert", K_INSERT, UNRESERVED_KEYWORD)
138  PG_KEYWORD("is", K_IS, UNRESERVED_KEYWORD)
139  PG_KEYWORD("last", K_LAST, UNRESERVED_KEYWORD)
140  PG_KEYWORD("log", K_LOG, UNRESERVED_KEYWORD)
141  PG_KEYWORD("message", K_MESSAGE, UNRESERVED_KEYWORD)
142  PG_KEYWORD("message_text", K_MESSAGE_TEXT, UNRESERVED_KEYWORD)
143  PG_KEYWORD("move", K_MOVE, UNRESERVED_KEYWORD)
144  PG_KEYWORD("next", K_NEXT, UNRESERVED_KEYWORD)
145  PG_KEYWORD("no", K_NO, UNRESERVED_KEYWORD)
146  PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD)
147  PG_KEYWORD("open", K_OPEN, UNRESERVED_KEYWORD)
148  PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD)
149  PG_KEYWORD("perform", K_PERFORM, UNRESERVED_KEYWORD)
150  PG_KEYWORD("pg_context", K_PG_CONTEXT, UNRESERVED_KEYWORD)
151  PG_KEYWORD("pg_datatype_name", K_PG_DATATYPE_NAME, UNRESERVED_KEYWORD)
152  PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT, UNRESERVED_KEYWORD)
153  PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL, UNRESERVED_KEYWORD)
154  PG_KEYWORD("pg_exception_hint", K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD)
155  PG_KEYWORD("print_strict_params", K_PRINT_STRICT_PARAMS, UNRESERVED_KEYWORD)
156  PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD)
157  PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD)
158  PG_KEYWORD("raise", K_RAISE, UNRESERVED_KEYWORD)
159  PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD)
160  PG_KEYWORD("reset", K_RESET, UNRESERVED_KEYWORD)
161  PG_KEYWORD("result_oid", K_RESULT_OID, UNRESERVED_KEYWORD)
162  PG_KEYWORD("return", K_RETURN, UNRESERVED_KEYWORD)
163  PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE, UNRESERVED_KEYWORD)
164  PG_KEYWORD("reverse", K_REVERSE, UNRESERVED_KEYWORD)
165  PG_KEYWORD("rollback", K_ROLLBACK, UNRESERVED_KEYWORD)
166  PG_KEYWORD("row_count", K_ROW_COUNT, UNRESERVED_KEYWORD)
167  PG_KEYWORD("rowtype", K_ROWTYPE, UNRESERVED_KEYWORD)
168  PG_KEYWORD("schema", K_SCHEMA, UNRESERVED_KEYWORD)
169  PG_KEYWORD("schema_name", K_SCHEMA_NAME, UNRESERVED_KEYWORD)
170  PG_KEYWORD("scroll", K_SCROLL, UNRESERVED_KEYWORD)
171  PG_KEYWORD("set", K_SET, UNRESERVED_KEYWORD)
172  PG_KEYWORD("slice", K_SLICE, UNRESERVED_KEYWORD)
173  PG_KEYWORD("sqlstate", K_SQLSTATE, UNRESERVED_KEYWORD)
174  PG_KEYWORD("stacked", K_STACKED, UNRESERVED_KEYWORD)
175  PG_KEYWORD("table", K_TABLE, UNRESERVED_KEYWORD)
176  PG_KEYWORD("table_name", K_TABLE_NAME, UNRESERVED_KEYWORD)
177  PG_KEYWORD("type", K_TYPE, UNRESERVED_KEYWORD)
178  PG_KEYWORD("use_column", K_USE_COLUMN, UNRESERVED_KEYWORD)
179  PG_KEYWORD("use_variable", K_USE_VARIABLE, UNRESERVED_KEYWORD)
180  PG_KEYWORD("variable_conflict", K_VARIABLE_CONFLICT, UNRESERVED_KEYWORD)
181  PG_KEYWORD("warning", K_WARNING, UNRESERVED_KEYWORD)
182 };
183 
184 static const int num_unreserved_keywords = lengthof(unreserved_keywords);
185 
/*
 * This macro must recognize all tokens that can immediately precede a
 * PL/pgSQL executable statement (that is, proc_sect or proc_stmt in the
 * grammar).  Fortunately, there are not very many, so hard-coding in this
 * fashion seems sufficient.
 *
 * The argument is evaluated more than once, so pass a side-effect-free
 * expression.
 */
#define AT_STMT_START(prev_token) \
	((prev_token) == ';' || \
	 (prev_token) == K_BEGIN || \
	 (prev_token) == K_THEN || \
	 (prev_token) == K_ELSE || \
	 (prev_token) == K_LOOP)
198 
199 
200 /* Auxiliary data about a token (other than the token type) */
201 typedef struct
202 {
203  YYSTYPE lval; /* semantic information */
204  YYLTYPE lloc; /* offset in scanbuf */
205  int leng; /* length in bytes */
206 } TokenAuxData;
207 
208 /*
209  * Scanner working state. At some point we might wish to fold all this
210  * into a YY_EXTRA struct. For the moment, there is no need for plpgsql's
211  * lexer to be re-entrant, and the notational burden of passing a yyscanner
212  * pointer around is great enough to not want to do it without need.
213  */
214 
215 /* The stuff the core lexer needs */
216 static core_yyscan_t yyscanner = NULL;
218 
219 /* The original input string */
220 static const char *scanorig;
221 
222 /* Current token's length (corresponds to plpgsql_yylval and plpgsql_yylloc) */
223 static int plpgsql_yyleng;
224 
225 /* Current token's code (corresponds to plpgsql_yylval and plpgsql_yylloc) */
226 static int plpgsql_yytoken;
227 
228 /* Token pushback stack */
229 #define MAX_PUSHBACKS 4
230 
231 static int num_pushbacks;
234 
235 /* State for plpgsql_location_to_lineno() */
236 static const char *cur_line_start;
237 static const char *cur_line_end;
238 static int cur_line_num;
239 
240 /* Internal functions */
241 static int internal_yylex(TokenAuxData *auxdata);
242 static void push_back_token(int token, TokenAuxData *auxdata);
243 static void location_lineno_init(void);
244 
245 
246 /*
247  * This is the yylex routine called from the PL/pgSQL grammar.
248  * It is a wrapper around the core lexer, with the ability to recognize
249  * PL/pgSQL variables and return them as special T_DATUM tokens. If a
250  * word or compound word does not match any variable name, or if matching
251  * is turned off by plpgsql_IdentifierLookup, it is returned as
252  * T_WORD or T_CWORD respectively, or as an unreserved keyword if it
253  * matches one of those.
254  */
255 int
257 {
258  int tok1;
259  TokenAuxData aux1;
260  const ScanKeyword *kw;
261 
262  tok1 = internal_yylex(&aux1);
263  if (tok1 == IDENT || tok1 == PARAM)
264  {
265  int tok2;
266  TokenAuxData aux2;
267 
268  tok2 = internal_yylex(&aux2);
269  if (tok2 == '.')
270  {
271  int tok3;
272  TokenAuxData aux3;
273 
274  tok3 = internal_yylex(&aux3);
275  if (tok3 == IDENT)
276  {
277  int tok4;
278  TokenAuxData aux4;
279 
280  tok4 = internal_yylex(&aux4);
281  if (tok4 == '.')
282  {
283  int tok5;
284  TokenAuxData aux5;
285 
286  tok5 = internal_yylex(&aux5);
287  if (tok5 == IDENT)
288  {
289  if (plpgsql_parse_tripword(aux1.lval.str,
290  aux3.lval.str,
291  aux5.lval.str,
292  &aux1.lval.wdatum,
293  &aux1.lval.cword))
294  tok1 = T_DATUM;
295  else
296  tok1 = T_CWORD;
297  }
298  else
299  {
300  /* not A.B.C, so just process A.B */
301  push_back_token(tok5, &aux5);
302  push_back_token(tok4, &aux4);
303  if (plpgsql_parse_dblword(aux1.lval.str,
304  aux3.lval.str,
305  &aux1.lval.wdatum,
306  &aux1.lval.cword))
307  tok1 = T_DATUM;
308  else
309  tok1 = T_CWORD;
310  }
311  }
312  else
313  {
314  /* not A.B.C, so just process A.B */
315  push_back_token(tok4, &aux4);
316  if (plpgsql_parse_dblword(aux1.lval.str,
317  aux3.lval.str,
318  &aux1.lval.wdatum,
319  &aux1.lval.cword))
320  tok1 = T_DATUM;
321  else
322  tok1 = T_CWORD;
323  }
324  }
325  else
326  {
327  /* not A.B, so just process A */
328  push_back_token(tok3, &aux3);
329  push_back_token(tok2, &aux2);
330  if (plpgsql_parse_word(aux1.lval.str,
331  core_yy.scanbuf + aux1.lloc,
332  &aux1.lval.wdatum,
333  &aux1.lval.word))
334  tok1 = T_DATUM;
335  else if (!aux1.lval.word.quoted &&
336  (kw = ScanKeywordLookup(aux1.lval.word.ident,
337  unreserved_keywords,
339  {
340  aux1.lval.keyword = kw->name;
341  tok1 = kw->value;
342  }
343  else
344  tok1 = T_WORD;
345  }
346  }
347  else
348  {
349  /* not A.B, so just process A */
350  push_back_token(tok2, &aux2);
351 
352  /*
353  * If we are at start of statement, prefer unreserved keywords
354  * over variable names, unless the next token is assignment or
355  * '[', in which case prefer variable names. (Note we need not
356  * consider '.' as the next token; that case was handled above,
357  * and we always prefer variable names in that case.) If we are
358  * not at start of statement, always prefer variable names over
359  * unreserved keywords.
360  */
362  !(tok2 == '=' || tok2 == COLON_EQUALS || tok2 == '['))
363  {
364  /* try for unreserved keyword, then for variable name */
365  if (core_yy.scanbuf[aux1.lloc] != '"' &&
366  (kw = ScanKeywordLookup(aux1.lval.str,
367  unreserved_keywords,
369  {
370  aux1.lval.keyword = kw->name;
371  tok1 = kw->value;
372  }
373  else if (plpgsql_parse_word(aux1.lval.str,
374  core_yy.scanbuf + aux1.lloc,
375  &aux1.lval.wdatum,
376  &aux1.lval.word))
377  tok1 = T_DATUM;
378  else
379  tok1 = T_WORD;
380  }
381  else
382  {
383  /* try for variable name, then for unreserved keyword */
384  if (plpgsql_parse_word(aux1.lval.str,
385  core_yy.scanbuf + aux1.lloc,
386  &aux1.lval.wdatum,
387  &aux1.lval.word))
388  tok1 = T_DATUM;
389  else if (!aux1.lval.word.quoted &&
390  (kw = ScanKeywordLookup(aux1.lval.word.ident,
391  unreserved_keywords,
393  {
394  aux1.lval.keyword = kw->name;
395  tok1 = kw->value;
396  }
397  else
398  tok1 = T_WORD;
399  }
400  }
401  }
402  else
403  {
404  /*
405  * Not a potential plpgsql variable name, just return the data.
406  *
407  * Note that we also come through here if the grammar pushed back a
408  * T_DATUM, T_CWORD, T_WORD, or unreserved-keyword token returned by a
409  * previous lookup cycle; thus, pushbacks do not incur extra lookup
410  * work, since we'll never do the above code twice for the same token.
411  * This property also makes it safe to rely on the old value of
412  * plpgsql_yytoken in the is-this-start-of-statement test above.
413  */
414  }
415 
416  plpgsql_yylval = aux1.lval;
417  plpgsql_yylloc = aux1.lloc;
418  plpgsql_yyleng = aux1.leng;
419  plpgsql_yytoken = tok1;
420  return tok1;
421 }
422 
423 /*
424  * Internal yylex function. This wraps the core lexer and adds one feature:
425  * a token pushback stack. We also make a couple of trivial single-token
426  * translations from what the core lexer does to what we want, in particular
427  * interfacing from the core_YYSTYPE to YYSTYPE union.
428  */
429 static int
431 {
432  int token;
433  const char *yytext;
434 
435  if (num_pushbacks > 0)
436  {
437  num_pushbacks--;
438  token = pushback_token[num_pushbacks];
439  *auxdata = pushback_auxdata[num_pushbacks];
440  }
441  else
442  {
443  token = core_yylex(&auxdata->lval.core_yystype,
444  &auxdata->lloc,
445  yyscanner);
446 
447  /* remember the length of yytext before it gets changed */
448  yytext = core_yy.scanbuf + auxdata->lloc;
449  auxdata->leng = strlen(yytext);
450 
451  /* Check for << >> and #, which the core considers operators */
452  if (token == Op)
453  {
454  if (strcmp(auxdata->lval.str, "<<") == 0)
455  token = LESS_LESS;
456  else if (strcmp(auxdata->lval.str, ">>") == 0)
457  token = GREATER_GREATER;
458  else if (strcmp(auxdata->lval.str, "#") == 0)
459  token = '#';
460  }
461 
462  /* The core returns PARAM as ival, but we treat it like IDENT */
463  else if (token == PARAM)
464  {
465  auxdata->lval.str = pstrdup(yytext);
466  }
467  }
468 
469  return token;
470 }
471 
472 /*
473  * Push back a token to be re-read by next internal_yylex() call.
474  */
475 static void
476 push_back_token(int token, TokenAuxData *auxdata)
477 {
479  elog(ERROR, "too many tokens pushed back");
480  pushback_token[num_pushbacks] = token;
481  pushback_auxdata[num_pushbacks] = *auxdata;
482  num_pushbacks++;
483 }
484 
485 /*
486  * Push back a single token to be re-read by next plpgsql_yylex() call.
487  *
488  * NOTE: this does not cause yylval or yylloc to "back up". Also, it
489  * is not a good idea to push back a token code other than what you read.
490  */
491 void
493 {
494  TokenAuxData auxdata;
495 
496  auxdata.lval = plpgsql_yylval;
497  auxdata.lloc = plpgsql_yylloc;
498  auxdata.leng = plpgsql_yyleng;
499  push_back_token(token, &auxdata);
500 }
501 
502 /*
503  * Tell whether a token is an unreserved keyword.
504  *
505  * (If it is, its lowercased form was returned as the token value, so we
506  * do not need to offer that data here.)
507  */
508 bool
510 {
511  int i;
512 
513  for (i = 0; i < num_unreserved_keywords; i++)
514  {
515  if (unreserved_keywords[i].value == token)
516  return true;
517  }
518  return false;
519 }
520 
521 /*
522  * Append the function text starting at startlocation and extending to
523  * (not including) endlocation onto the existing contents of "buf".
524  */
525 void
527  int startlocation, int endlocation)
528 {
529  Assert(startlocation <= endlocation);
530  appendBinaryStringInfo(buf, scanorig + startlocation,
531  endlocation - startlocation);
532 }
533 
534 /*
535  * Peek one token ahead in the input stream. Only the token code is
536  * made available, not any of the auxiliary info such as location.
537  *
538  * NB: no variable or unreserved keyword lookup is performed here, they will
539  * be returned as IDENT. Reserved keywords are resolved as usual.
540  */
541 int
543 {
544  int tok1;
545  TokenAuxData aux1;
546 
547  tok1 = internal_yylex(&aux1);
548  push_back_token(tok1, &aux1);
549  return tok1;
550 }
551 
552 /*
553  * Peek two tokens ahead in the input stream. The first token and its
554  * location in the query are returned in *tok1_p and *tok1_loc, second token
555  * and its location in *tok2_p and *tok2_loc.
556  *
557  * NB: no variable or unreserved keyword lookup is performed here, they will
558  * be returned as IDENT. Reserved keywords are resolved as usual.
559  */
560 void
561 plpgsql_peek2(int *tok1_p, int *tok2_p, int *tok1_loc, int *tok2_loc)
562 {
563  int tok1,
564  tok2;
565  TokenAuxData aux1,
566  aux2;
567 
568  tok1 = internal_yylex(&aux1);
569  tok2 = internal_yylex(&aux2);
570 
571  *tok1_p = tok1;
572  if (tok1_loc)
573  *tok1_loc = aux1.lloc;
574  *tok2_p = tok2;
575  if (tok2_loc)
576  *tok2_loc = aux2.lloc;
577 
578  push_back_token(tok2, &aux2);
579  push_back_token(tok1, &aux1);
580 }
581 
582 /*
583  * plpgsql_scanner_errposition
584  * Report an error cursor position, if possible.
585  *
586  * This is expected to be used within an ereport() call. The return value
587  * is a dummy (always 0, in fact).
588  *
589  * Note that this can only be used for messages emitted during initial
590  * parsing of a plpgsql function, since it requires the scanorig string
591  * to still be available.
592  */
593 int
595 {
596  int pos;
597 
598  if (location < 0 || scanorig == NULL)
599  return 0; /* no-op if location is unknown */
600 
601  /* Convert byte offset to character number */
602  pos = pg_mbstrlen_with_len(scanorig, location) + 1;
603  /* And pass it to the ereport mechanism */
604  (void) internalerrposition(pos);
605  /* Also pass the function body string */
606  return internalerrquery(scanorig);
607 }
608 
609 /*
610  * plpgsql_yyerror
611  * Report a lexer or grammar error.
612  *
613  * The message's cursor position refers to the current token (the one
614  * last returned by plpgsql_yylex()).
615  * This is OK for syntax error messages from the Bison parser, because Bison
616  * parsers report error as soon as the first unparsable token is reached.
617  * Beware of using yyerror for other purposes, as the cursor position might
618  * be misleading!
619  */
620 void
621 plpgsql_yyerror(const char *message)
622 {
623  char *yytext = core_yy.scanbuf + plpgsql_yylloc;
624 
625  if (*yytext == '\0')
626  {
627  ereport(ERROR,
628  (errcode(ERRCODE_SYNTAX_ERROR),
629  /* translator: %s is typically the translation of "syntax error" */
630  errmsg("%s at end of input", _(message)),
631  plpgsql_scanner_errposition(plpgsql_yylloc)));
632  }
633  else
634  {
635  /*
636  * If we have done any lookahead then flex will have restored the
637  * character after the end-of-token. Zap it again so that we report
638  * only the single token here. This modifies scanbuf but we no longer
639  * care about that.
640  */
641  yytext[plpgsql_yyleng] = '\0';
642 
643  ereport(ERROR,
644  (errcode(ERRCODE_SYNTAX_ERROR),
645  /* translator: first %s is typically the translation of "syntax error" */
646  errmsg("%s at or near \"%s\"", _(message), yytext),
647  plpgsql_scanner_errposition(plpgsql_yylloc)));
648  }
649 }
650 
651 /*
652  * Given a location (a byte offset in the function source text),
653  * return a line number.
654  *
655  * We expect that this is typically called for a sequence of increasing
656  * location values, so optimize accordingly by tracking the endpoints
657  * of the "current" line.
658  */
659 int
661 {
662  const char *loc;
663 
664  if (location < 0 || scanorig == NULL)
665  return 0; /* garbage in, garbage out */
666  loc = scanorig + location;
667 
668  /* be correct, but not fast, if input location goes backwards */
669  if (loc < cur_line_start)
671 
672  while (cur_line_end != NULL && loc > cur_line_end)
673  {
675  cur_line_num++;
676  cur_line_end = strchr(cur_line_start, '\n');
677  }
678 
679  return cur_line_num;
680 }
681 
682 /* initialize or reset the state for plpgsql_location_to_lineno */
683 static void
685 {
687  cur_line_num = 1;
688 
689  cur_line_end = strchr(cur_line_start, '\n');
690 }
691 
692 /* return the most recently computed lineno */
693 int
695 {
696  return cur_line_num;
697 }
698 
699 
700 /*
701  * Called before any actual parsing is done
702  *
703  * Note: the passed "str" must remain valid until plpgsql_scanner_finish().
704  * Although it is not fed directly to flex, we need the original string
705  * to cite in error messages.
706  */
707 void
709 {
710  /* Start up the core scanner */
711  yyscanner = scanner_init(str, &core_yy,
712  reserved_keywords, num_reserved_keywords);
713 
714  /*
715  * scanorig points to the original string, which unlike the scanner's
716  * scanbuf won't be modified on-the-fly by flex. Notice that although
717  * yytext points into scanbuf, we rely on being able to apply locations
718  * (offsets from string start) to scanorig as well.
719  */
720  scanorig = str;
721 
722  /* Other setup */
724  plpgsql_yytoken = 0;
725 
726  num_pushbacks = 0;
727 
729 }
730 
731 /*
732  * Called after parsing is done to clean up after plpgsql_scanner_init()
733  */
734 void
736 {
737  /* release storage */
739  /* avoid leaving any dangling pointers */
740  yyscanner = NULL;
741  scanorig = NULL;
742 }
#define MAX_PUSHBACKS
Definition: pl_scanner.c:229
int plpgsql_latest_lineno(void)
Definition: pl_scanner.c:694
static int cur_line_num
Definition: pl_scanner.c:238
bool plpgsql_parse_dblword(char *word1, char *word2, PLwdatum *wdatum, PLcword *cword)
Definition: pl_comp.c:1418
void * core_yyscan_t
Definition: scanner.h:116
static TokenAuxData pushback_auxdata[MAX_PUSHBACKS]
Definition: pl_scanner.c:233
const ScanKeyword * ScanKeywordLookup(const char *text, const ScanKeyword *keywords, int num_keywords)
Definition: keywords.c:64
IdentifierLookup
Definition: plpgsql.h:1123
char * pstrdup(const char *in)
Definition: mcxt.c:1161
YYLTYPE lloc
Definition: pl_scanner.c:204
void plpgsql_append_source_text(StringInfo buf, int startlocation, int endlocation)
Definition: pl_scanner.c:526
int errcode(int sqlerrcode)
Definition: elog.c:575
#define PG_KEYWORD(a, b, c)
Definition: pl_scanner.c:25
static const ScanKeyword reserved_keywords[]
Definition: pl_scanner.c:70
#define UNRESERVED_KEYWORD
Definition: keywords.h:18
#define lengthof(array)
Definition: c.h:629
static void location_lineno_init(void)
Definition: pl_scanner.c:684
static const int num_unreserved_keywords
Definition: pl_scanner.c:184
YYSTYPE lval
Definition: pl_scanner.c:203
static const char * cur_line_start
Definition: pl_scanner.c:236
void plpgsql_yyerror(const char *message)
Definition: pl_scanner.c:621
char * scanbuf
Definition: scanner.h:72
int pg_mbstrlen_with_len(const char *mbstr, int limit)
Definition: mbutils.c:794
int plpgsql_peek(void)
Definition: pl_scanner.c:542
bool plpgsql_token_is_unreserved_keyword(int token)
Definition: pl_scanner.c:509
void plpgsql_scanner_finish(void)
Definition: pl_scanner.c:735
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:67
static int plpgsql_yytoken
Definition: pl_scanner.c:226
const char * name
Definition: keywords.h:26
#define YYLTYPE
Definition: scanner.h:44
static const ScanKeyword unreserved_keywords[]
Definition: pl_scanner.c:99
static int internal_yylex(TokenAuxData *auxdata)
Definition: pl_scanner.c:430
#define ereport(elevel, rest)
Definition: elog.h:122
bool plpgsql_parse_word(char *word1, const char *yytxt, PLwdatum *wdatum, PLword *word)
Definition: pl_comp.c:1363
void plpgsql_peek2(int *tok1_p, int *tok2_p, int *tok1_loc, int *tok2_loc)
Definition: pl_scanner.c:561
static const char * scanorig
Definition: pl_scanner.c:220
IdentifierLookup plpgsql_IdentifierLookup
Definition: pl_scanner.c:29
static int num_pushbacks
Definition: pl_scanner.c:231
static int pushback_token[MAX_PUSHBACKS]
Definition: pl_scanner.c:232
static struct @131 value
#define RESERVED_KEYWORD
Definition: keywords.h:21
int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
int16 value
Definition: keywords.h:27
int plpgsql_location_to_lineno(int location)
Definition: pl_scanner.c:660
int internalerrquery(const char *query)
Definition: elog.c:1161
static core_yy_extra_type core_yy
Definition: pl_scanner.c:217
#define Assert(condition)
Definition: c.h:699
static int plpgsql_yyleng
Definition: pl_scanner.c:223
#define AT_STMT_START(prev_token)
Definition: pl_scanner.c:192
int plpgsql_scanner_errposition(int location)
Definition: pl_scanner.c:594
core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, const ScanKeyword *keywords, int num_keywords)
static core_yyscan_t yyscanner
Definition: pl_scanner.c:216
void plpgsql_scanner_init(const char *str)
Definition: pl_scanner.c:708
int errmsg(const char *fmt,...)
Definition: elog.c:797
static const char * cur_line_end
Definition: pl_scanner.c:237
bool plpgsql_parse_tripword(char *word1, char *word2, char *word3, PLwdatum *wdatum, PLcword *cword)
Definition: pl_comp.c:1498
int i
static const int num_reserved_keywords
Definition: pl_scanner.c:97
void scanner_finish(core_yyscan_t yyscanner)
#define elog
Definition: elog.h:219
int plpgsql_yylex(void)
Definition: pl_scanner.c:256
static void push_back_token(int token, TokenAuxData *auxdata)
Definition: pl_scanner.c:476
void plpgsql_push_back_token(int token)
Definition: pl_scanner.c:492
#define _(x)
Definition: elog.c:84
void appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
Definition: stringinfo.c:208
int internalerrposition(int cursorpos)
Definition: elog.c:1141