PostgreSQL Source Code git master
Loading...
Searching...
No Matches
lexi.c
Go to the documentation of this file.
/*-
 * Copyright (c) 1985 Sun Microsystems, Inc.
 * Copyright (c) 1980, 1993
 *	The Regents of the University of California.  All rights reserved.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
31
32#if 0
33#ifndef lint
34static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
35#endif /* not lint */
36#endif
37
38#include "c.h"
39
/*
 * Here we have the token scanner for indent.  It scans off one token and puts
 * it in the global variable "token".  It returns a code, indicating the type
 * of token scanned.
 */
45
46#include <err.h>
47#include <stdio.h>
48#include <ctype.h>
49#include <stdlib.h>
50#include <string.h>
51#include "indent_globs.h"
52#include "indent_codes.h"
53#include "indent.h"
54
/* chartype[] class of characters that can appear in identifiers and numbers */
#define alphanum 1
#ifdef undef
/* chartype[] class of operator characters; referenced only by disabled code */
#define opchar 3
#endif
59
/*
 * One reserved-word table entry: the keyword's spelling plus the class code
 * that lexi() switches on after a successful lookup.
 */
struct templ {
    const char *rwd;            /* keyword text; must remain the first member */
    int         rwcode;         /* keyword class code, see below */
};

/*
 * This table has to be sorted alphabetically, because it'll be used in binary
 * search. For the same reason, string must be the first thing in struct templ.
 *
 * Class codes as interpreted by lexi():
 *    3  struct / enum / union          4  declaration keyword (type name)
 *    5  if / while / for               6  do / else
 *    7  switch                         8  case / default
 *   10  storage class specifier       11  typedef
 * Every other code is reported as a plain identifier.
 */
struct templ specials[] =
{
    {"_Bool", 4},
    {"_Complex", 4},
    {"_Imaginary", 4},
    {"auto", 10},
    {"bool", 4},
    {"break", 9},
    {"case", 8},
    {"char", 4},
    {"complex", 4},
    {"const", 4},
    {"continue", 12},
    {"default", 8},
    {"do", 6},
    {"double", 4},
    {"else", 6},
    {"enum", 3},
    {"extern", 10},
    {"float", 4},
    {"for", 5},
    {"global", 4},
    {"goto", 9},
    {"if", 5},
    {"imaginary", 4},
    {"inline", 12},
    {"int", 4},
    {"long", 4},
    {"offsetof", 1},
    {"register", 10},
    {"restrict", 12},
    {"return", 9},
    {"short", 4},
    {"signed", 4},
    {"sizeof", 2},
    {"static", 10},
    {"struct", 3},
    {"switch", 7},
    {"typedef", 11},
    {"union", 3},
    {"unsigned", 4},
    {"void", 4},
    {"volatile", 4},
    {"while", 5}
};
114
/*
 * Table of user-supplied type names, kept sorted so it can be bsearch()ed.
 * typename_top is the index of the last slot in use (-1 while the table is
 * empty); typename_count is the number of slots currently allocated.
 */
const char **typenames;
int         typename_count;
int         typename_top = -1;

/*
 * Per-character classification for the 7-bit ASCII range, indexed by
 * (character & 127).  1 marks alphanumeric characters (letters, digits,
 * '_' and '$'), 3 marks operator characters, 0 marks everything else.
 */
char chartype[128] =
{
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x00 - 0x07 */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x08 - 0x0f */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x10 - 0x17 */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x18 - 0x1f */
    0, 3, 0, 0, 1, 3, 3, 0,     /* sp ! " # $ % & ' */
    0, 0, 3, 3, 0, 3, 0, 3,     /* ( ) * + , - . / */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0 - 7 */
    1, 1, 0, 0, 3, 3, 3, 3,     /* 8 9 : ; < = > ? */
    0, 1, 1, 1, 1, 1, 1, 1,     /* @ A - G */
    1, 1, 1, 1, 1, 1, 1, 1,     /* H - O */
    1, 1, 1, 1, 1, 1, 1, 1,     /* P - W */
    1, 1, 1, 0, 0, 0, 3, 1,     /* X Y Z [ \ ] ^ _ */
    0, 1, 1, 1, 1, 1, 1, 1,     /* ` a - g */
    1, 1, 1, 1, 1, 1, 1, 1,     /* h - o */
    1, 1, 1, 1, 1, 1, 1, 1,     /* p - w */
    1, 1, 1, 0, 3, 0, 3, 0      /* x y z { | } ~ DEL */
};
140
/*
 * bsearch() comparator for tables whose elements begin with a string pointer.
 *
 * e1 is the search key, passed as a plain C string; e2 points at a table
 * element whose first member is a "const char *" (an entry of typenames[],
 * or a struct templ, which keeps its string first for this reason).
 * Returns the strcmp() ordering of the key against that string.
 */
static int
strcmp_type(const void *e1, const void *e2)
{
    const char *key = e1;
    const char *const *elem = e2;

    return strcmp(key, *elem);
}
146
147/*
148 * Decide whether "foo(..." is a function definition or declaration.
149 *
150 * At call, we are looking at the '('. Look ahead to find the first
151 * '{', ';' or ',' that is not within parentheses or comments; then
152 * it's a definition if we found '{', otherwise a declaration.
153 * Note that this rule is fooled by K&R-style parameter declarations,
154 * but telling the difference between those and function attributes
155 * seems like more trouble than it's worth. This code could also be
156 * fooled by mismatched parens or apparent comment starts within string
157 * literals, but that seems unlikely in the context it's used in.
158 */
159static int
161{
162 int paren_depth = 0;
163 int in_comment = false;
164 int in_slash_comment = false;
165 int lastc = 0;
166
167 /* We may need to look past the end of the current buffer. */
169 for (;;) {
170 int c;
171
172 /* Fetch next character. */
173 if (tp < buf_end)
174 c = *tp++;
175 else {
176 c = lookahead();
177 if (c == EOF)
178 break;
179 }
180 /* Handle comments. */
181 if (in_comment) {
182 if (lastc == '*' && c == '/')
183 in_comment = false;
184 } else if (lastc == '/' && c == '*' && !in_slash_comment)
185 in_comment = true;
186 else if (in_slash_comment) {
187 if (c == '\n')
188 in_slash_comment = false;
189 } else if (lastc == '/' && c == '/')
190 in_slash_comment = true;
191 /* Count nested parens properly. */
192 else if (c == '(')
193 paren_depth++;
194 else if (c == ')') {
195 paren_depth--;
196 /*
197 * If we find unbalanced parens, we must have started inside a
198 * declaration.
199 */
200 if (paren_depth < 0)
201 return false;
202 } else if (paren_depth == 0) {
203 /* We are outside any parentheses or comments. */
204 if (c == '{')
205 return true;
206 else if (c == ';' || c == ',')
207 return false;
208 }
209 lastc = c;
210 }
211 /* Hit EOF --- for lack of anything better, assume "not a definition". */
212 return false;
213}
214
215int
217{
218 int unary_delim; /* this is set to 1 if the current token
219 * forces a following operator to be unary */
220 int code; /* internal code to be returned */
221 char qchar; /* the delimiter character for a string */
222
223 e_token = s_token; /* point to start of place to save token */
224 unary_delim = false;
225 state->col_1 = state->last_nl; /* tell world that this token started
226 * in column 1 iff the last thing
227 * scanned was a newline */
228 state->last_nl = false;
229
230 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
231 state->col_1 = false; /* leading blanks imply token is not in column
232 * 1 */
233 if (++buf_ptr >= buf_end)
234 fill_buffer();
235 }
236
237 /* Scan an alphanumeric token */
238 if (chartype[*buf_ptr & 127] == alphanum ||
239 (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
240 /*
241 * we have a character or number
242 */
243 struct templ *p;
244
245 if (isdigit((unsigned char)*buf_ptr) ||
246 (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
247 int seendot = 0,
248 seenexp = 0,
249 seensfx = 0;
250
251 /*
252 * base 2, base 8, base 16:
253 */
254 if (buf_ptr[0] == '0' && buf_ptr[1] != '.') {
255 int len;
256
257 if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B')
258 len = strspn(buf_ptr + 2, "01") + 2;
259 else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')
260 len = strspn(buf_ptr + 2, "0123456789ABCDEFabcdef") + 2;
261 else
262 len = strspn(buf_ptr + 1, "012345678") + 1;
263 if (len > 0) {
266 e_token += len;
267 buf_ptr += len;
268 }
269 else
270 diag2(1, "Unterminated literal");
271 }
272 else /* base 10: */
273 while (1) {
274 if (*buf_ptr == '.') {
275 if (seendot)
276 break;
277 else
278 seendot++;
279 }
281 *e_token++ = *buf_ptr++;
282 if (!isdigit((unsigned char)*buf_ptr) && *buf_ptr != '.') {
283 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
284 break;
285 else {
286 seenexp++;
287 seendot++;
288 *e_token++ = *buf_ptr++;
289 if (*buf_ptr == '+' || *buf_ptr == '-')
290 *e_token++ = *buf_ptr++;
291 }
292 }
293 }
294
295 while (1) {
297 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
298 *e_token++ = *buf_ptr++;
299 seensfx |= 1;
300 continue;
301 }
302 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
303 if (buf_ptr[1] == buf_ptr[0])
304 *e_token++ = *buf_ptr++;
305 *e_token++ = *buf_ptr++;
306 seensfx |= 2;
307 continue;
308 }
309 break;
310 }
311 }
312 else
313 while (chartype[*buf_ptr & 127] == alphanum || *buf_ptr == BACKSLASH) {
314 /* fill_buffer() terminates buffer with newline */
315 if (*buf_ptr == BACKSLASH) {
316 if (*(buf_ptr + 1) == '\n') {
317 buf_ptr += 2;
318 if (buf_ptr >= buf_end)
319 fill_buffer();
320 } else
321 break;
322 }
324 /* copy it over */
325 *e_token++ = *buf_ptr++;
326 if (buf_ptr >= buf_end)
327 fill_buffer();
328 }
329 *e_token = '\0';
330
331 if (s_token[0] == 'L' && s_token[1] == '\0' &&
332 (*buf_ptr == '"' || *buf_ptr == '\''))
333 return (strpfx);
334
335 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
336 if (++buf_ptr >= buf_end)
337 fill_buffer();
338 }
339 state->keyword = 0;
340 if (state->last_token == structure && !state->p_l_follow) {
341 /* if last token was 'struct' and we're not
342 * in parentheses, then this token
343 * should be treated as a declaration */
344 state->last_u_d = true;
345 return (decl);
346 }
347 /*
348 * Operator after identifier is binary unless last token was 'struct'
349 */
350 state->last_u_d = (state->last_token == structure);
351
352 p = bsearch(s_token,
353 specials,
354 sizeof(specials) / sizeof(specials[0]),
355 sizeof(specials[0]),
357 if (p == NULL) { /* not a special keyword... */
358 char *u;
359
360 /* ... so maybe a type_t or a typedef */
361 if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) &&
362 strcmp(u, "_t") == 0) || (typename_top >= 0 &&
364 sizeof(typenames[0]), strcmp_type))) {
365 state->keyword = 4; /* a type name */
366 if (state->last_token != period && state->last_token != unary_op)
367 state->last_u_d = true;
368 goto found_typename;
369 }
370 } else { /* we have a keyword */
371 state->keyword = p->rwcode;
372 state->last_u_d = true;
373 switch (p->rwcode) {
374 case 7: /* it is a switch */
375 return (swstmt);
376 case 8: /* a case or default */
377 return (casestmt);
378
379 case 3: /* a "struct" */
380 /* FALLTHROUGH */
381 case 4: /* one of the declaration keywords */
383 if (state->p_l_follow) {
384 /* inside parens: cast, param list, offsetof or sizeof */
385 state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask;
386 }
387 if (state->last_token == period || state->last_token == unary_op) {
388 state->keyword = 0;
389 break;
390 }
391 if (p != NULL && p->rwcode == 3)
392 return (structure);
393 if (state->p_l_follow)
394 break;
395 return (decl);
396
397 case 5: /* if, while, for */
398 return (sp_paren);
399
400 case 6: /* do, else */
401 return (sp_nparen);
402
403 case 10: /* storage class specifier */
404 return (storage);
405
406 case 11: /* typedef */
407 return (type_def);
408
409 default: /* all others are treated like any other
410 * identifier */
411 return (ident);
412 } /* end of switch */
413 } /* end of if (found_it) */
414 if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 &&
415 state->in_parameter_declaration == 0 && state->block_init == 0) {
417 strncpy(state->procname, token, sizeof state->procname - 1);
418 if (state->in_decl)
419 state->in_parameter_declaration = 1;
420 return (funcname);
421 }
422 }
423 /*
424 * The following hack attempts to guess whether or not the current
425 * token is in fact a declaration keyword -- one that has been
426 * typedefd
427 */
428 else if (!state->p_l_follow && !state->block_init &&
429 !state->in_stmt &&
430 ((*buf_ptr == '*' && buf_ptr[1] != '=') ||
431 isalpha((unsigned char)*buf_ptr)) &&
432 (state->last_token == semicolon || state->last_token == lbrace ||
433 state->last_token == rbrace)) {
434 state->keyword = 4; /* a type name */
435 state->last_u_d = true;
436 return decl;
437 }
438 if (state->last_token == decl) /* if this is a declared variable,
439 * then following sign is unary */
440 state->last_u_d = true; /* will make "int a -1" work */
441 return (ident); /* the ident is not in the list */
442 } /* end of processing for alphanum character */
443
444 /* Scan a non-alphanumeric token */
445
446 CHECK_SIZE_TOKEN(3); /* things like "<<=" */
447 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
448 * moved here */
449 *e_token = '\0';
450 if (++buf_ptr >= buf_end)
451 fill_buffer();
452
453 switch (*token) {
454 case '\n':
455 unary_delim = state->last_u_d;
456 state->last_nl = true; /* remember that we just had a newline */
457 code = (had_eof ? 0 : newline);
458
459 /*
460 * if data has been exhausted, the newline is a dummy, and we should
461 * return code to stop
462 */
463 break;
464
465 case '\'': /* start of quoted character */
466 case '"': /* start of string */
467 qchar = *token;
468 do { /* copy the string */
469 while (1) { /* move one character or [/<char>]<char> */
470 if (*buf_ptr == '\n') {
471 diag2(1, "Unterminated literal");
472 goto stop_lit;
473 }
475 *e_token = *buf_ptr++;
476 if (buf_ptr >= buf_end)
477 fill_buffer();
478 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
479 if (*buf_ptr == '\n') /* check for escaped newline */
480 ++line_no;
481 *++e_token = *buf_ptr++;
482 ++e_token; /* we must increment this again because we
483 * copied two chars */
484 if (buf_ptr >= buf_end)
485 fill_buffer();
486 }
487 else
488 break; /* we copied one character */
489 } /* end of while (1) */
490 } while (*e_token++ != qchar);
492 code = ident;
493 break;
494
495 case ('('):
496 case ('['):
497 unary_delim = true;
498 code = lparen;
499 break;
500
501 case (')'):
502 case (']'):
503 code = rparen;
504 break;
505
506 case '#':
507 unary_delim = state->last_u_d;
508 code = preesc;
509 break;
510
511 case '?':
512 unary_delim = true;
513 code = question;
514 break;
515
516 case (':'):
517 code = colon;
518 unary_delim = true;
519 break;
520
521 case (';'):
522 unary_delim = true;
523 code = semicolon;
524 break;
525
526 case ('{'):
527 unary_delim = true;
528
529 /*
530 * if (state->in_or_st) state->block_init = 1;
531 */
532 /* ? code = state->block_init ? lparen : lbrace; */
533 code = lbrace;
534 break;
535
536 case ('}'):
537 unary_delim = true;
538 /* ? code = state->block_init ? rparen : rbrace; */
539 code = rbrace;
540 break;
541
542 case 014: /* a form feed */
543 unary_delim = state->last_u_d;
544 state->last_nl = true; /* remember this so we can set 'state->col_1'
545 * right */
546 code = form_feed;
547 break;
548
549 case (','):
550 unary_delim = true;
551 code = comma;
552 break;
553
554 case '.':
555 unary_delim = false;
556 code = period;
557 break;
558
559 case '-':
560 case '+': /* check for -, +, --, ++ */
561 code = (state->last_u_d ? unary_op : binary_op);
562 unary_delim = true;
563
564 if (*buf_ptr == token[0]) {
565 /* check for doubled character */
566 *e_token++ = *buf_ptr++;
567 /* buffer overflow will be checked at end of loop */
568 if (state->last_token == ident || state->last_token == rparen) {
569 code = (state->last_u_d ? unary_op : postop);
570 /* check for following ++ or -- */
571 unary_delim = false;
572 }
573 }
574 else if (*buf_ptr == '=')
575 /* check for operator += */
576 *e_token++ = *buf_ptr++;
577 else if (*buf_ptr == '>') {
578 /* check for operator -> */
579 *e_token++ = *buf_ptr++;
580 unary_delim = false;
581 code = unary_op;
582 state->want_blank = false;
583 }
584 break; /* buffer overflow will be checked at end of
585 * switch */
586
587 case '=':
588 if (state->in_or_st)
589 state->block_init = 1;
590#ifdef undef
591 if (chartype[*buf_ptr & 127] == opchar) { /* we have two char assignment */
592 e_token[-1] = *buf_ptr++;
593 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
594 *e_token++ = *buf_ptr++;
595 *e_token++ = '='; /* Flip =+ to += */
596 *e_token = 0;
597 }
598#else
599 if (*buf_ptr == '=') {/* == */
600 *e_token++ = '='; /* Flip =+ to += */
601 buf_ptr++;
602 *e_token = 0;
603 }
604#endif
605 code = binary_op;
606 unary_delim = true;
607 break;
608 /* can drop thru!!! */
609
610 case '>':
611 case '<':
612 case '!': /* ops like <, <<, <=, !=, etc */
613 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
614 *e_token++ = *buf_ptr;
615 if (++buf_ptr >= buf_end)
616 fill_buffer();
617 }
618 if (*buf_ptr == '=')
619 *e_token++ = *buf_ptr++;
620 code = (state->last_u_d ? unary_op : binary_op);
621 unary_delim = true;
622 break;
623
624 case '*':
625 unary_delim = true;
626 if (!state->last_u_d) {
627 if (*buf_ptr == '=')
628 *e_token++ = *buf_ptr++;
629 code = binary_op;
630 break;
631 }
632 while (*buf_ptr == '*' || isspace((unsigned char)*buf_ptr)) {
633 if (*buf_ptr == '*') {
635 *e_token++ = *buf_ptr;
636 }
637 if (++buf_ptr >= buf_end)
638 fill_buffer();
639 }
640 code = unary_op;
641 break;
642
643 default:
644 if (token[0] == '/' && *buf_ptr == '*') {
645 /* it is start of comment */
646 *e_token++ = '*';
647
648 if (++buf_ptr >= buf_end)
649 fill_buffer();
650
651 code = comment;
652 unary_delim = state->last_u_d;
653 break;
654 }
655 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
656 /*
657 * handle ||, &&, etc, and also things as in int *****i
658 */
660 *e_token++ = *buf_ptr;
661 if (++buf_ptr >= buf_end)
662 fill_buffer();
663 }
664 code = (state->last_u_d ? unary_op : binary_op);
665 unary_delim = true;
666
667
668 } /* end of switch */
669 if (buf_ptr >= buf_end) /* check for input buffer empty */
670 fill_buffer();
671 state->last_u_d = unary_delim;
673 *e_token = '\0'; /* null terminate the token */
674 return (code);
675}
676
677void
679{
680
681 typenames = (const char **)malloc(sizeof(typenames[0]) *
682 (typename_count = 16));
683 if (typenames == NULL)
684 err(1, NULL);
685}
686
687void
688add_typename(const char *key)
689{
690 int comparison;
691 const char *copy;
692
693 if (typename_top + 1 >= typename_count) {
694 typenames = realloc((void *)typenames,
695 sizeof(typenames[0]) * (typename_count *= 2));
696 if (typenames == NULL)
697 err(1, NULL);
698 }
699 if (typename_top == -1)
700 typenames[++typename_top] = copy = strdup(key);
701 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) {
702 /* take advantage of sorted input */
703 if (comparison == 0) /* remove duplicates */
704 return;
705 typenames[++typename_top] = copy = strdup(key);
706 }
707 else {
708 int p;
709
710 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++)
711 /* find place for the new key */;
712 if (comparison == 0) /* remove duplicates */
713 return;
714 memmove(&typenames[p + 1], &typenames[p],
715 sizeof(typenames[0]) * (++typename_top - p));
716 typenames[p] = copy = strdup(key);
717 }
718
719 if (copy == NULL)
720 err(1, NULL);
721}
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
void err(int eval, const char *fmt,...)
Definition err.c:43
int lookahead(void)
Definition io.c:275
void diag2(int, const char *)
Definition io.c:590
void lookahead_reset(void)
Definition io.c:320
void fill_buffer(void)
Definition io.c:346
#define comma
#define form_feed
#define lparen
#define swstmt
#define sp_nparen
#define rbrace
#define colon
#define postop
#define period
#define comment
#define question
#define lbrace
#define semicolon
#define ident
#define type_def
#define preesc
#define structure
#define funcname
#define binary_op
#define casestmt
#define storage
#define newline
#define sp_paren
#define decl
#define unary_op
#define strpfx
#define rparen
int auto_typedefs
#define token
int had_eof
#define CHECK_SIZE_TOKEN(desired_size)
char * buf_ptr
char * e_token
int line_no
char * buf_end
char * s_token
#define BACKSLASH
void add_typename(const char *key)
Definition lexi.c:688
char chartype[128]
Definition lexi.c:119
int typename_count
Definition lexi.c:116
static int strcmp_type(const void *e1, const void *e2)
Definition lexi.c:142
int typename_top
Definition lexi.c:117
int lexi(struct parser_state *state)
Definition lexi.c:216
const char ** typenames
Definition lexi.c:115
void alloc_typenames(void)
Definition lexi.c:678
static int is_func_definition(char *tp)
Definition lexi.c:160
#define alphanum
Definition lexi.c:55
struct templ specials[]
Definition lexi.c:69
const void size_t len
char * c
static int fb(int x)
#define realloc(a, b)
#define malloc(a)
Definition lexi.c:60
const char * rwd
Definition lexi.c:61
int rwcode
Definition lexi.c:62