PostgreSQL Source Code git master
Loading...
Searching...
No Matches
lexi.c File Reference
#include "c.h"
#include <err.h>
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "indent_globs.h"
#include "indent_codes.h"
#include "indent.h"
Include dependency graph for lexi.c:

Go to the source code of this file.

Data Structures

struct  templ
 

Macros

#define alphanum   1
 

Functions

static int strcmp_type (const void *e1, const void *e2)
 
static int is_func_definition (char *tp)
 
int lexi (struct parser_state *state)
 
void alloc_typenames (void)
 
void add_typename (const char *key)
 

Variables

struct templ specials []
 
const char ** typenames
 
int typename_count
 
int typename_top = -1
 
char chartype [128]
 

Macro Definition Documentation

◆ alphanum

#define alphanum   1

Definition at line 55 of file lexi.c.

Function Documentation

◆ add_typename()

void add_typename ( const char *key )

Definition at line 688 of file lexi.c.

689{
690 int comparison;
691 const char *copy;
692
693 if (typename_top + 1 >= typename_count) {
694 typenames = realloc((void *)typenames,
695 sizeof(typenames[0]) * (typename_count *= 2));
696 if (typenames == NULL)
697 err(1, NULL);
698 }
699 if (typename_top == -1)
700 typenames[++typename_top] = copy = strdup(key);
701 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) {
702 /* take advantage of sorted input */
703 if (comparison == 0) /* remove duplicates */
704 return;
705 typenames[++typename_top] = copy = strdup(key);
706 }
707 else {
708 int p;
709
710 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++)
711 /* find place for the new key */;
712 if (comparison == 0) /* remove duplicates */
713 return;
714 memmove(&typenames[p + 1], &typenames[p],
715 sizeof(typenames[0]) * (++typename_top - p));
716 typenames[p] = copy = strdup(key);
717 }
718
719 if (copy == NULL)
720 err(1, NULL);
721}
void err(int eval, const char *fmt,...)
Definition err.c:43
int typename_count
Definition lexi.c:116
int typename_top
Definition lexi.c:117
const char ** typenames
Definition lexi.c:115
static int fb(int x)
#define realloc(a, b)

References err(), fb(), realloc, typename_count, typename_top, and typenames.

Referenced by add_typedefs_from_file(), and set_option().

◆ alloc_typenames()

void alloc_typenames ( void  )

Definition at line 678 of file lexi.c.

679{
680
681 typenames = (const char **)malloc(sizeof(typenames[0]) *
682 (typename_count = 16));
683 if (typenames == NULL)
684 err(1, NULL);
685}
#define malloc(a)

References err(), fb(), malloc, typename_count, and typenames.

Referenced by main().

◆ is_func_definition()

static int is_func_definition ( char *tp )
static

Definition at line 160 of file lexi.c.

/*
 * is_func_definition -- scan forward from tp and report whether an open
 * brace is reached before a semicolon or comma.
 *
 * Characters are read from tp up to buf_end and, once the buffer is
 * exhausted, from lookahead().  The scan skips block comments and line
 * comments, tracks parenthesis nesting, and stops at the first decisive
 * character:
 *   - an open brace at nesting depth 0 returns true;
 *   - a semicolon or comma at nesting depth 0 returns false;
 *   - a close paren that drives the depth below zero returns false;
 *   - EOF from lookahead() ends the scan and returns false.
 * String and character literals are not recognized here, so punctuation
 * inside them is treated like any other character.
 *
 * NOTE(review): this listing jumps from source line 167 to 169, and the
 * reference list for this function names lookahead_reset(); presumably that
 * call sits on the missing line -- confirm against lexi.c.
 */
161{
162 int paren_depth = 0;
163 int in_comment = false;
164 int in_slash_comment = false;
165 int lastc = 0;
166
167 /* We may need to look past the end of the current buffer. */
169 for (;;) {
170 int c;
171
172 /* Fetch next character. */
173 if (tp < buf_end)
174 c = *tp++;
175 else {
176 c = lookahead();
177 if (c == EOF)
178 break;
179 }
180 /* Handle comments. */
/* lastc holds the previous character so two-character comment delimiters can be recognized. */
181 if (in_comment) {
182 if (lastc == '*' && c == '/')
183 in_comment = false;
184 } else if (lastc == '/' && c == '*' && !in_slash_comment)
185 in_comment = true;
186 else if (in_slash_comment) {
187 if (c == '\n')
188 in_slash_comment = false;
189 } else if (lastc == '/' && c == '/')
190 in_slash_comment = true;
191 /* Count nested parens properly. */
192 else if (c == '(')
193 paren_depth++;
194 else if (c == ')') {
195 paren_depth--;
196 /*
197 * If we find unbalanced parens, we must have started inside a
198 * declaration.
199 */
200 if (paren_depth < 0)
201 return false;
202 } else if (paren_depth == 0) {
203 /* We are outside any parentheses or comments. */
204 if (c == '{')
205 return true;
206 else if (c == ';' || c == ',')
207 return false;
208 }
209 lastc = c;
210 }
211 /* Hit EOF --- for lack of anything better, assume "not a definition". */
212 return false;
213}
int lookahead(void)
Definition io.c:275
void lookahead_reset(void)
Definition io.c:320
char * buf_end
char * c

References buf_end, fb(), lookahead(), and lookahead_reset().

Referenced by lexi().

◆ lexi()

int lexi ( struct parser_state *state )

Definition at line 216 of file lexi.c.

/*
 * lexi -- scan the next token from the input buffer and classify it.
 *
 * The token text is copied to the area that starts at s_token; e_token is
 * advanced past it and the text is NUL-terminated.  The return value is an
 * integer token code such as ident, decl, funcname, lparen, binary_op or
 * newline.
 *
 * Parser state updated along the way:
 *   state->col_1     set from the previous last_nl, then cleared if leading
 *                    blanks were skipped
 *   state->last_nl   cleared on entry; set again for a newline or form feed
 *   state->last_u_d  whether a following operator should be taken as unary
 *   state->keyword   0 for a plain identifier, the table rwcode for a
 *                    reserved word, 4 for a recognized type name
 *
 * Structure: the first large if-block handles tokens that start with an
 * alphanumeric character (numbers, reserved words, type names, identifiers,
 * function names); every other token is classified by the switch on the
 * first character of the token.
 *
 * NOTE(review): this listing omits several numbered source lines (264-265,
 * 280, 296, 323, 356, 363, 382, 416, 474, 491, 634, 659 and 672).  Both
 * goto targets (found_typename and stop_lit) and one bsearch argument are
 * among the lines not shown, so consult lexi.c itself before changing any
 * logic here.
 */
217{
218 int unary_delim; /* this is set to 1 if the current token
219 * forces a following operator to be unary */
220 int code; /* internal code to be returned */
221 char qchar; /* the delimiter character for a string */
222
223 e_token = s_token; /* point to start of place to save token */
224 unary_delim = false;
225 state->col_1 = state->last_nl; /* tell world that this token started
226 * in column 1 iff the last thing
227 * scanned was a newline */
228 state->last_nl = false;
229
230 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
231 state->col_1 = false; /* leading blanks imply token is not in column
232 * 1 */
233 if (++buf_ptr >= buf_end)
234 fill_buffer();
235 }
236
237 /* Scan an alphanumeric token */
238 if (chartype[*buf_ptr & 127] == alphanum ||
239 (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
240 /*
241 * we have a character or number
242 */
243 struct templ *p;
244
245 if (isdigit((unsigned char)*buf_ptr) ||
246 (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
247 int seendot = 0,
248 seenexp = 0,
249 seensfx = 0;
250
251 /*
252 * base 2, base 8, base 16:
253 */
254 if (buf_ptr[0] == '0' && buf_ptr[1] != '.') {
255 int len;
256
257 if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B')
258 len = strspn(buf_ptr + 2, "01") + 2;
259 else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')
260 len = strspn(buf_ptr + 2, "0123456789ABCDEFabcdef") + 2;
261 else
262 len = strspn(buf_ptr + 1, "012345678") + 1;
/* len is at least 1 on every path above, so the else branch at 269-270 cannot be taken as shown. */
263 if (len > 0) {
266 e_token += len;
267 buf_ptr += len;
268 }
269 else
270 diag2(1, "Unterminated literal");
271 }
272 else /* base 10: */
273 while (1) {
274 if (*buf_ptr == '.') {
275 if (seendot)
276 break;
277 else
278 seendot++;
279 }
281 *e_token++ = *buf_ptr++;
282 if (!isdigit((unsigned char)*buf_ptr) && *buf_ptr != '.') {
283 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
284 break;
285 else {
286 seenexp++;
287 seendot++;
288 *e_token++ = *buf_ptr++;
289 if (*buf_ptr == '+' || *buf_ptr == '-')
290 *e_token++ = *buf_ptr++;
291 }
292 }
293 }
294
/* Consume literal suffixes: bit 1 of seensfx records a U suffix, bit 2 an F or L suffix. */
295 while (1) {
297 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
298 *e_token++ = *buf_ptr++;
299 seensfx |= 1;
300 continue;
301 }
302 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
303 if (buf_ptr[1] == buf_ptr[0])
304 *e_token++ = *buf_ptr++;
305 *e_token++ = *buf_ptr++;
306 seensfx |= 2;
307 continue;
308 }
309 break;
310 }
311 }
312 else
313 while (chartype[*buf_ptr & 127] == alphanum || *buf_ptr == BACKSLASH) {
314 /* fill_buffer() terminates buffer with newline */
315 if (*buf_ptr == BACKSLASH) {
316 if (*(buf_ptr + 1) == '\n') {
317 buf_ptr += 2;
318 if (buf_ptr >= buf_end)
319 fill_buffer();
320 } else
321 break;
322 }
324 /* copy it over */
325 *e_token++ = *buf_ptr++;
326 if (buf_ptr >= buf_end)
327 fill_buffer();
328 }
329 *e_token = '\0';
330
/* A lone L directly followed by a quote is reported as a string prefix. */
331 if (s_token[0] == 'L' && s_token[1] == '\0' &&
332 (*buf_ptr == '"' || *buf_ptr == '\''))
333 return (strpfx);
334
335 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
336 if (++buf_ptr >= buf_end)
337 fill_buffer();
338 }
339 state->keyword = 0;
340 if (state->last_token == structure && !state->p_l_follow) {
341 /* if last token was 'struct' and we're not
342 * in parentheses, then this token
343 * should be treated as a declaration */
344 state->last_u_d = true;
345 return (decl);
346 }
347 /*
348 * Operator after identifier is binary unless last token was 'struct'
349 */
350 state->last_u_d = (state->last_token == structure);
351
352 p = bsearch(s_token,
353 specials,
354 sizeof(specials) / sizeof(specials[0]),
355 sizeof(specials[0]),
/* NOTE(review): source line 356 (the final bsearch argument) is not shown in this listing. */
357 if (p == NULL) { /* not a special keyword... */
358 char *u;
359
360 /* ... so maybe a type_t or a typedef */
361 if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) &&
362 strcmp(u, "_t") == 0) || (typename_top >= 0 &&
/* NOTE(review): source line 363 is not shown; the visible arguments suggest a bsearch over typenames -- confirm. */
364 sizeof(typenames[0]), strcmp_type))) {
365 state->keyword = 4; /* a type name */
366 if (state->last_token != period && state->last_token != unary_op)
367 state->last_u_d = true;
368 goto found_typename;
369 }
370 } else { /* we have a keyword */
371 state->keyword = p->rwcode;
372 state->last_u_d = true;
373 switch (p->rwcode) {
374 case 7: /* it is a switch */
375 return (swstmt);
376 case 8: /* a case or default */
377 return (casestmt);
378
379 case 3: /* a "struct" */
380 /* FALLTHROUGH */
381 case 4: /* one of the declaration keywords */
/* NOTE(review): source line 382 is not shown; the goto found_typename above presumably lands here -- confirm. */
383 if (state->p_l_follow) {
384 /* inside parens: cast, param list, offsetof or sizeof */
385 state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask;
386 }
387 if (state->last_token == period || state->last_token == unary_op) {
388 state->keyword = 0;
389 break;
390 }
391 if (p != NULL && p->rwcode == 3)
392 return (structure);
393 if (state->p_l_follow)
394 break;
395 return (decl);
396
397 case 5: /* if, while, for */
398 return (sp_paren);
399
400 case 6: /* do, else */
401 return (sp_nparen);
402
403 case 10: /* storage class specifier */
404 return (storage);
405
406 case 11: /* typedef */
407 return (type_def);
408
409 default: /* all others are treated like any other
410 * identifier */
411 return (ident);
412 } /* end of switch */
413 } /* end of if (found_it) */
414 if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 &&
415 state->in_parameter_declaration == 0 && state->block_init == 0) {
/* NOTE(review): source line 416 is not shown; the reference list names is_func_definition(), which presumably guards this block -- confirm. */
417 strncpy(state->procname, token, sizeof state->procname - 1);
418 if (state->in_decl)
419 state->in_parameter_declaration = 1;
420 return (funcname);
421 }
422 }
423 /*
424 * The following hack attempts to guess whether or not the current
425 * token is in fact a declaration keyword -- one that has been
426 * typedefd
427 */
428 else if (!state->p_l_follow && !state->block_init &&
429 !state->in_stmt &&
430 ((*buf_ptr == '*' && buf_ptr[1] != '=') ||
431 isalpha((unsigned char)*buf_ptr)) &&
432 (state->last_token == semicolon || state->last_token == lbrace ||
433 state->last_token == rbrace)) {
434 state->keyword = 4; /* a type name */
435 state->last_u_d = true;
436 return decl;
437 }
438 if (state->last_token == decl) /* if this is a declared variable,
439 * then following sign is unary */
440 state->last_u_d = true; /* will make "int a -1" work */
441 return (ident); /* the ident is not in the list */
442 } /* end of processing for alphanum character */
443
444 /* Scan a non-alphanumeric token */
445
446 CHECK_SIZE_TOKEN(3); /* things like "<<=" */
447 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
448 * moved here */
449 *e_token = '\0';
450 if (++buf_ptr >= buf_end)
451 fill_buffer();
452
453 switch (*token) {
454 case '\n':
455 unary_delim = state->last_u_d;
456 state->last_nl = true; /* remember that we just had a newline */
457 code = (had_eof ? 0 : newline);
458
459 /*
460 * if data has been exhausted, the newline is a dummy, and we should
461 * return code to stop
462 */
463 break;
464
465 case '\'': /* start of quoted character */
466 case '"': /* start of string */
467 qchar = *token;
468 do { /* copy the string */
469 while (1) { /* move one character or [/<char>]<char> */
470 if (*buf_ptr == '\n') {
471 diag2(1, "Unterminated literal");
472 goto stop_lit;
473 }
475 *e_token = *buf_ptr++;
476 if (buf_ptr >= buf_end)
477 fill_buffer();
478 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
479 if (*buf_ptr == '\n') /* check for escaped newline */
480 ++line_no;
481 *++e_token = *buf_ptr++;
482 ++e_token; /* we must increment this again because we
483 * copied two chars */
484 if (buf_ptr >= buf_end)
485 fill_buffer();
486 }
487 else
488 break; /* we copied one character */
489 } /* end of while (1) */
490 } while (*e_token++ != qchar);
/* NOTE(review): source line 491 is not shown; the goto stop_lit above presumably lands here -- confirm. */
492 code = ident;
493 break;
494
495 case ('('):
496 case ('['):
497 unary_delim = true;
498 code = lparen;
499 break;
500
501 case (')'):
502 case (']'):
503 code = rparen;
504 break;
505
506 case '#':
507 unary_delim = state->last_u_d;
508 code = preesc;
509 break;
510
511 case '?':
512 unary_delim = true;
513 code = question;
514 break;
515
516 case (':'):
517 code = colon;
518 unary_delim = true;
519 break;
520
521 case (';'):
522 unary_delim = true;
523 code = semicolon;
524 break;
525
526 case ('{'):
527 unary_delim = true;
528
529 /*
530 * if (state->in_or_st) state->block_init = 1;
531 */
532 /* ? code = state->block_init ? lparen : lbrace; */
533 code = lbrace;
534 break;
535
536 case ('}'):
537 unary_delim = true;
538 /* ? code = state->block_init ? rparen : rbrace; */
539 code = rbrace;
540 break;
541
542 case 014: /* a form feed */
543 unary_delim = state->last_u_d;
544 state->last_nl = true; /* remember this so we can set 'state->col_1'
545 * right */
546 code = form_feed;
547 break;
548
549 case (','):
550 unary_delim = true;
551 code = comma;
552 break;
553
554 case '.':
555 unary_delim = false;
556 code = period;
557 break;
558
559 case '-':
560 case '+': /* check for -, +, --, ++ */
561 code = (state->last_u_d ? unary_op : binary_op);
562 unary_delim = true;
563
564 if (*buf_ptr == token[0]) {
565 /* check for doubled character */
566 *e_token++ = *buf_ptr++;
567 /* buffer overflow will be checked at end of loop */
568 if (state->last_token == ident || state->last_token == rparen) {
569 code = (state->last_u_d ? unary_op : postop);
570 /* check for following ++ or -- */
571 unary_delim = false;
572 }
573 }
574 else if (*buf_ptr == '=')
575 /* check for operator += */
576 *e_token++ = *buf_ptr++;
577 else if (*buf_ptr == '>') {
578 /* check for operator -> */
579 *e_token++ = *buf_ptr++;
580 unary_delim = false;
581 code = unary_op;
582 state->want_blank = false;
583 }
584 break; /* buffer overflow will be checked at end of
585 * switch */
586
587 case '=':
588 if (state->in_or_st)
589 state->block_init = 1;
590#ifdef undef
591 if (chartype[*buf_ptr & 127] == opchar) { /* we have two char assignment */
592 e_token[-1] = *buf_ptr++;
593 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
594 *e_token++ = *buf_ptr++;
595 *e_token++ = '='; /* Flip =+ to += */
596 *e_token = 0;
597 }
598#else
599 if (*buf_ptr == '=') {/* == */
600 *e_token++ = '='; /* Flip =+ to += */
601 buf_ptr++;
602 *e_token = 0;
603 }
604#endif
605 code = binary_op;
606 unary_delim = true;
607 break;
608 /* can drop thru!!! */
609
610 case '>':
611 case '<':
612 case '!': /* ops like <, <<, <=, !=, etc */
613 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
614 *e_token++ = *buf_ptr;
615 if (++buf_ptr >= buf_end)
616 fill_buffer();
617 }
618 if (*buf_ptr == '=')
619 *e_token++ = *buf_ptr++;
620 code = (state->last_u_d ? unary_op : binary_op);
621 unary_delim = true;
622 break;
623
624 case '*':
625 unary_delim = true;
626 if (!state->last_u_d) {
627 if (*buf_ptr == '=')
628 *e_token++ = *buf_ptr++;
629 code = binary_op;
630 break;
631 }
/* In unary position: gather a run of stars into one token, skipping whitespace between them. */
632 while (*buf_ptr == '*' || isspace((unsigned char)*buf_ptr)) {
633 if (*buf_ptr == '*') {
635 *e_token++ = *buf_ptr;
636 }
637 if (++buf_ptr >= buf_end)
638 fill_buffer();
639 }
640 code = unary_op;
641 break;
642
643 default:
644 if (token[0] == '/' && *buf_ptr == '*') {
645 /* it is start of comment */
646 *e_token++ = '*';
647
648 if (++buf_ptr >= buf_end)
649 fill_buffer();
650
651 code = comment;
652 unary_delim = state->last_u_d;
653 break;
654 }
655 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
656 /*
657 * handle ||, &&, etc, and also things as in int *****i
658 */
660 *e_token++ = *buf_ptr;
661 if (++buf_ptr >= buf_end)
662 fill_buffer();
663 }
664 code = (state->last_u_d ? unary_op : binary_op);
665 unary_delim = true;
666
667
668 } /* end of switch */
669 if (buf_ptr >= buf_end) /* check for input buffer empty */
670 fill_buffer();
671 state->last_u_d = unary_delim;
673 *e_token = '\0'; /* null terminate the token */
674 return (code);
675}
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
void diag2(int, const char *)
Definition io.c:590
void fill_buffer(void)
Definition io.c:346
#define comma
#define form_feed
#define lparen
#define swstmt
#define sp_nparen
#define rbrace
#define colon
#define postop
#define period
#define comment
#define question
#define lbrace
#define semicolon
#define ident
#define type_def
#define preesc
#define structure
#define funcname
#define binary_op
#define casestmt
#define storage
#define newline
#define sp_paren
#define decl
#define unary_op
#define strpfx
#define rparen
int auto_typedefs
#define token
int had_eof
#define CHECK_SIZE_TOKEN(desired_size)
char * buf_ptr
char * e_token
int line_no
char * s_token
#define BACKSLASH
char chartype[128]
Definition lexi.c:119
static int strcmp_type(const void *e1, const void *e2)
Definition lexi.c:142
static int is_func_definition(char *tp)
Definition lexi.c:160
#define alphanum
Definition lexi.c:55
struct templ specials[]
Definition lexi.c:69
const void size_t len
Definition lexi.c:60
int rwcode
Definition lexi.c:62

References alphanum, auto_typedefs, BACKSLASH, binary_op, buf_end, buf_ptr, casestmt, chartype, CHECK_SIZE_TOKEN, colon, comma, comment, decl, diag2(), e_token, fb(), fill_buffer(), form_feed, funcname, had_eof, ident, is_func_definition(), lbrace, len, line_no, lparen, memcpy(), newline, period, postop, preesc, question, rbrace, rparen, templ::rwcode, s_token, semicolon, sp_nparen, sp_paren, specials, storage, strcmp_type(), strpfx, structure, swstmt, token, type_def, typename_top, typenames, and unary_op.

Referenced by main().

◆ strcmp_type()

static int strcmp_type ( const void *e1,
const void *e2
)
static

Definition at line 142 of file lexi.c.

/*
 * strcmp_type -- comparison callback for searching a table of strings.
 *
 * e1 is the search key, a NUL-terminated string.  e2 points at a table
 * element whose first (or only) member is a pointer to a NUL-terminated
 * string.  Returns the strcmp() ordering of the key against that string.
 */
static int
strcmp_type(const void *e1, const void *e2)
{
    const char *wanted = e1;
    const char *const *slot = e2;

    return strcmp(wanted, *slot);
}

References fb().

Referenced by lexi().

Variable Documentation

◆ chartype

char chartype[128]
Initial value:
=
{
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 3, 0, 0, 1, 3, 3, 0,
0, 0, 3, 3, 0, 3, 0, 3,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 0, 0, 3, 3, 3, 3,
0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 3, 1,
0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 3, 0, 3, 0
}

Definition at line 119 of file lexi.c.

/*
 * Character classification table, 128 entries laid out 8 per row in
 * ASCII order.  lexi() indexes it with the character masked to 7 bits
 * (ch & 127) and compares the entry against alphanum.
 *
 *   1  alphanum: digits, upper and lower case letters, underscore and
 *      the dollar sign
 *   3  operator characters (presumably the value of opchar, which is
 *      only referenced from disabled code -- confirm)
 *   0  everything else: control characters, space, quotes, brackets,
 *      braces, parentheses, comma, period, colon, semicolon, backslash
 */
120{ /* this is used to facilitate the decision of
121 * what type (alphanumeric, operator) each
122 * character is */
123 0, 0, 0, 0, 0, 0, 0, 0,
124 0, 0, 0, 0, 0, 0, 0, 0,
125 0, 0, 0, 0, 0, 0, 0, 0,
126 0, 0, 0, 0, 0, 0, 0, 0,
127 0, 3, 0, 0, 1, 3, 3, 0,
128 0, 0, 3, 3, 0, 3, 0, 3,
129 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 0, 0, 3, 3, 3, 3,
131 0, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 0, 0, 0, 3, 1,
135 0, 1, 1, 1, 1, 1, 1, 1,
136 1, 1, 1, 1, 1, 1, 1, 1,
137 1, 1, 1, 1, 1, 1, 1, 1,
138 1, 1, 1, 0, 3, 0, 3, 0
139};

Referenced by lexi().

◆ specials

struct templ specials[]

Definition at line 69 of file lexi.c.

/*
 * Reserved-word table: each entry pairs a word with its rwcode.
 *
 * lexi() looks tokens up here with bsearch(), so the entries must stay in
 * ascending strcmp() order (names starting with an underscore sort before
 * lower case letters).
 *
 * How lexi() treats each rwcode:
 *    3  struct, union, enum      -> structure
 *    4  declaration keywords     -> decl (outside parentheses)
 *    5  if, while, for           -> sp_paren
 *    6  do, else                 -> sp_nparen
 *    7  switch                   -> swstmt
 *    8  case, default            -> casestmt
 *   10  storage class specifiers -> storage
 *   11  typedef                  -> type_def
 * Any other code (1, 2, 9 and 12 in this table) is returned as ident, with
 * the code still recorded in state->keyword.
 */
70{
71 {"_Bool", 4},
72 {"_Complex", 4},
73 {"_Imaginary", 4},
74 {"auto", 10},
75 {"bool", 4},
76 {"break", 9},
77 {"case", 8},
78 {"char", 4},
79 {"complex", 4},
80 {"const", 4},
81 {"continue", 12},
82 {"default", 8},
83 {"do", 6},
84 {"double", 4},
85 {"else", 6},
86 {"enum", 3},
87 {"extern", 10},
88 {"float", 4},
89 {"for", 5},
90 {"global", 4},
91 {"goto", 9},
92 {"if", 5},
93 {"imaginary", 4},
94 {"inline", 12},
95 {"int", 4},
96 {"long", 4},
97 {"offsetof", 1},
98 {"register", 10},
99 {"restrict", 12},
100 {"return", 9},
101 {"short", 4},
102 {"signed", 4},
103 {"sizeof", 2},
104 {"static", 10},
105 {"struct", 3},
106 {"switch", 7},
107 {"typedef", 11},
108 {"union", 3},
109 {"unsigned", 4},
110 {"void", 4},
111 {"volatile", 4},
112 {"while", 5}
113};

Referenced by lexi().

◆ typename_count

int typename_count

Definition at line 116 of file lexi.c.

Referenced by add_typename(), and alloc_typenames().

◆ typename_top

int typename_top = -1

Definition at line 117 of file lexi.c.

Referenced by add_typename(), and lexi().

◆ typenames