PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
lexi.c File Reference
#include "c.h"
#include <err.h>
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "indent_globs.h"
#include "indent_codes.h"
#include "indent.h"
Include dependency graph for lexi.c:

Go to the source code of this file.

Data Structures

struct  templ
 

Macros

#define alphanum   1
 

Functions

static int strcmp_type (const void *e1, const void *e2)
 
static int is_func_definition (char *tp)
 
int lexi (struct parser_state *state)
 
void alloc_typenames (void)
 
void add_typename (const char *key)
 

Variables

struct templ specials []
 
const char ** typenames
 
int typename_count
 
int typename_top = -1
 
char chartype [128]
 

Macro Definition Documentation

◆ alphanum

#define alphanum   1

Definition at line 55 of file lexi.c.

Function Documentation

◆ add_typename()

void add_typename ( const char *  key)

Definition at line 687 of file lexi.c.

/*
 * add_typename: store a private copy of "key" in the typenames[] array,
 * keeping the array in strcmp() order and skipping strings that are
 * already present.
 *
 * typename_top is the index of the last used slot (-1 while the array is
 * empty); typename_count is the allocated capacity, which is doubled when
 * the next slot would not fit.  A NULL result from realloc() or strdup()
 * is handed to err(1, NULL).
 */
688{
689 int comparison;
690 const char *copy;
691
/* Grow the array first so every insertion path below has a free slot. */
692 if (typename_top + 1 >= typename_count) {
693 typenames = realloc((void *)typenames,
694 sizeof(typenames[0]) * (typename_count *= 2));
695 if (typenames == NULL)
696 err(1, NULL);
697 }
/* Empty array: the new key simply becomes the first entry. */
698 if (typename_top == -1)
699 typenames[++typename_top] = copy = strdup(key);
/* Key sorts at or after the current last entry: append (or drop a dup). */
700 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) {
701 /* take advantage of sorted input */
702 if (comparison == 0) /* remove duplicates */
703 return;
704 typenames[++typename_top] = copy = strdup(key);
705 }
706 else {
707 int p;
708
/*
 * Linear scan for the first entry that is not smaller than key.  The scan
 * stops before running off the end because this branch is only reached
 * when key compares below the last entry.
 */
709 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++)
710 /* find place for the new key */;
711 if (comparison == 0) /* remove duplicates */
712 return;
/*
 * Shift entries p..old top up by one slot; "++typename_top - p" is exactly
 * that number of entries, and the increment also records the new top.
 */
713 memmove(&typenames[p + 1], &typenames[p],
714 sizeof(typenames[0]) * (++typename_top - p));
715 typenames[p] = copy = strdup(key);
716 }
717
/* Every path reaching here called strdup(); check its result once. */
718 if (copy == NULL)
719 err(1, NULL);
720}
void err(int eval, const char *fmt,...)
Definition: err.c:43
#define realloc(a, b)
Definition: header.h:60
int typename_count
Definition: lexi.c:116
int typename_top
Definition: lexi.c:117
const char ** typenames
Definition: lexi.c:115

References err(), realloc, typename_count, typename_top, and typenames.

Referenced by add_typedefs_from_file(), and set_option().

◆ alloc_typenames()

void alloc_typenames ( void  )

Definition at line 677 of file lexi.c.

/*
 * alloc_typenames: allocate the initial typenames[] array with room for 16
 * string pointers and record that capacity in typename_count.  A NULL
 * result from malloc() is handed to err(1, NULL).
 */
678{
679
/* The assignment inside the size expression sets the capacity to 16. */
680 typenames = (const char **)malloc(sizeof(typenames[0]) *
681 (typename_count = 16));
682 if (typenames == NULL)
683 err(1, NULL);
684}
#define malloc(a)
Definition: header.h:50

References err(), malloc, typename_count, and typenames.

Referenced by main().

◆ is_func_definition()

static int is_func_definition ( char *  tp)
static

Definition at line 160 of file lexi.c.

/*
 * is_func_definition: scan forward from "tp" to decide whether the text
 * that follows leads into a function body.
 *
 * Characters are taken from the current buffer while tp < buf_end and from
 * lookahead() after that.  Text inside block comments and "//" comments is
 * ignored, and parentheses are counted.
 *
 * Returns true when a '{' is met outside any parentheses; returns false
 * when a ';' or ',' is met outside any parentheses, when a ')' appears
 * without a matching '(', or when lookahead() reports EOF.
 */
161{
162 int paren_depth = 0;
163 int in_comment = false;
164 int in_slash_comment = false;
165 int lastc = 0;
166
167 /* We may need to look past the end of the current buffer. */
168 lookahead_reset();
169 for (;;) {
170 int c;
171
172 /* Fetch next character. */
173 if (tp < buf_end)
174 c = *tp++;
175 else {
176 c = lookahead();
177 if (c == EOF)
178 break;
179 }
180 /* Handle comments. */
181 if (in_comment) {
182 if (lastc == '*' && c == '/')
183 in_comment = false;
/* A block-comment opener is not recognised inside a "//" comment. */
184 } else if (lastc == '/' && c == '*' && !in_slash_comment)
185 in_comment = true;
186 else if (in_slash_comment) {
/* A "//" comment runs to the end of the line. */
187 if (c == '\n')
188 in_slash_comment = false;
189 } else if (lastc == '/' && c == '/')
190 in_slash_comment = true;
191 /* Count nested parens properly. */
192 else if (c == '(')
193 paren_depth++;
194 else if (c == ')') {
195 paren_depth--;
196 /*
197 * If we find unbalanced parens, we must have started inside a
198 * declaration.
199 */
200 if (paren_depth < 0)
201 return false;
202 } else if (paren_depth == 0) {
203 /* We are outside any parentheses or comments. */
204 if (c == '{')
205 return true;
206 else if (c == ';' || c == ',')
207 return false;
208 }
/* Remember this character so two-character sequences can be detected. */
209 lastc = c;
210 }
211 /* Hit EOF --- for lack of anything better, assume "not a definition". */
212 return false;
213}
int lookahead(void)
Definition: io.c:275
void lookahead_reset(void)
Definition: io.c:320
char * buf_end
char * c

References buf_end, lookahead(), and lookahead_reset().

Referenced by lexi().

◆ lexi()

int lexi ( struct parser_state *  state)

Definition at line 216 of file lexi.c.

/*
 * lexi: scan the next token from the input buffer (buf_ptr .. buf_end),
 * copy its text into the token buffer (s_token .. e_token, terminated
 * with '\0') and return the token's code (ident, decl, lparen, ... as
 * defined in indent_codes.h).  For a newline token the return value is 0
 * when had_eof is set.
 *
 * Fields of *state written here: col_1, last_nl, keyword, last_u_d,
 * cast_mask, procname, in_parameter_declaration, block_init and
 * want_blank.  line_no is incremented for a backslash-newline inside a
 * quoted literal.
 *
 * NOTE(review): source line numbers 264, 280, 296, 323, 473, 633, 658 and
 * 671 are absent from this listing.  Presumably they are further
 * CHECK_SIZE_TOKEN() calls guarding the writes through e_token that follow
 * each gap -- confirm against the original file.
 */
217{
218 int unary_delim; /* this is set to 1 if the current token
219 * forces a following operator to be unary */
220 int code; /* internal code to be returned */
221 char qchar; /* the delimiter character for a string */
222
223 e_token = s_token; /* point to start of place to save token */
224 unary_delim = false;
225 state->col_1 = state->last_nl; /* tell world that this token started
226 * in column 1 iff the last thing
227 * scanned was a newline */
228 state->last_nl = false;
229
230 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
231 state->col_1 = false; /* leading blanks imply token is not in column
232 * 1 */
233 if (++buf_ptr >= buf_end)
234 fill_buffer();
235 }
236
237 /* Scan an alphanumeric token */
238 if (chartype[*buf_ptr & 127] == alphanum ||
239 (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
240 /*
241 * we have a character or number
242 */
243 struct templ *p;
244
245 if (isdigit((unsigned char)*buf_ptr) ||
246 (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
247 int seendot = 0,
248 seenexp = 0,
249 seensfx = 0;
250
251 /*
252 * base 2, base 8, base 16:
253 */
254 if (buf_ptr[0] == '0' && buf_ptr[1] != '.') {
255 int len;
256
257 if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B')
258 len = strspn(buf_ptr + 2, "01") + 2;
259 else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')
260 len = strspn(buf_ptr + 2, "0123456789ABCDEFabcdef") + 2;
261 else
262 len = strspn(buf_ptr + 1, "012345678") + 1;
/* len is at least 1 here because of the "+ 1" / "+ 2" added above. */
263 if (len > 0) {
265 memcpy(e_token, buf_ptr, len);
266 e_token += len;
267 buf_ptr += len;
268 }
269 else
270 diag2(1, "Unterminated literal");
271 }
272 else /* base 10: */
273 while (1) {
274 if (*buf_ptr == '.') {
275 if (seendot)
276 break;
277 else
278 seendot++;
279 }
281 *e_token++ = *buf_ptr++;
282 if (!isdigit((unsigned char)*buf_ptr) && *buf_ptr != '.') {
283 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
284 break;
285 else {
286 seenexp++;
287 seendot++;
288 *e_token++ = *buf_ptr++;
289 if (*buf_ptr == '+' || *buf_ptr == '-')
290 *e_token++ = *buf_ptr++;
291 }
292 }
293 }
294
/* Numeric suffixes: bit 1 of seensfx records U/u, bit 2 records f/F/l/L. */
295 while (1) {
297 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
298 *e_token++ = *buf_ptr++;
299 seensfx |= 1;
300 continue;
301 }
302 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
303 if (buf_ptr[1] == buf_ptr[0])
304 *e_token++ = *buf_ptr++;
305 *e_token++ = *buf_ptr++;
306 seensfx |= 2;
307 continue;
308 }
309 break;
310 }
311 }
312 else
313 while (chartype[*buf_ptr & 127] == alphanum || *buf_ptr == BACKSLASH) {
314 /* fill_buffer() terminates buffer with newline */
315 if (*buf_ptr == BACKSLASH) {
316 if (*(buf_ptr + 1) == '\n') {
317 buf_ptr += 2;
318 if (buf_ptr >= buf_end)
319 fill_buffer();
320 } else
321 break;
322 }
324 /* copy it over */
325 *e_token++ = *buf_ptr++;
326 if (buf_ptr >= buf_end)
327 fill_buffer();
328 }
329 *e_token = '\0';
330
/* A lone "L" directly before a quote is returned as a string prefix. */
331 if (s_token[0] == 'L' && s_token[1] == '\0' &&
332 (*buf_ptr == '"' || *buf_ptr == '\''))
333 return (strpfx);
334
335 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
336 if (++buf_ptr >= buf_end)
337 fill_buffer();
338 }
339 state->keyword = 0;
340 if (state->last_token == structure && !state->p_l_follow) {
341 /* if last token was 'struct' and we're not
342 * in parentheses, then this token
343 * should be treated as a declaration */
344 state->last_u_d = true;
345 return (decl);
346 }
347 /*
348 * Operator after identifier is binary unless last token was 'struct'
349 */
350 state->last_u_d = (state->last_token == structure);
351
/* Look the token up in the keyword table. */
352 p = bsearch(s_token,
353 specials,
354 sizeof(specials) / sizeof(specials[0]),
355 sizeof(specials[0]),
356 strcmp_type);
357 if (p == NULL) { /* not a special keyword... */
358 char *u;
359
360 /* ... so maybe a type_t or a typedef */
361 if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) &&
362 strcmp(u, "_t") == 0) || (typename_top >= 0 &&
363 bsearch(s_token, typenames, typename_top + 1,
364 sizeof(typenames[0]), strcmp_type))) {
365 state->keyword = 4; /* a type name */
366 state->last_u_d = true;
367 goto found_typename;
368 }
369 } else { /* we have a keyword */
370 state->keyword = p->rwcode;
371 state->last_u_d = true;
372 switch (p->rwcode) {
373 case 7: /* it is a switch */
374 return (swstmt);
375 case 8: /* a case or default */
376 return (casestmt);
377
378 case 3: /* a "struct" */
379 /* FALLTHROUGH */
380 case 4: /* one of the declaration keywords */
381 found_typename:
382 if (state->p_l_follow) {
383 /* inside parens: cast, param list, offsetof or sizeof */
384 state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask;
385 }
386 if (state->last_token == period || state->last_token == unary_op) {
387 state->keyword = 0;
388 break;
389 }
/* p is NULL when this point was reached via "goto found_typename". */
390 if (p != NULL && p->rwcode == 3)
391 return (structure);
392 if (state->p_l_follow)
393 break;
394 return (decl);
395
396 case 5: /* if, while, for */
397 return (sp_paren);
398
399 case 6: /* do, else */
400 return (sp_nparen);
401
402 case 10: /* storage class specifier */
403 return (storage);
404
405 case 11: /* typedef */
406 return (type_def);
407
408 default: /* all others are treated like any other
409 * identifier */
410 return (ident);
411 } /* end of switch */
412 } /* end of if (p == NULL) ... else */
413 if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 &&
414 state->in_parameter_declaration == 0 && state->block_init == 0) {
/* Only report a function name when a function body follows. */
415 if (is_func_definition(buf_ptr)) {
416 strncpy(state->procname, token, sizeof state->procname - 1);
417 if (state->in_decl)
418 state->in_parameter_declaration = 1;
419 return (funcname);
420 }
421 }
422 /*
423 * The following hack attempts to guess whether or not the current
424 * token is in fact a declaration keyword -- one that has been
425 * typedefd
426 */
427 else if (!state->p_l_follow && !state->block_init &&
428 !state->in_stmt &&
429 ((*buf_ptr == '*' && buf_ptr[1] != '=') ||
430 isalpha((unsigned char)*buf_ptr)) &&
431 (state->last_token == semicolon || state->last_token == lbrace ||
432 state->last_token == rbrace)) {
433 state->keyword = 4; /* a type name */
434 state->last_u_d = true;
435 return decl;
436 }
437 if (state->last_token == decl) /* if this is a declared variable,
438 * then following sign is unary */
439 state->last_u_d = true; /* will make "int a -1" work */
440 return (ident); /* the ident is not in the list */
441 } /* end of processing for alphanum character */
442
443 /* Scan a non-alphanumeric token */
444
445 CHECK_SIZE_TOKEN(3); /* things like "<<=" */
446 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
447 * moved here */
448 *e_token = '\0';
449 if (++buf_ptr >= buf_end)
450 fill_buffer();
451
452 switch (*token) {
453 case '\n':
454 unary_delim = state->last_u_d;
455 state->last_nl = true; /* remember that we just had a newline */
456 code = (had_eof ? 0 : newline);
457
458 /*
459 * if data has been exhausted, the newline is a dummy, and we should
460 * return code to stop
461 */
462 break;
463
464 case '\'': /* start of quoted character */
465 case '"': /* start of string */
466 qchar = *token;
467 do { /* copy the string */
468 while (1) { /* move one character or [/<char>]<char> */
469 if (*buf_ptr == '\n') {
470 diag2(1, "Unterminated literal");
471 goto stop_lit;
472 }
474 *e_token = *buf_ptr++;
475 if (buf_ptr >= buf_end)
476 fill_buffer();
477 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
478 if (*buf_ptr == '\n') /* check for escaped newline */
479 ++line_no;
480 *++e_token = *buf_ptr++;
481 ++e_token; /* we must increment this again because we
482 * copied two chars */
483 if (buf_ptr >= buf_end)
484 fill_buffer();
485 }
486 else
487 break; /* we copied one character */
488 } /* end of while (1) */
489 } while (*e_token++ != qchar);
490stop_lit:
491 code = ident;
492 break;
493
494 case ('('):
495 case ('['):
496 unary_delim = true;
497 code = lparen;
498 break;
499
500 case (')'):
501 case (']'):
502 code = rparen;
503 break;
504
505 case '#':
506 unary_delim = state->last_u_d;
507 code = preesc;
508 break;
509
510 case '?':
511 unary_delim = true;
512 code = question;
513 break;
514
515 case (':'):
516 code = colon;
517 unary_delim = true;
518 break;
519
520 case (';'):
521 unary_delim = true;
522 code = semicolon;
523 break;
524
525 case ('{'):
526 unary_delim = true;
527
528 /*
529 * if (state->in_or_st) state->block_init = 1;
530 */
531 /* ? code = state->block_init ? lparen : lbrace; */
532 code = lbrace;
533 break;
534
535 case ('}'):
536 unary_delim = true;
537 /* ? code = state->block_init ? rparen : rbrace; */
538 code = rbrace;
539 break;
540
541 case 014: /* a form feed */
542 unary_delim = state->last_u_d;
543 state->last_nl = true; /* remember this so we can set 'state->col_1'
544 * right */
545 code = form_feed;
546 break;
547
548 case (','):
549 unary_delim = true;
550 code = comma;
551 break;
552
553 case '.':
554 unary_delim = false;
555 code = period;
556 break;
557
558 case '-':
559 case '+': /* check for -, +, --, ++ */
560 code = (state->last_u_d ? unary_op : binary_op);
561 unary_delim = true;
562
563 if (*buf_ptr == token[0]) {
564 /* check for doubled character */
565 *e_token++ = *buf_ptr++;
566 /* buffer overflow will be checked at end of loop */
567 if (state->last_token == ident || state->last_token == rparen) {
568 code = (state->last_u_d ? unary_op : postop);
569 /* check for following ++ or -- */
570 unary_delim = false;
571 }
572 }
573 else if (*buf_ptr == '=')
574 /* check for operator += */
575 *e_token++ = *buf_ptr++;
576 else if (*buf_ptr == '>') {
577 /* check for operator -> */
578 *e_token++ = *buf_ptr++;
579 unary_delim = false;
580 code = unary_op;
581 state->want_blank = false;
582 }
583 break; /* buffer overflow will be checked at end of
584 * switch */
585
586 case '=':
587 if (state->in_or_st)
588 state->block_init = 1;
589#ifdef undef
590 if (chartype[*buf_ptr & 127] == opchar) { /* we have two char assignment */
591 e_token[-1] = *buf_ptr++;
592 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
593 *e_token++ = *buf_ptr++;
594 *e_token++ = '='; /* Flip =+ to += */
595 *e_token = 0;
596 }
597#else
598 if (*buf_ptr == '=') {/* == */
599 *e_token++ = '='; /* append the second '=' of "==" */
600 buf_ptr++;
601 *e_token = 0;
602 }
603#endif
604 code = binary_op;
605 unary_delim = true;
606 break;
607 /* not reached: the break above prevents falling through */
608
609 case '>':
610 case '<':
611 case '!': /* ops like <, <<, <=, !=, etc */
612 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
613 *e_token++ = *buf_ptr;
614 if (++buf_ptr >= buf_end)
615 fill_buffer();
616 }
617 if (*buf_ptr == '=')
618 *e_token++ = *buf_ptr++;
619 code = (state->last_u_d ? unary_op : binary_op);
620 unary_delim = true;
621 break;
622
623 case '*':
624 unary_delim = true;
625 if (!state->last_u_d) {
626 if (*buf_ptr == '=')
627 *e_token++ = *buf_ptr++;
628 code = binary_op;
629 break;
630 }
/* Unary context: gather a run of '*' into one token, skipping whitespace. */
631 while (*buf_ptr == '*' || isspace((unsigned char)*buf_ptr)) {
632 if (*buf_ptr == '*') {
634 *e_token++ = *buf_ptr;
635 }
636 if (++buf_ptr >= buf_end)
637 fill_buffer();
638 }
639 code = unary_op;
640 break;
641
642 default:
643 if (token[0] == '/' && *buf_ptr == '*') {
644 /* it is start of comment */
645 *e_token++ = '*';
646
647 if (++buf_ptr >= buf_end)
648 fill_buffer();
649
650 code = comment;
651 unary_delim = state->last_u_d;
652 break;
653 }
654 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
655 /*
656 * handle ||, &&, etc, and also things as in int *****i
657 */
659 *e_token++ = *buf_ptr;
660 if (++buf_ptr >= buf_end)
661 fill_buffer();
662 }
663 code = (state->last_u_d ? unary_op : binary_op);
664 unary_delim = true;
665
666
667 } /* end of switch */
668 if (buf_ptr >= buf_end) /* check for input buffer empty */
669 fill_buffer();
670 state->last_u_d = unary_delim;
672 *e_token = '\0'; /* null terminate the token */
673 return (code);
674}
void diag2(int, const char *)
Definition: io.c:590
void fill_buffer(void)
Definition: io.c:346
#define comma
Definition: indent_codes.h:48
#define form_feed
Definition: indent_codes.h:52
#define lparen
Definition: indent_codes.h:36
#define swstmt
Definition: indent_codes.h:50
#define sp_nparen
Definition: indent_codes.h:55
#define rbrace
Definition: indent_codes.h:46
#define colon
Definition: indent_codes.h:43
#define postop
Definition: indent_codes.h:40
#define period
Definition: indent_codes.h:66
#define comment
Definition: indent_codes.h:49
#define question
Definition: indent_codes.h:41
#define lbrace
Definition: indent_codes.h:45
#define semicolon
Definition: indent_codes.h:44
#define ident
Definition: indent_codes.h:47
#define type_def
Definition: indent_codes.h:70
#define preesc
Definition: indent_codes.h:51
#define structure
Definition: indent_codes.h:71
#define funcname
Definition: indent_codes.h:69
#define binary_op
Definition: indent_codes.h:39
#define casestmt
Definition: indent_codes.h:42
#define storage
Definition: indent_codes.h:68
#define newline
Definition: indent_codes.h:35
#define sp_paren
Definition: indent_codes.h:54
#define decl
Definition: indent_codes.h:53
#define unary_op
Definition: indent_codes.h:38
#define strpfx
Definition: indent_codes.h:67
#define rparen
Definition: indent_codes.h:37
int auto_typedefs
#define token
Definition: indent_globs.h:126
int had_eof
#define CHECK_SIZE_TOKEN(desired_size)
Definition: indent_globs.h:99
char * buf_ptr
char * e_token
int line_no
char * s_token
#define BACKSLASH
Definition: indent_globs.h:35
char chartype[128]
Definition: lexi.c:119
static int strcmp_type(const void *e1, const void *e2)
Definition: lexi.c:142
static int is_func_definition(char *tp)
Definition: lexi.c:160
#define alphanum
Definition: lexi.c:55
struct templ specials[]
Definition: lexi.c:69
const void size_t len
Definition: regguts.h:323
struct templ
Definition: lexi.c:60
int rwcode
Definition: lexi.c:62

References alphanum, auto_typedefs, BACKSLASH, binary_op, buf_end, buf_ptr, casestmt, chartype, CHECK_SIZE_TOKEN, colon, comma, comment, decl, diag2(), e_token, fill_buffer(), form_feed, funcname, had_eof, ident, is_func_definition(), lbrace, len, line_no, lparen, newline, period, postop, preesc, question, rbrace, rparen, templ::rwcode, s_token, semicolon, sp_nparen, sp_paren, specials, storage, strcmp_type(), strpfx, structure, swstmt, token, type_def, typename_top, typenames, and unary_op.

Referenced by main().

◆ strcmp_type()

static int strcmp_type ( const void *  e1,
const void *  e2 
)
static

Definition at line 142 of file lexi.c.

/*
 * strcmp_type: comparison callback with the (key, element) signature that
 * bsearch() expects; lexi() passes it to bsearch() over typenames[].
 *
 * e1 is used directly as a NUL-terminated string.  e2 is treated as a
 * pointer to a "const char *" and dereferenced once to obtain the string
 * to compare against.  The result is that of strcmp().
 */
143{
144 return (strcmp(e1, *(const char * const *)e2));
145}

Referenced by lexi().

Variable Documentation

◆ chartype

char chartype[128]
Initial value:
=
{
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 3, 0, 0, 1, 3, 3, 0,
0, 0, 3, 3, 0, 3, 0, 3,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 0, 0, 3, 3, 3, 3,
0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 3, 1,
0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 3, 0, 3, 0
}

Definition at line 119 of file lexi.c.

Referenced by lexi().

◆ specials

struct templ specials[]

Definition at line 69 of file lexi.c.

Referenced by lexi().

◆ typename_count

int typename_count

Definition at line 116 of file lexi.c.

Referenced by add_typename(), and alloc_typenames().

◆ typename_top

int typename_top = -1

Definition at line 117 of file lexi.c.

Referenced by add_typename(), and lexi().

◆ typenames