PostgreSQL Source Code  git master
lexi.c File Reference
#include "c.h"
#include <err.h>
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "indent_globs.h"
#include "indent_codes.h"
#include "indent.h"
Include dependency graph for lexi.c:

Go to the source code of this file.

Data Structures

struct  templ
 

Macros

#define alphanum   1
 

Functions

static int strcmp_type (const void *e1, const void *e2)
 
static int is_func_definition (char *tp)
 
int lexi (struct parser_state *state)
 
void alloc_typenames (void)
 
void add_typename (const char *key)
 

Variables

struct templ specials []
 
const char ** typenames
 
int typename_count
 
int typename_top = -1
 
char chartype [128]
 

Macro Definition Documentation

◆ alphanum

#define alphanum   1

Definition at line 55 of file lexi.c.

Function Documentation

◆ add_typename()

void add_typename ( const char *  key)

Definition at line 687 of file lexi.c.

/*
 * add_typename: insert a private copy of "key" into the global "typenames"
 * array, keeping the array sorted in strcmp() order and free of duplicates.
 *
 * key: NUL-terminated type name.  It is duplicated with strdup(), so the
 *      caller's string is not retained.
 *
 * The array is grown by doubling typename_count when the next slot would not
 * fit.  This relies on typename_count already being non-zero, which
 * alloc_typenames() arranges by setting it to 16.
 *
 * A failed realloc() or strdup() is reported through err(1, NULL)
 * (presumably that call does not return -- confirm against err.c).
 */
688 {
689  int comparison;
690  const char *copy;
691 
692  if (typename_top + 1 >= typename_count) {
693  typenames = realloc((void *)typenames,
694  sizeof(typenames[0]) * (typename_count *= 2));
695  if (typenames == NULL)
696  err(1, NULL);
697  }
/* typename_top == -1 means the table is still empty */
698  if (typename_top == -1)
699  typenames[++typename_top] = copy = strdup(key);
700  else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) {
701  /* take advantage of sorted input */
702  if (comparison == 0) /* remove duplicates */
703  return;
704  typenames[++typename_top] = copy = strdup(key);
705  }
706  else {
707  int p;
708 
/* key sorts before the current last entry, so this linear scan always stops */
709  for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++)
710  /* find place for the new key */;
711  if (comparison == 0) /* remove duplicates */
712  return;
/* open a gap at p: the pre-increment makes the element count (new top - p) */
713  memmove(&typenames[p + 1], &typenames[p],
714  sizeof(typenames[0]) * (++typename_top - p));
715  typenames[p] = copy = strdup(key);
716  }
717 
/* every path that reaches this point assigned copy from strdup() */
718  if (copy == NULL)
719  err(1, NULL);
720 }
void err(int eval, const char *fmt,...)
Definition: err.c:43
#define realloc(a, b)
Definition: header.h:60
int typename_count
Definition: lexi.c:116
int typename_top
Definition: lexi.c:117
const char ** typenames
Definition: lexi.c:115

References err(), realloc, typename_count, typename_top, and typenames.

Referenced by add_typedefs_from_file(), and set_option().

◆ alloc_typenames()

void alloc_typenames ( void  )

Definition at line 677 of file lexi.c.

/*
 * alloc_typenames: allocate the initial "typenames" array with room for 16
 * entries and record that capacity in typename_count.
 *
 * typename_top is not modified here.  A failed malloc() is reported through
 * err(1, NULL).
 */
678 {
679 
680  typenames = (const char **)malloc(sizeof(typenames[0]) *
681  (typename_count = 16));
682  if (typenames == NULL)
683  err(1, NULL);
684 }
#define malloc(a)
Definition: header.h:50

References err(), malloc, typename_count, and typenames.

Referenced by main().

◆ is_func_definition()

static int is_func_definition ( char *  tp)
static

Definition at line 160 of file lexi.c.

/*
 * is_func_definition: scan forward from tp and guess whether the text that
 * follows belongs to a function definition.
 *
 * tp: position in the current input buffer at which to start scanning.
 *     Characters are taken from the buffer up to buf_end and from
 *     lookahead() after that.
 *
 * Returns true when a '{' is met outside parentheses and comments.
 * Returns false when a ';' or ',' is met there, when a ')' closes more
 * parentheses than were opened during the scan, or when lookahead() reports
 * EOF.
 *
 * Both comment styles are skipped.  String and character literals get no
 * special treatment in this scan.
 */
161 {
162  int paren_depth = 0;
163  int in_comment = false;
164  int in_slash_comment = false;
165  int lastc = 0;
166 
167  /* We may need to look past the end of the current buffer. */
168  lookahead_reset();
169  for (;;) {
170  int c;
171 
172  /* Fetch next character. */
173  if (tp < buf_end)
174  c = *tp++;
175  else {
176  c = lookahead();
177  if (c == EOF)
178  break;
179  }
180  /* Handle comments. */
181  if (in_comment) {
182  if (lastc == '*' && c == '/')
183  in_comment = false;
184  } else if (lastc == '/' && c == '*' && !in_slash_comment)
185  in_comment = true;
186  else if (in_slash_comment) {
187  if (c == '\n')
188  in_slash_comment = false;
189  } else if (lastc == '/' && c == '/')
190  in_slash_comment = true;
191  /* Count nested parens properly. */
192  else if (c == '(')
193  paren_depth++;
194  else if (c == ')') {
195  paren_depth--;
196  /*
197  * If we find unbalanced parens, we must have started inside a
198  * declaration.
199  */
200  if (paren_depth < 0)
201  return false;
202  } else if (paren_depth == 0) {
203  /* We are outside any parentheses or comments. */
204  if (c == '{')
205  return true;
206  else if (c == ';' || c == ',')
207  return false;
208  }
/* keep the previous character so two-character comment delimiters can be matched */
209  lastc = c;
210  }
211  /* Hit EOF --- for lack of anything better, assume "not a definition". */
212  return false;
213 }
int lookahead(void)
Definition: io.c:275
void lookahead_reset(void)
Definition: io.c:320
char * buf_end
char * c

References buf_end, lookahead(), and lookahead_reset().

Referenced by lexi().

◆ lexi()

int lexi ( struct parser_state state)

Definition at line 216 of file lexi.c.

/*
 * lexi: scan the next token from the input buffer (buf_ptr) into the token
 * save area (s_token .. e_token) and return its token code.
 *
 * state: parser state.  Fields such as last_token, last_nl, last_u_d,
 *        p_l_follow, in_stmt, in_decl and in_or_st are read.  col_1, last_nl,
 *        keyword, last_u_d, cast_mask, procname, in_parameter_declaration,
 *        block_init and want_blank may be written.
 *
 * Returns one of the token codes used below (ident, decl, funcname,
 * binary_op, ...), or 0 for the dummy newline once had_eof is set.
 *
 * Overall flow:
 *   1. Skip leading blanks and tabs.
 *   2. If the token starts with an alphanumeric character (per chartype[]) or
 *      is a number beginning with '.', collect it.  Numbers return ident;
 *      words are looked up in specials[] and then in the typenames table.
 *   3. Otherwise collect an operator, punctuation or literal token in the
 *      big switch, then NUL-terminate the token and store unary_delim in
 *      state->last_u_d.
 *
 * NOTE(review): this listing skips source lines 264 and 415 (see the notes at
 * those points), so the code as shown is not complete.
 */
217 {
218  int unary_delim; /* this is set to 1 if the current token
219  * forces a following operator to be unary */
220  int code; /* internal code to be returned */
221  char qchar; /* the delimiter character for a string */
222 
223  e_token = s_token; /* point to start of place to save token */
224  unary_delim = false;
225  state->col_1 = state->last_nl; /* tell world that this token started
226  * in column 1 iff the last thing
227  * scanned was a newline */
228  state->last_nl = false;
229 
230  while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
231  state->col_1 = false; /* leading blanks imply token is not in column
232  * 1 */
233  if (++buf_ptr >= buf_end)
234  fill_buffer();
235  }
236 
237  /* Scan an alphanumeric token */
238  if (chartype[*buf_ptr & 127] == alphanum ||
239  (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
240  /*
241  * we have a character or number
242  */
243  struct templ *p;
244 
245  if (isdigit((unsigned char)*buf_ptr) ||
246  (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
/* numeric literal: seensfx is a bit mask, 1 = U/u suffix seen, 2 = f/F/l/L suffix seen */
247  int seendot = 0,
248  seenexp = 0,
249  seensfx = 0;
250 
251  /*
252  * base 2, base 8, base 16:
253  */
254  if (buf_ptr[0] == '0' && buf_ptr[1] != '.') {
255  int len;
256 
257  if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B')
258  len = strspn(buf_ptr + 2, "01") + 2;
259  else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')
260  len = strspn(buf_ptr + 2, "0123456789ABCDEFabcdef") + 2;
261  else
/* NOTE(review): the digit set below includes '8', which is not an octal digit -- confirm this is intended */
262  len = strspn(buf_ptr + 1, "012345678") + 1;
263  if (len > 0) {
/* NOTE(review): the listing jumps from source line 263 to 265, so line 264 is not shown; no CHECK_SIZE_TOKEN call is visible ahead of the memcpy() below -- confirm against the real file */
265  memcpy(e_token, buf_ptr, len);
266  e_token += len;
267  buf_ptr += len;
268  }
269  else
270  diag2(1, "Unterminated literal");
271  }
272  else /* base 10: */
273  while (1) {
274  if (*buf_ptr == '.') {
275  if (seendot)
276  break;
277  else
278  seendot++;
279  }
280  CHECK_SIZE_TOKEN(3);
281  *e_token++ = *buf_ptr++;
282  if (!isdigit((unsigned char)*buf_ptr) && *buf_ptr != '.') {
283  if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
284  break;
285  else {
/* exponent marker: setting seendot as well stops a later '.' from being accepted */
286  seenexp++;
287  seendot++;
288  *e_token++ = *buf_ptr++;
289  if (*buf_ptr == '+' || *buf_ptr == '-')
290  *e_token++ = *buf_ptr++;
291  }
292  }
293  }
294 
/* collect literal suffixes: at most one U/u and one f/F/l/L group (a doubled letter such as "ll" is taken as a pair) */
295  while (1) {
296  CHECK_SIZE_TOKEN(2);
297  if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
298  *e_token++ = *buf_ptr++;
299  seensfx |= 1;
300  continue;
301  }
302  if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
303  if (buf_ptr[1] == buf_ptr[0])
304  *e_token++ = *buf_ptr++;
305  *e_token++ = *buf_ptr++;
306  seensfx |= 2;
307  continue;
308  }
309  break;
310  }
311  }
312  else
/* identifier or keyword: a backslash followed by newline is skipped, any other backslash ends the word */
313  while (chartype[*buf_ptr & 127] == alphanum || *buf_ptr == BACKSLASH) {
314  /* fill_buffer() terminates buffer with newline */
315  if (*buf_ptr == BACKSLASH) {
316  if (*(buf_ptr + 1) == '\n') {
317  buf_ptr += 2;
318  if (buf_ptr >= buf_end)
319  fill_buffer();
320  } else
321  break;
322  }
323  CHECK_SIZE_TOKEN(1);
324  /* copy it over */
325  *e_token++ = *buf_ptr++;
326  if (buf_ptr >= buf_end)
327  fill_buffer();
328  }
329  *e_token = '\0';
330 
/* a lone "L" directly followed by a quote is returned as a string prefix token */
331  if (s_token[0] == 'L' && s_token[1] == '\0' &&
332  (*buf_ptr == '"' || *buf_ptr == '\''))
333  return (strpfx);
334 
335  while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
336  if (++buf_ptr >= buf_end)
337  fill_buffer();
338  }
339  state->keyword = 0;
340  if (state->last_token == structure && !state->p_l_follow) {
341  /* if last token was 'struct' and we're not
342  * in parentheses, then this token
343  * should be treated as a declaration */
344  state->last_u_d = true;
345  return (decl);
346  }
347  /*
348  * Operator after identifier is binary unless last token was 'struct'
349  */
350  state->last_u_d = (state->last_token == structure);
351 
/* keyword lookup: bsearch() over specials[] with strcmp_type as the comparator */
352  p = bsearch(s_token,
353  specials,
354  sizeof(specials) / sizeof(specials[0]),
355  sizeof(specials[0]),
356  strcmp_type);
357  if (p == NULL) { /* not a special keyword... */
358  char *u;
359 
360  /* ... so maybe a type_t or a typedef */
361  if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) &&
362  strcmp(u, "_t") == 0) || (typename_top >= 0 &&
363  bsearch(s_token, typenames, typename_top + 1,
364  sizeof(typenames[0]), strcmp_type))) {
365  state->keyword = 4; /* a type name */
366  state->last_u_d = true;
/* jumps into the switch below with p == NULL, which is why that code tests p before using it */
367  goto found_typename;
368  }
369  } else { /* we have a keyword */
370  state->keyword = p->rwcode;
371  state->last_u_d = true;
372  switch (p->rwcode) {
373  case 7: /* it is a switch */
374  return (swstmt);
375  case 8: /* a case or default */
376  return (casestmt);
377 
378  case 3: /* a "struct" */
379  /* FALLTHROUGH */
380  case 4: /* one of the declaration keywords */
381  found_typename:
382  if (state->p_l_follow) {
383  /* inside parens: cast, param list, offsetof or sizeof */
384  state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask;
385  }
386  if (state->last_token == period || state->last_token == unary_op) {
387  state->keyword = 0;
388  break;
389  }
390  if (p != NULL && p->rwcode == 3)
391  return (structure);
392  if (state->p_l_follow)
393  break;
394  return (decl);
395 
396  case 5: /* if, while, for */
397  return (sp_paren);
398 
399  case 6: /* do, else */
400  return (sp_nparen);
401 
402  case 10: /* storage class specifier */
403  return (storage);
404 
405  case 11: /* typedef */
406  return (type_def);
407 
408  default: /* all others are treated like any other
409  * identifier */
410  return (ident);
411  } /* end of switch */
412  } /* end of if (found_it) */
413  if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 &&
414  state->in_parameter_declaration == 0 && state->block_init == 0) {
/* NOTE(review): source line 415 is absent from this listing.  The braces below only balance if that line opened a block, and this page lists is_func_definition() as referenced by lexi() although no call is visible, so the missing line is presumably an "if (is_func_definition(...)) {" test -- confirm against the real file */
/* NOTE(review): strncpy() copies at most sizeof procname - 1 bytes and adds no terminator when the token is longer; presumably the final byte of procname is already NUL -- confirm */
416  strncpy(state->procname, token, sizeof state->procname - 1);
417  if (state->in_decl)
418  state->in_parameter_declaration = 1;
419  return (funcname);
420  }
421  }
422  /*
423  * The following hack attempts to guess whether or not the current
424  * token is in fact a declaration keyword -- one that has been
425  * typedefd
426  */
427  else if (!state->p_l_follow && !state->block_init &&
428  !state->in_stmt &&
429  ((*buf_ptr == '*' && buf_ptr[1] != '=') ||
430  isalpha((unsigned char)*buf_ptr)) &&
431  (state->last_token == semicolon || state->last_token == lbrace ||
432  state->last_token == rbrace)) {
433  state->keyword = 4; /* a type name */
434  state->last_u_d = true;
435  return decl;
436  }
437  if (state->last_token == decl) /* if this is a declared variable,
438  * then following sign is unary */
439  state->last_u_d = true; /* will make "int a -1" work */
440  return (ident); /* the ident is not in the list */
441  } /* end of processing for alphanum character */
442 
443  /* Scan a non-alphanumeric token */
444 
445  CHECK_SIZE_TOKEN(3); /* things like "<<=" */
446  *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
447  * moved here */
448  *e_token = '\0';
449  if (++buf_ptr >= buf_end)
450  fill_buffer();
451 
/* dispatch on the first character of the token just stored */
452  switch (*token) {
453  case '\n':
454  unary_delim = state->last_u_d;
455  state->last_nl = true; /* remember that we just had a newline */
456  code = (had_eof ? 0 : newline);
457 
458  /*
459  * if data has been exhausted, the newline is a dummy, and we should
460  * return code to stop
461  */
462  break;
463 
464  case '\'': /* start of quoted character */
465  case '"': /* start of string */
466  qchar = *token;
467  do { /* copy the string */
468  while (1) { /* move one character or [/<char>]<char> */
469  if (*buf_ptr == '\n') {
470  diag2(1, "Unterminated literal");
471  goto stop_lit;
472  }
473  CHECK_SIZE_TOKEN(2);
474  *e_token = *buf_ptr++;
475  if (buf_ptr >= buf_end)
476  fill_buffer();
477  if (*e_token == BACKSLASH) { /* if escape, copy extra char */
478  if (*buf_ptr == '\n') /* check for escaped newline */
479  ++line_no;
480  *++e_token = *buf_ptr++;
481  ++e_token; /* we must increment this again because we
482  * copied two chars */
483  if (buf_ptr >= buf_end)
484  fill_buffer();
485  }
486  else
487  break; /* we copied one character */
488  } /* end of while (1) */
489  } while (*e_token++ != qchar);
490 stop_lit:
/* string and character literals are reported to the caller as ident */
491  code = ident;
492  break;
493 
494  case ('('):
495  case ('['):
496  unary_delim = true;
497  code = lparen;
498  break;
499 
500  case (')'):
501  case (']'):
502  code = rparen;
503  break;
504 
505  case '#':
506  unary_delim = state->last_u_d;
507  code = preesc;
508  break;
509 
510  case '?':
511  unary_delim = true;
512  code = question;
513  break;
514 
515  case (':'):
516  code = colon;
517  unary_delim = true;
518  break;
519 
520  case (';'):
521  unary_delim = true;
522  code = semicolon;
523  break;
524 
525  case ('{'):
526  unary_delim = true;
527 
528  /*
529  * if (state->in_or_st) state->block_init = 1;
530  */
531  /* ? code = state->block_init ? lparen : lbrace; */
532  code = lbrace;
533  break;
534 
535  case ('}'):
536  unary_delim = true;
537  /* ? code = state->block_init ? rparen : rbrace; */
538  code = rbrace;
539  break;
540 
541  case 014: /* a form feed */
542  unary_delim = state->last_u_d;
543  state->last_nl = true; /* remember this so we can set 'state->col_1'
544  * right */
545  code = form_feed;
546  break;
547 
548  case (','):
549  unary_delim = true;
550  code = comma;
551  break;
552 
553  case '.':
554  unary_delim = false;
555  code = period;
556  break;
557 
558  case '-':
559  case '+': /* check for -, +, --, ++ */
560  code = (state->last_u_d ? unary_op : binary_op);
561  unary_delim = true;
562 
563  if (*buf_ptr == token[0]) {
564  /* check for doubled character */
565  *e_token++ = *buf_ptr++;
566  /* buffer overflow will be checked at end of loop */
567  if (state->last_token == ident || state->last_token == rparen) {
568  code = (state->last_u_d ? unary_op : postop);
569  /* check for following ++ or -- */
570  unary_delim = false;
571  }
572  }
573  else if (*buf_ptr == '=')
574  /* check for operator += */
575  *e_token++ = *buf_ptr++;
576  else if (*buf_ptr == '>') {
577  /* check for operator -> */
578  *e_token++ = *buf_ptr++;
579  unary_delim = false;
580  code = unary_op;
581  state->want_blank = false;
582  }
583  break; /* buffer overflow will be checked at end of
584  * switch */
585 
586  case '=':
587  if (state->in_or_st)
588  state->block_init = 1;
589 #ifdef undef
590  if (chartype[*buf_ptr & 127] == opchar) { /* we have two char assignment */
591  e_token[-1] = *buf_ptr++;
592  if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
593  *e_token++ = *buf_ptr++;
594  *e_token++ = '='; /* Flip =+ to += */
595  *e_token = 0;
596  }
597 #else
/* NOTE(review): in this branch the code only recognizes "=="; the "Flip =+ to +=" remark below describes the disabled branch above, not this one */
598  if (*buf_ptr == '=') {/* == */
599  *e_token++ = '='; /* Flip =+ to += */
600  buf_ptr++;
601  *e_token = 0;
602  }
603 #endif
604  code = binary_op;
605  unary_delim = true;
606  break;
/* NOTE(review): the break above means control does not fall into the next case, so the remark below does not match the code as shown */
607  /* can drop thru!!! */
608 
609  case '>':
610  case '<':
611  case '!': /* ops like <, <<, <=, !=, etc */
612  if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
613  *e_token++ = *buf_ptr;
614  if (++buf_ptr >= buf_end)
615  fill_buffer();
616  }
617  if (*buf_ptr == '=')
618  *e_token++ = *buf_ptr++;
619  code = (state->last_u_d ? unary_op : binary_op);
620  unary_delim = true;
621  break;
622 
623  case '*':
624  unary_delim = true;
625  if (!state->last_u_d) {
626  if (*buf_ptr == '=')
627  *e_token++ = *buf_ptr++;
628  code = binary_op;
629  break;
630  }
/* unary '*': gather any further '*' characters into the token, dropping whitespace between them */
631  while (*buf_ptr == '*' || isspace((unsigned char)*buf_ptr)) {
632  if (*buf_ptr == '*') {
633  CHECK_SIZE_TOKEN(1);
634  *e_token++ = *buf_ptr;
635  }
636  if (++buf_ptr >= buf_end)
637  fill_buffer();
638  }
639  code = unary_op;
640  break;
641 
642  default:
643  if (token[0] == '/' && *buf_ptr == '*') {
644  /* it is start of comment */
645  *e_token++ = '*';
646 
647  if (++buf_ptr >= buf_end)
648  fill_buffer();
649 
650  code = comment;
651  unary_delim = state->last_u_d;
652  break;
653  }
654  while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
655  /*
656  * handle ||, &&, etc, and also things as in int *****i
657  */
658  CHECK_SIZE_TOKEN(1);
659  *e_token++ = *buf_ptr;
660  if (++buf_ptr >= buf_end)
661  fill_buffer();
662  }
663  code = (state->last_u_d ? unary_op : binary_op);
664  unary_delim = true;
665 
666 
667  } /* end of switch */
668  if (buf_ptr >= buf_end) /* check for input buffer empty */
669  fill_buffer();
670  state->last_u_d = unary_delim;
671  CHECK_SIZE_TOKEN(1);
672  *e_token = '\0'; /* null terminate the token */
673  return (code);
674 }
void diag2(int, const char *)
Definition: io.c:590
void fill_buffer(void)
Definition: io.c:346
#define comma
Definition: indent_codes.h:48
#define form_feed
Definition: indent_codes.h:52
#define lparen
Definition: indent_codes.h:36
#define swstmt
Definition: indent_codes.h:50
#define sp_nparen
Definition: indent_codes.h:55
#define rbrace
Definition: indent_codes.h:46
#define colon
Definition: indent_codes.h:43
#define postop
Definition: indent_codes.h:40
#define period
Definition: indent_codes.h:66
#define comment
Definition: indent_codes.h:49
#define question
Definition: indent_codes.h:41
#define lbrace
Definition: indent_codes.h:45
#define semicolon
Definition: indent_codes.h:44
#define ident
Definition: indent_codes.h:47
#define type_def
Definition: indent_codes.h:70
#define preesc
Definition: indent_codes.h:51
#define structure
Definition: indent_codes.h:71
#define funcname
Definition: indent_codes.h:69
#define binary_op
Definition: indent_codes.h:39
#define casestmt
Definition: indent_codes.h:42
#define storage
Definition: indent_codes.h:68
#define newline
Definition: indent_codes.h:35
#define sp_paren
Definition: indent_codes.h:54
#define decl
Definition: indent_codes.h:53
#define unary_op
Definition: indent_codes.h:38
#define strpfx
Definition: indent_codes.h:67
#define rparen
Definition: indent_codes.h:37
int auto_typedefs
#define token
Definition: indent_globs.h:126
int had_eof
#define CHECK_SIZE_TOKEN(desired_size)
Definition: indent_globs.h:99
char * buf_ptr
char * e_token
int line_no
char * s_token
#define BACKSLASH
Definition: indent_globs.h:35
char chartype[128]
Definition: lexi.c:119
static int strcmp_type(const void *e1, const void *e2)
Definition: lexi.c:142
static int is_func_definition(char *tp)
Definition: lexi.c:160
#define alphanum
Definition: lexi.c:55
struct templ specials[]
Definition: lexi.c:69
const void size_t len
Definition: regguts.h:323
struct templ
Definition: lexi.c:60
int rwcode
Definition: lexi.c:62

References alphanum, auto_typedefs, BACKSLASH, binary_op, buf_end, buf_ptr, casestmt, chartype, CHECK_SIZE_TOKEN, colon, comma, comment, decl, diag2(), e_token, fill_buffer(), form_feed, funcname, had_eof, ident, is_func_definition(), lbrace, len, line_no, lparen, newline, period, postop, preesc, question, rbrace, rparen, templ::rwcode, s_token, semicolon, sp_nparen, sp_paren, specials, storage, strcmp_type(), strpfx, structure, swstmt, token, type_def, typename_top, typenames, and unary_op.

Referenced by main().

◆ strcmp_type()

static int strcmp_type ( const void *  e1,
const void *  e2 
)
static

Definition at line 142 of file lexi.c.

/*
 * strcmp_type: comparison callback handed to bsearch() by lexi().
 *
 * e1: the search key, used directly as a C string.
 * e2: pointer to a table element; the start of the element is read as a
 *     "const char *".  That matches the typenames array directly; for
 *     specials[] it presumably relies on struct templ beginning with its
 *     name pointer -- confirm against the struct definition.
 *
 * Returns the strcmp() result of the key against that string.
 */
143 {
144  return (strcmp(e1, *(const char * const *)e2));
145 }

Referenced by lexi().

Variable Documentation

◆ chartype

char chartype[128]
Initial value:
/*
 * Character class table; lexi() indexes it with (character & 127).
 * Each row below covers 8 consecutive character codes, starting at 0.
 *
 *   1 = alphanum (see the alphanum macro): digits, letters, '_' and '$'
 *   3 = operator characters such as '!', '%', '&', '*', '+', '-', '/',
 *       '<', '=', '>', '?', '^', '|', '~' (presumably the "opchar" class
 *       named in lexi()'s disabled #ifdef undef code -- confirm)
 *   0 = everything else
 */
=
{
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 3, 0, 0, 1, 3, 3, 0,
0, 0, 3, 3, 0, 3, 0, 3,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 0, 0, 3, 3, 3, 3,
0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 3, 1,
0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 3, 0, 3, 0
}

Definition at line 119 of file lexi.c.

Referenced by lexi().

◆ specials

struct templ specials[]

Definition at line 69 of file lexi.c.

Referenced by lexi().

◆ typename_count

int typename_count

Definition at line 116 of file lexi.c.

Referenced by add_typename(), and alloc_typenames().

◆ typename_top

int typename_top = -1

Definition at line 117 of file lexi.c.

Referenced by add_typename(), and lexi().

◆ typenames