PostgreSQL Source Code  git master
regc_lex.c File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define ATEOS()   (v->now >= v->stop)
 
#define HAVE(n)   (v->stop - v->now >= (n))
 
#define NEXT1(c)   (!ATEOS() && *v->now == CHR(c))
 
#define NEXT2(a, b)   (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))
 
#define NEXT3(a, b, c)
 
#define SET(c)   (v->nexttype = (c))
 
#define SETV(c, n)   (v->nexttype = (c), v->nextvalue = (n))
 
#define RET(c)   return (SET(c), 1)
 
#define RETV(c, n)   return (SETV(c, n), 1)
 
#define FAILW(e)   return (ERR(e), 0) /* ERR does SET(EOS) */
 
#define LASTTYPE(t)   (v->lasttype == (t))
 
#define L_ERE   1 /* mainline ERE/ARE */
 
#define L_BRE   2 /* mainline BRE */
 
#define L_Q   3 /* REG_QUOTE */
 
#define L_EBND   4 /* ERE/ARE bound */
 
#define L_BBND   5 /* BRE bound */
 
#define L_BRACK   6 /* brackets */
 
#define L_CEL   7 /* collating element */
 
#define L_ECL   8 /* equivalence class */
 
#define L_CCL   9 /* character class */
 
#define INTOCON(c)   (v->lexcon = (c))
 
#define INCON(con)   (v->lexcon == (con))
 
#define ENDOF(array)   ((array) + sizeof(array)/sizeof(chr))
 

Functions

static void lexstart (struct vars *v)
 
static void prefixes (struct vars *v)
 
static int next (struct vars *v)
 
static int lexescape (struct vars *v)
 
static chr lexdigits (struct vars *v, int base, int minlen, int maxlen)
 
static int brenext (struct vars *v, chr c)
 
static void skip (struct vars *v)
 
static chr newline (void)
 
static chr chrnamed (struct vars *v, const chr *startp, const chr *endp, chr lastresort)
 

Macro Definition Documentation

◆ ATEOS

#define ATEOS ( )    (v->now >= v->stop)

Definition at line 36 of file regc_lex.c.

◆ ENDOF

#define ENDOF (   array)    ((array) + sizeof(array)/sizeof(chr))

Definition at line 64 of file regc_lex.c.

◆ FAILW

#define FAILW (   e)    return (ERR(e), 0) /* ERR does SET(EOS) */

Definition at line 47 of file regc_lex.c.

◆ HAVE

#define HAVE (   n)    (v->stop - v->now >= (n))

Definition at line 37 of file regc_lex.c.

◆ INCON

#define INCON (   con)    (v->lexcon == (con))

Definition at line 61 of file regc_lex.c.

◆ INTOCON

#define INTOCON (   c)    (v->lexcon = (c))

Definition at line 60 of file regc_lex.c.

◆ L_BBND

#define L_BBND   5 /* BRE bound */

Definition at line 55 of file regc_lex.c.

◆ L_BRACK

#define L_BRACK   6 /* brackets */

Definition at line 56 of file regc_lex.c.

◆ L_BRE

#define L_BRE   2 /* mainline BRE */

Definition at line 52 of file regc_lex.c.

◆ L_CCL

#define L_CCL   9 /* character class */

Definition at line 59 of file regc_lex.c.

◆ L_CEL

#define L_CEL   7 /* collating element */

Definition at line 57 of file regc_lex.c.

◆ L_EBND

#define L_EBND   4 /* ERE/ARE bound */

Definition at line 54 of file regc_lex.c.

◆ L_ECL

#define L_ECL   8 /* equivalence class */

Definition at line 58 of file regc_lex.c.

◆ L_ERE

#define L_ERE   1 /* mainline ERE/ARE */

Definition at line 51 of file regc_lex.c.

◆ L_Q

#define L_Q   3 /* REG_QUOTE */

Definition at line 53 of file regc_lex.c.

◆ LASTTYPE

#define LASTTYPE (   t)    (v->lasttype == (t))

Definition at line 48 of file regc_lex.c.

◆ NEXT1

#define NEXT1 (   c)    (!ATEOS() && *v->now == CHR(c))

Definition at line 38 of file regc_lex.c.

◆ NEXT2

#define NEXT2 (   a,
  b 
)    (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))

Definition at line 39 of file regc_lex.c.

◆ NEXT3

#define NEXT3 (   a,
  b,
  c 
)
Value:
(HAVE(3) && *v->now == CHR(a) && \
*(v->now+1) == CHR(b) && \
*(v->now+2) == CHR(c))
int b
Definition: isn.c:70
int a
Definition: isn.c:69
char * c
#define HAVE(n)
Definition: regc_lex.c:37
#define CHR(c)
Definition: regcustom.h:61

Definition at line 40 of file regc_lex.c.

◆ RET

#define RET (   c)    return (SET(c), 1)

Definition at line 45 of file regc_lex.c.

◆ RETV

#define RETV (   c,
  n 
)    return (SETV(c, n), 1)

Definition at line 46 of file regc_lex.c.

◆ SET

#define SET (   c)    (v->nexttype = (c))

Definition at line 43 of file regc_lex.c.

◆ SETV

#define SETV (   c,
  n 
)    (v->nexttype = (c), v->nextvalue = (n))

Definition at line 44 of file regc_lex.c.

Function Documentation

◆ brenext()

static int brenext ( struct vars * v,
chr  c 
)
static

Definition at line 853 of file regc_lex.c.

855 {
856  switch (c)
857  {
858  case CHR('*'):
859  if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
860  RETV(PLAIN, c);
861  RETV('*', 1);
862  break;
863  case CHR('['):
864  if (HAVE(6) && *(v->now + 0) == CHR('[') &&
865  *(v->now + 1) == CHR(':') &&
866  (*(v->now + 2) == CHR('<') ||
867  *(v->now + 2) == CHR('>')) &&
868  *(v->now + 3) == CHR(':') &&
869  *(v->now + 4) == CHR(']') &&
870  *(v->now + 5) == CHR(']'))
871  {
872  c = *(v->now + 2);
873  v->now += 6;
875  RET((c == CHR('<')) ? '<' : '>');
876  }
877  INTOCON(L_BRACK);
878  if (NEXT1('^'))
879  {
880  v->now++;
881  RETV('[', 0);
882  }
883  RETV('[', 1);
884  break;
885  case CHR('.'):
886  RET('.');
887  break;
888  case CHR('^'):
889  if (LASTTYPE(EMPTY))
890  RET('^');
891  if (LASTTYPE('('))
892  {
893  NOTE(REG_UUNSPEC);
894  RET('^');
895  }
896  RETV(PLAIN, c);
897  break;
898  case CHR('$'):
899  if (v->cflags & REG_EXPANDED)
900  skip(v);
901  if (ATEOS())
902  RET('$');
903  if (NEXT2('\\', ')'))
904  {
905  NOTE(REG_UUNSPEC);
906  RET('$');
907  }
908  RETV(PLAIN, c);
909  break;
910  case CHR('\\'):
911  break; /* see below */
912  default:
913  RETV(PLAIN, c);
914  break;
915  }
916 
917  assert(c == CHR('\\'));
918 
919  if (ATEOS())
920  FAILW(REG_EESCAPE);
921 
922  c = *v->now++;
923  switch (c)
924  {
925  case CHR('{'):
926  INTOCON(L_BBND);
927  NOTE(REG_UBOUNDS);
928  RET('{');
929  break;
930  case CHR('('):
931  RETV('(', 1);
932  break;
933  case CHR(')'):
934  RETV(')', c);
935  break;
936  case CHR('<'):
938  RET('<');
939  break;
940  case CHR('>'):
942  RET('>');
943  break;
944  case CHR('1'):
945  case CHR('2'):
946  case CHR('3'):
947  case CHR('4'):
948  case CHR('5'):
949  case CHR('6'):
950  case CHR('7'):
951  case CHR('8'):
952  case CHR('9'):
954  RETV(BACKREF, (chr) DIGITVAL(c));
955  break;
956  default:
957  if (iscalnum(c))
958  {
960  NOTE(REG_UUNSPEC);
961  }
962  RETV(PLAIN, c);
963  break;
964  }
965 
967  return 0;
968 }
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1546
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
#define NEXT2(a, b)
Definition: regc_lex.c:39
#define RET(c)
Definition: regc_lex.c:45
#define INTOCON(c)
Definition: regc_lex.c:60
#define L_BBND
Definition: regc_lex.c:55
#define ATEOS()
Definition: regc_lex.c:36
static void skip(struct vars *v)
Definition: regc_lex.c:974
#define LASTTYPE(t)
Definition: regc_lex.c:48
#define RETV(c, n)
Definition: regc_lex.c:46
#define FAILW(e)
Definition: regc_lex.c:47
#define L_BRACK
Definition: regc_lex.c:56
#define NEXT1(c)
Definition: regc_lex.c:38
#define NOTE(b)
Definition: regcomp.c:325
#define EMPTY
Definition: regcomp.c:329
#define BACKREF
Definition: regcomp.c:333
#define PLAIN
Definition: regcomp.c:331
#define DIGITVAL(c)
Definition: regcustom.h:62
#define iscalnum(x)
Definition: regcustom.h:89
pg_wchar chr
Definition: regcustom.h:58
#define assert(x)
Definition: regcustom.h:55
#define REG_UBOUNDS
Definition: regex.h:62
#define REG_EESCAPE
Definition: regex.h:142
#define REG_EXPANDED
Definition: regex.h:108
#define REG_UUNSPEC
Definition: regex.h:68
#define REG_UNONPOSIX
Definition: regex.h:67
#define REG_UBSALNUM
Definition: regex.h:64
#define REG_UBACKREF
Definition: regex.h:60
#define NOTREACHED
Definition: regguts.h:91
const chr * now
Definition: regcomp.c:284

References assert, ATEOS, BACKREF, vars::cflags, CHR, DIGITVAL, EMPTY, FAILW, HAVE, INTOCON, iscalnum, L_BBND, L_BRACK, LASTTYPE, NEXT1, NEXT2, NOTE, NOTREACHED, vars::now, PLAIN, REG_EESCAPE, REG_EXPANDED, REG_UBACKREF, REG_UBOUNDS, REG_UBSALNUM, REG_UNONPOSIX, REG_UUNSPEC, RET, RETV, and skip().

Referenced by next().

◆ chrnamed()

static chr chrnamed ( struct vars * v,
const chr * startp,
const chr * endp,
chr  lastresort 
)
static

Definition at line 1014 of file regc_lex.c.

1018 {
1019  chr c;
1020  int errsave;
1021  int e;
1022  struct cvec *cv;
1023 
1024  errsave = v->err;
1025  v->err = 0;
1026  c = element(v, startp, endp);
1027  e = v->err;
1028  v->err = errsave;
1029 
1030  if (e != 0)
1031  return lastresort;
1032 
1033  cv = range(v, c, c, 0);
1034  if (cv->nchrs == 0)
1035  return lastresort;
1036  return cv->chrs[0];
1037 }
#define errsave(context,...)
Definition: elog.h:260
e
Definition: preproc-init.c:82
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
static chr element(struct vars *v, const chr *startp, const chr *endp)
Definition: regc_locale.c:376
Definition: regguts.h:274
int nchrs
Definition: regguts.h:275
chr * chrs
Definition: regguts.h:277
int err
Definition: regcomp.c:286

References cvec::chrs, element(), vars::err, errsave, cvec::nchrs, and range().

Referenced by lexescape().

◆ lexdigits()

static chr lexdigits ( struct vars * v,
int  base,
int  minlen,
int  maxlen 
)
static

Definition at line 772 of file regc_lex.c.

776 {
777  uchr n; /* unsigned to avoid overflow misbehavior */
778  int len;
779  chr c;
780  int d;
781  const uchr ub = (uchr) base;
782 
783  n = 0;
784  for (len = 0; len < maxlen && !ATEOS(); len++)
785  {
786  c = *v->now++;
787  switch (c)
788  {
789  case CHR('0'):
790  case CHR('1'):
791  case CHR('2'):
792  case CHR('3'):
793  case CHR('4'):
794  case CHR('5'):
795  case CHR('6'):
796  case CHR('7'):
797  case CHR('8'):
798  case CHR('9'):
799  d = DIGITVAL(c);
800  break;
801  case CHR('a'):
802  case CHR('A'):
803  d = 10;
804  break;
805  case CHR('b'):
806  case CHR('B'):
807  d = 11;
808  break;
809  case CHR('c'):
810  case CHR('C'):
811  d = 12;
812  break;
813  case CHR('d'):
814  case CHR('D'):
815  d = 13;
816  break;
817  case CHR('e'):
818  case CHR('E'):
819  d = 14;
820  break;
821  case CHR('f'):
822  case CHR('F'):
823  d = 15;
824  break;
825  default:
826  v->now--; /* oops, not a digit at all */
827  d = -1;
828  break;
829  }
830 
831  if (d >= base)
832  { /* not a plausible digit */
833  v->now--;
834  d = -1;
835  }
836  if (d < 0)
837  break; /* NOTE BREAK OUT */
838  n = n * ub + (uchr) d;
839  }
840  if (len < minlen)
841  ERR(REG_EESCAPE);
842 
843  return (chr) n;
844 }
#define ERR
Definition: _int.h:161
const void size_t len
unsigned uchr
Definition: regcustom.h:59

References ATEOS, CHR, DIGITVAL, ERR, len, vars::now, and REG_EESCAPE.

Referenced by lexescape().

◆ lexescape()

static int lexescape ( struct vars * v)
static

Definition at line 601 of file regc_lex.c.

602 {
603  chr c;
604  static const chr alert[] = {
605  CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
606  };
607  static const chr esc[] = {
608  CHR('E'), CHR('S'), CHR('C')
609  };
610  const chr *save;
611 
612  assert(v->cflags & REG_ADVF);
613 
614  assert(!ATEOS());
615  c = *v->now++;
616  if (!iscalnum(c))
617  RETV(PLAIN, c);
618 
620  switch (c)
621  {
622  case CHR('a'):
623  RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
624  break;
625  case CHR('A'):
626  RETV(SBEGIN, 0);
627  break;
628  case CHR('b'):
629  RETV(PLAIN, CHR('\b'));
630  break;
631  case CHR('B'):
632  RETV(PLAIN, CHR('\\'));
633  break;
634  case CHR('c'):
635  NOTE(REG_UUNPORT);
636  if (ATEOS())
638  RETV(PLAIN, (chr) (*v->now++ & 037));
639  break;
640  case CHR('d'):
641  NOTE(REG_ULOCALE);
643  break;
644  case CHR('D'):
645  NOTE(REG_ULOCALE);
647  break;
648  case CHR('e'):
649  NOTE(REG_UUNPORT);
650  RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
651  break;
652  case CHR('f'):
653  RETV(PLAIN, CHR('\f'));
654  break;
655  case CHR('m'):
656  RET('<');
657  break;
658  case CHR('M'):
659  RET('>');
660  break;
661  case CHR('n'):
662  RETV(PLAIN, CHR('\n'));
663  break;
664  case CHR('r'):
665  RETV(PLAIN, CHR('\r'));
666  break;
667  case CHR('s'):
668  NOTE(REG_ULOCALE);
670  break;
671  case CHR('S'):
672  NOTE(REG_ULOCALE);
674  break;
675  case CHR('t'):
676  RETV(PLAIN, CHR('\t'));
677  break;
678  case CHR('u'):
679  c = lexdigits(v, 16, 4, 4);
680  if (ISERR() || !CHR_IS_IN_RANGE(c))
682  RETV(PLAIN, c);
683  break;
684  case CHR('U'):
685  c = lexdigits(v, 16, 8, 8);
686  if (ISERR() || !CHR_IS_IN_RANGE(c))
688  RETV(PLAIN, c);
689  break;
690  case CHR('v'):
691  RETV(PLAIN, CHR('\v'));
692  break;
693  case CHR('w'):
694  NOTE(REG_ULOCALE);
695  RETV(CCLASSS, CC_WORD);
696  break;
697  case CHR('W'):
698  NOTE(REG_ULOCALE);
699  RETV(CCLASSC, CC_WORD);
700  break;
701  case CHR('x'):
702  NOTE(REG_UUNPORT);
703  c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */
704  if (ISERR() || !CHR_IS_IN_RANGE(c))
706  RETV(PLAIN, c);
707  break;
708  case CHR('y'):
709  NOTE(REG_ULOCALE);
710  RETV(WBDRY, 0);
711  break;
712  case CHR('Y'):
713  NOTE(REG_ULOCALE);
714  RETV(NWBDRY, 0);
715  break;
716  case CHR('Z'):
717  RETV(SEND, 0);
718  break;
719  case CHR('1'):
720  case CHR('2'):
721  case CHR('3'):
722  case CHR('4'):
723  case CHR('5'):
724  case CHR('6'):
725  case CHR('7'):
726  case CHR('8'):
727  case CHR('9'):
728  save = v->now;
729  v->now--; /* put first digit back */
730  c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */
731  if (ISERR())
733  /* ugly heuristic (first test is "exactly 1 digit?") */
734  if (v->now == save || ((int) c > 0 && (int) c <= v->nsubexp))
735  {
737  RETV(BACKREF, c);
738  }
739  /* oops, doesn't look like it's a backref after all... */
740  v->now = save;
741  /* and fall through into octal number */
742  /* FALLTHROUGH */
743  case CHR('0'):
744  NOTE(REG_UUNPORT);
745  v->now--; /* put first digit back */
746  c = lexdigits(v, 8, 1, 3);
747  if (ISERR())
749  if (c > 0xff)
750  {
751  /* out of range, so we handled one digit too much */
752  v->now--;
753  c >>= 3;
754  }
755  RETV(PLAIN, c);
756  break;
757  default:
758  assert(iscalpha(c));
759  FAILW(REG_EESCAPE); /* unknown alphabetic escape */
760  break;
761  }
763 }
static chr lexdigits(struct vars *v, int base, int minlen, int maxlen)
Definition: regc_lex.c:772
#define ENDOF(array)
Definition: regc_lex.c:64
static chr chrnamed(struct vars *v, const chr *startp, const chr *endp, chr lastresort)
Definition: regc_lex.c:1014
#define NWBDRY
Definition: regcomp.c:345
#define SBEGIN
Definition: regcomp.c:346
#define ISERR()
Definition: regcomp.c:317
#define CCLASSS
Definition: regcomp.c:338
#define WBDRY
Definition: regcomp.c:344
#define CCLASSC
Definition: regcomp.c:339
#define SEND
Definition: regcomp.c:347
#define CHR_IS_IN_RANGE(c)
Definition: regcustom.h:76
#define iscalpha(x)
Definition: regcustom.h:90
#define REG_ADVF
Definition: regex.h:102
#define REG_ULOCALE
Definition: regex.h:70
#define REG_UUNPORT
Definition: regex.h:69
@ CC_WORD
Definition: regguts.h:136
@ CC_SPACE
Definition: regguts.h:136
@ CC_DIGIT
Definition: regguts.h:135
int cflags
Definition: regcomp.c:287

References assert, ATEOS, BACKREF, CC_DIGIT, CC_SPACE, CC_WORD, CCLASSC, CCLASSS, vars::cflags, CHR, CHR_IS_IN_RANGE, chrnamed(), ENDOF, FAILW, iscalnum, iscalpha, ISERR, lexdigits(), NOTE, NOTREACHED, vars::now, NWBDRY, PLAIN, REG_ADVF, REG_EESCAPE, REG_UBACKREF, REG_ULOCALE, REG_UNONPOSIX, REG_UUNPORT, RET, RETV, SBEGIN, SEND, and WBDRY.

Referenced by next().

◆ lexstart()

static void lexstart ( struct vars * v)
static

Definition at line 70 of file regc_lex.c.

71 {
72  prefixes(v); /* may turn on new type bits etc. */
73  NOERR();
74 
75  if (v->cflags & REG_QUOTE)
76  {
78  INTOCON(L_Q);
79  }
80  else if (v->cflags & REG_EXTENDED)
81  {
82  assert(!(v->cflags & REG_QUOTE));
83  INTOCON(L_ERE);
84  }
85  else
86  {
87  assert(!(v->cflags & (REG_QUOTE | REG_ADVF)));
88  INTOCON(L_BRE);
89  }
90 
91  v->nexttype = EMPTY; /* remember we were at the start */
92  next(v); /* set up the first token */
93 }
#define L_ERE
Definition: regc_lex.c:51
#define L_Q
Definition: regc_lex.c:53
static void prefixes(struct vars *v)
Definition: regc_lex.c:99
#define L_BRE
Definition: regc_lex.c:52
static int next(struct vars *v)
Definition: regc_lex.c:200
#define NOERR()
Definition: regcomp.c:321
#define REG_ADVANCED
Definition: regex.h:103
#define REG_EXTENDED
Definition: regex.h:101
#define REG_NEWLINE
Definition: regex.h:111
#define REG_QUOTE
Definition: regex.h:104
int nexttype
Definition: regcomp.c:289

References assert, vars::cflags, EMPTY, INTOCON, L_BRE, L_ERE, L_Q, next(), vars::nexttype, NOERR, prefixes(), REG_ADVANCED, REG_ADVF, REG_EXPANDED, REG_EXTENDED, REG_NEWLINE, and REG_QUOTE.

◆ newline()

static chr newline ( void  )
static

Definition at line 1002 of file regc_lex.c.

1003 {
1004  return CHR('\n');
1005 }

References CHR.

◆ next()

static int next ( struct vars * v)
static

Definition at line 200 of file regc_lex.c.

201 {
202  chr c;
203 
204 next_restart: /* loop here after eating a comment */
205 
206  /* errors yield an infinite sequence of failures */
207  if (ISERR())
208  return 0; /* the error has set nexttype to EOS */
209 
210  /* remember flavor of last token */
211  v->lasttype = v->nexttype;
212 
213  /* REG_BOSONLY */
214  if (v->nexttype == EMPTY && (v->cflags & REG_BOSONLY))
215  {
216  /* at start of a REG_BOSONLY RE */
217  RETV(SBEGIN, 0); /* same as \A */
218  }
219 
220  /* skip white space etc. if appropriate (not in literal or []) */
221  if (v->cflags & REG_EXPANDED)
222  switch (v->lexcon)
223  {
224  case L_ERE:
225  case L_BRE:
226  case L_EBND:
227  case L_BBND:
228  skip(v);
229  break;
230  }
231 
232  /* handle EOS, depending on context */
233  if (ATEOS())
234  {
235  switch (v->lexcon)
236  {
237  case L_ERE:
238  case L_BRE:
239  case L_Q:
240  RET(EOS);
241  break;
242  case L_EBND:
243  case L_BBND:
244  FAILW(REG_EBRACE);
245  break;
246  case L_BRACK:
247  case L_CEL:
248  case L_ECL:
249  case L_CCL:
250  FAILW(REG_EBRACK);
251  break;
252  }
254  }
255 
256  /* okay, time to actually get a character */
257  c = *v->now++;
258 
259  /* deal with the easy contexts, punt EREs to code below */
260  switch (v->lexcon)
261  {
262  case L_BRE: /* punt BREs to separate function */
263  return brenext(v, c);
264  break;
265  case L_ERE: /* see below */
266  break;
267  case L_Q: /* literal strings are easy */
268  RETV(PLAIN, c);
269  break;
270  case L_BBND: /* bounds are fairly simple */
271  case L_EBND:
272  switch (c)
273  {
274  case CHR('0'):
275  case CHR('1'):
276  case CHR('2'):
277  case CHR('3'):
278  case CHR('4'):
279  case CHR('5'):
280  case CHR('6'):
281  case CHR('7'):
282  case CHR('8'):
283  case CHR('9'):
284  RETV(DIGIT, (chr) DIGITVAL(c));
285  break;
286  case CHR(','):
287  RET(',');
288  break;
289  case CHR('}'): /* ERE bound ends with } */
290  if (INCON(L_EBND))
291  {
292  INTOCON(L_ERE);
293  if ((v->cflags & REG_ADVF) && NEXT1('?'))
294  {
295  v->now++;
297  RETV('}', 0);
298  }
299  RETV('}', 1);
300  }
301  else
302  FAILW(REG_BADBR);
303  break;
304  case CHR('\\'): /* BRE bound ends with \} */
305  if (INCON(L_BBND) && NEXT1('}'))
306  {
307  v->now++;
308  INTOCON(L_BRE);
309  RETV('}', 1);
310  }
311  else
312  FAILW(REG_BADBR);
313  break;
314  default:
315  FAILW(REG_BADBR);
316  break;
317  }
319  break;
320  case L_BRACK: /* brackets are not too hard */
321  switch (c)
322  {
323  case CHR(']'):
324  if (LASTTYPE('['))
325  RETV(PLAIN, c);
326  else
327  {
328  INTOCON((v->cflags & REG_EXTENDED) ?
329  L_ERE : L_BRE);
330  RET(']');
331  }
332  break;
333  case CHR('\\'):
334  NOTE(REG_UBBS);
335  if (!(v->cflags & REG_ADVF))
336  RETV(PLAIN, c);
338  if (ATEOS())
340  if (!lexescape(v))
341  return 0;
342  switch (v->nexttype)
343  { /* not all escapes okay here */
344  case PLAIN:
345  case CCLASSS:
346  case CCLASSC:
347  return 1;
348  break;
349  }
350  /* not one of the acceptable escapes */
352  break;
353  case CHR('-'):
354  if (LASTTYPE('[') || NEXT1(']'))
355  RETV(PLAIN, c);
356  else
357  RETV(RANGE, c);
358  break;
359  case CHR('['):
360  if (ATEOS())
361  FAILW(REG_EBRACK);
362  switch (*v->now++)
363  {
364  case CHR('.'):
365  INTOCON(L_CEL);
366  /* might or might not be locale-specific */
367  RET(COLLEL);
368  break;
369  case CHR('='):
370  INTOCON(L_ECL);
371  NOTE(REG_ULOCALE);
372  RET(ECLASS);
373  break;
374  case CHR(':'):
375  INTOCON(L_CCL);
376  NOTE(REG_ULOCALE);
377  RET(CCLASS);
378  break;
379  default: /* oops */
380  v->now--;
381  RETV(PLAIN, c);
382  break;
383  }
385  break;
386  default:
387  RETV(PLAIN, c);
388  break;
389  }
391  break;
392  case L_CEL: /* collating elements are easy */
393  if (c == CHR('.') && NEXT1(']'))
394  {
395  v->now++;
396  INTOCON(L_BRACK);
397  RETV(END, '.');
398  }
399  else
400  RETV(PLAIN, c);
401  break;
402  case L_ECL: /* ditto equivalence classes */
403  if (c == CHR('=') && NEXT1(']'))
404  {
405  v->now++;
406  INTOCON(L_BRACK);
407  RETV(END, '=');
408  }
409  else
410  RETV(PLAIN, c);
411  break;
412  case L_CCL: /* ditto character classes */
413  if (c == CHR(':') && NEXT1(']'))
414  {
415  v->now++;
416  INTOCON(L_BRACK);
417  RETV(END, ':');
418  }
419  else
420  RETV(PLAIN, c);
421  break;
422  default:
424  break;
425  }
426 
427  /* that got rid of everything except EREs and AREs */
428  assert(INCON(L_ERE));
429 
430  /* deal with EREs and AREs, except for backslashes */
431  switch (c)
432  {
433  case CHR('|'):
434  RET('|');
435  break;
436  case CHR('*'):
437  if ((v->cflags & REG_ADVF) && NEXT1('?'))
438  {
439  v->now++;
441  RETV('*', 0);
442  }
443  RETV('*', 1);
444  break;
445  case CHR('+'):
446  if ((v->cflags & REG_ADVF) && NEXT1('?'))
447  {
448  v->now++;
450  RETV('+', 0);
451  }
452  RETV('+', 1);
453  break;
454  case CHR('?'):
455  if ((v->cflags & REG_ADVF) && NEXT1('?'))
456  {
457  v->now++;
459  RETV('?', 0);
460  }
461  RETV('?', 1);
462  break;
463  case CHR('{'): /* bounds start or plain character */
464  if (v->cflags & REG_EXPANDED)
465  skip(v);
466  if (ATEOS() || !iscdigit(*v->now))
467  {
468  NOTE(REG_UBRACES);
469  NOTE(REG_UUNSPEC);
470  RETV(PLAIN, c);
471  }
472  else
473  {
474  NOTE(REG_UBOUNDS);
475  INTOCON(L_EBND);
476  RET('{');
477  }
479  break;
480  case CHR('('): /* parenthesis, or advanced extension */
481  if ((v->cflags & REG_ADVF) && NEXT1('?'))
482  {
484  v->now++;
485  if (ATEOS())
486  FAILW(REG_BADRPT);
487  switch (*v->now++)
488  {
489  case CHR(':'): /* non-capturing paren */
490  RETV('(', 0);
491  break;
492  case CHR('#'): /* comment */
493  while (!ATEOS() && *v->now != CHR(')'))
494  v->now++;
495  if (!ATEOS())
496  v->now++;
497  assert(v->nexttype == v->lasttype);
498  goto next_restart;
499  case CHR('='): /* positive lookahead */
502  break;
503  case CHR('!'): /* negative lookahead */
506  break;
507  case CHR('<'):
508  if (ATEOS())
509  FAILW(REG_BADRPT);
510  switch (*v->now++)
511  {
512  case CHR('='): /* positive lookbehind */
515  break;
516  case CHR('!'): /* negative lookbehind */
519  break;
520  default:
521  FAILW(REG_BADRPT);
522  break;
523  }
525  break;
526  default:
527  FAILW(REG_BADRPT);
528  break;
529  }
531  }
532  RETV('(', 1);
533  break;
534  case CHR(')'):
535  if (LASTTYPE('('))
536  NOTE(REG_UUNSPEC);
537  RETV(')', c);
538  break;
539  case CHR('['): /* easy except for [[:<:]] and [[:>:]] */
540  if (HAVE(6) && *(v->now + 0) == CHR('[') &&
541  *(v->now + 1) == CHR(':') &&
542  (*(v->now + 2) == CHR('<') ||
543  *(v->now + 2) == CHR('>')) &&
544  *(v->now + 3) == CHR(':') &&
545  *(v->now + 4) == CHR(']') &&
546  *(v->now + 5) == CHR(']'))
547  {
548  c = *(v->now + 2);
549  v->now += 6;
551  RET((c == CHR('<')) ? '<' : '>');
552  }
553  INTOCON(L_BRACK);
554  if (NEXT1('^'))
555  {
556  v->now++;
557  RETV('[', 0);
558  }
559  RETV('[', 1);
560  break;
561  case CHR('.'):
562  RET('.');
563  break;
564  case CHR('^'):
565  RET('^');
566  break;
567  case CHR('$'):
568  RET('$');
569  break;
570  case CHR('\\'): /* mostly punt backslashes to code below */
571  if (ATEOS())
573  break;
574  default: /* ordinary character */
575  RETV(PLAIN, c);
576  break;
577  }
578 
579  /* ERE/ARE backslash handling; backslash already eaten */
580  assert(!ATEOS());
581  if (!(v->cflags & REG_ADVF))
582  { /* only AREs have non-trivial escapes */
583  if (iscalnum(*v->now))
584  {
586  NOTE(REG_UUNSPEC);
587  }
588  RETV(PLAIN, *v->now++);
589  }
590  return lexescape(v);
591 }
#define END
Definition: _int.h:160
while(p+4<=pend)
#define INCON(con)
Definition: regc_lex.c:61
static int lexescape(struct vars *v)
Definition: regc_lex.c:601
#define L_CEL
Definition: regc_lex.c:57
#define L_EBND
Definition: regc_lex.c:54
#define L_ECL
Definition: regc_lex.c:58
static int brenext(struct vars *v, chr c)
Definition: regc_lex.c:853
#define L_CCL
Definition: regc_lex.c:59
#define COLLEL
Definition: regcomp.c:334
#define CCLASS
Definition: regcomp.c:336
#define DIGIT
Definition: regcomp.c:332
#define ECLASS
Definition: regcomp.c:335
#define LACON
Definition: regcomp.c:341
#define EOS
Definition: regcomp.c:330
#define RANGE
Definition: regcomp.c:340
#define iscdigit(x)
Definition: regcustom.h:91
#define REG_EBRACK
Definition: regex.h:144
#define REG_BADRPT
Definition: regex.h:150
#define REG_ULOOKAROUND
Definition: regex.h:61
#define REG_UBBS
Definition: regex.h:66
#define REG_BADBR
Definition: regex.h:147
#define REG_EBRACE
Definition: regex.h:146
#define REG_BOSONLY
Definition: regex.h:114
#define REG_UBRACES
Definition: regex.h:63
#define LATYPE_AHEAD_NEG
Definition: regguts.h:100
#define LATYPE_BEHIND_POS
Definition: regguts.h:101
#define LATYPE_BEHIND_NEG
Definition: regguts.h:102
#define LATYPE_AHEAD_POS
Definition: regguts.h:99
int lexcon
Definition: regcomp.c:291
int lasttype
Definition: regcomp.c:288

References assert, ATEOS, brenext(), CCLASS, CCLASSC, CCLASSS, vars::cflags, CHR, COLLEL, DIGIT, DIGITVAL, ECLASS, EMPTY, END, EOS, FAILW, HAVE, INCON, INTOCON, iscalnum, iscdigit, ISERR, L_BBND, L_BRACK, L_BRE, L_CCL, L_CEL, L_EBND, L_ECL, L_ERE, L_Q, LACON, LASTTYPE, vars::lasttype, LATYPE_AHEAD_NEG, LATYPE_AHEAD_POS, LATYPE_BEHIND_NEG, LATYPE_BEHIND_POS, vars::lexcon, lexescape(), NEXT1, vars::nexttype, NOTE, NOTREACHED, vars::now, PLAIN, RANGE, REG_ADVF, REG_BADBR, REG_BADRPT, REG_BOSONLY, REG_EBRACE, REG_EBRACK, REG_EESCAPE, REG_EXPANDED, REG_EXTENDED, REG_UBBS, REG_UBOUNDS, REG_UBRACES, REG_UBSALNUM, REG_ULOCALE, REG_ULOOKAROUND, REG_UNONPOSIX, REG_UUNSPEC, RET, RETV, SBEGIN, and skip().

Referenced by lexstart().

◆ prefixes()

static void prefixes ( struct vars * v)
static

Definition at line 99 of file regc_lex.c.

100 {
101  /* literal string doesn't get any of this stuff */
102  if (v->cflags & REG_QUOTE)
103  return;
104 
105  /* initial "***" gets special things */
106  if (HAVE(4) && NEXT3('*', '*', '*'))
107  switch (*(v->now + 3))
108  {
109  case CHR('?'): /* "***?" error, msg shows version */
110  ERR(REG_BADPAT);
111  return; /* proceed no further */
112  break;
113  case CHR('='): /* "***=" shifts to literal string */
115  v->cflags |= REG_QUOTE;
117  v->now += 4;
118  return; /* and there can be no more prefixes */
119  break;
120  case CHR(':'): /* "***:" shifts to AREs */
122  v->cflags |= REG_ADVANCED;
123  v->now += 4;
124  break;
125  default: /* otherwise *** is just an error */
126  ERR(REG_BADRPT);
127  return;
128  break;
129  }
130 
131  /* BREs and EREs don't get embedded options */
132  if ((v->cflags & REG_ADVANCED) != REG_ADVANCED)
133  return;
134 
135  /* embedded options (AREs only) */
136  if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2)))
137  {
139  v->now += 2;
140  for (; !ATEOS() && iscalpha(*v->now); v->now++)
141  switch (*v->now)
142  {
143  case CHR('b'): /* BREs (but why???) */
144  v->cflags &= ~(REG_ADVANCED | REG_QUOTE);
145  break;
146  case CHR('c'): /* case sensitive */
147  v->cflags &= ~REG_ICASE;
148  break;
149  case CHR('e'): /* plain EREs */
150  v->cflags |= REG_EXTENDED;
151  v->cflags &= ~(REG_ADVF | REG_QUOTE);
152  break;
153  case CHR('i'): /* case insensitive */
154  v->cflags |= REG_ICASE;
155  break;
156  case CHR('m'): /* Perloid synonym for n */
157  case CHR('n'): /* \n affects ^ $ . [^ */
158  v->cflags |= REG_NEWLINE;
159  break;
160  case CHR('p'): /* ~Perl, \n affects . [^ */
161  v->cflags |= REG_NLSTOP;
162  v->cflags &= ~REG_NLANCH;
163  break;
164  case CHR('q'): /* literal string */
165  v->cflags |= REG_QUOTE;
166  v->cflags &= ~REG_ADVANCED;
167  break;
168  case CHR('s'): /* single line, \n ordinary */
169  v->cflags &= ~REG_NEWLINE;
170  break;
171  case CHR('t'): /* tight syntax */
172  v->cflags &= ~REG_EXPANDED;
173  break;
174  case CHR('w'): /* weird, \n affects ^ $ only */
175  v->cflags &= ~REG_NLSTOP;
176  v->cflags |= REG_NLANCH;
177  break;
178  case CHR('x'): /* expanded syntax */
179  v->cflags |= REG_EXPANDED;
180  break;
181  default:
182  ERR(REG_BADOPT);
183  return;
184  }
185  if (!NEXT1(')'))
186  {
187  ERR(REG_BADOPT);
188  return;
189  }
190  v->now++;
191  if (v->cflags & REG_QUOTE)
192  v->cflags &= ~(REG_EXPANDED | REG_NEWLINE);
193  }
194 }
#define NEXT3(a, b, c)
Definition: regc_lex.c:40
#define REG_BADOPT
Definition: regex.h:154
#define REG_ICASE
Definition: regex.h:106
#define REG_NLANCH
Definition: regex.h:110
#define REG_NLSTOP
Definition: regex.h:109
#define REG_BADPAT
Definition: regex.h:139

References ATEOS, vars::cflags, CHR, ERR, HAVE, iscalpha, NEXT1, NEXT2, NEXT3, NOTE, vars::now, REG_ADVANCED, REG_ADVF, REG_BADOPT, REG_BADPAT, REG_BADRPT, REG_EXPANDED, REG_EXTENDED, REG_ICASE, REG_NEWLINE, REG_NLANCH, REG_NLSTOP, REG_QUOTE, and REG_UNONPOSIX.

Referenced by lexstart(), and NIImportAffixes().

◆ skip()

static void skip ( struct vars * v)
static

Definition at line 974 of file regc_lex.c.

975 {
976  const chr *start = v->now;
977 
978  assert(v->cflags & REG_EXPANDED);
979 
980  for (;;)
981  {
982  while (!ATEOS() && iscspace(*v->now))
983  v->now++;
984  if (ATEOS() || *v->now != CHR('#'))
985  break; /* NOTE BREAK OUT */
986  assert(NEXT1('#'));
987  while (!ATEOS() && *v->now != CHR('\n'))
988  v->now++;
989  /* leave the newline to be picked up by the iscspace loop */
990  }
991 
992  if (v->now != start)
993  NOTE(REG_UNONPOSIX);
994 }
#define iscspace(x)
Definition: regcustom.h:92

References assert, ATEOS, vars::cflags, CHR, iscspace, NEXT1, NOTE, vars::now, REG_EXPANDED, and REG_UNONPOSIX.

Referenced by brenext(), and next().