PostgreSQL Source Code  git master
regc_lex.c File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define ATEOS()   (v->now >= v->stop)
 
#define HAVE(n)   (v->stop - v->now >= (n))
 
#define NEXT1(c)   (!ATEOS() && *v->now == CHR(c))
 
#define NEXT2(a, b)   (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))
 
#define NEXT3(a, b, c)
 
#define SET(c)   (v->nexttype = (c))
 
#define SETV(c, n)   (v->nexttype = (c), v->nextvalue = (n))
 
#define RET(c)   return (SET(c), 1)
 
#define RETV(c, n)   return (SETV(c, n), 1)
 
#define FAILW(e)   return (ERR(e), 0) /* ERR does SET(EOS) */
 
#define LASTTYPE(t)   (v->lasttype == (t))
 
#define L_ERE   1 /* mainline ERE/ARE */
 
#define L_BRE   2 /* mainline BRE */
 
#define L_Q   3 /* REG_QUOTE */
 
#define L_EBND   4 /* ERE/ARE bound */
 
#define L_BBND   5 /* BRE bound */
 
#define L_BRACK   6 /* brackets */
 
#define L_CEL   7 /* collating element */
 
#define L_ECL   8 /* equivalence class */
 
#define L_CCL   9 /* character class */
 
#define INTOCON(c)   (v->lexcon = (c))
 
#define INCON(con)   (v->lexcon == (con))
 
#define ENDOF(array)   ((array) + sizeof(array)/sizeof(chr))
 

Functions

static void lexstart (struct vars *v)
 
static void prefixes (struct vars *v)
 
static int next (struct vars *v)
 
static int lexescape (struct vars *v)
 
static chr lexdigits (struct vars *v, int base, int minlen, int maxlen)
 
static int brenext (struct vars *v, chr c)
 
static void skip (struct vars *v)
 
static chr newline (void)
 
static chr chrnamed (struct vars *v, const chr *startp, const chr *endp, chr lastresort)
 

Macro Definition Documentation

◆ ATEOS

#define ATEOS ( )    (v->now >= v->stop)

Definition at line 36 of file regc_lex.c.

◆ ENDOF

#define ENDOF (   array)    ((array) + sizeof(array)/sizeof(chr))

Definition at line 64 of file regc_lex.c.

◆ FAILW

#define FAILW (   e)    return (ERR(e), 0) /* ERR does SET(EOS) */

Definition at line 47 of file regc_lex.c.

◆ HAVE

#define HAVE (   n)    (v->stop - v->now >= (n))

Definition at line 37 of file regc_lex.c.

◆ INCON

#define INCON (   con)    (v->lexcon == (con))

Definition at line 61 of file regc_lex.c.

◆ INTOCON

#define INTOCON (   c)    (v->lexcon = (c))

Definition at line 60 of file regc_lex.c.

◆ L_BBND

#define L_BBND   5 /* BRE bound */

Definition at line 55 of file regc_lex.c.

◆ L_BRACK

#define L_BRACK   6 /* brackets */

Definition at line 56 of file regc_lex.c.

◆ L_BRE

#define L_BRE   2 /* mainline BRE */

Definition at line 52 of file regc_lex.c.

◆ L_CCL

#define L_CCL   9 /* character class */

Definition at line 59 of file regc_lex.c.

◆ L_CEL

#define L_CEL   7 /* collating element */

Definition at line 57 of file regc_lex.c.

◆ L_EBND

#define L_EBND   4 /* ERE/ARE bound */

Definition at line 54 of file regc_lex.c.

◆ L_ECL

#define L_ECL   8 /* equivalence class */

Definition at line 58 of file regc_lex.c.

◆ L_ERE

#define L_ERE   1 /* mainline ERE/ARE */

Definition at line 51 of file regc_lex.c.

◆ L_Q

#define L_Q   3 /* REG_QUOTE */

Definition at line 53 of file regc_lex.c.

◆ LASTTYPE

#define LASTTYPE (   t)    (v->lasttype == (t))

Definition at line 48 of file regc_lex.c.

◆ NEXT1

#define NEXT1 (   c)    (!ATEOS() && *v->now == CHR(c))

Definition at line 38 of file regc_lex.c.

◆ NEXT2

#define NEXT2 (   a,
  b 
)    (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))

Definition at line 39 of file regc_lex.c.

◆ NEXT3

#define NEXT3 (   a,
  b,
  c 
)
Value:
(HAVE(3) && *v->now == CHR(a) && \
*(v->now+1) == CHR(b) && \
*(v->now+2) == CHR(c))
int b
Definition: isn.c:69
int a
Definition: isn.c:68
char * c
#define HAVE(n)
Definition: regc_lex.c:37
#define CHR(c)
Definition: regcustom.h:62

Definition at line 40 of file regc_lex.c.

◆ RET

#define RET (   c)    return (SET(c), 1)

Definition at line 45 of file regc_lex.c.

◆ RETV

#define RETV (   c,
  n 
)    return (SETV(c, n), 1)

Definition at line 46 of file regc_lex.c.

◆ SET

#define SET (   c)    (v->nexttype = (c))

Definition at line 43 of file regc_lex.c.

◆ SETV

#define SETV (   c,
  n 
)    (v->nexttype = (c), v->nextvalue = (n))

Definition at line 44 of file regc_lex.c.

Function Documentation

◆ brenext()

static int brenext ( struct vars *v,
chr  c 
)
static

Definition at line 861 of file regc_lex.c.

863 {
864  switch (c)
865  {
866  case CHR('*'):
867  if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
868  RETV(PLAIN, c);
869  RETV('*', 1);
870  break;
871  case CHR('['):
872  if (HAVE(6) && *(v->now + 0) == CHR('[') &&
873  *(v->now + 1) == CHR(':') &&
874  (*(v->now + 2) == CHR('<') ||
875  *(v->now + 2) == CHR('>')) &&
876  *(v->now + 3) == CHR(':') &&
877  *(v->now + 4) == CHR(']') &&
878  *(v->now + 5) == CHR(']'))
879  {
880  c = *(v->now + 2);
881  v->now += 6;
882  NOTE(REG_UNONPOSIX);
883  RET((c == CHR('<')) ? '<' : '>');
884  }
885  INTOCON(L_BRACK);
886  if (NEXT1('^'))
887  {
888  v->now++;
889  RETV('[', 0);
890  }
891  RETV('[', 1);
892  break;
893  case CHR('.'):
894  RET('.');
895  break;
896  case CHR('^'):
897  if (LASTTYPE(EMPTY))
898  RET('^');
899  if (LASTTYPE('('))
900  {
901  NOTE(REG_UUNSPEC);
902  RET('^');
903  }
904  RETV(PLAIN, c);
905  break;
906  case CHR('$'):
907  if (v->cflags & REG_EXPANDED)
908  skip(v);
909  if (ATEOS())
910  RET('$');
911  if (NEXT2('\\', ')'))
912  {
913  NOTE(REG_UUNSPEC);
914  RET('$');
915  }
916  RETV(PLAIN, c);
917  break;
918  case CHR('\\'):
919  break; /* see below */
920  default:
921  RETV(PLAIN, c);
922  break;
923  }
924 
925  assert(c == CHR('\\'));
926 
927  if (ATEOS())
928  FAILW(REG_EESCAPE);
929 
930  c = *v->now++;
931  switch (c)
932  {
933  case CHR('{'):
934  INTOCON(L_BBND);
935  NOTE(REG_UBOUNDS);
936  RET('{');
937  break;
938  case CHR('('):
939  RETV('(', 1);
940  break;
941  case CHR(')'):
942  RETV(')', c);
943  break;
944  case CHR('<'):
945  NOTE(REG_UNONPOSIX);
946  RET('<');
947  break;
948  case CHR('>'):
949  NOTE(REG_UNONPOSIX);
950  RET('>');
951  break;
952  case CHR('1'):
953  case CHR('2'):
954  case CHR('3'):
955  case CHR('4'):
956  case CHR('5'):
957  case CHR('6'):
958  case CHR('7'):
959  case CHR('8'):
960  case CHR('9'):
961  NOTE(REG_UBACKREF);
962  RETV(BACKREF, (chr) DIGITVAL(c));
963  break;
964  default:
965  if (iscalnum(c))
966  {
967  NOTE(REG_UBSALNUM);
968  NOTE(REG_UUNSPEC);
969  }
970  RETV(PLAIN, c);
971  break;
972  }
973 
974  assert(NOTREACHED);
975  return 0;
976 }
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1608
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
@ NOTE
Definition: pg_regress.c:88
#define NEXT2(a, b)
Definition: regc_lex.c:39
#define RET(c)
Definition: regc_lex.c:45
#define INTOCON(c)
Definition: regc_lex.c:60
#define L_BBND
Definition: regc_lex.c:55
#define ATEOS()
Definition: regc_lex.c:36
static void skip(struct vars *v)
Definition: regc_lex.c:982
#define LASTTYPE(t)
Definition: regc_lex.c:48
#define RETV(c, n)
Definition: regc_lex.c:46
#define FAILW(e)
Definition: regc_lex.c:47
#define L_BRACK
Definition: regc_lex.c:56
#define NEXT1(c)
Definition: regc_lex.c:38
#define EMPTY
Definition: regcomp.c:329
#define BACKREF
Definition: regcomp.c:333
#define PLAIN
Definition: regcomp.c:331
#define DIGITVAL(c)
Definition: regcustom.h:63
#define iscalnum(x)
Definition: regcustom.h:90
pg_wchar chr
Definition: regcustom.h:59
#define assert(x)
Definition: regcustom.h:56
#define REG_UBOUNDS
Definition: regex.h:140
#define REG_EESCAPE
Definition: regex.h:220
#define REG_EXPANDED
Definition: regex.h:186
#define REG_UUNSPEC
Definition: regex.h:146
#define REG_UNONPOSIX
Definition: regex.h:145
#define REG_UBSALNUM
Definition: regex.h:142
#define REG_UBACKREF
Definition: regex.h:138
#define NOTREACHED
Definition: regguts.h:96
const chr * now
Definition: regcomp.c:284

References assert, ATEOS, BACKREF, vars::cflags, CHR, DIGITVAL, EMPTY, FAILW, HAVE, INTOCON, iscalnum, L_BBND, L_BRACK, LASTTYPE, NEXT1, NEXT2, NOTE, NOTREACHED, vars::now, PLAIN, REG_EESCAPE, REG_EXPANDED, REG_UBACKREF, REG_UBOUNDS, REG_UBSALNUM, REG_UNONPOSIX, REG_UUNSPEC, RET, RETV, and skip().

Referenced by next().

◆ chrnamed()

static chr chrnamed ( struct vars *v,
const chr *startp,
const chr *endp,
chr  lastresort 
)
static

Definition at line 1022 of file regc_lex.c.

1026 {
1027  chr c;
1028  int errsave;
1029  int e;
1030  struct cvec *cv;
1031 
1032  errsave = v->err;
1033  v->err = 0;
1034  c = element(v, startp, endp);
1035  e = v->err;
1036  v->err = errsave;
1037 
1038  if (e != 0)
1039  return lastresort;
1040 
1041  cv = range(v, c, c, 0);
1042  if (cv->nchrs == 0)
1043  return lastresort;
1044  return cv->chrs[0];
1045 }
#define errsave(context,...)
Definition: elog.h:261
e
Definition: preproc-init.c:82
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
static chr element(struct vars *v, const chr *startp, const chr *endp)
Definition: regc_locale.c:376
Definition: regguts.h:279
int nchrs
Definition: regguts.h:280
chr * chrs
Definition: regguts.h:282
int err
Definition: regcomp.c:286

References cvec::chrs, element(), vars::err, errsave, cvec::nchrs, and range().

Referenced by lexescape().

◆ lexdigits()

static chr lexdigits ( struct vars *v,
int  base,
int  minlen,
int  maxlen 
)
static

Definition at line 780 of file regc_lex.c.

784 {
785  uchr n; /* unsigned to avoid overflow misbehavior */
786  int len;
787  chr c;
788  int d;
789  const uchr ub = (uchr) base;
790 
791  n = 0;
792  for (len = 0; len < maxlen && !ATEOS(); len++)
793  {
794  c = *v->now++;
795  switch (c)
796  {
797  case CHR('0'):
798  case CHR('1'):
799  case CHR('2'):
800  case CHR('3'):
801  case CHR('4'):
802  case CHR('5'):
803  case CHR('6'):
804  case CHR('7'):
805  case CHR('8'):
806  case CHR('9'):
807  d = DIGITVAL(c);
808  break;
809  case CHR('a'):
810  case CHR('A'):
811  d = 10;
812  break;
813  case CHR('b'):
814  case CHR('B'):
815  d = 11;
816  break;
817  case CHR('c'):
818  case CHR('C'):
819  d = 12;
820  break;
821  case CHR('d'):
822  case CHR('D'):
823  d = 13;
824  break;
825  case CHR('e'):
826  case CHR('E'):
827  d = 14;
828  break;
829  case CHR('f'):
830  case CHR('F'):
831  d = 15;
832  break;
833  default:
834  v->now--; /* oops, not a digit at all */
835  d = -1;
836  break;
837  }
838 
839  if (d >= base)
840  { /* not a plausible digit */
841  v->now--;
842  d = -1;
843  }
844  if (d < 0)
845  break; /* NOTE BREAK OUT */
846  n = n * ub + (uchr) d;
847  }
848  if (len < minlen)
849  ERR(REG_EESCAPE);
850 
851  return (chr) n;
852 }
#define ERR
Definition: _int.h:161
const void size_t len
unsigned uchr
Definition: regcustom.h:60

References ATEOS, CHR, DIGITVAL, ERR, len, vars::now, and REG_EESCAPE.

Referenced by lexescape().

◆ lexescape()

static int lexescape ( struct vars *v)
static

Definition at line 601 of file regc_lex.c.

602 {
603  chr c;
604  static const chr alert[] = {
605  CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
606  };
607  static const chr esc[] = {
608  CHR('E'), CHR('S'), CHR('C')
609  };
610  const chr *save;
611 
612  assert(v->cflags & REG_ADVF);
613 
614  assert(!ATEOS());
615  c = *v->now++;
616 
617  /* if it's not alphanumeric ASCII, treat it as a plain character */
618  if (!('a' <= c && c <= 'z') &&
619  !('A' <= c && c <= 'Z') &&
620  !('0' <= c && c <= '9'))
621  RETV(PLAIN, c);
622 
623  NOTE(REG_UNONPOSIX);
624  switch (c)
625  {
626  case CHR('a'):
627  RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
628  break;
629  case CHR('A'):
630  RETV(SBEGIN, 0);
631  break;
632  case CHR('b'):
633  RETV(PLAIN, CHR('\b'));
634  break;
635  case CHR('B'):
636  RETV(PLAIN, CHR('\\'));
637  break;
638  case CHR('c'):
639  NOTE(REG_UUNPORT);
640  if (ATEOS())
641  FAILW(REG_EESCAPE);
642  RETV(PLAIN, (chr) (*v->now++ & 037));
643  break;
644  case CHR('d'):
645  NOTE(REG_ULOCALE);
646  RETV(CCLASSS, CC_DIGIT);
647  break;
648  case CHR('D'):
649  NOTE(REG_ULOCALE);
650  RETV(CCLASSC, CC_DIGIT);
651  break;
652  case CHR('e'):
653  NOTE(REG_UUNPORT);
654  RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
655  break;
656  case CHR('f'):
657  RETV(PLAIN, CHR('\f'));
658  break;
659  case CHR('m'):
660  RET('<');
661  break;
662  case CHR('M'):
663  RET('>');
664  break;
665  case CHR('n'):
666  RETV(PLAIN, CHR('\n'));
667  break;
668  case CHR('r'):
669  RETV(PLAIN, CHR('\r'));
670  break;
671  case CHR('s'):
672  NOTE(REG_ULOCALE);
673  RETV(CCLASSS, CC_SPACE);
674  break;
675  case CHR('S'):
676  NOTE(REG_ULOCALE);
677  RETV(CCLASSC, CC_SPACE);
678  break;
679  case CHR('t'):
680  RETV(PLAIN, CHR('\t'));
681  break;
682  case CHR('u'):
683  c = lexdigits(v, 16, 4, 4);
684  if (ISERR() || !CHR_IS_IN_RANGE(c))
685  FAILW(REG_EESCAPE);
686  RETV(PLAIN, c);
687  break;
688  case CHR('U'):
689  c = lexdigits(v, 16, 8, 8);
690  if (ISERR() || !CHR_IS_IN_RANGE(c))
691  FAILW(REG_EESCAPE);
692  RETV(PLAIN, c);
693  break;
694  case CHR('v'):
695  RETV(PLAIN, CHR('\v'));
696  break;
697  case CHR('w'):
698  NOTE(REG_ULOCALE);
699  RETV(CCLASSS, CC_WORD);
700  break;
701  case CHR('W'):
702  NOTE(REG_ULOCALE);
703  RETV(CCLASSC, CC_WORD);
704  break;
705  case CHR('x'):
706  NOTE(REG_UUNPORT);
707  c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */
708  if (ISERR() || !CHR_IS_IN_RANGE(c))
709  FAILW(REG_EESCAPE);
710  RETV(PLAIN, c);
711  break;
712  case CHR('y'):
713  NOTE(REG_ULOCALE);
714  RETV(WBDRY, 0);
715  break;
716  case CHR('Y'):
717  NOTE(REG_ULOCALE);
718  RETV(NWBDRY, 0);
719  break;
720  case CHR('Z'):
721  RETV(SEND, 0);
722  break;
723  case CHR('1'):
724  case CHR('2'):
725  case CHR('3'):
726  case CHR('4'):
727  case CHR('5'):
728  case CHR('6'):
729  case CHR('7'):
730  case CHR('8'):
731  case CHR('9'):
732  save = v->now;
733  v->now--; /* put first digit back */
734  c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */
735  if (ISERR())
736  FAILW(REG_EESCAPE);
737  /* ugly heuristic (first test is "exactly 1 digit?") */
738  if (v->now == save || ((int) c > 0 && (int) c <= v->nsubexp))
739  {
740  NOTE(REG_UBACKREF);
741  RETV(BACKREF, c);
742  }
743  /* oops, doesn't look like it's a backref after all... */
744  v->now = save;
745  /* and fall through into octal number */
746  /* FALLTHROUGH */
747  case CHR('0'):
748  NOTE(REG_UUNPORT);
749  v->now--; /* put first digit back */
750  c = lexdigits(v, 8, 1, 3);
751  if (ISERR())
752  FAILW(REG_EESCAPE);
753  if (c > 0xff)
754  {
755  /* out of range, so we handled one digit too much */
756  v->now--;
757  c >>= 3;
758  }
759  RETV(PLAIN, c);
760  break;
761  default:
762 
763  /*
764  * Throw an error for unrecognized ASCII alpha escape sequences,
765  * which reserves them for future use if needed.
766  */
767  FAILW(REG_EESCAPE);
768  break;
769  }
770  assert(NOTREACHED);
771 }
static chr lexdigits(struct vars *v, int base, int minlen, int maxlen)
Definition: regc_lex.c:780
#define ENDOF(array)
Definition: regc_lex.c:64
static chr chrnamed(struct vars *v, const chr *startp, const chr *endp, chr lastresort)
Definition: regc_lex.c:1022
#define NWBDRY
Definition: regcomp.c:345
#define SBEGIN
Definition: regcomp.c:347
#define ISERR()
Definition: regcomp.c:317
#define CCLASSS
Definition: regcomp.c:338
#define WBDRY
Definition: regcomp.c:344
#define CCLASSC
Definition: regcomp.c:339
#define SEND
Definition: regcomp.c:348
#define CHR_IS_IN_RANGE(c)
Definition: regcustom.h:77
#define REG_ADVF
Definition: regex.h:180
#define REG_ULOCALE
Definition: regex.h:148
#define REG_UUNPORT
Definition: regex.h:147
@ CC_WORD
Definition: regguts.h:141
@ CC_SPACE
Definition: regguts.h:141
@ CC_DIGIT
Definition: regguts.h:140
int cflags
Definition: regcomp.c:287

References assert, ATEOS, BACKREF, CC_DIGIT, CC_SPACE, CC_WORD, CCLASSC, CCLASSS, vars::cflags, CHR, CHR_IS_IN_RANGE, chrnamed(), ENDOF, FAILW, ISERR, lexdigits(), NOTE, NOTREACHED, vars::now, NWBDRY, PLAIN, REG_ADVF, REG_EESCAPE, REG_UBACKREF, REG_ULOCALE, REG_UNONPOSIX, REG_UUNPORT, RET, RETV, SBEGIN, SEND, and WBDRY.

Referenced by next().

◆ lexstart()

static void lexstart ( struct vars *v)
static

Definition at line 70 of file regc_lex.c.

71 {
72  prefixes(v); /* may turn on new type bits etc. */
73  NOERR();
74 
75  if (v->cflags & REG_QUOTE)
76  {
77  assert(!(v->cflags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE)));
78  INTOCON(L_Q);
79  }
80  else if (v->cflags & REG_EXTENDED)
81  {
82  assert(!(v->cflags & REG_QUOTE));
83  INTOCON(L_ERE);
84  }
85  else
86  {
87  assert(!(v->cflags & (REG_QUOTE | REG_ADVF)));
88  INTOCON(L_BRE);
89  }
90 
91  v->nexttype = EMPTY; /* remember we were at the start */
92  next(v); /* set up the first token */
93 }
#define L_ERE
Definition: regc_lex.c:51
#define L_Q
Definition: regc_lex.c:53
static void prefixes(struct vars *v)
Definition: regc_lex.c:99
#define L_BRE
Definition: regc_lex.c:52
static int next(struct vars *v)
Definition: regc_lex.c:200
#define NOERR()
Definition: regcomp.c:321
#define REG_ADVANCED
Definition: regex.h:181
#define REG_EXTENDED
Definition: regex.h:179
#define REG_NEWLINE
Definition: regex.h:189
#define REG_QUOTE
Definition: regex.h:182
int nexttype
Definition: regcomp.c:289

References assert, vars::cflags, EMPTY, INTOCON, L_BRE, L_ERE, L_Q, next(), vars::nexttype, NOERR, prefixes(), REG_ADVANCED, REG_ADVF, REG_EXPANDED, REG_EXTENDED, REG_NEWLINE, and REG_QUOTE.

◆ newline()

static chr newline ( void  )
static

Definition at line 1010 of file regc_lex.c.

1011 {
1012  return CHR('\n');
1013 }

References CHR.

◆ next()

static int next ( struct vars *v)
static

Definition at line 200 of file regc_lex.c.

201 {
202  chr c;
203 
204 next_restart: /* loop here after eating a comment */
205 
206  /* errors yield an infinite sequence of failures */
207  if (ISERR())
208  return 0; /* the error has set nexttype to EOS */
209 
210  /* remember flavor of last token */
211  v->lasttype = v->nexttype;
212 
213  /* REG_BOSONLY */
214  if (v->nexttype == EMPTY && (v->cflags & REG_BOSONLY))
215  {
216  /* at start of a REG_BOSONLY RE */
217  RETV(SBEGIN, 0); /* same as \A */
218  }
219 
220  /* skip white space etc. if appropriate (not in literal or []) */
221  if (v->cflags & REG_EXPANDED)
222  switch (v->lexcon)
223  {
224  case L_ERE:
225  case L_BRE:
226  case L_EBND:
227  case L_BBND:
228  skip(v);
229  break;
230  }
231 
232  /* handle EOS, depending on context */
233  if (ATEOS())
234  {
235  switch (v->lexcon)
236  {
237  case L_ERE:
238  case L_BRE:
239  case L_Q:
240  RET(EOS);
241  break;
242  case L_EBND:
243  case L_BBND:
244  FAILW(REG_EBRACE);
245  break;
246  case L_BRACK:
247  case L_CEL:
248  case L_ECL:
249  case L_CCL:
250  FAILW(REG_EBRACK);
251  break;
252  }
253  assert(NOTREACHED);
254  }
255 
256  /* okay, time to actually get a character */
257  c = *v->now++;
258 
259  /* deal with the easy contexts, punt EREs to code below */
260  switch (v->lexcon)
261  {
262  case L_BRE: /* punt BREs to separate function */
263  return brenext(v, c);
264  break;
265  case L_ERE: /* see below */
266  break;
267  case L_Q: /* literal strings are easy */
268  RETV(PLAIN, c);
269  break;
270  case L_BBND: /* bounds are fairly simple */
271  case L_EBND:
272  switch (c)
273  {
274  case CHR('0'):
275  case CHR('1'):
276  case CHR('2'):
277  case CHR('3'):
278  case CHR('4'):
279  case CHR('5'):
280  case CHR('6'):
281  case CHR('7'):
282  case CHR('8'):
283  case CHR('9'):
284  RETV(DIGIT, (chr) DIGITVAL(c));
285  break;
286  case CHR(','):
287  RET(',');
288  break;
289  case CHR('}'): /* ERE bound ends with } */
290  if (INCON(L_EBND))
291  {
292  INTOCON(L_ERE);
293  if ((v->cflags & REG_ADVF) && NEXT1('?'))
294  {
295  v->now++;
296  NOTE(REG_UNONPOSIX);
297  RETV('}', 0);
298  }
299  RETV('}', 1);
300  }
301  else
302  FAILW(REG_BADBR);
303  break;
304  case CHR('\\'): /* BRE bound ends with \} */
305  if (INCON(L_BBND) && NEXT1('}'))
306  {
307  v->now++;
308  INTOCON(L_BRE);
309  RETV('}', 1);
310  }
311  else
312  FAILW(REG_BADBR);
313  break;
314  default:
315  FAILW(REG_BADBR);
316  break;
317  }
318  assert(NOTREACHED);
319  break;
320  case L_BRACK: /* brackets are not too hard */
321  switch (c)
322  {
323  case CHR(']'):
324  if (LASTTYPE('['))
325  RETV(PLAIN, c);
326  else
327  {
328  INTOCON((v->cflags & REG_EXTENDED) ?
329  L_ERE : L_BRE);
330  RET(']');
331  }
332  break;
333  case CHR('\\'):
334  NOTE(REG_UBBS);
335  if (!(v->cflags & REG_ADVF))
336  RETV(PLAIN, c);
337  NOTE(REG_UNONPOSIX);
338  if (ATEOS())
339  FAILW(REG_EESCAPE);
340  if (!lexescape(v))
341  return 0;
342  switch (v->nexttype)
343  { /* not all escapes okay here */
344  case PLAIN:
345  case CCLASSS:
346  case CCLASSC:
347  return 1;
348  break;
349  }
350  /* not one of the acceptable escapes */
351  FAILW(REG_EESCAPE);
352  break;
353  case CHR('-'):
354  if (LASTTYPE('[') || NEXT1(']'))
355  RETV(PLAIN, c);
356  else
357  RETV(RANGE, c);
358  break;
359  case CHR('['):
360  if (ATEOS())
361  FAILW(REG_EBRACK);
362  switch (*v->now++)
363  {
364  case CHR('.'):
365  INTOCON(L_CEL);
366  /* might or might not be locale-specific */
367  RET(COLLEL);
368  break;
369  case CHR('='):
370  INTOCON(L_ECL);
371  NOTE(REG_ULOCALE);
372  RET(ECLASS);
373  break;
374  case CHR(':'):
375  INTOCON(L_CCL);
376  NOTE(REG_ULOCALE);
377  RET(CCLASS);
378  break;
379  default: /* oops */
380  v->now--;
381  RETV(PLAIN, c);
382  break;
383  }
384  assert(NOTREACHED);
385  break;
386  default:
387  RETV(PLAIN, c);
388  break;
389  }
390  assert(NOTREACHED);
391  break;
392  case L_CEL: /* collating elements are easy */
393  if (c == CHR('.') && NEXT1(']'))
394  {
395  v->now++;
396  INTOCON(L_BRACK);
397  RETV(END, '.');
398  }
399  else
400  RETV(PLAIN, c);
401  break;
402  case L_ECL: /* ditto equivalence classes */
403  if (c == CHR('=') && NEXT1(']'))
404  {
405  v->now++;
406  INTOCON(L_BRACK);
407  RETV(END, '=');
408  }
409  else
410  RETV(PLAIN, c);
411  break;
412  case L_CCL: /* ditto character classes */
413  if (c == CHR(':') && NEXT1(']'))
414  {
415  v->now++;
416  INTOCON(L_BRACK);
417  RETV(END, ':');
418  }
419  else
420  RETV(PLAIN, c);
421  break;
422  default:
423  assert(NOTREACHED);
424  break;
425  }
426 
427  /* that got rid of everything except EREs and AREs */
428  assert(INCON(L_ERE));
429 
430  /* deal with EREs and AREs, except for backslashes */
431  switch (c)
432  {
433  case CHR('|'):
434  RET('|');
435  break;
436  case CHR('*'):
437  if ((v->cflags & REG_ADVF) && NEXT1('?'))
438  {
439  v->now++;
440  NOTE(REG_UNONPOSIX);
441  RETV('*', 0);
442  }
443  RETV('*', 1);
444  break;
445  case CHR('+'):
446  if ((v->cflags & REG_ADVF) && NEXT1('?'))
447  {
448  v->now++;
449  NOTE(REG_UNONPOSIX);
450  RETV('+', 0);
451  }
452  RETV('+', 1);
453  break;
454  case CHR('?'):
455  if ((v->cflags & REG_ADVF) && NEXT1('?'))
456  {
457  v->now++;
458  NOTE(REG_UNONPOSIX);
459  RETV('?', 0);
460  }
461  RETV('?', 1);
462  break;
463  case CHR('{'): /* bounds start or plain character */
464  if (v->cflags & REG_EXPANDED)
465  skip(v);
466  if (ATEOS() || !iscdigit(*v->now))
467  {
468  NOTE(REG_UBRACES);
469  NOTE(REG_UUNSPEC);
470  RETV(PLAIN, c);
471  }
472  else
473  {
474  NOTE(REG_UBOUNDS);
475  INTOCON(L_EBND);
476  RET('{');
477  }
478  assert(NOTREACHED);
479  break;
480  case CHR('('): /* parenthesis, or advanced extension */
481  if ((v->cflags & REG_ADVF) && NEXT1('?'))
482  {
483  NOTE(REG_UNONPOSIX);
484  v->now++;
485  if (ATEOS())
486  FAILW(REG_BADRPT);
487  switch (*v->now++)
488  {
489  case CHR(':'): /* non-capturing paren */
490  RETV('(', 0);
491  break;
492  case CHR('#'): /* comment */
493  while (!ATEOS() && *v->now != CHR(')'))
494  v->now++;
495  if (!ATEOS())
496  v->now++;
497  assert(v->nexttype == v->lasttype);
498  goto next_restart;
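499  case CHR('='): /* positive lookahead */
500  NOTE(REG_ULOOKAROUND);
501  RETV(LACON, LATYPE_AHEAD_POS);
502  break;
503  case CHR('!'): /* negative lookahead */
504  NOTE(REG_ULOOKAROUND);
505  RETV(LACON, LATYPE_AHEAD_NEG);
506  break;
507  case CHR('<'):
508  if (ATEOS())
509  FAILW(REG_BADRPT);
510  switch (*v->now++)
511  {
512  case CHR('='): /* positive lookbehind */
513  NOTE(REG_ULOOKAROUND);
514  RETV(LACON, LATYPE_BEHIND_POS);
515  break;
516  case CHR('!'): /* negative lookbehind */
517  NOTE(REG_ULOOKAROUND);
518  RETV(LACON, LATYPE_BEHIND_NEG);
519  break;
520  default:
521  FAILW(REG_BADRPT);
522  break;
523  }
524  assert(NOTREACHED);
525  break;
526  default:
527  FAILW(REG_BADRPT);
528  break;
529  }
530  assert(NOTREACHED);
531  }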
532  RETV('(', 1);
533  break;
534  case CHR(')'):
535  if (LASTTYPE('('))
536  NOTE(REG_UUNSPEC);
537  RETV(')', c);
538  break;
539  case CHR('['): /* easy except for [[:<:]] and [[:>:]] */
540  if (HAVE(6) && *(v->now + 0) == CHR('[') &&
541  *(v->now + 1) == CHR(':') &&
542  (*(v->now + 2) == CHR('<') ||
543  *(v->now + 2) == CHR('>')) &&
544  *(v->now + 3) == CHR(':') &&
545  *(v->now + 4) == CHR(']') &&
546  *(v->now + 5) == CHR(']'))
547  {
548  c = *(v->now + 2);
549  v->now += 6;
550  NOTE(REG_UNONPOSIX);
551  RET((c == CHR('<')) ? '<' : '>');
552  }
553  INTOCON(L_BRACK);
554  if (NEXT1('^'))
555  {
556  v->now++;
557  RETV('[', 0);
558  }
559  RETV('[', 1);
560  break;
561  case CHR('.'):
562  RET('.');
563  break;
564  case CHR('^'):
565  RET('^');
566  break;
567  case CHR('$'):
568  RET('$');
569  break;
570  case CHR('\\'): /* mostly punt backslashes to code below */
571  if (ATEOS())
572  FAILW(REG_EESCAPE);
573  break;
574  default: /* ordinary character */
575  RETV(PLAIN, c);
576  break;
577  }
578 
579  /* ERE/ARE backslash handling; backslash already eaten */
580  assert(!ATEOS());
581  if (!(v->cflags & REG_ADVF))
582  { /* only AREs have non-trivial escapes */
583  if (iscalnum(*v->now))
584  {
585  NOTE(REG_UBSALNUM);
586  NOTE(REG_UUNSPEC);
587  }
588  RETV(PLAIN, *v->now++);
589  }
590  return lexescape(v);
591 }
#define END
Definition: _int.h:160
while(p+4<=pend)
#define INCON(con)
Definition: regc_lex.c:61
static int lexescape(struct vars *v)
Definition: regc_lex.c:601
#define L_CEL
Definition: regc_lex.c:57
#define L_EBND
Definition: regc_lex.c:54
#define L_ECL
Definition: regc_lex.c:58
static int brenext(struct vars *v, chr c)
Definition: regc_lex.c:861
#define L_CCL
Definition: regc_lex.c:59
#define COLLEL
Definition: regcomp.c:334
#define CCLASS
Definition: regcomp.c:336
#define DIGIT
Definition: regcomp.c:332
#define ECLASS
Definition: regcomp.c:335
#define LACON
Definition: regcomp.c:341
#define EOS
Definition: regcomp.c:330
#define RANGE
Definition: regcomp.c:340
#define iscdigit(x)
Definition: regcustom.h:92
#define REG_EBRACK
Definition: regex.h:222
#define REG_BADRPT
Definition: regex.h:228
#define REG_ULOOKAROUND
Definition: regex.h:139
#define REG_UBBS
Definition: regex.h:144
#define REG_BADBR
Definition: regex.h:225
#define REG_EBRACE
Definition: regex.h:224
#define REG_BOSONLY
Definition: regex.h:192
#define REG_UBRACES
Definition: regex.h:141
#define LATYPE_AHEAD_NEG
Definition: regguts.h:105
#define LATYPE_BEHIND_POS
Definition: regguts.h:106
#define LATYPE_BEHIND_NEG
Definition: regguts.h:107
#define LATYPE_AHEAD_POS
Definition: regguts.h:104
int lexcon
Definition: regcomp.c:291
int lasttype
Definition: regcomp.c:288

References assert, ATEOS, brenext(), CCLASS, CCLASSC, CCLASSS, vars::cflags, CHR, COLLEL, DIGIT, DIGITVAL, ECLASS, EMPTY, END, EOS, FAILW, HAVE, INCON, INTOCON, iscalnum, iscdigit, ISERR, L_BBND, L_BRACK, L_BRE, L_CCL, L_CEL, L_EBND, L_ECL, L_ERE, L_Q, LACON, LASTTYPE, vars::lasttype, LATYPE_AHEAD_NEG, LATYPE_AHEAD_POS, LATYPE_BEHIND_NEG, LATYPE_BEHIND_POS, vars::lexcon, lexescape(), NEXT1, vars::nexttype, NOTE, NOTREACHED, vars::now, PLAIN, RANGE, REG_ADVF, REG_BADBR, REG_BADRPT, REG_BOSONLY, REG_EBRACE, REG_EBRACK, REG_EESCAPE, REG_EXPANDED, REG_EXTENDED, REG_UBBS, REG_UBOUNDS, REG_UBRACES, REG_UBSALNUM, REG_ULOCALE, REG_ULOOKAROUND, REG_UNONPOSIX, REG_UUNSPEC, RET, RETV, SBEGIN, and skip().

Referenced by lexstart().

◆ prefixes()

static void prefixes ( struct vars *v)
static

Definition at line 99 of file regc_lex.c.

100 {
101  /* literal string doesn't get any of this stuff */
102  if (v->cflags & REG_QUOTE)
103  return;
104 
105  /* initial "***" gets special things */
106  if (HAVE(4) && NEXT3('*', '*', '*'))
107  switch (*(v->now + 3))
108  {
109  case CHR('?'): /* "***?" error, msg shows version */
110  ERR(REG_BADPAT);
111  return; /* proceed no further */
112  break;
113  case CHR('='): /* "***=" shifts to literal string */
114  NOTE(REG_UNONPOSIX);
115  v->cflags |= REG_QUOTE;
116  v->cflags &= ~(REG_ADVANCED | REG_EXPANDED | REG_NEWLINE);
117  v->now += 4;
118  return; /* and there can be no more prefixes */
119  break;
120  case CHR(':'): /* "***:" shifts to AREs */
121  NOTE(REG_UNONPOSIX);
122  v->cflags |= REG_ADVANCED;
123  v->now += 4;
124  break;
125  default: /* otherwise *** is just an error */
126  ERR(REG_BADRPT);
127  return;
128  break;
129  }
130 
131  /* BREs and EREs don't get embedded options */
132  if ((v->cflags & REG_ADVANCED) != REG_ADVANCED)
133  return;
134 
135  /* embedded options (AREs only) */
136  if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2)))
137  {
138  NOTE(REG_UNONPOSIX);
139  v->now += 2;
140  for (; !ATEOS() && iscalpha(*v->now); v->now++)
141  switch (*v->now)
142  {
143  case CHR('b'): /* BREs (but why???) */
144  v->cflags &= ~(REG_ADVANCED | REG_QUOTE);
145  break;
146  case CHR('c'): /* case sensitive */
147  v->cflags &= ~REG_ICASE;
148  break;
149  case CHR('e'): /* plain EREs */
150  v->cflags |= REG_EXTENDED;
151  v->cflags &= ~(REG_ADVF | REG_QUOTE);
152  break;
153  case CHR('i'): /* case insensitive */
154  v->cflags |= REG_ICASE;
155  break;
156  case CHR('m'): /* Perloid synonym for n */
157  case CHR('n'): /* \n affects ^ $ . [^ */
158  v->cflags |= REG_NEWLINE;
159  break;
160  case CHR('p'): /* ~Perl, \n affects . [^ */
161  v->cflags |= REG_NLSTOP;
162  v->cflags &= ~REG_NLANCH;
163  break;
164  case CHR('q'): /* literal string */
165  v->cflags |= REG_QUOTE;
166  v->cflags &= ~REG_ADVANCED;
167  break;
168  case CHR('s'): /* single line, \n ordinary */
169  v->cflags &= ~REG_NEWLINE;
170  break;
171  case CHR('t'): /* tight syntax */
172  v->cflags &= ~REG_EXPANDED;
173  break;
174  case CHR('w'): /* weird, \n affects ^ $ only */
175  v->cflags &= ~REG_NLSTOP;
176  v->cflags |= REG_NLANCH;
177  break;
178  case CHR('x'): /* expanded syntax */
179  v->cflags |= REG_EXPANDED;
180  break;
181  default:
182  ERR(REG_BADOPT);
183  return;
184  }
185  if (!NEXT1(')'))
186  {
187  ERR(REG_BADOPT);
188  return;
189  }
190  v->now++;
191  if (v->cflags & REG_QUOTE)
192  v->cflags &= ~(REG_EXPANDED | REG_NEWLINE);
193  }
194 }
#define NEXT3(a, b, c)
Definition: regc_lex.c:40
#define iscalpha(x)
Definition: regcustom.h:91
#define REG_BADOPT
Definition: regex.h:232
#define REG_ICASE
Definition: regex.h:184
#define REG_NLANCH
Definition: regex.h:188
#define REG_NLSTOP
Definition: regex.h:187
#define REG_BADPAT
Definition: regex.h:217

References ATEOS, vars::cflags, CHR, ERR, HAVE, iscalpha, NEXT1, NEXT2, NEXT3, NOTE, vars::now, REG_ADVANCED, REG_ADVF, REG_BADOPT, REG_BADPAT, REG_BADRPT, REG_EXPANDED, REG_EXTENDED, REG_ICASE, REG_NEWLINE, REG_NLANCH, REG_NLSTOP, REG_QUOTE, and REG_UNONPOSIX.

Referenced by lexstart(), and NIImportAffixes().

◆ skip()

static void skip ( struct vars *v)
static

Definition at line 982 of file regc_lex.c.

983 {
984  const chr *start = v->now;
985 
986  assert(v->cflags & REG_EXPANDED);
987 
988  for (;;)
989  {
990  while (!ATEOS() && iscspace(*v->now))
991  v->now++;
992  if (ATEOS() || *v->now != CHR('#'))
993  break; /* NOTE BREAK OUT */
994  assert(NEXT1('#'));
995  while (!ATEOS() && *v->now != CHR('\n'))
996  v->now++;
997  /* leave the newline to be picked up by the iscspace loop */
998  }
999 
1000  if (v->now != start)
1001  NOTE(REG_UNONPOSIX);
1002 }
return str start
#define iscspace(x)
Definition: regcustom.h:93

References assert, ATEOS, vars::cflags, CHR, iscspace, NEXT1, NOTE, vars::now, REG_EXPANDED, REG_UNONPOSIX, and start.

Referenced by brenext(), and next().