PostgreSQL Source Code git master
regc_lex.c File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define ATEOS()   (v->now >= v->stop)
 
#define HAVE(n)   (v->stop - v->now >= (n))
 
#define NEXT1(c)   (!ATEOS() && *v->now == CHR(c))
 
#define NEXT2(a, b)   (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))
 
#define NEXT3(a, b, c)
 
#define SET(c)   (v->nexttype = (c))
 
#define SETV(c, n)   (v->nexttype = (c), v->nextvalue = (n))
 
#define RET(c)   return (SET(c), 1)
 
#define RETV(c, n)   return (SETV(c, n), 1)
 
#define FAILW(e)   return (ERR(e), 0) /* ERR does SET(EOS) */
 
#define LASTTYPE(t)   (v->lasttype == (t))
 
#define L_ERE   1 /* mainline ERE/ARE */
 
#define L_BRE   2 /* mainline BRE */
 
#define L_Q   3 /* REG_QUOTE */
 
#define L_EBND   4 /* ERE/ARE bound */
 
#define L_BBND   5 /* BRE bound */
 
#define L_BRACK   6 /* brackets */
 
#define L_CEL   7 /* collating element */
 
#define L_ECL   8 /* equivalence class */
 
#define L_CCL   9 /* character class */
 
#define INTOCON(c)   (v->lexcon = (c))
 
#define INCON(con)   (v->lexcon == (con))
 
#define ENDOF(array)   ((array) + sizeof(array)/sizeof(chr))
 

Functions

static void lexstart (struct vars *v)
 
static void prefixes (struct vars *v)
 
static int next (struct vars *v)
 
static int lexescape (struct vars *v)
 
static chr lexdigits (struct vars *v, int base, int minlen, int maxlen)
 
static int brenext (struct vars *v, chr c)
 
static void skip (struct vars *v)
 
static chr newline (void)
 
static chr chrnamed (struct vars *v, const chr *startp, const chr *endp, chr lastresort)
 

Macro Definition Documentation

◆ ATEOS

#define ATEOS ( )    (v->now >= v->stop)

Definition at line 36 of file regc_lex.c.

◆ ENDOF

#define ENDOF (   array)    ((array) + sizeof(array)/sizeof(chr))

Definition at line 64 of file regc_lex.c.

◆ FAILW

#define FAILW (   e)    return (ERR(e), 0) /* ERR does SET(EOS) */

Definition at line 47 of file regc_lex.c.

◆ HAVE

#define HAVE (   n)    (v->stop - v->now >= (n))

Definition at line 37 of file regc_lex.c.

◆ INCON

#define INCON (   con)    (v->lexcon == (con))

Definition at line 61 of file regc_lex.c.

◆ INTOCON

#define INTOCON (   c)    (v->lexcon = (c))

Definition at line 60 of file regc_lex.c.

◆ L_BBND

#define L_BBND   5 /* BRE bound */

Definition at line 55 of file regc_lex.c.

◆ L_BRACK

#define L_BRACK   6 /* brackets */

Definition at line 56 of file regc_lex.c.

◆ L_BRE

#define L_BRE   2 /* mainline BRE */

Definition at line 52 of file regc_lex.c.

◆ L_CCL

#define L_CCL   9 /* character class */

Definition at line 59 of file regc_lex.c.

◆ L_CEL

#define L_CEL   7 /* collating element */

Definition at line 57 of file regc_lex.c.

◆ L_EBND

#define L_EBND   4 /* ERE/ARE bound */

Definition at line 54 of file regc_lex.c.

◆ L_ECL

#define L_ECL   8 /* equivalence class */

Definition at line 58 of file regc_lex.c.

◆ L_ERE

#define L_ERE   1 /* mainline ERE/ARE */

Definition at line 51 of file regc_lex.c.

◆ L_Q

#define L_Q   3 /* REG_QUOTE */

Definition at line 53 of file regc_lex.c.

◆ LASTTYPE

#define LASTTYPE (   t)    (v->lasttype == (t))

Definition at line 48 of file regc_lex.c.

◆ NEXT1

#define NEXT1 (   c)    (!ATEOS() && *v->now == CHR(c))

Definition at line 38 of file regc_lex.c.

◆ NEXT2

#define NEXT2 (   a,
  b 
)    (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))

Definition at line 39 of file regc_lex.c.

◆ NEXT3

#define NEXT3 (   a,
  b,
  c 
)
Value:
(HAVE(3) && *v->now == CHR(a) && \
*(v->now+1) == CHR(b) && \
*(v->now+2) == CHR(c))
int b
Definition: isn.c:69
int a
Definition: isn.c:68
char * c
#define HAVE(n)
Definition: regc_lex.c:37
#define CHR(c)
Definition: regcustom.h:62

Definition at line 40 of file regc_lex.c.

◆ RET

#define RET (   c)    return (SET(c), 1)

Definition at line 45 of file regc_lex.c.

◆ RETV

#define RETV (   c,
  n 
)    return (SETV(c, n), 1)

Definition at line 46 of file regc_lex.c.

◆ SET

#define SET (   c)    (v->nexttype = (c))

Definition at line 43 of file regc_lex.c.

◆ SETV

#define SETV (   c,
  n 
)    (v->nexttype = (c), v->nextvalue = (n))

Definition at line 44 of file regc_lex.c.

Function Documentation

◆ brenext()

static int brenext ( struct vars *v,
chr  c 
)
static

Definition at line 861 of file regc_lex.c.

863{
864 switch (c)
865 {
866 case CHR('*'):
867 if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
868 RETV(PLAIN, c);
869 RETV('*', 1);
870 break;
871 case CHR('['):
872 if (HAVE(6) && *(v->now + 0) == CHR('[') &&
873 *(v->now + 1) == CHR(':') &&
874 (*(v->now + 2) == CHR('<') ||
875 *(v->now + 2) == CHR('>')) &&
876 *(v->now + 3) == CHR(':') &&
877 *(v->now + 4) == CHR(']') &&
878 *(v->now + 5) == CHR(']'))
879 {
880 c = *(v->now + 2);
881 v->now += 6;
883 RET((c == CHR('<')) ? '<' : '>');
884 }
886 if (NEXT1('^'))
887 {
888 v->now++;
889 RETV('[', 0);
890 }
891 RETV('[', 1);
892 break;
893 case CHR('.'):
894 RET('.');
895 break;
896 case CHR('^'):
897 if (LASTTYPE(EMPTY))
898 RET('^');
899 if (LASTTYPE('('))
900 {
902 RET('^');
903 }
904 RETV(PLAIN, c);
905 break;
906 case CHR('$'):
907 if (v->cflags & REG_EXPANDED)
908 skip(v);
909 if (ATEOS())
910 RET('$');
911 if (NEXT2('\\', ')'))
912 {
914 RET('$');
915 }
916 RETV(PLAIN, c);
917 break;
918 case CHR('\\'):
919 break; /* see below */
920 default:
921 RETV(PLAIN, c);
922 break;
923 }
924
925 assert(c == CHR('\\'));
926
927 if (ATEOS())
929
930 c = *v->now++;
931 switch (c)
932 {
933 case CHR('{'):
936 RET('{');
937 break;
938 case CHR('('):
939 RETV('(', 1);
940 break;
941 case CHR(')'):
942 RETV(')', c);
943 break;
944 case CHR('<'):
946 RET('<');
947 break;
948 case CHR('>'):
950 RET('>');
951 break;
952 case CHR('1'):
953 case CHR('2'):
954 case CHR('3'):
955 case CHR('4'):
956 case CHR('5'):
957 case CHR('6'):
958 case CHR('7'):
959 case CHR('8'):
960 case CHR('9'):
963 break;
964 default:
965 if (iscalnum(c))
966 {
969 }
970 RETV(PLAIN, c);
971 break;
972 }
973
975 return 0;
976}
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1608
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
@ NOTE
Definition: pg_regress.c:88
#define NEXT2(a, b)
Definition: regc_lex.c:39
#define RET(c)
Definition: regc_lex.c:45
#define INTOCON(c)
Definition: regc_lex.c:60
#define L_BBND
Definition: regc_lex.c:55
#define ATEOS()
Definition: regc_lex.c:36
static void skip(struct vars *v)
Definition: regc_lex.c:982
#define LASTTYPE(t)
Definition: regc_lex.c:48
#define RETV(c, n)
Definition: regc_lex.c:46
#define FAILW(e)
Definition: regc_lex.c:47
#define L_BRACK
Definition: regc_lex.c:56
#define NEXT1(c)
Definition: regc_lex.c:38
#define EMPTY
Definition: regcomp.c:329
#define BACKREF
Definition: regcomp.c:333
#define PLAIN
Definition: regcomp.c:331
#define DIGITVAL(c)
Definition: regcustom.h:63
#define iscalnum(x)
Definition: regcustom.h:90
pg_wchar chr
Definition: regcustom.h:59
#define assert(x)
Definition: regcustom.h:56
#define REG_UBOUNDS
Definition: regex.h:140
#define REG_EESCAPE
Definition: regex.h:220
#define REG_EXPANDED
Definition: regex.h:186
#define REG_UUNSPEC
Definition: regex.h:146
#define REG_UNONPOSIX
Definition: regex.h:145
#define REG_UBSALNUM
Definition: regex.h:142
#define REG_UBACKREF
Definition: regex.h:138
#define NOTREACHED
Definition: regguts.h:96
const chr * now
Definition: regcomp.c:284

References assert, ATEOS, BACKREF, vars::cflags, CHR, DIGITVAL, EMPTY, FAILW, HAVE, INTOCON, iscalnum, L_BBND, L_BRACK, LASTTYPE, NEXT1, NEXT2, NOTE, NOTREACHED, vars::now, PLAIN, REG_EESCAPE, REG_EXPANDED, REG_UBACKREF, REG_UBOUNDS, REG_UBSALNUM, REG_UNONPOSIX, REG_UUNSPEC, RET, RETV, and skip().

Referenced by next().

◆ chrnamed()

static chr chrnamed ( struct vars *v,
const chr *startp,
const chr *endp,
chr  lastresort 
)
static

Definition at line 1022 of file regc_lex.c.

1026{
1027 chr c;
1028 int errsave;
1029 int e;
1030 struct cvec *cv;
1031
1032 errsave = v->err;
1033 v->err = 0;
1034 c = element(v, startp, endp);
1035 e = v->err;
1036 v->err = errsave;
1037
1038 if (e != 0)
1039 return lastresort;
1040
1041 cv = range(v, c, c, 0);
1042 if (cv->nchrs == 0)
1043 return lastresort;
1044 return cv->chrs[0];
1045}
#define errsave(context,...)
Definition: elog.h:261
e
Definition: preproc-init.c:82
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
static chr element(struct vars *v, const chr *startp, const chr *endp)
Definition: regc_locale.c:376
cvec
Definition: regguts.h:279
int nchrs
Definition: regguts.h:280
chr * chrs
Definition: regguts.h:282
int err
Definition: regcomp.c:286

References cvec::chrs, element(), vars::err, errsave, cvec::nchrs, and range().

Referenced by lexescape().

◆ lexdigits()

static chr lexdigits ( struct vars *v,
int  base,
int  minlen,
int  maxlen 
)
static

Definition at line 780 of file regc_lex.c.

784{
785 uchr n; /* unsigned to avoid overflow misbehavior */
786 int len;
787 chr c;
788 int d;
789 const uchr ub = (uchr) base;
790
791 n = 0;
792 for (len = 0; len < maxlen && !ATEOS(); len++)
793 {
794 c = *v->now++;
795 switch (c)
796 {
797 case CHR('0'):
798 case CHR('1'):
799 case CHR('2'):
800 case CHR('3'):
801 case CHR('4'):
802 case CHR('5'):
803 case CHR('6'):
804 case CHR('7'):
805 case CHR('8'):
806 case CHR('9'):
807 d = DIGITVAL(c);
808 break;
809 case CHR('a'):
810 case CHR('A'):
811 d = 10;
812 break;
813 case CHR('b'):
814 case CHR('B'):
815 d = 11;
816 break;
817 case CHR('c'):
818 case CHR('C'):
819 d = 12;
820 break;
821 case CHR('d'):
822 case CHR('D'):
823 d = 13;
824 break;
825 case CHR('e'):
826 case CHR('E'):
827 d = 14;
828 break;
829 case CHR('f'):
830 case CHR('F'):
831 d = 15;
832 break;
833 default:
834 v->now--; /* oops, not a digit at all */
835 d = -1;
836 break;
837 }
838
839 if (d >= base)
840 { /* not a plausible digit */
841 v->now--;
842 d = -1;
843 }
844 if (d < 0)
845 break; /* NOTE BREAK OUT */
846 n = n * ub + (uchr) d;
847 }
848 if (len < minlen)
850
851 return (chr) n;
852}
#define ERR
Definition: _int.h:161
const void size_t len
unsigned uchr
Definition: regcustom.h:60

References ATEOS, CHR, DIGITVAL, ERR, len, vars::now, and REG_EESCAPE.

Referenced by lexescape().

◆ lexescape()

static int lexescape ( struct vars *v)
static

Definition at line 601 of file regc_lex.c.

602{
603 chr c;
604 static const chr alert[] = {
605 CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
606 };
607 static const chr esc[] = {
608 CHR('E'), CHR('S'), CHR('C')
609 };
610 const chr *save;
611
612 assert(v->cflags & REG_ADVF);
613
614 assert(!ATEOS());
615 c = *v->now++;
616
617 /* if it's not alphanumeric ASCII, treat it as a plain character */
618 if (!('a' <= c && c <= 'z') &&
619 !('A' <= c && c <= 'Z') &&
620 !('0' <= c && c <= '9'))
621 RETV(PLAIN, c);
622
624 switch (c)
625 {
626 case CHR('a'):
627 RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
628 break;
629 case CHR('A'):
630 RETV(SBEGIN, 0);
631 break;
632 case CHR('b'):
633 RETV(PLAIN, CHR('\b'));
634 break;
635 case CHR('B'):
636 RETV(PLAIN, CHR('\\'));
637 break;
638 case CHR('c'):
640 if (ATEOS())
642 RETV(PLAIN, (chr) (*v->now++ & 037));
643 break;
644 case CHR('d'):
647 break;
648 case CHR('D'):
651 break;
652 case CHR('e'):
654 RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
655 break;
656 case CHR('f'):
657 RETV(PLAIN, CHR('\f'));
658 break;
659 case CHR('m'):
660 RET('<');
661 break;
662 case CHR('M'):
663 RET('>');
664 break;
665 case CHR('n'):
666 RETV(PLAIN, CHR('\n'));
667 break;
668 case CHR('r'):
669 RETV(PLAIN, CHR('\r'));
670 break;
671 case CHR('s'):
674 break;
675 case CHR('S'):
678 break;
679 case CHR('t'):
680 RETV(PLAIN, CHR('\t'));
681 break;
682 case CHR('u'):
683 c = lexdigits(v, 16, 4, 4);
684 if (ISERR() || !CHR_IS_IN_RANGE(c))
686 RETV(PLAIN, c);
687 break;
688 case CHR('U'):
689 c = lexdigits(v, 16, 8, 8);
690 if (ISERR() || !CHR_IS_IN_RANGE(c))
692 RETV(PLAIN, c);
693 break;
694 case CHR('v'):
695 RETV(PLAIN, CHR('\v'));
696 break;
697 case CHR('w'):
700 break;
701 case CHR('W'):
704 break;
705 case CHR('x'):
707 c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */
708 if (ISERR() || !CHR_IS_IN_RANGE(c))
710 RETV(PLAIN, c);
711 break;
712 case CHR('y'):
714 RETV(WBDRY, 0);
715 break;
716 case CHR('Y'):
718 RETV(NWBDRY, 0);
719 break;
720 case CHR('Z'):
721 RETV(SEND, 0);
722 break;
723 case CHR('1'):
724 case CHR('2'):
725 case CHR('3'):
726 case CHR('4'):
727 case CHR('5'):
728 case CHR('6'):
729 case CHR('7'):
730 case CHR('8'):
731 case CHR('9'):
732 save = v->now;
733 v->now--; /* put first digit back */
734 c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */
735 if (ISERR())
737 /* ugly heuristic (first test is "exactly 1 digit?") */
738 if (v->now == save || ((int) c > 0 && (int) c <= v->nsubexp))
739 {
741 RETV(BACKREF, c);
742 }
743 /* oops, doesn't look like it's a backref after all... */
744 v->now = save;
745 /* and fall through into octal number */
746 /* FALLTHROUGH */
747 case CHR('0'):
749 v->now--; /* put first digit back */
750 c = lexdigits(v, 8, 1, 3);
751 if (ISERR())
753 if (c > 0xff)
754 {
755 /* out of range, so we handled one digit too much */
756 v->now--;
757 c >>= 3;
758 }
759 RETV(PLAIN, c);
760 break;
761 default:
762
763 /*
764 * Throw an error for unrecognized ASCII alpha escape sequences,
765 * which reserves them for future use if needed.
766 */
768 break;
769 }
771}
static chr lexdigits(struct vars *v, int base, int minlen, int maxlen)
Definition: regc_lex.c:780
#define ENDOF(array)
Definition: regc_lex.c:64
static chr chrnamed(struct vars *v, const chr *startp, const chr *endp, chr lastresort)
Definition: regc_lex.c:1022
#define NWBDRY
Definition: regcomp.c:345
#define SBEGIN
Definition: regcomp.c:347
#define ISERR()
Definition: regcomp.c:317
#define CCLASSS
Definition: regcomp.c:338
#define WBDRY
Definition: regcomp.c:344
#define CCLASSC
Definition: regcomp.c:339
#define SEND
Definition: regcomp.c:348
#define CHR_IS_IN_RANGE(c)
Definition: regcustom.h:77
#define REG_ADVF
Definition: regex.h:180
#define REG_ULOCALE
Definition: regex.h:148
#define REG_UUNPORT
Definition: regex.h:147
@ CC_WORD
Definition: regguts.h:141
@ CC_SPACE
Definition: regguts.h:141
@ CC_DIGIT
Definition: regguts.h:140
int cflags
Definition: regcomp.c:287

References assert, ATEOS, BACKREF, CC_DIGIT, CC_SPACE, CC_WORD, CCLASSC, CCLASSS, vars::cflags, CHR, CHR_IS_IN_RANGE, chrnamed(), ENDOF, FAILW, ISERR, lexdigits(), NOTE, NOTREACHED, vars::now, NWBDRY, PLAIN, REG_ADVF, REG_EESCAPE, REG_UBACKREF, REG_ULOCALE, REG_UNONPOSIX, REG_UUNPORT, RET, RETV, SBEGIN, SEND, and WBDRY.

Referenced by next().

◆ lexstart()

static void lexstart ( struct vars *v)
static

Definition at line 70 of file regc_lex.c.

71{
72 prefixes(v); /* may turn on new type bits etc. */
73 NOERR();
74
75 if (v->cflags & REG_QUOTE)
76 {
78 INTOCON(L_Q);
79 }
80 else if (v->cflags & REG_EXTENDED)
81 {
82 assert(!(v->cflags & REG_QUOTE));
84 }
85 else
86 {
87 assert(!(v->cflags & (REG_QUOTE | REG_ADVF)));
89 }
90
91 v->nexttype = EMPTY; /* remember we were at the start */
92 next(v); /* set up the first token */
93}
#define L_ERE
Definition: regc_lex.c:51
#define L_Q
Definition: regc_lex.c:53
static void prefixes(struct vars *v)
Definition: regc_lex.c:99
#define L_BRE
Definition: regc_lex.c:52
static int next(struct vars *v)
Definition: regc_lex.c:200
#define NOERR()
Definition: regcomp.c:321
#define REG_ADVANCED
Definition: regex.h:181
#define REG_EXTENDED
Definition: regex.h:179
#define REG_NEWLINE
Definition: regex.h:189
#define REG_QUOTE
Definition: regex.h:182
int nexttype
Definition: regcomp.c:289

References assert, vars::cflags, EMPTY, INTOCON, L_BRE, L_ERE, L_Q, next(), vars::nexttype, NOERR, prefixes(), REG_ADVANCED, REG_ADVF, REG_EXPANDED, REG_EXTENDED, REG_NEWLINE, and REG_QUOTE.

◆ newline()

static chr newline ( void  )
static

Definition at line 1010 of file regc_lex.c.

1011{
1012 return CHR('\n');
1013}

References CHR.

◆ next()

static int next ( struct vars *v)
static

Definition at line 200 of file regc_lex.c.

201{
202 chr c;
203
204next_restart: /* loop here after eating a comment */
205
206 /* errors yield an infinite sequence of failures */
207 if (ISERR())
208 return 0; /* the error has set nexttype to EOS */
209
210 /* remember flavor of last token */
211 v->lasttype = v->nexttype;
212
213 /* REG_BOSONLY */
214 if (v->nexttype == EMPTY && (v->cflags & REG_BOSONLY))
215 {
216 /* at start of a REG_BOSONLY RE */
217 RETV(SBEGIN, 0); /* same as \A */
218 }
219
220 /* skip white space etc. if appropriate (not in literal or []) */
221 if (v->cflags & REG_EXPANDED)
222 switch (v->lexcon)
223 {
224 case L_ERE:
225 case L_BRE:
226 case L_EBND:
227 case L_BBND:
228 skip(v);
229 break;
230 }
231
232 /* handle EOS, depending on context */
233 if (ATEOS())
234 {
235 switch (v->lexcon)
236 {
237 case L_ERE:
238 case L_BRE:
239 case L_Q:
240 RET(EOS);
241 break;
242 case L_EBND:
243 case L_BBND:
245 break;
246 case L_BRACK:
247 case L_CEL:
248 case L_ECL:
249 case L_CCL:
251 break;
252 }
254 }
255
256 /* okay, time to actually get a character */
257 c = *v->now++;
258
259 /* deal with the easy contexts, punt EREs to code below */
260 switch (v->lexcon)
261 {
262 case L_BRE: /* punt BREs to separate function */
263 return brenext(v, c);
264 break;
265 case L_ERE: /* see below */
266 break;
267 case L_Q: /* literal strings are easy */
268 RETV(PLAIN, c);
269 break;
270 case L_BBND: /* bounds are fairly simple */
271 case L_EBND:
272 switch (c)
273 {
274 case CHR('0'):
275 case CHR('1'):
276 case CHR('2'):
277 case CHR('3'):
278 case CHR('4'):
279 case CHR('5'):
280 case CHR('6'):
281 case CHR('7'):
282 case CHR('8'):
283 case CHR('9'):
284 RETV(DIGIT, (chr) DIGITVAL(c));
285 break;
286 case CHR(','):
287 RET(',');
288 break;
289 case CHR('}'): /* ERE bound ends with } */
290 if (INCON(L_EBND))
291 {
292 INTOCON(L_ERE);
293 if ((v->cflags & REG_ADVF) && NEXT1('?'))
294 {
295 v->now++;
297 RETV('}', 0);
298 }
299 RETV('}', 1);
300 }
301 else
303 break;
304 case CHR('\\'): /* BRE bound ends with \} */
305 if (INCON(L_BBND) && NEXT1('}'))
306 {
307 v->now++;
308 INTOCON(L_BRE);
309 RETV('}', 1);
310 }
311 else
313 break;
314 default:
316 break;
317 }
319 break;
320 case L_BRACK: /* brackets are not too hard */
321 switch (c)
322 {
323 case CHR(']'):
324 if (LASTTYPE('['))
325 RETV(PLAIN, c);
326 else
327 {
329 L_ERE : L_BRE);
330 RET(']');
331 }
332 break;
333 case CHR('\\'):
334 NOTE(REG_UBBS);
335 if (!(v->cflags & REG_ADVF))
336 RETV(PLAIN, c);
338 if (ATEOS())
340 if (!lexescape(v))
341 return 0;
342 switch (v->nexttype)
343 { /* not all escapes okay here */
344 case PLAIN:
345 case CCLASSS:
346 case CCLASSC:
347 return 1;
348 break;
349 }
350 /* not one of the acceptable escapes */
352 break;
353 case CHR('-'):
354 if (LASTTYPE('[') || NEXT1(']'))
355 RETV(PLAIN, c);
356 else
357 RETV(RANGE, c);
358 break;
359 case CHR('['):
360 if (ATEOS())
362 switch (*v->now++)
363 {
364 case CHR('.'):
365 INTOCON(L_CEL);
366 /* might or might not be locale-specific */
367 RET(COLLEL);
368 break;
369 case CHR('='):
370 INTOCON(L_ECL);
372 RET(ECLASS);
373 break;
374 case CHR(':'):
375 INTOCON(L_CCL);
377 RET(CCLASS);
378 break;
379 default: /* oops */
380 v->now--;
381 RETV(PLAIN, c);
382 break;
383 }
385 break;
386 default:
387 RETV(PLAIN, c);
388 break;
389 }
391 break;
392 case L_CEL: /* collating elements are easy */
393 if (c == CHR('.') && NEXT1(']'))
394 {
395 v->now++;
397 RETV(END, '.');
398 }
399 else
400 RETV(PLAIN, c);
401 break;
402 case L_ECL: /* ditto equivalence classes */
403 if (c == CHR('=') && NEXT1(']'))
404 {
405 v->now++;
407 RETV(END, '=');
408 }
409 else
410 RETV(PLAIN, c);
411 break;
412 case L_CCL: /* ditto character classes */
413 if (c == CHR(':') && NEXT1(']'))
414 {
415 v->now++;
417 RETV(END, ':');
418 }
419 else
420 RETV(PLAIN, c);
421 break;
422 default:
424 break;
425 }
426
427 /* that got rid of everything except EREs and AREs */
429
430 /* deal with EREs and AREs, except for backslashes */
431 switch (c)
432 {
433 case CHR('|'):
434 RET('|');
435 break;
436 case CHR('*'):
437 if ((v->cflags & REG_ADVF) && NEXT1('?'))
438 {
439 v->now++;
441 RETV('*', 0);
442 }
443 RETV('*', 1);
444 break;
445 case CHR('+'):
446 if ((v->cflags & REG_ADVF) && NEXT1('?'))
447 {
448 v->now++;
450 RETV('+', 0);
451 }
452 RETV('+', 1);
453 break;
454 case CHR('?'):
455 if ((v->cflags & REG_ADVF) && NEXT1('?'))
456 {
457 v->now++;
459 RETV('?', 0);
460 }
461 RETV('?', 1);
462 break;
463 case CHR('{'): /* bounds start or plain character */
464 if (v->cflags & REG_EXPANDED)
465 skip(v);
466 if (ATEOS() || !iscdigit(*v->now))
467 {
470 RETV(PLAIN, c);
471 }
472 else
473 {
476 RET('{');
477 }
479 break;
480 case CHR('('): /* parenthesis, or advanced extension */
481 if ((v->cflags & REG_ADVF) && NEXT1('?'))
482 {
484 v->now++;
485 if (ATEOS())
487 switch (*v->now++)
488 {
489 case CHR(':'): /* non-capturing paren */
490 RETV('(', 0);
491 break;
492 case CHR('#'): /* comment */
493 while (!ATEOS() && *v->now != CHR(')'))
494 v->now++;
495 if (!ATEOS())
496 v->now++;
497 assert(v->nexttype == v->lasttype);
498 goto next_restart;
499 case CHR('='): /* positive lookahead */
502 break;
503 case CHR('!'): /* negative lookahead */
506 break;
507 case CHR('<'):
508 if (ATEOS())
510 switch (*v->now++)
511 {
512 case CHR('='): /* positive lookbehind */
515 break;
516 case CHR('!'): /* negative lookbehind */
519 break;
520 default:
522 break;
523 }
525 break;
526 default:
528 break;
529 }
531 }
532 RETV('(', 1);
533 break;
534 case CHR(')'):
535 if (LASTTYPE('('))
537 RETV(')', c);
538 break;
539 case CHR('['): /* easy except for [[:<:]] and [[:>:]] */
540 if (HAVE(6) && *(v->now + 0) == CHR('[') &&
541 *(v->now + 1) == CHR(':') &&
542 (*(v->now + 2) == CHR('<') ||
543 *(v->now + 2) == CHR('>')) &&
544 *(v->now + 3) == CHR(':') &&
545 *(v->now + 4) == CHR(']') &&
546 *(v->now + 5) == CHR(']'))
547 {
548 c = *(v->now + 2);
549 v->now += 6;
551 RET((c == CHR('<')) ? '<' : '>');
552 }
554 if (NEXT1('^'))
555 {
556 v->now++;
557 RETV('[', 0);
558 }
559 RETV('[', 1);
560 break;
561 case CHR('.'):
562 RET('.');
563 break;
564 case CHR('^'):
565 RET('^');
566 break;
567 case CHR('$'):
568 RET('$');
569 break;
570 case CHR('\\'): /* mostly punt backslashes to code below */
571 if (ATEOS())
573 break;
574 default: /* ordinary character */
575 RETV(PLAIN, c);
576 break;
577 }
578
579 /* ERE/ARE backslash handling; backslash already eaten */
580 assert(!ATEOS());
581 if (!(v->cflags & REG_ADVF))
582 { /* only AREs have non-trivial escapes */
583 if (iscalnum(*v->now))
584 {
587 }
588 RETV(PLAIN, *v->now++);
589 }
590 return lexescape(v);
591}
#define END
Definition: _int.h:160
while(p+4<=pend)
#define INCON(con)
Definition: regc_lex.c:61
static int lexescape(struct vars *v)
Definition: regc_lex.c:601
#define L_CEL
Definition: regc_lex.c:57
#define L_EBND
Definition: regc_lex.c:54
#define L_ECL
Definition: regc_lex.c:58
static int brenext(struct vars *v, chr c)
Definition: regc_lex.c:861
#define L_CCL
Definition: regc_lex.c:59
#define COLLEL
Definition: regcomp.c:334
#define CCLASS
Definition: regcomp.c:336
#define DIGIT
Definition: regcomp.c:332
#define ECLASS
Definition: regcomp.c:335
#define LACON
Definition: regcomp.c:341
#define EOS
Definition: regcomp.c:330
#define RANGE
Definition: regcomp.c:340
#define iscdigit(x)
Definition: regcustom.h:92
#define REG_EBRACK
Definition: regex.h:222
#define REG_BADRPT
Definition: regex.h:228
#define REG_ULOOKAROUND
Definition: regex.h:139
#define REG_UBBS
Definition: regex.h:144
#define REG_BADBR
Definition: regex.h:225
#define REG_EBRACE
Definition: regex.h:224
#define REG_BOSONLY
Definition: regex.h:192
#define REG_UBRACES
Definition: regex.h:141
#define LATYPE_AHEAD_NEG
Definition: regguts.h:105
#define LATYPE_BEHIND_POS
Definition: regguts.h:106
#define LATYPE_BEHIND_NEG
Definition: regguts.h:107
#define LATYPE_AHEAD_POS
Definition: regguts.h:104
int lexcon
Definition: regcomp.c:291
int lasttype
Definition: regcomp.c:288

References assert, ATEOS, brenext(), CCLASS, CCLASSC, CCLASSS, vars::cflags, CHR, COLLEL, DIGIT, DIGITVAL, ECLASS, EMPTY, END, EOS, FAILW, HAVE, INCON, INTOCON, iscalnum, iscdigit, ISERR, L_BBND, L_BRACK, L_BRE, L_CCL, L_CEL, L_EBND, L_ECL, L_ERE, L_Q, LACON, LASTTYPE, vars::lasttype, LATYPE_AHEAD_NEG, LATYPE_AHEAD_POS, LATYPE_BEHIND_NEG, LATYPE_BEHIND_POS, vars::lexcon, lexescape(), NEXT1, vars::nexttype, NOTE, NOTREACHED, vars::now, PLAIN, RANGE, REG_ADVF, REG_BADBR, REG_BADRPT, REG_BOSONLY, REG_EBRACE, REG_EBRACK, REG_EESCAPE, REG_EXPANDED, REG_EXTENDED, REG_UBBS, REG_UBOUNDS, REG_UBRACES, REG_UBSALNUM, REG_ULOCALE, REG_ULOOKAROUND, REG_UNONPOSIX, REG_UUNSPEC, RET, RETV, SBEGIN, and skip().

Referenced by lexstart().

◆ prefixes()

static void prefixes ( struct vars *v)
static

Definition at line 99 of file regc_lex.c.

100{
101 /* literal string doesn't get any of this stuff */
102 if (v->cflags & REG_QUOTE)
103 return;
104
105 /* initial "***" gets special things */
106 if (HAVE(4) && NEXT3('*', '*', '*'))
107 switch (*(v->now + 3))
108 {
109 case CHR('?'): /* "***?" error, msg shows version */
111 return; /* proceed no further */
112 break;
113 case CHR('='): /* "***=" shifts to literal string */
115 v->cflags |= REG_QUOTE;
117 v->now += 4;
118 return; /* and there can be no more prefixes */
119 break;
120 case CHR(':'): /* "***:" shifts to AREs */
122 v->cflags |= REG_ADVANCED;
123 v->now += 4;
124 break;
125 default: /* otherwise *** is just an error */
127 return;
128 break;
129 }
130
131 /* BREs and EREs don't get embedded options */
132 if ((v->cflags & REG_ADVANCED) != REG_ADVANCED)
133 return;
134
135 /* embedded options (AREs only) */
136 if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2)))
137 {
139 v->now += 2;
140 for (; !ATEOS() && iscalpha(*v->now); v->now++)
141 switch (*v->now)
142 {
143 case CHR('b'): /* BREs (but why???) */
144 v->cflags &= ~(REG_ADVANCED | REG_QUOTE);
145 break;
146 case CHR('c'): /* case sensitive */
147 v->cflags &= ~REG_ICASE;
148 break;
149 case CHR('e'): /* plain EREs */
150 v->cflags |= REG_EXTENDED;
151 v->cflags &= ~(REG_ADVF | REG_QUOTE);
152 break;
153 case CHR('i'): /* case insensitive */
154 v->cflags |= REG_ICASE;
155 break;
156 case CHR('m'): /* Perloid synonym for n */
157 case CHR('n'): /* \n affects ^ $ . [^ */
158 v->cflags |= REG_NEWLINE;
159 break;
160 case CHR('p'): /* ~Perl, \n affects . [^ */
161 v->cflags |= REG_NLSTOP;
162 v->cflags &= ~REG_NLANCH;
163 break;
164 case CHR('q'): /* literal string */
165 v->cflags |= REG_QUOTE;
166 v->cflags &= ~REG_ADVANCED;
167 break;
168 case CHR('s'): /* single line, \n ordinary */
169 v->cflags &= ~REG_NEWLINE;
170 break;
171 case CHR('t'): /* tight syntax */
172 v->cflags &= ~REG_EXPANDED;
173 break;
174 case CHR('w'): /* weird, \n affects ^ $ only */
175 v->cflags &= ~REG_NLSTOP;
176 v->cflags |= REG_NLANCH;
177 break;
178 case CHR('x'): /* expanded syntax */
179 v->cflags |= REG_EXPANDED;
180 break;
181 default:
183 return;
184 }
185 if (!NEXT1(')'))
186 {
188 return;
189 }
190 v->now++;
191 if (v->cflags & REG_QUOTE)
193 }
194}
#define NEXT3(a, b, c)
Definition: regc_lex.c:40
#define iscalpha(x)
Definition: regcustom.h:91
#define REG_BADOPT
Definition: regex.h:232
#define REG_ICASE
Definition: regex.h:184
#define REG_NLANCH
Definition: regex.h:188
#define REG_NLSTOP
Definition: regex.h:187
#define REG_BADPAT
Definition: regex.h:217

References ATEOS, vars::cflags, CHR, ERR, HAVE, iscalpha, NEXT1, NEXT2, NEXT3, NOTE, vars::now, REG_ADVANCED, REG_ADVF, REG_BADOPT, REG_BADPAT, REG_BADRPT, REG_EXPANDED, REG_EXTENDED, REG_ICASE, REG_NEWLINE, REG_NLANCH, REG_NLSTOP, REG_QUOTE, and REG_UNONPOSIX.

Referenced by lexstart(), and NIImportAffixes().

◆ skip()

static void skip ( struct vars *v)
static

Definition at line 982 of file regc_lex.c.

983{
984 const chr *start = v->now;
985
987
988 for (;;)
989 {
990 while (!ATEOS() && iscspace(*v->now))
991 v->now++;
992 if (ATEOS() || *v->now != CHR('#'))
993 break; /* NOTE BREAK OUT */
994 assert(NEXT1('#'));
995 while (!ATEOS() && *v->now != CHR('\n'))
996 v->now++;
997 /* leave the newline to be picked up by the iscspace loop */
998 }
999
1000 if (v->now != start)
1002}
return str start
#define iscspace(x)
Definition: regcustom.h:93

References assert, ATEOS, vars::cflags, CHR, iscspace, NEXT1, NOTE, vars::now, REG_EXPANDED, REG_UNONPOSIX, and start.

Referenced by brenext(), and next().