PostgreSQL Source Code  git master
regc_lex.c File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define ATEOS()   (v->now >= v->stop)
 
#define HAVE(n)   (v->stop - v->now >= (n))
 
#define NEXT1(c)   (!ATEOS() && *v->now == CHR(c))
 
#define NEXT2(a, b)   (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))
 
#define NEXT3(a, b, c)
 
#define SET(c)   (v->nexttype = (c))
 
#define SETV(c, n)   (v->nexttype = (c), v->nextvalue = (n))
 
#define RET(c)   return (SET(c), 1)
 
#define RETV(c, n)   return (SETV(c, n), 1)
 
#define FAILW(e)   return (ERR(e), 0) /* ERR does SET(EOS) */
 
#define LASTTYPE(t)   (v->lasttype == (t))
 
#define L_ERE   1 /* mainline ERE/ARE */
 
#define L_BRE   2 /* mainline BRE */
 
#define L_Q   3 /* REG_QUOTE */
 
#define L_EBND   4 /* ERE/ARE bound */
 
#define L_BBND   5 /* BRE bound */
 
#define L_BRACK   6 /* brackets */
 
#define L_CEL   7 /* collating element */
 
#define L_ECL   8 /* equivalence class */
 
#define L_CCL   9 /* character class */
 
#define INTOCON(c)   (v->lexcon = (c))
 
#define INCON(con)   (v->lexcon == (con))
 
#define ENDOF(array)   ((array) + sizeof(array)/sizeof(chr))
 

Functions

static void lexstart (struct vars *v)
 
static void prefixes (struct vars *v)
 
static int next (struct vars *v)
 
static int lexescape (struct vars *v)
 
static chr lexdigits (struct vars *v, int base, int minlen, int maxlen)
 
static int brenext (struct vars *v, chr c)
 
static void skip (struct vars *v)
 
static chr newline (void)
 
static chr chrnamed (struct vars *v, const chr *startp, const chr *endp, chr lastresort)
 

Macro Definition Documentation

◆ ATEOS

#define ATEOS ( )    (v->now >= v->stop)

Definition at line 36 of file regc_lex.c.

Referenced by brenext(), lexdigits(), lexescape(), next(), prefixes(), and skip().

◆ ENDOF

#define ENDOF (   array)    ((array) + sizeof(array)/sizeof(chr))

Definition at line 64 of file regc_lex.c.

Referenced by lexescape().

◆ FAILW

#define FAILW (   e)    return (ERR(e), 0) /* ERR does SET(EOS) */

Definition at line 47 of file regc_lex.c.

Referenced by brenext(), lexescape(), and next().

◆ HAVE

#define HAVE (   n)    (v->stop - v->now >= (n))

Definition at line 37 of file regc_lex.c.

Referenced by brenext(), next(), and prefixes().

◆ INCON

#define INCON (   con)    (v->lexcon == (con))

Definition at line 61 of file regc_lex.c.

Referenced by next().

◆ INTOCON

#define INTOCON (   c)    (v->lexcon = (c))

Definition at line 60 of file regc_lex.c.

Referenced by brenext(), lexstart(), and next().

◆ L_BBND

#define L_BBND   5 /* BRE bound */

Definition at line 55 of file regc_lex.c.

Referenced by brenext(), and next().

◆ L_BRACK

#define L_BRACK   6 /* brackets */

Definition at line 56 of file regc_lex.c.

Referenced by brenext(), and next().

◆ L_BRE

#define L_BRE   2 /* mainline BRE */

Definition at line 52 of file regc_lex.c.

Referenced by lexstart(), and next().

◆ L_CCL

#define L_CCL   9 /* character class */

Definition at line 59 of file regc_lex.c.

Referenced by next().

◆ L_CEL

#define L_CEL   7 /* collating element */

Definition at line 57 of file regc_lex.c.

Referenced by next().

◆ L_EBND

#define L_EBND   4 /* ERE/ARE bound */

Definition at line 54 of file regc_lex.c.

Referenced by next().

◆ L_ECL

#define L_ECL   8 /* equivalence class */

Definition at line 58 of file regc_lex.c.

Referenced by next().

◆ L_ERE

#define L_ERE   1 /* mainline ERE/ARE */

Definition at line 51 of file regc_lex.c.

Referenced by lexstart(), and next().

◆ L_Q

#define L_Q   3 /* REG_QUOTE */

Definition at line 53 of file regc_lex.c.

Referenced by lexstart(), and next().

◆ LASTTYPE

#define LASTTYPE (   t)    (v->lasttype == (t))

Definition at line 48 of file regc_lex.c.

Referenced by brenext(), and next().

◆ NEXT1

#define NEXT1 (   c)    (!ATEOS() && *v->now == CHR(c))

Definition at line 38 of file regc_lex.c.

Referenced by brenext(), next(), prefixes(), and skip().

◆ NEXT2

#define NEXT2 (   a,
  b 
)    (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))

Definition at line 39 of file regc_lex.c.

Referenced by brenext(), and prefixes().

◆ NEXT3

#define NEXT3 (   a,
  b,
  c 
)
Value:
(HAVE(3) && *v->now == CHR(a) && \
*(v->now+1) == CHR(b) && \
*(v->now+2) == CHR(c))
char * c
#define HAVE(n)
Definition: regc_lex.c:37
#define CHR(c)
Definition: regcustom.h:69

Definition at line 40 of file regc_lex.c.

Referenced by prefixes().

◆ RET

#define RET (   c)    return (SET(c), 1)

Definition at line 45 of file regc_lex.c.

Referenced by brenext(), lexescape(), and next().

◆ RETV

#define RETV (   c,
  n 
)    return (SETV(c, n), 1)

Definition at line 46 of file regc_lex.c.

Referenced by brenext(), lexescape(), and next().

◆ SET

#define SET (   c)    (v->nexttype = (c))

Definition at line 43 of file regc_lex.c.

◆ SETV

#define SETV (   c,
  n 
)    (v->nexttype = (c), v->nextvalue = (n))

Definition at line 44 of file regc_lex.c.

Function Documentation

◆ brenext()

static int brenext ( struct vars *v,
chr  c 
)
static

Definition at line 852 of file regc_lex.c.

References assert, ATEOS, BACKREF, vars::cflags, CHR, DIGITVAL, EMPTY, FAILW, HAVE, INTOCON, iscalnum, L_BBND, L_BRACK, LASTTYPE, NEXT1, NEXT2, NOTE, NOTREACHED, vars::now, PLAIN, REG_EESCAPE, REG_EXPANDED, REG_UBACKREF, REG_UBOUNDS, REG_UBSALNUM, REG_UNONPOSIX, REG_UUNSPEC, RET, RETV, and skip().

Referenced by next().

854 {
855  switch (c)
856  {
857  case CHR('*'):
858  if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
859  RETV(PLAIN, c);
860  RETV('*', 1);
861  break;
862  case CHR('['):
863  if (HAVE(6) && *(v->now + 0) == CHR('[') &&
864  *(v->now + 1) == CHR(':') &&
865  (*(v->now + 2) == CHR('<') ||
866  *(v->now + 2) == CHR('>')) &&
867  *(v->now + 3) == CHR(':') &&
868  *(v->now + 4) == CHR(']') &&
869  *(v->now + 5) == CHR(']'))
870  {
871  c = *(v->now + 2);
872  v->now += 6;
874  RET((c == CHR('<')) ? '<' : '>');
875  }
876  INTOCON(L_BRACK);
877  if (NEXT1('^'))
878  {
879  v->now++;
880  RETV('[', 0);
881  }
882  RETV('[', 1);
883  break;
884  case CHR('.'):
885  RET('.');
886  break;
887  case CHR('^'):
888  if (LASTTYPE(EMPTY))
889  RET('^');
890  if (LASTTYPE('('))
891  {
892  NOTE(REG_UUNSPEC);
893  RET('^');
894  }
895  RETV(PLAIN, c);
896  break;
897  case CHR('$'):
898  if (v->cflags & REG_EXPANDED)
899  skip(v);
900  if (ATEOS())
901  RET('$');
902  if (NEXT2('\\', ')'))
903  {
904  NOTE(REG_UUNSPEC);
905  RET('$');
906  }
907  RETV(PLAIN, c);
908  break;
909  case CHR('\\'):
910  break; /* see below */
911  default:
912  RETV(PLAIN, c);
913  break;
914  }
915 
916  assert(c == CHR('\\'));
917 
918  if (ATEOS())
920 
921  c = *v->now++;
922  switch (c)
923  {
924  case CHR('{'):
925  INTOCON(L_BBND);
926  NOTE(REG_UBOUNDS);
927  RET('{');
928  break;
929  case CHR('('):
930  RETV('(', 1);
931  break;
932  case CHR(')'):
933  RETV(')', c);
934  break;
935  case CHR('<'):
937  RET('<');
938  break;
939  case CHR('>'):
941  RET('>');
942  break;
943  case CHR('1'):
944  case CHR('2'):
945  case CHR('3'):
946  case CHR('4'):
947  case CHR('5'):
948  case CHR('6'):
949  case CHR('7'):
950  case CHR('8'):
951  case CHR('9'):
953  RETV(BACKREF, (chr) DIGITVAL(c));
954  break;
955  default:
956  if (iscalnum(c))
957  {
959  NOTE(REG_UUNSPEC);
960  }
961  RETV(PLAIN, c);
962  break;
963  }
964 
966  return 0;
967 }
static void skip(struct vars *v)
Definition: regc_lex.c:973
#define DIGITVAL(c)
Definition: regcustom.h:70
#define REG_UBSALNUM
Definition: regex.h:64
#define NEXT1(c)
Definition: regc_lex.c:38
#define NEXT2(a, b)
Definition: regc_lex.c:39
#define L_BRACK
Definition: regc_lex.c:56
#define RET(c)
Definition: regc_lex.c:45
#define NOTREACHED
Definition: regguts.h:91
pg_wchar chr
Definition: regcustom.h:66
#define iscalnum(x)
Definition: regcustom.h:98
char * c
#define assert(TEST)
Definition: imath.c:73
#define REG_EESCAPE
Definition: regex.h:144
#define LASTTYPE(t)
Definition: regc_lex.c:48
#define BACKREF
Definition: regcomp.c:289
#define INTOCON(c)
Definition: regc_lex.c:60
#define PLAIN
Definition: regcomp.c:287
#define REG_UBACKREF
Definition: regex.h:60
#define RETV(c, n)
Definition: regc_lex.c:46
#define REG_UUNSPEC
Definition: regex.h:69
const chr * now
Definition: regcomp.c:240
#define REG_UBOUNDS
Definition: regex.h:62
#define EMPTY
Definition: regcomp.c:285
#define NOTE(b)
Definition: regcomp.c:281
#define FAILW(e)
Definition: regc_lex.c:47
#define REG_EXPANDED
Definition: regex.h:110
#define HAVE(n)
Definition: regc_lex.c:37
#define L_BBND
Definition: regc_lex.c:55
#define ATEOS()
Definition: regc_lex.c:36
#define REG_UNONPOSIX
Definition: regex.h:68
#define CHR(c)
Definition: regcustom.h:69
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1544

◆ chrnamed()

static chr chrnamed ( struct vars *v,
const chr *startp,
const chr *endp,
chr  lastresort 
)
static

Definition at line 1013 of file regc_lex.c.

References cvec::chrs, element(), vars::err, cvec::nchrs, and range().

Referenced by lexescape().

1017 {
1018  chr c;
1019  int errsave;
1020  int e;
1021  struct cvec *cv;
1022 
1023  errsave = v->err;
1024  v->err = 0;
1025  c = element(v, startp, endp);
1026  e = v->err;
1027  v->err = errsave;
1028 
1029  if (e != 0)
1030  return lastresort;
1031 
1032  cv = range(v, c, c, 0);
1033  if (cv->nchrs == 0)
1034  return lastresort;
1035  return cv->chrs[0];
1036 }
pg_wchar chr
Definition: regcustom.h:66
int nchrs
Definition: regguts.h:275
char * c
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
struct cvec
Definition: regguts.h:273
int err
Definition: regcomp.c:242
static chr element(struct vars *v, const chr *startp, const chr *endp)
Definition: regc_locale.c:376
chr * chrs
Definition: regguts.h:277
e
Definition: preproc-init.c:82

◆ lexdigits()

static chr lexdigits ( struct vars *v,
int  base,
int  minlen,
int  maxlen 
)
static

Definition at line 771 of file regc_lex.c.

References ATEOS, CHR, DIGITVAL, ERR, vars::now, and REG_EESCAPE.

Referenced by lexescape().

775 {
776  uchr n; /* unsigned to avoid overflow misbehavior */
777  int len;
778  chr c;
779  int d;
780  const uchr ub = (uchr) base;
781 
782  n = 0;
783  for (len = 0; len < maxlen && !ATEOS(); len++)
784  {
785  c = *v->now++;
786  switch (c)
787  {
788  case CHR('0'):
789  case CHR('1'):
790  case CHR('2'):
791  case CHR('3'):
792  case CHR('4'):
793  case CHR('5'):
794  case CHR('6'):
795  case CHR('7'):
796  case CHR('8'):
797  case CHR('9'):
798  d = DIGITVAL(c);
799  break;
800  case CHR('a'):
801  case CHR('A'):
802  d = 10;
803  break;
804  case CHR('b'):
805  case CHR('B'):
806  d = 11;
807  break;
808  case CHR('c'):
809  case CHR('C'):
810  d = 12;
811  break;
812  case CHR('d'):
813  case CHR('D'):
814  d = 13;
815  break;
816  case CHR('e'):
817  case CHR('E'):
818  d = 14;
819  break;
820  case CHR('f'):
821  case CHR('F'):
822  d = 15;
823  break;
824  default:
825  v->now--; /* oops, not a digit at all */
826  d = -1;
827  break;
828  }
829 
830  if (d >= base)
831  { /* not a plausible digit */
832  v->now--;
833  d = -1;
834  }
835  if (d < 0)
836  break; /* NOTE BREAK OUT */
837  n = n * ub + (uchr) d;
838  }
839  if (len < minlen)
840  ERR(REG_EESCAPE);
841 
842  return (chr) n;
843 }
#define DIGITVAL(c)
Definition: regcustom.h:70
#define ERR
Definition: _int.h:161
pg_wchar chr
Definition: regcustom.h:66
char * c
#define REG_EESCAPE
Definition: regex.h:144
unsigned uchr
Definition: regcustom.h:67
const chr * now
Definition: regcomp.c:240
#define ATEOS()
Definition: regc_lex.c:36
#define CHR(c)
Definition: regcustom.h:69

◆ lexescape()

static int lexescape ( struct vars *v)
static

Definition at line 600 of file regc_lex.c.

References assert, ATEOS, BACKREF, CC_DIGIT, CC_SPACE, CC_WORD, CCLASSC, CCLASSS, vars::cflags, CHR, CHR_IS_IN_RANGE, chrnamed(), ENDOF, FAILW, iscalnum, iscalpha, ISERR, lexdigits(), NOTE, NOTREACHED, vars::now, NWBDRY, PLAIN, REG_ADVF, REG_EESCAPE, REG_UBACKREF, REG_ULOCALE, REG_UNONPOSIX, REG_UUNPORT, RET, RETV, SBEGIN, SEND, and WBDRY.

Referenced by next().

601 {
602  chr c;
603  static const chr alert[] = {
604  CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
605  };
606  static const chr esc[] = {
607  CHR('E'), CHR('S'), CHR('C')
608  };
609  const chr *save;
610 
611  assert(v->cflags & REG_ADVF);
612 
613  assert(!ATEOS());
614  c = *v->now++;
615  if (!iscalnum(c))
616  RETV(PLAIN, c);
617 
619  switch (c)
620  {
621  case CHR('a'):
622  RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
623  break;
624  case CHR('A'):
625  RETV(SBEGIN, 0);
626  break;
627  case CHR('b'):
628  RETV(PLAIN, CHR('\b'));
629  break;
630  case CHR('B'):
631  RETV(PLAIN, CHR('\\'));
632  break;
633  case CHR('c'):
634  NOTE(REG_UUNPORT);
635  if (ATEOS())
637  RETV(PLAIN, (chr) (*v->now++ & 037));
638  break;
639  case CHR('d'):
640  NOTE(REG_ULOCALE);
642  break;
643  case CHR('D'):
644  NOTE(REG_ULOCALE);
646  break;
647  case CHR('e'):
648  NOTE(REG_UUNPORT);
649  RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
650  break;
651  case CHR('f'):
652  RETV(PLAIN, CHR('\f'));
653  break;
654  case CHR('m'):
655  RET('<');
656  break;
657  case CHR('M'):
658  RET('>');
659  break;
660  case CHR('n'):
661  RETV(PLAIN, CHR('\n'));
662  break;
663  case CHR('r'):
664  RETV(PLAIN, CHR('\r'));
665  break;
666  case CHR('s'):
667  NOTE(REG_ULOCALE);
669  break;
670  case CHR('S'):
671  NOTE(REG_ULOCALE);
673  break;
674  case CHR('t'):
675  RETV(PLAIN, CHR('\t'));
676  break;
677  case CHR('u'):
678  c = lexdigits(v, 16, 4, 4);
679  if (ISERR() || !CHR_IS_IN_RANGE(c))
681  RETV(PLAIN, c);
682  break;
683  case CHR('U'):
684  c = lexdigits(v, 16, 8, 8);
685  if (ISERR() || !CHR_IS_IN_RANGE(c))
687  RETV(PLAIN, c);
688  break;
689  case CHR('v'):
690  RETV(PLAIN, CHR('\v'));
691  break;
692  case CHR('w'):
693  NOTE(REG_ULOCALE);
694  RETV(CCLASSS, CC_WORD);
695  break;
696  case CHR('W'):
697  NOTE(REG_ULOCALE);
698  RETV(CCLASSC, CC_WORD);
699  break;
700  case CHR('x'):
701  NOTE(REG_UUNPORT);
702  c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */
703  if (ISERR() || !CHR_IS_IN_RANGE(c))
705  RETV(PLAIN, c);
706  break;
707  case CHR('y'):
708  NOTE(REG_ULOCALE);
709  RETV(WBDRY, 0);
710  break;
711  case CHR('Y'):
712  NOTE(REG_ULOCALE);
713  RETV(NWBDRY, 0);
714  break;
715  case CHR('Z'):
716  RETV(SEND, 0);
717  break;
718  case CHR('1'):
719  case CHR('2'):
720  case CHR('3'):
721  case CHR('4'):
722  case CHR('5'):
723  case CHR('6'):
724  case CHR('7'):
725  case CHR('8'):
726  case CHR('9'):
727  save = v->now;
728  v->now--; /* put first digit back */
729  c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */
730  if (ISERR())
732  /* ugly heuristic (first test is "exactly 1 digit?") */
733  if (v->now == save || ((int) c > 0 && (int) c <= v->nsubexp))
734  {
736  RETV(BACKREF, c);
737  }
738  /* oops, doesn't look like it's a backref after all... */
739  v->now = save;
740  /* and fall through into octal number */
741  /* FALLTHROUGH */
742  case CHR('0'):
743  NOTE(REG_UUNPORT);
744  v->now--; /* put first digit back */
745  c = lexdigits(v, 8, 1, 3);
746  if (ISERR())
748  if (c > 0xff)
749  {
750  /* out of range, so we handled one digit too much */
751  v->now--;
752  c >>= 3;
753  }
754  RETV(PLAIN, c);
755  break;
756  default:
757  assert(iscalpha(c));
758  FAILW(REG_EESCAPE); /* unknown alphabetic escape */
759  break;
760  }
762 }
#define REG_ULOCALE
Definition: regex.h:72
#define ISERR()
Definition: regcomp.c:273
#define RET(c)
Definition: regc_lex.c:45
#define NOTREACHED
Definition: regguts.h:91
static chr chrnamed(struct vars *v, const chr *startp, const chr *endp, chr lastresort)
Definition: regc_lex.c:1013
#define ENDOF(array)
Definition: regc_lex.c:64
#define CHR_IS_IN_RANGE(c)
Definition: regcustom.h:85
pg_wchar chr
Definition: regcustom.h:66
#define iscalnum(x)
Definition: regcustom.h:98
#define CCLASSS
Definition: regcomp.c:294
static chr lexdigits(struct vars *v, int base, int minlen, int maxlen)
Definition: regc_lex.c:771
char * c
#define assert(TEST)
Definition: imath.c:73
#define REG_EESCAPE
Definition: regex.h:144
#define SBEGIN
Definition: regcomp.c:302
#define BACKREF
Definition: regcomp.c:289
#define CCLASSC
Definition: regcomp.c:295
#define iscalpha(x)
Definition: regcustom.h:99
#define REG_UUNPORT
Definition: regex.h:71
#define REG_ADVF
Definition: regex.h:104
#define PLAIN
Definition: regcomp.c:287
#define REG_UBACKREF
Definition: regex.h:60
#define RETV(c, n)
Definition: regc_lex.c:46
#define NWBDRY
Definition: regcomp.c:301
const chr * now
Definition: regcomp.c:240
#define NOTE(b)
Definition: regcomp.c:281
#define FAILW(e)
Definition: regc_lex.c:47
#define SEND
Definition: regcomp.c:303
int cflags
Definition: regcomp.c:243
#define ATEOS()
Definition: regc_lex.c:36
#define REG_UNONPOSIX
Definition: regex.h:68
#define WBDRY
Definition: regcomp.c:300
#define CHR(c)
Definition: regcustom.h:69
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1544

◆ lexstart()

static void lexstart ( struct vars *v)
static

Definition at line 70 of file regc_lex.c.

References assert, vars::cflags, EMPTY, INTOCON, L_BRE, L_ERE, L_Q, next(), vars::nexttype, NOERR, prefixes(), REG_ADVANCED, REG_ADVF, REG_EXPANDED, REG_EXTENDED, REG_NEWLINE, and REG_QUOTE.

71 {
72  prefixes(v); /* may turn on new type bits etc. */
73  NOERR();
74 
75  if (v->cflags & REG_QUOTE)
76  {
78  INTOCON(L_Q);
79  }
80  else if (v->cflags & REG_EXTENDED)
81  {
82  assert(!(v->cflags & REG_QUOTE));
83  INTOCON(L_ERE);
84  }
85  else
86  {
87  assert(!(v->cflags & (REG_QUOTE | REG_ADVF)));
88  INTOCON(L_BRE);
89  }
90 
91  v->nexttype = EMPTY; /* remember we were at the start */
92  next(v); /* set up the first token */
93 }
int nexttype
Definition: regcomp.c:245
#define REG_QUOTE
Definition: regex.h:106
#define L_ERE
Definition: regc_lex.c:51
static int next(struct vars *v)
Definition: regc_lex.c:200
static void prefixes(struct vars *v)
Definition: regc_lex.c:99
#define assert(TEST)
Definition: imath.c:73
#define REG_NEWLINE
Definition: regex.h:113
#define REG_ADVANCED
Definition: regex.h:105
#define REG_EXTENDED
Definition: regex.h:103
#define INTOCON(c)
Definition: regc_lex.c:60
#define REG_ADVF
Definition: regex.h:104
#define L_Q
Definition: regc_lex.c:53
#define L_BRE
Definition: regc_lex.c:52
#define EMPTY
Definition: regcomp.c:285
#define NOERR()
Definition: regcomp.c:277
int cflags
Definition: regcomp.c:243
#define REG_EXPANDED
Definition: regex.h:110

◆ newline()

static chr newline ( void  )
static

Definition at line 1001 of file regc_lex.c.

References CHR.

Referenced by load_hba(), load_ident(), and replace_token().

1002 {
1003  return CHR('\n');
1004 }
#define CHR(c)
Definition: regcustom.h:69

◆ next()

static int next ( struct vars *v)
static

Definition at line 200 of file regc_lex.c.

References assert, ATEOS, brenext(), CCLASS, CCLASSC, CCLASSS, vars::cflags, CHR, COLLEL, DIGIT, DIGITVAL, ECLASS, EMPTY, END, EOS, FAILW, HAVE, INCON, INTOCON, iscalnum, iscdigit, ISERR, L_BBND, L_BRACK, L_BRE, L_CCL, L_CEL, L_EBND, L_ECL, L_ERE, L_Q, LACON, LASTTYPE, vars::lasttype, LATYPE_AHEAD_NEG, LATYPE_AHEAD_POS, LATYPE_BEHIND_NEG, LATYPE_BEHIND_POS, vars::lexcon, lexescape(), NEXT1, vars::nexttype, NOTE, NOTREACHED, vars::now, PLAIN, RANGE, REG_ADVF, REG_BADBR, REG_BADRPT, REG_BOSONLY, REG_EBRACE, REG_EBRACK, REG_EESCAPE, REG_EXPANDED, REG_EXTENDED, REG_UBBS, REG_UBOUNDS, REG_UBRACES, REG_UBSALNUM, REG_ULOCALE, REG_ULOOKAROUND, REG_UNONPOSIX, REG_UUNSPEC, RET, RETV, SBEGIN, and skip().

Referenced by lexstart().

201 {
202  chr c;
203 
204  /* errors yield an infinite sequence of failures */
205  if (ISERR())
206  return 0; /* the error has set nexttype to EOS */
207 
208  /* remember flavor of last token */
209  v->lasttype = v->nexttype;
210 
211  /* REG_BOSONLY */
212  if (v->nexttype == EMPTY && (v->cflags & REG_BOSONLY))
213  {
214  /* at start of a REG_BOSONLY RE */
215  RETV(SBEGIN, 0); /* same as \A */
216  }
217 
218  /* skip white space etc. if appropriate (not in literal or []) */
219  if (v->cflags & REG_EXPANDED)
220  switch (v->lexcon)
221  {
222  case L_ERE:
223  case L_BRE:
224  case L_EBND:
225  case L_BBND:
226  skip(v);
227  break;
228  }
229 
230  /* handle EOS, depending on context */
231  if (ATEOS())
232  {
233  switch (v->lexcon)
234  {
235  case L_ERE:
236  case L_BRE:
237  case L_Q:
238  RET(EOS);
239  break;
240  case L_EBND:
241  case L_BBND:
242  FAILW(REG_EBRACE);
243  break;
244  case L_BRACK:
245  case L_CEL:
246  case L_ECL:
247  case L_CCL:
248  FAILW(REG_EBRACK);
249  break;
250  }
252  }
253 
254  /* okay, time to actually get a character */
255  c = *v->now++;
256 
257  /* deal with the easy contexts, punt EREs to code below */
258  switch (v->lexcon)
259  {
260  case L_BRE: /* punt BREs to separate function */
261  return brenext(v, c);
262  break;
263  case L_ERE: /* see below */
264  break;
265  case L_Q: /* literal strings are easy */
266  RETV(PLAIN, c);
267  break;
268  case L_BBND: /* bounds are fairly simple */
269  case L_EBND:
270  switch (c)
271  {
272  case CHR('0'):
273  case CHR('1'):
274  case CHR('2'):
275  case CHR('3'):
276  case CHR('4'):
277  case CHR('5'):
278  case CHR('6'):
279  case CHR('7'):
280  case CHR('8'):
281  case CHR('9'):
282  RETV(DIGIT, (chr) DIGITVAL(c));
283  break;
284  case CHR(','):
285  RET(',');
286  break;
287  case CHR('}'): /* ERE bound ends with } */
288  if (INCON(L_EBND))
289  {
290  INTOCON(L_ERE);
291  if ((v->cflags & REG_ADVF) && NEXT1('?'))
292  {
293  v->now++;
295  RETV('}', 0);
296  }
297  RETV('}', 1);
298  }
299  else
300  FAILW(REG_BADBR);
301  break;
302  case CHR('\\'): /* BRE bound ends with \} */
303  if (INCON(L_BBND) && NEXT1('}'))
304  {
305  v->now++;
306  INTOCON(L_BRE);
307  RETV('}', 1);
308  }
309  else
310  FAILW(REG_BADBR);
311  break;
312  default:
313  FAILW(REG_BADBR);
314  break;
315  }
317  break;
318  case L_BRACK: /* brackets are not too hard */
319  switch (c)
320  {
321  case CHR(']'):
322  if (LASTTYPE('['))
323  RETV(PLAIN, c);
324  else
325  {
326  INTOCON((v->cflags & REG_EXTENDED) ?
327  L_ERE : L_BRE);
328  RET(']');
329  }
330  break;
331  case CHR('\\'):
332  NOTE(REG_UBBS);
333  if (!(v->cflags & REG_ADVF))
334  RETV(PLAIN, c);
336  if (ATEOS())
338  if (!lexescape(v))
339  return 0;
340  switch (v->nexttype)
341  { /* not all escapes okay here */
342  case PLAIN:
343  case CCLASSS:
344  case CCLASSC:
345  return 1;
346  break;
347  }
348  /* not one of the acceptable escapes */
350  break;
351  case CHR('-'):
352  if (LASTTYPE('[') || NEXT1(']'))
353  RETV(PLAIN, c);
354  else
355  RETV(RANGE, c);
356  break;
357  case CHR('['):
358  if (ATEOS())
359  FAILW(REG_EBRACK);
360  switch (*v->now++)
361  {
362  case CHR('.'):
363  INTOCON(L_CEL);
364  /* might or might not be locale-specific */
365  RET(COLLEL);
366  break;
367  case CHR('='):
368  INTOCON(L_ECL);
369  NOTE(REG_ULOCALE);
370  RET(ECLASS);
371  break;
372  case CHR(':'):
373  INTOCON(L_CCL);
374  NOTE(REG_ULOCALE);
375  RET(CCLASS);
376  break;
377  default: /* oops */
378  v->now--;
379  RETV(PLAIN, c);
380  break;
381  }
383  break;
384  default:
385  RETV(PLAIN, c);
386  break;
387  }
389  break;
390  case L_CEL: /* collating elements are easy */
391  if (c == CHR('.') && NEXT1(']'))
392  {
393  v->now++;
394  INTOCON(L_BRACK);
395  RETV(END, '.');
396  }
397  else
398  RETV(PLAIN, c);
399  break;
400  case L_ECL: /* ditto equivalence classes */
401  if (c == CHR('=') && NEXT1(']'))
402  {
403  v->now++;
404  INTOCON(L_BRACK);
405  RETV(END, '=');
406  }
407  else
408  RETV(PLAIN, c);
409  break;
410  case L_CCL: /* ditto character classes */
411  if (c == CHR(':') && NEXT1(']'))
412  {
413  v->now++;
414  INTOCON(L_BRACK);
415  RETV(END, ':');
416  }
417  else
418  RETV(PLAIN, c);
419  break;
420  default:
422  break;
423  }
424 
425  /* that got rid of everything except EREs and AREs */
426  assert(INCON(L_ERE));
427 
428  /* deal with EREs and AREs, except for backslashes */
429  switch (c)
430  {
431  case CHR('|'):
432  RET('|');
433  break;
434  case CHR('*'):
435  if ((v->cflags & REG_ADVF) && NEXT1('?'))
436  {
437  v->now++;
439  RETV('*', 0);
440  }
441  RETV('*', 1);
442  break;
443  case CHR('+'):
444  if ((v->cflags & REG_ADVF) && NEXT1('?'))
445  {
446  v->now++;
448  RETV('+', 0);
449  }
450  RETV('+', 1);
451  break;
452  case CHR('?'):
453  if ((v->cflags & REG_ADVF) && NEXT1('?'))
454  {
455  v->now++;
457  RETV('?', 0);
458  }
459  RETV('?', 1);
460  break;
461  case CHR('{'): /* bounds start or plain character */
462  if (v->cflags & REG_EXPANDED)
463  skip(v);
464  if (ATEOS() || !iscdigit(*v->now))
465  {
466  NOTE(REG_UBRACES);
467  NOTE(REG_UUNSPEC);
468  RETV(PLAIN, c);
469  }
470  else
471  {
472  NOTE(REG_UBOUNDS);
473  INTOCON(L_EBND);
474  RET('{');
475  }
477  break;
478  case CHR('('): /* parenthesis, or advanced extension */
479  if ((v->cflags & REG_ADVF) && NEXT1('?'))
480  {
482  v->now++;
483  if (ATEOS())
484  FAILW(REG_BADRPT);
485  switch (*v->now++)
486  {
487  case CHR(':'): /* non-capturing paren */
488  RETV('(', 0);
489  break;
490  case CHR('#'): /* comment */
491  while (!ATEOS() && *v->now != CHR(')'))
492  v->now++;
493  if (!ATEOS())
494  v->now++;
495  assert(v->nexttype == v->lasttype);
496  return next(v);
497  break;
498  case CHR('='): /* positive lookahead */
501  break;
502  case CHR('!'): /* negative lookahead */
505  break;
506  case CHR('<'):
507  if (ATEOS())
508  FAILW(REG_BADRPT);
509  switch (*v->now++)
510  {
511  case CHR('='): /* positive lookbehind */
514  break;
515  case CHR('!'): /* negative lookbehind */
518  break;
519  default:
520  FAILW(REG_BADRPT);
521  break;
522  }
524  break;
525  default:
526  FAILW(REG_BADRPT);
527  break;
528  }
530  }
531  RETV('(', 1);
532  break;
533  case CHR(')'):
534  if (LASTTYPE('('))
535  NOTE(REG_UUNSPEC);
536  RETV(')', c);
537  break;
538  case CHR('['): /* easy except for [[:<:]] and [[:>:]] */
539  if (HAVE(6) && *(v->now + 0) == CHR('[') &&
540  *(v->now + 1) == CHR(':') &&
541  (*(v->now + 2) == CHR('<') ||
542  *(v->now + 2) == CHR('>')) &&
543  *(v->now + 3) == CHR(':') &&
544  *(v->now + 4) == CHR(']') &&
545  *(v->now + 5) == CHR(']'))
546  {
547  c = *(v->now + 2);
548  v->now += 6;
550  RET((c == CHR('<')) ? '<' : '>');
551  }
552  INTOCON(L_BRACK);
553  if (NEXT1('^'))
554  {
555  v->now++;
556  RETV('[', 0);
557  }
558  RETV('[', 1);
559  break;
560  case CHR('.'):
561  RET('.');
562  break;
563  case CHR('^'):
564  RET('^');
565  break;
566  case CHR('$'):
567  RET('$');
568  break;
569  case CHR('\\'): /* mostly punt backslashes to code below */
570  if (ATEOS())
572  break;
573  default: /* ordinary character */
574  RETV(PLAIN, c);
575  break;
576  }
577 
578  /* ERE/ARE backslash handling; backslash already eaten */
579  assert(!ATEOS());
580  if (!(v->cflags & REG_ADVF))
581  { /* only AREs have non-trivial escapes */
582  if (iscalnum(*v->now))
583  {
585  NOTE(REG_UUNSPEC);
586  }
587  RETV(PLAIN, *v->now++);
588  }
589  return lexescape(v);
590 }
#define RANGE
Definition: regcomp.c:296
static void skip(struct vars *v)
Definition: regc_lex.c:973
int nexttype
Definition: regcomp.c:245
#define DIGITVAL(c)
Definition: regcustom.h:70
#define REG_UBSALNUM
Definition: regex.h:64
#define NEXT1(c)
Definition: regc_lex.c:38
#define CCLASS
Definition: regcomp.c:292
#define L_BRACK
Definition: regc_lex.c:56
static int lexescape(struct vars *v)
Definition: regc_lex.c:600
#define LATYPE_BEHIND_POS
Definition: regguts.h:101
#define REG_EBRACE
Definition: regex.h:148
int lasttype
Definition: regcomp.c:244
#define REG_ULOCALE
Definition: regex.h:72
#define LATYPE_AHEAD_NEG
Definition: regguts.h:100
#define ISERR()
Definition: regcomp.c:273
#define RET(c)
Definition: regc_lex.c:45
#define NOTREACHED
Definition: regguts.h:91
#define LATYPE_AHEAD_POS
Definition: regguts.h:99
#define REG_EBRACK
Definition: regex.h:146
pg_wchar chr
Definition: regcustom.h:66
#define iscalnum(x)
Definition: regcustom.h:98
#define CCLASSS
Definition: regcomp.c:294
#define END
Definition: _int.h:160
#define INCON(con)
Definition: regc_lex.c:61
#define LACON
Definition: regcomp.c:297
#define REG_BOSONLY
Definition: regex.h:116
#define L_ERE
Definition: regc_lex.c:51
#define REG_BADBR
Definition: regex.h:149
static int next(struct vars *v)
Definition: regc_lex.c:200
char * c
#define REG_BADRPT
Definition: regex.h:152
#define assert(TEST)
Definition: imath.c:73
#define REG_UBBS
Definition: regex.h:67
#define REG_EESCAPE
Definition: regex.h:144
#define SBEGIN
Definition: regcomp.c:302
#define LASTTYPE(t)
Definition: regc_lex.c:48
#define LATYPE_BEHIND_NEG
Definition: regguts.h:102
#define CCLASSC
Definition: regcomp.c:295
#define REG_EXTENDED
Definition: regex.h:103
#define INTOCON(c)
Definition: regc_lex.c:60
#define REG_ADVF
Definition: regex.h:104
#define PLAIN
Definition: regcomp.c:287
#define L_Q
Definition: regc_lex.c:53
#define L_ECL
Definition: regc_lex.c:58
#define EOS
Definition: regcomp.c:286
#define RETV(c, n)
Definition: regc_lex.c:46
static int brenext(struct vars *v, chr c)
Definition: regc_lex.c:852
#define L_CEL
Definition: regc_lex.c:57
#define REG_UUNSPEC
Definition: regex.h:69
#define L_BRE
Definition: regc_lex.c:52
const chr * now
Definition: regcomp.c:240
#define L_CCL
Definition: regc_lex.c:59
#define REG_UBOUNDS
Definition: regex.h:62
#define COLLEL
Definition: regcomp.c:290
#define EMPTY
Definition: regcomp.c:285
#define NOTE(b)
Definition: regcomp.c:281
int lexcon
Definition: regcomp.c:247
#define FAILW(e)
Definition: regc_lex.c:47
int cflags
Definition: regcomp.c:243
#define REG_EXPANDED
Definition: regex.h:110
#define HAVE(n)
Definition: regc_lex.c:37
#define L_BBND
Definition: regc_lex.c:55
#define iscdigit(x)
Definition: regcustom.h:100
#define ATEOS()
Definition: regc_lex.c:36
#define REG_UNONPOSIX
Definition: regex.h:68
#define REG_UBRACES
Definition: regex.h:63
#define CHR(c)
Definition: regcustom.h:69
#define L_EBND
Definition: regc_lex.c:54
#define REG_ULOOKAROUND
Definition: regex.h:61
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1544
#define DIGIT
Definition: regcomp.c:288
#define ECLASS
Definition: regcomp.c:291

◆ prefixes()

static void prefixes ( struct vars *v)
static

Definition at line 99 of file regc_lex.c.

References ATEOS, vars::cflags, CHR, ERR, HAVE, iscalpha, NEXT1, NEXT2, NEXT3, NOTE, vars::now, REG_ADVANCED, REG_ADVF, REG_BADOPT, REG_BADPAT, REG_BADRPT, REG_EXPANDED, REG_EXTENDED, REG_ICASE, REG_NEWLINE, REG_NLANCH, REG_NLSTOP, REG_QUOTE, and REG_UNONPOSIX.

Referenced by lexstart(), and NIImportAffixes().

100 {
101  /* literal string doesn't get any of this stuff */
102  if (v->cflags & REG_QUOTE)
103  return;
104 
105  /* initial "***" gets special things */
106  if (HAVE(4) && NEXT3('*', '*', '*'))
107  switch (*(v->now + 3))
108  {
109  case CHR('?'): /* "***?" error, msg shows version */
110  ERR(REG_BADPAT);
111  return; /* proceed no further */
112  break;
113  case CHR('='): /* "***=" shifts to literal string */
115  v->cflags |= REG_QUOTE;
117  v->now += 4;
118  return; /* and there can be no more prefixes */
119  break;
120  case CHR(':'): /* "***:" shifts to AREs */
122  v->cflags |= REG_ADVANCED;
123  v->now += 4;
124  break;
125  default: /* otherwise *** is just an error */
126  ERR(REG_BADRPT);
127  return;
128  break;
129  }
130 
131  /* BREs and EREs don't get embedded options */
132  if ((v->cflags & REG_ADVANCED) != REG_ADVANCED)
133  return;
134 
135  /* embedded options (AREs only) */
136  if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2)))
137  {
139  v->now += 2;
140  for (; !ATEOS() && iscalpha(*v->now); v->now++)
141  switch (*v->now)
142  {
143  case CHR('b'): /* BREs (but why???) */
144  v->cflags &= ~(REG_ADVANCED | REG_QUOTE);
145  break;
146  case CHR('c'): /* case sensitive */
147  v->cflags &= ~REG_ICASE;
148  break;
149  case CHR('e'): /* plain EREs */
150  v->cflags |= REG_EXTENDED;
151  v->cflags &= ~(REG_ADVF | REG_QUOTE);
152  break;
153  case CHR('i'): /* case insensitive */
154  v->cflags |= REG_ICASE;
155  break;
156  case CHR('m'): /* Perloid synonym for n */
157  case CHR('n'): /* \n affects ^ $ . [^ */
158  v->cflags |= REG_NEWLINE;
159  break;
160  case CHR('p'): /* ~Perl, \n affects . [^ */
161  v->cflags |= REG_NLSTOP;
162  v->cflags &= ~REG_NLANCH;
163  break;
164  case CHR('q'): /* literal string */
165  v->cflags |= REG_QUOTE;
166  v->cflags &= ~REG_ADVANCED;
167  break;
168  case CHR('s'): /* single line, \n ordinary */
169  v->cflags &= ~REG_NEWLINE;
170  break;
171  case CHR('t'): /* tight syntax */
172  v->cflags &= ~REG_EXPANDED;
173  break;
174  case CHR('w'): /* weird, \n affects ^ $ only */
175  v->cflags &= ~REG_NLSTOP;
176  v->cflags |= REG_NLANCH;
177  break;
178  case CHR('x'): /* expanded syntax */
179  v->cflags |= REG_EXPANDED;
180  break;
181  default:
182  ERR(REG_BADOPT);
183  return;
184  }
185  if (!NEXT1(')'))
186  {
187  ERR(REG_BADOPT);
188  return;
189  }
190  v->now++;
191  if (v->cflags & REG_QUOTE)
192  v->cflags &= ~(REG_EXPANDED | REG_NEWLINE);
193  }
194 }
#define REG_NLSTOP
Definition: regex.h:111
#define NEXT1(c)
Definition: regc_lex.c:38
#define NEXT2(a, b)
Definition: regc_lex.c:39
#define ERR
Definition: _int.h:161
#define REG_BADOPT
Definition: regex.h:156
#define REG_QUOTE
Definition: regex.h:106
#define REG_ICASE
Definition: regex.h:108
#define REG_BADRPT
Definition: regex.h:152
#define REG_NEWLINE
Definition: regex.h:113
#define REG_ADVANCED
Definition: regex.h:105
#define NEXT3(a, b, c)
Definition: regc_lex.c:40
#define REG_EXTENDED
Definition: regex.h:103
#define iscalpha(x)
Definition: regcustom.h:99
#define REG_ADVF
Definition: regex.h:104
const chr * now
Definition: regcomp.c:240
#define REG_NLANCH
Definition: regex.h:112
#define NOTE(b)
Definition: regcomp.c:281
int cflags
Definition: regcomp.c:243
#define REG_EXPANDED
Definition: regex.h:110
#define HAVE(n)
Definition: regc_lex.c:37
#define ATEOS()
Definition: regc_lex.c:36
#define REG_UNONPOSIX
Definition: regex.h:68
#define CHR(c)
Definition: regcustom.h:69
#define REG_BADPAT
Definition: regex.h:141

◆ skip()

static void skip ( struct vars *v)
static

Definition at line 973 of file regc_lex.c.

References assert, ATEOS, vars::cflags, CHR, iscspace, NEXT1, NOTE, vars::now, REG_EXPANDED, and REG_UNONPOSIX.

Referenced by brenext(), and next().

974 {
975  const chr *start = v->now;
976 
977  assert(v->cflags & REG_EXPANDED);
978 
979  for (;;)
980  {
981  while (!ATEOS() && iscspace(*v->now))
982  v->now++;
983  if (ATEOS() || *v->now != CHR('#'))
984  break; /* NOTE BREAK OUT */
985  assert(NEXT1('#'));
986  while (!ATEOS() && *v->now != CHR('\n'))
987  v->now++;
988  /* leave the newline to be picked up by the iscspace loop */
989  }
990 
991  if (v->now != start)
993 }
#define NEXT1(c)
Definition: regc_lex.c:38
#define iscspace(x)
Definition: regcustom.h:101
pg_wchar chr
Definition: regcustom.h:66
#define assert(TEST)
Definition: imath.c:73
const chr * now
Definition: regcomp.c:240
#define NOTE(b)
Definition: regcomp.c:281
int cflags
Definition: regcomp.c:243
#define REG_EXPANDED
Definition: regex.h:110
#define ATEOS()
Definition: regc_lex.c:36
#define REG_UNONPOSIX
Definition: regex.h:68
#define CHR(c)
Definition: regcustom.h:69