PostgreSQL Source Code  git master
rege_dfa.c File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

static chrlongest (struct vars *v, struct dfa *d, chr *start, chr *stop, int *hitstopp)
 
static chrshortest (struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
 
static int matchuntil (struct vars *v, struct dfa *d, chr *probe, struct sset **lastcss, chr **lastcp)
 
static chrdfa_backref (struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, bool shortest)
 
static chrlastcold (struct vars *v, struct dfa *d)
 
static struct dfanewdfa (struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct smalldfa *sml)
 
static void freedfa (struct dfa *d)
 
static unsigned hash (unsigned *uv, int n)
 
static struct ssetinitialize (struct vars *v, struct dfa *d, chr *start)
 
static struct ssetmiss (struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
 
static int lacon (struct vars *v, struct cnfa *pcnfa, chr *cp, color co)
 
static struct ssetgetvacant (struct vars *v, struct dfa *d, chr *cp, chr *start)
 
static struct ssetpickss (struct vars *v, struct dfa *d, chr *cp, chr *start)
 

Function Documentation

◆ dfa_backref()

static chr* dfa_backref ( struct vars v,
struct dfa d,
chr start,
chr min,
chr max,
bool  shortest 
)
static

Definition at line 506 of file rege_dfa.c.

References dfa::backmax, dfa::backmin, dfa::backno, DUPINF, vars::g, vars::pmatch, regmatch_t::rm_eo, regmatch_t::rm_so, and vars::start.

Referenced by longest(), and shortest().

512 {
513  int n = d->backno;
514  int backmin = d->backmin;
515  int backmax = d->backmax;
516  size_t numreps;
517  size_t minreps;
518  size_t maxreps;
519  size_t brlen;
520  chr *brstring;
521  chr *p;
522 
523  /* get the backreferenced string (caller should have checked this) */
524  if (v->pmatch[n].rm_so == -1)
525  return NULL;
526  brstring = v->start + v->pmatch[n].rm_so;
527  brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
528 
529  /* special-case zero-length backreference to avoid divide by zero */
530  if (brlen == 0)
531  {
532  /*
533  * matches only a zero-length string, but any number of repetitions
534  * can be considered to be present
535  */
536  if (min == start && backmin <= backmax)
537  return start;
538  return NULL;
539  }
540 
541  /*
542  * convert min and max into numbers of possible repetitions of the backref
543  * string, rounding appropriately
544  */
545  if (min <= start)
546  minreps = 0;
547  else
548  minreps = (min - start - 1) / brlen + 1;
549  maxreps = (max - start) / brlen;
550 
551  /* apply bounds, then see if there is any allowed match length */
552  if (minreps < backmin)
553  minreps = backmin;
554  if (backmax != DUPINF && maxreps > backmax)
555  maxreps = backmax;
556  if (maxreps < minreps)
557  return NULL;
558 
559  /* quick exit if zero-repetitions match is valid and preferred */
560  if (shortest && minreps == 0)
561  return start;
562 
563  /* okay, compare the actual string contents */
564  p = start;
565  numreps = 0;
566  while (numreps < maxreps)
567  {
568  if ((*v->g->compare) (brstring, p, brlen) != 0)
569  break;
570  p += brlen;
571  numreps++;
572  if (shortest && numreps >= minreps)
573  break;
574  }
575 
576  if (numreps >= minreps)
577  return p;
578  return NULL;
579 }
short backmin
Definition: regexec.c:81
regoff_t rm_so
Definition: regex.h:87
chr * start
Definition: regexec.c:114
regoff_t rm_eo
Definition: regex.h:88
pg_wchar chr
Definition: regcustom.h:66
struct guts * g
Definition: regexec.c:109
static chr * shortest(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
Definition: rege_dfa.c:204
#define DUPINF
Definition: regguts.h:94
int backno
Definition: regexec.c:80
regmatch_t * pmatch
Definition: regexec.c:112
short backmax
Definition: regexec.c:82

◆ freedfa()

static void freedfa ( struct dfa d)
static

Definition at line 691 of file rege_dfa.c.

References dfa::arraysmalloced, FREE, dfa::incarea, dfa::ismalloced, dfa::outsarea, dfa::ssets, and dfa::statesarea.

Referenced by newdfa().

692 {
693  if (d->arraysmalloced)
694  {
695  if (d->ssets != NULL)
696  FREE(d->ssets);
697  if (d->statesarea != NULL)
698  FREE(d->statesarea);
699  if (d->outsarea != NULL)
700  FREE(d->outsarea);
701  if (d->incarea != NULL)
702  FREE(d->incarea);
703  }
704 
705  if (d->ismalloced)
706  FREE(d);
707 }
struct sset * ssets
Definition: regexec.c:70
struct arcp * incarea
Definition: regexec.c:74
#define FREE(ptr)
Definition: cryptohash.c:37
bool ismalloced
Definition: regexec.c:83
bool arraysmalloced
Definition: regexec.c:84
struct sset ** outsarea
Definition: regexec.c:73
unsigned * statesarea
Definition: regexec.c:71

◆ getvacant()

static struct sset* getvacant ( struct vars v,
struct dfa d,
chr cp,
chr start 
)
static

Definition at line 977 of file rege_dfa.c.

References assert, arcp::co, FDEBUG, sset::flags, i, sset::inchain, sset::ins, dfa::lastnopr, dfa::lastpost, sset::lastseen, LOCKED, dfa::ncolors, NOPROGRESS, sset::outs, pickss(), POSTSTATE, arcp::ss, and dfa::ssets.

Referenced by initialize(), and miss().

981 {
982  int i;
983  struct sset *ss;
984  struct sset *p;
985  struct arcp ap;
986  color co;
987 
988  ss = pickss(v, d, cp, start);
989  if (ss == NULL)
990  return NULL;
991  assert(!(ss->flags & LOCKED));
992 
993  /* clear out its inarcs, including self-referential ones */
994  ap = ss->ins;
995  while ((p = ap.ss) != NULL)
996  {
997  co = ap.co;
998  FDEBUG(("zapping c%d's %ld outarc\n", (int) (p - d->ssets), (long) co));
999  p->outs[co] = NULL;
1000  ap = p->inchain[co];
1001  p->inchain[co].ss = NULL; /* paranoia */
1002  }
1003  ss->ins.ss = NULL;
1004 
1005  /* take it off the inarc chains of the ssets reached by its outarcs */
1006  for (i = 0; i < d->ncolors; i++)
1007  {
1008  p = ss->outs[i];
1009  assert(p != ss); /* not self-referential */
1010  if (p == NULL)
1011  continue; /* NOTE CONTINUE */
1012  FDEBUG(("del outarc %d from c%d's in chn\n", i, (int) (p - d->ssets)));
1013  if (p->ins.ss == ss && p->ins.co == i)
1014  p->ins = ss->inchain[i];
1015  else
1016  {
1017  struct arcp lastap = {NULL, 0};
1018 
1019  assert(p->ins.ss != NULL);
1020  for (ap = p->ins; ap.ss != NULL &&
1021  !(ap.ss == ss && ap.co == i);
1022  ap = ap.ss->inchain[ap.co])
1023  lastap = ap;
1024  assert(ap.ss != NULL);
1025  lastap.ss->inchain[lastap.co] = ss->inchain[i];
1026  }
1027  ss->outs[i] = NULL;
1028  ss->inchain[i].ss = NULL;
1029  }
1030 
1031  /* if ss was a success state, may need to remember location */
1032  if ((ss->flags & POSTSTATE) && ss->lastseen != d->lastpost &&
1033  (d->lastpost == NULL || d->lastpost < ss->lastseen))
1034  d->lastpost = ss->lastseen;
1035 
1036  /* likewise for a no-progress state */
1037  if ((ss->flags & NOPROGRESS) && ss->lastseen != d->lastnopr &&
1038  (d->lastnopr == NULL || d->lastnopr < ss->lastseen))
1039  d->lastnopr = ss->lastseen;
1040 
1041  return ss;
1042 }
struct sset * ssets
Definition: regexec.c:70
chr * lastseen
Definition: regexec.c:58
struct sset ** outs
Definition: regexec.c:59
short color
Definition: regguts.h:150
static struct sset * pickss(struct vars *v, struct dfa *d, chr *cp, chr *start)
Definition: rege_dfa.c:1048
struct arcp * inchain
Definition: regexec.c:60
#define NOPROGRESS
Definition: regexec.c:56
#define assert(TEST)
Definition: imath.c:73
int ncolors
Definition: regexec.c:68
chr * lastnopr
Definition: regexec.c:78
#define FDEBUG(arglist)
Definition: regguts.h:116
Definition: regexec.c:39
#define POSTSTATE
Definition: regexec.c:54
struct arcp ins
Definition: regexec.c:57
#define LOCKED
Definition: regexec.c:55
Definition: regexec.c:45
int i
chr * lastpost
Definition: regexec.c:77
struct sset * ss
Definition: regexec.c:41
int flags
Definition: regexec.c:52
color co
Definition: regexec.c:42

◆ hash()

◆ initialize()

static struct sset* initialize ( struct vars v,
struct dfa d,
chr start 
)
static

Definition at line 731 of file rege_dfa.c.

References assert, BSET, dfa::cnfa, sset::flags, getvacant(), HASH, sset::hash, i, dfa::lastnopr, dfa::lastpost, sset::lastseen, LOCKED, NOPROGRESS, dfa::nssused, cnfa::post, cnfa::pre, dfa::ssets, STARTER, sset::states, and dfa::wordsper.

Referenced by longest(), matchuntil(), and shortest().

734 {
735  struct sset *ss;
736  int i;
737 
738  /* is previous one still there? */
739  if (d->nssused > 0 && (d->ssets[0].flags & STARTER))
740  ss = &d->ssets[0];
741  else
742  { /* no, must (re)build it */
743  ss = getvacant(v, d, start, start);
744  if (ss == NULL)
745  return NULL;
746  for (i = 0; i < d->wordsper; i++)
747  ss->states[i] = 0;
748  BSET(ss->states, d->cnfa->pre);
749  ss->hash = HASH(ss->states, d->wordsper);
750  assert(d->cnfa->pre != d->cnfa->post);
751  ss->flags = STARTER | LOCKED | NOPROGRESS;
752  /* lastseen dealt with below */
753  }
754 
755  for (i = 0; i < d->nssused; i++)
756  d->ssets[i].lastseen = NULL;
757  ss->lastseen = start; /* maybe untrue, but harmless */
758  d->lastpost = NULL;
759  d->lastnopr = NULL;
760  return ss;
761 }
struct sset * ssets
Definition: regexec.c:70
#define STARTER
Definition: regexec.c:53
chr * lastseen
Definition: regexec.c:58
struct cnfa * cnfa
Definition: regexec.c:75
int pre
Definition: regguts.h:408
static struct sset * getvacant(struct vars *v, struct dfa *d, chr *cp, chr *start)
Definition: rege_dfa.c:977
#define HASH(sign, val, siglen)
Definition: hstore_gist.c:45
unsigned * states
Definition: regexec.c:47
#define NOPROGRESS
Definition: regexec.c:56
#define assert(TEST)
Definition: imath.c:73
int nssused
Definition: regexec.c:66
int wordsper
Definition: regexec.c:69
chr * lastnopr
Definition: regexec.c:78
int post
Definition: regguts.h:409
#define BSET(uv, sn)
Definition: regguts.h:126
#define LOCKED
Definition: regexec.c:55
Definition: regexec.c:45
int i
chr * lastpost
Definition: regexec.c:77
unsigned hash
Definition: regexec.c:48
int flags
Definition: regexec.c:52

◆ lacon()

static int lacon ( struct vars v,
struct cnfa pcnfa,
chr cp,
color  co 
)
static

Definition at line 920 of file rege_dfa.c.

References assert, ERR, FDEBUG, vars::g, getladfa(), guts::lacons, subre::latype, LATYPE_IS_AHEAD, LATYPE_IS_POS, vars::lblastcp, vars::lblastcss, matchuntil(), cnfa::ncolors, vars::re, REG_ETOOBIG, shortest(), STACK_TOO_DEEP, and vars::stop.

Referenced by miss().

924 {
925  int n;
926  struct subre *sub;
927  struct dfa *d;
928  chr *end;
929  int satisfied;
930 
931  /* Since this is recursive, it could be driven to stack overflow */
932  if (STACK_TOO_DEEP(v->re))
933  {
934  ERR(REG_ETOOBIG);
935  return 0;
936  }
937 
938  n = co - pcnfa->ncolors;
939  assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
940  FDEBUG(("=== testing lacon %d\n", n));
941  sub = &v->g->lacons[n];
942  d = getladfa(v, n);
943  if (d == NULL)
944  return 0;
945  if (LATYPE_IS_AHEAD(sub->latype))
946  {
947  /* used to use longest() here, but shortest() could be much cheaper */
948  end = shortest(v, d, cp, cp, v->stop,
949  (chr **) NULL, (int *) NULL);
950  satisfied = LATYPE_IS_POS(sub->latype) ? (end != NULL) : (end == NULL);
951  }
952  else
953  {
954  /*
955  * To avoid doing O(N^2) work when repeatedly testing a lookbehind
956  * constraint in an N-character string, we use matchuntil() which can
957  * cache the DFA state across calls. We only need to restart if the
958  * probe point decreases, which is not common. The NFA we're using is
959  * a search NFA, so it doesn't mind scanning over stuff before the
960  * nominal match.
961  */
962  satisfied = matchuntil(v, d, cp, &v->lblastcss[n], &v->lblastcp[n]);
963  if (!LATYPE_IS_POS(sub->latype))
964  satisfied = !satisfied;
965  }
966  FDEBUG(("=== lacon %d satisfied %d\n", n, satisfied));
967  return satisfied;
968 }
static int matchuntil(struct vars *v, struct dfa *d, chr *probe, struct sset **lastcss, chr **lastcp)
Definition: rege_dfa.c:371
#define LATYPE_IS_POS(la)
Definition: regguts.h:103
#define ERR
Definition: _int.h:161
struct subre * lacons
Definition: regguts.h:539
char latype
Definition: regguts.h:490
Definition: regexec.c:63
struct sset ** lblastcss
Definition: regexec.c:120
pg_wchar chr
Definition: regcustom.h:66
regex_t * re
Definition: regcomp.c:239
#define assert(TEST)
Definition: imath.c:73
#define LATYPE_IS_AHEAD(la)
Definition: regguts.h:104
struct guts * g
Definition: regexec.c:109
#define FDEBUG(arglist)
Definition: regguts.h:116
static chr * shortest(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
Definition: rege_dfa.c:204
#define REG_ETOOBIG
Definition: regex.h:157
Definition: regguts.h:471
const chr * stop
Definition: regcomp.c:241
chr ** lblastcp
Definition: regexec.c:121
static struct dfa * getladfa(struct vars *, int)
Definition: regexec.c:391
#define STACK_TOO_DEEP(re)
Definition: regguts.h:520
int ncolors
Definition: regguts.h:404

◆ lastcold()

static chr* lastcold ( struct vars v,
struct dfa d 
)
static

Definition at line 585 of file rege_dfa.c.

References sset::flags, i, dfa::lastnopr, sset::lastseen, NOPROGRESS, dfa::nssused, dfa::ssets, and vars::start.

Referenced by shortest().

587 {
588  struct sset *ss;
589  chr *nopr;
590  int i;
591 
592  nopr = d->lastnopr;
593  if (nopr == NULL)
594  nopr = v->start;
595  for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
596  if ((ss->flags & NOPROGRESS) && nopr < ss->lastseen)
597  nopr = ss->lastseen;
598  return nopr;
599 }
struct sset * ssets
Definition: regexec.c:70
chr * lastseen
Definition: regexec.c:58
chr * start
Definition: regexec.c:114
pg_wchar chr
Definition: regcustom.h:66
#define NOPROGRESS
Definition: regexec.c:56
int nssused
Definition: regexec.c:66
chr * lastnopr
Definition: regexec.c:78
Definition: regexec.c:45
int i
int flags
Definition: regexec.c:52

◆ longest()

static chr* longest ( struct vars v,
struct dfa d,
chr start,
chr stop,
int *  hitstopp 
)
static

Definition at line 42 of file rege_dfa.c.

References assert, dfa::backno, cnfa::bos, dfa::cm, dfa::cnfa, dfa_backref(), DUPINF, vars::eflags, cnfa::eos, FDEBUG, sset::flags, cnfa::flags, GETCOLOR, i, initialize(), ISERR, dfa::lastpost, sset::lastseen, MATCHALL, cnfa::maxmatchall, cnfa::minmatchall, miss(), vars::nmatch, dfa::nssused, sset::outs, vars::pmatch, POSTSTATE, REG_FTRACE, REG_NOTBOL, REG_NOTEOL, regmatch_t::rm_so, dfa::ssets, vars::start, and vars::stop.

Referenced by ProcessSyncRequests().

47 {
48  chr *cp;
49  chr *realstop = (stop == v->stop) ? stop : stop + 1;
50  color co;
51  struct sset *css;
52  struct sset *ss;
53  chr *post;
54  int i;
55  struct colormap *cm = d->cm;
56 
57  /* prevent "uninitialized variable" warnings */
58  if (hitstopp != NULL)
59  *hitstopp = 0;
60 
61  /* if this is a backref to a known string, just match against that */
62  if (d->backno >= 0)
63  {
64  assert((size_t) d->backno < v->nmatch);
65  if (v->pmatch[d->backno].rm_so >= 0)
66  {
67  cp = dfa_backref(v, d, start, start, stop, false);
68  if (cp == v->stop && stop == v->stop && hitstopp != NULL)
69  *hitstopp = 1;
70  return cp;
71  }
72  }
73 
74  /* fast path for matchall NFAs */
75  if (d->cnfa->flags & MATCHALL)
76  {
77  size_t nchr = stop - start;
78  size_t maxmatchall = d->cnfa->maxmatchall;
79 
80  if (nchr < d->cnfa->minmatchall)
81  return NULL;
82  if (maxmatchall == DUPINF)
83  {
84  if (stop == v->stop && hitstopp != NULL)
85  *hitstopp = 1;
86  }
87  else
88  {
89  if (stop == v->stop && nchr <= maxmatchall + 1 && hitstopp != NULL)
90  *hitstopp = 1;
91  if (nchr > maxmatchall)
92  return start + maxmatchall;
93  }
94  return stop;
95  }
96 
97  /* initialize */
98  css = initialize(v, d, start);
99  if (css == NULL)
100  return NULL;
101  cp = start;
102 
103  /* startup */
104  FDEBUG(("+++ startup +++\n"));
105  if (cp == v->start)
106  {
107  co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
108  FDEBUG(("color %ld\n", (long) co));
109  }
110  else
111  {
112  co = GETCOLOR(cm, *(cp - 1));
113  FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
114  }
115  css = miss(v, d, css, co, cp, start);
116  if (css == NULL)
117  return NULL;
118  css->lastseen = cp;
119 
120  /*
121  * This is the main text-scanning loop. It seems worth having two copies
122  * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
123  * builds, when you're not actively tracing.
124  */
125 #ifdef REG_DEBUG
126  if (v->eflags & REG_FTRACE)
127  {
128  while (cp < realstop)
129  {
130  FDEBUG(("+++ at c%d +++\n", (int) (css - d->ssets)));
131  co = GETCOLOR(cm, *cp);
132  FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
133  ss = css->outs[co];
134  if (ss == NULL)
135  {
136  ss = miss(v, d, css, co, cp + 1, start);
137  if (ss == NULL)
138  break; /* NOTE BREAK OUT */
139  }
140  cp++;
141  ss->lastseen = cp;
142  css = ss;
143  }
144  }
145  else
146 #endif
147  {
148  while (cp < realstop)
149  {
150  co = GETCOLOR(cm, *cp);
151  ss = css->outs[co];
152  if (ss == NULL)
153  {
154  ss = miss(v, d, css, co, cp + 1, start);
155  if (ss == NULL)
156  break; /* NOTE BREAK OUT */
157  }
158  cp++;
159  ss->lastseen = cp;
160  css = ss;
161  }
162  }
163 
164  if (ISERR())
165  return NULL;
166 
167  /* shutdown */
168  FDEBUG(("+++ shutdown at c%d +++\n", (int) (css - d->ssets)));
169  if (cp == v->stop && stop == v->stop)
170  {
171  if (hitstopp != NULL)
172  *hitstopp = 1;
173  co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
174  FDEBUG(("color %ld\n", (long) co));
175  ss = miss(v, d, css, co, cp, start);
176  if (ISERR())
177  return NULL;
178  /* special case: match ended at eol? */
179  if (ss != NULL && (ss->flags & POSTSTATE))
180  return cp;
181  else if (ss != NULL)
182  ss->lastseen = cp; /* to be tidy */
183  }
184 
185  /* find last match, if any */
186  post = d->lastpost;
187  for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
188  if ((ss->flags & POSTSTATE) && post != ss->lastseen &&
189  (post == NULL || post < ss->lastseen))
190  post = ss->lastseen;
191  if (post != NULL) /* found one */
192  return post - 1;
193 
194  return NULL;
195 }
struct sset * ssets
Definition: regexec.c:70
chr * lastseen
Definition: regexec.c:58
#define REG_NOTBOL
Definition: regex.h:126
struct sset ** outs
Definition: regexec.c:59
int maxmatchall
Definition: regguts.h:419
struct colormap * cm
Definition: regexec.c:76
struct cnfa * cnfa
Definition: regexec.c:75
regoff_t rm_so
Definition: regex.h:87
Definition: regguts.h:401
short color
Definition: regguts.h:150
static chr * dfa_backref(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, bool shortest)
Definition: rege_dfa.c:506
#define REG_FTRACE
Definition: regex.h:129
#define ISERR()
Definition: regcomp.c:273
static struct sset * initialize(struct vars *v, struct dfa *d, chr *start)
Definition: rege_dfa.c:731
chr * start
Definition: regexec.c:114
pg_wchar chr
Definition: regcustom.h:66
#define assert(TEST)
Definition: imath.c:73
int nssused
Definition: regexec.c:66
#define MATCHALL
Definition: regguts.h:407
int eflags
Definition: regexec.c:110
color bos[2]
Definition: regguts.h:410
#define FDEBUG(arglist)
Definition: regguts.h:116
static struct sset * miss(struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
Definition: rege_dfa.c:777
#define DUPINF
Definition: regguts.h:94
int backno
Definition: regexec.c:80
regmatch_t * pmatch
Definition: regexec.c:112
int flags
Definition: regguts.h:405
#define POSTSTATE
Definition: regexec.c:54
int minmatchall
Definition: regguts.h:418
size_t nmatch
Definition: regexec.c:111
const chr * stop
Definition: regcomp.c:241
Definition: regexec.c:45
int i
chr * lastpost
Definition: regexec.c:77
int flags
Definition: regexec.c:52
#define REG_NOTEOL
Definition: regex.h:127
#define GETCOLOR(cm, c)
Definition: regguts.h:252
color eos[2]
Definition: regguts.h:411

◆ matchuntil()

static int matchuntil ( struct vars v,
struct dfa d,
chr probe,
struct sset **  lastcss,
chr **  lastcp 
)
static

Definition at line 371 of file rege_dfa.c.

References assert, cnfa::bos, dfa::cm, dfa::cnfa, DUPINF, vars::eflags, cnfa::eos, FDEBUG, sset::flags, cnfa::flags, GETCOLOR, initialize(), sset::lastseen, MATCHALL, cnfa::maxmatchall, cnfa::minmatchall, miss(), sset::outs, POSTSTATE, REG_FTRACE, REG_NOTBOL, REG_NOTEOL, dfa::ssets, vars::start, and vars::stop.

Referenced by lacon().

376 {
377  chr *cp = *lastcp;
378  color co;
379  struct sset *css = *lastcss;
380  struct sset *ss;
381  struct colormap *cm = d->cm;
382 
383  /* fast path for matchall NFAs */
384  if (d->cnfa->flags & MATCHALL)
385  {
386  size_t nchr = probe - v->start;
387 
388  if (nchr < d->cnfa->minmatchall)
389  return 0;
390  /* maxmatchall will always be infinity, cf. makesearch() */
391  assert(d->cnfa->maxmatchall == DUPINF);
392  return 1;
393  }
394 
395  /* initialize and startup, or restart, if necessary */
396  if (cp == NULL || cp > probe)
397  {
398  cp = v->start;
399  css = initialize(v, d, cp);
400  if (css == NULL)
401  return 0;
402 
403  FDEBUG((">>> startup >>>\n"));
404  co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
405  FDEBUG(("color %ld\n", (long) co));
406 
407  css = miss(v, d, css, co, cp, v->start);
408  if (css == NULL)
409  return 0;
410  css->lastseen = cp;
411  }
412  else if (css == NULL)
413  {
414  /* we previously found that no match is possible beyond *lastcp */
415  return 0;
416  }
417  ss = css;
418 
419  /*
420  * This is the main text-scanning loop. It seems worth having two copies
421  * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
422  * builds, when you're not actively tracing.
423  */
424 #ifdef REG_DEBUG
425  if (v->eflags & REG_FTRACE)
426  {
427  while (cp < probe)
428  {
429  FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
430  co = GETCOLOR(cm, *cp);
431  FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
432  ss = css->outs[co];
433  if (ss == NULL)
434  {
435  ss = miss(v, d, css, co, cp + 1, v->start);
436  if (ss == NULL)
437  break; /* NOTE BREAK OUT */
438  }
439  cp++;
440  ss->lastseen = cp;
441  css = ss;
442  }
443  }
444  else
445 #endif
446  {
447  while (cp < probe)
448  {
449  co = GETCOLOR(cm, *cp);
450  ss = css->outs[co];
451  if (ss == NULL)
452  {
453  ss = miss(v, d, css, co, cp + 1, v->start);
454  if (ss == NULL)
455  break; /* NOTE BREAK OUT */
456  }
457  cp++;
458  ss->lastseen = cp;
459  css = ss;
460  }
461  }
462 
463  *lastcss = ss;
464  *lastcp = cp;
465 
466  if (ss == NULL)
467  return 0; /* impossible match, or internal error */
468 
469  /* We need to process one more chr, or the EOS symbol, to check match */
470  if (cp < v->stop)
471  {
472  FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
473  co = GETCOLOR(cm, *cp);
474  FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
475  ss = css->outs[co];
476  if (ss == NULL)
477  ss = miss(v, d, css, co, cp + 1, v->start);
478  }
479  else
480  {
481  assert(cp == v->stop);
482  co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
483  FDEBUG(("color %ld\n", (long) co));
484  ss = miss(v, d, css, co, cp, v->start);
485  }
486 
487  if (ss == NULL || !(ss->flags & POSTSTATE))
488  return 0;
489 
490  return 1;
491 }
struct sset * ssets
Definition: regexec.c:70
chr * lastseen
Definition: regexec.c:58
#define REG_NOTBOL
Definition: regex.h:126
struct sset ** outs
Definition: regexec.c:59
int maxmatchall
Definition: regguts.h:419
struct colormap * cm
Definition: regexec.c:76
struct cnfa * cnfa
Definition: regexec.c:75
Definition: regguts.h:401
short color
Definition: regguts.h:150
#define REG_FTRACE
Definition: regex.h:129
static struct sset * initialize(struct vars *v, struct dfa *d, chr *start)
Definition: rege_dfa.c:731
chr * start
Definition: regexec.c:114
pg_wchar chr
Definition: regcustom.h:66
#define assert(TEST)
Definition: imath.c:73
#define MATCHALL
Definition: regguts.h:407
int eflags
Definition: regexec.c:110
color bos[2]
Definition: regguts.h:410
#define FDEBUG(arglist)
Definition: regguts.h:116
static struct sset * miss(struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
Definition: rege_dfa.c:777
#define DUPINF
Definition: regguts.h:94
int flags
Definition: regguts.h:405
#define POSTSTATE
Definition: regexec.c:54
int minmatchall
Definition: regguts.h:418
const chr * stop
Definition: regcomp.c:241
Definition: regexec.c:45
int flags
Definition: regexec.c:52
#define REG_NOTEOL
Definition: regex.h:127
#define GETCOLOR(cm, c)
Definition: regguts.h:252
color eos[2]
Definition: regguts.h:411

◆ miss()

static struct sset* miss ( struct vars v,
struct dfa d,
struct sset css,
color  co,
chr cp,
chr start 
)
static

Definition at line 777 of file rege_dfa.c.

References assert, BSET, CANCEL_REQUESTED, colormap::cd, dfa::cm, dfa::cnfa, CNFA_NOPROGRESS, arcp::co, carc::co, COLORLESS, ERR, FDEBUG, sset::flags, colordesc::flags, cnfa::flags, getvacant(), HASH, sset::hash, HASLACONS, HIT, i, sset::inchain, sset::ins, ISBSET, ISERR, lacon(), cnfa::ncolors, NOPROGRESS, dfa::nssused, dfa::nstates, sset::outs, cnfa::post, POSTSTATE, PSEUDO, RAINBOW, vars::re, REG_CANCEL, arcp::ss, dfa::ssets, sset::states, cnfa::states, cnfa::stflags, carc::to, dfa::wordsper, and dfa::work.

Referenced by longest(), matchuntil(), and shortest().

783 {
784  struct cnfa *cnfa = d->cnfa;
785  int i;
786  unsigned h;
787  struct carc *ca;
788  struct sset *p;
789  int ispseudocolor;
790  int ispost;
791  int noprogress;
792  int gotstate;
793  int dolacons;
794  int sawlacons;
795 
796  /* for convenience, we can be called even if it might not be a miss */
797  if (css->outs[co] != NULL)
798  {
799  FDEBUG(("hit\n"));
800  return css->outs[co];
801  }
802  FDEBUG(("miss\n"));
803 
804  /*
805  * Checking for operation cancel in the inner text search loop seems
806  * unduly expensive. As a compromise, check during cache misses.
807  */
808  if (CANCEL_REQUESTED(v->re))
809  {
810  ERR(REG_CANCEL);
811  return NULL;
812  }
813 
814  /*
815  * What set of states would we end up in after consuming the co character?
816  * We first consider PLAIN arcs that consume the character, and then look
817  * to see what LACON arcs could be traversed after consuming it.
818  */
819  for (i = 0; i < d->wordsper; i++)
820  d->work[i] = 0; /* build new stateset bitmap in d->work */
821  ispseudocolor = d->cm->cd[co].flags & PSEUDO;
822  ispost = 0;
823  noprogress = 1;
824  gotstate = 0;
825  for (i = 0; i < d->nstates; i++)
826  if (ISBSET(css->states, i))
827  for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
828  if (ca->co == co ||
829  (ca->co == RAINBOW && !ispseudocolor))
830  {
831  BSET(d->work, ca->to);
832  gotstate = 1;
833  if (ca->to == cnfa->post)
834  ispost = 1;
835  if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
836  noprogress = 0;
837  FDEBUG(("%d -> %d\n", i, ca->to));
838  }
839  if (!gotstate)
840  return NULL; /* character cannot reach any new state */
841  dolacons = (cnfa->flags & HASLACONS);
842  sawlacons = 0;
843  /* outer loop handles transitive closure of reachable-by-LACON states */
844  while (dolacons)
845  {
846  dolacons = 0;
847  for (i = 0; i < d->nstates; i++)
848  if (ISBSET(d->work, i))
849  for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
850  {
851  if (ca->co < cnfa->ncolors)
852  continue; /* not a LACON arc */
853  if (ISBSET(d->work, ca->to))
854  continue; /* arc would be a no-op anyway */
855  sawlacons = 1; /* this LACON affects our result */
856  if (!lacon(v, cnfa, cp, ca->co))
857  {
858  if (ISERR())
859  return NULL;
860  continue; /* LACON arc cannot be traversed */
861  }
862  if (ISERR())
863  return NULL;
864  BSET(d->work, ca->to);
865  dolacons = 1;
866  if (ca->to == cnfa->post)
867  ispost = 1;
868  if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
869  noprogress = 0;
870  FDEBUG(("%d :> %d\n", i, ca->to));
871  }
872  }
873  h = HASH(d->work, d->wordsper);
874 
875  /* Is this stateset already in the cache? */
876  for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
877  if (HIT(h, d->work, p, d->wordsper))
878  {
879  FDEBUG(("cached c%d\n", (int) (p - d->ssets)));
880  break; /* NOTE BREAK OUT */
881  }
882  if (i == 0)
883  { /* nope, need a new cache entry */
884  p = getvacant(v, d, cp, start);
885  if (p == NULL)
886  return NULL;
887  assert(p != css);
888  for (i = 0; i < d->wordsper; i++)
889  p->states[i] = d->work[i];
890  p->hash = h;
891  p->flags = (ispost) ? POSTSTATE : 0;
892  if (noprogress)
893  p->flags |= NOPROGRESS;
894  /* lastseen to be dealt with by caller */
895  }
896 
897  /*
898  * Link new stateset to old, unless a LACON affected the result, in which
899  * case we don't create the link. That forces future transitions across
900  * this same arc (same prior stateset and character color) to come through
901  * miss() again, so that we can recheck the LACON(s), which might or might
902  * not pass since context will be different.
903  */
904  if (!sawlacons)
905  {
906  FDEBUG(("c%d[%d]->c%d\n",
907  (int) (css - d->ssets), co, (int) (p - d->ssets)));
908  css->outs[co] = p;
909  css->inchain[co] = p->ins;
910  p->ins.ss = css;
911  p->ins.co = co;
912  }
913  return p;
914 }
struct sset * ssets
Definition: regexec.c:70
struct sset ** outs
Definition: regexec.c:59
#define ERR
Definition: _int.h:161
struct colormap * cm
Definition: regexec.c:76
#define CNFA_NOPROGRESS
Definition: regguts.h:413
struct cnfa * cnfa
Definition: regexec.c:75
#define RAINBOW
Definition: regguts.h:154
Definition: regguts.h:401
unsigned * work
Definition: regexec.c:72
#define ISERR()
Definition: regcomp.c:273
struct carc ** states
Definition: regguts.h:414
struct colordesc * cd
Definition: regguts.h:231
#define HIT(h, bv, ss, nw)
Definition: regexec.c:50
static struct sset * getvacant(struct vars *v, struct dfa *d, chr *cp, chr *start)
Definition: rege_dfa.c:977
#define HASH(sign, val, siglen)
Definition: hstore_gist.c:45
unsigned * states
Definition: regexec.c:47
regex_t * re
Definition: regcomp.c:239
struct arcp * inchain
Definition: regexec.c:60
#define NOPROGRESS
Definition: regexec.c:56
#define assert(TEST)
Definition: imath.c:73
char * stflags
Definition: regguts.h:412
int nstates
Definition: regexec.c:67
int nssused
Definition: regexec.c:66
int wordsper
Definition: regexec.c:69
#define FDEBUG(arglist)
Definition: regguts.h:116
int to
Definition: regguts.h:398
#define ISBSET(uv, sn)
Definition: regguts.h:127
int flags
Definition: regguts.h:179
int flags
Definition: regguts.h:405
#define POSTSTATE
Definition: regexec.c:54
struct arcp ins
Definition: regexec.c:57
int post
Definition: regguts.h:409
#define BSET(uv, sn)
Definition: regguts.h:126
#define PSEUDO
Definition: regguts.h:181
color co
Definition: regguts.h:397
Definition: regexec.c:45
#define CANCEL_REQUESTED(re)
Definition: regguts.h:517
int i
#define COLORLESS
Definition: regguts.h:153
static int lacon(struct vars *v, struct cnfa *pcnfa, chr *cp, color co)
Definition: rege_dfa.c:920
unsigned hash
Definition: regexec.c:48
struct sset * ss
Definition: regexec.c:41
int flags
Definition: regexec.c:52
#define HASLACONS
Definition: regguts.h:406
int ncolors
Definition: regguts.h:404
color co
Definition: regexec.c:42
#define REG_CANCEL
Definition: regex.h:159
Definition: regguts.h:395

◆ newdfa()

static struct dfa* newdfa ( struct vars v,
struct cnfa cnfa,
struct colormap cm,
struct smalldfa sml 
)
static

Definition at line 607 of file rege_dfa.c.

References dfa::arraysmalloced, assert, dfa::backmax, dfa::backmin, dfa::backno, dfa::cm, dfa::cnfa, smalldfa::dfa, vars::eflags, ERR, FEWCOLORS, freedfa(), dfa::incarea, smalldfa::incarea, dfa::ismalloced, dfa::lastnopr, dfa::lastpost, MALLOC, dfa::ncolors, cnfa::ncolors, dfa::nssets, dfa::nssused, dfa::nstates, cnfa::nstates, dfa::outsarea, smalldfa::outsarea, REG_ESPACE, REG_SMALL, dfa::search, dfa::ssets, smalldfa::ssets, dfa::statesarea, smalldfa::statesarea, UBITS, dfa::wordsper, dfa::work, and WORK.

611 {
612  struct dfa *d;
613  size_t nss = cnfa->nstates * 2;
614  int wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
615  bool ismalloced = false;
616 
617  assert(cnfa != NULL && cnfa->nstates != 0);
618 
619  if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS)
620  {
621  assert(wordsper == 1);
622  if (sml == NULL)
623  {
624  sml = (struct smalldfa *) MALLOC(sizeof(struct smalldfa));
625  if (sml == NULL)
626  {
627  ERR(REG_ESPACE);
628  return NULL;
629  }
630  ismalloced = true;
631  }
632  d = &sml->dfa;
633  d->ssets = sml->ssets;
634  d->statesarea = sml->statesarea;
635  d->work = &d->statesarea[nss];
636  d->outsarea = sml->outsarea;
637  d->incarea = sml->incarea;
638  d->ismalloced = ismalloced;
639  d->arraysmalloced = false; /* not separately allocated, anyway */
640  }
641  else
642  {
643  d = (struct dfa *) MALLOC(sizeof(struct dfa));
644  if (d == NULL)
645  {
646  ERR(REG_ESPACE);
647  return NULL;
648  }
649  d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset));
650  d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper *
651  sizeof(unsigned));
652  d->work = &d->statesarea[nss * wordsper];
653  d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors *
654  sizeof(struct sset *));
655  d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors *
656  sizeof(struct arcp));
657  d->ismalloced = true;
658  d->arraysmalloced = true;
659  /* now freedfa() will behave sanely */
660  if (d->ssets == NULL || d->statesarea == NULL ||
661  d->outsarea == NULL || d->incarea == NULL)
662  {
663  freedfa(d);
664  ERR(REG_ESPACE);
665  return NULL;
666  }
667  }
668 
669  d->nssets = (v->eflags & REG_SMALL) ? 7 : nss;
670  d->nssused = 0;
671  d->nstates = cnfa->nstates;
672  d->ncolors = cnfa->ncolors;
673  d->wordsper = wordsper;
674  d->cnfa = cnfa;
675  d->cm = cm;
676  d->lastpost = NULL;
677  d->lastnopr = NULL;
678  d->search = d->ssets;
679  d->backno = -1; /* may be set by caller */
680  d->backmin = d->backmax = 0;
681 
682  /* initialization of sset fields is done as needed */
683 
684  return d;
685 }
struct sset * ssets
Definition: regexec.c:70
#define ERR
Definition: _int.h:161
short backmin
Definition: regexec.c:81
struct arcp * incarea
Definition: regexec.c:74
struct colormap * cm
Definition: regexec.c:76
#define REG_ESPACE
Definition: regex.h:151
struct cnfa * cnfa
Definition: regexec.c:75
#define WORK
Definition: regexec.c:87
unsigned * work
Definition: regexec.c:72
struct sset * search
Definition: regexec.c:79
static void freedfa(struct dfa *d)
Definition: rege_dfa.c:691
#define FEWCOLORS
Definition: regexec.c:91
#define MALLOC(n)
Definition: regcustom.h:60
Definition: regexec.c:63
int nstates
Definition: regguts.h:403
unsigned statesarea[FEWSTATES *2+WORK]
Definition: regexec.c:96
bool ismalloced
Definition: regexec.c:83
struct dfa dfa
Definition: regexec.c:94
bool arraysmalloced
Definition: regexec.c:84
#define assert(TEST)
Definition: imath.c:73
int ncolors
Definition: regexec.c:68
int nstates
Definition: regexec.c:67
int nssets
Definition: regexec.c:65
int nssused
Definition: regexec.c:66
int wordsper
Definition: regexec.c:69
int eflags
Definition: regexec.c:110
#define UBITS
Definition: regguts.h:125
struct arcp incarea[FEWSTATES *2 *FEWCOLORS]
Definition: regexec.c:98
chr * lastnopr
Definition: regexec.c:78
#define REG_SMALL
Definition: regex.h:131
Definition: regexec.c:39
struct sset ssets[FEWSTATES *2]
Definition: regexec.c:95
int backno
Definition: regexec.c:80
short backmax
Definition: regexec.c:82
struct sset * outsarea[FEWSTATES *2 *FEWCOLORS]
Definition: regexec.c:97
struct sset ** outsarea
Definition: regexec.c:73
Definition: regexec.c:45
chr * lastpost
Definition: regexec.c:77
unsigned * statesarea
Definition: regexec.c:71
int ncolors
Definition: regguts.h:404

◆ pickss()

static struct sset* pickss ( struct vars v,
struct dfa d,
chr cp,
chr start 
)
static

Definition at line 1048 of file rege_dfa.c.

References arcp::co, ERR, FDEBUG, sset::flags, i, dfa::incarea, sset::inchain, sset::ins, sset::lastseen, LOCKED, dfa::ncolors, dfa::nssets, dfa::nssused, sset::outs, dfa::outsarea, REG_ASSERT, dfa::search, arcp::ss, dfa::ssets, sset::states, dfa::statesarea, WHITE, and dfa::wordsper.

Referenced by getvacant().

1052 {
1053  int i;
1054  struct sset *ss;
1055  struct sset *end;
1056  chr *ancient;
1057 
1058  /* shortcut for cases where cache isn't full */
1059  if (d->nssused < d->nssets)
1060  {
1061  i = d->nssused;
1062  d->nssused++;
1063  ss = &d->ssets[i];
1064  FDEBUG(("new c%d\n", i));
1065  /* set up innards */
1066  ss->states = &d->statesarea[i * d->wordsper];
1067  ss->flags = 0;
1068  ss->ins.ss = NULL;
1069  ss->ins.co = WHITE; /* give it some value */
1070  ss->outs = &d->outsarea[i * d->ncolors];
1071  ss->inchain = &d->incarea[i * d->ncolors];
1072  for (i = 0; i < d->ncolors; i++)
1073  {
1074  ss->outs[i] = NULL;
1075  ss->inchain[i].ss = NULL;
1076  }
1077  return ss;
1078  }
1079 
1080  /* look for oldest, or old enough anyway */
1081  if (cp - start > d->nssets * 2 / 3) /* oldest 33% are expendable */
1082  ancient = cp - d->nssets * 2 / 3;
1083  else
1084  ancient = start;
1085  for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++)
1086  if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
1087  !(ss->flags & LOCKED))
1088  {
1089  d->search = ss + 1;
1090  FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
1091  return ss;
1092  }
1093  for (ss = d->ssets, end = d->search; ss < end; ss++)
1094  if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
1095  !(ss->flags & LOCKED))
1096  {
1097  d->search = ss + 1;
1098  FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
1099  return ss;
1100  }
1101 
1102  /* nobody's old enough?!? -- something's really wrong */
1103  FDEBUG(("cannot find victim to replace!\n"));
1104  ERR(REG_ASSERT);
1105  return NULL;
1106 }
struct sset * ssets
Definition: regexec.c:70
chr * lastseen
Definition: regexec.c:58
struct sset ** outs
Definition: regexec.c:59
#define ERR
Definition: _int.h:161
struct arcp * incarea
Definition: regexec.c:74
struct sset * search
Definition: regexec.c:79
pg_wchar chr
Definition: regcustom.h:66
unsigned * states
Definition: regexec.c:47
struct arcp * inchain
Definition: regexec.c:60
int ncolors
Definition: regexec.c:68
int nssets
Definition: regexec.c:65
int nssused
Definition: regexec.c:66
int wordsper
Definition: regexec.c:69
#define FDEBUG(arglist)
Definition: regguts.h:116
#define REG_ASSERT
Definition: regex.h:153
#define WHITE
Definition: regguts.h:155
struct arcp ins
Definition: regexec.c:57
struct sset ** outsarea
Definition: regexec.c:73
#define LOCKED
Definition: regexec.c:55
Definition: regexec.c:45
int i
struct sset * ss
Definition: regexec.c:41
unsigned * statesarea
Definition: regexec.c:71
int flags
Definition: regexec.c:52
color co
Definition: regexec.c:42

◆ shortest()

static chr* shortest ( struct vars v,
struct dfa d,
chr start,
chr min,
chr max,
chr **  coldp,
int *  hitstopp 
)
static

Definition at line 204 of file rege_dfa.c.

References assert, dfa::backno, cnfa::bos, dfa::cm, dfa::cnfa, dfa_backref(), DUPINF, vars::eflags, cnfa::eos, FDEBUG, sset::flags, cnfa::flags, GETCOLOR, initialize(), lastcold(), sset::lastseen, MATCHALL, cnfa::maxmatchall, cnfa::minmatchall, miss(), vars::nmatch, sset::outs, vars::pmatch, POSTSTATE, REG_FTRACE, REG_NOTBOL, REG_NOTEOL, regmatch_t::rm_so, dfa::ssets, vars::start, and vars::stop.

Referenced by lacon().

211 {
212  chr *cp;
213  chr *realmin = (min == v->stop) ? min : min + 1;
214  chr *realmax = (max == v->stop) ? max : max + 1;
215  color co;
216  struct sset *css;
217  struct sset *ss;
218  struct colormap *cm = d->cm;
219 
220  /* prevent "uninitialized variable" warnings */
221  if (coldp != NULL)
222  *coldp = NULL;
223  if (hitstopp != NULL)
224  *hitstopp = 0;
225 
226  /* if this is a backref to a known string, just match against that */
227  if (d->backno >= 0)
228  {
229  assert((size_t) d->backno < v->nmatch);
230  if (v->pmatch[d->backno].rm_so >= 0)
231  {
232  cp = dfa_backref(v, d, start, min, max, true);
233  if (cp != NULL && coldp != NULL)
234  *coldp = start;
235  /* there is no case where we should set *hitstopp */
236  return cp;
237  }
238  }
239 
240  /* fast path for matchall NFAs */
241  if (d->cnfa->flags & MATCHALL)
242  {
243  size_t nchr = min - start;
244 
245  if (d->cnfa->maxmatchall != DUPINF &&
246  nchr > d->cnfa->maxmatchall)
247  return NULL;
248  if ((max - start) < d->cnfa->minmatchall)
249  return NULL;
250  if (nchr < d->cnfa->minmatchall)
251  min = start + d->cnfa->minmatchall;
252  if (coldp != NULL)
253  *coldp = start;
254  /* there is no case where we should set *hitstopp */
255  return min;
256  }
257 
258  /* initialize */
259  css = initialize(v, d, start);
260  if (css == NULL)
261  return NULL;
262  cp = start;
263 
264  /* startup */
265  FDEBUG(("--- startup ---\n"));
266  if (cp == v->start)
267  {
268  co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
269  FDEBUG(("color %ld\n", (long) co));
270  }
271  else
272  {
273  co = GETCOLOR(cm, *(cp - 1));
274  FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
275  }
276  css = miss(v, d, css, co, cp, start);
277  if (css == NULL)
278  return NULL;
279  css->lastseen = cp;
280  ss = css;
281 
282  /*
283  * This is the main text-scanning loop. It seems worth having two copies
284  * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
285  * builds, when you're not actively tracing.
286  */
287 #ifdef REG_DEBUG
288  if (v->eflags & REG_FTRACE)
289  {
290  while (cp < realmax)
291  {
292  FDEBUG(("--- at c%d ---\n", (int) (css - d->ssets)));
293  co = GETCOLOR(cm, *cp);
294  FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
295  ss = css->outs[co];
296  if (ss == NULL)
297  {
298  ss = miss(v, d, css, co, cp + 1, start);
299  if (ss == NULL)
300  break; /* NOTE BREAK OUT */
301  }
302  cp++;
303  ss->lastseen = cp;
304  css = ss;
305  if ((ss->flags & POSTSTATE) && cp >= realmin)
306  break; /* NOTE BREAK OUT */
307  }
308  }
309  else
310 #endif
311  {
312  while (cp < realmax)
313  {
314  co = GETCOLOR(cm, *cp);
315  ss = css->outs[co];
316  if (ss == NULL)
317  {
318  ss = miss(v, d, css, co, cp + 1, start);
319  if (ss == NULL)
320  break; /* NOTE BREAK OUT */
321  }
322  cp++;
323  ss->lastseen = cp;
324  css = ss;
325  if ((ss->flags & POSTSTATE) && cp >= realmin)
326  break; /* NOTE BREAK OUT */
327  }
328  }
329 
330  if (ss == NULL)
331  return NULL;
332 
333  if (coldp != NULL) /* report last no-progress state set, if any */
334  *coldp = lastcold(v, d);
335 
336  if ((ss->flags & POSTSTATE) && cp > min)
337  {
338  assert(cp >= realmin);
339  cp--;
340  }
341  else if (cp == v->stop && max == v->stop)
342  {
343  co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
344  FDEBUG(("color %ld\n", (long) co));
345  ss = miss(v, d, css, co, cp, start);
346  /* match might have ended at eol */
347  if ((ss == NULL || !(ss->flags & POSTSTATE)) && hitstopp != NULL)
348  *hitstopp = 1;
349  }
350 
351  if (ss == NULL || !(ss->flags & POSTSTATE))
352  return NULL;
353 
354  return cp;
355 }
struct sset * ssets
Definition: regexec.c:70
chr * lastseen
Definition: regexec.c:58
#define REG_NOTBOL
Definition: regex.h:126
struct sset ** outs
Definition: regexec.c:59
int maxmatchall
Definition: regguts.h:419
struct colormap * cm
Definition: regexec.c:76
struct cnfa * cnfa
Definition: regexec.c:75
regoff_t rm_so
Definition: regex.h:87
Definition: regguts.h:401
short color
Definition: regguts.h:150
static chr * dfa_backref(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, bool shortest)
Definition: rege_dfa.c:506
#define REG_FTRACE
Definition: regex.h:129
static struct sset * initialize(struct vars *v, struct dfa *d, chr *start)
Definition: rege_dfa.c:731
chr * start
Definition: regexec.c:114
pg_wchar chr
Definition: regcustom.h:66
static chr * lastcold(struct vars *v, struct dfa *d)
Definition: rege_dfa.c:585
#define assert(TEST)
Definition: imath.c:73
#define MATCHALL
Definition: regguts.h:407
int eflags
Definition: regexec.c:110
color bos[2]
Definition: regguts.h:410
#define FDEBUG(arglist)
Definition: regguts.h:116
static struct sset * miss(struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
Definition: rege_dfa.c:777
#define DUPINF
Definition: regguts.h:94
int backno
Definition: regexec.c:80
regmatch_t * pmatch
Definition: regexec.c:112
int flags
Definition: regguts.h:405
#define POSTSTATE
Definition: regexec.c:54
int minmatchall
Definition: regguts.h:418
size_t nmatch
Definition: regexec.c:111
const chr * stop
Definition: regcomp.c:241
Definition: regexec.c:45
int flags
Definition: regexec.c:52
#define REG_NOTEOL
Definition: regex.h:127
#define GETCOLOR(cm, c)
Definition: regguts.h:252
size_t max
Definition: regguts.h:229
color eos[2]
Definition: regguts.h:411