PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
rege_dfa.c File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

static chrlongest (struct vars *v, struct dfa *d, chr *start, chr *stop, int *hitstopp)
 
static chrshortest (struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
 
static int matchuntil (struct vars *v, struct dfa *d, chr *probe, struct sset **lastcss, chr **lastcp)
 
static chrlastcold (struct vars *v, struct dfa *d)
 
static struct dfanewdfa (struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct smalldfa *sml)
 
static void freedfa (struct dfa *d)
 
static unsigned hash (unsigned *uv, int n)
 
static struct ssetinitialize (struct vars *v, struct dfa *d, chr *start)
 
static struct ssetmiss (struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
 
static int lacon (struct vars *v, struct cnfa *pcnfa, chr *cp, color co)
 
static struct ssetgetvacant (struct vars *v, struct dfa *d, chr *cp, chr *start)
 
static struct ssetpickss (struct vars *v, struct dfa *d, chr *cp, chr *start)
 

Function Documentation

static void freedfa ( struct dfa d)
static

Definition at line 517 of file rege_dfa.c.

References dfa::cptsmalloced, FREE, dfa::incarea, dfa::mallocarea, dfa::outsarea, dfa::ssets, and dfa::statesarea.

Referenced by newdfa().

518 {
519  if (d->cptsmalloced)
520  {
521  if (d->ssets != NULL)
522  FREE(d->ssets);
523  if (d->statesarea != NULL)
524  FREE(d->statesarea);
525  if (d->outsarea != NULL)
526  FREE(d->outsarea);
527  if (d->incarea != NULL)
528  FREE(d->incarea);
529  }
530 
531  if (d->mallocarea != NULL)
532  FREE(d->mallocarea);
533 }
struct sset * ssets
Definition: regexec.c:70
struct arcp * incarea
Definition: regexec.c:74
int cptsmalloced
Definition: regexec.c:80
char * mallocarea
Definition: regexec.c:81
struct sset ** outsarea
Definition: regexec.c:73
unsigned * statesarea
Definition: regexec.c:71
#define FREE(size)
Definition: saslprep.c:51
static struct sset* getvacant ( struct vars v,
struct dfa d,
chr cp,
chr start 
)
static

Definition at line 800 of file rege_dfa.c.

References assert, arcp::co, FDEBUG, sset::flags, i, sset::inchain, sset::ins, dfa::lastnopr, dfa::lastpost, sset::lastseen, LOCKED, dfa::ncolors, NOPROGRESS, sset::outs, pickss(), POSTSTATE, arcp::ss, and dfa::ssets.

Referenced by initialize(), and miss().

804 {
805  int i;
806  struct sset *ss;
807  struct sset *p;
808  struct arcp ap;
809  color co;
810 
811  ss = pickss(v, d, cp, start);
812  if (ss == NULL)
813  return NULL;
814  assert(!(ss->flags & LOCKED));
815 
816  /* clear out its inarcs, including self-referential ones */
817  ap = ss->ins;
818  while ((p = ap.ss) != NULL)
819  {
820  co = ap.co;
821  FDEBUG(("zapping c%d's %ld outarc\n", (int) (p - d->ssets), (long) co));
822  p->outs[co] = NULL;
823  ap = p->inchain[co];
824  p->inchain[co].ss = NULL; /* paranoia */
825  }
826  ss->ins.ss = NULL;
827 
828  /* take it off the inarc chains of the ssets reached by its outarcs */
829  for (i = 0; i < d->ncolors; i++)
830  {
831  p = ss->outs[i];
832  assert(p != ss); /* not self-referential */
833  if (p == NULL)
834  continue; /* NOTE CONTINUE */
835  FDEBUG(("del outarc %d from c%d's in chn\n", i, (int) (p - d->ssets)));
836  if (p->ins.ss == ss && p->ins.co == i)
837  p->ins = ss->inchain[i];
838  else
839  {
840  struct arcp lastap = {NULL, 0};
841 
842  assert(p->ins.ss != NULL);
843  for (ap = p->ins; ap.ss != NULL &&
844  !(ap.ss == ss && ap.co == i);
845  ap = ap.ss->inchain[ap.co])
846  lastap = ap;
847  assert(ap.ss != NULL);
848  lastap.ss->inchain[lastap.co] = ss->inchain[i];
849  }
850  ss->outs[i] = NULL;
851  ss->inchain[i].ss = NULL;
852  }
853 
854  /* if ss was a success state, may need to remember location */
855  if ((ss->flags & POSTSTATE) && ss->lastseen != d->lastpost &&
856  (d->lastpost == NULL || d->lastpost < ss->lastseen))
857  d->lastpost = ss->lastseen;
858 
859  /* likewise for a no-progress state */
860  if ((ss->flags & NOPROGRESS) && ss->lastseen != d->lastnopr &&
861  (d->lastnopr == NULL || d->lastnopr < ss->lastseen))
862  d->lastnopr = ss->lastseen;
863 
864  return ss;
865 }
struct sset * ssets
Definition: regexec.c:70
chr * lastseen
Definition: regexec.c:58
struct sset ** outs
Definition: regexec.c:59
short color
Definition: regguts.h:134
static struct sset * pickss(struct vars *v, struct dfa *d, chr *cp, chr *start)
Definition: rege_dfa.c:871
struct arcp * inchain
Definition: regexec.c:60
#define NOPROGRESS
Definition: regexec.c:56
#define assert(TEST)
Definition: imath.c:37
int ncolors
Definition: regexec.c:68
chr * lastnopr
Definition: regexec.c:78
#define FDEBUG(arglist)
Definition: regguts.h:116
Definition: regexec.c:39
#define POSTSTATE
Definition: regexec.c:54
struct arcp ins
Definition: regexec.c:57
#define LOCKED
Definition: regexec.c:55
Definition: regexec.c:45
int i
chr * lastpost
Definition: regexec.c:77
struct sset * ss
Definition: regexec.c:41
int flags
Definition: regexec.c:52
color co
Definition: regexec.c:42
static unsigned hash ( unsigned *  uv,
int  n 
)
static
static struct sset* initialize ( struct vars v,
struct dfa d,
chr start 
)
static

Definition at line 557 of file rege_dfa.c.

References assert, BSET, dfa::cnfa, sset::flags, getvacant(), HASH, sset::hash, i, dfa::lastnopr, dfa::lastpost, sset::lastseen, LOCKED, NOPROGRESS, dfa::nssused, cnfa::post, cnfa::pre, dfa::ssets, STARTER, sset::states, and dfa::wordsper.

Referenced by longest(), matchuntil(), and shortest().

560 {
561  struct sset *ss;
562  int i;
563 
564  /* is previous one still there? */
565  if (d->nssused > 0 && (d->ssets[0].flags & STARTER))
566  ss = &d->ssets[0];
567  else
568  { /* no, must (re)build it */
569  ss = getvacant(v, d, start, start);
570  if (ss == NULL)
571  return NULL;
572  for (i = 0; i < d->wordsper; i++)
573  ss->states[i] = 0;
574  BSET(ss->states, d->cnfa->pre);
575  ss->hash = HASH(ss->states, d->wordsper);
576  assert(d->cnfa->pre != d->cnfa->post);
577  ss->flags = STARTER | LOCKED | NOPROGRESS;
578  /* lastseen dealt with below */
579  }
580 
581  for (i = 0; i < d->nssused; i++)
582  d->ssets[i].lastseen = NULL;
583  ss->lastseen = start; /* maybe untrue, but harmless */
584  d->lastpost = NULL;
585  d->lastnopr = NULL;
586  return ss;
587 }
struct sset * ssets
Definition: regexec.c:70
#define STARTER
Definition: regexec.c:53
chr * lastseen
Definition: regexec.c:58
struct cnfa * cnfa
Definition: regexec.c:75
int pre
Definition: regguts.h:360
#define HASH(sign, val)
Definition: hstore_gist.c:38
static struct sset * getvacant(struct vars *v, struct dfa *d, chr *cp, chr *start)
Definition: rege_dfa.c:800
unsigned * states
Definition: regexec.c:47
#define NOPROGRESS
Definition: regexec.c:56
#define assert(TEST)
Definition: imath.c:37
int nssused
Definition: regexec.c:66
int wordsper
Definition: regexec.c:69
chr * lastnopr
Definition: regexec.c:78
int post
Definition: regguts.h:361
#define BSET(uv, sn)
Definition: regguts.h:126
#define LOCKED
Definition: regexec.c:55
Definition: regexec.c:45
int i
chr * lastpost
Definition: regexec.c:77
unsigned hash
Definition: regexec.c:48
int flags
Definition: regexec.c:52
static int lacon ( struct vars v,
struct cnfa pcnfa,
chr cp,
color  co 
)
static

Definition at line 743 of file rege_dfa.c.

References assert, ERR, FDEBUG, vars::g, getladfa(), guts::lacons, LATYPE_IS_AHEAD, LATYPE_IS_POS, vars::lblastcp, vars::lblastcss, matchuntil(), cnfa::ncolors, vars::re, REG_ETOOBIG, shortest(), STACK_TOO_DEEP, vars::stop, and subre::subno.

Referenced by miss().

747 {
748  int n;
749  struct subre *sub;
750  struct dfa *d;
751  chr *end;
752  int satisfied;
753 
754  /* Since this is recursive, it could be driven to stack overflow */
755  if (STACK_TOO_DEEP(v->re))
756  {
757  ERR(REG_ETOOBIG);
758  return 0;
759  }
760 
761  n = co - pcnfa->ncolors;
762  assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
763  FDEBUG(("=== testing lacon %d\n", n));
764  sub = &v->g->lacons[n];
765  d = getladfa(v, n);
766  if (d == NULL)
767  return 0;
768  if (LATYPE_IS_AHEAD(sub->subno))
769  {
770  /* used to use longest() here, but shortest() could be much cheaper */
771  end = shortest(v, d, cp, cp, v->stop,
772  (chr **) NULL, (int *) NULL);
773  satisfied = LATYPE_IS_POS(sub->subno) ? (end != NULL) : (end == NULL);
774  }
775  else
776  {
777  /*
778  * To avoid doing O(N^2) work when repeatedly testing a lookbehind
779  * constraint in an N-character string, we use matchuntil() which can
780  * cache the DFA state across calls. We only need to restart if the
781  * probe point decreases, which is not common. The NFA we're using is
782  * a search NFA, so it doesn't mind scanning over stuff before the
783  * nominal match.
784  */
785  satisfied = matchuntil(v, d, cp, &v->lblastcss[n], &v->lblastcp[n]);
786  if (!LATYPE_IS_POS(sub->subno))
787  satisfied = !satisfied;
788  }
789  FDEBUG(("=== lacon %d satisfied %d\n", n, satisfied));
790  return satisfied;
791 }
static int matchuntil(struct vars *v, struct dfa *d, chr *probe, struct sset **lastcss, chr **lastcp)
Definition: rege_dfa.c:303
#define LATYPE_IS_POS(la)
Definition: regguts.h:103
#define ERR
Definition: _int.h:146
struct subre * lacons
Definition: regguts.h:474
int subno
Definition: regguts.h:427
Definition: regexec.c:63
struct sset ** lblastcss
Definition: regexec.c:117
pg_wchar chr
Definition: regcustom.h:68
regex_t * re
Definition: regcomp.c:226
#define assert(TEST)
Definition: imath.c:37
#define LATYPE_IS_AHEAD(la)
Definition: regguts.h:104
struct guts * g
Definition: regexec.c:106
#define FDEBUG(arglist)
Definition: regguts.h:116
static chr * shortest(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
Definition: rege_dfa.c:168
#define REG_ETOOBIG
Definition: regex.h:155
Definition: regguts.h:408
const chr * stop
Definition: regcomp.c:228
chr ** lblastcp
Definition: regexec.c:118
static struct dfa * getladfa(struct vars *, int)
Definition: regexec.c:355
#define STACK_TOO_DEEP(re)
Definition: regguts.h:455
int ncolors
Definition: regguts.h:357
static chr* lastcold ( struct vars v,
struct dfa d 
)
static

Definition at line 417 of file rege_dfa.c.

References sset::flags, i, dfa::lastnopr, sset::lastseen, NOPROGRESS, dfa::nssused, dfa::ssets, and vars::start.

Referenced by shortest().

419 {
420  struct sset *ss;
421  chr *nopr;
422  int i;
423 
424  nopr = d->lastnopr;
425  if (nopr == NULL)
426  nopr = v->start;
427  for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
428  if ((ss->flags & NOPROGRESS) && nopr < ss->lastseen)
429  nopr = ss->lastseen;
430  return nopr;
431 }
struct sset * ssets
Definition: regexec.c:70
chr * lastseen
Definition: regexec.c:58
chr * start
Definition: regexec.c:111
pg_wchar chr
Definition: regcustom.h:68
#define NOPROGRESS
Definition: regexec.c:56
int nssused
Definition: regexec.c:66
chr * lastnopr
Definition: regexec.c:78
Definition: regexec.c:45
int i
int flags
Definition: regexec.c:52
static chr* longest ( struct vars v,
struct dfa d,
chr start,
chr stop,
int *  hitstopp 
)
static

Definition at line 42 of file rege_dfa.c.

References cnfa::bos, dfa::cm, dfa::cnfa, vars::eflags, cnfa::eos, FDEBUG, sset::flags, GETCOLOR, i, initialize(), ISERR, dfa::lastpost, sset::lastseen, miss(), dfa::nssused, sset::outs, POSTSTATE, REG_FTRACE, REG_NOTBOL, REG_NOTEOL, dfa::ssets, vars::start, and vars::stop.

Referenced by mdsync().

47 {
48  chr *cp;
49  chr *realstop = (stop == v->stop) ? stop : stop + 1;
50  color co;
51  struct sset *css;
52  struct sset *ss;
53  chr *post;
54  int i;
55  struct colormap *cm = d->cm;
56 
57  /* prevent "uninitialized variable" warnings */
58  if (hitstopp != NULL)
59  *hitstopp = 0;
60 
61  /* initialize */
62  css = initialize(v, d, start);
63  if (css == NULL)
64  return NULL;
65  cp = start;
66 
67  /* startup */
68  FDEBUG(("+++ startup +++\n"));
69  if (cp == v->start)
70  {
71  co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
72  FDEBUG(("color %ld\n", (long) co));
73  }
74  else
75  {
76  co = GETCOLOR(cm, *(cp - 1));
77  FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
78  }
79  css = miss(v, d, css, co, cp, start);
80  if (css == NULL)
81  return NULL;
82  css->lastseen = cp;
83 
84  /*
85  * This is the main text-scanning loop. It seems worth having two copies
86  * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
87  * builds, when you're not actively tracing.
88  */
89 #ifdef REG_DEBUG
90  if (v->eflags & REG_FTRACE)
91  {
92  while (cp < realstop)
93  {
94  FDEBUG(("+++ at c%d +++\n", (int) (css - d->ssets)));
95  co = GETCOLOR(cm, *cp);
96  FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
97  ss = css->outs[co];
98  if (ss == NULL)
99  {
100  ss = miss(v, d, css, co, cp + 1, start);
101  if (ss == NULL)
102  break; /* NOTE BREAK OUT */
103  }
104  cp++;
105  ss->lastseen = cp;
106  css = ss;
107  }
108  }
109  else
110 #endif
111  {
112  while (cp < realstop)
113  {
114  co = GETCOLOR(cm, *cp);
115  ss = css->outs[co];
116  if (ss == NULL)
117  {
118  ss = miss(v, d, css, co, cp + 1, start);
119  if (ss == NULL)
120  break; /* NOTE BREAK OUT */
121  }
122  cp++;
123  ss->lastseen = cp;
124  css = ss;
125  }
126  }
127 
128  if (ISERR())
129  return NULL;
130 
131  /* shutdown */
132  FDEBUG(("+++ shutdown at c%d +++\n", (int) (css - d->ssets)));
133  if (cp == v->stop && stop == v->stop)
134  {
135  if (hitstopp != NULL)
136  *hitstopp = 1;
137  co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
138  FDEBUG(("color %ld\n", (long) co));
139  ss = miss(v, d, css, co, cp, start);
140  if (ISERR())
141  return NULL;
142  /* special case: match ended at eol? */
143  if (ss != NULL && (ss->flags & POSTSTATE))
144  return cp;
145  else if (ss != NULL)
146  ss->lastseen = cp; /* to be tidy */
147  }
148 
149  /* find last match, if any */
150  post = d->lastpost;
151  for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
152  if ((ss->flags & POSTSTATE) && post != ss->lastseen &&
153  (post == NULL || post < ss->lastseen))
154  post = ss->lastseen;
155  if (post != NULL) /* found one */
156  return post - 1;
157 
158  return NULL;
159 }
struct sset * ssets
Definition: regexec.c:70
chr * lastseen
Definition: regexec.c:58
#define REG_NOTBOL
Definition: regex.h:124
struct sset ** outs
Definition: regexec.c:59
struct colormap * cm
Definition: regexec.c:76
struct cnfa * cnfa
Definition: regexec.c:75
short color
Definition: regguts.h:134
#define REG_FTRACE
Definition: regex.h:127
#define ISERR()
Definition: regcomp.c:262
static struct sset * initialize(struct vars *v, struct dfa *d, chr *start)
Definition: rege_dfa.c:557
chr * start
Definition: regexec.c:111
pg_wchar chr
Definition: regcustom.h:68
int nssused
Definition: regexec.c:66
int eflags
Definition: regexec.c:107
color bos[2]
Definition: regguts.h:362
#define FDEBUG(arglist)
Definition: regguts.h:116
static struct sset * miss(struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
Definition: rege_dfa.c:603
#define POSTSTATE
Definition: regexec.c:54
const chr * stop
Definition: regcomp.c:228
Definition: regexec.c:45
int i
chr * lastpost
Definition: regexec.c:77
int flags
Definition: regexec.c:52
#define REG_NOTEOL
Definition: regex.h:125
#define GETCOLOR(cm, c)
Definition: regguts.h:235
color eos[2]
Definition: regguts.h:363
static int matchuntil ( struct vars v,
struct dfa d,
chr probe,
struct sset **  lastcss,
chr **  lastcp 
)
static

Definition at line 303 of file rege_dfa.c.

References assert, cnfa::bos, dfa::cm, dfa::cnfa, vars::eflags, cnfa::eos, FDEBUG, sset::flags, GETCOLOR, initialize(), sset::lastseen, miss(), sset::outs, POSTSTATE, REG_FTRACE, REG_NOTBOL, REG_NOTEOL, dfa::ssets, vars::start, and vars::stop.

Referenced by lacon().

308 {
309  chr *cp = *lastcp;
310  color co;
311  struct sset *css = *lastcss;
312  struct sset *ss;
313  struct colormap *cm = d->cm;
314 
315  /* initialize and startup, or restart, if necessary */
316  if (cp == NULL || cp > probe)
317  {
318  cp = v->start;
319  css = initialize(v, d, cp);
320  if (css == NULL)
321  return 0;
322 
323  FDEBUG((">>> startup >>>\n"));
324  co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
325  FDEBUG(("color %ld\n", (long) co));
326 
327  css = miss(v, d, css, co, cp, v->start);
328  if (css == NULL)
329  return 0;
330  css->lastseen = cp;
331  }
332  else if (css == NULL)
333  {
334  /* we previously found that no match is possible beyond *lastcp */
335  return 0;
336  }
337  ss = css;
338 
339  /*
340  * This is the main text-scanning loop. It seems worth having two copies
341  * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
342  * builds, when you're not actively tracing.
343  */
344 #ifdef REG_DEBUG
345  if (v->eflags & REG_FTRACE)
346  {
347  while (cp < probe)
348  {
349  FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
350  co = GETCOLOR(cm, *cp);
351  FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
352  ss = css->outs[co];
353  if (ss == NULL)
354  {
355  ss = miss(v, d, css, co, cp + 1, v->start);
356  if (ss == NULL)
357  break; /* NOTE BREAK OUT */
358  }
359  cp++;
360  ss->lastseen = cp;
361  css = ss;
362  }
363  }
364  else
365 #endif
366  {
367  while (cp < probe)
368  {
369  co = GETCOLOR(cm, *cp);
370  ss = css->outs[co];
371  if (ss == NULL)
372  {
373  ss = miss(v, d, css, co, cp + 1, v->start);
374  if (ss == NULL)
375  break; /* NOTE BREAK OUT */
376  }
377  cp++;
378  ss->lastseen = cp;
379  css = ss;
380  }
381  }
382 
383  *lastcss = ss;
384  *lastcp = cp;
385 
386  if (ss == NULL)
387  return 0; /* impossible match, or internal error */
388 
389  /* We need to process one more chr, or the EOS symbol, to check match */
390  if (cp < v->stop)
391  {
392  FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
393  co = GETCOLOR(cm, *cp);
394  FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
395  ss = css->outs[co];
396  if (ss == NULL)
397  ss = miss(v, d, css, co, cp + 1, v->start);
398  }
399  else
400  {
401  assert(cp == v->stop);
402  co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
403  FDEBUG(("color %ld\n", (long) co));
404  ss = miss(v, d, css, co, cp, v->start);
405  }
406 
407  if (ss == NULL || !(ss->flags & POSTSTATE))
408  return 0;
409 
410  return 1;
411 }
struct sset * ssets
Definition: regexec.c:70
chr * lastseen
Definition: regexec.c:58
#define REG_NOTBOL
Definition: regex.h:124
struct sset ** outs
Definition: regexec.c:59
struct colormap * cm
Definition: regexec.c:76
struct cnfa * cnfa
Definition: regexec.c:75
short color
Definition: regguts.h:134
#define REG_FTRACE
Definition: regex.h:127
static struct sset * initialize(struct vars *v, struct dfa *d, chr *start)
Definition: rege_dfa.c:557
chr * start
Definition: regexec.c:111
pg_wchar chr
Definition: regcustom.h:68
#define assert(TEST)
Definition: imath.c:37
int eflags
Definition: regexec.c:107
color bos[2]
Definition: regguts.h:362
#define FDEBUG(arglist)
Definition: regguts.h:116
static struct sset * miss(struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
Definition: rege_dfa.c:603
#define POSTSTATE
Definition: regexec.c:54
const chr * stop
Definition: regcomp.c:228
Definition: regexec.c:45
int flags
Definition: regexec.c:52
#define REG_NOTEOL
Definition: regex.h:125
#define GETCOLOR(cm, c)
Definition: regguts.h:235
color eos[2]
Definition: regguts.h:363
static struct sset* miss ( struct vars v,
struct dfa d,
struct sset css,
color  co,
chr cp,
chr start 
)
static

Definition at line 603 of file rege_dfa.c.

References assert, BSET, CANCEL_REQUESTED, dfa::cnfa, CNFA_NOPROGRESS, arcp::co, carc::co, COLORLESS, ERR, FDEBUG, sset::flags, cnfa::flags, getvacant(), HASH, sset::hash, HASLACONS, HIT, i, sset::inchain, sset::ins, ISBSET, ISERR, lacon(), cnfa::ncolors, NOPROGRESS, dfa::nssused, dfa::nstates, sset::outs, cnfa::post, POSTSTATE, vars::re, REG_CANCEL, arcp::ss, dfa::ssets, sset::states, cnfa::states, cnfa::stflags, carc::to, dfa::wordsper, and dfa::work.

Referenced by longest(), matchuntil(), and shortest().

609 {
610  struct cnfa *cnfa = d->cnfa;
611  int i;
612  unsigned h;
613  struct carc *ca;
614  struct sset *p;
615  int ispost;
616  int noprogress;
617  int gotstate;
618  int dolacons;
619  int sawlacons;
620 
621  /* for convenience, we can be called even if it might not be a miss */
622  if (css->outs[co] != NULL)
623  {
624  FDEBUG(("hit\n"));
625  return css->outs[co];
626  }
627  FDEBUG(("miss\n"));
628 
629  /*
630  * Checking for operation cancel in the inner text search loop seems
631  * unduly expensive. As a compromise, check during cache misses.
632  */
633  if (CANCEL_REQUESTED(v->re))
634  {
635  ERR(REG_CANCEL);
636  return NULL;
637  }
638 
639  /*
640  * What set of states would we end up in after consuming the co character?
641  * We first consider PLAIN arcs that consume the character, and then look
642  * to see what LACON arcs could be traversed after consuming it.
643  */
644  for (i = 0; i < d->wordsper; i++)
645  d->work[i] = 0; /* build new stateset bitmap in d->work */
646  ispost = 0;
647  noprogress = 1;
648  gotstate = 0;
649  for (i = 0; i < d->nstates; i++)
650  if (ISBSET(css->states, i))
651  for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
652  if (ca->co == co)
653  {
654  BSET(d->work, ca->to);
655  gotstate = 1;
656  if (ca->to == cnfa->post)
657  ispost = 1;
658  if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
659  noprogress = 0;
660  FDEBUG(("%d -> %d\n", i, ca->to));
661  }
662  if (!gotstate)
663  return NULL; /* character cannot reach any new state */
664  dolacons = (cnfa->flags & HASLACONS);
665  sawlacons = 0;
666  /* outer loop handles transitive closure of reachable-by-LACON states */
667  while (dolacons)
668  {
669  dolacons = 0;
670  for (i = 0; i < d->nstates; i++)
671  if (ISBSET(d->work, i))
672  for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
673  {
674  if (ca->co < cnfa->ncolors)
675  continue; /* not a LACON arc */
676  if (ISBSET(d->work, ca->to))
677  continue; /* arc would be a no-op anyway */
678  sawlacons = 1; /* this LACON affects our result */
679  if (!lacon(v, cnfa, cp, ca->co))
680  {
681  if (ISERR())
682  return NULL;
683  continue; /* LACON arc cannot be traversed */
684  }
685  if (ISERR())
686  return NULL;
687  BSET(d->work, ca->to);
688  dolacons = 1;
689  if (ca->to == cnfa->post)
690  ispost = 1;
691  if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
692  noprogress = 0;
693  FDEBUG(("%d :> %d\n", i, ca->to));
694  }
695  }
696  h = HASH(d->work, d->wordsper);
697 
698  /* Is this stateset already in the cache? */
699  for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
700  if (HIT(h, d->work, p, d->wordsper))
701  {
702  FDEBUG(("cached c%d\n", (int) (p - d->ssets)));
703  break; /* NOTE BREAK OUT */
704  }
705  if (i == 0)
706  { /* nope, need a new cache entry */
707  p = getvacant(v, d, cp, start);
708  if (p == NULL)
709  return NULL;
710  assert(p != css);
711  for (i = 0; i < d->wordsper; i++)
712  p->states[i] = d->work[i];
713  p->hash = h;
714  p->flags = (ispost) ? POSTSTATE : 0;
715  if (noprogress)
716  p->flags |= NOPROGRESS;
717  /* lastseen to be dealt with by caller */
718  }
719 
720  /*
721  * Link new stateset to old, unless a LACON affected the result, in which
722  * case we don't create the link. That forces future transitions across
723  * this same arc (same prior stateset and character color) to come through
724  * miss() again, so that we can recheck the LACON(s), which might or might
725  * not pass since context will be different.
726  */
727  if (!sawlacons)
728  {
729  FDEBUG(("c%d[%d]->c%d\n",
730  (int) (css - d->ssets), co, (int) (p - d->ssets)));
731  css->outs[co] = p;
732  css->inchain[co] = p->ins;
733  p->ins.ss = css;
734  p->ins.co = co;
735  }
736  return p;
737 }
struct sset * ssets
Definition: regexec.c:70
struct sset ** outs
Definition: regexec.c:59
#define ERR
Definition: _int.h:146
#define CNFA_NOPROGRESS
Definition: regguts.h:365
struct cnfa * cnfa
Definition: regexec.c:75
Definition: regguts.h:354
unsigned * work
Definition: regexec.c:72
#define HASH(sign, val)
Definition: hstore_gist.c:38
#define ISERR()
Definition: regcomp.c:262
struct carc ** states
Definition: regguts.h:366
#define HIT(h, bv, ss, nw)
Definition: regexec.c:50
static struct sset * getvacant(struct vars *v, struct dfa *d, chr *cp, chr *start)
Definition: rege_dfa.c:800
unsigned * states
Definition: regexec.c:47
regex_t * re
Definition: regcomp.c:226
struct arcp * inchain
Definition: regexec.c:60
#define NOPROGRESS
Definition: regexec.c:56
#define assert(TEST)
Definition: imath.c:37
char * stflags
Definition: regguts.h:364
int nstates
Definition: regexec.c:67
int nssused
Definition: regexec.c:66
int wordsper
Definition: regexec.c:69
#define FDEBUG(arglist)
Definition: regguts.h:116
int to
Definition: regguts.h:351
#define ISBSET(uv, sn)
Definition: regguts.h:127
int flags
Definition: regguts.h:358
#define POSTSTATE
Definition: regexec.c:54
struct arcp ins
Definition: regexec.c:57
int post
Definition: regguts.h:361
#define BSET(uv, sn)
Definition: regguts.h:126
color co
Definition: regguts.h:350
Definition: regexec.c:45
#define CANCEL_REQUESTED(re)
Definition: regguts.h:452
int i
#define COLORLESS
Definition: regguts.h:137
static int lacon(struct vars *v, struct cnfa *pcnfa, chr *cp, color co)
Definition: rege_dfa.c:743
unsigned hash
Definition: regexec.c:48
struct sset * ss
Definition: regexec.c:41
int flags
Definition: regexec.c:52
#define HASLACONS
Definition: regguts.h:359
int ncolors
Definition: regguts.h:357
color co
Definition: regexec.c:42
#define REG_CANCEL
Definition: regex.h:157
Definition: regguts.h:348
static struct dfa* newdfa ( struct vars v,
struct cnfa cnfa,
struct colormap cm,
struct smalldfa sml 
)
static

Definition at line 437 of file rege_dfa.c.

References assert, dfa::cm, dfa::cnfa, dfa::cptsmalloced, smalldfa::dfa, vars::eflags, ERR, FEWCOLORS, freedfa(), dfa::incarea, smalldfa::incarea, dfa::lastnopr, dfa::lastpost, MALLOC, dfa::mallocarea, dfa::ncolors, cnfa::ncolors, dfa::nssets, dfa::nssused, dfa::nstates, cnfa::nstates, dfa::outsarea, smalldfa::outsarea, REG_ESPACE, REG_SMALL, dfa::search, dfa::ssets, smalldfa::ssets, dfa::statesarea, smalldfa::statesarea, UBITS, dfa::wordsper, dfa::work, and WORK.

441 {
442  struct dfa *d;
443  size_t nss = cnfa->nstates * 2;
444  int wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
445  struct smalldfa *smallwas = sml;
446 
447  assert(cnfa != NULL && cnfa->nstates != 0);
448 
449  if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS)
450  {
451  assert(wordsper == 1);
452  if (sml == NULL)
453  {
454  sml = (struct smalldfa *) MALLOC(sizeof(struct smalldfa));
455  if (sml == NULL)
456  {
457  ERR(REG_ESPACE);
458  return NULL;
459  }
460  }
461  d = &sml->dfa;
462  d->ssets = sml->ssets;
463  d->statesarea = sml->statesarea;
464  d->work = &d->statesarea[nss];
465  d->outsarea = sml->outsarea;
466  d->incarea = sml->incarea;
467  d->cptsmalloced = 0;
468  d->mallocarea = (smallwas == NULL) ? (char *) sml : NULL;
469  }
470  else
471  {
472  d = (struct dfa *) MALLOC(sizeof(struct dfa));
473  if (d == NULL)
474  {
475  ERR(REG_ESPACE);
476  return NULL;
477  }
478  d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset));
479  d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper *
480  sizeof(unsigned));
481  d->work = &d->statesarea[nss * wordsper];
482  d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors *
483  sizeof(struct sset *));
484  d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors *
485  sizeof(struct arcp));
486  d->cptsmalloced = 1;
487  d->mallocarea = (char *) d;
488  if (d->ssets == NULL || d->statesarea == NULL ||
489  d->outsarea == NULL || d->incarea == NULL)
490  {
491  freedfa(d);
492  ERR(REG_ESPACE);
493  return NULL;
494  }
495  }
496 
497  d->nssets = (v->eflags & REG_SMALL) ? 7 : nss;
498  d->nssused = 0;
499  d->nstates = cnfa->nstates;
500  d->ncolors = cnfa->ncolors;
501  d->wordsper = wordsper;
502  d->cnfa = cnfa;
503  d->cm = cm;
504  d->lastpost = NULL;
505  d->lastnopr = NULL;
506  d->search = d->ssets;
507 
508  /* initialization of sset fields is done as needed */
509 
510  return d;
511 }
struct sset * ssets
Definition: regexec.c:70
#define ERR
Definition: _int.h:146
struct arcp * incarea
Definition: regexec.c:74
struct colormap * cm
Definition: regexec.c:76
#define REG_ESPACE
Definition: regex.h:149
struct cnfa * cnfa
Definition: regexec.c:75
#define WORK
Definition: regexec.c:84
unsigned * work
Definition: regexec.c:72
struct sset * search
Definition: regexec.c:79
static void freedfa(struct dfa *d)
Definition: rege_dfa.c:517
#define FEWCOLORS
Definition: regexec.c:88
#define MALLOC(n)
Definition: regcustom.h:62
Definition: regexec.c:63
int nstates
Definition: regguts.h:356
unsigned statesarea[FEWSTATES *2+WORK]
Definition: regexec.c:93
struct dfa dfa
Definition: regexec.c:91
int cptsmalloced
Definition: regexec.c:80
#define assert(TEST)
Definition: imath.c:37
int ncolors
Definition: regexec.c:68
int nstates
Definition: regexec.c:67
int nssets
Definition: regexec.c:65
int nssused
Definition: regexec.c:66
int wordsper
Definition: regexec.c:69
int eflags
Definition: regexec.c:107
#define UBITS
Definition: regguts.h:125
struct arcp incarea[FEWSTATES *2 *FEWCOLORS]
Definition: regexec.c:95
chr * lastnopr
Definition: regexec.c:78
#define REG_SMALL
Definition: regex.h:129
Definition: regexec.c:39
struct sset ssets[FEWSTATES *2]
Definition: regexec.c:92
char * mallocarea
Definition: regexec.c:81
struct sset * outsarea[FEWSTATES *2 *FEWCOLORS]
Definition: regexec.c:94
struct sset ** outsarea
Definition: regexec.c:73
Definition: regexec.c:45
chr * lastpost
Definition: regexec.c:77
unsigned * statesarea
Definition: regexec.c:71
int ncolors
Definition: regguts.h:357
static struct sset* pickss ( struct vars v,
struct dfa d,
chr cp,
chr start 
)
static

Definition at line 871 of file rege_dfa.c.

References arcp::co, ERR, FDEBUG, sset::flags, i, dfa::incarea, sset::inchain, sset::ins, sset::lastseen, LOCKED, dfa::ncolors, dfa::nssets, dfa::nssused, sset::outs, dfa::outsarea, REG_ASSERT, dfa::search, arcp::ss, dfa::ssets, sset::states, dfa::statesarea, WHITE, and dfa::wordsper.

Referenced by getvacant().

875 {
876  int i;
877  struct sset *ss;
878  struct sset *end;
879  chr *ancient;
880 
881  /* shortcut for cases where cache isn't full */
882  if (d->nssused < d->nssets)
883  {
884  i = d->nssused;
885  d->nssused++;
886  ss = &d->ssets[i];
887  FDEBUG(("new c%d\n", i));
888  /* set up innards */
889  ss->states = &d->statesarea[i * d->wordsper];
890  ss->flags = 0;
891  ss->ins.ss = NULL;
892  ss->ins.co = WHITE; /* give it some value */
893  ss->outs = &d->outsarea[i * d->ncolors];
894  ss->inchain = &d->incarea[i * d->ncolors];
895  for (i = 0; i < d->ncolors; i++)
896  {
897  ss->outs[i] = NULL;
898  ss->inchain[i].ss = NULL;
899  }
900  return ss;
901  }
902 
903  /* look for oldest, or old enough anyway */
904  if (cp - start > d->nssets * 2 / 3) /* oldest 33% are expendable */
905  ancient = cp - d->nssets * 2 / 3;
906  else
907  ancient = start;
908  for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++)
909  if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
910  !(ss->flags & LOCKED))
911  {
912  d->search = ss + 1;
913  FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
914  return ss;
915  }
916  for (ss = d->ssets, end = d->search; ss < end; ss++)
917  if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
918  !(ss->flags & LOCKED))
919  {
920  d->search = ss + 1;
921  FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
922  return ss;
923  }
924 
925  /* nobody's old enough?!? -- something's really wrong */
926  FDEBUG(("cannot find victim to replace!\n"));
927  ERR(REG_ASSERT);
928  return NULL;
929 }
struct sset * ssets
Definition: regexec.c:70
chr * lastseen
Definition: regexec.c:58
struct sset ** outs
Definition: regexec.c:59
#define ERR
Definition: _int.h:146
struct arcp * incarea
Definition: regexec.c:74
struct sset * search
Definition: regexec.c:79
pg_wchar chr
Definition: regcustom.h:68
unsigned * states
Definition: regexec.c:47
struct arcp * inchain
Definition: regexec.c:60
int ncolors
Definition: regexec.c:68
int nssets
Definition: regexec.c:65
int nssused
Definition: regexec.c:66
int wordsper
Definition: regexec.c:69
#define FDEBUG(arglist)
Definition: regguts.h:116
#define REG_ASSERT
Definition: regex.h:151
#define WHITE
Definition: regguts.h:138
struct arcp ins
Definition: regexec.c:57
struct sset ** outsarea
Definition: regexec.c:73
#define LOCKED
Definition: regexec.c:55
Definition: regexec.c:45
int i
struct sset * ss
Definition: regexec.c:41
unsigned * statesarea
Definition: regexec.c:71
int flags
Definition: regexec.c:52
color co
Definition: regexec.c:42
static chr* shortest ( struct vars v,
struct dfa d,
chr start,
chr min,
chr max,
chr **  coldp,
int *  hitstopp 
)
static

Definition at line 168 of file rege_dfa.c.

References assert, cnfa::bos, dfa::cm, dfa::cnfa, vars::eflags, cnfa::eos, FDEBUG, sset::flags, GETCOLOR, initialize(), lastcold(), sset::lastseen, miss(), sset::outs, POSTSTATE, REG_FTRACE, REG_NOTBOL, REG_NOTEOL, dfa::ssets, vars::start, and vars::stop.

Referenced by lacon().

175 {
176  chr *cp;
177  chr *realmin = (min == v->stop) ? min : min + 1;
178  chr *realmax = (max == v->stop) ? max : max + 1;
179  color co;
180  struct sset *css;
181  struct sset *ss;
182  struct colormap *cm = d->cm;
183 
184  /* prevent "uninitialized variable" warnings */
185  if (coldp != NULL)
186  *coldp = NULL;
187  if (hitstopp != NULL)
188  *hitstopp = 0;
189 
190  /* initialize */
191  css = initialize(v, d, start);
192  if (css == NULL)
193  return NULL;
194  cp = start;
195 
196  /* startup */
197  FDEBUG(("--- startup ---\n"));
198  if (cp == v->start)
199  {
200  co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
201  FDEBUG(("color %ld\n", (long) co));
202  }
203  else
204  {
205  co = GETCOLOR(cm, *(cp - 1));
206  FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
207  }
208  css = miss(v, d, css, co, cp, start);
209  if (css == NULL)
210  return NULL;
211  css->lastseen = cp;
212  ss = css;
213 
214  /*
215  * This is the main text-scanning loop. It seems worth having two copies
216  * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
217  * builds, when you're not actively tracing.
218  */
219 #ifdef REG_DEBUG
220  if (v->eflags & REG_FTRACE)
221  {
222  while (cp < realmax)
223  {
224  FDEBUG(("--- at c%d ---\n", (int) (css - d->ssets)));
225  co = GETCOLOR(cm, *cp);
226  FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
227  ss = css->outs[co];
228  if (ss == NULL)
229  {
230  ss = miss(v, d, css, co, cp + 1, start);
231  if (ss == NULL)
232  break; /* NOTE BREAK OUT */
233  }
234  cp++;
235  ss->lastseen = cp;
236  css = ss;
237  if ((ss->flags & POSTSTATE) && cp >= realmin)
238  break; /* NOTE BREAK OUT */
239  }
240  }
241  else
242 #endif
243  {
244  while (cp < realmax)
245  {
246  co = GETCOLOR(cm, *cp);
247  ss = css->outs[co];
248  if (ss == NULL)
249  {
250  ss = miss(v, d, css, co, cp + 1, start);
251  if (ss == NULL)
252  break; /* NOTE BREAK OUT */
253  }
254  cp++;
255  ss->lastseen = cp;
256  css = ss;
257  if ((ss->flags & POSTSTATE) && cp >= realmin)
258  break; /* NOTE BREAK OUT */
259  }
260  }
261 
262  if (ss == NULL)
263  return NULL;
264 
265  if (coldp != NULL) /* report last no-progress state set, if any */
266  *coldp = lastcold(v, d);
267 
268  if ((ss->flags & POSTSTATE) && cp > min)
269  {
270  assert(cp >= realmin);
271  cp--;
272  }
273  else if (cp == v->stop && max == v->stop)
274  {
275  co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
276  FDEBUG(("color %ld\n", (long) co));
277  ss = miss(v, d, css, co, cp, start);
278  /* match might have ended at eol */
279  if ((ss == NULL || !(ss->flags & POSTSTATE)) && hitstopp != NULL)
280  *hitstopp = 1;
281  }
282 
283  if (ss == NULL || !(ss->flags & POSTSTATE))
284  return NULL;
285 
286  return cp;
287 }
struct sset * ssets
Definition: regexec.c:70
chr * lastseen
Definition: regexec.c:58
#define REG_NOTBOL
Definition: regex.h:124
struct sset ** outs
Definition: regexec.c:59
struct colormap * cm
Definition: regexec.c:76
struct cnfa * cnfa
Definition: regexec.c:75
short color
Definition: regguts.h:134
#define REG_FTRACE
Definition: regex.h:127
static struct sset * initialize(struct vars *v, struct dfa *d, chr *start)
Definition: rege_dfa.c:557
chr * start
Definition: regexec.c:111
pg_wchar chr
Definition: regcustom.h:68
static chr * lastcold(struct vars *v, struct dfa *d)
Definition: rege_dfa.c:417
#define assert(TEST)
Definition: imath.c:37
int eflags
Definition: regexec.c:107
color bos[2]
Definition: regguts.h:362
#define FDEBUG(arglist)
Definition: regguts.h:116
static struct sset * miss(struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
Definition: rege_dfa.c:603
#define POSTSTATE
Definition: regexec.c:54
const chr * stop
Definition: regcomp.c:228
Definition: regexec.c:45
int flags
Definition: regexec.c:52
#define REG_NOTEOL
Definition: regex.h:125
#define GETCOLOR(cm, c)
Definition: regguts.h:235
size_t max
Definition: regguts.h:212
color eos[2]
Definition: regguts.h:363