PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
rege_dfa.c
Go to the documentation of this file.
1 /*
2  * DFA routines
3  * This file is #included by regexec.c.
4  *
5  * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
6  *
7  * Development of this software was funded, in part, by Cray Research Inc.,
8  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9  * Corporation, none of whom are responsible for the results. The author
10  * thanks all of them.
11  *
12  * Redistribution and use in source and binary forms -- with or without
13  * modification -- are permitted for any purpose, provided that
14  * redistributions in source form retain this entire copyright notice and
15  * indicate the origin and nature of any modifications.
16  *
17  * I'd appreciate being given credit for this package in the documentation
18  * of software which uses it, but that is not a requirement.
19  *
20  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * src/backend/regex/rege_dfa.c
32  *
33  */
34 
35 /*
36  * longest - longest-preferred matching engine
37  *
38  * On success, returns match endpoint address. Returns NULL on no match.
39  * Internal errors also return NULL, with v->err set.
40  */
41 static chr *
42 longest(struct vars * v,
43  struct dfa * d,
44  chr *start, /* where the match should start */
45  chr *stop, /* match must end at or before here */
46  int *hitstopp) /* record whether hit v->stop, if non-NULL */
47 {
48  chr *cp;
49  chr *realstop = (stop == v->stop) ? stop : stop + 1;
50  color co;
51  struct sset *css;
52  struct sset *ss;
53  chr *post;
54  int i;
55  struct colormap *cm = d->cm;
56 
57  /* prevent "uninitialized variable" warnings */
58  if (hitstopp != NULL)
59  *hitstopp = 0;
60 
61  /* initialize */
62  css = initialize(v, d, start);
63  if (css == NULL)
64  return NULL;
65  cp = start;
66 
67  /* startup */
68  FDEBUG(("+++ startup +++\n"));
69  if (cp == v->start)
70  {
71  co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
72  FDEBUG(("color %ld\n", (long) co));
73  }
74  else
75  {
76  co = GETCOLOR(cm, *(cp - 1));
77  FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
78  }
79  css = miss(v, d, css, co, cp, start);
80  if (css == NULL)
81  return NULL;
82  css->lastseen = cp;
83 
84  /*
85  * This is the main text-scanning loop. It seems worth having two copies
86  * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
87  * builds, when you're not actively tracing.
88  */
89 #ifdef REG_DEBUG
90  if (v->eflags & REG_FTRACE)
91  {
92  while (cp < realstop)
93  {
94  FDEBUG(("+++ at c%d +++\n", (int) (css - d->ssets)));
95  co = GETCOLOR(cm, *cp);
96  FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
97  ss = css->outs[co];
98  if (ss == NULL)
99  {
100  ss = miss(v, d, css, co, cp + 1, start);
101  if (ss == NULL)
102  break; /* NOTE BREAK OUT */
103  }
104  cp++;
105  ss->lastseen = cp;
106  css = ss;
107  }
108  }
109  else
110 #endif
111  {
112  while (cp < realstop)
113  {
114  co = GETCOLOR(cm, *cp);
115  ss = css->outs[co];
116  if (ss == NULL)
117  {
118  ss = miss(v, d, css, co, cp + 1, start);
119  if (ss == NULL)
120  break; /* NOTE BREAK OUT */
121  }
122  cp++;
123  ss->lastseen = cp;
124  css = ss;
125  }
126  }
127 
128  if (ISERR())
129  return NULL;
130 
131  /* shutdown */
132  FDEBUG(("+++ shutdown at c%d +++\n", (int) (css - d->ssets)));
133  if (cp == v->stop && stop == v->stop)
134  {
135  if (hitstopp != NULL)
136  *hitstopp = 1;
137  co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
138  FDEBUG(("color %ld\n", (long) co));
139  ss = miss(v, d, css, co, cp, start);
140  if (ISERR())
141  return NULL;
142  /* special case: match ended at eol? */
143  if (ss != NULL && (ss->flags & POSTSTATE))
144  return cp;
145  else if (ss != NULL)
146  ss->lastseen = cp; /* to be tidy */
147  }
148 
149  /* find last match, if any */
150  post = d->lastpost;
151  for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
152  if ((ss->flags & POSTSTATE) && post != ss->lastseen &&
153  (post == NULL || post < ss->lastseen))
154  post = ss->lastseen;
155  if (post != NULL) /* found one */
156  return post - 1;
157 
158  return NULL;
159 }
160 
161 /*
162  * shortest - shortest-preferred matching engine
163  *
164  * On success, returns match endpoint address. Returns NULL on no match.
165  * Internal errors also return NULL, with v->err set.
166  */
167 static chr *
168 shortest(struct vars * v,
169  struct dfa * d,
170  chr *start, /* where the match should start */
171  chr *min, /* match must end at or after here */
172  chr *max, /* match must end at or before here */
173  chr **coldp, /* store coldstart pointer here, if non-NULL */
174  int *hitstopp) /* record whether hit v->stop, if non-NULL */
175 {
176  chr *cp;
177  chr *realmin = (min == v->stop) ? min : min + 1;
178  chr *realmax = (max == v->stop) ? max : max + 1;
179  color co;
180  struct sset *css;
181  struct sset *ss;
182  struct colormap *cm = d->cm;
183 
184  /* prevent "uninitialized variable" warnings */
185  if (coldp != NULL)
186  *coldp = NULL;
187  if (hitstopp != NULL)
188  *hitstopp = 0;
189 
190  /* initialize */
191  css = initialize(v, d, start);
192  if (css == NULL)
193  return NULL;
194  cp = start;
195 
196  /* startup */
197  FDEBUG(("--- startup ---\n"));
198  if (cp == v->start)
199  {
200  co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
201  FDEBUG(("color %ld\n", (long) co));
202  }
203  else
204  {
205  co = GETCOLOR(cm, *(cp - 1));
206  FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
207  }
208  css = miss(v, d, css, co, cp, start);
209  if (css == NULL)
210  return NULL;
211  css->lastseen = cp;
212  ss = css;
213 
214  /*
215  * This is the main text-scanning loop. It seems worth having two copies
216  * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
217  * builds, when you're not actively tracing.
218  */
219 #ifdef REG_DEBUG
220  if (v->eflags & REG_FTRACE)
221  {
222  while (cp < realmax)
223  {
224  FDEBUG(("--- at c%d ---\n", (int) (css - d->ssets)));
225  co = GETCOLOR(cm, *cp);
226  FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
227  ss = css->outs[co];
228  if (ss == NULL)
229  {
230  ss = miss(v, d, css, co, cp + 1, start);
231  if (ss == NULL)
232  break; /* NOTE BREAK OUT */
233  }
234  cp++;
235  ss->lastseen = cp;
236  css = ss;
237  if ((ss->flags & POSTSTATE) && cp >= realmin)
238  break; /* NOTE BREAK OUT */
239  }
240  }
241  else
242 #endif
243  {
244  while (cp < realmax)
245  {
246  co = GETCOLOR(cm, *cp);
247  ss = css->outs[co];
248  if (ss == NULL)
249  {
250  ss = miss(v, d, css, co, cp + 1, start);
251  if (ss == NULL)
252  break; /* NOTE BREAK OUT */
253  }
254  cp++;
255  ss->lastseen = cp;
256  css = ss;
257  if ((ss->flags & POSTSTATE) && cp >= realmin)
258  break; /* NOTE BREAK OUT */
259  }
260  }
261 
262  if (ss == NULL)
263  return NULL;
264 
265  if (coldp != NULL) /* report last no-progress state set, if any */
266  *coldp = lastcold(v, d);
267 
268  if ((ss->flags & POSTSTATE) && cp > min)
269  {
270  assert(cp >= realmin);
271  cp--;
272  }
273  else if (cp == v->stop && max == v->stop)
274  {
275  co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
276  FDEBUG(("color %ld\n", (long) co));
277  ss = miss(v, d, css, co, cp, start);
278  /* match might have ended at eol */
279  if ((ss == NULL || !(ss->flags & POSTSTATE)) && hitstopp != NULL)
280  *hitstopp = 1;
281  }
282 
283  if (ss == NULL || !(ss->flags & POSTSTATE))
284  return NULL;
285 
286  return cp;
287 }
288 
289 /*
290  * matchuntil - incremental matching engine
291  *
292  * This is meant for use with a search-style NFA (that is, the pattern is
293  * known to act as though it had a leading .*). We determine whether a
294  * match exists starting at v->start and ending at probe. Multiple calls
295  * require only O(N) time not O(N^2) so long as the probe values are
296  * nondecreasing. *lastcss and *lastcp must be initialized to NULL before
297  * starting a series of calls.
298  *
299  * Returns 1 if a match exists, 0 if not.
300  * Internal errors also return 0, with v->err set.
301  */
302 static int
303 matchuntil(struct vars * v,
304  struct dfa * d,
305  chr *probe, /* we want to know if a match ends here */
306  struct sset ** lastcss, /* state storage across calls */
307  chr **lastcp) /* state storage across calls */
308 {
309  chr *cp = *lastcp;
310  color co;
311  struct sset *css = *lastcss;
312  struct sset *ss;
313  struct colormap *cm = d->cm;
314 
315  /* initialize and startup, or restart, if necessary */
316  if (cp == NULL || cp > probe)
317  {
318  cp = v->start;
319  css = initialize(v, d, cp);
320  if (css == NULL)
321  return 0;
322 
323  FDEBUG((">>> startup >>>\n"));
324  co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
325  FDEBUG(("color %ld\n", (long) co));
326 
327  css = miss(v, d, css, co, cp, v->start);
328  if (css == NULL)
329  return 0;
330  css->lastseen = cp;
331  }
332  else if (css == NULL)
333  {
334  /* we previously found that no match is possible beyond *lastcp */
335  return 0;
336  }
337  ss = css;
338 
339  /*
340  * This is the main text-scanning loop. It seems worth having two copies
341  * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
342  * builds, when you're not actively tracing.
343  */
344 #ifdef REG_DEBUG
345  if (v->eflags & REG_FTRACE)
346  {
347  while (cp < probe)
348  {
349  FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
350  co = GETCOLOR(cm, *cp);
351  FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
352  ss = css->outs[co];
353  if (ss == NULL)
354  {
355  ss = miss(v, d, css, co, cp + 1, v->start);
356  if (ss == NULL)
357  break; /* NOTE BREAK OUT */
358  }
359  cp++;
360  ss->lastseen = cp;
361  css = ss;
362  }
363  }
364  else
365 #endif
366  {
367  while (cp < probe)
368  {
369  co = GETCOLOR(cm, *cp);
370  ss = css->outs[co];
371  if (ss == NULL)
372  {
373  ss = miss(v, d, css, co, cp + 1, v->start);
374  if (ss == NULL)
375  break; /* NOTE BREAK OUT */
376  }
377  cp++;
378  ss->lastseen = cp;
379  css = ss;
380  }
381  }
382 
383  *lastcss = ss;
384  *lastcp = cp;
385 
386  if (ss == NULL)
387  return 0; /* impossible match, or internal error */
388 
389  /* We need to process one more chr, or the EOS symbol, to check match */
390  if (cp < v->stop)
391  {
392  FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
393  co = GETCOLOR(cm, *cp);
394  FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
395  ss = css->outs[co];
396  if (ss == NULL)
397  ss = miss(v, d, css, co, cp + 1, v->start);
398  }
399  else
400  {
401  assert(cp == v->stop);
402  co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
403  FDEBUG(("color %ld\n", (long) co));
404  ss = miss(v, d, css, co, cp, v->start);
405  }
406 
407  if (ss == NULL || !(ss->flags & POSTSTATE))
408  return 0;
409 
410  return 1;
411 }
412 
413 /*
414  * lastcold - determine last point at which no progress had been made
415  */
416 static chr * /* endpoint, or NULL */
417 lastcold(struct vars * v,
418  struct dfa * d)
419 {
420  struct sset *ss;
421  chr *nopr;
422  int i;
423 
424  nopr = d->lastnopr;
425  if (nopr == NULL)
426  nopr = v->start;
427  for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
428  if ((ss->flags & NOPROGRESS) && nopr < ss->lastseen)
429  nopr = ss->lastseen;
430  return nopr;
431 }
432 
433 /*
434  * newdfa - set up a fresh DFA
435  */
436 static struct dfa *
437 newdfa(struct vars * v,
438  struct cnfa * cnfa,
439  struct colormap * cm,
440  struct smalldfa * sml) /* preallocated space, may be NULL */
441 {
442  struct dfa *d;
443  size_t nss = cnfa->nstates * 2;
444  int wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
445  struct smalldfa *smallwas = sml;
446 
447  assert(cnfa != NULL && cnfa->nstates != 0);
448 
449  if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS)
450  {
451  assert(wordsper == 1);
452  if (sml == NULL)
453  {
454  sml = (struct smalldfa *) MALLOC(sizeof(struct smalldfa));
455  if (sml == NULL)
456  {
457  ERR(REG_ESPACE);
458  return NULL;
459  }
460  }
461  d = &sml->dfa;
462  d->ssets = sml->ssets;
463  d->statesarea = sml->statesarea;
464  d->work = &d->statesarea[nss];
465  d->outsarea = sml->outsarea;
466  d->incarea = sml->incarea;
467  d->cptsmalloced = 0;
468  d->mallocarea = (smallwas == NULL) ? (char *) sml : NULL;
469  }
470  else
471  {
472  d = (struct dfa *) MALLOC(sizeof(struct dfa));
473  if (d == NULL)
474  {
475  ERR(REG_ESPACE);
476  return NULL;
477  }
478  d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset));
479  d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper *
480  sizeof(unsigned));
481  d->work = &d->statesarea[nss * wordsper];
482  d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors *
483  sizeof(struct sset *));
484  d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors *
485  sizeof(struct arcp));
486  d->cptsmalloced = 1;
487  d->mallocarea = (char *) d;
488  if (d->ssets == NULL || d->statesarea == NULL ||
489  d->outsarea == NULL || d->incarea == NULL)
490  {
491  freedfa(d);
492  ERR(REG_ESPACE);
493  return NULL;
494  }
495  }
496 
497  d->nssets = (v->eflags & REG_SMALL) ? 7 : nss;
498  d->nssused = 0;
499  d->nstates = cnfa->nstates;
500  d->ncolors = cnfa->ncolors;
501  d->wordsper = wordsper;
502  d->cnfa = cnfa;
503  d->cm = cm;
504  d->lastpost = NULL;
505  d->lastnopr = NULL;
506  d->search = d->ssets;
507 
508  /* initialization of sset fields is done as needed */
509 
510  return d;
511 }
512 
513 /*
514  * freedfa - free a DFA
515  */
516 static void
517 freedfa(struct dfa * d)
518 {
519  if (d->cptsmalloced)
520  {
521  if (d->ssets != NULL)
522  FREE(d->ssets);
523  if (d->statesarea != NULL)
524  FREE(d->statesarea);
525  if (d->outsarea != NULL)
526  FREE(d->outsarea);
527  if (d->incarea != NULL)
528  FREE(d->incarea);
529  }
530 
531  if (d->mallocarea != NULL)
532  FREE(d->mallocarea);
533 }
534 
535 /*
536  * hash - construct a hash code for a bitvector
537  *
538  * There are probably better ways, but they're more expensive.
539  */
540 static unsigned
541 hash(unsigned *uv,
542  int n)
543 {
544  int i;
545  unsigned h;
546 
547  h = 0;
548  for (i = 0; i < n; i++)
549  h ^= uv[i];
550  return h;
551 }
552 
553 /*
554  * initialize - hand-craft a cache entry for startup, otherwise get ready
555  */
556 static struct sset *
557 initialize(struct vars * v,
558  struct dfa * d,
559  chr *start)
560 {
561  struct sset *ss;
562  int i;
563 
564  /* is previous one still there? */
565  if (d->nssused > 0 && (d->ssets[0].flags & STARTER))
566  ss = &d->ssets[0];
567  else
568  { /* no, must (re)build it */
569  ss = getvacant(v, d, start, start);
570  if (ss == NULL)
571  return NULL;
572  for (i = 0; i < d->wordsper; i++)
573  ss->states[i] = 0;
574  BSET(ss->states, d->cnfa->pre);
575  ss->hash = HASH(ss->states, d->wordsper);
576  assert(d->cnfa->pre != d->cnfa->post);
577  ss->flags = STARTER | LOCKED | NOPROGRESS;
578  /* lastseen dealt with below */
579  }
580 
581  for (i = 0; i < d->nssused; i++)
582  d->ssets[i].lastseen = NULL;
583  ss->lastseen = start; /* maybe untrue, but harmless */
584  d->lastpost = NULL;
585  d->lastnopr = NULL;
586  return ss;
587 }
588 
589 /*
590  * miss - handle a stateset cache miss
591  *
592  * css is the current stateset, co is the color of the current input character,
593  * cp points to the character after that (which is where we may need to test
594  * LACONs). start does not affect matching behavior but is needed for pickss'
595  * heuristics about which stateset cache entry to replace.
596  *
597  * Ordinarily, returns the address of the next stateset (the one that is
598  * valid after consuming the input character). Returns NULL if no valid
599  * NFA states remain, ie we have a certain match failure.
600  * Internal errors also return NULL, with v->err set.
601  */
602 static struct sset *
603 miss(struct vars * v,
604  struct dfa * d,
605  struct sset * css,
606  color co,
607  chr *cp, /* next chr */
608  chr *start) /* where the attempt got started */
609 {
610  struct cnfa *cnfa = d->cnfa;
611  int i;
612  unsigned h;
613  struct carc *ca;
614  struct sset *p;
615  int ispost;
616  int noprogress;
617  int gotstate;
618  int dolacons;
619  int sawlacons;
620 
621  /* for convenience, we can be called even if it might not be a miss */
622  if (css->outs[co] != NULL)
623  {
624  FDEBUG(("hit\n"));
625  return css->outs[co];
626  }
627  FDEBUG(("miss\n"));
628 
629  /*
630  * Checking for operation cancel in the inner text search loop seems
631  * unduly expensive. As a compromise, check during cache misses.
632  */
633  if (CANCEL_REQUESTED(v->re))
634  {
635  ERR(REG_CANCEL);
636  return NULL;
637  }
638 
639  /*
640  * What set of states would we end up in after consuming the co character?
641  * We first consider PLAIN arcs that consume the character, and then look
642  * to see what LACON arcs could be traversed after consuming it.
643  */
644  for (i = 0; i < d->wordsper; i++)
645  d->work[i] = 0; /* build new stateset bitmap in d->work */
646  ispost = 0;
647  noprogress = 1;
648  gotstate = 0;
649  for (i = 0; i < d->nstates; i++)
650  if (ISBSET(css->states, i))
651  for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
652  if (ca->co == co)
653  {
654  BSET(d->work, ca->to);
655  gotstate = 1;
656  if (ca->to == cnfa->post)
657  ispost = 1;
658  if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
659  noprogress = 0;
660  FDEBUG(("%d -> %d\n", i, ca->to));
661  }
662  if (!gotstate)
663  return NULL; /* character cannot reach any new state */
664  dolacons = (cnfa->flags & HASLACONS);
665  sawlacons = 0;
666  /* outer loop handles transitive closure of reachable-by-LACON states */
667  while (dolacons)
668  {
669  dolacons = 0;
670  for (i = 0; i < d->nstates; i++)
671  if (ISBSET(d->work, i))
672  for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
673  {
674  if (ca->co < cnfa->ncolors)
675  continue; /* not a LACON arc */
676  if (ISBSET(d->work, ca->to))
677  continue; /* arc would be a no-op anyway */
678  sawlacons = 1; /* this LACON affects our result */
679  if (!lacon(v, cnfa, cp, ca->co))
680  {
681  if (ISERR())
682  return NULL;
683  continue; /* LACON arc cannot be traversed */
684  }
685  if (ISERR())
686  return NULL;
687  BSET(d->work, ca->to);
688  dolacons = 1;
689  if (ca->to == cnfa->post)
690  ispost = 1;
691  if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
692  noprogress = 0;
693  FDEBUG(("%d :> %d\n", i, ca->to));
694  }
695  }
696  h = HASH(d->work, d->wordsper);
697 
698  /* Is this stateset already in the cache? */
699  for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
700  if (HIT(h, d->work, p, d->wordsper))
701  {
702  FDEBUG(("cached c%d\n", (int) (p - d->ssets)));
703  break; /* NOTE BREAK OUT */
704  }
705  if (i == 0)
706  { /* nope, need a new cache entry */
707  p = getvacant(v, d, cp, start);
708  if (p == NULL)
709  return NULL;
710  assert(p != css);
711  for (i = 0; i < d->wordsper; i++)
712  p->states[i] = d->work[i];
713  p->hash = h;
714  p->flags = (ispost) ? POSTSTATE : 0;
715  if (noprogress)
716  p->flags |= NOPROGRESS;
717  /* lastseen to be dealt with by caller */
718  }
719 
720  /*
721  * Link new stateset to old, unless a LACON affected the result, in which
722  * case we don't create the link. That forces future transitions across
723  * this same arc (same prior stateset and character color) to come through
724  * miss() again, so that we can recheck the LACON(s), which might or might
725  * not pass since context will be different.
726  */
727  if (!sawlacons)
728  {
729  FDEBUG(("c%d[%d]->c%d\n",
730  (int) (css - d->ssets), co, (int) (p - d->ssets)));
731  css->outs[co] = p;
732  css->inchain[co] = p->ins;
733  p->ins.ss = css;
734  p->ins.co = co;
735  }
736  return p;
737 }
738 
739 /*
740  * lacon - lookaround-constraint checker for miss()
741  */
742 static int /* predicate: constraint satisfied? */
743 lacon(struct vars * v,
744  struct cnfa * pcnfa, /* parent cnfa */
745  chr *cp,
746  color co) /* "color" of the lookaround constraint */
747 {
748  int n;
749  struct subre *sub;
750  struct dfa *d;
751  chr *end;
752  int satisfied;
753 
754  /* Since this is recursive, it could be driven to stack overflow */
755  if (STACK_TOO_DEEP(v->re))
756  {
757  ERR(REG_ETOOBIG);
758  return 0;
759  }
760 
761  n = co - pcnfa->ncolors;
762  assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
763  FDEBUG(("=== testing lacon %d\n", n));
764  sub = &v->g->lacons[n];
765  d = getladfa(v, n);
766  if (d == NULL)
767  return 0;
768  if (LATYPE_IS_AHEAD(sub->subno))
769  {
770  /* used to use longest() here, but shortest() could be much cheaper */
771  end = shortest(v, d, cp, cp, v->stop,
772  (chr **) NULL, (int *) NULL);
773  satisfied = LATYPE_IS_POS(sub->subno) ? (end != NULL) : (end == NULL);
774  }
775  else
776  {
777  /*
778  * To avoid doing O(N^2) work when repeatedly testing a lookbehind
779  * constraint in an N-character string, we use matchuntil() which can
780  * cache the DFA state across calls. We only need to restart if the
781  * probe point decreases, which is not common. The NFA we're using is
782  * a search NFA, so it doesn't mind scanning over stuff before the
783  * nominal match.
784  */
785  satisfied = matchuntil(v, d, cp, &v->lblastcss[n], &v->lblastcp[n]);
786  if (!LATYPE_IS_POS(sub->subno))
787  satisfied = !satisfied;
788  }
789  FDEBUG(("=== lacon %d satisfied %d\n", n, satisfied));
790  return satisfied;
791 }
792 
793 /*
794  * getvacant - get a vacant state set
795  *
796  * This routine clears out the inarcs and outarcs, but does not otherwise
797  * clear the innards of the state set -- that's up to the caller.
798  */
799 static struct sset *
800 getvacant(struct vars * v,
801  struct dfa * d,
802  chr *cp,
803  chr *start)
804 {
805  int i;
806  struct sset *ss;
807  struct sset *p;
808  struct arcp ap;
809  color co;
810 
811  ss = pickss(v, d, cp, start);
812  if (ss == NULL)
813  return NULL;
814  assert(!(ss->flags & LOCKED));
815 
816  /* clear out its inarcs, including self-referential ones */
817  ap = ss->ins;
818  while ((p = ap.ss) != NULL)
819  {
820  co = ap.co;
821  FDEBUG(("zapping c%d's %ld outarc\n", (int) (p - d->ssets), (long) co));
822  p->outs[co] = NULL;
823  ap = p->inchain[co];
824  p->inchain[co].ss = NULL; /* paranoia */
825  }
826  ss->ins.ss = NULL;
827 
828  /* take it off the inarc chains of the ssets reached by its outarcs */
829  for (i = 0; i < d->ncolors; i++)
830  {
831  p = ss->outs[i];
832  assert(p != ss); /* not self-referential */
833  if (p == NULL)
834  continue; /* NOTE CONTINUE */
835  FDEBUG(("del outarc %d from c%d's in chn\n", i, (int) (p - d->ssets)));
836  if (p->ins.ss == ss && p->ins.co == i)
837  p->ins = ss->inchain[i];
838  else
839  {
840  struct arcp lastap = {NULL, 0};
841 
842  assert(p->ins.ss != NULL);
843  for (ap = p->ins; ap.ss != NULL &&
844  !(ap.ss == ss && ap.co == i);
845  ap = ap.ss->inchain[ap.co])
846  lastap = ap;
847  assert(ap.ss != NULL);
848  lastap.ss->inchain[lastap.co] = ss->inchain[i];
849  }
850  ss->outs[i] = NULL;
851  ss->inchain[i].ss = NULL;
852  }
853 
854  /* if ss was a success state, may need to remember location */
855  if ((ss->flags & POSTSTATE) && ss->lastseen != d->lastpost &&
856  (d->lastpost == NULL || d->lastpost < ss->lastseen))
857  d->lastpost = ss->lastseen;
858 
859  /* likewise for a no-progress state */
860  if ((ss->flags & NOPROGRESS) && ss->lastseen != d->lastnopr &&
861  (d->lastnopr == NULL || d->lastnopr < ss->lastseen))
862  d->lastnopr = ss->lastseen;
863 
864  return ss;
865 }
866 
867 /*
868  * pickss - pick the next stateset to be used
869  */
870 static struct sset *
871 pickss(struct vars * v,
872  struct dfa * d,
873  chr *cp,
874  chr *start)
875 {
876  int i;
877  struct sset *ss;
878  struct sset *end;
879  chr *ancient;
880 
881  /* shortcut for cases where cache isn't full */
882  if (d->nssused < d->nssets)
883  {
884  i = d->nssused;
885  d->nssused++;
886  ss = &d->ssets[i];
887  FDEBUG(("new c%d\n", i));
888  /* set up innards */
889  ss->states = &d->statesarea[i * d->wordsper];
890  ss->flags = 0;
891  ss->ins.ss = NULL;
892  ss->ins.co = WHITE; /* give it some value */
893  ss->outs = &d->outsarea[i * d->ncolors];
894  ss->inchain = &d->incarea[i * d->ncolors];
895  for (i = 0; i < d->ncolors; i++)
896  {
897  ss->outs[i] = NULL;
898  ss->inchain[i].ss = NULL;
899  }
900  return ss;
901  }
902 
903  /* look for oldest, or old enough anyway */
904  if (cp - start > d->nssets * 2 / 3) /* oldest 33% are expendable */
905  ancient = cp - d->nssets * 2 / 3;
906  else
907  ancient = start;
908  for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++)
909  if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
910  !(ss->flags & LOCKED))
911  {
912  d->search = ss + 1;
913  FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
914  return ss;
915  }
916  for (ss = d->ssets, end = d->search; ss < end; ss++)
917  if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
918  !(ss->flags & LOCKED))
919  {
920  d->search = ss + 1;
921  FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
922  return ss;
923  }
924 
925  /* nobody's old enough?!? -- something's really wrong */
926  FDEBUG(("cannot find victim to replace!\n"));
927  ERR(REG_ASSERT);
928  return NULL;
929 }
struct sset * ssets
Definition: regexec.c:70
#define STARTER
Definition: regexec.c:53
#define LATYPE_IS_POS(la)
Definition: regguts.h:103
static int matchuntil(struct vars *v, struct dfa *d, chr *probe, struct sset **lastcss, chr **lastcp)
Definition: rege_dfa.c:303
chr * lastseen
Definition: regexec.c:58
#define REG_NOTBOL
Definition: regex.h:124
struct sset ** outs
Definition: regexec.c:59
#define ERR
Definition: _int.h:146
struct arcp * incarea
Definition: regexec.c:74
struct subre * lacons
Definition: regguts.h:474
struct colormap * cm
Definition: regexec.c:76
#define CNFA_NOPROGRESS
Definition: regguts.h:365
#define REG_ESPACE
Definition: regex.h:149
struct cnfa * cnfa
Definition: regexec.c:75
int subno
Definition: regguts.h:427
Definition: regguts.h:354
short color
Definition: regguts.h:134
int pre
Definition: regguts.h:360
#define WORK
Definition: regexec.c:84
#define FREE(p)
Definition: regcustom.h:54
static struct sset * pickss(struct vars *v, struct dfa *d, chr *cp, chr *start)
Definition: rege_dfa.c:871
#define REG_FTRACE
Definition: regex.h:127
unsigned * work
Definition: regexec.c:72
#define HASH(sign, val)
Definition: hstore_gist.c:38
struct sset * search
Definition: regexec.c:79
static void freedfa(struct dfa *d)
Definition: rege_dfa.c:517
#define ISERR()
Definition: regcomp.c:264
static struct sset * initialize(struct vars *v, struct dfa *d, chr *start)
Definition: rege_dfa.c:557
#define FEWCOLORS
Definition: regexec.c:88
struct carc ** states
Definition: regguts.h:366
#define HIT(h, bv, ss, nw)
Definition: regexec.c:50
chr * start
Definition: regexec.c:111
#define MALLOC(n)
Definition: regcustom.h:53
Definition: regexec.c:63
struct sset ** lblastcss
Definition: regexec.c:117
static struct sset * getvacant(struct vars *v, struct dfa *d, chr *cp, chr *start)
Definition: rege_dfa.c:800
int nstates
Definition: regguts.h:356
static struct dfa * newdfa(struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct smalldfa *sml)
Definition: rege_dfa.c:437
pg_wchar chr
Definition: regcustom.h:59
unsigned statesarea[FEWSTATES *2+WORK]
Definition: regexec.c:93
unsigned * states
Definition: regexec.c:47
static chr * lastcold(struct vars *v, struct dfa *d)
Definition: rege_dfa.c:417
regex_t * re
Definition: regcomp.c:228
struct arcp * inchain
Definition: regexec.c:60
struct dfa dfa
Definition: regexec.c:91
#define NOPROGRESS
Definition: regexec.c:56
int cptsmalloced
Definition: regexec.c:80
#define assert(TEST)
Definition: imath.c:37
int ncolors
Definition: regexec.c:68
#define LATYPE_IS_AHEAD(la)
Definition: regguts.h:104
char * stflags
Definition: regguts.h:364
int nstates
Definition: regexec.c:67
int nssets
Definition: regexec.c:65
int nssused
Definition: regexec.c:66
int wordsper
Definition: regexec.c:69
struct guts * g
Definition: regexec.c:106
int eflags
Definition: regexec.c:107
#define UBITS
Definition: regguts.h:125
color bos[2]
Definition: regguts.h:362
struct arcp incarea[FEWSTATES *2 *FEWCOLORS]
Definition: regexec.c:95
chr * lastnopr
Definition: regexec.c:78
#define REG_SMALL
Definition: regex.h:129
#define FDEBUG(arglist)
Definition: regguts.h:116
static struct sset * miss(struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
Definition: rege_dfa.c:603
static chr * shortest(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
Definition: rege_dfa.c:168
Definition: regexec.c:39
int to
Definition: regguts.h:351
struct sset ssets[FEWSTATES *2]
Definition: regexec.c:92
#define REG_ASSERT
Definition: regex.h:151
#define NULL
Definition: c.h:226
#define ISBSET(uv, sn)
Definition: regguts.h:127
char * mallocarea
Definition: regexec.c:81
int flags
Definition: regguts.h:358
struct sset * outsarea[FEWSTATES *2 *FEWCOLORS]
Definition: regexec.c:94
#define POSTSTATE
Definition: regexec.c:54
#define REG_ETOOBIG
Definition: regex.h:155
#define WHITE
Definition: regguts.h:138
static chr * longest(struct vars *v, struct dfa *d, chr *start, chr *stop, int *hitstopp)
Definition: rege_dfa.c:42
Definition: regguts.h:408
struct arcp ins
Definition: regexec.c:57
int post
Definition: regguts.h:361
const chr * stop
Definition: regcomp.c:230
struct vars * v
Definition: regguts.h:210
#define BSET(uv, sn)
Definition: regguts.h:126
struct sset ** outsarea
Definition: regexec.c:73
#define LOCKED
Definition: regexec.c:55
color co
Definition: regguts.h:350
Definition: regexec.c:45
#define CANCEL_REQUESTED(re)
Definition: regguts.h:452
int i
#define COLORLESS
Definition: regguts.h:137
chr * lastpost
Definition: regexec.c:77
chr ** lblastcp
Definition: regexec.c:118
static int lacon(struct vars *v, struct cnfa *pcnfa, chr *cp, color co)
Definition: rege_dfa.c:743
unsigned hash
Definition: regexec.c:48
struct sset * ss
Definition: regexec.c:41
unsigned * statesarea
Definition: regexec.c:71
static struct dfa * getladfa(struct vars *, int)
Definition: regexec.c:355
int flags
Definition: regexec.c:52
Definition: regcomp.c:226
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:541
#define HASLACONS
Definition: regguts.h:359
#define REG_NOTEOL
Definition: regex.h:125
#define GETCOLOR(cm, c)
Definition: regguts.h:235
size_t max
Definition: regguts.h:212
#define STACK_TOO_DEEP(re)
Definition: regguts.h:455
color eos[2]
Definition: regguts.h:363
int ncolors
Definition: regguts.h:357
color co
Definition: regexec.c:42
#define REG_CANCEL
Definition: regex.h:157
Definition: regguts.h:348