PostgreSQL Source Code  git master
regexec.c
Go to the documentation of this file.
1 /*
2  * re_*exec and friends - match REs
3  *
4  * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
5  *
6  * Development of this software was funded, in part, by Cray Research Inc.,
7  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
8  * Corporation, none of whom are responsible for the results. The author
9  * thanks all of them.
10  *
11  * Redistribution and use in source and binary forms -- with or without
12  * modification -- are permitted for any purpose, provided that
13  * redistributions in source form retain this entire copyright notice and
14  * indicate the origin and nature of any modifications.
15  *
16  * I'd appreciate being given credit for this package in the documentation
17  * of software which uses it, but that is not a requirement.
18  *
19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
20  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
21  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
22  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * src/backend/regex/regexec.c
31  *
32  */
33 
34 #include "regex/regguts.h"
35 
36 
37 
38 /* lazy-DFA representation */
39 struct arcp
40 { /* "pointer" to an outarc */
41  struct sset *ss;
43 };
44 
45 struct sset
46 { /* state set */
47  unsigned *states; /* pointer to bitvector */
48  unsigned hash; /* hash of bitvector */
49 #define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
50 #define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
51  memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
52  int flags;
53 #define STARTER 01 /* the initial state set */
54 #define POSTSTATE 02 /* includes the goal state */
55 #define LOCKED 04 /* locked in cache */
56 #define NOPROGRESS 010 /* zero-progress state set */
57  struct arcp ins; /* chain of inarcs pointing here */
58  chr *lastseen; /* last entered on arrival here */
59  struct sset **outs; /* outarc vector indexed by color */
60  struct arcp *inchain; /* chain-pointer vector for outarcs */
61 };
62 
63 struct dfa
64 {
65  int nssets; /* size of cache */
66  int nssused; /* how many entries occupied yet */
67  int nstates; /* number of states */
68  int ncolors; /* length of outarc and inchain vectors */
69  int wordsper; /* length of state-set bitvectors */
70  struct sset *ssets; /* state-set cache */
71  unsigned *statesarea; /* bitvector storage */
72  unsigned *work; /* pointer to work area within statesarea */
73  struct sset **outsarea; /* outarc-vector storage */
74  struct arcp *incarea; /* inchain storage */
75  struct cnfa *cnfa;
76  struct colormap *cm;
77  chr *lastpost; /* location of last cache-flushed success */
78  chr *lastnopr; /* location of last cache-flushed NOPROGRESS */
79  struct sset *search; /* replacement-search-pointer memory */
80  int backno; /* if DFA for a backref, subno it refers to */
81  short backmin; /* min repetitions for backref */
82  short backmax; /* max repetitions for backref */
83  bool ismalloced; /* should this struct dfa be freed? */
84  bool arraysmalloced; /* should its subsidiary arrays be freed? */
85 };
86 
87 #define WORK 1 /* number of work bitvectors needed */
88 
89 /* setup for non-malloc allocation for small cases */
90 #define FEWSTATES 20 /* must be less than UBITS */
91 #define FEWCOLORS 15
92 struct smalldfa
93 {
94  struct dfa dfa; /* must be first */
95  struct sset ssets[FEWSTATES * 2];
96  unsigned statesarea[FEWSTATES * 2 + WORK];
97  struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS];
98  struct arcp incarea[FEWSTATES * 2 * FEWCOLORS];
99 };
100 
101 #define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */
102 
103 
104 
105 /* internal variables, bundled for easy passing around */
106 struct vars
107 {
108  regex_t *re;
109  struct guts *g;
110  int eflags; /* copies of arguments */
111  size_t nmatch;
114  chr *start; /* start of string */
115  chr *search_start; /* search start of string */
116  chr *stop; /* just past end of string */
117  int err; /* error code if any (0 none) */
118  struct dfa **subdfas; /* per-tree-subre DFAs */
119  struct dfa **ladfas; /* per-lacon-subre DFAs */
120  struct sset **lblastcss; /* per-lacon-subre lookbehind restart data */
121  chr **lblastcp; /* per-lacon-subre lookbehind restart data */
122  struct smalldfa dfa1;
123  struct smalldfa dfa2;
124 };
125 
126 #define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
127 #define ISERR() VISERR(v)
128 #define VERR(vv,e) ((vv)->err = ((vv)->err ? (vv)->err : (e)))
129 #define ERR(e) VERR(v, e) /* record an error */
130 #define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */
131 #define OFF(p) ((p) - v->start)
132 #define LOFF(p) ((long)OFF(p))
133 
134 
135 
136 /*
137  * forward declarations
138  */
139 /* === regexec.c === */
140 static struct dfa *getsubdfa(struct vars *v, struct subre *t);
141 static struct dfa *getladfa(struct vars *v, int n);
142 static int find(struct vars *v, struct cnfa *cnfa, struct colormap *cm);
143 static int cfind(struct vars *v, struct cnfa *cnfa, struct colormap *cm);
144 static int cfindloop(struct vars *v, struct cnfa *cnfa, struct colormap *cm,
145  struct dfa *d, struct dfa *s, chr **coldp);
146 static void zapallsubs(regmatch_t *p, size_t n);
147 static void zaptreesubs(struct vars *v, struct subre *t);
148 static void subset(struct vars *v, struct subre *sub, chr *begin, chr *end);
149 static int cdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
150 static int ccondissect(struct vars *v, struct subre *t, chr *begin, chr *end);
151 static int crevcondissect(struct vars *v, struct subre *t, chr *begin, chr *end);
152 static int cbrdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
153 static int caltdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
154 static int citerdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
155 static int creviterdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
156 
157 /* === rege_dfa.c === */
158 static chr *longest(struct vars *v, struct dfa *d,
159  chr *start, chr *stop, int *hitstopp);
160 static chr *shortest(struct vars *v, struct dfa *d, chr *start, chr *min,
161  chr *max, chr **coldp, int *hitstopp);
162 static int matchuntil(struct vars *v, struct dfa *d, chr *probe,
163  struct sset **lastcss, chr **lastcp);
164 static chr *dfa_backref(struct vars *v, struct dfa *d, chr *start,
165  chr *min, chr *max, bool shortest);
166 static chr *lastcold(struct vars *v, struct dfa *d);
167 static struct dfa *newdfa(struct vars *v, struct cnfa *cnfa,
168  struct colormap *cm, struct smalldfa *sml);
169 static void freedfa(struct dfa *d);
170 static unsigned hash(unsigned *uv, int n);
171 static struct sset *initialize(struct vars *v, struct dfa *d, chr *start);
172 static struct sset *miss(struct vars *v, struct dfa *d, struct sset *css,
173  color co, chr *cp, chr *start);
174 static int lacon(struct vars *v, struct cnfa *pcnfa, chr *cp, color co);
175 static struct sset *getvacant(struct vars *v, struct dfa *d, chr *cp,
176  chr *start);
177 static struct sset *pickss(struct vars *v, struct dfa *d, chr *cp,
178  chr *start);
179 
180 
181 /*
182  * pg_regexec - match regular expression
183  */
184 int
186  const chr *string,
187  size_t len,
188  size_t search_start,
189  rm_detail_t *details,
190  size_t nmatch,
191  regmatch_t pmatch[],
192  int flags)
193 {
194  struct vars var;
195  struct vars *v = &var;
196  int st;
197  size_t n;
198  size_t i;
199  int backref;
200 
201 #define LOCALMAT 20
202  regmatch_t mat[LOCALMAT];
203 
204 #define LOCALDFAS 40
205  struct dfa *subdfas[LOCALDFAS];
206 
207  /* sanity checks */
208  if (re == NULL || string == NULL || re->re_magic != REMAGIC)
209  return REG_INVARG;
210  if (re->re_csize != sizeof(chr))
211  return REG_MIXED;
212  if (search_start > len)
213  return REG_NOMATCH;
214 
215  /* Initialize locale-dependent support */
217 
218  /* setup */
219  v->re = re;
220  v->g = (struct guts *) re->re_guts;
221  if ((v->g->cflags & REG_EXPECT) && details == NULL)
222  return REG_INVARG;
223  if (v->g->info & REG_UIMPOSSIBLE)
224  return REG_NOMATCH;
225  backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
226  v->eflags = flags;
227  if (backref && nmatch <= v->g->nsub)
228  {
229  /* need larger work area */
230  v->nmatch = v->g->nsub + 1;
231  if (v->nmatch <= LOCALMAT)
232  v->pmatch = mat;
233  else
234  v->pmatch = (regmatch_t *) MALLOC(v->nmatch * sizeof(regmatch_t));
235  if (v->pmatch == NULL)
236  return REG_ESPACE;
237  zapallsubs(v->pmatch, v->nmatch);
238  }
239  else
240  {
241  /* we can store results directly in caller's array */
242  v->pmatch = pmatch;
243  /* ensure any extra entries in caller's array are filled with -1 */
244  if (nmatch > 0)
245  zapallsubs(pmatch, nmatch);
246  /* then forget about extra entries, to avoid useless work in find() */
247  if (nmatch > v->g->nsub + 1)
248  nmatch = v->g->nsub + 1;
249  v->nmatch = nmatch;
250  }
251  v->details = details;
252  v->start = (chr *) string;
253  v->search_start = (chr *) string + search_start;
254  v->stop = (chr *) string + len;
255  v->err = 0;
256  v->subdfas = NULL;
257  v->ladfas = NULL;
258  v->lblastcss = NULL;
259  v->lblastcp = NULL;
260  /* below this point, "goto cleanup" will behave sanely */
261 
262  assert(v->g->ntree >= 0);
263  n = (size_t) v->g->ntree;
264  if (n <= LOCALDFAS)
265  v->subdfas = subdfas;
266  else
267  {
268  v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
269  if (v->subdfas == NULL)
270  {
271  st = REG_ESPACE;
272  goto cleanup;
273  }
274  }
275  for (i = 0; i < n; i++)
276  v->subdfas[i] = NULL;
277 
278  assert(v->g->nlacons >= 0);
279  n = (size_t) v->g->nlacons;
280  if (n > 0)
281  {
282  v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
283  if (v->ladfas == NULL)
284  {
285  st = REG_ESPACE;
286  goto cleanup;
287  }
288  for (i = 0; i < n; i++)
289  v->ladfas[i] = NULL;
290  v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
291  v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
292  if (v->lblastcss == NULL || v->lblastcp == NULL)
293  {
294  st = REG_ESPACE;
295  goto cleanup;
296  }
297  for (i = 0; i < n; i++)
298  {
299  v->lblastcss[i] = NULL;
300  v->lblastcp[i] = NULL;
301  }
302  }
303 
304  /* do it */
305  assert(v->g->tree != NULL);
306  if (backref)
307  st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
308  else
309  st = find(v, &v->g->tree->cnfa, &v->g->cmap);
310 
311  /* on success, ensure caller's match vector is filled correctly */
312  if (st == REG_OKAY && nmatch > 0)
313  {
314  if (v->pmatch != pmatch)
315  {
316  /* copy portion of match vector over from (larger) work area */
317  assert(nmatch <= v->nmatch);
318  memcpy(VS(pmatch), VS(v->pmatch), nmatch * sizeof(regmatch_t));
319  }
320  if (v->g->cflags & REG_NOSUB)
321  {
322  /* don't expose possibly-partial sub-match results to caller */
323  zapallsubs(pmatch, nmatch);
324  }
325  }
326 
327  /* clean up */
328 cleanup:
329  if (v->pmatch != pmatch && v->pmatch != mat)
330  FREE(v->pmatch);
331  if (v->subdfas != NULL)
332  {
333  n = (size_t) v->g->ntree;
334  for (i = 0; i < n; i++)
335  {
336  if (v->subdfas[i] != NULL)
337  freedfa(v->subdfas[i]);
338  }
339  if (v->subdfas != subdfas)
340  FREE(v->subdfas);
341  }
342  if (v->ladfas != NULL)
343  {
344  n = (size_t) v->g->nlacons;
345  for (i = 0; i < n; i++)
346  {
347  if (v->ladfas[i] != NULL)
348  freedfa(v->ladfas[i]);
349  }
350  FREE(v->ladfas);
351  }
352  if (v->lblastcss != NULL)
353  FREE(v->lblastcss);
354  if (v->lblastcp != NULL)
355  FREE(v->lblastcp);
356 
357 #ifdef REG_DEBUG
358  if (v->eflags & (REG_FTRACE | REG_MTRACE))
359  fflush(stdout);
360 #endif
361 
362  return st;
363 }
364 
365 /*
366  * getsubdfa - create or re-fetch the DFA for a tree subre node
367  *
368  * We only need to create the DFA once per overall regex execution.
369  * The DFA will be freed by the cleanup step in pg_regexec().
370  */
371 static struct dfa *
372 getsubdfa(struct vars *v,
373  struct subre *t)
374 {
375  struct dfa *d = v->subdfas[t->id];
376 
377  if (d == NULL)
378  {
379  d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
380  if (d == NULL)
381  return NULL;
382  /* set up additional info if this is a backref node */
383  if (t->op == 'b')
384  {
385  d->backno = t->backno;
386  d->backmin = t->min;
387  d->backmax = t->max;
388  }
389  v->subdfas[t->id] = d;
390  }
391  return d;
392 }
393 
394 /*
395  * getladfa - create or re-fetch the DFA for a LACON subre node
396  *
397  * Same as above, but for LACONs.
398  */
399 static struct dfa *
400 getladfa(struct vars *v,
401  int n)
402 {
403  assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
404 
405  if (v->ladfas[n] == NULL)
406  {
407  struct subre *sub = &v->g->lacons[n];
408 
409  v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
410  /* a LACON can't contain a backref, so nothing else to do */
411  }
412  return v->ladfas[n];
413 }
414 
415 /*
416  * find - find a match for the main NFA (no-complications case)
417  */
418 static int
419 find(struct vars *v,
420  struct cnfa *cnfa,
421  struct colormap *cm)
422 {
423  struct dfa *s;
424  struct dfa *d;
425  chr *begin;
426  chr *end = NULL;
427  chr *cold;
428  chr *open; /* open and close of range of possible starts */
429  chr *close;
430  int hitend;
431  int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
432 
433  /* first, a shot with the search RE */
434  s = newdfa(v, &v->g->search, cm, &v->dfa1);
435  if (s == NULL)
436  return v->err;
437  MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
438  cold = NULL;
439  close = shortest(v, s, v->search_start, v->search_start, v->stop,
440  &cold, (int *) NULL);
441  freedfa(s);
442  NOERR();
443  if (v->g->cflags & REG_EXPECT)
444  {
445  assert(v->details != NULL);
446  if (cold != NULL)
447  v->details->rm_extend.rm_so = OFF(cold);
448  else
449  v->details->rm_extend.rm_so = OFF(v->stop);
450  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
451  }
452  if (close == NULL) /* not found */
453  return REG_NOMATCH;
454  if (v->nmatch == 0) /* found, don't need exact location */
455  return REG_OKAY;
456 
457  /* find starting point and match */
458  assert(cold != NULL);
459  open = cold;
460  cold = NULL;
461  MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
462  d = newdfa(v, cnfa, cm, &v->dfa1);
463  if (d == NULL)
464  return v->err;
465  for (begin = open; begin <= close; begin++)
466  {
467  MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
468  if (shorter)
469  end = shortest(v, d, begin, begin, v->stop,
470  (chr **) NULL, &hitend);
471  else
472  end = longest(v, d, begin, v->stop, &hitend);
473  if (ISERR())
474  {
475  freedfa(d);
476  return v->err;
477  }
478  if (hitend && cold == NULL)
479  cold = begin;
480  if (end != NULL)
481  break; /* NOTE BREAK OUT */
482  }
483  assert(end != NULL); /* search RE succeeded so loop should */
484  freedfa(d);
485 
486  /* and pin down details */
487  assert(v->nmatch > 0);
488  v->pmatch[0].rm_so = OFF(begin);
489  v->pmatch[0].rm_eo = OFF(end);
490  if (v->g->cflags & REG_EXPECT)
491  {
492  if (cold != NULL)
493  v->details->rm_extend.rm_so = OFF(cold);
494  else
495  v->details->rm_extend.rm_so = OFF(v->stop);
496  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
497  }
498  if (v->nmatch == 1) /* no need for submatches */
499  return REG_OKAY;
500 
501  /* find submatches */
502  return cdissect(v, v->g->tree, begin, end);
503 }
504 
505 /*
506  * cfind - find a match for the main NFA (with complications)
507  */
508 static int
509 cfind(struct vars *v,
510  struct cnfa *cnfa,
511  struct colormap *cm)
512 {
513  struct dfa *s;
514  struct dfa *d;
515  chr *cold;
516  int ret;
517 
518  s = newdfa(v, &v->g->search, cm, &v->dfa1);
519  if (s == NULL)
520  return v->err;
521  d = newdfa(v, cnfa, cm, &v->dfa2);
522  if (d == NULL)
523  {
524  freedfa(s);
525  return v->err;
526  }
527 
528  ret = cfindloop(v, cnfa, cm, d, s, &cold);
529 
530  freedfa(d);
531  freedfa(s);
532  NOERR();
533  if (v->g->cflags & REG_EXPECT)
534  {
535  assert(v->details != NULL);
536  if (cold != NULL)
537  v->details->rm_extend.rm_so = OFF(cold);
538  else
539  v->details->rm_extend.rm_so = OFF(v->stop);
540  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
541  }
542  return ret;
543 }
544 
545 /*
546  * cfindloop - the heart of cfind
547  */
548 static int
549 cfindloop(struct vars *v,
550  struct cnfa *cnfa,
551  struct colormap *cm,
552  struct dfa *d,
553  struct dfa *s,
554  chr **coldp) /* where to put coldstart pointer */
555 {
556  chr *begin;
557  chr *end;
558  chr *cold;
559  chr *open; /* open and close of range of possible starts */
560  chr *close;
561  chr *estart;
562  chr *estop;
563  int er;
564  int shorter = v->g->tree->flags & SHORTER;
565  int hitend;
566 
567  assert(d != NULL && s != NULL);
568  cold = NULL;
569  close = v->search_start;
570  do
571  {
572  /* Search with the search RE for match range at/beyond "close" */
573  MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
574  close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
575  if (ISERR())
576  {
577  *coldp = cold;
578  return v->err;
579  }
580  if (close == NULL)
581  break; /* no more possible match anywhere */
582  assert(cold != NULL);
583  open = cold;
584  cold = NULL;
585  /* Search for matches starting between "open" and "close" inclusive */
586  MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
587  for (begin = open; begin <= close; begin++)
588  {
589  MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
590  estart = begin;
591  estop = v->stop;
592  for (;;)
593  {
594  /* Here we use the top node's detailed RE */
595  if (shorter)
596  end = shortest(v, d, begin, estart,
597  estop, (chr **) NULL, &hitend);
598  else
599  end = longest(v, d, begin, estop,
600  &hitend);
601  if (ISERR())
602  {
603  *coldp = cold;
604  return v->err;
605  }
606  if (hitend && cold == NULL)
607  cold = begin;
608  if (end == NULL)
609  break; /* no match with this begin point, try next */
610  MDEBUG(("tentative end %ld\n", LOFF(end)));
611  /* Dissect the potential match to see if it really matches */
612  er = cdissect(v, v->g->tree, begin, end);
613  if (er == REG_OKAY)
614  {
615  if (v->nmatch > 0)
616  {
617  v->pmatch[0].rm_so = OFF(begin);
618  v->pmatch[0].rm_eo = OFF(end);
619  }
620  *coldp = cold;
621  return REG_OKAY;
622  }
623  if (er != REG_NOMATCH)
624  {
625  ERR(er);
626  *coldp = cold;
627  return er;
628  }
629  /* Try next longer/shorter match with same begin point */
630  if (shorter)
631  {
632  if (end == estop)
633  break; /* no more, so try next begin point */
634  estart = end + 1;
635  }
636  else
637  {
638  if (end == begin)
639  break; /* no more, so try next begin point */
640  estop = end - 1;
641  }
642  } /* end loop over endpoint positions */
643  } /* end loop over beginning positions */
644 
645  /*
646  * If we get here, there is no possible match starting at or before
647  * "close", so consider matches beyond that. We'll do a fresh search
648  * with the search RE to find a new promising match range.
649  */
650  close++;
651  } while (close < v->stop);
652 
653  *coldp = cold;
654  return REG_NOMATCH;
655 }
656 
657 /*
658  * zapallsubs - initialize all subexpression matches to "no match"
659  *
660  * Note that p[0], the overall-match location, is not touched.
661  */
662 static void
664  size_t n)
665 {
666  size_t i;
667 
668  for (i = n - 1; i > 0; i--)
669  {
670  p[i].rm_so = -1;
671  p[i].rm_eo = -1;
672  }
673 }
674 
675 /*
676  * zaptreesubs - initialize subexpressions within subtree to "no match"
677  */
678 static void
679 zaptreesubs(struct vars *v,
680  struct subre *t)
681 {
682  int n = t->capno;
683  struct subre *t2;
684 
685  if (n > 0)
686  {
687  if ((size_t) n < v->nmatch)
688  {
689  v->pmatch[n].rm_so = -1;
690  v->pmatch[n].rm_eo = -1;
691  }
692  }
693 
694  for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
695  zaptreesubs(v, t2);
696 }
697 
698 /*
699  * subset - set subexpression match data for a successful subre
700  */
701 static void
702 subset(struct vars *v,
703  struct subre *sub,
704  chr *begin,
705  chr *end)
706 {
707  int n = sub->capno;
708 
709  assert(n > 0);
710  if ((size_t) n >= v->nmatch)
711  return;
712 
713  MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
714  v->pmatch[n].rm_so = OFF(begin);
715  v->pmatch[n].rm_eo = OFF(end);
716 }
717 
718 /*
719  * cdissect - check backrefs and determine subexpression matches
720  *
721  * cdissect recursively processes a subre tree to check matching of backrefs
722  * and/or identify submatch boundaries for capture nodes. The proposed match
723  * runs from "begin" to "end" (not including "end"), and we are basically
724  * "dissecting" it to see where the submatches are.
725  *
726  * Before calling any level of cdissect, the caller must have run the node's
727  * DFA and found that the proposed substring satisfies the DFA. (We make
728  * the caller do that because in concatenation and iteration nodes, it's
729  * much faster to check all the substrings against the child DFAs before we
730  * recurse.)
731  *
732  * A side-effect of a successful match is to save match locations for
733  * capturing subexpressions in v->pmatch[]. This is a little bit tricky,
734  * so we make the following rules:
735  * 1. Before initial entry to cdissect, all match data must have been
736  * cleared (this is seen to by zapallsubs).
737  * 2. Before any recursive entry to cdissect, the match data for that
738  * subexpression tree must be guaranteed clear (see zaptreesubs).
739  * 3. When returning REG_OKAY, each level of cdissect will have saved
740  * any relevant match locations.
741  * 4. When returning REG_NOMATCH, each level of cdissect will guarantee
742  * that its subexpression match locations are again clear.
743  * 5. No guarantees are made for error cases (i.e., other result codes).
744  * 6. When a level of cdissect abandons a successful sub-match, it will
745  * clear that subtree's match locations with zaptreesubs before trying
746  * any new DFA match or cdissect call for that subtree or any subtree
747  * to its right (that is, any subtree that could have a backref into the
748  * abandoned match).
749  * This may seem overly complicated, but it's difficult to simplify it
750  * because of the provision that match locations must be reset before
751  * any fresh DFA match (a rule that is needed to make dfa_backref safe).
752  * That means it won't work to just reset relevant match locations at the
753  * start of each cdissect level.
754  */
755 static int /* regexec return code */
756 cdissect(struct vars *v,
757  struct subre *t,
758  chr *begin, /* beginning of relevant substring */
759  chr *end) /* end of same */
760 {
761  int er;
762 
763  assert(t != NULL);
764  MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
765 
766  /* handy place to check for operation cancel */
767  if (CANCEL_REQUESTED(v->re))
768  return REG_CANCEL;
769  /* ... and stack overrun */
770  if (STACK_TOO_DEEP(v->re))
771  return REG_ETOOBIG;
772 
773  switch (t->op)
774  {
775  case '=': /* terminal node */
776  assert(t->child == NULL);
777  er = REG_OKAY; /* no action, parent did the work */
778  break;
779  case 'b': /* back reference */
780  assert(t->child == NULL);
781  er = cbrdissect(v, t, begin, end);
782  break;
783  case '.': /* concatenation */
784  assert(t->child != NULL);
785  if (t->child->flags & SHORTER) /* reverse scan */
786  er = crevcondissect(v, t, begin, end);
787  else
788  er = ccondissect(v, t, begin, end);
789  break;
790  case '|': /* alternation */
791  assert(t->child != NULL);
792  er = caltdissect(v, t, begin, end);
793  break;
794  case '*': /* iteration */
795  assert(t->child != NULL);
796  if (t->child->flags & SHORTER) /* reverse scan */
797  er = creviterdissect(v, t, begin, end);
798  else
799  er = citerdissect(v, t, begin, end);
800  break;
801  case '(': /* no-op capture node */
802  assert(t->child != NULL);
803  er = cdissect(v, t->child, begin, end);
804  break;
805  default:
806  er = REG_ASSERT;
807  break;
808  }
809 
810  /*
811  * We should never have a match failure unless backrefs lurk below;
812  * otherwise, either caller failed to check the DFA, or there's some
813  * inconsistency between the DFA and the node's innards.
814  */
815  assert(er != REG_NOMATCH || (t->flags & BACKR));
816 
817  /*
818  * If this node is marked as capturing, save successful match's location.
819  */
820  if (t->capno > 0 && er == REG_OKAY)
821  subset(v, t, begin, end);
822 
823  return er;
824 }
825 
826 /*
827  * ccondissect - dissect match for concatenation node
828  */
829 static int /* regexec return code */
830 ccondissect(struct vars *v,
831  struct subre *t,
832  chr *begin, /* beginning of relevant substring */
833  chr *end) /* end of same */
834 {
835  struct subre *left = t->child;
836  struct subre *right = left->sibling;
837  struct dfa *d;
838  struct dfa *d2;
839  chr *mid;
840  int er;
841 
842  assert(t->op == '.');
843  assert(left != NULL && left->cnfa.nstates > 0);
844  assert(right != NULL && right->cnfa.nstates > 0);
845  assert(right->sibling == NULL);
846  assert(!(left->flags & SHORTER));
847 
848  d = getsubdfa(v, left);
849  NOERR();
850  d2 = getsubdfa(v, right);
851  NOERR();
852  MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
853 
854  /* pick a tentative midpoint */
855  mid = longest(v, d, begin, end, (int *) NULL);
856  NOERR();
857  if (mid == NULL)
858  return REG_NOMATCH;
859  MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
860 
861  /* iterate until satisfaction or failure */
862  for (;;)
863  {
864  /* try this midpoint on for size */
865  if (longest(v, d2, mid, end, (int *) NULL) == end)
866  {
867  er = cdissect(v, left, begin, mid);
868  if (er == REG_OKAY)
869  {
870  er = cdissect(v, right, mid, end);
871  if (er == REG_OKAY)
872  {
873  /* satisfaction */
874  MDEBUG(("%d: successful\n", t->id));
875  return REG_OKAY;
876  }
877  /* Reset left's matches (right should have done so itself) */
878  zaptreesubs(v, left);
879  }
880  if (er != REG_NOMATCH)
881  return er;
882  }
883  NOERR();
884 
885  /* that midpoint didn't work, find a new one */
886  if (mid == begin)
887  {
888  /* all possibilities exhausted */
889  MDEBUG(("%d: no midpoint\n", t->id));
890  return REG_NOMATCH;
891  }
892  mid = longest(v, d, begin, mid - 1, (int *) NULL);
893  NOERR();
894  if (mid == NULL)
895  {
896  /* failed to find a new one */
897  MDEBUG(("%d: failed midpoint\n", t->id));
898  return REG_NOMATCH;
899  }
900  MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
901  }
902 
903  /* can't get here */
904  return REG_ASSERT;
905 }
906 
907 /*
908  * crevcondissect - dissect match for concatenation node, shortest-first
909  */
910 static int /* regexec return code */
912  struct subre *t,
913  chr *begin, /* beginning of relevant substring */
914  chr *end) /* end of same */
915 {
916  struct subre *left = t->child;
917  struct subre *right = left->sibling;
918  struct dfa *d;
919  struct dfa *d2;
920  chr *mid;
921  int er;
922 
923  assert(t->op == '.');
924  assert(left != NULL && left->cnfa.nstates > 0);
925  assert(right != NULL && right->cnfa.nstates > 0);
926  assert(right->sibling == NULL);
927  assert(left->flags & SHORTER);
928 
929  d = getsubdfa(v, left);
930  NOERR();
931  d2 = getsubdfa(v, right);
932  NOERR();
933  MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
934 
935  /* pick a tentative midpoint */
936  mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
937  NOERR();
938  if (mid == NULL)
939  return REG_NOMATCH;
940  MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
941 
942  /* iterate until satisfaction or failure */
943  for (;;)
944  {
945  /* try this midpoint on for size */
946  if (longest(v, d2, mid, end, (int *) NULL) == end)
947  {
948  er = cdissect(v, left, begin, mid);
949  if (er == REG_OKAY)
950  {
951  er = cdissect(v, right, mid, end);
952  if (er == REG_OKAY)
953  {
954  /* satisfaction */
955  MDEBUG(("%d: successful\n", t->id));
956  return REG_OKAY;
957  }
958  /* Reset left's matches (right should have done so itself) */
959  zaptreesubs(v, left);
960  }
961  if (er != REG_NOMATCH)
962  return er;
963  }
964  NOERR();
965 
966  /* that midpoint didn't work, find a new one */
967  if (mid == end)
968  {
969  /* all possibilities exhausted */
970  MDEBUG(("%d: no midpoint\n", t->id));
971  return REG_NOMATCH;
972  }
973  mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
974  NOERR();
975  if (mid == NULL)
976  {
977  /* failed to find a new one */
978  MDEBUG(("%d: failed midpoint\n", t->id));
979  return REG_NOMATCH;
980  }
981  MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
982  }
983 
984  /* can't get here */
985  return REG_ASSERT;
986 }
987 
988 /*
989  * cbrdissect - dissect match for backref node
990  *
991  * The backref match might already have been verified by dfa_backref(),
992  * but we don't know that for sure so must check it here.
993  */
994 static int /* regexec return code */
995 cbrdissect(struct vars *v,
996  struct subre *t,
997  chr *begin, /* beginning of relevant substring */
998  chr *end) /* end of same */
999 {
1000  int n = t->backno;
1001  size_t numreps;
1002  size_t tlen;
1003  size_t brlen;
1004  chr *brstring;
1005  chr *p;
1006  int min = t->min;
1007  int max = t->max;
1008 
1009  assert(t != NULL);
1010  assert(t->op == 'b');
1011  assert(n >= 0);
1012  assert((size_t) n < v->nmatch);
1013 
1014  MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
1015  LOFF(begin), LOFF(end)));
1016 
1017  /* get the backreferenced string */
1018  if (v->pmatch[n].rm_so == -1)
1019  return REG_NOMATCH;
1020  brstring = v->start + v->pmatch[n].rm_so;
1021  brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
1022 
1023  /* special cases for zero-length strings */
1024  if (brlen == 0)
1025  {
1026  /*
1027  * matches only if target is zero length, but any number of
1028  * repetitions can be considered to be present
1029  */
1030  if (begin == end && min <= max)
1031  {
1032  MDEBUG(("%d: backref matched trivially\n", t->id));
1033  return REG_OKAY;
1034  }
1035  return REG_NOMATCH;
1036  }
1037  if (begin == end)
1038  {
1039  /* matches only if zero repetitions are okay */
1040  if (min == 0)
1041  {
1042  MDEBUG(("%d: backref matched trivially\n", t->id));
1043  return REG_OKAY;
1044  }
1045  return REG_NOMATCH;
1046  }
1047 
1048  /*
1049  * check target length to see if it could possibly be an allowed number of
1050  * repetitions of brstring
1051  */
1052  assert(end > begin);
1053  tlen = end - begin;
1054  if (tlen % brlen != 0)
1055  return REG_NOMATCH;
1056  numreps = tlen / brlen;
1057  if (numreps < min || (numreps > max && max != DUPINF))
1058  return REG_NOMATCH;
1059 
1060  /* okay, compare the actual string contents */
1061  p = begin;
1062  while (numreps-- > 0)
1063  {
1064  if ((*v->g->compare) (brstring, p, brlen) != 0)
1065  return REG_NOMATCH;
1066  p += brlen;
1067  }
1068 
1069  MDEBUG(("%d: backref matched\n", t->id));
1070  return REG_OKAY;
1071 }
1072 
1073 /*
1074  * caltdissect - dissect match for alternation node
1075  */
1076 static int /* regexec return code */
1077 caltdissect(struct vars *v,
1078  struct subre *t,
1079  chr *begin, /* beginning of relevant substring */
1080  chr *end) /* end of same */
1081 {
1082  struct dfa *d;
1083  int er;
1084 
1085  assert(t->op == '|');
1086 
1087  t = t->child;
1088  /* there should be at least 2 alternatives */
1089  assert(t != NULL && t->sibling != NULL);
1090 
1091  while (t != NULL)
1092  {
1093  assert(t->cnfa.nstates > 0);
1094 
1095  MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1096 
1097  d = getsubdfa(v, t);
1098  NOERR();
1099  if (longest(v, d, begin, end, (int *) NULL) == end)
1100  {
1101  MDEBUG(("%d: caltdissect matched\n", t->id));
1102  er = cdissect(v, t, begin, end);
1103  if (er != REG_NOMATCH)
1104  return er;
1105  }
1106  NOERR();
1107 
1108  t = t->sibling;
1109  }
1110 
1111  return REG_NOMATCH;
1112 }
1113 
1114 /*
1115  * citerdissect - dissect match for iteration node
1116  */
1117 static int /* regexec return code */
1118 citerdissect(struct vars *v,
1119  struct subre *t,
1120  chr *begin, /* beginning of relevant substring */
1121  chr *end) /* end of same */
1122 {
1123  struct dfa *d;
1124  chr **endpts;
1125  chr *limit;
1126  int min_matches;
1127  size_t max_matches;
1128  int nverified;
1129  int k;
1130  int i;
1131  int er;
1132 
1133  assert(t->op == '*');
1134  assert(t->child != NULL && t->child->cnfa.nstates > 0);
1135  assert(!(t->child->flags & SHORTER));
1136  assert(begin <= end);
1137 
1138  MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1139 
1140  /*
1141  * For the moment, assume the minimum number of matches is 1. If zero
1142  * matches are allowed, and the target string is empty, we are allowed to
1143  * match regardless of the contents of the iter node --- but we would
1144  * prefer to match once, so that capturing parens get set. (An example of
1145  * the concern here is a pattern like "()*\1", which historically this
1146  * code has allowed to succeed.) Therefore, we deal with the zero-matches
1147  * case at the bottom, after failing to find any other way to match.
1148  */
1149  min_matches = t->min;
1150  if (min_matches <= 0)
1151  min_matches = 1;
1152 
1153  /*
1154  * We need workspace to track the endpoints of each sub-match. Normally
1155  * we consider only nonzero-length sub-matches, so there can be at most
1156  * end-begin of them. However, if min is larger than that, we will also
1157  * consider zero-length sub-matches in order to find enough matches.
1158  *
1159  * For convenience, endpts[0] contains the "begin" pointer and we store
1160  * sub-match endpoints in endpts[1..max_matches].
1161  */
1162  max_matches = end - begin;
1163  if (max_matches > t->max && t->max != DUPINF)
1164  max_matches = t->max;
1165  if (max_matches < min_matches)
1166  max_matches = min_matches;
1167  endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1168  if (endpts == NULL)
1169  return REG_ESPACE;
1170  endpts[0] = begin;
1171 
1172  d = getsubdfa(v, t->child);
1173  if (ISERR())
1174  {
1175  FREE(endpts);
1176  return v->err;
1177  }
1178 
1179  /*
1180  * Our strategy is to first find a set of sub-match endpoints that are
1181  * valid according to the child node's DFA, and then recursively dissect
1182  * each sub-match to confirm validity. If any validity check fails,
1183  * backtrack that sub-match and try again. And, when we next try for a
1184  * validity check, we need not recheck any successfully verified
1185  * sub-matches that we didn't move the endpoints of. nverified remembers
1186  * how many sub-matches are currently known okay.
1187  */
1188 
1189  /* initialize to consider first sub-match */
1190  nverified = 0;
1191  k = 1;
1192  limit = end;
1193 
1194  /* iterate until satisfaction or failure */
1195  while (k > 0)
1196  {
1197  /* try to find an endpoint for the k'th sub-match */
1198  endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
1199  if (ISERR())
1200  {
1201  FREE(endpts);
1202  return v->err;
1203  }
1204  if (endpts[k] == NULL)
1205  {
1206  /* no match possible, so see if we can shorten previous one */
1207  k--;
1208  goto backtrack;
1209  }
1210  MDEBUG(("%d: working endpoint %d: %ld\n",
1211  t->id, k, LOFF(endpts[k])));
1212 
1213  /* k'th sub-match can no longer be considered verified */
1214  if (nverified >= k)
1215  nverified = k - 1;
1216 
1217  if (endpts[k] != end)
1218  {
1219  /* haven't reached end yet, try another iteration if allowed */
1220  if (k >= max_matches)
1221  {
1222  /* must try to shorten some previous match */
1223  k--;
1224  goto backtrack;
1225  }
1226 
1227  /* reject zero-length match unless necessary to achieve min */
1228  if (endpts[k] == endpts[k - 1] &&
1229  (k >= min_matches || min_matches - k < end - endpts[k]))
1230  goto backtrack;
1231 
1232  k++;
1233  limit = end;
1234  continue;
1235  }
1236 
1237  /*
1238  * We've identified a way to divide the string into k sub-matches that
1239  * works so far as the child DFA can tell. If k is an allowed number
1240  * of matches, start the slow part: recurse to verify each sub-match.
1241  * We always have k <= max_matches, needn't check that.
1242  */
1243  if (k < min_matches)
1244  goto backtrack;
1245 
1246  MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1247 
1248  for (i = nverified + 1; i <= k; i++)
1249  {
1250  /* zap any match data from a non-last iteration */
1251  zaptreesubs(v, t->child);
1252  er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1253  if (er == REG_OKAY)
1254  {
1255  nverified = i;
1256  continue;
1257  }
1258  if (er == REG_NOMATCH)
1259  break;
1260  /* oops, something failed */
1261  FREE(endpts);
1262  return er;
1263  }
1264 
1265  if (i > k)
1266  {
1267  /* satisfaction */
1268  MDEBUG(("%d: successful\n", t->id));
1269  FREE(endpts);
1270  return REG_OKAY;
1271  }
1272 
1273  /* i'th match failed to verify, so backtrack it */
1274  k = i;
1275 
1276 backtrack:
1277 
1278  /*
1279  * Must consider shorter versions of the k'th sub-match. However,
1280  * we'll only ask for a zero-length match if necessary.
1281  */
1282  while (k > 0)
1283  {
1284  chr *prev_end = endpts[k - 1];
1285 
1286  if (endpts[k] > prev_end)
1287  {
1288  limit = endpts[k] - 1;
1289  if (limit > prev_end ||
1290  (k < min_matches && min_matches - k >= end - prev_end))
1291  {
1292  /* break out of backtrack loop, continue the outer one */
1293  break;
1294  }
1295  }
1296  /* can't shorten k'th sub-match any more, consider previous one */
1297  k--;
1298  }
1299  }
1300 
1301  /* all possibilities exhausted */
1302  FREE(endpts);
1303 
1304  /*
1305  * Now consider the possibility that we can match to a zero-length string
1306  * by using zero repetitions.
1307  */
1308  if (t->min == 0 && begin == end)
1309  {
1310  MDEBUG(("%d: allowing zero matches\n", t->id));
1311  return REG_OKAY;
1312  }
1313 
1314  MDEBUG(("%d: failed\n", t->id));
1315  return REG_NOMATCH;
1316 }
1317 
1318 /*
1319  * creviterdissect - dissect match for iteration node, shortest-first
1320  */
1321 static int /* regexec return code */
1323  struct subre *t,
1324  chr *begin, /* beginning of relevant substring */
1325  chr *end) /* end of same */
1326 {
1327  struct dfa *d;
1328  chr **endpts;
1329  chr *limit;
1330  int min_matches;
1331  size_t max_matches;
1332  int nverified;
1333  int k;
1334  int i;
1335  int er;
1336 
1337  assert(t->op == '*');
1338  assert(t->child != NULL && t->child->cnfa.nstates > 0);
1339  assert(t->child->flags & SHORTER);
1340  assert(begin <= end);
1341 
1342  MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1343 
1344  /*
1345  * If zero matches are allowed, and target string is empty, just declare
1346  * victory. OTOH, if target string isn't empty, zero matches can't work
1347  * so we pretend the min is 1.
1348  */
1349  min_matches = t->min;
1350  if (min_matches <= 0)
1351  {
1352  if (begin == end)
1353  {
1354  MDEBUG(("%d: allowing zero matches\n", t->id));
1355  return REG_OKAY;
1356  }
1357  min_matches = 1;
1358  }
1359 
1360  /*
1361  * We need workspace to track the endpoints of each sub-match. Normally
1362  * we consider only nonzero-length sub-matches, so there can be at most
1363  * end-begin of them. However, if min is larger than that, we will also
1364  * consider zero-length sub-matches in order to find enough matches.
1365  *
1366  * For convenience, endpts[0] contains the "begin" pointer and we store
1367  * sub-match endpoints in endpts[1..max_matches].
1368  */
1369  max_matches = end - begin;
1370  if (max_matches > t->max && t->max != DUPINF)
1371  max_matches = t->max;
1372  if (max_matches < min_matches)
1373  max_matches = min_matches;
1374  endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1375  if (endpts == NULL)
1376  return REG_ESPACE;
1377  endpts[0] = begin;
1378 
1379  d = getsubdfa(v, t->child);
1380  if (ISERR())
1381  {
1382  FREE(endpts);
1383  return v->err;
1384  }
1385 
1386  /*
1387  * Our strategy is to first find a set of sub-match endpoints that are
1388  * valid according to the child node's DFA, and then recursively dissect
1389  * each sub-match to confirm validity. If any validity check fails,
1390  * backtrack that sub-match and try again. And, when we next try for a
1391  * validity check, we need not recheck any successfully verified
1392  * sub-matches that we didn't move the endpoints of. nverified remembers
1393  * how many sub-matches are currently known okay.
1394  */
1395 
1396  /* initialize to consider first sub-match */
1397  nverified = 0;
1398  k = 1;
1399  limit = begin;
1400 
1401  /* iterate until satisfaction or failure */
1402  while (k > 0)
1403  {
1404  /* disallow zero-length match unless necessary to achieve min */
1405  if (limit == endpts[k - 1] &&
1406  limit != end &&
1407  (k >= min_matches || min_matches - k < end - limit))
1408  limit++;
1409 
1410  /* if this is the last allowed sub-match, it must reach to the end */
1411  if (k >= max_matches)
1412  limit = end;
1413 
1414  /* try to find an endpoint for the k'th sub-match */
1415  endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
1416  (chr **) NULL, (int *) NULL);
1417  if (ISERR())
1418  {
1419  FREE(endpts);
1420  return v->err;
1421  }
1422  if (endpts[k] == NULL)
1423  {
1424  /* no match possible, so see if we can lengthen previous one */
1425  k--;
1426  goto backtrack;
1427  }
1428  MDEBUG(("%d: working endpoint %d: %ld\n",
1429  t->id, k, LOFF(endpts[k])));
1430 
1431  /* k'th sub-match can no longer be considered verified */
1432  if (nverified >= k)
1433  nverified = k - 1;
1434 
1435  if (endpts[k] != end)
1436  {
1437  /* haven't reached end yet, try another iteration if allowed */
1438  if (k >= max_matches)
1439  {
1440  /* must try to lengthen some previous match */
1441  k--;
1442  goto backtrack;
1443  }
1444 
1445  k++;
1446  limit = endpts[k - 1];
1447  continue;
1448  }
1449 
1450  /*
1451  * We've identified a way to divide the string into k sub-matches that
1452  * works so far as the child DFA can tell. If k is an allowed number
1453  * of matches, start the slow part: recurse to verify each sub-match.
1454  * We always have k <= max_matches, needn't check that.
1455  */
1456  if (k < min_matches)
1457  goto backtrack;
1458 
1459  MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1460 
1461  for (i = nverified + 1; i <= k; i++)
1462  {
1463  /* zap any match data from a non-last iteration */
1464  zaptreesubs(v, t->child);
1465  er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1466  if (er == REG_OKAY)
1467  {
1468  nverified = i;
1469  continue;
1470  }
1471  if (er == REG_NOMATCH)
1472  break;
1473  /* oops, something failed */
1474  FREE(endpts);
1475  return er;
1476  }
1477 
1478  if (i > k)
1479  {
1480  /* satisfaction */
1481  MDEBUG(("%d: successful\n", t->id));
1482  FREE(endpts);
1483  return REG_OKAY;
1484  }
1485 
1486  /* i'th match failed to verify, so backtrack it */
1487  k = i;
1488 
1489 backtrack:
1490 
1491  /*
1492  * Must consider longer versions of the k'th sub-match.
1493  */
1494  while (k > 0)
1495  {
1496  if (endpts[k] < end)
1497  {
1498  limit = endpts[k] + 1;
1499  /* break out of backtrack loop, continue the outer one */
1500  break;
1501  }
1502  /* can't lengthen k'th sub-match any more, consider previous one */
1503  k--;
1504  }
1505  }
1506 
1507  /* all possibilities exhausted */
1508  MDEBUG(("%d: failed\n", t->id));
1509  FREE(endpts);
1510  return REG_NOMATCH;
1511 }
1512 
1513 
1514 
1515 #include "rege_dfa.c"
static void cleanup(void)
Definition: bootstrap.c:696
#define FREE(ptr)
Definition: cryptohash.c:37
#define close(a)
Definition: win32.h:12
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
static void const char fflush(stdout)
const void size_t len
void pg_set_regex_collation(Oid collation)
#define MALLOC(n)
Definition: regcustom.h:52
pg_wchar chr
Definition: regcustom.h:58
#define assert(x)
Definition: regcustom.h:55
#define REG_NOMATCH
Definition: regex.h:138
#define REG_ASSERT
Definition: regex.h:151
#define REG_UIMPOSSIBLE
Definition: regex.h:72
#define REG_CANCEL
Definition: regex.h:157
#define REG_MTRACE
Definition: regex.h:128
#define REG_FTRACE
Definition: regex.h:127
#define REG_EXPECT
Definition: regex.h:113
#define REG_INVARG
Definition: regex.h:152
#define REG_ETOOBIG
Definition: regex.h:155
#define REG_OKAY
Definition: regex.h:137
#define REG_MIXED
Definition: regex.h:153
#define REG_NOSUB
Definition: regex.h:107
#define REG_ESPACE
Definition: regex.h:149
#define REG_UBACKREF
Definition: regex.h:60
static int find(struct vars *v, struct cnfa *cnfa, struct colormap *cm)
Definition: regexec.c:419
static int citerdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:1118
static void freedfa(struct dfa *d)
static struct sset * initialize(struct vars *v, struct dfa *d, chr *start)
#define DOMALLOC
Definition: regexec.c:101
static struct sset * getvacant(struct vars *v, struct dfa *d, chr *cp, chr *start)
#define LOFF(p)
Definition: regexec.c:132
#define NOERR()
Definition: regexec.c:130
static void zapallsubs(regmatch_t *p, size_t n)
Definition: regexec.c:663
static int lacon(struct vars *v, struct cnfa *pcnfa, chr *cp, color co)
#define ISERR()
Definition: regexec.c:127
static int ccondissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:830
static int cfind(struct vars *v, struct cnfa *cnfa, struct colormap *cm)
Definition: regexec.c:509
#define ERR(e)
Definition: regexec.c:129
static int creviterdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:1322
static int cfindloop(struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct dfa *d, struct dfa *s, chr **coldp)
Definition: regexec.c:549
static chr * longest(struct vars *v, struct dfa *d, chr *start, chr *stop, int *hitstopp)
static unsigned hash(unsigned *uv, int n)
static int cbrdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:995
static struct dfa * getladfa(struct vars *v, int n)
Definition: regexec.c:400
static struct dfa * getsubdfa(struct vars *v, struct subre *t)
Definition: regexec.c:372
static int crevcondissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:911
#define WORK
Definition: regexec.c:87
#define LOCALMAT
#define OFF(p)
Definition: regexec.c:131
static struct sset * pickss(struct vars *v, struct dfa *d, chr *cp, chr *start)
static struct dfa * newdfa(struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct smalldfa *sml)
static int cdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:756
static chr * lastcold(struct vars *v, struct dfa *d)
static void zaptreesubs(struct vars *v, struct subre *t)
Definition: regexec.c:679
static int caltdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:1077
static chr * dfa_backref(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, bool shortest)
#define FEWCOLORS
Definition: regexec.c:91
static void subset(struct vars *v, struct subre *sub, chr *begin, chr *end)
Definition: regexec.c:702
static chr * shortest(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
Definition: regexec.c:185
#define FEWSTATES
Definition: regexec.c:90
static int matchuntil(struct vars *v, struct dfa *d, chr *probe, struct sset **lastcss, chr **lastcp)
static struct sset * miss(struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
#define LOCALDFAS
#define BACKR
Definition: regguts.h:479
#define STACK_TOO_DEEP(re)
Definition: regguts.h:520
#define REMAGIC
Definition: regguts.h:96
short color
Definition: regguts.h:150
#define DUPINF
Definition: regguts.h:94
#define CANCEL_REQUESTED(re)
Definition: regguts.h:517
#define MDEBUG(arglist)
Definition: regguts.h:117
#define VS(x)
Definition: regguts.h:61
#define SHORTER
Definition: regguts.h:476
Definition: regexec.c:40
struct sset * ss
Definition: regexec.c:41
color co
Definition: regexec.c:42
Definition: regguts.h:402
int nstates
Definition: regguts.h:403
Definition: regexec.c:64
int nstates
Definition: regexec.c:67
bool arraysmalloced
Definition: regexec.c:84
chr * lastpost
Definition: regexec.c:77
chr * lastnopr
Definition: regexec.c:78
int wordsper
Definition: regexec.c:69
unsigned * work
Definition: regexec.c:72
unsigned * statesarea
Definition: regexec.c:71
struct arcp * incarea
Definition: regexec.c:74
struct sset * search
Definition: regexec.c:79
int ncolors
Definition: regexec.c:68
short backmin
Definition: regexec.c:81
struct sset * ssets
Definition: regexec.c:70
struct cnfa * cnfa
Definition: regexec.c:75
int backno
Definition: regexec.c:80
bool ismalloced
Definition: regexec.c:83
struct colormap * cm
Definition: regexec.c:76
short backmax
Definition: regexec.c:82
struct sset ** outsarea
Definition: regexec.c:73
int nssused
Definition: regexec.c:66
int nssets
Definition: regexec.c:65
Definition: regguts.h:528
struct subre * tree
Definition: regguts.h:534
struct subre * lacons
Definition: regguts.h:539
long info
Definition: regguts.h:532
struct cnfa search
Definition: regguts.h:535
int ntree
Definition: regguts.h:536
int cflags
Definition: regguts.h:531
size_t nsub
Definition: regguts.h:533
int nlacons
Definition: regguts.h:540
struct colormap cmap
Definition: regguts.h:537
Definition: regex.h:56
char * re_guts
Definition: regex.h:78
int re_csize
Definition: regex.h:74
int re_magic
Definition: regex.h:57
Oid re_collation
Definition: regex.h:76
regoff_t rm_eo
Definition: regex.h:86
regoff_t rm_so
Definition: regex.h:85
regmatch_t rm_extend
Definition: regex.h:92
unsigned statesarea[FEWSTATES *2+WORK]
Definition: regexec.c:96
struct arcp incarea[FEWSTATES *2 *FEWCOLORS]
Definition: regexec.c:98
struct sset * outsarea[FEWSTATES *2 *FEWCOLORS]
Definition: regexec.c:97
struct sset ssets[FEWSTATES *2]
Definition: regexec.c:95
Definition: regexec.c:46
struct arcp ins
Definition: regexec.c:57
unsigned hash
Definition: regexec.c:48
struct arcp * inchain
Definition: regexec.c:60
chr * lastseen
Definition: regexec.c:58
unsigned * states
Definition: regexec.c:47
struct sset ** outs
Definition: regexec.c:59
int flags
Definition: regexec.c:52
Definition: regguts.h:472
int backno
Definition: regguts.h:493
char op
Definition: regguts.h:473
struct state * end
Definition: regguts.h:499
short min
Definition: regguts.h:494
short max
Definition: regguts.h:495
struct subre * sibling
Definition: regguts.h:497
char flags
Definition: regguts.h:474
int id
Definition: regguts.h:491
struct cnfa cnfa
Definition: regguts.h:500
struct subre * child
Definition: regguts.h:496
int capno
Definition: regguts.h:492
struct state * begin
Definition: regguts.h:498
Definition: regcomp.c:282
size_t nmatch
Definition: regexec.c:111
struct dfa ** ladfas
Definition: regexec.c:119
const chr * stop
Definition: regcomp.c:285
struct dfa ** subdfas
Definition: regexec.c:118
chr * search_start
Definition: regexec.c:115
struct sset ** lblastcss
Definition: regexec.c:120
int err
Definition: regcomp.c:286
struct guts * g
Definition: regexec.c:109
struct smalldfa dfa1
Definition: regexec.c:122
chr ** lblastcp
Definition: regexec.c:121
regex_t * re
Definition: regcomp.c:283
int eflags
Definition: regexec.c:110
chr * start
Definition: regexec.c:114
rm_detail_t * details
Definition: regexec.c:113
regmatch_t * pmatch
Definition: regexec.c:112
chr * stop
Definition: regexec.c:116
struct smalldfa dfa2
Definition: regexec.c:123