PostgreSQL Source Code git master
regexec.c
Go to the documentation of this file.
1/*
2 * re_*exec and friends - match REs
3 *
4 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
5 *
6 * Development of this software was funded, in part, by Cray Research Inc.,
7 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
8 * Corporation, none of whom are responsible for the results. The author
9 * thanks all of them.
10 *
11 * Redistribution and use in source and binary forms -- with or without
12 * modification -- are permitted for any purpose, provided that
13 * redistributions in source form retain this entire copyright notice and
14 * indicate the origin and nature of any modifications.
15 *
16 * I'd appreciate being given credit for this package in the documentation
17 * of software which uses it, but that is not a requirement.
18 *
19 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
21 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
22 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * src/backend/regex/regexec.c
31 *
32 */
33
34#include "regex/regguts.h"
35
36
37
38/* lazy-DFA representation */
39struct arcp
40{ /* "pointer" to an outarc */
41 struct sset *ss;
43};
44
45struct sset
46{ /* state set */
47 unsigned *states; /* pointer to bitvector */
48 unsigned hash; /* hash of bitvector */
49#define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
50#define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
51 memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
52 int flags;
53#define STARTER 01 /* the initial state set */
54#define POSTSTATE 02 /* includes the goal state */
55#define LOCKED 04 /* locked in cache */
56#define NOPROGRESS 010 /* zero-progress state set */
57 struct arcp ins; /* chain of inarcs pointing here */
58 chr *lastseen; /* last entered on arrival here */
59 struct sset **outs; /* outarc vector indexed by color */
60 struct arcp *inchain; /* chain-pointer vector for outarcs */
61};
62
63struct dfa
64{
65 int nssets; /* size of cache */
66 int nssused; /* how many entries occupied yet */
67 int nstates; /* number of states */
68 int ncolors; /* length of outarc and inchain vectors */
69 int wordsper; /* length of state-set bitvectors */
70 struct sset *ssets; /* state-set cache */
71 unsigned *statesarea; /* bitvector storage */
72 unsigned *work; /* pointer to work area within statesarea */
73 struct sset **outsarea; /* outarc-vector storage */
74 struct arcp *incarea; /* inchain storage */
75 struct cnfa *cnfa;
76 struct colormap *cm;
77 chr *lastpost; /* location of last cache-flushed success */
78 chr *lastnopr; /* location of last cache-flushed NOPROGRESS */
79 struct sset *search; /* replacement-search-pointer memory */
80 int backno; /* if DFA for a backref, subno it refers to */
81 short backmin; /* min repetitions for backref */
82 short backmax; /* max repetitions for backref */
83 bool ismalloced; /* should this struct dfa be freed? */
84 bool arraysmalloced; /* should its subsidiary arrays be freed? */
85};
86
87#define WORK 1 /* number of work bitvectors needed */
88
89/* setup for non-malloc allocation for small cases */
90#define FEWSTATES 20 /* must be less than UBITS */
91#define FEWCOLORS 15
93{
94 struct dfa dfa; /* must be first */
95 struct sset ssets[FEWSTATES * 2];
96 unsigned statesarea[FEWSTATES * 2 + WORK];
99};
100
101#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */
102
103
104
105/* internal variables, bundled for easy passing around */
106struct vars
107{
108 regex_t *re;
109 struct guts *g;
110 int eflags; /* copies of arguments */
111 size_t nmatch;
114 chr *start; /* start of string */
115 chr *search_start; /* search start of string */
116 chr *stop; /* just past end of string */
117 int err; /* error code if any (0 none) */
118 struct dfa **subdfas; /* per-tree-subre DFAs */
119 struct dfa **ladfas; /* per-lacon-subre DFAs */
120 struct sset **lblastcss; /* per-lacon-subre lookbehind restart data */
121 chr **lblastcp; /* per-lacon-subre lookbehind restart data */
124};
125
126#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
127#define ISERR() VISERR(v)
128#define VERR(vv,e) ((vv)->err = ((vv)->err ? (vv)->err : (e)))
129#define ERR(e) VERR(v, e) /* record an error */
130#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */
131#define OFF(p) ((p) - v->start)
132#define LOFF(p) ((long)OFF(p))
133
134
135
136/*
137 * forward declarations
138 */
139/* === regexec.c === */
140static struct dfa *getsubdfa(struct vars *v, struct subre *t);
141static struct dfa *getladfa(struct vars *v, int n);
142static int find(struct vars *v, struct cnfa *cnfa, struct colormap *cm);
143static int cfind(struct vars *v, struct cnfa *cnfa, struct colormap *cm);
144static int cfindloop(struct vars *v, struct cnfa *cnfa, struct colormap *cm,
145 struct dfa *d, struct dfa *s, chr **coldp);
146static void zapallsubs(regmatch_t *p, size_t n);
147static void zaptreesubs(struct vars *v, struct subre *t);
148static void subset(struct vars *v, struct subre *sub, chr *begin, chr *end);
149static int cdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
150static int ccondissect(struct vars *v, struct subre *t, chr *begin, chr *end);
151static int crevcondissect(struct vars *v, struct subre *t, chr *begin, chr *end);
152static int cbrdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
153static int caltdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
154static int citerdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
155static int creviterdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
156
157/* === rege_dfa.c === */
158static chr *longest(struct vars *v, struct dfa *d,
159 chr *start, chr *stop, int *hitstopp);
160static chr *shortest(struct vars *v, struct dfa *d, chr *start, chr *min,
161 chr *max, chr **coldp, int *hitstopp);
162static int matchuntil(struct vars *v, struct dfa *d, chr *probe,
163 struct sset **lastcss, chr **lastcp);
164static chr *dfa_backref(struct vars *v, struct dfa *d, chr *start,
165 chr *min, chr *max, bool shortest);
166static chr *lastcold(struct vars *v, struct dfa *d);
167static struct dfa *newdfa(struct vars *v, struct cnfa *cnfa,
168 struct colormap *cm, struct smalldfa *sml);
169static void freedfa(struct dfa *d);
170static unsigned hash(unsigned *uv, int n);
171static struct sset *initialize(struct vars *v, struct dfa *d, chr *start);
172static struct sset *miss(struct vars *v, struct dfa *d, struct sset *css,
173 color co, chr *cp, chr *start);
174static int lacon(struct vars *v, struct cnfa *pcnfa, chr *cp, color co);
175static struct sset *getvacant(struct vars *v, struct dfa *d, chr *cp,
176 chr *start);
177static struct sset *pickss(struct vars *v, struct dfa *d, chr *cp,
178 chr *start);
179
180
181/*
182 * pg_regexec - match regular expression
183 */
184int
186 const chr *string,
187 size_t len,
188 size_t search_start,
189 rm_detail_t *details,
190 size_t nmatch,
191 regmatch_t pmatch[],
192 int flags)
193{
194 struct vars var;
195 struct vars *v = &var;
196 int st;
197 size_t n;
198 size_t i;
199 int backref;
200
201#define LOCALMAT 20
202 regmatch_t mat[LOCALMAT];
203
204#define LOCALDFAS 40
205 struct dfa *subdfas[LOCALDFAS];
206
207 /* sanity checks */
208 if (re == NULL || string == NULL || re->re_magic != REMAGIC)
209 return REG_INVARG;
210 if (re->re_csize != sizeof(chr))
211 return REG_MIXED;
212 if (search_start > len)
213 return REG_NOMATCH;
214
215 /* Initialize locale-dependent support */
216 pg_set_regex_collation(re->re_collation);
217
218 /* setup */
219 v->re = re;
220 v->g = (struct guts *) re->re_guts;
221 if ((v->g->cflags & REG_EXPECT) && details == NULL)
222 return REG_INVARG;
223 if (v->g->info & REG_UIMPOSSIBLE)
224 return REG_NOMATCH;
225 backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
226 v->eflags = flags;
227 if (backref && nmatch <= v->g->nsub)
228 {
229 /* need larger work area */
230 v->nmatch = v->g->nsub + 1;
231 if (v->nmatch <= LOCALMAT)
232 v->pmatch = mat;
233 else
234 v->pmatch = (regmatch_t *) MALLOC(v->nmatch * sizeof(regmatch_t));
235 if (v->pmatch == NULL)
236 return REG_ESPACE;
237 zapallsubs(v->pmatch, v->nmatch);
238 }
239 else
240 {
241 /* we can store results directly in caller's array */
242 v->pmatch = pmatch;
243 /* ensure any extra entries in caller's array are filled with -1 */
244 if (nmatch > 0)
245 zapallsubs(pmatch, nmatch);
246 /* then forget about extra entries, to avoid useless work in find() */
247 if (nmatch > v->g->nsub + 1)
248 nmatch = v->g->nsub + 1;
249 v->nmatch = nmatch;
250 }
251 v->details = details;
252 v->start = (chr *) string;
253 v->search_start = (chr *) string + search_start;
254 v->stop = (chr *) string + len;
255 v->err = 0;
256 v->subdfas = NULL;
257 v->ladfas = NULL;
258 v->lblastcss = NULL;
259 v->lblastcp = NULL;
260 /* below this point, "goto cleanup" will behave sanely */
261
262 assert(v->g->ntree >= 0);
263 n = (size_t) v->g->ntree;
264 if (n <= LOCALDFAS)
265 v->subdfas = subdfas;
266 else
267 {
268 v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
269 if (v->subdfas == NULL)
270 {
271 st = REG_ESPACE;
272 goto cleanup;
273 }
274 }
275 for (i = 0; i < n; i++)
276 v->subdfas[i] = NULL;
277
278 assert(v->g->nlacons >= 0);
279 n = (size_t) v->g->nlacons;
280 if (n > 0)
281 {
282 v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
283 if (v->ladfas == NULL)
284 {
285 st = REG_ESPACE;
286 goto cleanup;
287 }
288 for (i = 0; i < n; i++)
289 v->ladfas[i] = NULL;
290 v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
291 v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
292 if (v->lblastcss == NULL || v->lblastcp == NULL)
293 {
294 st = REG_ESPACE;
295 goto cleanup;
296 }
297 for (i = 0; i < n; i++)
298 {
299 v->lblastcss[i] = NULL;
300 v->lblastcp[i] = NULL;
301 }
302 }
303
304 /* do it */
305 assert(v->g->tree != NULL);
306 if (backref)
307 st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
308 else
309 st = find(v, &v->g->tree->cnfa, &v->g->cmap);
310
311 /* on success, ensure caller's match vector is filled correctly */
312 if (st == REG_OKAY && nmatch > 0)
313 {
314 if (v->pmatch != pmatch)
315 {
316 /* copy portion of match vector over from (larger) work area */
317 assert(nmatch <= v->nmatch);
318 memcpy(VS(pmatch), VS(v->pmatch), nmatch * sizeof(regmatch_t));
319 }
320 if (v->g->cflags & REG_NOSUB)
321 {
322 /* don't expose possibly-partial sub-match results to caller */
323 zapallsubs(pmatch, nmatch);
324 }
325 }
326
327 /* clean up */
328cleanup:
329 if (v->pmatch != pmatch && v->pmatch != mat)
330 FREE(v->pmatch);
331 if (v->subdfas != NULL)
332 {
333 n = (size_t) v->g->ntree;
334 for (i = 0; i < n; i++)
335 {
336 if (v->subdfas[i] != NULL)
337 freedfa(v->subdfas[i]);
338 }
339 if (v->subdfas != subdfas)
340 FREE(v->subdfas);
341 }
342 if (v->ladfas != NULL)
343 {
344 n = (size_t) v->g->nlacons;
345 for (i = 0; i < n; i++)
346 {
347 if (v->ladfas[i] != NULL)
348 freedfa(v->ladfas[i]);
349 }
350 FREE(v->ladfas);
351 }
352 if (v->lblastcss != NULL)
353 FREE(v->lblastcss);
354 if (v->lblastcp != NULL)
355 FREE(v->lblastcp);
356
357#ifdef REG_DEBUG
358 if (v->eflags & (REG_FTRACE | REG_MTRACE))
359 fflush(stdout);
360#endif
361
362 return st;
363}
364
365/*
366 * getsubdfa - create or re-fetch the DFA for a tree subre node
367 *
368 * We only need to create the DFA once per overall regex execution.
369 * The DFA will be freed by the cleanup step in pg_regexec().
370 */
371static struct dfa *
372getsubdfa(struct vars *v,
373 struct subre *t)
374{
375 struct dfa *d = v->subdfas[t->id];
376
377 if (d == NULL)
378 {
379 d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
380 if (d == NULL)
381 return NULL;
382 /* set up additional info if this is a backref node */
383 if (t->op == 'b')
384 {
385 d->backno = t->backno;
386 d->backmin = t->min;
387 d->backmax = t->max;
388 }
389 v->subdfas[t->id] = d;
390 }
391 return d;
392}
393
394/*
395 * getladfa - create or re-fetch the DFA for a LACON subre node
396 *
397 * Same as above, but for LACONs.
398 */
399static struct dfa *
400getladfa(struct vars *v,
401 int n)
402{
403 assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
404
405 if (v->ladfas[n] == NULL)
406 {
407 struct subre *sub = &v->g->lacons[n];
408
409 v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
410 /* a LACON can't contain a backref, so nothing else to do */
411 }
412 return v->ladfas[n];
413}
414
415/*
416 * find - find a match for the main NFA (no-complications case)
417 */
418static int
419find(struct vars *v,
420 struct cnfa *cnfa,
421 struct colormap *cm)
422{
423 struct dfa *s;
424 struct dfa *d;
425 chr *begin;
426 chr *end = NULL;
427 chr *cold;
428 chr *open; /* open and close of range of possible starts */
429 chr *close;
430 int hitend;
431 int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
432
433 /* first, a shot with the search RE */
434 s = newdfa(v, &v->g->search, cm, &v->dfa1);
435 if (s == NULL)
436 return v->err;
437 MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
438 cold = NULL;
439 close = shortest(v, s, v->search_start, v->search_start, v->stop,
440 &cold, (int *) NULL);
441 freedfa(s);
442 NOERR();
443 if (v->g->cflags & REG_EXPECT)
444 {
445 assert(v->details != NULL);
446 if (cold != NULL)
447 v->details->rm_extend.rm_so = OFF(cold);
448 else
449 v->details->rm_extend.rm_so = OFF(v->stop);
450 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
451 }
452 if (close == NULL) /* not found */
453 return REG_NOMATCH;
454 if (v->nmatch == 0) /* found, don't need exact location */
455 return REG_OKAY;
456
457 /* find starting point and match */
458 assert(cold != NULL);
459 open = cold;
460 cold = NULL;
461 MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
462 d = newdfa(v, cnfa, cm, &v->dfa1);
463 if (d == NULL)
464 return v->err;
465 for (begin = open; begin <= close; begin++)
466 {
467 MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
468 if (shorter)
469 end = shortest(v, d, begin, begin, v->stop,
470 (chr **) NULL, &hitend);
471 else
472 end = longest(v, d, begin, v->stop, &hitend);
473 if (ISERR())
474 {
475 freedfa(d);
476 return v->err;
477 }
478 if (hitend && cold == NULL)
479 cold = begin;
480 if (end != NULL)
481 break; /* NOTE BREAK OUT */
482 }
483 assert(end != NULL); /* search RE succeeded so loop should */
484 freedfa(d);
485
486 /* and pin down details */
487 assert(v->nmatch > 0);
488 v->pmatch[0].rm_so = OFF(begin);
489 v->pmatch[0].rm_eo = OFF(end);
490 if (v->g->cflags & REG_EXPECT)
491 {
492 if (cold != NULL)
493 v->details->rm_extend.rm_so = OFF(cold);
494 else
495 v->details->rm_extend.rm_so = OFF(v->stop);
496 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
497 }
498 if (v->nmatch == 1) /* no need for submatches */
499 return REG_OKAY;
500
501 /* find submatches */
502 return cdissect(v, v->g->tree, begin, end);
503}
504
505/*
506 * cfind - find a match for the main NFA (with complications)
507 */
508static int
509cfind(struct vars *v,
510 struct cnfa *cnfa,
511 struct colormap *cm)
512{
513 struct dfa *s;
514 struct dfa *d;
515 chr *cold;
516 int ret;
517
518 s = newdfa(v, &v->g->search, cm, &v->dfa1);
519 if (s == NULL)
520 return v->err;
521 d = newdfa(v, cnfa, cm, &v->dfa2);
522 if (d == NULL)
523 {
524 freedfa(s);
525 return v->err;
526 }
527
528 ret = cfindloop(v, cnfa, cm, d, s, &cold);
529
530 freedfa(d);
531 freedfa(s);
532 NOERR();
533 if (v->g->cflags & REG_EXPECT)
534 {
535 assert(v->details != NULL);
536 if (cold != NULL)
537 v->details->rm_extend.rm_so = OFF(cold);
538 else
539 v->details->rm_extend.rm_so = OFF(v->stop);
540 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
541 }
542 return ret;
543}
544
545/*
546 * cfindloop - the heart of cfind
547 */
548static int
549cfindloop(struct vars *v,
550 struct cnfa *cnfa,
551 struct colormap *cm,
552 struct dfa *d,
553 struct dfa *s,
554 chr **coldp) /* where to put coldstart pointer */
555{
556 chr *begin;
557 chr *end;
558 chr *cold;
559 chr *open; /* open and close of range of possible starts */
560 chr *close;
561 chr *estart;
562 chr *estop;
563 int er;
564 int shorter = v->g->tree->flags & SHORTER;
565 int hitend;
566
567 assert(d != NULL && s != NULL);
568 cold = NULL;
569 close = v->search_start;
570 do
571 {
572 /* Search with the search RE for match range at/beyond "close" */
573 MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
574 close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
575 if (ISERR())
576 {
577 *coldp = cold;
578 return v->err;
579 }
580 if (close == NULL)
581 break; /* no more possible match anywhere */
582 assert(cold != NULL);
583 open = cold;
584 cold = NULL;
585 /* Search for matches starting between "open" and "close" inclusive */
586 MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
587 for (begin = open; begin <= close; begin++)
588 {
589 MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
590 estart = begin;
591 estop = v->stop;
592 for (;;)
593 {
594 /* Here we use the top node's detailed RE */
595 if (shorter)
596 end = shortest(v, d, begin, estart,
597 estop, (chr **) NULL, &hitend);
598 else
599 end = longest(v, d, begin, estop,
600 &hitend);
601 if (ISERR())
602 {
603 *coldp = cold;
604 return v->err;
605 }
606 if (hitend && cold == NULL)
607 cold = begin;
608 if (end == NULL)
609 break; /* no match with this begin point, try next */
610 MDEBUG(("tentative end %ld\n", LOFF(end)));
611 /* Dissect the potential match to see if it really matches */
612 er = cdissect(v, v->g->tree, begin, end);
613 if (er == REG_OKAY)
614 {
615 if (v->nmatch > 0)
616 {
617 v->pmatch[0].rm_so = OFF(begin);
618 v->pmatch[0].rm_eo = OFF(end);
619 }
620 *coldp = cold;
621 return REG_OKAY;
622 }
623 if (er != REG_NOMATCH)
624 {
625 ERR(er);
626 *coldp = cold;
627 return er;
628 }
629 /* Try next longer/shorter match with same begin point */
630 if (shorter)
631 {
632 if (end == estop)
633 break; /* no more, so try next begin point */
634 estart = end + 1;
635 }
636 else
637 {
638 if (end == begin)
639 break; /* no more, so try next begin point */
640 estop = end - 1;
641 }
642 } /* end loop over endpoint positions */
643 } /* end loop over beginning positions */
644
645 /*
646 * If we get here, there is no possible match starting at or before
647 * "close", so consider matches beyond that. We'll do a fresh search
648 * with the search RE to find a new promising match range.
649 */
650 close++;
651 } while (close < v->stop);
652
653 *coldp = cold;
654 return REG_NOMATCH;
655}
656
657/*
658 * zapallsubs - initialize all subexpression matches to "no match"
659 *
660 * Note that p[0], the overall-match location, is not touched.
661 */
662static void
664 size_t n)
665{
666 size_t i;
667
668 for (i = n - 1; i > 0; i--)
669 {
670 p[i].rm_so = -1;
671 p[i].rm_eo = -1;
672 }
673}
674
675/*
676 * zaptreesubs - initialize subexpressions within subtree to "no match"
677 */
678static void
680 struct subre *t)
681{
682 int n = t->capno;
683 struct subre *t2;
684
685 if (n > 0)
686 {
687 if ((size_t) n < v->nmatch)
688 {
689 v->pmatch[n].rm_so = -1;
690 v->pmatch[n].rm_eo = -1;
691 }
692 }
693
694 for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
695 zaptreesubs(v, t2);
696}
697
698/*
699 * subset - set subexpression match data for a successful subre
700 */
701static void
702subset(struct vars *v,
703 struct subre *sub,
704 chr *begin,
705 chr *end)
706{
707 int n = sub->capno;
708
709 assert(n > 0);
710 if ((size_t) n >= v->nmatch)
711 return;
712
713 MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
714 v->pmatch[n].rm_so = OFF(begin);
715 v->pmatch[n].rm_eo = OFF(end);
716}
717
718/*
719 * cdissect - check backrefs and determine subexpression matches
720 *
721 * cdissect recursively processes a subre tree to check matching of backrefs
722 * and/or identify submatch boundaries for capture nodes. The proposed match
723 * runs from "begin" to "end" (not including "end"), and we are basically
724 * "dissecting" it to see where the submatches are.
725 *
726 * Before calling any level of cdissect, the caller must have run the node's
727 * DFA and found that the proposed substring satisfies the DFA. (We make
728 * the caller do that because in concatenation and iteration nodes, it's
729 * much faster to check all the substrings against the child DFAs before we
730 * recurse.)
731 *
732 * A side-effect of a successful match is to save match locations for
733 * capturing subexpressions in v->pmatch[]. This is a little bit tricky,
734 * so we make the following rules:
735 * 1. Before initial entry to cdissect, all match data must have been
736 * cleared (this is seen to by zapallsubs).
737 * 2. Before any recursive entry to cdissect, the match data for that
738 * subexpression tree must be guaranteed clear (see zaptreesubs).
739 * 3. When returning REG_OKAY, each level of cdissect will have saved
740 * any relevant match locations.
741 * 4. When returning REG_NOMATCH, each level of cdissect will guarantee
742 * that its subexpression match locations are again clear.
743 * 5. No guarantees are made for error cases (i.e., other result codes).
744 * 6. When a level of cdissect abandons a successful sub-match, it will
745 * clear that subtree's match locations with zaptreesubs before trying
746 * any new DFA match or cdissect call for that subtree or any subtree
747 * to its right (that is, any subtree that could have a backref into the
748 * abandoned match).
749 * This may seem overly complicated, but it's difficult to simplify it
750 * because of the provision that match locations must be reset before
751 * any fresh DFA match (a rule that is needed to make dfa_backref safe).
752 * That means it won't work to just reset relevant match locations at the
753 * start of each cdissect level.
754 */
755static int /* regexec return code */
756cdissect(struct vars *v,
757 struct subre *t,
758 chr *begin, /* beginning of relevant substring */
759 chr *end) /* end of same */
760{
761 int er;
762
763 assert(t != NULL);
764 MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
765
766 /* handy place to check for operation cancel */
767 INTERRUPT(v->re);
768 /* ... and stack overrun */
769 if (STACK_TOO_DEEP(v->re))
770 return REG_ETOOBIG;
771
772 switch (t->op)
773 {
774 case '=': /* terminal node */
775 assert(t->child == NULL);
776 er = REG_OKAY; /* no action, parent did the work */
777 break;
778 case 'b': /* back reference */
779 assert(t->child == NULL);
780 er = cbrdissect(v, t, begin, end);
781 break;
782 case '.': /* concatenation */
783 assert(t->child != NULL);
784 if (t->child->flags & SHORTER) /* reverse scan */
785 er = crevcondissect(v, t, begin, end);
786 else
787 er = ccondissect(v, t, begin, end);
788 break;
789 case '|': /* alternation */
790 assert(t->child != NULL);
791 er = caltdissect(v, t, begin, end);
792 break;
793 case '*': /* iteration */
794 assert(t->child != NULL);
795 if (t->child->flags & SHORTER) /* reverse scan */
796 er = creviterdissect(v, t, begin, end);
797 else
798 er = citerdissect(v, t, begin, end);
799 break;
800 case '(': /* no-op capture node */
801 assert(t->child != NULL);
802 er = cdissect(v, t->child, begin, end);
803 break;
804 default:
805 er = REG_ASSERT;
806 break;
807 }
808
809 /*
810 * We should never have a match failure unless backrefs lurk below;
811 * otherwise, either caller failed to check the DFA, or there's some
812 * inconsistency between the DFA and the node's innards.
813 */
814 assert(er != REG_NOMATCH || (t->flags & BACKR));
815
816 /*
817 * If this node is marked as capturing, save successful match's location.
818 */
819 if (t->capno > 0 && er == REG_OKAY)
820 subset(v, t, begin, end);
821
822 return er;
823}
824
825/*
826 * ccondissect - dissect match for concatenation node
827 */
828static int /* regexec return code */
830 struct subre *t,
831 chr *begin, /* beginning of relevant substring */
832 chr *end) /* end of same */
833{
834 struct subre *left = t->child;
835 struct subre *right = left->sibling;
836 struct dfa *d;
837 struct dfa *d2;
838 chr *mid;
839 int er;
840
841 assert(t->op == '.');
842 assert(left != NULL && left->cnfa.nstates > 0);
843 assert(right != NULL && right->cnfa.nstates > 0);
844 assert(right->sibling == NULL);
845 assert(!(left->flags & SHORTER));
846
847 d = getsubdfa(v, left);
848 NOERR();
849 d2 = getsubdfa(v, right);
850 NOERR();
851 MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
852
853 /* pick a tentative midpoint */
854 mid = longest(v, d, begin, end, (int *) NULL);
855 NOERR();
856 if (mid == NULL)
857 return REG_NOMATCH;
858 MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
859
860 /* iterate until satisfaction or failure */
861 for (;;)
862 {
863 /* try this midpoint on for size */
864 if (longest(v, d2, mid, end, (int *) NULL) == end)
865 {
866 er = cdissect(v, left, begin, mid);
867 if (er == REG_OKAY)
868 {
869 er = cdissect(v, right, mid, end);
870 if (er == REG_OKAY)
871 {
872 /* satisfaction */
873 MDEBUG(("%d: successful\n", t->id));
874 return REG_OKAY;
875 }
876 /* Reset left's matches (right should have done so itself) */
877 zaptreesubs(v, left);
878 }
879 if (er != REG_NOMATCH)
880 return er;
881 }
882 NOERR();
883
884 /* that midpoint didn't work, find a new one */
885 if (mid == begin)
886 {
887 /* all possibilities exhausted */
888 MDEBUG(("%d: no midpoint\n", t->id));
889 return REG_NOMATCH;
890 }
891 mid = longest(v, d, begin, mid - 1, (int *) NULL);
892 NOERR();
893 if (mid == NULL)
894 {
895 /* failed to find a new one */
896 MDEBUG(("%d: failed midpoint\n", t->id));
897 return REG_NOMATCH;
898 }
899 MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
900 }
901
902 /* can't get here */
903 return REG_ASSERT;
904}
905
906/*
907 * crevcondissect - dissect match for concatenation node, shortest-first
908 */
909static int /* regexec return code */
911 struct subre *t,
912 chr *begin, /* beginning of relevant substring */
913 chr *end) /* end of same */
914{
915 struct subre *left = t->child;
916 struct subre *right = left->sibling;
917 struct dfa *d;
918 struct dfa *d2;
919 chr *mid;
920 int er;
921
922 assert(t->op == '.');
923 assert(left != NULL && left->cnfa.nstates > 0);
924 assert(right != NULL && right->cnfa.nstates > 0);
925 assert(right->sibling == NULL);
926 assert(left->flags & SHORTER);
927
928 d = getsubdfa(v, left);
929 NOERR();
930 d2 = getsubdfa(v, right);
931 NOERR();
932 MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
933
934 /* pick a tentative midpoint */
935 mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
936 NOERR();
937 if (mid == NULL)
938 return REG_NOMATCH;
939 MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
940
941 /* iterate until satisfaction or failure */
942 for (;;)
943 {
944 /* try this midpoint on for size */
945 if (longest(v, d2, mid, end, (int *) NULL) == end)
946 {
947 er = cdissect(v, left, begin, mid);
948 if (er == REG_OKAY)
949 {
950 er = cdissect(v, right, mid, end);
951 if (er == REG_OKAY)
952 {
953 /* satisfaction */
954 MDEBUG(("%d: successful\n", t->id));
955 return REG_OKAY;
956 }
957 /* Reset left's matches (right should have done so itself) */
958 zaptreesubs(v, left);
959 }
960 if (er != REG_NOMATCH)
961 return er;
962 }
963 NOERR();
964
965 /* that midpoint didn't work, find a new one */
966 if (mid == end)
967 {
968 /* all possibilities exhausted */
969 MDEBUG(("%d: no midpoint\n", t->id));
970 return REG_NOMATCH;
971 }
972 mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
973 NOERR();
974 if (mid == NULL)
975 {
976 /* failed to find a new one */
977 MDEBUG(("%d: failed midpoint\n", t->id));
978 return REG_NOMATCH;
979 }
980 MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
981 }
982
983 /* can't get here */
984 return REG_ASSERT;
985}
986
987/*
988 * cbrdissect - dissect match for backref node
989 *
990 * The backref match might already have been verified by dfa_backref(),
991 * but we don't know that for sure so must check it here.
992 */
993static int /* regexec return code */
994cbrdissect(struct vars *v,
995 struct subre *t,
996 chr *begin, /* beginning of relevant substring */
997 chr *end) /* end of same */
998{
999 int n = t->backno;
1000 size_t numreps;
1001 size_t tlen;
1002 size_t brlen;
1003 chr *brstring;
1004 chr *p;
1005 int min = t->min;
1006 int max = t->max;
1007
1008 assert(t != NULL);
1009 assert(t->op == 'b');
1010 assert(n >= 0);
1011 assert((size_t) n < v->nmatch);
1012
1013 MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
1014 LOFF(begin), LOFF(end)));
1015
1016 /* get the backreferenced string */
1017 if (v->pmatch[n].rm_so == -1)
1018 return REG_NOMATCH;
1019 brstring = v->start + v->pmatch[n].rm_so;
1020 brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
1021
1022 /* special cases for zero-length strings */
1023 if (brlen == 0)
1024 {
1025 /*
1026 * matches only if target is zero length, but any number of
1027 * repetitions can be considered to be present
1028 */
1029 if (begin == end && min <= max)
1030 {
1031 MDEBUG(("%d: backref matched trivially\n", t->id));
1032 return REG_OKAY;
1033 }
1034 return REG_NOMATCH;
1035 }
1036 if (begin == end)
1037 {
1038 /* matches only if zero repetitions are okay */
1039 if (min == 0)
1040 {
1041 MDEBUG(("%d: backref matched trivially\n", t->id));
1042 return REG_OKAY;
1043 }
1044 return REG_NOMATCH;
1045 }
1046
1047 /*
1048 * check target length to see if it could possibly be an allowed number of
1049 * repetitions of brstring
1050 */
1051 assert(end > begin);
1052 tlen = end - begin;
1053 if (tlen % brlen != 0)
1054 return REG_NOMATCH;
1055 numreps = tlen / brlen;
1056 if (numreps < min || (numreps > max && max != DUPINF))
1057 return REG_NOMATCH;
1058
1059 /* okay, compare the actual string contents */
1060 p = begin;
1061 while (numreps-- > 0)
1062 {
1063 if ((*v->g->compare) (brstring, p, brlen) != 0)
1064 return REG_NOMATCH;
1065 p += brlen;
1066 }
1067
1068 MDEBUG(("%d: backref matched\n", t->id));
1069 return REG_OKAY;
1070}
1071
1072/*
1073 * caltdissect - dissect match for alternation node
1074 */
1075static int /* regexec return code */
1077 struct subre *t,
1078 chr *begin, /* beginning of relevant substring */
1079 chr *end) /* end of same */
1080{
1081 struct dfa *d;
1082 int er;
1083
1084 assert(t->op == '|');
1085
1086 t = t->child;
1087 /* there should be at least 2 alternatives */
1088 assert(t != NULL && t->sibling != NULL);
1089
1090 while (t != NULL)
1091 {
1092 assert(t->cnfa.nstates > 0);
1093
1094 MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1095
1096 d = getsubdfa(v, t);
1097 NOERR();
1098 if (longest(v, d, begin, end, (int *) NULL) == end)
1099 {
1100 MDEBUG(("%d: caltdissect matched\n", t->id));
1101 er = cdissect(v, t, begin, end);
1102 if (er != REG_NOMATCH)
1103 return er;
1104 }
1105 NOERR();
1106
1107 t = t->sibling;
1108 }
1109
1110 return REG_NOMATCH;
1111}
1112
1113/*
1114 * citerdissect - dissect match for iteration node
1115 */
1116static int /* regexec return code */
1118 struct subre *t,
1119 chr *begin, /* beginning of relevant substring */
1120 chr *end) /* end of same */
1121{
1122 struct dfa *d;
1123 chr **endpts;
1124 chr *limit;
1125 int min_matches;
1126 size_t max_matches;
1127 int nverified;
1128 int k;
1129 int i;
1130 int er;
1131
1132 assert(t->op == '*');
1133 assert(t->child != NULL && t->child->cnfa.nstates > 0);
1134 assert(!(t->child->flags & SHORTER));
1135 assert(begin <= end);
1136
1137 MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1138
1139 /*
1140 * For the moment, assume the minimum number of matches is 1. If zero
1141 * matches are allowed, and the target string is empty, we are allowed to
1142 * match regardless of the contents of the iter node --- but we would
1143 * prefer to match once, so that capturing parens get set. (An example of
1144 * the concern here is a pattern like "()*\1", which historically this
1145 * code has allowed to succeed.) Therefore, we deal with the zero-matches
1146 * case at the bottom, after failing to find any other way to match.
1147 */
1148 min_matches = t->min;
1149 if (min_matches <= 0)
1150 min_matches = 1;
1151
1152 /*
1153 * We need workspace to track the endpoints of each sub-match. Normally
1154 * we consider only nonzero-length sub-matches, so there can be at most
1155 * end-begin of them. However, if min is larger than that, we will also
1156 * consider zero-length sub-matches in order to find enough matches.
1157 *
1158 * For convenience, endpts[0] contains the "begin" pointer and we store
1159 * sub-match endpoints in endpts[1..max_matches].
1160 */
1161 max_matches = end - begin;
1162 if (max_matches > t->max && t->max != DUPINF)
1163 max_matches = t->max;
1164 if (max_matches < min_matches)
1165 max_matches = min_matches;
1166 endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1167 if (endpts == NULL)
1168 return REG_ESPACE;
1169 endpts[0] = begin;
1170
1171 d = getsubdfa(v, t->child);
1172 if (ISERR())
1173 {
1174 FREE(endpts);
1175 return v->err;
1176 }
1177
1178 /*
1179 * Our strategy is to first find a set of sub-match endpoints that are
1180 * valid according to the child node's DFA, and then recursively dissect
1181 * each sub-match to confirm validity. If any validity check fails,
1182 * backtrack that sub-match and try again. And, when we next try for a
1183 * validity check, we need not recheck any successfully verified
1184 * sub-matches that we didn't move the endpoints of. nverified remembers
1185 * how many sub-matches are currently known okay.
1186 */
1187
1188 /* initialize to consider first sub-match */
1189 nverified = 0;
1190 k = 1;
1191 limit = end;
1192
1193 /* iterate until satisfaction or failure */
1194 while (k > 0)
1195 {
1196 /* try to find an endpoint for the k'th sub-match */
1197 endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
1198 if (ISERR())
1199 {
1200 FREE(endpts);
1201 return v->err;
1202 }
1203 if (endpts[k] == NULL)
1204 {
1205 /* no match possible, so see if we can shorten previous one */
1206 k--;
1207 goto backtrack;
1208 }
1209 MDEBUG(("%d: working endpoint %d: %ld\n",
1210 t->id, k, LOFF(endpts[k])));
1211
1212 /* k'th sub-match can no longer be considered verified */
1213 if (nverified >= k)
1214 nverified = k - 1;
1215
1216 if (endpts[k] != end)
1217 {
1218 /* haven't reached end yet, try another iteration if allowed */
1219 if (k >= max_matches)
1220 {
1221 /* must try to shorten some previous match */
1222 k--;
1223 goto backtrack;
1224 }
1225
1226 /* reject zero-length match unless necessary to achieve min */
1227 if (endpts[k] == endpts[k - 1] &&
1228 (k >= min_matches || min_matches - k < end - endpts[k]))
1229 goto backtrack;
1230
1231 k++;
1232 limit = end;
1233 continue;
1234 }
1235
1236 /*
1237 * We've identified a way to divide the string into k sub-matches that
1238 * works so far as the child DFA can tell. If k is an allowed number
1239 * of matches, start the slow part: recurse to verify each sub-match.
1240 * We always have k <= max_matches, needn't check that.
1241 */
1242 if (k < min_matches)
1243 goto backtrack;
1244
1245 MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1246
1247 for (i = nverified + 1; i <= k; i++)
1248 {
1249 /* zap any match data from a non-last iteration */
1250 zaptreesubs(v, t->child);
1251 er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1252 if (er == REG_OKAY)
1253 {
1254 nverified = i;
1255 continue;
1256 }
1257 if (er == REG_NOMATCH)
1258 break;
1259 /* oops, something failed */
1260 FREE(endpts);
1261 return er;
1262 }
1263
1264 if (i > k)
1265 {
1266 /* satisfaction */
1267 MDEBUG(("%d: successful\n", t->id));
1268 FREE(endpts);
1269 return REG_OKAY;
1270 }
1271
1272 /* i'th match failed to verify, so backtrack it */
1273 k = i;
1274
1275backtrack:
1276
1277 /*
1278 * Must consider shorter versions of the k'th sub-match. However,
1279 * we'll only ask for a zero-length match if necessary.
1280 */
1281 while (k > 0)
1282 {
1283 chr *prev_end = endpts[k - 1];
1284
1285 if (endpts[k] > prev_end)
1286 {
1287 limit = endpts[k] - 1;
1288 if (limit > prev_end ||
1289 (k < min_matches && min_matches - k >= end - prev_end))
1290 {
1291 /* break out of backtrack loop, continue the outer one */
1292 break;
1293 }
1294 }
1295 /* can't shorten k'th sub-match any more, consider previous one */
1296 k--;
1297 }
1298 }
1299
1300 /* all possibilities exhausted */
1301 FREE(endpts);
1302
1303 /*
1304 * Now consider the possibility that we can match to a zero-length string
1305 * by using zero repetitions.
1306 */
1307 if (t->min == 0 && begin == end)
1308 {
1309 MDEBUG(("%d: allowing zero matches\n", t->id));
1310 return REG_OKAY;
1311 }
1312
1313 MDEBUG(("%d: failed\n", t->id));
1314 return REG_NOMATCH;
1315}
1316
1317/*
1318 * creviterdissect - dissect match for iteration node, shortest-first
1319 */
1320static int /* regexec return code */
1322 struct subre *t,
1323 chr *begin, /* beginning of relevant substring */
1324 chr *end) /* end of same */
1325{
1326 struct dfa *d;
1327 chr **endpts;
1328 chr *limit;
1329 int min_matches;
1330 size_t max_matches;
1331 int nverified;
1332 int k;
1333 int i;
1334 int er;
1335
1336 assert(t->op == '*');
1337 assert(t->child != NULL && t->child->cnfa.nstates > 0);
1338 assert(t->child->flags & SHORTER);
1339 assert(begin <= end);
1340
1341 MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1342
1343 /*
1344 * If zero matches are allowed, and target string is empty, just declare
1345 * victory. OTOH, if target string isn't empty, zero matches can't work
1346 * so we pretend the min is 1.
1347 */
1348 min_matches = t->min;
1349 if (min_matches <= 0)
1350 {
1351 if (begin == end)
1352 {
1353 MDEBUG(("%d: allowing zero matches\n", t->id));
1354 return REG_OKAY;
1355 }
1356 min_matches = 1;
1357 }
1358
1359 /*
1360 * We need workspace to track the endpoints of each sub-match. Normally
1361 * we consider only nonzero-length sub-matches, so there can be at most
1362 * end-begin of them. However, if min is larger than that, we will also
1363 * consider zero-length sub-matches in order to find enough matches.
1364 *
1365 * For convenience, endpts[0] contains the "begin" pointer and we store
1366 * sub-match endpoints in endpts[1..max_matches].
1367 */
1368 max_matches = end - begin;
1369 if (max_matches > t->max && t->max != DUPINF)
1370 max_matches = t->max;
1371 if (max_matches < min_matches)
1372 max_matches = min_matches;
1373 endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1374 if (endpts == NULL)
1375 return REG_ESPACE;
1376 endpts[0] = begin;
1377
1378 d = getsubdfa(v, t->child);
1379 if (ISERR())
1380 {
1381 FREE(endpts);
1382 return v->err;
1383 }
1384
1385 /*
1386 * Our strategy is to first find a set of sub-match endpoints that are
1387 * valid according to the child node's DFA, and then recursively dissect
1388 * each sub-match to confirm validity. If any validity check fails,
1389 * backtrack that sub-match and try again. And, when we next try for a
1390 * validity check, we need not recheck any successfully verified
1391 * sub-matches that we didn't move the endpoints of. nverified remembers
1392 * how many sub-matches are currently known okay.
1393 */
1394
1395 /* initialize to consider first sub-match */
1396 nverified = 0;
1397 k = 1;
1398 limit = begin;
1399
1400 /* iterate until satisfaction or failure */
1401 while (k > 0)
1402 {
1403 /* disallow zero-length match unless necessary to achieve min */
1404 if (limit == endpts[k - 1] &&
1405 limit != end &&
1406 (k >= min_matches || min_matches - k < end - limit))
1407 limit++;
1408
1409 /* if this is the last allowed sub-match, it must reach to the end */
1410 if (k >= max_matches)
1411 limit = end;
1412
1413 /* try to find an endpoint for the k'th sub-match */
1414 endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
1415 (chr **) NULL, (int *) NULL);
1416 if (ISERR())
1417 {
1418 FREE(endpts);
1419 return v->err;
1420 }
1421 if (endpts[k] == NULL)
1422 {
1423 /* no match possible, so see if we can lengthen previous one */
1424 k--;
1425 goto backtrack;
1426 }
1427 MDEBUG(("%d: working endpoint %d: %ld\n",
1428 t->id, k, LOFF(endpts[k])));
1429
1430 /* k'th sub-match can no longer be considered verified */
1431 if (nverified >= k)
1432 nverified = k - 1;
1433
1434 if (endpts[k] != end)
1435 {
1436 /* haven't reached end yet, try another iteration if allowed */
1437 if (k >= max_matches)
1438 {
1439 /* must try to lengthen some previous match */
1440 k--;
1441 goto backtrack;
1442 }
1443
1444 k++;
1445 limit = endpts[k - 1];
1446 continue;
1447 }
1448
1449 /*
1450 * We've identified a way to divide the string into k sub-matches that
1451 * works so far as the child DFA can tell. If k is an allowed number
1452 * of matches, start the slow part: recurse to verify each sub-match.
1453 * We always have k <= max_matches, needn't check that.
1454 */
1455 if (k < min_matches)
1456 goto backtrack;
1457
1458 MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1459
1460 for (i = nverified + 1; i <= k; i++)
1461 {
1462 /* zap any match data from a non-last iteration */
1463 zaptreesubs(v, t->child);
1464 er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1465 if (er == REG_OKAY)
1466 {
1467 nverified = i;
1468 continue;
1469 }
1470 if (er == REG_NOMATCH)
1471 break;
1472 /* oops, something failed */
1473 FREE(endpts);
1474 return er;
1475 }
1476
1477 if (i > k)
1478 {
1479 /* satisfaction */
1480 MDEBUG(("%d: successful\n", t->id));
1481 FREE(endpts);
1482 return REG_OKAY;
1483 }
1484
1485 /* i'th match failed to verify, so backtrack it */
1486 k = i;
1487
1488backtrack:
1489
1490 /*
1491 * Must consider longer versions of the k'th sub-match.
1492 */
1493 while (k > 0)
1494 {
1495 if (endpts[k] < end)
1496 {
1497 limit = endpts[k] + 1;
1498 /* break out of backtrack loop, continue the outer one */
1499 break;
1500 }
1501 /* can't lengthen k'th sub-match any more, consider previous one */
1502 k--;
1503 }
1504 }
1505
1506 /* all possibilities exhausted */
1507 MDEBUG(("%d: failed\n", t->id));
1508 FREE(endpts);
1509 return REG_NOMATCH;
1510}
1511
1512
1513
1514#include "rege_dfa.c"
static void cleanup(void)
Definition: bootstrap.c:713
return str start
for(;;)
#define close(a)
Definition: win32.h:12
int i
Definition: isn.c:74
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:78
const void size_t len
void pg_set_regex_collation(Oid collation)
#define INTERRUPT(re)
Definition: regcustom.h:55
#define MALLOC(n)
Definition: regcustom.h:52
pg_wchar chr
Definition: regcustom.h:59
#define assert(x)
Definition: regcustom.h:56
#define REG_NOMATCH
Definition: regex.h:216
#define REG_ASSERT
Definition: regex.h:229
#define REG_UIMPOSSIBLE
Definition: regex.h:150
#define REG_MTRACE
Definition: regex.h:206
#define REG_FTRACE
Definition: regex.h:205
#define REG_EXPECT
Definition: regex.h:191
#define REG_INVARG
Definition: regex.h:230
#define REG_ETOOBIG
Definition: regex.h:233
#define regmatch_t
Definition: regex.h:246
#define REG_OKAY
Definition: regex.h:215
#define REG_MIXED
Definition: regex.h:231
#define REG_NOSUB
Definition: regex.h:185
#define regex_t
Definition: regex.h:245
#define REG_ESPACE
Definition: regex.h:227
#define REG_UBACKREF
Definition: regex.h:138
static int find(struct vars *v, struct cnfa *cnfa, struct colormap *cm)
Definition: regexec.c:419
static int citerdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:1117
static void freedfa(struct dfa *d)
#define DOMALLOC
Definition: regexec.c:101
static chr * shortest(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
#define LOFF(p)
Definition: regexec.c:132
#define NOERR()
Definition: regexec.c:130
static void zapallsubs(regmatch_t *p, size_t n)
Definition: regexec.c:663
static chr * dfa_backref(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, bool shortest)
static int lacon(struct vars *v, struct cnfa *pcnfa, chr *cp, color co)
#define ISERR()
Definition: regexec.c:127
static int ccondissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:829
static int cfind(struct vars *v, struct cnfa *cnfa, struct colormap *cm)
Definition: regexec.c:509
#define ERR(e)
Definition: regexec.c:129
static int creviterdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:1321
static int cfindloop(struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct dfa *d, struct dfa *s, chr **coldp)
Definition: regexec.c:549
static unsigned hash(unsigned *uv, int n)
static int cbrdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:994
static struct dfa * getladfa(struct vars *v, int n)
Definition: regexec.c:400
static chr * lastcold(struct vars *v, struct dfa *d)
static struct dfa * getsubdfa(struct vars *v, struct subre *t)
Definition: regexec.c:372
static chr * longest(struct vars *v, struct dfa *d, chr *start, chr *stop, int *hitstopp)
static struct sset * getvacant(struct vars *v, struct dfa *d, chr *cp, chr *start)
static int crevcondissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:910
#define WORK
Definition: regexec.c:87
#define LOCALMAT
static struct sset * miss(struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
static struct dfa * newdfa(struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct smalldfa *sml)
#define OFF(p)
Definition: regexec.c:131
static struct sset * initialize(struct vars *v, struct dfa *d, chr *start)
static int cdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:756
static void zaptreesubs(struct vars *v, struct subre *t)
Definition: regexec.c:679
static struct sset * pickss(struct vars *v, struct dfa *d, chr *cp, chr *start)
static int caltdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:1076
#define FEWCOLORS
Definition: regexec.c:91
static void subset(struct vars *v, struct subre *sub, chr *begin, chr *end)
Definition: regexec.c:702
int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
Definition: regexec.c:185
#define FEWSTATES
Definition: regexec.c:90
static int matchuntil(struct vars *v, struct dfa *d, chr *probe, struct sset **lastcss, chr **lastcp)
#define LOCALDFAS
#define BACKR
Definition: regguts.h:486
#define STACK_TOO_DEEP(re)
Definition: regguts.h:523
#define REMAGIC
Definition: regguts.h:101
short color
Definition: regguts.h:155
#define DUPINF
Definition: regguts.h:99
#define MDEBUG(arglist)
Definition: regguts.h:122
#define VS(x)
Definition: regguts.h:61
#define SHORTER
Definition: regguts.h:483
Definition: regexec.c:40
struct sset * ss
Definition: regexec.c:41
color co
Definition: regexec.c:42
Definition: regguts.h:407
int nstates
Definition: regguts.h:408
Definition: regexec.c:64
int nstates
Definition: regexec.c:67
bool arraysmalloced
Definition: regexec.c:84
chr * lastpost
Definition: regexec.c:77
chr * lastnopr
Definition: regexec.c:78
int wordsper
Definition: regexec.c:69
unsigned * work
Definition: regexec.c:72
unsigned * statesarea
Definition: regexec.c:71
struct arcp * incarea
Definition: regexec.c:74
struct sset * search
Definition: regexec.c:79
int ncolors
Definition: regexec.c:68
short backmin
Definition: regexec.c:81
struct sset * ssets
Definition: regexec.c:70
struct cnfa * cnfa
Definition: regexec.c:75
int backno
Definition: regexec.c:80
bool ismalloced
Definition: regexec.c:83
struct colormap * cm
Definition: regexec.c:76
short backmax
Definition: regexec.c:82
struct sset ** outsarea
Definition: regexec.c:73
int nssused
Definition: regexec.c:66
int nssets
Definition: regexec.c:65
Definition: regguts.h:531
struct subre * tree
Definition: regguts.h:537
struct subre * lacons
Definition: regguts.h:542
long info
Definition: regguts.h:535
struct cnfa search
Definition: regguts.h:538
int ntree
Definition: regguts.h:539
int cflags
Definition: regguts.h:534
size_t nsub
Definition: regguts.h:536
int nlacons
Definition: regguts.h:543
struct colormap cmap
Definition: regguts.h:540
pg_regoff_t rm_eo
Definition: regex.h:164
pg_regoff_t rm_so
Definition: regex.h:163
pg_regmatch_t rm_extend
Definition: regex.h:170
unsigned statesarea[FEWSTATES *2+WORK]
Definition: regexec.c:96
struct arcp incarea[FEWSTATES *2 *FEWCOLORS]
Definition: regexec.c:98
struct sset * outsarea[FEWSTATES *2 *FEWCOLORS]
Definition: regexec.c:97
struct sset ssets[FEWSTATES *2]
Definition: regexec.c:95
Definition: regexec.c:46
struct arcp ins
Definition: regexec.c:57
unsigned hash
Definition: regexec.c:48
struct arcp * inchain
Definition: regexec.c:60
chr * lastseen
Definition: regexec.c:58
unsigned * states
Definition: regexec.c:47
struct sset ** outs
Definition: regexec.c:59
int flags
Definition: regexec.c:52
Definition: regguts.h:479
int backno
Definition: regguts.h:500
char op
Definition: regguts.h:480
struct state * end
Definition: regguts.h:506
short min
Definition: regguts.h:501
short max
Definition: regguts.h:502
struct subre * sibling
Definition: regguts.h:504
char flags
Definition: regguts.h:481
int id
Definition: regguts.h:498
struct cnfa cnfa
Definition: regguts.h:507
struct subre * child
Definition: regguts.h:503
int capno
Definition: regguts.h:499
struct state * begin
Definition: regguts.h:505
Definition: regcomp.c:282
size_t nmatch
Definition: regexec.c:111
struct dfa ** ladfas
Definition: regexec.c:119
const chr * stop
Definition: regcomp.c:285
struct dfa ** subdfas
Definition: regexec.c:118
chr * search_start
Definition: regexec.c:115
struct sset ** lblastcss
Definition: regexec.c:120
int err
Definition: regcomp.c:286
struct guts * g
Definition: regexec.c:109
struct smalldfa dfa1
Definition: regexec.c:122
chr ** lblastcp
Definition: regexec.c:121
regex_t * re
Definition: regcomp.c:283
int eflags
Definition: regexec.c:110
chr * start
Definition: regexec.c:114
rm_detail_t * details
Definition: regexec.c:113
regmatch_t * pmatch
Definition: regexec.c:112
chr * stop
Definition: regexec.c:116
struct smalldfa dfa2
Definition: regexec.c:123
@ FREE
Definition: task.c:94