PostgreSQL Source Code git master
Loading...
Searching...
No Matches
regexec.c File Reference
#include "regex/regguts.h"
#include "rege_dfa.c"
Include dependency graph for regexec.c:

Go to the source code of this file.

Data Structures

struct  arcp
 
struct  sset
 
struct  dfa
 
struct  smalldfa
 
struct  vars
 

Macros

#define HASH(bv, nw)   (((nw) == 1) ? *(bv) : hash(bv, nw))
 
#define HIT(h, bv, ss, nw)
 
#define STARTER   01 /* the initial state set */
 
#define POSTSTATE   02 /* includes the goal state */
 
#define LOCKED   04 /* locked in cache */
 
#define NOPROGRESS   010 /* zero-progress state set */
 
#define WORK   1 /* number of work bitvectors needed */
 
#define FEWSTATES   20 /* must be less than UBITS */
 
#define FEWCOLORS   15
 
#define DOMALLOC   ((struct smalldfa *)NULL) /* force malloc */
 
#define VISERR(vv)   ((vv)->err != 0) /* have we seen an error yet? */
 
#define ISERR()   VISERR(v)
 
#define VERR(vv, e)   ((vv)->err = ((vv)->err ? (vv)->err : (e)))
 
#define ERR(e)   VERR(v, e) /* record an error */
 
#define NOERR()   {if (ISERR()) return v->err;} /* if error seen, return it */
 
#define OFF(p)   ((p) - v->start)
 
#define LOFF(p)   ((long)OFF(p))
 
#define LOCALMAT   20
 
#define LOCALDFAS   40
 

Functions

static struct dfagetsubdfa (struct vars *v, struct subre *t)
 
static struct dfagetladfa (struct vars *v, int n)
 
static int find (struct vars *v, struct cnfa *cnfa, struct colormap *cm)
 
static int cfind (struct vars *v, struct cnfa *cnfa, struct colormap *cm)
 
static int cfindloop (struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct dfa *d, struct dfa *s, chr **coldp)
 
static void zapallsubs (regmatch_t *p, size_t n)
 
static void zaptreesubs (struct vars *v, struct subre *t)
 
static void subset (struct vars *v, struct subre *sub, chr *begin, chr *end)
 
static int cdissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int ccondissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int crevcondissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int cbrdissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int caltdissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int citerdissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int creviterdissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static chrlongest (struct vars *v, struct dfa *d, chr *start, chr *stop, int *hitstopp)
 
static chrshortest (struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
 
static int matchuntil (struct vars *v, struct dfa *d, chr *probe, struct sset **lastcss, chr **lastcp)
 
static chrdfa_backref (struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, bool shortest)
 
static chrlastcold (struct vars *v, struct dfa *d)
 
static struct dfanewdfa (struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct smalldfa *sml)
 
static void freedfa (struct dfa *d)
 
static unsigned hash (unsigned *uv, int n)
 
static struct ssetinitialize (struct vars *v, struct dfa *d, chr *start)
 
static struct ssetmiss (struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
 
static int lacon (struct vars *v, struct cnfa *pcnfa, chr *cp, color co)
 
static struct ssetgetvacant (struct vars *v, struct dfa *d, chr *cp, chr *start)
 
static struct ssetpickss (struct vars *v, struct dfa *d, chr *cp, chr *start)
 
int pg_regexec (regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
 

Macro Definition Documentation

◆ DOMALLOC

#define DOMALLOC   ((struct smalldfa *)NULL) /* force malloc */

Definition at line 101 of file regexec.c.

◆ ERR

#define ERR (   e)    VERR(v, e) /* record an error */

Definition at line 129 of file regexec.c.

◆ FEWCOLORS

#define FEWCOLORS   15

Definition at line 91 of file regexec.c.

◆ FEWSTATES

#define FEWSTATES   20 /* must be less than UBITS */

Definition at line 90 of file regexec.c.

◆ HASH

#define HASH (   bv,
  nw 
)    (((nw) == 1) ? *(bv) : hash(bv, nw))

Definition at line 49 of file regexec.c.

◆ HIT

#define HIT (   h,
  bv,
  ss,
  nw 
)
Value:
((ss)->hash == (h) && ((nw) == 1 || \
memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
static int fb(int x)
static unsigned hash(unsigned *uv, int n)
#define VS(x)
Definition regguts.h:61

Definition at line 50 of file regexec.c.

64{
65 int nssets; /* size of cache */
66 int nssused; /* how many entries occupied yet */
67 int nstates; /* number of states */
68 int ncolors; /* length of outarc and inchain vectors */
69 int wordsper; /* length of state-set bitvectors */
70 struct sset *ssets; /* state-set cache */
71 unsigned *statesarea; /* bitvector storage */
72 unsigned *work; /* pointer to work area within statesarea */
73 struct sset **outsarea; /* outarc-vector storage */
74 struct arcp *incarea; /* inchain storage */
75 struct cnfa *cnfa;
76 struct colormap *cm;
77 chr *lastpost; /* location of last cache-flushed success */
78 chr *lastnopr; /* location of last cache-flushed NOPROGRESS */
79 struct sset *search; /* replacement-search-pointer memory */
80 int backno; /* if DFA for a backref, subno it refers to */
81 short backmin; /* min repetitions for backref */
82 short backmax; /* max repetitions for backref */
83 bool ismalloced; /* should this struct dfa be freed? */
84 bool arraysmalloced; /* should its subsidiary arrays be freed? */
85};
86
87#define WORK 1 /* number of work bitvectors needed */
88
89/* setup for non-malloc allocation for small cases */
90#define FEWSTATES 20 /* must be less than UBITS */
91#define FEWCOLORS 15
92struct smalldfa
93{
94 struct dfa dfa; /* must be first */
95 struct sset ssets[FEWSTATES * 2];
96 unsigned statesarea[FEWSTATES * 2 + WORK];
97 struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS];
98 struct arcp incarea[FEWSTATES * 2 * FEWCOLORS];
99};
100
101#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */
102
103
104
105/* internal variables, bundled for easy passing around */
106struct vars
107{
108 regex_t *re;
109 struct guts *g;
110 int eflags; /* copies of arguments */
111 size_t nmatch;
114 chr *start; /* start of string */
115 chr *search_start; /* search start of string */
116 chr *stop; /* just past end of string */
117 int err; /* error code if any (0 none) */
118 struct dfa **subdfas; /* per-tree-subre DFAs */
119 struct dfa **ladfas; /* per-lacon-subre DFAs */
120 struct sset **lblastcss; /* per-lacon-subre lookbehind restart data */
121 chr **lblastcp; /* per-lacon-subre lookbehind restart data */
122 struct smalldfa dfa1;
123 struct smalldfa dfa2;
124};
125
126#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
127#define ISERR() VISERR(v)
128#define VERR(vv,e) ((vv)->err = ((vv)->err ? (vv)->err : (e)))
129#define ERR(e) VERR(v, e) /* record an error */
130#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */
131#define OFF(p) ((p) - v->start)
132#define LOFF(p) ((long)OFF(p))
133
134
135
136/*
137 * forward declarations
138 */
139/* === regexec.c === */
140static struct dfa *getsubdfa(struct vars *v, struct subre *t);
141static struct dfa *getladfa(struct vars *v, int n);
142static int find(struct vars *v, struct cnfa *cnfa, struct colormap *cm);
143static int cfind(struct vars *v, struct cnfa *cnfa, struct colormap *cm);
144static int cfindloop(struct vars *v, struct cnfa *cnfa, struct colormap *cm,
145 struct dfa *d, struct dfa *s, chr **coldp);
146static void zapallsubs(regmatch_t *p, size_t n);
147static void zaptreesubs(struct vars *v, struct subre *t);
148static void subset(struct vars *v, struct subre *sub, chr *begin, chr *end);
149static int cdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
150static int ccondissect(struct vars *v, struct subre *t, chr *begin, chr *end);
151static int crevcondissect(struct vars *v, struct subre *t, chr *begin, chr *end);
152static int cbrdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
153static int caltdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
154static int citerdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
155static int creviterdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
156
157/* === rege_dfa.c === */
158static chr *longest(struct vars *v, struct dfa *d,
159 chr *start, chr *stop, int *hitstopp);
160static chr *shortest(struct vars *v, struct dfa *d, chr *start, chr *min,
161 chr *max, chr **coldp, int *hitstopp);
162static int matchuntil(struct vars *v, struct dfa *d, chr *probe,
163 struct sset **lastcss, chr **lastcp);
164static chr *dfa_backref(struct vars *v, struct dfa *d, chr *start,
165 chr *min, chr *max, bool shortest);
166static chr *lastcold(struct vars *v, struct dfa *d);
167static struct dfa *newdfa(struct vars *v, struct cnfa *cnfa,
168 struct colormap *cm, struct smalldfa *sml);
169static void freedfa(struct dfa *d);
170static unsigned hash(unsigned *uv, int n);
171static struct sset *initialize(struct vars *v, struct dfa *d, chr *start);
172static struct sset *miss(struct vars *v, struct dfa *d, struct sset *css,
173 color co, chr *cp, chr *start);
174static int lacon(struct vars *v, struct cnfa *pcnfa, chr *cp, color co);
175static struct sset *getvacant(struct vars *v, struct dfa *d, chr *cp,
176 chr *start);
177static struct sset *pickss(struct vars *v, struct dfa *d, chr *cp,
178 chr *start);
179
180
181/*
182 * pg_regexec - match regular expression
183 */
184int
186 const chr *string,
187 size_t len,
188 size_t search_start,
189 rm_detail_t *details,
190 size_t nmatch,
191 regmatch_t pmatch[],
192 int flags)
193{
194 struct vars var;
195 struct vars *v = &var;
196 int st;
197 size_t n;
198 size_t i;
199 int backref;
200
201#define LOCALMAT 20
203
204#define LOCALDFAS 40
205 struct dfa *subdfas[LOCALDFAS];
206
207 /* sanity checks */
208 if (re == NULL || string == NULL || re->re_magic != REMAGIC)
209 return REG_INVARG;
210 if (re->re_csize != sizeof(chr))
211 return REG_MIXED;
212 if (search_start > len)
213 return REG_NOMATCH;
214
215 /* Initialize locale-dependent support */
216 pg_set_regex_collation(re->re_collation);
217
218 /* setup */
219 v->re = re;
220 v->g = (struct guts *) re->re_guts;
221 if ((v->g->cflags & REG_EXPECT) && details == NULL)
222 return REG_INVARG;
223 if (v->g->info & REG_UIMPOSSIBLE)
224 return REG_NOMATCH;
225 backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
226 v->eflags = flags;
227 if (backref && nmatch <= v->g->nsub)
228 {
229 /* need larger work area */
230 v->nmatch = v->g->nsub + 1;
231 if (v->nmatch <= LOCALMAT)
232 v->pmatch = mat;
233 else
235 if (v->pmatch == NULL)
236 return REG_ESPACE;
237 zapallsubs(v->pmatch, v->nmatch);
238 }
239 else
240 {
241 /* we can store results directly in caller's array */
242 v->pmatch = pmatch;
243 /* ensure any extra entries in caller's array are filled with -1 */
244 if (nmatch > 0)
245 zapallsubs(pmatch, nmatch);
246 /* then forget about extra entries, to avoid useless work in find() */
247 if (nmatch > v->g->nsub + 1)
248 nmatch = v->g->nsub + 1;
249 v->nmatch = nmatch;
250 }
251 v->details = details;
252 v->start = (chr *) string;
253 v->search_start = (chr *) string + search_start;
254 v->stop = (chr *) string + len;
255 v->err = 0;
256 v->subdfas = NULL;
257 v->ladfas = NULL;
258 v->lblastcss = NULL;
259 v->lblastcp = NULL;
260 /* below this point, "goto cleanup" will behave sanely */
261
262 assert(v->g->ntree >= 0);
263 n = (size_t) v->g->ntree;
264 if (n <= LOCALDFAS)
265 v->subdfas = subdfas;
266 else
267 {
268 /* ntree is surely less than the number of states, so this is safe: */
269 v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
270 if (v->subdfas == NULL)
271 {
272 st = REG_ESPACE;
273 goto cleanup;
274 }
275 }
276 for (i = 0; i < n; i++)
277 v->subdfas[i] = NULL;
278
279 assert(v->g->nlacons >= 0);
280 n = (size_t) v->g->nlacons;
281 if (n > 0)
282 {
283 /* nlacons is surely less than the number of arcs, so this is safe: */
284 v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
285 if (v->ladfas == NULL)
286 {
287 st = REG_ESPACE;
288 goto cleanup;
289 }
290 for (i = 0; i < n; i++)
291 v->ladfas[i] = NULL;
292 v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
293 v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
294 if (v->lblastcss == NULL || v->lblastcp == NULL)
295 {
296 st = REG_ESPACE;
297 goto cleanup;
298 }
299 for (i = 0; i < n; i++)
300 {
301 v->lblastcss[i] = NULL;
302 v->lblastcp[i] = NULL;
303 }
304 }
305
306 /* do it */
307 assert(v->g->tree != NULL);
308 if (backref)
309 st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
310 else
311 st = find(v, &v->g->tree->cnfa, &v->g->cmap);
312
313 /* on success, ensure caller's match vector is filled correctly */
314 if (st == REG_OKAY && nmatch > 0)
315 {
316 if (v->pmatch != pmatch)
317 {
318 /* copy portion of match vector over from (larger) work area */
319 assert(nmatch <= v->nmatch);
320 memcpy(VS(pmatch), VS(v->pmatch), nmatch * sizeof(regmatch_t));
321 }
322 if (v->g->cflags & REG_NOSUB)
323 {
324 /* don't expose possibly-partial sub-match results to caller */
325 zapallsubs(pmatch, nmatch);
326 }
327 }
328
329 /* clean up */
330cleanup:
331 if (v->pmatch != pmatch && v->pmatch != mat)
332 FREE(v->pmatch);
333 if (v->subdfas != NULL)
334 {
335 n = (size_t) v->g->ntree;
336 for (i = 0; i < n; i++)
337 {
338 if (v->subdfas[i] != NULL)
339 freedfa(v->subdfas[i]);
340 }
341 if (v->subdfas != subdfas)
342 FREE(v->subdfas);
343 }
344 if (v->ladfas != NULL)
345 {
346 n = (size_t) v->g->nlacons;
347 for (i = 0; i < n; i++)
348 {
349 if (v->ladfas[i] != NULL)
350 freedfa(v->ladfas[i]);
351 }
352 FREE(v->ladfas);
353 }
354 if (v->lblastcss != NULL)
355 FREE(v->lblastcss);
356 if (v->lblastcp != NULL)
357 FREE(v->lblastcp);
358
359#ifdef REG_DEBUG
360 if (v->eflags & (REG_FTRACE | REG_MTRACE))
361 fflush(stdout);
362#endif
363
364 return st;
365}
366
367/*
368 * getsubdfa - create or re-fetch the DFA for a tree subre node
369 *
370 * We only need to create the DFA once per overall regex execution.
371 * The DFA will be freed by the cleanup step in pg_regexec().
372 */
373static struct dfa *
374getsubdfa(struct vars *v,
375 struct subre *t)
376{
377 struct dfa *d = v->subdfas[t->id];
378
379 if (d == NULL)
380 {
381 d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
382 if (d == NULL)
383 return NULL;
384 /* set up additional info if this is a backref node */
385 if (t->op == 'b')
386 {
387 d->backno = t->backno;
388 d->backmin = t->min;
389 d->backmax = t->max;
390 }
391 v->subdfas[t->id] = d;
392 }
393 return d;
394}
395
396/*
397 * getladfa - create or re-fetch the DFA for a LACON subre node
398 *
399 * Same as above, but for LACONs.
400 */
401static struct dfa *
402getladfa(struct vars *v,
403 int n)
404{
405 assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
406
407 if (v->ladfas[n] == NULL)
408 {
409 struct subre *sub = &v->g->lacons[n];
410
411 v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
412 /* a LACON can't contain a backref, so nothing else to do */
413 }
414 return v->ladfas[n];
415}
416
417/*
418 * find - find a match for the main NFA (no-complications case)
419 */
420static int
421find(struct vars *v,
422 struct cnfa *cnfa,
423 struct colormap *cm)
424{
425 struct dfa *s;
426 struct dfa *d;
427 chr *begin;
428 chr *end = NULL;
429 chr *cold;
430 chr *open; /* open and close of range of possible starts */
431 chr *close;
432 int hitend;
433 int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
434
435 /* first, a shot with the search RE */
436 s = newdfa(v, &v->g->search, cm, &v->dfa1);
437 if (s == NULL)
438 return v->err;
439 MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
440 cold = NULL;
441 close = shortest(v, s, v->search_start, v->search_start, v->stop,
442 &cold, (int *) NULL);
443 freedfa(s);
444 NOERR();
445 if (v->g->cflags & REG_EXPECT)
446 {
447 assert(v->details != NULL);
448 if (cold != NULL)
450 else
451 v->details->rm_extend.rm_so = OFF(v->stop);
452 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
453 }
454 if (close == NULL) /* not found */
455 return REG_NOMATCH;
456 if (v->nmatch == 0) /* found, don't need exact location */
457 return REG_OKAY;
458
459 /* find starting point and match */
460 assert(cold != NULL);
461 open = cold;
462 cold = NULL;
463 MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
464 d = newdfa(v, cnfa, cm, &v->dfa1);
465 if (d == NULL)
466 return v->err;
467 for (begin = open; begin <= close; begin++)
468 {
469 MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
470 if (shorter)
471 end = shortest(v, d, begin, begin, v->stop,
472 (chr **) NULL, &hitend);
473 else
474 end = longest(v, d, begin, v->stop, &hitend);
475 if (ISERR())
476 {
477 freedfa(d);
478 return v->err;
479 }
480 if (hitend && cold == NULL)
481 cold = begin;
482 if (end != NULL)
483 break; /* NOTE BREAK OUT */
484 }
485 assert(end != NULL); /* search RE succeeded so loop should */
486 freedfa(d);
487
488 /* and pin down details */
489 assert(v->nmatch > 0);
490 v->pmatch[0].rm_so = OFF(begin);
491 v->pmatch[0].rm_eo = OFF(end);
492 if (v->g->cflags & REG_EXPECT)
493 {
494 if (cold != NULL)
496 else
497 v->details->rm_extend.rm_so = OFF(v->stop);
498 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
499 }
500 if (v->nmatch == 1) /* no need for submatches */
501 return REG_OKAY;
502
503 /* find submatches */
504 return cdissect(v, v->g->tree, begin, end);
505}
506
507/*
508 * cfind - find a match for the main NFA (with complications)
509 */
510static int
511cfind(struct vars *v,
512 struct cnfa *cnfa,
513 struct colormap *cm)
514{
515 struct dfa *s;
516 struct dfa *d;
517 chr *cold;
518 int ret;
519
520 s = newdfa(v, &v->g->search, cm, &v->dfa1);
521 if (s == NULL)
522 return v->err;
523 d = newdfa(v, cnfa, cm, &v->dfa2);
524 if (d == NULL)
525 {
526 freedfa(s);
527 return v->err;
528 }
529
530 ret = cfindloop(v, cnfa, cm, d, s, &cold);
531
532 freedfa(d);
533 freedfa(s);
534 NOERR();
535 if (v->g->cflags & REG_EXPECT)
536 {
537 assert(v->details != NULL);
538 if (cold != NULL)
540 else
541 v->details->rm_extend.rm_so = OFF(v->stop);
542 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
543 }
544 return ret;
545}
546
547/*
548 * cfindloop - the heart of cfind
549 */
550static int
551cfindloop(struct vars *v,
552 struct cnfa *cnfa,
553 struct colormap *cm,
554 struct dfa *d,
555 struct dfa *s,
556 chr **coldp) /* where to put coldstart pointer */
557{
558 chr *begin;
559 chr *end;
560 chr *cold;
561 chr *open; /* open and close of range of possible starts */
562 chr *close;
563 chr *estart;
564 chr *estop;
565 int er;
566 int shorter = v->g->tree->flags & SHORTER;
567 int hitend;
568
569 assert(d != NULL && s != NULL);
570 cold = NULL;
571 close = v->search_start;
572 do
573 {
574 /* Search with the search RE for match range at/beyond "close" */
575 MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
576 close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
577 if (ISERR())
578 {
579 *coldp = cold;
580 return v->err;
581 }
582 if (close == NULL)
583 break; /* no more possible match anywhere */
584 assert(cold != NULL);
585 open = cold;
586 cold = NULL;
587 /* Search for matches starting between "open" and "close" inclusive */
588 MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
589 for (begin = open; begin <= close; begin++)
590 {
591 MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
592 estart = begin;
593 estop = v->stop;
594 for (;;)
595 {
596 /* Here we use the top node's detailed RE */
597 if (shorter)
598 end = shortest(v, d, begin, estart,
599 estop, (chr **) NULL, &hitend);
600 else
601 end = longest(v, d, begin, estop,
602 &hitend);
603 if (ISERR())
604 {
605 *coldp = cold;
606 return v->err;
607 }
608 if (hitend && cold == NULL)
609 cold = begin;
610 if (end == NULL)
611 break; /* no match with this begin point, try next */
612 MDEBUG(("tentative end %ld\n", LOFF(end)));
613 /* Dissect the potential match to see if it really matches */
614 er = cdissect(v, v->g->tree, begin, end);
615 if (er == REG_OKAY)
616 {
617 if (v->nmatch > 0)
618 {
619 v->pmatch[0].rm_so = OFF(begin);
620 v->pmatch[0].rm_eo = OFF(end);
621 }
622 *coldp = cold;
623 return REG_OKAY;
624 }
625 if (er != REG_NOMATCH)
626 {
627 ERR(er);
628 *coldp = cold;
629 return er;
630 }
631 /* Try next longer/shorter match with same begin point */
632 if (shorter)
633 {
634 if (end == estop)
635 break; /* no more, so try next begin point */
636 estart = end + 1;
637 }
638 else
639 {
640 if (end == begin)
641 break; /* no more, so try next begin point */
642 estop = end - 1;
643 }
644 } /* end loop over endpoint positions */
645 } /* end loop over beginning positions */
646
647 /*
648 * If we get here, there is no possible match starting at or before
649 * "close", so consider matches beyond that. We'll do a fresh search
650 * with the search RE to find a new promising match range.
651 */
652 close++;
653 } while (close < v->stop);
654
655 *coldp = cold;
656 return REG_NOMATCH;
657}
658
659/*
660 * zapallsubs - initialize all subexpression matches to "no match"
661 *
662 * Note that p[0], the overall-match location, is not touched.
663 */
664static void
666 size_t n)
667{
668 size_t i;
669
670 for (i = n - 1; i > 0; i--)
671 {
672 p[i].rm_so = -1;
673 p[i].rm_eo = -1;
674 }
675}
676
677/*
678 * zaptreesubs - initialize subexpressions within subtree to "no match"
679 */
680static void
681zaptreesubs(struct vars *v,
682 struct subre *t)
683{
684 int n = t->capno;
685 struct subre *t2;
686
687 if (n > 0)
688 {
689 if ((size_t) n < v->nmatch)
690 {
691 v->pmatch[n].rm_so = -1;
692 v->pmatch[n].rm_eo = -1;
693 }
694 }
695
696 for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
697 zaptreesubs(v, t2);
698}
699
700/*
701 * subset - set subexpression match data for a successful subre
702 */
703static void
704subset(struct vars *v,
705 struct subre *sub,
706 chr *begin,
707 chr *end)
708{
709 int n = sub->capno;
710
711 assert(n > 0);
712 if ((size_t) n >= v->nmatch)
713 return;
714
715 MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
716 v->pmatch[n].rm_so = OFF(begin);
717 v->pmatch[n].rm_eo = OFF(end);
718}
719
720/*
721 * cdissect - check backrefs and determine subexpression matches
722 *
723 * cdissect recursively processes a subre tree to check matching of backrefs
724 * and/or identify submatch boundaries for capture nodes. The proposed match
725 * runs from "begin" to "end" (not including "end"), and we are basically
726 * "dissecting" it to see where the submatches are.
727 *
728 * Before calling any level of cdissect, the caller must have run the node's
729 * DFA and found that the proposed substring satisfies the DFA. (We make
730 * the caller do that because in concatenation and iteration nodes, it's
731 * much faster to check all the substrings against the child DFAs before we
732 * recurse.)
733 *
734 * A side-effect of a successful match is to save match locations for
735 * capturing subexpressions in v->pmatch[]. This is a little bit tricky,
736 * so we make the following rules:
737 * 1. Before initial entry to cdissect, all match data must have been
738 * cleared (this is seen to by zapallsubs).
739 * 2. Before any recursive entry to cdissect, the match data for that
740 * subexpression tree must be guaranteed clear (see zaptreesubs).
741 * 3. When returning REG_OKAY, each level of cdissect will have saved
742 * any relevant match locations.
743 * 4. When returning REG_NOMATCH, each level of cdissect will guarantee
744 * that its subexpression match locations are again clear.
745 * 5. No guarantees are made for error cases (i.e., other result codes).
746 * 6. When a level of cdissect abandons a successful sub-match, it will
747 * clear that subtree's match locations with zaptreesubs before trying
748 * any new DFA match or cdissect call for that subtree or any subtree
749 * to its right (that is, any subtree that could have a backref into the
750 * abandoned match).
751 * This may seem overly complicated, but it's difficult to simplify it
752 * because of the provision that match locations must be reset before
753 * any fresh DFA match (a rule that is needed to make dfa_backref safe).
754 * That means it won't work to just reset relevant match locations at the
755 * start of each cdissect level.
756 */
757static int /* regexec return code */
758cdissect(struct vars *v,
759 struct subre *t,
760 chr *begin, /* beginning of relevant substring */
761 chr *end) /* end of same */
762{
763 int er;
764
765 assert(t != NULL);
766 MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
767
768 /* handy place to check for operation cancel */
769 INTERRUPT(v->re);
770 /* ... and stack overrun */
771 if (STACK_TOO_DEEP(v->re))
772 return REG_ETOOBIG;
773
774 switch (t->op)
775 {
776 case '=': /* terminal node */
777 assert(t->child == NULL);
778 er = REG_OKAY; /* no action, parent did the work */
779 break;
780 case 'b': /* back reference */
781 assert(t->child == NULL);
782 er = cbrdissect(v, t, begin, end);
783 break;
784 case '.': /* concatenation */
785 assert(t->child != NULL);
786 if (t->child->flags & SHORTER) /* reverse scan */
787 er = crevcondissect(v, t, begin, end);
788 else
789 er = ccondissect(v, t, begin, end);
790 break;
791 case '|': /* alternation */
792 assert(t->child != NULL);
793 er = caltdissect(v, t, begin, end);
794 break;
795 case '*': /* iteration */
796 assert(t->child != NULL);
797 if (t->child->flags & SHORTER) /* reverse scan */
798 er = creviterdissect(v, t, begin, end);
799 else
800 er = citerdissect(v, t, begin, end);
801 break;
802 case '(': /* no-op capture node */
803 assert(t->child != NULL);
804 er = cdissect(v, t->child, begin, end);
805 break;
806 default:
807 er = REG_ASSERT;
808 break;
809 }
810
811 /*
812 * We should never have a match failure unless backrefs lurk below;
813 * otherwise, either caller failed to check the DFA, or there's some
814 * inconsistency between the DFA and the node's innards.
815 */
816 assert(er != REG_NOMATCH || (t->flags & BACKR));
817
818 /*
819 * If this node is marked as capturing, save successful match's location.
820 */
821 if (t->capno > 0 && er == REG_OKAY)
822 subset(v, t, begin, end);
823
824 return er;
825}
826
827/*
828 * ccondissect - dissect match for concatenation node
829 */
830static int /* regexec return code */
831ccondissect(struct vars *v,
832 struct subre *t,
833 chr *begin, /* beginning of relevant substring */
834 chr *end) /* end of same */
835{
836 struct subre *left = t->child;
837 struct subre *right = left->sibling;
838 struct dfa *d;
839 struct dfa *d2;
840 chr *mid;
841 int er;
842
843 assert(t->op == '.');
844 assert(left != NULL && left->cnfa.nstates > 0);
845 assert(right != NULL && right->cnfa.nstates > 0);
846 assert(right->sibling == NULL);
847 assert(!(left->flags & SHORTER));
848
849 d = getsubdfa(v, left);
850 NOERR();
851 d2 = getsubdfa(v, right);
852 NOERR();
853 MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
854
855 /* pick a tentative midpoint */
856 mid = longest(v, d, begin, end, (int *) NULL);
857 NOERR();
858 if (mid == NULL)
859 return REG_NOMATCH;
860 MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
861
862 /* iterate until satisfaction or failure */
863 for (;;)
864 {
865 /* try this midpoint on for size */
866 if (longest(v, d2, mid, end, (int *) NULL) == end)
867 {
868 er = cdissect(v, left, begin, mid);
869 if (er == REG_OKAY)
870 {
871 er = cdissect(v, right, mid, end);
872 if (er == REG_OKAY)
873 {
874 /* satisfaction */
875 MDEBUG(("%d: successful\n", t->id));
876 return REG_OKAY;
877 }
878 /* Reset left's matches (right should have done so itself) */
879 zaptreesubs(v, left);
880 }
881 if (er != REG_NOMATCH)
882 return er;
883 }
884 NOERR();
885
886 /* that midpoint didn't work, find a new one */
887 if (mid == begin)
888 {
889 /* all possibilities exhausted */
890 MDEBUG(("%d: no midpoint\n", t->id));
891 return REG_NOMATCH;
892 }
893 mid = longest(v, d, begin, mid - 1, (int *) NULL);
894 NOERR();
895 if (mid == NULL)
896 {
897 /* failed to find a new one */
898 MDEBUG(("%d: failed midpoint\n", t->id));
899 return REG_NOMATCH;
900 }
901 MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
902 }
903
904 /* can't get here */
905 return REG_ASSERT;
906}
907
908/*
909 * crevcondissect - dissect match for concatenation node, shortest-first
910 */
911static int /* regexec return code */
912crevcondissect(struct vars *v,
913 struct subre *t,
914 chr *begin, /* beginning of relevant substring */
915 chr *end) /* end of same */
916{
917 struct subre *left = t->child;
918 struct subre *right = left->sibling;
919 struct dfa *d;
920 struct dfa *d2;
921 chr *mid;
922 int er;
923
924 assert(t->op == '.');
925 assert(left != NULL && left->cnfa.nstates > 0);
926 assert(right != NULL && right->cnfa.nstates > 0);
927 assert(right->sibling == NULL);
928 assert(left->flags & SHORTER);
929
930 d = getsubdfa(v, left);
931 NOERR();
932 d2 = getsubdfa(v, right);
933 NOERR();
934 MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
935
936 /* pick a tentative midpoint */
937 mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
938 NOERR();
939 if (mid == NULL)
940 return REG_NOMATCH;
941 MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
942
943 /* iterate until satisfaction or failure */
944 for (;;)
945 {
946 /* try this midpoint on for size */
947 if (longest(v, d2, mid, end, (int *) NULL) == end)
948 {
949 er = cdissect(v, left, begin, mid);
950 if (er == REG_OKAY)
951 {
952 er = cdissect(v, right, mid, end);
953 if (er == REG_OKAY)
954 {
955 /* satisfaction */
956 MDEBUG(("%d: successful\n", t->id));
957 return REG_OKAY;
958 }
959 /* Reset left's matches (right should have done so itself) */
960 zaptreesubs(v, left);
961 }
962 if (er != REG_NOMATCH)
963 return er;
964 }
965 NOERR();
966
967 /* that midpoint didn't work, find a new one */
968 if (mid == end)
969 {
970 /* all possibilities exhausted */
971 MDEBUG(("%d: no midpoint\n", t->id));
972 return REG_NOMATCH;
973 }
974 mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
975 NOERR();
976 if (mid == NULL)
977 {
978 /* failed to find a new one */
979 MDEBUG(("%d: failed midpoint\n", t->id));
980 return REG_NOMATCH;
981 }
982 MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
983 }
984
985 /* can't get here */
986 return REG_ASSERT;
987}
988
989/*
990 * cbrdissect - dissect match for backref node
991 *
992 * The backref match might already have been verified by dfa_backref(),
993 * but we don't know that for sure so must check it here.
994 */
995static int /* regexec return code */
996cbrdissect(struct vars *v,
997 struct subre *t,
998 chr *begin, /* beginning of relevant substring */
999 chr *end) /* end of same */
1000{
1001 int n = t->backno;
1002 size_t numreps;
1003 size_t tlen;
1004 size_t brlen;
1005 chr *brstring;
1006 chr *p;
1007 int min = t->min;
1008 int max = t->max;
1009
1010 assert(t != NULL);
1011 assert(t->op == 'b');
1012 assert(n >= 0);
1013 assert((size_t) n < v->nmatch);
1014
1015 MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
1016 LOFF(begin), LOFF(end)));
1017
1018 /* get the backreferenced string */
1019 if (v->pmatch[n].rm_so == -1)
1020 return REG_NOMATCH;
1021 brstring = v->start + v->pmatch[n].rm_so;
1022 brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
1023
1024 /* special cases for zero-length strings */
1025 if (brlen == 0)
1026 {
1027 /*
1028 * matches only if target is zero length, but any number of
1029 * repetitions can be considered to be present
1030 */
1031 if (begin == end && min <= max)
1032 {
1033 MDEBUG(("%d: backref matched trivially\n", t->id));
1034 return REG_OKAY;
1035 }
1036 return REG_NOMATCH;
1037 }
1038 if (begin == end)
1039 {
1040 /* matches only if zero repetitions are okay */
1041 if (min == 0)
1042 {
1043 MDEBUG(("%d: backref matched trivially\n", t->id));
1044 return REG_OKAY;
1045 }
1046 return REG_NOMATCH;
1047 }
1048
1049 /*
1050 * check target length to see if it could possibly be an allowed number of
1051 * repetitions of brstring
1052 */
1053 assert(end > begin);
1054 tlen = end - begin;
1055 if (tlen % brlen != 0)
1056 return REG_NOMATCH;
1057 numreps = tlen / brlen;
1058 if (numreps < min || (numreps > max && max != DUPINF))
1059 return REG_NOMATCH;
1060
1061 /* okay, compare the actual string contents */
1062 p = begin;
1063 while (numreps-- > 0)
1064 {
1065 if ((*v->g->compare) (brstring, p, brlen) != 0)
1066 return REG_NOMATCH;
1067 p += brlen;
1068 }
1069
1070 MDEBUG(("%d: backref matched\n", t->id));
1071 return REG_OKAY;
1072}
1073
1074/*
1075 * caltdissect - dissect match for alternation node
1076 */
1077static int /* regexec return code */
1078caltdissect(struct vars *v,
1079 struct subre *t,
1080 chr *begin, /* beginning of relevant substring */
1081 chr *end) /* end of same */
1082{
1083 struct dfa *d;
1084 int er;
1085
1086 assert(t->op == '|');
1087
1088 t = t->child;
1089 /* there should be at least 2 alternatives */
1090 assert(t != NULL && t->sibling != NULL);
1091
1092 while (t != NULL)
1093 {
1094 assert(t->cnfa.nstates > 0);
1095
1096 MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1097
1098 d = getsubdfa(v, t);
1099 NOERR();
1100 if (longest(v, d, begin, end, (int *) NULL) == end)
1101 {
1102 MDEBUG(("%d: caltdissect matched\n", t->id));
1103 er = cdissect(v, t, begin, end);
1104 if (er != REG_NOMATCH)
1105 return er;
1106 }
1107 NOERR();
1108
1109 t = t->sibling;
1110 }
1111
1112 return REG_NOMATCH;
1113}
1114
1115/*
1116 * citerdissect - dissect match for iteration node
1117 */
1118static int /* regexec return code */
1119citerdissect(struct vars *v,
1120 struct subre *t,
1121 chr *begin, /* beginning of relevant substring */
1122 chr *end) /* end of same */
1123{
1124 struct dfa *d;
1125 chr **endpts;
1126 chr *limit;
1127 int min_matches;
1128 size_t max_matches;
1129 int nverified;
1130 int k;
1131 int i;
1132 int er;
1133
1134 assert(t->op == '*');
1135 assert(t->child != NULL && t->child->cnfa.nstates > 0);
1136 assert(!(t->child->flags & SHORTER));
1137 assert(begin <= end);
1138
1139 MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1140
1141 /*
1142 * For the moment, assume the minimum number of matches is 1. If zero
1143 * matches are allowed, and the target string is empty, we are allowed to
1144 * match regardless of the contents of the iter node --- but we would
1145 * prefer to match once, so that capturing parens get set. (An example of
1146 * the concern here is a pattern like "()*\1", which historically this
1147 * code has allowed to succeed.) Therefore, we deal with the zero-matches
1148 * case at the bottom, after failing to find any other way to match.
1149 */
1150 min_matches = t->min;
1151 if (min_matches <= 0)
1152 min_matches = 1;
1153
1154 /*
1155 * We need workspace to track the endpoints of each sub-match. Normally
1156 * we consider only nonzero-length sub-matches, so there can be at most
1157 * end-begin of them. However, if min is larger than that, we will also
1158 * consider zero-length sub-matches in order to find enough matches.
1159 *
1160 * For convenience, endpts[0] contains the "begin" pointer and we store
1161 * sub-match endpoints in endpts[1..max_matches].
1162 */
1163 max_matches = end - begin;
1164 if (max_matches > t->max && t->max != DUPINF)
1165 max_matches = t->max;
1169 if (endpts == NULL)
1170 return REG_ESPACE;
1171 endpts[0] = begin;
1172
1173 d = getsubdfa(v, t->child);
1174 if (ISERR())
1175 {
1176 FREE(endpts);
1177 return v->err;
1178 }
1179
1180 /*
1181 * Our strategy is to first find a set of sub-match endpoints that are
1182 * valid according to the child node's DFA, and then recursively dissect
1183 * each sub-match to confirm validity. If any validity check fails,
1184 * backtrack that sub-match and try again. And, when we next try for a
1185 * validity check, we need not recheck any successfully verified
1186 * sub-matches that we didn't move the endpoints of. nverified remembers
1187 * how many sub-matches are currently known okay.
1188 */
1189
1190 /* initialize to consider first sub-match */
1191 nverified = 0;
1192 k = 1;
1193 limit = end;
1194
1195 /* iterate until satisfaction or failure */
1196 while (k > 0)
1197 {
1198 /* try to find an endpoint for the k'th sub-match */
1199 endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
1200 if (ISERR())
1201 {
1202 FREE(endpts);
1203 return v->err;
1204 }
1205 if (endpts[k] == NULL)
1206 {
1207 /* no match possible, so see if we can shorten previous one */
1208 k--;
1209 goto backtrack;
1210 }
1211 MDEBUG(("%d: working endpoint %d: %ld\n",
1212 t->id, k, LOFF(endpts[k])));
1213
1214 /* k'th sub-match can no longer be considered verified */
1215 if (nverified >= k)
1216 nverified = k - 1;
1217
1218 if (endpts[k] != end)
1219 {
1220 /* haven't reached end yet, try another iteration if allowed */
1221 if (k >= max_matches)
1222 {
1223 /* must try to shorten some previous match */
1224 k--;
1225 goto backtrack;
1226 }
1227
1228 /* reject zero-length match unless necessary to achieve min */
1229 if (endpts[k] == endpts[k - 1] &&
1230 (k >= min_matches || min_matches - k < end - endpts[k]))
1231 goto backtrack;
1232
1233 k++;
1234 limit = end;
1235 continue;
1236 }
1237
1238 /*
1239 * We've identified a way to divide the string into k sub-matches that
1240 * works so far as the child DFA can tell. If k is an allowed number
1241 * of matches, start the slow part: recurse to verify each sub-match.
1242 * We always have k <= max_matches, needn't check that.
1243 */
1244 if (k < min_matches)
1245 goto backtrack;
1246
1247 MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1248
1249 for (i = nverified + 1; i <= k; i++)
1250 {
1251 /* zap any match data from a non-last iteration */
1252 zaptreesubs(v, t->child);
1253 er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1254 if (er == REG_OKAY)
1255 {
1256 nverified = i;
1257 continue;
1258 }
1259 if (er == REG_NOMATCH)
1260 break;
1261 /* oops, something failed */
1262 FREE(endpts);
1263 return er;
1264 }
1265
1266 if (i > k)
1267 {
1268 /* satisfaction */
1269 MDEBUG(("%d: successful\n", t->id));
1270 FREE(endpts);
1271 return REG_OKAY;
1272 }
1273
1274 /* i'th match failed to verify, so backtrack it */
1275 k = i;
1276
1277backtrack:
1278
1279 /*
1280 * Must consider shorter versions of the k'th sub-match. However,
1281 * we'll only ask for a zero-length match if necessary.
1282 */
1283 while (k > 0)
1284 {
1285 chr *prev_end = endpts[k - 1];
1286
1287 if (endpts[k] > prev_end)
1288 {
1289 limit = endpts[k] - 1;
1290 if (limit > prev_end ||
1292 {
1293 /* break out of backtrack loop, continue the outer one */
1294 break;
1295 }
1296 }
1297 /* can't shorten k'th sub-match any more, consider previous one */
1298 k--;
1299 }
1300 }
1301
1302 /* all possibilities exhausted */
1303 FREE(endpts);
1304
1305 /*
1306 * Now consider the possibility that we can match to a zero-length string
1307 * by using zero repetitions.
1308 */
1309 if (t->min == 0 && begin == end)
1310 {
1311 MDEBUG(("%d: allowing zero matches\n", t->id));
1312 return REG_OKAY;
1313 }
1314
1315 MDEBUG(("%d: failed\n", t->id));
1316 return REG_NOMATCH;
1317}
1318
1319/*
1320 * creviterdissect - dissect match for iteration node, shortest-first
1321 */
1322static int /* regexec return code */
1323creviterdissect(struct vars *v,
1324 struct subre *t,
1325 chr *begin, /* beginning of relevant substring */
1326 chr *end) /* end of same */
1327{
1328 struct dfa *d;
1329 chr **endpts;
1330 chr *limit;
1331 int min_matches;
1332 size_t max_matches;
1333 int nverified;
1334 int k;
1335 int i;
1336 int er;
1337
1338 assert(t->op == '*');
1339 assert(t->child != NULL && t->child->cnfa.nstates > 0);
1340 assert(t->child->flags & SHORTER);
1341 assert(begin <= end);
1342
1343 MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1344
1345 /*
1346 * If zero matches are allowed, and target string is empty, just declare
1347 * victory. OTOH, if target string isn't empty, zero matches can't work
1348 * so we pretend the min is 1.
1349 */
1350 min_matches = t->min;
1351 if (min_matches <= 0)
1352 {
1353 if (begin == end)
1354 {
1355 MDEBUG(("%d: allowing zero matches\n", t->id));
1356 return REG_OKAY;
1357 }
1358 min_matches = 1;
1359 }
1360
1361 /*
1362 * We need workspace to track the endpoints of each sub-match. Normally
1363 * we consider only nonzero-length sub-matches, so there can be at most
1364 * end-begin of them. However, if min is larger than that, we will also
1365 * consider zero-length sub-matches in order to find enough matches.
1366 *
1367 * For convenience, endpts[0] contains the "begin" pointer and we store
1368 * sub-match endpoints in endpts[1..max_matches].
1369 */
1370 max_matches = end - begin;
1371 if (max_matches > t->max && t->max != DUPINF)
1372 max_matches = t->max;
1376 if (endpts == NULL)
1377 return REG_ESPACE;
1378 endpts[0] = begin;
1379
1380 d = getsubdfa(v, t->child);
1381 if (ISERR())
1382 {
1383 FREE(endpts);
1384 return v->err;
1385 }
1386
1387 /*
1388 * Our strategy is to first find a set of sub-match endpoints that are
1389 * valid according to the child node's DFA, and then recursively dissect
1390 * each sub-match to confirm validity. If any validity check fails,
1391 * backtrack that sub-match and try again. And, when we next try for a
1392 * validity check, we need not recheck any successfully verified
1393 * sub-matches that we didn't move the endpoints of. nverified remembers
1394 * how many sub-matches are currently known okay.
1395 */
1396
1397 /* initialize to consider first sub-match */
1398 nverified = 0;
1399 k = 1;
1400 limit = begin;
1401
1402 /* iterate until satisfaction or failure */
1403 while (k > 0)
1404 {
1405 /* disallow zero-length match unless necessary to achieve min */
1406 if (limit == endpts[k - 1] &&
1407 limit != end &&
1408 (k >= min_matches || min_matches - k < end - limit))
1409 limit++;
1410
1411 /* if this is the last allowed sub-match, it must reach to the end */
1412 if (k >= max_matches)
1413 limit = end;
1414
1415 /* try to find an endpoint for the k'th sub-match */
1416 endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
1417 (chr **) NULL, (int *) NULL);
1418 if (ISERR())
1419 {
1420 FREE(endpts);
1421 return v->err;
1422 }
1423 if (endpts[k] == NULL)
1424 {
1425 /* no match possible, so see if we can lengthen previous one */
1426 k--;
1427 goto backtrack;
1428 }
1429 MDEBUG(("%d: working endpoint %d: %ld\n",
1430 t->id, k, LOFF(endpts[k])));
1431
1432 /* k'th sub-match can no longer be considered verified */
1433 if (nverified >= k)
1434 nverified = k - 1;
1435
1436 if (endpts[k] != end)
1437 {
1438 /* haven't reached end yet, try another iteration if allowed */
1439 if (k >= max_matches)
1440 {
1441 /* must try to lengthen some previous match */
1442 k--;
1443 goto backtrack;
1444 }
1445
1446 k++;
1447 limit = endpts[k - 1];
1448 continue;
1449 }
1450
1451 /*
1452 * We've identified a way to divide the string into k sub-matches that
1453 * works so far as the child DFA can tell. If k is an allowed number
1454 * of matches, start the slow part: recurse to verify each sub-match.
1455 * We always have k <= max_matches, needn't check that.
1456 */
1457 if (k < min_matches)
1458 goto backtrack;
1459
1460 MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1461
1462 for (i = nverified + 1; i <= k; i++)
1463 {
1464 /* zap any match data from a non-last iteration */
1465 zaptreesubs(v, t->child);
1466 er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1467 if (er == REG_OKAY)
1468 {
1469 nverified = i;
1470 continue;
1471 }
1472 if (er == REG_NOMATCH)
1473 break;
1474 /* oops, something failed */
1475 FREE(endpts);
1476 return er;
1477 }
1478
1479 if (i > k)
1480 {
1481 /* satisfaction */
1482 MDEBUG(("%d: successful\n", t->id));
1483 FREE(endpts);
1484 return REG_OKAY;
1485 }
1486
1487 /* i'th match failed to verify, so backtrack it */
1488 k = i;
1489
1490backtrack:
1491
1492 /*
1493 * Must consider longer versions of the k'th sub-match.
1494 */
1495 while (k > 0)
1496 {
1497 if (endpts[k] < end)
1498 {
1499 limit = endpts[k] + 1;
1500 /* break out of backtrack loop, continue the outer one */
1501 break;
1502 }
1503 /* can't lengthen k'th sub-match any more, consider previous one */
1504 k--;
1505 }
1506 }
1507
1508 /* all possibilities exhausted */
1509 MDEBUG(("%d: failed\n", t->id));
1510 FREE(endpts);
1511 return REG_NOMATCH;
1512}
1513
1514
1515
1516#include "rege_dfa.c"
static void cleanup(void)
Definition bootstrap.c:886
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
#define FREE(ptr)
Definition cryptohash.c:37
return str start
#define close(a)
Definition win32.h:12
int i
Definition isn.c:77
void initialize(void)
const void size_t len
void pg_set_regex_collation(Oid collation)
#define MALLOC_ARRAY(type, n)
Definition regcustom.h:55
#define INTERRUPT(re)
Definition regcustom.h:57
#define MALLOC(n)
Definition regcustom.h:52
pg_wchar chr
Definition regcustom.h:62
#define assert(x)
Definition regcustom.h:59
#define REG_NOMATCH
Definition regex.h:216
#define REG_ASSERT
Definition regex.h:229
#define REG_UIMPOSSIBLE
Definition regex.h:150
#define REG_MTRACE
Definition regex.h:206
#define REG_FTRACE
Definition regex.h:205
#define REG_EXPECT
Definition regex.h:191
#define REG_INVARG
Definition regex.h:230
#define REG_ETOOBIG
Definition regex.h:233
#define regmatch_t
Definition regex.h:246
#define REG_OKAY
Definition regex.h:215
#define REG_MIXED
Definition regex.h:231
#define REG_NOSUB
Definition regex.h:185
#define regex_t
Definition regex.h:245
#define REG_ESPACE
Definition regex.h:227
#define REG_UBACKREF
Definition regex.h:138
static int find(struct vars *v, struct cnfa *cnfa, struct colormap *cm)
Definition regexec.c:421
static int citerdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition regexec.c:1119
static void freedfa(struct dfa *d)
#define DOMALLOC
Definition regexec.c:101
static chr * shortest(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
#define LOFF(p)
Definition regexec.c:132
#define NOERR()
Definition regexec.c:130
static void zapallsubs(regmatch_t *p, size_t n)
Definition regexec.c:665
static chr * dfa_backref(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, bool shortest)
static int lacon(struct vars *v, struct cnfa *pcnfa, chr *cp, color co)
#define ISERR()
Definition regexec.c:127
static int ccondissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition regexec.c:831
static int cfind(struct vars *v, struct cnfa *cnfa, struct colormap *cm)
Definition regexec.c:511
#define ERR(e)
Definition regexec.c:129
static int creviterdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition regexec.c:1323
static int cfindloop(struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct dfa *d, struct dfa *s, chr **coldp)
Definition regexec.c:551
static int cbrdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition regexec.c:996
static struct dfa * getladfa(struct vars *v, int n)
Definition regexec.c:402
static chr * lastcold(struct vars *v, struct dfa *d)
static struct dfa * getsubdfa(struct vars *v, struct subre *t)
Definition regexec.c:374
static chr * longest(struct vars *v, struct dfa *d, chr *start, chr *stop, int *hitstopp)
static struct sset * getvacant(struct vars *v, struct dfa *d, chr *cp, chr *start)
static int crevcondissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition regexec.c:912
#define WORK
Definition regexec.c:87
#define LOCALMAT
static struct sset * miss(struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
static struct dfa * newdfa(struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct smalldfa *sml)
#define OFF(p)
Definition regexec.c:131
static int cdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition regexec.c:758
static void zaptreesubs(struct vars *v, struct subre *t)
Definition regexec.c:681
static struct sset * pickss(struct vars *v, struct dfa *d, chr *cp, chr *start)
static int caltdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition regexec.c:1078
#define FEWCOLORS
Definition regexec.c:91
static void subset(struct vars *v, struct subre *sub, chr *begin, chr *end)
Definition regexec.c:704
int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
Definition regexec.c:185
#define FEWSTATES
Definition regexec.c:90
static int matchuntil(struct vars *v, struct dfa *d, chr *probe, struct sset **lastcss, chr **lastcp)
#define LOCALDFAS
#define BACKR
Definition regguts.h:499
#define STACK_TOO_DEEP(re)
Definition regguts.h:536
#define REMAGIC
Definition regguts.h:109
short color
Definition regguts.h:163
#define DUPINF
Definition regguts.h:107
#define MDEBUG(arglist)
Definition regguts.h:130
#define SHORTER
Definition regguts.h:496
Definition regexec.c:40
int nstates
Definition regguts.h:416
Definition regexec.c:64
short backmin
Definition regexec.c:81
int backno
Definition regexec.c:80
struct colormap * cm
Definition regexec.c:76
short backmax
Definition regexec.c:82
struct subre * tree
Definition regguts.h:550
struct subre * lacons
Definition regguts.h:555
long info
Definition regguts.h:548
struct cnfa search
Definition regguts.h:551
int ntree
Definition regguts.h:552
int cflags
Definition regguts.h:547
size_t nsub
Definition regguts.h:549
int nlacons
Definition regguts.h:556
struct colormap cmap
Definition regguts.h:553
pg_regoff_t rm_eo
Definition regex.h:164
pg_regoff_t rm_so
Definition regex.h:163
pg_regmatch_t rm_extend
Definition regex.h:170
unsigned statesarea[FEWSTATES *2+WORK]
Definition regexec.c:96
struct arcp incarea[FEWSTATES *2 *FEWCOLORS]
Definition regexec.c:98
struct sset * outsarea[FEWSTATES *2 *FEWCOLORS]
Definition regexec.c:97
struct sset ssets[FEWSTATES *2]
Definition regexec.c:95
Definition regexec.c:46
int flags
Definition regexec.c:52
int backno
Definition regguts.h:513
char op
Definition regguts.h:493
struct state * end
Definition regguts.h:519
short min
Definition regguts.h:514
short max
Definition regguts.h:515
struct subre * sibling
Definition regguts.h:517
char flags
Definition regguts.h:494
int id
Definition regguts.h:511
struct cnfa cnfa
Definition regguts.h:520
struct subre * child
Definition regguts.h:516
int capno
Definition regguts.h:512
struct state * begin
Definition regguts.h:518
size_t nmatch
Definition regexec.c:111
struct dfa ** ladfas
Definition regexec.c:119
const chr * stop
Definition regcomp.c:285
struct dfa ** subdfas
Definition regexec.c:118
chr * search_start
Definition regexec.c:115
struct sset ** lblastcss
Definition regexec.c:120
int err
Definition regcomp.c:286
struct guts * g
Definition regexec.c:109
struct smalldfa dfa1
Definition regexec.c:122
chr ** lblastcp
Definition regexec.c:121
regex_t * re
Definition regcomp.c:283
int eflags
Definition regexec.c:110
chr * start
Definition regexec.c:114
rm_detail_t * details
Definition regexec.c:113
regmatch_t * pmatch
Definition regexec.c:112
struct smalldfa dfa2
Definition regexec.c:123

◆ ISERR

#define ISERR ( )    VISERR(v)

Definition at line 127 of file regexec.c.

◆ LOCALDFAS

#define LOCALDFAS   40

◆ LOCALMAT

#define LOCALMAT   20

◆ LOCKED

#define LOCKED   04 /* locked in cache */

Definition at line 55 of file regexec.c.

◆ LOFF

#define LOFF (   p)    ((long)OFF(p))

Definition at line 132 of file regexec.c.

◆ NOERR

#define NOERR ( )    {if (ISERR()) return v->err;} /* if error seen, return it */

Definition at line 130 of file regexec.c.

◆ NOPROGRESS

#define NOPROGRESS   010 /* zero-progress state set */

Definition at line 56 of file regexec.c.

◆ OFF

#define OFF (   p)    ((p) - v->start)

Definition at line 131 of file regexec.c.

◆ POSTSTATE

#define POSTSTATE   02 /* includes the goal state */

Definition at line 54 of file regexec.c.

◆ STARTER

#define STARTER   01 /* the initial state set */

Definition at line 53 of file regexec.c.

◆ VERR

#define VERR (   vv,
  e 
)    ((vv)->err = ((vv)->err ? (vv)->err : (e)))

Definition at line 128 of file regexec.c.

◆ VISERR

#define VISERR (   vv)    ((vv)->err != 0) /* have we seen an error yet? */

Definition at line 126 of file regexec.c.

◆ WORK

#define WORK   1 /* number of work bitvectors needed */

Definition at line 87 of file regexec.c.

Function Documentation

◆ caltdissect()

static int caltdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1078 of file regexec.c.

1082{
1083 struct dfa *d;
1084 int er;
1085
1086 assert(t->op == '|');
1087
1088 t = t->child;
1089 /* there should be at least 2 alternatives */
1090 assert(t != NULL && t->sibling != NULL);
1091
1092 while (t != NULL)
1093 {
1094 assert(t->cnfa.nstates > 0);
1095
1096 MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1097
1098 d = getsubdfa(v, t);
1099 NOERR();
1100 if (longest(v, d, begin, end, (int *) NULL) == end)
1101 {
1102 MDEBUG(("%d: caltdissect matched\n", t->id));
1103 er = cdissect(v, t, begin, end);
1104 if (er != REG_NOMATCH)
1105 return er;
1106 }
1107 NOERR();
1108
1109 t = t->sibling;
1110 }
1111
1112 return REG_NOMATCH;
1113}

References assert, cdissect(), subre::child, subre::cnfa, fb(), getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_NOMATCH, and subre::sibling.

Referenced by cdissect().

◆ cbrdissect()

static int cbrdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 996 of file regexec.c.

1000{
1001 int n = t->backno;
1002 size_t numreps;
1003 size_t tlen;
1004 size_t brlen;
1005 chr *brstring;
1006 chr *p;
1007 int min = t->min;
1008 int max = t->max;
1009
1010 assert(t != NULL);
1011 assert(t->op == 'b');
1012 assert(n >= 0);
1013 assert((size_t) n < v->nmatch);
1014
1015 MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
1016 LOFF(begin), LOFF(end)));
1017
1018 /* get the backreferenced string */
1019 if (v->pmatch[n].rm_so == -1)
1020 return REG_NOMATCH;
1021 brstring = v->start + v->pmatch[n].rm_so;
1022 brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
1023
1024 /* special cases for zero-length strings */
1025 if (brlen == 0)
1026 {
1027 /*
1028 * matches only if target is zero length, but any number of
1029 * repetitions can be considered to be present
1030 */
1031 if (begin == end && min <= max)
1032 {
1033 MDEBUG(("%d: backref matched trivially\n", t->id));
1034 return REG_OKAY;
1035 }
1036 return REG_NOMATCH;
1037 }
1038 if (begin == end)
1039 {
1040 /* matches only if zero repetitions are okay */
1041 if (min == 0)
1042 {
1043 MDEBUG(("%d: backref matched trivially\n", t->id));
1044 return REG_OKAY;
1045 }
1046 return REG_NOMATCH;
1047 }
1048
1049 /*
1050 * check target length to see if it could possibly be an allowed number of
1051 * repetitions of brstring
1052 */
1053 assert(end > begin);
1054 tlen = end - begin;
1055 if (tlen % brlen != 0)
1056 return REG_NOMATCH;
1057 numreps = tlen / brlen;
1058 if (numreps < min || (numreps > max && max != DUPINF))
1059 return REG_NOMATCH;
1060
1061 /* okay, compare the actual string contents */
1062 p = begin;
1063 while (numreps-- > 0)
1064 {
1065 if ((*v->g->compare) (brstring, p, brlen) != 0)
1066 return REG_NOMATCH;
1067 p += brlen;
1068 }
1069
1070 MDEBUG(("%d: backref matched\n", t->id));
1071 return REG_OKAY;
1072}

References assert, subre::backno, DUPINF, fb(), vars::g, subre::id, LOFF, subre::max, MDEBUG, subre::min, subre::op, vars::pmatch, REG_NOMATCH, REG_OKAY, and vars::start.

Referenced by cdissect().

◆ ccondissect()

static int ccondissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 831 of file regexec.c.

835{
836 struct subre *left = t->child;
837 struct subre *right = left->sibling;
838 struct dfa *d;
839 struct dfa *d2;
840 chr *mid;
841 int er;
842
843 assert(t->op == '.');
844 assert(left != NULL && left->cnfa.nstates > 0);
845 assert(right != NULL && right->cnfa.nstates > 0);
846 assert(right->sibling == NULL);
847 assert(!(left->flags & SHORTER));
848
849 d = getsubdfa(v, left);
850 NOERR();
851 d2 = getsubdfa(v, right);
852 NOERR();
853 MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
854
855 /* pick a tentative midpoint */
856 mid = longest(v, d, begin, end, (int *) NULL);
857 NOERR();
858 if (mid == NULL)
859 return REG_NOMATCH;
860 MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
861
862 /* iterate until satisfaction or failure */
863 for (;;)
864 {
865 /* try this midpoint on for size */
866 if (longest(v, d2, mid, end, (int *) NULL) == end)
867 {
868 er = cdissect(v, left, begin, mid);
869 if (er == REG_OKAY)
870 {
871 er = cdissect(v, right, mid, end);
872 if (er == REG_OKAY)
873 {
874 /* satisfaction */
875 MDEBUG(("%d: successful\n", t->id));
876 return REG_OKAY;
877 }
878 /* Reset left's matches (right should have done so itself) */
879 zaptreesubs(v, left);
880 }
881 if (er != REG_NOMATCH)
882 return er;
883 }
884 NOERR();
885
886 /* that midpoint didn't work, find a new one */
887 if (mid == begin)
888 {
889 /* all possibilities exhausted */
890 MDEBUG(("%d: no midpoint\n", t->id));
891 return REG_NOMATCH;
892 }
893 mid = longest(v, d, begin, mid - 1, (int *) NULL);
894 NOERR();
895 if (mid == NULL)
896 {
897 /* failed to find a new one */
898 MDEBUG(("%d: failed midpoint\n", t->id));
899 return REG_NOMATCH;
900 }
901 MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
902 }
903
904 /* can't get here */
905 return REG_ASSERT;
906}

References assert, cdissect(), subre::child, subre::cnfa, fb(), subre::flags, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_ASSERT, REG_NOMATCH, REG_OKAY, SHORTER, subre::sibling, and zaptreesubs().

Referenced by cdissect().

◆ cdissect()

static int cdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 758 of file regexec.c.

762{
763 int er;
764
765 assert(t != NULL);
766 MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
767
768 /* handy place to check for operation cancel */
769 INTERRUPT(v->re);
770 /* ... and stack overrun */
771 if (STACK_TOO_DEEP(v->re))
772 return REG_ETOOBIG;
773
774 switch (t->op)
775 {
776 case '=': /* terminal node */
777 assert(t->child == NULL);
778 er = REG_OKAY; /* no action, parent did the work */
779 break;
780 case 'b': /* back reference */
781 assert(t->child == NULL);
782 er = cbrdissect(v, t, begin, end);
783 break;
784 case '.': /* concatenation */
785 assert(t->child != NULL);
786 if (t->child->flags & SHORTER) /* reverse scan */
787 er = crevcondissect(v, t, begin, end);
788 else
789 er = ccondissect(v, t, begin, end);
790 break;
791 case '|': /* alternation */
792 assert(t->child != NULL);
793 er = caltdissect(v, t, begin, end);
794 break;
795 case '*': /* iteration */
796 assert(t->child != NULL);
797 if (t->child->flags & SHORTER) /* reverse scan */
798 er = creviterdissect(v, t, begin, end);
799 else
800 er = citerdissect(v, t, begin, end);
801 break;
802 case '(': /* no-op capture node */
803 assert(t->child != NULL);
804 er = cdissect(v, t->child, begin, end);
805 break;
806 default:
807 er = REG_ASSERT;
808 break;
809 }
810
811 /*
812 * We should never have a match failure unless backrefs lurk below;
813 * otherwise, either caller failed to check the DFA, or there's some
814 * inconsistency between the DFA and the node's innards.
815 */
816 assert(er != REG_NOMATCH || (t->flags & BACKR));
817
818 /*
819 * If this node is marked as capturing, save successful match's location.
820 */
821 if (t->capno > 0 && er == REG_OKAY)
822 subset(v, t, begin, end);
823
824 return er;
825}

References assert, BACKR, subre::begin, caltdissect(), subre::capno, cbrdissect(), ccondissect(), cdissect(), subre::child, citerdissect(), crevcondissect(), creviterdissect(), subre::end, fb(), subre::flags, subre::id, INTERRUPT, LOFF, MDEBUG, subre::op, vars::re, REG_ASSERT, REG_ETOOBIG, REG_NOMATCH, REG_OKAY, SHORTER, STACK_TOO_DEEP, and subset().

Referenced by caltdissect(), ccondissect(), cdissect(), cfindloop(), citerdissect(), crevcondissect(), creviterdissect(), and find().

◆ cfind()

static int cfind ( struct vars v,
struct cnfa cnfa,
struct colormap cm 
)
static

Definition at line 511 of file regexec.c.

514{
515 struct dfa *s;
516 struct dfa *d;
517 chr *cold;
518 int ret;
519
520 s = newdfa(v, &v->g->search, cm, &v->dfa1);
521 if (s == NULL)
522 return v->err;
523 d = newdfa(v, cnfa, cm, &v->dfa2);
524 if (d == NULL)
525 {
526 freedfa(s);
527 return v->err;
528 }
529
530 ret = cfindloop(v, cnfa, cm, d, s, &cold);
531
532 freedfa(d);
533 freedfa(s);
534 NOERR();
535 if (v->g->cflags & REG_EXPECT)
536 {
537 assert(v->details != NULL);
538 if (cold != NULL)
540 else
541 v->details->rm_extend.rm_so = OFF(v->stop);
542 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
543 }
544 return ret;
545}

References assert, cfindloop(), guts::cflags, dfa::cm, vars::details, vars::dfa1, vars::dfa2, vars::err, fb(), freedfa(), vars::g, newdfa(), NOERR, OFF, REG_EXPECT, pg_regmatch_t::rm_eo, rm_detail_t::rm_extend, pg_regmatch_t::rm_so, guts::search, and vars::stop.

Referenced by pg_regexec().

◆ cfindloop()

static int cfindloop ( struct vars v,
struct cnfa cnfa,
struct colormap cm,
struct dfa d,
struct dfa s,
chr **  coldp 
)
static

Definition at line 551 of file regexec.c.

557{
558 chr *begin;
559 chr *end;
560 chr *cold;
561 chr *open; /* open and close of range of possible starts */
562 chr *close;
563 chr *estart;
564 chr *estop;
565 int er;
566 int shorter = v->g->tree->flags & SHORTER;
567 int hitend;
568
569 assert(d != NULL && s != NULL);
570 cold = NULL;
571 close = v->search_start;
572 do
573 {
574 /* Search with the search RE for match range at/beyond "close" */
575 MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
576 close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
577 if (ISERR())
578 {
579 *coldp = cold;
580 return v->err;
581 }
582 if (close == NULL)
583 break; /* no more possible match anywhere */
584 assert(cold != NULL);
585 open = cold;
586 cold = NULL;
587 /* Search for matches starting between "open" and "close" inclusive */
588 MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
589 for (begin = open; begin <= close; begin++)
590 {
591 MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
592 estart = begin;
593 estop = v->stop;
594 for (;;)
595 {
596 /* Here we use the top node's detailed RE */
597 if (shorter)
598 end = shortest(v, d, begin, estart,
599 estop, (chr **) NULL, &hitend);
600 else
601 end = longest(v, d, begin, estop,
602 &hitend);
603 if (ISERR())
604 {
605 *coldp = cold;
606 return v->err;
607 }
608 if (hitend && cold == NULL)
609 cold = begin;
610 if (end == NULL)
611 break; /* no match with this begin point, try next */
612 MDEBUG(("tentative end %ld\n", LOFF(end)));
613 /* Dissect the potential match to see if it really matches */
614 er = cdissect(v, v->g->tree, begin, end);
615 if (er == REG_OKAY)
616 {
617 if (v->nmatch > 0)
618 {
619 v->pmatch[0].rm_so = OFF(begin);
620 v->pmatch[0].rm_eo = OFF(end);
621 }
622 *coldp = cold;
623 return REG_OKAY;
624 }
625 if (er != REG_NOMATCH)
626 {
627 ERR(er);
628 *coldp = cold;
629 return er;
630 }
631 /* Try next longer/shorter match with same begin point */
632 if (shorter)
633 {
634 if (end == estop)
635 break; /* no more, so try next begin point */
636 estart = end + 1;
637 }
638 else
639 {
640 if (end == begin)
641 break; /* no more, so try next begin point */
642 estop = end - 1;
643 }
644 } /* end loop over endpoint positions */
645 } /* end loop over beginning positions */
646
647 /*
648 * If we get here, there is no possible match starting at or before
649 * "close", so consider matches beyond that. We'll do a fresh search
650 * with the search RE to find a new promising match range.
651 */
652 close++;
653 } while (close < v->stop);
654
655 *coldp = cold;
656 return REG_NOMATCH;
657}

References assert, cdissect(), close, vars::err, ERR, fb(), subre::flags, vars::g, ISERR, LOFF, longest(), MDEBUG, vars::nmatch, OFF, vars::pmatch, REG_NOMATCH, REG_OKAY, vars::search_start, SHORTER, shortest(), vars::stop, and guts::tree.

Referenced by cfind().

◆ citerdissect()

static int citerdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1119 of file regexec.c.

1123{
1124 struct dfa *d;
1125 chr **endpts;
1126 chr *limit;
1127 int min_matches;
1128 size_t max_matches;
1129 int nverified;
1130 int k;
1131 int i;
1132 int er;
1133
1134 assert(t->op == '*');
1135 assert(t->child != NULL && t->child->cnfa.nstates > 0);
1136 assert(!(t->child->flags & SHORTER));
1137 assert(begin <= end);
1138
1139 MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1140
1141 /*
1142 * For the moment, assume the minimum number of matches is 1. If zero
1143 * matches are allowed, and the target string is empty, we are allowed to
1144 * match regardless of the contents of the iter node --- but we would
1145 * prefer to match once, so that capturing parens get set. (An example of
1146 * the concern here is a pattern like "()*\1", which historically this
1147 * code has allowed to succeed.) Therefore, we deal with the zero-matches
1148 * case at the bottom, after failing to find any other way to match.
1149 */
1150 min_matches = t->min;
1151 if (min_matches <= 0)
1152 min_matches = 1;
1153
1154 /*
1155 * We need workspace to track the endpoints of each sub-match. Normally
1156 * we consider only nonzero-length sub-matches, so there can be at most
1157 * end-begin of them. However, if min is larger than that, we will also
1158 * consider zero-length sub-matches in order to find enough matches.
1159 *
1160 * For convenience, endpts[0] contains the "begin" pointer and we store
1161 * sub-match endpoints in endpts[1..max_matches].
1162 */
1163 max_matches = end - begin;
1164 if (max_matches > t->max && t->max != DUPINF)
1165 max_matches = t->max;
1169 if (endpts == NULL)
1170 return REG_ESPACE;
1171 endpts[0] = begin;
1172
1173 d = getsubdfa(v, t->child);
1174 if (ISERR())
1175 {
1176 FREE(endpts);
1177 return v->err;
1178 }
1179
1180 /*
1181 * Our strategy is to first find a set of sub-match endpoints that are
1182 * valid according to the child node's DFA, and then recursively dissect
1183 * each sub-match to confirm validity. If any validity check fails,
1184 * backtrack that sub-match and try again. And, when we next try for a
1185 * validity check, we need not recheck any successfully verified
1186 * sub-matches that we didn't move the endpoints of. nverified remembers
1187 * how many sub-matches are currently known okay.
1188 */
1189
1190 /* initialize to consider first sub-match */
1191 nverified = 0;
1192 k = 1;
1193 limit = end;
1194
1195 /* iterate until satisfaction or failure */
1196 while (k > 0)
1197 {
1198 /* try to find an endpoint for the k'th sub-match */
1199 endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
1200 if (ISERR())
1201 {
1202 FREE(endpts);
1203 return v->err;
1204 }
1205 if (endpts[k] == NULL)
1206 {
1207 /* no match possible, so see if we can shorten previous one */
1208 k--;
1209 goto backtrack;
1210 }
1211 MDEBUG(("%d: working endpoint %d: %ld\n",
1212 t->id, k, LOFF(endpts[k])));
1213
1214 /* k'th sub-match can no longer be considered verified */
1215 if (nverified >= k)
1216 nverified = k - 1;
1217
1218 if (endpts[k] != end)
1219 {
1220 /* haven't reached end yet, try another iteration if allowed */
1221 if (k >= max_matches)
1222 {
1223 /* must try to shorten some previous match */
1224 k--;
1225 goto backtrack;
1226 }
1227
1228 /* reject zero-length match unless necessary to achieve min */
1229 if (endpts[k] == endpts[k - 1] &&
1230 (k >= min_matches || min_matches - k < end - endpts[k]))
1231 goto backtrack;
1232
1233 k++;
1234 limit = end;
1235 continue;
1236 }
1237
1238 /*
1239 * We've identified a way to divide the string into k sub-matches that
1240 * works so far as the child DFA can tell. If k is an allowed number
1241 * of matches, start the slow part: recurse to verify each sub-match.
1242 * We always have k <= max_matches, needn't check that.
1243 */
1244 if (k < min_matches)
1245 goto backtrack;
1246
1247 MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1248
1249 for (i = nverified + 1; i <= k; i++)
1250 {
1251 /* zap any match data from a non-last iteration */
1252 zaptreesubs(v, t->child);
1253 er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1254 if (er == REG_OKAY)
1255 {
1256 nverified = i;
1257 continue;
1258 }
1259 if (er == REG_NOMATCH)
1260 break;
1261 /* oops, something failed */
1262 FREE(endpts);
1263 return er;
1264 }
1265
1266 if (i > k)
1267 {
1268 /* satisfaction */
1269 MDEBUG(("%d: successful\n", t->id));
1270 FREE(endpts);
1271 return REG_OKAY;
1272 }
1273
1274 /* i'th match failed to verify, so backtrack it */
1275 k = i;
1276
1277backtrack:
1278
1279 /*
1280 * Must consider shorter versions of the k'th sub-match. However,
1281 * we'll only ask for a zero-length match if necessary.
1282 */
1283 while (k > 0)
1284 {
1285 chr *prev_end = endpts[k - 1];
1286
1287 if (endpts[k] > prev_end)
1288 {
1289 limit = endpts[k] - 1;
1290 if (limit > prev_end ||
1292 {
1293 /* break out of backtrack loop, continue the outer one */
1294 break;
1295 }
1296 }
1297 /* can't shorten k'th sub-match any more, consider previous one */
1298 k--;
1299 }
1300 }
1301
1302 /* all possibilities exhausted */
1303 FREE(endpts);
1304
1305 /*
1306 * Now consider the possibility that we can match to a zero-length string
1307 * by using zero repetitions.
1308 */
1309 if (t->min == 0 && begin == end)
1310 {
1311 MDEBUG(("%d: allowing zero matches\n", t->id));
1312 return REG_OKAY;
1313 }
1314
1315 MDEBUG(("%d: failed\n", t->id));
1316 return REG_NOMATCH;
1317}

References assert, cdissect(), subre::child, subre::cnfa, DUPINF, vars::err, fb(), subre::flags, FREE, getsubdfa(), i, subre::id, ISERR, LOFF, longest(), MALLOC_ARRAY, subre::max, MDEBUG, subre::min, cnfa::nstates, subre::op, REG_ESPACE, REG_NOMATCH, REG_OKAY, SHORTER, and zaptreesubs().

Referenced by cdissect().

◆ crevcondissect()

static int crevcondissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 912 of file regexec.c.

916{
917 struct subre *left = t->child;
918 struct subre *right = left->sibling;
919 struct dfa *d;
920 struct dfa *d2;
921 chr *mid;
922 int er;
923
924 assert(t->op == '.');
925 assert(left != NULL && left->cnfa.nstates > 0);
926 assert(right != NULL && right->cnfa.nstates > 0);
927 assert(right->sibling == NULL);
928 assert(left->flags & SHORTER);
929
930 d = getsubdfa(v, left);
931 NOERR();
932 d2 = getsubdfa(v, right);
933 NOERR();
934 MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
935
936 /* pick a tentative midpoint */
937 mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
938 NOERR();
939 if (mid == NULL)
940 return REG_NOMATCH;
941 MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
942
943 /* iterate until satisfaction or failure */
944 for (;;)
945 {
946 /* try this midpoint on for size */
947 if (longest(v, d2, mid, end, (int *) NULL) == end)
948 {
949 er = cdissect(v, left, begin, mid);
950 if (er == REG_OKAY)
951 {
952 er = cdissect(v, right, mid, end);
953 if (er == REG_OKAY)
954 {
955 /* satisfaction */
956 MDEBUG(("%d: successful\n", t->id));
957 return REG_OKAY;
958 }
959 /* Reset left's matches (right should have done so itself) */
960 zaptreesubs(v, left);
961 }
962 if (er != REG_NOMATCH)
963 return er;
964 }
965 NOERR();
966
967 /* that midpoint didn't work, find a new one */
968 if (mid == end)
969 {
970 /* all possibilities exhausted */
971 MDEBUG(("%d: no midpoint\n", t->id));
972 return REG_NOMATCH;
973 }
974 mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
975 NOERR();
976 if (mid == NULL)
977 {
978 /* failed to find a new one */
979 MDEBUG(("%d: failed midpoint\n", t->id));
980 return REG_NOMATCH;
981 }
982 MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
983 }
984
985 /* can't get here */
986 return REG_ASSERT;
987}

References assert, cdissect(), subre::child, subre::cnfa, fb(), subre::flags, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_ASSERT, REG_NOMATCH, REG_OKAY, SHORTER, shortest(), subre::sibling, and zaptreesubs().

Referenced by cdissect().

◆ creviterdissect()

static int creviterdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1323 of file regexec.c.

1327{
1328 struct dfa *d;
1329 chr **endpts;
1330 chr *limit;
1331 int min_matches;
1332 size_t max_matches;
1333 int nverified;
1334 int k;
1335 int i;
1336 int er;
1337
1338 assert(t->op == '*');
1339 assert(t->child != NULL && t->child->cnfa.nstates > 0);
1340 assert(t->child->flags & SHORTER);
1341 assert(begin <= end);
1342
1343 MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1344
1345 /*
1346 * If zero matches are allowed, and target string is empty, just declare
1347 * victory. OTOH, if target string isn't empty, zero matches can't work
1348 * so we pretend the min is 1.
1349 */
1350 min_matches = t->min;
1351 if (min_matches <= 0)
1352 {
1353 if (begin == end)
1354 {
1355 MDEBUG(("%d: allowing zero matches\n", t->id));
1356 return REG_OKAY;
1357 }
1358 min_matches = 1;
1359 }
1360
1361 /*
1362 * We need workspace to track the endpoints of each sub-match. Normally
1363 * we consider only nonzero-length sub-matches, so there can be at most
1364 * end-begin of them. However, if min is larger than that, we will also
1365 * consider zero-length sub-matches in order to find enough matches.
1366 *
1367 * For convenience, endpts[0] contains the "begin" pointer and we store
1368 * sub-match endpoints in endpts[1..max_matches].
1369 */
1370 max_matches = end - begin;
1371 if (max_matches > t->max && t->max != DUPINF)
1372 max_matches = t->max;
1376 if (endpts == NULL)
1377 return REG_ESPACE;
1378 endpts[0] = begin;
1379
1380 d = getsubdfa(v, t->child);
1381 if (ISERR())
1382 {
1383 FREE(endpts);
1384 return v->err;
1385 }
1386
1387 /*
1388 * Our strategy is to first find a set of sub-match endpoints that are
1389 * valid according to the child node's DFA, and then recursively dissect
1390 * each sub-match to confirm validity. If any validity check fails,
1391 * backtrack that sub-match and try again. And, when we next try for a
1392 * validity check, we need not recheck any successfully verified
1393 * sub-matches that we didn't move the endpoints of. nverified remembers
1394 * how many sub-matches are currently known okay.
1395 */
1396
1397 /* initialize to consider first sub-match */
1398 nverified = 0;
1399 k = 1;
1400 limit = begin;
1401
1402 /* iterate until satisfaction or failure */
1403 while (k > 0)
1404 {
1405 /* disallow zero-length match unless necessary to achieve min */
1406 if (limit == endpts[k - 1] &&
1407 limit != end &&
1408 (k >= min_matches || min_matches - k < end - limit))
1409 limit++;
1410
1411 /* if this is the last allowed sub-match, it must reach to the end */
1412 if (k >= max_matches)
1413 limit = end;
1414
1415 /* try to find an endpoint for the k'th sub-match */
1416 endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
1417 (chr **) NULL, (int *) NULL);
1418 if (ISERR())
1419 {
1420 FREE(endpts);
1421 return v->err;
1422 }
1423 if (endpts[k] == NULL)
1424 {
1425 /* no match possible, so see if we can lengthen previous one */
1426 k--;
1427 goto backtrack;
1428 }
1429 MDEBUG(("%d: working endpoint %d: %ld\n",
1430 t->id, k, LOFF(endpts[k])));
1431
1432 /* k'th sub-match can no longer be considered verified */
1433 if (nverified >= k)
1434 nverified = k - 1;
1435
1436 if (endpts[k] != end)
1437 {
1438 /* haven't reached end yet, try another iteration if allowed */
1439 if (k >= max_matches)
1440 {
1441 /* must try to lengthen some previous match */
1442 k--;
1443 goto backtrack;
1444 }
1445
1446 k++;
1447 limit = endpts[k - 1];
1448 continue;
1449 }
1450
1451 /*
1452 * We've identified a way to divide the string into k sub-matches that
1453 * works so far as the child DFA can tell. If k is an allowed number
1454 * of matches, start the slow part: recurse to verify each sub-match.
1455 * We always have k <= max_matches, needn't check that.
1456 */
1457 if (k < min_matches)
1458 goto backtrack;
1459
1460 MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1461
1462 for (i = nverified + 1; i <= k; i++)
1463 {
1464 /* zap any match data from a non-last iteration */
1465 zaptreesubs(v, t->child);
1466 er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1467 if (er == REG_OKAY)
1468 {
1469 nverified = i;
1470 continue;
1471 }
1472 if (er == REG_NOMATCH)
1473 break;
1474 /* oops, something failed */
1475 FREE(endpts);
1476 return er;
1477 }
1478
1479 if (i > k)
1480 {
1481 /* satisfaction */
1482 MDEBUG(("%d: successful\n", t->id));
1483 FREE(endpts);
1484 return REG_OKAY;
1485 }
1486
1487 /* i'th match failed to verify, so backtrack it */
1488 k = i;
1489
1490backtrack:
1491
1492 /*
1493 * Must consider longer versions of the k'th sub-match.
1494 */
1495 while (k > 0)
1496 {
1497 if (endpts[k] < end)
1498 {
1499 limit = endpts[k] + 1;
1500 /* break out of backtrack loop, continue the outer one */
1501 break;
1502 }
1503 /* can't lengthen k'th sub-match any more, consider previous one */
1504 k--;
1505 }
1506 }
1507
1508 /* all possibilities exhausted */
1509 MDEBUG(("%d: failed\n", t->id));
1510 FREE(endpts);
1511 return REG_NOMATCH;
1512}

References assert, cdissect(), subre::child, subre::cnfa, DUPINF, vars::err, fb(), subre::flags, FREE, getsubdfa(), i, subre::id, ISERR, LOFF, MALLOC_ARRAY, subre::max, MDEBUG, subre::min, cnfa::nstates, subre::op, REG_ESPACE, REG_NOMATCH, REG_OKAY, SHORTER, shortest(), and zaptreesubs().

Referenced by cdissect().

◆ dfa_backref()

static chr * dfa_backref ( struct vars v,
struct dfa d,
chr start,
chr min,
chr max,
bool  shortest 
)
static

◆ find()

static int find ( struct vars v,
struct cnfa cnfa,
struct colormap cm 
)
static

Definition at line 421 of file regexec.c.

424{
425 struct dfa *s;
426 struct dfa *d;
427 chr *begin;
428 chr *end = NULL;
429 chr *cold;
430 chr *open; /* open and close of range of possible starts */
431 chr *close;
432 int hitend;
433 int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
434
435 /* first, a shot with the search RE */
436 s = newdfa(v, &v->g->search, cm, &v->dfa1);
437 if (s == NULL)
438 return v->err;
439 MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
440 cold = NULL;
441 close = shortest(v, s, v->search_start, v->search_start, v->stop,
442 &cold, (int *) NULL);
443 freedfa(s);
444 NOERR();
445 if (v->g->cflags & REG_EXPECT)
446 {
447 assert(v->details != NULL);
448 if (cold != NULL)
450 else
451 v->details->rm_extend.rm_so = OFF(v->stop);
452 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
453 }
454 if (close == NULL) /* not found */
455 return REG_NOMATCH;
456 if (v->nmatch == 0) /* found, don't need exact location */
457 return REG_OKAY;
458
459 /* find starting point and match */
460 assert(cold != NULL);
461 open = cold;
462 cold = NULL;
463 MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
464 d = newdfa(v, cnfa, cm, &v->dfa1);
465 if (d == NULL)
466 return v->err;
467 for (begin = open; begin <= close; begin++)
468 {
469 MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
470 if (shorter)
471 end = shortest(v, d, begin, begin, v->stop,
472 (chr **) NULL, &hitend);
473 else
474 end = longest(v, d, begin, v->stop, &hitend);
475 if (ISERR())
476 {
477 freedfa(d);
478 return v->err;
479 }
480 if (hitend && cold == NULL)
481 cold = begin;
482 if (end != NULL)
483 break; /* NOTE BREAK OUT */
484 }
485 assert(end != NULL); /* search RE succeeded so loop should */
486 freedfa(d);
487
488 /* and pin down details */
489 assert(v->nmatch > 0);
490 v->pmatch[0].rm_so = OFF(begin);
491 v->pmatch[0].rm_eo = OFF(end);
492 if (v->g->cflags & REG_EXPECT)
493 {
494 if (cold != NULL)
496 else
497 v->details->rm_extend.rm_so = OFF(v->stop);
498 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
499 }
500 if (v->nmatch == 1) /* no need for submatches */
501 return REG_OKAY;
502
503 /* find submatches */
504 return cdissect(v, v->g->tree, begin, end);
505}

References assert, cdissect(), guts::cflags, close, dfa::cm, vars::details, vars::dfa1, vars::err, fb(), subre::flags, freedfa(), vars::g, ISERR, LOFF, longest(), MDEBUG, newdfa(), vars::nmatch, NOERR, OFF, vars::pmatch, REG_EXPECT, REG_NOMATCH, REG_OKAY, pg_regmatch_t::rm_eo, rm_detail_t::rm_extend, pg_regmatch_t::rm_so, guts::search, vars::search_start, SHORTER, shortest(), vars::start, vars::stop, and guts::tree.

Referenced by NIAddAffix(), NIImportAffixes(), NIImportOOAffixes(), parse_affentry(), parse_ooaffentry(), pg_regexec(), and testdelete().

◆ freedfa()

static void freedfa ( struct dfa d)
static

Referenced by cfind(), find(), and pg_regexec().

◆ getladfa()

static struct dfa * getladfa ( struct vars v,
int  n 
)
static

Definition at line 402 of file regexec.c.

404{
405 assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
406
407 if (v->ladfas[n] == NULL)
408 {
409 struct subre *sub = &v->g->lacons[n];
410
411 v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
412 /* a LACON can't contain a backref, so nothing else to do */
413 }
414 return v->ladfas[n];
415}

References assert, guts::cmap, subre::cnfa, DOMALLOC, fb(), vars::g, guts::lacons, vars::ladfas, and newdfa().

Referenced by lacon().

◆ getsubdfa()

static struct dfa * getsubdfa ( struct vars v,
struct subre t 
)
static

Definition at line 374 of file regexec.c.

376{
377 struct dfa *d = v->subdfas[t->id];
378
379 if (d == NULL)
380 {
381 d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
382 if (d == NULL)
383 return NULL;
384 /* set up additional info if this is a backref node */
385 if (t->op == 'b')
386 {
387 d->backno = t->backno;
388 d->backmin = t->min;
389 d->backmax = t->max;
390 }
391 v->subdfas[t->id] = d;
392 }
393 return d;
394}

References dfa::backmax, dfa::backmin, dfa::backno, subre::backno, guts::cmap, subre::cnfa, DOMALLOC, fb(), vars::g, subre::id, subre::max, subre::min, newdfa(), subre::op, and vars::subdfas.

Referenced by caltdissect(), ccondissect(), citerdissect(), crevcondissect(), and creviterdissect().

◆ getvacant()

static struct sset * getvacant ( struct vars v,
struct dfa d,
chr cp,
chr start 
)
static

◆ hash()

static unsigned hash ( unsigned uv,
int  n 
)
static

◆ initialize()

static struct sset * initialize ( struct vars v,
struct dfa d,
chr start 
)
static

◆ lacon()

static int lacon ( struct vars v,
struct cnfa pcnfa,
chr cp,
color  co 
)
static

◆ lastcold()

static chr * lastcold ( struct vars v,
struct dfa d 
)
static

◆ longest()

static chr * longest ( struct vars v,
struct dfa d,
chr start,
chr stop,
int hitstopp 
)
static

◆ matchuntil()

static int matchuntil ( struct vars v,
struct dfa d,
chr probe,
struct sset **  lastcss,
chr **  lastcp 
)
static

◆ miss()

static struct sset * miss ( struct vars v,
struct dfa d,
struct sset css,
color  co,
chr cp,
chr start 
)
static

◆ newdfa()

static struct dfa * newdfa ( struct vars v,
struct cnfa cnfa,
struct colormap cm,
struct smalldfa sml 
)
static

Referenced by cfind(), find(), getladfa(), and getsubdfa().

◆ pg_regexec()

int pg_regexec ( regex_t re,
const chr string,
size_t  len,
size_t  search_start,
rm_detail_t details,
size_t  nmatch,
regmatch_t  pmatch[],
int  flags 
)

Definition at line 185 of file regexec.c.

193{
194 struct vars var;
195 struct vars *v = &var;
196 int st;
197 size_t n;
198 size_t i;
199 int backref;
200
201#define LOCALMAT 20
203
204#define LOCALDFAS 40
205 struct dfa *subdfas[LOCALDFAS];
206
207 /* sanity checks */
208 if (re == NULL || string == NULL || re->re_magic != REMAGIC)
209 return REG_INVARG;
210 if (re->re_csize != sizeof(chr))
211 return REG_MIXED;
212 if (search_start > len)
213 return REG_NOMATCH;
214
215 /* Initialize locale-dependent support */
216 pg_set_regex_collation(re->re_collation);
217
218 /* setup */
219 v->re = re;
220 v->g = (struct guts *) re->re_guts;
221 if ((v->g->cflags & REG_EXPECT) && details == NULL)
222 return REG_INVARG;
223 if (v->g->info & REG_UIMPOSSIBLE)
224 return REG_NOMATCH;
225 backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
226 v->eflags = flags;
227 if (backref && nmatch <= v->g->nsub)
228 {
229 /* need larger work area */
230 v->nmatch = v->g->nsub + 1;
231 if (v->nmatch <= LOCALMAT)
232 v->pmatch = mat;
233 else
235 if (v->pmatch == NULL)
236 return REG_ESPACE;
237 zapallsubs(v->pmatch, v->nmatch);
238 }
239 else
240 {
241 /* we can store results directly in caller's array */
242 v->pmatch = pmatch;
243 /* ensure any extra entries in caller's array are filled with -1 */
244 if (nmatch > 0)
245 zapallsubs(pmatch, nmatch);
246 /* then forget about extra entries, to avoid useless work in find() */
247 if (nmatch > v->g->nsub + 1)
248 nmatch = v->g->nsub + 1;
249 v->nmatch = nmatch;
250 }
251 v->details = details;
252 v->start = (chr *) string;
253 v->search_start = (chr *) string + search_start;
254 v->stop = (chr *) string + len;
255 v->err = 0;
256 v->subdfas = NULL;
257 v->ladfas = NULL;
258 v->lblastcss = NULL;
259 v->lblastcp = NULL;
260 /* below this point, "goto cleanup" will behave sanely */
261
262 assert(v->g->ntree >= 0);
263 n = (size_t) v->g->ntree;
264 if (n <= LOCALDFAS)
265 v->subdfas = subdfas;
266 else
267 {
268 /* ntree is surely less than the number of states, so this is safe: */
269 v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
270 if (v->subdfas == NULL)
271 {
272 st = REG_ESPACE;
273 goto cleanup;
274 }
275 }
276 for (i = 0; i < n; i++)
277 v->subdfas[i] = NULL;
278
279 assert(v->g->nlacons >= 0);
280 n = (size_t) v->g->nlacons;
281 if (n > 0)
282 {
283 /* nlacons is surely less than the number of arcs, so this is safe: */
284 v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
285 if (v->ladfas == NULL)
286 {
287 st = REG_ESPACE;
288 goto cleanup;
289 }
290 for (i = 0; i < n; i++)
291 v->ladfas[i] = NULL;
292 v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
293 v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
294 if (v->lblastcss == NULL || v->lblastcp == NULL)
295 {
296 st = REG_ESPACE;
297 goto cleanup;
298 }
299 for (i = 0; i < n; i++)
300 {
301 v->lblastcss[i] = NULL;
302 v->lblastcp[i] = NULL;
303 }
304 }
305
306 /* do it */
307 assert(v->g->tree != NULL);
308 if (backref)
309 st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
310 else
311 st = find(v, &v->g->tree->cnfa, &v->g->cmap);
312
313 /* on success, ensure caller's match vector is filled correctly */
314 if (st == REG_OKAY && nmatch > 0)
315 {
316 if (v->pmatch != pmatch)
317 {
318 /* copy portion of match vector over from (larger) work area */
319 assert(nmatch <= v->nmatch);
320 memcpy(VS(pmatch), VS(v->pmatch), nmatch * sizeof(regmatch_t));
321 }
322 if (v->g->cflags & REG_NOSUB)
323 {
324 /* don't expose possibly-partial sub-match results to caller */
325 zapallsubs(pmatch, nmatch);
326 }
327 }
328
329 /* clean up */
330cleanup:
331 if (v->pmatch != pmatch && v->pmatch != mat)
332 FREE(v->pmatch);
333 if (v->subdfas != NULL)
334 {
335 n = (size_t) v->g->ntree;
336 for (i = 0; i < n; i++)
337 {
338 if (v->subdfas[i] != NULL)
339 freedfa(v->subdfas[i]);
340 }
341 if (v->subdfas != subdfas)
342 FREE(v->subdfas);
343 }
344 if (v->ladfas != NULL)
345 {
346 n = (size_t) v->g->nlacons;
347 for (i = 0; i < n; i++)
348 {
349 if (v->ladfas[i] != NULL)
350 freedfa(v->ladfas[i]);
351 }
352 FREE(v->ladfas);
353 }
354 if (v->lblastcss != NULL)
355 FREE(v->lblastcss);
356 if (v->lblastcp != NULL)
357 FREE(v->lblastcp);
358
359#ifdef REG_DEBUG
360 if (v->eflags & (REG_FTRACE | REG_MTRACE))
361 fflush(stdout);
362#endif
363
364 return st;
365}

References assert, cfind(), guts::cflags, cleanup(), guts::cmap, subre::cnfa, vars::details, vars::eflags, vars::err, fb(), find(), FREE, freedfa(), vars::g, i, guts::info, vars::ladfas, vars::lblastcp, vars::lblastcss, len, LOCALDFAS, LOCALMAT, MALLOC, MALLOC_ARRAY, memcpy(), guts::nlacons, vars::nmatch, guts::nsub, guts::ntree, pg_set_regex_collation(), vars::pmatch, vars::re, REG_ESPACE, REG_EXPECT, REG_FTRACE, REG_INVARG, REG_MIXED, REG_MTRACE, REG_NOMATCH, REG_NOSUB, REG_OKAY, REG_UBACKREF, REG_UIMPOSSIBLE, regmatch_t, REMAGIC, vars::search_start, vars::start, vars::stop, vars::subdfas, guts::tree, VS, and zapallsubs().

Referenced by CheckAffix(), RE_wchar_execute(), regexec_auth_token(), replace_text_regexp(), and test_re_execute().

◆ pickss()

static struct sset * pickss ( struct vars v,
struct dfa d,
chr cp,
chr start 
)
static

◆ shortest()

static chr * shortest ( struct vars v,
struct dfa d,
chr start,
chr min,
chr max,
chr **  coldp,
int hitstopp 
)
static

◆ subset()

static void subset ( struct vars v,
struct subre sub,
chr begin,
chr end 
)
static

Definition at line 704 of file regexec.c.

708{
709 int n = sub->capno;
710
711 assert(n > 0);
712 if ((size_t) n >= v->nmatch)
713 return;
714
715 MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
716 v->pmatch[n].rm_so = OFF(begin);
717 v->pmatch[n].rm_eo = OFF(end);
718}

References assert, subre::begin, subre::capno, subre::end, subre::id, LOFF, MDEBUG, vars::nmatch, OFF, and vars::pmatch.

Referenced by cdissect().

◆ zapallsubs()

static void zapallsubs ( regmatch_t p,
size_t  n 
)
static

Definition at line 665 of file regexec.c.

667{
668 size_t i;
669
670 for (i = n - 1; i > 0; i--)
671 {
672 p[i].rm_so = -1;
673 p[i].rm_eo = -1;
674 }
675}

References i.

Referenced by pg_regexec().

◆ zaptreesubs()

static void zaptreesubs ( struct vars v,
struct subre t 
)
static

Definition at line 681 of file regexec.c.

683{
684 int n = t->capno;
685 struct subre *t2;
686
687 if (n > 0)
688 {
689 if ((size_t) n < v->nmatch)
690 {
691 v->pmatch[n].rm_so = -1;
692 v->pmatch[n].rm_eo = -1;
693 }
694 }
695
696 for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
697 zaptreesubs(v, t2);
698}

References subre::capno, subre::child, fb(), vars::pmatch, and zaptreesubs().

Referenced by ccondissect(), citerdissect(), crevcondissect(), creviterdissect(), and zaptreesubs().