PostgreSQL Source Code  git master
regexec.c
Go to the documentation of this file.
1 /*
2  * re_*exec and friends - match REs
3  *
4  * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
5  *
6  * Development of this software was funded, in part, by Cray Research Inc.,
7  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
8  * Corporation, none of whom are responsible for the results. The author
9  * thanks all of them.
10  *
11  * Redistribution and use in source and binary forms -- with or without
12  * modification -- are permitted for any purpose, provided that
13  * redistributions in source form retain this entire copyright notice and
14  * indicate the origin and nature of any modifications.
15  *
16  * I'd appreciate being given credit for this package in the documentation
17  * of software which uses it, but that is not a requirement.
18  *
19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
20  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
21  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
22  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * src/backend/regex/regexec.c
31  *
32  */
33 
34 #include "regex/regguts.h"
35 
36 
37 
38 /* lazy-DFA representation */
39 struct arcp
40 { /* "pointer" to an outarc */
41  struct sset *ss;
43 };
44 
45 struct sset
46 { /* state set */
47  unsigned *states; /* pointer to bitvector */
48  unsigned hash; /* hash of bitvector */
49 #define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
50 #define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
51  memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
52  int flags;
53 #define STARTER 01 /* the initial state set */
54 #define POSTSTATE 02 /* includes the goal state */
55 #define LOCKED 04 /* locked in cache */
56 #define NOPROGRESS 010 /* zero-progress state set */
57  struct arcp ins; /* chain of inarcs pointing here */
58  chr *lastseen; /* last entered on arrival here */
59  struct sset **outs; /* outarc vector indexed by color */
60  struct arcp *inchain; /* chain-pointer vector for outarcs */
61 };
62 
63 struct dfa
64 {
65  int nssets; /* size of cache */
66  int nssused; /* how many entries occupied yet */
67  int nstates; /* number of states */
68  int ncolors; /* length of outarc and inchain vectors */
69  int wordsper; /* length of state-set bitvectors */
70  struct sset *ssets; /* state-set cache */
71  unsigned *statesarea; /* bitvector storage */
72  unsigned *work; /* pointer to work area within statesarea */
73  struct sset **outsarea; /* outarc-vector storage */
74  struct arcp *incarea; /* inchain storage */
75  struct cnfa *cnfa;
76  struct colormap *cm;
77  chr *lastpost; /* location of last cache-flushed success */
78  chr *lastnopr; /* location of last cache-flushed NOPROGRESS */
79  struct sset *search; /* replacement-search-pointer memory */
80  int backno; /* if DFA for a backref, subno it refers to */
81  short backmin; /* min repetitions for backref */
82  short backmax; /* max repetitions for backref */
83  bool ismalloced; /* should this struct dfa be freed? */
84  bool arraysmalloced; /* should its subsidiary arrays be freed? */
85 };
86 
87 #define WORK 1 /* number of work bitvectors needed */
88 
89 /* setup for non-malloc allocation for small cases */
90 #define FEWSTATES 20 /* must be less than UBITS */
91 #define FEWCOLORS 15
92 struct smalldfa
93 {
94  struct dfa dfa; /* must be first */
95  struct sset ssets[FEWSTATES * 2];
96  unsigned statesarea[FEWSTATES * 2 + WORK];
97  struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS];
98  struct arcp incarea[FEWSTATES * 2 * FEWCOLORS];
99 };
100 
101 #define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */
102 
103 
104 
105 /* internal variables, bundled for easy passing around */
106 struct vars
107 {
108  regex_t *re;
109  struct guts *g;
110  int eflags; /* copies of arguments */
111  size_t nmatch;
114  chr *start; /* start of string */
115  chr *search_start; /* search start of string */
116  chr *stop; /* just past end of string */
117  int err; /* error code if any (0 none) */
118  struct dfa **subdfas; /* per-tree-subre DFAs */
119  struct dfa **ladfas; /* per-lacon-subre DFAs */
120  struct sset **lblastcss; /* per-lacon-subre lookbehind restart data */
121  chr **lblastcp; /* per-lacon-subre lookbehind restart data */
122  struct smalldfa dfa1;
123  struct smalldfa dfa2;
124 };
125 
126 #define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
127 #define ISERR() VISERR(v)
128 #define VERR(vv,e) ((vv)->err = ((vv)->err ? (vv)->err : (e)))
129 #define ERR(e) VERR(v, e) /* record an error */
130 #define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */
131 #define OFF(p) ((p) - v->start)
132 #define LOFF(p) ((long)OFF(p))
133 
134 
135 
136 /*
137  * forward declarations
138  */
139 /* === regexec.c === */
140 static struct dfa *getsubdfa(struct vars *, struct subre *);
141 static struct dfa *getladfa(struct vars *, int);
142 static int find(struct vars *, struct cnfa *, struct colormap *);
143 static int cfind(struct vars *, struct cnfa *, struct colormap *);
144 static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **);
145 static void zapallsubs(regmatch_t *, size_t);
146 static void zaptreesubs(struct vars *, struct subre *);
147 static void subset(struct vars *, struct subre *, chr *, chr *);
148 static int cdissect(struct vars *, struct subre *, chr *, chr *);
149 static int ccondissect(struct vars *, struct subre *, chr *, chr *);
150 static int crevcondissect(struct vars *, struct subre *, chr *, chr *);
151 static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
152 static int caltdissect(struct vars *, struct subre *, chr *, chr *);
153 static int citerdissect(struct vars *, struct subre *, chr *, chr *);
154 static int creviterdissect(struct vars *, struct subre *, chr *, chr *);
155 
156 /* === rege_dfa.c === */
157 static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
158 static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *);
159 static int matchuntil(struct vars *, struct dfa *, chr *, struct sset **, chr **);
160 static chr *dfa_backref(struct vars *, struct dfa *, chr *, chr *, chr *, bool);
161 static chr *lastcold(struct vars *, struct dfa *);
162 static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *);
163 static void freedfa(struct dfa *);
164 static unsigned hash(unsigned *, int);
165 static struct sset *initialize(struct vars *, struct dfa *, chr *);
166 static struct sset *miss(struct vars *, struct dfa *, struct sset *, color, chr *, chr *);
167 static int lacon(struct vars *, struct cnfa *, chr *, color);
168 static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
169 static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
170 
171 
172 /*
173  * pg_regexec - match regular expression
174  */
175 int
177  const chr *string,
178  size_t len,
179  size_t search_start,
180  rm_detail_t *details,
181  size_t nmatch,
182  regmatch_t pmatch[],
183  int flags)
184 {
185  struct vars var;
186  register struct vars *v = &var;
187  int st;
188  size_t n;
189  size_t i;
190  int backref;
191 
192 #define LOCALMAT 20
193  regmatch_t mat[LOCALMAT];
194 
195 #define LOCALDFAS 40
196  struct dfa *subdfas[LOCALDFAS];
197 
198  /* sanity checks */
199  if (re == NULL || string == NULL || re->re_magic != REMAGIC)
200  return REG_INVARG;
201  if (re->re_csize != sizeof(chr))
202  return REG_MIXED;
203 
204  /* Initialize locale-dependent support */
206 
207  /* setup */
208  v->re = re;
209  v->g = (struct guts *) re->re_guts;
210  if ((v->g->cflags & REG_EXPECT) && details == NULL)
211  return REG_INVARG;
212  if (v->g->info & REG_UIMPOSSIBLE)
213  return REG_NOMATCH;
214  backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
215  v->eflags = flags;
216  if (v->g->cflags & REG_NOSUB)
217  nmatch = 0; /* override client */
218  v->nmatch = nmatch;
219  if (backref)
220  {
221  /* need work area */
222  if (v->g->nsub + 1 <= LOCALMAT)
223  v->pmatch = mat;
224  else
225  v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
226  sizeof(regmatch_t));
227  if (v->pmatch == NULL)
228  return REG_ESPACE;
229  v->nmatch = v->g->nsub + 1;
230  }
231  else
232  v->pmatch = pmatch;
233  v->details = details;
234  v->start = (chr *) string;
235  v->search_start = (chr *) string + search_start;
236  v->stop = (chr *) string + len;
237  v->err = 0;
238  v->subdfas = NULL;
239  v->ladfas = NULL;
240  v->lblastcss = NULL;
241  v->lblastcp = NULL;
242  /* below this point, "goto cleanup" will behave sanely */
243 
244  assert(v->g->ntree >= 0);
245  n = (size_t) v->g->ntree;
246  if (n <= LOCALDFAS)
247  v->subdfas = subdfas;
248  else
249  {
250  v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
251  if (v->subdfas == NULL)
252  {
253  st = REG_ESPACE;
254  goto cleanup;
255  }
256  }
257  for (i = 0; i < n; i++)
258  v->subdfas[i] = NULL;
259 
260  assert(v->g->nlacons >= 0);
261  n = (size_t) v->g->nlacons;
262  if (n > 0)
263  {
264  v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
265  if (v->ladfas == NULL)
266  {
267  st = REG_ESPACE;
268  goto cleanup;
269  }
270  for (i = 0; i < n; i++)
271  v->ladfas[i] = NULL;
272  v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
273  v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
274  if (v->lblastcss == NULL || v->lblastcp == NULL)
275  {
276  st = REG_ESPACE;
277  goto cleanup;
278  }
279  for (i = 0; i < n; i++)
280  {
281  v->lblastcss[i] = NULL;
282  v->lblastcp[i] = NULL;
283  }
284  }
285 
286  /* do it */
287  assert(v->g->tree != NULL);
288  if (backref)
289  st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
290  else
291  st = find(v, &v->g->tree->cnfa, &v->g->cmap);
292 
293  /* copy (portion of) match vector over if necessary */
294  if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0)
295  {
296  zapallsubs(pmatch, nmatch);
297  n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
298  memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
299  }
300 
301  /* clean up */
302 cleanup:
303  if (v->pmatch != pmatch && v->pmatch != mat)
304  FREE(v->pmatch);
305  if (v->subdfas != NULL)
306  {
307  n = (size_t) v->g->ntree;
308  for (i = 0; i < n; i++)
309  {
310  if (v->subdfas[i] != NULL)
311  freedfa(v->subdfas[i]);
312  }
313  if (v->subdfas != subdfas)
314  FREE(v->subdfas);
315  }
316  if (v->ladfas != NULL)
317  {
318  n = (size_t) v->g->nlacons;
319  for (i = 0; i < n; i++)
320  {
321  if (v->ladfas[i] != NULL)
322  freedfa(v->ladfas[i]);
323  }
324  FREE(v->ladfas);
325  }
326  if (v->lblastcss != NULL)
327  FREE(v->lblastcss);
328  if (v->lblastcp != NULL)
329  FREE(v->lblastcp);
330 
331 #ifdef REG_DEBUG
332  if (v->eflags & (REG_FTRACE | REG_MTRACE))
333  fflush(stdout);
334 #endif
335 
336  return st;
337 }
338 
339 /*
340  * getsubdfa - create or re-fetch the DFA for a tree subre node
341  *
342  * We only need to create the DFA once per overall regex execution.
343  * The DFA will be freed by the cleanup step in pg_regexec().
344  */
345 static struct dfa *
346 getsubdfa(struct vars *v,
347  struct subre *t)
348 {
349  struct dfa *d = v->subdfas[t->id];
350 
351  if (d == NULL)
352  {
353  d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
354  if (d == NULL)
355  return NULL;
356  /* set up additional info if this is a backref node */
357  if (t->op == 'b')
358  {
359  d->backno = t->backno;
360  d->backmin = t->min;
361  d->backmax = t->max;
362  }
363  v->subdfas[t->id] = d;
364  }
365  return d;
366 }
367 
368 /*
369  * getladfa - create or re-fetch the DFA for a LACON subre node
370  *
371  * Same as above, but for LACONs.
372  */
373 static struct dfa *
374 getladfa(struct vars *v,
375  int n)
376 {
377  assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
378 
379  if (v->ladfas[n] == NULL)
380  {
381  struct subre *sub = &v->g->lacons[n];
382 
383  v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
384  /* a LACON can't contain a backref, so nothing else to do */
385  }
386  return v->ladfas[n];
387 }
388 
389 /*
390  * find - find a match for the main NFA (no-complications case)
391  */
392 static int
393 find(struct vars *v,
394  struct cnfa *cnfa,
395  struct colormap *cm)
396 {
397  struct dfa *s;
398  struct dfa *d;
399  chr *begin;
400  chr *end = NULL;
401  chr *cold;
402  chr *open; /* open and close of range of possible starts */
403  chr *close;
404  int hitend;
405  int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
406 
407  /* first, a shot with the search RE */
408  s = newdfa(v, &v->g->search, cm, &v->dfa1);
409  if (s == NULL)
410  return v->err;
411  MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
412  cold = NULL;
413  close = shortest(v, s, v->search_start, v->search_start, v->stop,
414  &cold, (int *) NULL);
415  freedfa(s);
416  NOERR();
417  if (v->g->cflags & REG_EXPECT)
418  {
419  assert(v->details != NULL);
420  if (cold != NULL)
421  v->details->rm_extend.rm_so = OFF(cold);
422  else
423  v->details->rm_extend.rm_so = OFF(v->stop);
424  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
425  }
426  if (close == NULL) /* not found */
427  return REG_NOMATCH;
428  if (v->nmatch == 0) /* found, don't need exact location */
429  return REG_OKAY;
430 
431  /* find starting point and match */
432  assert(cold != NULL);
433  open = cold;
434  cold = NULL;
435  MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
436  d = newdfa(v, cnfa, cm, &v->dfa1);
437  if (d == NULL)
438  return v->err;
439  for (begin = open; begin <= close; begin++)
440  {
441  MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
442  if (shorter)
443  end = shortest(v, d, begin, begin, v->stop,
444  (chr **) NULL, &hitend);
445  else
446  end = longest(v, d, begin, v->stop, &hitend);
447  if (ISERR())
448  {
449  freedfa(d);
450  return v->err;
451  }
452  if (hitend && cold == NULL)
453  cold = begin;
454  if (end != NULL)
455  break; /* NOTE BREAK OUT */
456  }
457  assert(end != NULL); /* search RE succeeded so loop should */
458  freedfa(d);
459 
460  /* and pin down details */
461  assert(v->nmatch > 0);
462  v->pmatch[0].rm_so = OFF(begin);
463  v->pmatch[0].rm_eo = OFF(end);
464  if (v->g->cflags & REG_EXPECT)
465  {
466  if (cold != NULL)
467  v->details->rm_extend.rm_so = OFF(cold);
468  else
469  v->details->rm_extend.rm_so = OFF(v->stop);
470  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
471  }
472  if (v->nmatch == 1) /* no need for submatches */
473  return REG_OKAY;
474 
475  /* find submatches */
476  zapallsubs(v->pmatch, v->nmatch);
477  return cdissect(v, v->g->tree, begin, end);
478 }
479 
480 /*
481  * cfind - find a match for the main NFA (with complications)
482  */
483 static int
484 cfind(struct vars *v,
485  struct cnfa *cnfa,
486  struct colormap *cm)
487 {
488  struct dfa *s;
489  struct dfa *d;
490  chr *cold;
491  int ret;
492 
493  s = newdfa(v, &v->g->search, cm, &v->dfa1);
494  if (s == NULL)
495  return v->err;
496  d = newdfa(v, cnfa, cm, &v->dfa2);
497  if (d == NULL)
498  {
499  freedfa(s);
500  return v->err;
501  }
502 
503  ret = cfindloop(v, cnfa, cm, d, s, &cold);
504 
505  freedfa(d);
506  freedfa(s);
507  NOERR();
508  if (v->g->cflags & REG_EXPECT)
509  {
510  assert(v->details != NULL);
511  if (cold != NULL)
512  v->details->rm_extend.rm_so = OFF(cold);
513  else
514  v->details->rm_extend.rm_so = OFF(v->stop);
515  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
516  }
517  return ret;
518 }
519 
520 /*
521  * cfindloop - the heart of cfind
522  */
523 static int
524 cfindloop(struct vars *v,
525  struct cnfa *cnfa,
526  struct colormap *cm,
527  struct dfa *d,
528  struct dfa *s,
529  chr **coldp) /* where to put coldstart pointer */
530 {
531  chr *begin;
532  chr *end;
533  chr *cold;
534  chr *open; /* open and close of range of possible starts */
535  chr *close;
536  chr *estart;
537  chr *estop;
538  int er;
539  int shorter = v->g->tree->flags & SHORTER;
540  int hitend;
541 
542  assert(d != NULL && s != NULL);
543  cold = NULL;
544  close = v->search_start;
545  do
546  {
547  /* Search with the search RE for match range at/beyond "close" */
548  MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
549  close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
550  if (ISERR())
551  {
552  *coldp = cold;
553  return v->err;
554  }
555  if (close == NULL)
556  break; /* no more possible match anywhere */
557  assert(cold != NULL);
558  open = cold;
559  cold = NULL;
560  /* Search for matches starting between "open" and "close" inclusive */
561  MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
562  for (begin = open; begin <= close; begin++)
563  {
564  MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
565  estart = begin;
566  estop = v->stop;
567  for (;;)
568  {
569  /* Here we use the top node's detailed RE */
570  if (shorter)
571  end = shortest(v, d, begin, estart,
572  estop, (chr **) NULL, &hitend);
573  else
574  end = longest(v, d, begin, estop,
575  &hitend);
576  if (ISERR())
577  {
578  *coldp = cold;
579  return v->err;
580  }
581  if (hitend && cold == NULL)
582  cold = begin;
583  if (end == NULL)
584  break; /* no match with this begin point, try next */
585  MDEBUG(("tentative end %ld\n", LOFF(end)));
586  /* Dissect the potential match to see if it really matches */
587  zapallsubs(v->pmatch, v->nmatch);
588  er = cdissect(v, v->g->tree, begin, end);
589  if (er == REG_OKAY)
590  {
591  if (v->nmatch > 0)
592  {
593  v->pmatch[0].rm_so = OFF(begin);
594  v->pmatch[0].rm_eo = OFF(end);
595  }
596  *coldp = cold;
597  return REG_OKAY;
598  }
599  if (er != REG_NOMATCH)
600  {
601  ERR(er);
602  *coldp = cold;
603  return er;
604  }
605  /* Try next longer/shorter match with same begin point */
606  if (shorter)
607  {
608  if (end == estop)
609  break; /* no more, so try next begin point */
610  estart = end + 1;
611  }
612  else
613  {
614  if (end == begin)
615  break; /* no more, so try next begin point */
616  estop = end - 1;
617  }
618  } /* end loop over endpoint positions */
619  } /* end loop over beginning positions */
620 
621  /*
622  * If we get here, there is no possible match starting at or before
623  * "close", so consider matches beyond that. We'll do a fresh search
624  * with the search RE to find a new promising match range.
625  */
626  close++;
627  } while (close < v->stop);
628 
629  *coldp = cold;
630  return REG_NOMATCH;
631 }
632 
633 /*
634  * zapallsubs - initialize all subexpression matches to "no match"
635  */
636 static void
638  size_t n)
639 {
640  size_t i;
641 
642  for (i = n - 1; i > 0; i--)
643  {
644  p[i].rm_so = -1;
645  p[i].rm_eo = -1;
646  }
647 }
648 
649 /*
650  * zaptreesubs - initialize subexpressions within subtree to "no match"
651  */
652 static void
653 zaptreesubs(struct vars *v,
654  struct subre *t)
655 {
656  int n = t->capno;
657  struct subre *t2;
658 
659  if (n > 0)
660  {
661  if ((size_t) n < v->nmatch)
662  {
663  v->pmatch[n].rm_so = -1;
664  v->pmatch[n].rm_eo = -1;
665  }
666  }
667 
668  for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
669  zaptreesubs(v, t2);
670 }
671 
672 /*
673  * subset - set subexpression match data for a successful subre
674  */
675 static void
676 subset(struct vars *v,
677  struct subre *sub,
678  chr *begin,
679  chr *end)
680 {
681  int n = sub->capno;
682 
683  assert(n > 0);
684  if ((size_t) n >= v->nmatch)
685  return;
686 
687  MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
688  v->pmatch[n].rm_so = OFF(begin);
689  v->pmatch[n].rm_eo = OFF(end);
690 }
691 
692 /*
693  * cdissect - check backrefs and determine subexpression matches
694  *
695  * cdissect recursively processes a subre tree to check matching of backrefs
696  * and/or identify submatch boundaries for capture nodes. The proposed match
697  * runs from "begin" to "end" (not including "end"), and we are basically
698  * "dissecting" it to see where the submatches are.
699  *
700  * Before calling any level of cdissect, the caller must have run the node's
701  * DFA and found that the proposed substring satisfies the DFA. (We make
702  * the caller do that because in concatenation and iteration nodes, it's
703  * much faster to check all the substrings against the child DFAs before we
704  * recurse.) Also, caller must have cleared subexpression match data via
705  * zaptreesubs (or zapallsubs at the top level).
706  */
707 static int /* regexec return code */
708 cdissect(struct vars *v,
709  struct subre *t,
710  chr *begin, /* beginning of relevant substring */
711  chr *end) /* end of same */
712 {
713  int er;
714 
715  assert(t != NULL);
716  MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
717 
718  /* handy place to check for operation cancel */
719  if (CANCEL_REQUESTED(v->re))
720  return REG_CANCEL;
721  /* ... and stack overrun */
722  if (STACK_TOO_DEEP(v->re))
723  return REG_ETOOBIG;
724 
725  switch (t->op)
726  {
727  case '=': /* terminal node */
728  assert(t->child == NULL);
729  er = REG_OKAY; /* no action, parent did the work */
730  break;
731  case 'b': /* back reference */
732  assert(t->child == NULL);
733  er = cbrdissect(v, t, begin, end);
734  break;
735  case '.': /* concatenation */
736  assert(t->child != NULL);
737  if (t->child->flags & SHORTER) /* reverse scan */
738  er = crevcondissect(v, t, begin, end);
739  else
740  er = ccondissect(v, t, begin, end);
741  break;
742  case '|': /* alternation */
743  assert(t->child != NULL);
744  er = caltdissect(v, t, begin, end);
745  break;
746  case '*': /* iteration */
747  assert(t->child != NULL);
748  if (t->child->flags & SHORTER) /* reverse scan */
749  er = creviterdissect(v, t, begin, end);
750  else
751  er = citerdissect(v, t, begin, end);
752  break;
753  case '(': /* no-op capture node */
754  assert(t->child != NULL);
755  assert(t->capno > 0);
756  er = cdissect(v, t->child, begin, end);
757  break;
758  default:
759  er = REG_ASSERT;
760  break;
761  }
762 
763  /*
764  * We should never have a match failure unless backrefs lurk below;
765  * otherwise, either caller failed to check the DFA, or there's some
766  * inconsistency between the DFA and the node's innards.
767  */
768  assert(er != REG_NOMATCH || (t->flags & BACKR));
769 
770  /*
771  * If this node is marked as capturing, save successful match's location.
772  */
773  if (t->capno > 0 && er == REG_OKAY)
774  subset(v, t, begin, end);
775 
776  return er;
777 }
778 
779 /*
780  * ccondissect - dissect match for concatenation node
781  */
782 static int /* regexec return code */
783 ccondissect(struct vars *v,
784  struct subre *t,
785  chr *begin, /* beginning of relevant substring */
786  chr *end) /* end of same */
787 {
788  struct subre *left = t->child;
789  struct subre *right = left->sibling;
790  struct dfa *d;
791  struct dfa *d2;
792  chr *mid;
793  int er;
794 
795  assert(t->op == '.');
796  assert(left != NULL && left->cnfa.nstates > 0);
797  assert(right != NULL && right->cnfa.nstates > 0);
798  assert(right->sibling == NULL);
799  assert(!(left->flags & SHORTER));
800 
801  d = getsubdfa(v, left);
802  NOERR();
803  d2 = getsubdfa(v, right);
804  NOERR();
805  MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
806 
807  /* pick a tentative midpoint */
808  mid = longest(v, d, begin, end, (int *) NULL);
809  NOERR();
810  if (mid == NULL)
811  return REG_NOMATCH;
812  MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
813 
814  /* iterate until satisfaction or failure */
815  for (;;)
816  {
817  /* try this midpoint on for size */
818  if (longest(v, d2, mid, end, (int *) NULL) == end)
819  {
820  er = cdissect(v, left, begin, mid);
821  if (er == REG_OKAY)
822  {
823  er = cdissect(v, right, mid, end);
824  if (er == REG_OKAY)
825  {
826  /* satisfaction */
827  MDEBUG(("%d: successful\n", t->id));
828  return REG_OKAY;
829  }
830  }
831  if (er != REG_NOMATCH)
832  return er;
833  }
834  NOERR();
835 
836  /* that midpoint didn't work, find a new one */
837  if (mid == begin)
838  {
839  /* all possibilities exhausted */
840  MDEBUG(("%d: no midpoint\n", t->id));
841  return REG_NOMATCH;
842  }
843  mid = longest(v, d, begin, mid - 1, (int *) NULL);
844  NOERR();
845  if (mid == NULL)
846  {
847  /* failed to find a new one */
848  MDEBUG(("%d: failed midpoint\n", t->id));
849  return REG_NOMATCH;
850  }
851  MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
852  zaptreesubs(v, left);
853  zaptreesubs(v, right);
854  }
855 
856  /* can't get here */
857  return REG_ASSERT;
858 }
859 
860 /*
861  * crevcondissect - dissect match for concatenation node, shortest-first
862  */
863 static int /* regexec return code */
865  struct subre *t,
866  chr *begin, /* beginning of relevant substring */
867  chr *end) /* end of same */
868 {
869  struct subre *left = t->child;
870  struct subre *right = left->sibling;
871  struct dfa *d;
872  struct dfa *d2;
873  chr *mid;
874  int er;
875 
876  assert(t->op == '.');
877  assert(left != NULL && left->cnfa.nstates > 0);
878  assert(right != NULL && right->cnfa.nstates > 0);
879  assert(right->sibling == NULL);
880  assert(left->flags & SHORTER);
881 
882  d = getsubdfa(v, left);
883  NOERR();
884  d2 = getsubdfa(v, right);
885  NOERR();
886  MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
887 
888  /* pick a tentative midpoint */
889  mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
890  NOERR();
891  if (mid == NULL)
892  return REG_NOMATCH;
893  MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
894 
895  /* iterate until satisfaction or failure */
896  for (;;)
897  {
898  /* try this midpoint on for size */
899  if (longest(v, d2, mid, end, (int *) NULL) == end)
900  {
901  er = cdissect(v, left, begin, mid);
902  if (er == REG_OKAY)
903  {
904  er = cdissect(v, right, mid, end);
905  if (er == REG_OKAY)
906  {
907  /* satisfaction */
908  MDEBUG(("%d: successful\n", t->id));
909  return REG_OKAY;
910  }
911  }
912  if (er != REG_NOMATCH)
913  return er;
914  }
915  NOERR();
916 
917  /* that midpoint didn't work, find a new one */
918  if (mid == end)
919  {
920  /* all possibilities exhausted */
921  MDEBUG(("%d: no midpoint\n", t->id));
922  return REG_NOMATCH;
923  }
924  mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
925  NOERR();
926  if (mid == NULL)
927  {
928  /* failed to find a new one */
929  MDEBUG(("%d: failed midpoint\n", t->id));
930  return REG_NOMATCH;
931  }
932  MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
933  zaptreesubs(v, left);
934  zaptreesubs(v, right);
935  }
936 
937  /* can't get here */
938  return REG_ASSERT;
939 }
940 
941 /*
942  * cbrdissect - dissect match for backref node
943  *
944  * The backref match might already have been verified by dfa_backref(),
945  * but we don't know that for sure so must check it here.
946  */
947 static int /* regexec return code */
948 cbrdissect(struct vars *v,
949  struct subre *t,
950  chr *begin, /* beginning of relevant substring */
951  chr *end) /* end of same */
952 {
953  int n = t->backno;
954  size_t numreps;
955  size_t tlen;
956  size_t brlen;
957  chr *brstring;
958  chr *p;
959  int min = t->min;
960  int max = t->max;
961 
962  assert(t != NULL);
963  assert(t->op == 'b');
964  assert(n >= 0);
965  assert((size_t) n < v->nmatch);
966 
967  MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
968  LOFF(begin), LOFF(end)));
969 
970  /* get the backreferenced string */
971  if (v->pmatch[n].rm_so == -1)
972  return REG_NOMATCH;
973  brstring = v->start + v->pmatch[n].rm_so;
974  brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
975 
976  /* special cases for zero-length strings */
977  if (brlen == 0)
978  {
979  /*
980  * matches only if target is zero length, but any number of
981  * repetitions can be considered to be present
982  */
983  if (begin == end && min <= max)
984  {
985  MDEBUG(("%d: backref matched trivially\n", t->id));
986  return REG_OKAY;
987  }
988  return REG_NOMATCH;
989  }
990  if (begin == end)
991  {
992  /* matches only if zero repetitions are okay */
993  if (min == 0)
994  {
995  MDEBUG(("%d: backref matched trivially\n", t->id));
996  return REG_OKAY;
997  }
998  return REG_NOMATCH;
999  }
1000 
1001  /*
1002  * check target length to see if it could possibly be an allowed number of
1003  * repetitions of brstring
1004  */
1005  assert(end > begin);
1006  tlen = end - begin;
1007  if (tlen % brlen != 0)
1008  return REG_NOMATCH;
1009  numreps = tlen / brlen;
1010  if (numreps < min || (numreps > max && max != DUPINF))
1011  return REG_NOMATCH;
1012 
1013  /* okay, compare the actual string contents */
1014  p = begin;
1015  while (numreps-- > 0)
1016  {
1017  if ((*v->g->compare) (brstring, p, brlen) != 0)
1018  return REG_NOMATCH;
1019  p += brlen;
1020  }
1021 
1022  MDEBUG(("%d: backref matched\n", t->id));
1023  return REG_OKAY;
1024 }
1025 
1026 /*
1027  * caltdissect - dissect match for alternation node
1028  */
1029 static int /* regexec return code */
1030 caltdissect(struct vars *v,
1031  struct subre *t,
1032  chr *begin, /* beginning of relevant substring */
1033  chr *end) /* end of same */
1034 {
1035  struct dfa *d;
1036  int er;
1037 
1038  assert(t->op == '|');
1039 
1040  t = t->child;
1041  /* there should be at least 2 alternatives */
1042  assert(t != NULL && t->sibling != NULL);
1043 
1044  while (t != NULL)
1045  {
1046  assert(t->cnfa.nstates > 0);
1047 
1048  MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1049 
1050  d = getsubdfa(v, t);
1051  NOERR();
1052  if (longest(v, d, begin, end, (int *) NULL) == end)
1053  {
1054  MDEBUG(("%d: caltdissect matched\n", t->id));
1055  er = cdissect(v, t, begin, end);
1056  if (er != REG_NOMATCH)
1057  return er;
1058  }
1059  NOERR();
1060 
1061  t = t->sibling;
1062  }
1063 
1064  return REG_NOMATCH;
1065 }
1066 
1067 /*
1068  * citerdissect - dissect match for iteration node
1069  */
1070 static int /* regexec return code */
1071 citerdissect(struct vars *v,
1072  struct subre *t,
1073  chr *begin, /* beginning of relevant substring */
1074  chr *end) /* end of same */
1075 {
1076  struct dfa *d;
1077  chr **endpts;
1078  chr *limit;
1079  int min_matches;
1080  size_t max_matches;
1081  int nverified;
1082  int k;
1083  int i;
1084  int er;
1085 
1086  assert(t->op == '*');
1087  assert(t->child != NULL && t->child->cnfa.nstates > 0);
1088  assert(!(t->child->flags & SHORTER));
1089  assert(begin <= end);
1090 
1091  MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1092 
1093  /*
1094  * For the moment, assume the minimum number of matches is 1. If zero
1095  * matches are allowed, and the target string is empty, we are allowed to
1096  * match regardless of the contents of the iter node --- but we would
1097  * prefer to match once, so that capturing parens get set. (An example of
1098  * the concern here is a pattern like "()*\1", which historically this
1099  * code has allowed to succeed.) Therefore, we deal with the zero-matches
1100  * case at the bottom, after failing to find any other way to match.
1101  */
1102  min_matches = t->min;
1103  if (min_matches <= 0)
1104  min_matches = 1;
1105 
1106  /*
1107  * We need workspace to track the endpoints of each sub-match. Normally
1108  * we consider only nonzero-length sub-matches, so there can be at most
1109  * end-begin of them. However, if min is larger than that, we will also
1110  * consider zero-length sub-matches in order to find enough matches.
1111  *
1112  * For convenience, endpts[0] contains the "begin" pointer and we store
1113  * sub-match endpoints in endpts[1..max_matches].
1114  */
1115  max_matches = end - begin;
1116  if (max_matches > t->max && t->max != DUPINF)
1117  max_matches = t->max;
1118  if (max_matches < min_matches)
1119  max_matches = min_matches;
1120  endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1121  if (endpts == NULL)
1122  return REG_ESPACE;
1123  endpts[0] = begin;
1124 
1125  d = getsubdfa(v, t->child);
1126  if (ISERR())
1127  {
1128  FREE(endpts);
1129  return v->err;
1130  }
1131 
1132  /*
1133  * Our strategy is to first find a set of sub-match endpoints that are
1134  * valid according to the child node's DFA, and then recursively dissect
1135  * each sub-match to confirm validity. If any validity check fails,
1136  * backtrack the last sub-match and try again. And, when we next try for
1137  * a validity check, we need not recheck any successfully verified
1138  * sub-matches that we didn't move the endpoints of. nverified remembers
1139  * how many sub-matches are currently known okay.
1140  */
1141 
1142  /* initialize to consider first sub-match */
1143  nverified = 0;
1144  k = 1;
1145  limit = end;
1146 
1147  /* iterate until satisfaction or failure */
1148  while (k > 0)
1149  {
1150  /* try to find an endpoint for the k'th sub-match */
1151  endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
1152  if (ISERR())
1153  {
1154  FREE(endpts);
1155  return v->err;
1156  }
1157  if (endpts[k] == NULL)
1158  {
1159  /* no match possible, so see if we can shorten previous one */
1160  k--;
1161  goto backtrack;
1162  }
1163  MDEBUG(("%d: working endpoint %d: %ld\n",
1164  t->id, k, LOFF(endpts[k])));
1165 
1166  /* k'th sub-match can no longer be considered verified */
1167  if (nverified >= k)
1168  nverified = k - 1;
1169 
1170  if (endpts[k] != end)
1171  {
1172  /* haven't reached end yet, try another iteration if allowed */
1173  if (k >= max_matches)
1174  {
1175  /* must try to shorten some previous match */
1176  k--;
1177  goto backtrack;
1178  }
1179 
1180  /* reject zero-length match unless necessary to achieve min */
1181  if (endpts[k] == endpts[k - 1] &&
1182  (k >= min_matches || min_matches - k < end - endpts[k]))
1183  goto backtrack;
1184 
1185  k++;
1186  limit = end;
1187  continue;
1188  }
1189 
1190  /*
1191  * We've identified a way to divide the string into k sub-matches that
1192  * works so far as the child DFA can tell. If k is an allowed number
1193  * of matches, start the slow part: recurse to verify each sub-match.
1194  * We always have k <= max_matches, needn't check that.
1195  */
1196  if (k < min_matches)
1197  goto backtrack;
1198 
1199  MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1200 
1201  for (i = nverified + 1; i <= k; i++)
1202  {
1203  zaptreesubs(v, t->child);
1204  er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1205  if (er == REG_OKAY)
1206  {
1207  nverified = i;
1208  continue;
1209  }
1210  if (er == REG_NOMATCH)
1211  break;
1212  /* oops, something failed */
1213  FREE(endpts);
1214  return er;
1215  }
1216 
1217  if (i > k)
1218  {
1219  /* satisfaction */
1220  MDEBUG(("%d: successful\n", t->id));
1221  FREE(endpts);
1222  return REG_OKAY;
1223  }
1224 
1225  /* match failed to verify, so backtrack */
1226 
1227 backtrack:
1228 
1229  /*
1230  * Must consider shorter versions of the current sub-match. However,
1231  * we'll only ask for a zero-length match if necessary.
1232  */
1233  while (k > 0)
1234  {
1235  chr *prev_end = endpts[k - 1];
1236 
1237  if (endpts[k] > prev_end)
1238  {
1239  limit = endpts[k] - 1;
1240  if (limit > prev_end ||
1241  (k < min_matches && min_matches - k >= end - prev_end))
1242  {
1243  /* break out of backtrack loop, continue the outer one */
1244  break;
1245  }
1246  }
1247  /* can't shorten k'th sub-match any more, consider previous one */
1248  k--;
1249  }
1250  }
1251 
1252  /* all possibilities exhausted */
1253  FREE(endpts);
1254 
1255  /*
1256  * Now consider the possibility that we can match to a zero-length string
1257  * by using zero repetitions.
1258  */
1259  if (t->min == 0 && begin == end)
1260  {
1261  MDEBUG(("%d: allowing zero matches\n", t->id));
1262  return REG_OKAY;
1263  }
1264 
1265  MDEBUG(("%d: failed\n", t->id));
1266  return REG_NOMATCH;
1267 }
1268 
1269 /*
1270  * creviterdissect - dissect match for iteration node, shortest-first
1271  */
1272 static int /* regexec return code */
1274  struct subre *t,
1275  chr *begin, /* beginning of relevant substring */
1276  chr *end) /* end of same */
1277 {
1278  struct dfa *d;
1279  chr **endpts;
1280  chr *limit;
1281  int min_matches;
1282  size_t max_matches;
1283  int nverified;
1284  int k;
1285  int i;
1286  int er;
1287 
1288  assert(t->op == '*');
1289  assert(t->child != NULL && t->child->cnfa.nstates > 0);
1290  assert(t->child->flags & SHORTER);
1291  assert(begin <= end);
1292 
1293  MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1294 
1295  /*
1296  * If zero matches are allowed, and target string is empty, just declare
1297  * victory. OTOH, if target string isn't empty, zero matches can't work
1298  * so we pretend the min is 1.
1299  */
1300  min_matches = t->min;
1301  if (min_matches <= 0)
1302  {
1303  if (begin == end)
1304  {
1305  MDEBUG(("%d: allowing zero matches\n", t->id));
1306  return REG_OKAY;
1307  }
1308  min_matches = 1;
1309  }
1310 
1311  /*
1312  * We need workspace to track the endpoints of each sub-match. Normally
1313  * we consider only nonzero-length sub-matches, so there can be at most
1314  * end-begin of them. However, if min is larger than that, we will also
1315  * consider zero-length sub-matches in order to find enough matches.
1316  *
1317  * For convenience, endpts[0] contains the "begin" pointer and we store
1318  * sub-match endpoints in endpts[1..max_matches].
1319  */
1320  max_matches = end - begin;
1321  if (max_matches > t->max && t->max != DUPINF)
1322  max_matches = t->max;
1323  if (max_matches < min_matches)
1324  max_matches = min_matches;
1325  endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1326  if (endpts == NULL)
1327  return REG_ESPACE;
1328  endpts[0] = begin;
1329 
1330  d = getsubdfa(v, t->child);
1331  if (ISERR())
1332  {
1333  FREE(endpts);
1334  return v->err;
1335  }
1336 
1337  /*
1338  * Our strategy is to first find a set of sub-match endpoints that are
1339  * valid according to the child node's DFA, and then recursively dissect
1340  * each sub-match to confirm validity. If any validity check fails,
1341  * backtrack the last sub-match and try again. And, when we next try for
1342  * a validity check, we need not recheck any successfully verified
1343  * sub-matches that we didn't move the endpoints of. nverified remembers
1344  * how many sub-matches are currently known okay.
1345  */
1346 
1347  /* initialize to consider first sub-match */
1348  nverified = 0;
1349  k = 1;
1350  limit = begin;
1351 
1352  /* iterate until satisfaction or failure */
1353  while (k > 0)
1354  {
1355  /* disallow zero-length match unless necessary to achieve min */
1356  if (limit == endpts[k - 1] &&
1357  limit != end &&
1358  (k >= min_matches || min_matches - k < end - limit))
1359  limit++;
1360 
1361  /* if this is the last allowed sub-match, it must reach to the end */
1362  if (k >= max_matches)
1363  limit = end;
1364 
1365  /* try to find an endpoint for the k'th sub-match */
1366  endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
1367  (chr **) NULL, (int *) NULL);
1368  if (ISERR())
1369  {
1370  FREE(endpts);
1371  return v->err;
1372  }
1373  if (endpts[k] == NULL)
1374  {
1375  /* no match possible, so see if we can lengthen previous one */
1376  k--;
1377  goto backtrack;
1378  }
1379  MDEBUG(("%d: working endpoint %d: %ld\n",
1380  t->id, k, LOFF(endpts[k])));
1381 
1382  /* k'th sub-match can no longer be considered verified */
1383  if (nverified >= k)
1384  nverified = k - 1;
1385 
1386  if (endpts[k] != end)
1387  {
1388  /* haven't reached end yet, try another iteration if allowed */
1389  if (k >= max_matches)
1390  {
1391  /* must try to lengthen some previous match */
1392  k--;
1393  goto backtrack;
1394  }
1395 
1396  k++;
1397  limit = endpts[k - 1];
1398  continue;
1399  }
1400 
1401  /*
1402  * We've identified a way to divide the string into k sub-matches that
1403  * works so far as the child DFA can tell. If k is an allowed number
1404  * of matches, start the slow part: recurse to verify each sub-match.
1405  * We always have k <= max_matches, needn't check that.
1406  */
1407  if (k < min_matches)
1408  goto backtrack;
1409 
1410  MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1411 
1412  for (i = nverified + 1; i <= k; i++)
1413  {
1414  zaptreesubs(v, t->child);
1415  er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1416  if (er == REG_OKAY)
1417  {
1418  nverified = i;
1419  continue;
1420  }
1421  if (er == REG_NOMATCH)
1422  break;
1423  /* oops, something failed */
1424  FREE(endpts);
1425  return er;
1426  }
1427 
1428  if (i > k)
1429  {
1430  /* satisfaction */
1431  MDEBUG(("%d: successful\n", t->id));
1432  FREE(endpts);
1433  return REG_OKAY;
1434  }
1435 
1436  /* match failed to verify, so backtrack */
1437 
1438 backtrack:
1439 
1440  /*
1441  * Must consider longer versions of the current sub-match.
1442  */
1443  while (k > 0)
1444  {
1445  if (endpts[k] < end)
1446  {
1447  limit = endpts[k] + 1;
1448  /* break out of backtrack loop, continue the outer one */
1449  break;
1450  }
1451  /* can't lengthen k'th sub-match any more, consider previous one */
1452  k--;
1453  }
1454  }
1455 
1456  /* all possibilities exhausted */
1457  MDEBUG(("%d: failed\n", t->id));
1458  FREE(endpts);
1459  return REG_NOMATCH;
1460 }
1461 
1462 
1463 
1464 #include "rege_dfa.c"
#define REG_UIMPOSSIBLE
Definition: regex.h:74
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
struct sset * ssets
Definition: regexec.c:70
struct dfa ** ladfas
Definition: regexec.c:119
static struct sset * getvacant(struct vars *, struct dfa *, chr *, chr *)
chr * lastseen
Definition: regexec.c:58
static int find(struct vars *, struct cnfa *, struct colormap *)
Definition: regexec.c:393
struct sset ** outs
Definition: regexec.c:59
#define REG_EXPECT
Definition: regex.h:115
struct subre * child
Definition: regguts.h:495
short backmin
Definition: regexec.c:81
int capno
Definition: regguts.h:491
struct arcp * incarea
Definition: regexec.c:74
struct subre * lacons
Definition: regguts.h:538
static struct sset * miss(struct vars *, struct dfa *, struct sset *, color, chr *, chr *)
struct subre * tree
Definition: regguts.h:533
static unsigned hash(unsigned *, int)
struct colormap * cm
Definition: regexec.c:76
#define REG_ESPACE
Definition: regex.h:151
struct cnfa * cnfa
Definition: regexec.c:75
regoff_t rm_so
Definition: regex.h:87
Definition: regguts.h:401
short color
Definition: regguts.h:150
char op
Definition: regguts.h:473
#define WORK
Definition: regexec.c:87
int nlacons
Definition: regguts.h:539
#define LOFF(p)
Definition: regexec.c:132
short min
Definition: regguts.h:493
#define REMAGIC
Definition: regguts.h:96
#define REG_FTRACE
Definition: regex.h:129
unsigned * work
Definition: regexec.c:72
static chr * lastcold(struct vars *, struct dfa *)
#define FREE(ptr)
Definition: cryptohash.c:37
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
struct state * begin
Definition: regguts.h:497
struct sset * search
Definition: regexec.c:79
rm_detail_t * details
Definition: regexec.c:113
static int matchuntil(struct vars *, struct dfa *, chr *, struct sset **, chr **)
#define FEWCOLORS
Definition: regexec.c:91
#define MDEBUG(arglist)
Definition: regguts.h:117
int re_csize
Definition: regex.h:76
#define REG_MTRACE
Definition: regex.h:130
chr * start
Definition: regexec.c:114
regoff_t rm_eo
Definition: regex.h:88
#define MALLOC(n)
Definition: regcustom.h:60
Definition: regexec.c:63
struct sset ** lblastcss
Definition: regexec.c:120
chr * search_start
Definition: regexec.c:115
int nstates
Definition: regguts.h:403
int id
Definition: regguts.h:490
int re_magic
Definition: regex.h:57
struct cnfa search
Definition: regguts.h:534
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
unsigned * states
Definition: regexec.c:47
bool ismalloced
Definition: regexec.c:83
regex_t * re
Definition: regcomp.c:239
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:653
struct subre * sibling
Definition: regguts.h:496
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:346
static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **)
Definition: regexec.c:524
struct arcp * inchain
Definition: regexec.c:60
#define DOMALLOC
Definition: regexec.c:101
#define REG_INVARG
Definition: regex.h:154
#define FEWSTATES
Definition: regexec.c:90
struct smalldfa dfa2
Definition: regexec.c:123
char flags
Definition: regguts.h:474
bool arraysmalloced
Definition: regexec.c:84
struct colormap cmap
Definition: regguts.h:536
#define assert(TEST)
Definition: imath.c:73
int ncolors
Definition: regexec.c:68
static int cbrdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:948
int nstates
Definition: regexec.c:67
int nssets
Definition: regexec.c:65
int nssused
Definition: regexec.c:66
int wordsper
Definition: regexec.c:69
struct guts * g
Definition: regexec.c:109
int err
Definition: regcomp.c:242
#define LOCALMAT
int eflags
Definition: regexec.c:110
int backno
Definition: regguts.h:492
static int creviterdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:1273
static void cleanup(void)
Definition: bootstrap.c:872
static struct sset * initialize(struct vars *, struct dfa *, chr *)
chr * lastnopr
Definition: regexec.c:78
#define BACKR
Definition: regguts.h:479
Definition: regguts.h:526
int ntree
Definition: regguts.h:535
#define REG_UBACKREF
Definition: regex.h:60
Definition: regexec.c:39
static int caltdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:1030
#define LOCALDFAS
#define DUPINF
Definition: regguts.h:94
struct dfa ** subdfas
Definition: regexec.c:118
#define REG_ASSERT
Definition: regex.h:153
int backno
Definition: regexec.c:80
void pg_set_regex_collation(Oid collation)
short max
Definition: regguts.h:494
regmatch_t * pmatch
Definition: regexec.c:112
static int lacon(struct vars *, struct cnfa *, chr *, color)
#define ERR(e)
Definition: regexec.c:129
short backmax
Definition: regexec.c:82
chr * stop
Definition: regexec.c:116
#define OFF(p)
Definition: regexec.c:131
static chr * dfa_backref(struct vars *, struct dfa *, chr *, chr *, chr *, bool)
static int ccondissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:783
#define REG_ETOOBIG
Definition: regex.h:157
static int citerdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:1071
#define REG_NOSUB
Definition: regex.h:109
static void freedfa(struct dfa *)
#define VS(x)
Definition: regguts.h:61
struct state * end
Definition: regguts.h:498
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:708
long info
Definition: regguts.h:531
Definition: regguts.h:471
struct cnfa cnfa
Definition: regguts.h:499
size_t nmatch
Definition: regexec.c:111
static void zapallsubs(regmatch_t *, size_t)
Definition: regexec.c:637
regmatch_t rm_extend
Definition: regex.h:94
const chr * stop
Definition: regcomp.c:241
char * re_guts
Definition: regex.h:80
int cflags
Definition: regguts.h:530
struct sset ** outsarea
Definition: regexec.c:73
int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
Definition: regexec.c:176
Definition: regexec.c:45
static struct sset * pickss(struct vars *, struct dfa *, chr *, chr *)
size_t nsub
Definition: regguts.h:532
#define SHORTER
Definition: regguts.h:476
#define CANCEL_REQUESTED(re)
Definition: regguts.h:516
int i
#define REG_NOMATCH
Definition: regex.h:140
chr * lastpost
Definition: regexec.c:77
chr ** lblastcp
Definition: regexec.c:121
#define REG_MIXED
Definition: regex.h:155
#define NOERR()
Definition: regexec.c:130
unsigned hash
Definition: regexec.c:48
struct sset * ss
Definition: regexec.c:41
unsigned * statesarea
Definition: regexec.c:71
static struct dfa * getladfa(struct vars *, int)
Definition: regexec.c:374
#define close(a)
Definition: win32.h:12
static int crevcondissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:864
int flags
Definition: regexec.c:52
Definition: regcomp.c:237
static void subset(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:676
Oid re_collation
Definition: regex.h:78
Definition: regex.h:55
#define STACK_TOO_DEEP(re)
Definition: regguts.h:519
struct smalldfa dfa1
Definition: regexec.c:122
static int cfind(struct vars *, struct cnfa *, struct colormap *)
Definition: regexec.c:484
color co
Definition: regexec.c:42
#define ISERR()
Definition: regexec.c:127
#define REG_CANCEL
Definition: regex.h:159