PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
rege_dfa.c File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

static chrlongest (struct vars *v, struct dfa *d, chr *start, chr *stop, int *hitstopp)
 
static chrshortest (struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
 
static int matchuntil (struct vars *v, struct dfa *d, chr *probe, struct sset **lastcss, chr **lastcp)
 
static chrdfa_backref (struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, bool shortest)
 
static chrlastcold (struct vars *v, struct dfa *d)
 
static struct dfanewdfa (struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct smalldfa *sml)
 
static void freedfa (struct dfa *d)
 
static unsigned hash (unsigned *uv, int n)
 
static struct ssetinitialize (struct vars *v, struct dfa *d, chr *start)
 
static struct ssetmiss (struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
 
static int lacon (struct vars *v, struct cnfa *pcnfa, chr *cp, color co)
 
static struct ssetgetvacant (struct vars *v, struct dfa *d, chr *cp, chr *start)
 
static struct ssetpickss (struct vars *v, struct dfa *d, chr *cp, chr *start)
 

Function Documentation

◆ dfa_backref()

static chr * dfa_backref ( struct vars v,
struct dfa d,
chr start,
chr min,
chr max,
bool  shortest 
)
static

Definition at line 506 of file rege_dfa.c.

512{
513 int n = d->backno;
514 int backmin = d->backmin;
515 int backmax = d->backmax;
516 size_t numreps;
517 size_t minreps;
518 size_t maxreps;
519 size_t brlen;
520 chr *brstring;
521 chr *p;
522
523 /* get the backreferenced string (caller should have checked this) */
524 if (v->pmatch[n].rm_so == -1)
525 return NULL;
526 brstring = v->start + v->pmatch[n].rm_so;
527 brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
528
529 /* special-case zero-length backreference to avoid divide by zero */
530 if (brlen == 0)
531 {
532 /*
533 * matches only a zero-length string, but any number of repetitions
534 * can be considered to be present
535 */
536 if (min == start && backmin <= backmax)
537 return start;
538 return NULL;
539 }
540
541 /*
542 * convert min and max into numbers of possible repetitions of the backref
543 * string, rounding appropriately
544 */
545 if (min <= start)
546 minreps = 0;
547 else
548 minreps = (min - start - 1) / brlen + 1;
549 maxreps = (max - start) / brlen;
550
551 /* apply bounds, then see if there is any allowed match length */
552 if (minreps < backmin)
553 minreps = backmin;
554 if (backmax != DUPINF && maxreps > backmax)
555 maxreps = backmax;
556 if (maxreps < minreps)
557 return NULL;
558
559 /* quick exit if zero-repetitions match is valid and preferred */
560 if (shortest && minreps == 0)
561 return start;
562
563 /* okay, compare the actual string contents */
564 p = start;
565 numreps = 0;
566 while (numreps < maxreps)
567 {
568 if ((*v->g->compare) (brstring, p, brlen) != 0)
569 break;
570 p += brlen;
571 numreps++;
572 if (shortest && numreps >= minreps)
573 break;
574 }
575
576 if (numreps >= minreps)
577 return p;
578 return NULL;
579}
return str start
pg_wchar chr
Definition: regcustom.h:59
static chr * shortest(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
Definition: rege_dfa.c:204
#define DUPINF
Definition: regguts.h:99
short backmin
Definition: regexec.c:81
int backno
Definition: regexec.c:80
short backmax
Definition: regexec.c:82
struct guts * g
Definition: regexec.c:109
chr * start
Definition: regexec.c:114
regmatch_t * pmatch
Definition: regexec.c:112

References dfa::backmax, dfa::backmin, dfa::backno, DUPINF, vars::g, colormap::max, vars::pmatch, shortest(), vars::start, start, and colormap::v.

Referenced by longest(), and shortest().

◆ freedfa()

static void freedfa ( struct dfa d)
static

Definition at line 691 of file rege_dfa.c.

692{
693 if (d->arraysmalloced)
694 {
695 if (d->ssets != NULL)
696 FREE(d->ssets);
697 if (d->statesarea != NULL)
698 FREE(d->statesarea);
699 if (d->outsarea != NULL)
700 FREE(d->outsarea);
701 if (d->incarea != NULL)
702 FREE(d->incarea);
703 }
704
705 if (d->ismalloced)
706 FREE(d);
707}
bool arraysmalloced
Definition: regexec.c:84
unsigned * statesarea
Definition: regexec.c:71
struct arcp * incarea
Definition: regexec.c:74
struct sset * ssets
Definition: regexec.c:70
bool ismalloced
Definition: regexec.c:83
struct sset ** outsarea
Definition: regexec.c:73
@ FREE
Definition: task.c:94

References dfa::arraysmalloced, FREE, dfa::incarea, dfa::ismalloced, dfa::outsarea, dfa::ssets, and dfa::statesarea.

Referenced by newdfa().

◆ getvacant()

static struct sset * getvacant ( struct vars v,
struct dfa d,
chr cp,
chr start 
)
static

Definition at line 973 of file rege_dfa.c.

977{
978 int i;
979 struct sset *ss;
980 struct sset *p;
981 struct arcp ap;
982 color co;
983
984 ss = pickss(v, d, cp, start);
985 if (ss == NULL)
986 return NULL;
987 assert(!(ss->flags & LOCKED));
988
989 /* clear out its inarcs, including self-referential ones */
990 ap = ss->ins;
991 while ((p = ap.ss) != NULL)
992 {
993 co = ap.co;
994 FDEBUG(("zapping c%d's %ld outarc\n", (int) (p - d->ssets), (long) co));
995 p->outs[co] = NULL;
996 ap = p->inchain[co];
997 p->inchain[co].ss = NULL; /* paranoia */
998 }
999 ss->ins.ss = NULL;
1000
1001 /* take it off the inarc chains of the ssets reached by its outarcs */
1002 for (i = 0; i < d->ncolors; i++)
1003 {
1004 p = ss->outs[i];
1005 assert(p != ss); /* not self-referential */
1006 if (p == NULL)
1007 continue; /* NOTE CONTINUE */
1008 FDEBUG(("del outarc %d from c%d's in chn\n", i, (int) (p - d->ssets)));
1009 if (p->ins.ss == ss && p->ins.co == i)
1010 p->ins = ss->inchain[i];
1011 else
1012 {
1013 struct arcp lastap = {NULL, 0};
1014
1015 assert(p->ins.ss != NULL);
1016 for (ap = p->ins; ap.ss != NULL &&
1017 !(ap.ss == ss && ap.co == i);
1018 ap = ap.ss->inchain[ap.co])
1019 lastap = ap;
1020 assert(ap.ss != NULL);
1021 lastap.ss->inchain[lastap.co] = ss->inchain[i];
1022 }
1023 ss->outs[i] = NULL;
1024 ss->inchain[i].ss = NULL;
1025 }
1026
1027 /* if ss was a success state, may need to remember location */
1028 if ((ss->flags & POSTSTATE) && ss->lastseen != d->lastpost &&
1029 (d->lastpost == NULL || d->lastpost < ss->lastseen))
1030 d->lastpost = ss->lastseen;
1031
1032 /* likewise for a no-progress state */
1033 if ((ss->flags & NOPROGRESS) && ss->lastseen != d->lastnopr &&
1034 (d->lastnopr == NULL || d->lastnopr < ss->lastseen))
1035 d->lastnopr = ss->lastseen;
1036
1037 return ss;
1038}
int i
Definition: isn.c:72
#define assert(x)
Definition: regcustom.h:56
static struct sset * pickss(struct vars *v, struct dfa *d, chr *cp, chr *start)
Definition: rege_dfa.c:1044
#define LOCKED
Definition: regexec.c:55
#define POSTSTATE
Definition: regexec.c:54
#define NOPROGRESS
Definition: regexec.c:56
short color
Definition: regguts.h:155
#define FDEBUG(arglist)
Definition: regguts.h:121
Definition: regexec.c:40
struct sset * ss
Definition: regexec.c:41
color co
Definition: regexec.c:42
chr * lastpost
Definition: regexec.c:77
chr * lastnopr
Definition: regexec.c:78
int ncolors
Definition: regexec.c:68
Definition: regexec.c:46
struct arcp ins
Definition: regexec.c:57
struct arcp * inchain
Definition: regexec.c:60
chr * lastseen
Definition: regexec.c:58
struct sset ** outs
Definition: regexec.c:59
int flags
Definition: regexec.c:52

References assert, arcp::co, FDEBUG, sset::flags, i, sset::inchain, sset::ins, dfa::lastnopr, dfa::lastpost, sset::lastseen, LOCKED, dfa::ncolors, NOPROGRESS, sset::outs, pickss(), POSTSTATE, arcp::ss, dfa::ssets, and start.

Referenced by initialize(), and miss().

◆ hash()

◆ initialize()

static struct sset * initialize ( struct vars v,
struct dfa d,
chr start 
)
static

Definition at line 731 of file rege_dfa.c.

734{
735 struct sset *ss;
736 int i;
737
738 /* is previous one still there? */
739 if (d->nssused > 0 && (d->ssets[0].flags & STARTER))
740 ss = &d->ssets[0];
741 else
742 { /* no, must (re)build it */
743 ss = getvacant(v, d, start, start);
744 if (ss == NULL)
745 return NULL;
746 for (i = 0; i < d->wordsper; i++)
747 ss->states[i] = 0;
748 BSET(ss->states, d->cnfa->pre);
749 ss->hash = HASH(ss->states, d->wordsper);
750 assert(d->cnfa->pre != d->cnfa->post);
751 ss->flags = STARTER | LOCKED | NOPROGRESS;
752 /* lastseen dealt with below */
753 }
754
755 for (i = 0; i < d->nssused; i++)
756 d->ssets[i].lastseen = NULL;
757 ss->lastseen = start; /* maybe untrue, but harmless */
758 d->lastpost = NULL;
759 d->lastnopr = NULL;
760 return ss;
761}
#define HASH(sign, val, siglen)
Definition: hstore_gist.c:46
static struct sset * getvacant(struct vars *v, struct dfa *d, chr *cp, chr *start)
Definition: rege_dfa.c:973
#define STARTER
Definition: regexec.c:53
#define BSET(uv, sn)
Definition: regguts.h:131
int pre
Definition: regguts.h:415
int post
Definition: regguts.h:416
int wordsper
Definition: regexec.c:69
struct cnfa * cnfa
Definition: regexec.c:75
int nssused
Definition: regexec.c:66
unsigned hash
Definition: regexec.c:48
unsigned * states
Definition: regexec.c:47

References assert, BSET, dfa::cnfa, sset::flags, getvacant(), HASH, sset::hash, i, dfa::lastnopr, dfa::lastpost, sset::lastseen, LOCKED, NOPROGRESS, dfa::nssused, cnfa::post, cnfa::pre, dfa::ssets, start, STARTER, sset::states, and dfa::wordsper.

Referenced by longest(), matchuntil(), and shortest().

◆ lacon()

static int lacon ( struct vars v,
struct cnfa pcnfa,
chr cp,
color  co 
)
static

Definition at line 916 of file rege_dfa.c.

920{
921 int n;
922 struct subre *sub;
923 struct dfa *d;
924 chr *end;
925 int satisfied;
926
927 /* Since this is recursive, it could be driven to stack overflow */
928 if (STACK_TOO_DEEP(v->re))
929 {
931 return 0;
932 }
933
934 n = co - pcnfa->ncolors;
935 assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
936 FDEBUG(("=== testing lacon %d\n", n));
937 sub = &v->g->lacons[n];
938 d = getladfa(v, n);
939 if (d == NULL)
940 return 0;
941 if (LATYPE_IS_AHEAD(sub->latype))
942 {
943 /* used to use longest() here, but shortest() could be much cheaper */
944 end = shortest(v, d, cp, cp, v->stop,
945 (chr **) NULL, (int *) NULL);
946 satisfied = LATYPE_IS_POS(sub->latype) ? (end != NULL) : (end == NULL);
947 }
948 else
949 {
950 /*
951 * To avoid doing O(N^2) work when repeatedly testing a lookbehind
952 * constraint in an N-character string, we use matchuntil() which can
953 * cache the DFA state across calls. We only need to restart if the
954 * probe point decreases, which is not common. The NFA we're using is
955 * a search NFA, so it doesn't mind scanning over stuff before the
956 * nominal match.
957 */
958 satisfied = matchuntil(v, d, cp, &v->lblastcss[n], &v->lblastcp[n]);
959 if (!LATYPE_IS_POS(sub->latype))
960 satisfied = !satisfied;
961 }
962 FDEBUG(("=== lacon %d satisfied %d\n", n, satisfied));
963 return satisfied;
964}
#define ERR
Definition: _int.h:161
static int matchuntil(struct vars *v, struct dfa *d, chr *probe, struct sset **lastcss, chr **lastcp)
Definition: rege_dfa.c:371
#define REG_ETOOBIG
Definition: regex.h:233
static struct dfa * getladfa(struct vars *v, int n)
Definition: regexec.c:400
#define STACK_TOO_DEEP(re)
Definition: regguts.h:523
#define LATYPE_IS_AHEAD(la)
Definition: regguts.h:109
#define LATYPE_IS_POS(la)
Definition: regguts.h:108
int ncolors
Definition: regguts.h:409
Definition: regexec.c:64
struct subre * lacons
Definition: regguts.h:542
Definition: regguts.h:479
char latype
Definition: regguts.h:497
const chr * stop
Definition: regcomp.c:285
struct sset ** lblastcss
Definition: regexec.c:120
chr ** lblastcp
Definition: regexec.c:121
regex_t * re
Definition: regcomp.c:283

References assert, ERR, FDEBUG, vars::g, getladfa(), guts::lacons, subre::latype, LATYPE_IS_AHEAD, LATYPE_IS_POS, vars::lblastcp, vars::lblastcss, matchuntil(), cnfa::ncolors, vars::re, REG_ETOOBIG, shortest(), STACK_TOO_DEEP, and vars::stop.

Referenced by miss().

◆ lastcold()

static chr * lastcold ( struct vars v,
struct dfa d 
)
static

Definition at line 585 of file rege_dfa.c.

587{
588 struct sset *ss;
589 chr *nopr;
590 int i;
591
592 nopr = d->lastnopr;
593 if (nopr == NULL)
594 nopr = v->start;
595 for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
596 if ((ss->flags & NOPROGRESS) && nopr < ss->lastseen)
597 nopr = ss->lastseen;
598 return nopr;
599}

References sset::flags, i, dfa::lastnopr, sset::lastseen, NOPROGRESS, dfa::nssused, dfa::ssets, and vars::start.

Referenced by shortest().

◆ longest()

static chr * longest ( struct vars v,
struct dfa d,
chr start,
chr stop,
int *  hitstopp 
)
static

Definition at line 42 of file rege_dfa.c.

47{
48 chr *cp;
49 chr *realstop = (stop == v->stop) ? stop : stop + 1;
50 color co;
51 struct sset *css;
52 struct sset *ss;
53 chr *post;
54 int i;
55 struct colormap *cm = d->cm;
56
57 /* prevent "uninitialized variable" warnings */
58 if (hitstopp != NULL)
59 *hitstopp = 0;
60
61 /* if this is a backref to a known string, just match against that */
62 if (d->backno >= 0)
63 {
64 assert((size_t) d->backno < v->nmatch);
65 if (v->pmatch[d->backno].rm_so >= 0)
66 {
67 cp = dfa_backref(v, d, start, start, stop, false);
68 if (cp == v->stop && stop == v->stop && hitstopp != NULL)
69 *hitstopp = 1;
70 return cp;
71 }
72 }
73
74 /* fast path for matchall NFAs */
75 if (d->cnfa->flags & MATCHALL)
76 {
77 size_t nchr = stop - start;
78 size_t maxmatchall = d->cnfa->maxmatchall;
79
80 if (nchr < d->cnfa->minmatchall)
81 return NULL;
82 if (maxmatchall == DUPINF)
83 {
84 if (stop == v->stop && hitstopp != NULL)
85 *hitstopp = 1;
86 }
87 else
88 {
89 if (stop == v->stop && nchr <= maxmatchall + 1 && hitstopp != NULL)
90 *hitstopp = 1;
91 if (nchr > maxmatchall)
92 return start + maxmatchall;
93 }
94 return stop;
95 }
96
97 /* initialize */
98 css = initialize(v, d, start);
99 if (css == NULL)
100 return NULL;
101 cp = start;
102
103 /* startup */
104 FDEBUG(("+++ startup +++\n"));
105 if (cp == v->start)
106 {
107 co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
108 FDEBUG(("color %ld\n", (long) co));
109 }
110 else
111 {
112 co = GETCOLOR(cm, *(cp - 1));
113 FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
114 }
115 css = miss(v, d, css, co, cp, start);
116 if (css == NULL)
117 return NULL;
118 css->lastseen = cp;
119
120 /*
121 * This is the main text-scanning loop. It seems worth having two copies
122 * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
123 * builds, when you're not actively tracing.
124 */
125#ifdef REG_DEBUG
126 if (v->eflags & REG_FTRACE)
127 {
128 while (cp < realstop)
129 {
130 FDEBUG(("+++ at c%d +++\n", (int) (css - d->ssets)));
131 co = GETCOLOR(cm, *cp);
132 FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
133 ss = css->outs[co];
134 if (ss == NULL)
135 {
136 ss = miss(v, d, css, co, cp + 1, start);
137 if (ss == NULL)
138 break; /* NOTE BREAK OUT */
139 }
140 cp++;
141 ss->lastseen = cp;
142 css = ss;
143 }
144 }
145 else
146#endif
147 {
148 while (cp < realstop)
149 {
150 co = GETCOLOR(cm, *cp);
151 ss = css->outs[co];
152 if (ss == NULL)
153 {
154 ss = miss(v, d, css, co, cp + 1, start);
155 if (ss == NULL)
156 break; /* NOTE BREAK OUT */
157 }
158 cp++;
159 ss->lastseen = cp;
160 css = ss;
161 }
162 }
163
164 if (ISERR())
165 return NULL;
166
167 /* shutdown */
168 FDEBUG(("+++ shutdown at c%d +++\n", (int) (css - d->ssets)));
169 if (cp == v->stop && stop == v->stop)
170 {
171 if (hitstopp != NULL)
172 *hitstopp = 1;
173 co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
174 FDEBUG(("color %ld\n", (long) co));
175 ss = miss(v, d, css, co, cp, start);
176 if (ISERR())
177 return NULL;
178 /* special case: match ended at eol? */
179 if (ss != NULL && (ss->flags & POSTSTATE))
180 return cp;
181 else if (ss != NULL)
182 ss->lastseen = cp; /* to be tidy */
183 }
184
185 /* find last match, if any */
186 post = d->lastpost;
187 for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
188 if ((ss->flags & POSTSTATE) && post != ss->lastseen &&
189 (post == NULL || post < ss->lastseen))
190 post = ss->lastseen;
191 if (post != NULL) /* found one */
192 return post - 1;
193
194 return NULL;
195}
#define ISERR()
Definition: regcomp.c:317
static chr * dfa_backref(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, bool shortest)
Definition: rege_dfa.c:506
static struct sset * miss(struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
Definition: rege_dfa.c:777
static struct sset * initialize(struct vars *v, struct dfa *d, chr *start)
Definition: rege_dfa.c:731
#define REG_FTRACE
Definition: regex.h:205
#define REG_NOTEOL
Definition: regex.h:203
#define REG_NOTBOL
Definition: regex.h:202
#define GETCOLOR(cm, c)
Definition: regguts.h:257
#define MATCHALL
Definition: regguts.h:412
Definition: regguts.h:407
color eos[2]
Definition: regguts.h:418
int maxmatchall
Definition: regguts.h:426
int flags
Definition: regguts.h:410
int minmatchall
Definition: regguts.h:425
color bos[2]
Definition: regguts.h:417
struct vars * v
Definition: regguts.h:232
struct colormap * cm
Definition: regexec.c:76
size_t nmatch
Definition: regexec.c:111
int eflags
Definition: regexec.c:110

References assert, dfa::backno, cnfa::bos, dfa::cm, dfa::cnfa, dfa_backref(), DUPINF, vars::eflags, cnfa::eos, FDEBUG, sset::flags, cnfa::flags, GETCOLOR, i, initialize(), ISERR, dfa::lastpost, sset::lastseen, MATCHALL, cnfa::maxmatchall, cnfa::minmatchall, miss(), vars::nmatch, dfa::nssused, sset::outs, vars::pmatch, POSTSTATE, REG_FTRACE, REG_NOTBOL, REG_NOTEOL, dfa::ssets, vars::start, start, vars::stop, and colormap::v.

Referenced by ProcessSyncRequests().

◆ matchuntil()

static int matchuntil ( struct vars v,
struct dfa d,
chr probe,
struct sset **  lastcss,
chr **  lastcp 
)
static

Definition at line 371 of file rege_dfa.c.

376{
377 chr *cp = *lastcp;
378 color co;
379 struct sset *css = *lastcss;
380 struct sset *ss;
381 struct colormap *cm = d->cm;
382
383 /* fast path for matchall NFAs */
384 if (d->cnfa->flags & MATCHALL)
385 {
386 size_t nchr = probe - v->start;
387
388 if (nchr < d->cnfa->minmatchall)
389 return 0;
390 /* maxmatchall will always be infinity, cf. makesearch() */
392 return 1;
393 }
394
395 /* initialize and startup, or restart, if necessary */
396 if (cp == NULL || cp > probe)
397 {
398 cp = v->start;
399 css = initialize(v, d, cp);
400 if (css == NULL)
401 return 0;
402
403 FDEBUG((">>> startup >>>\n"));
404 co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
405 FDEBUG(("color %ld\n", (long) co));
406
407 css = miss(v, d, css, co, cp, v->start);
408 if (css == NULL)
409 return 0;
410 css->lastseen = cp;
411 }
412 else if (css == NULL)
413 {
414 /* we previously found that no match is possible beyond *lastcp */
415 return 0;
416 }
417 ss = css;
418
419 /*
420 * This is the main text-scanning loop. It seems worth having two copies
421 * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
422 * builds, when you're not actively tracing.
423 */
424#ifdef REG_DEBUG
425 if (v->eflags & REG_FTRACE)
426 {
427 while (cp < probe)
428 {
429 FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
430 co = GETCOLOR(cm, *cp);
431 FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
432 ss = css->outs[co];
433 if (ss == NULL)
434 {
435 ss = miss(v, d, css, co, cp + 1, v->start);
436 if (ss == NULL)
437 break; /* NOTE BREAK OUT */
438 }
439 cp++;
440 ss->lastseen = cp;
441 css = ss;
442 }
443 }
444 else
445#endif
446 {
447 while (cp < probe)
448 {
449 co = GETCOLOR(cm, *cp);
450 ss = css->outs[co];
451 if (ss == NULL)
452 {
453 ss = miss(v, d, css, co, cp + 1, v->start);
454 if (ss == NULL)
455 break; /* NOTE BREAK OUT */
456 }
457 cp++;
458 ss->lastseen = cp;
459 css = ss;
460 }
461 }
462
463 *lastcss = ss;
464 *lastcp = cp;
465
466 if (ss == NULL)
467 return 0; /* impossible match, or internal error */
468
469 /* We need to process one more chr, or the EOS symbol, to check match */
470 if (cp < v->stop)
471 {
472 FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
473 co = GETCOLOR(cm, *cp);
474 FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
475 ss = css->outs[co];
476 if (ss == NULL)
477 ss = miss(v, d, css, co, cp + 1, v->start);
478 }
479 else
480 {
481 assert(cp == v->stop);
482 co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
483 FDEBUG(("color %ld\n", (long) co));
484 ss = miss(v, d, css, co, cp, v->start);
485 }
486
487 if (ss == NULL || !(ss->flags & POSTSTATE))
488 return 0;
489
490 return 1;
491}

References assert, cnfa::bos, dfa::cm, dfa::cnfa, DUPINF, vars::eflags, cnfa::eos, FDEBUG, sset::flags, cnfa::flags, GETCOLOR, initialize(), sset::lastseen, MATCHALL, cnfa::maxmatchall, cnfa::minmatchall, miss(), sset::outs, POSTSTATE, REG_FTRACE, REG_NOTBOL, REG_NOTEOL, dfa::ssets, vars::start, vars::stop, and colormap::v.

Referenced by lacon().

◆ miss()

static struct sset * miss ( struct vars v,
struct dfa d,
struct sset css,
color  co,
chr cp,
chr start 
)
static

Definition at line 777 of file rege_dfa.c.

783{
784 struct cnfa *cnfa = d->cnfa;
785 int i;
786 unsigned h;
787 struct carc *ca;
788 struct sset *p;
789 int ispseudocolor;
790 int ispost;
791 int noprogress;
792 int gotstate;
793 int dolacons;
794 int sawlacons;
795
796 /* for convenience, we can be called even if it might not be a miss */
797 if (css->outs[co] != NULL)
798 {
799 FDEBUG(("hit\n"));
800 return css->outs[co];
801 }
802 FDEBUG(("miss\n"));
803
804 /*
805 * Checking for operation cancel in the inner text search loop seems
806 * unduly expensive. As a compromise, check during cache misses.
807 */
808 INTERRUPT(v->re);
809
810 /*
811 * What set of states would we end up in after consuming the co character?
812 * We first consider PLAIN arcs that consume the character, and then look
813 * to see what LACON arcs could be traversed after consuming it.
814 */
815 for (i = 0; i < d->wordsper; i++)
816 d->work[i] = 0; /* build new stateset bitmap in d->work */
817 ispseudocolor = d->cm->cd[co].flags & PSEUDO;
818 ispost = 0;
819 noprogress = 1;
820 gotstate = 0;
821 for (i = 0; i < d->nstates; i++)
822 if (ISBSET(css->states, i))
823 for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
824 if (ca->co == co ||
825 (ca->co == RAINBOW && !ispseudocolor))
826 {
827 BSET(d->work, ca->to);
828 gotstate = 1;
829 if (ca->to == cnfa->post)
830 ispost = 1;
831 if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
832 noprogress = 0;
833 FDEBUG(("%d -> %d\n", i, ca->to));
834 }
835 if (!gotstate)
836 return NULL; /* character cannot reach any new state */
837 dolacons = (cnfa->flags & HASLACONS);
838 sawlacons = 0;
839 /* outer loop handles transitive closure of reachable-by-LACON states */
840 while (dolacons)
841 {
842 dolacons = 0;
843 for (i = 0; i < d->nstates; i++)
844 if (ISBSET(d->work, i))
845 for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
846 {
847 if (ca->co < cnfa->ncolors)
848 continue; /* not a LACON arc */
849 if (ISBSET(d->work, ca->to))
850 continue; /* arc would be a no-op anyway */
851 sawlacons = 1; /* this LACON affects our result */
852 if (!lacon(v, cnfa, cp, ca->co))
853 {
854 if (ISERR())
855 return NULL;
856 continue; /* LACON arc cannot be traversed */
857 }
858 if (ISERR())
859 return NULL;
860 BSET(d->work, ca->to);
861 dolacons = 1;
862 if (ca->to == cnfa->post)
863 ispost = 1;
864 if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
865 noprogress = 0;
866 FDEBUG(("%d :> %d\n", i, ca->to));
867 }
868 }
869 h = HASH(d->work, d->wordsper);
870
871 /* Is this stateset already in the cache? */
872 for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
873 if (HIT(h, d->work, p, d->wordsper))
874 {
875 FDEBUG(("cached c%d\n", (int) (p - d->ssets)));
876 break; /* NOTE BREAK OUT */
877 }
878 if (i == 0)
879 { /* nope, need a new cache entry */
880 p = getvacant(v, d, cp, start);
881 if (p == NULL)
882 return NULL;
883 assert(p != css);
884 for (i = 0; i < d->wordsper; i++)
885 p->states[i] = d->work[i];
886 p->hash = h;
887 p->flags = (ispost) ? POSTSTATE : 0;
888 if (noprogress)
889 p->flags |= NOPROGRESS;
890 /* lastseen to be dealt with by caller */
891 }
892
893 /*
894 * Link new stateset to old, unless a LACON affected the result, in which
895 * case we don't create the link. That forces future transitions across
896 * this same arc (same prior stateset and character color) to come through
897 * miss() again, so that we can recheck the LACON(s), which might or might
898 * not pass since context will be different.
899 */
900 if (!sawlacons)
901 {
902 FDEBUG(("c%d[%d]->c%d\n",
903 (int) (css - d->ssets), co, (int) (p - d->ssets)));
904 css->outs[co] = p;
905 css->inchain[co] = p->ins;
906 p->ins.ss = css;
907 p->ins.co = co;
908 }
909 return p;
910}
for(;;)
#define INTERRUPT(re)
Definition: regcustom.h:55
static int lacon(struct vars *v, struct cnfa *pcnfa, chr *cp, color co)
Definition: rege_dfa.c:916
#define HIT(h, bv, ss, nw)
Definition: regexec.c:50
#define PSEUDO
Definition: regguts.h:186
#define RAINBOW
Definition: regguts.h:159
#define CNFA_NOPROGRESS
Definition: regguts.h:420
#define COLORLESS
Definition: regguts.h:158
#define ISBSET(uv, sn)
Definition: regguts.h:132
#define HASLACONS
Definition: regguts.h:411
Definition: regguts.h:401
int to
Definition: regguts.h:403
color co
Definition: regguts.h:402
struct carc ** states
Definition: regguts.h:421
char * stflags
Definition: regguts.h:419
int flags
Definition: regguts.h:184
struct colordesc * cd
Definition: regguts.h:236
int nstates
Definition: regexec.c:67
unsigned * work
Definition: regexec.c:72

References assert, BSET, colormap::cd, dfa::cm, dfa::cnfa, CNFA_NOPROGRESS, arcp::co, carc::co, COLORLESS, FDEBUG, sset::flags, colordesc::flags, cnfa::flags, for(), getvacant(), HASH, sset::hash, HASLACONS, HIT, i, sset::inchain, sset::ins, INTERRUPT, ISBSET, ISERR, lacon(), cnfa::ncolors, NOPROGRESS, dfa::nssused, dfa::nstates, sset::outs, cnfa::post, POSTSTATE, PSEUDO, RAINBOW, vars::re, arcp::ss, dfa::ssets, start, sset::states, cnfa::states, cnfa::stflags, carc::to, dfa::wordsper, and dfa::work.

Referenced by longest(), matchuntil(), and shortest().

◆ newdfa()

static struct dfa * newdfa ( struct vars v,
struct cnfa cnfa,
struct colormap cm,
struct smalldfa sml 
)
static

Definition at line 607 of file rege_dfa.c.

611{
612 struct dfa *d;
613 size_t nss = cnfa->nstates * 2;
614 int wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
615 bool ismalloced = false;
616
617 assert(cnfa != NULL && cnfa->nstates != 0);
618
619 if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS)
620 {
621 assert(wordsper == 1);
622 if (sml == NULL)
623 {
624 sml = (struct smalldfa *) MALLOC(sizeof(struct smalldfa));
625 if (sml == NULL)
626 {
628 return NULL;
629 }
630 ismalloced = true;
631 }
632 d = &sml->dfa;
633 d->ssets = sml->ssets;
634 d->statesarea = sml->statesarea;
635 d->work = &d->statesarea[nss];
636 d->outsarea = sml->outsarea;
637 d->incarea = sml->incarea;
638 d->ismalloced = ismalloced;
639 d->arraysmalloced = false; /* not separately allocated, anyway */
640 }
641 else
642 {
643 d = (struct dfa *) MALLOC(sizeof(struct dfa));
644 if (d == NULL)
645 {
647 return NULL;
648 }
649 d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset));
650 d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper *
651 sizeof(unsigned));
652 d->work = &d->statesarea[nss * wordsper];
653 d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors *
654 sizeof(struct sset *));
655 d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors *
656 sizeof(struct arcp));
657 d->ismalloced = true;
658 d->arraysmalloced = true;
659 /* now freedfa() will behave sanely */
660 if (d->ssets == NULL || d->statesarea == NULL ||
661 d->outsarea == NULL || d->incarea == NULL)
662 {
663 freedfa(d);
665 return NULL;
666 }
667 }
668
669 d->nssets = (v->eflags & REG_SMALL) ? 7 : nss;
670 d->nssused = 0;
671 d->nstates = cnfa->nstates;
672 d->ncolors = cnfa->ncolors;
673 d->wordsper = wordsper;
674 d->cnfa = cnfa;
675 d->cm = cm;
676 d->lastpost = NULL;
677 d->lastnopr = NULL;
678 d->search = d->ssets;
679 d->backno = -1; /* may be set by caller */
680 d->backmin = d->backmax = 0;
681
682 /* initialization of sset fields is done as needed */
683
684 return d;
685}
#define MALLOC(n)
Definition: regcustom.h:52
static void freedfa(struct dfa *d)
Definition: rege_dfa.c:691
#define REG_SMALL
Definition: regex.h:207
#define REG_ESPACE
Definition: regex.h:227
#define WORK
Definition: regexec.c:87
#define FEWCOLORS
Definition: regexec.c:91
#define UBITS
Definition: regguts.h:130
int nstates
Definition: regguts.h:408
struct sset * search
Definition: regexec.c:79
int nssets
Definition: regexec.c:65
unsigned statesarea[FEWSTATES *2+WORK]
Definition: regexec.c:96
struct arcp incarea[FEWSTATES *2 *FEWCOLORS]
Definition: regexec.c:98
struct sset * outsarea[FEWSTATES *2 *FEWCOLORS]
Definition: regexec.c:97
struct dfa dfa
Definition: regexec.c:94
struct sset ssets[FEWSTATES *2]
Definition: regexec.c:95

References dfa::arraysmalloced, assert, dfa::backmax, dfa::backmin, dfa::backno, dfa::cm, dfa::cnfa, smalldfa::dfa, vars::eflags, ERR, FEWCOLORS, freedfa(), dfa::incarea, smalldfa::incarea, dfa::ismalloced, dfa::lastnopr, dfa::lastpost, MALLOC, dfa::ncolors, cnfa::ncolors, dfa::nssets, dfa::nssused, dfa::nstates, cnfa::nstates, dfa::outsarea, smalldfa::outsarea, REG_ESPACE, REG_SMALL, dfa::search, dfa::ssets, smalldfa::ssets, dfa::statesarea, smalldfa::statesarea, UBITS, dfa::wordsper, dfa::work, and WORK.

◆ pickss()

static struct sset * pickss ( struct vars v,
struct dfa d,
chr cp,
chr start 
)
static

Definition at line 1044 of file rege_dfa.c.

1048{
1049 int i;
1050 struct sset *ss;
1051 struct sset *end;
1052 chr *ancient;
1053
1054 /* shortcut for cases where cache isn't full */
1055 if (d->nssused < d->nssets)
1056 {
1057 i = d->nssused;
1058 d->nssused++;
1059 ss = &d->ssets[i];
1060 FDEBUG(("new c%d\n", i));
1061 /* set up innards */
1062 ss->states = &d->statesarea[i * d->wordsper];
1063 ss->flags = 0;
1064 ss->ins.ss = NULL;
1065 ss->ins.co = WHITE; /* give it some value */
1066 ss->outs = &d->outsarea[i * d->ncolors];
1067 ss->inchain = &d->incarea[i * d->ncolors];
1068 for (i = 0; i < d->ncolors; i++)
1069 {
1070 ss->outs[i] = NULL;
1071 ss->inchain[i].ss = NULL;
1072 }
1073 return ss;
1074 }
1075
1076 /* look for oldest, or old enough anyway */
1077 if (cp - start > d->nssets * 2 / 3) /* oldest 33% are expendable */
1078 ancient = cp - d->nssets * 2 / 3;
1079 else
1080 ancient = start;
1081 for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++)
1082 if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
1083 !(ss->flags & LOCKED))
1084 {
1085 d->search = ss + 1;
1086 FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
1087 return ss;
1088 }
1089 for (ss = d->ssets, end = d->search; ss < end; ss++)
1090 if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
1091 !(ss->flags & LOCKED))
1092 {
1093 d->search = ss + 1;
1094 FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
1095 return ss;
1096 }
1097
1098 /* nobody's old enough?!? -- something's really wrong */
1099 FDEBUG(("cannot find victim to replace!\n"));
1100 ERR(REG_ASSERT);
1101 return NULL;
1102}
#define REG_ASSERT
Definition: regex.h:229
#define WHITE
Definition: regguts.h:160

References arcp::co, ERR, FDEBUG, sset::flags, i, dfa::incarea, sset::inchain, sset::ins, sset::lastseen, LOCKED, dfa::ncolors, dfa::nssets, dfa::nssused, sset::outs, dfa::outsarea, REG_ASSERT, dfa::search, arcp::ss, dfa::ssets, start, sset::states, dfa::statesarea, WHITE, and dfa::wordsper.

Referenced by getvacant().

◆ shortest()

static chr * shortest ( struct vars v,
struct dfa d,
chr start,
chr min,
chr max,
chr **  coldp,
int *  hitstopp 
)
static

Definition at line 204 of file rege_dfa.c.

211{
212 chr *cp;
213 chr *realmin = (min == v->stop) ? min : min + 1;
214 chr *realmax = (max == v->stop) ? max : max + 1;
215 color co;
216 struct sset *css;
217 struct sset *ss;
218 struct colormap *cm = d->cm;
219
220 /* prevent "uninitialized variable" warnings */
221 if (coldp != NULL)
222 *coldp = NULL;
223 if (hitstopp != NULL)
224 *hitstopp = 0;
225
226 /* if this is a backref to a known string, just match against that */
227 if (d->backno >= 0)
228 {
229 assert((size_t) d->backno < v->nmatch);
230 if (v->pmatch[d->backno].rm_so >= 0)
231 {
232 cp = dfa_backref(v, d, start, min, max, true);
233 if (cp != NULL && coldp != NULL)
234 *coldp = start;
235 /* there is no case where we should set *hitstopp */
236 return cp;
237 }
238 }
239
240 /* fast path for matchall NFAs */
241 if (d->cnfa->flags & MATCHALL)
242 {
243 size_t nchr = min - start;
244
245 if (d->cnfa->maxmatchall != DUPINF &&
246 nchr > d->cnfa->maxmatchall)
247 return NULL;
248 if ((max - start) < d->cnfa->minmatchall)
249 return NULL;
250 if (nchr < d->cnfa->minmatchall)
251 min = start + d->cnfa->minmatchall;
252 if (coldp != NULL)
253 *coldp = start;
254 /* there is no case where we should set *hitstopp */
255 return min;
256 }
257
258 /* initialize */
259 css = initialize(v, d, start);
260 if (css == NULL)
261 return NULL;
262 cp = start;
263
264 /* startup */
265 FDEBUG(("--- startup ---\n"));
266 if (cp == v->start)
267 {
268 co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
269 FDEBUG(("color %ld\n", (long) co));
270 }
271 else
272 {
273 co = GETCOLOR(cm, *(cp - 1));
274 FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
275 }
276 css = miss(v, d, css, co, cp, start);
277 if (css == NULL)
278 return NULL;
279 css->lastseen = cp;
280 ss = css;
281
282 /*
283 * This is the main text-scanning loop. It seems worth having two copies
284 * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
285 * builds, when you're not actively tracing.
286 */
287#ifdef REG_DEBUG
288 if (v->eflags & REG_FTRACE)
289 {
290 while (cp < realmax)
291 {
292 FDEBUG(("--- at c%d ---\n", (int) (css - d->ssets)));
293 co = GETCOLOR(cm, *cp);
294 FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
295 ss = css->outs[co];
296 if (ss == NULL)
297 {
298 ss = miss(v, d, css, co, cp + 1, start);
299 if (ss == NULL)
300 break; /* NOTE BREAK OUT */
301 }
302 cp++;
303 ss->lastseen = cp;
304 css = ss;
305 if ((ss->flags & POSTSTATE) && cp >= realmin)
306 break; /* NOTE BREAK OUT */
307 }
308 }
309 else
310#endif
311 {
312 while (cp < realmax)
313 {
314 co = GETCOLOR(cm, *cp);
315 ss = css->outs[co];
316 if (ss == NULL)
317 {
318 ss = miss(v, d, css, co, cp + 1, start);
319 if (ss == NULL)
320 break; /* NOTE BREAK OUT */
321 }
322 cp++;
323 ss->lastseen = cp;
324 css = ss;
325 if ((ss->flags & POSTSTATE) && cp >= realmin)
326 break; /* NOTE BREAK OUT */
327 }
328 }
329
330 if (ss == NULL)
331 return NULL;
332
333 if (coldp != NULL) /* report last no-progress state set, if any */
334 *coldp = lastcold(v, d);
335
336 if ((ss->flags & POSTSTATE) && cp > min)
337 {
338 assert(cp >= realmin);
339 cp--;
340 }
341 else if (cp == v->stop && max == v->stop)
342 {
343 co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
344 FDEBUG(("color %ld\n", (long) co));
345 ss = miss(v, d, css, co, cp, start);
346 /* match might have ended at eol */
347 if ((ss == NULL || !(ss->flags & POSTSTATE)) && hitstopp != NULL)
348 *hitstopp = 1;
349 }
350
351 if (ss == NULL || !(ss->flags & POSTSTATE))
352 return NULL;
353
354 return cp;
355}
static chr * lastcold(struct vars *v, struct dfa *d)
Definition: rege_dfa.c:585
size_t max
Definition: regguts.h:234

References assert, dfa::backno, cnfa::bos, dfa::cm, dfa::cnfa, dfa_backref(), DUPINF, vars::eflags, cnfa::eos, FDEBUG, sset::flags, cnfa::flags, GETCOLOR, initialize(), lastcold(), sset::lastseen, MATCHALL, colormap::max, cnfa::maxmatchall, cnfa::minmatchall, miss(), vars::nmatch, sset::outs, vars::pmatch, POSTSTATE, REG_FTRACE, REG_NOTBOL, REG_NOTEOL, dfa::ssets, vars::start, start, vars::stop, and colormap::v.

Referenced by dfa_backref(), and lacon().