PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
regc_nfa.c File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define NISERR()   VISERR(nfa->v)
 
#define NERR(e)   VERR(nfa->v, (e))
 
#define BULK_ARC_OP_USE_SORT(nsrcarcs, ndestarcs)   ((nsrcarcs) < 4 ? 0 : ((nsrcarcs) > 32 || (ndestarcs) > 32))
 
#define CA(ct, at)   (((ct)<<CHAR_BIT) | (at))
 

Functions

static struct nfanewnfa (struct vars *v, struct colormap *cm, struct nfa *parent)
 
static void freenfa (struct nfa *nfa)
 
static struct statenewstate (struct nfa *nfa)
 
static struct statenewfstate (struct nfa *nfa, int flag)
 
static void dropstate (struct nfa *nfa, struct state *s)
 
static void freestate (struct nfa *nfa, struct state *s)
 
static void destroystate (struct nfa *nfa, struct state *s)
 
static void newarc (struct nfa *nfa, int t, color co, struct state *from, struct state *to)
 
static void createarc (struct nfa *nfa, int t, color co, struct state *from, struct state *to)
 
static struct arcallocarc (struct nfa *nfa, struct state *s)
 
static void freearc (struct nfa *nfa, struct arc *victim)
 
static void changearctarget (struct arc *a, struct state *newto)
 
static int hasnonemptyout (struct state *s)
 
static struct arcfindarc (struct state *s, int type, color co)
 
static void cparc (struct nfa *nfa, struct arc *oa, struct state *from, struct state *to)
 
static void sortins (struct nfa *nfa, struct state *s)
 
static int sortins_cmp (const void *a, const void *b)
 
static void sortouts (struct nfa *nfa, struct state *s)
 
static int sortouts_cmp (const void *a, const void *b)
 
static void moveins (struct nfa *nfa, struct state *oldState, struct state *newState)
 
static void copyins (struct nfa *nfa, struct state *oldState, struct state *newState)
 
static void mergeins (struct nfa *nfa, struct state *s, struct arc **arcarray, int arccount)
 
static void moveouts (struct nfa *nfa, struct state *oldState, struct state *newState)
 
static void copyouts (struct nfa *nfa, struct state *oldState, struct state *newState)
 
static void cloneouts (struct nfa *nfa, struct state *old, struct state *from, struct state *to, int type)
 
static void delsub (struct nfa *nfa, struct state *lp, struct state *rp)
 
static void deltraverse (struct nfa *nfa, struct state *leftend, struct state *s)
 
static void dupnfa (struct nfa *nfa, struct state *start, struct state *stop, struct state *from, struct state *to)
 
static void duptraverse (struct nfa *nfa, struct state *s, struct state *stmp)
 
static void cleartraverse (struct nfa *nfa, struct state *s)
 
static struct statesingle_color_transition (struct state *s1, struct state *s2)
 
static void specialcolors (struct nfa *nfa)
 
static long optimize (struct nfa *nfa, FILE *f)
 
static void pullback (struct nfa *nfa, FILE *f)
 
static int pull (struct nfa *nfa, struct arc *con, struct state **intermediates)
 
static void pushfwd (struct nfa *nfa, FILE *f)
 
static int push (struct nfa *nfa, struct arc *con, struct state **intermediates)
 
static int combine (struct arc *con, struct arc *a)
 
static void fixempties (struct nfa *nfa, FILE *f)
 
static struct stateemptyreachable (struct nfa *nfa, struct state *s, struct state *lastfound, struct arc **inarcsorig)
 
static int isconstraintarc (struct arc *a)
 
static int hasconstraintout (struct state *s)
 
static void fixconstraintloops (struct nfa *nfa, FILE *f)
 
static int findconstraintloop (struct nfa *nfa, struct state *s)
 
static void breakconstraintloop (struct nfa *nfa, struct state *sinitial)
 
static void clonesuccessorstates (struct nfa *nfa, struct state *ssource, struct state *sclone, struct state *spredecessor, struct arc *refarc, char *curdonemap, char *outerdonemap, int nstates)
 
static void cleanup (struct nfa *nfa)
 
static void markreachable (struct nfa *nfa, struct state *s, struct state *okay, struct state *mark)
 
static void markcanreach (struct nfa *nfa, struct state *s, struct state *okay, struct state *mark)
 
static long analyze (struct nfa *nfa)
 
static void compact (struct nfa *nfa, struct cnfa *cnfa)
 
static void carcsort (struct carc *first, size_t n)
 
static int carc_cmp (const void *a, const void *b)
 
static void freecnfa (struct cnfa *cnfa)
 
static void dumpnfa (struct nfa *nfa, FILE *f)
 

Macro Definition Documentation

#define BULK_ARC_OP_USE_SORT (   nsrcarcs,
  ndestarcs 
)    ((nsrcarcs) < 4 ? 0 : ((nsrcarcs) > 32 || (ndestarcs) > 32))

Definition at line 720 of file regc_nfa.c.

Referenced by copyins(), copyouts(), moveins(), and moveouts().

#define CA (   ct,
  at 
)    (((ct)<<CHAR_BIT) | (at))

Referenced by combine().

Function Documentation

static struct arc* allocarc ( struct nfa nfa,
struct state s 
)
static

Definition at line 368 of file regc_nfa.c.

References arcbatch::a, ABSIZE, assert, state::free, i, MALLOC, NERR, arcbatch::next, state::noas, NULL, state::oas, REG_ESPACE, REG_ETOOBIG, REG_MAX_COMPILE_SPACE, vars::spaceused, arc::type, and nfa::v.

Referenced by createarc().

370 {
371  struct arc *a;
372 
373  /* shortcut */
374  if (s->free == NULL && s->noas < ABSIZE)
375  {
376  a = &s->oas.a[s->noas];
377  s->noas++;
378  return a;
379  }
380 
381  /* if none at hand, get more */
382  if (s->free == NULL)
383  {
384  struct arcbatch *newAb;
385  int i;
386 
387  if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
388  {
389  NERR(REG_ETOOBIG);
390  return NULL;
391  }
392  newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
393  if (newAb == NULL)
394  {
395  NERR(REG_ESPACE);
396  return NULL;
397  }
398  nfa->v->spaceused += sizeof(struct arcbatch);
399  newAb->next = s->oas.next;
400  s->oas.next = newAb;
401 
402  for (i = 0; i < ABSIZE; i++)
403  {
404  newAb->a[i].type = 0;
405  newAb->a[i].freechain = &newAb->a[i + 1];
406  }
407  newAb->a[ABSIZE - 1].freechain = NULL;
408  s->free = &newAb->a[0];
409  }
410  assert(s->free != NULL);
411 
412  a = s->free;
413  s->free = a->freechain;
414  return a;
415 }
int type
Definition: regguts.h:278
#define NERR(e)
Definition: regc_nfa.c:40
int noas
Definition: regguts.h:312
#define REG_ESPACE
Definition: regex.h:149
Definition: regguts.h:276
struct arc a[ABSIZE]
Definition: regguts.h:295
#define MALLOC(n)
Definition: regcustom.h:53
#define REG_MAX_COMPILE_SPACE
Definition: regguts.h:383
struct arc * free
Definition: regguts.h:307
struct vars * v
Definition: regguts.h:328
struct arcbatch * next
Definition: regguts.h:293
#define assert(TEST)
Definition: imath.c:37
#define NULL
Definition: c.h:226
#define REG_ETOOBIG
Definition: regex.h:155
int i
#define ABSIZE
Definition: regguts.h:294
size_t spaceused
Definition: regcomp.c:256
struct arcbatch oas
Definition: regguts.h:311
static long analyze ( struct nfa nfa)
static

Definition at line 2816 of file regc_nfa.c.

References NISERR, NULL, arc::outchain, state::outs, nfa::post, nfa::pre, REG_UEMPTYMATCH, REG_UIMPOSSIBLE, and arc::to.

Referenced by GetCommandLogLevel(), and optimize().

2817 {
2818  struct arc *a;
2819  struct arc *aa;
2820 
2821  if (NISERR())
2822  return 0;
2823 
2824  if (nfa->pre->outs == NULL)
2825  return REG_UIMPOSSIBLE;
2826  for (a = nfa->pre->outs; a != NULL; a = a->outchain)
2827  for (aa = a->to->outs; aa != NULL; aa = aa->outchain)
2828  if (aa->to == nfa->post)
2829  return REG_UEMPTYMATCH;
2830  return 0;
2831 }
#define REG_UIMPOSSIBLE
Definition: regex.h:72
#define NISERR()
Definition: regc_nfa.c:39
Definition: regguts.h:276
struct state * post
Definition: regguts.h:320
struct arc * outchain
Definition: regguts.h:282
struct arc * outs
Definition: regguts.h:306
struct state * to
Definition: regguts.h:281
#define REG_UEMPTYMATCH
Definition: regex.h:71
#define NULL
Definition: c.h:226
struct state * pre
Definition: regguts.h:317
static void breakconstraintloop ( struct nfa nfa,
struct state sinitial 
)
static

Definition at line 2351 of file regc_nfa.c.

References assert, clonesuccessorstates(), cparc(), freearc(), freestate(), arc::from, isconstraintarc(), newstate(), state::next, NISERR, state::nouts, nfa::nstates, NULL, arc::outchain, state::outs, nfa::states, state::tmp, and arc::to.

Referenced by findconstraintloop().

2352 {
2353  struct state *s;
2354  struct state *shead;
2355  struct state *stail;
2356  struct state *sclone;
2357  struct state *nexts;
2358  struct arc *refarc;
2359  struct arc *a;
2360  struct arc *nexta;
2361 
2362  /*
2363  * Start by identifying which loop step we want to break at.
2364  * Preferentially this is one with only one constraint arc. (XXX are
2365  * there any other secondary heuristics we want to use here?) Set refarc
2366  * to point to the selected lone constraint arc, if there is one.
2367  */
2368  refarc = NULL;
2369  s = sinitial;
2370  do
2371  {
2372  nexts = s->tmp;
2373  assert(nexts != s); /* should not see any one-element loops */
2374  if (refarc == NULL)
2375  {
2376  int narcs = 0;
2377 
2378  for (a = s->outs; a != NULL; a = a->outchain)
2379  {
2380  if (a->to == nexts && isconstraintarc(a))
2381  {
2382  refarc = a;
2383  narcs++;
2384  }
2385  }
2386  assert(narcs > 0);
2387  if (narcs > 1)
2388  refarc = NULL; /* multiple constraint arcs here, no good */
2389  }
2390  s = nexts;
2391  } while (s != sinitial);
2392 
2393  if (refarc)
2394  {
2395  /* break at the refarc */
2396  shead = refarc->from;
2397  stail = refarc->to;
2398  assert(stail == shead->tmp);
2399  }
2400  else
2401  {
2402  /* for lack of a better idea, break after sinitial */
2403  shead = sinitial;
2404  stail = sinitial->tmp;
2405  }
2406 
2407  /*
2408  * Reset the tmp fields so that we can use them for local storage in
2409  * clonesuccessorstates. (findconstraintloop won't mind, since it's just
2410  * going to abandon its search anyway.)
2411  */
2412  for (s = nfa->states; s != NULL; s = s->next)
2413  s->tmp = NULL;
2414 
2415  /*
2416  * Recursively build clone state(s) as needed.
2417  */
2418  sclone = newstate(nfa);
2419  if (sclone == NULL)
2420  {
2421  assert(NISERR());
2422  return;
2423  }
2424 
2425  clonesuccessorstates(nfa, stail, sclone, shead, refarc,
2426  NULL, NULL, nfa->nstates);
2427 
2428  if (NISERR())
2429  return;
2430 
2431  /*
2432  * It's possible that sclone has no outarcs at all, in which case it's
2433  * useless. (We don't try extremely hard to get rid of useless states
2434  * here, but this is an easy and fairly common case.)
2435  */
2436  if (sclone->nouts == 0)
2437  {
2438  freestate(nfa, sclone);
2439  sclone = NULL;
2440  }
2441 
2442  /*
2443  * Move shead's constraint-loop arcs to point to sclone, or just drop them
2444  * if we discovered we don't need sclone.
2445  */
2446  for (a = shead->outs; a != NULL; a = nexta)
2447  {
2448  nexta = a->outchain;
2449  if (a->to == stail && isconstraintarc(a))
2450  {
2451  if (sclone)
2452  cparc(nfa, a, shead, sclone);
2453  freearc(nfa, a);
2454  if (NISERR())
2455  break;
2456  }
2457  }
2458 }
#define NISERR()
Definition: regc_nfa.c:39
struct state * from
Definition: regguts.h:280
Definition: regguts.h:276
struct state * states
Definition: regguts.h:322
struct arc * outchain
Definition: regguts.h:282
static int isconstraintarc(struct arc *a)
Definition: regc_nfa.c:2124
#define assert(TEST)
Definition: imath.c:37
struct state * next
Definition: regguts.h:309
int nstates
Definition: regguts.h:321
struct arc * outs
Definition: regguts.h:306
static void clonesuccessorstates(struct nfa *nfa, struct state *ssource, struct state *sclone, struct state *spredecessor, struct arc *refarc, char *curdonemap, char *outerdonemap, int nstates)
Definition: regc_nfa.c:2497
static void freearc(struct nfa *nfa, struct arc *victim)
Definition: regc_nfa.c:421
struct state * tmp
Definition: regguts.h:308
struct state * to
Definition: regguts.h:281
static struct state * newstate(struct nfa *nfa)
Definition: regc_nfa.c:124
#define NULL
Definition: c.h:226
Definition: regguts.h:298
int nouts
Definition: regguts.h:305
static void cparc(struct nfa *nfa, struct arc *oa, struct state *from, struct state *to)
Definition: regc_nfa.c:570
static void freestate(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:218
static int carc_cmp ( const void *  a,
const void *  b 
)
static

Definition at line 2932 of file regc_nfa.c.

References carc::co, and carc::to.

Referenced by carcsort().

2933 {
2934  const struct carc *aa = (const struct carc *) a;
2935  const struct carc *bb = (const struct carc *) b;
2936 
2937  if (aa->co < bb->co)
2938  return -1;
2939  if (aa->co > bb->co)
2940  return +1;
2941  if (aa->to < bb->to)
2942  return -1;
2943  if (aa->to > bb->to)
2944  return +1;
2945  return 0;
2946 }
int to
Definition: regguts.h:351
color co
Definition: regguts.h:350
Definition: regguts.h:348
static void carcsort ( struct carc first,
size_t  n 
)
static

Definition at line 2925 of file regc_nfa.c.

References carc_cmp(), and qsort.

Referenced by compact().

2926 {
2927  if (n > 1)
2928  qsort(first, n, sizeof(struct carc), carc_cmp);
2929 }
static int carc_cmp(const void *a, const void *b)
Definition: regc_nfa.c:2932
#define qsort(a, b, c, d)
Definition: port.h:440
Definition: regguts.h:348
static void changearctarget ( struct arc a,
struct state newto 
)
static

Definition at line 495 of file regc_nfa.c.

References assert, arc::inchain, arc::inchainRev, state::ins, state::nins, NULL, and arc::to.

Referenced by moveins().

496 {
497  struct state *oldto = a->to;
498  struct arc *predecessor;
499 
500  assert(oldto != newto);
501 
502  /* take it off old target's in-chain */
503  assert(oldto != NULL);
504  predecessor = a->inchainRev;
505  if (predecessor == NULL)
506  {
507  assert(oldto->ins == a);
508  oldto->ins = a->inchain;
509  }
510  else
511  {
512  assert(predecessor->inchain == a);
513  predecessor->inchain = a->inchain;
514  }
515  if (a->inchain != NULL)
516  {
517  assert(a->inchain->inchainRev == a);
518  a->inchain->inchainRev = predecessor;
519  }
520  oldto->nins--;
521 
522  a->to = newto;
523 
524  /* prepend it to new target's in-chain */
525  a->inchain = newto->ins;
526  a->inchainRev = NULL;
527  if (newto->ins)
528  newto->ins->inchainRev = a;
529  newto->ins = a;
530  newto->nins++;
531 }
Definition: regguts.h:276
int nins
Definition: regguts.h:303
struct arc * inchainRev
Definition: regguts.h:286
#define assert(TEST)
Definition: imath.c:37
struct state * to
Definition: regguts.h:281
#define NULL
Definition: c.h:226
Definition: regguts.h:298
struct arc * inchain
Definition: regguts.h:285
struct arc * ins
Definition: regguts.h:304
static void cleanup ( struct nfa nfa)
static

Definition at line 2729 of file regc_nfa.c.

References assert, cleartraverse(), dropstate(), state::flag, markcanreach(), markreachable(), state::next, state::nins, NISERR, state::no, nfa::nstates, NULL, nfa::post, nfa::pre, nfa::states, and state::tmp.

Referenced by optimize().

2730 {
2731  struct state *s;
2732  struct state *nexts;
2733  int n;
2734 
2735  if (NISERR())
2736  return;
2737 
2738  /* clear out unreachable or dead-end states */
2739  /* use pre to mark reachable, then post to mark can-reach-post */
2740  markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre);
2741  markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
2742  for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
2743  {
2744  nexts = s->next;
2745  if (s->tmp != nfa->post && !s->flag)
2746  dropstate(nfa, s);
2747  }
2748  assert(NISERR() || nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
2749  cleartraverse(nfa, nfa->pre);
2750  assert(NISERR() || nfa->post->nins == 0 || nfa->post->tmp == NULL);
2751  /* the nins==0 (final unreachable) case will be caught later */
2752 
2753  /* renumber surviving states */
2754  n = 0;
2755  for (s = nfa->states; s != NULL; s = s->next)
2756  s->no = n++;
2757  nfa->nstates = n;
2758 }
#define NISERR()
Definition: regc_nfa.c:39
int no
Definition: regguts.h:300
static void markreachable(struct nfa *nfa, struct state *s, struct state *okay, struct state *mark)
Definition: regc_nfa.c:2764
struct state * states
Definition: regguts.h:322
int nins
Definition: regguts.h:303
struct state * post
Definition: regguts.h:320
static void markcanreach(struct nfa *nfa, struct state *s, struct state *okay, struct state *mark)
Definition: regc_nfa.c:2790
#define assert(TEST)
Definition: imath.c:37
struct state * next
Definition: regguts.h:309
int nstates
Definition: regguts.h:321
char flag
Definition: regguts.h:302
static void cleartraverse(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:1331
struct state * tmp
Definition: regguts.h:308
#define NULL
Definition: c.h:226
Definition: regguts.h:298
static void dropstate(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:202
struct state * pre
Definition: regguts.h:317
static void cleartraverse ( struct nfa nfa,
struct state s 
)
static

Definition at line 1331 of file regc_nfa.c.

References NERR, NULL, arc::outchain, state::outs, vars::re, REG_ETOOBIG, STACK_TOO_DEEP, state::tmp, arc::to, and nfa::v.

Referenced by cleanup(), and dupnfa().

1333 {
1334  struct arc *a;
1335 
1336  /* Since this is recursive, it could be driven to stack overflow */
1337  if (STACK_TOO_DEEP(nfa->v->re))
1338  {
1339  NERR(REG_ETOOBIG);
1340  return;
1341  }
1342 
1343  if (s->tmp == NULL)
1344  return;
1345  s->tmp = NULL;
1346 
1347  for (a = s->outs; a != NULL; a = a->outchain)
1348  cleartraverse(nfa, a->to);
1349 }
#define NERR(e)
Definition: regc_nfa.c:40
Definition: regguts.h:276
struct vars * v
Definition: regguts.h:328
regex_t * re
Definition: regcomp.c:228
struct arc * outchain
Definition: regguts.h:282
struct arc * outs
Definition: regguts.h:306
static void cleartraverse(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:1331
struct state * tmp
Definition: regguts.h:308
struct state * to
Definition: regguts.h:281
#define NULL
Definition: c.h:226
#define REG_ETOOBIG
Definition: regex.h:155
#define STACK_TOO_DEEP(re)
Definition: regguts.h:455
static void cloneouts ( struct nfa nfa,
struct state old,
struct state from,
struct state to,
int  type 
)
static

Definition at line 1175 of file regc_nfa.c.

References assert, arc::co, newarc(), NULL, arc::outchain, and state::outs.

1180 {
1181  struct arc *a;
1182 
1183  assert(old != from);
1184 
1185  for (a = old->outs; a != NULL; a = a->outchain)
1186  newarc(nfa, type, a->co, from, to);
1187 }
int type
Definition: regguts.h:278
Definition: regguts.h:276
struct arc * outchain
Definition: regguts.h:282
#define assert(TEST)
Definition: imath.c:37
struct arc * outs
Definition: regguts.h:306
color co
Definition: regguts.h:279
#define NULL
Definition: c.h:226
static void newarc(struct nfa *nfa, int t, color co, struct state *from, struct state *to)
Definition: regc_nfa.c:276
static void clonesuccessorstates ( struct nfa nfa,
struct state ssource,
struct state sclone,
struct state spredecessor,
struct arc refarc,
char *  curdonemap,
char *  outerdonemap,
int  nstates 
)
static

Definition at line 2497 of file regc_nfa.c.

References a2, assert, arc::co, cparc(), dropstate(), FREE, arc::from, hasconstraintout(), state::ins, isconstraintarc(), MALLOC, NERR, newstate(), state::nins, NISERR, state::no, NULL, arc::outchain, state::outs, vars::re, REG_ESPACE, REG_ETOOBIG, STACK_TOO_DEEP, state::tmp, arc::to, arc::type, and nfa::v.

Referenced by breakconstraintloop().

2505 {
2506  char *donemap;
2507  struct arc *a;
2508 
2509  /* Since this is recursive, it could be driven to stack overflow */
2510  if (STACK_TOO_DEEP(nfa->v->re))
2511  {
2512  NERR(REG_ETOOBIG);
2513  return;
2514  }
2515 
2516  /* If this state hasn't already got a donemap, create one */
2517  donemap = curdonemap;
2518  if (donemap == NULL)
2519  {
2520  donemap = (char *) MALLOC(nstates * sizeof(char));
2521  if (donemap == NULL)
2522  {
2523  NERR(REG_ESPACE);
2524  return;
2525  }
2526 
2527  if (outerdonemap != NULL)
2528  {
2529  /*
2530  * Not at outermost recursion level, so copy the outer level's
2531  * donemap; this ensures that we see states in process of being
2532  * visited at outer levels, or already merged into predecessor
2533  * states, as ones we shouldn't traverse back to.
2534  */
2535  memcpy(donemap, outerdonemap, nstates * sizeof(char));
2536  }
2537  else
2538  {
2539  /* At outermost level, only spredecessor is off-limits */
2540  memset(donemap, 0, nstates * sizeof(char));
2541  assert(spredecessor->no < nstates);
2542  donemap[spredecessor->no] = 1;
2543  }
2544  }
2545 
2546  /* Mark ssource as visited in the donemap */
2547  assert(ssource->no < nstates);
2548  assert(donemap[ssource->no] == 0);
2549  donemap[ssource->no] = 1;
2550 
2551  /*
2552  * We proceed by first cloning all of ssource's outarcs, creating new
2553  * clone states as needed but not doing more with them than that. Then in
2554  * a second pass, recurse to process the child clone states. This allows
2555  * us to have only one child clone state per reachable source state, even
2556  * when there are multiple outarcs leading to the same state. Also, when
2557  * we do visit a child state, its set of inarcs is known exactly, which
2558  * makes it safe to apply the constraint-is-already-checked optimization.
2559  * Also, this ensures that we've merged all the states we can into the
2560  * current clone before we recurse to any children, thus possibly saving
2561  * them from making extra images of those states.
2562  *
2563  * While this function runs, child clone states of the current state are
2564  * marked by setting their tmp fields to point to the original state they
2565  * were cloned from. This makes it possible to detect multiple outarcs
2566  * leading to the same state, and also makes it easy to distinguish clone
2567  * states from original states (which will have tmp == NULL).
2568  */
2569  for (a = ssource->outs; a != NULL && !NISERR(); a = a->outchain)
2570  {
2571  struct state *sto = a->to;
2572 
2573  /*
2574  * We do not consider cloning successor states that have no constraint
2575  * outarcs; just link to them as-is. They cannot be part of a
2576  * constraint loop so there is no need to make copies. In particular,
2577  * this rule keeps us from trying to clone the post state, which would
2578  * be a bad idea.
2579  */
2580  if (isconstraintarc(a) && hasconstraintout(sto))
2581  {
2582  struct state *prevclone;
2583  int canmerge;
2584  struct arc *a2;
2585 
2586  /*
2587  * Back-link constraint arcs must not be followed. Nor is there a
2588  * need to revisit states previously merged into this clone.
2589  */
2590  assert(sto->no < nstates);
2591  if (donemap[sto->no] != 0)
2592  continue;
2593 
2594  /*
2595  * Check whether we already have a child clone state for this
2596  * source state.
2597  */
2598  prevclone = NULL;
2599  for (a2 = sclone->outs; a2 != NULL; a2 = a2->outchain)
2600  {
2601  if (a2->to->tmp == sto)
2602  {
2603  prevclone = a2->to;
2604  break;
2605  }
2606  }
2607 
2608  /*
2609  * If this arc is labeled the same as refarc, or the same as any
2610  * arc we must have traversed to get to sclone, then no additional
2611  * constraints need to be met to get to sto, so we should just
2612  * merge its outarcs into sclone.
2613  */
2614  if (refarc && a->type == refarc->type && a->co == refarc->co)
2615  canmerge = 1;
2616  else
2617  {
2618  struct state *s;
2619 
2620  canmerge = 0;
2621  for (s = sclone; s->ins; s = s->ins->from)
2622  {
2623  if (s->nins == 1 &&
2624  a->type == s->ins->type && a->co == s->ins->co)
2625  {
2626  canmerge = 1;
2627  break;
2628  }
2629  }
2630  }
2631 
2632  if (canmerge)
2633  {
2634  /*
2635  * We can merge into sclone. If we previously made a child
2636  * clone state, drop it; there's no need to visit it. (This
2637  * can happen if ssource has multiple pathways to sto, and we
2638  * only just now found one that is provably a no-op.)
2639  */
2640  if (prevclone)
2641  dropstate(nfa, prevclone); /* kills our outarc, too */
2642 
2643  /* Recurse to merge sto's outarcs into sclone */
2645  sto,
2646  sclone,
2647  spredecessor,
2648  refarc,
2649  donemap,
2650  outerdonemap,
2651  nstates);
2652  /* sto should now be marked as previously visited */
2653  assert(NISERR() || donemap[sto->no] == 1);
2654  }
2655  else if (prevclone)
2656  {
2657  /*
2658  * We already have a clone state for this successor, so just
2659  * make another arc to it.
2660  */
2661  cparc(nfa, a, sclone, prevclone);
2662  }
2663  else
2664  {
2665  /*
2666  * We need to create a new successor clone state.
2667  */
2668  struct state *stoclone;
2669 
2670  stoclone = newstate(nfa);
2671  if (stoclone == NULL)
2672  {
2673  assert(NISERR());
2674  break;
2675  }
2676  /* Mark it as to what it's a clone of */
2677  stoclone->tmp = sto;
2678  /* ... and add the outarc leading to it */
2679  cparc(nfa, a, sclone, stoclone);
2680  }
2681  }
2682  else
2683  {
2684  /*
2685  * Non-constraint outarcs just get copied to sclone, as do outarcs
2686  * leading to states with no constraint outarc.
2687  */
2688  cparc(nfa, a, sclone, sto);
2689  }
2690  }
2691 
2692  /*
2693  * If we are at outer level for this clone state, recurse to all its child
2694  * clone states, clearing their tmp fields as we go. (If we're not
2695  * outermost for sclone, leave this to be done by the outer call level.)
2696  * Note that if we have multiple outarcs leading to the same clone state,
2697  * it will only be recursed-to once.
2698  */
2699  if (curdonemap == NULL)
2700  {
2701  for (a = sclone->outs; a != NULL && !NISERR(); a = a->outchain)
2702  {
2703  struct state *stoclone = a->to;
2704  struct state *sto = stoclone->tmp;
2705 
2706  if (sto != NULL)
2707  {
2708  stoclone->tmp = NULL;
2710  sto,
2711  stoclone,
2712  spredecessor,
2713  refarc,
2714  NULL,
2715  donemap,
2716  nstates);
2717  }
2718  }
2719 
2720  /* Don't forget to free sclone's donemap when done with it */
2721  FREE(donemap);
2722  }
2723 }
#define NISERR()
Definition: regc_nfa.c:39
int no
Definition: regguts.h:300
struct state * from
Definition: regguts.h:280
int type
Definition: regguts.h:278
#define NERR(e)
Definition: regc_nfa.c:40
#define REG_ESPACE
Definition: regex.h:149
Definition: regguts.h:276
int nins
Definition: regguts.h:303
#define FREE(p)
Definition: regcustom.h:54
#define MALLOC(n)
Definition: regcustom.h:53
struct vars * v
Definition: regguts.h:328
regex_t * re
Definition: regcomp.c:228
struct arc * outchain
Definition: regguts.h:282
static int isconstraintarc(struct arc *a)
Definition: regc_nfa.c:2124
#define assert(TEST)
Definition: imath.c:37
struct arc * outs
Definition: regguts.h:306
static int hasconstraintout(struct state *s)
Definition: regc_nfa.c:2142
static void clonesuccessorstates(struct nfa *nfa, struct state *ssource, struct state *sclone, struct state *spredecessor, struct arc *refarc, char *curdonemap, char *outerdonemap, int nstates)
Definition: regc_nfa.c:2497
struct state * tmp
Definition: regguts.h:308
struct state * to
Definition: regguts.h:281
static struct state * newstate(struct nfa *nfa)
Definition: regc_nfa.c:124
color co
Definition: regguts.h:279
#define NULL
Definition: c.h:226
Definition: regguts.h:298
#define REG_ETOOBIG
Definition: regex.h:155
static void dropstate(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:202
static void cparc(struct nfa *nfa, struct arc *oa, struct state *from, struct state *to)
Definition: regc_nfa.c:570
#define STACK_TOO_DEEP(re)
Definition: regguts.h:455
struct arc * ins
Definition: regguts.h:304
static FormData_pg_attribute a2
Definition: heap.c:148
static int combine ( struct arc con,
struct arc a 
)
static

Definition at line 1815 of file regc_nfa.c.

References AHEAD, assert, BEHIND, CA, arc::co, COMPATIBLE, INCOMPATIBLE, LACON, NOTREACHED, PLAIN, SATISFIED, and arc::type.

Referenced by pull(), and push().

1817 {
1818 #define CA(ct,at) (((ct)<<CHAR_BIT) | (at))
1819 
1820  switch (CA(con->type, a->type))
1821  {
1822  case CA('^', PLAIN): /* newlines are handled separately */
1823  case CA('$', PLAIN):
1824  return INCOMPATIBLE;
1825  break;
1826  case CA(AHEAD, PLAIN): /* color constraints meet colors */
1827  case CA(BEHIND, PLAIN):
1828  if (con->co == a->co)
1829  return SATISFIED;
1830  return INCOMPATIBLE;
1831  break;
1832  case CA('^', '^'): /* collision, similar constraints */
1833  case CA('$', '$'):
1834  case CA(AHEAD, AHEAD):
1835  case CA(BEHIND, BEHIND):
1836  if (con->co == a->co) /* true duplication */
1837  return SATISFIED;
1838  return INCOMPATIBLE;
1839  break;
1840  case CA('^', BEHIND): /* collision, dissimilar constraints */
1841  case CA(BEHIND, '^'):
1842  case CA('$', AHEAD):
1843  case CA(AHEAD, '$'):
1844  return INCOMPATIBLE;
1845  break;
1846  case CA('^', '$'): /* constraints passing each other */
1847  case CA('^', AHEAD):
1848  case CA(BEHIND, '$'):
1849  case CA(BEHIND, AHEAD):
1850  case CA('$', '^'):
1851  case CA('$', BEHIND):
1852  case CA(AHEAD, '^'):
1853  case CA(AHEAD, BEHIND):
1854  case CA('^', LACON):
1855  case CA(BEHIND, LACON):
1856  case CA('$', LACON):
1857  case CA(AHEAD, LACON):
1858  return COMPATIBLE;
1859  break;
1860  }
1861  assert(NOTREACHED);
1862  return INCOMPATIBLE; /* for benefit of blind compilers */
1863 }
int type
Definition: regguts.h:278
#define CA(ct, at)
#define BEHIND
Definition: regcomp.c:288
#define NOTREACHED
Definition: regguts.h:91
#define SATISFIED
Definition: regcomp.c:161
#define LACON
Definition: regcomp.c:286
#define assert(TEST)
Definition: imath.c:37
#define COMPATIBLE
Definition: regcomp.c:162
#define PLAIN
Definition: regcomp.c:278
#define INCOMPATIBLE
Definition: regcomp.c:160
#define AHEAD
Definition: regcomp.c:287
static void compact ( struct nfa nfa,
struct cnfa cnfa 
)
static

Definition at line 2837 of file regc_nfa.c.

References cnfa::arcs, assert, nfa::bos, cnfa::bos, carcsort(), nfa::cm, CNFA_NOPROGRESS, arc::co, carc::co, COLORLESS, nfa::eos, cnfa::eos, cnfa::flags, FREE, HASLACONS, LACON, MALLOC, maxcolor(), cnfa::ncolors, NERR, state::next, NISERR, state::no, state::nouts, cnfa::nstates, NULL, arc::outchain, state::outs, PLAIN, nfa::post, cnfa::post, nfa::pre, cnfa::pre, REG_ASSERT, REG_ESPACE, nfa::states, cnfa::states, cnfa::stflags, arc::to, carc::to, and arc::type.

2839 {
2840  struct state *s;
2841  struct arc *a;
2842  size_t nstates;
2843  size_t narcs;
2844  struct carc *ca;
2845  struct carc *first;
2846 
2847  assert(!NISERR());
2848 
2849  nstates = 0;
2850  narcs = 0;
2851  for (s = nfa->states; s != NULL; s = s->next)
2852  {
2853  nstates++;
2854  narcs += s->nouts + 1; /* need one extra for endmarker */
2855  }
2856 
2857  cnfa->stflags = (char *) MALLOC(nstates * sizeof(char));
2858  cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
2859  cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
2860  if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL)
2861  {
2862  if (cnfa->stflags != NULL)
2863  FREE(cnfa->stflags);
2864  if (cnfa->states != NULL)
2865  FREE(cnfa->states);
2866  if (cnfa->arcs != NULL)
2867  FREE(cnfa->arcs);
2868  NERR(REG_ESPACE);
2869  return;
2870  }
2871  cnfa->nstates = nstates;
2872  cnfa->pre = nfa->pre->no;
2873  cnfa->post = nfa->post->no;
2874  cnfa->bos[0] = nfa->bos[0];
2875  cnfa->bos[1] = nfa->bos[1];
2876  cnfa->eos[0] = nfa->eos[0];
2877  cnfa->eos[1] = nfa->eos[1];
2878  cnfa->ncolors = maxcolor(nfa->cm) + 1;
2879  cnfa->flags = 0;
2880 
2881  ca = cnfa->arcs;
2882  for (s = nfa->states; s != NULL; s = s->next)
2883  {
2884  assert((size_t) s->no < nstates);
2885  cnfa->stflags[s->no] = 0;
2886  cnfa->states[s->no] = ca;
2887  first = ca;
2888  for (a = s->outs; a != NULL; a = a->outchain)
2889  switch (a->type)
2890  {
2891  case PLAIN:
2892  ca->co = a->co;
2893  ca->to = a->to->no;
2894  ca++;
2895  break;
2896  case LACON:
2897  assert(s->no != cnfa->pre);
2898  ca->co = (color) (cnfa->ncolors + a->co);
2899  ca->to = a->to->no;
2900  ca++;
2901  cnfa->flags |= HASLACONS;
2902  break;
2903  default:
2904  NERR(REG_ASSERT);
2905  break;
2906  }
2907  carcsort(first, ca - first);
2908  ca->co = COLORLESS;
2909  ca->to = 0;
2910  ca++;
2911  }
2912  assert(ca == &cnfa->arcs[narcs]);
2913  assert(cnfa->nstates != 0);
2914 
2915  /* mark no-progress states */
2916  for (a = nfa->pre->outs; a != NULL; a = a->outchain)
2917  cnfa->stflags[a->to->no] = CNFA_NOPROGRESS;
2918  cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS;
2919 }
#define NISERR()
Definition: regc_nfa.c:39
int no
Definition: regguts.h:300
int type
Definition: regguts.h:278
#define NERR(e)
Definition: regc_nfa.c:40
#define CNFA_NOPROGRESS
Definition: regguts.h:365
#define REG_ESPACE
Definition: regex.h:149
Definition: regguts.h:276
struct state * states
Definition: regguts.h:322
short color
Definition: regguts.h:134
int pre
Definition: regguts.h:360
color eos[2]
Definition: regguts.h:327
#define FREE(p)
Definition: regcustom.h:54
struct state * post
Definition: regguts.h:320
struct carc ** states
Definition: regguts.h:366
#define MALLOC(n)
Definition: regcustom.h:53
int nstates
Definition: regguts.h:356
#define LACON
Definition: regcomp.c:286
struct arc * outchain
Definition: regguts.h:282
color bos[2]
Definition: regguts.h:326
#define assert(TEST)
Definition: imath.c:37
struct colormap * cm
Definition: regguts.h:325
struct state * next
Definition: regguts.h:309
struct arc * outs
Definition: regguts.h:306
char * stflags
Definition: regguts.h:364
color bos[2]
Definition: regguts.h:362
struct state * to
Definition: regguts.h:281
#define PLAIN
Definition: regcomp.c:278
int to
Definition: regguts.h:351
color co
Definition: regguts.h:279
#define REG_ASSERT
Definition: regex.h:151
#define NULL
Definition: c.h:226
Definition: regguts.h:298
int flags
Definition: regguts.h:358
struct carc * arcs
Definition: regguts.h:368
int post
Definition: regguts.h:361
int nouts
Definition: regguts.h:305
color co
Definition: regguts.h:350
static color maxcolor(struct colormap *cm)
Definition: regc_color.c:172
static void carcsort(struct carc *first, size_t n)
Definition: regc_nfa.c:2925
#define COLORLESS
Definition: regguts.h:137
#define HASLACONS
Definition: regguts.h:359
struct state * pre
Definition: regguts.h:317
color eos[2]
Definition: regguts.h:363
int ncolors
Definition: regguts.h:357
Definition: regguts.h:348
static void copyins ( struct nfa nfa,
struct state oldState,
struct state newState 
)
static

Definition at line 828 of file regc_nfa.c.

References assert, BULK_ARC_OP_USE_SORT, CANCEL_REQUESTED, arc::co, cparc(), createarc(), arc::from, arc::inchain, state::ins, NERR, state::nins, NISERR, NOTREACHED, NULL, vars::re, REG_CANCEL, sortins(), sortins_cmp(), arc::type, and nfa::v.

Referenced by pull().

831 {
832  assert(oldState != newState);
833 
834  if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins))
835  {
836  /* With not too many arcs, just do them one at a time */
837  struct arc *a;
838 
839  for (a = oldState->ins; a != NULL; a = a->inchain)
840  cparc(nfa, a, a->from, newState);
841  }
842  else
843  {
844  /*
845  * With many arcs, use a sort-merge approach. Note that createarc()
846  * will put new arcs onto the front of newState's chain, so it does
847  * not break our walk through the sorted part of the chain.
848  */
849  struct arc *oa;
850  struct arc *na;
851 
852  /*
853  * Because we bypass newarc() in this code path, we'd better include a
854  * cancel check.
855  */
856  if (CANCEL_REQUESTED(nfa->v->re))
857  {
858  NERR(REG_CANCEL);
859  return;
860  }
861 
862  sortins(nfa, oldState);
863  sortins(nfa, newState);
864  if (NISERR())
865  return; /* might have failed to sort */
866  oa = oldState->ins;
867  na = newState->ins;
868  while (oa != NULL && na != NULL)
869  {
870  struct arc *a = oa;
871 
872  switch (sortins_cmp(&oa, &na))
873  {
874  case -1:
875  /* newState does not have anything matching oa */
876  oa = oa->inchain;
877  createarc(nfa, a->type, a->co, a->from, newState);
878  break;
879  case 0:
880  /* match, advance in both lists */
881  oa = oa->inchain;
882  na = na->inchain;
883  break;
884  case +1:
885  /* advance only na; oa might have a match later */
886  na = na->inchain;
887  break;
888  default:
890  }
891  }
892  while (oa != NULL)
893  {
894  /* newState does not have anything matching oa */
895  struct arc *a = oa;
896 
897  oa = oa->inchain;
898  createarc(nfa, a->type, a->co, a->from, newState);
899  }
900  }
901 }
#define NISERR()
Definition: regc_nfa.c:39
struct state * from
Definition: regguts.h:280
int type
Definition: regguts.h:278
#define NERR(e)
Definition: regc_nfa.c:40
Definition: regguts.h:276
int nins
Definition: regguts.h:303
#define NOTREACHED
Definition: regguts.h:91
struct vars * v
Definition: regguts.h:328
#define BULK_ARC_OP_USE_SORT(nsrcarcs, ndestarcs)
Definition: regc_nfa.c:720
regex_t * re
Definition: regcomp.c:228
#define assert(TEST)
Definition: imath.c:37
static int sortins_cmp(const void *a, const void *b)
Definition: regc_nfa.c:624
static void createarc(struct nfa *nfa, int t, color co, struct state *from, struct state *to)
Definition: regc_nfa.c:322
color co
Definition: regguts.h:279
#define NULL
Definition: c.h:226
static void cparc(struct nfa *nfa, struct arc *oa, struct state *from, struct state *to)
Definition: regc_nfa.c:570
struct arc * inchain
Definition: regguts.h:285
#define CANCEL_REQUESTED(re)
Definition: regguts.h:452
struct arc * ins
Definition: regguts.h:304
#define REG_CANCEL
Definition: regex.h:157
static void sortins(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:582
static void copyouts ( struct nfa nfa,
struct state oldState,
struct state newState 
)
static

Definition at line 1096 of file regc_nfa.c.

References assert, BULK_ARC_OP_USE_SORT, CANCEL_REQUESTED, arc::co, cparc(), createarc(), NERR, NISERR, NOTREACHED, state::nouts, NULL, arc::outchain, state::outs, vars::re, REG_CANCEL, sortouts(), sortouts_cmp(), arc::to, arc::type, and nfa::v.

Referenced by push().

1099 {
1100  assert(oldState != newState);
1101 
1102  if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts))
1103  {
1104  /* With not too many arcs, just do them one at a time */
1105  struct arc *a;
1106 
1107  for (a = oldState->outs; a != NULL; a = a->outchain)
1108  cparc(nfa, a, newState, a->to);
1109  }
1110  else
1111  {
1112  /*
1113  * With many arcs, use a sort-merge approach. Note that createarc()
1114  * will put new arcs onto the front of newState's chain, so it does
1115  * not break our walk through the sorted part of the chain.
1116  */
1117  struct arc *oa;
1118  struct arc *na;
1119 
1120  /*
1121  * Because we bypass newarc() in this code path, we'd better include a
1122  * cancel check.
1123  */
1124  if (CANCEL_REQUESTED(nfa->v->re))
1125  {
1126  NERR(REG_CANCEL);
1127  return;
1128  }
1129 
1130  sortouts(nfa, oldState);
1131  sortouts(nfa, newState);
1132  if (NISERR())
1133  return; /* might have failed to sort */
1134  oa = oldState->outs;
1135  na = newState->outs;
1136  while (oa != NULL && na != NULL)
1137  {
1138  struct arc *a = oa;
1139 
1140  switch (sortouts_cmp(&oa, &na))
1141  {
1142  case -1:
1143  /* newState does not have anything matching oa */
1144  oa = oa->outchain;
1145  createarc(nfa, a->type, a->co, newState, a->to);
1146  break;
1147  case 0:
1148  /* match, advance in both lists */
1149  oa = oa->outchain;
1150  na = na->outchain;
1151  break;
1152  case +1:
1153  /* advance only na; oa might have a match later */
1154  na = na->outchain;
1155  break;
1156  default:
1157  assert(NOTREACHED);
1158  }
1159  }
1160  while (oa != NULL)
1161  {
1162  /* newState does not have anything matching oa */
1163  struct arc *a = oa;
1164 
1165  oa = oa->outchain;
1166  createarc(nfa, a->type, a->co, newState, a->to);
1167  }
1168  }
1169 }
#define NISERR()
Definition: regc_nfa.c:39
int type
Definition: regguts.h:278
#define NERR(e)
Definition: regc_nfa.c:40
Definition: regguts.h:276
#define NOTREACHED
Definition: regguts.h:91
struct vars * v
Definition: regguts.h:328
#define BULK_ARC_OP_USE_SORT(nsrcarcs, ndestarcs)
Definition: regc_nfa.c:720
regex_t * re
Definition: regcomp.c:228
struct arc * outchain
Definition: regguts.h:282
#define assert(TEST)
Definition: imath.c:37
struct arc * outs
Definition: regguts.h:306
struct state * to
Definition: regguts.h:281
static void createarc(struct nfa *nfa, int t, color co, struct state *from, struct state *to)
Definition: regc_nfa.c:322
color co
Definition: regguts.h:279
#define NULL
Definition: c.h:226
static void sortouts(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:649
static int sortouts_cmp(const void *a, const void *b)
Definition: regc_nfa.c:691
int nouts
Definition: regguts.h:305
static void cparc(struct nfa *nfa, struct arc *oa, struct state *from, struct state *to)
Definition: regc_nfa.c:570
#define CANCEL_REQUESTED(re)
Definition: regguts.h:452
#define REG_CANCEL
Definition: regex.h:157
static void cparc ( struct nfa nfa,
struct arc oa,
struct state from,
struct state to 
)
static

Definition at line 570 of file regc_nfa.c.

References arc::co, newarc(), and arc::type.

Referenced by breakconstraintloop(), clonesuccessorstates(), copyins(), copyouts(), duptraverse(), moveins(), moveouts(), pull(), and push().

574 {
575  newarc(nfa, oa->type, oa->co, from, to);
576 }
int type
Definition: regguts.h:278
color co
Definition: regguts.h:279
static void newarc(struct nfa *nfa, int t, color co, struct state *from, struct state *to)
Definition: regc_nfa.c:276
static void createarc ( struct nfa nfa,
int  t,
color  co,
struct state from,
struct state to 
)
static

Definition at line 322 of file regc_nfa.c.

References allocarc(), assert, nfa::cm, arc::co, colorchain(), COLORED, arc::from, arc::inchain, arc::inchainRev, state::ins, state::nins, NISERR, state::nouts, NULL, arc::outchain, arc::outchainRev, state::outs, nfa::parent, arc::to, and arc::type.

Referenced by copyins(), copyouts(), mergeins(), moveouts(), and newarc().

327 {
328  struct arc *a;
329 
330  /* the arc is physically allocated within its from-state */
331  a = allocarc(nfa, from);
332  if (NISERR())
333  return;
334  assert(a != NULL);
335 
336  a->type = t;
337  a->co = co;
338  a->to = to;
339  a->from = from;
340 
341  /*
342  * Put the new arc on the beginning, not the end, of the chains; it's
343  * simpler here, and freearc() is the same cost either way. See also the
344  * logic in moveins() and its cohorts, as well as fixempties().
345  */
346  a->inchain = to->ins;
347  a->inchainRev = NULL;
348  if (to->ins)
349  to->ins->inchainRev = a;
350  to->ins = a;
351  a->outchain = from->outs;
352  a->outchainRev = NULL;
353  if (from->outs)
354  from->outs->outchainRev = a;
355  from->outs = a;
356 
357  from->nouts++;
358  to->nins++;
359 
360  if (COLORED(a) && nfa->parent == NULL)
361  colorchain(nfa->cm, a);
362 }
#define NISERR()
Definition: regc_nfa.c:39
struct state * from
Definition: regguts.h:280
int type
Definition: regguts.h:278
Definition: regguts.h:276
int nins
Definition: regguts.h:303
#define COLORED(a)
Definition: regcomp.c:296
struct arc * inchainRev
Definition: regguts.h:286
static void colorchain(struct colormap *cm, struct arc *a)
Definition: regc_color.c:975
struct arc * outchain
Definition: regguts.h:282
#define assert(TEST)
Definition: imath.c:37
struct colormap * cm
Definition: regguts.h:325
static struct arc * allocarc(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:368
struct arc * outs
Definition: regguts.h:306
struct state * to
Definition: regguts.h:281
color co
Definition: regguts.h:279
#define NULL
Definition: c.h:226
int nouts
Definition: regguts.h:305
struct nfa * parent
Definition: regguts.h:329
struct arc * inchain
Definition: regguts.h:285
struct arc * outchainRev
Definition: regguts.h:283
struct arc * ins
Definition: regguts.h:304
static void delsub ( struct nfa nfa,
struct state lp,
struct state rp 
)
static

Definition at line 1196 of file regc_nfa.c.

References assert, deltraverse(), FREESTATE, state::nins, NISERR, state::no, state::nouts, NULL, and state::tmp.

1199 {
1200  assert(lp != rp);
1201 
1202  rp->tmp = rp; /* mark end */
1203 
1204  deltraverse(nfa, lp, lp);
1205  if (NISERR())
1206  return; /* asserts might not hold after failure */
1207  assert(lp->nouts == 0 && rp->nins == 0); /* did the job */
1208  assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */
1209 
1210  rp->tmp = NULL; /* unmark end */
1211  lp->tmp = NULL; /* and begin, marked by deltraverse */
1212 }
#define NISERR()
Definition: regc_nfa.c:39
int no
Definition: regguts.h:300
#define FREESTATE
Definition: regguts.h:301
int nins
Definition: regguts.h:303
static void deltraverse(struct nfa *nfa, struct state *leftend, struct state *s)
Definition: regc_nfa.c:1219
#define assert(TEST)
Definition: imath.c:37
struct state * tmp
Definition: regguts.h:308
#define NULL
Definition: c.h:226
int nouts
Definition: regguts.h:305
static void deltraverse ( struct nfa nfa,
struct state leftend,
struct state s 
)
static

Definition at line 1219 of file regc_nfa.c.

References assert, freearc(), freestate(), FREESTATE, NERR, state::nins, NISERR, state::no, state::nouts, NULL, state::outs, vars::re, REG_ETOOBIG, STACK_TOO_DEEP, state::tmp, arc::to, and nfa::v.

Referenced by delsub().

1222 {
1223  struct arc *a;
1224  struct state *to;
1225 
1226  /* Since this is recursive, it could be driven to stack overflow */
1227  if (STACK_TOO_DEEP(nfa->v->re))
1228  {
1229  NERR(REG_ETOOBIG);
1230  return;
1231  }
1232 
1233  if (s->nouts == 0)
1234  return; /* nothing to do */
1235  if (s->tmp != NULL)
1236  return; /* already in progress */
1237 
1238  s->tmp = s; /* mark as in progress */
1239 
1240  while ((a = s->outs) != NULL)
1241  {
1242  to = a->to;
1243  deltraverse(nfa, leftend, to);
1244  if (NISERR())
1245  return; /* asserts might not hold after failure */
1246  assert(to->nouts == 0 || to->tmp != NULL);
1247  freearc(nfa, a);
1248  if (to->nins == 0 && to->tmp == NULL)
1249  {
1250  assert(to->nouts == 0);
1251  freestate(nfa, to);
1252  }
1253  }
1254 
1255  assert(s->no != FREESTATE); /* we're still here */
1256  assert(s == leftend || s->nins != 0); /* and still reachable */
1257  assert(s->nouts == 0); /* but have no outarcs */
1258 
1259  s->tmp = NULL; /* we're done here */
1260 }
#define NISERR()
Definition: regc_nfa.c:39
int no
Definition: regguts.h:300
#define NERR(e)
Definition: regc_nfa.c:40
#define FREESTATE
Definition: regguts.h:301
Definition: regguts.h:276
int nins
Definition: regguts.h:303
static void deltraverse(struct nfa *nfa, struct state *leftend, struct state *s)
Definition: regc_nfa.c:1219
struct vars * v
Definition: regguts.h:328
regex_t * re
Definition: regcomp.c:228
#define assert(TEST)
Definition: imath.c:37
struct arc * outs
Definition: regguts.h:306
static void freearc(struct nfa *nfa, struct arc *victim)
Definition: regc_nfa.c:421
struct state * tmp
Definition: regguts.h:308
struct state * to
Definition: regguts.h:281
#define NULL
Definition: c.h:226
Definition: regguts.h:298
#define REG_ETOOBIG
Definition: regex.h:155
int nouts
Definition: regguts.h:305
static void freestate(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:218
#define STACK_TOO_DEEP(re)
Definition: regguts.h:455
static void destroystate ( struct nfa nfa,
struct state s 
)
static

Definition at line 249 of file regc_nfa.c.

References assert, FREE, FREESTATE, state::ins, arcbatch::next, state::next, state::no, NULL, state::oas, state::outs, vars::spaceused, and nfa::v.

Referenced by freenfa().

251 {
252  struct arcbatch *ab;
253  struct arcbatch *abnext;
254 
255  assert(s->no == FREESTATE);
256  for (ab = s->oas.next; ab != NULL; ab = abnext)
257  {
258  abnext = ab->next;
259  FREE(ab);
260  nfa->v->spaceused -= sizeof(struct arcbatch);
261  }
262  s->ins = NULL;
263  s->outs = NULL;
264  s->next = NULL;
265  FREE(s);
266  nfa->v->spaceused -= sizeof(struct state);
267 }
int no
Definition: regguts.h:300
#define FREESTATE
Definition: regguts.h:301
#define FREE(p)
Definition: regcustom.h:54
struct vars * v
Definition: regguts.h:328
struct arcbatch * next
Definition: regguts.h:293
#define assert(TEST)
Definition: imath.c:37
struct state * next
Definition: regguts.h:309
struct arc * outs
Definition: regguts.h:306
#define NULL
Definition: c.h:226
Definition: regguts.h:298
size_t spaceused
Definition: regcomp.c:256
struct arcbatch oas
Definition: regguts.h:311
struct arc * ins
Definition: regguts.h:304
static void dropstate ( struct nfa nfa,
struct state s 
)
static

Definition at line 202 of file regc_nfa.c.

References freearc(), freestate(), state::ins, NULL, and state::outs.

Referenced by cleanup(), clonesuccessorstates(), fixconstraintloops(), fixempties(), pullback(), and pushfwd().

204 {
205  struct arc *a;
206 
207  while ((a = s->ins) != NULL)
208  freearc(nfa, a);
209  while ((a = s->outs) != NULL)
210  freearc(nfa, a);
211  freestate(nfa, s);
212 }
Definition: regguts.h:276
struct arc * outs
Definition: regguts.h:306
static void freearc(struct nfa *nfa, struct arc *victim)
Definition: regc_nfa.c:421
#define NULL
Definition: c.h:226
static void freestate(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:218
struct arc * ins
Definition: regguts.h:304
static void dumpnfa ( struct nfa nfa,
FILE *  f 
)
static

Definition at line 2965 of file regc_nfa.c.

References nfa::bos, nfa::cm, COLORLESS, nfa::eos, state::next, state::no, state::nouts, NULL, nfa::parent, nfa::post, nfa::pre, and nfa::states.

Referenced by fixconstraintloops(), fixempties(), optimize(), pullback(), and pushfwd().

2967 {
2968 #ifdef REG_DEBUG
2969  struct state *s;
2970  int nstates = 0;
2971  int narcs = 0;
2972 
2973  fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
2974  if (nfa->bos[0] != COLORLESS)
2975  fprintf(f, ", bos [%ld]", (long) nfa->bos[0]);
2976  if (nfa->bos[1] != COLORLESS)
2977  fprintf(f, ", bol [%ld]", (long) nfa->bos[1]);
2978  if (nfa->eos[0] != COLORLESS)
2979  fprintf(f, ", eos [%ld]", (long) nfa->eos[0]);
2980  if (nfa->eos[1] != COLORLESS)
2981  fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
2982  fprintf(f, "\n");
2983  for (s = nfa->states; s != NULL; s = s->next)
2984  {
2985  dumpstate(s, f);
2986  nstates++;
2987  narcs += s->nouts;
2988  }
2989  fprintf(f, "total of %d states, %d arcs\n", nstates, narcs);
2990  if (nfa->parent == NULL)
2991  dumpcolors(nfa->cm, f);
2992  fflush(f);
2993 #endif
2994 }
int no
Definition: regguts.h:300
struct state * states
Definition: regguts.h:322
color eos[2]
Definition: regguts.h:327
struct state * post
Definition: regguts.h:320
color bos[2]
Definition: regguts.h:326
struct colormap * cm
Definition: regguts.h:325
struct state * next
Definition: regguts.h:309
#define NULL
Definition: c.h:226
Definition: regguts.h:298
int nouts
Definition: regguts.h:305
struct nfa * parent
Definition: regguts.h:329
#define COLORLESS
Definition: regguts.h:137
struct state * pre
Definition: regguts.h:317
static void dupnfa ( struct nfa nfa,
struct state start,
struct state stop,
struct state from,
struct state to 
)
static

Definition at line 1270 of file regc_nfa.c.

References cleartraverse(), duptraverse(), EMPTY, newarc(), NULL, and state::tmp.

1275 {
1276  if (start == stop)
1277  {
1278  newarc(nfa, EMPTY, 0, from, to);
1279  return;
1280  }
1281 
1282  stop->tmp = to;
1283  duptraverse(nfa, start, from);
1284  /* done, except for clearing out the tmp pointers */
1285 
1286  stop->tmp = NULL;
1287  cleartraverse(nfa, start);
1288 }
static void cleartraverse(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:1331
struct state * tmp
Definition: regguts.h:308
#define NULL
Definition: c.h:226
static void newarc(struct nfa *nfa, int t, color co, struct state *from, struct state *to)
Definition: regc_nfa.c:276
#define EMPTY
Definition: regcomp.c:276
static void duptraverse(struct nfa *nfa, struct state *s, struct state *stmp)
Definition: regc_nfa.c:1294
static void duptraverse ( struct nfa nfa,
struct state s,
struct state stmp 
)
static

Definition at line 1294 of file regc_nfa.c.

References assert, cparc(), NERR, newstate(), NISERR, NULL, arc::outchain, state::outs, vars::re, REG_ETOOBIG, STACK_TOO_DEEP, state::tmp, arc::to, and nfa::v.

Referenced by dupnfa().

1297 {
1298  struct arc *a;
1299 
1300  /* Since this is recursive, it could be driven to stack overflow */
1301  if (STACK_TOO_DEEP(nfa->v->re))
1302  {
1303  NERR(REG_ETOOBIG);
1304  return;
1305  }
1306 
1307  if (s->tmp != NULL)
1308  return; /* already done */
1309 
1310  s->tmp = (stmp == NULL) ? newstate(nfa) : stmp;
1311  if (s->tmp == NULL)
1312  {
1313  assert(NISERR());
1314  return;
1315  }
1316 
1317  for (a = s->outs; a != NULL && !NISERR(); a = a->outchain)
1318  {
1319  duptraverse(nfa, a->to, (struct state *) NULL);
1320  if (NISERR())
1321  break;
1322  assert(a->to->tmp != NULL);
1323  cparc(nfa, a, s->tmp, a->to->tmp);
1324  }
1325 }
#define NISERR()
Definition: regc_nfa.c:39
#define NERR(e)
Definition: regc_nfa.c:40
Definition: regguts.h:276
struct vars * v
Definition: regguts.h:328
regex_t * re
Definition: regcomp.c:228
struct arc * outchain
Definition: regguts.h:282
#define assert(TEST)
Definition: imath.c:37
struct arc * outs
Definition: regguts.h:306
struct state * tmp
Definition: regguts.h:308
struct state * to
Definition: regguts.h:281
static struct state * newstate(struct nfa *nfa)
Definition: regc_nfa.c:124
#define NULL
Definition: c.h:226
Definition: regguts.h:298
#define REG_ETOOBIG
Definition: regex.h:155
static void duptraverse(struct nfa *nfa, struct state *s, struct state *stmp)
Definition: regc_nfa.c:1294
static void cparc(struct nfa *nfa, struct arc *oa, struct state *from, struct state *to)
Definition: regc_nfa.c:570
#define STACK_TOO_DEEP(re)
Definition: regguts.h:455
static struct state* emptyreachable ( struct nfa nfa,
struct state s,
struct state lastfound,
struct arc **  inarcsorig 
)
static

Definition at line 2096 of file regc_nfa.c.

References EMPTY, arc::from, arc::inchain, NERR, state::no, NULL, vars::re, REG_ETOOBIG, STACK_TOO_DEEP, state::tmp, arc::type, and nfa::v.

Referenced by fixempties().

2100 {
2101  struct arc *a;
2102 
2103  /* Since this is recursive, it could be driven to stack overflow */
2104  if (STACK_TOO_DEEP(nfa->v->re))
2105  {
2106  NERR(REG_ETOOBIG);
2107  return lastfound;
2108  }
2109 
2110  s->tmp = lastfound;
2111  lastfound = s;
2112  for (a = inarcsorig[s->no]; a != NULL; a = a->inchain)
2113  {
2114  if (a->type == EMPTY && a->from->tmp == NULL)
2115  lastfound = emptyreachable(nfa, a->from, lastfound, inarcsorig);
2116  }
2117  return lastfound;
2118 }
int no
Definition: regguts.h:300
struct state * from
Definition: regguts.h:280
int type
Definition: regguts.h:278
#define NERR(e)
Definition: regc_nfa.c:40
Definition: regguts.h:276
static struct state * emptyreachable(struct nfa *nfa, struct state *s, struct state *lastfound, struct arc **inarcsorig)
Definition: regc_nfa.c:2096
struct vars * v
Definition: regguts.h:328
regex_t * re
Definition: regcomp.c:228
struct state * tmp
Definition: regguts.h:308
#define NULL
Definition: c.h:226
#define REG_ETOOBIG
Definition: regex.h:155
#define EMPTY
Definition: regcomp.c:276
struct arc * inchain
Definition: regguts.h:285
#define STACK_TOO_DEEP(re)
Definition: regguts.h:455
static struct arc* findarc ( struct state s,
int  type,
color  co 
)
static

Definition at line 554 of file regc_nfa.c.

References arc::co, NULL, arc::outchain, state::outs, and arc::type.

Referenced by colorcomplement().

557 {
558  struct arc *a;
559 
560  for (a = s->outs; a != NULL; a = a->outchain)
561  if (a->type == type && a->co == co)
562  return a;
563  return NULL;
564 }
int type
Definition: regguts.h:278
Definition: regguts.h:276
struct arc * outchain
Definition: regguts.h:282
struct arc * outs
Definition: regguts.h:306
color co
Definition: regguts.h:279
#define NULL
Definition: c.h:226
static int findconstraintloop ( struct nfa nfa,
struct state s 
)
static

Definition at line 2262 of file regc_nfa.c.

References assert, breakconstraintloop(), isconstraintarc(), NERR, NULL, arc::outchain, state::outs, vars::re, REG_ETOOBIG, STACK_TOO_DEEP, state::tmp, arc::to, and nfa::v.

Referenced by fixconstraintloops().

2263 {
2264  struct arc *a;
2265 
2266  /* Since this is recursive, it could be driven to stack overflow */
2267  if (STACK_TOO_DEEP(nfa->v->re))
2268  {
2269  NERR(REG_ETOOBIG);
2270  return 1; /* to exit as quickly as possible */
2271  }
2272 
2273  if (s->tmp != NULL)
2274  {
2275  /* Already proven uninteresting? */
2276  if (s->tmp == s)
2277  return 0;
2278  /* Found a loop involving s */
2279  breakconstraintloop(nfa, s);
2280  /* The tmp fields have been cleaned up by breakconstraintloop */
2281  return 1;
2282  }
2283  for (a = s->outs; a != NULL; a = a->outchain)
2284  {
2285  if (isconstraintarc(a))
2286  {
2287  struct state *sto = a->to;
2288 
2289  assert(sto != s);
2290  s->tmp = sto;
2291  if (findconstraintloop(nfa, sto))
2292  return 1;
2293  }
2294  }
2295 
2296  /*
2297  * If we get here, no constraint loop exists leading out from s. Mark it
2298  * with s->tmp == s so we need not rediscover that fact again later.
2299  */
2300  s->tmp = s;
2301  return 0;
2302 }
#define NERR(e)
Definition: regc_nfa.c:40
Definition: regguts.h:276
struct vars * v
Definition: regguts.h:328
regex_t * re
Definition: regcomp.c:228
static void breakconstraintloop(struct nfa *nfa, struct state *sinitial)
Definition: regc_nfa.c:2351
struct arc * outchain
Definition: regguts.h:282
static int isconstraintarc(struct arc *a)
Definition: regc_nfa.c:2124
#define assert(TEST)
Definition: imath.c:37
struct arc * outs
Definition: regguts.h:306
struct state * tmp
Definition: regguts.h:308
static int findconstraintloop(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:2262
struct state * to
Definition: regguts.h:281
#define NULL
Definition: c.h:226
Definition: regguts.h:298
#define REG_ETOOBIG
Definition: regex.h:155
#define STACK_TOO_DEEP(re)
Definition: regguts.h:455
static void fixconstraintloops ( struct nfa nfa,
FILE *  f 
)
static

Definition at line 2163 of file regc_nfa.c.

References assert, dropstate(), dumpnfa(), findconstraintloop(), state::flag, freearc(), isconstraintarc(), state::next, state::nins, NISERR, state::nouts, NULL, arc::outchain, state::outs, nfa::states, state::tmp, and arc::to.

Referenced by optimize().

2165 {
2166  struct state *s;
2167  struct state *nexts;
2168  struct arc *a;
2169  struct arc *nexta;
2170  int hasconstraints;
2171 
2172  /*
2173  * In the trivial case of a state that loops to itself, we can just drop
2174  * the constraint arc altogether. This is worth special-casing because
2175  * such loops are far more common than loops containing multiple states.
2176  * While we're at it, note whether any constraint arcs survive.
2177  */
2178  hasconstraints = 0;
2179  for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
2180  {
2181  nexts = s->next;
2182  /* while we're at it, ensure tmp fields are clear for next step */
2183  assert(s->tmp == NULL);
2184  for (a = s->outs; a != NULL && !NISERR(); a = nexta)
2185  {
2186  nexta = a->outchain;
2187  if (isconstraintarc(a))
2188  {
2189  if (a->to == s)
2190  freearc(nfa, a);
2191  else
2192  hasconstraints = 1;
2193  }
2194  }
2195  /* If we removed all the outarcs, the state is useless. */
2196  if (s->nouts == 0 && !s->flag)
2197  dropstate(nfa, s);
2198  }
2199 
2200  /* Nothing to do if no remaining constraint arcs */
2201  if (NISERR() || !hasconstraints)
2202  return;
2203 
2204  /*
2205  * Starting from each remaining NFA state, search outwards for a
2206  * constraint loop. If we find a loop, break the loop, then start the
2207  * search over. (We could possibly retain some state from the first scan,
2208  * but it would complicate things greatly, and multi-state constraint
2209  * loops are rare enough that it's not worth optimizing the case.)
2210  */
2211 restart:
2212  for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
2213  {
2214  if (findconstraintloop(nfa, s))
2215  goto restart;
2216  }
2217 
2218  if (NISERR())
2219  return;
2220 
2221  /*
2222  * Now remove any states that have become useless. (This cleanup is not
2223  * very thorough, and would be even less so if we tried to combine it with
2224  * the previous step; but cleanup() will take care of anything we miss.)
2225  *
2226  * Because findconstraintloop intentionally doesn't reset all tmp fields,
2227  * we have to clear them after it's done. This is a convenient place to
2228  * do that, too.
2229  */
2230  for (s = nfa->states; s != NULL; s = nexts)
2231  {
2232  nexts = s->next;
2233  s->tmp = NULL;
2234  if ((s->nins == 0 || s->nouts == 0) && !s->flag)
2235  dropstate(nfa, s);
2236  }
2237 
2238  if (f != NULL)
2239  dumpnfa(nfa, f);
2240 }
#define NISERR()
Definition: regc_nfa.c:39
Definition: regguts.h:276
struct state * states
Definition: regguts.h:322
int nins
Definition: regguts.h:303
static void dumpnfa(struct nfa *nfa, FILE *f)
Definition: regc_nfa.c:2965
struct arc * outchain
Definition: regguts.h:282
static int isconstraintarc(struct arc *a)
Definition: regc_nfa.c:2124
#define assert(TEST)
Definition: imath.c:37
struct state * next
Definition: regguts.h:309
struct arc * outs
Definition: regguts.h:306
static void freearc(struct nfa *nfa, struct arc *victim)
Definition: regc_nfa.c:421
char flag
Definition: regguts.h:302
struct state * tmp
Definition: regguts.h:308
static int findconstraintloop(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:2262
struct state * to
Definition: regguts.h:281
#define NULL
Definition: c.h:226
Definition: regguts.h:298
static void dropstate(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:202
int nouts
Definition: regguts.h:305
static void fixempties ( struct nfa nfa,
FILE *  f 
)
static

Definition at line 1869 of file regc_nfa.c.

References assert, dropstate(), dumpnfa(), EMPTY, emptyreachable(), state::flag, FREE, freearc(), arc::from, hasnonemptyout(), arc::inchain, state::ins, MALLOC, mergeins(), moveins(), moveouts(), NERR, state::next, state::nins, NISERR, state::no, state::nouts, nfa::nstates, NULL, arc::outchain, state::outs, REG_ESPACE, s2, nfa::states, state::tmp, arc::to, and arc::type.

Referenced by optimize().

1871 {
1872  struct state *s;
1873  struct state *s2;
1874  struct state *nexts;
1875  struct arc *a;
1876  struct arc *nexta;
1877  int totalinarcs;
1878  struct arc **inarcsorig;
1879  struct arc **arcarray;
1880  int arccount;
1881  int prevnins;
1882  int nskip;
1883 
1884  /*
1885  * First, get rid of any states whose sole out-arc is an EMPTY, since
1886  * they're basically just aliases for their successor. The parsing
1887  * algorithm creates enough of these that it's worth special-casing this.
1888  */
1889  for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
1890  {
1891  nexts = s->next;
1892  if (s->flag || s->nouts != 1)
1893  continue;
1894  a = s->outs;
1895  assert(a != NULL && a->outchain == NULL);
1896  if (a->type != EMPTY)
1897  continue;
1898  if (s != a->to)
1899  moveins(nfa, s, a->to);
1900  dropstate(nfa, s);
1901  }
1902 
1903  /*
1904  * Similarly, get rid of any state with a single EMPTY in-arc, by folding
1905  * it into its predecessor.
1906  */
1907  for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
1908  {
1909  nexts = s->next;
1910  /* while we're at it, ensure tmp fields are clear for next step */
1911  assert(s->tmp == NULL);
1912  if (s->flag || s->nins != 1)
1913  continue;
1914  a = s->ins;
1915  assert(a != NULL && a->inchain == NULL);
1916  if (a->type != EMPTY)
1917  continue;
1918  if (s != a->from)
1919  moveouts(nfa, s, a->from);
1920  dropstate(nfa, s);
1921  }
1922 
1923  if (NISERR())
1924  return;
1925 
1926  /*
1927  * For each remaining NFA state, find all other states from which it is
1928  * reachable by a chain of one or more EMPTY arcs. Then generate new arcs
1929  * that eliminate the need for each such chain.
1930  *
1931  * We could replace a chain of EMPTY arcs that leads from a "from" state
1932  * to a "to" state either by pushing non-EMPTY arcs forward (linking
1933  * directly from "from"'s predecessors to "to") or by pulling them back
1934  * (linking directly from "from" to "to"'s successors). We choose to
1935  * always do the former; this choice is somewhat arbitrary, but the
1936  * approach below requires that we uniformly do one or the other.
1937  *
1938  * Suppose we have a chain of N successive EMPTY arcs (where N can easily
1939  * approach the size of the NFA). All of the intermediate states must
1940  * have additional inarcs and outarcs, else they'd have been removed by
1941  * the steps above. Assuming their inarcs are mostly not empties, we will
1942  * add O(N^2) arcs to the NFA, since a non-EMPTY inarc leading to any one
1943  * state in the chain must be duplicated to lead to all its successor
1944  * states as well. So there is no hope of doing less than O(N^2) work;
1945  * however, we should endeavor to keep the big-O cost from being even
1946  * worse than that, which it can easily become without care. In
1947  * particular, suppose we were to copy all S1's inarcs forward to S2, and
1948  * then also to S3, and then later we consider pushing S2's inarcs forward
1949  * to S3. If we include the arcs already copied from S1 in that, we'd be
1950  * doing O(N^3) work. (The duplicate-arc elimination built into newarc()
1951  * and its cohorts would get rid of the extra arcs, but not without cost.)
1952  *
1953  * We can avoid this cost by treating only arcs that existed at the start
1954  * of this phase as candidates to be pushed forward. To identify those,
1955  * we remember the first inarc each state had to start with. We rely on
1956  * the fact that newarc() and friends put new arcs on the front of their
1957  * to-states' inchains, and that this phase never deletes arcs, so that
1958  * the original arcs must be the last arcs in their to-states' inchains.
1959  *
1960  * So the process here is that, for each state in the NFA, we gather up
1961  * all non-EMPTY inarcs of states that can reach the target state via
1962  * EMPTY arcs. We then sort, de-duplicate, and merge these arcs into the
1963  * target state's inchain. (We can safely use sort-merge for this as long
1964  * as we update each state's original-arcs pointer after we add arcs to
1965  * it; the sort step of mergeins probably changed the order of the old
1966  * arcs.)
1967  *
1968  * Another refinement worth making is that, because we only add non-EMPTY
1969  * arcs during this phase, and all added arcs have the same from-state as
1970  * the non-EMPTY arc they were cloned from, we know ahead of time that any
1971  * states having only EMPTY outarcs will be useless for lack of outarcs
1972  * after we drop the EMPTY arcs. (They cannot gain non-EMPTY outarcs if
1973  * they had none to start with.) So we need not bother to update the
1974  * inchains of such states at all.
1975  */
1976 
1977  /* Remember the states' first original inarcs */
1978  /* ... and while at it, count how many old inarcs there are altogether */
1979  inarcsorig = (struct arc **) MALLOC(nfa->nstates * sizeof(struct arc *));
1980  if (inarcsorig == NULL)
1981  {
1982  NERR(REG_ESPACE);
1983  return;
1984  }
1985  totalinarcs = 0;
1986  for (s = nfa->states; s != NULL; s = s->next)
1987  {
1988  inarcsorig[s->no] = s->ins;
1989  totalinarcs += s->nins;
1990  }
1991 
1992  /*
1993  * Create a workspace for accumulating the inarcs to be added to the
1994  * current target state. totalinarcs is probably a considerable
1995  * overestimate of the space needed, but the NFA is unlikely to be large
1996  * enough at this point to make it worth being smarter.
1997  */
1998  arcarray = (struct arc **) MALLOC(totalinarcs * sizeof(struct arc *));
1999  if (arcarray == NULL)
2000  {
2001  NERR(REG_ESPACE);
2002  FREE(inarcsorig);
2003  return;
2004  }
2005 
2006  /* And iterate over the target states */
2007  for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
2008  {
2009  /* Ignore target states without non-EMPTY outarcs, per note above */
2010  if (!s->flag && !hasnonemptyout(s))
2011  continue;
2012 
2013  /* Find predecessor states and accumulate their original inarcs */
2014  arccount = 0;
2015  for (s2 = emptyreachable(nfa, s, s, inarcsorig); s2 != s; s2 = nexts)
2016  {
2017  /* Add s2's original inarcs to arcarray[], but ignore empties */
2018  for (a = inarcsorig[s2->no]; a != NULL; a = a->inchain)
2019  {
2020  if (a->type != EMPTY)
2021  arcarray[arccount++] = a;
2022  }
2023 
2024  /* Reset the tmp fields as we walk back */
2025  nexts = s2->tmp;
2026  s2->tmp = NULL;
2027  }
2028  s->tmp = NULL;
2029  assert(arccount <= totalinarcs);
2030 
2031  /* Remember how many original inarcs this state has */
2032  prevnins = s->nins;
2033 
2034  /* Add non-duplicate inarcs to target state */
2035  mergeins(nfa, s, arcarray, arccount);
2036 
2037  /* Now we must update the state's inarcsorig pointer */
2038  nskip = s->nins - prevnins;
2039  a = s->ins;
2040  while (nskip-- > 0)
2041  a = a->inchain;
2042  inarcsorig[s->no] = a;
2043  }
2044 
2045  FREE(arcarray);
2046  FREE(inarcsorig);
2047 
2048  if (NISERR())
2049  return;
2050 
2051  /*
2052  * Now remove all the EMPTY arcs, since we don't need them anymore.
2053  */
2054  for (s = nfa->states; s != NULL; s = s->next)
2055  {
2056  for (a = s->outs; a != NULL; a = nexta)
2057  {
2058  nexta = a->outchain;
2059  if (a->type == EMPTY)
2060  freearc(nfa, a);
2061  }
2062  }
2063 
2064  /*
2065  * And remove any states that have become useless. (This cleanup is not
2066  * very thorough, and would be even less so if we tried to combine it with
2067  * the previous step; but cleanup() will take care of anything we miss.)
2068  */
2069  for (s = nfa->states; s != NULL; s = nexts)
2070  {
2071  nexts = s->next;
2072  if ((s->nins == 0 || s->nouts == 0) && !s->flag)
2073  dropstate(nfa, s);
2074  }
2075 
2076  if (f != NULL)
2077  dumpnfa(nfa, f);
2078 }
#define NISERR()
Definition: regc_nfa.c:39
int no
Definition: regguts.h:300
struct state * from
Definition: regguts.h:280
int type
Definition: regguts.h:278
static void moveouts(struct nfa *nfa, struct state *oldState, struct state *newState)
Definition: regc_nfa.c:1007
#define NERR(e)
Definition: regc_nfa.c:40
#define REG_ESPACE
Definition: regex.h:149
Definition: regguts.h:276
struct state * states
Definition: regguts.h:322
int nins
Definition: regguts.h:303
#define FREE(p)
Definition: regcustom.h:54
static struct state * emptyreachable(struct nfa *nfa, struct state *s, struct state *lastfound, struct arc **inarcsorig)
Definition: regc_nfa.c:2096
static void dumpnfa(struct nfa *nfa, FILE *f)
Definition: regc_nfa.c:2965
static void mergeins(struct nfa *nfa, struct state *s, struct arc **arcarray, int arccount)
Definition: regc_nfa.c:910
#define MALLOC(n)
Definition: regcustom.h:53
static int hasnonemptyout(struct state *s)
Definition: regc_nfa.c:537
struct arc * outchain
Definition: regguts.h:282
#define assert(TEST)
Definition: imath.c:37
struct state * next
Definition: regguts.h:309
int nstates
Definition: regguts.h:321
struct arc * outs
Definition: regguts.h:306
static void freearc(struct nfa *nfa, struct arc *victim)
Definition: regc_nfa.c:421
char flag
Definition: regguts.h:302
struct state * tmp
Definition: regguts.h:308
static void moveins(struct nfa *nfa, struct state *oldState, struct state *newState)
Definition: regc_nfa.c:736
struct state * to
Definition: regguts.h:281
char * s2
#define NULL
Definition: c.h:226
Definition: regguts.h:298
#define EMPTY
Definition: regcomp.c:276
static void dropstate(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:202
int nouts
Definition: regguts.h:305
struct arc * inchain
Definition: regguts.h:285
struct arc * ins
Definition: regguts.h:304
static void freearc ( struct nfa nfa,
struct arc victim 
)
static

Definition at line 421 of file regc_nfa.c.

References assert, nfa::cm, COLORED, state::free, arc::from, arc::inchain, arc::inchainRev, state::ins, state::nins, state::nouts, NULL, arc::outchain, arc::outchainRev, state::outs, nfa::parent, arc::to, arc::type, and uncolorchain().

Referenced by breakconstraintloop(), deltraverse(), dropstate(), fixconstraintloops(), fixempties(), moveins(), moveouts(), pull(), pullback(), push(), and pushfwd().

423 {
424  struct state *from = victim->from;
425  struct state *to = victim->to;
426  struct arc *predecessor;
427 
428  assert(victim->type != 0);
429 
430  /* take it off color chain if necessary */
431  if (COLORED(victim) && nfa->parent == NULL)
432  uncolorchain(nfa->cm, victim);
433 
434  /* take it off source's out-chain */
435  assert(from != NULL);
436  predecessor = victim->outchainRev;
437  if (predecessor == NULL)
438  {
439  assert(from->outs == victim);
440  from->outs = victim->outchain;
441  }
442  else
443  {
444  assert(predecessor->outchain == victim);
445  predecessor->outchain = victim->outchain;
446  }
447  if (victim->outchain != NULL)
448  {
449  assert(victim->outchain->outchainRev == victim);
450  victim->outchain->outchainRev = predecessor;
451  }
452  from->nouts--;
453 
454  /* take it off target's in-chain */
455  assert(to != NULL);
456  predecessor = victim->inchainRev;
457  if (predecessor == NULL)
458  {
459  assert(to->ins == victim);
460  to->ins = victim->inchain;
461  }
462  else
463  {
464  assert(predecessor->inchain == victim);
465  predecessor->inchain = victim->inchain;
466  }
467  if (victim->inchain != NULL)
468  {
469  assert(victim->inchain->inchainRev == victim);
470  victim->inchain->inchainRev = predecessor;
471  }
472  to->nins--;
473 
474  /* clean up and place on from-state's free list */
475  victim->type = 0;
476  victim->from = NULL; /* precautions... */
477  victim->to = NULL;
478  victim->inchain = NULL;
479  victim->inchainRev = NULL;
480  victim->outchain = NULL;
481  victim->outchainRev = NULL;
482  victim->freechain = from->free;
483  from->free = victim;
484 }
struct state * from
Definition: regguts.h:280
int type
Definition: regguts.h:278
Definition: regguts.h:276
int nins
Definition: regguts.h:303
#define COLORED(a)
Definition: regcomp.c:296
struct arc * free
Definition: regguts.h:307
struct arc * inchainRev
Definition: regguts.h:286
struct arc * outchain
Definition: regguts.h:282
#define assert(TEST)
Definition: imath.c:37
struct colormap * cm
Definition: regguts.h:325
struct arc * outs
Definition: regguts.h:306
struct state * to
Definition: regguts.h:281
#define NULL
Definition: c.h:226
Definition: regguts.h:298
int nouts
Definition: regguts.h:305
static void uncolorchain(struct colormap *cm, struct arc *a)
Definition: regc_color.c:991
struct nfa * parent
Definition: regguts.h:329
struct arc * inchain
Definition: regguts.h:285
struct arc * outchainRev
Definition: regguts.h:283
struct arc * ins
Definition: regguts.h:304
static void freecnfa ( struct cnfa cnfa)
static

Definition at line 2952 of file regc_nfa.c.

References cnfa::arcs, assert, FREE, cnfa::nstates, cnfa::states, and cnfa::stflags.

2953 {
2954  assert(cnfa->nstates != 0); /* not empty already */
2955  cnfa->nstates = 0;
2956  FREE(cnfa->stflags);
2957  FREE(cnfa->states);
2958  FREE(cnfa->arcs);
2959 }
#define FREE(p)
Definition: regcustom.h:54
struct carc ** states
Definition: regguts.h:366
int nstates
Definition: regguts.h:356
#define assert(TEST)
Definition: imath.c:37
char * stflags
Definition: regguts.h:364
struct carc * arcs
Definition: regguts.h:368
static void freenfa ( struct nfa nfa)
static

Definition at line 98 of file regc_nfa.c.

References destroystate(), FREE, nfa::free, freestate(), state::next, state::nins, state::nouts, nfa::nstates, NULL, nfa::post, nfa::pre, nfa::slast, and nfa::states.

Referenced by newnfa().

99 {
100  struct state *s;
101 
102  while ((s = nfa->states) != NULL)
103  {
104  s->nins = s->nouts = 0; /* don't worry about arcs */
105  freestate(nfa, s);
106  }
107  while ((s = nfa->free) != NULL)
108  {
109  nfa->free = s->next;
110  destroystate(nfa, s);
111  }
112 
113  nfa->slast = NULL;
114  nfa->nstates = -1;
115  nfa->pre = NULL;
116  nfa->post = NULL;
117  FREE(nfa);
118 }
struct state * states
Definition: regguts.h:322
int nins
Definition: regguts.h:303
#define FREE(p)
Definition: regcustom.h:54
struct state * post
Definition: regguts.h:320
struct state * free
Definition: regguts.h:324
struct state * next
Definition: regguts.h:309
int nstates
Definition: regguts.h:321
#define NULL
Definition: c.h:226
Definition: regguts.h:298
struct state * slast
Definition: regguts.h:323
int nouts
Definition: regguts.h:305
static void destroystate(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:249
static void freestate(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:218
struct state * pre
Definition: regguts.h:317
static void freestate ( struct nfa nfa,
struct state s 
)
static

Definition at line 218 of file regc_nfa.c.

References assert, state::flag, nfa::free, FREESTATE, state::next, state::nins, state::no, state::nouts, NULL, state::prev, nfa::slast, and nfa::states.

Referenced by breakconstraintloop(), deltraverse(), dropstate(), and freenfa().

220 {
221  assert(s != NULL);
222  assert(s->nins == 0 && s->nouts == 0);
223 
224  s->no = FREESTATE;
225  s->flag = 0;
226  if (s->next != NULL)
227  s->next->prev = s->prev;
228  else
229  {
230  assert(s == nfa->slast);
231  nfa->slast = s->prev;
232  }
233  if (s->prev != NULL)
234  s->prev->next = s->next;
235  else
236  {
237  assert(s == nfa->states);
238  nfa->states = s->next;
239  }
240  s->prev = NULL;
241  s->next = nfa->free; /* don't delete it, put it on the free list */
242  nfa->free = s;
243 }
int no
Definition: regguts.h:300
#define FREESTATE
Definition: regguts.h:301
struct state * states
Definition: regguts.h:322
int nins
Definition: regguts.h:303
struct state * free
Definition: regguts.h:324
#define assert(TEST)
Definition: imath.c:37
struct state * next
Definition: regguts.h:309
char flag
Definition: regguts.h:302
#define NULL
Definition: c.h:226
struct state * prev
Definition: regguts.h:310
struct state * slast
Definition: regguts.h:323
int nouts
Definition: regguts.h:305
static int hasconstraintout ( struct state s)
static

Definition at line 2142 of file regc_nfa.c.

References isconstraintarc(), NULL, arc::outchain, and state::outs.

Referenced by clonesuccessorstates().

2143 {
2144  struct arc *a;
2145 
2146  for (a = s->outs; a != NULL; a = a->outchain)
2147  {
2148  if (isconstraintarc(a))
2149  return 1;
2150  }
2151  return 0;
2152 }
Definition: regguts.h:276
struct arc * outchain
Definition: regguts.h:282
static int isconstraintarc(struct arc *a)
Definition: regc_nfa.c:2124
struct arc * outs
Definition: regguts.h:306
#define NULL
Definition: c.h:226
static int hasnonemptyout ( struct state s)
static

Definition at line 537 of file regc_nfa.c.

References EMPTY, NULL, arc::outchain, state::outs, and arc::type.

Referenced by fixempties().

538 {
539  struct arc *a;
540 
541  for (a = s->outs; a != NULL; a = a->outchain)
542  {
543  if (a->type != EMPTY)
544  return 1;
545  }
546  return 0;
547 }
int type
Definition: regguts.h:278
Definition: regguts.h:276
struct arc * outchain
Definition: regguts.h:282
struct arc * outs
Definition: regguts.h:306
#define NULL
Definition: c.h:226
#define EMPTY
Definition: regcomp.c:276
static int isconstraintarc ( struct arc a)
inlinestatic

Definition at line 2124 of file regc_nfa.c.

References AHEAD, BEHIND, LACON, and arc::type.

Referenced by breakconstraintloop(), clonesuccessorstates(), findconstraintloop(), fixconstraintloops(), and hasconstraintout().

2125 {
2126  switch (a->type)
2127  {
2128  case '^':
2129  case '$':
2130  case BEHIND:
2131  case AHEAD:
2132  case LACON:
2133  return 1;
2134  }
2135  return 0;
2136 }
int type
Definition: regguts.h:278
#define BEHIND
Definition: regcomp.c:288
#define LACON
Definition: regcomp.c:286
#define AHEAD
Definition: regcomp.c:287
static void markcanreach ( struct nfa nfa,
struct state s,
struct state okay,
struct state mark 
)
static

Definition at line 2790 of file regc_nfa.c.

References arc::from, arc::inchain, state::ins, NERR, NULL, vars::re, REG_ETOOBIG, STACK_TOO_DEEP, state::tmp, and nfa::v.

Referenced by cleanup().

2794 {
2795  struct arc *a;
2796 
2797  /* Since this is recursive, it could be driven to stack overflow */
2798  if (STACK_TOO_DEEP(nfa->v->re))
2799  {
2800  NERR(REG_ETOOBIG);
2801  return;
2802  }
2803 
2804  if (s->tmp != okay)
2805  return;
2806  s->tmp = mark;
2807 
2808  for (a = s->ins; a != NULL; a = a->inchain)
2809  markcanreach(nfa, a->from, okay, mark);
2810 }
struct state * from
Definition: regguts.h:280
#define NERR(e)
Definition: regc_nfa.c:40
Definition: regguts.h:276
struct vars * v
Definition: regguts.h:328
static void markcanreach(struct nfa *nfa, struct state *s, struct state *okay, struct state *mark)
Definition: regc_nfa.c:2790
regex_t * re
Definition: regcomp.c:228
struct state * tmp
Definition: regguts.h:308
#define NULL
Definition: c.h:226
#define REG_ETOOBIG
Definition: regex.h:155
struct arc * inchain
Definition: regguts.h:285
#define STACK_TOO_DEEP(re)
Definition: regguts.h:455
struct arc * ins
Definition: regguts.h:304
static void markreachable ( struct nfa nfa,
struct state s,
struct state okay,
struct state mark 
)
static

Definition at line 2764 of file regc_nfa.c.

References NERR, NULL, arc::outchain, state::outs, vars::re, REG_ETOOBIG, STACK_TOO_DEEP, state::tmp, arc::to, and nfa::v.

Referenced by cleanup().

2768 {
2769  struct arc *a;
2770 
2771  /* Since this is recursive, it could be driven to stack overflow */
2772  if (STACK_TOO_DEEP(nfa->v->re))
2773  {
2774  NERR(REG_ETOOBIG);
2775  return;
2776  }
2777 
2778  if (s->tmp != okay)
2779  return;
2780  s->tmp = mark;
2781 
2782  for (a = s->outs; a != NULL; a = a->outchain)
2783  markreachable(nfa, a->to, okay, mark);
2784 }
static void markreachable(struct nfa *nfa, struct state *s, struct state *okay, struct state *mark)
Definition: regc_nfa.c:2764
#define NERR(e)
Definition: regc_nfa.c:40
Definition: regguts.h:276
struct vars * v
Definition: regguts.h:328
regex_t * re
Definition: regcomp.c:228
struct arc * outchain
Definition: regguts.h:282
struct arc * outs
Definition: regguts.h:306
struct state * tmp
Definition: regguts.h:308
struct state * to
Definition: regguts.h:281
#define NULL
Definition: c.h:226
#define REG_ETOOBIG
Definition: regex.h:155
#define STACK_TOO_DEEP(re)
Definition: regguts.h:455
static void mergeins ( struct nfa nfa,
struct state s,
struct arc **  arcarray,
int  arccount 
)
static

Definition at line 910 of file regc_nfa.c.

References assert, CANCEL_REQUESTED, arc::co, createarc(), arc::from, i, arc::inchain, state::ins, NERR, NISERR, NOTREACHED, NULL, qsort, vars::re, REG_CANCEL, sortins(), sortins_cmp(), arc::type, and nfa::v.

Referenced by fixempties().

914 {
915  struct arc *na;
916  int i;
917  int j;
918 
919  if (arccount <= 0)
920  return;
921 
922  /*
923  * Because we bypass newarc() in this code path, we'd better include a
924  * cancel check.
925  */
926  if (CANCEL_REQUESTED(nfa->v->re))
927  {
928  NERR(REG_CANCEL);
929  return;
930  }
931 
932  /* Sort existing inarcs as well as proposed new ones */
933  sortins(nfa, s);
934  if (NISERR())
935  return; /* might have failed to sort */
936 
937  qsort(arcarray, arccount, sizeof(struct arc *), sortins_cmp);
938 
939  /*
940  * arcarray very likely includes dups, so we must eliminate them. (This
941  * could be folded into the next loop, but it's not worth the trouble.)
942  */
943  j = 0;
944  for (i = 1; i < arccount; i++)
945  {
946  switch (sortins_cmp(&arcarray[j], &arcarray[i]))
947  {
948  case -1:
949  /* non-dup */
950  arcarray[++j] = arcarray[i];
951  break;
952  case 0:
953  /* dup */
954  break;
955  default:
956  /* trouble */
958  }
959  }
960  arccount = j + 1;
961 
962  /*
963  * Now merge into s' inchain. Note that createarc() will put new arcs
964  * onto the front of s's chain, so it does not break our walk through the
965  * sorted part of the chain.
966  */
967  i = 0;
968  na = s->ins;
969  while (i < arccount && na != NULL)
970  {
971  struct arc *a = arcarray[i];
972 
973  switch (sortins_cmp(&a, &na))
974  {
975  case -1:
976  /* s does not have anything matching a */
977  createarc(nfa, a->type, a->co, a->from, s);
978  i++;
979  break;
980  case 0:
981  /* match, advance in both lists */
982  i++;
983  na = na->inchain;
984  break;
985  case +1:
986  /* advance only na; array might have a match later */
987  na = na->inchain;
988  break;
989  default:
991  }
992  }
993  while (i < arccount)
994  {
995  /* s does not have anything matching a */
996  struct arc *a = arcarray[i];
997 
998  createarc(nfa, a->type, a->co, a->from, s);
999  i++;
1000  }
1001 }
#define NISERR()
Definition: regc_nfa.c:39
struct state * from
Definition: regguts.h:280
int type
Definition: regguts.h:278
#define NERR(e)
Definition: regc_nfa.c:40
Definition: regguts.h:276
#define NOTREACHED
Definition: regguts.h:91
struct vars * v
Definition: regguts.h:328
regex_t * re
Definition: regcomp.c:228
#define assert(TEST)
Definition: imath.c:37
static int sortins_cmp(const void *a, const void *b)
Definition: regc_nfa.c:624
static void createarc(struct nfa *nfa, int t, color co, struct state *from, struct state *to)
Definition: regc_nfa.c:322
color co
Definition: regguts.h:279
#define NULL
Definition: c.h:226
struct arc * inchain
Definition: regguts.h:285
#define CANCEL_REQUESTED(re)
Definition: regguts.h:452
int i
#define qsort(a, b, c, d)
Definition: port.h:440
struct arc * ins
Definition: regguts.h:304
#define REG_CANCEL
Definition: regex.h:157
static void sortins(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:582
static void moveins ( struct nfa nfa,
struct state oldState,
struct state newState 
)
static

Definition at line 736 of file regc_nfa.c.

References assert, BULK_ARC_OP_USE_SORT, CANCEL_REQUESTED, changearctarget(), cparc(), freearc(), arc::from, arc::inchain, state::ins, NERR, state::nins, NISERR, NOTREACHED, NULL, vars::re, REG_CANCEL, sortins(), sortins_cmp(), and nfa::v.

Referenced by fixempties(), and pull().

739 {
740  assert(oldState != newState);
741 
742  if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins))
743  {
744  /* With not too many arcs, just do them one at a time */
745  struct arc *a;
746 
747  while ((a = oldState->ins) != NULL)
748  {
749  cparc(nfa, a, a->from, newState);
750  freearc(nfa, a);
751  }
752  }
753  else
754  {
755  /*
756  * With many arcs, use a sort-merge approach. Note changearctarget()
757  * will put the arc onto the front of newState's chain, so it does not
758  * break our walk through the sorted part of the chain.
759  */
760  struct arc *oa;
761  struct arc *na;
762 
763  /*
764  * Because we bypass newarc() in this code path, we'd better include a
765  * cancel check.
766  */
767  if (CANCEL_REQUESTED(nfa->v->re))
768  {
769  NERR(REG_CANCEL);
770  return;
771  }
772 
773  sortins(nfa, oldState);
774  sortins(nfa, newState);
775  if (NISERR())
776  return; /* might have failed to sort */
777  oa = oldState->ins;
778  na = newState->ins;
779  while (oa != NULL && na != NULL)
780  {
781  struct arc *a = oa;
782 
783  switch (sortins_cmp(&oa, &na))
784  {
785  case -1:
786  /* newState does not have anything matching oa */
787  oa = oa->inchain;
788 
789  /*
790  * Rather than doing createarc+freearc, we can just unlink
791  * and relink the existing arc struct.
792  */
793  changearctarget(a, newState);
794  break;
795  case 0:
796  /* match, advance in both lists */
797  oa = oa->inchain;
798  na = na->inchain;
799  /* ... and drop duplicate arc from oldState */
800  freearc(nfa, a);
801  break;
802  case +1:
803  /* advance only na; oa might have a match later */
804  na = na->inchain;
805  break;
806  default:
808  }
809  }
810  while (oa != NULL)
811  {
812  /* newState does not have anything matching oa */
813  struct arc *a = oa;
814 
815  oa = oa->inchain;
816  changearctarget(a, newState);
817  }
818  }
819 
820  assert(oldState->nins == 0);
821  assert(oldState->ins == NULL);
822 }
#define NISERR()
Definition: regc_nfa.c:39
struct state * from
Definition: regguts.h:280
#define NERR(e)
Definition: regc_nfa.c:40
Definition: regguts.h:276
static void changearctarget(struct arc *a, struct state *newto)
Definition: regc_nfa.c:495
int nins
Definition: regguts.h:303
#define NOTREACHED
Definition: regguts.h:91
struct vars * v
Definition: regguts.h:328
#define BULK_ARC_OP_USE_SORT(nsrcarcs, ndestarcs)
Definition: regc_nfa.c:720
regex_t * re
Definition: regcomp.c:228
#define assert(TEST)
Definition: imath.c:37
static int sortins_cmp(const void *a, const void *b)
Definition: regc_nfa.c:624
static void freearc(struct nfa *nfa, struct arc *victim)
Definition: regc_nfa.c:421
#define NULL
Definition: c.h:226
static void cparc(struct nfa *nfa, struct arc *oa, struct state *from, struct state *to)
Definition: regc_nfa.c:570
struct arc * inchain
Definition: regguts.h:285
#define CANCEL_REQUESTED(re)
Definition: regguts.h:452
struct arc * ins
Definition: regguts.h:304
#define REG_CANCEL
Definition: regex.h:157
static void sortins(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:582
static void moveouts ( struct nfa nfa,
struct state oldState,
struct state newState 
)
static

Definition at line 1007 of file regc_nfa.c.

References assert, BULK_ARC_OP_USE_SORT, CANCEL_REQUESTED, arc::co, cparc(), createarc(), freearc(), NERR, NISERR, NOTREACHED, state::nouts, NULL, arc::outchain, state::outs, vars::re, REG_CANCEL, sortouts(), sortouts_cmp(), arc::to, arc::type, and nfa::v.

Referenced by fixempties(), and push().

1010 {
1011  assert(oldState != newState);
1012 
1013  if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts))
1014  {
1015  /* With not too many arcs, just do them one at a time */
1016  struct arc *a;
1017 
1018  while ((a = oldState->outs) != NULL)
1019  {
1020  cparc(nfa, a, newState, a->to);
1021  freearc(nfa, a);
1022  }
1023  }
1024  else
1025  {
1026  /*
1027  * With many arcs, use a sort-merge approach. Note that createarc()
1028  * will put new arcs onto the front of newState's chain, so it does
1029  * not break our walk through the sorted part of the chain.
1030  */
1031  struct arc *oa;
1032  struct arc *na;
1033 
1034  /*
1035  * Because we bypass newarc() in this code path, we'd better include a
1036  * cancel check.
1037  */
1038  if (CANCEL_REQUESTED(nfa->v->re))
1039  {
1040  NERR(REG_CANCEL);
1041  return;
1042  }
1043 
1044  sortouts(nfa, oldState);
1045  sortouts(nfa, newState);
1046  if (NISERR())
1047  return; /* might have failed to sort */
1048  oa = oldState->outs;
1049  na = newState->outs;
1050  while (oa != NULL && na != NULL)
1051  {
1052  struct arc *a = oa;
1053 
1054  switch (sortouts_cmp(&oa, &na))
1055  {
1056  case -1:
1057  /* newState does not have anything matching oa */
1058  oa = oa->outchain;
1059  createarc(nfa, a->type, a->co, newState, a->to);
1060  freearc(nfa, a);
1061  break;
1062  case 0:
1063  /* match, advance in both lists */
1064  oa = oa->outchain;
1065  na = na->outchain;
1066  /* ... and drop duplicate arc from oldState */
1067  freearc(nfa, a);
1068  break;
1069  case +1:
1070  /* advance only na; oa might have a match later */
1071  na = na->outchain;
1072  break;
1073  default:
1074  assert(NOTREACHED);
1075  }
1076  }
1077  while (oa != NULL)
1078  {
1079  /* newState does not have anything matching oa */
1080  struct arc *a = oa;
1081 
1082  oa = oa->outchain;
1083  createarc(nfa, a->type, a->co, newState, a->to);
1084  freearc(nfa, a);
1085  }
1086  }
1087 
1088  assert(oldState->nouts == 0);
1089  assert(oldState->outs == NULL);
1090 }
#define NISERR()
Definition: regc_nfa.c:39
int type
Definition: regguts.h:278
#define NERR(e)
Definition: regc_nfa.c:40
Definition: regguts.h:276
#define NOTREACHED
Definition: regguts.h:91
struct vars * v
Definition: regguts.h:328
#define BULK_ARC_OP_USE_SORT(nsrcarcs, ndestarcs)
Definition: regc_nfa.c:720
regex_t * re
Definition: regcomp.c:228
struct arc * outchain
Definition: regguts.h:282
#define assert(TEST)
Definition: imath.c:37
struct arc * outs
Definition: regguts.h:306
static void freearc(struct nfa *nfa, struct arc *victim)
Definition: regc_nfa.c:421
struct state * to
Definition: regguts.h:281
static void createarc(struct nfa *nfa, int t, color co, struct state *from, struct state *to)
Definition: regc_nfa.c:322
color co
Definition: regguts.h:279
#define NULL
Definition: c.h:226
static void sortouts(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:649
static int sortouts_cmp(const void *a, const void *b)
Definition: regc_nfa.c:691
int nouts
Definition: regguts.h:305
static void cparc(struct nfa *nfa, struct arc *oa, struct state *from, struct state *to)
Definition: regc_nfa.c:570
#define CANCEL_REQUESTED(re)
Definition: regguts.h:452
#define REG_CANCEL
Definition: regex.h:157
static void newarc ( struct nfa nfa,
int  t,
color  co,
struct state from,
struct state to 
)
static

Definition at line 276 of file regc_nfa.c.

References assert, CANCEL_REQUESTED, arc::co, createarc(), arc::from, arc::inchain, state::ins, NERR, state::nins, state::nouts, NULL, arc::outchain, state::outs, vars::re, REG_CANCEL, arc::to, arc::type, and nfa::v.

Referenced by cloneouts(), colorcomplement(), cparc(), dupnfa(), newnfa(), okcolors(), pullback(), pushfwd(), rainbow(), subcolorcvec(), subcoloronechr(), and subcoloronerow().

281 {
282  struct arc *a;
283 
284  assert(from != NULL && to != NULL);
285 
286  /*
287  * This is a handy place to check for operation cancel during regex
288  * compilation, since no code path will go very long without making a new
289  * state or arc.
290  */
291  if (CANCEL_REQUESTED(nfa->v->re))
292  {
293  NERR(REG_CANCEL);
294  return;
295  }
296 
297  /* check for duplicate arc, using whichever chain is shorter */
298  if (from->nouts <= to->nins)
299  {
300  for (a = from->outs; a != NULL; a = a->outchain)
301  if (a->to == to && a->co == co && a->type == t)
302  return;
303  }
304  else
305  {
306  for (a = to->ins; a != NULL; a = a->inchain)
307  if (a->from == from && a->co == co && a->type == t)
308  return;
309  }
310 
311  /* no dup, so create the arc */
312  createarc(nfa, t, co, from, to);
313 }
struct state * from
Definition: regguts.h:280
int type
Definition: regguts.h:278
#define NERR(e)
Definition: regc_nfa.c:40
Definition: regguts.h:276
int nins
Definition: regguts.h:303
struct vars * v
Definition: regguts.h:328
regex_t * re
Definition: regcomp.c:228
struct arc * outchain
Definition: regguts.h:282
#define assert(TEST)
Definition: imath.c:37
struct arc * outs
Definition: regguts.h:306
struct state * to
Definition: regguts.h:281
static void createarc(struct nfa *nfa, int t, color co, struct state *from, struct state *to)
Definition: regc_nfa.c:322
color co
Definition: regguts.h:279
#define NULL
Definition: c.h:226
int nouts
Definition: regguts.h:305
struct arc * inchain
Definition: regguts.h:285
#define CANCEL_REQUESTED(re)
Definition: regguts.h:452
struct arc * ins
Definition: regguts.h:304
#define REG_CANCEL
Definition: regex.h:157
static struct state* newfstate ( struct nfa nfa,
int  flag 
)
static

Definition at line 188 of file regc_nfa.c.

References state::flag, newstate(), and NULL.

Referenced by newnfa().

189 {
190  struct state *s;
191 
192  s = newstate(nfa);
193  if (s != NULL)
194  s->flag = (char) flag;
195  return s;
196 }
char * flag(int b)
Definition: test-ctype.c:33
char flag
Definition: regguts.h:302
static struct state * newstate(struct nfa *nfa)
Definition: regc_nfa.c:124
#define NULL
Definition: c.h:226
Definition: regguts.h:298
static struct nfa* newnfa ( struct vars v,
struct colormap cm,
struct nfa parent 
)
static

Definition at line 47 of file regc_nfa.c.

References nfa::bos, nfa::cm, COLORLESS, nfa::eos, ERR, nfa::final, nfa::free, freenfa(), nfa::init, ISERR, MALLOC, newarc(), newfstate(), newstate(), nfa::nstates, NULL, nfa::parent, PLAIN, nfa::post, nfa::pre, rainbow(), REG_ESPACE, nfa::slast, nfa::states, and nfa::v.

50 {
51  struct nfa *nfa;
52 
53  nfa = (struct nfa *) MALLOC(sizeof(struct nfa));
54  if (nfa == NULL)
55  {
56  ERR(REG_ESPACE);
57  return NULL;
58  }
59 
60  nfa->states = NULL;
61  nfa->slast = NULL;
62  nfa->free = NULL;
63  nfa->nstates = 0;
64  nfa->cm = cm;
65  nfa->v = v;
66  nfa->bos[0] = nfa->bos[1] = COLORLESS;
67  nfa->eos[0] = nfa->eos[1] = COLORLESS;
68  nfa->parent = parent; /* Precedes newfstate so parent is valid. */
69  nfa->post = newfstate(nfa, '@'); /* number 0 */
70  nfa->pre = newfstate(nfa, '>'); /* number 1 */
71 
72  nfa->init = newstate(nfa); /* may become invalid later */
73  nfa->final = newstate(nfa);
74  if (ISERR())
75  {
76  freenfa(nfa);
77  return NULL;
78  }
79  rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
80  newarc(nfa, '^', 1, nfa->pre, nfa->init);
81  newarc(nfa, '^', 0, nfa->pre, nfa->init);
82  rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
83  newarc(nfa, '$', 1, nfa->final, nfa->post);
84  newarc(nfa, '$', 0, nfa->final, nfa->post);
85 
86  if (ISERR())
87  {
88  freenfa(nfa);
89  return NULL;
90  }
91  return nfa;
92 }
#define ERR
Definition: _int.h:146
struct state * final
Definition: regguts.h:319
#define REG_ESPACE
Definition: regex.h:149
struct state * states
Definition: regguts.h:322
color eos[2]
Definition: regguts.h:327
struct state * post
Definition: regguts.h:320
Definition: regguts.h:315
#define ISERR()
Definition: regcomp.c:264
#define MALLOC(n)
Definition: regcustom.h:53
static struct state * newfstate(struct nfa *nfa, int flag)
Definition: regc_nfa.c:188
struct vars * v
Definition: regguts.h:328
struct state * free
Definition: regguts.h:324
color bos[2]
Definition: regguts.h:326
struct colormap * cm
Definition: regguts.h:325
int nstates
Definition: regguts.h:321
static void freenfa(struct nfa *nfa)
Definition: regc_nfa.c:98
static void rainbow(struct nfa *nfa, struct colormap *cm, int type, color but, struct state *from, struct state *to)
Definition: regc_color.c:1017
static struct state * newstate(struct nfa *nfa)
Definition: regc_nfa.c:124
#define PLAIN
Definition: regcomp.c:278
#define NULL
Definition: c.h:226
static void newarc(struct nfa *nfa, int t, color co, struct state *from, struct state *to)
Definition: regc_nfa.c:276
struct state * slast
Definition: regguts.h:323
struct nfa * parent
Definition: regguts.h:329
struct state * init
Definition: regguts.h:318
#define COLORLESS
Definition: regguts.h:137
struct state * pre
Definition: regguts.h:317
static struct state* newstate ( struct nfa nfa)
static

Definition at line 124 of file regc_nfa.c.

References assert, CANCEL_REQUESTED, state::flag, state::free, nfa::free, state::ins, MALLOC, NERR, arcbatch::next, state::next, state::nins, state::no, state::noas, state::nouts, nfa::nstates, NULL, state::oas, state::outs, state::prev, vars::re, REG_CANCEL, REG_ESPACE, REG_ETOOBIG, REG_MAX_COMPILE_SPACE, nfa::slast, vars::spaceused, nfa::states, state::tmp, and nfa::v.

Referenced by breakconstraintloop(), clonesuccessorstates(), duptraverse(), newfstate(), newnfa(), pull(), and push().

125 {
126  struct state *s;
127 
128  /*
129  * This is a handy place to check for operation cancel during regex
130  * compilation, since no code path will go very long without making a new
131  * state or arc.
132  */
133  if (CANCEL_REQUESTED(nfa->v->re))
134  {
135  NERR(REG_CANCEL);
136  return NULL;
137  }
138 
139  if (nfa->free != NULL)
140  {
141  s = nfa->free;
142  nfa->free = s->next;
143  }
144  else
145  {
146  if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
147  {
148  NERR(REG_ETOOBIG);
149  return NULL;
150  }
151  s = (struct state *) MALLOC(sizeof(struct state));
152  if (s == NULL)
153  {
154  NERR(REG_ESPACE);
155  return NULL;
156  }
157  nfa->v->spaceused += sizeof(struct state);
158  s->oas.next = NULL;
159  s->free = NULL;
160  s->noas = 0;
161  }
162 
163  assert(nfa->nstates >= 0);
164  s->no = nfa->nstates++;
165  s->flag = 0;
166  if (nfa->states == NULL)
167  nfa->states = s;
168  s->nins = 0;
169  s->ins = NULL;
170  s->nouts = 0;
171  s->outs = NULL;
172  s->tmp = NULL;
173  s->next = NULL;
174  if (nfa->slast != NULL)
175  {
176  assert(nfa->slast->next == NULL);
177  nfa->slast->next = s;
178  }
179  s->prev = nfa->slast;
180  nfa->slast = s;
181  return s;
182 }
int no
Definition: regguts.h:300
#define NERR(e)
Definition: regc_nfa.c:40
int noas
Definition: regguts.h:312
#define REG_ESPACE
Definition: regex.h:149
struct state * states
Definition: regguts.h:322
int nins
Definition: regguts.h:303
#define MALLOC(n)
Definition: regcustom.h:53
#define REG_MAX_COMPILE_SPACE
Definition: regguts.h:383
struct arc * free
Definition: regguts.h:307
struct vars * v
Definition: regguts.h:328
struct arcbatch * next
Definition: regguts.h:293
regex_t * re
Definition: regcomp.c:228
struct state * free
Definition: regguts.h:324
#define assert(TEST)
Definition: imath.c:37
struct state * next
Definition: regguts.h:309
int nstates
Definition: regguts.h:321
struct arc * outs
Definition: regguts.h:306
char flag
Definition: regguts.h:302
struct state * tmp
Definition: regguts.h:308
#define NULL
Definition: c.h:226
Definition: regguts.h:298
#define REG_ETOOBIG
Definition: regex.h:155
struct state * prev
Definition: regguts.h:310
struct state * slast
Definition: regguts.h:323
int nouts
Definition: regguts.h:305
#define CANCEL_REQUESTED(re)
Definition: regguts.h:452
size_t spaceused
Definition: regcomp.c:256
struct arcbatch oas
Definition: regguts.h:311
struct arc * ins
Definition: regguts.h:304
#define REG_CANCEL
Definition: regex.h:157
static long optimize ( struct nfa nfa,
FILE *  f 
)
static

Definition at line 1437 of file regc_nfa.c.

References analyze(), cleanup(), dumpnfa(), fixconstraintloops(), fixempties(), NULL, pullback(), pushfwd(), and verbose.

1439 {
1440 #ifdef REG_DEBUG
1441  int verbose = (f != NULL) ? 1 : 0;
1442 
1443  if (verbose)
1444  fprintf(f, "\ninitial cleanup:\n");
1445 #endif
1446  cleanup(nfa); /* may simplify situation */
1447 #ifdef REG_DEBUG
1448  if (verbose)
1449  dumpnfa(nfa, f);
1450  if (verbose)
1451  fprintf(f, "\nempties:\n");
1452 #endif
1453  fixempties(nfa, f); /* get rid of EMPTY arcs */
1454 #ifdef REG_DEBUG
1455  if (verbose)
1456  fprintf(f, "\nconstraints:\n");
1457 #endif
1458  fixconstraintloops(nfa, f); /* get rid of constraint loops */
1459  pullback(nfa, f); /* pull back constraints backward */
1460  pushfwd(nfa, f); /* push fwd constraints forward */
1461 #ifdef REG_DEBUG
1462  if (verbose)
1463  fprintf(f, "\nfinal cleanup:\n");
1464 #endif
1465  cleanup(nfa); /* final tidying */
1466 #ifdef REG_DEBUG
1467  if (verbose)
1468  dumpnfa(nfa, f);
1469 #endif
1470  return analyze(nfa); /* and analysis */
1471 }
static void fixconstraintloops(struct nfa *nfa, FILE *f)
Definition: regc_nfa.c:2163
static void fixempties(struct nfa *nfa, FILE *f)
Definition: regc_nfa.c:1869
static void pullback(struct nfa *nfa, FILE *f)
Definition: regc_nfa.c:1477
static void dumpnfa(struct nfa *nfa, FILE *f)
Definition: regc_nfa.c:2965
static void pushfwd(struct nfa *nfa, FILE *f)
Definition: regc_nfa.c:1644
static int verbose
Definition: pg_basebackup.c:87
#define NULL
Definition: c.h:226
static void cleanup(struct nfa *nfa)
Definition: regc_nfa.c:2729
static long analyze(struct nfa *nfa)
Definition: regc_nfa.c:2816
static int pull ( struct nfa nfa,
struct arc con,
struct state **  intermediates 
)
static

Definition at line 1557 of file regc_nfa.c.

References assert, combine(), COMPATIBLE, copyins(), cparc(), state::flag, freearc(), arc::from, arc::inchain, INCOMPATIBLE, state::ins, moveins(), newstate(), state::nins, NISERR, NOTREACHED, state::nouts, NULL, state::outs, SATISFIED, state::tmp, and arc::to.

Referenced by pullback().

1560 {
1561  struct state *from = con->from;
1562  struct state *to = con->to;
1563  struct arc *a;
1564  struct arc *nexta;
1565  struct state *s;
1566 
1567  assert(from != to); /* should have gotten rid of this earlier */
1568  if (from->flag) /* can't pull back beyond start */
1569  return 0;
1570  if (from->nins == 0)
1571  { /* unreachable */
1572  freearc(nfa, con);
1573  return 1;
1574  }
1575 
1576  /*
1577  * First, clone from state if necessary to avoid other outarcs. This may
1578  * seem wasteful, but it simplifies the logic, and we'll get rid of the
1579  * clone state again at the bottom.
1580  */
1581  if (from->nouts > 1)
1582  {
1583  s = newstate(nfa);
1584  if (NISERR())
1585  return 0;
1586  copyins(nfa, from, s); /* duplicate inarcs */
1587  cparc(nfa, con, s, to); /* move constraint arc */
1588  freearc(nfa, con);
1589  if (NISERR())
1590  return 0;
1591  from = s;
1592  con = from->outs;
1593  }
1594  assert(from->nouts == 1);
1595 
1596  /* propagate the constraint into the from state's inarcs */
1597  for (a = from->ins; a != NULL && !NISERR(); a = nexta)
1598  {
1599  nexta = a->inchain;
1600  switch (combine(con, a))
1601  {
1602  case INCOMPATIBLE: /* destroy the arc */
1603  freearc(nfa, a);
1604  break;
1605  case SATISFIED: /* no action needed */
1606  break;
1607  case COMPATIBLE: /* swap the two arcs, more or less */
1608  /* need an intermediate state, but might have one already */
1609  for (s = *intermediates; s != NULL; s = s->tmp)
1610  {
1611  assert(s->nins > 0 && s->nouts > 0);
1612  if (s->ins->from == a->from && s->outs->to == to)
1613  break;
1614  }
1615  if (s == NULL)
1616  {
1617  s = newstate(nfa);
1618  if (NISERR())
1619  return 0;
1620  s->tmp = *intermediates;
1621  *intermediates = s;
1622  }
1623  cparc(nfa, con, a->from, s);
1624  cparc(nfa, a, s, to);
1625  freearc(nfa, a);
1626  break;
1627  default:
1628  assert(NOTREACHED);
1629  break;
1630  }
1631  }
1632 
1633  /* remaining inarcs, if any, incorporate the constraint */
1634  moveins(nfa, from, to);
1635  freearc(nfa, con);
1636  /* from state is now useless, but we leave it to pullback() to clean up */
1637  return 1;
1638 }
#define NISERR()
Definition: regc_nfa.c:39
struct state * from
Definition: regguts.h:280
Definition: regguts.h:276
int nins
Definition: regguts.h:303
static void copyins(struct nfa *nfa, struct state *oldState, struct state *newState)
Definition: regc_nfa.c:828
#define NOTREACHED
Definition: regguts.h:91
#define SATISFIED
Definition: regcomp.c:161
#define assert(TEST)
Definition: imath.c:37
struct arc * outs
Definition: regguts.h:306
static void freearc(struct nfa *nfa, struct arc *victim)
Definition: regc_nfa.c:421
char flag
Definition: regguts.h:302
struct state * tmp
Definition: regguts.h:308
static void moveins(struct nfa *nfa, struct state *oldState, struct state *newState)
Definition: regc_nfa.c:736
struct state * to
Definition: regguts.h:281
#define COMPATIBLE
Definition: regcomp.c:162
static struct state * newstate(struct nfa *nfa)
Definition: regc_nfa.c:124
#define NULL
Definition: c.h:226
Definition: regguts.h:298
int nouts
Definition: regguts.h:305
static void cparc(struct nfa *nfa, struct arc *oa, struct state *from, struct state *to)
Definition: regc_nfa.c:570
struct arc * inchain
Definition: regguts.h:285
#define INCOMPATIBLE
Definition: regcomp.c:160
struct arc * ins
Definition: regguts.h:304
static int combine(struct arc *con, struct arc *a)
Definition: regc_nfa.c:1815
static void pullback ( struct nfa nfa,
FILE *  f 
)
static

Definition at line 1477 of file regc_nfa.c.

References assert, BEHIND, nfa::bos, arc::co, dropstate(), dumpnfa(), state::flag, freearc(), arc::from, newarc(), state::next, state::nins, NISERR, state::nouts, NULL, arc::outchain, state::outs, PLAIN, nfa::pre, progress, pull(), nfa::states, state::tmp, arc::to, and arc::type.

Referenced by optimize().

1479 {
1480  struct state *s;
1481  struct state *nexts;
1482  struct arc *a;
1483  struct arc *nexta;
1484  struct state *intermediates;
1485  int progress;
1486 
1487  /* find and pull until there are no more */
1488  do
1489  {
1490  progress = 0;
1491  for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
1492  {
1493  nexts = s->next;
1494  intermediates = NULL;
1495  for (a = s->outs; a != NULL && !NISERR(); a = nexta)
1496  {
1497  nexta = a->outchain;
1498  if (a->type == '^' || a->type == BEHIND)
1499  if (pull(nfa, a, &intermediates))
1500  progress = 1;
1501  }
1502  /* clear tmp fields of intermediate states created here */
1503  while (intermediates != NULL)
1504  {
1505  struct state *ns = intermediates->tmp;
1506 
1507  intermediates->tmp = NULL;
1508  intermediates = ns;
1509  }
1510  /* if s is now useless, get rid of it */
1511  if ((s->nins == 0 || s->nouts == 0) && !s->flag)
1512  dropstate(nfa, s);
1513  }
1514  if (progress && f != NULL)
1515  dumpnfa(nfa, f);
1516  } while (progress && !NISERR());
1517  if (NISERR())
1518  return;
1519 
1520  /*
1521  * Any ^ constraints we were able to pull to the start state can now be
1522  * replaced by PLAIN arcs referencing the BOS or BOL colors. There should
1523  * be no other ^ or BEHIND arcs left in the NFA, though we do not check
1524  * that here (compact() will fail if so).
1525  */
1526  for (a = nfa->pre->outs; a != NULL; a = nexta)
1527  {
1528  nexta = a->outchain;
1529  if (a->type == '^')
1530  {
1531  assert(a->co == 0 || a->co == 1);
1532  newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to);
1533  freearc(nfa, a);
1534  }
1535  }
1536 }
#define NISERR()
Definition: regc_nfa.c:39
struct state * from
Definition: regguts.h:280
int type
Definition: regguts.h:278
Definition: regguts.h:276
struct state * states
Definition: regguts.h:322
int nins
Definition: regguts.h:303
static void dumpnfa(struct nfa *nfa, FILE *f)
Definition: regc_nfa.c:2965
#define BEHIND
Definition: regcomp.c:288
struct arc * outchain
Definition: regguts.h:282
color bos[2]
Definition: regguts.h:326
#define assert(TEST)
Definition: imath.c:37
struct state * next
Definition: regguts.h:309
struct arc * outs
Definition: regguts.h:306
static void freearc(struct nfa *nfa, struct arc *victim)
Definition: regc_nfa.c:421
char flag
Definition: regguts.h:302
struct state * tmp
Definition: regguts.h:308
int progress
Definition: pgbench.c:172
struct state * to
Definition: regguts.h:281
#define PLAIN
Definition: regcomp.c:278
color co
Definition: regguts.h:279
#define NULL
Definition: c.h:226
Definition: regguts.h:298
static int pull(struct nfa *nfa, struct arc *con, struct state **intermediates)
Definition: regc_nfa.c:1557
static void newarc(struct nfa *nfa, int t, color co, struct state *from, struct state *to)
Definition: regc_nfa.c:276
static void dropstate(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:202
int nouts
Definition: regguts.h:305
struct state * pre
Definition: regguts.h:317
static int push ( struct nfa nfa,
struct arc con,
struct state **  intermediates 
)
static

Definition at line 1724 of file regc_nfa.c.

References assert, combine(), COMPATIBLE, copyouts(), cparc(), state::flag, freearc(), arc::from, INCOMPATIBLE, state::ins, moveouts(), newstate(), state::nins, NISERR, NOTREACHED, state::nouts, NULL, arc::outchain, state::outs, SATISFIED, state::tmp, and arc::to.

Referenced by pushfwd().

1727 {
1728  struct state *from = con->from;
1729  struct state *to = con->to;
1730  struct arc *a;
1731  struct arc *nexta;
1732  struct state *s;
1733 
1734  assert(to != from); /* should have gotten rid of this earlier */
1735  if (to->flag) /* can't push forward beyond end */
1736  return 0;
1737  if (to->nouts == 0)
1738  { /* dead end */
1739  freearc(nfa, con);
1740  return 1;
1741  }
1742 
1743  /*
1744  * First, clone to state if necessary to avoid other inarcs. This may
1745  * seem wasteful, but it simplifies the logic, and we'll get rid of the
1746  * clone state again at the bottom.
1747  */
1748  if (to->nins > 1)
1749  {
1750  s = newstate(nfa);
1751  if (NISERR())
1752  return 0;
1753  copyouts(nfa, to, s); /* duplicate outarcs */
1754  cparc(nfa, con, from, s); /* move constraint arc */
1755  freearc(nfa, con);
1756  if (NISERR())
1757  return 0;
1758  to = s;
1759  con = to->ins;
1760  }
1761  assert(to->nins == 1);
1762 
1763  /* propagate the constraint into the to state's outarcs */
1764  for (a = to->outs; a != NULL && !NISERR(); a = nexta)
1765  {
1766  nexta = a->outchain;
1767  switch (combine(con, a))
1768  {
1769  case INCOMPATIBLE: /* destroy the arc */
1770  freearc(nfa, a);
1771  break;
1772  case SATISFIED: /* no action needed */
1773  break;
1774  case COMPATIBLE: /* swap the two arcs, more or less */
1775  /* need an intermediate state, but might have one already */
1776  for (s = *intermediates; s != NULL; s = s->tmp)
1777  {
1778  assert(s->nins > 0 && s->nouts > 0);
1779  if (s->ins->from == from && s->outs->to == a->to)
1780  break;
1781  }
1782  if (s == NULL)
1783  {
1784  s = newstate(nfa);
1785  if (NISERR())
1786  return 0;
1787  s->tmp = *intermediates;
1788  *intermediates = s;
1789  }
1790  cparc(nfa, con, s, a->to);
1791  cparc(nfa, a, from, s);
1792  freearc(nfa, a);
1793  break;
1794  default:
1795  assert(NOTREACHED);
1796  break;
1797  }
1798  }
1799 
1800  /* remaining outarcs, if any, incorporate the constraint */
1801  moveouts(nfa, to, from);
1802  freearc(nfa, con);
1803  /* to state is now useless, but we leave it to pushfwd() to clean up */
1804  return 1;
1805 }
#define NISERR()
Definition: regc_nfa.c:39
struct state * from
Definition: regguts.h:280
static void moveouts(struct nfa *nfa, struct state *oldState, struct state *newState)
Definition: regc_nfa.c:1007
Definition: regguts.h:276
int nins
Definition: regguts.h:303
#define NOTREACHED
Definition: regguts.h:91
#define SATISFIED
Definition: regcomp.c:161
static void copyouts(struct nfa *nfa, struct state *oldState, struct state *newState)
Definition: regc_nfa.c:1096
struct arc * outchain
Definition: regguts.h:282
#define assert(TEST)
Definition: imath.c:37
struct arc * outs
Definition: regguts.h:306
static void freearc(struct nfa *nfa, struct arc *victim)
Definition: regc_nfa.c:421
char flag
Definition: regguts.h:302
struct state * tmp
Definition: regguts.h:308
struct state * to
Definition: regguts.h:281
#define COMPATIBLE
Definition: regcomp.c:162
static struct state * newstate(struct nfa *nfa)
Definition: regc_nfa.c:124
#define NULL
Definition: c.h:226
Definition: regguts.h:298
int nouts
Definition: regguts.h:305
static void cparc(struct nfa *nfa, struct arc *oa, struct state *from, struct state *to)
Definition: regc_nfa.c:570
#define INCOMPATIBLE
Definition: regcomp.c:160
struct arc * ins
Definition: regguts.h:304
static int combine(struct arc *con, struct arc *a)
Definition: regc_nfa.c:1815
static void pushfwd ( struct nfa nfa,
FILE *  f 
)
static

Definition at line 1644 of file regc_nfa.c.

References AHEAD, assert, arc::co, dropstate(), dumpnfa(), nfa::eos, state::flag, freearc(), arc::from, arc::inchain, state::ins, newarc(), state::next, state::nins, NISERR, state::nouts, NULL, PLAIN, nfa::post, progress, push(), nfa::states, state::tmp, arc::to, and arc::type.

Referenced by optimize().

1646 {
1647  struct state *s;
1648  struct state *nexts;
1649  struct arc *a;
1650  struct arc *nexta;
1651  struct state *intermediates;
1652  int progress;
1653 
1654  /* find and push until there are no more */
1655  do
1656  {
1657  progress = 0;
1658  for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
1659  {
1660  nexts = s->next;
1661  intermediates = NULL;
1662  for (a = s->ins; a != NULL && !NISERR(); a = nexta)
1663  {
1664  nexta = a->inchain;
1665  if (a->type == '$' || a->type == AHEAD)
1666  if (push(nfa, a, &intermediates))
1667  progress = 1;
1668  }
1669  /* clear tmp fields of intermediate states created here */
1670  while (intermediates != NULL)
1671  {
1672  struct state *ns = intermediates->tmp;
1673 
1674  intermediates->tmp = NULL;
1675  intermediates = ns;
1676  }
1677  /* if s is now useless, get rid of it */
1678  if ((s->nins == 0 || s->nouts == 0) && !s->flag)
1679  dropstate(nfa, s);
1680  }
1681  if (progress && f != NULL)
1682  dumpnfa(nfa, f);
1683  } while (progress && !NISERR());
1684  if (NISERR())
1685  return;
1686 
1687  /*
1688  * Any $ constraints we were able to push to the post state can now be
1689  * replaced by PLAIN arcs referencing the EOS or EOL colors. There should
1690  * be no other $ or AHEAD arcs left in the NFA, though we do not check
1691  * that here (compact() will fail if so).
1692  */
1693  for (a = nfa->post->ins; a != NULL; a = nexta)
1694  {
1695  nexta = a->inchain;
1696  if (a->type == '$')
1697  {
1698  assert(a->co == 0 || a->co == 1);
1699  newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to);
1700  freearc(nfa, a);
1701  }
1702  }
1703 }
#define NISERR()
Definition: regc_nfa.c:39
struct state * from
Definition: regguts.h:280
int type
Definition: regguts.h:278
static int push(struct nfa *nfa, struct arc *con, struct state **intermediates)
Definition: regc_nfa.c:1724
Definition: regguts.h:276
struct state * states
Definition: regguts.h:322
int nins
Definition: regguts.h:303
color eos[2]
Definition: regguts.h:327
struct state * post
Definition: regguts.h:320
static void dumpnfa(struct nfa *nfa, FILE *f)
Definition: regc_nfa.c:2965
#define assert(TEST)
Definition: imath.c:37
struct state * next
Definition: regguts.h:309
static void freearc(struct nfa *nfa, struct arc *victim)
Definition: regc_nfa.c:421
char flag
Definition: regguts.h:302
struct state * tmp
Definition: regguts.h:308
int progress
Definition: pgbench.c:172
struct state * to
Definition: regguts.h:281
#define PLAIN
Definition: regcomp.c:278
color co
Definition: regguts.h:279
#define NULL
Definition: c.h:226
Definition: regguts.h:298
static void newarc(struct nfa *nfa, int t, color co, struct state *from, struct state *to)
Definition: regc_nfa.c:276
static void dropstate(struct nfa *nfa, struct state *s)
Definition: regc_nfa.c:202
int nouts
Definition: regguts.h:305
struct arc * inchain
Definition: regguts.h:285
#define AHEAD
Definition: regcomp.c:287
struct arc * ins
Definition: regguts.h:304
static struct state* single_color_transition ( struct state s1,
struct state s2 
)
static

Definition at line 1368 of file regc_nfa.c.

References EMPTY, arc::from, state::ins, state::nins, state::nouts, NULL, arc::outchain, state::outs, PLAIN, s1, arc::to, and arc::type.

1369 {
1370  struct arc *a;
1371 
1372  /* Ignore leading EMPTY arc, if any */
1373  if (s1->nouts == 1 && s1->outs->type == EMPTY)
1374  s1 = s1->outs->to;
1375  /* Likewise for any trailing EMPTY arc */
1376  if (s2->nins == 1 && s2->ins->type == EMPTY)
1377  s2 = s2->ins->from;
1378  /* Perhaps we could have a single-state loop in between, if so reject */
1379  if (s1 == s2)
1380  return NULL;
1381  /* s1 must have at least one outarc... */
1382  if (s1->outs == NULL)
1383  return NULL;
1384  /* ... and they must all be PLAIN arcs to s2 */
1385  for (a = s1->outs; a != NULL; a = a->outchain)
1386  {
1387  if (a->type != PLAIN || a->to != s2)
1388  return NULL;
1389  }
1390  /* OK, return s1 as the possessor of the relevant outarcs */
1391  return s1;
1392 }
struct state * from
Definition: regguts.h:280
int type
Definition: regguts.h:278
Definition: regguts.h:276
int nins
Definition: regguts.h:303
char * s1
struct arc * outchain
Definition: regguts.h:282
struct arc * outs
Definition: regguts.h:306
struct state * to
Definition: regguts.h:281
#define PLAIN
Definition: regcomp.c:278
#define NULL
Definition: c.h:226
#define EMPTY
Definition: regcomp.c:276
int nouts
Definition: regguts.h:305
struct arc * ins
Definition: regguts.h:304
static void sortins ( struct nfa nfa,
struct state s 
)
static

Definition at line 582 of file regc_nfa.c.

References assert, FREE, i, arc::inchain, arc::inchainRev, state::ins, MALLOC, NERR, state::nins, NULL, qsort, REG_ESPACE, and sortins_cmp().

Referenced by copyins(), mergeins(), and moveins().

584 {
585  struct arc **sortarray;
586  struct arc *a;
587  int n = s->nins;
588  int i;
589 
590  if (n <= 1)
591  return; /* nothing to do */
592  /* make an array of arc pointers ... */
593  sortarray = (struct arc **) MALLOC(n * sizeof(struct arc *));
594  if (sortarray == NULL)
595  {
596  NERR(REG_ESPACE);
597  return;
598  }
599  i = 0;
600  for (a = s->ins; a != NULL; a = a->inchain)
601  sortarray[i++] = a;
602  assert(i == n);
603  /* ... sort the array */
604  qsort(sortarray, n, sizeof(struct arc *), sortins_cmp);
605  /* ... and rebuild arc list in order */
606  /* it seems worth special-casing first and last items to simplify loop */
607  a = sortarray[0];
608  s->ins = a;
609  a->inchain = sortarray[1];
610  a->inchainRev = NULL;
611  for (i = 1; i < n - 1; i++)
612  {
613  a = sortarray[i];
614  a->inchain = sortarray[i + 1];
615  a->inchainRev = sortarray[i - 1];
616  }
617  a = sortarray[i];
618  a->inchain = NULL;
619  a->inchainRev = sortarray[i - 1];
620  FREE(sortarray);
621 }
#define NERR(e)
Definition: regc_nfa.c:40
#define REG_ESPACE
Definition: regex.h:149
Definition: regguts.h:276
int nins
Definition: regguts.h:303
#define FREE(p)
Definition: regcustom.h:54
#define MALLOC(n)
Definition: regcustom.h:53
struct arc * inchainRev
Definition: regguts.h:286
#define assert(TEST)
Definition: imath.c:37
static int sortins_cmp(const void *a, const void *b)
Definition: regc_nfa.c:624
#define NULL
Definition: c.h:226
struct arc * inchain
Definition: regguts.h:285
int i
#define qsort(a, b, c, d)
Definition: port.h:440
struct arc * ins
Definition: regguts.h:304
static int sortins_cmp ( const void *  a,
const void *  b 
)
static

Definition at line 624 of file regc_nfa.c.

References arc::co, arc::from, state::no, and arc::type.

Referenced by copyins(), mergeins(), moveins(), and sortins().

625 {
626  const struct arc *aa = *((const struct arc * const *) a);
627  const struct arc *bb = *((const struct arc * const *) b);
628 
629  /* we check the fields in the order they are most likely to be different */
630  if (aa->from->no < bb->from->no)
631  return -1;
632  if (aa->from->no > bb->from->no)
633  return 1;
634  if (aa->co < bb->co)
635  return -1;
636  if (aa->co > bb->co)
637  return 1;
638  if (aa->type < bb->type)
639  return -1;
640  if (aa->type > bb->type)
641  return 1;
642  return 0;
643 }
int no
Definition: regguts.h:300
struct state * from
Definition: regguts.h:280
int type
Definition: regguts.h:278
Definition: regguts.h:276
color co
Definition: regguts.h:279
static void sortouts ( struct nfa nfa,
struct state s 
)
static

Definition at line 649 of file regc_nfa.c.

References assert, FREE, i, MALLOC, NERR, state::nouts, NULL, arc::outchain, arc::outchainRev, state::outs, qsort, REG_ESPACE, and sortouts_cmp().

Referenced by copyouts(), and moveouts().

651 {
652  struct arc **sortarray;
653  struct arc *a;
654  int n = s->nouts;
655  int i;
656 
657  if (n <= 1)
658  return; /* nothing to do */
659  /* make an array of arc pointers ... */
660  sortarray = (struct arc **) MALLOC(n * sizeof(struct arc *));
661  if (sortarray == NULL)
662  {
663  NERR(REG_ESPACE);
664  return;
665  }
666  i = 0;
667  for (a = s->outs; a != NULL; a = a->outchain)
668  sortarray[i++] = a;
669  assert(i == n);
670  /* ... sort the array */
671  qsort(sortarray, n, sizeof(struct arc *), sortouts_cmp);
672  /* ... and rebuild arc list in order */
673  /* it seems worth special-casing first and last items to simplify loop */
674  a = sortarray[0];
675  s->outs = a;
676  a->outchain = sortarray[1];
677  a->outchainRev = NULL;
678  for (i = 1; i < n - 1; i++)
679  {
680  a = sortarray[i];
681  a->outchain = sortarray[i + 1];
682  a->outchainRev = sortarray[i - 1];
683  }
684  a = sortarray[i];
685  a->outchain = NULL;
686  a->outchainRev = sortarray[i - 1];
687  FREE(sortarray);
688 }
#define NERR(e)
Definition: regc_nfa.c:40
#define REG_ESPACE
Definition: regex.h:149
Definition: regguts.h:276
#define FREE(p)
Definition: regcustom.h:54
#define MALLOC(n)
Definition: regcustom.h:53
struct arc * outchain
Definition: regguts.h:282
#define assert(TEST)
Definition: imath.c:37
struct arc * outs
Definition: regguts.h:306
#define NULL
Definition: c.h:226
static int sortouts_cmp(const void *a, const void *b)
Definition: regc_nfa.c:691
int nouts
Definition: regguts.h:305
int i
#define qsort(a, b, c, d)
Definition: port.h:440
struct arc * outchainRev
Definition: regguts.h:283
static int sortouts_cmp ( const void *  a,
const void *  b 
)
static

Definition at line 691 of file regc_nfa.c.

References arc::co, state::no, arc::to, and arc::type.

Referenced by copyouts(), moveouts(), and sortouts().

692 {
693  const struct arc *aa = *((const struct arc * const *) a);
694  const struct arc *bb = *((const struct arc * const *) b);
695 
696  /* we check the fields in the order they are most likely to be different */
697  if (aa->to->no < bb->to->no)
698  return -1;
699  if (aa->to->no > bb->to->no)
700  return 1;
701  if (aa->co < bb->co)
702  return -1;
703  if (aa->co > bb->co)
704  return 1;
705  if (aa->type < bb->type)
706  return -1;
707  if (aa->type > bb->type)
708  return 1;
709  return 0;
710 }
int no
Definition: regguts.h:300
int type
Definition: regguts.h:278
Definition: regguts.h:276
struct state * to
Definition: regguts.h:281
color co
Definition: regguts.h:279
static void specialcolors ( struct nfa nfa)
static

Definition at line 1398 of file regc_nfa.c.

References assert, nfa::bos, nfa::cm, COLORLESS, nfa::eos, NULL, nfa::parent, and pseudocolor().

1399 {
1400  /* false colors for BOS, BOL, EOS, EOL */
1401  if (nfa->parent == NULL)
1402  {
1403  nfa->bos[0] = pseudocolor(nfa->cm);
1404  nfa->bos[1] = pseudocolor(nfa->cm);
1405  nfa->eos[0] = pseudocolor(nfa->cm);
1406  nfa->eos[1] = pseudocolor(nfa->cm);
1407  }
1408  else
1409  {
1410  assert(nfa->parent->bos[0] != COLORLESS);
1411  nfa->bos[0] = nfa->parent->bos[0];
1412  assert(nfa->parent->bos[1] != COLORLESS);
1413  nfa->bos[1] = nfa->parent->bos[1];
1414  assert(nfa->parent->eos[0] != COLORLESS);
1415  nfa->eos[0] = nfa->parent->eos[0];
1416  assert(nfa->parent->eos[1] != COLORLESS);
1417  nfa->eos[1] = nfa->parent->eos[1];
1418  }
1419 }
color eos[2]
Definition: regguts.h:327
static color pseudocolor(struct colormap *cm)
Definition: regc_color.c:312
color bos[2]
Definition: regguts.h:326
#define assert(TEST)
Definition: imath.c:37
struct colormap * cm
Definition: regguts.h:325
#define NULL
Definition: c.h:226
struct nfa * parent
Definition: regguts.h:329
#define COLORLESS
Definition: regguts.h:137