PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
regexec.c File Reference
#include "regex/regguts.h"
#include "rege_dfa.c"
Include dependency graph for regexec.c:

Go to the source code of this file.

Data Structures

struct  arcp
 
struct  sset
 
struct  dfa
 
struct  smalldfa
 
struct  vars
 

Macros

#define HASH(bv, nw)   (((nw) == 1) ? *(bv) : hash(bv, nw))
 
#define HIT(h, bv, ss, nw)
 
#define STARTER   01 /* the initial state set */
 
#define POSTSTATE   02 /* includes the goal state */
 
#define LOCKED   04 /* locked in cache */
 
#define NOPROGRESS   010 /* zero-progress state set */
 
#define WORK   1 /* number of work bitvectors needed */
 
#define FEWSTATES   20 /* must be less than UBITS */
 
#define FEWCOLORS   15
 
#define DOMALLOC   ((struct smalldfa *)NULL) /* force malloc */
 
#define VISERR(vv)   ((vv)->err != 0) /* have we seen an error yet? */
 
#define ISERR()   VISERR(v)
 
#define VERR(vv, e)   ((vv)->err = ((vv)->err ? (vv)->err : (e)))
 
#define ERR(e)   VERR(v, e) /* record an error */
 
#define NOERR()   {if (ISERR()) return v->err;} /* if error seen, return it */
 
#define OFF(p)   ((p) - v->start)
 
#define LOFF(p)   ((long)OFF(p))
 
#define LOCALMAT   20
 
#define LOCALDFAS   40
 

Functions

static struct dfagetsubdfa (struct vars *, struct subre *)
 
static struct dfagetladfa (struct vars *, int)
 
static int find (struct vars *, struct cnfa *, struct colormap *)
 
static int cfind (struct vars *, struct cnfa *, struct colormap *)
 
static int cfindloop (struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **)
 
static void zapallsubs (regmatch_t *, size_t)
 
static void zaptreesubs (struct vars *, struct subre *)
 
static void subset (struct vars *, struct subre *, chr *, chr *)
 
static int cdissect (struct vars *, struct subre *, chr *, chr *)
 
static int ccondissect (struct vars *, struct subre *, chr *, chr *)
 
static int crevcondissect (struct vars *, struct subre *, chr *, chr *)
 
static int cbrdissect (struct vars *, struct subre *, chr *, chr *)
 
static int caltdissect (struct vars *, struct subre *, chr *, chr *)
 
static int citerdissect (struct vars *, struct subre *, chr *, chr *)
 
static int creviterdissect (struct vars *, struct subre *, chr *, chr *)
 
static chrlongest (struct vars *, struct dfa *, chr *, chr *, int *)
 
static chrshortest (struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
 
static int matchuntil (struct vars *, struct dfa *, chr *, struct sset **, chr **)
 
static chrlastcold (struct vars *, struct dfa *)
 
static struct dfanewdfa (struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
 
static void freedfa (struct dfa *)
 
static unsigned hash (unsigned *, int)
 
static struct ssetinitialize (struct vars *, struct dfa *, chr *)
 
static struct ssetmiss (struct vars *, struct dfa *, struct sset *, color, chr *, chr *)
 
static int lacon (struct vars *, struct cnfa *, chr *, color)
 
static struct ssetgetvacant (struct vars *, struct dfa *, chr *, chr *)
 
static struct ssetpickss (struct vars *, struct dfa *, chr *, chr *)
 
int pg_regexec (regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
 

Macro Definition Documentation

#define DOMALLOC   ((struct smalldfa *)NULL) /* force malloc */

Definition at line 98 of file regexec.c.

Referenced by getladfa(), and getsubdfa().

#define ERR (   e)    VERR(v, e) /* record an error */

Definition at line 126 of file regexec.c.

Referenced by cfindloop().

#define FEWCOLORS   15

Definition at line 88 of file regexec.c.

Referenced by newdfa().

#define FEWSTATES   20 /* must be less than UBITS */

Definition at line 87 of file regexec.c.

#define HASH (   bv,
  nw 
)    (((nw) == 1) ? *(bv) : hash(bv, nw))

Definition at line 49 of file regexec.c.

#define HIT (   h,
  bv,
  ss,
  nw 
)
Value:
((ss)->hash == (h) && ((nw) == 1 || \
memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
static unsigned hash(unsigned *, int)
#define VS(x)
Definition: regguts.h:61

Definition at line 50 of file regexec.c.

Referenced by miss().

#define ISERR ( )    VISERR(v)

Definition at line 124 of file regexec.c.

Referenced by cfind(), cfindloop(), citerdissect(), creviterdissect(), find(), getladfa(), and getsubdfa().

#define LOCALDFAS   40

Referenced by pg_regexec().

#define LOCALMAT   20

Referenced by pg_regexec().

#define LOCKED   04 /* locked in cache */

Definition at line 55 of file regexec.c.

Referenced by getvacant(), initialize(), and pickss().

#define LOFF (   p)    ((long)OFF(p))
#define NOERR ( )    {if (ISERR()) return v->err;} /* if error seen, return it */

Definition at line 127 of file regexec.c.

Referenced by caltdissect(), ccondissect(), cfind(), crevcondissect(), and find().

#define NOPROGRESS   010 /* zero-progress state set */

Definition at line 56 of file regexec.c.

Referenced by getvacant(), initialize(), lastcold(), and miss().

#define OFF (   p)    ((p) - v->start)

Definition at line 128 of file regexec.c.

Referenced by cfind(), cfindloop(), find(), and subset().

#define POSTSTATE   02 /* includes the goal state */

Definition at line 54 of file regexec.c.

Referenced by getvacant(), longest(), matchuntil(), miss(), and shortest().

#define STARTER   01 /* the initial state set */

Definition at line 53 of file regexec.c.

Referenced by initialize().

#define VERR (   vv,
  e 
)    ((vv)->err = ((vv)->err ? (vv)->err : (e)))

Definition at line 125 of file regexec.c.

#define VISERR (   vv)    ((vv)->err != 0) /* have we seen an error yet? */

Definition at line 123 of file regexec.c.

#define WORK   1 /* number of work bitvectors needed */

Definition at line 84 of file regexec.c.

Referenced by newdfa().

Function Documentation

static int caltdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1000 of file regexec.c.

References assert, cdissect(), subre::cnfa, getsubdfa(), subre::id, subre::left, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_NOMATCH, and subre::right.

Referenced by cdissect().

1004 {
1005  struct dfa *d;
1006  int er;
1007 
1008  /* We loop, rather than tail-recurse, to handle a chain of alternatives */
1009  while (t != NULL)
1010  {
1011  assert(t->op == '|');
1012  assert(t->left != NULL && t->left->cnfa.nstates > 0);
1013 
1014  MDEBUG(("calt n%d\n", t->id));
1015 
1016  d = getsubdfa(v, t->left);
1017  NOERR();
1018  if (longest(v, d, begin, end, (int *) NULL) == end)
1019  {
1020  MDEBUG(("calt matched\n"));
1021  er = cdissect(v, t->left, begin, end);
1022  if (er != REG_NOMATCH)
1023  return er;
1024  }
1025  NOERR();
1026 
1027  t = t->right;
1028  }
1029 
1030  return REG_NOMATCH;
1031 }
struct subre * right
Definition: regguts.h:432
char op
Definition: regguts.h:410
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
Definition: regexec.c:63
int nstates
Definition: regguts.h:356
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:337
#define assert(TEST)
Definition: imath.c:37
struct subre * left
Definition: regguts.h:431
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:692
struct cnfa cnfa
Definition: regguts.h:435
short id
Definition: regguts.h:426
#define REG_NOMATCH
Definition: regex.h:138
#define NOERR()
Definition: regexec.c:127
static int cbrdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 919 of file regexec.c.

References assert, DUPINF, vars::g, subre::id, subre::max, MDEBUG, subre::min, subre::op, vars::pmatch, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, regmatch_t::rm_so, vars::start, and subre::subno.

Referenced by cdissect().

923 {
924  int n = t->subno;
925  size_t numreps;
926  size_t tlen;
927  size_t brlen;
928  chr *brstring;
929  chr *p;
930  int min = t->min;
931  int max = t->max;
932 
933  assert(t != NULL);
934  assert(t->op == 'b');
935  assert(n >= 0);
936  assert((size_t) n < v->nmatch);
937 
938  MDEBUG(("cbackref n%d %d{%d-%d}\n", t->id, n, min, max));
939 
940  /* get the backreferenced string */
941  if (v->pmatch[n].rm_so == -1)
942  return REG_NOMATCH;
943  brstring = v->start + v->pmatch[n].rm_so;
944  brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
945 
946  /* special cases for zero-length strings */
947  if (brlen == 0)
948  {
949  /*
950  * matches only if target is zero length, but any number of
951  * repetitions can be considered to be present
952  */
953  if (begin == end && min <= max)
954  {
955  MDEBUG(("cbackref matched trivially\n"));
956  return REG_OKAY;
957  }
958  return REG_NOMATCH;
959  }
960  if (begin == end)
961  {
962  /* matches only if zero repetitions are okay */
963  if (min == 0)
964  {
965  MDEBUG(("cbackref matched trivially\n"));
966  return REG_OKAY;
967  }
968  return REG_NOMATCH;
969  }
970 
971  /*
972  * check target length to see if it could possibly be an allowed number of
973  * repetitions of brstring
974  */
975  assert(end > begin);
976  tlen = end - begin;
977  if (tlen % brlen != 0)
978  return REG_NOMATCH;
979  numreps = tlen / brlen;
980  if (numreps < min || (numreps > max && max != DUPINF))
981  return REG_NOMATCH;
982 
983  /* okay, compare the actual string contents */
984  p = begin;
985  while (numreps-- > 0)
986  {
987  if ((*v->g->compare) (brstring, p, brlen) != 0)
988  return REG_NOMATCH;
989  p += brlen;
990  }
991 
992  MDEBUG(("cbackref matched\n"));
993  return REG_OKAY;
994 }
int subno
Definition: regguts.h:427
regoff_t rm_so
Definition: regex.h:85
char op
Definition: regguts.h:410
short min
Definition: regguts.h:429
#define MDEBUG(arglist)
Definition: regguts.h:117
chr * start
Definition: regexec.c:111
regoff_t rm_eo
Definition: regex.h:86
pg_wchar chr
Definition: regcustom.h:68
#define REG_OKAY
Definition: regex.h:137
#define assert(TEST)
Definition: imath.c:37
struct guts * g
Definition: regexec.c:106
#define DUPINF
Definition: regguts.h:94
short max
Definition: regguts.h:430
regmatch_t * pmatch
Definition: regexec.c:109
short id
Definition: regguts.h:426
#define REG_NOMATCH
Definition: regex.h:138
static int ccondissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 763 of file regexec.c.

References assert, cdissect(), subre::cnfa, subre::flags, getsubdfa(), subre::id, subre::left, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_ASSERT, REG_NOMATCH, REG_OKAY, subre::right, SHORTER, and zaptreesubs().

Referenced by cdissect().

767 {
768  struct dfa *d;
769  struct dfa *d2;
770  chr *mid;
771  int er;
772 
773  assert(t->op == '.');
774  assert(t->left != NULL && t->left->cnfa.nstates > 0);
775  assert(t->right != NULL && t->right->cnfa.nstates > 0);
776  assert(!(t->left->flags & SHORTER));
777 
778  d = getsubdfa(v, t->left);
779  NOERR();
780  d2 = getsubdfa(v, t->right);
781  NOERR();
782  MDEBUG(("cconcat %d\n", t->id));
783 
784  /* pick a tentative midpoint */
785  mid = longest(v, d, begin, end, (int *) NULL);
786  NOERR();
787  if (mid == NULL)
788  return REG_NOMATCH;
789  MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
790 
791  /* iterate until satisfaction or failure */
792  for (;;)
793  {
794  /* try this midpoint on for size */
795  if (longest(v, d2, mid, end, (int *) NULL) == end)
796  {
797  er = cdissect(v, t->left, begin, mid);
798  if (er == REG_OKAY)
799  {
800  er = cdissect(v, t->right, mid, end);
801  if (er == REG_OKAY)
802  {
803  /* satisfaction */
804  MDEBUG(("successful\n"));
805  return REG_OKAY;
806  }
807  }
808  if (er != REG_NOMATCH)
809  return er;
810  }
811  NOERR();
812 
813  /* that midpoint didn't work, find a new one */
814  if (mid == begin)
815  {
816  /* all possibilities exhausted */
817  MDEBUG(("%d no midpoint\n", t->id));
818  return REG_NOMATCH;
819  }
820  mid = longest(v, d, begin, mid - 1, (int *) NULL);
821  NOERR();
822  if (mid == NULL)
823  {
824  /* failed to find a new one */
825  MDEBUG(("%d failed midpoint\n", t->id));
826  return REG_NOMATCH;
827  }
828  MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
829  zaptreesubs(v, t->left);
830  zaptreesubs(v, t->right);
831  }
832 
833  /* can't get here */
834  return REG_ASSERT;
835 }
struct subre * right
Definition: regguts.h:432
char op
Definition: regguts.h:410
#define LOFF(p)
Definition: regexec.c:129
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
Definition: regexec.c:63
int nstates
Definition: regguts.h:356
pg_wchar chr
Definition: regcustom.h:68
#define REG_OKAY
Definition: regex.h:137
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:635
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:337
char flags
Definition: regguts.h:411
#define assert(TEST)
Definition: imath.c:37
struct subre * left
Definition: regguts.h:431
#define REG_ASSERT
Definition: regex.h:151
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:692
struct cnfa cnfa
Definition: regguts.h:435
short id
Definition: regguts.h:426
#define SHORTER
Definition: regguts.h:413
#define REG_NOMATCH
Definition: regex.h:138
#define NOERR()
Definition: regexec.c:127
static int cdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 692 of file regexec.c.

References assert, BACKR, caltdissect(), CANCEL_REQUESTED, cbrdissect(), ccondissect(), citerdissect(), crevcondissect(), creviterdissect(), subre::flags, subre::left, LOFF, MDEBUG, subre::op, vars::re, REG_ASSERT, REG_CANCEL, REG_ETOOBIG, REG_NOMATCH, REG_OKAY, subre::right, SHORTER, STACK_TOO_DEEP, subre::subno, and subset().

Referenced by caltdissect(), ccondissect(), cfindloop(), citerdissect(), crevcondissect(), creviterdissect(), and find().

696 {
697  int er;
698 
699  assert(t != NULL);
700  MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
701 
702  /* handy place to check for operation cancel */
703  if (CANCEL_REQUESTED(v->re))
704  return REG_CANCEL;
705  /* ... and stack overrun */
706  if (STACK_TOO_DEEP(v->re))
707  return REG_ETOOBIG;
708 
709  switch (t->op)
710  {
711  case '=': /* terminal node */
712  assert(t->left == NULL && t->right == NULL);
713  er = REG_OKAY; /* no action, parent did the work */
714  break;
715  case 'b': /* back reference */
716  assert(t->left == NULL && t->right == NULL);
717  er = cbrdissect(v, t, begin, end);
718  break;
719  case '.': /* concatenation */
720  assert(t->left != NULL && t->right != NULL);
721  if (t->left->flags & SHORTER) /* reverse scan */
722  er = crevcondissect(v, t, begin, end);
723  else
724  er = ccondissect(v, t, begin, end);
725  break;
726  case '|': /* alternation */
727  assert(t->left != NULL);
728  er = caltdissect(v, t, begin, end);
729  break;
730  case '*': /* iteration */
731  assert(t->left != NULL);
732  if (t->left->flags & SHORTER) /* reverse scan */
733  er = creviterdissect(v, t, begin, end);
734  else
735  er = citerdissect(v, t, begin, end);
736  break;
737  case '(': /* capturing */
738  assert(t->left != NULL && t->right == NULL);
739  assert(t->subno > 0);
740  er = cdissect(v, t->left, begin, end);
741  if (er == REG_OKAY)
742  subset(v, t, begin, end);
743  break;
744  default:
745  er = REG_ASSERT;
746  break;
747  }
748 
749  /*
750  * We should never have a match failure unless backrefs lurk below;
751  * otherwise, either caller failed to check the DFA, or there's some
752  * inconsistency between the DFA and the node's innards.
753  */
754  assert(er != REG_NOMATCH || (t->flags & BACKR));
755 
756  return er;
757 }
struct subre * right
Definition: regguts.h:432
int subno
Definition: regguts.h:427
char op
Definition: regguts.h:410
#define LOFF(p)
Definition: regexec.c:129
#define MDEBUG(arglist)
Definition: regguts.h:117
#define REG_OKAY
Definition: regex.h:137
regex_t * re
Definition: regcomp.c:226
char flags
Definition: regguts.h:411
#define assert(TEST)
Definition: imath.c:37
static int cbrdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:919
struct subre * left
Definition: regguts.h:431
static int creviterdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:1238
#define BACKR
Definition: regguts.h:416
static int caltdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:1000
#define REG_ASSERT
Definition: regex.h:151
static int ccondissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:763
#define REG_ETOOBIG
Definition: regex.h:155
static int citerdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:1037
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:692
#define SHORTER
Definition: regguts.h:413
#define CANCEL_REQUESTED(re)
Definition: regguts.h:452
#define REG_NOMATCH
Definition: regex.h:138
static int crevcondissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:841
static void subset(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:660
#define STACK_TOO_DEEP(re)
Definition: regguts.h:455
#define REG_CANCEL
Definition: regex.h:157
static int cfind ( struct vars v,
struct cnfa cnfa,
struct colormap cm 
)
static

Definition at line 466 of file regexec.c.

References assert, cfindloop(), guts::cflags, vars::details, vars::dfa1, vars::dfa2, vars::err, freedfa(), vars::g, ISERR, newdfa(), NOERR, OFF, REG_EXPECT, regmatch_t::rm_eo, rm_detail_t::rm_extend, regmatch_t::rm_so, guts::search, and vars::stop.

Referenced by pg_regexec().

469 {
470  struct dfa *s;
471  struct dfa *d;
472  chr *cold;
473  int ret;
474 
475  s = newdfa(v, &v->g->search, cm, &v->dfa1);
476  NOERR();
477  d = newdfa(v, cnfa, cm, &v->dfa2);
478  if (ISERR())
479  {
480  assert(d == NULL);
481  freedfa(s);
482  return v->err;
483  }
484 
485  ret = cfindloop(v, cnfa, cm, d, s, &cold);
486 
487  freedfa(d);
488  freedfa(s);
489  NOERR();
490  if (v->g->cflags & REG_EXPECT)
491  {
492  assert(v->details != NULL);
493  if (cold != NULL)
494  v->details->rm_extend.rm_so = OFF(cold);
495  else
496  v->details->rm_extend.rm_so = OFF(v->stop);
497  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
498  }
499  return ret;
500 }
#define REG_EXPECT
Definition: regex.h:113
regoff_t rm_so
Definition: regex.h:85
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
rm_detail_t * details
Definition: regexec.c:110
regoff_t rm_eo
Definition: regex.h:86
Definition: regexec.c:63
struct cnfa search
Definition: regguts.h:470
pg_wchar chr
Definition: regcustom.h:68
static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **)
Definition: regexec.c:506
struct smalldfa dfa2
Definition: regexec.c:120
#define assert(TEST)
Definition: imath.c:37
struct guts * g
Definition: regexec.c:106
int err
Definition: regcomp.c:231
#define OFF(p)
Definition: regexec.c:128
static void freedfa(struct dfa *)
regmatch_t rm_extend
Definition: regex.h:92
const chr * stop
Definition: regcomp.c:228
int cflags
Definition: regguts.h:466
#define NOERR()
Definition: regexec.c:127
struct smalldfa dfa1
Definition: regexec.c:119
#define ISERR()
Definition: regexec.c:124
static int cfindloop ( struct vars v,
struct cnfa cnfa,
struct colormap cm,
struct dfa d,
struct dfa s,
chr **  coldp 
)
static

Definition at line 506 of file regexec.c.

References assert, cdissect(), close, ERR, vars::err, subre::flags, vars::g, ISERR, LOFF, longest(), MDEBUG, vars::nmatch, OFF, vars::pmatch, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, regmatch_t::rm_so, vars::search_start, SHORTER, shortest(), vars::stop, guts::tree, and zapallsubs().

Referenced by cfind().

512 {
513  chr *begin;
514  chr *end;
515  chr *cold;
516  chr *open; /* open and close of range of possible starts */
517  chr *close;
518  chr *estart;
519  chr *estop;
520  int er;
521  int shorter = v->g->tree->flags & SHORTER;
522  int hitend;
523 
524  assert(d != NULL && s != NULL);
525  cold = NULL;
526  close = v->search_start;
527  do
528  {
529  /* Search with the search RE for match range at/beyond "close" */
530  MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
531  close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
532  if (ISERR())
533  {
534  *coldp = cold;
535  return v->err;
536  }
537  if (close == NULL)
538  break; /* no more possible match anywhere */
539  assert(cold != NULL);
540  open = cold;
541  cold = NULL;
542  /* Search for matches starting between "open" and "close" inclusive */
543  MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
544  for (begin = open; begin <= close; begin++)
545  {
546  MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
547  estart = begin;
548  estop = v->stop;
549  for (;;)
550  {
551  /* Here we use the top node's detailed RE */
552  if (shorter)
553  end = shortest(v, d, begin, estart,
554  estop, (chr **) NULL, &hitend);
555  else
556  end = longest(v, d, begin, estop,
557  &hitend);
558  if (ISERR())
559  {
560  *coldp = cold;
561  return v->err;
562  }
563  if (hitend && cold == NULL)
564  cold = begin;
565  if (end == NULL)
566  break; /* no match with this begin point, try next */
567  MDEBUG(("tentative end %ld\n", LOFF(end)));
568  /* Dissect the potential match to see if it really matches */
569  zapallsubs(v->pmatch, v->nmatch);
570  er = cdissect(v, v->g->tree, begin, end);
571  if (er == REG_OKAY)
572  {
573  if (v->nmatch > 0)
574  {
575  v->pmatch[0].rm_so = OFF(begin);
576  v->pmatch[0].rm_eo = OFF(end);
577  }
578  *coldp = cold;
579  return REG_OKAY;
580  }
581  if (er != REG_NOMATCH)
582  {
583  ERR(er);
584  *coldp = cold;
585  return er;
586  }
587  /* Try next longer/shorter match with same begin point */
588  if (shorter)
589  {
590  if (end == estop)
591  break; /* no more, so try next begin point */
592  estart = end + 1;
593  }
594  else
595  {
596  if (end == begin)
597  break; /* no more, so try next begin point */
598  estop = end - 1;
599  }
600  } /* end loop over endpoint positions */
601  } /* end loop over beginning positions */
602 
603  /*
604  * If we get here, there is no possible match starting at or before
605  * "close", so consider matches beyond that. We'll do a fresh search
606  * with the search RE to find a new promising match range.
607  */
608  close++;
609  } while (close < v->stop);
610 
611  *coldp = cold;
612  return REG_NOMATCH;
613 }
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
struct subre * tree
Definition: regguts.h:469
regoff_t rm_so
Definition: regex.h:85
#define LOFF(p)
Definition: regexec.c:129
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
regoff_t rm_eo
Definition: regex.h:86
chr * search_start
Definition: regexec.c:112
pg_wchar chr
Definition: regcustom.h:68
#define REG_OKAY
Definition: regex.h:137
char flags
Definition: regguts.h:411
#define assert(TEST)
Definition: imath.c:37
struct guts * g
Definition: regexec.c:106
int err
Definition: regcomp.c:231
regmatch_t * pmatch
Definition: regexec.c:109
#define ERR(e)
Definition: regexec.c:126
#define OFF(p)
Definition: regexec.c:128
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:692
size_t nmatch
Definition: regexec.c:108
static void zapallsubs(regmatch_t *, size_t)
Definition: regexec.c:619
const chr * stop
Definition: regcomp.c:228
#define SHORTER
Definition: regguts.h:413
#define REG_NOMATCH
Definition: regex.h:138
#define close(a)
Definition: win32.h:12
#define ISERR()
Definition: regexec.c:124
static int citerdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1037 of file regexec.c.

References assert, cdissect(), subre::cnfa, DUPINF, vars::err, subre::flags, FREE, getsubdfa(), i, subre::id, ISERR, subre::left, LOFF, longest(), MALLOC, subre::max, MDEBUG, subre::min, cnfa::nstates, subre::op, REG_ESPACE, REG_NOMATCH, REG_OKAY, SHORTER, and zaptreesubs().

Referenced by cdissect().

1041 {
1042  struct dfa *d;
1043  chr **endpts;
1044  chr *limit;
1045  int min_matches;
1046  size_t max_matches;
1047  int nverified;
1048  int k;
1049  int i;
1050  int er;
1051 
1052  assert(t->op == '*');
1053  assert(t->left != NULL && t->left->cnfa.nstates > 0);
1054  assert(!(t->left->flags & SHORTER));
1055  assert(begin <= end);
1056 
1057  /*
1058  * For the moment, assume the minimum number of matches is 1. If zero
1059  * matches are allowed, and the target string is empty, we are allowed to
1060  * match regardless of the contents of the iter node --- but we would
1061  * prefer to match once, so that capturing parens get set. (An example of
1062  * the concern here is a pattern like "()*\1", which historically this
1063  * code has allowed to succeed.) Therefore, we deal with the zero-matches
1064  * case at the bottom, after failing to find any other way to match.
1065  */
1066  min_matches = t->min;
1067  if (min_matches <= 0)
1068  min_matches = 1;
1069 
1070  /*
1071  * We need workspace to track the endpoints of each sub-match. Normally
1072  * we consider only nonzero-length sub-matches, so there can be at most
1073  * end-begin of them. However, if min is larger than that, we will also
1074  * consider zero-length sub-matches in order to find enough matches.
1075  *
1076  * For convenience, endpts[0] contains the "begin" pointer and we store
1077  * sub-match endpoints in endpts[1..max_matches].
1078  */
1079  max_matches = end - begin;
1080  if (max_matches > t->max && t->max != DUPINF)
1081  max_matches = t->max;
1082  if (max_matches < min_matches)
1083  max_matches = min_matches;
1084  endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1085  if (endpts == NULL)
1086  return REG_ESPACE;
1087  endpts[0] = begin;
1088 
1089  d = getsubdfa(v, t->left);
1090  if (ISERR())
1091  {
1092  FREE(endpts);
1093  return v->err;
1094  }
1095  MDEBUG(("citer %d\n", t->id));
1096 
1097  /*
1098  * Our strategy is to first find a set of sub-match endpoints that are
1099  * valid according to the child node's DFA, and then recursively dissect
1100  * each sub-match to confirm validity. If any validity check fails,
1101  * backtrack the last sub-match and try again. And, when we next try for
1102  * a validity check, we need not recheck any successfully verified
1103  * sub-matches that we didn't move the endpoints of. nverified remembers
1104  * how many sub-matches are currently known okay.
1105  */
1106 
1107  /* initialize to consider first sub-match */
1108  nverified = 0;
1109  k = 1;
1110  limit = end;
1111 
1112  /* iterate until satisfaction or failure */
1113  while (k > 0)
1114  {
1115  /* try to find an endpoint for the k'th sub-match */
1116  endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
1117  if (ISERR())
1118  {
1119  FREE(endpts);
1120  return v->err;
1121  }
1122  if (endpts[k] == NULL)
1123  {
1124  /* no match possible, so see if we can shorten previous one */
1125  k--;
1126  goto backtrack;
1127  }
1128  MDEBUG(("%d: working endpoint %d: %ld\n",
1129  t->id, k, LOFF(endpts[k])));
1130 
1131  /* k'th sub-match can no longer be considered verified */
1132  if (nverified >= k)
1133  nverified = k - 1;
1134 
1135  if (endpts[k] != end)
1136  {
1137  /* haven't reached end yet, try another iteration if allowed */
1138  if (k >= max_matches)
1139  {
1140  /* must try to shorten some previous match */
1141  k--;
1142  goto backtrack;
1143  }
1144 
1145  /* reject zero-length match unless necessary to achieve min */
1146  if (endpts[k] == endpts[k - 1] &&
1147  (k >= min_matches || min_matches - k < end - endpts[k]))
1148  goto backtrack;
1149 
1150  k++;
1151  limit = end;
1152  continue;
1153  }
1154 
1155  /*
1156  * We've identified a way to divide the string into k sub-matches that
1157  * works so far as the child DFA can tell. If k is an allowed number
1158  * of matches, start the slow part: recurse to verify each sub-match.
1159  * We always have k <= max_matches, needn't check that.
1160  */
1161  if (k < min_matches)
1162  goto backtrack;
1163 
1164  MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1165 
1166  for (i = nverified + 1; i <= k; i++)
1167  {
1168  zaptreesubs(v, t->left);
1169  er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
1170  if (er == REG_OKAY)
1171  {
1172  nverified = i;
1173  continue;
1174  }
1175  if (er == REG_NOMATCH)
1176  break;
1177  /* oops, something failed */
1178  FREE(endpts);
1179  return er;
1180  }
1181 
1182  if (i > k)
1183  {
1184  /* satisfaction */
1185  MDEBUG(("%d successful\n", t->id));
1186  FREE(endpts);
1187  return REG_OKAY;
1188  }
1189 
1190  /* match failed to verify, so backtrack */
1191 
1192 backtrack:
1193 
1194  /*
1195  * Must consider shorter versions of the current sub-match. However,
1196  * we'll only ask for a zero-length match if necessary.
1197  */
1198  while (k > 0)
1199  {
1200  chr *prev_end = endpts[k - 1];
1201 
1202  if (endpts[k] > prev_end)
1203  {
1204  limit = endpts[k] - 1;
1205  if (limit > prev_end ||
1206  (k < min_matches && min_matches - k >= end - prev_end))
1207  {
1208  /* break out of backtrack loop, continue the outer one */
1209  break;
1210  }
1211  }
1212  /* can't shorten k'th sub-match any more, consider previous one */
1213  k--;
1214  }
1215  }
1216 
1217  /* all possibilities exhausted */
1218  FREE(endpts);
1219 
1220  /*
1221  * Now consider the possibility that we can match to a zero-length string
1222  * by using zero repetitions.
1223  */
1224  if (t->min == 0 && begin == end)
1225  {
1226  MDEBUG(("%d allowing zero matches\n", t->id));
1227  return REG_OKAY;
1228  }
1229 
1230  MDEBUG(("%d failed\n", t->id));
1231  return REG_NOMATCH;
1232 }
#define REG_ESPACE
Definition: regex.h:149
char op
Definition: regguts.h:410
#define LOFF(p)
Definition: regexec.c:129
short min
Definition: regguts.h:429
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
#define MALLOC(n)
Definition: regcustom.h:62
Definition: regexec.c:63
int nstates
Definition: regguts.h:356
pg_wchar chr
Definition: regcustom.h:68
#define REG_OKAY
Definition: regex.h:137
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:635
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:337
char flags
Definition: regguts.h:411
#define assert(TEST)
Definition: imath.c:37
struct subre * left
Definition: regguts.h:431
int err
Definition: regcomp.c:231
#define DUPINF
Definition: regguts.h:94
short max
Definition: regguts.h:430
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:692
struct cnfa cnfa
Definition: regguts.h:435
short id
Definition: regguts.h:426
#define SHORTER
Definition: regguts.h:413
int i
#define REG_NOMATCH
Definition: regex.h:138
#define FREE(size)
Definition: saslprep.c:51
#define ISERR()
Definition: regexec.c:124
static int crevcondissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 841 of file regexec.c.

References assert, cdissect(), subre::cnfa, subre::flags, getsubdfa(), subre::id, subre::left, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_ASSERT, REG_NOMATCH, REG_OKAY, subre::right, SHORTER, shortest(), and zaptreesubs().

Referenced by cdissect().

845 {
846  struct dfa *d;
847  struct dfa *d2;
848  chr *mid;
849  int er;
850 
851  assert(t->op == '.');
852  assert(t->left != NULL && t->left->cnfa.nstates > 0);
853  assert(t->right != NULL && t->right->cnfa.nstates > 0);
854  assert(t->left->flags & SHORTER);
855 
856  d = getsubdfa(v, t->left);
857  NOERR();
858  d2 = getsubdfa(v, t->right);
859  NOERR();
860  MDEBUG(("crevcon %d\n", t->id));
861 
862  /* pick a tentative midpoint */
863  mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
864  NOERR();
865  if (mid == NULL)
866  return REG_NOMATCH;
867  MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
868 
869  /* iterate until satisfaction or failure */
870  for (;;)
871  {
872  /* try this midpoint on for size */
873  if (longest(v, d2, mid, end, (int *) NULL) == end)
874  {
875  er = cdissect(v, t->left, begin, mid);
876  if (er == REG_OKAY)
877  {
878  er = cdissect(v, t->right, mid, end);
879  if (er == REG_OKAY)
880  {
881  /* satisfaction */
882  MDEBUG(("successful\n"));
883  return REG_OKAY;
884  }
885  }
886  if (er != REG_NOMATCH)
887  return er;
888  }
889  NOERR();
890 
891  /* that midpoint didn't work, find a new one */
892  if (mid == end)
893  {
894  /* all possibilities exhausted */
895  MDEBUG(("%d no midpoint\n", t->id));
896  return REG_NOMATCH;
897  }
898  mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
899  NOERR();
900  if (mid == NULL)
901  {
902  /* failed to find a new one */
903  MDEBUG(("%d failed midpoint\n", t->id));
904  return REG_NOMATCH;
905  }
906  MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
907  zaptreesubs(v, t->left);
908  zaptreesubs(v, t->right);
909  }
910 
911  /* can't get here */
912  return REG_ASSERT;
913 }
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
struct subre * right
Definition: regguts.h:432
char op
Definition: regguts.h:410
#define LOFF(p)
Definition: regexec.c:129
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
Definition: regexec.c:63
int nstates
Definition: regguts.h:356
pg_wchar chr
Definition: regcustom.h:68
#define REG_OKAY
Definition: regex.h:137
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:635
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:337
char flags
Definition: regguts.h:411
#define assert(TEST)
Definition: imath.c:37
struct subre * left
Definition: regguts.h:431
#define REG_ASSERT
Definition: regex.h:151
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:692
struct cnfa cnfa
Definition: regguts.h:435
short id
Definition: regguts.h:426
#define SHORTER
Definition: regguts.h:413
#define REG_NOMATCH
Definition: regex.h:138
#define NOERR()
Definition: regexec.c:127
static int creviterdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1238 of file regexec.c.

References assert, cdissect(), subre::cnfa, DUPINF, vars::err, subre::flags, FREE, getsubdfa(), i, subre::id, ISERR, subre::left, LOFF, MALLOC, subre::max, MDEBUG, subre::min, cnfa::nstates, subre::op, REG_ESPACE, REG_NOMATCH, REG_OKAY, SHORTER, shortest(), and zaptreesubs().

Referenced by cdissect().

1242 {
1243  struct dfa *d;
1244  chr **endpts;
1245  chr *limit;
1246  int min_matches;
1247  size_t max_matches;
1248  int nverified;
1249  int k;
1250  int i;
1251  int er;
1252 
1253  assert(t->op == '*');
1254  assert(t->left != NULL && t->left->cnfa.nstates > 0);
1255  assert(t->left->flags & SHORTER);
1256  assert(begin <= end);
1257 
1258  /*
1259  * If zero matches are allowed, and target string is empty, just declare
1260  * victory. OTOH, if target string isn't empty, zero matches can't work
1261  * so we pretend the min is 1.
1262  */
1263  min_matches = t->min;
1264  if (min_matches <= 0)
1265  {
1266  if (begin == end)
1267  return REG_OKAY;
1268  min_matches = 1;
1269  }
1270 
1271  /*
1272  * We need workspace to track the endpoints of each sub-match. Normally
1273  * we consider only nonzero-length sub-matches, so there can be at most
1274  * end-begin of them. However, if min is larger than that, we will also
1275  * consider zero-length sub-matches in order to find enough matches.
1276  *
1277  * For convenience, endpts[0] contains the "begin" pointer and we store
1278  * sub-match endpoints in endpts[1..max_matches].
1279  */
1280  max_matches = end - begin;
1281  if (max_matches > t->max && t->max != DUPINF)
1282  max_matches = t->max;
1283  if (max_matches < min_matches)
1284  max_matches = min_matches;
1285  endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1286  if (endpts == NULL)
1287  return REG_ESPACE;
1288  endpts[0] = begin;
1289 
1290  d = getsubdfa(v, t->left);
1291  if (ISERR())
1292  {
1293  FREE(endpts);
1294  return v->err;
1295  }
1296  MDEBUG(("creviter %d\n", t->id));
1297 
1298  /*
1299  * Our strategy is to first find a set of sub-match endpoints that are
1300  * valid according to the child node's DFA, and then recursively dissect
1301  * each sub-match to confirm validity. If any validity check fails,
1302  * backtrack the last sub-match and try again. And, when we next try for
1303  * a validity check, we need not recheck any successfully verified
1304  * sub-matches that we didn't move the endpoints of. nverified remembers
1305  * how many sub-matches are currently known okay.
1306  */
1307 
1308  /* initialize to consider first sub-match */
1309  nverified = 0;
1310  k = 1;
1311  limit = begin;
1312 
1313  /* iterate until satisfaction or failure */
1314  while (k > 0)
1315  {
1316  /* disallow zero-length match unless necessary to achieve min */
1317  if (limit == endpts[k - 1] &&
1318  limit != end &&
1319  (k >= min_matches || min_matches - k < end - limit))
1320  limit++;
1321 
1322  /* if this is the last allowed sub-match, it must reach to the end */
1323  if (k >= max_matches)
1324  limit = end;
1325 
1326  /* try to find an endpoint for the k'th sub-match */
1327  endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
1328  (chr **) NULL, (int *) NULL);
1329  if (ISERR())
1330  {
1331  FREE(endpts);
1332  return v->err;
1333  }
1334  if (endpts[k] == NULL)
1335  {
1336  /* no match possible, so see if we can lengthen previous one */
1337  k--;
1338  goto backtrack;
1339  }
1340  MDEBUG(("%d: working endpoint %d: %ld\n",
1341  t->id, k, LOFF(endpts[k])));
1342 
1343  /* k'th sub-match can no longer be considered verified */
1344  if (nverified >= k)
1345  nverified = k - 1;
1346 
1347  if (endpts[k] != end)
1348  {
1349  /* haven't reached end yet, try another iteration if allowed */
1350  if (k >= max_matches)
1351  {
1352  /* must try to lengthen some previous match */
1353  k--;
1354  goto backtrack;
1355  }
1356 
1357  k++;
1358  limit = endpts[k - 1];
1359  continue;
1360  }
1361 
1362  /*
1363  * We've identified a way to divide the string into k sub-matches that
1364  * works so far as the child DFA can tell. If k is an allowed number
1365  * of matches, start the slow part: recurse to verify each sub-match.
1366  * We always have k <= max_matches, needn't check that.
1367  */
1368  if (k < min_matches)
1369  goto backtrack;
1370 
1371  MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1372 
1373  for (i = nverified + 1; i <= k; i++)
1374  {
1375  zaptreesubs(v, t->left);
1376  er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
1377  if (er == REG_OKAY)
1378  {
1379  nverified = i;
1380  continue;
1381  }
1382  if (er == REG_NOMATCH)
1383  break;
1384  /* oops, something failed */
1385  FREE(endpts);
1386  return er;
1387  }
1388 
1389  if (i > k)
1390  {
1391  /* satisfaction */
1392  MDEBUG(("%d successful\n", t->id));
1393  FREE(endpts);
1394  return REG_OKAY;
1395  }
1396 
1397  /* match failed to verify, so backtrack */
1398 
1399 backtrack:
1400 
1401  /*
1402  * Must consider longer versions of the current sub-match.
1403  */
1404  while (k > 0)
1405  {
1406  if (endpts[k] < end)
1407  {
1408  limit = endpts[k] + 1;
1409  /* break out of backtrack loop, continue the outer one */
1410  break;
1411  }
1412  /* can't lengthen k'th sub-match any more, consider previous one */
1413  k--;
1414  }
1415  }
1416 
1417  /* all possibilities exhausted */
1418  MDEBUG(("%d failed\n", t->id));
1419  FREE(endpts);
1420  return REG_NOMATCH;
1421 }
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
#define REG_ESPACE
Definition: regex.h:149
char op
Definition: regguts.h:410
#define LOFF(p)
Definition: regexec.c:129
short min
Definition: regguts.h:429
#define MDEBUG(arglist)
Definition: regguts.h:117
#define MALLOC(n)
Definition: regcustom.h:62
Definition: regexec.c:63
int nstates
Definition: regguts.h:356
pg_wchar chr
Definition: regcustom.h:68
#define REG_OKAY
Definition: regex.h:137
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:635
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:337
char flags
Definition: regguts.h:411
#define assert(TEST)
Definition: imath.c:37
struct subre * left
Definition: regguts.h:431
int err
Definition: regcomp.c:231
#define DUPINF
Definition: regguts.h:94
short max
Definition: regguts.h:430
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:692
struct cnfa cnfa
Definition: regguts.h:435
short id
Definition: regguts.h:426
#define SHORTER
Definition: regguts.h:413
int i
#define REG_NOMATCH
Definition: regex.h:138
#define FREE(size)
Definition: saslprep.c:51
#define ISERR()
Definition: regexec.c:124
static int find ( struct vars v,
struct cnfa cnfa,
struct colormap cm 
)
static

Definition at line 375 of file regexec.c.

References assert, cdissect(), guts::cflags, close, vars::details, vars::dfa1, vars::err, subre::flags, freedfa(), vars::g, ISERR, LOFF, longest(), MDEBUG, newdfa(), vars::nmatch, NOERR, OFF, vars::pmatch, REG_EXPECT, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, rm_detail_t::rm_extend, regmatch_t::rm_so, guts::search, vars::search_start, SHORTER, shortest(), vars::start, vars::stop, guts::tree, and zapallsubs().

Referenced by NIAddAffix(), NIImportAffixes(), NIImportOOAffixes(), parse_affentry(), pg_regexec(), and testdelete().

378 {
379  struct dfa *s;
380  struct dfa *d;
381  chr *begin;
382  chr *end = NULL;
383  chr *cold;
384  chr *open; /* open and close of range of possible starts */
385  chr *close;
386  int hitend;
387  int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
388 
389  /* first, a shot with the search RE */
390  s = newdfa(v, &v->g->search, cm, &v->dfa1);
391  assert(!(ISERR() && s != NULL));
392  NOERR();
393  MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
394  cold = NULL;
395  close = shortest(v, s, v->search_start, v->search_start, v->stop,
396  &cold, (int *) NULL);
397  freedfa(s);
398  NOERR();
399  if (v->g->cflags & REG_EXPECT)
400  {
401  assert(v->details != NULL);
402  if (cold != NULL)
403  v->details->rm_extend.rm_so = OFF(cold);
404  else
405  v->details->rm_extend.rm_so = OFF(v->stop);
406  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
407  }
408  if (close == NULL) /* not found */
409  return REG_NOMATCH;
410  if (v->nmatch == 0) /* found, don't need exact location */
411  return REG_OKAY;
412 
413  /* find starting point and match */
414  assert(cold != NULL);
415  open = cold;
416  cold = NULL;
417  MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
418  d = newdfa(v, cnfa, cm, &v->dfa1);
419  assert(!(ISERR() && d != NULL));
420  NOERR();
421  for (begin = open; begin <= close; begin++)
422  {
423  MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
424  if (shorter)
425  end = shortest(v, d, begin, begin, v->stop,
426  (chr **) NULL, &hitend);
427  else
428  end = longest(v, d, begin, v->stop, &hitend);
429  if (ISERR())
430  {
431  freedfa(d);
432  return v->err;
433  }
434  if (hitend && cold == NULL)
435  cold = begin;
436  if (end != NULL)
437  break; /* NOTE BREAK OUT */
438  }
439  assert(end != NULL); /* search RE succeeded so loop should */
440  freedfa(d);
441 
442  /* and pin down details */
443  assert(v->nmatch > 0);
444  v->pmatch[0].rm_so = OFF(begin);
445  v->pmatch[0].rm_eo = OFF(end);
446  if (v->g->cflags & REG_EXPECT)
447  {
448  if (cold != NULL)
449  v->details->rm_extend.rm_so = OFF(cold);
450  else
451  v->details->rm_extend.rm_so = OFF(v->stop);
452  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
453  }
454  if (v->nmatch == 1) /* no need for submatches */
455  return REG_OKAY;
456 
457  /* find submatches */
458  zapallsubs(v->pmatch, v->nmatch);
459  return cdissect(v, v->g->tree, begin, end);
460 }
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
#define REG_EXPECT
Definition: regex.h:113
struct subre * tree
Definition: regguts.h:469
regoff_t rm_so
Definition: regex.h:85
#define LOFF(p)
Definition: regexec.c:129
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
rm_detail_t * details
Definition: regexec.c:110
#define MDEBUG(arglist)
Definition: regguts.h:117
chr * start
Definition: regexec.c:111
regoff_t rm_eo
Definition: regex.h:86
Definition: regexec.c:63
chr * search_start
Definition: regexec.c:112
struct cnfa search
Definition: regguts.h:470
pg_wchar chr
Definition: regcustom.h:68
#define REG_OKAY
Definition: regex.h:137
char flags
Definition: regguts.h:411
#define assert(TEST)
Definition: imath.c:37
struct guts * g
Definition: regexec.c:106
int err
Definition: regcomp.c:231
regmatch_t * pmatch
Definition: regexec.c:109
#define OFF(p)
Definition: regexec.c:128
static void freedfa(struct dfa *)
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:692
size_t nmatch
Definition: regexec.c:108
static void zapallsubs(regmatch_t *, size_t)
Definition: regexec.c:619
regmatch_t rm_extend
Definition: regex.h:92
const chr * stop
Definition: regcomp.c:228
int cflags
Definition: regguts.h:466
#define SHORTER
Definition: regguts.h:413
#define REG_NOMATCH
Definition: regex.h:138
#define NOERR()
Definition: regexec.c:127
#define close(a)
Definition: win32.h:12
struct smalldfa dfa1
Definition: regexec.c:119
#define ISERR()
Definition: regexec.c:124
static void freedfa ( struct dfa )
static

Referenced by cfind(), find(), and pg_regexec().

static struct dfa * getladfa ( struct vars v,
int  n 
)
static

Definition at line 355 of file regexec.c.

References assert, guts::cmap, subre::cnfa, DOMALLOC, vars::g, ISERR, guts::lacons, vars::ladfas, and newdfa().

Referenced by lacon().

357 {
358  assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
359 
360  if (v->ladfas[n] == NULL)
361  {
362  struct subre *sub = &v->g->lacons[n];
363 
364  v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
365  if (ISERR())
366  return NULL;
367  }
368  return v->ladfas[n];
369 }
struct dfa ** ladfas
Definition: regexec.c:116
struct subre * lacons
Definition: regguts.h:474
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
#define DOMALLOC
Definition: regexec.c:98
struct colormap cmap
Definition: regguts.h:472
#define assert(TEST)
Definition: imath.c:37
struct guts * g
Definition: regexec.c:106
Definition: regguts.h:408
struct cnfa cnfa
Definition: regguts.h:435
#define ISERR()
Definition: regexec.c:124
static struct dfa * getsubdfa ( struct vars v,
struct subre t 
)
static

Definition at line 337 of file regexec.c.

References guts::cmap, subre::cnfa, DOMALLOC, vars::g, subre::id, ISERR, newdfa(), and vars::subdfas.

Referenced by caltdissect(), ccondissect(), citerdissect(), crevcondissect(), and creviterdissect().

339 {
340  if (v->subdfas[t->id] == NULL)
341  {
342  v->subdfas[t->id] = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
343  if (ISERR())
344  return NULL;
345  }
346  return v->subdfas[t->id];
347 }
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
#define DOMALLOC
Definition: regexec.c:98
struct colormap cmap
Definition: regguts.h:472
struct guts * g
Definition: regexec.c:106
struct dfa ** subdfas
Definition: regexec.c:115
struct cnfa cnfa
Definition: regguts.h:435
short id
Definition: regguts.h:426
#define ISERR()
Definition: regexec.c:124
static struct sset* getvacant ( struct vars ,
struct dfa ,
chr ,
chr  
)
static
static unsigned hash ( unsigned *  ,
int   
)
static
static struct sset* initialize ( struct vars ,
struct dfa ,
chr  
)
static
static int lacon ( struct vars ,
struct cnfa ,
chr ,
color   
)
static
static chr* lastcold ( struct vars ,
struct dfa  
)
static
static chr* longest ( struct vars ,
struct dfa ,
chr ,
chr ,
int *   
)
static
static int matchuntil ( struct vars ,
struct dfa ,
chr ,
struct sset **  ,
chr **   
)
static
static struct sset* miss ( struct vars ,
struct dfa ,
struct sset ,
color  ,
chr ,
chr  
)
static
static struct dfa* newdfa ( struct vars ,
struct cnfa ,
struct colormap ,
struct smalldfa  
)
static

Referenced by cfind(), find(), getladfa(), and getsubdfa().

int pg_regexec ( regex_t re,
const chr string,
size_t  len,
size_t  search_start,
rm_detail_t details,
size_t  nmatch,
regmatch_t  pmatch[],
int  flags 
)

Definition at line 172 of file regexec.c.

References assert, cfind(), guts::cflags, cleanup(), guts::cmap, subre::cnfa, vars::details, vars::eflags, vars::err, find(), FREE, freedfa(), vars::g, i, guts::info, vars::ladfas, vars::lblastcp, vars::lblastcss, LOCALDFAS, LOCALMAT, MALLOC, guts::nlacons, vars::nmatch, guts::nsub, guts::ntree, pg_set_regex_collation(), vars::pmatch, vars::re, regex_t::re_collation, regex_t::re_csize, regex_t::re_guts, regex_t::re_magic, REG_ESPACE, REG_EXPECT, REG_INVARG, REG_MIXED, REG_NOMATCH, REG_NOSUB, REG_OKAY, REG_UBACKREF, REG_UIMPOSSIBLE, REMAGIC, vars::search_start, vars::start, vars::stop, vars::subdfas, guts::tree, VS, and zapallsubs().

Referenced by check_ident_usermap(), CheckAffix(), RE_wchar_execute(), and replace_text_regexp().

180 {
181  struct vars var;
182  register struct vars *v = &var;
183  int st;
184  size_t n;
185  size_t i;
186  int backref;
187 
188 #define LOCALMAT 20
189  regmatch_t mat[LOCALMAT];
190 
191 #define LOCALDFAS 40
192  struct dfa *subdfas[LOCALDFAS];
193 
194  /* sanity checks */
195  if (re == NULL || string == NULL || re->re_magic != REMAGIC)
196  return REG_INVARG;
197  if (re->re_csize != sizeof(chr))
198  return REG_MIXED;
199 
200  /* Initialize locale-dependent support */
202 
203  /* setup */
204  v->re = re;
205  v->g = (struct guts *) re->re_guts;
206  if ((v->g->cflags & REG_EXPECT) && details == NULL)
207  return REG_INVARG;
208  if (v->g->info & REG_UIMPOSSIBLE)
209  return REG_NOMATCH;
210  backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
211  v->eflags = flags;
212  if (v->g->cflags & REG_NOSUB)
213  nmatch = 0; /* override client */
214  v->nmatch = nmatch;
215  if (backref)
216  {
217  /* need work area */
218  if (v->g->nsub + 1 <= LOCALMAT)
219  v->pmatch = mat;
220  else
221  v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
222  sizeof(regmatch_t));
223  if (v->pmatch == NULL)
224  return REG_ESPACE;
225  v->nmatch = v->g->nsub + 1;
226  }
227  else
228  v->pmatch = pmatch;
229  v->details = details;
230  v->start = (chr *) string;
231  v->search_start = (chr *) string + search_start;
232  v->stop = (chr *) string + len;
233  v->err = 0;
234  v->subdfas = NULL;
235  v->ladfas = NULL;
236  v->lblastcss = NULL;
237  v->lblastcp = NULL;
238  /* below this point, "goto cleanup" will behave sanely */
239 
240  assert(v->g->ntree >= 0);
241  n = (size_t) v->g->ntree;
242  if (n <= LOCALDFAS)
243  v->subdfas = subdfas;
244  else
245  {
246  v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
247  if (v->subdfas == NULL)
248  {
249  st = REG_ESPACE;
250  goto cleanup;
251  }
252  }
253  for (i = 0; i < n; i++)
254  v->subdfas[i] = NULL;
255 
256  assert(v->g->nlacons >= 0);
257  n = (size_t) v->g->nlacons;
258  if (n > 0)
259  {
260  v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
261  if (v->ladfas == NULL)
262  {
263  st = REG_ESPACE;
264  goto cleanup;
265  }
266  for (i = 0; i < n; i++)
267  v->ladfas[i] = NULL;
268  v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
269  v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
270  if (v->lblastcss == NULL || v->lblastcp == NULL)
271  {
272  st = REG_ESPACE;
273  goto cleanup;
274  }
275  for (i = 0; i < n; i++)
276  {
277  v->lblastcss[i] = NULL;
278  v->lblastcp[i] = NULL;
279  }
280  }
281 
282  /* do it */
283  assert(v->g->tree != NULL);
284  if (backref)
285  st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
286  else
287  st = find(v, &v->g->tree->cnfa, &v->g->cmap);
288 
289  /* copy (portion of) match vector over if necessary */
290  if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0)
291  {
292  zapallsubs(pmatch, nmatch);
293  n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
294  memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
295  }
296 
297  /* clean up */
298 cleanup:
299  if (v->pmatch != pmatch && v->pmatch != mat)
300  FREE(v->pmatch);
301  if (v->subdfas != NULL)
302  {
303  n = (size_t) v->g->ntree;
304  for (i = 0; i < n; i++)
305  {
306  if (v->subdfas[i] != NULL)
307  freedfa(v->subdfas[i]);
308  }
309  if (v->subdfas != subdfas)
310  FREE(v->subdfas);
311  }
312  if (v->ladfas != NULL)
313  {
314  n = (size_t) v->g->nlacons;
315  for (i = 0; i < n; i++)
316  {
317  if (v->ladfas[i] != NULL)
318  freedfa(v->ladfas[i]);
319  }
320  FREE(v->ladfas);
321  }
322  if (v->lblastcss != NULL)
323  FREE(v->lblastcss);
324  if (v->lblastcp != NULL)
325  FREE(v->lblastcp);
326 
327  return st;
328 }
#define REG_UIMPOSSIBLE
Definition: regex.h:72
struct dfa ** ladfas
Definition: regexec.c:116
static int find(struct vars *, struct cnfa *, struct colormap *)
Definition: regexec.c:375
#define REG_EXPECT
Definition: regex.h:113
struct subre * tree
Definition: regguts.h:469
#define REG_ESPACE
Definition: regex.h:149
int nlacons
Definition: regguts.h:475
#define REMAGIC
Definition: regguts.h:96
rm_detail_t * details
Definition: regexec.c:110
int re_csize
Definition: regex.h:74
chr * start
Definition: regexec.c:111
#define MALLOC(n)
Definition: regcustom.h:62
Definition: regexec.c:63
struct sset ** lblastcss
Definition: regexec.c:117
chr * search_start
Definition: regexec.c:112
int re_magic
Definition: regex.h:57
pg_wchar chr
Definition: regcustom.h:68
#define REG_OKAY
Definition: regex.h:137
regex_t * re
Definition: regcomp.c:226
#define REG_INVARG
Definition: regex.h:152
struct colormap cmap
Definition: regguts.h:472
#define assert(TEST)
Definition: imath.c:37
struct guts * g
Definition: regexec.c:106
int err
Definition: regcomp.c:231
#define LOCALMAT
int eflags
Definition: regexec.c:107
static void cleanup(void)
Definition: bootstrap.c:860
Definition: regguts.h:462
int ntree
Definition: regguts.h:471
#define REG_UBACKREF
Definition: regex.h:60
#define LOCALDFAS
struct dfa ** subdfas
Definition: regexec.c:115
void pg_set_regex_collation(Oid collation)
regmatch_t * pmatch
Definition: regexec.c:109
#define REG_NOSUB
Definition: regex.h:107
static void freedfa(struct dfa *)
#define VS(x)
Definition: regguts.h:61
long info
Definition: regguts.h:467
struct cnfa cnfa
Definition: regguts.h:435
size_t nmatch
Definition: regexec.c:108
static void zapallsubs(regmatch_t *, size_t)
Definition: regexec.c:619
const chr * stop
Definition: regcomp.c:228
char * re_guts
Definition: regex.h:78
int cflags
Definition: regguts.h:466
Definition: regexec.c:45
size_t nsub
Definition: regguts.h:468
int i
#define REG_NOMATCH
Definition: regex.h:138
chr ** lblastcp
Definition: regexec.c:118
#define REG_MIXED
Definition: regex.h:153
Definition: regcomp.c:224
Oid re_collation
Definition: regex.h:76
#define FREE(size)
Definition: saslprep.c:51
static int cfind(struct vars *, struct cnfa *, struct colormap *)
Definition: regexec.c:466
static struct sset* pickss ( struct vars ,
struct dfa ,
chr ,
chr  
)
static
static chr* shortest ( struct vars ,
struct dfa ,
chr ,
chr ,
chr ,
chr **  ,
int *   
)
static
static void subset ( struct vars v,
struct subre sub,
chr begin,
chr end 
)
static

Definition at line 660 of file regexec.c.

References assert, MDEBUG, vars::nmatch, OFF, vars::pmatch, regmatch_t::rm_eo, regmatch_t::rm_so, and subre::subno.

Referenced by cdissect().

664 {
665  int n = sub->subno;
666 
667  assert(n > 0);
668  if ((size_t) n >= v->nmatch)
669  return;
670 
671  MDEBUG(("setting %d\n", n));
672  v->pmatch[n].rm_so = OFF(begin);
673  v->pmatch[n].rm_eo = OFF(end);
674 }
int subno
Definition: regguts.h:427
regoff_t rm_so
Definition: regex.h:85
#define MDEBUG(arglist)
Definition: regguts.h:117
regoff_t rm_eo
Definition: regex.h:86
#define assert(TEST)
Definition: imath.c:37
regmatch_t * pmatch
Definition: regexec.c:109
#define OFF(p)
Definition: regexec.c:128
size_t nmatch
Definition: regexec.c:108
static void zapallsubs ( regmatch_t p,
size_t  n 
)
static

Definition at line 619 of file regexec.c.

References i, regmatch_t::rm_eo, and regmatch_t::rm_so.

Referenced by cfindloop(), find(), and pg_regexec().

621 {
622  size_t i;
623 
624  for (i = n - 1; i > 0; i--)
625  {
626  p[i].rm_so = -1;
627  p[i].rm_eo = -1;
628  }
629 }
regoff_t rm_so
Definition: regex.h:85
regoff_t rm_eo
Definition: regex.h:86
int i
static void zaptreesubs ( struct vars v,
struct subre t 
)
static

Definition at line 635 of file regexec.c.

References assert, subre::left, subre::op, vars::pmatch, subre::right, regmatch_t::rm_eo, regmatch_t::rm_so, and subre::subno.

Referenced by ccondissect(), citerdissect(), crevcondissect(), and creviterdissect().

637 {
638  if (t->op == '(')
639  {
640  int n = t->subno;
641 
642  assert(n > 0);
643  if ((size_t) n < v->nmatch)
644  {
645  v->pmatch[n].rm_so = -1;
646  v->pmatch[n].rm_eo = -1;
647  }
648  }
649 
650  if (t->left != NULL)
651  zaptreesubs(v, t->left);
652  if (t->right != NULL)
653  zaptreesubs(v, t->right);
654 }
struct subre * right
Definition: regguts.h:432
int subno
Definition: regguts.h:427
regoff_t rm_so
Definition: regex.h:85
char op
Definition: regguts.h:410
regoff_t rm_eo
Definition: regex.h:86
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:635
#define assert(TEST)
Definition: imath.c:37
struct subre * left
Definition: regguts.h:431
regmatch_t * pmatch
Definition: regexec.c:109