PostgreSQL Source Code  git master
regexec.c File Reference
#include "regex/regguts.h"
#include "rege_dfa.c"
Include dependency graph for regexec.c:

Go to the source code of this file.

Data Structures

struct  arcp
 
struct  sset
 
struct  dfa
 
struct  smalldfa
 
struct  vars
 

Macros

#define HASH(bv, nw)   (((nw) == 1) ? *(bv) : hash(bv, nw))
 
#define HIT(h, bv, ss, nw)
 
#define STARTER   01 /* the initial state set */
 
#define POSTSTATE   02 /* includes the goal state */
 
#define LOCKED   04 /* locked in cache */
 
#define NOPROGRESS   010 /* zero-progress state set */
 
#define WORK   1 /* number of work bitvectors needed */
 
#define FEWSTATES   20 /* must be less than UBITS */
 
#define FEWCOLORS   15
 
#define DOMALLOC   ((struct smalldfa *)NULL) /* force malloc */
 
#define VISERR(vv)   ((vv)->err != 0) /* have we seen an error yet? */
 
#define ISERR()   VISERR(v)
 
#define VERR(vv, e)   ((vv)->err = ((vv)->err ? (vv)->err : (e)))
 
#define ERR(e)   VERR(v, e) /* record an error */
 
#define NOERR()   {if (ISERR()) return v->err;} /* if error seen, return it */
 
#define OFF(p)   ((p) - v->start)
 
#define LOFF(p)   ((long)OFF(p))
 
#define LOCALMAT   20
 
#define LOCALDFAS   40
 

Functions

static struct dfagetsubdfa (struct vars *, struct subre *)
 
static struct dfagetladfa (struct vars *, int)
 
static int find (struct vars *, struct cnfa *, struct colormap *)
 
static int cfind (struct vars *, struct cnfa *, struct colormap *)
 
static int cfindloop (struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **)
 
static void zapallsubs (regmatch_t *, size_t)
 
static void zaptreesubs (struct vars *, struct subre *)
 
static void subset (struct vars *, struct subre *, chr *, chr *)
 
static int cdissect (struct vars *, struct subre *, chr *, chr *)
 
static int ccondissect (struct vars *, struct subre *, chr *, chr *)
 
static int crevcondissect (struct vars *, struct subre *, chr *, chr *)
 
static int cbrdissect (struct vars *, struct subre *, chr *, chr *)
 
static int caltdissect (struct vars *, struct subre *, chr *, chr *)
 
static int citerdissect (struct vars *, struct subre *, chr *, chr *)
 
static int creviterdissect (struct vars *, struct subre *, chr *, chr *)
 
static chrlongest (struct vars *, struct dfa *, chr *, chr *, int *)
 
static chrshortest (struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
 
static int matchuntil (struct vars *, struct dfa *, chr *, struct sset **, chr **)
 
static chrdfa_backref (struct vars *, struct dfa *, chr *, chr *, chr *, bool)
 
static chrlastcold (struct vars *, struct dfa *)
 
static struct dfanewdfa (struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
 
static void freedfa (struct dfa *)
 
static unsigned hash (unsigned *, int)
 
static struct ssetinitialize (struct vars *, struct dfa *, chr *)
 
static struct ssetmiss (struct vars *, struct dfa *, struct sset *, color, chr *, chr *)
 
static int lacon (struct vars *, struct cnfa *, chr *, color)
 
static struct ssetgetvacant (struct vars *, struct dfa *, chr *, chr *)
 
static struct ssetpickss (struct vars *, struct dfa *, chr *, chr *)
 
int pg_regexec (regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
 

Macro Definition Documentation

◆ DOMALLOC

#define DOMALLOC   ((struct smalldfa *)NULL) /* force malloc */

Definition at line 101 of file regexec.c.

Referenced by getladfa(), and getsubdfa().

◆ ERR

#define ERR (   e)    VERR(v, e) /* record an error */

Definition at line 129 of file regexec.c.

Referenced by cfindloop().

◆ FEWCOLORS

#define FEWCOLORS   15

Definition at line 91 of file regexec.c.

Referenced by newdfa().

◆ FEWSTATES

#define FEWSTATES   20 /* must be less than UBITS */

Definition at line 90 of file regexec.c.

◆ HASH

#define HASH (   bv,
  nw 
)    (((nw) == 1) ? *(bv) : hash(bv, nw))

Definition at line 49 of file regexec.c.

◆ HIT

#define HIT (   h,
  bv,
  ss,
  nw 
)
Value:
((ss)->hash == (h) && ((nw) == 1 || \
memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
static unsigned hash(unsigned *, int)
#define VS(x)
Definition: regguts.h:61

Definition at line 50 of file regexec.c.

Referenced by miss().

◆ ISERR

#define ISERR ( )    VISERR(v)

Definition at line 127 of file regexec.c.

Referenced by cfindloop(), citerdissect(), creviterdissect(), and find().

◆ LOCALDFAS

#define LOCALDFAS   40

Referenced by pg_regexec().

◆ LOCALMAT

#define LOCALMAT   20

Referenced by pg_regexec().

◆ LOCKED

#define LOCKED   04 /* locked in cache */

Definition at line 55 of file regexec.c.

Referenced by getvacant(), initialize(), and pickss().

◆ LOFF

#define LOFF (   p)    ((long)OFF(p))

◆ NOERR

#define NOERR ( )    {if (ISERR()) return v->err;} /* if error seen, return it */

Definition at line 130 of file regexec.c.

Referenced by caltdissect(), ccondissect(), cfind(), crevcondissect(), and find().

◆ NOPROGRESS

#define NOPROGRESS   010 /* zero-progress state set */

Definition at line 56 of file regexec.c.

Referenced by getvacant(), initialize(), lastcold(), and miss().

◆ OFF

#define OFF (   p)    ((p) - v->start)

Definition at line 131 of file regexec.c.

Referenced by cfind(), cfindloop(), find(), and subset().

◆ POSTSTATE

#define POSTSTATE   02 /* includes the goal state */

Definition at line 54 of file regexec.c.

Referenced by getvacant(), longest(), matchuntil(), miss(), and shortest().

◆ STARTER

#define STARTER   01 /* the initial state set */

Definition at line 53 of file regexec.c.

Referenced by initialize().

◆ VERR

#define VERR (   vv,
  e 
)    ((vv)->err = ((vv)->err ? (vv)->err : (e)))

Definition at line 128 of file regexec.c.

◆ VISERR

#define VISERR (   vv)    ((vv)->err != 0) /* have we seen an error yet? */

Definition at line 126 of file regexec.c.

◆ WORK

#define WORK   1 /* number of work bitvectors needed */

Definition at line 87 of file regexec.c.

Referenced by newdfa().

Function Documentation

◆ caltdissect()

static int caltdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1068 of file regexec.c.

References assert, cdissect(), subre::child, subre::cnfa, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_NOMATCH, and subre::sibling.

Referenced by cdissect().

1072 {
1073  struct dfa *d;
1074  int er;
1075 
1076  assert(t->op == '|');
1077 
1078  t = t->child;
1079  /* there should be at least 2 alternatives */
1080  assert(t != NULL && t->sibling != NULL);
1081 
1082  while (t != NULL)
1083  {
1084  assert(t->cnfa.nstates > 0);
1085 
1086  MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1087 
1088  d = getsubdfa(v, t);
1089  NOERR();
1090  if (longest(v, d, begin, end, (int *) NULL) == end)
1091  {
1092  MDEBUG(("%d: caltdissect matched\n", t->id));
1093  er = cdissect(v, t, begin, end);
1094  if (er != REG_NOMATCH)
1095  return er;
1096  }
1097  NOERR();
1098 
1099  t = t->sibling;
1100  }
1101 
1102  return REG_NOMATCH;
1103 }
struct subre * child
Definition: regguts.h:496
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
Definition: regexec.c:63
int nstates
Definition: regguts.h:403
int id
Definition: regguts.h:491
struct subre * sibling
Definition: regguts.h:497
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:363
#define assert(TEST)
Definition: imath.c:73
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:747
struct cnfa cnfa
Definition: regguts.h:500
#define REG_NOMATCH
Definition: regex.h:140
#define NOERR()
Definition: regexec.c:130

◆ cbrdissect()

static int cbrdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 986 of file regexec.c.

References assert, subre::backno, DUPINF, vars::g, subre::id, LOFF, subre::max, MDEBUG, subre::min, subre::op, vars::pmatch, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, regmatch_t::rm_so, and vars::start.

Referenced by cdissect().

990 {
991  int n = t->backno;
992  size_t numreps;
993  size_t tlen;
994  size_t brlen;
995  chr *brstring;
996  chr *p;
997  int min = t->min;
998  int max = t->max;
999 
1000  assert(t != NULL);
1001  assert(t->op == 'b');
1002  assert(n >= 0);
1003  assert((size_t) n < v->nmatch);
1004 
1005  MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
1006  LOFF(begin), LOFF(end)));
1007 
1008  /* get the backreferenced string */
1009  if (v->pmatch[n].rm_so == -1)
1010  return REG_NOMATCH;
1011  brstring = v->start + v->pmatch[n].rm_so;
1012  brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
1013 
1014  /* special cases for zero-length strings */
1015  if (brlen == 0)
1016  {
1017  /*
1018  * matches only if target is zero length, but any number of
1019  * repetitions can be considered to be present
1020  */
1021  if (begin == end && min <= max)
1022  {
1023  MDEBUG(("%d: backref matched trivially\n", t->id));
1024  return REG_OKAY;
1025  }
1026  return REG_NOMATCH;
1027  }
1028  if (begin == end)
1029  {
1030  /* matches only if zero repetitions are okay */
1031  if (min == 0)
1032  {
1033  MDEBUG(("%d: backref matched trivially\n", t->id));
1034  return REG_OKAY;
1035  }
1036  return REG_NOMATCH;
1037  }
1038 
1039  /*
1040  * check target length to see if it could possibly be an allowed number of
1041  * repetitions of brstring
1042  */
1043  assert(end > begin);
1044  tlen = end - begin;
1045  if (tlen % brlen != 0)
1046  return REG_NOMATCH;
1047  numreps = tlen / brlen;
1048  if (numreps < min || (numreps > max && max != DUPINF))
1049  return REG_NOMATCH;
1050 
1051  /* okay, compare the actual string contents */
1052  p = begin;
1053  while (numreps-- > 0)
1054  {
1055  if ((*v->g->compare) (brstring, p, brlen) != 0)
1056  return REG_NOMATCH;
1057  p += brlen;
1058  }
1059 
1060  MDEBUG(("%d: backref matched\n", t->id));
1061  return REG_OKAY;
1062 }
regoff_t rm_so
Definition: regex.h:87
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
short min
Definition: regguts.h:494
#define MDEBUG(arglist)
Definition: regguts.h:117
chr * start
Definition: regexec.c:114
regoff_t rm_eo
Definition: regex.h:88
int id
Definition: regguts.h:491
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
int backno
Definition: regguts.h:493
#define DUPINF
Definition: regguts.h:94
short max
Definition: regguts.h:495
regmatch_t * pmatch
Definition: regexec.c:112
#define REG_NOMATCH
Definition: regex.h:140

◆ ccondissect()

static int ccondissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 821 of file regexec.c.

References assert, cdissect(), subre::child, subre::cnfa, subre::flags, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_ASSERT, REG_NOMATCH, REG_OKAY, SHORTER, subre::sibling, and zaptreesubs().

Referenced by cdissect().

825 {
826  struct subre *left = t->child;
827  struct subre *right = left->sibling;
828  struct dfa *d;
829  struct dfa *d2;
830  chr *mid;
831  int er;
832 
833  assert(t->op == '.');
834  assert(left != NULL && left->cnfa.nstates > 0);
835  assert(right != NULL && right->cnfa.nstates > 0);
836  assert(right->sibling == NULL);
837  assert(!(left->flags & SHORTER));
838 
839  d = getsubdfa(v, left);
840  NOERR();
841  d2 = getsubdfa(v, right);
842  NOERR();
843  MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
844 
845  /* pick a tentative midpoint */
846  mid = longest(v, d, begin, end, (int *) NULL);
847  NOERR();
848  if (mid == NULL)
849  return REG_NOMATCH;
850  MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
851 
852  /* iterate until satisfaction or failure */
853  for (;;)
854  {
855  /* try this midpoint on for size */
856  if (longest(v, d2, mid, end, (int *) NULL) == end)
857  {
858  er = cdissect(v, left, begin, mid);
859  if (er == REG_OKAY)
860  {
861  er = cdissect(v, right, mid, end);
862  if (er == REG_OKAY)
863  {
864  /* satisfaction */
865  MDEBUG(("%d: successful\n", t->id));
866  return REG_OKAY;
867  }
868  /* Reset left's matches (right should have done so itself) */
869  zaptreesubs(v, left);
870  }
871  if (er != REG_NOMATCH)
872  return er;
873  }
874  NOERR();
875 
876  /* that midpoint didn't work, find a new one */
877  if (mid == begin)
878  {
879  /* all possibilities exhausted */
880  MDEBUG(("%d: no midpoint\n", t->id));
881  return REG_NOMATCH;
882  }
883  mid = longest(v, d, begin, mid - 1, (int *) NULL);
884  NOERR();
885  if (mid == NULL)
886  {
887  /* failed to find a new one */
888  MDEBUG(("%d: failed midpoint\n", t->id));
889  return REG_NOMATCH;
890  }
891  MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
892  }
893 
894  /* can't get here */
895  return REG_ASSERT;
896 }
struct subre * child
Definition: regguts.h:496
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
Definition: regexec.c:63
int nstates
Definition: regguts.h:403
int id
Definition: regguts.h:491
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:670
struct subre * sibling
Definition: regguts.h:497
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:363
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
#define REG_ASSERT
Definition: regex.h:153
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:747
Definition: regguts.h:471
struct cnfa cnfa
Definition: regguts.h:500
#define SHORTER
Definition: regguts.h:476
#define REG_NOMATCH
Definition: regex.h:140
#define NOERR()
Definition: regexec.c:130

◆ cdissect()

static int cdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 747 of file regexec.c.

References assert, BACKR, caltdissect(), CANCEL_REQUESTED, subre::capno, cbrdissect(), ccondissect(), subre::child, citerdissect(), crevcondissect(), creviterdissect(), subre::flags, subre::id, LOFF, MDEBUG, subre::op, vars::re, REG_ASSERT, REG_CANCEL, REG_ETOOBIG, REG_NOMATCH, REG_OKAY, SHORTER, STACK_TOO_DEEP, and subset().

Referenced by caltdissect(), ccondissect(), cfindloop(), citerdissect(), crevcondissect(), creviterdissect(), and find().

751 {
752  int er;
753 
754  assert(t != NULL);
755  MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
756 
757  /* handy place to check for operation cancel */
758  if (CANCEL_REQUESTED(v->re))
759  return REG_CANCEL;
760  /* ... and stack overrun */
761  if (STACK_TOO_DEEP(v->re))
762  return REG_ETOOBIG;
763 
764  switch (t->op)
765  {
766  case '=': /* terminal node */
767  assert(t->child == NULL);
768  er = REG_OKAY; /* no action, parent did the work */
769  break;
770  case 'b': /* back reference */
771  assert(t->child == NULL);
772  er = cbrdissect(v, t, begin, end);
773  break;
774  case '.': /* concatenation */
775  assert(t->child != NULL);
776  if (t->child->flags & SHORTER) /* reverse scan */
777  er = crevcondissect(v, t, begin, end);
778  else
779  er = ccondissect(v, t, begin, end);
780  break;
781  case '|': /* alternation */
782  assert(t->child != NULL);
783  er = caltdissect(v, t, begin, end);
784  break;
785  case '*': /* iteration */
786  assert(t->child != NULL);
787  if (t->child->flags & SHORTER) /* reverse scan */
788  er = creviterdissect(v, t, begin, end);
789  else
790  er = citerdissect(v, t, begin, end);
791  break;
792  case '(': /* no-op capture node */
793  assert(t->child != NULL);
794  er = cdissect(v, t->child, begin, end);
795  break;
796  default:
797  er = REG_ASSERT;
798  break;
799  }
800 
801  /*
802  * We should never have a match failure unless backrefs lurk below;
803  * otherwise, either caller failed to check the DFA, or there's some
804  * inconsistency between the DFA and the node's innards.
805  */
806  assert(er != REG_NOMATCH || (t->flags & BACKR));
807 
808  /*
809  * If this node is marked as capturing, save successful match's location.
810  */
811  if (t->capno > 0 && er == REG_OKAY)
812  subset(v, t, begin, end);
813 
814  return er;
815 }
struct subre * child
Definition: regguts.h:496
int capno
Definition: regguts.h:492
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
#define MDEBUG(arglist)
Definition: regguts.h:117
int id
Definition: regguts.h:491
#define REG_OKAY
Definition: regex.h:139
regex_t * re
Definition: regcomp.c:239
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
static int cbrdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:986
static int creviterdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:1313
#define BACKR
Definition: regguts.h:479
static int caltdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:1068
#define REG_ASSERT
Definition: regex.h:153
static int ccondissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:821
#define REG_ETOOBIG
Definition: regex.h:157
static int citerdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:1109
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:747
#define SHORTER
Definition: regguts.h:476
#define CANCEL_REQUESTED(re)
Definition: regguts.h:517
#define REG_NOMATCH
Definition: regex.h:140
static int crevcondissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:902
static void subset(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:693
#define STACK_TOO_DEEP(re)
Definition: regguts.h:520
#define REG_CANCEL
Definition: regex.h:159

◆ cfind()

static int cfind ( struct vars v,
struct cnfa cnfa,
struct colormap cm 
)
static

Definition at line 500 of file regexec.c.

References assert, cfindloop(), guts::cflags, vars::details, vars::dfa1, vars::dfa2, vars::err, freedfa(), vars::g, newdfa(), NOERR, OFF, REG_EXPECT, regmatch_t::rm_eo, rm_detail_t::rm_extend, regmatch_t::rm_so, guts::search, and vars::stop.

Referenced by pg_regexec().

503 {
504  struct dfa *s;
505  struct dfa *d;
506  chr *cold;
507  int ret;
508 
509  s = newdfa(v, &v->g->search, cm, &v->dfa1);
510  if (s == NULL)
511  return v->err;
512  d = newdfa(v, cnfa, cm, &v->dfa2);
513  if (d == NULL)
514  {
515  freedfa(s);
516  return v->err;
517  }
518 
519  ret = cfindloop(v, cnfa, cm, d, s, &cold);
520 
521  freedfa(d);
522  freedfa(s);
523  NOERR();
524  if (v->g->cflags & REG_EXPECT)
525  {
526  assert(v->details != NULL);
527  if (cold != NULL)
528  v->details->rm_extend.rm_so = OFF(cold);
529  else
530  v->details->rm_extend.rm_so = OFF(v->stop);
531  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
532  }
533  return ret;
534 }
#define REG_EXPECT
Definition: regex.h:115
regoff_t rm_so
Definition: regex.h:87
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
rm_detail_t * details
Definition: regexec.c:113
regoff_t rm_eo
Definition: regex.h:88
Definition: regexec.c:63
struct cnfa search
Definition: regguts.h:535
pg_wchar chr
Definition: regcustom.h:66
static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **)
Definition: regexec.c:540
struct smalldfa dfa2
Definition: regexec.c:123
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
int err
Definition: regcomp.c:242
#define OFF(p)
Definition: regexec.c:131
static void freedfa(struct dfa *)
regmatch_t rm_extend
Definition: regex.h:94
const chr * stop
Definition: regcomp.c:241
int cflags
Definition: regguts.h:531
#define NOERR()
Definition: regexec.c:130
struct smalldfa dfa1
Definition: regexec.c:122

◆ cfindloop()

static int cfindloop ( struct vars v,
struct cnfa cnfa,
struct colormap cm,
struct dfa d,
struct dfa s,
chr **  coldp 
)
static

Definition at line 540 of file regexec.c.

References assert, cdissect(), close, ERR, vars::err, subre::flags, vars::g, ISERR, LOFF, longest(), MDEBUG, vars::nmatch, OFF, vars::pmatch, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, regmatch_t::rm_so, vars::search_start, SHORTER, shortest(), vars::stop, and guts::tree.

Referenced by cfind().

546 {
547  chr *begin;
548  chr *end;
549  chr *cold;
550  chr *open; /* open and close of range of possible starts */
551  chr *close;
552  chr *estart;
553  chr *estop;
554  int er;
555  int shorter = v->g->tree->flags & SHORTER;
556  int hitend;
557 
558  assert(d != NULL && s != NULL);
559  cold = NULL;
560  close = v->search_start;
561  do
562  {
563  /* Search with the search RE for match range at/beyond "close" */
564  MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
565  close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
566  if (ISERR())
567  {
568  *coldp = cold;
569  return v->err;
570  }
571  if (close == NULL)
572  break; /* no more possible match anywhere */
573  assert(cold != NULL);
574  open = cold;
575  cold = NULL;
576  /* Search for matches starting between "open" and "close" inclusive */
577  MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
578  for (begin = open; begin <= close; begin++)
579  {
580  MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
581  estart = begin;
582  estop = v->stop;
583  for (;;)
584  {
585  /* Here we use the top node's detailed RE */
586  if (shorter)
587  end = shortest(v, d, begin, estart,
588  estop, (chr **) NULL, &hitend);
589  else
590  end = longest(v, d, begin, estop,
591  &hitend);
592  if (ISERR())
593  {
594  *coldp = cold;
595  return v->err;
596  }
597  if (hitend && cold == NULL)
598  cold = begin;
599  if (end == NULL)
600  break; /* no match with this begin point, try next */
601  MDEBUG(("tentative end %ld\n", LOFF(end)));
602  /* Dissect the potential match to see if it really matches */
603  er = cdissect(v, v->g->tree, begin, end);
604  if (er == REG_OKAY)
605  {
606  if (v->nmatch > 0)
607  {
608  v->pmatch[0].rm_so = OFF(begin);
609  v->pmatch[0].rm_eo = OFF(end);
610  }
611  *coldp = cold;
612  return REG_OKAY;
613  }
614  if (er != REG_NOMATCH)
615  {
616  ERR(er);
617  *coldp = cold;
618  return er;
619  }
620  /* Try next longer/shorter match with same begin point */
621  if (shorter)
622  {
623  if (end == estop)
624  break; /* no more, so try next begin point */
625  estart = end + 1;
626  }
627  else
628  {
629  if (end == begin)
630  break; /* no more, so try next begin point */
631  estop = end - 1;
632  }
633  } /* end loop over endpoint positions */
634  } /* end loop over beginning positions */
635 
636  /*
637  * If we get here, there is no possible match starting at or before
638  * "close", so consider matches beyond that. We'll do a fresh search
639  * with the search RE to find a new promising match range.
640  */
641  close++;
642  } while (close < v->stop);
643 
644  *coldp = cold;
645  return REG_NOMATCH;
646 }
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
struct subre * tree
Definition: regguts.h:534
regoff_t rm_so
Definition: regex.h:87
#define LOFF(p)
Definition: regexec.c:132
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
regoff_t rm_eo
Definition: regex.h:88
chr * search_start
Definition: regexec.c:115
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
int err
Definition: regcomp.c:242
regmatch_t * pmatch
Definition: regexec.c:112
#define ERR(e)
Definition: regexec.c:129
#define OFF(p)
Definition: regexec.c:131
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:747
size_t nmatch
Definition: regexec.c:111
const chr * stop
Definition: regcomp.c:241
#define SHORTER
Definition: regguts.h:476
#define REG_NOMATCH
Definition: regex.h:140
#define close(a)
Definition: win32.h:12
#define ISERR()
Definition: regexec.c:127

◆ citerdissect()

static int citerdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1109 of file regexec.c.

References assert, cdissect(), subre::child, subre::cnfa, DUPINF, vars::err, subre::flags, FREE, getsubdfa(), i, subre::id, ISERR, LOFF, longest(), MALLOC, subre::max, MDEBUG, subre::min, cnfa::nstates, subre::op, REG_ESPACE, REG_NOMATCH, REG_OKAY, SHORTER, and zaptreesubs().

Referenced by cdissect().

1113 {
1114  struct dfa *d;
1115  chr **endpts;
1116  chr *limit;
1117  int min_matches;
1118  size_t max_matches;
1119  int nverified;
1120  int k;
1121  int i;
1122  int er;
1123 
1124  assert(t->op == '*');
1125  assert(t->child != NULL && t->child->cnfa.nstates > 0);
1126  assert(!(t->child->flags & SHORTER));
1127  assert(begin <= end);
1128 
1129  MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1130 
1131  /*
1132  * For the moment, assume the minimum number of matches is 1. If zero
1133  * matches are allowed, and the target string is empty, we are allowed to
1134  * match regardless of the contents of the iter node --- but we would
1135  * prefer to match once, so that capturing parens get set. (An example of
1136  * the concern here is a pattern like "()*\1", which historically this
1137  * code has allowed to succeed.) Therefore, we deal with the zero-matches
1138  * case at the bottom, after failing to find any other way to match.
1139  */
1140  min_matches = t->min;
1141  if (min_matches <= 0)
1142  min_matches = 1;
1143 
1144  /*
1145  * We need workspace to track the endpoints of each sub-match. Normally
1146  * we consider only nonzero-length sub-matches, so there can be at most
1147  * end-begin of them. However, if min is larger than that, we will also
1148  * consider zero-length sub-matches in order to find enough matches.
1149  *
1150  * For convenience, endpts[0] contains the "begin" pointer and we store
1151  * sub-match endpoints in endpts[1..max_matches].
1152  */
1153  max_matches = end - begin;
1154  if (max_matches > t->max && t->max != DUPINF)
1155  max_matches = t->max;
1156  if (max_matches < min_matches)
1157  max_matches = min_matches;
1158  endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1159  if (endpts == NULL)
1160  return REG_ESPACE;
1161  endpts[0] = begin;
1162 
1163  d = getsubdfa(v, t->child);
1164  if (ISERR())
1165  {
1166  FREE(endpts);
1167  return v->err;
1168  }
1169 
1170  /*
1171  * Our strategy is to first find a set of sub-match endpoints that are
1172  * valid according to the child node's DFA, and then recursively dissect
1173  * each sub-match to confirm validity. If any validity check fails,
1174  * backtrack that sub-match and try again. And, when we next try for a
1175  * validity check, we need not recheck any successfully verified
1176  * sub-matches that we didn't move the endpoints of. nverified remembers
1177  * how many sub-matches are currently known okay.
1178  */
1179 
1180  /* initialize to consider first sub-match */
1181  nverified = 0;
1182  k = 1;
1183  limit = end;
1184 
1185  /* iterate until satisfaction or failure */
1186  while (k > 0)
1187  {
1188  /* try to find an endpoint for the k'th sub-match */
1189  endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
1190  if (ISERR())
1191  {
1192  FREE(endpts);
1193  return v->err;
1194  }
1195  if (endpts[k] == NULL)
1196  {
1197  /* no match possible, so see if we can shorten previous one */
1198  k--;
1199  goto backtrack;
1200  }
1201  MDEBUG(("%d: working endpoint %d: %ld\n",
1202  t->id, k, LOFF(endpts[k])));
1203 
1204  /* k'th sub-match can no longer be considered verified */
1205  if (nverified >= k)
1206  nverified = k - 1;
1207 
1208  if (endpts[k] != end)
1209  {
1210  /* haven't reached end yet, try another iteration if allowed */
1211  if (k >= max_matches)
1212  {
1213  /* must try to shorten some previous match */
1214  k--;
1215  goto backtrack;
1216  }
1217 
1218  /* reject zero-length match unless necessary to achieve min */
1219  if (endpts[k] == endpts[k - 1] &&
1220  (k >= min_matches || min_matches - k < end - endpts[k]))
1221  goto backtrack;
1222 
1223  k++;
1224  limit = end;
1225  continue;
1226  }
1227 
1228  /*
1229  * We've identified a way to divide the string into k sub-matches that
1230  * works so far as the child DFA can tell. If k is an allowed number
1231  * of matches, start the slow part: recurse to verify each sub-match.
1232  * We always have k <= max_matches, needn't check that.
1233  */
1234  if (k < min_matches)
1235  goto backtrack;
1236 
1237  MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1238 
1239  for (i = nverified + 1; i <= k; i++)
1240  {
1241  /* zap any match data from a non-last iteration */
1242  zaptreesubs(v, t->child);
1243  er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1244  if (er == REG_OKAY)
1245  {
1246  nverified = i;
1247  continue;
1248  }
1249  if (er == REG_NOMATCH)
1250  break;
1251  /* oops, something failed */
1252  FREE(endpts);
1253  return er;
1254  }
1255 
1256  if (i > k)
1257  {
1258  /* satisfaction */
1259  MDEBUG(("%d: successful\n", t->id));
1260  FREE(endpts);
1261  return REG_OKAY;
1262  }
1263 
1264  /* i'th match failed to verify, so backtrack it */
1265  k = i;
1266 
1267 backtrack:
1268 
1269  /*
1270  * Must consider shorter versions of the k'th sub-match. However,
1271  * we'll only ask for a zero-length match if necessary.
1272  */
1273  while (k > 0)
1274  {
1275  chr *prev_end = endpts[k - 1];
1276 
1277  if (endpts[k] > prev_end)
1278  {
1279  limit = endpts[k] - 1;
1280  if (limit > prev_end ||
1281  (k < min_matches && min_matches - k >= end - prev_end))
1282  {
1283  /* break out of backtrack loop, continue the outer one */
1284  break;
1285  }
1286  }
1287  /* can't shorten k'th sub-match any more, consider previous one */
1288  k--;
1289  }
1290  }
1291 
1292  /* all possibilities exhausted */
1293  FREE(endpts);
1294 
1295  /*
1296  * Now consider the possibility that we can match to a zero-length string
1297  * by using zero repetitions.
1298  */
1299  if (t->min == 0 && begin == end)
1300  {
1301  MDEBUG(("%d: allowing zero matches\n", t->id));
1302  return REG_OKAY;
1303  }
1304 
1305  MDEBUG(("%d: failed\n", t->id));
1306  return REG_NOMATCH;
1307 }
struct subre * child
Definition: regguts.h:496
#define REG_ESPACE
Definition: regex.h:151
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
short min
Definition: regguts.h:494
#define FREE(ptr)
Definition: cryptohash.c:37
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
#define MALLOC(n)
Definition: regcustom.h:60
Definition: regexec.c:63
int nstates
Definition: regguts.h:403
int id
Definition: regguts.h:491
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:670
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:363
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
int err
Definition: regcomp.c:242
#define DUPINF
Definition: regguts.h:94
short max
Definition: regguts.h:495
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:747
struct cnfa cnfa
Definition: regguts.h:500
#define SHORTER
Definition: regguts.h:476
int i
#define REG_NOMATCH
Definition: regex.h:140
#define ISERR()
Definition: regexec.c:127

◆ crevcondissect()

static int crevcondissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 902 of file regexec.c.

References assert, cdissect(), subre::child, subre::cnfa, subre::flags, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_ASSERT, REG_NOMATCH, REG_OKAY, SHORTER, shortest(), subre::sibling, and zaptreesubs().

Referenced by cdissect().

906 {
907  struct subre *left = t->child;
908  struct subre *right = left->sibling;
909  struct dfa *d;
910  struct dfa *d2;
911  chr *mid;
912  int er;
913 
914  assert(t->op == '.');
915  assert(left != NULL && left->cnfa.nstates > 0);
916  assert(right != NULL && right->cnfa.nstates > 0);
917  assert(right->sibling == NULL);
918  assert(left->flags & SHORTER);
919 
920  d = getsubdfa(v, left);
921  NOERR();
922  d2 = getsubdfa(v, right);
923  NOERR();
924  MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
925 
926  /* pick a tentative midpoint */
927  mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
928  NOERR();
929  if (mid == NULL)
930  return REG_NOMATCH;
931  MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
932 
933  /* iterate until satisfaction or failure */
934  for (;;)
935  {
936  /* try this midpoint on for size */
937  if (longest(v, d2, mid, end, (int *) NULL) == end)
938  {
939  er = cdissect(v, left, begin, mid);
940  if (er == REG_OKAY)
941  {
942  er = cdissect(v, right, mid, end);
943  if (er == REG_OKAY)
944  {
945  /* satisfaction */
946  MDEBUG(("%d: successful\n", t->id));
947  return REG_OKAY;
948  }
949  /* Reset left's matches (right should have done so itself) */
950  zaptreesubs(v, left);
951  }
952  if (er != REG_NOMATCH)
953  return er;
954  }
955  NOERR();
956 
957  /* that midpoint didn't work, find a new one */
958  if (mid == end)
959  {
960  /* all possibilities exhausted */
961  MDEBUG(("%d: no midpoint\n", t->id));
962  return REG_NOMATCH;
963  }
964  mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
965  NOERR();
966  if (mid == NULL)
967  {
968  /* failed to find a new one */
969  MDEBUG(("%d: failed midpoint\n", t->id));
970  return REG_NOMATCH;
971  }
972  MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
973  }
974 
975  /* can't get here */
976  return REG_ASSERT;
977 }
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
struct subre * child
Definition: regguts.h:496
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
Definition: regexec.c:63
int nstates
Definition: regguts.h:403
int id
Definition: regguts.h:491
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:670
struct subre * sibling
Definition: regguts.h:497
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:363
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
#define REG_ASSERT
Definition: regex.h:153
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:747
Definition: regguts.h:471
struct cnfa cnfa
Definition: regguts.h:500
#define SHORTER
Definition: regguts.h:476
#define REG_NOMATCH
Definition: regex.h:140
#define NOERR()
Definition: regexec.c:130

◆ creviterdissect()

static int creviterdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1313 of file regexec.c.

References assert, cdissect(), subre::child, subre::cnfa, DUPINF, vars::err, subre::flags, FREE, getsubdfa(), i, subre::id, ISERR, LOFF, MALLOC, subre::max, MDEBUG, subre::min, cnfa::nstates, subre::op, REG_ESPACE, REG_NOMATCH, REG_OKAY, SHORTER, shortest(), and zaptreesubs().

Referenced by cdissect().

1317 {
1318  struct dfa *d;
1319  chr **endpts;
1320  chr *limit;
1321  int min_matches;
1322  size_t max_matches;
1323  int nverified;
1324  int k;
1325  int i;
1326  int er;
1327 
1328  assert(t->op == '*');
1329  assert(t->child != NULL && t->child->cnfa.nstates > 0);
1330  assert(t->child->flags & SHORTER);
1331  assert(begin <= end);
1332 
1333  MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1334 
1335  /*
1336  * If zero matches are allowed, and target string is empty, just declare
1337  * victory. OTOH, if target string isn't empty, zero matches can't work
1338  * so we pretend the min is 1.
1339  */
1340  min_matches = t->min;
1341  if (min_matches <= 0)
1342  {
1343  if (begin == end)
1344  {
1345  MDEBUG(("%d: allowing zero matches\n", t->id));
1346  return REG_OKAY;
1347  }
1348  min_matches = 1;
1349  }
1350 
1351  /*
1352  * We need workspace to track the endpoints of each sub-match. Normally
1353  * we consider only nonzero-length sub-matches, so there can be at most
1354  * end-begin of them. However, if min is larger than that, we will also
1355  * consider zero-length sub-matches in order to find enough matches.
1356  *
1357  * For convenience, endpts[0] contains the "begin" pointer and we store
1358  * sub-match endpoints in endpts[1..max_matches].
1359  */
1360  max_matches = end - begin;
1361  if (max_matches > t->max && t->max != DUPINF)
1362  max_matches = t->max;
1363  if (max_matches < min_matches)
1364  max_matches = min_matches;
1365  endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1366  if (endpts == NULL)
1367  return REG_ESPACE;
1368  endpts[0] = begin;
1369 
1370  d = getsubdfa(v, t->child);
1371  if (ISERR())
1372  {
1373  FREE(endpts);
1374  return v->err;
1375  }
1376 
1377  /*
1378  * Our strategy is to first find a set of sub-match endpoints that are
1379  * valid according to the child node's DFA, and then recursively dissect
1380  * each sub-match to confirm validity. If any validity check fails,
1381  * backtrack that sub-match and try again. And, when we next try for a
1382  * validity check, we need not recheck any successfully verified
1383  * sub-matches that we didn't move the endpoints of. nverified remembers
1384  * how many sub-matches are currently known okay.
1385  */
1386 
1387  /* initialize to consider first sub-match */
1388  nverified = 0;
1389  k = 1;
1390  limit = begin;
1391 
1392  /* iterate until satisfaction or failure */
1393  while (k > 0)
1394  {
1395  /* disallow zero-length match unless necessary to achieve min */
1396  if (limit == endpts[k - 1] &&
1397  limit != end &&
1398  (k >= min_matches || min_matches - k < end - limit))
1399  limit++;
1400 
1401  /* if this is the last allowed sub-match, it must reach to the end */
1402  if (k >= max_matches)
1403  limit = end;
1404 
1405  /* try to find an endpoint for the k'th sub-match */
1406  endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
1407  (chr **) NULL, (int *) NULL);
1408  if (ISERR())
1409  {
1410  FREE(endpts);
1411  return v->err;
1412  }
1413  if (endpts[k] == NULL)
1414  {
1415  /* no match possible, so see if we can lengthen previous one */
1416  k--;
1417  goto backtrack;
1418  }
1419  MDEBUG(("%d: working endpoint %d: %ld\n",
1420  t->id, k, LOFF(endpts[k])));
1421 
1422  /* k'th sub-match can no longer be considered verified */
1423  if (nverified >= k)
1424  nverified = k - 1;
1425 
1426  if (endpts[k] != end)
1427  {
1428  /* haven't reached end yet, try another iteration if allowed */
1429  if (k >= max_matches)
1430  {
1431  /* must try to lengthen some previous match */
1432  k--;
1433  goto backtrack;
1434  }
1435 
1436  k++;
1437  limit = endpts[k - 1];
1438  continue;
1439  }
1440 
1441  /*
1442  * We've identified a way to divide the string into k sub-matches that
1443  * works so far as the child DFA can tell. If k is an allowed number
1444  * of matches, start the slow part: recurse to verify each sub-match.
1445  * We always have k <= max_matches, needn't check that.
1446  */
1447  if (k < min_matches)
1448  goto backtrack;
1449 
1450  MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1451 
1452  for (i = nverified + 1; i <= k; i++)
1453  {
1454  /* zap any match data from a non-last iteration */
1455  zaptreesubs(v, t->child);
1456  er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1457  if (er == REG_OKAY)
1458  {
1459  nverified = i;
1460  continue;
1461  }
1462  if (er == REG_NOMATCH)
1463  break;
1464  /* oops, something failed */
1465  FREE(endpts);
1466  return er;
1467  }
1468 
1469  if (i > k)
1470  {
1471  /* satisfaction */
1472  MDEBUG(("%d: successful\n", t->id));
1473  FREE(endpts);
1474  return REG_OKAY;
1475  }
1476 
1477  /* i'th match failed to verify, so backtrack it */
1478  k = i;
1479 
1480 backtrack:
1481 
1482  /*
1483  * Must consider longer versions of the k'th sub-match.
1484  */
1485  while (k > 0)
1486  {
1487  if (endpts[k] < end)
1488  {
1489  limit = endpts[k] + 1;
1490  /* break out of backtrack loop, continue the outer one */
1491  break;
1492  }
1493  /* can't lengthen k'th sub-match any more, consider previous one */
1494  k--;
1495  }
1496  }
1497 
1498  /* all possibilities exhausted */
1499  MDEBUG(("%d: failed\n", t->id));
1500  FREE(endpts);
1501  return REG_NOMATCH;
1502 }
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
struct subre * child
Definition: regguts.h:496
#define REG_ESPACE
Definition: regex.h:151
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
short min
Definition: regguts.h:494
#define FREE(ptr)
Definition: cryptohash.c:37
#define MDEBUG(arglist)
Definition: regguts.h:117
#define MALLOC(n)
Definition: regcustom.h:60
Definition: regexec.c:63
int nstates
Definition: regguts.h:403
int id
Definition: regguts.h:491
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:670
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:363
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
int err
Definition: regcomp.c:242
#define DUPINF
Definition: regguts.h:94
short max
Definition: regguts.h:495
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:747
struct cnfa cnfa
Definition: regguts.h:500
#define SHORTER
Definition: regguts.h:476
int i
#define REG_NOMATCH
Definition: regex.h:140
#define ISERR()
Definition: regexec.c:127

◆ dfa_backref()

static chr* dfa_backref ( struct vars ,
struct dfa ,
chr ,
chr ,
chr ,
bool   
)
static

◆ find()

static int find ( struct vars v,
struct cnfa cnfa,
struct colormap cm 
)
static

Definition at line 410 of file regexec.c.

References assert, cdissect(), guts::cflags, close, vars::details, vars::dfa1, vars::err, subre::flags, freedfa(), vars::g, ISERR, LOFF, longest(), MDEBUG, newdfa(), vars::nmatch, NOERR, OFF, vars::pmatch, REG_EXPECT, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, rm_detail_t::rm_extend, regmatch_t::rm_so, guts::search, vars::search_start, SHORTER, shortest(), vars::start, vars::stop, and guts::tree.

Referenced by NIAddAffix(), NIImportAffixes(), NIImportOOAffixes(), parse_affentry(), pg_regexec(), and testdelete().

413 {
414  struct dfa *s;
415  struct dfa *d;
416  chr *begin;
417  chr *end = NULL;
418  chr *cold;
419  chr *open; /* open and close of range of possible starts */
420  chr *close;
421  int hitend;
422  int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
423 
424  /* first, a shot with the search RE */
425  s = newdfa(v, &v->g->search, cm, &v->dfa1);
426  if (s == NULL)
427  return v->err;
428  MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
429  cold = NULL;
430  close = shortest(v, s, v->search_start, v->search_start, v->stop,
431  &cold, (int *) NULL);
432  freedfa(s);
433  NOERR();
434  if (v->g->cflags & REG_EXPECT)
435  {
436  assert(v->details != NULL);
437  if (cold != NULL)
438  v->details->rm_extend.rm_so = OFF(cold);
439  else
440  v->details->rm_extend.rm_so = OFF(v->stop);
441  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
442  }
443  if (close == NULL) /* not found */
444  return REG_NOMATCH;
445  if (v->nmatch == 0) /* found, don't need exact location */
446  return REG_OKAY;
447 
448  /* find starting point and match */
449  assert(cold != NULL);
450  open = cold;
451  cold = NULL;
452  MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
453  d = newdfa(v, cnfa, cm, &v->dfa1);
454  if (d == NULL)
455  return v->err;
456  for (begin = open; begin <= close; begin++)
457  {
458  MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
459  if (shorter)
460  end = shortest(v, d, begin, begin, v->stop,
461  (chr **) NULL, &hitend);
462  else
463  end = longest(v, d, begin, v->stop, &hitend);
464  if (ISERR())
465  {
466  freedfa(d);
467  return v->err;
468  }
469  if (hitend && cold == NULL)
470  cold = begin;
471  if (end != NULL)
472  break; /* NOTE BREAK OUT */
473  }
474  assert(end != NULL); /* search RE succeeded so loop should */
475  freedfa(d);
476 
477  /* and pin down details */
478  assert(v->nmatch > 0);
479  v->pmatch[0].rm_so = OFF(begin);
480  v->pmatch[0].rm_eo = OFF(end);
481  if (v->g->cflags & REG_EXPECT)
482  {
483  if (cold != NULL)
484  v->details->rm_extend.rm_so = OFF(cold);
485  else
486  v->details->rm_extend.rm_so = OFF(v->stop);
487  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
488  }
489  if (v->nmatch == 1) /* no need for submatches */
490  return REG_OKAY;
491 
492  /* find submatches */
493  return cdissect(v, v->g->tree, begin, end);
494 }
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
#define REG_EXPECT
Definition: regex.h:115
struct subre * tree
Definition: regguts.h:534
regoff_t rm_so
Definition: regex.h:87
#define LOFF(p)
Definition: regexec.c:132
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
rm_detail_t * details
Definition: regexec.c:113
#define MDEBUG(arglist)
Definition: regguts.h:117
chr * start
Definition: regexec.c:114
regoff_t rm_eo
Definition: regex.h:88
Definition: regexec.c:63
chr * search_start
Definition: regexec.c:115
struct cnfa search
Definition: regguts.h:535
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
int err
Definition: regcomp.c:242
regmatch_t * pmatch
Definition: regexec.c:112
#define OFF(p)
Definition: regexec.c:131
static void freedfa(struct dfa *)
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:747
size_t nmatch
Definition: regexec.c:111
regmatch_t rm_extend
Definition: regex.h:94
const chr * stop
Definition: regcomp.c:241
int cflags
Definition: regguts.h:531
#define SHORTER
Definition: regguts.h:476
#define REG_NOMATCH
Definition: regex.h:140
#define NOERR()
Definition: regexec.c:130
#define close(a)
Definition: win32.h:12
struct smalldfa dfa1
Definition: regexec.c:122
#define ISERR()
Definition: regexec.c:127

◆ freedfa()

static void freedfa ( struct dfa )
static

Referenced by cfind(), find(), and pg_regexec().

◆ getladfa()

static struct dfa * getladfa ( struct vars v,
int  n 
)
static

Definition at line 391 of file regexec.c.

References assert, guts::cmap, subre::cnfa, DOMALLOC, vars::g, guts::lacons, vars::ladfas, and newdfa().

Referenced by lacon().

393 {
394  assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
395 
396  if (v->ladfas[n] == NULL)
397  {
398  struct subre *sub = &v->g->lacons[n];
399 
400  v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
401  /* a LACON can't contain a backref, so nothing else to do */
402  }
403  return v->ladfas[n];
404 }
struct dfa ** ladfas
Definition: regexec.c:119
struct subre * lacons
Definition: regguts.h:539
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
#define DOMALLOC
Definition: regexec.c:101
struct colormap cmap
Definition: regguts.h:537
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
Definition: regguts.h:471
struct cnfa cnfa
Definition: regguts.h:500

◆ getsubdfa()

static struct dfa * getsubdfa ( struct vars v,
struct subre t 
)
static

Definition at line 363 of file regexec.c.

References dfa::backmax, dfa::backmin, dfa::backno, subre::backno, guts::cmap, subre::cnfa, DOMALLOC, vars::g, subre::id, subre::max, subre::min, newdfa(), subre::op, and vars::subdfas.

Referenced by caltdissect(), ccondissect(), citerdissect(), crevcondissect(), and creviterdissect().

365 {
366  struct dfa *d = v->subdfas[t->id];
367 
368  if (d == NULL)
369  {
370  d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
371  if (d == NULL)
372  return NULL;
373  /* set up additional info if this is a backref node */
374  if (t->op == 'b')
375  {
376  d->backno = t->backno;
377  d->backmin = t->min;
378  d->backmax = t->max;
379  }
380  v->subdfas[t->id] = d;
381  }
382  return d;
383 }
short backmin
Definition: regexec.c:81
char op
Definition: regguts.h:473
short min
Definition: regguts.h:494
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
Definition: regexec.c:63
int id
Definition: regguts.h:491
#define DOMALLOC
Definition: regexec.c:101
struct colormap cmap
Definition: regguts.h:537
struct guts * g
Definition: regexec.c:109
int backno
Definition: regguts.h:493
struct dfa ** subdfas
Definition: regexec.c:118
int backno
Definition: regexec.c:80
short max
Definition: regguts.h:495
short backmax
Definition: regexec.c:82
struct cnfa cnfa
Definition: regguts.h:500

◆ getvacant()

static struct sset* getvacant ( struct vars ,
struct dfa ,
chr ,
chr  
)
static

◆ hash()

static unsigned hash ( unsigned *  ,
int   
)
static

◆ initialize()

static struct sset* initialize ( struct vars ,
struct dfa ,
chr  
)
static

◆ lacon()

static int lacon ( struct vars ,
struct cnfa ,
chr ,
color   
)
static

◆ lastcold()

static chr* lastcold ( struct vars ,
struct dfa  
)
static

◆ longest()

static chr* longest ( struct vars ,
struct dfa ,
chr ,
chr ,
int *   
)
static

◆ matchuntil()

static int matchuntil ( struct vars ,
struct dfa ,
chr ,
struct sset **  ,
chr **   
)
static

◆ miss()

static struct sset* miss ( struct vars ,
struct dfa ,
struct sset ,
color  ,
chr ,
chr  
)
static

◆ newdfa()

static struct dfa* newdfa ( struct vars ,
struct cnfa ,
struct colormap ,
struct smalldfa  
)
static

Referenced by cfind(), find(), getladfa(), and getsubdfa().

◆ pg_regexec()

int pg_regexec ( regex_t re,
const chr string,
size_t  len,
size_t  search_start,
rm_detail_t details,
size_t  nmatch,
regmatch_t  pmatch[],
int  flags 
)

Definition at line 176 of file regexec.c.

References assert, cfind(), guts::cflags, cleanup(), guts::cmap, subre::cnfa, vars::details, vars::eflags, vars::err, find(), FREE, freedfa(), vars::g, i, guts::info, vars::ladfas, vars::lblastcp, vars::lblastcss, LOCALDFAS, LOCALMAT, MALLOC, guts::nlacons, vars::nmatch, guts::nsub, guts::ntree, pg_set_regex_collation(), vars::pmatch, vars::re, regex_t::re_collation, regex_t::re_csize, regex_t::re_guts, regex_t::re_magic, REG_ESPACE, REG_EXPECT, REG_FTRACE, REG_INVARG, REG_MIXED, REG_MTRACE, REG_NOMATCH, REG_NOSUB, REG_OKAY, REG_UBACKREF, REG_UIMPOSSIBLE, REMAGIC, vars::search_start, vars::start, generate_unaccent_rules::stdout, vars::stop, vars::subdfas, guts::tree, VS, and zapallsubs().

Referenced by check_ident_usermap(), CheckAffix(), RE_wchar_execute(), replace_text_regexp(), and test_re_execute().

184 {
185  struct vars var;
186  register struct vars *v = &var;
187  int st;
188  size_t n;
189  size_t i;
190  int backref;
191 
192 #define LOCALMAT 20
193  regmatch_t mat[LOCALMAT];
194 
195 #define LOCALDFAS 40
196  struct dfa *subdfas[LOCALDFAS];
197 
198  /* sanity checks */
199  if (re == NULL || string == NULL || re->re_magic != REMAGIC)
200  return REG_INVARG;
201  if (re->re_csize != sizeof(chr))
202  return REG_MIXED;
203  if (search_start > len)
204  return REG_NOMATCH;
205 
206  /* Initialize locale-dependent support */
208 
209  /* setup */
210  v->re = re;
211  v->g = (struct guts *) re->re_guts;
212  if ((v->g->cflags & REG_EXPECT) && details == NULL)
213  return REG_INVARG;
214  if (v->g->info & REG_UIMPOSSIBLE)
215  return REG_NOMATCH;
216  backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
217  v->eflags = flags;
218  if (backref && nmatch <= v->g->nsub)
219  {
220  /* need larger work area */
221  v->nmatch = v->g->nsub + 1;
222  if (v->nmatch <= LOCALMAT)
223  v->pmatch = mat;
224  else
225  v->pmatch = (regmatch_t *) MALLOC(v->nmatch * sizeof(regmatch_t));
226  if (v->pmatch == NULL)
227  return REG_ESPACE;
228  zapallsubs(v->pmatch, v->nmatch);
229  }
230  else
231  {
232  /* we can store results directly in caller's array */
233  v->pmatch = pmatch;
234  /* ensure any extra entries in caller's array are filled with -1 */
235  if (nmatch > 0)
236  zapallsubs(pmatch, nmatch);
237  /* then forget about extra entries, to avoid useless work in find() */
238  if (nmatch > v->g->nsub + 1)
239  nmatch = v->g->nsub + 1;
240  v->nmatch = nmatch;
241  }
242  v->details = details;
243  v->start = (chr *) string;
244  v->search_start = (chr *) string + search_start;
245  v->stop = (chr *) string + len;
246  v->err = 0;
247  v->subdfas = NULL;
248  v->ladfas = NULL;
249  v->lblastcss = NULL;
250  v->lblastcp = NULL;
251  /* below this point, "goto cleanup" will behave sanely */
252 
253  assert(v->g->ntree >= 0);
254  n = (size_t) v->g->ntree;
255  if (n <= LOCALDFAS)
256  v->subdfas = subdfas;
257  else
258  {
259  v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
260  if (v->subdfas == NULL)
261  {
262  st = REG_ESPACE;
263  goto cleanup;
264  }
265  }
266  for (i = 0; i < n; i++)
267  v->subdfas[i] = NULL;
268 
269  assert(v->g->nlacons >= 0);
270  n = (size_t) v->g->nlacons;
271  if (n > 0)
272  {
273  v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
274  if (v->ladfas == NULL)
275  {
276  st = REG_ESPACE;
277  goto cleanup;
278  }
279  for (i = 0; i < n; i++)
280  v->ladfas[i] = NULL;
281  v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
282  v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
283  if (v->lblastcss == NULL || v->lblastcp == NULL)
284  {
285  st = REG_ESPACE;
286  goto cleanup;
287  }
288  for (i = 0; i < n; i++)
289  {
290  v->lblastcss[i] = NULL;
291  v->lblastcp[i] = NULL;
292  }
293  }
294 
295  /* do it */
296  assert(v->g->tree != NULL);
297  if (backref)
298  st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
299  else
300  st = find(v, &v->g->tree->cnfa, &v->g->cmap);
301 
302  /* on success, ensure caller's match vector is filled correctly */
303  if (st == REG_OKAY && nmatch > 0)
304  {
305  if (v->pmatch != pmatch)
306  {
307  /* copy portion of match vector over from (larger) work area */
308  assert(nmatch <= v->nmatch);
309  memcpy(VS(pmatch), VS(v->pmatch), nmatch * sizeof(regmatch_t));
310  }
311  if (v->g->cflags & REG_NOSUB)
312  {
313  /* don't expose possibly-partial sub-match results to caller */
314  zapallsubs(pmatch, nmatch);
315  }
316  }
317 
318  /* clean up */
319 cleanup:
320  if (v->pmatch != pmatch && v->pmatch != mat)
321  FREE(v->pmatch);
322  if (v->subdfas != NULL)
323  {
324  n = (size_t) v->g->ntree;
325  for (i = 0; i < n; i++)
326  {
327  if (v->subdfas[i] != NULL)
328  freedfa(v->subdfas[i]);
329  }
330  if (v->subdfas != subdfas)
331  FREE(v->subdfas);
332  }
333  if (v->ladfas != NULL)
334  {
335  n = (size_t) v->g->nlacons;
336  for (i = 0; i < n; i++)
337  {
338  if (v->ladfas[i] != NULL)
339  freedfa(v->ladfas[i]);
340  }
341  FREE(v->ladfas);
342  }
343  if (v->lblastcss != NULL)
344  FREE(v->lblastcss);
345  if (v->lblastcp != NULL)
346  FREE(v->lblastcp);
347 
348 #ifdef REG_DEBUG
349  if (v->eflags & (REG_FTRACE | REG_MTRACE))
350  fflush(stdout);
351 #endif
352 
353  return st;
354 }
#define REG_UIMPOSSIBLE
Definition: regex.h:74
struct dfa ** ladfas
Definition: regexec.c:119
static int find(struct vars *, struct cnfa *, struct colormap *)
Definition: regexec.c:410
#define REG_EXPECT
Definition: regex.h:115
struct subre * tree
Definition: regguts.h:534
#define REG_ESPACE
Definition: regex.h:151
int nlacons
Definition: regguts.h:540
#define REMAGIC
Definition: regguts.h:96
#define REG_FTRACE
Definition: regex.h:129
#define FREE(ptr)
Definition: cryptohash.c:37
rm_detail_t * details
Definition: regexec.c:113
int re_csize
Definition: regex.h:76
#define REG_MTRACE
Definition: regex.h:130
chr * start
Definition: regexec.c:114
#define MALLOC(n)
Definition: regcustom.h:60
Definition: regexec.c:63
struct sset ** lblastcss
Definition: regexec.c:120
chr * search_start
Definition: regexec.c:115
int re_magic
Definition: regex.h:57
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
regex_t * re
Definition: regcomp.c:239
#define REG_INVARG
Definition: regex.h:154
struct colormap cmap
Definition: regguts.h:537
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
int err
Definition: regcomp.c:242
#define LOCALMAT
int eflags
Definition: regexec.c:110
static void cleanup(void)
Definition: bootstrap.c:697
Definition: regguts.h:527
int ntree
Definition: regguts.h:536
#define REG_UBACKREF
Definition: regex.h:60
#define LOCALDFAS
struct dfa ** subdfas
Definition: regexec.c:118
void pg_set_regex_collation(Oid collation)
regmatch_t * pmatch
Definition: regexec.c:112
#define REG_NOSUB
Definition: regex.h:109
static void freedfa(struct dfa *)
#define VS(x)
Definition: regguts.h:61
long info
Definition: regguts.h:532
struct cnfa cnfa
Definition: regguts.h:500
size_t nmatch
Definition: regexec.c:111
static void zapallsubs(regmatch_t *, size_t)
Definition: regexec.c:654
const chr * stop
Definition: regcomp.c:241
char * re_guts
Definition: regex.h:80
int cflags
Definition: regguts.h:531
Definition: regexec.c:45
size_t nsub
Definition: regguts.h:533
int i
#define REG_NOMATCH
Definition: regex.h:140
chr ** lblastcp
Definition: regexec.c:121
#define REG_MIXED
Definition: regex.h:155
Definition: regcomp.c:237
Oid re_collation
Definition: regex.h:78
static int cfind(struct vars *, struct cnfa *, struct colormap *)
Definition: regexec.c:500

◆ pickss()

static struct sset* pickss ( struct vars ,
struct dfa ,
chr ,
chr  
)
static

◆ shortest()

static chr* shortest ( struct vars ,
struct dfa ,
chr ,
chr ,
chr ,
chr **  ,
int *   
)
static

◆ subset()

static void subset ( struct vars v,
struct subre sub,
chr begin,
chr end 
)
static

Definition at line 693 of file regexec.c.

References assert, subre::capno, subre::id, LOFF, MDEBUG, vars::nmatch, OFF, vars::pmatch, regmatch_t::rm_eo, and regmatch_t::rm_so.

Referenced by cdissect().

697 {
698  int n = sub->capno;
699 
700  assert(n > 0);
701  if ((size_t) n >= v->nmatch)
702  return;
703 
704  MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
705  v->pmatch[n].rm_so = OFF(begin);
706  v->pmatch[n].rm_eo = OFF(end);
707 }
int capno
Definition: regguts.h:492
regoff_t rm_so
Definition: regex.h:87
#define LOFF(p)
Definition: regexec.c:132
#define MDEBUG(arglist)
Definition: regguts.h:117
regoff_t rm_eo
Definition: regex.h:88
int id
Definition: regguts.h:491
#define assert(TEST)
Definition: imath.c:73
regmatch_t * pmatch
Definition: regexec.c:112
#define OFF(p)
Definition: regexec.c:131
size_t nmatch
Definition: regexec.c:111

◆ zapallsubs()

static void zapallsubs ( regmatch_t p,
size_t  n 
)
static

Definition at line 654 of file regexec.c.

References i, regmatch_t::rm_eo, and regmatch_t::rm_so.

Referenced by pg_regexec().

656 {
657  size_t i;
658 
659  for (i = n - 1; i > 0; i--)
660  {
661  p[i].rm_so = -1;
662  p[i].rm_eo = -1;
663  }
664 }
regoff_t rm_so
Definition: regex.h:87
regoff_t rm_eo
Definition: regex.h:88
int i

◆ zaptreesubs()

static void zaptreesubs ( struct vars v,
struct subre t 
)
static

Definition at line 670 of file regexec.c.

References subre::capno, subre::child, vars::pmatch, regmatch_t::rm_eo, regmatch_t::rm_so, and subre::sibling.

Referenced by ccondissect(), citerdissect(), crevcondissect(), and creviterdissect().

672 {
673  int n = t->capno;
674  struct subre *t2;
675 
676  if (n > 0)
677  {
678  if ((size_t) n < v->nmatch)
679  {
680  v->pmatch[n].rm_so = -1;
681  v->pmatch[n].rm_eo = -1;
682  }
683  }
684 
685  for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
686  zaptreesubs(v, t2);
687 }
struct subre * child
Definition: regguts.h:496
int capno
Definition: regguts.h:492
regoff_t rm_so
Definition: regex.h:87
regoff_t rm_eo
Definition: regex.h:88
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:670
struct subre * sibling
Definition: regguts.h:497
regmatch_t * pmatch
Definition: regexec.c:112
Definition: regguts.h:471