PostgreSQL Source Code  git master
regexec.c File Reference
#include "regex/regguts.h"
#include "rege_dfa.c"
Include dependency graph for regexec.c:

Go to the source code of this file.

Data Structures

struct  arcp
 
struct  sset
 
struct  dfa
 
struct  smalldfa
 
struct  vars
 

Macros

#define HASH(bv, nw)   (((nw) == 1) ? *(bv) : hash(bv, nw))
 
#define HIT(h, bv, ss, nw)
 
#define STARTER   01 /* the initial state set */
 
#define POSTSTATE   02 /* includes the goal state */
 
#define LOCKED   04 /* locked in cache */
 
#define NOPROGRESS   010 /* zero-progress state set */
 
#define WORK   1 /* number of work bitvectors needed */
 
#define FEWSTATES   20 /* must be less than UBITS */
 
#define FEWCOLORS   15
 
#define DOMALLOC   ((struct smalldfa *)NULL) /* force malloc */
 
#define VISERR(vv)   ((vv)->err != 0) /* have we seen an error yet? */
 
#define ISERR()   VISERR(v)
 
#define VERR(vv, e)   ((vv)->err = ((vv)->err ? (vv)->err : (e)))
 
#define ERR(e)   VERR(v, e) /* record an error */
 
#define NOERR()   {if (ISERR()) return v->err;} /* if error seen, return it */
 
#define OFF(p)   ((p) - v->start)
 
#define LOFF(p)   ((long)OFF(p))
 
#define LOCALMAT   20
 
#define LOCALDFAS   40
 

Functions

static struct dfagetsubdfa (struct vars *, struct subre *)
 
static struct dfagetladfa (struct vars *, int)
 
static int find (struct vars *, struct cnfa *, struct colormap *)
 
static int cfind (struct vars *, struct cnfa *, struct colormap *)
 
static int cfindloop (struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **)
 
static void zapallsubs (regmatch_t *, size_t)
 
static void zaptreesubs (struct vars *, struct subre *)
 
static void subset (struct vars *, struct subre *, chr *, chr *)
 
static int cdissect (struct vars *, struct subre *, chr *, chr *)
 
static int ccondissect (struct vars *, struct subre *, chr *, chr *)
 
static int crevcondissect (struct vars *, struct subre *, chr *, chr *)
 
static int cbrdissect (struct vars *, struct subre *, chr *, chr *)
 
static int caltdissect (struct vars *, struct subre *, chr *, chr *)
 
static int citerdissect (struct vars *, struct subre *, chr *, chr *)
 
static int creviterdissect (struct vars *, struct subre *, chr *, chr *)
 
static chrlongest (struct vars *, struct dfa *, chr *, chr *, int *)
 
static chrshortest (struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
 
static int matchuntil (struct vars *, struct dfa *, chr *, struct sset **, chr **)
 
static chrdfa_backref (struct vars *, struct dfa *, chr *, chr *, chr *, bool)
 
static chrlastcold (struct vars *, struct dfa *)
 
static struct dfanewdfa (struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
 
static void freedfa (struct dfa *)
 
static unsigned hash (unsigned *, int)
 
static struct ssetinitialize (struct vars *, struct dfa *, chr *)
 
static struct ssetmiss (struct vars *, struct dfa *, struct sset *, color, chr *, chr *)
 
static int lacon (struct vars *, struct cnfa *, chr *, color)
 
static struct ssetgetvacant (struct vars *, struct dfa *, chr *, chr *)
 
static struct ssetpickss (struct vars *, struct dfa *, chr *, chr *)
 
int pg_regexec (regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
 

Macro Definition Documentation

◆ DOMALLOC

#define DOMALLOC   ((struct smalldfa *)NULL) /* force malloc */

Definition at line 101 of file regexec.c.

Referenced by getladfa(), and getsubdfa().

◆ ERR

#define ERR (   e)    VERR(v, e) /* record an error */

Definition at line 129 of file regexec.c.

Referenced by cfindloop().

◆ FEWCOLORS

#define FEWCOLORS   15

Definition at line 91 of file regexec.c.

Referenced by newdfa().

◆ FEWSTATES

#define FEWSTATES   20 /* must be less than UBITS */

Definition at line 90 of file regexec.c.

◆ HASH

#define HASH (   bv,
  nw 
)    (((nw) == 1) ? *(bv) : hash(bv, nw))

Definition at line 49 of file regexec.c.

◆ HIT

#define HIT (   h,
  bv,
  ss,
  nw 
)
Value:
((ss)->hash == (h) && ((nw) == 1 || \
memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
static unsigned hash(unsigned *, int)
#define VS(x)
Definition: regguts.h:61

Definition at line 50 of file regexec.c.

Referenced by miss().

◆ ISERR

#define ISERR ( )    VISERR(v)

Definition at line 127 of file regexec.c.

Referenced by cfindloop(), citerdissect(), creviterdissect(), and find().

◆ LOCALDFAS

#define LOCALDFAS   40

Referenced by pg_regexec().

◆ LOCALMAT

#define LOCALMAT   20

Referenced by pg_regexec().

◆ LOCKED

#define LOCKED   04 /* locked in cache */

Definition at line 55 of file regexec.c.

Referenced by getvacant(), initialize(), and pickss().

◆ LOFF

#define LOFF (   p)    ((long)OFF(p))

◆ NOERR

#define NOERR ( )    {if (ISERR()) return v->err;} /* if error seen, return it */

Definition at line 130 of file regexec.c.

Referenced by caltdissect(), ccondissect(), cfind(), crevcondissect(), and find().

◆ NOPROGRESS

#define NOPROGRESS   010 /* zero-progress state set */

Definition at line 56 of file regexec.c.

Referenced by getvacant(), initialize(), lastcold(), and miss().

◆ OFF

#define OFF (   p)    ((p) - v->start)

Definition at line 131 of file regexec.c.

Referenced by cfind(), cfindloop(), find(), and subset().

◆ POSTSTATE

#define POSTSTATE   02 /* includes the goal state */

Definition at line 54 of file regexec.c.

Referenced by getvacant(), longest(), matchuntil(), miss(), and shortest().

◆ STARTER

#define STARTER   01 /* the initial state set */

Definition at line 53 of file regexec.c.

Referenced by initialize().

◆ VERR

#define VERR (   vv,
  e 
)    ((vv)->err = ((vv)->err ? (vv)->err : (e)))

Definition at line 128 of file regexec.c.

◆ VISERR

#define VISERR (   vv)    ((vv)->err != 0) /* have we seen an error yet? */

Definition at line 126 of file regexec.c.

◆ WORK

#define WORK   1 /* number of work bitvectors needed */

Definition at line 87 of file regexec.c.

Referenced by newdfa().

Function Documentation

◆ caltdissect()

static int caltdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1030 of file regexec.c.

References assert, cdissect(), subre::child, subre::cnfa, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_NOMATCH, and subre::sibling.

Referenced by cdissect().

1034 {
1035  struct dfa *d;
1036  int er;
1037 
1038  assert(t->op == '|');
1039 
1040  t = t->child;
1041  /* there should be at least 2 alternatives */
1042  assert(t != NULL && t->sibling != NULL);
1043 
1044  while (t != NULL)
1045  {
1046  assert(t->cnfa.nstates > 0);
1047 
1048  MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1049 
1050  d = getsubdfa(v, t);
1051  NOERR();
1052  if (longest(v, d, begin, end, (int *) NULL) == end)
1053  {
1054  MDEBUG(("%d: caltdissect matched\n", t->id));
1055  er = cdissect(v, t, begin, end);
1056  if (er != REG_NOMATCH)
1057  return er;
1058  }
1059  NOERR();
1060 
1061  t = t->sibling;
1062  }
1063 
1064  return REG_NOMATCH;
1065 }
struct subre * child
Definition: regguts.h:495
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
Definition: regexec.c:63
int nstates
Definition: regguts.h:403
int id
Definition: regguts.h:490
struct subre * sibling
Definition: regguts.h:496
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:346
#define assert(TEST)
Definition: imath.c:73
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:708
struct cnfa cnfa
Definition: regguts.h:499
#define REG_NOMATCH
Definition: regex.h:140
#define NOERR()
Definition: regexec.c:130

◆ cbrdissect()

static int cbrdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 948 of file regexec.c.

References assert, subre::backno, DUPINF, vars::g, subre::id, LOFF, subre::max, MDEBUG, subre::min, subre::op, vars::pmatch, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, regmatch_t::rm_so, and vars::start.

Referenced by cdissect().

952 {
953  int n = t->backno;
954  size_t numreps;
955  size_t tlen;
956  size_t brlen;
957  chr *brstring;
958  chr *p;
959  int min = t->min;
960  int max = t->max;
961 
962  assert(t != NULL);
963  assert(t->op == 'b');
964  assert(n >= 0);
965  assert((size_t) n < v->nmatch);
966 
967  MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
968  LOFF(begin), LOFF(end)));
969 
970  /* get the backreferenced string */
971  if (v->pmatch[n].rm_so == -1)
972  return REG_NOMATCH;
973  brstring = v->start + v->pmatch[n].rm_so;
974  brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
975 
976  /* special cases for zero-length strings */
977  if (brlen == 0)
978  {
979  /*
980  * matches only if target is zero length, but any number of
981  * repetitions can be considered to be present
982  */
983  if (begin == end && min <= max)
984  {
985  MDEBUG(("%d: backref matched trivially\n", t->id));
986  return REG_OKAY;
987  }
988  return REG_NOMATCH;
989  }
990  if (begin == end)
991  {
992  /* matches only if zero repetitions are okay */
993  if (min == 0)
994  {
995  MDEBUG(("%d: backref matched trivially\n", t->id));
996  return REG_OKAY;
997  }
998  return REG_NOMATCH;
999  }
1000 
1001  /*
1002  * check target length to see if it could possibly be an allowed number of
1003  * repetitions of brstring
1004  */
1005  assert(end > begin);
1006  tlen = end - begin;
1007  if (tlen % brlen != 0)
1008  return REG_NOMATCH;
1009  numreps = tlen / brlen;
1010  if (numreps < min || (numreps > max && max != DUPINF))
1011  return REG_NOMATCH;
1012 
1013  /* okay, compare the actual string contents */
1014  p = begin;
1015  while (numreps-- > 0)
1016  {
1017  if ((*v->g->compare) (brstring, p, brlen) != 0)
1018  return REG_NOMATCH;
1019  p += brlen;
1020  }
1021 
1022  MDEBUG(("%d: backref matched\n", t->id));
1023  return REG_OKAY;
1024 }
regoff_t rm_so
Definition: regex.h:87
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
short min
Definition: regguts.h:493
#define MDEBUG(arglist)
Definition: regguts.h:117
chr * start
Definition: regexec.c:114
regoff_t rm_eo
Definition: regex.h:88
int id
Definition: regguts.h:490
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
int backno
Definition: regguts.h:492
#define DUPINF
Definition: regguts.h:94
short max
Definition: regguts.h:494
regmatch_t * pmatch
Definition: regexec.c:112
#define REG_NOMATCH
Definition: regex.h:140

◆ ccondissect()

static int ccondissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 783 of file regexec.c.

References assert, cdissect(), subre::child, subre::cnfa, subre::flags, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_ASSERT, REG_NOMATCH, REG_OKAY, SHORTER, subre::sibling, and zaptreesubs().

Referenced by cdissect().

787 {
788  struct subre *left = t->child;
789  struct subre *right = left->sibling;
790  struct dfa *d;
791  struct dfa *d2;
792  chr *mid;
793  int er;
794 
795  assert(t->op == '.');
796  assert(left != NULL && left->cnfa.nstates > 0);
797  assert(right != NULL && right->cnfa.nstates > 0);
798  assert(right->sibling == NULL);
799  assert(!(left->flags & SHORTER));
800 
801  d = getsubdfa(v, left);
802  NOERR();
803  d2 = getsubdfa(v, right);
804  NOERR();
805  MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
806 
807  /* pick a tentative midpoint */
808  mid = longest(v, d, begin, end, (int *) NULL);
809  NOERR();
810  if (mid == NULL)
811  return REG_NOMATCH;
812  MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
813 
814  /* iterate until satisfaction or failure */
815  for (;;)
816  {
817  /* try this midpoint on for size */
818  if (longest(v, d2, mid, end, (int *) NULL) == end)
819  {
820  er = cdissect(v, left, begin, mid);
821  if (er == REG_OKAY)
822  {
823  er = cdissect(v, right, mid, end);
824  if (er == REG_OKAY)
825  {
826  /* satisfaction */
827  MDEBUG(("%d: successful\n", t->id));
828  return REG_OKAY;
829  }
830  }
831  if (er != REG_NOMATCH)
832  return er;
833  }
834  NOERR();
835 
836  /* that midpoint didn't work, find a new one */
837  if (mid == begin)
838  {
839  /* all possibilities exhausted */
840  MDEBUG(("%d: no midpoint\n", t->id));
841  return REG_NOMATCH;
842  }
843  mid = longest(v, d, begin, mid - 1, (int *) NULL);
844  NOERR();
845  if (mid == NULL)
846  {
847  /* failed to find a new one */
848  MDEBUG(("%d: failed midpoint\n", t->id));
849  return REG_NOMATCH;
850  }
851  MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
852  zaptreesubs(v, left);
853  zaptreesubs(v, right);
854  }
855 
856  /* can't get here */
857  return REG_ASSERT;
858 }
struct subre * child
Definition: regguts.h:495
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
Definition: regexec.c:63
int nstates
Definition: regguts.h:403
int id
Definition: regguts.h:490
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:653
struct subre * sibling
Definition: regguts.h:496
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:346
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
#define REG_ASSERT
Definition: regex.h:153
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:708
Definition: regguts.h:471
struct cnfa cnfa
Definition: regguts.h:499
#define SHORTER
Definition: regguts.h:476
#define REG_NOMATCH
Definition: regex.h:140
#define NOERR()
Definition: regexec.c:130

◆ cdissect()

static int cdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 708 of file regexec.c.

References assert, BACKR, caltdissect(), CANCEL_REQUESTED, subre::capno, cbrdissect(), ccondissect(), subre::child, citerdissect(), crevcondissect(), creviterdissect(), subre::flags, subre::id, LOFF, MDEBUG, subre::op, vars::re, REG_ASSERT, REG_CANCEL, REG_ETOOBIG, REG_NOMATCH, REG_OKAY, SHORTER, STACK_TOO_DEEP, and subset().

Referenced by caltdissect(), ccondissect(), cfindloop(), citerdissect(), crevcondissect(), creviterdissect(), and find().

712 {
713  int er;
714 
715  assert(t != NULL);
716  MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
717 
718  /* handy place to check for operation cancel */
719  if (CANCEL_REQUESTED(v->re))
720  return REG_CANCEL;
721  /* ... and stack overrun */
722  if (STACK_TOO_DEEP(v->re))
723  return REG_ETOOBIG;
724 
725  switch (t->op)
726  {
727  case '=': /* terminal node */
728  assert(t->child == NULL);
729  er = REG_OKAY; /* no action, parent did the work */
730  break;
731  case 'b': /* back reference */
732  assert(t->child == NULL);
733  er = cbrdissect(v, t, begin, end);
734  break;
735  case '.': /* concatenation */
736  assert(t->child != NULL);
737  if (t->child->flags & SHORTER) /* reverse scan */
738  er = crevcondissect(v, t, begin, end);
739  else
740  er = ccondissect(v, t, begin, end);
741  break;
742  case '|': /* alternation */
743  assert(t->child != NULL);
744  er = caltdissect(v, t, begin, end);
745  break;
746  case '*': /* iteration */
747  assert(t->child != NULL);
748  if (t->child->flags & SHORTER) /* reverse scan */
749  er = creviterdissect(v, t, begin, end);
750  else
751  er = citerdissect(v, t, begin, end);
752  break;
753  case '(': /* no-op capture node */
754  assert(t->child != NULL);
755  assert(t->capno > 0);
756  er = cdissect(v, t->child, begin, end);
757  break;
758  default:
759  er = REG_ASSERT;
760  break;
761  }
762 
763  /*
764  * We should never have a match failure unless backrefs lurk below;
765  * otherwise, either caller failed to check the DFA, or there's some
766  * inconsistency between the DFA and the node's innards.
767  */
768  assert(er != REG_NOMATCH || (t->flags & BACKR));
769 
770  /*
771  * If this node is marked as capturing, save successful match's location.
772  */
773  if (t->capno > 0 && er == REG_OKAY)
774  subset(v, t, begin, end);
775 
776  return er;
777 }
struct subre * child
Definition: regguts.h:495
int capno
Definition: regguts.h:491
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
#define MDEBUG(arglist)
Definition: regguts.h:117
int id
Definition: regguts.h:490
#define REG_OKAY
Definition: regex.h:139
regex_t * re
Definition: regcomp.c:239
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
static int cbrdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:948
static int creviterdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:1273
#define BACKR
Definition: regguts.h:479
static int caltdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:1030
#define REG_ASSERT
Definition: regex.h:153
static int ccondissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:783
#define REG_ETOOBIG
Definition: regex.h:157
static int citerdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:1071
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:708
#define SHORTER
Definition: regguts.h:476
#define CANCEL_REQUESTED(re)
Definition: regguts.h:516
#define REG_NOMATCH
Definition: regex.h:140
static int crevcondissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:864
static void subset(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:676
#define STACK_TOO_DEEP(re)
Definition: regguts.h:519
#define REG_CANCEL
Definition: regex.h:159

◆ cfind()

static int cfind ( struct vars v,
struct cnfa cnfa,
struct colormap cm 
)
static

Definition at line 484 of file regexec.c.

References assert, cfindloop(), guts::cflags, vars::details, vars::dfa1, vars::dfa2, vars::err, freedfa(), vars::g, newdfa(), NOERR, OFF, REG_EXPECT, regmatch_t::rm_eo, rm_detail_t::rm_extend, regmatch_t::rm_so, guts::search, and vars::stop.

Referenced by pg_regexec().

487 {
488  struct dfa *s;
489  struct dfa *d;
490  chr *cold;
491  int ret;
492 
493  s = newdfa(v, &v->g->search, cm, &v->dfa1);
494  if (s == NULL)
495  return v->err;
496  d = newdfa(v, cnfa, cm, &v->dfa2);
497  if (d == NULL)
498  {
499  freedfa(s);
500  return v->err;
501  }
502 
503  ret = cfindloop(v, cnfa, cm, d, s, &cold);
504 
505  freedfa(d);
506  freedfa(s);
507  NOERR();
508  if (v->g->cflags & REG_EXPECT)
509  {
510  assert(v->details != NULL);
511  if (cold != NULL)
512  v->details->rm_extend.rm_so = OFF(cold);
513  else
514  v->details->rm_extend.rm_so = OFF(v->stop);
515  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
516  }
517  return ret;
518 }
#define REG_EXPECT
Definition: regex.h:115
regoff_t rm_so
Definition: regex.h:87
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
rm_detail_t * details
Definition: regexec.c:113
regoff_t rm_eo
Definition: regex.h:88
Definition: regexec.c:63
struct cnfa search
Definition: regguts.h:534
pg_wchar chr
Definition: regcustom.h:66
static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **)
Definition: regexec.c:524
struct smalldfa dfa2
Definition: regexec.c:123
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
int err
Definition: regcomp.c:242
#define OFF(p)
Definition: regexec.c:131
static void freedfa(struct dfa *)
regmatch_t rm_extend
Definition: regex.h:94
const chr * stop
Definition: regcomp.c:241
int cflags
Definition: regguts.h:530
#define NOERR()
Definition: regexec.c:130
struct smalldfa dfa1
Definition: regexec.c:122

◆ cfindloop()

static int cfindloop ( struct vars v,
struct cnfa cnfa,
struct colormap cm,
struct dfa d,
struct dfa s,
chr **  coldp 
)
static

Definition at line 524 of file regexec.c.

References assert, cdissect(), close, ERR, vars::err, subre::flags, vars::g, ISERR, LOFF, longest(), MDEBUG, vars::nmatch, OFF, vars::pmatch, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, regmatch_t::rm_so, vars::search_start, SHORTER, shortest(), vars::stop, guts::tree, and zapallsubs().

Referenced by cfind().

530 {
531  chr *begin;
532  chr *end;
533  chr *cold;
534  chr *open; /* open and close of range of possible starts */
535  chr *close;
536  chr *estart;
537  chr *estop;
538  int er;
539  int shorter = v->g->tree->flags & SHORTER;
540  int hitend;
541 
542  assert(d != NULL && s != NULL);
543  cold = NULL;
544  close = v->search_start;
545  do
546  {
547  /* Search with the search RE for match range at/beyond "close" */
548  MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
549  close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
550  if (ISERR())
551  {
552  *coldp = cold;
553  return v->err;
554  }
555  if (close == NULL)
556  break; /* no more possible match anywhere */
557  assert(cold != NULL);
558  open = cold;
559  cold = NULL;
560  /* Search for matches starting between "open" and "close" inclusive */
561  MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
562  for (begin = open; begin <= close; begin++)
563  {
564  MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
565  estart = begin;
566  estop = v->stop;
567  for (;;)
568  {
569  /* Here we use the top node's detailed RE */
570  if (shorter)
571  end = shortest(v, d, begin, estart,
572  estop, (chr **) NULL, &hitend);
573  else
574  end = longest(v, d, begin, estop,
575  &hitend);
576  if (ISERR())
577  {
578  *coldp = cold;
579  return v->err;
580  }
581  if (hitend && cold == NULL)
582  cold = begin;
583  if (end == NULL)
584  break; /* no match with this begin point, try next */
585  MDEBUG(("tentative end %ld\n", LOFF(end)));
586  /* Dissect the potential match to see if it really matches */
587  zapallsubs(v->pmatch, v->nmatch);
588  er = cdissect(v, v->g->tree, begin, end);
589  if (er == REG_OKAY)
590  {
591  if (v->nmatch > 0)
592  {
593  v->pmatch[0].rm_so = OFF(begin);
594  v->pmatch[0].rm_eo = OFF(end);
595  }
596  *coldp = cold;
597  return REG_OKAY;
598  }
599  if (er != REG_NOMATCH)
600  {
601  ERR(er);
602  *coldp = cold;
603  return er;
604  }
605  /* Try next longer/shorter match with same begin point */
606  if (shorter)
607  {
608  if (end == estop)
609  break; /* no more, so try next begin point */
610  estart = end + 1;
611  }
612  else
613  {
614  if (end == begin)
615  break; /* no more, so try next begin point */
616  estop = end - 1;
617  }
618  } /* end loop over endpoint positions */
619  } /* end loop over beginning positions */
620 
621  /*
622  * If we get here, there is no possible match starting at or before
623  * "close", so consider matches beyond that. We'll do a fresh search
624  * with the search RE to find a new promising match range.
625  */
626  close++;
627  } while (close < v->stop);
628 
629  *coldp = cold;
630  return REG_NOMATCH;
631 }
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
struct subre * tree
Definition: regguts.h:533
regoff_t rm_so
Definition: regex.h:87
#define LOFF(p)
Definition: regexec.c:132
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
regoff_t rm_eo
Definition: regex.h:88
chr * search_start
Definition: regexec.c:115
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
int err
Definition: regcomp.c:242
regmatch_t * pmatch
Definition: regexec.c:112
#define ERR(e)
Definition: regexec.c:129
#define OFF(p)
Definition: regexec.c:131
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:708
size_t nmatch
Definition: regexec.c:111
static void zapallsubs(regmatch_t *, size_t)
Definition: regexec.c:637
const chr * stop
Definition: regcomp.c:241
#define SHORTER
Definition: regguts.h:476
#define REG_NOMATCH
Definition: regex.h:140
#define close(a)
Definition: win32.h:12
#define ISERR()
Definition: regexec.c:127

◆ citerdissect()

static int citerdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1071 of file regexec.c.

References assert, cdissect(), subre::child, subre::cnfa, DUPINF, vars::err, subre::flags, FREE, getsubdfa(), i, subre::id, ISERR, LOFF, longest(), MALLOC, subre::max, MDEBUG, subre::min, cnfa::nstates, subre::op, REG_ESPACE, REG_NOMATCH, REG_OKAY, SHORTER, and zaptreesubs().

Referenced by cdissect().

1075 {
1076  struct dfa *d;
1077  chr **endpts;
1078  chr *limit;
1079  int min_matches;
1080  size_t max_matches;
1081  int nverified;
1082  int k;
1083  int i;
1084  int er;
1085 
1086  assert(t->op == '*');
1087  assert(t->child != NULL && t->child->cnfa.nstates > 0);
1088  assert(!(t->child->flags & SHORTER));
1089  assert(begin <= end);
1090 
1091  MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1092 
1093  /*
1094  * For the moment, assume the minimum number of matches is 1. If zero
1095  * matches are allowed, and the target string is empty, we are allowed to
1096  * match regardless of the contents of the iter node --- but we would
1097  * prefer to match once, so that capturing parens get set. (An example of
1098  * the concern here is a pattern like "()*\1", which historically this
1099  * code has allowed to succeed.) Therefore, we deal with the zero-matches
1100  * case at the bottom, after failing to find any other way to match.
1101  */
1102  min_matches = t->min;
1103  if (min_matches <= 0)
1104  min_matches = 1;
1105 
1106  /*
1107  * We need workspace to track the endpoints of each sub-match. Normally
1108  * we consider only nonzero-length sub-matches, so there can be at most
1109  * end-begin of them. However, if min is larger than that, we will also
1110  * consider zero-length sub-matches in order to find enough matches.
1111  *
1112  * For convenience, endpts[0] contains the "begin" pointer and we store
1113  * sub-match endpoints in endpts[1..max_matches].
1114  */
1115  max_matches = end - begin;
1116  if (max_matches > t->max && t->max != DUPINF)
1117  max_matches = t->max;
1118  if (max_matches < min_matches)
1119  max_matches = min_matches;
1120  endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1121  if (endpts == NULL)
1122  return REG_ESPACE;
1123  endpts[0] = begin;
1124 
1125  d = getsubdfa(v, t->child);
1126  if (ISERR())
1127  {
1128  FREE(endpts);
1129  return v->err;
1130  }
1131 
1132  /*
1133  * Our strategy is to first find a set of sub-match endpoints that are
1134  * valid according to the child node's DFA, and then recursively dissect
1135  * each sub-match to confirm validity. If any validity check fails,
1136  * backtrack the last sub-match and try again. And, when we next try for
1137  * a validity check, we need not recheck any successfully verified
1138  * sub-matches that we didn't move the endpoints of. nverified remembers
1139  * how many sub-matches are currently known okay.
1140  */
1141 
1142  /* initialize to consider first sub-match */
1143  nverified = 0;
1144  k = 1;
1145  limit = end;
1146 
1147  /* iterate until satisfaction or failure */
1148  while (k > 0)
1149  {
1150  /* try to find an endpoint for the k'th sub-match */
1151  endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
1152  if (ISERR())
1153  {
1154  FREE(endpts);
1155  return v->err;
1156  }
1157  if (endpts[k] == NULL)
1158  {
1159  /* no match possible, so see if we can shorten previous one */
1160  k--;
1161  goto backtrack;
1162  }
1163  MDEBUG(("%d: working endpoint %d: %ld\n",
1164  t->id, k, LOFF(endpts[k])));
1165 
1166  /* k'th sub-match can no longer be considered verified */
1167  if (nverified >= k)
1168  nverified = k - 1;
1169 
1170  if (endpts[k] != end)
1171  {
1172  /* haven't reached end yet, try another iteration if allowed */
1173  if (k >= max_matches)
1174  {
1175  /* must try to shorten some previous match */
1176  k--;
1177  goto backtrack;
1178  }
1179 
1180  /* reject zero-length match unless necessary to achieve min */
1181  if (endpts[k] == endpts[k - 1] &&
1182  (k >= min_matches || min_matches - k < end - endpts[k]))
1183  goto backtrack;
1184 
1185  k++;
1186  limit = end;
1187  continue;
1188  }
1189 
1190  /*
1191  * We've identified a way to divide the string into k sub-matches that
1192  * works so far as the child DFA can tell. If k is an allowed number
1193  * of matches, start the slow part: recurse to verify each sub-match.
1194  * We always have k <= max_matches, needn't check that.
1195  */
1196  if (k < min_matches)
1197  goto backtrack;
1198 
1199  MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1200 
1201  for (i = nverified + 1; i <= k; i++)
1202  {
1203  zaptreesubs(v, t->child);
1204  er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1205  if (er == REG_OKAY)
1206  {
1207  nverified = i;
1208  continue;
1209  }
1210  if (er == REG_NOMATCH)
1211  break;
1212  /* oops, something failed */
1213  FREE(endpts);
1214  return er;
1215  }
1216 
1217  if (i > k)
1218  {
1219  /* satisfaction */
1220  MDEBUG(("%d: successful\n", t->id));
1221  FREE(endpts);
1222  return REG_OKAY;
1223  }
1224 
1225  /* match failed to verify, so backtrack */
1226 
1227 backtrack:
1228 
1229  /*
1230  * Must consider shorter versions of the current sub-match. However,
1231  * we'll only ask for a zero-length match if necessary.
1232  */
1233  while (k > 0)
1234  {
1235  chr *prev_end = endpts[k - 1];
1236 
1237  if (endpts[k] > prev_end)
1238  {
1239  limit = endpts[k] - 1;
1240  if (limit > prev_end ||
1241  (k < min_matches && min_matches - k >= end - prev_end))
1242  {
1243  /* break out of backtrack loop, continue the outer one */
1244  break;
1245  }
1246  }
1247  /* can't shorten k'th sub-match any more, consider previous one */
1248  k--;
1249  }
1250  }
1251 
1252  /* all possibilities exhausted */
1253  FREE(endpts);
1254 
1255  /*
1256  * Now consider the possibility that we can match to a zero-length string
1257  * by using zero repetitions.
1258  */
1259  if (t->min == 0 && begin == end)
1260  {
1261  MDEBUG(("%d: allowing zero matches\n", t->id));
1262  return REG_OKAY;
1263  }
1264 
1265  MDEBUG(("%d: failed\n", t->id));
1266  return REG_NOMATCH;
1267 }
struct subre * child
Definition: regguts.h:495
#define REG_ESPACE
Definition: regex.h:151
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
short min
Definition: regguts.h:493
#define FREE(ptr)
Definition: cryptohash.c:37
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
#define MALLOC(n)
Definition: regcustom.h:60
Definition: regexec.c:63
int nstates
Definition: regguts.h:403
int id
Definition: regguts.h:490
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:653
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:346
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
int err
Definition: regcomp.c:242
#define DUPINF
Definition: regguts.h:94
short max
Definition: regguts.h:494
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:708
struct cnfa cnfa
Definition: regguts.h:499
#define SHORTER
Definition: regguts.h:476
int i
#define REG_NOMATCH
Definition: regex.h:140
#define ISERR()
Definition: regexec.c:127

◆ crevcondissect()

static int crevcondissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 864 of file regexec.c.

References assert, cdissect(), subre::child, subre::cnfa, subre::flags, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_ASSERT, REG_NOMATCH, REG_OKAY, SHORTER, shortest(), subre::sibling, and zaptreesubs().

Referenced by cdissect().

868 {
869  struct subre *left = t->child;
870  struct subre *right = left->sibling;
871  struct dfa *d;
872  struct dfa *d2;
873  chr *mid;
874  int er;
875 
876  assert(t->op == '.');
877  assert(left != NULL && left->cnfa.nstates > 0);
878  assert(right != NULL && right->cnfa.nstates > 0);
879  assert(right->sibling == NULL);
880  assert(left->flags & SHORTER);
881 
882  d = getsubdfa(v, left);
883  NOERR();
884  d2 = getsubdfa(v, right);
885  NOERR();
886  MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
887 
888  /* pick a tentative midpoint */
889  mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
890  NOERR();
891  if (mid == NULL)
892  return REG_NOMATCH;
893  MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
894 
895  /* iterate until satisfaction or failure */
896  for (;;)
897  {
898  /* try this midpoint on for size */
899  if (longest(v, d2, mid, end, (int *) NULL) == end)
900  {
901  er = cdissect(v, left, begin, mid);
902  if (er == REG_OKAY)
903  {
904  er = cdissect(v, right, mid, end);
905  if (er == REG_OKAY)
906  {
907  /* satisfaction */
908  MDEBUG(("%d: successful\n", t->id));
909  return REG_OKAY;
910  }
911  }
912  if (er != REG_NOMATCH)
913  return er;
914  }
915  NOERR();
916 
917  /* that midpoint didn't work, find a new one */
918  if (mid == end)
919  {
920  /* all possibilities exhausted */
921  MDEBUG(("%d: no midpoint\n", t->id));
922  return REG_NOMATCH;
923  }
924  mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
925  NOERR();
926  if (mid == NULL)
927  {
928  /* failed to find a new one */
929  MDEBUG(("%d: failed midpoint\n", t->id));
930  return REG_NOMATCH;
931  }
932  MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
933  zaptreesubs(v, left);
934  zaptreesubs(v, right);
935  }
936 
937  /* can't get here */
938  return REG_ASSERT;
939 }
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
struct subre * child
Definition: regguts.h:495
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
Definition: regexec.c:63
int nstates
Definition: regguts.h:403
int id
Definition: regguts.h:490
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:653
struct subre * sibling
Definition: regguts.h:496
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:346
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
#define REG_ASSERT
Definition: regex.h:153
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:708
Definition: regguts.h:471
struct cnfa cnfa
Definition: regguts.h:499
#define SHORTER
Definition: regguts.h:476
#define REG_NOMATCH
Definition: regex.h:140
#define NOERR()
Definition: regexec.c:130

◆ creviterdissect()

static int creviterdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1273 of file regexec.c.

References assert, cdissect(), subre::child, subre::cnfa, DUPINF, vars::err, subre::flags, FREE, getsubdfa(), i, subre::id, ISERR, LOFF, MALLOC, subre::max, MDEBUG, subre::min, cnfa::nstates, subre::op, REG_ESPACE, REG_NOMATCH, REG_OKAY, SHORTER, shortest(), and zaptreesubs().

Referenced by cdissect().

1277 {
1278  struct dfa *d;
1279  chr **endpts;
1280  chr *limit;
1281  int min_matches;
1282  size_t max_matches;
1283  int nverified;
1284  int k;
1285  int i;
1286  int er;
1287 
1288  assert(t->op == '*');
1289  assert(t->child != NULL && t->child->cnfa.nstates > 0);
1290  assert(t->child->flags & SHORTER);
1291  assert(begin <= end);
1292 
1293  MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1294 
1295  /*
1296  * If zero matches are allowed, and target string is empty, just declare
1297  * victory. OTOH, if target string isn't empty, zero matches can't work
1298  * so we pretend the min is 1.
1299  */
1300  min_matches = t->min;
1301  if (min_matches <= 0)
1302  {
1303  if (begin == end)
1304  {
1305  MDEBUG(("%d: allowing zero matches\n", t->id));
1306  return REG_OKAY;
1307  }
1308  min_matches = 1;
1309  }
1310 
1311  /*
1312  * We need workspace to track the endpoints of each sub-match. Normally
1313  * we consider only nonzero-length sub-matches, so there can be at most
1314  * end-begin of them. However, if min is larger than that, we will also
1315  * consider zero-length sub-matches in order to find enough matches.
1316  *
1317  * For convenience, endpts[0] contains the "begin" pointer and we store
1318  * sub-match endpoints in endpts[1..max_matches].
1319  */
1320  max_matches = end - begin;
1321  if (max_matches > t->max && t->max != DUPINF)
1322  max_matches = t->max;
1323  if (max_matches < min_matches)
1324  max_matches = min_matches;
1325  endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1326  if (endpts == NULL)
1327  return REG_ESPACE;
1328  endpts[0] = begin;
1329 
1330  d = getsubdfa(v, t->child);
1331  if (ISERR())
1332  {
1333  FREE(endpts);
1334  return v->err;
1335  }
1336 
1337  /*
1338  * Our strategy is to first find a set of sub-match endpoints that are
1339  * valid according to the child node's DFA, and then recursively dissect
1340  * each sub-match to confirm validity. If any validity check fails,
1341  * backtrack the last sub-match and try again. And, when we next try for
1342  * a validity check, we need not recheck any successfully verified
1343  * sub-matches that we didn't move the endpoints of. nverified remembers
1344  * how many sub-matches are currently known okay.
1345  */
1346 
1347  /* initialize to consider first sub-match */
1348  nverified = 0;
1349  k = 1;
1350  limit = begin;
1351 
1352  /* iterate until satisfaction or failure */
1353  while (k > 0)
1354  {
1355  /* disallow zero-length match unless necessary to achieve min */
1356  if (limit == endpts[k - 1] &&
1357  limit != end &&
1358  (k >= min_matches || min_matches - k < end - limit))
1359  limit++;
1360 
1361  /* if this is the last allowed sub-match, it must reach to the end */
1362  if (k >= max_matches)
1363  limit = end;
1364 
1365  /* try to find an endpoint for the k'th sub-match */
1366  endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
1367  (chr **) NULL, (int *) NULL);
1368  if (ISERR())
1369  {
1370  FREE(endpts);
1371  return v->err;
1372  }
1373  if (endpts[k] == NULL)
1374  {
1375  /* no match possible, so see if we can lengthen previous one */
1376  k--;
1377  goto backtrack;
1378  }
1379  MDEBUG(("%d: working endpoint %d: %ld\n",
1380  t->id, k, LOFF(endpts[k])));
1381 
1382  /* k'th sub-match can no longer be considered verified */
1383  if (nverified >= k)
1384  nverified = k - 1;
1385 
1386  if (endpts[k] != end)
1387  {
1388  /* haven't reached end yet, try another iteration if allowed */
1389  if (k >= max_matches)
1390  {
1391  /* must try to lengthen some previous match */
1392  k--;
1393  goto backtrack;
1394  }
1395 
1396  k++;
1397  limit = endpts[k - 1];
1398  continue;
1399  }
1400 
1401  /*
1402  * We've identified a way to divide the string into k sub-matches that
1403  * works so far as the child DFA can tell. If k is an allowed number
1404  * of matches, start the slow part: recurse to verify each sub-match.
1405  * We always have k <= max_matches, needn't check that.
1406  */
1407  if (k < min_matches)
1408  goto backtrack;
1409 
1410  MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1411 
1412  for (i = nverified + 1; i <= k; i++)
1413  {
1414  zaptreesubs(v, t->child);
1415  er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1416  if (er == REG_OKAY)
1417  {
1418  nverified = i;
1419  continue;
1420  }
1421  if (er == REG_NOMATCH)
1422  break;
1423  /* oops, something failed */
1424  FREE(endpts);
1425  return er;
1426  }
1427 
1428  if (i > k)
1429  {
1430  /* satisfaction */
1431  MDEBUG(("%d: successful\n", t->id));
1432  FREE(endpts);
1433  return REG_OKAY;
1434  }
1435 
1436  /* match failed to verify, so backtrack */
1437 
1438 backtrack:
1439 
1440  /*
1441  * Must consider longer versions of the current sub-match.
1442  */
1443  while (k > 0)
1444  {
1445  if (endpts[k] < end)
1446  {
1447  limit = endpts[k] + 1;
1448  /* break out of backtrack loop, continue the outer one */
1449  break;
1450  }
1451  /* can't lengthen k'th sub-match any more, consider previous one */
1452  k--;
1453  }
1454  }
1455 
1456  /* all possibilities exhausted */
1457  MDEBUG(("%d: failed\n", t->id));
1458  FREE(endpts);
1459  return REG_NOMATCH;
1460 }
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
struct subre * child
Definition: regguts.h:495
#define REG_ESPACE
Definition: regex.h:151
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
short min
Definition: regguts.h:493
#define FREE(ptr)
Definition: cryptohash.c:37
#define MDEBUG(arglist)
Definition: regguts.h:117
#define MALLOC(n)
Definition: regcustom.h:60
Definition: regexec.c:63
int nstates
Definition: regguts.h:403
int id
Definition: regguts.h:490
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:653
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:346
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
int err
Definition: regcomp.c:242
#define DUPINF
Definition: regguts.h:94
short max
Definition: regguts.h:494
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:708
struct cnfa cnfa
Definition: regguts.h:499
#define SHORTER
Definition: regguts.h:476
int i
#define REG_NOMATCH
Definition: regex.h:140
#define ISERR()
Definition: regexec.c:127

◆ dfa_backref()

static chr* dfa_backref ( struct vars ,
struct dfa ,
chr ,
chr ,
chr ,
bool   
)
static

◆ find()

static int find ( struct vars v,
struct cnfa cnfa,
struct colormap cm 
)
static

Definition at line 393 of file regexec.c.

References assert, cdissect(), guts::cflags, close, vars::details, vars::dfa1, vars::err, subre::flags, freedfa(), vars::g, ISERR, LOFF, longest(), MDEBUG, newdfa(), vars::nmatch, NOERR, OFF, vars::pmatch, REG_EXPECT, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, rm_detail_t::rm_extend, regmatch_t::rm_so, guts::search, vars::search_start, SHORTER, shortest(), vars::start, vars::stop, guts::tree, and zapallsubs().

Referenced by NIAddAffix(), NIImportAffixes(), NIImportOOAffixes(), parse_affentry(), pg_regexec(), and testdelete().

396 {
397  struct dfa *s;
398  struct dfa *d;
399  chr *begin;
400  chr *end = NULL;
401  chr *cold;
402  chr *open; /* open and close of range of possible starts */
403  chr *close;
404  int hitend;
405  int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
406 
407  /* first, a shot with the search RE */
408  s = newdfa(v, &v->g->search, cm, &v->dfa1);
409  if (s == NULL)
410  return v->err;
411  MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
412  cold = NULL;
413  close = shortest(v, s, v->search_start, v->search_start, v->stop,
414  &cold, (int *) NULL);
415  freedfa(s);
416  NOERR();
417  if (v->g->cflags & REG_EXPECT)
418  {
419  assert(v->details != NULL);
420  if (cold != NULL)
421  v->details->rm_extend.rm_so = OFF(cold);
422  else
423  v->details->rm_extend.rm_so = OFF(v->stop);
424  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
425  }
426  if (close == NULL) /* not found */
427  return REG_NOMATCH;
428  if (v->nmatch == 0) /* found, don't need exact location */
429  return REG_OKAY;
430 
431  /* find starting point and match */
432  assert(cold != NULL);
433  open = cold;
434  cold = NULL;
435  MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
436  d = newdfa(v, cnfa, cm, &v->dfa1);
437  if (d == NULL)
438  return v->err;
439  for (begin = open; begin <= close; begin++)
440  {
441  MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
442  if (shorter)
443  end = shortest(v, d, begin, begin, v->stop,
444  (chr **) NULL, &hitend);
445  else
446  end = longest(v, d, begin, v->stop, &hitend);
447  if (ISERR())
448  {
449  freedfa(d);
450  return v->err;
451  }
452  if (hitend && cold == NULL)
453  cold = begin;
454  if (end != NULL)
455  break; /* NOTE BREAK OUT */
456  }
457  assert(end != NULL); /* search RE succeeded so loop should */
458  freedfa(d);
459 
460  /* and pin down details */
461  assert(v->nmatch > 0);
462  v->pmatch[0].rm_so = OFF(begin);
463  v->pmatch[0].rm_eo = OFF(end);
464  if (v->g->cflags & REG_EXPECT)
465  {
466  if (cold != NULL)
467  v->details->rm_extend.rm_so = OFF(cold);
468  else
469  v->details->rm_extend.rm_so = OFF(v->stop);
470  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
471  }
472  if (v->nmatch == 1) /* no need for submatches */
473  return REG_OKAY;
474 
475  /* find submatches */
476  zapallsubs(v->pmatch, v->nmatch);
477  return cdissect(v, v->g->tree, begin, end);
478 }
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
#define REG_EXPECT
Definition: regex.h:115
struct subre * tree
Definition: regguts.h:533
regoff_t rm_so
Definition: regex.h:87
#define LOFF(p)
Definition: regexec.c:132
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
rm_detail_t * details
Definition: regexec.c:113
#define MDEBUG(arglist)
Definition: regguts.h:117
chr * start
Definition: regexec.c:114
regoff_t rm_eo
Definition: regex.h:88
Definition: regexec.c:63
chr * search_start
Definition: regexec.c:115
struct cnfa search
Definition: regguts.h:534
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
int err
Definition: regcomp.c:242
regmatch_t * pmatch
Definition: regexec.c:112
#define OFF(p)
Definition: regexec.c:131
static void freedfa(struct dfa *)
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:708
size_t nmatch
Definition: regexec.c:111
static void zapallsubs(regmatch_t *, size_t)
Definition: regexec.c:637
regmatch_t rm_extend
Definition: regex.h:94
const chr * stop
Definition: regcomp.c:241
int cflags
Definition: regguts.h:530
#define SHORTER
Definition: regguts.h:476
#define REG_NOMATCH
Definition: regex.h:140
#define NOERR()
Definition: regexec.c:130
#define close(a)
Definition: win32.h:12
struct smalldfa dfa1
Definition: regexec.c:122
#define ISERR()
Definition: regexec.c:127

◆ freedfa()

static void freedfa ( struct dfa )
static

Referenced by cfind(), find(), and pg_regexec().

◆ getladfa()

static struct dfa * getladfa ( struct vars v,
int  n 
)
static

Definition at line 374 of file regexec.c.

References assert, guts::cmap, subre::cnfa, DOMALLOC, vars::g, guts::lacons, vars::ladfas, and newdfa().

Referenced by lacon().

376 {
377  assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
378 
379  if (v->ladfas[n] == NULL)
380  {
381  struct subre *sub = &v->g->lacons[n];
382 
383  v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
384  /* a LACON can't contain a backref, so nothing else to do */
385  }
386  return v->ladfas[n];
387 }
struct dfa ** ladfas
Definition: regexec.c:119
struct subre * lacons
Definition: regguts.h:538
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
#define DOMALLOC
Definition: regexec.c:101
struct colormap cmap
Definition: regguts.h:536
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
Definition: regguts.h:471
struct cnfa cnfa
Definition: regguts.h:499

◆ getsubdfa()

static struct dfa * getsubdfa ( struct vars v,
struct subre t 
)
static

Definition at line 346 of file regexec.c.

References dfa::backmax, dfa::backmin, dfa::backno, subre::backno, guts::cmap, subre::cnfa, DOMALLOC, vars::g, subre::id, subre::max, subre::min, newdfa(), subre::op, and vars::subdfas.

Referenced by caltdissect(), ccondissect(), citerdissect(), crevcondissect(), and creviterdissect().

348 {
349  struct dfa *d = v->subdfas[t->id];
350 
351  if (d == NULL)
352  {
353  d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
354  if (d == NULL)
355  return NULL;
356  /* set up additional info if this is a backref node */
357  if (t->op == 'b')
358  {
359  d->backno = t->backno;
360  d->backmin = t->min;
361  d->backmax = t->max;
362  }
363  v->subdfas[t->id] = d;
364  }
365  return d;
366 }
short backmin
Definition: regexec.c:81
char op
Definition: regguts.h:473
short min
Definition: regguts.h:493
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
Definition: regexec.c:63
int id
Definition: regguts.h:490
#define DOMALLOC
Definition: regexec.c:101
struct colormap cmap
Definition: regguts.h:536
struct guts * g
Definition: regexec.c:109
int backno
Definition: regguts.h:492
struct dfa ** subdfas
Definition: regexec.c:118
int backno
Definition: regexec.c:80
short max
Definition: regguts.h:494
short backmax
Definition: regexec.c:82
struct cnfa cnfa
Definition: regguts.h:499

◆ getvacant()

static struct sset* getvacant ( struct vars ,
struct dfa ,
chr ,
chr  
)
static

◆ hash()

static unsigned hash ( unsigned *  ,
int   
)
static

◆ initialize()

static struct sset* initialize ( struct vars ,
struct dfa ,
chr  
)
static

◆ lacon()

static int lacon ( struct vars ,
struct cnfa ,
chr ,
color   
)
static

◆ lastcold()

static chr* lastcold ( struct vars ,
struct dfa  
)
static

◆ longest()

static chr* longest ( struct vars ,
struct dfa ,
chr ,
chr ,
int *   
)
static

◆ matchuntil()

static int matchuntil ( struct vars ,
struct dfa ,
chr ,
struct sset **  ,
chr **   
)
static

◆ miss()

static struct sset* miss ( struct vars ,
struct dfa ,
struct sset ,
color  ,
chr ,
chr  
)
static

◆ newdfa()

static struct dfa* newdfa ( struct vars ,
struct cnfa ,
struct colormap ,
struct smalldfa  
)
static

Referenced by cfind(), find(), getladfa(), and getsubdfa().

◆ pg_regexec()

int pg_regexec ( regex_t re,
const chr string,
size_t  len,
size_t  search_start,
rm_detail_t details,
size_t  nmatch,
regmatch_t  pmatch[],
int  flags 
)

Definition at line 176 of file regexec.c.

References assert, cfind(), guts::cflags, cleanup(), guts::cmap, subre::cnfa, vars::details, vars::eflags, vars::err, find(), FREE, freedfa(), vars::g, i, guts::info, vars::ladfas, vars::lblastcp, vars::lblastcss, LOCALDFAS, LOCALMAT, MALLOC, guts::nlacons, vars::nmatch, guts::nsub, guts::ntree, pg_set_regex_collation(), vars::pmatch, vars::re, regex_t::re_collation, regex_t::re_csize, regex_t::re_guts, regex_t::re_magic, REG_ESPACE, REG_EXPECT, REG_FTRACE, REG_INVARG, REG_MIXED, REG_MTRACE, REG_NOMATCH, REG_NOSUB, REG_OKAY, REG_UBACKREF, REG_UIMPOSSIBLE, REMAGIC, vars::search_start, vars::start, generate_unaccent_rules::stdout, vars::stop, vars::subdfas, guts::tree, VS, and zapallsubs().

Referenced by check_ident_usermap(), CheckAffix(), RE_wchar_execute(), replace_text_regexp(), and test_re_execute().

184 {
185  struct vars var;
186  register struct vars *v = &var;
187  int st;
188  size_t n;
189  size_t i;
190  int backref;
191 
192 #define LOCALMAT 20
193  regmatch_t mat[LOCALMAT];
194 
195 #define LOCALDFAS 40
196  struct dfa *subdfas[LOCALDFAS];
197 
198  /* sanity checks */
199  if (re == NULL || string == NULL || re->re_magic != REMAGIC)
200  return REG_INVARG;
201  if (re->re_csize != sizeof(chr))
202  return REG_MIXED;
203 
204  /* Initialize locale-dependent support */
206 
207  /* setup */
208  v->re = re;
209  v->g = (struct guts *) re->re_guts;
210  if ((v->g->cflags & REG_EXPECT) && details == NULL)
211  return REG_INVARG;
212  if (v->g->info & REG_UIMPOSSIBLE)
213  return REG_NOMATCH;
214  backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
215  v->eflags = flags;
216  if (v->g->cflags & REG_NOSUB)
217  nmatch = 0; /* override client */
218  v->nmatch = nmatch;
219  if (backref)
220  {
221  /* need work area */
222  if (v->g->nsub + 1 <= LOCALMAT)
223  v->pmatch = mat;
224  else
225  v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
226  sizeof(regmatch_t));
227  if (v->pmatch == NULL)
228  return REG_ESPACE;
229  v->nmatch = v->g->nsub + 1;
230  }
231  else
232  v->pmatch = pmatch;
233  v->details = details;
234  v->start = (chr *) string;
235  v->search_start = (chr *) string + search_start;
236  v->stop = (chr *) string + len;
237  v->err = 0;
238  v->subdfas = NULL;
239  v->ladfas = NULL;
240  v->lblastcss = NULL;
241  v->lblastcp = NULL;
242  /* below this point, "goto cleanup" will behave sanely */
243 
244  assert(v->g->ntree >= 0);
245  n = (size_t) v->g->ntree;
246  if (n <= LOCALDFAS)
247  v->subdfas = subdfas;
248  else
249  {
250  v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
251  if (v->subdfas == NULL)
252  {
253  st = REG_ESPACE;
254  goto cleanup;
255  }
256  }
257  for (i = 0; i < n; i++)
258  v->subdfas[i] = NULL;
259 
260  assert(v->g->nlacons >= 0);
261  n = (size_t) v->g->nlacons;
262  if (n > 0)
263  {
264  v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
265  if (v->ladfas == NULL)
266  {
267  st = REG_ESPACE;
268  goto cleanup;
269  }
270  for (i = 0; i < n; i++)
271  v->ladfas[i] = NULL;
272  v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
273  v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
274  if (v->lblastcss == NULL || v->lblastcp == NULL)
275  {
276  st = REG_ESPACE;
277  goto cleanup;
278  }
279  for (i = 0; i < n; i++)
280  {
281  v->lblastcss[i] = NULL;
282  v->lblastcp[i] = NULL;
283  }
284  }
285 
286  /* do it */
287  assert(v->g->tree != NULL);
288  if (backref)
289  st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
290  else
291  st = find(v, &v->g->tree->cnfa, &v->g->cmap);
292 
293  /* copy (portion of) match vector over if necessary */
294  if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0)
295  {
296  zapallsubs(pmatch, nmatch);
297  n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
298  memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
299  }
300 
301  /* clean up */
302 cleanup:
303  if (v->pmatch != pmatch && v->pmatch != mat)
304  FREE(v->pmatch);
305  if (v->subdfas != NULL)
306  {
307  n = (size_t) v->g->ntree;
308  for (i = 0; i < n; i++)
309  {
310  if (v->subdfas[i] != NULL)
311  freedfa(v->subdfas[i]);
312  }
313  if (v->subdfas != subdfas)
314  FREE(v->subdfas);
315  }
316  if (v->ladfas != NULL)
317  {
318  n = (size_t) v->g->nlacons;
319  for (i = 0; i < n; i++)
320  {
321  if (v->ladfas[i] != NULL)
322  freedfa(v->ladfas[i]);
323  }
324  FREE(v->ladfas);
325  }
326  if (v->lblastcss != NULL)
327  FREE(v->lblastcss);
328  if (v->lblastcp != NULL)
329  FREE(v->lblastcp);
330 
331 #ifdef REG_DEBUG
332  if (v->eflags & (REG_FTRACE | REG_MTRACE))
333  fflush(stdout);
334 #endif
335 
336  return st;
337 }
#define REG_UIMPOSSIBLE
Definition: regex.h:74
struct dfa ** ladfas
Definition: regexec.c:119
static int find(struct vars *, struct cnfa *, struct colormap *)
Definition: regexec.c:393
#define REG_EXPECT
Definition: regex.h:115
struct subre * tree
Definition: regguts.h:533
#define REG_ESPACE
Definition: regex.h:151
int nlacons
Definition: regguts.h:539
#define REMAGIC
Definition: regguts.h:96
#define REG_FTRACE
Definition: regex.h:129
#define FREE(ptr)
Definition: cryptohash.c:37
rm_detail_t * details
Definition: regexec.c:113
int re_csize
Definition: regex.h:76
#define REG_MTRACE
Definition: regex.h:130
chr * start
Definition: regexec.c:114
#define MALLOC(n)
Definition: regcustom.h:60
Definition: regexec.c:63
struct sset ** lblastcss
Definition: regexec.c:120
chr * search_start
Definition: regexec.c:115
int re_magic
Definition: regex.h:57
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
regex_t * re
Definition: regcomp.c:239
#define REG_INVARG
Definition: regex.h:154
struct colormap cmap
Definition: regguts.h:536
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
int err
Definition: regcomp.c:242
#define LOCALMAT
int eflags
Definition: regexec.c:110
static void cleanup(void)
Definition: bootstrap.c:872
Definition: regguts.h:526
int ntree
Definition: regguts.h:535
#define REG_UBACKREF
Definition: regex.h:60
#define LOCALDFAS
struct dfa ** subdfas
Definition: regexec.c:118
void pg_set_regex_collation(Oid collation)
regmatch_t * pmatch
Definition: regexec.c:112
#define REG_NOSUB
Definition: regex.h:109
static void freedfa(struct dfa *)
#define VS(x)
Definition: regguts.h:61
long info
Definition: regguts.h:531
struct cnfa cnfa
Definition: regguts.h:499
size_t nmatch
Definition: regexec.c:111
static void zapallsubs(regmatch_t *, size_t)
Definition: regexec.c:637
const chr * stop
Definition: regcomp.c:241
char * re_guts
Definition: regex.h:80
int cflags
Definition: regguts.h:530
Definition: regexec.c:45
size_t nsub
Definition: regguts.h:532
int i
#define REG_NOMATCH
Definition: regex.h:140
chr ** lblastcp
Definition: regexec.c:121
#define REG_MIXED
Definition: regex.h:155
Definition: regcomp.c:237
Oid re_collation
Definition: regex.h:78
static int cfind(struct vars *, struct cnfa *, struct colormap *)
Definition: regexec.c:484

◆ pickss()

static struct sset* pickss ( struct vars ,
struct dfa ,
chr ,
chr  
)
static

◆ shortest()

static chr* shortest ( struct vars ,
struct dfa ,
chr ,
chr ,
chr ,
chr **  ,
int *   
)
static

◆ subset()

static void subset ( struct vars v,
struct subre sub,
chr begin,
chr end 
)
static

Definition at line 676 of file regexec.c.

References assert, subre::capno, subre::id, LOFF, MDEBUG, vars::nmatch, OFF, vars::pmatch, regmatch_t::rm_eo, and regmatch_t::rm_so.

Referenced by cdissect().

680 {
681  int n = sub->capno;
682 
683  assert(n > 0);
684  if ((size_t) n >= v->nmatch)
685  return;
686 
687  MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
688  v->pmatch[n].rm_so = OFF(begin);
689  v->pmatch[n].rm_eo = OFF(end);
690 }
int capno
Definition: regguts.h:491
regoff_t rm_so
Definition: regex.h:87
#define LOFF(p)
Definition: regexec.c:132
#define MDEBUG(arglist)
Definition: regguts.h:117
regoff_t rm_eo
Definition: regex.h:88
int id
Definition: regguts.h:490
#define assert(TEST)
Definition: imath.c:73
regmatch_t * pmatch
Definition: regexec.c:112
#define OFF(p)
Definition: regexec.c:131
size_t nmatch
Definition: regexec.c:111

◆ zapallsubs()

static void zapallsubs ( regmatch_t p,
size_t  n 
)
static

Definition at line 637 of file regexec.c.

References i, regmatch_t::rm_eo, and regmatch_t::rm_so.

Referenced by cfindloop(), find(), and pg_regexec().

639 {
640  size_t i;
641 
642  for (i = n - 1; i > 0; i--)
643  {
644  p[i].rm_so = -1;
645  p[i].rm_eo = -1;
646  }
647 }
regoff_t rm_so
Definition: regex.h:87
regoff_t rm_eo
Definition: regex.h:88
int i

◆ zaptreesubs()

static void zaptreesubs ( struct vars v,
struct subre t 
)
static

Definition at line 653 of file regexec.c.

References subre::capno, subre::child, vars::pmatch, regmatch_t::rm_eo, regmatch_t::rm_so, and subre::sibling.

Referenced by ccondissect(), citerdissect(), crevcondissect(), and creviterdissect().

655 {
656  int n = t->capno;
657  struct subre *t2;
658 
659  if (n > 0)
660  {
661  if ((size_t) n < v->nmatch)
662  {
663  v->pmatch[n].rm_so = -1;
664  v->pmatch[n].rm_eo = -1;
665  }
666  }
667 
668  for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
669  zaptreesubs(v, t2);
670 }
struct subre * child
Definition: regguts.h:495
int capno
Definition: regguts.h:491
regoff_t rm_so
Definition: regex.h:87
regoff_t rm_eo
Definition: regex.h:88
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:653
struct subre * sibling
Definition: regguts.h:496
regmatch_t * pmatch
Definition: regexec.c:112
Definition: regguts.h:471