PostgreSQL Source Code  git master
regexec.c File Reference
#include "regex/regguts.h"
#include "rege_dfa.c"
Include dependency graph for regexec.c:

Go to the source code of this file.

Data Structures

struct  arcp
 
struct  sset
 
struct  dfa
 
struct  smalldfa
 
struct  vars
 

Macros

#define HASH(bv, nw)   (((nw) == 1) ? *(bv) : hash(bv, nw))
 
#define HIT(h, bv, ss, nw)
 
#define STARTER   01 /* the initial state set */
 
#define POSTSTATE   02 /* includes the goal state */
 
#define LOCKED   04 /* locked in cache */
 
#define NOPROGRESS   010 /* zero-progress state set */
 
#define WORK   1 /* number of work bitvectors needed */
 
#define FEWSTATES   20 /* must be less than UBITS */
 
#define FEWCOLORS   15
 
#define DOMALLOC   ((struct smalldfa *)NULL) /* force malloc */
 
#define VISERR(vv)   ((vv)->err != 0) /* have we seen an error yet? */
 
#define ISERR()   VISERR(v)
 
#define VERR(vv, e)   ((vv)->err = ((vv)->err ? (vv)->err : (e)))
 
#define ERR(e)   VERR(v, e) /* record an error */
 
#define NOERR()   {if (ISERR()) return v->err;} /* if error seen, return it */
 
#define OFF(p)   ((p) - v->start)
 
#define LOFF(p)   ((long)OFF(p))
 
#define LOCALMAT   20
 
#define LOCALDFAS   40
 

Functions

static struct dfagetsubdfa (struct vars *, struct subre *)
 
static struct dfagetladfa (struct vars *, int)
 
static int find (struct vars *, struct cnfa *, struct colormap *)
 
static int cfind (struct vars *, struct cnfa *, struct colormap *)
 
static int cfindloop (struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **)
 
static void zapallsubs (regmatch_t *, size_t)
 
static void zaptreesubs (struct vars *, struct subre *)
 
static void subset (struct vars *, struct subre *, chr *, chr *)
 
static int cdissect (struct vars *, struct subre *, chr *, chr *)
 
static int ccondissect (struct vars *, struct subre *, chr *, chr *)
 
static int crevcondissect (struct vars *, struct subre *, chr *, chr *)
 
static int cbrdissect (struct vars *, struct subre *, chr *, chr *)
 
static int caltdissect (struct vars *, struct subre *, chr *, chr *)
 
static int citerdissect (struct vars *, struct subre *, chr *, chr *)
 
static int creviterdissect (struct vars *, struct subre *, chr *, chr *)
 
static chrlongest (struct vars *, struct dfa *, chr *, chr *, int *)
 
static chrshortest (struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
 
static int matchuntil (struct vars *, struct dfa *, chr *, struct sset **, chr **)
 
static chrdfa_backref (struct vars *, struct dfa *, chr *, chr *, chr *, bool)
 
static chrlastcold (struct vars *, struct dfa *)
 
static struct dfanewdfa (struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
 
static void freedfa (struct dfa *)
 
static unsigned hash (unsigned *, int)
 
static struct ssetinitialize (struct vars *, struct dfa *, chr *)
 
static struct ssetmiss (struct vars *, struct dfa *, struct sset *, color, chr *, chr *)
 
static int lacon (struct vars *, struct cnfa *, chr *, color)
 
static struct ssetgetvacant (struct vars *, struct dfa *, chr *, chr *)
 
static struct ssetpickss (struct vars *, struct dfa *, chr *, chr *)
 
int pg_regexec (regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
 

Macro Definition Documentation

◆ DOMALLOC

#define DOMALLOC   ((struct smalldfa *)NULL) /* force malloc */

Definition at line 101 of file regexec.c.

Referenced by getladfa(), and getsubdfa().

◆ ERR

#define ERR (   e)    VERR(v, e) /* record an error */

Definition at line 129 of file regexec.c.

Referenced by cfindloop().

◆ FEWCOLORS

#define FEWCOLORS   15

Definition at line 91 of file regexec.c.

Referenced by newdfa().

◆ FEWSTATES

#define FEWSTATES   20 /* must be less than UBITS */

Definition at line 90 of file regexec.c.

◆ HASH

#define HASH (   bv,
  nw 
)    (((nw) == 1) ? *(bv) : hash(bv, nw))

Definition at line 49 of file regexec.c.

◆ HIT

#define HIT (   h,
  bv,
  ss,
  nw 
)
Value:
((ss)->hash == (h) && ((nw) == 1 || \
memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
static unsigned hash(unsigned *, int)
#define VS(x)
Definition: regguts.h:61

Definition at line 50 of file regexec.c.

Referenced by miss().

◆ ISERR

#define ISERR ( )    VISERR(v)

Definition at line 127 of file regexec.c.

Referenced by cfind(), cfindloop(), citerdissect(), creviterdissect(), find(), getladfa(), and getsubdfa().

◆ LOCALDFAS

#define LOCALDFAS   40

Referenced by pg_regexec().

◆ LOCALMAT

#define LOCALMAT   20

Referenced by pg_regexec().

◆ LOCKED

#define LOCKED   04 /* locked in cache */

Definition at line 55 of file regexec.c.

Referenced by getvacant(), initialize(), and pickss().

◆ LOFF

#define LOFF (   p)    ((long)OFF(p))

◆ NOERR

#define NOERR ( )    {if (ISERR()) return v->err;} /* if error seen, return it */

Definition at line 130 of file regexec.c.

Referenced by caltdissect(), ccondissect(), cfind(), crevcondissect(), and find().

◆ NOPROGRESS

#define NOPROGRESS   010 /* zero-progress state set */

Definition at line 56 of file regexec.c.

Referenced by getvacant(), initialize(), lastcold(), and miss().

◆ OFF

#define OFF (   p)    ((p) - v->start)

Definition at line 131 of file regexec.c.

Referenced by cfind(), cfindloop(), find(), and subset().

◆ POSTSTATE

#define POSTSTATE   02 /* includes the goal state */

Definition at line 54 of file regexec.c.

Referenced by getvacant(), longest(), matchuntil(), miss(), and shortest().

◆ STARTER

#define STARTER   01 /* the initial state set */

Definition at line 53 of file regexec.c.

Referenced by initialize().

◆ VERR

#define VERR (   vv,
  e 
)    ((vv)->err = ((vv)->err ? (vv)->err : (e)))

Definition at line 128 of file regexec.c.

◆ VISERR

#define VISERR (   vv)    ((vv)->err != 0) /* have we seen an error yet? */

Definition at line 126 of file regexec.c.

◆ WORK

#define WORK   1 /* number of work bitvectors needed */

Definition at line 87 of file regexec.c.

Referenced by newdfa().

Function Documentation

◆ caltdissect()

static int caltdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1032 of file regexec.c.

References assert, cdissect(), subre::child, subre::cnfa, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_NOMATCH, and subre::sibling.

Referenced by cdissect().

1036 {
1037  struct dfa *d;
1038  int er;
1039 
1040  assert(t->op == '|');
1041 
1042  t = t->child;
1043  /* there should be at least 2 alternatives */
1044  assert(t != NULL && t->sibling != NULL);
1045 
1046  while (t != NULL)
1047  {
1048  assert(t->cnfa.nstates > 0);
1049 
1050  MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1051 
1052  d = getsubdfa(v, t);
1053  NOERR();
1054  if (longest(v, d, begin, end, (int *) NULL) == end)
1055  {
1056  MDEBUG(("%d: caltdissect matched\n", t->id));
1057  er = cdissect(v, t, begin, end);
1058  if (er != REG_NOMATCH)
1059  return er;
1060  }
1061  NOERR();
1062 
1063  t = t->sibling;
1064  }
1065 
1066  return REG_NOMATCH;
1067 }
struct subre * child
Definition: regguts.h:495
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
Definition: regexec.c:63
int nstates
Definition: regguts.h:403
int id
Definition: regguts.h:490
struct subre * sibling
Definition: regguts.h:496
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:346
#define assert(TEST)
Definition: imath.c:73
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:710
struct cnfa cnfa
Definition: regguts.h:499
#define REG_NOMATCH
Definition: regex.h:140
#define NOERR()
Definition: regexec.c:130

◆ cbrdissect()

static int cbrdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 950 of file regexec.c.

References assert, subre::backno, DUPINF, vars::g, subre::id, LOFF, subre::max, MDEBUG, subre::min, subre::op, vars::pmatch, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, regmatch_t::rm_so, and vars::start.

Referenced by cdissect().

954 {
955  int n = t->backno;
956  size_t numreps;
957  size_t tlen;
958  size_t brlen;
959  chr *brstring;
960  chr *p;
961  int min = t->min;
962  int max = t->max;
963 
964  assert(t != NULL);
965  assert(t->op == 'b');
966  assert(n >= 0);
967  assert((size_t) n < v->nmatch);
968 
969  MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
970  LOFF(begin), LOFF(end)));
971 
972  /* get the backreferenced string */
973  if (v->pmatch[n].rm_so == -1)
974  return REG_NOMATCH;
975  brstring = v->start + v->pmatch[n].rm_so;
976  brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
977 
978  /* special cases for zero-length strings */
979  if (brlen == 0)
980  {
981  /*
982  * matches only if target is zero length, but any number of
983  * repetitions can be considered to be present
984  */
985  if (begin == end && min <= max)
986  {
987  MDEBUG(("%d: backref matched trivially\n", t->id));
988  return REG_OKAY;
989  }
990  return REG_NOMATCH;
991  }
992  if (begin == end)
993  {
994  /* matches only if zero repetitions are okay */
995  if (min == 0)
996  {
997  MDEBUG(("%d: backref matched trivially\n", t->id));
998  return REG_OKAY;
999  }
1000  return REG_NOMATCH;
1001  }
1002 
1003  /*
1004  * check target length to see if it could possibly be an allowed number of
1005  * repetitions of brstring
1006  */
1007  assert(end > begin);
1008  tlen = end - begin;
1009  if (tlen % brlen != 0)
1010  return REG_NOMATCH;
1011  numreps = tlen / brlen;
1012  if (numreps < min || (numreps > max && max != DUPINF))
1013  return REG_NOMATCH;
1014 
1015  /* okay, compare the actual string contents */
1016  p = begin;
1017  while (numreps-- > 0)
1018  {
1019  if ((*v->g->compare) (brstring, p, brlen) != 0)
1020  return REG_NOMATCH;
1021  p += brlen;
1022  }
1023 
1024  MDEBUG(("%d: backref matched\n", t->id));
1025  return REG_OKAY;
1026 }
regoff_t rm_so
Definition: regex.h:87
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
short min
Definition: regguts.h:493
#define MDEBUG(arglist)
Definition: regguts.h:117
chr * start
Definition: regexec.c:114
regoff_t rm_eo
Definition: regex.h:88
int id
Definition: regguts.h:490
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
int backno
Definition: regguts.h:492
#define DUPINF
Definition: regguts.h:94
short max
Definition: regguts.h:494
regmatch_t * pmatch
Definition: regexec.c:112
#define REG_NOMATCH
Definition: regex.h:140

◆ ccondissect()

static int ccondissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 785 of file regexec.c.

References assert, cdissect(), subre::child, subre::cnfa, subre::flags, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_ASSERT, REG_NOMATCH, REG_OKAY, SHORTER, subre::sibling, and zaptreesubs().

Referenced by cdissect().

789 {
790  struct subre *left = t->child;
791  struct subre *right = left->sibling;
792  struct dfa *d;
793  struct dfa *d2;
794  chr *mid;
795  int er;
796 
797  assert(t->op == '.');
798  assert(left != NULL && left->cnfa.nstates > 0);
799  assert(right != NULL && right->cnfa.nstates > 0);
800  assert(right->sibling == NULL);
801  assert(!(left->flags & SHORTER));
802 
803  d = getsubdfa(v, left);
804  NOERR();
805  d2 = getsubdfa(v, right);
806  NOERR();
807  MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
808 
809  /* pick a tentative midpoint */
810  mid = longest(v, d, begin, end, (int *) NULL);
811  NOERR();
812  if (mid == NULL)
813  return REG_NOMATCH;
814  MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
815 
816  /* iterate until satisfaction or failure */
817  for (;;)
818  {
819  /* try this midpoint on for size */
820  if (longest(v, d2, mid, end, (int *) NULL) == end)
821  {
822  er = cdissect(v, left, begin, mid);
823  if (er == REG_OKAY)
824  {
825  er = cdissect(v, right, mid, end);
826  if (er == REG_OKAY)
827  {
828  /* satisfaction */
829  MDEBUG(("%d: successful\n", t->id));
830  return REG_OKAY;
831  }
832  }
833  if (er != REG_NOMATCH)
834  return er;
835  }
836  NOERR();
837 
838  /* that midpoint didn't work, find a new one */
839  if (mid == begin)
840  {
841  /* all possibilities exhausted */
842  MDEBUG(("%d: no midpoint\n", t->id));
843  return REG_NOMATCH;
844  }
845  mid = longest(v, d, begin, mid - 1, (int *) NULL);
846  NOERR();
847  if (mid == NULL)
848  {
849  /* failed to find a new one */
850  MDEBUG(("%d: failed midpoint\n", t->id));
851  return REG_NOMATCH;
852  }
853  MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
854  zaptreesubs(v, left);
855  zaptreesubs(v, right);
856  }
857 
858  /* can't get here */
859  return REG_ASSERT;
860 }
struct subre * child
Definition: regguts.h:495
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
Definition: regexec.c:63
int nstates
Definition: regguts.h:403
int id
Definition: regguts.h:490
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:655
struct subre * sibling
Definition: regguts.h:496
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:346
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
#define REG_ASSERT
Definition: regex.h:153
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:710
Definition: regguts.h:471
struct cnfa cnfa
Definition: regguts.h:499
#define SHORTER
Definition: regguts.h:476
#define REG_NOMATCH
Definition: regex.h:140
#define NOERR()
Definition: regexec.c:130

◆ cdissect()

static int cdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 710 of file regexec.c.

References assert, BACKR, caltdissect(), CANCEL_REQUESTED, subre::capno, cbrdissect(), ccondissect(), subre::child, citerdissect(), crevcondissect(), creviterdissect(), subre::flags, subre::id, LOFF, MDEBUG, subre::op, vars::re, REG_ASSERT, REG_CANCEL, REG_ETOOBIG, REG_NOMATCH, REG_OKAY, SHORTER, STACK_TOO_DEEP, and subset().

Referenced by caltdissect(), ccondissect(), cfindloop(), citerdissect(), crevcondissect(), creviterdissect(), and find().

714 {
715  int er;
716 
717  assert(t != NULL);
718  MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
719 
720  /* handy place to check for operation cancel */
721  if (CANCEL_REQUESTED(v->re))
722  return REG_CANCEL;
723  /* ... and stack overrun */
724  if (STACK_TOO_DEEP(v->re))
725  return REG_ETOOBIG;
726 
727  switch (t->op)
728  {
729  case '=': /* terminal node */
730  assert(t->child == NULL);
731  er = REG_OKAY; /* no action, parent did the work */
732  break;
733  case 'b': /* back reference */
734  assert(t->child == NULL);
735  er = cbrdissect(v, t, begin, end);
736  break;
737  case '.': /* concatenation */
738  assert(t->child != NULL);
739  if (t->child->flags & SHORTER) /* reverse scan */
740  er = crevcondissect(v, t, begin, end);
741  else
742  er = ccondissect(v, t, begin, end);
743  break;
744  case '|': /* alternation */
745  assert(t->child != NULL);
746  er = caltdissect(v, t, begin, end);
747  break;
748  case '*': /* iteration */
749  assert(t->child != NULL);
750  if (t->child->flags & SHORTER) /* reverse scan */
751  er = creviterdissect(v, t, begin, end);
752  else
753  er = citerdissect(v, t, begin, end);
754  break;
755  case '(': /* no-op capture node */
756  assert(t->child != NULL);
757  assert(t->capno > 0);
758  er = cdissect(v, t->child, begin, end);
759  break;
760  default:
761  er = REG_ASSERT;
762  break;
763  }
764 
765  /*
766  * We should never have a match failure unless backrefs lurk below;
767  * otherwise, either caller failed to check the DFA, or there's some
768  * inconsistency between the DFA and the node's innards.
769  */
770  assert(er != REG_NOMATCH || (t->flags & BACKR));
771 
772  /*
773  * If this node is marked as capturing, save successful match's location.
774  */
775  if (t->capno > 0 && er == REG_OKAY)
776  subset(v, t, begin, end);
777 
778  return er;
779 }
struct subre * child
Definition: regguts.h:495
int capno
Definition: regguts.h:491
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
#define MDEBUG(arglist)
Definition: regguts.h:117
int id
Definition: regguts.h:490
#define REG_OKAY
Definition: regex.h:139
regex_t * re
Definition: regcomp.c:240
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
static int cbrdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:950
static int creviterdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:1275
#define BACKR
Definition: regguts.h:479
static int caltdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:1032
#define REG_ASSERT
Definition: regex.h:153
static int ccondissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:785
#define REG_ETOOBIG
Definition: regex.h:157
static int citerdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:1073
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:710
#define SHORTER
Definition: regguts.h:476
#define CANCEL_REQUESTED(re)
Definition: regguts.h:516
#define REG_NOMATCH
Definition: regex.h:140
static int crevcondissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:866
static void subset(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:678
#define STACK_TOO_DEEP(re)
Definition: regguts.h:519
#define REG_CANCEL
Definition: regex.h:159

◆ cfind()

static int cfind ( struct vars v,
struct cnfa cnfa,
struct colormap cm 
)
static

Definition at line 486 of file regexec.c.

References assert, cfindloop(), guts::cflags, vars::details, vars::dfa1, vars::dfa2, vars::err, freedfa(), vars::g, ISERR, newdfa(), NOERR, OFF, REG_EXPECT, regmatch_t::rm_eo, rm_detail_t::rm_extend, regmatch_t::rm_so, guts::search, and vars::stop.

Referenced by pg_regexec().

489 {
490  struct dfa *s;
491  struct dfa *d;
492  chr *cold;
493  int ret;
494 
495  s = newdfa(v, &v->g->search, cm, &v->dfa1);
496  NOERR();
497  d = newdfa(v, cnfa, cm, &v->dfa2);
498  if (ISERR())
499  {
500  assert(d == NULL);
501  freedfa(s);
502  return v->err;
503  }
504 
505  ret = cfindloop(v, cnfa, cm, d, s, &cold);
506 
507  freedfa(d);
508  freedfa(s);
509  NOERR();
510  if (v->g->cflags & REG_EXPECT)
511  {
512  assert(v->details != NULL);
513  if (cold != NULL)
514  v->details->rm_extend.rm_so = OFF(cold);
515  else
516  v->details->rm_extend.rm_so = OFF(v->stop);
517  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
518  }
519  return ret;
520 }
#define REG_EXPECT
Definition: regex.h:115
regoff_t rm_so
Definition: regex.h:87
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
rm_detail_t * details
Definition: regexec.c:113
regoff_t rm_eo
Definition: regex.h:88
Definition: regexec.c:63
struct cnfa search
Definition: regguts.h:534
pg_wchar chr
Definition: regcustom.h:66
static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **)
Definition: regexec.c:526
struct smalldfa dfa2
Definition: regexec.c:123
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
int err
Definition: regcomp.c:243
#define OFF(p)
Definition: regexec.c:131
static void freedfa(struct dfa *)
regmatch_t rm_extend
Definition: regex.h:94
const chr * stop
Definition: regcomp.c:242
int cflags
Definition: regguts.h:530
#define NOERR()
Definition: regexec.c:130
struct smalldfa dfa1
Definition: regexec.c:122
#define ISERR()
Definition: regexec.c:127

◆ cfindloop()

static int cfindloop ( struct vars v,
struct cnfa cnfa,
struct colormap cm,
struct dfa d,
struct dfa s,
chr **  coldp 
)
static

Definition at line 526 of file regexec.c.

References assert, cdissect(), close, ERR, vars::err, subre::flags, vars::g, ISERR, LOFF, longest(), MDEBUG, vars::nmatch, OFF, vars::pmatch, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, regmatch_t::rm_so, vars::search_start, SHORTER, shortest(), vars::stop, guts::tree, and zapallsubs().

Referenced by cfind().

532 {
533  chr *begin;
534  chr *end;
535  chr *cold;
536  chr *open; /* open and close of range of possible starts */
537  chr *close;
538  chr *estart;
539  chr *estop;
540  int er;
541  int shorter = v->g->tree->flags & SHORTER;
542  int hitend;
543 
544  assert(d != NULL && s != NULL);
545  cold = NULL;
546  close = v->search_start;
547  do
548  {
549  /* Search with the search RE for match range at/beyond "close" */
550  MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
551  close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
552  if (ISERR())
553  {
554  *coldp = cold;
555  return v->err;
556  }
557  if (close == NULL)
558  break; /* no more possible match anywhere */
559  assert(cold != NULL);
560  open = cold;
561  cold = NULL;
562  /* Search for matches starting between "open" and "close" inclusive */
563  MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
564  for (begin = open; begin <= close; begin++)
565  {
566  MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
567  estart = begin;
568  estop = v->stop;
569  for (;;)
570  {
571  /* Here we use the top node's detailed RE */
572  if (shorter)
573  end = shortest(v, d, begin, estart,
574  estop, (chr **) NULL, &hitend);
575  else
576  end = longest(v, d, begin, estop,
577  &hitend);
578  if (ISERR())
579  {
580  *coldp = cold;
581  return v->err;
582  }
583  if (hitend && cold == NULL)
584  cold = begin;
585  if (end == NULL)
586  break; /* no match with this begin point, try next */
587  MDEBUG(("tentative end %ld\n", LOFF(end)));
588  /* Dissect the potential match to see if it really matches */
589  zapallsubs(v->pmatch, v->nmatch);
590  er = cdissect(v, v->g->tree, begin, end);
591  if (er == REG_OKAY)
592  {
593  if (v->nmatch > 0)
594  {
595  v->pmatch[0].rm_so = OFF(begin);
596  v->pmatch[0].rm_eo = OFF(end);
597  }
598  *coldp = cold;
599  return REG_OKAY;
600  }
601  if (er != REG_NOMATCH)
602  {
603  ERR(er);
604  *coldp = cold;
605  return er;
606  }
607  /* Try next longer/shorter match with same begin point */
608  if (shorter)
609  {
610  if (end == estop)
611  break; /* no more, so try next begin point */
612  estart = end + 1;
613  }
614  else
615  {
616  if (end == begin)
617  break; /* no more, so try next begin point */
618  estop = end - 1;
619  }
620  } /* end loop over endpoint positions */
621  } /* end loop over beginning positions */
622 
623  /*
624  * If we get here, there is no possible match starting at or before
625  * "close", so consider matches beyond that. We'll do a fresh search
626  * with the search RE to find a new promising match range.
627  */
628  close++;
629  } while (close < v->stop);
630 
631  *coldp = cold;
632  return REG_NOMATCH;
633 }
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
struct subre * tree
Definition: regguts.h:533
regoff_t rm_so
Definition: regex.h:87
#define LOFF(p)
Definition: regexec.c:132
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
regoff_t rm_eo
Definition: regex.h:88
chr * search_start
Definition: regexec.c:115
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
int err
Definition: regcomp.c:243
regmatch_t * pmatch
Definition: regexec.c:112
#define ERR(e)
Definition: regexec.c:129
#define OFF(p)
Definition: regexec.c:131
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:710
size_t nmatch
Definition: regexec.c:111
static void zapallsubs(regmatch_t *, size_t)
Definition: regexec.c:639
const chr * stop
Definition: regcomp.c:242
#define SHORTER
Definition: regguts.h:476
#define REG_NOMATCH
Definition: regex.h:140
#define close(a)
Definition: win32.h:12
#define ISERR()
Definition: regexec.c:127

◆ citerdissect()

static int citerdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1073 of file regexec.c.

References assert, cdissect(), subre::child, subre::cnfa, DUPINF, vars::err, subre::flags, FREE, getsubdfa(), i, subre::id, ISERR, LOFF, longest(), MALLOC, subre::max, MDEBUG, subre::min, cnfa::nstates, subre::op, REG_ESPACE, REG_NOMATCH, REG_OKAY, SHORTER, and zaptreesubs().

Referenced by cdissect().

1077 {
1078  struct dfa *d;
1079  chr **endpts;
1080  chr *limit;
1081  int min_matches;
1082  size_t max_matches;
1083  int nverified;
1084  int k;
1085  int i;
1086  int er;
1087 
1088  assert(t->op == '*');
1089  assert(t->child != NULL && t->child->cnfa.nstates > 0);
1090  assert(!(t->child->flags & SHORTER));
1091  assert(begin <= end);
1092 
1093  MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1094 
1095  /*
1096  * For the moment, assume the minimum number of matches is 1. If zero
1097  * matches are allowed, and the target string is empty, we are allowed to
1098  * match regardless of the contents of the iter node --- but we would
1099  * prefer to match once, so that capturing parens get set. (An example of
1100  * the concern here is a pattern like "()*\1", which historically this
1101  * code has allowed to succeed.) Therefore, we deal with the zero-matches
1102  * case at the bottom, after failing to find any other way to match.
1103  */
1104  min_matches = t->min;
1105  if (min_matches <= 0)
1106  min_matches = 1;
1107 
1108  /*
1109  * We need workspace to track the endpoints of each sub-match. Normally
1110  * we consider only nonzero-length sub-matches, so there can be at most
1111  * end-begin of them. However, if min is larger than that, we will also
1112  * consider zero-length sub-matches in order to find enough matches.
1113  *
1114  * For convenience, endpts[0] contains the "begin" pointer and we store
1115  * sub-match endpoints in endpts[1..max_matches].
1116  */
1117  max_matches = end - begin;
1118  if (max_matches > t->max && t->max != DUPINF)
1119  max_matches = t->max;
1120  if (max_matches < min_matches)
1121  max_matches = min_matches;
1122  endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1123  if (endpts == NULL)
1124  return REG_ESPACE;
1125  endpts[0] = begin;
1126 
1127  d = getsubdfa(v, t->child);
1128  if (ISERR())
1129  {
1130  FREE(endpts);
1131  return v->err;
1132  }
1133 
1134  /*
1135  * Our strategy is to first find a set of sub-match endpoints that are
1136  * valid according to the child node's DFA, and then recursively dissect
1137  * each sub-match to confirm validity. If any validity check fails,
1138  * backtrack the last sub-match and try again. And, when we next try for
1139  * a validity check, we need not recheck any successfully verified
1140  * sub-matches that we didn't move the endpoints of. nverified remembers
1141  * how many sub-matches are currently known okay.
1142  */
1143 
1144  /* initialize to consider first sub-match */
1145  nverified = 0;
1146  k = 1;
1147  limit = end;
1148 
1149  /* iterate until satisfaction or failure */
1150  while (k > 0)
1151  {
1152  /* try to find an endpoint for the k'th sub-match */
1153  endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
1154  if (ISERR())
1155  {
1156  FREE(endpts);
1157  return v->err;
1158  }
1159  if (endpts[k] == NULL)
1160  {
1161  /* no match possible, so see if we can shorten previous one */
1162  k--;
1163  goto backtrack;
1164  }
1165  MDEBUG(("%d: working endpoint %d: %ld\n",
1166  t->id, k, LOFF(endpts[k])));
1167 
1168  /* k'th sub-match can no longer be considered verified */
1169  if (nverified >= k)
1170  nverified = k - 1;
1171 
1172  if (endpts[k] != end)
1173  {
1174  /* haven't reached end yet, try another iteration if allowed */
1175  if (k >= max_matches)
1176  {
1177  /* must try to shorten some previous match */
1178  k--;
1179  goto backtrack;
1180  }
1181 
1182  /* reject zero-length match unless necessary to achieve min */
1183  if (endpts[k] == endpts[k - 1] &&
1184  (k >= min_matches || min_matches - k < end - endpts[k]))
1185  goto backtrack;
1186 
1187  k++;
1188  limit = end;
1189  continue;
1190  }
1191 
1192  /*
1193  * We've identified a way to divide the string into k sub-matches that
1194  * works so far as the child DFA can tell. If k is an allowed number
1195  * of matches, start the slow part: recurse to verify each sub-match.
1196  * We always have k <= max_matches, needn't check that.
1197  */
1198  if (k < min_matches)
1199  goto backtrack;
1200 
1201  MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1202 
1203  for (i = nverified + 1; i <= k; i++)
1204  {
1205  zaptreesubs(v, t->child);
1206  er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1207  if (er == REG_OKAY)
1208  {
1209  nverified = i;
1210  continue;
1211  }
1212  if (er == REG_NOMATCH)
1213  break;
1214  /* oops, something failed */
1215  FREE(endpts);
1216  return er;
1217  }
1218 
1219  if (i > k)
1220  {
1221  /* satisfaction */
1222  MDEBUG(("%d: successful\n", t->id));
1223  FREE(endpts);
1224  return REG_OKAY;
1225  }
1226 
1227  /* match failed to verify, so backtrack */
1228 
1229 backtrack:
1230 
1231  /*
1232  * Must consider shorter versions of the current sub-match. However,
1233  * we'll only ask for a zero-length match if necessary.
1234  */
1235  while (k > 0)
1236  {
1237  chr *prev_end = endpts[k - 1];
1238 
1239  if (endpts[k] > prev_end)
1240  {
1241  limit = endpts[k] - 1;
1242  if (limit > prev_end ||
1243  (k < min_matches && min_matches - k >= end - prev_end))
1244  {
1245  /* break out of backtrack loop, continue the outer one */
1246  break;
1247  }
1248  }
1249  /* can't shorten k'th sub-match any more, consider previous one */
1250  k--;
1251  }
1252  }
1253 
1254  /* all possibilities exhausted */
1255  FREE(endpts);
1256 
1257  /*
1258  * Now consider the possibility that we can match to a zero-length string
1259  * by using zero repetitions.
1260  */
1261  if (t->min == 0 && begin == end)
1262  {
1263  MDEBUG(("%d: allowing zero matches\n", t->id));
1264  return REG_OKAY;
1265  }
1266 
1267  MDEBUG(("%d: failed\n", t->id));
1268  return REG_NOMATCH;
1269 }
struct subre * child
Definition: regguts.h:495
#define REG_ESPACE
Definition: regex.h:151
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
short min
Definition: regguts.h:493
#define FREE(ptr)
Definition: cryptohash.c:37
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
#define MALLOC(n)
Definition: regcustom.h:60
Definition: regexec.c:63
int nstates
Definition: regguts.h:403
int id
Definition: regguts.h:490
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:655
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:346
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
int err
Definition: regcomp.c:243
#define DUPINF
Definition: regguts.h:94
short max
Definition: regguts.h:494
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:710
struct cnfa cnfa
Definition: regguts.h:499
#define SHORTER
Definition: regguts.h:476
int i
#define REG_NOMATCH
Definition: regex.h:140
#define ISERR()
Definition: regexec.c:127

◆ crevcondissect()

static int crevcondissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 866 of file regexec.c.

References assert, cdissect(), subre::child, subre::cnfa, subre::flags, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_ASSERT, REG_NOMATCH, REG_OKAY, SHORTER, shortest(), subre::sibling, and zaptreesubs().

Referenced by cdissect().

870 {
871  struct subre *left = t->child;
872  struct subre *right = left->sibling;
873  struct dfa *d;
874  struct dfa *d2;
875  chr *mid;
876  int er;
877 
878  assert(t->op == '.');
879  assert(left != NULL && left->cnfa.nstates > 0);
880  assert(right != NULL && right->cnfa.nstates > 0);
881  assert(right->sibling == NULL);
882  assert(left->flags & SHORTER);
883 
884  d = getsubdfa(v, left);
885  NOERR();
886  d2 = getsubdfa(v, right);
887  NOERR();
888  MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
889 
890  /* pick a tentative midpoint */
891  mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
892  NOERR();
893  if (mid == NULL)
894  return REG_NOMATCH;
895  MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
896 
897  /* iterate until satisfaction or failure */
898  for (;;)
899  {
900  /* try this midpoint on for size */
901  if (longest(v, d2, mid, end, (int *) NULL) == end)
902  {
903  er = cdissect(v, left, begin, mid);
904  if (er == REG_OKAY)
905  {
906  er = cdissect(v, right, mid, end);
907  if (er == REG_OKAY)
908  {
909  /* satisfaction */
910  MDEBUG(("%d: successful\n", t->id));
911  return REG_OKAY;
912  }
913  }
914  if (er != REG_NOMATCH)
915  return er;
916  }
917  NOERR();
918 
919  /* that midpoint didn't work, find a new one */
920  if (mid == end)
921  {
922  /* all possibilities exhausted */
923  MDEBUG(("%d: no midpoint\n", t->id));
924  return REG_NOMATCH;
925  }
926  mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
927  NOERR();
928  if (mid == NULL)
929  {
930  /* failed to find a new one */
931  MDEBUG(("%d: failed midpoint\n", t->id));
932  return REG_NOMATCH;
933  }
934  MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
935  zaptreesubs(v, left);
936  zaptreesubs(v, right);
937  }
938 
939  /* can't get here */
940  return REG_ASSERT;
941 }
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
struct subre * child
Definition: regguts.h:495
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
#define MDEBUG(arglist)
Definition: regguts.h:117
Definition: regexec.c:63
int nstates
Definition: regguts.h:403
int id
Definition: regguts.h:490
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:655
struct subre * sibling
Definition: regguts.h:496
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:346
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
#define REG_ASSERT
Definition: regex.h:153
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:710
Definition: regguts.h:471
struct cnfa cnfa
Definition: regguts.h:499
#define SHORTER
Definition: regguts.h:476
#define REG_NOMATCH
Definition: regex.h:140
#define NOERR()
Definition: regexec.c:130

◆ creviterdissect()

static int creviterdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1275 of file regexec.c.

References assert, cdissect(), subre::child, subre::cnfa, DUPINF, vars::err, subre::flags, FREE, getsubdfa(), i, subre::id, ISERR, LOFF, MALLOC, subre::max, MDEBUG, subre::min, cnfa::nstates, subre::op, REG_ESPACE, REG_NOMATCH, REG_OKAY, SHORTER, shortest(), and zaptreesubs().

Referenced by cdissect().

1279 {
1280  struct dfa *d;
1281  chr **endpts;
1282  chr *limit;
1283  int min_matches;
1284  size_t max_matches;
1285  int nverified;
1286  int k;
1287  int i;
1288  int er;
1289 
1290  assert(t->op == '*');
1291  assert(t->child != NULL && t->child->cnfa.nstates > 0);
1292  assert(t->child->flags & SHORTER);
1293  assert(begin <= end);
1294 
1295  MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1296 
1297  /*
1298  * If zero matches are allowed, and target string is empty, just declare
1299  * victory. OTOH, if target string isn't empty, zero matches can't work
1300  * so we pretend the min is 1.
1301  */
1302  min_matches = t->min;
1303  if (min_matches <= 0)
1304  {
1305  if (begin == end)
1306  {
1307  MDEBUG(("%d: allowing zero matches\n", t->id));
1308  return REG_OKAY;
1309  }
1310  min_matches = 1;
1311  }
1312 
1313  /*
1314  * We need workspace to track the endpoints of each sub-match. Normally
1315  * we consider only nonzero-length sub-matches, so there can be at most
1316  * end-begin of them. However, if min is larger than that, we will also
1317  * consider zero-length sub-matches in order to find enough matches.
1318  *
1319  * For convenience, endpts[0] contains the "begin" pointer and we store
1320  * sub-match endpoints in endpts[1..max_matches].
1321  */
1322  max_matches = end - begin;
1323  if (max_matches > t->max && t->max != DUPINF)
1324  max_matches = t->max;
1325  if (max_matches < min_matches)
1326  max_matches = min_matches;
1327  endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1328  if (endpts == NULL)
1329  return REG_ESPACE;
1330  endpts[0] = begin;
1331 
1332  d = getsubdfa(v, t->child);
1333  if (ISERR())
1334  {
1335  FREE(endpts);
1336  return v->err;
1337  }
1338 
1339  /*
1340  * Our strategy is to first find a set of sub-match endpoints that are
1341  * valid according to the child node's DFA, and then recursively dissect
1342  * each sub-match to confirm validity. If any validity check fails,
1343  * backtrack the last sub-match and try again. And, when we next try for
1344  * a validity check, we need not recheck any successfully verified
1345  * sub-matches that we didn't move the endpoints of. nverified remembers
1346  * how many sub-matches are currently known okay.
1347  */
1348 
1349  /* initialize to consider first sub-match */
1350  nverified = 0;
1351  k = 1;
1352  limit = begin;
1353 
1354  /* iterate until satisfaction or failure */
1355  while (k > 0)
1356  {
1357  /* disallow zero-length match unless necessary to achieve min */
1358  if (limit == endpts[k - 1] &&
1359  limit != end &&
1360  (k >= min_matches || min_matches - k < end - limit))
1361  limit++;
1362 
1363  /* if this is the last allowed sub-match, it must reach to the end */
1364  if (k >= max_matches)
1365  limit = end;
1366 
1367  /* try to find an endpoint for the k'th sub-match */
1368  endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
1369  (chr **) NULL, (int *) NULL);
1370  if (ISERR())
1371  {
1372  FREE(endpts);
1373  return v->err;
1374  }
1375  if (endpts[k] == NULL)
1376  {
1377  /* no match possible, so see if we can lengthen previous one */
1378  k--;
1379  goto backtrack;
1380  }
1381  MDEBUG(("%d: working endpoint %d: %ld\n",
1382  t->id, k, LOFF(endpts[k])));
1383 
1384  /* k'th sub-match can no longer be considered verified */
1385  if (nverified >= k)
1386  nverified = k - 1;
1387 
1388  if (endpts[k] != end)
1389  {
1390  /* haven't reached end yet, try another iteration if allowed */
1391  if (k >= max_matches)
1392  {
1393  /* must try to lengthen some previous match */
1394  k--;
1395  goto backtrack;
1396  }
1397 
1398  k++;
1399  limit = endpts[k - 1];
1400  continue;
1401  }
1402 
1403  /*
1404  * We've identified a way to divide the string into k sub-matches that
1405  * works so far as the child DFA can tell. If k is an allowed number
1406  * of matches, start the slow part: recurse to verify each sub-match.
1407  * We always have k <= max_matches, needn't check that.
1408  */
1409  if (k < min_matches)
1410  goto backtrack;
1411 
1412  MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1413 
1414  for (i = nverified + 1; i <= k; i++)
1415  {
1416  zaptreesubs(v, t->child);
1417  er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1418  if (er == REG_OKAY)
1419  {
1420  nverified = i;
1421  continue;
1422  }
1423  if (er == REG_NOMATCH)
1424  break;
1425  /* oops, something failed */
1426  FREE(endpts);
1427  return er;
1428  }
1429 
1430  if (i > k)
1431  {
1432  /* satisfaction */
1433  MDEBUG(("%d: successful\n", t->id));
1434  FREE(endpts);
1435  return REG_OKAY;
1436  }
1437 
1438  /* match failed to verify, so backtrack */
1439 
1440 backtrack:
1441 
1442  /*
1443  * Must consider longer versions of the current sub-match.
1444  */
1445  while (k > 0)
1446  {
1447  if (endpts[k] < end)
1448  {
1449  limit = endpts[k] + 1;
1450  /* break out of backtrack loop, continue the outer one */
1451  break;
1452  }
1453  /* can't lengthen k'th sub-match any more, consider previous one */
1454  k--;
1455  }
1456  }
1457 
1458  /* all possibilities exhausted */
1459  MDEBUG(("%d: failed\n", t->id));
1460  FREE(endpts);
1461  return REG_NOMATCH;
1462 }
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
struct subre * child
Definition: regguts.h:495
#define REG_ESPACE
Definition: regex.h:151
char op
Definition: regguts.h:473
#define LOFF(p)
Definition: regexec.c:132
short min
Definition: regguts.h:493
#define FREE(ptr)
Definition: cryptohash.c:37
#define MDEBUG(arglist)
Definition: regguts.h:117
#define MALLOC(n)
Definition: regcustom.h:60
Definition: regexec.c:63
int nstates
Definition: regguts.h:403
int id
Definition: regguts.h:490
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:655
static struct dfa * getsubdfa(struct vars *, struct subre *)
Definition: regexec.c:346
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
int err
Definition: regcomp.c:243
#define DUPINF
Definition: regguts.h:94
short max
Definition: regguts.h:494
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:710
struct cnfa cnfa
Definition: regguts.h:499
#define SHORTER
Definition: regguts.h:476
int i
#define REG_NOMATCH
Definition: regex.h:140
#define ISERR()
Definition: regexec.c:127

◆ dfa_backref()

static chr* dfa_backref ( struct vars ,
struct dfa ,
chr ,
chr ,
chr ,
bool   
)
static

◆ find()

static int find ( struct vars v,
struct cnfa cnfa,
struct colormap cm 
)
static

Definition at line 395 of file regexec.c.

References assert, cdissect(), guts::cflags, close, vars::details, vars::dfa1, vars::err, subre::flags, freedfa(), vars::g, ISERR, LOFF, longest(), MDEBUG, newdfa(), vars::nmatch, NOERR, OFF, vars::pmatch, REG_EXPECT, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, rm_detail_t::rm_extend, regmatch_t::rm_so, guts::search, vars::search_start, SHORTER, shortest(), vars::start, vars::stop, guts::tree, and zapallsubs().

Referenced by NIAddAffix(), NIImportAffixes(), NIImportOOAffixes(), parse_affentry(), pg_regexec(), and testdelete().

398 {
399  struct dfa *s;
400  struct dfa *d;
401  chr *begin;
402  chr *end = NULL;
403  chr *cold;
404  chr *open; /* open and close of range of possible starts */
405  chr *close;
406  int hitend;
407  int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
408 
409  /* first, a shot with the search RE */
410  s = newdfa(v, &v->g->search, cm, &v->dfa1);
411  assert(!(ISERR() && s != NULL));
412  NOERR();
413  MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
414  cold = NULL;
415  close = shortest(v, s, v->search_start, v->search_start, v->stop,
416  &cold, (int *) NULL);
417  freedfa(s);
418  NOERR();
419  if (v->g->cflags & REG_EXPECT)
420  {
421  assert(v->details != NULL);
422  if (cold != NULL)
423  v->details->rm_extend.rm_so = OFF(cold);
424  else
425  v->details->rm_extend.rm_so = OFF(v->stop);
426  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
427  }
428  if (close == NULL) /* not found */
429  return REG_NOMATCH;
430  if (v->nmatch == 0) /* found, don't need exact location */
431  return REG_OKAY;
432 
433  /* find starting point and match */
434  assert(cold != NULL);
435  open = cold;
436  cold = NULL;
437  MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
438  d = newdfa(v, cnfa, cm, &v->dfa1);
439  assert(!(ISERR() && d != NULL));
440  NOERR();
441  for (begin = open; begin <= close; begin++)
442  {
443  MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
444  if (shorter)
445  end = shortest(v, d, begin, begin, v->stop,
446  (chr **) NULL, &hitend);
447  else
448  end = longest(v, d, begin, v->stop, &hitend);
449  if (ISERR())
450  {
451  freedfa(d);
452  return v->err;
453  }
454  if (hitend && cold == NULL)
455  cold = begin;
456  if (end != NULL)
457  break; /* NOTE BREAK OUT */
458  }
459  assert(end != NULL); /* search RE succeeded so loop should */
460  freedfa(d);
461 
462  /* and pin down details */
463  assert(v->nmatch > 0);
464  v->pmatch[0].rm_so = OFF(begin);
465  v->pmatch[0].rm_eo = OFF(end);
466  if (v->g->cflags & REG_EXPECT)
467  {
468  if (cold != NULL)
469  v->details->rm_extend.rm_so = OFF(cold);
470  else
471  v->details->rm_extend.rm_so = OFF(v->stop);
472  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
473  }
474  if (v->nmatch == 1) /* no need for submatches */
475  return REG_OKAY;
476 
477  /* find submatches */
478  zapallsubs(v->pmatch, v->nmatch);
479  return cdissect(v, v->g->tree, begin, end);
480 }
static chr * shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
#define REG_EXPECT
Definition: regex.h:115
struct subre * tree
Definition: regguts.h:533
regoff_t rm_so
Definition: regex.h:87
#define LOFF(p)
Definition: regexec.c:132
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
static chr * longest(struct vars *, struct dfa *, chr *, chr *, int *)
rm_detail_t * details
Definition: regexec.c:113
#define MDEBUG(arglist)
Definition: regguts.h:117
chr * start
Definition: regexec.c:114
regoff_t rm_eo
Definition: regex.h:88
Definition: regexec.c:63
chr * search_start
Definition: regexec.c:115
struct cnfa search
Definition: regguts.h:534
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
char flags
Definition: regguts.h:474
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
int err
Definition: regcomp.c:243
regmatch_t * pmatch
Definition: regexec.c:112
#define OFF(p)
Definition: regexec.c:131
static void freedfa(struct dfa *)
static int cdissect(struct vars *, struct subre *, chr *, chr *)
Definition: regexec.c:710
size_t nmatch
Definition: regexec.c:111
static void zapallsubs(regmatch_t *, size_t)
Definition: regexec.c:639
regmatch_t rm_extend
Definition: regex.h:94
const chr * stop
Definition: regcomp.c:242
int cflags
Definition: regguts.h:530
#define SHORTER
Definition: regguts.h:476
#define REG_NOMATCH
Definition: regex.h:140
#define NOERR()
Definition: regexec.c:130
#define close(a)
Definition: win32.h:12
struct smalldfa dfa1
Definition: regexec.c:122
#define ISERR()
Definition: regexec.c:127

◆ freedfa()

static void freedfa ( struct dfa )
static

Referenced by cfind(), find(), and pg_regexec().

◆ getladfa()

static struct dfa * getladfa ( struct vars v,
int  n 
)
static

Definition at line 374 of file regexec.c.

References assert, guts::cmap, subre::cnfa, DOMALLOC, vars::g, ISERR, guts::lacons, vars::ladfas, and newdfa().

Referenced by lacon().

376 {
377  assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
378 
379  if (v->ladfas[n] == NULL)
380  {
381  struct subre *sub = &v->g->lacons[n];
382 
383  v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
384  if (ISERR())
385  return NULL;
386  /* a LACON can't contain a backref, so nothing else to do */
387  }
388  return v->ladfas[n];
389 }
struct dfa ** ladfas
Definition: regexec.c:119
struct subre * lacons
Definition: regguts.h:538
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
#define DOMALLOC
Definition: regexec.c:101
struct colormap cmap
Definition: regguts.h:536
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
Definition: regguts.h:471
struct cnfa cnfa
Definition: regguts.h:499
#define ISERR()
Definition: regexec.c:127

◆ getsubdfa()

static struct dfa * getsubdfa ( struct vars v,
struct subre t 
)
static

Definition at line 346 of file regexec.c.

References dfa::backmax, dfa::backmin, dfa::backno, subre::backno, guts::cmap, subre::cnfa, DOMALLOC, vars::g, subre::id, ISERR, subre::max, subre::min, newdfa(), subre::op, and vars::subdfas.

Referenced by caltdissect(), ccondissect(), citerdissect(), crevcondissect(), and creviterdissect().

348 {
349  struct dfa *d = v->subdfas[t->id];
350 
351  if (d == NULL)
352  {
353  d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
354  if (ISERR())
355  return NULL;
356  /* set up additional info if this is a backref node */
357  if (t->op == 'b')
358  {
359  d->backno = t->backno;
360  d->backmin = t->min;
361  d->backmax = t->max;
362  }
363  v->subdfas[t->id] = d;
364  }
365  return d;
366 }
short backmin
Definition: regexec.c:81
char op
Definition: regguts.h:473
short min
Definition: regguts.h:493
static struct dfa * newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
Definition: regexec.c:63
int id
Definition: regguts.h:490
#define DOMALLOC
Definition: regexec.c:101
struct colormap cmap
Definition: regguts.h:536
struct guts * g
Definition: regexec.c:109
int backno
Definition: regguts.h:492
struct dfa ** subdfas
Definition: regexec.c:118
int backno
Definition: regexec.c:80
short max
Definition: regguts.h:494
short backmax
Definition: regexec.c:82
struct cnfa cnfa
Definition: regguts.h:499
#define ISERR()
Definition: regexec.c:127

◆ getvacant()

static struct sset* getvacant ( struct vars ,
struct dfa ,
chr ,
chr  
)
static

◆ hash()

static unsigned hash ( unsigned *  ,
int   
)
static

◆ initialize()

static struct sset* initialize ( struct vars ,
struct dfa ,
chr  
)
static

◆ lacon()

static int lacon ( struct vars ,
struct cnfa ,
chr ,
color   
)
static

◆ lastcold()

static chr* lastcold ( struct vars ,
struct dfa  
)
static

◆ longest()

static chr* longest ( struct vars ,
struct dfa ,
chr ,
chr ,
int *   
)
static

◆ matchuntil()

static int matchuntil ( struct vars ,
struct dfa ,
chr ,
struct sset **  ,
chr **   
)
static

◆ miss()

static struct sset* miss ( struct vars ,
struct dfa ,
struct sset ,
color  ,
chr ,
chr  
)
static

◆ newdfa()

static struct dfa* newdfa ( struct vars ,
struct cnfa ,
struct colormap ,
struct smalldfa  
)
static

Referenced by cfind(), find(), getladfa(), and getsubdfa().

◆ pg_regexec()

int pg_regexec ( regex_t re,
const chr string,
size_t  len,
size_t  search_start,
rm_detail_t details,
size_t  nmatch,
regmatch_t  pmatch[],
int  flags 
)

Definition at line 176 of file regexec.c.

References assert, cfind(), guts::cflags, cleanup(), guts::cmap, subre::cnfa, vars::details, vars::eflags, vars::err, find(), FREE, freedfa(), vars::g, i, guts::info, vars::ladfas, vars::lblastcp, vars::lblastcss, LOCALDFAS, LOCALMAT, MALLOC, guts::nlacons, vars::nmatch, guts::nsub, guts::ntree, pg_set_regex_collation(), vars::pmatch, vars::re, regex_t::re_collation, regex_t::re_csize, regex_t::re_guts, regex_t::re_magic, REG_ESPACE, REG_EXPECT, REG_FTRACE, REG_INVARG, REG_MIXED, REG_MTRACE, REG_NOMATCH, REG_NOSUB, REG_OKAY, REG_UBACKREF, REG_UIMPOSSIBLE, REMAGIC, vars::search_start, vars::start, generate_unaccent_rules::stdout, vars::stop, vars::subdfas, guts::tree, VS, and zapallsubs().

Referenced by check_ident_usermap(), CheckAffix(), RE_wchar_execute(), replace_text_regexp(), and test_re_execute().

184 {
185  struct vars var;
186  register struct vars *v = &var;
187  int st;
188  size_t n;
189  size_t i;
190  int backref;
191 
192 #define LOCALMAT 20
193  regmatch_t mat[LOCALMAT];
194 
195 #define LOCALDFAS 40
196  struct dfa *subdfas[LOCALDFAS];
197 
198  /* sanity checks */
199  if (re == NULL || string == NULL || re->re_magic != REMAGIC)
200  return REG_INVARG;
201  if (re->re_csize != sizeof(chr))
202  return REG_MIXED;
203 
204  /* Initialize locale-dependent support */
206 
207  /* setup */
208  v->re = re;
209  v->g = (struct guts *) re->re_guts;
210  if ((v->g->cflags & REG_EXPECT) && details == NULL)
211  return REG_INVARG;
212  if (v->g->info & REG_UIMPOSSIBLE)
213  return REG_NOMATCH;
214  backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
215  v->eflags = flags;
216  if (v->g->cflags & REG_NOSUB)
217  nmatch = 0; /* override client */
218  v->nmatch = nmatch;
219  if (backref)
220  {
221  /* need work area */
222  if (v->g->nsub + 1 <= LOCALMAT)
223  v->pmatch = mat;
224  else
225  v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
226  sizeof(regmatch_t));
227  if (v->pmatch == NULL)
228  return REG_ESPACE;
229  v->nmatch = v->g->nsub + 1;
230  }
231  else
232  v->pmatch = pmatch;
233  v->details = details;
234  v->start = (chr *) string;
235  v->search_start = (chr *) string + search_start;
236  v->stop = (chr *) string + len;
237  v->err = 0;
238  v->subdfas = NULL;
239  v->ladfas = NULL;
240  v->lblastcss = NULL;
241  v->lblastcp = NULL;
242  /* below this point, "goto cleanup" will behave sanely */
243 
244  assert(v->g->ntree >= 0);
245  n = (size_t) v->g->ntree;
246  if (n <= LOCALDFAS)
247  v->subdfas = subdfas;
248  else
249  {
250  v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
251  if (v->subdfas == NULL)
252  {
253  st = REG_ESPACE;
254  goto cleanup;
255  }
256  }
257  for (i = 0; i < n; i++)
258  v->subdfas[i] = NULL;
259 
260  assert(v->g->nlacons >= 0);
261  n = (size_t) v->g->nlacons;
262  if (n > 0)
263  {
264  v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
265  if (v->ladfas == NULL)
266  {
267  st = REG_ESPACE;
268  goto cleanup;
269  }
270  for (i = 0; i < n; i++)
271  v->ladfas[i] = NULL;
272  v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
273  v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
274  if (v->lblastcss == NULL || v->lblastcp == NULL)
275  {
276  st = REG_ESPACE;
277  goto cleanup;
278  }
279  for (i = 0; i < n; i++)
280  {
281  v->lblastcss[i] = NULL;
282  v->lblastcp[i] = NULL;
283  }
284  }
285 
286  /* do it */
287  assert(v->g->tree != NULL);
288  if (backref)
289  st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
290  else
291  st = find(v, &v->g->tree->cnfa, &v->g->cmap);
292 
293  /* copy (portion of) match vector over if necessary */
294  if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0)
295  {
296  zapallsubs(pmatch, nmatch);
297  n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
298  memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
299  }
300 
301  /* clean up */
302 cleanup:
303  if (v->pmatch != pmatch && v->pmatch != mat)
304  FREE(v->pmatch);
305  if (v->subdfas != NULL)
306  {
307  n = (size_t) v->g->ntree;
308  for (i = 0; i < n; i++)
309  {
310  if (v->subdfas[i] != NULL)
311  freedfa(v->subdfas[i]);
312  }
313  if (v->subdfas != subdfas)
314  FREE(v->subdfas);
315  }
316  if (v->ladfas != NULL)
317  {
318  n = (size_t) v->g->nlacons;
319  for (i = 0; i < n; i++)
320  {
321  if (v->ladfas[i] != NULL)
322  freedfa(v->ladfas[i]);
323  }
324  FREE(v->ladfas);
325  }
326  if (v->lblastcss != NULL)
327  FREE(v->lblastcss);
328  if (v->lblastcp != NULL)
329  FREE(v->lblastcp);
330 
331 #ifdef REG_DEBUG
332  if (v->eflags & (REG_FTRACE | REG_MTRACE))
333  fflush(stdout);
334 #endif
335 
336  return st;
337 }
#define REG_UIMPOSSIBLE
Definition: regex.h:74
struct dfa ** ladfas
Definition: regexec.c:119
static int find(struct vars *, struct cnfa *, struct colormap *)
Definition: regexec.c:395
#define REG_EXPECT
Definition: regex.h:115
struct subre * tree
Definition: regguts.h:533
#define REG_ESPACE
Definition: regex.h:151
int nlacons
Definition: regguts.h:539
#define REMAGIC
Definition: regguts.h:96
#define REG_FTRACE
Definition: regex.h:129
#define FREE(ptr)
Definition: cryptohash.c:37
rm_detail_t * details
Definition: regexec.c:113
int re_csize
Definition: regex.h:76
#define REG_MTRACE
Definition: regex.h:130
chr * start
Definition: regexec.c:114
#define MALLOC(n)
Definition: regcustom.h:60
Definition: regexec.c:63
struct sset ** lblastcss
Definition: regexec.c:120
chr * search_start
Definition: regexec.c:115
int re_magic
Definition: regex.h:57
pg_wchar chr
Definition: regcustom.h:66
#define REG_OKAY
Definition: regex.h:139
regex_t * re
Definition: regcomp.c:240
#define REG_INVARG
Definition: regex.h:154
struct colormap cmap
Definition: regguts.h:536
#define assert(TEST)
Definition: imath.c:73
struct guts * g
Definition: regexec.c:109
int err
Definition: regcomp.c:243
#define LOCALMAT
int eflags
Definition: regexec.c:110
static void cleanup(void)
Definition: bootstrap.c:862
Definition: regguts.h:526
int ntree
Definition: regguts.h:535
#define REG_UBACKREF
Definition: regex.h:60
#define LOCALDFAS
struct dfa ** subdfas
Definition: regexec.c:118
void pg_set_regex_collation(Oid collation)
regmatch_t * pmatch
Definition: regexec.c:112
#define REG_NOSUB
Definition: regex.h:109
static void freedfa(struct dfa *)
#define VS(x)
Definition: regguts.h:61
long info
Definition: regguts.h:531
struct cnfa cnfa
Definition: regguts.h:499
size_t nmatch
Definition: regexec.c:111
static void zapallsubs(regmatch_t *, size_t)
Definition: regexec.c:639
const chr * stop
Definition: regcomp.c:242
char * re_guts
Definition: regex.h:80
int cflags
Definition: regguts.h:530
Definition: regexec.c:45
size_t nsub
Definition: regguts.h:532
int i
#define REG_NOMATCH
Definition: regex.h:140
chr ** lblastcp
Definition: regexec.c:121
#define REG_MIXED
Definition: regex.h:155
Definition: regcomp.c:238
Oid re_collation
Definition: regex.h:78
static int cfind(struct vars *, struct cnfa *, struct colormap *)
Definition: regexec.c:486

◆ pickss()

static struct sset* pickss ( struct vars ,
struct dfa ,
chr ,
chr  
)
static

◆ shortest()

static chr* shortest ( struct vars ,
struct dfa ,
chr ,
chr ,
chr ,
chr **  ,
int *   
)
static

◆ subset()

static void subset ( struct vars v,
struct subre sub,
chr begin,
chr end 
)
static

Definition at line 678 of file regexec.c.

References assert, subre::capno, subre::id, LOFF, MDEBUG, vars::nmatch, OFF, vars::pmatch, regmatch_t::rm_eo, and regmatch_t::rm_so.

Referenced by cdissect().

682 {
683  int n = sub->capno;
684 
685  assert(n > 0);
686  if ((size_t) n >= v->nmatch)
687  return;
688 
689  MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
690  v->pmatch[n].rm_so = OFF(begin);
691  v->pmatch[n].rm_eo = OFF(end);
692 }
int capno
Definition: regguts.h:491
regoff_t rm_so
Definition: regex.h:87
#define LOFF(p)
Definition: regexec.c:132
#define MDEBUG(arglist)
Definition: regguts.h:117
regoff_t rm_eo
Definition: regex.h:88
int id
Definition: regguts.h:490
#define assert(TEST)
Definition: imath.c:73
regmatch_t * pmatch
Definition: regexec.c:112
#define OFF(p)
Definition: regexec.c:131
size_t nmatch
Definition: regexec.c:111

◆ zapallsubs()

static void zapallsubs ( regmatch_t p,
size_t  n 
)
static

Definition at line 639 of file regexec.c.

References i, regmatch_t::rm_eo, and regmatch_t::rm_so.

Referenced by cfindloop(), find(), and pg_regexec().

641 {
642  size_t i;
643 
644  for (i = n - 1; i > 0; i--)
645  {
646  p[i].rm_so = -1;
647  p[i].rm_eo = -1;
648  }
649 }
regoff_t rm_so
Definition: regex.h:87
regoff_t rm_eo
Definition: regex.h:88
int i

◆ zaptreesubs()

static void zaptreesubs ( struct vars v,
struct subre t 
)
static

Definition at line 655 of file regexec.c.

References subre::capno, subre::child, vars::pmatch, regmatch_t::rm_eo, regmatch_t::rm_so, and subre::sibling.

Referenced by ccondissect(), citerdissect(), crevcondissect(), and creviterdissect().

657 {
658  int n = t->capno;
659  struct subre *t2;
660 
661  if (n > 0)
662  {
663  if ((size_t) n < v->nmatch)
664  {
665  v->pmatch[n].rm_so = -1;
666  v->pmatch[n].rm_eo = -1;
667  }
668  }
669 
670  for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
671  zaptreesubs(v, t2);
672 }
struct subre * child
Definition: regguts.h:495
int capno
Definition: regguts.h:491
regoff_t rm_so
Definition: regex.h:87
regoff_t rm_eo
Definition: regex.h:88
static void zaptreesubs(struct vars *, struct subre *)
Definition: regexec.c:655
struct subre * sibling
Definition: regguts.h:496
regmatch_t * pmatch
Definition: regexec.c:112
Definition: regguts.h:471