PostgreSQL Source Code  git master
regexec.c File Reference
#include "regex/regguts.h"
#include "rege_dfa.c"
Include dependency graph for regexec.c:

Go to the source code of this file.

Data Structures

struct  arcp
 
struct  sset
 
struct  dfa
 
struct  smalldfa
 
struct  vars
 

Macros

#define HASH(bv, nw)   (((nw) == 1) ? *(bv) : hash(bv, nw))
 
#define HIT(h, bv, ss, nw)
 
#define STARTER   01 /* the initial state set */
 
#define POSTSTATE   02 /* includes the goal state */
 
#define LOCKED   04 /* locked in cache */
 
#define NOPROGRESS   010 /* zero-progress state set */
 
#define WORK   1 /* number of work bitvectors needed */
 
#define FEWSTATES   20 /* must be less than UBITS */
 
#define FEWCOLORS   15
 
#define DOMALLOC   ((struct smalldfa *)NULL) /* force malloc */
 
#define VISERR(vv)   ((vv)->err != 0) /* have we seen an error yet? */
 
#define ISERR()   VISERR(v)
 
#define VERR(vv, e)   ((vv)->err = ((vv)->err ? (vv)->err : (e)))
 
#define ERR(e)   VERR(v, e) /* record an error */
 
#define NOERR()   {if (ISERR()) return v->err;} /* if error seen, return it */
 
#define OFF(p)   ((p) - v->start)
 
#define LOFF(p)   ((long)OFF(p))
 
#define LOCALMAT   20
 
#define LOCALDFAS   40
 

Functions

static struct dfagetsubdfa (struct vars *v, struct subre *t)
 
static struct dfagetladfa (struct vars *v, int n)
 
static int find (struct vars *v, struct cnfa *cnfa, struct colormap *cm)
 
static int cfind (struct vars *v, struct cnfa *cnfa, struct colormap *cm)
 
static int cfindloop (struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct dfa *d, struct dfa *s, chr **coldp)
 
static void zapallsubs (regmatch_t *p, size_t n)
 
static void zaptreesubs (struct vars *v, struct subre *t)
 
static void subset (struct vars *v, struct subre *sub, chr *begin, chr *end)
 
static int cdissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int ccondissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int crevcondissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int cbrdissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int caltdissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int citerdissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int creviterdissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static chrlongest (struct vars *v, struct dfa *d, chr *start, chr *stop, int *hitstopp)
 
static chrshortest (struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
 
static int matchuntil (struct vars *v, struct dfa *d, chr *probe, struct sset **lastcss, chr **lastcp)
 
static chrdfa_backref (struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, bool shortest)
 
static chrlastcold (struct vars *v, struct dfa *d)
 
static struct dfanewdfa (struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct smalldfa *sml)
 
static void freedfa (struct dfa *d)
 
static unsigned hash (unsigned *uv, int n)
 
static struct ssetinitialize (struct vars *v, struct dfa *d, chr *start)
 
static struct ssetmiss (struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
 
static int lacon (struct vars *v, struct cnfa *pcnfa, chr *cp, color co)
 
static struct ssetgetvacant (struct vars *v, struct dfa *d, chr *cp, chr *start)
 
static struct ssetpickss (struct vars *v, struct dfa *d, chr *cp, chr *start)
 
int pg_regexec (regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
 

Macro Definition Documentation

◆ DOMALLOC

#define DOMALLOC   ((struct smalldfa *)NULL) /* force malloc */

Definition at line 101 of file regexec.c.

◆ ERR

#define ERR (   e)    VERR(v, e) /* record an error */

Definition at line 129 of file regexec.c.

◆ FEWCOLORS

#define FEWCOLORS   15

Definition at line 91 of file regexec.c.

◆ FEWSTATES

#define FEWSTATES   20 /* must be less than UBITS */

Definition at line 90 of file regexec.c.

◆ HASH

#define HASH (   bv,
  nw 
)    (((nw) == 1) ? *(bv) : hash(bv, nw))

Definition at line 49 of file regexec.c.

◆ HIT

#define HIT (   h,
  bv,
  ss,
  nw 
)
Value:
((ss)->hash == (h) && ((nw) == 1 || \
memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
static unsigned hash(unsigned *uv, int n)
#define VS(x)
Definition: regguts.h:61

Definition at line 50 of file regexec.c.

◆ ISERR

#define ISERR ( )    VISERR(v)

Definition at line 127 of file regexec.c.

◆ LOCALDFAS

#define LOCALDFAS   40

◆ LOCALMAT

#define LOCALMAT   20

◆ LOCKED

#define LOCKED   04 /* locked in cache */

Definition at line 55 of file regexec.c.

◆ LOFF

#define LOFF (   p)    ((long)OFF(p))

Definition at line 132 of file regexec.c.

◆ NOERR

#define NOERR ( )    {if (ISERR()) return v->err;} /* if error seen, return it */

Definition at line 130 of file regexec.c.

◆ NOPROGRESS

#define NOPROGRESS   010 /* zero-progress state set */

Definition at line 56 of file regexec.c.

◆ OFF

#define OFF (   p)    ((p) - v->start)

Definition at line 131 of file regexec.c.

◆ POSTSTATE

#define POSTSTATE   02 /* includes the goal state */

Definition at line 54 of file regexec.c.

◆ STARTER

#define STARTER   01 /* the initial state set */

Definition at line 53 of file regexec.c.

◆ VERR

#define VERR (   vv,
  e 
)    ((vv)->err = ((vv)->err ? (vv)->err : (e)))

Definition at line 128 of file regexec.c.

◆ VISERR

#define VISERR (   vv)    ((vv)->err != 0) /* have we seen an error yet? */

Definition at line 126 of file regexec.c.

◆ WORK

#define WORK   1 /* number of work bitvectors needed */

Definition at line 87 of file regexec.c.

Function Documentation

◆ caltdissect()

static int caltdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1076 of file regexec.c.

1080 {
1081  struct dfa *d;
1082  int er;
1083 
1084  assert(t->op == '|');
1085 
1086  t = t->child;
1087  /* there should be at least 2 alternatives */
1088  assert(t != NULL && t->sibling != NULL);
1089 
1090  while (t != NULL)
1091  {
1092  assert(t->cnfa.nstates > 0);
1093 
1094  MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1095 
1096  d = getsubdfa(v, t);
1097  NOERR();
1098  if (longest(v, d, begin, end, (int *) NULL) == end)
1099  {
1100  MDEBUG(("%d: caltdissect matched\n", t->id));
1101  er = cdissect(v, t, begin, end);
1102  if (er != REG_NOMATCH)
1103  return er;
1104  }
1105  NOERR();
1106 
1107  t = t->sibling;
1108  }
1109 
1110  return REG_NOMATCH;
1111 }
#define assert(x)
Definition: regcustom.h:56
#define REG_NOMATCH
Definition: regex.h:138
#define LOFF(p)
Definition: regexec.c:132
#define NOERR()
Definition: regexec.c:130
static chr * longest(struct vars *v, struct dfa *d, chr *start, chr *stop, int *hitstopp)
static struct dfa * getsubdfa(struct vars *v, struct subre *t)
Definition: regexec.c:372
static int cdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:756
#define MDEBUG(arglist)
Definition: regguts.h:122
int nstates
Definition: regguts.h:408
Definition: regexec.c:64
char op
Definition: regguts.h:478
struct subre * sibling
Definition: regguts.h:502
int id
Definition: regguts.h:496
struct cnfa cnfa
Definition: regguts.h:505
struct subre * child
Definition: regguts.h:501

References assert, cdissect(), subre::child, subre::cnfa, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_NOMATCH, and subre::sibling.

Referenced by cdissect().

◆ cbrdissect()

static int cbrdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 994 of file regexec.c.

998 {
999  int n = t->backno;
1000  size_t numreps;
1001  size_t tlen;
1002  size_t brlen;
1003  chr *brstring;
1004  chr *p;
1005  int min = t->min;
1006  int max = t->max;
1007 
1008  assert(t != NULL);
1009  assert(t->op == 'b');
1010  assert(n >= 0);
1011  assert((size_t) n < v->nmatch);
1012 
1013  MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
1014  LOFF(begin), LOFF(end)));
1015 
1016  /* get the backreferenced string */
1017  if (v->pmatch[n].rm_so == -1)
1018  return REG_NOMATCH;
1019  brstring = v->start + v->pmatch[n].rm_so;
1020  brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
1021 
1022  /* special cases for zero-length strings */
1023  if (brlen == 0)
1024  {
1025  /*
1026  * matches only if target is zero length, but any number of
1027  * repetitions can be considered to be present
1028  */
1029  if (begin == end && min <= max)
1030  {
1031  MDEBUG(("%d: backref matched trivially\n", t->id));
1032  return REG_OKAY;
1033  }
1034  return REG_NOMATCH;
1035  }
1036  if (begin == end)
1037  {
1038  /* matches only if zero repetitions are okay */
1039  if (min == 0)
1040  {
1041  MDEBUG(("%d: backref matched trivially\n", t->id));
1042  return REG_OKAY;
1043  }
1044  return REG_NOMATCH;
1045  }
1046 
1047  /*
1048  * check target length to see if it could possibly be an allowed number of
1049  * repetitions of brstring
1050  */
1051  assert(end > begin);
1052  tlen = end - begin;
1053  if (tlen % brlen != 0)
1054  return REG_NOMATCH;
1055  numreps = tlen / brlen;
1056  if (numreps < min || (numreps > max && max != DUPINF))
1057  return REG_NOMATCH;
1058 
1059  /* okay, compare the actual string contents */
1060  p = begin;
1061  while (numreps-- > 0)
1062  {
1063  if ((*v->g->compare) (brstring, p, brlen) != 0)
1064  return REG_NOMATCH;
1065  p += brlen;
1066  }
1067 
1068  MDEBUG(("%d: backref matched\n", t->id));
1069  return REG_OKAY;
1070 }
pg_wchar chr
Definition: regcustom.h:59
#define REG_OKAY
Definition: regex.h:137
#define DUPINF
Definition: regguts.h:99
regoff_t rm_eo
Definition: regex.h:86
regoff_t rm_so
Definition: regex.h:85
int backno
Definition: regguts.h:498
short min
Definition: regguts.h:499
short max
Definition: regguts.h:500
struct guts * g
Definition: regexec.c:109
chr * start
Definition: regexec.c:114
regmatch_t * pmatch
Definition: regexec.c:112

References assert, subre::backno, DUPINF, vars::g, subre::id, LOFF, subre::max, MDEBUG, subre::min, subre::op, vars::pmatch, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, regmatch_t::rm_so, and vars::start.

Referenced by cdissect().

◆ ccondissect()

static int ccondissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 829 of file regexec.c.

833 {
834  struct subre *left = t->child;
835  struct subre *right = left->sibling;
836  struct dfa *d;
837  struct dfa *d2;
838  chr *mid;
839  int er;
840 
841  assert(t->op == '.');
842  assert(left != NULL && left->cnfa.nstates > 0);
843  assert(right != NULL && right->cnfa.nstates > 0);
844  assert(right->sibling == NULL);
845  assert(!(left->flags & SHORTER));
846 
847  d = getsubdfa(v, left);
848  NOERR();
849  d2 = getsubdfa(v, right);
850  NOERR();
851  MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
852 
853  /* pick a tentative midpoint */
854  mid = longest(v, d, begin, end, (int *) NULL);
855  NOERR();
856  if (mid == NULL)
857  return REG_NOMATCH;
858  MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
859 
860  /* iterate until satisfaction or failure */
861  for (;;)
862  {
863  /* try this midpoint on for size */
864  if (longest(v, d2, mid, end, (int *) NULL) == end)
865  {
866  er = cdissect(v, left, begin, mid);
867  if (er == REG_OKAY)
868  {
869  er = cdissect(v, right, mid, end);
870  if (er == REG_OKAY)
871  {
872  /* satisfaction */
873  MDEBUG(("%d: successful\n", t->id));
874  return REG_OKAY;
875  }
876  /* Reset left's matches (right should have done so itself) */
877  zaptreesubs(v, left);
878  }
879  if (er != REG_NOMATCH)
880  return er;
881  }
882  NOERR();
883 
884  /* that midpoint didn't work, find a new one */
885  if (mid == begin)
886  {
887  /* all possibilities exhausted */
888  MDEBUG(("%d: no midpoint\n", t->id));
889  return REG_NOMATCH;
890  }
891  mid = longest(v, d, begin, mid - 1, (int *) NULL);
892  NOERR();
893  if (mid == NULL)
894  {
895  /* failed to find a new one */
896  MDEBUG(("%d: failed midpoint\n", t->id));
897  return REG_NOMATCH;
898  }
899  MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
900  }
901 
902  /* can't get here */
903  return REG_ASSERT;
904 }
#define REG_ASSERT
Definition: regex.h:151
static void zaptreesubs(struct vars *v, struct subre *t)
Definition: regexec.c:679
#define SHORTER
Definition: regguts.h:481
Definition: regguts.h:477
char flags
Definition: regguts.h:479

References assert, cdissect(), subre::child, subre::cnfa, subre::flags, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_ASSERT, REG_NOMATCH, REG_OKAY, SHORTER, subre::sibling, and zaptreesubs().

Referenced by cdissect().

◆ cdissect()

static int cdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 756 of file regexec.c.

760 {
761  int er;
762 
763  assert(t != NULL);
764  MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
765 
766  /* handy place to check for operation cancel */
767  INTERRUPT(v->re);
768  /* ... and stack overrun */
769  if (STACK_TOO_DEEP(v->re))
770  return REG_ETOOBIG;
771 
772  switch (t->op)
773  {
774  case '=': /* terminal node */
775  assert(t->child == NULL);
776  er = REG_OKAY; /* no action, parent did the work */
777  break;
778  case 'b': /* back reference */
779  assert(t->child == NULL);
780  er = cbrdissect(v, t, begin, end);
781  break;
782  case '.': /* concatenation */
783  assert(t->child != NULL);
784  if (t->child->flags & SHORTER) /* reverse scan */
785  er = crevcondissect(v, t, begin, end);
786  else
787  er = ccondissect(v, t, begin, end);
788  break;
789  case '|': /* alternation */
790  assert(t->child != NULL);
791  er = caltdissect(v, t, begin, end);
792  break;
793  case '*': /* iteration */
794  assert(t->child != NULL);
795  if (t->child->flags & SHORTER) /* reverse scan */
796  er = creviterdissect(v, t, begin, end);
797  else
798  er = citerdissect(v, t, begin, end);
799  break;
800  case '(': /* no-op capture node */
801  assert(t->child != NULL);
802  er = cdissect(v, t->child, begin, end);
803  break;
804  default:
805  er = REG_ASSERT;
806  break;
807  }
808 
809  /*
810  * We should never have a match failure unless backrefs lurk below;
811  * otherwise, either caller failed to check the DFA, or there's some
812  * inconsistency between the DFA and the node's innards.
813  */
814  assert(er != REG_NOMATCH || (t->flags & BACKR));
815 
816  /*
817  * If this node is marked as capturing, save successful match's location.
818  */
819  if (t->capno > 0 && er == REG_OKAY)
820  subset(v, t, begin, end);
821 
822  return er;
823 }
#define INTERRUPT(re)
Definition: regcustom.h:55
#define REG_ETOOBIG
Definition: regex.h:155
static int citerdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:1117
static int ccondissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:829
static int creviterdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:1321
static int cbrdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:994
static int crevcondissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:910
static int caltdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:1076
static void subset(struct vars *v, struct subre *sub, chr *begin, chr *end)
Definition: regexec.c:702
#define BACKR
Definition: regguts.h:484
#define STACK_TOO_DEEP(re)
Definition: regguts.h:521
int capno
Definition: regguts.h:497
regex_t * re
Definition: regcomp.c:282

References assert, BACKR, subre::begin, caltdissect(), subre::capno, cbrdissect(), ccondissect(), subre::child, citerdissect(), crevcondissect(), creviterdissect(), subre::end, subre::flags, subre::id, INTERRUPT, LOFF, MDEBUG, subre::op, vars::re, REG_ASSERT, REG_ETOOBIG, REG_NOMATCH, REG_OKAY, SHORTER, STACK_TOO_DEEP, and subset().

Referenced by caltdissect(), ccondissect(), cfindloop(), citerdissect(), crevcondissect(), creviterdissect(), and find().

◆ cfind()

static int cfind ( struct vars v,
struct cnfa cnfa,
struct colormap cm 
)
static

Definition at line 509 of file regexec.c.

512 {
513  struct dfa *s;
514  struct dfa *d;
515  chr *cold;
516  int ret;
517 
518  s = newdfa(v, &v->g->search, cm, &v->dfa1);
519  if (s == NULL)
520  return v->err;
521  d = newdfa(v, cnfa, cm, &v->dfa2);
522  if (d == NULL)
523  {
524  freedfa(s);
525  return v->err;
526  }
527 
528  ret = cfindloop(v, cnfa, cm, d, s, &cold);
529 
530  freedfa(d);
531  freedfa(s);
532  NOERR();
533  if (v->g->cflags & REG_EXPECT)
534  {
535  assert(v->details != NULL);
536  if (cold != NULL)
537  v->details->rm_extend.rm_so = OFF(cold);
538  else
539  v->details->rm_extend.rm_so = OFF(v->stop);
540  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
541  }
542  return ret;
543 }
#define REG_EXPECT
Definition: regex.h:113
static void freedfa(struct dfa *d)
static int cfindloop(struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct dfa *d, struct dfa *s, chr **coldp)
Definition: regexec.c:549
#define OFF(p)
Definition: regexec.c:131
static struct dfa * newdfa(struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct smalldfa *sml)
Definition: regguts.h:407
struct colormap * cm
Definition: regexec.c:76
struct cnfa search
Definition: regguts.h:536
int cflags
Definition: regguts.h:532
regmatch_t rm_extend
Definition: regex.h:92
const chr * stop
Definition: regcomp.c:284
int err
Definition: regcomp.c:285
struct smalldfa dfa1
Definition: regexec.c:122
rm_detail_t * details
Definition: regexec.c:113
struct smalldfa dfa2
Definition: regexec.c:123

References assert, cfindloop(), guts::cflags, dfa::cm, vars::details, vars::dfa1, vars::dfa2, vars::err, freedfa(), vars::g, newdfa(), NOERR, OFF, REG_EXPECT, regmatch_t::rm_eo, rm_detail_t::rm_extend, regmatch_t::rm_so, guts::search, and vars::stop.

Referenced by pg_regexec().

◆ cfindloop()

static int cfindloop ( struct vars v,
struct cnfa cnfa,
struct colormap cm,
struct dfa d,
struct dfa s,
chr **  coldp 
)
static

Definition at line 549 of file regexec.c.

555 {
556  chr *begin;
557  chr *end;
558  chr *cold;
559  chr *open; /* open and close of range of possible starts */
560  chr *close;
561  chr *estart;
562  chr *estop;
563  int er;
564  int shorter = v->g->tree->flags & SHORTER;
565  int hitend;
566 
567  assert(d != NULL && s != NULL);
568  cold = NULL;
569  close = v->search_start;
570  do
571  {
572  /* Search with the search RE for match range at/beyond "close" */
573  MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
574  close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
575  if (ISERR())
576  {
577  *coldp = cold;
578  return v->err;
579  }
580  if (close == NULL)
581  break; /* no more possible match anywhere */
582  assert(cold != NULL);
583  open = cold;
584  cold = NULL;
585  /* Search for matches starting between "open" and "close" inclusive */
586  MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
587  for (begin = open; begin <= close; begin++)
588  {
589  MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
590  estart = begin;
591  estop = v->stop;
592  for (;;)
593  {
594  /* Here we use the top node's detailed RE */
595  if (shorter)
596  end = shortest(v, d, begin, estart,
597  estop, (chr **) NULL, &hitend);
598  else
599  end = longest(v, d, begin, estop,
600  &hitend);
601  if (ISERR())
602  {
603  *coldp = cold;
604  return v->err;
605  }
606  if (hitend && cold == NULL)
607  cold = begin;
608  if (end == NULL)
609  break; /* no match with this begin point, try next */
610  MDEBUG(("tentative end %ld\n", LOFF(end)));
611  /* Dissect the potential match to see if it really matches */
612  er = cdissect(v, v->g->tree, begin, end);
613  if (er == REG_OKAY)
614  {
615  if (v->nmatch > 0)
616  {
617  v->pmatch[0].rm_so = OFF(begin);
618  v->pmatch[0].rm_eo = OFF(end);
619  }
620  *coldp = cold;
621  return REG_OKAY;
622  }
623  if (er != REG_NOMATCH)
624  {
625  ERR(er);
626  *coldp = cold;
627  return er;
628  }
629  /* Try next longer/shorter match with same begin point */
630  if (shorter)
631  {
632  if (end == estop)
633  break; /* no more, so try next begin point */
634  estart = end + 1;
635  }
636  else
637  {
638  if (end == begin)
639  break; /* no more, so try next begin point */
640  estop = end - 1;
641  }
642  } /* end loop over endpoint positions */
643  } /* end loop over beginning positions */
644 
645  /*
646  * If we get here, there is no possible match starting at or before
647  * "close", so consider matches beyond that. We'll do a fresh search
648  * with the search RE to find a new promising match range.
649  */
650  close++;
651  } while (close < v->stop);
652 
653  *coldp = cold;
654  return REG_NOMATCH;
655 }
#define close(a)
Definition: win32.h:12
#define ISERR()
Definition: regexec.c:127
#define ERR(e)
Definition: regexec.c:129
static chr * shortest(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
struct subre * tree
Definition: regguts.h:535
size_t nmatch
Definition: regexec.c:111
chr * search_start
Definition: regexec.c:115

References assert, cdissect(), close, vars::err, ERR, subre::flags, vars::g, ISERR, LOFF, longest(), MDEBUG, vars::nmatch, OFF, vars::pmatch, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, regmatch_t::rm_so, vars::search_start, SHORTER, shortest(), vars::stop, and guts::tree.

Referenced by cfind().

◆ citerdissect()

static int citerdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1117 of file regexec.c.

1121 {
1122  struct dfa *d;
1123  chr **endpts;
1124  chr *limit;
1125  int min_matches;
1126  size_t max_matches;
1127  int nverified;
1128  int k;
1129  int i;
1130  int er;
1131 
1132  assert(t->op == '*');
1133  assert(t->child != NULL && t->child->cnfa.nstates > 0);
1134  assert(!(t->child->flags & SHORTER));
1135  assert(begin <= end);
1136 
1137  MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1138 
1139  /*
1140  * For the moment, assume the minimum number of matches is 1. If zero
1141  * matches are allowed, and the target string is empty, we are allowed to
1142  * match regardless of the contents of the iter node --- but we would
1143  * prefer to match once, so that capturing parens get set. (An example of
1144  * the concern here is a pattern like "()*\1", which historically this
1145  * code has allowed to succeed.) Therefore, we deal with the zero-matches
1146  * case at the bottom, after failing to find any other way to match.
1147  */
1148  min_matches = t->min;
1149  if (min_matches <= 0)
1150  min_matches = 1;
1151 
1152  /*
1153  * We need workspace to track the endpoints of each sub-match. Normally
1154  * we consider only nonzero-length sub-matches, so there can be at most
1155  * end-begin of them. However, if min is larger than that, we will also
1156  * consider zero-length sub-matches in order to find enough matches.
1157  *
1158  * For convenience, endpts[0] contains the "begin" pointer and we store
1159  * sub-match endpoints in endpts[1..max_matches].
1160  */
1161  max_matches = end - begin;
1162  if (max_matches > t->max && t->max != DUPINF)
1163  max_matches = t->max;
1164  if (max_matches < min_matches)
1165  max_matches = min_matches;
1166  endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1167  if (endpts == NULL)
1168  return REG_ESPACE;
1169  endpts[0] = begin;
1170 
1171  d = getsubdfa(v, t->child);
1172  if (ISERR())
1173  {
1174  FREE(endpts);
1175  return v->err;
1176  }
1177 
1178  /*
1179  * Our strategy is to first find a set of sub-match endpoints that are
1180  * valid according to the child node's DFA, and then recursively dissect
1181  * each sub-match to confirm validity. If any validity check fails,
1182  * backtrack that sub-match and try again. And, when we next try for a
1183  * validity check, we need not recheck any successfully verified
1184  * sub-matches that we didn't move the endpoints of. nverified remembers
1185  * how many sub-matches are currently known okay.
1186  */
1187 
1188  /* initialize to consider first sub-match */
1189  nverified = 0;
1190  k = 1;
1191  limit = end;
1192 
1193  /* iterate until satisfaction or failure */
1194  while (k > 0)
1195  {
1196  /* try to find an endpoint for the k'th sub-match */
1197  endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
1198  if (ISERR())
1199  {
1200  FREE(endpts);
1201  return v->err;
1202  }
1203  if (endpts[k] == NULL)
1204  {
1205  /* no match possible, so see if we can shorten previous one */
1206  k--;
1207  goto backtrack;
1208  }
1209  MDEBUG(("%d: working endpoint %d: %ld\n",
1210  t->id, k, LOFF(endpts[k])));
1211 
1212  /* k'th sub-match can no longer be considered verified */
1213  if (nverified >= k)
1214  nverified = k - 1;
1215 
1216  if (endpts[k] != end)
1217  {
1218  /* haven't reached end yet, try another iteration if allowed */
1219  if (k >= max_matches)
1220  {
1221  /* must try to shorten some previous match */
1222  k--;
1223  goto backtrack;
1224  }
1225 
1226  /* reject zero-length match unless necessary to achieve min */
1227  if (endpts[k] == endpts[k - 1] &&
1228  (k >= min_matches || min_matches - k < end - endpts[k]))
1229  goto backtrack;
1230 
1231  k++;
1232  limit = end;
1233  continue;
1234  }
1235 
1236  /*
1237  * We've identified a way to divide the string into k sub-matches that
1238  * works so far as the child DFA can tell. If k is an allowed number
1239  * of matches, start the slow part: recurse to verify each sub-match.
1240  * We always have k <= max_matches, needn't check that.
1241  */
1242  if (k < min_matches)
1243  goto backtrack;
1244 
1245  MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1246 
1247  for (i = nverified + 1; i <= k; i++)
1248  {
1249  /* zap any match data from a non-last iteration */
1250  zaptreesubs(v, t->child);
1251  er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1252  if (er == REG_OKAY)
1253  {
1254  nverified = i;
1255  continue;
1256  }
1257  if (er == REG_NOMATCH)
1258  break;
1259  /* oops, something failed */
1260  FREE(endpts);
1261  return er;
1262  }
1263 
1264  if (i > k)
1265  {
1266  /* satisfaction */
1267  MDEBUG(("%d: successful\n", t->id));
1268  FREE(endpts);
1269  return REG_OKAY;
1270  }
1271 
1272  /* i'th match failed to verify, so backtrack it */
1273  k = i;
1274 
1275 backtrack:
1276 
1277  /*
1278  * Must consider shorter versions of the k'th sub-match. However,
1279  * we'll only ask for a zero-length match if necessary.
1280  */
1281  while (k > 0)
1282  {
1283  chr *prev_end = endpts[k - 1];
1284 
1285  if (endpts[k] > prev_end)
1286  {
1287  limit = endpts[k] - 1;
1288  if (limit > prev_end ||
1289  (k < min_matches && min_matches - k >= end - prev_end))
1290  {
1291  /* break out of backtrack loop, continue the outer one */
1292  break;
1293  }
1294  }
1295  /* can't shorten k'th sub-match any more, consider previous one */
1296  k--;
1297  }
1298  }
1299 
1300  /* all possibilities exhausted */
1301  FREE(endpts);
1302 
1303  /*
1304  * Now consider the possibility that we can match to a zero-length string
1305  * by using zero repetitions.
1306  */
1307  if (t->min == 0 && begin == end)
1308  {
1309  MDEBUG(("%d: allowing zero matches\n", t->id));
1310  return REG_OKAY;
1311  }
1312 
1313  MDEBUG(("%d: failed\n", t->id));
1314  return REG_NOMATCH;
1315 }
#define FREE(ptr)
Definition: cryptohash.c:37
int i
Definition: isn.c:73
#define MALLOC(n)
Definition: regcustom.h:52
#define REG_ESPACE
Definition: regex.h:149

References assert, cdissect(), subre::child, subre::cnfa, DUPINF, vars::err, subre::flags, FREE, getsubdfa(), i, subre::id, ISERR, LOFF, longest(), MALLOC, subre::max, MDEBUG, subre::min, cnfa::nstates, subre::op, REG_ESPACE, REG_NOMATCH, REG_OKAY, SHORTER, and zaptreesubs().

Referenced by cdissect().

◆ crevcondissect()

static int crevcondissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 910 of file regexec.c.

914 {
915  struct subre *left = t->child;
916  struct subre *right = left->sibling;
917  struct dfa *d;
918  struct dfa *d2;
919  chr *mid;
920  int er;
921 
922  assert(t->op == '.');
923  assert(left != NULL && left->cnfa.nstates > 0);
924  assert(right != NULL && right->cnfa.nstates > 0);
925  assert(right->sibling == NULL);
926  assert(left->flags & SHORTER);
927 
928  d = getsubdfa(v, left);
929  NOERR();
930  d2 = getsubdfa(v, right);
931  NOERR();
932  MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
933 
934  /* pick a tentative midpoint */
935  mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
936  NOERR();
937  if (mid == NULL)
938  return REG_NOMATCH;
939  MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
940 
941  /* iterate until satisfaction or failure */
942  for (;;)
943  {
944  /* try this midpoint on for size */
945  if (longest(v, d2, mid, end, (int *) NULL) == end)
946  {
947  er = cdissect(v, left, begin, mid);
948  if (er == REG_OKAY)
949  {
950  er = cdissect(v, right, mid, end);
951  if (er == REG_OKAY)
952  {
953  /* satisfaction */
954  MDEBUG(("%d: successful\n", t->id));
955  return REG_OKAY;
956  }
957  /* Reset left's matches (right should have done so itself) */
958  zaptreesubs(v, left);
959  }
960  if (er != REG_NOMATCH)
961  return er;
962  }
963  NOERR();
964 
965  /* that midpoint didn't work, find a new one */
966  if (mid == end)
967  {
968  /* all possibilities exhausted */
969  MDEBUG(("%d: no midpoint\n", t->id));
970  return REG_NOMATCH;
971  }
972  mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
973  NOERR();
974  if (mid == NULL)
975  {
976  /* failed to find a new one */
977  MDEBUG(("%d: failed midpoint\n", t->id));
978  return REG_NOMATCH;
979  }
980  MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
981  }
982 
983  /* can't get here */
984  return REG_ASSERT;
985 }

References assert, cdissect(), subre::child, subre::cnfa, subre::flags, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_ASSERT, REG_NOMATCH, REG_OKAY, SHORTER, shortest(), subre::sibling, and zaptreesubs().

Referenced by cdissect().

◆ creviterdissect()

static int creviterdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1321 of file regexec.c.

1325 {
1326  struct dfa *d;
1327  chr **endpts;
1328  chr *limit;
1329  int min_matches;
1330  size_t max_matches;
1331  int nverified;
1332  int k;
1333  int i;
1334  int er;
1335 
1336  assert(t->op == '*');
1337  assert(t->child != NULL && t->child->cnfa.nstates > 0);
1338  assert(t->child->flags & SHORTER);
1339  assert(begin <= end);
1340 
1341  MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1342 
1343  /*
1344  * If zero matches are allowed, and target string is empty, just declare
1345  * victory. OTOH, if target string isn't empty, zero matches can't work
1346  * so we pretend the min is 1.
1347  */
1348  min_matches = t->min;
1349  if (min_matches <= 0)
1350  {
1351  if (begin == end)
1352  {
1353  MDEBUG(("%d: allowing zero matches\n", t->id));
1354  return REG_OKAY;
1355  }
1356  min_matches = 1;
1357  }
1358 
1359  /*
1360  * We need workspace to track the endpoints of each sub-match. Normally
1361  * we consider only nonzero-length sub-matches, so there can be at most
1362  * end-begin of them. However, if min is larger than that, we will also
1363  * consider zero-length sub-matches in order to find enough matches.
1364  *
1365  * For convenience, endpts[0] contains the "begin" pointer and we store
1366  * sub-match endpoints in endpts[1..max_matches].
1367  */
1368  max_matches = end - begin;
1369  if (max_matches > t->max && t->max != DUPINF)
1370  max_matches = t->max;
1371  if (max_matches < min_matches)
1372  max_matches = min_matches;
1373  endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1374  if (endpts == NULL)
1375  return REG_ESPACE;
1376  endpts[0] = begin;
1377 
1378  d = getsubdfa(v, t->child);
1379  if (ISERR())
1380  {
1381  FREE(endpts);
1382  return v->err;
1383  }
1384 
1385  /*
1386  * Our strategy is to first find a set of sub-match endpoints that are
1387  * valid according to the child node's DFA, and then recursively dissect
1388  * each sub-match to confirm validity. If any validity check fails,
1389  * backtrack that sub-match and try again. And, when we next try for a
1390  * validity check, we need not recheck any successfully verified
1391  * sub-matches that we didn't move the endpoints of. nverified remembers
1392  * how many sub-matches are currently known okay.
1393  */
1394 
1395  /* initialize to consider first sub-match */
1396  nverified = 0;
1397  k = 1;
1398  limit = begin;
1399 
1400  /* iterate until satisfaction or failure */
1401  while (k > 0)
1402  {
1403  /* disallow zero-length match unless necessary to achieve min */
1404  if (limit == endpts[k - 1] &&
1405  limit != end &&
1406  (k >= min_matches || min_matches - k < end - limit))
1407  limit++;
1408 
1409  /* if this is the last allowed sub-match, it must reach to the end */
1410  if (k >= max_matches)
1411  limit = end;
1412 
1413  /* try to find an endpoint for the k'th sub-match */
1414  endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
1415  (chr **) NULL, (int *) NULL);
1416  if (ISERR())
1417  {
1418  FREE(endpts);
1419  return v->err;
1420  }
1421  if (endpts[k] == NULL)
1422  {
1423  /* no match possible, so see if we can lengthen previous one */
1424  k--;
1425  goto backtrack;
1426  }
1427  MDEBUG(("%d: working endpoint %d: %ld\n",
1428  t->id, k, LOFF(endpts[k])));
1429 
1430  /* k'th sub-match can no longer be considered verified */
1431  if (nverified >= k)
1432  nverified = k - 1;
1433 
1434  if (endpts[k] != end)
1435  {
1436  /* haven't reached end yet, try another iteration if allowed */
1437  if (k >= max_matches)
1438  {
1439  /* must try to lengthen some previous match */
1440  k--;
1441  goto backtrack;
1442  }
1443 
1444  k++;
1445  limit = endpts[k - 1];
1446  continue;
1447  }
1448 
1449  /*
1450  * We've identified a way to divide the string into k sub-matches that
1451  * works so far as the child DFA can tell. If k is an allowed number
1452  * of matches, start the slow part: recurse to verify each sub-match.
1453  * We always have k <= max_matches, needn't check that.
1454  */
1455  if (k < min_matches)
1456  goto backtrack;
1457 
1458  MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1459 
1460  for (i = nverified + 1; i <= k; i++)
1461  {
1462  /* zap any match data from a non-last iteration */
1463  zaptreesubs(v, t->child);
1464  er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1465  if (er == REG_OKAY)
1466  {
1467  nverified = i;
1468  continue;
1469  }
1470  if (er == REG_NOMATCH)
1471  break;
1472  /* oops, something failed */
1473  FREE(endpts);
1474  return er;
1475  }
1476 
1477  if (i > k)
1478  {
1479  /* satisfaction */
1480  MDEBUG(("%d: successful\n", t->id));
1481  FREE(endpts);
1482  return REG_OKAY;
1483  }
1484 
1485  /* i'th match failed to verify, so backtrack it */
1486  k = i;
1487 
1488 backtrack:
1489 
1490  /*
1491  * Must consider longer versions of the k'th sub-match.
1492  */
1493  while (k > 0)
1494  {
1495  if (endpts[k] < end)
1496  {
1497  limit = endpts[k] + 1;
1498  /* break out of backtrack loop, continue the outer one */
1499  break;
1500  }
1501  /* can't lengthen k'th sub-match any more, consider previous one */
1502  k--;
1503  }
1504  }
1505 
1506  /* all possibilities exhausted */
1507  MDEBUG(("%d: failed\n", t->id));
1508  FREE(endpts);
1509  return REG_NOMATCH;
1510 }

References assert, cdissect(), subre::child, subre::cnfa, DUPINF, vars::err, subre::flags, FREE, getsubdfa(), i, subre::id, ISERR, LOFF, MALLOC, subre::max, MDEBUG, subre::min, cnfa::nstates, subre::op, REG_ESPACE, REG_NOMATCH, REG_OKAY, SHORTER, shortest(), and zaptreesubs().

Referenced by cdissect().

◆ dfa_backref()

static chr* dfa_backref ( struct vars v,
struct dfa d,
chr start,
chr min,
chr max,
bool  shortest 
)
static

◆ find()

static int find ( struct vars v,
struct cnfa cnfa,
struct colormap cm 
)
static

Definition at line 419 of file regexec.c.

422 {
423  struct dfa *s;
424  struct dfa *d;
425  chr *begin;
426  chr *end = NULL;
427  chr *cold;
428  chr *open; /* open and close of range of possible starts */
429  chr *close;
430  int hitend;
431  int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
432 
433  /* first, a shot with the search RE */
434  s = newdfa(v, &v->g->search, cm, &v->dfa1);
435  if (s == NULL)
436  return v->err;
437  MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
438  cold = NULL;
439  close = shortest(v, s, v->search_start, v->search_start, v->stop,
440  &cold, (int *) NULL);
441  freedfa(s);
442  NOERR();
443  if (v->g->cflags & REG_EXPECT)
444  {
445  assert(v->details != NULL);
446  if (cold != NULL)
447  v->details->rm_extend.rm_so = OFF(cold);
448  else
449  v->details->rm_extend.rm_so = OFF(v->stop);
450  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
451  }
452  if (close == NULL) /* not found */
453  return REG_NOMATCH;
454  if (v->nmatch == 0) /* found, don't need exact location */
455  return REG_OKAY;
456 
457  /* find starting point and match */
458  assert(cold != NULL);
459  open = cold;
460  cold = NULL;
461  MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
462  d = newdfa(v, cnfa, cm, &v->dfa1);
463  if (d == NULL)
464  return v->err;
465  for (begin = open; begin <= close; begin++)
466  {
467  MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
468  if (shorter)
469  end = shortest(v, d, begin, begin, v->stop,
470  (chr **) NULL, &hitend);
471  else
472  end = longest(v, d, begin, v->stop, &hitend);
473  if (ISERR())
474  {
475  freedfa(d);
476  return v->err;
477  }
478  if (hitend && cold == NULL)
479  cold = begin;
480  if (end != NULL)
481  break; /* NOTE BREAK OUT */
482  }
483  assert(end != NULL); /* search RE succeeded so loop should */
484  freedfa(d);
485 
486  /* and pin down details */
487  assert(v->nmatch > 0);
488  v->pmatch[0].rm_so = OFF(begin);
489  v->pmatch[0].rm_eo = OFF(end);
490  if (v->g->cflags & REG_EXPECT)
491  {
492  if (cold != NULL)
493  v->details->rm_extend.rm_so = OFF(cold);
494  else
495  v->details->rm_extend.rm_so = OFF(v->stop);
496  v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
497  }
498  if (v->nmatch == 1) /* no need for submatches */
499  return REG_OKAY;
500 
501  /* find submatches */
502  return cdissect(v, v->g->tree, begin, end);
503 }

References assert, cdissect(), guts::cflags, close, dfa::cm, vars::details, vars::dfa1, vars::err, subre::flags, freedfa(), vars::g, ISERR, LOFF, longest(), MDEBUG, newdfa(), vars::nmatch, NOERR, OFF, vars::pmatch, REG_EXPECT, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, rm_detail_t::rm_extend, regmatch_t::rm_so, guts::search, vars::search_start, SHORTER, shortest(), vars::start, vars::stop, and guts::tree.

Referenced by NIAddAffix(), NIImportAffixes(), NIImportOOAffixes(), parse_affentry(), parse_ooaffentry(), pg_regexec(), and testdelete().

◆ freedfa()

static void freedfa ( struct dfa d)
static

Referenced by cfind(), find(), and pg_regexec().

◆ getladfa()

static struct dfa * getladfa ( struct vars v,
int  n 
)
static

Definition at line 400 of file regexec.c.

402 {
403  assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
404 
405  if (v->ladfas[n] == NULL)
406  {
407  struct subre *sub = &v->g->lacons[n];
408 
409  v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
410  /* a LACON can't contain a backref, so nothing else to do */
411  }
412  return v->ladfas[n];
413 }
#define DOMALLOC
Definition: regexec.c:101
struct subre * lacons
Definition: regguts.h:540
struct colormap cmap
Definition: regguts.h:538
struct dfa ** ladfas
Definition: regexec.c:119

References assert, guts::cmap, subre::cnfa, DOMALLOC, vars::g, guts::lacons, vars::ladfas, and newdfa().

Referenced by lacon().

◆ getsubdfa()

static struct dfa * getsubdfa ( struct vars v,
struct subre t 
)
static

Definition at line 372 of file regexec.c.

374 {
375  struct dfa *d = v->subdfas[t->id];
376 
377  if (d == NULL)
378  {
379  d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
380  if (d == NULL)
381  return NULL;
382  /* set up additional info if this is a backref node */
383  if (t->op == 'b')
384  {
385  d->backno = t->backno;
386  d->backmin = t->min;
387  d->backmax = t->max;
388  }
389  v->subdfas[t->id] = d;
390  }
391  return d;
392 }
short backmin
Definition: regexec.c:81
int backno
Definition: regexec.c:80
short backmax
Definition: regexec.c:82
struct dfa ** subdfas
Definition: regexec.c:118

References dfa::backmax, dfa::backmin, dfa::backno, subre::backno, guts::cmap, subre::cnfa, DOMALLOC, vars::g, subre::id, subre::max, subre::min, newdfa(), subre::op, and vars::subdfas.

Referenced by caltdissect(), ccondissect(), citerdissect(), crevcondissect(), and creviterdissect().

◆ getvacant()

static struct sset* getvacant ( struct vars v,
struct dfa d,
chr cp,
chr start 
)
static

◆ hash()

static unsigned hash ( unsigned *  uv,
int  n 
)
static

◆ initialize()

static struct sset* initialize ( struct vars v,
struct dfa d,
chr start 
)
static

◆ lacon()

static int lacon ( struct vars v,
struct cnfa pcnfa,
chr cp,
color  co 
)
static

◆ lastcold()

static chr* lastcold ( struct vars v,
struct dfa d 
)
static

◆ longest()

static chr* longest ( struct vars v,
struct dfa d,
chr start,
chr stop,
int *  hitstopp 
)
static

◆ matchuntil()

static int matchuntil ( struct vars v,
struct dfa d,
chr probe,
struct sset **  lastcss,
chr **  lastcp 
)
static

◆ miss()

static struct sset* miss ( struct vars v,
struct dfa d,
struct sset css,
color  co,
chr cp,
chr start 
)
static

◆ newdfa()

static struct dfa* newdfa ( struct vars v,
struct cnfa cnfa,
struct colormap cm,
struct smalldfa sml 
)
static

Referenced by cfind(), find(), getladfa(), and getsubdfa().

◆ pg_regexec()

int pg_regexec ( regex_t re,
const chr string,
size_t  len,
size_t  search_start,
rm_detail_t details,
size_t  nmatch,
regmatch_t  pmatch[],
int  flags 
)

Definition at line 185 of file regexec.c.

193 {
194  struct vars var;
195  struct vars *v = &var;
196  int st;
197  size_t n;
198  size_t i;
199  int backref;
200 
201 #define LOCALMAT 20
202  regmatch_t mat[LOCALMAT];
203 
204 #define LOCALDFAS 40
205  struct dfa *subdfas[LOCALDFAS];
206 
207  /* sanity checks */
208  if (re == NULL || string == NULL || re->re_magic != REMAGIC)
209  return REG_INVARG;
210  if (re->re_csize != sizeof(chr))
211  return REG_MIXED;
212  if (search_start > len)
213  return REG_NOMATCH;
214 
215  /* Initialize locale-dependent support */
217 
218  /* setup */
219  v->re = re;
220  v->g = (struct guts *) re->re_guts;
221  if ((v->g->cflags & REG_EXPECT) && details == NULL)
222  return REG_INVARG;
223  if (v->g->info & REG_UIMPOSSIBLE)
224  return REG_NOMATCH;
225  backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
226  v->eflags = flags;
227  if (backref && nmatch <= v->g->nsub)
228  {
229  /* need larger work area */
230  v->nmatch = v->g->nsub + 1;
231  if (v->nmatch <= LOCALMAT)
232  v->pmatch = mat;
233  else
234  v->pmatch = (regmatch_t *) MALLOC(v->nmatch * sizeof(regmatch_t));
235  if (v->pmatch == NULL)
236  return REG_ESPACE;
237  zapallsubs(v->pmatch, v->nmatch);
238  }
239  else
240  {
241  /* we can store results directly in caller's array */
242  v->pmatch = pmatch;
243  /* ensure any extra entries in caller's array are filled with -1 */
244  if (nmatch > 0)
245  zapallsubs(pmatch, nmatch);
246  /* then forget about extra entries, to avoid useless work in find() */
247  if (nmatch > v->g->nsub + 1)
248  nmatch = v->g->nsub + 1;
249  v->nmatch = nmatch;
250  }
251  v->details = details;
252  v->start = (chr *) string;
253  v->search_start = (chr *) string + search_start;
254  v->stop = (chr *) string + len;
255  v->err = 0;
256  v->subdfas = NULL;
257  v->ladfas = NULL;
258  v->lblastcss = NULL;
259  v->lblastcp = NULL;
260  /* below this point, "goto cleanup" will behave sanely */
261 
262  assert(v->g->ntree >= 0);
263  n = (size_t) v->g->ntree;
264  if (n <= LOCALDFAS)
265  v->subdfas = subdfas;
266  else
267  {
268  v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
269  if (v->subdfas == NULL)
270  {
271  st = REG_ESPACE;
272  goto cleanup;
273  }
274  }
275  for (i = 0; i < n; i++)
276  v->subdfas[i] = NULL;
277 
278  assert(v->g->nlacons >= 0);
279  n = (size_t) v->g->nlacons;
280  if (n > 0)
281  {
282  v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
283  if (v->ladfas == NULL)
284  {
285  st = REG_ESPACE;
286  goto cleanup;
287  }
288  for (i = 0; i < n; i++)
289  v->ladfas[i] = NULL;
290  v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
291  v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
292  if (v->lblastcss == NULL || v->lblastcp == NULL)
293  {
294  st = REG_ESPACE;
295  goto cleanup;
296  }
297  for (i = 0; i < n; i++)
298  {
299  v->lblastcss[i] = NULL;
300  v->lblastcp[i] = NULL;
301  }
302  }
303 
304  /* do it */
305  assert(v->g->tree != NULL);
306  if (backref)
307  st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
308  else
309  st = find(v, &v->g->tree->cnfa, &v->g->cmap);
310 
311  /* on success, ensure caller's match vector is filled correctly */
312  if (st == REG_OKAY && nmatch > 0)
313  {
314  if (v->pmatch != pmatch)
315  {
316  /* copy portion of match vector over from (larger) work area */
317  assert(nmatch <= v->nmatch);
318  memcpy(VS(pmatch), VS(v->pmatch), nmatch * sizeof(regmatch_t));
319  }
320  if (v->g->cflags & REG_NOSUB)
321  {
322  /* don't expose possibly-partial sub-match results to caller */
323  zapallsubs(pmatch, nmatch);
324  }
325  }
326 
327  /* clean up */
328 cleanup:
329  if (v->pmatch != pmatch && v->pmatch != mat)
330  FREE(v->pmatch);
331  if (v->subdfas != NULL)
332  {
333  n = (size_t) v->g->ntree;
334  for (i = 0; i < n; i++)
335  {
336  if (v->subdfas[i] != NULL)
337  freedfa(v->subdfas[i]);
338  }
339  if (v->subdfas != subdfas)
340  FREE(v->subdfas);
341  }
342  if (v->ladfas != NULL)
343  {
344  n = (size_t) v->g->nlacons;
345  for (i = 0; i < n; i++)
346  {
347  if (v->ladfas[i] != NULL)
348  freedfa(v->ladfas[i]);
349  }
350  FREE(v->ladfas);
351  }
352  if (v->lblastcss != NULL)
353  FREE(v->lblastcss);
354  if (v->lblastcp != NULL)
355  FREE(v->lblastcp);
356 
357 #ifdef REG_DEBUG
358  if (v->eflags & (REG_FTRACE | REG_MTRACE))
359  fflush(stdout);
360 #endif
361 
362  return st;
363 }
static void cleanup(void)
Definition: bootstrap.c:682
for(;;)
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
static void const char fflush(stdout)
const void size_t len
void pg_set_regex_collation(Oid collation)
#define REG_UIMPOSSIBLE
Definition: regex.h:72
#define REG_MTRACE
Definition: regex.h:128
#define REG_FTRACE
Definition: regex.h:127
#define REG_INVARG
Definition: regex.h:152
#define REG_MIXED
Definition: regex.h:153
#define REG_NOSUB
Definition: regex.h:107
#define REG_UBACKREF
Definition: regex.h:60
static int find(struct vars *v, struct cnfa *cnfa, struct colormap *cm)
Definition: regexec.c:419
static void zapallsubs(regmatch_t *p, size_t n)
Definition: regexec.c:663
static int cfind(struct vars *v, struct cnfa *cnfa, struct colormap *cm)
Definition: regexec.c:509
#define LOCALMAT
#define LOCALDFAS
#define REMAGIC
Definition: regguts.h:101
Definition: regguts.h:529
long info
Definition: regguts.h:533
int ntree
Definition: regguts.h:537
size_t nsub
Definition: regguts.h:534
int nlacons
Definition: regguts.h:541
char * re_guts
Definition: regex.h:78
int re_csize
Definition: regex.h:74
int re_magic
Definition: regex.h:57
Oid re_collation
Definition: regex.h:76
Definition: regexec.c:46
Definition: regcomp.c:281
struct sset ** lblastcss
Definition: regexec.c:120
chr ** lblastcp
Definition: regexec.c:121
int eflags
Definition: regexec.c:110

References assert, cfind(), guts::cflags, cleanup(), guts::cmap, subre::cnfa, vars::details, vars::eflags, vars::err, fflush(), find(), for(), FREE, freedfa(), vars::g, i, if(), guts::info, vars::ladfas, vars::lblastcp, vars::lblastcss, len, LOCALDFAS, LOCALMAT, MALLOC, guts::nlacons, vars::nmatch, guts::nsub, guts::ntree, pg_set_regex_collation(), vars::pmatch, vars::re, regex_t::re_collation, regex_t::re_csize, regex_t::re_guts, regex_t::re_magic, REG_ESPACE, REG_EXPECT, REG_FTRACE, REG_INVARG, REG_MIXED, REG_MTRACE, REG_NOMATCH, REG_NOSUB, REG_OKAY, REG_UBACKREF, REG_UIMPOSSIBLE, REMAGIC, vars::search_start, vars::start, generate_unaccent_rules::stdout, vars::stop, vars::subdfas, guts::tree, VS, and zapallsubs().

Referenced by CheckAffix(), RE_wchar_execute(), regexec_auth_token(), replace_text_regexp(), and test_re_execute().

◆ pickss()

static struct sset* pickss ( struct vars v,
struct dfa d,
chr cp,
chr start 
)
static

◆ shortest()

static chr* shortest ( struct vars v,
struct dfa d,
chr start,
chr min,
chr max,
chr **  coldp,
int *  hitstopp 
)
static

◆ subset()

static void subset ( struct vars v,
struct subre sub,
chr begin,
chr end 
)
static

Definition at line 702 of file regexec.c.

706 {
707  int n = sub->capno;
708 
709  assert(n > 0);
710  if ((size_t) n >= v->nmatch)
711  return;
712 
713  MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
714  v->pmatch[n].rm_so = OFF(begin);
715  v->pmatch[n].rm_eo = OFF(end);
716 }

References assert, subre::begin, subre::capno, subre::end, subre::id, LOFF, MDEBUG, vars::nmatch, OFF, vars::pmatch, regmatch_t::rm_eo, and regmatch_t::rm_so.

Referenced by cdissect().

◆ zapallsubs()

static void zapallsubs ( regmatch_t p,
size_t  n 
)
static

Definition at line 663 of file regexec.c.

665 {
666  size_t i;
667 
668  for (i = n - 1; i > 0; i--)
669  {
670  p[i].rm_so = -1;
671  p[i].rm_eo = -1;
672  }
673 }

References i, regmatch_t::rm_eo, and regmatch_t::rm_so.

Referenced by pg_regexec().

◆ zaptreesubs()

static void zaptreesubs ( struct vars v,
struct subre t 
)
static

Definition at line 679 of file regexec.c.

681 {
682  int n = t->capno;
683  struct subre *t2;
684 
685  if (n > 0)
686  {
687  if ((size_t) n < v->nmatch)
688  {
689  v->pmatch[n].rm_so = -1;
690  v->pmatch[n].rm_eo = -1;
691  }
692  }
693 
694  for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
695  zaptreesubs(v, t2);
696 }

References subre::capno, subre::child, vars::pmatch, regmatch_t::rm_eo, regmatch_t::rm_so, and subre::sibling.

Referenced by ccondissect(), citerdissect(), crevcondissect(), and creviterdissect().