PostgreSQL Source Code git master
regexec.c File Reference
#include "regex/regguts.h"
#include "rege_dfa.c"
Include dependency graph for regexec.c:

Go to the source code of this file.

Data Structures

struct  arcp
 
struct  sset
 
struct  dfa
 
struct  smalldfa
 
struct  vars
 

Macros

#define HASH(bv, nw)   (((nw) == 1) ? *(bv) : hash(bv, nw))
 
#define HIT(h, bv, ss, nw)
 
#define STARTER   01 /* the initial state set */
 
#define POSTSTATE   02 /* includes the goal state */
 
#define LOCKED   04 /* locked in cache */
 
#define NOPROGRESS   010 /* zero-progress state set */
 
#define WORK   1 /* number of work bitvectors needed */
 
#define FEWSTATES   20 /* must be less than UBITS */
 
#define FEWCOLORS   15
 
#define DOMALLOC   ((struct smalldfa *)NULL) /* force malloc */
 
#define VISERR(vv)   ((vv)->err != 0) /* have we seen an error yet? */
 
#define ISERR()   VISERR(v)
 
#define VERR(vv, e)   ((vv)->err = ((vv)->err ? (vv)->err : (e)))
 
#define ERR(e)   VERR(v, e) /* record an error */
 
#define NOERR()   {if (ISERR()) return v->err;} /* if error seen, return it */
 
#define OFF(p)   ((p) - v->start)
 
#define LOFF(p)   ((long)OFF(p))
 
#define LOCALMAT   20
 
#define LOCALDFAS   40
 

Functions

static struct dfagetsubdfa (struct vars *v, struct subre *t)
 
static struct dfagetladfa (struct vars *v, int n)
 
static int find (struct vars *v, struct cnfa *cnfa, struct colormap *cm)
 
static int cfind (struct vars *v, struct cnfa *cnfa, struct colormap *cm)
 
static int cfindloop (struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct dfa *d, struct dfa *s, chr **coldp)
 
static void zapallsubs (regmatch_t *p, size_t n)
 
static void zaptreesubs (struct vars *v, struct subre *t)
 
static void subset (struct vars *v, struct subre *sub, chr *begin, chr *end)
 
static int cdissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int ccondissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int crevcondissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int cbrdissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int caltdissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int citerdissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static int creviterdissect (struct vars *v, struct subre *t, chr *begin, chr *end)
 
static chrlongest (struct vars *v, struct dfa *d, chr *start, chr *stop, int *hitstopp)
 
static chrshortest (struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
 
static int matchuntil (struct vars *v, struct dfa *d, chr *probe, struct sset **lastcss, chr **lastcp)
 
static chrdfa_backref (struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, bool shortest)
 
static chrlastcold (struct vars *v, struct dfa *d)
 
static struct dfanewdfa (struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct smalldfa *sml)
 
static void freedfa (struct dfa *d)
 
static unsigned hash (unsigned *uv, int n)
 
static struct ssetinitialize (struct vars *v, struct dfa *d, chr *start)
 
static struct ssetmiss (struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
 
static int lacon (struct vars *v, struct cnfa *pcnfa, chr *cp, color co)
 
static struct ssetgetvacant (struct vars *v, struct dfa *d, chr *cp, chr *start)
 
static struct ssetpickss (struct vars *v, struct dfa *d, chr *cp, chr *start)
 
int pg_regexec (regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
 

Macro Definition Documentation

◆ DOMALLOC

#define DOMALLOC   ((struct smalldfa *)NULL) /* force malloc */

Definition at line 101 of file regexec.c.

◆ ERR

#define ERR (   e)    VERR(v, e) /* record an error */

Definition at line 129 of file regexec.c.

◆ FEWCOLORS

#define FEWCOLORS   15

Definition at line 91 of file regexec.c.

◆ FEWSTATES

#define FEWSTATES   20 /* must be less than UBITS */

Definition at line 90 of file regexec.c.

◆ HASH

#define HASH (   bv,
  nw 
)    (((nw) == 1) ? *(bv) : hash(bv, nw))

Definition at line 49 of file regexec.c.

◆ HIT

#define HIT (   h,
  bv,
  ss,
  nw 
)
Value:
((ss)->hash == (h) && ((nw) == 1 || \
memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
static unsigned hash(unsigned *uv, int n)
#define VS(x)
Definition: regguts.h:61

Definition at line 50 of file regexec.c.

◆ ISERR

#define ISERR ( )    VISERR(v)

Definition at line 127 of file regexec.c.

◆ LOCALDFAS

#define LOCALDFAS   40

◆ LOCALMAT

#define LOCALMAT   20

◆ LOCKED

#define LOCKED   04 /* locked in cache */

Definition at line 55 of file regexec.c.

◆ LOFF

#define LOFF (   p)    ((long)OFF(p))

Definition at line 132 of file regexec.c.

◆ NOERR

#define NOERR ( )    {if (ISERR()) return v->err;} /* if error seen, return it */

Definition at line 130 of file regexec.c.

◆ NOPROGRESS

#define NOPROGRESS   010 /* zero-progress state set */

Definition at line 56 of file regexec.c.

◆ OFF

#define OFF (   p)    ((p) - v->start)

Definition at line 131 of file regexec.c.

◆ POSTSTATE

#define POSTSTATE   02 /* includes the goal state */

Definition at line 54 of file regexec.c.

◆ STARTER

#define STARTER   01 /* the initial state set */

Definition at line 53 of file regexec.c.

◆ VERR

#define VERR (   vv,
  e 
)    ((vv)->err = ((vv)->err ? (vv)->err : (e)))

Definition at line 128 of file regexec.c.

◆ VISERR

#define VISERR (   vv)    ((vv)->err != 0) /* have we seen an error yet? */

Definition at line 126 of file regexec.c.

◆ WORK

#define WORK   1 /* number of work bitvectors needed */

Definition at line 87 of file regexec.c.

Function Documentation

◆ caltdissect()

static int caltdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1076 of file regexec.c.

1080{
1081 struct dfa *d;
1082 int er;
1083
1084 assert(t->op == '|');
1085
1086 t = t->child;
1087 /* there should be at least 2 alternatives */
1088 assert(t != NULL && t->sibling != NULL);
1089
1090 while (t != NULL)
1091 {
1092 assert(t->cnfa.nstates > 0);
1093
1094 MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1095
1096 d = getsubdfa(v, t);
1097 NOERR();
1098 if (longest(v, d, begin, end, (int *) NULL) == end)
1099 {
1100 MDEBUG(("%d: caltdissect matched\n", t->id));
1101 er = cdissect(v, t, begin, end);
1102 if (er != REG_NOMATCH)
1103 return er;
1104 }
1105 NOERR();
1106
1107 t = t->sibling;
1108 }
1109
1110 return REG_NOMATCH;
1111}
#define assert(x)
Definition: regcustom.h:56
#define REG_NOMATCH
Definition: regex.h:216
#define LOFF(p)
Definition: regexec.c:132
#define NOERR()
Definition: regexec.c:130
static struct dfa * getsubdfa(struct vars *v, struct subre *t)
Definition: regexec.c:372
static chr * longest(struct vars *v, struct dfa *d, chr *start, chr *stop, int *hitstopp)
static int cdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:756
#define MDEBUG(arglist)
Definition: regguts.h:122
int nstates
Definition: regguts.h:408
Definition: regexec.c:64
char op
Definition: regguts.h:480
struct subre * sibling
Definition: regguts.h:504
int id
Definition: regguts.h:498
struct cnfa cnfa
Definition: regguts.h:507
struct subre * child
Definition: regguts.h:503

References assert, cdissect(), subre::child, subre::cnfa, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_NOMATCH, and subre::sibling.

Referenced by cdissect().

◆ cbrdissect()

static int cbrdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 994 of file regexec.c.

998{
999 int n = t->backno;
1000 size_t numreps;
1001 size_t tlen;
1002 size_t brlen;
1003 chr *brstring;
1004 chr *p;
1005 int min = t->min;
1006 int max = t->max;
1007
1008 assert(t != NULL);
1009 assert(t->op == 'b');
1010 assert(n >= 0);
1011 assert((size_t) n < v->nmatch);
1012
1013 MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
1014 LOFF(begin), LOFF(end)));
1015
1016 /* get the backreferenced string */
1017 if (v->pmatch[n].rm_so == -1)
1018 return REG_NOMATCH;
1019 brstring = v->start + v->pmatch[n].rm_so;
1020 brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
1021
1022 /* special cases for zero-length strings */
1023 if (brlen == 0)
1024 {
1025 /*
1026 * matches only if target is zero length, but any number of
1027 * repetitions can be considered to be present
1028 */
1029 if (begin == end && min <= max)
1030 {
1031 MDEBUG(("%d: backref matched trivially\n", t->id));
1032 return REG_OKAY;
1033 }
1034 return REG_NOMATCH;
1035 }
1036 if (begin == end)
1037 {
1038 /* matches only if zero repetitions are okay */
1039 if (min == 0)
1040 {
1041 MDEBUG(("%d: backref matched trivially\n", t->id));
1042 return REG_OKAY;
1043 }
1044 return REG_NOMATCH;
1045 }
1046
1047 /*
1048 * check target length to see if it could possibly be an allowed number of
1049 * repetitions of brstring
1050 */
1051 assert(end > begin);
1052 tlen = end - begin;
1053 if (tlen % brlen != 0)
1054 return REG_NOMATCH;
1055 numreps = tlen / brlen;
1056 if (numreps < min || (numreps > max && max != DUPINF))
1057 return REG_NOMATCH;
1058
1059 /* okay, compare the actual string contents */
1060 p = begin;
1061 while (numreps-- > 0)
1062 {
1063 if ((*v->g->compare) (brstring, p, brlen) != 0)
1064 return REG_NOMATCH;
1065 p += brlen;
1066 }
1067
1068 MDEBUG(("%d: backref matched\n", t->id));
1069 return REG_OKAY;
1070}
pg_wchar chr
Definition: regcustom.h:59
#define REG_OKAY
Definition: regex.h:215
#define DUPINF
Definition: regguts.h:99
int backno
Definition: regguts.h:500
short min
Definition: regguts.h:501
short max
Definition: regguts.h:502
struct guts * g
Definition: regexec.c:109
chr * start
Definition: regexec.c:114
regmatch_t * pmatch
Definition: regexec.c:112

References assert, subre::backno, DUPINF, vars::g, subre::id, LOFF, subre::max, MDEBUG, subre::min, subre::op, vars::pmatch, REG_NOMATCH, REG_OKAY, and vars::start.

Referenced by cdissect().

◆ ccondissect()

static int ccondissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 829 of file regexec.c.

833{
834 struct subre *left = t->child;
835 struct subre *right = left->sibling;
836 struct dfa *d;
837 struct dfa *d2;
838 chr *mid;
839 int er;
840
841 assert(t->op == '.');
842 assert(left != NULL && left->cnfa.nstates > 0);
843 assert(right != NULL && right->cnfa.nstates > 0);
844 assert(right->sibling == NULL);
845 assert(!(left->flags & SHORTER));
846
847 d = getsubdfa(v, left);
848 NOERR();
849 d2 = getsubdfa(v, right);
850 NOERR();
851 MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
852
853 /* pick a tentative midpoint */
854 mid = longest(v, d, begin, end, (int *) NULL);
855 NOERR();
856 if (mid == NULL)
857 return REG_NOMATCH;
858 MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
859
860 /* iterate until satisfaction or failure */
861 for (;;)
862 {
863 /* try this midpoint on for size */
864 if (longest(v, d2, mid, end, (int *) NULL) == end)
865 {
866 er = cdissect(v, left, begin, mid);
867 if (er == REG_OKAY)
868 {
869 er = cdissect(v, right, mid, end);
870 if (er == REG_OKAY)
871 {
872 /* satisfaction */
873 MDEBUG(("%d: successful\n", t->id));
874 return REG_OKAY;
875 }
876 /* Reset left's matches (right should have done so itself) */
877 zaptreesubs(v, left);
878 }
879 if (er != REG_NOMATCH)
880 return er;
881 }
882 NOERR();
883
884 /* that midpoint didn't work, find a new one */
885 if (mid == begin)
886 {
887 /* all possibilities exhausted */
888 MDEBUG(("%d: no midpoint\n", t->id));
889 return REG_NOMATCH;
890 }
891 mid = longest(v, d, begin, mid - 1, (int *) NULL);
892 NOERR();
893 if (mid == NULL)
894 {
895 /* failed to find a new one */
896 MDEBUG(("%d: failed midpoint\n", t->id));
897 return REG_NOMATCH;
898 }
899 MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
900 }
901
902 /* can't get here */
903 return REG_ASSERT;
904}
#define REG_ASSERT
Definition: regex.h:229
static void zaptreesubs(struct vars *v, struct subre *t)
Definition: regexec.c:679
#define SHORTER
Definition: regguts.h:483
Definition: regguts.h:479
char flags
Definition: regguts.h:481

References assert, cdissect(), subre::child, subre::cnfa, subre::flags, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_ASSERT, REG_NOMATCH, REG_OKAY, SHORTER, subre::sibling, and zaptreesubs().

Referenced by cdissect().

◆ cdissect()

static int cdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 756 of file regexec.c.

760{
761 int er;
762
763 assert(t != NULL);
764 MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
765
766 /* handy place to check for operation cancel */
767 INTERRUPT(v->re);
768 /* ... and stack overrun */
769 if (STACK_TOO_DEEP(v->re))
770 return REG_ETOOBIG;
771
772 switch (t->op)
773 {
774 case '=': /* terminal node */
775 assert(t->child == NULL);
776 er = REG_OKAY; /* no action, parent did the work */
777 break;
778 case 'b': /* back reference */
779 assert(t->child == NULL);
780 er = cbrdissect(v, t, begin, end);
781 break;
782 case '.': /* concatenation */
783 assert(t->child != NULL);
784 if (t->child->flags & SHORTER) /* reverse scan */
785 er = crevcondissect(v, t, begin, end);
786 else
787 er = ccondissect(v, t, begin, end);
788 break;
789 case '|': /* alternation */
790 assert(t->child != NULL);
791 er = caltdissect(v, t, begin, end);
792 break;
793 case '*': /* iteration */
794 assert(t->child != NULL);
795 if (t->child->flags & SHORTER) /* reverse scan */
796 er = creviterdissect(v, t, begin, end);
797 else
798 er = citerdissect(v, t, begin, end);
799 break;
800 case '(': /* no-op capture node */
801 assert(t->child != NULL);
802 er = cdissect(v, t->child, begin, end);
803 break;
804 default:
805 er = REG_ASSERT;
806 break;
807 }
808
809 /*
810 * We should never have a match failure unless backrefs lurk below;
811 * otherwise, either caller failed to check the DFA, or there's some
812 * inconsistency between the DFA and the node's innards.
813 */
814 assert(er != REG_NOMATCH || (t->flags & BACKR));
815
816 /*
817 * If this node is marked as capturing, save successful match's location.
818 */
819 if (t->capno > 0 && er == REG_OKAY)
820 subset(v, t, begin, end);
821
822 return er;
823}
#define INTERRUPT(re)
Definition: regcustom.h:55
#define REG_ETOOBIG
Definition: regex.h:233
static int citerdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:1117
static int ccondissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:829
static int creviterdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:1321
static int cbrdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:994
static int crevcondissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:910
static int caltdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
Definition: regexec.c:1076
static void subset(struct vars *v, struct subre *sub, chr *begin, chr *end)
Definition: regexec.c:702
#define BACKR
Definition: regguts.h:486
#define STACK_TOO_DEEP(re)
Definition: regguts.h:523
int capno
Definition: regguts.h:499
regex_t * re
Definition: regcomp.c:283

References assert, BACKR, subre::begin, caltdissect(), subre::capno, cbrdissect(), ccondissect(), cdissect(), subre::child, citerdissect(), crevcondissect(), creviterdissect(), subre::end, subre::flags, subre::id, INTERRUPT, LOFF, MDEBUG, subre::op, vars::re, REG_ASSERT, REG_ETOOBIG, REG_NOMATCH, REG_OKAY, SHORTER, STACK_TOO_DEEP, and subset().

Referenced by caltdissect(), ccondissect(), cdissect(), cfindloop(), citerdissect(), crevcondissect(), creviterdissect(), and find().

◆ cfind()

static int cfind ( struct vars v,
struct cnfa cnfa,
struct colormap cm 
)
static

Definition at line 509 of file regexec.c.

512{
513 struct dfa *s;
514 struct dfa *d;
515 chr *cold;
516 int ret;
517
518 s = newdfa(v, &v->g->search, cm, &v->dfa1);
519 if (s == NULL)
520 return v->err;
521 d = newdfa(v, cnfa, cm, &v->dfa2);
522 if (d == NULL)
523 {
524 freedfa(s);
525 return v->err;
526 }
527
528 ret = cfindloop(v, cnfa, cm, d, s, &cold);
529
530 freedfa(d);
531 freedfa(s);
532 NOERR();
533 if (v->g->cflags & REG_EXPECT)
534 {
535 assert(v->details != NULL);
536 if (cold != NULL)
537 v->details->rm_extend.rm_so = OFF(cold);
538 else
539 v->details->rm_extend.rm_so = OFF(v->stop);
540 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
541 }
542 return ret;
543}
#define REG_EXPECT
Definition: regex.h:191
static void freedfa(struct dfa *d)
static int cfindloop(struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct dfa *d, struct dfa *s, chr **coldp)
Definition: regexec.c:549
static struct dfa * newdfa(struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct smalldfa *sml)
#define OFF(p)
Definition: regexec.c:131
Definition: regguts.h:407
struct colormap * cm
Definition: regexec.c:76
struct cnfa search
Definition: regguts.h:538
int cflags
Definition: regguts.h:534
pg_regoff_t rm_eo
Definition: regex.h:164
pg_regoff_t rm_so
Definition: regex.h:163
pg_regmatch_t rm_extend
Definition: regex.h:170
const chr * stop
Definition: regcomp.c:285
int err
Definition: regcomp.c:286
struct smalldfa dfa1
Definition: regexec.c:122
rm_detail_t * details
Definition: regexec.c:113
struct smalldfa dfa2
Definition: regexec.c:123

References assert, cfindloop(), guts::cflags, dfa::cm, vars::details, vars::dfa1, vars::dfa2, vars::err, freedfa(), vars::g, newdfa(), NOERR, OFF, REG_EXPECT, pg_regmatch_t::rm_eo, rm_detail_t::rm_extend, pg_regmatch_t::rm_so, guts::search, and vars::stop.

Referenced by pg_regexec().

◆ cfindloop()

static int cfindloop ( struct vars v,
struct cnfa cnfa,
struct colormap cm,
struct dfa d,
struct dfa s,
chr **  coldp 
)
static

Definition at line 549 of file regexec.c.

555{
556 chr *begin;
557 chr *end;
558 chr *cold;
559 chr *open; /* open and close of range of possible starts */
560 chr *close;
561 chr *estart;
562 chr *estop;
563 int er;
564 int shorter = v->g->tree->flags & SHORTER;
565 int hitend;
566
567 assert(d != NULL && s != NULL);
568 cold = NULL;
569 close = v->search_start;
570 do
571 {
572 /* Search with the search RE for match range at/beyond "close" */
573 MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
574 close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
575 if (ISERR())
576 {
577 *coldp = cold;
578 return v->err;
579 }
580 if (close == NULL)
581 break; /* no more possible match anywhere */
582 assert(cold != NULL);
583 open = cold;
584 cold = NULL;
585 /* Search for matches starting between "open" and "close" inclusive */
586 MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
587 for (begin = open; begin <= close; begin++)
588 {
589 MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
590 estart = begin;
591 estop = v->stop;
592 for (;;)
593 {
594 /* Here we use the top node's detailed RE */
595 if (shorter)
596 end = shortest(v, d, begin, estart,
597 estop, (chr **) NULL, &hitend);
598 else
599 end = longest(v, d, begin, estop,
600 &hitend);
601 if (ISERR())
602 {
603 *coldp = cold;
604 return v->err;
605 }
606 if (hitend && cold == NULL)
607 cold = begin;
608 if (end == NULL)
609 break; /* no match with this begin point, try next */
610 MDEBUG(("tentative end %ld\n", LOFF(end)));
611 /* Dissect the potential match to see if it really matches */
612 er = cdissect(v, v->g->tree, begin, end);
613 if (er == REG_OKAY)
614 {
615 if (v->nmatch > 0)
616 {
617 v->pmatch[0].rm_so = OFF(begin);
618 v->pmatch[0].rm_eo = OFF(end);
619 }
620 *coldp = cold;
621 return REG_OKAY;
622 }
623 if (er != REG_NOMATCH)
624 {
625 ERR(er);
626 *coldp = cold;
627 return er;
628 }
629 /* Try next longer/shorter match with same begin point */
630 if (shorter)
631 {
632 if (end == estop)
633 break; /* no more, so try next begin point */
634 estart = end + 1;
635 }
636 else
637 {
638 if (end == begin)
639 break; /* no more, so try next begin point */
640 estop = end - 1;
641 }
642 } /* end loop over endpoint positions */
643 } /* end loop over beginning positions */
644
645 /*
646 * If we get here, there is no possible match starting at or before
647 * "close", so consider matches beyond that. We'll do a fresh search
648 * with the search RE to find a new promising match range.
649 */
650 close++;
651 } while (close < v->stop);
652
653 *coldp = cold;
654 return REG_NOMATCH;
655}
#define close(a)
Definition: win32.h:12
static chr * shortest(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
#define ISERR()
Definition: regexec.c:127
#define ERR(e)
Definition: regexec.c:129
struct subre * tree
Definition: regguts.h:537
size_t nmatch
Definition: regexec.c:111
chr * search_start
Definition: regexec.c:115

References assert, cdissect(), close, vars::err, ERR, subre::flags, vars::g, ISERR, LOFF, longest(), MDEBUG, vars::nmatch, OFF, vars::pmatch, REG_NOMATCH, REG_OKAY, vars::search_start, SHORTER, shortest(), vars::stop, and guts::tree.

Referenced by cfind().

◆ citerdissect()

static int citerdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1117 of file regexec.c.

1121{
1122 struct dfa *d;
1123 chr **endpts;
1124 chr *limit;
1125 int min_matches;
1126 size_t max_matches;
1127 int nverified;
1128 int k;
1129 int i;
1130 int er;
1131
1132 assert(t->op == '*');
1133 assert(t->child != NULL && t->child->cnfa.nstates > 0);
1134 assert(!(t->child->flags & SHORTER));
1135 assert(begin <= end);
1136
1137 MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1138
1139 /*
1140 * For the moment, assume the minimum number of matches is 1. If zero
1141 * matches are allowed, and the target string is empty, we are allowed to
1142 * match regardless of the contents of the iter node --- but we would
1143 * prefer to match once, so that capturing parens get set. (An example of
1144 * the concern here is a pattern like "()*\1", which historically this
1145 * code has allowed to succeed.) Therefore, we deal with the zero-matches
1146 * case at the bottom, after failing to find any other way to match.
1147 */
1148 min_matches = t->min;
1149 if (min_matches <= 0)
1150 min_matches = 1;
1151
1152 /*
1153 * We need workspace to track the endpoints of each sub-match. Normally
1154 * we consider only nonzero-length sub-matches, so there can be at most
1155 * end-begin of them. However, if min is larger than that, we will also
1156 * consider zero-length sub-matches in order to find enough matches.
1157 *
1158 * For convenience, endpts[0] contains the "begin" pointer and we store
1159 * sub-match endpoints in endpts[1..max_matches].
1160 */
1161 max_matches = end - begin;
1162 if (max_matches > t->max && t->max != DUPINF)
1163 max_matches = t->max;
1164 if (max_matches < min_matches)
1165 max_matches = min_matches;
1166 endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1167 if (endpts == NULL)
1168 return REG_ESPACE;
1169 endpts[0] = begin;
1170
1171 d = getsubdfa(v, t->child);
1172 if (ISERR())
1173 {
1174 FREE(endpts);
1175 return v->err;
1176 }
1177
1178 /*
1179 * Our strategy is to first find a set of sub-match endpoints that are
1180 * valid according to the child node's DFA, and then recursively dissect
1181 * each sub-match to confirm validity. If any validity check fails,
1182 * backtrack that sub-match and try again. And, when we next try for a
1183 * validity check, we need not recheck any successfully verified
1184 * sub-matches that we didn't move the endpoints of. nverified remembers
1185 * how many sub-matches are currently known okay.
1186 */
1187
1188 /* initialize to consider first sub-match */
1189 nverified = 0;
1190 k = 1;
1191 limit = end;
1192
1193 /* iterate until satisfaction or failure */
1194 while (k > 0)
1195 {
1196 /* try to find an endpoint for the k'th sub-match */
1197 endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
1198 if (ISERR())
1199 {
1200 FREE(endpts);
1201 return v->err;
1202 }
1203 if (endpts[k] == NULL)
1204 {
1205 /* no match possible, so see if we can shorten previous one */
1206 k--;
1207 goto backtrack;
1208 }
1209 MDEBUG(("%d: working endpoint %d: %ld\n",
1210 t->id, k, LOFF(endpts[k])));
1211
1212 /* k'th sub-match can no longer be considered verified */
1213 if (nverified >= k)
1214 nverified = k - 1;
1215
1216 if (endpts[k] != end)
1217 {
1218 /* haven't reached end yet, try another iteration if allowed */
1219 if (k >= max_matches)
1220 {
1221 /* must try to shorten some previous match */
1222 k--;
1223 goto backtrack;
1224 }
1225
1226 /* reject zero-length match unless necessary to achieve min */
1227 if (endpts[k] == endpts[k - 1] &&
1228 (k >= min_matches || min_matches - k < end - endpts[k]))
1229 goto backtrack;
1230
1231 k++;
1232 limit = end;
1233 continue;
1234 }
1235
1236 /*
1237 * We've identified a way to divide the string into k sub-matches that
1238 * works so far as the child DFA can tell. If k is an allowed number
1239 * of matches, start the slow part: recurse to verify each sub-match.
1240 * We always have k <= max_matches, needn't check that.
1241 */
1242 if (k < min_matches)
1243 goto backtrack;
1244
1245 MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1246
1247 for (i = nverified + 1; i <= k; i++)
1248 {
1249 /* zap any match data from a non-last iteration */
1250 zaptreesubs(v, t->child);
1251 er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1252 if (er == REG_OKAY)
1253 {
1254 nverified = i;
1255 continue;
1256 }
1257 if (er == REG_NOMATCH)
1258 break;
1259 /* oops, something failed */
1260 FREE(endpts);
1261 return er;
1262 }
1263
1264 if (i > k)
1265 {
1266 /* satisfaction */
1267 MDEBUG(("%d: successful\n", t->id));
1268 FREE(endpts);
1269 return REG_OKAY;
1270 }
1271
1272 /* i'th match failed to verify, so backtrack it */
1273 k = i;
1274
1275backtrack:
1276
1277 /*
1278 * Must consider shorter versions of the k'th sub-match. However,
1279 * we'll only ask for a zero-length match if necessary.
1280 */
1281 while (k > 0)
1282 {
1283 chr *prev_end = endpts[k - 1];
1284
1285 if (endpts[k] > prev_end)
1286 {
1287 limit = endpts[k] - 1;
1288 if (limit > prev_end ||
1289 (k < min_matches && min_matches - k >= end - prev_end))
1290 {
1291 /* break out of backtrack loop, continue the outer one */
1292 break;
1293 }
1294 }
1295 /* can't shorten k'th sub-match any more, consider previous one */
1296 k--;
1297 }
1298 }
1299
1300 /* all possibilities exhausted */
1301 FREE(endpts);
1302
1303 /*
1304 * Now consider the possibility that we can match to a zero-length string
1305 * by using zero repetitions.
1306 */
1307 if (t->min == 0 && begin == end)
1308 {
1309 MDEBUG(("%d: allowing zero matches\n", t->id));
1310 return REG_OKAY;
1311 }
1312
1313 MDEBUG(("%d: failed\n", t->id));
1314 return REG_NOMATCH;
1315}
int i
Definition: isn.c:74
#define MALLOC(n)
Definition: regcustom.h:52
#define REG_ESPACE
Definition: regex.h:227
@ FREE
Definition: task.c:94

References assert, cdissect(), subre::child, subre::cnfa, DUPINF, vars::err, subre::flags, FREE, getsubdfa(), i, subre::id, ISERR, LOFF, longest(), MALLOC, subre::max, MDEBUG, subre::min, cnfa::nstates, subre::op, REG_ESPACE, REG_NOMATCH, REG_OKAY, SHORTER, and zaptreesubs().

Referenced by cdissect().

◆ crevcondissect()

static int crevcondissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 910 of file regexec.c.

914{
915 struct subre *left = t->child;
916 struct subre *right = left->sibling;
917 struct dfa *d;
918 struct dfa *d2;
919 chr *mid;
920 int er;
921
922 assert(t->op == '.');
923 assert(left != NULL && left->cnfa.nstates > 0);
924 assert(right != NULL && right->cnfa.nstates > 0);
925 assert(right->sibling == NULL);
926 assert(left->flags & SHORTER);
927
928 d = getsubdfa(v, left);
929 NOERR();
930 d2 = getsubdfa(v, right);
931 NOERR();
932 MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
933
934 /* pick a tentative midpoint */
935 mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
936 NOERR();
937 if (mid == NULL)
938 return REG_NOMATCH;
939 MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
940
941 /* iterate until satisfaction or failure */
942 for (;;)
943 {
944 /* try this midpoint on for size */
945 if (longest(v, d2, mid, end, (int *) NULL) == end)
946 {
947 er = cdissect(v, left, begin, mid);
948 if (er == REG_OKAY)
949 {
950 er = cdissect(v, right, mid, end);
951 if (er == REG_OKAY)
952 {
953 /* satisfaction */
954 MDEBUG(("%d: successful\n", t->id));
955 return REG_OKAY;
956 }
957 /* Reset left's matches (right should have done so itself) */
958 zaptreesubs(v, left);
959 }
960 if (er != REG_NOMATCH)
961 return er;
962 }
963 NOERR();
964
965 /* that midpoint didn't work, find a new one */
966 if (mid == end)
967 {
968 /* all possibilities exhausted */
969 MDEBUG(("%d: no midpoint\n", t->id));
970 return REG_NOMATCH;
971 }
972 mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
973 NOERR();
974 if (mid == NULL)
975 {
976 /* failed to find a new one */
977 MDEBUG(("%d: failed midpoint\n", t->id));
978 return REG_NOMATCH;
979 }
980 MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
981 }
982
983 /* can't get here */
984 return REG_ASSERT;
985}

References assert, cdissect(), subre::child, subre::cnfa, subre::flags, getsubdfa(), subre::id, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, subre::op, REG_ASSERT, REG_NOMATCH, REG_OKAY, SHORTER, shortest(), subre::sibling, and zaptreesubs().

Referenced by cdissect().

◆ creviterdissect()

static int creviterdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
)
static

Definition at line 1321 of file regexec.c.

1325{
1326 struct dfa *d;
1327 chr **endpts;
1328 chr *limit;
1329 int min_matches;
1330 size_t max_matches;
1331 int nverified;
1332 int k;
1333 int i;
1334 int er;
1335
1336 assert(t->op == '*');
1337 assert(t->child != NULL && t->child->cnfa.nstates > 0);
1338 assert(t->child->flags & SHORTER);
1339 assert(begin <= end);
1340
1341 MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1342
1343 /*
1344 * If zero matches are allowed, and target string is empty, just declare
1345 * victory. OTOH, if target string isn't empty, zero matches can't work
1346 * so we pretend the min is 1.
1347 */
1348 min_matches = t->min;
1349 if (min_matches <= 0)
1350 {
1351 if (begin == end)
1352 {
1353 MDEBUG(("%d: allowing zero matches\n", t->id));
1354 return REG_OKAY;
1355 }
1356 min_matches = 1;
1357 }
1358
1359 /*
1360 * We need workspace to track the endpoints of each sub-match. Normally
1361 * we consider only nonzero-length sub-matches, so there can be at most
1362 * end-begin of them. However, if min is larger than that, we will also
1363 * consider zero-length sub-matches in order to find enough matches.
1364 *
1365 * For convenience, endpts[0] contains the "begin" pointer and we store
1366 * sub-match endpoints in endpts[1..max_matches].
1367 */
1368 max_matches = end - begin;
1369 if (max_matches > t->max && t->max != DUPINF)
1370 max_matches = t->max;
1371 if (max_matches < min_matches)
1372 max_matches = min_matches;
1373 endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1374 if (endpts == NULL)
1375 return REG_ESPACE;
1376 endpts[0] = begin;
1377
1378 d = getsubdfa(v, t->child);
1379 if (ISERR())
1380 {
1381 FREE(endpts);
1382 return v->err;
1383 }
1384
1385 /*
1386 * Our strategy is to first find a set of sub-match endpoints that are
1387 * valid according to the child node's DFA, and then recursively dissect
1388 * each sub-match to confirm validity. If any validity check fails,
1389 * backtrack that sub-match and try again. And, when we next try for a
1390 * validity check, we need not recheck any successfully verified
1391 * sub-matches that we didn't move the endpoints of. nverified remembers
1392 * how many sub-matches are currently known okay.
1393 */
1394
1395 /* initialize to consider first sub-match */
1396 nverified = 0;
1397 k = 1;
1398 limit = begin;
1399
1400 /* iterate until satisfaction or failure */
1401 while (k > 0)
1402 {
1403 /* disallow zero-length match unless necessary to achieve min */
1404 if (limit == endpts[k - 1] &&
1405 limit != end &&
1406 (k >= min_matches || min_matches - k < end - limit))
1407 limit++;
1408
1409 /* if this is the last allowed sub-match, it must reach to the end */
1410 if (k >= max_matches)
1411 limit = end;
1412
1413 /* try to find an endpoint for the k'th sub-match */
1414 endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
1415 (chr **) NULL, (int *) NULL);
1416 if (ISERR())
1417 {
1418 FREE(endpts);
1419 return v->err;
1420 }
1421 if (endpts[k] == NULL)
1422 {
1423 /* no match possible, so see if we can lengthen previous one */
1424 k--;
1425 goto backtrack;
1426 }
1427 MDEBUG(("%d: working endpoint %d: %ld\n",
1428 t->id, k, LOFF(endpts[k])));
1429
1430 /* k'th sub-match can no longer be considered verified */
1431 if (nverified >= k)
1432 nverified = k - 1;
1433
1434 if (endpts[k] != end)
1435 {
1436 /* haven't reached end yet, try another iteration if allowed */
1437 if (k >= max_matches)
1438 {
1439 /* must try to lengthen some previous match */
1440 k--;
1441 goto backtrack;
1442 }
1443
1444 k++;
1445 limit = endpts[k - 1];
1446 continue;
1447 }
1448
1449 /*
1450 * We've identified a way to divide the string into k sub-matches that
1451 * works so far as the child DFA can tell. If k is an allowed number
1452 * of matches, start the slow part: recurse to verify each sub-match.
1453 * We always have k <= max_matches, needn't check that.
1454 */
1455 if (k < min_matches)
1456 goto backtrack;
1457
1458 MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1459
1460 for (i = nverified + 1; i <= k; i++)
1461 {
1462 /* zap any match data from a non-last iteration */
1463 zaptreesubs(v, t->child);
1464 er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1465 if (er == REG_OKAY)
1466 {
1467 nverified = i;
1468 continue;
1469 }
1470 if (er == REG_NOMATCH)
1471 break;
1472 /* oops, something failed */
1473 FREE(endpts);
1474 return er;
1475 }
1476
1477 if (i > k)
1478 {
1479 /* satisfaction */
1480 MDEBUG(("%d: successful\n", t->id));
1481 FREE(endpts);
1482 return REG_OKAY;
1483 }
1484
1485 /* i'th match failed to verify, so backtrack it */
1486 k = i;
1487
1488backtrack:
1489
1490 /*
1491 * Must consider longer versions of the k'th sub-match.
1492 */
1493 while (k > 0)
1494 {
1495 if (endpts[k] < end)
1496 {
1497 limit = endpts[k] + 1;
1498 /* break out of backtrack loop, continue the outer one */
1499 break;
1500 }
1501 /* can't lengthen k'th sub-match any more, consider previous one */
1502 k--;
1503 }
1504 }
1505
1506 /* all possibilities exhausted */
1507 MDEBUG(("%d: failed\n", t->id));
1508 FREE(endpts);
1509 return REG_NOMATCH;
1510}

References assert, cdissect(), subre::child, subre::cnfa, DUPINF, vars::err, subre::flags, FREE, getsubdfa(), i, subre::id, ISERR, LOFF, MALLOC, subre::max, MDEBUG, subre::min, cnfa::nstates, subre::op, REG_ESPACE, REG_NOMATCH, REG_OKAY, SHORTER, shortest(), and zaptreesubs().

Referenced by cdissect().

◆ dfa_backref()

static chr * dfa_backref ( struct vars v,
struct dfa d,
chr start,
chr min,
chr max,
bool  shortest 
)
static

◆ find()

static int find ( struct vars v,
struct cnfa cnfa,
struct colormap cm 
)
static

Definition at line 419 of file regexec.c.

422{
423 struct dfa *s;
424 struct dfa *d;
425 chr *begin;
426 chr *end = NULL;
427 chr *cold;
428 chr *open; /* open and close of range of possible starts */
429 chr *close;
430 int hitend;
431 int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
432
433 /* first, a shot with the search RE */
434 s = newdfa(v, &v->g->search, cm, &v->dfa1);
435 if (s == NULL)
436 return v->err;
437 MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
438 cold = NULL;
439 close = shortest(v, s, v->search_start, v->search_start, v->stop,
440 &cold, (int *) NULL);
441 freedfa(s);
442 NOERR();
443 if (v->g->cflags & REG_EXPECT)
444 {
445 assert(v->details != NULL);
446 if (cold != NULL)
447 v->details->rm_extend.rm_so = OFF(cold);
448 else
449 v->details->rm_extend.rm_so = OFF(v->stop);
450 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
451 }
452 if (close == NULL) /* not found */
453 return REG_NOMATCH;
454 if (v->nmatch == 0) /* found, don't need exact location */
455 return REG_OKAY;
456
457 /* find starting point and match */
458 assert(cold != NULL);
459 open = cold;
460 cold = NULL;
461 MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
462 d = newdfa(v, cnfa, cm, &v->dfa1);
463 if (d == NULL)
464 return v->err;
465 for (begin = open; begin <= close; begin++)
466 {
467 MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
468 if (shorter)
469 end = shortest(v, d, begin, begin, v->stop,
470 (chr **) NULL, &hitend);
471 else
472 end = longest(v, d, begin, v->stop, &hitend);
473 if (ISERR())
474 {
475 freedfa(d);
476 return v->err;
477 }
478 if (hitend && cold == NULL)
479 cold = begin;
480 if (end != NULL)
481 break; /* NOTE BREAK OUT */
482 }
483 assert(end != NULL); /* search RE succeeded so loop should */
484 freedfa(d);
485
486 /* and pin down details */
487 assert(v->nmatch > 0);
488 v->pmatch[0].rm_so = OFF(begin);
489 v->pmatch[0].rm_eo = OFF(end);
490 if (v->g->cflags & REG_EXPECT)
491 {
492 if (cold != NULL)
493 v->details->rm_extend.rm_so = OFF(cold);
494 else
495 v->details->rm_extend.rm_so = OFF(v->stop);
496 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
497 }
498 if (v->nmatch == 1) /* no need for submatches */
499 return REG_OKAY;
500
501 /* find submatches */
502 return cdissect(v, v->g->tree, begin, end);
503}

References assert, cdissect(), guts::cflags, close, dfa::cm, vars::details, vars::dfa1, vars::err, subre::flags, freedfa(), vars::g, ISERR, LOFF, longest(), MDEBUG, newdfa(), vars::nmatch, NOERR, OFF, vars::pmatch, REG_EXPECT, REG_NOMATCH, REG_OKAY, pg_regmatch_t::rm_eo, rm_detail_t::rm_extend, pg_regmatch_t::rm_so, guts::search, vars::search_start, SHORTER, shortest(), vars::start, vars::stop, and guts::tree.

Referenced by NIAddAffix(), NIImportAffixes(), NIImportOOAffixes(), parse_affentry(), parse_ooaffentry(), pg_regexec(), and testdelete().

◆ freedfa()

static void freedfa ( struct dfa d)
static

Referenced by cfind(), find(), and pg_regexec().

◆ getladfa()

static struct dfa * getladfa ( struct vars v,
int  n 
)
static

Definition at line 400 of file regexec.c.

402{
403 assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
404
405 if (v->ladfas[n] == NULL)
406 {
407 struct subre *sub = &v->g->lacons[n];
408
409 v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
410 /* a LACON can't contain a backref, so nothing else to do */
411 }
412 return v->ladfas[n];
413}
#define DOMALLOC
Definition: regexec.c:101
struct subre * lacons
Definition: regguts.h:542
struct colormap cmap
Definition: regguts.h:540
struct dfa ** ladfas
Definition: regexec.c:119

References assert, guts::cmap, subre::cnfa, DOMALLOC, vars::g, guts::lacons, vars::ladfas, and newdfa().

Referenced by lacon().

◆ getsubdfa()

static struct dfa * getsubdfa ( struct vars v,
struct subre t 
)
static

Definition at line 372 of file regexec.c.

374{
375 struct dfa *d = v->subdfas[t->id];
376
377 if (d == NULL)
378 {
379 d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
380 if (d == NULL)
381 return NULL;
382 /* set up additional info if this is a backref node */
383 if (t->op == 'b')
384 {
385 d->backno = t->backno;
386 d->backmin = t->min;
387 d->backmax = t->max;
388 }
389 v->subdfas[t->id] = d;
390 }
391 return d;
392}
short backmin
Definition: regexec.c:81
int backno
Definition: regexec.c:80
short backmax
Definition: regexec.c:82
struct dfa ** subdfas
Definition: regexec.c:118

References dfa::backmax, dfa::backmin, dfa::backno, subre::backno, guts::cmap, subre::cnfa, DOMALLOC, vars::g, subre::id, subre::max, subre::min, newdfa(), subre::op, and vars::subdfas.

Referenced by caltdissect(), ccondissect(), citerdissect(), crevcondissect(), and creviterdissect().

◆ getvacant()

static struct sset * getvacant ( struct vars v,
struct dfa d,
chr cp,
chr start 
)
static

◆ hash()

static unsigned hash ( unsigned *  uv,
int  n 
)
static

◆ initialize()

static struct sset * initialize ( struct vars v,
struct dfa d,
chr start 
)
static

◆ lacon()

static int lacon ( struct vars v,
struct cnfa pcnfa,
chr cp,
color  co 
)
static

◆ lastcold()

static chr * lastcold ( struct vars v,
struct dfa d 
)
static

◆ longest()

static chr * longest ( struct vars v,
struct dfa d,
chr start,
chr stop,
int *  hitstopp 
)
static

◆ matchuntil()

static int matchuntil ( struct vars v,
struct dfa d,
chr probe,
struct sset **  lastcss,
chr **  lastcp 
)
static

◆ miss()

static struct sset * miss ( struct vars v,
struct dfa d,
struct sset css,
color  co,
chr cp,
chr start 
)
static

◆ newdfa()

static struct dfa * newdfa ( struct vars v,
struct cnfa cnfa,
struct colormap cm,
struct smalldfa sml 
)
static

Referenced by cfind(), find(), getladfa(), and getsubdfa().

◆ pg_regexec()

int pg_regexec ( regex_t re,
const chr string,
size_t  len,
size_t  search_start,
rm_detail_t details,
size_t  nmatch,
regmatch_t  pmatch[],
int  flags 
)

Definition at line 185 of file regexec.c.

193{
194 struct vars var;
195 struct vars *v = &var;
196 int st;
197 size_t n;
198 size_t i;
199 int backref;
200
201#define LOCALMAT 20
202 regmatch_t mat[LOCALMAT];
203
204#define LOCALDFAS 40
205 struct dfa *subdfas[LOCALDFAS];
206
207 /* sanity checks */
208 if (re == NULL || string == NULL || re->re_magic != REMAGIC)
209 return REG_INVARG;
210 if (re->re_csize != sizeof(chr))
211 return REG_MIXED;
212 if (search_start > len)
213 return REG_NOMATCH;
214
215 /* Initialize locale-dependent support */
216 pg_set_regex_collation(re->re_collation);
217
218 /* setup */
219 v->re = re;
220 v->g = (struct guts *) re->re_guts;
221 if ((v->g->cflags & REG_EXPECT) && details == NULL)
222 return REG_INVARG;
223 if (v->g->info & REG_UIMPOSSIBLE)
224 return REG_NOMATCH;
225 backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
226 v->eflags = flags;
227 if (backref && nmatch <= v->g->nsub)
228 {
229 /* need larger work area */
230 v->nmatch = v->g->nsub + 1;
231 if (v->nmatch <= LOCALMAT)
232 v->pmatch = mat;
233 else
234 v->pmatch = (regmatch_t *) MALLOC(v->nmatch * sizeof(regmatch_t));
235 if (v->pmatch == NULL)
236 return REG_ESPACE;
237 zapallsubs(v->pmatch, v->nmatch);
238 }
239 else
240 {
241 /* we can store results directly in caller's array */
242 v->pmatch = pmatch;
243 /* ensure any extra entries in caller's array are filled with -1 */
244 if (nmatch > 0)
245 zapallsubs(pmatch, nmatch);
246 /* then forget about extra entries, to avoid useless work in find() */
247 if (nmatch > v->g->nsub + 1)
248 nmatch = v->g->nsub + 1;
249 v->nmatch = nmatch;
250 }
251 v->details = details;
252 v->start = (chr *) string;
253 v->search_start = (chr *) string + search_start;
254 v->stop = (chr *) string + len;
255 v->err = 0;
256 v->subdfas = NULL;
257 v->ladfas = NULL;
258 v->lblastcss = NULL;
259 v->lblastcp = NULL;
260 /* below this point, "goto cleanup" will behave sanely */
261
262 assert(v->g->ntree >= 0);
263 n = (size_t) v->g->ntree;
264 if (n <= LOCALDFAS)
265 v->subdfas = subdfas;
266 else
267 {
268 v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
269 if (v->subdfas == NULL)
270 {
271 st = REG_ESPACE;
272 goto cleanup;
273 }
274 }
275 for (i = 0; i < n; i++)
276 v->subdfas[i] = NULL;
277
278 assert(v->g->nlacons >= 0);
279 n = (size_t) v->g->nlacons;
280 if (n > 0)
281 {
282 v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
283 if (v->ladfas == NULL)
284 {
285 st = REG_ESPACE;
286 goto cleanup;
287 }
288 for (i = 0; i < n; i++)
289 v->ladfas[i] = NULL;
290 v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
291 v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
292 if (v->lblastcss == NULL || v->lblastcp == NULL)
293 {
294 st = REG_ESPACE;
295 goto cleanup;
296 }
297 for (i = 0; i < n; i++)
298 {
299 v->lblastcss[i] = NULL;
300 v->lblastcp[i] = NULL;
301 }
302 }
303
304 /* do it */
305 assert(v->g->tree != NULL);
306 if (backref)
307 st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
308 else
309 st = find(v, &v->g->tree->cnfa, &v->g->cmap);
310
311 /* on success, ensure caller's match vector is filled correctly */
312 if (st == REG_OKAY && nmatch > 0)
313 {
314 if (v->pmatch != pmatch)
315 {
316 /* copy portion of match vector over from (larger) work area */
317 assert(nmatch <= v->nmatch);
318 memcpy(VS(pmatch), VS(v->pmatch), nmatch * sizeof(regmatch_t));
319 }
320 if (v->g->cflags & REG_NOSUB)
321 {
322 /* don't expose possibly-partial sub-match results to caller */
323 zapallsubs(pmatch, nmatch);
324 }
325 }
326
327 /* clean up */
328cleanup:
329 if (v->pmatch != pmatch && v->pmatch != mat)
330 FREE(v->pmatch);
331 if (v->subdfas != NULL)
332 {
333 n = (size_t) v->g->ntree;
334 for (i = 0; i < n; i++)
335 {
336 if (v->subdfas[i] != NULL)
337 freedfa(v->subdfas[i]);
338 }
339 if (v->subdfas != subdfas)
340 FREE(v->subdfas);
341 }
342 if (v->ladfas != NULL)
343 {
344 n = (size_t) v->g->nlacons;
345 for (i = 0; i < n; i++)
346 {
347 if (v->ladfas[i] != NULL)
348 freedfa(v->ladfas[i]);
349 }
350 FREE(v->ladfas);
351 }
352 if (v->lblastcss != NULL)
353 FREE(v->lblastcss);
354 if (v->lblastcp != NULL)
355 FREE(v->lblastcp);
356
357#ifdef REG_DEBUG
358 if (v->eflags & (REG_FTRACE | REG_MTRACE))
359 fflush(stdout);
360#endif
361
362 return st;
363}
static void cleanup(void)
Definition: bootstrap.c:713
for(;;)
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:78
const void size_t len
void pg_set_regex_collation(Oid collation)
#define REG_UIMPOSSIBLE
Definition: regex.h:150
#define REG_MTRACE
Definition: regex.h:206
#define REG_FTRACE
Definition: regex.h:205
#define REG_INVARG
Definition: regex.h:230
#define regmatch_t
Definition: regex.h:246
#define REG_MIXED
Definition: regex.h:231
#define REG_NOSUB
Definition: regex.h:185
#define REG_UBACKREF
Definition: regex.h:138
static int find(struct vars *v, struct cnfa *cnfa, struct colormap *cm)
Definition: regexec.c:419
static void zapallsubs(regmatch_t *p, size_t n)
Definition: regexec.c:663
static int cfind(struct vars *v, struct cnfa *cnfa, struct colormap *cm)
Definition: regexec.c:509
#define LOCALMAT
#define LOCALDFAS
#define REMAGIC
Definition: regguts.h:101
Definition: regguts.h:531
long info
Definition: regguts.h:535
int ntree
Definition: regguts.h:539
size_t nsub
Definition: regguts.h:536
int nlacons
Definition: regguts.h:543
Definition: regexec.c:46
Definition: regcomp.c:282
struct sset ** lblastcss
Definition: regexec.c:120
chr ** lblastcp
Definition: regexec.c:121
int eflags
Definition: regexec.c:110

References assert, cfind(), guts::cflags, cleanup(), guts::cmap, subre::cnfa, vars::details, vars::eflags, vars::err, find(), for(), FREE, freedfa(), vars::g, i, if(), guts::info, vars::ladfas, vars::lblastcp, vars::lblastcss, len, LOCALDFAS, LOCALMAT, MALLOC, guts::nlacons, vars::nmatch, guts::nsub, guts::ntree, pg_set_regex_collation(), vars::pmatch, vars::re, REG_ESPACE, REG_EXPECT, REG_FTRACE, REG_INVARG, REG_MIXED, REG_MTRACE, REG_NOMATCH, REG_NOSUB, REG_OKAY, REG_UBACKREF, REG_UIMPOSSIBLE, regmatch_t, REMAGIC, vars::search_start, vars::start, generate_unaccent_rules::stdout, vars::stop, vars::subdfas, guts::tree, VS, and zapallsubs().

Referenced by CheckAffix(), RE_wchar_execute(), regexec_auth_token(), replace_text_regexp(), and test_re_execute().

◆ pickss()

static struct sset * pickss ( struct vars v,
struct dfa d,
chr cp,
chr start 
)
static

◆ shortest()

static chr * shortest ( struct vars v,
struct dfa d,
chr start,
chr min,
chr max,
chr **  coldp,
int *  hitstopp 
)
static

◆ subset()

static void subset ( struct vars v,
struct subre sub,
chr begin,
chr end 
)
static

Definition at line 702 of file regexec.c.

706{
707 int n = sub->capno;
708
709 assert(n > 0);
710 if ((size_t) n >= v->nmatch)
711 return;
712
713 MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
714 v->pmatch[n].rm_so = OFF(begin);
715 v->pmatch[n].rm_eo = OFF(end);
716}

References assert, subre::begin, subre::capno, subre::end, subre::id, LOFF, MDEBUG, vars::nmatch, OFF, and vars::pmatch.

Referenced by cdissect().

◆ zapallsubs()

static void zapallsubs ( regmatch_t p,
size_t  n 
)
static

Definition at line 663 of file regexec.c.

665{
666 size_t i;
667
668 for (i = n - 1; i > 0; i--)
669 {
670 p[i].rm_so = -1;
671 p[i].rm_eo = -1;
672 }
673}

References i.

Referenced by pg_regexec().

◆ zaptreesubs()

static void zaptreesubs ( struct vars v,
struct subre t 
)
static

Definition at line 679 of file regexec.c.

681{
682 int n = t->capno;
683 struct subre *t2;
684
685 if (n > 0)
686 {
687 if ((size_t) n < v->nmatch)
688 {
689 v->pmatch[n].rm_so = -1;
690 v->pmatch[n].rm_eo = -1;
691 }
692 }
693
694 for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
695 zaptreesubs(v, t2);
696}

References subre::capno, subre::child, vars::pmatch, subre::sibling, and zaptreesubs().

Referenced by ccondissect(), citerdissect(), crevcondissect(), creviterdissect(), and zaptreesubs().