Loading...
--- Libc/Libc-763.12/regex/engine.c
+++ Libc/Libc-262/regex/engine.c
@@ -1,5 +1,25 @@
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+/*
+ * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/*
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
@@ -14,6 +34,10 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -29,12 +53,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * @(#)engine.c 8.5 (Berkeley) 3/20/94
*/
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds Exp $");
/*
* The matching engine and friends. This file is #included by regexec.c
@@ -65,34 +84,22 @@
#define at lat
#define match lmat
#endif
-#ifdef MNAMES
-#define matcher mmatcher
-#define fast mfast
-#define slow mslow
-#define dissect mdissect
-#define backref mbackref
-#define step mstep
-#define print mprint
-#define at mat
-#define match mmat
-#endif
/* another structure passed up and down to avoid zillions of parameters */
struct match {
struct re_guts *g;
int eflags;
regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
- const char *offp; /* offsets work from here */
- const char *beginp; /* start of string -- virtual NUL precedes */
- const char *endp; /* end of string -- virtual NUL here */
- const char *coldp; /* can be no match starting before here */
- const char **lastpos; /* [nplus+1] */
+ char *offp; /* offsets work from here */
+ char *beginp; /* start of string -- virtual NUL precedes */
+ char *endp; /* end of string -- virtual NUL here */
+ char *coldp; /* can be no match starting before here */
+ char **lastpos; /* [nplus+1] */
STATEVARS;
states st; /* current states */
states fresh; /* states for a fresh start */
states tmp; /* temporary */
states empty; /* empty set of states */
- mbstate_t mbs; /* multibyte conversion state */
};
/* ========= begin header generated by ./mkh ========= */
@@ -101,29 +108,29 @@
#endif
/* === engine.c === */
-static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
-static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
-static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
-static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
-static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
-static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
-#define MAX_RECURSION 100
-#define BOL (OUT-1)
-#define EOL (BOL-1)
-#define BOLEOL (BOL-2)
-#define NOTHING (BOL-3)
-#define BOW (BOL-4)
-#define EOW (BOL-5)
-#define BADCHAR (BOL-6)
-#define NONCHAR(c) ((c) <= OUT)
+static int matcher __P((struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags));
+static char *dissect __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
+static char *backref __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev));
+static char *fast __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
+static char *slow __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
+static states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft));
+#define BOL (OUT+1)
+#define EOL (BOL+1)
+#define BOLEOL (BOL+2)
+#define NOTHING (BOL+3)
+#define BOW (BOL+4)
+#define EOW (BOL+5)
+#define CODEMAX (BOL+5) /* highest code used */
+#define NONCHAR(c) ((c) > CHAR_MAX)
+#define NNONCHAR (CODEMAX-CHAR_MAX)
#ifdef REDEBUG
-static void print(struct match *m, const char *caption, states st, int ch, FILE *d);
+static void print __P((struct match *m, char *caption, states st, int ch, FILE *d));
#endif
#ifdef REDEBUG
-static void at(struct match *m, const char *title, const char *start, const char *stop, sopno startst, sopno stopst);
+static void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst));
#endif
#ifdef REDEBUG
-static const char *pchar(int ch);
+static char *pchar __P((int ch));
#endif
#ifdef __cplusplus
@@ -143,32 +150,26 @@
/*
- matcher - the actual matching engine
- == static int matcher(struct re_guts *g, const char *string, \
+ == static int matcher(register struct re_guts *g, char *string, \
== size_t nmatch, regmatch_t pmatch[], int eflags);
*/
static int /* 0 success, REG_NOMATCH failure */
-matcher(struct re_guts *g,
- const char *string,
- size_t nmatch,
- regmatch_t pmatch[],
- int eflags)
+matcher(g, string, nmatch, pmatch, eflags)
+register struct re_guts *g;
+char *string;
+size_t nmatch;
+regmatch_t pmatch[];
+int eflags;
{
- const char *endp;
- int i;
+ register char *endp;
+ register int i;
struct match mv;
- struct match *m = &mv;
- const char *dp;
- const sopno gf = g->firststate+1; /* +1 for OEND */
- const sopno gl = g->laststate;
- const char *start;
- const char *stop;
- /* Boyer-Moore algorithms variables */
- const char *pp;
- int cj, mj;
- const char *mustfirst;
- const char *mustlast;
- int *matchjump;
- int *charjump;
+ register struct match *m = &mv;
+ register char *dp;
+ const register sopno gf = g->firststate+1; /* +1 for OEND */
+ const register sopno gl = g->laststate;
+ char *start;
+ char *stop;
/* simplify the situation where possible */
if (g->cflags&REG_NOSUB)
@@ -185,46 +186,12 @@
/* prescreening; this does wonders for this rather slow code */
if (g->must != NULL) {
- if (g->charjump != NULL && g->matchjump != NULL) {
- mustfirst = g->must;
- mustlast = g->must + g->mlen - 1;
- charjump = g->charjump;
- matchjump = g->matchjump;
- pp = mustlast;
- for (dp = start+g->mlen-1; dp < stop;) {
- /* Fast skip non-matches */
- while (dp < stop && charjump[(int)*dp])
- dp += charjump[(int)*dp];
-
- if (dp >= stop)
- break;
-
- /* Greedy matcher */
- /* We depend on not being used for
- * for strings of length 1
- */
- while (*--dp == *--pp && pp != mustfirst);
-
- if (*dp == *pp)
- break;
-
- /* Jump to next possible match */
- mj = matchjump[pp - mustfirst];
- cj = charjump[(int)*dp];
- dp += (cj < mj ? mj : cj);
- pp = mustlast;
- }
- if (pp != mustfirst)
- return(REG_NOMATCH);
- } else {
- for (dp = start; dp < stop; dp++)
- if (*dp == g->must[0] &&
- stop - dp >= g->mlen &&
- memcmp(dp, g->must, (size_t)g->mlen) == 0)
- break;
- if (dp == stop) /* we didn't find g->must */
- return(REG_NOMATCH);
- }
+ for (dp = start; dp < stop; dp++)
+ if (*dp == g->must[0] && stop - dp >= g->mlen &&
+ memcmp(dp, g->must, (size_t)g->mlen) == 0)
+ break;
+ if (dp == stop) /* we didn't find g->must */
+ return(REG_NOMATCH);
}
/* match struct setup */
@@ -241,22 +208,11 @@
SETUP(m->tmp);
SETUP(m->empty);
CLEAR(m->empty);
- ZAPSTATE(&m->mbs);
-
- /* Adjust start according to moffset, to speed things up */
- if (g->moffset > -1)
- start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
-
- SP("mloop", m->st, *start);
/* this loop does only one repetition except for backrefs */
for (;;) {
endp = fast(m, start, stop, gf, gl);
if (endp == NULL) { /* a miss */
- if (m->pmatch != NULL)
- free((char *)m->pmatch);
- if (m->lastpos != NULL)
- free((char *)m->lastpos);
STATETEARDOWN(m);
return(REG_NOMATCH);
}
@@ -271,8 +227,7 @@
if (endp != NULL)
break;
assert(m->coldp < m->endp);
- m->coldp += XMBRTOWC(NULL, m->coldp,
- m->endp - m->coldp, &m->mbs, 0, g->loc);
+ m->coldp++;
}
if (nmatch == 1 && !g->backrefs)
break; /* no further info needed */
@@ -292,15 +247,15 @@
dp = dissect(m, m->coldp, endp, gf, gl);
} else {
if (g->nplus > 0 && m->lastpos == NULL)
- m->lastpos = malloc((g->nplus+1) *
- sizeof(const char *));
+ m->lastpos = (char **)malloc((g->nplus+1) *
+ sizeof(char *));
if (g->nplus > 0 && m->lastpos == NULL) {
free(m->pmatch);
STATETEARDOWN(m);
return(REG_ESPACE);
}
NOTE("backref dissect");
- dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
}
if (dp != NULL)
break;
@@ -323,7 +278,7 @@
}
#endif
NOTE("backoff dissect");
- dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
}
assert(dp == NULL || dp == endp);
if (dp != NULL) /* found a shorter one */
@@ -331,9 +286,7 @@
/* despite initial appearances, there is no match here */
NOTE("false alarm");
- /* recycle starting later */
- start = m->coldp + XMBRTOWC(NULL, m->coldp,
- stop - m->coldp, &m->mbs, 0, g->loc);
+ start = m->coldp + 1; /* recycle starting later */
assert(start <= stop);
}
@@ -363,29 +316,30 @@
/*
- dissect - figure out what matched what, no back references
- == static const char *dissect(struct match *m, const char *start, \
- == const char *stop, sopno startst, sopno stopst);
+ == static char *dissect(register struct match *m, char *start, \
+ == char *stop, sopno startst, sopno stopst);
*/
-static const char * /* == stop (success) always */
-dissect(struct match *m,
- const char *start,
- const char *stop,
- sopno startst,
- sopno stopst)
+static char * /* == stop (success) always */
+dissect(m, start, stop, startst, stopst)
+register struct match *m;
+char *start;
+char *stop;
+sopno startst;
+sopno stopst;
{
- int i;
- sopno ss; /* start sop of current subRE */
- sopno es; /* end sop of current subRE */
- const char *sp; /* start of string matched by it */
- const char *stp; /* string matched by it cannot pass here */
- const char *rest; /* start of rest of string */
- const char *tail; /* string unmatched by rest of RE */
- sopno ssub; /* start sop of subsubRE */
- sopno esub; /* end sop of subsubRE */
- const char *ssp; /* start of string matched by subsubRE */
- const char *sep; /* end of string matched by subsubRE */
- const char *oldssp; /* previous ssp */
- const char *dp;
+ register int i;
+ register sopno ss; /* start sop of current subRE */
+ register sopno es; /* end sop of current subRE */
+ register char *sp; /* start of string matched by it */
+ register char *stp; /* string matched by it cannot pass here */
+ register char *rest; /* start of rest of string */
+ register char *tail; /* string unmatched by rest of RE */
+ register sopno ssub; /* start sop of subsubRE */
+ register sopno esub; /* end sop of subsubRE */
+ register char *ssp; /* start of string matched by subsubRE */
+ register char *sep; /* end of string matched by subsubRE */
+ register char *oldssp; /* previous ssp */
+ register char *dp;
AT("diss", start, stop, startst, stopst);
sp = start;
@@ -410,7 +364,7 @@
assert(nope);
break;
case OCHAR:
- sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0, m->g->loc);
+ sp++;
break;
case OBOL:
case OEOL:
@@ -419,7 +373,7 @@
break;
case OANY:
case OANYOF:
- sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0, m->g->loc);
+ sp++;
break;
case OBACK_:
case O_BACK:
@@ -478,10 +432,6 @@
if (sep == NULL) {
/* last successful match */
sep = ssp;
- ssp = oldssp;
- }
- else if (tail==rest) {
- /* Fix for test expr 105 */
ssp = oldssp;
}
assert(sep == rest); /* must exhaust substring */
@@ -536,14 +486,6 @@
i = OPND(m->g->strip[ss]);
assert(0 < i && i <= m->g->nsub);
m->pmatch[i].rm_so = sp - m->offp;
- /* fix for T.regcomp 43: don't remember previous
- subexpression matches beyond the current one (i) */
- i++;
- while (i<= m->g->nsub) {
- m->pmatch[i].rm_so = -1;
- m->pmatch[i].rm_eo = -1;
- i++;
- }
break;
case ORPAREN:
i = OPND(m->g->strip[ss]);
@@ -562,31 +504,30 @@
/*
- backref - figure out what matched what, figuring in back references
- == static const char *backref(struct match *m, const char *start, \
- == const char *stop, sopno startst, sopno stopst, sopno lev);
+ == static char *backref(register struct match *m, char *start, \
+ == char *stop, sopno startst, sopno stopst, sopno lev);
*/
-static const char * /* == stop (success) or NULL (failure) */
-backref(struct match *m,
- const char *start,
- const char *stop,
- sopno startst,
- sopno stopst,
- sopno lev, /* PLUS nesting level */
- int rec)
+static char * /* == stop (success) or NULL (failure) */
+backref(m, start, stop, startst, stopst, lev)
+register struct match *m;
+char *start;
+char *stop;
+sopno startst;
+sopno stopst;
+sopno lev; /* PLUS nesting level */
{
- int i;
- sopno ss; /* start sop of current subRE */
- const char *sp; /* start of string matched by it */
- sopno ssub; /* start sop of subsubRE */
- sopno esub; /* end sop of subsubRE */
- const char *ssp; /* start of string matched by subsubRE */
- const char *dp;
- size_t len;
- int hard;
- sop s;
- regoff_t offsave;
- cset *cs;
- wint_t wc;
+ register int i;
+ register sopno ss; /* start sop of current subRE */
+ register char *sp; /* start of string matched by it */
+ register sopno ssub; /* start sop of subsubRE */
+ register sopno esub; /* end sop of subsubRE */
+ register char *ssp; /* start of string matched by subsubRE */
+ register char *dp;
+ register size_t len;
+ register int hard;
+ register sop s;
+ register regoff_t offsave;
+ register cset *cs;
AT("back", start, stop, startst, stopst);
sp = start;
@@ -596,25 +537,17 @@
for (ss = startst; !hard && ss < stopst; ss++)
switch (OP(s = m->g->strip[ss])) {
case OCHAR:
- if (sp == stop)
- return(NULL);
- sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR, m->g->loc);
- if (wc != OPND(s))
+ if (sp == stop || *sp++ != (char)OPND(s))
return(NULL);
break;
case OANY:
if (sp == stop)
return(NULL);
- sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR, m->g->loc);
- if (wc == BADCHAR)
- return (NULL);
+ sp++;
break;
case OANYOF:
- if (sp == stop)
- return (NULL);
cs = &m->g->sets[OPND(s)];
- sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR, m->g->loc);
- if (wc == BADCHAR || !CHIN(cs, wc, m->g->loc))
+ if (sp == stop || !CHIN(cs, *sp++))
return(NULL);
break;
case OBOL:
@@ -638,8 +571,8 @@
(sp < m->endp && *(sp-1) == '\n' &&
(m->g->cflags&REG_NEWLINE)) ||
(sp > m->beginp &&
- !ISWORD(*(sp-1), m->g->loc)) ) &&
- (sp < m->endp && ISWORD(*sp, m->g->loc)) )
+ !ISWORD(*(sp-1))) ) &&
+ (sp < m->endp && ISWORD(*sp)) )
{ /* yes */ }
else
return(NULL);
@@ -648,8 +581,8 @@
if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
(sp < m->endp && *sp == '\n' &&
(m->g->cflags&REG_NEWLINE)) ||
- (sp < m->endp && !ISWORD(*sp, m->g->loc)) ) &&
- (sp > m->beginp && ISWORD(*(sp-1), m->g->loc)) )
+ (sp < m->endp && !ISWORD(*sp)) ) &&
+ (sp > m->beginp && ISWORD(*(sp-1))) )
{ /* yes */ }
else
return(NULL);
@@ -687,8 +620,6 @@
return(NULL);
assert(m->pmatch[i].rm_so != -1);
len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
- if (len == 0 && rec++ > MAX_RECURSION)
- return(NULL);
assert(stop - m->beginp >= len);
if (sp > stop - len)
return(NULL); /* not enough left to match */
@@ -697,28 +628,28 @@
return(NULL);
while (m->g->strip[ss] != SOP(O_BACK, i))
ss++;
- return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
+ return(backref(m, sp+len, stop, ss+1, stopst, lev));
break;
case OQUEST_: /* to null or not */
- dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ dp = backref(m, sp, stop, ss+1, stopst, lev);
if (dp != NULL)
return(dp); /* not */
- return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
+ return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
break;
case OPLUS_:
assert(m->lastpos != NULL);
assert(lev+1 <= m->g->nplus);
m->lastpos[lev+1] = sp;
- return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
+ return(backref(m, sp, stop, ss+1, stopst, lev+1));
break;
case O_PLUS:
if (sp == m->lastpos[lev]) /* last pass matched null */
- return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+ return(backref(m, sp, stop, ss+1, stopst, lev-1));
/* try another pass */
m->lastpos[lev] = sp;
- dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
+ dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
if (dp == NULL)
- return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+ return(backref(m, sp, stop, ss+1, stopst, lev-1));
else
return(dp);
break;
@@ -727,7 +658,7 @@
esub = ss + OPND(s) - 1;
assert(OP(m->g->strip[esub]) == OOR1);
for (;;) { /* find first matching branch */
- dp = backref(m, sp, stop, ssub, esub, lev, rec);
+ dp = backref(m, sp, stop, ssub, esub, lev);
if (dp != NULL)
return(dp);
/* that one missed, try next one */
@@ -748,7 +679,7 @@
assert(0 < i && i <= m->g->nsub);
offsave = m->pmatch[i].rm_so;
m->pmatch[i].rm_so = sp - m->offp;
- dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ dp = backref(m, sp, stop, ss+1, stopst, lev);
if (dp != NULL)
return(dp);
m->pmatch[i].rm_so = offsave;
@@ -759,7 +690,7 @@
assert(0 < i && i <= m->g->nsub);
offsave = m->pmatch[i].rm_eo;
m->pmatch[i].rm_eo = sp - m->offp;
- dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ dp = backref(m, sp, stop, ss+1, stopst, lev);
if (dp != NULL)
return(dp);
m->pmatch[i].rm_eo = offsave;
@@ -773,57 +704,41 @@
/* "can't happen" */
assert(nope);
/* NOTREACHED */
- return "shut up gcc";
}
/*
- fast - step through the string at top speed
- == static const char *fast(struct match *m, const char *start, \
- == const char *stop, sopno startst, sopno stopst);
+ == static char *fast(register struct match *m, char *start, \
+ == char *stop, sopno startst, sopno stopst);
*/
-static const char * /* where tentative match ended, or NULL */
-fast( struct match *m,
- const char *start,
- const char *stop,
- sopno startst,
- sopno stopst)
+static char * /* where tentative match ended, or NULL */
+fast(m, start, stop, startst, stopst)
+register struct match *m;
+char *start;
+char *stop;
+sopno startst;
+sopno stopst;
{
- states st = m->st;
- states fresh = m->fresh;
- states tmp = m->tmp;
- const char *p = start;
- wint_t c;
- wint_t lastc; /* previous c */
- wint_t flagch;
- int i;
- const char *coldp; /* last p after which no match was underway */
- size_t clen;
+ register states st = m->st;
+ register states fresh = m->fresh;
+ register states tmp = m->tmp;
+ register char *p = start;
+ register int c = (start == m->beginp) ? OUT : *(start-1);
+ register int lastc; /* previous c */
+ register int flagch;
+ register int i;
+ register char *coldp; /* last p after which no match was underway */
CLEAR(st);
SET1(st, startst);
- SP("fast", st, *p);
st = step(m->g, startst, stopst, st, NOTHING, st);
ASSIGN(fresh, st);
SP("start", st, *p);
coldp = NULL;
- if (start == m->beginp)
- c = OUT;
- else {
- /*
- * XXX Wrong if the previous character was multi-byte.
- * Newline never is (in encodings supported by FreeBSD),
- * so this only breaks the ISWORD tests below.
- */
- c = (uch)*(start - 1);
- }
for (;;) {
/* next character */
lastc = c;
- if (p == m->endp) {
- clen = 0;
- c = OUT;
- } else
- clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR, m->g->loc);
+ c = (p == m->endp) ? OUT : *p;
if (EQ(st, fresh))
coldp = p;
@@ -847,12 +762,12 @@
}
/* how about a word boundary? */
- if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc, m->g->loc))) &&
- (c != OUT && ISWORD(c, m->g->loc)) ) {
+ if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+ (c != OUT && ISWORD(c)) ) {
flagch = BOW;
}
- if ( (lastc != OUT && ISWORD(lastc, m->g->loc)) &&
- (flagch == EOL || (c != OUT && !ISWORD(c, m->g->loc))) ) {
+ if ( (lastc != OUT && ISWORD(lastc)) &&
+ (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
flagch = EOW;
}
if (flagch == BOW || flagch == EOW) {
@@ -861,7 +776,7 @@
}
/* are we done? */
- if (ISSET(st, stopst) || p == stop || clen > stop - p)
+ if (ISSET(st, stopst) || p == stop)
break; /* NOTE BREAK OUT */
/* no, we must deal with this character */
@@ -871,39 +786,39 @@
st = step(m->g, startst, stopst, tmp, c, st);
SP("aft", st, c);
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
- p += clen;
+ p++;
}
assert(coldp != NULL);
m->coldp = coldp;
if (ISSET(st, stopst))
- return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0, m->g->loc));
+ return(p+1);
else
return(NULL);
}
/*
- slow - step through the string more deliberately
- == static const char *slow(struct match *m, const char *start, \
- == const char *stop, sopno startst, sopno stopst);
+ == static char *slow(register struct match *m, char *start, \
+ == char *stop, sopno startst, sopno stopst);
*/
-static const char * /* where it ended */
-slow( struct match *m,
- const char *start,
- const char *stop,
- sopno startst,
- sopno stopst)
+static char * /* where it ended */
+slow(m, start, stop, startst, stopst)
+register struct match *m;
+char *start;
+char *stop;
+sopno startst;
+sopno stopst;
{
- states st = m->st;
- states empty = m->empty;
- states tmp = m->tmp;
- const char *p = start;
- wint_t c;
- wint_t lastc; /* previous c */
- wint_t flagch;
- int i;
- const char *matchp; /* last p at which a match ended */
- size_t clen;
+ register states st = m->st;
+ register states empty = m->empty;
+ register states tmp = m->tmp;
+ register char *p = start;
+ register int c = (start == m->beginp) ? OUT : *(start-1);
+ register int lastc; /* previous c */
+ register int flagch;
+ register int i;
+ register char *matchp; /* last p at which a match ended */
AT("slow", start, stop, startst, stopst);
CLEAR(st);
@@ -911,24 +826,10 @@
SP("sstart", st, *p);
st = step(m->g, startst, stopst, st, NOTHING, st);
matchp = NULL;
- if (start == m->beginp)
- c = OUT;
- else {
- /*
- * XXX Wrong if the previous character was multi-byte.
- * Newline never is (in encodings supported by FreeBSD),
- * so this only breaks the ISWORD tests below.
- */
- c = (uch)*(start - 1);
- }
for (;;) {
/* next character */
lastc = c;
- if (p == m->endp) {
- c = OUT;
- clen = 0;
- } else
- clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR, m->g->loc);
+ c = (p == m->endp) ? OUT : *p;
/* is there an EOL and/or BOL between lastc and c? */
flagch = '\0';
@@ -950,12 +851,12 @@
}
/* how about a word boundary? */
- if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc, m->g->loc))) &&
- (c != OUT && ISWORD(c, m->g->loc)) ) {
+ if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+ (c != OUT && ISWORD(c)) ) {
flagch = BOW;
}
- if ( (lastc != OUT && ISWORD(lastc, m->g->loc)) &&
- (flagch == EOL || (c != OUT && !ISWORD(c, m->g->loc))) ) {
+ if ( (lastc != OUT && ISWORD(lastc)) &&
+ (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
flagch = EOW;
}
if (flagch == BOW || flagch == EOW) {
@@ -966,7 +867,7 @@
/* are we done? */
if (ISSET(st, stopst))
matchp = p;
- if (EQ(st, empty) || p == stop || clen > stop - p)
+ if (EQ(st, empty) || p == stop)
break; /* NOTE BREAK OUT */
/* no, we must deal with this character */
@@ -976,7 +877,7 @@
st = step(m->g, startst, stopst, tmp, c, st);
SP("saft", st, c);
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
- p += clen;
+ p++;
}
return(matchp);
@@ -985,31 +886,33 @@
/*
- step - map set of states reachable before char to set reachable after
- == static states step(struct re_guts *g, sopno start, sopno stop, \
- == states bef, int ch, states aft);
- == #define BOL (OUT-1)
- == #define EOL (BOL-1)
- == #define BOLEOL (BOL-2)
- == #define NOTHING (BOL-3)
- == #define BOW (BOL-4)
- == #define EOW (BOL-5)
- == #define BADCHAR (BOL-6)
- == #define NONCHAR(c) ((c) <= OUT)
+ == static states step(register struct re_guts *g, sopno start, sopno stop, \
+ == register states bef, int ch, register states aft);
+ == #define BOL (OUT+1)
+ == #define EOL (BOL+1)
+ == #define BOLEOL (BOL+2)
+ == #define NOTHING (BOL+3)
+ == #define BOW (BOL+4)
+ == #define EOW (BOL+5)
+ == #define CODEMAX (BOL+5) // highest code used
+ == #define NONCHAR(c) ((c) > CHAR_MAX)
+ == #define NNONCHAR (CODEMAX-CHAR_MAX)
*/
static states
-step(struct re_guts *g,
- sopno start, /* start state within strip */
- sopno stop, /* state after stop state within strip */
- states bef, /* states reachable before */
- wint_t ch, /* character or NONCHAR code */
- states aft) /* states already known reachable after */
+step(g, start, stop, bef, ch, aft)
+register struct re_guts *g;
+sopno start; /* start state within strip */
+sopno stop; /* state after stop state within strip */
+register states bef; /* states reachable before */
+int ch; /* character or NONCHAR code */
+register states aft; /* states already known reachable after */
{
- cset *cs;
- sop s;
- sopno pc;
- onestate here; /* note, macros know this name */
- sopno look;
- int i;
+ register cset *cs;
+ register sop s;
+ register sopno pc;
+ register onestate here; /* note, macros know this name */
+ register sopno look;
+ register int i;
for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
s = g->strip[pc];
@@ -1019,8 +922,8 @@
break;
case OCHAR:
/* only characters can match */
- assert(!NONCHAR(ch) || ch != OPND(s));
- if (ch == OPND(s))
+ assert(!NONCHAR(ch) || ch != (char)OPND(s));
+ if (ch == (char)OPND(s))
FWD(aft, bef, 1);
break;
case OBOL:
@@ -1045,7 +948,7 @@
break;
case OANYOF:
cs = &g->sets[OPND(s)];
- if (!NONCHAR(ch) && CHIN(cs, ch, g->loc))
+ if (!NONCHAR(ch) && CHIN(cs, ch))
FWD(aft, bef, 1);
break;
case OBACK_: /* ignored here */
@@ -1087,7 +990,7 @@
OP(s = g->strip[pc+look]) != O_CH;
look += OPND(s))
assert(OP(s) == OOR2);
- FWD(aft, aft, look + 1);
+ FWD(aft, aft, look);
}
break;
case OOR2: /* propagate OCH_'s marking */
@@ -1113,20 +1016,21 @@
/*
- print - print a set of states
== #ifdef REDEBUG
- == static void print(struct match *m, const char *caption, states st, \
+ == static void print(struct match *m, char *caption, states st, \
== int ch, FILE *d);
== #endif
*/
static void
-print(struct match *m,
- const char *caption,
- states st,
- int ch,
- FILE *d)
+print(m, caption, st, ch, d)
+struct match *m;
+char *caption;
+states st;
+int ch;
+FILE *d;
{
- struct re_guts *g = m->g;
- int i;
- int first = 1;
+ register struct re_guts *g = m->g;
+ register int i;
+ register int first = 1;
if (!(m->eflags&REG_TRACE))
return;
@@ -1142,20 +1046,21 @@
fprintf(d, "\n");
}
-/*
+/*
- at - print current situation
== #ifdef REDEBUG
- == static void at(struct match *m, const char *title, const char *start, \
- == const char *stop, sopno startst, sopno stopst);
+ == static void at(struct match *m, char *title, char *start, char *stop, \
+ == sopno startst, sopno stopst);
== #endif
*/
static void
-at( struct match *m,
- const char *title,
- const char *start,
- const char *stop,
- sopno startst,
- sopno stopst)
+at(m, title, start, stop, startst, stopst)
+struct match *m;
+char *title;
+char *start;
+char *stop;
+sopno startst;
+sopno stopst;
{
if (!(m->eflags&REG_TRACE))
return;
@@ -1170,7 +1075,7 @@
/*
- pchar - make a character printable
== #ifdef REDEBUG
- == static const char *pchar(int ch);
+ == static char *pchar(int ch);
== #endif
*
* Is this identical to regchar() over in debug.c? Well, yes. But a
@@ -1178,12 +1083,13 @@
* a matching debug.o, and this is convenient. It all disappears in
* the non-debug compilation anyway, so it doesn't matter much.
*/
-static const char * /* -> representation */
-pchar(int ch)
+static char * /* -> representation */
+pchar(ch)
+int ch;
{
static char pbuf[10];
- if (isprint((uch)ch) || ch == ' ')
+ if (isprint(ch) || ch == ' ')
sprintf(pbuf, "%c", ch);
else
sprintf(pbuf, "\\%o", ch);