Loading...
--- Libc/Libc-262/regex/engine.c
+++ Libc/Libc-763.12/regex/engine.c
@@ -1,25 +1,5 @@
-/*
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
- *
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
- *
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
- *
- * @APPLE_LICENSE_HEADER_END@
- */
-/*
+/*-
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
@@ -34,10 +14,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -53,7 +29,12 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * @(#)engine.c 8.5 (Berkeley) 3/20/94
*/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds Exp $");
/*
* The matching engine and friends. This file is #included by regexec.c
@@ -84,22 +65,34 @@
#define at lat
#define match lmat
#endif
+#ifdef MNAMES
+#define matcher mmatcher
+#define fast mfast
+#define slow mslow
+#define dissect mdissect
+#define backref mbackref
+#define step mstep
+#define print mprint
+#define at mat
+#define match mmat
+#endif
/* another structure passed up and down to avoid zillions of parameters */
struct match {
struct re_guts *g;
int eflags;
regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
- char *offp; /* offsets work from here */
- char *beginp; /* start of string -- virtual NUL precedes */
- char *endp; /* end of string -- virtual NUL here */
- char *coldp; /* can be no match starting before here */
- char **lastpos; /* [nplus+1] */
+ const char *offp; /* offsets work from here */
+ const char *beginp; /* start of string -- virtual NUL precedes */
+ const char *endp; /* end of string -- virtual NUL here */
+ const char *coldp; /* can be no match starting before here */
+ const char **lastpos; /* [nplus+1] */
STATEVARS;
states st; /* current states */
states fresh; /* states for a fresh start */
states tmp; /* temporary */
states empty; /* empty set of states */
+ mbstate_t mbs; /* multibyte conversion state */
};
/* ========= begin header generated by ./mkh ========= */
@@ -108,29 +101,29 @@
#endif
/* === engine.c === */
-static int matcher __P((struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags));
-static char *dissect __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
-static char *backref __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev));
-static char *fast __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
-static char *slow __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
-static states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft));
-#define BOL (OUT+1)
-#define EOL (BOL+1)
-#define BOLEOL (BOL+2)
-#define NOTHING (BOL+3)
-#define BOW (BOL+4)
-#define EOW (BOL+5)
-#define CODEMAX (BOL+5) /* highest code used */
-#define NONCHAR(c) ((c) > CHAR_MAX)
-#define NNONCHAR (CODEMAX-CHAR_MAX)
+static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
+static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
+static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
+static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
+static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
+static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
+#define MAX_RECURSION 100
+#define BOL (OUT-1)
+#define EOL (BOL-1)
+#define BOLEOL (BOL-2)
+#define NOTHING (BOL-3)
+#define BOW (BOL-4)
+#define EOW (BOL-5)
+#define BADCHAR (BOL-6)
+#define NONCHAR(c) ((c) <= OUT)
#ifdef REDEBUG
-static void print __P((struct match *m, char *caption, states st, int ch, FILE *d));
+static void print(struct match *m, const char *caption, states st, int ch, FILE *d);
#endif
#ifdef REDEBUG
-static void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst));
+static void at(struct match *m, const char *title, const char *start, const char *stop, sopno startst, sopno stopst);
#endif
#ifdef REDEBUG
-static char *pchar __P((int ch));
+static const char *pchar(int ch);
#endif
#ifdef __cplusplus
@@ -150,26 +143,32 @@
/*
- matcher - the actual matching engine
- == static int matcher(register struct re_guts *g, char *string, \
+ == static int matcher(struct re_guts *g, const char *string, \
== size_t nmatch, regmatch_t pmatch[], int eflags);
*/
static int /* 0 success, REG_NOMATCH failure */
-matcher(g, string, nmatch, pmatch, eflags)
-register struct re_guts *g;
-char *string;
-size_t nmatch;
-regmatch_t pmatch[];
-int eflags;
+matcher(struct re_guts *g,
+ const char *string,
+ size_t nmatch,
+ regmatch_t pmatch[],
+ int eflags)
{
- register char *endp;
- register int i;
+ const char *endp;
+ int i;
struct match mv;
- register struct match *m = &mv;
- register char *dp;
- const register sopno gf = g->firststate+1; /* +1 for OEND */
- const register sopno gl = g->laststate;
- char *start;
- char *stop;
+ struct match *m = &mv;
+ const char *dp;
+ const sopno gf = g->firststate+1; /* +1 for OEND */
+ const sopno gl = g->laststate;
+ const char *start;
+ const char *stop;
+ /* Boyer-Moore algorithms variables */
+ const char *pp;
+ int cj, mj;
+ const char *mustfirst;
+ const char *mustlast;
+ int *matchjump;
+ int *charjump;
/* simplify the situation where possible */
if (g->cflags&REG_NOSUB)
@@ -186,12 +185,46 @@
/* prescreening; this does wonders for this rather slow code */
if (g->must != NULL) {
- for (dp = start; dp < stop; dp++)
- if (*dp == g->must[0] && stop - dp >= g->mlen &&
- memcmp(dp, g->must, (size_t)g->mlen) == 0)
- break;
- if (dp == stop) /* we didn't find g->must */
- return(REG_NOMATCH);
+ if (g->charjump != NULL && g->matchjump != NULL) {
+ mustfirst = g->must;
+ mustlast = g->must + g->mlen - 1;
+ charjump = g->charjump;
+ matchjump = g->matchjump;
+ pp = mustlast;
+ for (dp = start+g->mlen-1; dp < stop;) {
+ /* Fast skip non-matches */
+ while (dp < stop && charjump[(int)*dp])
+ dp += charjump[(int)*dp];
+
+ if (dp >= stop)
+ break;
+
+ /* Greedy matcher */
+ /* We depend on not being used for
+ * strings of length 1
+ */
+ while (*--dp == *--pp && pp != mustfirst);
+
+ if (*dp == *pp)
+ break;
+
+ /* Jump to next possible match */
+ mj = matchjump[pp - mustfirst];
+ cj = charjump[(int)*dp];
+ dp += (cj < mj ? mj : cj);
+ pp = mustlast;
+ }
+ if (pp != mustfirst)
+ return(REG_NOMATCH);
+ } else {
+ for (dp = start; dp < stop; dp++)
+ if (*dp == g->must[0] &&
+ stop - dp >= g->mlen &&
+ memcmp(dp, g->must, (size_t)g->mlen) == 0)
+ break;
+ if (dp == stop) /* we didn't find g->must */
+ return(REG_NOMATCH);
+ }
}
/* match struct setup */
@@ -208,11 +241,22 @@
SETUP(m->tmp);
SETUP(m->empty);
CLEAR(m->empty);
+ ZAPSTATE(&m->mbs);
+
+ /* Adjust start according to moffset, to speed things up */
+ if (g->moffset > -1)
+ start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
+
+ SP("mloop", m->st, *start);
/* this loop does only one repetition except for backrefs */
for (;;) {
endp = fast(m, start, stop, gf, gl);
if (endp == NULL) { /* a miss */
+ if (m->pmatch != NULL)
+ free((char *)m->pmatch);
+ if (m->lastpos != NULL)
+ free((char *)m->lastpos);
STATETEARDOWN(m);
return(REG_NOMATCH);
}
@@ -227,7 +271,8 @@
if (endp != NULL)
break;
assert(m->coldp < m->endp);
- m->coldp++;
+ m->coldp += XMBRTOWC(NULL, m->coldp,
+ m->endp - m->coldp, &m->mbs, 0, g->loc);
}
if (nmatch == 1 && !g->backrefs)
break; /* no further info needed */
@@ -247,15 +292,15 @@
dp = dissect(m, m->coldp, endp, gf, gl);
} else {
if (g->nplus > 0 && m->lastpos == NULL)
- m->lastpos = (char **)malloc((g->nplus+1) *
- sizeof(char *));
+ m->lastpos = malloc((g->nplus+1) *
+ sizeof(const char *));
if (g->nplus > 0 && m->lastpos == NULL) {
free(m->pmatch);
STATETEARDOWN(m);
return(REG_ESPACE);
}
NOTE("backref dissect");
- dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
}
if (dp != NULL)
break;
@@ -278,7 +323,7 @@
}
#endif
NOTE("backoff dissect");
- dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
}
assert(dp == NULL || dp == endp);
if (dp != NULL) /* found a shorter one */
@@ -286,7 +331,9 @@
/* despite initial appearances, there is no match here */
NOTE("false alarm");
- start = m->coldp + 1; /* recycle starting later */
+ /* recycle starting later */
+ start = m->coldp + XMBRTOWC(NULL, m->coldp,
+ stop - m->coldp, &m->mbs, 0, g->loc);
assert(start <= stop);
}
@@ -316,30 +363,29 @@
/*
- dissect - figure out what matched what, no back references
- == static char *dissect(register struct match *m, char *start, \
- == char *stop, sopno startst, sopno stopst);
+ == static const char *dissect(struct match *m, const char *start, \
+ == const char *stop, sopno startst, sopno stopst);
*/
-static char * /* == stop (success) always */
-dissect(m, start, stop, startst, stopst)
-register struct match *m;
-char *start;
-char *stop;
-sopno startst;
-sopno stopst;
+static const char * /* == stop (success) always */
+dissect(struct match *m,
+ const char *start,
+ const char *stop,
+ sopno startst,
+ sopno stopst)
{
- register int i;
- register sopno ss; /* start sop of current subRE */
- register sopno es; /* end sop of current subRE */
- register char *sp; /* start of string matched by it */
- register char *stp; /* string matched by it cannot pass here */
- register char *rest; /* start of rest of string */
- register char *tail; /* string unmatched by rest of RE */
- register sopno ssub; /* start sop of subsubRE */
- register sopno esub; /* end sop of subsubRE */
- register char *ssp; /* start of string matched by subsubRE */
- register char *sep; /* end of string matched by subsubRE */
- register char *oldssp; /* previous ssp */
- register char *dp;
+ int i;
+ sopno ss; /* start sop of current subRE */
+ sopno es; /* end sop of current subRE */
+ const char *sp; /* start of string matched by it */
+ const char *stp; /* string matched by it cannot pass here */
+ const char *rest; /* start of rest of string */
+ const char *tail; /* string unmatched by rest of RE */
+ sopno ssub; /* start sop of subsubRE */
+ sopno esub; /* end sop of subsubRE */
+ const char *ssp; /* start of string matched by subsubRE */
+ const char *sep; /* end of string matched by subsubRE */
+ const char *oldssp; /* previous ssp */
+ const char *dp;
AT("diss", start, stop, startst, stopst);
sp = start;
@@ -364,7 +410,7 @@
assert(nope);
break;
case OCHAR:
- sp++;
+ sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0, m->g->loc);
break;
case OBOL:
case OEOL:
@@ -373,7 +419,7 @@
break;
case OANY:
case OANYOF:
- sp++;
+ sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0, m->g->loc);
break;
case OBACK_:
case O_BACK:
@@ -432,6 +478,10 @@
if (sep == NULL) {
/* last successful match */
sep = ssp;
+ ssp = oldssp;
+ }
+ else if (tail==rest) {
+ /* Fix for test expr 105 */
ssp = oldssp;
}
assert(sep == rest); /* must exhaust substring */
@@ -486,6 +536,14 @@
i = OPND(m->g->strip[ss]);
assert(0 < i && i <= m->g->nsub);
m->pmatch[i].rm_so = sp - m->offp;
+ /* fix for T.regcomp 43: don't remember previous
+ subexpression matches beyond the current one (i) */
+ i++;
+ while (i<= m->g->nsub) {
+ m->pmatch[i].rm_so = -1;
+ m->pmatch[i].rm_eo = -1;
+ i++;
+ }
break;
case ORPAREN:
i = OPND(m->g->strip[ss]);
@@ -504,30 +562,31 @@
/*
- backref - figure out what matched what, figuring in back references
- == static char *backref(register struct match *m, char *start, \
- == char *stop, sopno startst, sopno stopst, sopno lev);
+ == static const char *backref(struct match *m, const char *start, \
+ == const char *stop, sopno startst, sopno stopst, sopno lev, int rec);
*/
-static char * /* == stop (success) or NULL (failure) */
-backref(m, start, stop, startst, stopst, lev)
-register struct match *m;
-char *start;
-char *stop;
-sopno startst;
-sopno stopst;
-sopno lev; /* PLUS nesting level */
+static const char * /* == stop (success) or NULL (failure) */
+backref(struct match *m,
+ const char *start,
+ const char *stop,
+ sopno startst,
+ sopno stopst,
+ sopno lev, /* PLUS nesting level */
+ int rec)
{
- register int i;
- register sopno ss; /* start sop of current subRE */
- register char *sp; /* start of string matched by it */
- register sopno ssub; /* start sop of subsubRE */
- register sopno esub; /* end sop of subsubRE */
- register char *ssp; /* start of string matched by subsubRE */
- register char *dp;
- register size_t len;
- register int hard;
- register sop s;
- register regoff_t offsave;
- register cset *cs;
+ int i;
+ sopno ss; /* start sop of current subRE */
+ const char *sp; /* start of string matched by it */
+ sopno ssub; /* start sop of subsubRE */
+ sopno esub; /* end sop of subsubRE */
+ const char *ssp; /* start of string matched by subsubRE */
+ const char *dp;
+ size_t len;
+ int hard;
+ sop s;
+ regoff_t offsave;
+ cset *cs;
+ wint_t wc;
AT("back", start, stop, startst, stopst);
sp = start;
@@ -537,17 +596,25 @@
for (ss = startst; !hard && ss < stopst; ss++)
switch (OP(s = m->g->strip[ss])) {
case OCHAR:
- if (sp == stop || *sp++ != (char)OPND(s))
+ if (sp == stop)
+ return(NULL);
+ sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR, m->g->loc);
+ if (wc != OPND(s))
return(NULL);
break;
case OANY:
if (sp == stop)
return(NULL);
- sp++;
+ sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR, m->g->loc);
+ if (wc == BADCHAR)
+ return (NULL);
break;
case OANYOF:
+ if (sp == stop)
+ return (NULL);
cs = &m->g->sets[OPND(s)];
- if (sp == stop || !CHIN(cs, *sp++))
+ sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR, m->g->loc);
+ if (wc == BADCHAR || !CHIN(cs, wc, m->g->loc))
return(NULL);
break;
case OBOL:
@@ -571,8 +638,8 @@
(sp < m->endp && *(sp-1) == '\n' &&
(m->g->cflags&REG_NEWLINE)) ||
(sp > m->beginp &&
- !ISWORD(*(sp-1))) ) &&
- (sp < m->endp && ISWORD(*sp)) )
+ !ISWORD(*(sp-1), m->g->loc)) ) &&
+ (sp < m->endp && ISWORD(*sp, m->g->loc)) )
{ /* yes */ }
else
return(NULL);
@@ -581,8 +648,8 @@
if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
(sp < m->endp && *sp == '\n' &&
(m->g->cflags&REG_NEWLINE)) ||
- (sp < m->endp && !ISWORD(*sp)) ) &&
- (sp > m->beginp && ISWORD(*(sp-1))) )
+ (sp < m->endp && !ISWORD(*sp, m->g->loc)) ) &&
+ (sp > m->beginp && ISWORD(*(sp-1), m->g->loc)) )
{ /* yes */ }
else
return(NULL);
@@ -620,6 +687,8 @@
return(NULL);
assert(m->pmatch[i].rm_so != -1);
len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
+ if (len == 0 && rec++ > MAX_RECURSION)
+ return(NULL);
assert(stop - m->beginp >= len);
if (sp > stop - len)
return(NULL); /* not enough left to match */
@@ -628,28 +697,28 @@
return(NULL);
while (m->g->strip[ss] != SOP(O_BACK, i))
ss++;
- return(backref(m, sp+len, stop, ss+1, stopst, lev));
+ return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
break;
case OQUEST_: /* to null or not */
- dp = backref(m, sp, stop, ss+1, stopst, lev);
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
if (dp != NULL)
return(dp); /* not */
- return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
+ return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
break;
case OPLUS_:
assert(m->lastpos != NULL);
assert(lev+1 <= m->g->nplus);
m->lastpos[lev+1] = sp;
- return(backref(m, sp, stop, ss+1, stopst, lev+1));
+ return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
break;
case O_PLUS:
if (sp == m->lastpos[lev]) /* last pass matched null */
- return(backref(m, sp, stop, ss+1, stopst, lev-1));
+ return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
/* try another pass */
m->lastpos[lev] = sp;
- dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
+ dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
if (dp == NULL)
- return(backref(m, sp, stop, ss+1, stopst, lev-1));
+ return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
else
return(dp);
break;
@@ -658,7 +727,7 @@
esub = ss + OPND(s) - 1;
assert(OP(m->g->strip[esub]) == OOR1);
for (;;) { /* find first matching branch */
- dp = backref(m, sp, stop, ssub, esub, lev);
+ dp = backref(m, sp, stop, ssub, esub, lev, rec);
if (dp != NULL)
return(dp);
/* that one missed, try next one */
@@ -679,7 +748,7 @@
assert(0 < i && i <= m->g->nsub);
offsave = m->pmatch[i].rm_so;
m->pmatch[i].rm_so = sp - m->offp;
- dp = backref(m, sp, stop, ss+1, stopst, lev);
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
if (dp != NULL)
return(dp);
m->pmatch[i].rm_so = offsave;
@@ -690,7 +759,7 @@
assert(0 < i && i <= m->g->nsub);
offsave = m->pmatch[i].rm_eo;
m->pmatch[i].rm_eo = sp - m->offp;
- dp = backref(m, sp, stop, ss+1, stopst, lev);
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
if (dp != NULL)
return(dp);
m->pmatch[i].rm_eo = offsave;
@@ -704,41 +773,57 @@
/* "can't happen" */
assert(nope);
/* NOTREACHED */
+ return "shut up gcc";
}
/*
- fast - step through the string at top speed
- == static char *fast(register struct match *m, char *start, \
- == char *stop, sopno startst, sopno stopst);
+ == static const char *fast(struct match *m, const char *start, \
+ == const char *stop, sopno startst, sopno stopst);
*/
-static char * /* where tentative match ended, or NULL */
-fast(m, start, stop, startst, stopst)
-register struct match *m;
-char *start;
-char *stop;
-sopno startst;
-sopno stopst;
+static const char * /* where tentative match ended, or NULL */
+fast( struct match *m,
+ const char *start,
+ const char *stop,
+ sopno startst,
+ sopno stopst)
{
- register states st = m->st;
- register states fresh = m->fresh;
- register states tmp = m->tmp;
- register char *p = start;
- register int c = (start == m->beginp) ? OUT : *(start-1);
- register int lastc; /* previous c */
- register int flagch;
- register int i;
- register char *coldp; /* last p after which no match was underway */
+ states st = m->st;
+ states fresh = m->fresh;
+ states tmp = m->tmp;
+ const char *p = start;
+ wint_t c;
+ wint_t lastc; /* previous c */
+ wint_t flagch;
+ int i;
+ const char *coldp; /* last p after which no match was underway */
+ size_t clen;
CLEAR(st);
SET1(st, startst);
+ SP("fast", st, *p);
st = step(m->g, startst, stopst, st, NOTHING, st);
ASSIGN(fresh, st);
SP("start", st, *p);
coldp = NULL;
+ if (start == m->beginp)
+ c = OUT;
+ else {
+ /*
+ * XXX Wrong if the previous character was multi-byte.
+ * Newline never is (in encodings supported by FreeBSD),
+ * so this only breaks the ISWORD tests below.
+ */
+ c = (uch)*(start - 1);
+ }
for (;;) {
/* next character */
lastc = c;
- c = (p == m->endp) ? OUT : *p;
+ if (p == m->endp) {
+ clen = 0;
+ c = OUT;
+ } else
+ clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR, m->g->loc);
if (EQ(st, fresh))
coldp = p;
@@ -762,12 +847,12 @@
}
/* how about a word boundary? */
- if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
- (c != OUT && ISWORD(c)) ) {
+ if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc, m->g->loc))) &&
+ (c != OUT && ISWORD(c, m->g->loc)) ) {
flagch = BOW;
}
- if ( (lastc != OUT && ISWORD(lastc)) &&
- (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+ if ( (lastc != OUT && ISWORD(lastc, m->g->loc)) &&
+ (flagch == EOL || (c != OUT && !ISWORD(c, m->g->loc))) ) {
flagch = EOW;
}
if (flagch == BOW || flagch == EOW) {
@@ -776,7 +861,7 @@
}
/* are we done? */
- if (ISSET(st, stopst) || p == stop)
+ if (ISSET(st, stopst) || p == stop || clen > stop - p)
break; /* NOTE BREAK OUT */
/* no, we must deal with this character */
@@ -786,39 +871,39 @@
st = step(m->g, startst, stopst, tmp, c, st);
SP("aft", st, c);
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
- p++;
+ p += clen;
}
assert(coldp != NULL);
m->coldp = coldp;
if (ISSET(st, stopst))
- return(p+1);
+ return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0, m->g->loc));
else
return(NULL);
}
/*
- slow - step through the string more deliberately
- == static char *slow(register struct match *m, char *start, \
- == char *stop, sopno startst, sopno stopst);
+ == static const char *slow(struct match *m, const char *start, \
+ == const char *stop, sopno startst, sopno stopst);
*/
-static char * /* where it ended */
-slow(m, start, stop, startst, stopst)
-register struct match *m;
-char *start;
-char *stop;
-sopno startst;
-sopno stopst;
+static const char * /* where it ended */
+slow( struct match *m,
+ const char *start,
+ const char *stop,
+ sopno startst,
+ sopno stopst)
{
- register states st = m->st;
- register states empty = m->empty;
- register states tmp = m->tmp;
- register char *p = start;
- register int c = (start == m->beginp) ? OUT : *(start-1);
- register int lastc; /* previous c */
- register int flagch;
- register int i;
- register char *matchp; /* last p at which a match ended */
+ states st = m->st;
+ states empty = m->empty;
+ states tmp = m->tmp;
+ const char *p = start;
+ wint_t c;
+ wint_t lastc; /* previous c */
+ wint_t flagch;
+ int i;
+ const char *matchp; /* last p at which a match ended */
+ size_t clen;
AT("slow", start, stop, startst, stopst);
CLEAR(st);
@@ -826,10 +911,24 @@
SP("sstart", st, *p);
st = step(m->g, startst, stopst, st, NOTHING, st);
matchp = NULL;
+ if (start == m->beginp)
+ c = OUT;
+ else {
+ /*
+ * XXX Wrong if the previous character was multi-byte.
+ * Newline never is (in encodings supported by FreeBSD),
+ * so this only breaks the ISWORD tests below.
+ */
+ c = (uch)*(start - 1);
+ }
for (;;) {
/* next character */
lastc = c;
- c = (p == m->endp) ? OUT : *p;
+ if (p == m->endp) {
+ c = OUT;
+ clen = 0;
+ } else
+ clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR, m->g->loc);
/* is there an EOL and/or BOL between lastc and c? */
flagch = '\0';
@@ -851,12 +950,12 @@
}
/* how about a word boundary? */
- if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
- (c != OUT && ISWORD(c)) ) {
+ if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc, m->g->loc))) &&
+ (c != OUT && ISWORD(c, m->g->loc)) ) {
flagch = BOW;
}
- if ( (lastc != OUT && ISWORD(lastc)) &&
- (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+ if ( (lastc != OUT && ISWORD(lastc, m->g->loc)) &&
+ (flagch == EOL || (c != OUT && !ISWORD(c, m->g->loc))) ) {
flagch = EOW;
}
if (flagch == BOW || flagch == EOW) {
@@ -867,7 +966,7 @@
/* are we done? */
if (ISSET(st, stopst))
matchp = p;
- if (EQ(st, empty) || p == stop)
+ if (EQ(st, empty) || p == stop || clen > stop - p)
break; /* NOTE BREAK OUT */
/* no, we must deal with this character */
@@ -877,7 +976,7 @@
st = step(m->g, startst, stopst, tmp, c, st);
SP("saft", st, c);
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
- p++;
+ p += clen;
}
return(matchp);
@@ -886,33 +985,31 @@
/*
- step - map set of states reachable before char to set reachable after
- == static states step(register struct re_guts *g, sopno start, sopno stop, \
- == register states bef, int ch, register states aft);
- == #define BOL (OUT+1)
- == #define EOL (BOL+1)
- == #define BOLEOL (BOL+2)
- == #define NOTHING (BOL+3)
- == #define BOW (BOL+4)
- == #define EOW (BOL+5)
- == #define CODEMAX (BOL+5) // highest code used
- == #define NONCHAR(c) ((c) > CHAR_MAX)
- == #define NNONCHAR (CODEMAX-CHAR_MAX)
+ == static states step(struct re_guts *g, sopno start, sopno stop, \
+ == states bef, wint_t ch, states aft);
+ == #define BOL (OUT-1)
+ == #define EOL (BOL-1)
+ == #define BOLEOL (BOL-2)
+ == #define NOTHING (BOL-3)
+ == #define BOW (BOL-4)
+ == #define EOW (BOL-5)
+ == #define BADCHAR (BOL-6)
+ == #define NONCHAR(c) ((c) <= OUT)
*/
static states
-step(g, start, stop, bef, ch, aft)
-register struct re_guts *g;
-sopno start; /* start state within strip */
-sopno stop; /* state after stop state within strip */
-register states bef; /* states reachable before */
-int ch; /* character or NONCHAR code */
-register states aft; /* states already known reachable after */
+step(struct re_guts *g,
+ sopno start, /* start state within strip */
+ sopno stop, /* state after stop state within strip */
+ states bef, /* states reachable before */
+ wint_t ch, /* character or NONCHAR code */
+ states aft) /* states already known reachable after */
{
- register cset *cs;
- register sop s;
- register sopno pc;
- register onestate here; /* note, macros know this name */
- register sopno look;
- register int i;
+ cset *cs;
+ sop s;
+ sopno pc;
+ onestate here; /* note, macros know this name */
+ sopno look;
+ int i;
for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
s = g->strip[pc];
@@ -922,8 +1019,8 @@
break;
case OCHAR:
/* only characters can match */
- assert(!NONCHAR(ch) || ch != (char)OPND(s));
- if (ch == (char)OPND(s))
+ assert(!NONCHAR(ch) || ch != OPND(s));
+ if (ch == OPND(s))
FWD(aft, bef, 1);
break;
case OBOL:
@@ -948,7 +1045,7 @@
break;
case OANYOF:
cs = &g->sets[OPND(s)];
- if (!NONCHAR(ch) && CHIN(cs, ch))
+ if (!NONCHAR(ch) && CHIN(cs, ch, g->loc))
FWD(aft, bef, 1);
break;
case OBACK_: /* ignored here */
@@ -990,7 +1087,7 @@
OP(s = g->strip[pc+look]) != O_CH;
look += OPND(s))
assert(OP(s) == OOR2);
- FWD(aft, aft, look);
+ FWD(aft, aft, look + 1);
}
break;
case OOR2: /* propagate OCH_'s marking */
@@ -1016,21 +1113,20 @@
/*
- print - print a set of states
== #ifdef REDEBUG
- == static void print(struct match *m, char *caption, states st, \
+ == static void print(struct match *m, const char *caption, states st, \
== int ch, FILE *d);
== #endif
*/
static void
-print(m, caption, st, ch, d)
-struct match *m;
-char *caption;
-states st;
-int ch;
-FILE *d;
+print(struct match *m,
+ const char *caption,
+ states st,
+ int ch,
+ FILE *d)
{
- register struct re_guts *g = m->g;
- register int i;
- register int first = 1;
+ struct re_guts *g = m->g;
+ int i;
+ int first = 1;
if (!(m->eflags&REG_TRACE))
return;
@@ -1046,21 +1142,20 @@
fprintf(d, "\n");
}
-/*
+/*
- at - print current situation
== #ifdef REDEBUG
- == static void at(struct match *m, char *title, char *start, char *stop, \
- == sopno startst, sopno stopst);
+ == static void at(struct match *m, const char *title, const char *start, \
+ == const char *stop, sopno startst, sopno stopst);
== #endif
*/
static void
-at(m, title, start, stop, startst, stopst)
-struct match *m;
-char *title;
-char *start;
-char *stop;
-sopno startst;
-sopno stopst;
+at( struct match *m,
+ const char *title,
+ const char *start,
+ const char *stop,
+ sopno startst,
+ sopno stopst)
{
if (!(m->eflags&REG_TRACE))
return;
@@ -1075,7 +1170,7 @@
/*
- pchar - make a character printable
== #ifdef REDEBUG
- == static char *pchar(int ch);
+ == static const char *pchar(int ch);
== #endif
*
* Is this identical to regchar() over in debug.c? Well, yes. But a
@@ -1083,13 +1178,12 @@
* a matching debug.o, and this is convenient. It all disappears in
* the non-debug compilation anyway, so it doesn't matter much.
*/
-static char * /* -> representation */
-pchar(ch)
-int ch;
+static const char * /* -> representation */
+pchar(int ch)
{
static char pbuf[10];
- if (isprint(ch) || ch == ' ')
+ if (isprint((uch)ch) || ch == ' ')
sprintf(pbuf, "%c", ch);
else
sprintf(pbuf, "\\%o", ch);