Loading...
regex/FreeBSD/regcomp.c.patch Libc-763.12 /dev/null
--- Libc/Libc-763.12/regex/FreeBSD/regcomp.c.patch
+++ /dev/null
@@ -1,514 +0,0 @@
---- regcomp.c.orig	2010-06-21 14:05:04.000000000 -0700
-+++ regcomp.c	2010-06-21 14:23:51.000000000 -0700
-@@ -39,6 +39,8 @@ static char sccsid[] = "@(#)regcomp.c	8.
- #include <sys/cdefs.h>
- __FBSDID("$FreeBSD: src/lib/libc/regex/regcomp.c,v 1.36 2007/06/11 03:05:54 delphij Exp $");
- 
-+#include "xlocale_private.h"
-+
- #include <sys/types.h>
- #include <stdio.h>
- #include <string.h>
-@@ -69,6 +71,9 @@ struct parse {
- 	sopno ssize;		/* malloced strip size (allocated) */
- 	sopno slen;		/* malloced strip length (used) */
- 	int ncsalloc;		/* number of csets allocated */
-+#if __DARWIN_UNIX03
-+	int zerorepeats;
-+#endif /* __DARWIN_UNIX03 */
- 	struct re_guts *g;
- #	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
- 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
-@@ -93,7 +98,7 @@ static void p_b_cclass(struct parse *p, 
- static void p_b_eclass(struct parse *p, cset *cs);
- static wint_t p_b_symbol(struct parse *p);
- static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
--static wint_t othercase(wint_t ch);
-+static wint_t othercase(wint_t ch, locale_t loc);
- static void bothcases(struct parse *p, wint_t ch);
- static void ordinary(struct parse *p, wint_t ch);
- static void nonnewline(struct parse *p);
-@@ -104,7 +109,7 @@ static void freeset(struct parse *p, cse
- static void CHadd(struct parse *p, cset *cs, wint_t ch);
- static void CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max);
- static void CHaddtype(struct parse *p, cset *cs, wctype_t wct);
--static wint_t singleton(cset *cs);
-+static wint_t singleton(cset *cs, locale_t loc);
- static sopno dupl(struct parse *p, sopno start, sopno finish);
- static void doemit(struct parse *p, sop op, size_t opnd);
- static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
-@@ -222,10 +227,14 @@ regcomp(regex_t * __restrict preg,
- 	p->end = p->next + len;
- 	p->error = 0;
- 	p->ncsalloc = 0;
-+#if __DARWIN_UNIX03
-+	p->zerorepeats = 0;
-+#endif /* __DARWIN_UNIX03 */
- 	for (i = 0; i < NPAREN; i++) {
- 		p->pbegin[i] = 0;
- 		p->pend[i] = 0;
- 	}
-+	g->loc = __current_locale();
- 	g->sets = NULL;
- 	g->ncsets = 0;
- 	g->cflags = cflags;
-@@ -302,8 +311,12 @@ p_ere(struct parse *p,
- 		conc = HERE();
- 		while (MORE() && (c = PEEK()) != '|' && c != stop)
- 			p_ere_exp(p);
-+#if __DARWIN_UNIX03
-+ 		if (!p->zerorepeats) REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
-+		else p->zerorepeats--;
-+#else
- 		(void)REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
--
-+#endif
- 		if (!EAT('|'))
- 			break;		/* NOTE BREAK OUT */
- 
-@@ -410,7 +423,7 @@ p_ere_exp(struct parse *p)
- 		ordinary(p, wc);
- 		break;
- 	case '{':		/* okay as ordinary except if digit follows */
--		(void)REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
-+		(void)REQUIRE(!MORE() || !isdigit_l((uch)PEEK(), p->g->loc), REG_BADRPT);
- 		/* FALLTHROUGH */
- 	default:
- 		p->next--;
-@@ -424,7 +437,7 @@ p_ere_exp(struct parse *p)
- 	c = PEEK();
- 	/* we call { a repetition if followed by a digit */
- 	if (!( c == '*' || c == '+' || c == '?' ||
--				(c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
-+				(c == '{' && MORE2() && isdigit_l((uch)PEEK2(), p->g->loc)) ))
- 		return;		/* no repetition, we're done */
- 	NEXT();
- 
-@@ -453,7 +466,7 @@ p_ere_exp(struct parse *p)
- 	case '{':
- 		count = p_count(p);
- 		if (EAT(',')) {
--			if (isdigit((uch)PEEK())) {
-+			if (isdigit_l((uch)PEEK(), p->g->loc)) {
- 				count2 = p_count(p);
- 				(void)REQUIRE(count <= count2, REG_BADBR);
- 			} else		/* single number with comma */
-@@ -474,7 +487,7 @@ p_ere_exp(struct parse *p)
- 		return;
- 	c = PEEK();
- 	if (!( c == '*' || c == '+' || c == '?' ||
--				(c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
-+				(c == '{' && MORE2() && isdigit_l((uch)PEEK2(), p->g->loc)) ) )
- 		return;
- 	SETERROR(REG_BADRPT);
- }
-@@ -486,7 +499,12 @@ p_ere_exp(struct parse *p)
- static void
- p_str(struct parse *p)
- {
-+#if __DARWIN_UNIX03
-+ 	if (!p->zerorepeats) REQUIRE(MORE(), REG_EMPTY);
-+	else p->zerorepeats--;
-+#else  /* !__DARWIN_UNIX03 */
- 	(void)REQUIRE(MORE(), REG_EMPTY);
-+#endif /* __DARWIN_UNIX03 */
- 	while (MORE())
- 		ordinary(p, WGETNEXT());
- }
-@@ -525,8 +543,12 @@ p_bre(struct parse *p,
- 		p->g->iflags |= USEEOL;
- 		p->g->neol++;
- 	}
--
-+#if __DARWIN_UNIX03
-+	if (!p->zerorepeats) REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
-+	else p->zerorepeats--;
-+#else  /* !__DARWIN_UNIX03 */
- 	(void)REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
-+#endif /* __DARWIN_UNIX03 */
- }
- 
- /*
-@@ -599,12 +621,22 @@ p_simp_re(struct parse *p,
- 		i = (c&~BACKSL) - '0';
- 		assert(i < NPAREN);
- 		if (p->pend[i] != 0) {
-+#if __DARWIN_UNIX03
-+			int skip = 1;
-+#endif /* __DARWIN_UNIX03 */
- 			assert(i <= p->g->nsub);
- 			EMIT(OBACK_, i);
- 			assert(p->pbegin[i] != 0);
- 			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
- 			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
-+#if __DARWIN_UNIX03
-+			if (OP(p->strip[p->pbegin[i]+skip]) == OBOL) {
-+				skip++;		/* don't dup anchor in subexp */
-+			}
-+			(void) dupl(p, p->pbegin[i]+skip, p->pend[i]);
-+#else  /* !__DARWIN_UNIX03 */
- 			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
-+#endif /* __DARWIN_UNIX03 */
- 			EMIT(O_BACK, i);
- 		} else
- 			SETERROR(REG_ESUBREG);
-@@ -627,9 +659,10 @@ p_simp_re(struct parse *p,
- 		INSERT(OQUEST_, pos);
- 		ASTERN(O_QUEST, pos);
- 	} else if (EATTWO('\\', '{')) {
-+		(void)REQUIRE(MORE(), REG_EBRACE);
- 		count = p_count(p);
- 		if (EAT(',')) {
--			if (MORE() && isdigit((uch)PEEK())) {
-+			if (MORE() && isdigit_l((uch)PEEK(), p->g->loc)) {
- 				count2 = p_count(p);
- 				(void)REQUIRE(count <= count2, REG_BADBR);
- 			} else		/* single number with comma */
-@@ -659,7 +692,7 @@ p_count(struct parse *p)
- 	int count = 0;
- 	int ndigits = 0;
- 
--	while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
-+	while (MORE() && isdigit_l((uch)PEEK(), p->g->loc) && count <= DUPMAX) {
- 		count = count*10 + (GETNEXT() - '0');
- 		ndigits++;
- 	}
-@@ -697,10 +730,22 @@ p_bracket(struct parse *p)
- 		cs->icase = 1;
- 	if (EAT('^'))
- 		cs->invert = 1;
-+#if __DARWIN_UNIX03
-+	if (PEEK2() != '-' && PEEK2() != ']') {	/* Don't eat '-' or ']' if they're part of ranges
-+						 * but do process [^-] */
- 	if (EAT(']'))
- 		CHadd(p, cs, ']');
- 	else if (EAT('-'))
- 		CHadd(p, cs, '-');
-+	}
-+	if (MORE() && !SEETWO('-',']')) /* Parse RE []-'] */
-+		p_b_term(p, cs);
-+#else /* !__DARWIN_UNIX03 */
-+	if (EAT(']'))
-+		CHadd(p, cs, ']');
-+	else if (EAT('-'))
-+		CHadd(p, cs, '-');
-+#endif /* __DARWIN_UNIX03 */
- 	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
- 		p_b_term(p, cs);
- 	if (EAT('-'))
-@@ -713,7 +758,7 @@ p_bracket(struct parse *p)
- 	if (cs->invert && p->g->cflags&REG_NEWLINE)
- 		cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
- 
--	if ((ch = singleton(cs)) != OUT) {	/* optimize singleton sets */
-+	if ((ch = singleton(cs, p->g->loc)) != OUT) {	/* optimize singleton sets */
- 		ordinary(p, ch);
- 		freeset(p, cs);
- 	} else
-@@ -737,8 +782,16 @@ p_b_term(struct parse *p, cset *cs)
- 		c = (MORE2()) ? PEEK2() : '\0';
- 		break;
- 	case '-':
-+#if __DARWIN_UNIX03
-+ 		if (PEEK2() != '-') { /* Allow [---] */
-+		SETERROR(REG_ERANGE);
-+		return;			/* NOTE RETURN */
-+		} else
-+			c = '-';
-+#else  /* !__DARWIN_UNIX03 */
- 		SETERROR(REG_ERANGE);
- 		return;			/* NOTE RETURN */
-+#endif /* __DARWIN_UNIX03 */
- 		break;
- 	default:
- 		c = '\0';
-@@ -759,7 +812,11 @@ p_b_term(struct parse *p, cset *cs)
- 		NEXT2();
- 		(void)REQUIRE(MORE(), REG_EBRACK);
- 		c = PEEK();
-+#if __DARWIN_UNIX03
-+ 		REQUIRE(c != '-', REG_ECOLLATE); /* allow [=]=] */
-+#else  /* !__DARWIN_UNIX03 */
- 		(void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
-+#endif /* __DARWIN_UNIX03 */
- 		p_b_eclass(p, cs);
- 		(void)REQUIRE(MORE(), REG_EBRACK);
- 		(void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
-@@ -778,14 +835,14 @@ p_b_term(struct parse *p, cset *cs)
- 		if (start == finish)
- 			CHadd(p, cs, start);
- 		else {
--			if (__collate_load_error) {
-+			if (p->g->loc->__collate_load_error) {
- 				(void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE);
- 				CHaddrange(p, cs, start, finish);
- 			} else {
--				(void)REQUIRE(__collate_range_cmp(start, finish) <= 0, REG_ERANGE);
-+				(void)REQUIRE(__collate_range_cmp(start, finish, p->g->loc) <= 0, REG_ERANGE);
- 				for (i = 0; i <= UCHAR_MAX; i++) {
--					if (   __collate_range_cmp(start, i) <= 0
--					    && __collate_range_cmp(i, finish) <= 0
-+					if (   __collate_range_cmp(start, i, p->g->loc) <= 0
-+					    && __collate_range_cmp(i, finish, p->g->loc) <= 0
- 					   )
- 						CHadd(p, cs, i);
- 				}
-@@ -807,7 +864,7 @@ p_b_cclass(struct parse *p, cset *cs)
- 	wctype_t wct;
- 	char clname[16];
- 
--	while (MORE() && isalpha((uch)PEEK()))
-+	while (MORE() && isalpha_l((uch)PEEK(), p->g->loc))
- 		NEXT();
- 	len = p->next - sp;
- 	if (len >= sizeof(clname) - 1) {
-@@ -816,7 +873,7 @@ p_b_cclass(struct parse *p, cset *cs)
- 	}
- 	memcpy(clname, sp, len);
- 	clname[len] = '\0';
--	if ((wct = wctype(clname)) == 0) {
-+	if ((wct = wctype_l(clname, p->g->loc)) == 0) {
- 		SETERROR(REG_ECTYPE);
- 		return;
- 	}
-@@ -826,14 +883,38 @@ p_b_cclass(struct parse *p, cset *cs)
- /*
-  - p_b_eclass - parse an equivalence-class name and deal with it
-  == static void p_b_eclass(struct parse *p, cset *cs);
-- *
-- * This implementation is incomplete. xxx
-  */
- static void
- p_b_eclass(struct parse *p, cset *cs)
- {
--	wint_t c;
--
-+	char *sp = p->next;
-+	int len, ec;
-+	mbstate_t mbs;
-+	int *newequiv_classes;
-+ 	wint_t c;
-+ 
-+	while (MORE() && !SEETWO('=', ']'))
-+		NEXT();
-+	if (!MORE()) {
-+		SETERROR(REG_EBRACK);
-+		return;
-+	}
-+	len = p->next - sp;
-+	memset(&mbs, 0, sizeof(mbs));
-+	ec = __collate_equiv_class(sp, len, &mbs, p->g->loc);
-+	if (ec > 0) {
-+		newequiv_classes = realloc(cs->equiv_classes,
-+		    (cs->nequiv_classes + 1) * sizeof(*cs->equiv_classes));
-+		if (newequiv_classes == NULL) {
-+			SETERROR(REG_ESPACE);
-+			return;
-+		}
-+		cs->equiv_classes = newequiv_classes;
-+		cs->equiv_classes[cs->nequiv_classes++] = ec;
-+		return;
-+	}
-+	/* not an equivalence class, so fallback to a collating element */
-+	p->next = sp;
- 	c = p_b_coll_elem(p, '=');
- 	CHadd(p, cs, c);
- }
-@@ -866,10 +947,10 @@ p_b_coll_elem(struct parse *p,
- 	wint_t endc)		/* name ended by endc,']' */
- {
- 	char *sp = p->next;
--	struct cname *cp;
-+	const struct cname *cp;
- 	int len;
- 	mbstate_t mbs;
--	wchar_t wc;
-+	wchar_t wbuf[16];
- 	size_t clen;
- 
- 	while (MORE() && !SEETWO(endc, ']'))
-@@ -883,9 +964,10 @@ p_b_coll_elem(struct parse *p,
- 		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
- 			return(cp->code);	/* known name */
- 	memset(&mbs, 0, sizeof(mbs));
--	if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
--		return (wc);			/* single character */
--	else if (clen == (size_t)-1 || clen == (size_t)-2)
-+	clen = __collate_collating_symbol(wbuf, 16, sp, len, &mbs, p->g->loc);
-+	if (clen == 1)
-+		return (*wbuf);			/* single character */
-+	else if (clen == (size_t)-1)
- 		SETERROR(REG_ILLSEQ);
- 	else
- 		SETERROR(REG_ECOLLATE);		/* neither */
-@@ -894,16 +976,16 @@ p_b_coll_elem(struct parse *p,
- 
- /*
-  - othercase - return the case counterpart of an alphabetic
-- == static char othercase(int ch);
-+ == static char othercase(wint_t ch, locale_t loc);
-  */
- static wint_t			/* if no counterpart, return ch */
--othercase(wint_t ch)
-+othercase(wint_t ch, locale_t loc)
- {
--	assert(iswalpha(ch));
--	if (iswupper(ch))
--		return(towlower(ch));
--	else if (iswlower(ch))
--		return(towupper(ch));
-+	assert(iswalpha_l(ch, loc));
-+	if (iswupper_l(ch, loc))
-+		return(towlower_l(ch, loc));
-+	else if (iswlower_l(ch, loc))
-+		return(towupper_l(ch, loc));
- 	else			/* peculiar, but could happen */
- 		return(ch);
- }
-@@ -923,10 +1005,10 @@ bothcases(struct parse *p, wint_t ch)
- 	size_t n;
- 	mbstate_t mbs;
- 
--	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
-+	assert(othercase(ch, p->g->loc) != ch);	/* p_bracket() would recurse */
- 	p->next = bracket;
- 	memset(&mbs, 0, sizeof(mbs));
--	n = wcrtomb(bracket, ch, &mbs);
-+	n = wcrtomb_l(bracket, ch, &mbs, p->g->loc);
- 	assert(n != (size_t)-1);
- 	bracket[n] = ']';
- 	bracket[n + 1] = '\0';
-@@ -946,7 +1028,7 @@ ordinary(struct parse *p, wint_t ch)
- {
- 	cset *cs;
- 
--	if ((p->g->cflags&REG_ICASE) && iswalpha(ch) && othercase(ch) != ch)
-+	if ((p->g->cflags&REG_ICASE) && iswalpha_l(ch, p->g->loc) && othercase(ch, p->g->loc) != ch)
- 		bothcases(p, ch);
- 	else if ((ch & OPDMASK) == ch)
- 		EMIT(OCHAR, ch);
-@@ -1012,10 +1094,22 @@ repeat(struct parse *p,
- 	switch (REP(MAP(from), MAP(to))) {
- 	case REP(0, 0):			/* must be user doing this */
- 		DROP(finish-start);	/* drop the operand */
-+#if __DARWIN_UNIX03
-+		p->zerorepeats++;
-+#endif /* __DARWIN_UNIX03 */
- 		break;
-+	case REP(0, INF):		/* as x{1,}? */
-+#if __DARWIN_UNIX03
-+		/* this case does not require the (y|) trick, noKLUDGE */
-+		/* Just like * =+?  */
-+		INSERT(OPLUS_, start);
-+		ASTERN(O_PLUS, start);
-+		INSERT(OQUEST_, start);
-+		ASTERN(O_QUEST, start);
-+		break;
-+#endif /* __DARWIN_UNIX03 */
- 	case REP(0, 1):			/* as x{1,1}? */
- 	case REP(0, N):			/* as x{1,n}? */
--	case REP(0, INF):		/* as x{1,}? */
- 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
- 		INSERT(OCH_, start);		/* offset is wrong... */
- 		repeat(p, start+1, 1, to);
-@@ -1029,6 +1123,10 @@ repeat(struct parse *p,
- 		/* done */
- 		break;
- 	case REP(1, N):			/* as x?x{1,n-1} */
-+#if __DARWIN_UNIX03
-+		INSERT(OQUEST_, start);
-+		ASTERN(O_QUEST, start);
-+#else /* !__DARWIN_UNIX03 */
- 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
- 		INSERT(OCH_, start);
- 		ASTERN(OOR1, start);
-@@ -1036,6 +1134,7 @@ repeat(struct parse *p,
- 		EMIT(OOR2, 0);			/* offset very wrong... */
- 		AHEAD(THERE());			/* ...so fix it */
- 		ASTERN(O_CH, THERETHERE());
-+#endif /* __DARWIN_UNIX03 */
- 		copy = dupl(p, start+1, finish+1);
- 		assert(copy == finish+4);
- 		repeat(p, copy, 1, to-1);
-@@ -1071,7 +1170,7 @@ wgetnext(struct parse *p)
- 	size_t n;
- 
- 	memset(&mbs, 0, sizeof(mbs));
--	n = mbrtowc(&wc, p->next, p->end - p->next, &mbs);
-+	n = mbrtowc_l(&wc, p->next, p->end - p->next, &mbs, p->g->loc);
- 	if (n == (size_t)-1 || n == (size_t)-2) {
- 		SETERROR(REG_ILLSEQ);
- 		return (0);
-@@ -1139,12 +1238,12 @@ freeset(struct parse *p, cset *cs)
-  - returning it if so, otherwise returning OUT.
-  */
- static wint_t
--singleton(cset *cs)
-+singleton(cset *cs, locale_t loc)
- {
- 	wint_t i, s, n;
- 
- 	for (i = n = 0; i < NC; i++)
--		if (CHIN(cs, i)) {
-+		if (CHIN(cs, i, loc)) {
- 			n++;
- 			s = i;
- 		}
-@@ -1178,9 +1277,9 @@ CHadd(struct parse *p, cset *cs, wint_t 
- 		cs->wides[cs->nwides++] = ch;
- 	}
- 	if (cs->icase) {
--		if ((nch = towlower(ch)) < NC)
-+		if ((nch = towlower_l(ch, p->g->loc)) < NC)
- 			cs->bmp[nch >> 3] |= 1 << (nch & 7);
--		if ((nch = towupper(ch)) < NC)
-+		if ((nch = towupper_l(ch, p->g->loc)) < NC)
- 			cs->bmp[nch >> 3] |= 1 << (nch & 7);
- 	}
- }
-@@ -1219,7 +1318,7 @@ CHaddtype(struct parse *p, cset *cs, wct
- 	wctype_t *newtypes;
- 
- 	for (i = 0; i < NC; i++)
--		if (iswctype(i, wct))
-+		if (iswctype_l(i, wct, p->g->loc))
- 			CHadd(p, cs, i);
- 	newtypes = realloc(cs->types, (cs->ntypes + 1) *
- 	    sizeof(*cs->types));
-@@ -1391,6 +1490,7 @@ findmust(struct parse *p, struct re_guts
- 	char buf[MB_LEN_MAX];
- 	size_t clen;
- 	mbstate_t mbs;
-+	struct __xlocale_st_runelocale *rl = p->g->loc->__lc_ctype;
- 
- 	/* avoid making error situations worse */
- 	if (p->error != 0)
-@@ -1401,8 +1501,8 @@ findmust(struct parse *p, struct re_guts
- 	 * multibyte character strings, but it's safe for at least
- 	 * UTF-8 (see RFC 3629).
- 	 */
--	if (MB_CUR_MAX > 1 &&
--	    strcmp(_CurrentRuneLocale->__encoding, "UTF-8") != 0)
-+	if (rl->__mb_cur_max > 1 &&
-+	    strcmp(rl->_CurrentRuneLocale.__encoding, "UTF-8") != 0)
- 		return;
- 
- 	/* find the longest OCHAR sequence in strip */
-@@ -1418,7 +1518,7 @@ findmust(struct parse *p, struct re_guts
- 				memset(&mbs, 0, sizeof(mbs));
- 				newstart = scan - 1;
- 			}
--			clen = wcrtomb(buf, OPND(s), &mbs);
-+			clen = wcrtomb_l(buf, OPND(s), &mbs, p->g->loc);
- 			if (clen == (size_t)-1)
- 				goto toohard;
- 			newlen += clen;
-@@ -1537,7 +1637,7 @@ findmust(struct parse *p, struct re_guts
- 	while (cp < g->must + g->mlen) {
- 		while (OP(s = *scan++) != OCHAR)
- 			continue;
--		clen = wcrtomb(cp, OPND(s), &mbs);
-+		clen = wcrtomb_l(cp, OPND(s), &mbs, p->g->loc);
- 		assert(clen != (size_t)-1);
- 		cp += clen;
- 	}