1 files changed, 60 insertions, 13 deletions
diff --git a/usr.bin/awk/b.c b/usr.bin/awk/b.c
index 288d022cffe..c6039e6740b 100644
--- a/usr.bin/awk/b.c
+++ b/usr.bin/awk/b.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: b.c,v 1.10 2001/09/08 00:12:40 millert Exp $	*/
+/*	$OpenBSD: b.c,v 1.11 2002/12/19 21:24:28 millert Exp $	*/
 /****************************************************************
 Copyright (C) Lucent Technologies 1997
 All Rights Reserved
@@ -76,7 +76,7 @@ int	patlen;
 fa	*fatab[NFA];
 int	nfatab	= 0;	/* entries in fatab */
 
-fa *makedfa(char *s, int anchor)	/* returns dfa for reg expr s */
+fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
 {
 	int i, use, nuse;
 	fa *pfa;
@@ -94,7 +94,7 @@ fa *makedfa(char *s, int anchor)	/* returns dfa for reg expr s */
 		return mkdfa(s, anchor);
 	for (i = 0; i < nfatab; i++)	/* is it there already? */
 		if (fatab[i]->anchor == anchor
-		  && strcmp(fatab[i]->restr, s) == 0) {
+		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
 			fatab[i]->use = now++;
 			return fatab[i];
 		}
@@ -118,7 +118,7 @@ fa *makedfa(char *s, int anchor)	/* returns dfa for reg expr s */
 	return pfa;
 }
 
-fa *mkdfa(char *s, int anchor)	/* does the real work of making a dfa */
+fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
 				/* anchor = 1 for anchored matches, else 0 */
 {
 	Node *p, *p1;
@@ -283,7 +283,7 @@ int quoted(char **pp)	/* pick up next thing after a \\ */
 	return c;
 }
 
-char *cclenter(char *argp)	/* add a character class */
+char *cclenter(const char *argp)	/* add a character class */
 {
 	int i, c, c2;
 	uschar *p = (uschar *) argp;
@@ -329,7 +329,7 @@ char *cclenter(char *argp)	/* add a character class */
 	return (char *) tostring((char *) buf);
 }
 
-void overflo(char *s)
+void overflo(const char *s)
 {
 	FATAL("regular expression too big: %.30s...", s);
 }
@@ -447,7 +447,7 @@ void follow(Node *v)	/* collects leaves that can follow v into setvec */
 	}
 }
 
-int member(int c, char *sarg)	/* is c in s? */
+int member(int c, const char *sarg)	/* is c in s? */
 {
 	uschar *s = (uschar *) sarg;
 
@@ -457,7 +457,7 @@ int member(int c, char *sarg)	/* is c in s? */
 	return(0);
 }
 
-int match(fa *f, char *p0)	/* shortest match ? */
+int match(fa *f, const char *p0)	/* shortest match ? */
 {
 	int s, ns;
 	uschar *p = (uschar *) p0;
@@ -476,7 +476,7 @@ int match(fa *f, char *p0)	/* shortest match ? */
 	return(0);
 }
 
-int pmatch(fa *f, char *p0)	/* longest match, for sub */
+int pmatch(fa *f, const char *p0)	/* longest match, for sub */
 {
 	int s, ns;
 	uschar *p = (uschar *) p0;
@@ -529,7 +529,7 @@ int pmatch(fa *f, char *p0)	/* longest match, for sub */
 	return (0);
 }
 
-int nematch(fa *f, char *p0)	/* non-empty match, for sub */
+int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
 {
 	int s, ns;
 	uschar *p = (uschar *) p0;
@@ -581,15 +581,17 @@ int nematch(fa *f, char *p0)	/* non-empty match, for sub */
 	return (0);
 }
 
-Node *reparse(char *p)	/* parses regular expression pointed to by p */
+Node *reparse(const char *p)	/* parses regular expression pointed to by p */
 {			/* uses relex() to scan regular expression */
 	Node *np;
 
 	dprintf( ("reparse <%s>\n", p) );
 	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
 	rtok = relex();
+	/* GNU compatibility: an empty regexp matches anything */
 	if (rtok == '\0')
-		FATAL("empty regular expression");
+		/* FATAL("empty regular expression"); previous */
+		return(op2(ALL, NIL, NIL));
 	np = regexp();
 	if (rtok != '\0')
 		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
@@ -684,6 +686,37 @@ Node *unary(Node *np)
 	}
 }
 
+/*
+ * Character class definitions conformant to the POSIX locale as
+ * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
+ * and operating character sets are both ASCII (ISO646) or supersets
+ * thereof.
+ *
+ * Note that to avoid overflowing the temporary buffer used in
+ * relex(), the expanded character class (prior to range expansion)
+ * must be less than twice the size of their full name.
+ */
+struct charclass {
+	const char *cc_name;
+	int cc_namelen;
+	const char *cc_expand;
+} charclasses[] = {
+	{ "alnum",	5,	"0-9A-Za-z" },
+	{ "alpha",	5,	"A-Za-z" },
+	{ "blank",	5,	" \t" },
+	{ "cntrl",	5,	"\000-\037\177" },
+	{ "digit",	5,	"0-9" },
+	{ "graph",	5,	"\041-\176" },
+	{ "lower",	5,	"a-z" },
+	{ "print",	5,	" \041-\176" },
+	{ "punct",	5,	"\041-\057\072-\100\133-\140\173-\176" },
+	{ "space",	5,	" \f\n\r\t\v" },
+	{ "upper",	5,	"A-Z" },
+	{ "xdigit",	6,	"0-9A-Fa-f" },
+	{ NULL,		0,	NULL },
+};
+
+
 int relex(void)		/* lexical analyzer for reparse */
 {
 	int c, n;
@@ -691,6 +724,8 @@ int relex(void)		/* lexical analyzer for reparse */
 	static uschar *buf = 0;
 	static int bufsz = 100;
 	uschar *bp;
+	struct charclass *cc;
+	const uschar *p;
 
 	switch (c = *prestr++) {
 	case '|': return OR;
@@ -720,7 +755,7 @@ int relex(void)		/* lexical analyzer for reparse */
 		}
 		else
 			cflag = 0;
-		n = 2 * strlen(prestr)+1;
+		n = 2 * strlen((const char *) prestr)+1;
 		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
 			FATAL("out of space for reg expr %.10s...", lastre);
 		for (; ; ) {
@@ -731,6 +766,18 @@ int relex(void)		/* lexical analyzer for reparse */
 				*bp++ = c;
 			/* } else if (c == '\n') { */
 			/* 	FATAL("newline in character class %.20s...", lastre); */
+			} else if (c == '[' && *prestr == ':') {
+				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
+				for (cc = charclasses; cc->cc_name; cc++)
+					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
+						break;
+				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
+				    prestr[2 + cc->cc_namelen] == ']') {
+					prestr += cc->cc_namelen + 3;
+					for (p = (const uschar *) cc->cc_expand; *p; p++)
+						*bp++ = *p;
+				} else
+					*bp++ = c;
 			} else if (c == '\0') {
 				FATAL("nonterminated character class %.20s", lastre);
 			} else if (bp == buf) {	/* 1st char is special */