From f726675da85a1c25e47a171f7efaa49fd01f1b2b Mon Sep 17 00:00:00 2001
From: Marc Espie <espie@cvs.openbsd.org>
Date: Mon, 17 Jul 2000 23:01:21 +0000
Subject: Replace Str_Match with a better routine, which handles
 negated intervals and backslash escapes in intervals.

Accordingly, var.c no longer needs to copy the :M argument to replace \: with :.

We don't use fnmatch(3) because of various optimizations which are harder
to achieve in a generic setting.

Also add regression suite for the Str_Match function.
---
 usr.bin/make/Makefile  |  10 ++-
 usr.bin/make/regress.c |  75 ++++++++++++++++++
 usr.bin/make/str.c     | 206 ++++++++++++++++++++++++++++---------------------
 usr.bin/make/var.c     |  48 ++----------
 4 files changed, 209 insertions(+), 130 deletions(-)
 create mode 100644 usr.bin/make/regress.c

diff --git a/usr.bin/make/Makefile b/usr.bin/make/Makefile
index f16deae950e..9da787abe60 100644
--- a/usr.bin/make/Makefile
+++ b/usr.bin/make/Makefile
@@ -1,4 +1,4 @@
-#	$OpenBSD: Makefile,v 1.19 2000/06/25 20:31:08 espie Exp $
+#	$OpenBSD: Makefile,v 1.20 2000/07/17 23:01:20 espie Exp $
 
 PROG=	make
 CFLAGS+= -I${.OBJDIR} -I${.CURDIR} -Wall -Wno-char-subscripts -Wno-unused -Wstrict-prototypes#-Wmissing-prototypes -Wstrict-prototypes
@@ -32,6 +32,8 @@ libohash.a: ${LIBOBJS}
 	ar cq $@ `lorder ${LIBOBJS}|tsort`
 	ranlib $@
 
+# Regression test products; generate, generate.o, hashconsts.h are listed below.
+CLEANFILES+=regress.o check
 CLEANFILES+=${LIBOBJS} libohash.a
 CLEANFILES+= hashconsts.h generate.o generate
 
@@ -45,6 +47,12 @@ hashconsts.h: generate
 generate: generate.o error.o libohash.a
 	${CC} -o ${.TARGET} ${CFLAGS} ${.ALLSRC} ${LDADD}
 
+# Str_Match regression tests: `check' is the test program, `regress' runs it.
+check: regress.o str.o error.o buf.o libohash.a
+	${CC} -o ${.TARGET} ${CFLAGS} ${.ALLSRC} ${LDADD}
+regress: check
+	${.OBJDIR}/check
+
 # kludge for people who forget to make depend
 var.o: hashconsts.h
 
diff --git a/usr.bin/make/regress.c b/usr.bin/make/regress.c
new file mode 100644
index 00000000000..d228bf10922
--- /dev/null
+++ b/usr.bin/make/regress.c
@@ -0,0 +1,75 @@
+/* $OpenBSD: regress.c,v 1.1 2000/07/17 23:01:20 espie Exp $ */
+
+/*
+ * Copyright (c) 1999 Marc Espie.
+ *
+ * Code written for the OpenBSD project.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OPENBSD
+ * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* regression tests */
+#include "make.h"
+#include <stdio.h>
+
+int main __P((void));
+#define CHECK(s)	/* prints s and ok/failed; counts failures in errors */ \
+do {				\
+    printf("%-65s", #s);	\
+    if (s)			\
+	printf("ok\n");		\
+    else {			\
+	printf("failed\n");	\
+	errors++;		\
+    }				\
+} while (0)
+
+/* Run every Str_Match check and report the number of failures.
+ * Returns non-zero when at least one check failed, zero otherwise.  */
+int main()
+{
+    unsigned errors = 0;
+
+    CHECK(Str_Match("string", "string") == 1);
+    CHECK(Str_Match("string", "string2") == 0);
+    CHECK(Str_Match("string", "string*") == 1);
+    CHECK(Str_Match("Long string", "Lo*ng") == 1);
+    CHECK(Str_Match("Long string", "Lo*ng ") == 0);
+    CHECK(Str_Match("Long string", "Lo*ng *") == 1);
+    CHECK(Str_Match("string", "stri?g") == 1);
+    CHECK(Str_Match("str?ng", "str\\?ng") == 1);
+    CHECK(Str_Match("striiiing", "str?*ng") == 1);
+    CHECK(Str_Match("Very long string just to see", "******a****") == 0);
+    CHECK(Str_Match("d[abc?", "d\\[abc\\?") == 1);
+    CHECK(Str_Match("d[abc!", "d\\[abc\\?") == 0);
+    CHECK(Str_Match("dwabc?", "d\\[abc\\?") == 0);
+    CHECK(Str_Match("da0", "d[bcda]0") == 1);
+    CHECK(Str_Match("da0", "d[z-a]0") == 1);
+    CHECK(Str_Match("d-0", "d[-a-z]0") == 1);
+    CHECK(Str_Match("dy0", "d[a\\-z]0") == 0);
+    CHECK(Str_Match("d-0", "d[a\\-z]0") == 1);
+    CHECK(Str_Match("dz0", "d[a\\]z]0") == 1);
+
+    if (errors != 0)
+	printf("Errors: %u\n", errors);
+    return errors != 0;
+}
diff --git a/usr.bin/make/str.c b/usr.bin/make/str.c
index 7dbebe561c2..7aeb5f1551a 100644
--- a/usr.bin/make/str.c
+++ b/usr.bin/make/str.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: str.c,v 1.13 2000/07/17 22:57:37 espie Exp $	*/
+/*	$OpenBSD: str.c,v 1.14 2000/07/17 23:01:20 espie Exp $	*/
 /*	$NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $	*/
 
 /*-
@@ -43,7 +43,7 @@
 #if 0
 static char     sccsid[] = "@(#)str.c	5.8 (Berkeley) 6/1/90";
 #else
-static char rcsid[] = "$OpenBSD: str.c,v 1.13 2000/07/17 22:57:37 espie Exp $";
+static char rcsid[] = "$OpenBSD: str.c,v 1.14 2000/07/17 23:01:20 espie Exp $";
 #endif
 #endif				/* not lint */
 
@@ -223,103 +223,133 @@ done:	argv[argc] = (char *)NULL;
  *
  * See if a particular string matches a particular pattern.
  *
- * Results: Non-zero is returned if string matches pattern, 0 otherwise. The
+ * Results: TRUE is returned if string matches pattern, FALSE otherwise. The
  * matching operation permits the following special characters in the
  * pattern: *?\[] (see the man page for details on what these mean).
- *
- * Side effects: None.
  */
-int
+Boolean
 Str_Match(string, pattern)
-	const char *string;		/* String */
-	const char *pattern;		/* Pattern */
+    const char *string;			/* String */
+    const char *pattern;		/* Pattern */
 {
-	char c2;
-
-	for (;;) {
-		/*
-		 * See if we're at the end of both the pattern and the
-		 * string. If, we succeeded.  If we're at the end of the
-		 * pattern but not at the end of the string, we failed.
-		 */
-		if (*pattern == 0)
-			return(!*string);
-		if (*string == 0 && *pattern != '*')
-			return(0);
-		/*
-		 * Check for a "*" as the next pattern character.  It matches
-		 * any substring.  We handle this by calling ourselves
-		 * recursively for each postfix of string, until either we
-		 * match or we reach the end of the string.
-		 */
-		if (*pattern == '*') {
-			pattern += 1;
-			if (*pattern == 0)
-				return(1);
-			while (*string != 0) {
-				if (Str_Match(string, pattern))
-					return(1);
-				++string;
-			}
-			return(0);
+    while (*pattern != '\0') {
+	/* Check for a "*" as the next pattern character.  It matches
+	 * any substring.  We handle this by calling ourselves
+	 * recursively for each postfix of string, until either we
+	 * match or we reach the end of the string.  */
+	if (*pattern == '*') {
+	    pattern++;
+	    /* Skip over contiguous  sequences of `?*', so that recursive
+	     * calls only occur on `real' characters.  */
+	    while (*pattern == '?' || *pattern == '*') {
+		if (*pattern == '?') {
+		    if (*string == '\0')
+			return FALSE;
+		    else
+			string++;
 		}
-		/*
-		 * Check for a "?" as the next pattern character.  It matches
-		 * any single character.
-		 */
-		if (*pattern == '?')
-			goto thisCharOK;
-		/*
-		 * Check for a "[" as the next pattern character.  It is
-		 * followed by a list of characters that are acceptable, or
-		 * by a range (two characters separated by "-").
-		 */
-		if (*pattern == '[') {
-			++pattern;
-			for (;;) {
-				if ((*pattern == ']') || (*pattern == 0))
-					return(0);
-				if (*pattern == *string)
-					break;
-				if (pattern[1] == '-') {
-					c2 = pattern[2];
-					if (c2 == 0)
-						return(0);
-					if ((*pattern <= *string) &&
-					    (c2 >= *string))
-						break;
-					if ((*pattern >= *string) &&
-					    (c2 <= *string))
-						break;
-					pattern += 2;
-				}
-				++pattern;
-			}
-			while ((*pattern != ']') && (*pattern != 0))
-				++pattern;
-			goto thisCharOK;
+		pattern++;
+	    }
+	    if (*pattern == '\0')
+		return TRUE;
+	    for (; *string != '\0'; string++)
+		if (Str_Match(string, pattern))
+		    return TRUE;
+	    return FALSE;
+	} else if (*string == '\0') 
+	    return FALSE;
+	/* Check for a "[" as the next pattern character.  It is
+	 * followed by a list of characters that are acceptable, or
+	 * by a range (two characters separated by "-").  */
+	else if (*pattern == '[') {
+	    pattern++;
+	    if (*pattern == '\0')
+	    	return FALSE;
+	    if (*pattern == '!' || *pattern == '^') {
+		pattern++;
+		if (*pattern == '\0')
+			return FALSE;
+		/* Negative match */
+		for (;;) {
+		    if (*pattern == '\\') {
+			if (*++pattern == '\0')
+			    return FALSE;
+		    }
+		    if (*pattern == *string)
+			return FALSE;
+		    if (pattern[1] == '-') {
+			if (pattern[2] == '\0')
+			    return FALSE;
+			if (*pattern < *string && *string <= pattern[2])
+			    return FALSE;
+			if (pattern[2] <= *string && *string < *pattern)
+			    return FALSE;
+			pattern += 3;
+		    } else
+			pattern++;
+		    if (*pattern == '\0')
+		    	return FALSE;
+		    /* The test for ']' is done at the end so that ']'
+		     * can be used at the start of the range without '\' */
+		    if (*pattern == ']')
+		    	break;
 		}
-		/*
-		 * If the next pattern character is '/', just strip off the
-		 * '/' so we do exact matching on the character that follows.
-		 */
-		if (*pattern == '\\') {
-			++pattern;
-			if (*pattern == 0)
-				return(0);
+	    } else {
+		for (;;) {
+		    if (*pattern == '\\') {
+			if (*++pattern == '\0')
+			    return FALSE;
+		    }
+		    if (*pattern == *string)
+			break;
+		    if (pattern[1] == '-') {
+			if (pattern[2] == '\0')
+			    return FALSE;
+			if (*pattern < *string && *string <= pattern[2])
+			    break;
+			if (pattern[2] <= *string && *string < *pattern)
+			    break;
+			pattern += 3;
+		    } else
+			pattern++;
+		    /* The test for ']' is done at the end so that ']'
+		     * can be used at the start of the range without '\' */
+		    if (*pattern == '\0' || *pattern == ']')
+		    	return FALSE;
+		}
+		/* Found matching character: step past it (it may be a
+		 * literal or escaped ']'), then skip the rest of the class.  */
+		for (pattern++; *pattern != ']'; pattern++) {
+		    if (*pattern == '\\')
+			pattern++;
+		    /* A non-terminated class is ok: pattern ends here.  */
+		    if (*pattern == '\0')
+			return string[1] == '\0' ? TRUE : FALSE;
 		}
-		/*
-		 * There's no special character.  Just make sure that the
-		 * next characters of each string match.
-		 */
-		if (*pattern != *string)
-			return(0);
-thisCharOK:	++pattern;
-		++string;
+	    }
 	}
+	/* '?' matches any single character, so shunt test.  */
+	else if (*pattern != '?') {
+	    /* If the next pattern character is '\', just strip off the
+	     * '\' so we do exact matching on the character that follows.  */
+	    if (*pattern == '\\') {
+		if (*++pattern == '\0')
+		    return FALSE;
+	    }
+	    /* There's no special character.  Just make sure that 
+	     * the next characters of each string match.  */
+	    if (*pattern != *string)
+		return FALSE;
+	}
+	pattern++;
+	string++;
+    }
+    if (*string == '\0')
+	return TRUE;
+    else
+	return FALSE;
 }
 
-
 /*-
  *-----------------------------------------------------------------------
  * Str_SYSVMatch --
diff --git a/usr.bin/make/var.c b/usr.bin/make/var.c
index d3410f7131a..b84e205188e 100644
--- a/usr.bin/make/var.c
+++ b/usr.bin/make/var.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: var.c,v 1.40 2000/07/17 22:57:37 espie Exp $	*/
+/*	$OpenBSD: var.c,v 1.41 2000/07/17 23:01:20 espie Exp $	*/
 /*	$NetBSD: var.c,v 1.18 1997/03/18 19:24:46 christos Exp $	*/
 
 /*
@@ -70,7 +70,7 @@
 #if 0
 static char sccsid[] = "@(#)var.c	8.3 (Berkeley) 3/19/94";
 #else
-static char rcsid[] = "$OpenBSD: var.c,v 1.40 2000/07/17 22:57:37 espie Exp $";
+static char rcsid[] = "$OpenBSD: var.c,v 1.41 2000/07/17 23:01:20 espie Exp $";
 #endif
 #endif /* not lint */
 
@@ -1887,53 +1887,19 @@ Var_Parse(str, ctxt, err, lengthPtr, freePtr)
 		case 'N':
 		case 'M':
 		{
-		    char    *pattern;
-		    char    *cp2;
-		    Boolean copy;
-
-		    copy = FALSE;
 		    for (cp = tstr + 1;
 			 *cp != '\0' && *cp != ':' && *cp != endc;
-			 cp++)
-		    {
+			 cp++) {
 			if (*cp == '\\' && (cp[1] == ':' || cp[1] == endc)){
-			    copy = TRUE;
 			    cp++;
 			}
 		    }
 		    termc = *cp;
 		    *cp = '\0';
-		    if (copy) {
-			/*
-			 * Need to compress the \:'s out of the pattern, so
-			 * allocate enough room to hold the uncompressed
-			 * pattern (note that cp started at tstr+1, so
-			 * cp - tstr takes the null byte into account) and
-			 * compress the pattern into the space.
-			 */
-			pattern = emalloc(cp - tstr);
-			for (cp2 = pattern, cp = tstr + 1;
-			     *cp != '\0';
-			     cp++, cp2++)
-			{
-			    if ((*cp == '\\') &&
-				(cp[1] == ':' || cp[1] == endc)) {
-				    cp++;
-			    }
-			    *cp2 = *cp;
-			}
-			*cp2 = '\0';
-		    } else {
-			pattern = &tstr[1];
-		    }
-		    if (*tstr == 'M' || *tstr == 'm') {
-			newStr = VarModify(str, VarMatch, pattern);
-		    } else {
-			newStr = VarModify(str, VarNoMatch, pattern);
-		    }
-		    if (copy) {
-			free(pattern);
-		    }
+		    if (*tstr == 'M')
+			newStr = VarModify(str, VarMatch, tstr+1);
+		    else
+			newStr = VarModify(str, VarNoMatch, tstr+1);
 		    break;
 		}
 		case 'S':
-- 
cgit v1.2.3