summaryrefslogtreecommitdiff
path: root/usr.bin/grep
diff options
context:
space:
mode:
author: Todd C. Miller <millert@cvs.openbsd.org> 2004-01-25 21:36:01 +0000
committer: Todd C. Miller <millert@cvs.openbsd.org> 2004-01-25 21:36:01 +0000
commit: 87fcaaa7c3c1c0c0dd898f12de2d379baafb0ed1 (patch)
tree: c73dc31f440b3c08e0564898d8c004b236de23a8 /usr.bin/grep
parent: 6451aa97207663c0fae475e74fed893c6d313f45 (diff)
Previously, in -w mode, for each match on a line grep would check
to see if the match was on a word boundary. However, this missed lines where the first match was not on a word boundary but a subsequent match was. Problem originally spotted by miod@.

We fix this by using the [[:<:]] and [[:>:]] character classes for the slow path and by checking the word boundaries in grep_search() for the fast path, instead of doing the checks after running regexec() or grep_search().

With this change, grep passes the new regress tests 15 and 16. Problem originally spotted by espie@.
Diffstat (limited to 'usr.bin/grep')
-rw-r--r--  usr.bin/grep/grep.c  14
-rw-r--r--  usr.bin/grep/grep.h  4
-rw-r--r--  usr.bin/grep/util.c  78
3 files changed, 54 insertions, 42 deletions
diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c
index 111545df623..6e6a8cf6126 100644
--- a/usr.bin/grep/grep.c
+++ b/usr.bin/grep/grep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: grep.c,v 1.24 2003/12/11 20:49:20 mcbride Exp $ */
+/* $OpenBSD: grep.c,v 1.25 2004/01/25 21:36:00 millert Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
@@ -172,10 +172,16 @@ add_pattern(char *pat, size_t len)
}
if (pat[len - 1] == '\n')
--len;
- pattern[patterns] = grep_malloc(len + 1);
/* pat may not be NUL-terminated */
- memcpy(pattern[patterns], pat, len);
- pattern[patterns][len] = '\0';
+ if (wflag) {
+ pattern[patterns] = grep_malloc(len + 15);
+ snprintf(pattern[patterns], len + 15, "[[:<:]]%.*s[[:>:]]",
+ (int)len, pat);
+ } else {
+ pattern[patterns] = grep_malloc(len + 1);
+ memcpy(pattern[patterns], pat, len);
+ pattern[patterns][len] = '\0';
+ }
++patterns;
if (len > maxPatternLen)
diff --git a/usr.bin/grep/grep.h b/usr.bin/grep/grep.h
index 1d886d57c20..43842ba56d5 100644
--- a/usr.bin/grep/grep.h
+++ b/usr.bin/grep/grep.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: grep.h,v 1.9 2004/01/19 16:12:04 otto Exp $ */
+/* $OpenBSD: grep.h,v 1.10 2004/01/25 21:36:00 millert Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
@@ -55,6 +55,7 @@ typedef struct {
/* flags */
int bol;
int eol;
+ int wmatch;
int reversedSearch;
} fastgrep_t;
@@ -83,7 +84,6 @@ int procfile(char *fn);
int grep_tree(char **argv);
void *grep_malloc(size_t size);
void *grep_realloc(void *ptr, size_t size);
-unsigned char *grep_strdup(const char *);
void printline(str_t *line, int sep);
int fastcomp(fastgrep_t *, const char *);
diff --git a/usr.bin/grep/util.c b/usr.bin/grep/util.c
index 341d787ce44..80c0ee0132e 100644
--- a/usr.bin/grep/util.c
+++ b/usr.bin/grep/util.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: util.c,v 1.21 2004/01/19 16:12:04 otto Exp $ */
+/* $OpenBSD: util.c,v 1.22 2004/01/25 21:36:00 millert Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
@@ -178,25 +178,17 @@ procline(str_t *l, int nottext)
}
for (c = i = 0; i < patterns; i++) {
- pmatch.rm_so = 0;
- pmatch.rm_eo = l->len;
- if (fg_pattern[i].pattern)
+ if (fg_pattern[i].pattern) {
r = grep_search(&fg_pattern[i], (unsigned char *)l->dat,
l->len, &pmatch);
- else
+ } else {
+ pmatch.rm_so = 0;
+ pmatch.rm_eo = l->len;
r = regexec(&r_pattern[i], l->dat, 1, &pmatch, eflags);
- if (r == 0) {
- if (wflag) {
- if ((pmatch.rm_so != 0 &&
- isword(l->dat[pmatch.rm_so - 1])) ||
- (pmatch.rm_eo != l->len &&
- isword(l->dat[pmatch.rm_eo])))
- r = REG_NOMATCH;
- }
- if (xflag) {
- if (pmatch.rm_so != 0 || pmatch.rm_eo != l->len)
- r = REG_NOMATCH;
- }
+ }
+ if (r == 0 && xflag) {
+ if (pmatch.rm_so != 0 || pmatch.rm_eo != l->len)
+ r = REG_NOMATCH;
}
if (r == 0) {
c++;
@@ -254,6 +246,7 @@ fastcomp(fastgrep_t *fg, const char *pattern)
origPatternLen = fg->patternLen = strlen(pattern);
fg->bol = 0;
fg->eol = 0;
+ fg->wmatch = 0;
fg->reversedSearch = 0;
/* Remove end-of-line character ('$'). */
@@ -272,11 +265,22 @@ fastcomp(fastgrep_t *fg, const char *pattern)
boleol = 1;
}
+ /* Remove enclosing [[:<:]] and [[:>:]] (word match). */
+ if (fg->patternLen > 14 + fg->bol + fg->eol &&
+ strncmp(pattern + fg->bol, "[[:<:]]", 7) == 0 &&
+ strncmp(pattern + fg->patternLen - (7 + fg->eol), "[[:>:]]", 7) == 0) {
+ fg->patternLen -= 14;
+ fg->wmatch = 7;
+ }
+
/*
- * Copy pattern minus '^' and '$' characters at the beginning and
- * ending of the string respectively.
+ * Copy pattern minus '^' and '$' characters as well as word
+ * match character classes at the beginning and ending of the
+ * string respectively.
*/
- fg->pattern = grep_strdup(pattern + bol);
+ fg->pattern = grep_malloc(fg->patternLen + 1);
+ memcpy(fg->pattern, pattern + bol + fg->wmatch, fg->patternLen);
+ fg->pattern[fg->patternLen] = '\0';
/* Look for ways to cheat...er...avoid the full regex engine. */
for (i = 0; i < fg->patternLen; i++)
@@ -369,6 +373,9 @@ fastcomp(fastgrep_t *fg, const char *pattern)
return (0);
}
+#define wmatch(d, l, s, e) \
+ ((s == 0 || !isword(d[s-1])) && (e == l || !isword(d[e])))
+
static int
grep_search(fastgrep_t *fg, unsigned char *data, int dataLen, regmatch_t *pmatch)
{
@@ -393,10 +400,13 @@ grep_search(fastgrep_t *fg, unsigned char *data, int dataLen, regmatch_t *pmatch
else
j = 0;
if (!((fg->bol && fg->eol) && (dataLen != fg->patternLen)))
- if (grep_cmp(fg->pattern, data + j, fg->patternLen) == -1) {
- rtrnVal = 0;
+ if (grep_cmp(fg->pattern, data + j,
+ fg->patternLen) == -1) {
pmatch->rm_so = j;
pmatch->rm_eo = j + fg->patternLen;
+ if (!fg->wmatch || wmatch(data, dataLen,
+ pmatch->rm_so, pmatch->rm_eo))
+ rtrnVal = 0;
}
}
} else if (fg->reversedSearch) {
@@ -405,10 +415,13 @@ grep_search(fastgrep_t *fg, unsigned char *data, int dataLen, regmatch_t *pmatch
do {
if (grep_cmp(fg->pattern, data + j - fg->patternLen,
fg->patternLen) == -1) {
- rtrnVal = 0;
pmatch->rm_so = j - fg->patternLen;
pmatch->rm_eo = j;
- break;
+ if (!fg->wmatch || wmatch(data, dataLen,
+ pmatch->rm_so, pmatch->rm_eo)) {
+ rtrnVal = 0;
+ break;
+ }
}
/* Shift if within bounds, otherwise, we are done. */
if (j == fg->patternLen)
@@ -420,10 +433,13 @@ grep_search(fastgrep_t *fg, unsigned char *data, int dataLen, regmatch_t *pmatch
j = 0;
do {
if (grep_cmp(fg->pattern, data + j, fg->patternLen) == -1) {
- rtrnVal = 0;
pmatch->rm_so = j;
pmatch->rm_eo = j + fg->patternLen;
- break;
+ if (!fg->wmatch || wmatch(data, dataLen,
+ pmatch->rm_so, pmatch->rm_eo)) {
+ rtrnVal = 0;
+ break;
+ }
}
/* Shift if within bounds, otherwise, we are done. */
@@ -456,16 +472,6 @@ grep_realloc(void *ptr, size_t size)
return ptr;
}
-unsigned char *
-grep_strdup(const char *str)
-{
- unsigned char *ptr;
-
- if ((ptr = (unsigned char *)strdup(str)) == NULL)
- err(2, "strdup");
- return ptr;
-}
-
/*
* Returns: i >= 0 on failure (position that it failed)
* -1 on success