summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTheo de Raadt <deraadt@cvs.openbsd.org>2001-09-21 23:12:01 +0000
committerTheo de Raadt <deraadt@cvs.openbsd.org>2001-09-21 23:12:01 +0000
commit7012090811f9c9222eed9a3e377e46438309fc0f (patch)
tree447a65355d84343eb4da66d3e3ce437b13fc42ce
parent59a639e3afe2ddfe6b7a38faf6692e15271f4023 (diff)
complete grep clone by harding@motd.ca; everything is there, but it is
slow because libc regexp is slow. Anyone out there have the balls to sit down and optimize libc regexp?
-rw-r--r--usr.bin/grep/Makefile11
-rw-r--r--usr.bin/grep/grep.1261
-rw-r--r--usr.bin/grep/grep.c595
3 files changed, 867 insertions, 0 deletions
diff --git a/usr.bin/grep/Makefile b/usr.bin/grep/Makefile
new file mode 100644
index 00000000000..8e2b7a4be78
--- /dev/null
+++ b/usr.bin/grep/Makefile
@@ -0,0 +1,11 @@
+# $OpenBSD: Makefile,v 1.1 2001/09/21 23:12:00 deraadt Exp $
+
+# Build the grep program from its single source file.
+PROG= grep
+SRCS= grep.c
+CFLAGS+= -Wall -pedantic -g
+
+# Provide egrep and fgrep as additional names for the grep binary
+# (grep.c looks at the first letter of the name it was invoked by),
+# and give the manual page the same two aliases.
+LINKS+= ${BINDIR}/grep ${BINDIR}/egrep \
+ ${BINDIR}/grep ${BINDIR}/fgrep
+MLINKS= grep.1 egrep.1 grep.1 fgrep.1
+
+.include <bsd.prog.mk>
diff --git a/usr.bin/grep/grep.1 b/usr.bin/grep/grep.1
new file mode 100644
index 00000000000..555522a2bac
--- /dev/null
+++ b/usr.bin/grep/grep.1
@@ -0,0 +1,261 @@
+.\" $OpenBSD: grep.1,v 1.1 2001/09/21 23:12:00 deraadt Exp $
+.\"
+.\" Copyright (c) 2000 Carson Harding. All rights reserved.
+.\" This code was written and contributed to OpenBSD by Carson Harding.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. Neither the name of the author, or the names of contributors may be
+.\" used to endorse or promote products derived from this software without
+.\" specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd May 10, 2001
+.Dt GREP 1
+.Os
+.Sh NAME
+.Nm grep , egrep , fgrep
+.Nd print lines matching a pattern
+.Sh SYNOPSIS
+.Nm grep
+.Op Fl E | Fl F
+.Op Fl abchilnqsvwxz
+.Oo
+.Fl RXH
+.Op Fl L | Fl P
+.Oc
+.Op Fl e Ar pattern_list
+.Op Fl f Ar pattern_file
+.Op Ar pattern
+.Op Ar file
+.Ar ...
+.Sh DESCRIPTION
+The
+.Nm
+utility searches for lines that contain the specified pattern
+or patterns. By default
+.Nm
+prints lines containing matches to the standard output. If no input files
+are specified,
+.Nm
+expects to read its input from the standard input.
+.Pp
+.Nm egrep
+is equivalent to
+.Nm grep
+.Fl E ;
+.Nm fgrep
+is equivalent to
+.Nm grep
+.Fl F .
+.Pp
+The patterns are regular expressions, or in the case of
+.Nm fgrep ,
+fixed strings. More than one pattern may be specified on the
+command-line, either by repeated use of the
+.Fl e
+flag, or by separating patterns with newlines:
+
+.Nm grep
+\'Ishmael
+ Ahab' mobydick.txt
+
+is the same as:
+
+.Nm
+-e Ishmael -e Ahab mobydick.txt
+
+Note that many regular expression special characters also have special
+meaning to the shell. It is best to enclose the regular expression
+in quotation marks. Note also that a null pattern ("" or a newline alone)
+matches all input.
+.Sh OPTIONS
+.Pp
+The options available are:
+.Bl -tag -width file Ds
+.It Fl E
+Use extended regular expressions, as if the program was invoked as
+.Nm egrep .
+.It Fl F
+Patterns are fixed strings, not regular expressions, as if the
+program was invoked as
+.Nm fgrep .
+.It Fl a
+Does nothing. In other greps this flag tells grep to treat the input
+as ascii and turns off attempts to determine whether the input file is a
+binary file. This
+.Nm grep
+does not (yet) do that check, so behaves as if
+.Fl a
+is always specified.
+.It Fl b
+Prepend the byte-offset of the beginning of the line containing the match.
+.It Fl c
+Print only a count of the matches found.
+.It Fl e Ar pattern_list
+The argument to
+.Fl e
+is a list of patterns to search for.
+.Fl e
+may be used multiple times, each instance of which may have
+one or more patterns separated by new-lines.
+.It Fl f Ar pattern_file
+The argument to
+.Fl f
+is the name of a file from which to read patterns.
+.Fl f
+may be specified more than once.
+.It Fl h
+Suppress the prefixing of file names to matching lines when multiple
+files are searched.
+.It Fl i
+Ignore case in comparisons.
+.It Fl l
+Print only a list of the names of files in which matches were found.
+If the input is the standard input,
+.Nm
+prints "(standard input)".
+.It Fl n
+Prefix matching lines with their line number in the file in which
+they occur.
+.It Fl q
+Be quiet about errors accessing files, only return program status.
+The status returned is 0 if there was a match, 1 if there were no
+matches, even if there were errors accessing files. (Without the
+.Fl q
+flag, if there was an error
+accessing a file
+.Nm
+would return a status of 2 even
+if it found matches.)
+.It Fl s
+Suppress errors about accessing files. Note that unlike
+.Fl q
+the program is not quiet: matches are still output.
+.It Fl v
+Invert the sense of the match: return all lines that do not
+contain the pattern.
+.It Fl w
+The pattern is considered a word. A matching pattern in a line must not
+be immediately bordered by a letter, a number, or an underscore ('_').
+.It Fl x
+Match the line exactly. If a regular expression (invoked as
+.Nm grep ,
+or
+.Nm egrep )
+the pattern must match the whole line (as if
+the pattern was enclosed in ^ and $). If invoked as
+.Nm fgrep ,
+the string must match the entire line.
+.It Fl z
+When the
+.Fl l
+flag is also specified, print an ASCII NUL character (0) following
+the file name, rather than a newline. Like the
+.Fl print0
+option to
+.Nm find ,
+this may be used to pass file names containing unusual characters
+to programs such as
+.Nm xargs
+with the
+.Fl 0
+flag.
+.El
+.Sh FILESYSTEM TRAVERSAL OPTIONS
+.Pp
+The following options control recursive searches:
+.Bl -tag -width file Ds
+.It Fl R
+Recursively descend through any specified directory arguments.
+.It Fl H
+If the
+.Fl R
+option is also specified, symbolic links on the command
+line are followed. (Symbolic links encountered in the tree
+traversal are not followed.)
+.It Fl L
+If the
+.Fl R
+option is also specified, all symbolic links are followed.
+.It Fl P
+If the
+.Fl R
+option is also specified, no symbolic links are followed.
+.It Fl X
+If the
+.Fl R
+option is also specified, searches are confined to the device on
+which the search started (file system mount points are not crossed).
+.El
+.Sh EXAMPLES
+.Pp
+To print all occurrences of the word Ishmael in mobydick.txt:
+.Dl grep Ishmael mobydick.txt
+.Pp
+To merely count all the lines in which the word Ishmael occurs:
+.Dl grep -c Ishmael mobydick.txt
+.Pp
+To print all occurrences of either of the words Ishmael or Ahab in
+mobydick.txt:
+.Dl grep 'Ishmael|Ahab' mobydick.txt
+.Pp
+To print all occurrences of the word whale in mobydick.txt, whether
+or not it is capitalised, and where it appears alone and not as part
+of a compound:
+.Dl grep -iw whale mobydick.txt
+.Pp
+To find all the empty lines and print the line numbers for where they occur:
+.Dl grep -nv \&. mobydick.txt
+.Sh DIAGNOSTICS
+.Pp
+The
+.Nm
+utility exits with one of the following values:
+.Pp
+.Bl -tag -width flag -compact
+.It 0
+One or more matching lines were found.
+.It 1
+No matches were found.
+.It 2
+An error occurred (whether or not matches were found).
+.El
+.Pp
+Note that when the
+.Fl q
+flag is used, file access errors do not cause an exit value of 2,
+and in the absence of other errors (a bad regular expression)
+the exit value is determined only by whether or not matches
+were found. (XX Should -s do the same??)
+.Sh SEE ALSO
+.Xr re_format 7 ,
+.Xr regex 3
+.Sh HISTORY
+A
+.Nm grep
+command appeared in
+.At v6 .
+This version is a re-implementation from the POSIX specification and
+inspection of the operation of several implementations of
+.Nm grep .
+.Sh NOTES
+.Nm grep
+has no limits on input line length (other than imposed by available
+memory).
diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c
new file mode 100644
index 00000000000..c613ac12672
--- /dev/null
+++ b/usr.bin/grep/grep.c
@@ -0,0 +1,595 @@
+/* $OpenBSD: grep.c,v 1.1 2001/09/21 23:12:00 deraadt Exp $ */
+
+/*-
+ * Copyright (c) 2000 Carson Harding. All rights reserved.
+ * This code was written and contributed to OpenBSD by Carson Harding.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author, or the names of contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+static char rcsid[] = "$OpenBSD: grep.c,v 1.1 2001/09/21 23:12:00 deraadt Exp $";
+#endif /* not lint */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <regex.h>
+#include <string.h>
+#include <ctype.h>
+#include <sys/param.h>
+#include <fts.h>
+#include <err.h>
+
+extern char *__progname;
+
+
+/* Forward declarations for the functions defined later in this file. */
+void usage(void);
+void err_regerror(int r, regex_t *rexp);
+int grep_files(int regexc, regex_t *regexv, char **files);
+int grep_tree(int regexc, regex_t *regexv, char **paths);
+int grep_file(int regexc, regex_t *rexp, char *fname);
+void arg_patt(char *s);
+char *chop_patt(char *s, size_t *len);
+void add_patt(char *s, size_t len);
+void load_patt(char *fname);
+regex_t *regcomp_patt(int pattc, char *pattvp[], int cflags);
+
+
+/* Option and state flags; the option flags are set by main(). */
+int f_bytecount; /* -b prepend byte offset of the start of the line */
+int f_countonly; /* -c print only a count of selected lines */
+int f_nofname; /* -h do not prepend file names on multiple files */
+int f_fnameonly; /* -l only print name of a file with a match */
+int f_suppress; /* -s suppress the warning when a file cannot be opened */
+int f_lineno; /* -n prepend with line numbers */
+int f_quiet; /* -q no output, only status */
+int f_wmatch; /* -w match words */
+int f_xmatch; /* -x match whole line */
+int f_zerobyte; /* -z NUL character after filename with -l */
+int f_match; /* regexec() result that selects a line: 0, or REG_NOMATCH for -v */
+int f_multifile; /* prepend file names: set by -R, or by several file operands without -h */
+int f_matchall; /* an empty pattern was seen; it matches all input */
+int f_error; /* saw a file open or read error; affects exit status */
+
+ /* default fts_open() options for -R; adjusted by -H, -L, -P and -X */
+int f_ftsflags = FTS_LOGICAL|FTS_NOCHDIR|FTS_NOSTAT;
+
+int f_debug; /* -D temporary debugging flag */
+
+#define START_PATT_SZ 8 /* start with room for 8 patterns */
+char **pattv; /* array of non-empty pattern strings stored by add_patt() */
+int pattc; /* number of patterns stored in pattv */
+int pattn; /* patterns we have seen, including empty ones */
+
+/*
+ * Parse the options, collect and compile the patterns, then search
+ * the named files (or standard input when none are named).
+ *
+ * Exit status: 0 if at least one line was selected, 1 if none was,
+ * 2 if an error was seen (with -q, a match still yields 0 even if a
+ * file error occurred).
+ */
+int
+main(int argc, char **argv)
+{
+	int c;			/* sum of the counts returned by the search */
+	int ch;
+	int cflags;		/* flags to regcomp() */
+	int sawfile;		/* did we see a pattern file? */
+	regex_t *regexv;	/* start of array of compiled patterns */
+
+	int (*grepf)(int regexc, regex_t *regexv, char **argv);
+
+	sawfile = 0;
+	cflags = REG_BASIC|REG_NEWLINE;
+	grepf = grep_files;
+
+	/* invoked as egrep or fgrep: behave as if -E or -F had been given */
+	if (*__progname == 'e')
+		cflags |= REG_EXTENDED;
+	else if (*__progname == 'f')
+		cflags |= REG_NOSPEC;
+
+	while ((ch = getopt(argc, argv, "DEFRHLPXabce:f:hilnqsvwxz")) != -1) {
+		switch(ch) {
+		case 'D':
+			f_debug = 1;
+			break;
+		case 'E':
+			cflags |= REG_EXTENDED;
+			break;
+		case 'F':
+			cflags |= REG_NOSPEC;
+			break;
+		/*
+		 * -H, -L and -P each select one kind of walk.  The
+		 * default flags already contain FTS_LOGICAL, so the
+		 * conflicting bits must be cleared rather than just
+		 * OR-ing the new ones in; the last option given wins.
+		 */
+		case 'H':
+			/* physical walk, but follow links named as operands */
+			f_ftsflags &= ~FTS_LOGICAL;
+			f_ftsflags |= FTS_PHYSICAL|FTS_COMFOLLOW;
+			break;
+		case 'L':
+			f_ftsflags &= ~(FTS_PHYSICAL|FTS_COMFOLLOW);
+			f_ftsflags |= FTS_LOGICAL;
+			break;
+		case 'P':
+			f_ftsflags &= ~(FTS_LOGICAL|FTS_COMFOLLOW);
+			f_ftsflags |= FTS_PHYSICAL;
+			break;
+		case 'R':
+			grepf = grep_tree;
+			/*
+			 * If walking the tree we don't know how many files
+			 * we'll actually find. So assume multiple, if
+			 * you don't want names, there's always -h ....
+			 */
+			f_multifile = 1;
+			break;
+		case 'X':
+			f_ftsflags |= FTS_XDEV;
+			break;
+		case 'a':
+			/*
+			 * Silently eat -a; we don't use the default
+			 * behaviour it toggles off in gnugrep.
+			 */
+			break;
+		case 'b':
+			f_bytecount = 1;
+			break;
+		case 'c':
+			f_countonly = 1;
+			break;
+		case 'e':
+			arg_patt(optarg);
+			break;
+		case 'f':
+			load_patt(optarg);
+			sawfile = 1;
+			break;
+		case 'h':
+			f_nofname = 1;
+			break;
+		case 'i':
+			cflags |= REG_ICASE;
+			break;
+		case 'l':
+			f_fnameonly = 1;
+			break;
+		case 'n':
+			f_lineno = 1;
+			break;
+		case 'q':
+			f_quiet = 1;
+			break;
+		case 's':
+			f_suppress = 1;
+			break;
+		case 'v':
+			f_match = REG_NOMATCH;
+			break;
+		case 'w':
+			f_wmatch = 1;
+			break;
+		case 'x':
+			f_xmatch = 1;
+			break;
+		case 'z':
+			f_zerobyte = 1;
+			break;
+		default:
+			usage();
+			break;
+		}
+	}
+
+	/* extended and fixed-string matching exclude each other */
+	if ((cflags & REG_EXTENDED) && (cflags & REG_NOSPEC))
+		usage();
+
+	/*
+	 * If we read one or more pattern files, and still
+	 * didn't end up with any pattern, any pattern file
+	 * we read was empty. This is different than failing
+	 * to provide a pattern as an argument, and we fail
+	 * on this case as if we had searched and found
+	 * no matches. (At least this is what GNU grep and
+	 * Solaris's grep do.)
+	 */
+	if (!pattn && !argv[optind]) {
+		if (sawfile)
+			exit(1);
+		else
+			usage();
+	}
+
+	/* no -e/-f pattern seen: the first operand is the pattern */
+	if (!pattn) {
+		arg_patt(argv[optind]);
+		optind++;
+	}
+
+	/* why bother ... just do nothing sooner */
+	if (f_matchall && f_match == REG_NOMATCH)
+		exit(1);
+
+	regexv = regcomp_patt(pattc, pattv, cflags);
+
+	if (optind == argc) {
+		/* no file operands: search standard input */
+		c = grep_file(pattc, regexv, NULL);
+	} else {
+		if (argc - optind > 1 && !f_nofname)
+			f_multifile = 1;
+		c = (*grepf)(pattc, regexv, &argv[optind]);
+	}
+
+	/* XX ugh */
+	if (f_error) {
+		if (c && f_quiet)
+			exit(0);
+		else
+			exit(2);
+	} else if (c)
+		exit(0);
+	else
+		exit(1);
+}
+
+/*
+ * Print the usage synopsis on standard error and exit with status 2.
+ */
+void
+usage(void)
+{
+	fprintf(stderr, "usage: %s [-E|-F] [-abchilnqsvwxz] [-RXH[-L|-P]]"
+	    " {patt | -e patt | -f patt_file} [files]\n",
+	    __progname);
+	exit(2);
+}
+
+/*
+ * Register a pattern given as a command-line argument.  Such an
+ * argument may hold several patterns separated by newlines, so it
+ * is cut into segments with chop_patt() and every segment is passed
+ * to add_patt().  (Pattern files need no such treatment: fgetln()
+ * already delivers them one line at a time.)
+ */
+void
+arg_patt(char *s)
+{
+	char *seg;
+	size_t seglen;
+
+	if (f_debug)
+		fprintf(stderr, "arg_patt(\"%s\")\n", s);
+
+	seglen = strlen(s);
+	if (seglen == 0) {
+		/* "" on the command line: pass the empty pattern on as is */
+		add_patt(s, seglen);
+		return;
+	}
+
+	seg = chop_patt(s, &seglen);
+	while (seg != NULL) {
+		if (f_debug) {
+			fprintf(stderr, "adding pattern \"");
+			fwrite(seg, seglen, 1, stderr);
+			fprintf(stderr, "\", length %lu\n",(unsigned long)seglen);
+			if (pattc > 20) {
+				fprintf(stderr, "too many, exiting ...\n");
+				exit(2);
+			}
+		}
+		add_patt(seg, seglen);
+		seg = chop_patt(NULL, &seglen);
+	}
+}
+
+/*
+ * Kind of like strtok, but for a counted buffer (call it memtok()):
+ * split a buffer into newline-separated segments.
+ *
+ * First call: pass the buffer in s and its length in *len.
+ * Later calls: pass NULL for s to get the next segment.
+ *
+ * Returns a pointer to the start of the segment and stores the
+ * segment's length (newline excluded) in *len; the buffer itself is
+ * not modified and segments are not NUL-terminated.  Returns NULL
+ * once the buffer is used up.  Two adjacent newlines yield an empty
+ * segment; a single trailing newline does not.
+ *
+ * The position is kept in static storage, so only one buffer can be
+ * walked at a time.
+ */
+char *
+chop_patt(char *s, size_t *len)
+{
+	static char *next;	/* where the following segment starts */
+	static size_t remain;	/* bytes not yet handed out */
+	char *nl;
+
+	if (s != NULL)
+		remain = *len;
+	else
+		s = next;
+
+	if (remain == 0) {
+		next = NULL;
+		return NULL;
+	}
+	if (s == NULL)
+		return NULL;
+
+	nl = memchr(s, '\n', remain);
+	if (nl != NULL) {
+		*len = (size_t)(nl - s);	/* returned segment */
+		next = nl + 1;			/* resume past the newline */
+		remain -= *len + 1;
+	} else {
+		*len = remain;			/* the rest is one segment */
+		remain = 0;
+	}
+
+	return s;
+}
+
+/*
+ * Store a copy of the len bytes at s as a new, NUL-terminated entry
+ * in the pattern array pattv.
+ *
+ * The array starts with room for START_PATT_SZ patterns and is
+ * doubled each time it fills up.  Every call counts in pattn, but an
+ * empty pattern (len 0) is not stored: it sets f_matchall instead,
+ * and once f_matchall is set no further patterns are stored, since
+ * all input is going to match anyhow.
+ *
+ * Exits with status 2 if memory cannot be allocated.
+ */
+void
+add_patt(char *s, size_t len)
+{
+	static size_t pattmax = START_PATT_SZ;	/* slots allocated in pattv */
+	char **grown;
+	char *p;
+
+	pattn++;
+
+	if (!len || f_matchall) {
+		f_matchall = 1;
+		return;
+	}
+
+	if (!pattv) {
+		pattv = malloc(START_PATT_SZ * sizeof(char *));
+		if (!pattv)
+			err(2, "malloc");
+		pattc = 0;
+	} else if ((size_t)pattc >= pattmax) {
+		/* keep the old pointer until the larger array exists */
+		grown = realloc(pattv, 2 * pattmax * sizeof(char *));
+		if (!grown)
+			err(2, "realloc");
+		pattv = grown;
+		pattmax *= 2;
+	}
+
+	p = malloc(len + 1);
+	if (!p)
+		err(2, "malloc");
+	memcpy(p, s, len);
+	p[len] = '\0';
+	pattv[pattc++] = p;
+}
+
+/*
+ * Load patterns from the file fname, one pattern per line, and pass
+ * each to add_patt() with its trailing newline removed.
+ *
+ * Exits with status 2 if the file cannot be opened or read.
+ */
+void
+load_patt(char *fname)
+{
+	char *buf;
+	size_t len;
+	FILE *fr;
+
+	fr = fopen(fname, "r");
+	if (!fr)
+		err(2, "%s", fname);	/* never use the name as a format */
+	while ((buf = fgetln(fr, &len)) != NULL) {
+		/* the last line of a file may lack the newline */
+		if (buf[len-1] == '\n')
+			buf[--len] = '\0';
+		add_patt(buf, len);
+	}
+	if (ferror(fr))
+		err(2, "%s", fname);
+	fclose(fr);
+}
+
+/*
+ * Compile the lpattc pattern strings in lpattv with regcomp(),
+ * using cflags, and return a newly allocated array holding one
+ * regex_t per pattern, in the same order.
+ *
+ * Returns NULL without compiling anything when f_matchall is set.
+ * A pattern that fails to compile is reported through
+ * err_regerror(), which exits.
+ */
+regex_t *
+regcomp_patt(int lpattc, char *lpattv[], int cflags)
+{
+	regex_t *compiled;
+	int idx;
+	int rc;
+
+	if (f_matchall)
+		return NULL;
+
+	compiled = malloc(sizeof(regex_t) * lpattc);
+	if (compiled == NULL)
+		err(2, "malloc");
+
+	idx = 0;
+	while (idx < lpattc) {
+		rc = regcomp(&compiled[idx], lpattv[idx], cflags);
+		if (rc != 0)
+			err_regerror(rc, &compiled[idx]);
+		idx++;
+	}
+	return compiled;
+}
+
+/*
+ * Report the regcomp() failure code r for rexp as a readable
+ * message and exit with status 2.  Does not return.
+ */
+void
+err_regerror(int r, regex_t *rexp)
+{
+	size_t need;
+	char *msg;
+
+	/* a first call without a buffer tells us how much room is needed */
+	need = regerror(r, rexp, NULL, 0);
+	if ((msg = malloc(need)) == NULL)
+		err(2, "malloc");
+	(void)regerror(r, rexp, msg, need);
+	errx(2, "%s", msg);
+}
+
+/*
+ * Search every file named in the NULL-terminated array files and
+ * return the sum of the counts reported by grep_file().  It has the
+ * same signature as grep_tree() so that main() can call either one
+ * through a function pointer.
+ */
+int
+grep_files(int regexc, regex_t *regexv, char **files)
+{
+	int total = 0;
+	int i;
+
+	for (i = 0; files[i] != NULL; i++)
+		total += grep_file(regexc, regexv, files[i]);
+
+	return total;
+}
+
+/*
+ * Walk the file hierarchies rooted at the NULL-terminated array
+ * paths and search every entry that is not a directory.  Returns
+ * the sum of the counts reported by grep_file().
+ *
+ * Modified from James Howard and Dag-Erling Coïdan Smørgrav's grep:
+ * add FTS_D to FTS_DP (especially since D was the one being used)
+ * pass in regex_t array, and set fts flags above in main().
+ *
+ * A directory that cannot be read is reported (unless -s was given)
+ * and recorded in f_error, the same way grep_file() treats a file
+ * it cannot open.  Any other traversal error ends the program with
+ * status 2.
+ */
+int
+grep_tree(int regexc, regex_t *regexv, char **paths)
+{
+	int c;
+	FTS *fts;
+	FTSENT *p;
+
+	c = 0;
+
+	if (!(fts = fts_open(paths, f_ftsflags, NULL)))
+		err(2, "fts_open");
+	while ((p = fts_read(fts)) != NULL) {
+		switch (p->fts_info) {
+		case FTS_D:
+		case FTS_DP:
+			/* directories themselves are not searched */
+			break;
+		case FTS_DNR:
+			if (!f_suppress)
+				warnx("%s: %s", p->fts_path,
+				    strerror(p->fts_errno));
+			f_error = 1;
+			break;
+		case FTS_ERR:
+			errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
+			break;
+		default:
+			if (f_debug)
+				printf("%s\n", p->fts_path);
+			c += grep_file(regexc, regexv, p->fts_path);
+			break;
+		}
+	}
+	/* release the traversal handle */
+	(void)fts_close(fts);
+
+	return c;
+}
+
+/*
+ * Word-constituent test for -w.  The argument is converted to
+ * unsigned char before it reaches isalnum(), as <ctype.h> requires.
+ */
+#define isword(x) (isalnum((unsigned char)(x)) || (x) == '_')
+
+/*
+ * Report whether any of the regexc compiled patterns in regexv
+ * matches the line of linelen bytes at buf (line terminator not
+ * included), honouring -x and -w.  Returns 1 if so, 0 otherwise.
+ *
+ * XX gnu grep allows both -w and -x; here -x takes precedence, as
+ * XX it is the more restrictive of the two.
+ * XX Only the first match of a pattern is checked for -w.
+ * XX regexec() errors are treated as "no match".
+ */
+static int
+line_matches(int regexc, regex_t *regexv, const char *buf, size_t linelen)
+{
+	regmatch_t pmatch[1];
+	regoff_t so, eo;
+	int i;
+
+	for (i = 0; i < regexc; i++) {
+		/* with REG_STARTEND the line is delimited by rm_so/rm_eo */
+		pmatch[0].rm_so = 0;
+		pmatch[0].rm_eo = linelen;
+		if (regexec(&regexv[i], buf, 1, pmatch, REG_STARTEND) != 0)
+			continue;
+
+		so = pmatch[0].rm_so;
+		eo = pmatch[0].rm_eo;
+		if (f_xmatch) {
+			/* the match has to span the whole line */
+			if (so != 0 || (size_t)eo != linelen)
+				continue;
+		} else if (f_wmatch) {
+			/* no word character may touch either end */
+			if ((so != 0 && isword(buf[so-1])) ||
+			    ((size_t)eo != linelen && isword(buf[eo])))
+				continue;
+		}
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Open and grep the named file. If fname is NULL, read
+ * from stdin (reported as "(standard input)").
+ *
+ * A line is selected when one of the patterns matches it, or, with
+ * -v, when none of them does.  Selected lines are printed with the
+ * prefixes asked for by the option flags; -c, -l and -q replace the
+ * lines by a count, the file name, or nothing.
+ *
+ * Returns the number of selected lines; with -l or -q reading stops
+ * at the first one.  If the file cannot be opened, f_error is set
+ * and 0 is returned; a read error also sets f_error.
+ */
+int
+grep_file(int regexc, regex_t *regexv, char *fname)
+{
+	int c;		/* selected-line count */
+	int n;		/* line number */
+	int invert;	/* -v given */
+	int selected;
+	char *buf;
+	size_t b;	/* byte offset of the current line */
+	size_t len;	/* line length as read */
+	size_t linelen;	/* line length without the newline */
+	FILE *fr;
+
+	b = 0;
+	c = 0;
+	n = 0;
+	invert = (f_match == REG_NOMATCH);
+
+	if (!fname) {
+		fr = stdin;
+		fname = "(standard input)";
+	} else {
+		fr = fopen(fname, "r");
+		if (!fr) {
+			if (!f_suppress)
+				warn("%s", fname);
+			f_error = 1;
+			return 0;
+		}
+	}
+
+	while ((buf = fgetln(fr, &len)) != NULL) {
+		n++;
+
+		/* the last line of a file may come without a newline */
+		linelen = len;
+		if (linelen > 0 && buf[linelen-1] == '\n')
+			linelen--;
+
+		if (f_matchall)
+			selected = 1;
+		else
+			selected =
+			    line_matches(regexc, regexv, buf, linelen) != invert;
+
+		if (selected) {
+			c++;
+			/* one hit is all that -l and -q need */
+			if (f_fnameonly || f_quiet)
+				break;
+			if (!f_countonly) {
+				if (f_multifile && !f_nofname)
+					printf("%s:", fname);
+				if (f_lineno)
+					printf("%d:", n);
+				if (f_bytecount)
+					printf("%lu:", (unsigned long)b);
+				fwrite(buf, linelen, 1, stdout);
+				fputc('\n', stdout);
+			}
+		}
+		/* save position in stream before next line */
+		b += len;
+	}
+
+	if (!buf && ferror(fr)) {
+		warn("%s", fname);
+		f_error = 1;
+		/*
+		 * XX or do we spit out what result we did have?
+		 */
+	} else if (!f_quiet) {
+		/*
+		 * XX test -c and -l together: gnu grep
+		 * XX allows (although ugly), do others?
+		 */
+		if (f_countonly) {
+			if (f_multifile && !f_nofname)
+				printf("%s:", fname);
+			printf("%d\n", c);
+		}
+		if (c && f_fnameonly) {
+			fputs(fname, stdout);
+			if (f_zerobyte)
+				fputc('\0', stdout);
+			else
+				fputc('\n', stdout);
+		}
+	}
+
+	if (fr != stdin)
+		fclose(fr);
+
+	return c;
+}
+
+