diff options
author | Theo de Raadt <deraadt@cvs.openbsd.org> | 2003-06-22 22:20:08 +0000 |
---|---|---|
committer | Theo de Raadt <deraadt@cvs.openbsd.org> | 2003-06-22 22:20:08 +0000 |
commit | b0b0b549d5a01b84b265074c62253fbf87c11caf (patch) | |
tree | c3f7851d6a960774e96c296dce31e8df2dc76c8f | |
parent | 2aec8c7829a77f46b85b067420ff8ab9f2f50e94 (diff) |
freegrep 0.16
-rw-r--r-- | usr.bin/grep/Makefile | 17 | ||||
-rw-r--r-- | usr.bin/grep/binary.c | 88 | ||||
-rw-r--r-- | usr.bin/grep/file.c | 208 | ||||
-rw-r--r-- | usr.bin/grep/grep.1 | 430 | ||||
-rw-r--r-- | usr.bin/grep/grep.c | 761 | ||||
-rw-r--r-- | usr.bin/grep/grep.h | 101 | ||||
-rw-r--r-- | usr.bin/grep/mmfile.c | 109 | ||||
-rw-r--r-- | usr.bin/grep/queue.c | 128 | ||||
-rw-r--r-- | usr.bin/grep/util.c | 256 |
9 files changed, 1385 insertions, 713 deletions
diff --git a/usr.bin/grep/Makefile b/usr.bin/grep/Makefile index 8e2b7a4be78..b396adf971b 100644 --- a/usr.bin/grep/Makefile +++ b/usr.bin/grep/Makefile @@ -1,11 +1,16 @@ -# $OpenBSD: Makefile,v 1.1 2001/09/21 23:12:00 deraadt Exp $ +# $Id: Makefile,v 1.2 2003/06/22 22:20:07 deraadt Exp $ PROG= grep -SRCS= grep.c -CFLAGS+= -Wall -pedantic -g +SRCS= binary.c file.c grep.c mmfile.c queue.c util.c +LINKS= ${BINDIR}/grep ${BINDIR}/egrep \ + ${BINDIR}/grep ${BINDIR}/fgrep \ + ${BINDIR}/grep ${BINDIR}/zgrep +MLINKS= grep.1 egrep.1 \ + grep.1 fgrep.1 \ + grep.1 zgrep.1 -LINKS+= ${BINDIR}/grep ${BINDIR}/egrep \ - ${BINDIR}/grep ${BINDIR}/fgrep -MLINKS= grep.1 egrep.1 grep.1 fgrep.1 +CFLAGS+= -I/usr/local/include -Wall -pedantic + +LDADD= -lz -L/usr/local/lib/ -liberty .include <bsd.prog.mk> diff --git a/usr.bin/grep/binary.c b/usr.bin/grep/binary.c new file mode 100644 index 00000000000..a7071d505bd --- /dev/null +++ b/usr.bin/grep/binary.c @@ -0,0 +1,88 @@ +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: binary.c,v 1.1 2003/06/22 22:20:07 deraadt Exp $ + */ + +#include <ctype.h> +#include <stdio.h> +#include <zlib.h> + +#include "grep.h" + +#define BUFFER_SIZE 32 + +int +bin_file(FILE *f) +{ + char buf[BUFFER_SIZE]; + int i, m; + + if (fseek(f, SEEK_SET, 0) == -1) + return 0; + + if ((m = (int)fread(buf, 1, BUFFER_SIZE, f)) == 0) + return 0; + + for (i = 0; i < m; i++) + if (!isprint(buf[i]) && !isspace(buf[i])) + return 1; + + rewind(f); + return 0; +} + +int +gzbin_file(gzFile *f) +{ + char buf[BUFFER_SIZE]; + int i, m; + + if (gzseek(f, SEEK_SET, 0) == -1) + return 0; + + if ((m = (int)gzread(f, buf, BUFFER_SIZE)) == 0) + return 0; + + for (i = 0; i < m; i++) + if (!isprint(buf[i])) + return 1; + + gzrewind(f); + return 0; +} + +int +mmbin_file(mmf_t *f) +{ + int i; + + /* XXX knows too much about mmf internals */ + for (i = 0; i < BUFFER_SIZE && i < f->len; i++) + if (!isprint(f->base[i])) + return 1; + mmrewind(f); + return 0; +} diff --git a/usr.bin/grep/file.c b/usr.bin/grep/file.c new file mode 100644 index 00000000000..a58a2c1bac2 --- /dev/null +++ b/usr.bin/grep/file.c @@ -0,0 +1,208 @@ +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: file.c,v 1.1 2003/06/22 22:20:07 deraadt Exp $ + */ + +#include <sys/param.h> + +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <zlib.h> + +#include "grep.h" + +static char fname[MAXPATHLEN]; +static char *lnbuf; +static int lnbuflen; + +#define FILE_STDIO 0 +#define FILE_MMAP 1 +#define FILE_GZIP 2 + +struct file { + int type; + FILE *f; + mmf_t *mmf; + gzFile *gzf; +}; + +static char * +gzfgetln(gzFile *f, size_t *len) +{ + size_t n; + int c; + + for (n = 0; ; ++n) { + c = gzgetc(f); + if (c == -1) { + const char *gzerrstr; + int gzerr; + + if (gzeof(f)) + break; + + gzerrstr = gzerror(f, &gzerr); + if (gzerr == Z_ERRNO) + err(1, "%s", fname); + else + errx(1, "%s: %s", fname, gzerrstr); + } + if (c == '\n') + break; + if (n >= lnbuflen) { + lnbuflen *= 2; + lnbuf = grep_realloc(lnbuf, ++lnbuflen); + } + lnbuf[n] = c; + } + + if (gzeof(f) && n == 0) + return NULL; + *len = n; + return lnbuf; +} + +file_t * +grep_fdopen(int fd, char *mode) +{ + file_t *f; + + if (fd == 0) + sprintf(fname, "(standard input)"); + else + sprintf(fname, "(fd %d)", fd); + + f = grep_malloc(sizeof *f); + + if (Zflag) { + f->type = FILE_GZIP; + if ((f->gzf = gzdopen(fd, mode)) != NULL) + return f; + } else { + f->type = FILE_STDIO; + if ((f->f = fdopen(fd, mode)) != NULL) + return f; + } + + free(f); + return NULL; +} + +file_t * +grep_open(char *path, char *mode) +{ + file_t *f; + + snprintf(fname, MAXPATHLEN, "%s", path); + + f = grep_malloc(sizeof *f); + + if (Zflag) { + f->type = FILE_GZIP; + if ((f->gzf = gzopen(fname, mode)) != NULL) + return f; + } else { + /* try mmap first; if it fails, try stdio */ + if ((f->mmf = mmopen(fname, mode)) != NULL) { + f->type = FILE_MMAP; + return f; + } + f->type = FILE_STDIO; + if ((f->f = fopen(path, mode)) != NULL) + return f; + } + + free(f); + return NULL; +} + +int +grep_bin_file(file_t *f) +{ + switch (f->type) { + case FILE_STDIO: + return bin_file(f->f); + case FILE_MMAP: + return mmbin_file(f->mmf); + case FILE_GZIP: + return gzbin_file(f->gzf); + default: + /* can't happen */ + errx(1, "invalid file type"); + } +} + +long +grep_tell(file_t *f) +{ + switch (f->type) { + case FILE_STDIO: + return ftell(f->f); + case FILE_MMAP: + return mmtell(f->mmf); + case FILE_GZIP: + return gztell(f->gzf); + default: + /* can't happen */ + errx(1, "invalid file type"); + } +} + +char * +grep_fgetln(file_t *f, size_t *l) +{ + switch (f->type) { + case FILE_STDIO: + return fgetln(f->f, l); + case FILE_MMAP: + return mmfgetln(f->mmf, l); + case FILE_GZIP: + return gzfgetln(f->gzf, l); + default: + /* can't happen */ + errx(1, "invalid file type"); + } +} + +void +grep_close(file_t *f) +{ + switch (f->type) { + case FILE_STDIO: + fclose(f->f); + break; + case FILE_MMAP: + mmclose(f->mmf); + break; + case FILE_GZIP: + gzclose(f->gzf); + break; + default: + /* can't happen */ + errx(1, "invalid file type"); + } +} diff --git a/usr.bin/grep/grep.1 b/usr.bin/grep/grep.1 index 6713a560f12..c51d3be2098 100644 --- a/usr.bin/grep/grep.1 +++ b/usr.bin/grep/grep.1 @@ -1,7 +1,5 @@ -.\" $OpenBSD: grep.1,v 1.5 2003/06/10 09:12:10 jmc Exp $ -.\" -.\" Copyright (c) 2000 Carson Harding. All rights reserved. -.\" This code was written and contributed to OpenBSD by Carson Harding. +.\" Copyright (c) 1980, 1990, 1993 +.\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions @@ -11,14 +9,18 @@ .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. -.\" 3. Neither the name of the author, or the names of contributors may be -.\" used to endorse or promote products derived from this software without -.\" specific prior written permission. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. .\" -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) @@ -27,244 +29,256 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd May 10, 2001 +.\" @(#)grep.1 8.3 (Berkeley) 4/18/94 +.\" +.Dd July 3, 1999 .Dt GREP 1 .Os .Sh NAME -.Nm grep , egrep , fgrep -.Nd print lines matching a pattern +.Nm grep, egrep, fgrep. zgrep +.Nd file pattern searcher .Sh SYNOPSIS .Nm grep -.Bk -words -.Op Fl E | Fl F -.Op Fl bchilnqsvwxz -.Oo -.Fl RXH -.Op Fl L | Fl P -.Oc -.Op Fl e Ar pattern_list -.Op Fl f Ar pattern_file -.Op Ar pattern -.Op Ar file -.Ar ... -.Ek +.Op Fl AB Ar num +.Op Fl CEFGHLPRSVZabchilnoqsvwx +.Op Fl e Ar pattern +.Op Fl f Ar file +.Op Ar .Sh DESCRIPTION The -.Nm -utility searches for lines that contain the specified pattern -or patterns. -By default -.Nm -prints lines containing matches to the standard output. -If no input files are specified, -.Nm -expects to read its input from the standard input. +.Nm grep +utilities search the given input files selecting lines that match one +or more patterns. +By default, a pattern matches an input line if any regular expression +(RE) in the pattern matches the input line without its trailing +newline. +An empty expression matches every line. +Each input line that matches at least one of the patterns is written +to the standard output. .Pp -.Nm egrep -is equivalent to +The .Nm grep -.Fl E ; +utility is used for simple patterns and +.Xr ex 1 +or +.Xr ed 1 +style regular expressions. +The +.Nm egrep +utility can handle extended regular expressions and multi-line +patterns. +The .Nm fgrep -is equivalent to -.Nm grep -.Fl F . -.Pp -The patterns are regular expressions, or in the case of -.Nm fgrep , -fixed strings. -More than one pattern may be specified on the command-line, -either by repeated use of the -.Fl e -flag, or by separating patterns with newlines: -.Pp -.Bd -literal -offset indent -grep \'Ishmael -Ahab' mobydick.txt -.Ed -.Pp -is the same as: +utility is quick but can handle only fixed patterns consisting of one +or more lines, allowing any of the pattern lines to match a portion of +the input. +The +.Nm zgrep +utility acts like grep, but accepts input files compressed with the +.Xr compress 1 +or +.Xr gzip 1 +compression utilities. .Pp -.Dl grep -e Ishmael -e Ahab mobydick.txt +The following options are available: .Pp -Note that many regular expression special characters also have special -meaning to the shell. -It is best to enclose the regular expression in quotation marks. -Note also that a null pattern ("" or a newline alone) matches all input. -.Sh OPTIONS -The options available are: -.Bl -tag -width file Ds +.Bl -tag -width indent +.It Fl A Ar num +Print +.Ar num +lines of trailing context after each match. +.It Fl B Ar num +Print +.Ar num +lines of leading context before each match. +.It Fl C +Equivalent to +.Fl A +.Ar 2 +.Fl B +.Ar 2 . .It Fl E -Use extended regular expressions, as if the program was invoked as -.Nm egrep . +Force +.Nm grep +to behave as +.Nm egrep. .It Fl F -Patterns are fixed strings, not regular expressions, as if the -program was invoked as -.Nm fgrep . -.It Fl a -Does nothing. -In other greps this flag tells grep to treat the input as ASCII and turns off -attempts to determine whether the input file is a binary file. -This +Force +.Nm grep +to behave as +.Nm fgrep. +.It Fl G +Force +.Nm grep +to behave as +.Nm grep. +.It Fl H +If +.Fl R +is specified, follow symbolic links only if they were explictly listed +on the command line. +.It Fl L +Only the names of files not containing selected lines are written to +standard output. +Pathnames are listed once per file searched. +If the standard input is searched, the pathname +.Sq Fl +is written. +.It Fl P +If +.Fl R +is specified, no symbolic links are followed. +.It Fl R +Recursively search subdirectories listed. +.It Fl S +If +.Fl R +is specified, all symbolic links are followed. +.It Fl V +Display version information. +.It Fl Z +Force .Nm grep -does not (yet) do that check, so behaves as if -.Fl a -is always specified. +to behave as +.Nm zgrep . +.It Fl a +Do not search in binary files. .It Fl b -Prepend the byte-offset of the beginning of the line containing the match. +The block number on the disk in which a matched pattern is located is +displayed in front of the respective matched line. .It Fl c -Print only a count of the matches found. -.It Fl e Ar pattern_list -The argument to -.Fl e -is a list of patterns to search for. +Only a count of selected lines is written to standard output. +.It Fl e Ar expression +Specify a pattern used during the search of the input. Multiple .Fl e -may be used multiple times, each instance of which may have -one or more patterns separated by new-lines. -.It Fl f Ar pattern_file -The argument to -.Fl f -is the name of a file from which to read patterns. -.Fl f -may be more than once. +options can be used to specify multiple patterns; an input line is +selected if it matches any of the specified patterns. +.It Fl f Ar pattern_file +The pattern is read from the specified file. Trailing newlines in the +pattern file are ignored. +.Pf ( Nm Egrep +and +.Nm fgrep +only). .It Fl h -Suppress the prefixing of file names to matching lines when multiple -files are searched. +Never print filename headers with output lines. .It Fl i -Ignore case in comparisons. +Perform case insensitive matching. .It Fl l -Print only a list of the names of files in which matches were found. -If the input is the standard input, -.Nm -prints "(standard input)". +Only the names of files containing selected lines are written to +standard output. Pathnames are listed once per file searched. If the +standard input is searched, the pathname +.Sq - +is written. .It Fl n -Prefix matching lines with their line number in the file in which -they occur. +Each output line is preceded by its relative line number in the file, +starting at line 1. +The line number counter is reset for each file processed. +This option is ignored if +.Fl c , +.Fl l , +or +.Fl s +is +specified. +.It Fl o +Always print filename headers with output lines. .It Fl q -Be quiet about errors accessing files, only return program status. -The status returned is 0 if there was a match, 1 if there were no -matches, even if there were errors accessing files. -(Without the -.Fl q -flag, if there was an error -accessing a file -.Nm -would return a status of 2 even -if it found matches.) +Suppress normal output. .It Fl s -Suppress errors about accessing files. -Note that unlike -.Fl q -the program is not quiet: matches are still output. +Silent mode. Nonexistent and unreadable files are ignored. .It Fl v -Invert the sense of the match: return all lines that do not -contain the pattern. +Selected lines are those +.Em not +matching the specified +patterns. .It Fl w -The pattern is considered a word. -A matching pattern in a line must not be immediately bordered by a letter, -a number, or an underscore ('_'). +The expression is searched for as a word (as if surrounded by `\e<' +and `\e>', see +.Xr ex 1 ) . .It Fl x -Match the line exactly. -If a regular expression (invoked as -.Nm grep , -or -.Nm egrep ) -the pattern must match the whole line (as if -the pattern was enclosed in ^ and $). -If invoked as -.Nm fgrep , -the string must match the entire line. -.It Fl z -When the -.Fl l -flag is also specified, print an ASCII NUL character (0) following -the file name, rather than a newline. -Like the -.Fl print0 -option to -.Nm find , -this may be used to pass file names containing unusual characters -to programs such as -.Nm xargs -with the -.Fl 0 -flag. +Only input lines selected against an entire fixed string or regular +expression are considered to be matching lines. +.Pp .El -.Sh FILESYSTEM TRAVERSAL OPTIONS -The following options control recursive searchs: -.Bl -tag -width file Ds -.It Fl R -Recursively descend through any specified directory arguments. -.It Fl H -If the -.Fl R -option is also specified, symbolic links on the command -line are followed. -(Symbolic links encountered in the tree traversal are not followed.) -.It Fl L -If the -.Fl R -option is also specified, all symbolic links are followed. -.It Fl P -If the -.Fl R -option is also specified, no symbolic links are followed. -.It Fl X -If the -.Fl R -option is also specified, searches are confined to the device on -which the search started (file system mount points are not crossed). +If no file arguments are specified, the standard input is used. +.Pp +The +.Nm grep +utility exits with one of the following values: +.Pp +.Bl -tag -width flag -compact +.It Li 0 +One or more lines were selected. +.It Li 1 +No lines were selected. +.It Li >1 +An error occurred. +.El +.Sh EXTENDED REGULAR EXPRESSIONS +The following characters are interpreted by +.Nm egrep : +.Pp +.Bl -tag -width flag -compact +.It Cm \&$ +Align the match from the end of the line. +.It Cm \&^ +Align the match from the beginning of the line. +.It Cm \&| +Add another pattern (see example below). +.It Cm \&? +Match 1 or less sequential repetitions of the pattern. +.It Cm \&+ +Match 1 or more sequential repetitions of the pattern. +.It Cm \&* +Match 0 or more sequential repetitions of the pattern. +.It Cm \&[] +Match any single character or range of characters +enclosed in the brackets. +.It Cm \&\e +Escape special characters which have meaning to +.Nm egrep , +the set of {$,.,^,[,],|,?,+,*,(,)}. .El .Sh EXAMPLES -To print all occurrences of the word Ishmael in mobydick.txt: -.Dl grep Ishmael mobydick.txt +To find all occurrences of the word patricia in a file: .Pp -To merely count all the lines in which the word Ishmael occurs: -.Dl grep -c Ishmael mobydick.txt +.Dl grep patricia myfile .Pp -To print all occurrences of either of the words Ishmael or Ahab in -mobydick.txt: -.Dl grep 'Ishmael|Ahab' mobydick.txt +To find all occurrences of the pattern +.Ql \&.Pp +at the beginning of a line: .Pp -To print all occurrences of the word whale in mobydick.txt, whether -or not it is capitalised, and where it appears alone and not as part -of a compound: -.Dl grep -iw whale mobydick.txt +.Dl grep '^\e.Pp' .Pp -To find all the empty lines and print the line numbers for where they occur: -.Dl grep -nv \&. mobydick.txt -.Sh DIAGNOSTICS -The -.Nm -utility exits with one of the following values: +The apostrophes assure the entire expression is evaluated by +.Nm grep +instead of by the +user's shell. +The caret +.Ql Li \&^ +matches the null string at the beginning of a line, +and the +.Ql Li \&\e +escapes the +.Ql Li \&. +which would otherwise match any character. .Pp -.Bl -tag -width flag -compact -.It 0 -One or more matching lines was found. -.It 1 -No matches were found. -.It 2 -An error occurred (whether or not matches were found). -.El +A simple example of an extended regular expression: .Pp -Note that when the -.Fl q -flag is used, file access errors do not cause an exit value of 2, -and in the absence of other errors (a bad regular expression) -the exit value is determined only by whether or not matches -were found. -(XX Should -s do the same??) +.Dl egrep '19|20|25' calendar +.Pp +Peruses the file calendar looking for either 19, 20 +or 25. .Sh SEE ALSO +.Xr ed 1 , +.Xr ex 1 , +.Xr sed 1 , +.Xr re_format 7 , .Xr regex 3 , -.Xr re_format 7 +.Xr regexp 3 .Sh HISTORY -A +The .Nm grep command appeared in .At v6 . -This version is a re-implementation from the POSIX specification and -inspection of the operation of several implementations of -.Nm grep . -.Sh NOTES -.Nm grep -has no limits on input line length (other than imposed by available -memory). diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c index 9d472dd5a81..8ce4202eff9 100644 --- a/usr.bin/grep/grep.c +++ b/usr.bin/grep/grep.c @@ -1,8 +1,6 @@ -/* $OpenBSD: grep.c,v 1.2 2003/02/16 03:46:04 cloder Exp $ */ - /*- - * Copyright (c) 2000 Carson Harding. All rights reserved. - * This code was written and contributed to OpenBSD by Carson Harding. + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -12,9 +10,6 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the author, or the names of contributors may be - * used to endorse or promote products derived from this software without - * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE @@ -27,569 +22,337 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $Id: grep.c,v 1.3 2003/06/22 22:20:07 deraadt Exp $ */ -#ifndef lint -static char rcsid[] = "$OpenBSD: grep.c,v 1.2 2003/02/16 03:46:04 cloder Exp $"; -#endif /* not lint */ - #include <sys/types.h> +#include <sys/stat.h> + +#include <err.h> +#include <errno.h> +#include <getopt.h> +#include <regex.h> #include <stdio.h> #include <stdlib.h> -#include <unistd.h> -#include <regex.h> #include <string.h> -#include <ctype.h> -#include <sys/param.h> -#include <fts.h> -#include <err.h> - -extern char *__progname; - - -void usage(void); -void err_regerror(int r, regex_t *rexp); -int grep_files(int regexc, regex_t *regexv, char **files); -int grep_tree(int regexc, regex_t *regexv, char **paths); -int grep_file(int regexc, regex_t *rexp, char *fname); -void arg_patt(char *s); -char *chop_patt(char *s, size_t *len); -void add_patt(char *s, size_t len); -void load_patt(char *fname); -regex_t *regcomp_patt(int pattc, char *pattvp[], int cflags); - +#include <unistd.h> -int f_bytecount; /* -b prepend byte count */ -int f_countonly; /* -c return only count */ -int f_nofname; /* -h do not prepend filenames on multiple */ -int f_fnameonly; /* -l only print file name with match */ -int f_suppress; /* -s suppress error messages; 1/2 -q */ -int f_lineno; /* -n prepend with line numbers */ -int f_quiet; /* -q no output, only status */ -int f_wmatch; /* -w match words */ -int f_xmatch; /* -x match line */ -int f_zerobyte; /* -z NUL character after filename with -l */ -int f_match; /* = REG_MATCH; else = REG_NOMATCH for -v */ -int f_multifile; /* multiple files: prepend file names */ -int f_matchall; /* empty pattern, matches all input */ -int f_error; /* saw error; set exit status */ +#include "grep.h" + +/* Flags passed to regcomp() and regexec() */ +int cflags; +int eflags = REG_STARTEND; + +int matchall; /* shortcut */ +int patterns, pattern_sz; +char **pattern; +regex_t *r_pattern; + +/* For regex errors */ +char re_error[RE_ERROR_BUF + 1]; + +/* Command-line flags */ +int Aflag; /* -A x: print x lines trailing each match */ +int Bflag; /* -B x: print x lines leading each match */ +int Eflag; /* -E: interpret pattern as extended regexp */ +int Fflag; /* -F: interpret pattern as list of fixed strings */ +int Gflag; /* -G: interpret pattern as basic regexp */ +int Hflag; /* -H: if -R, follow explicitly listed symlinks */ +int Lflag; /* -L: only show names of files with no matches */ +int Pflag; /* -P: if -R, no symlinks are followed */ +int Rflag; /* -R: recursively search directory trees */ +int Sflag; /* -S: if -R, follow all symlinks */ +int Vflag; /* -V: display version information */ +int Zflag; /* -Z: decompress input before processing */ +int aflag; /* -a: only search ascii files */ +int bflag; /* -b: show block numbers for each match */ +int cflag; /* -c: only show a count of matching lines */ +int hflag; /* -h: don't print filename headers */ +int iflag; /* -i: ignore case */ +int lflag; /* -l: only show names of files with matches */ +int nflag; /* -n: show line numbers in front of matching lines */ +int oflag; /* -o: always print file name */ +int qflag; /* -q: quiet mode (don't output anything) */ +int sflag; /* -s: silent mode (ignore errors) */ +int vflag; /* -v: only show non-matching lines */ +int wflag; /* -w: pattern must start and end on word boundaries */ +int xflag; /* -x: pattern must match entire line */ + +/* Housekeeping */ +int first; /* flag whether or not this is our fist match */ +int tail; /* lines left to print */ +int lead; /* number of lines in leading context queue */ + +char *progname; + +static void +usage(void) +{ + fprintf(stderr, "usage: %s %s %s\n", + progname, + "[-[AB] num] [-CEFGHLPRSVZabchilnoqsvwx]", + "[-e patttern] [-f file]"); + exit(2); +} - /* default traversal flags */ -int f_ftsflags = FTS_LOGICAL|FTS_NOCHDIR|FTS_NOSTAT; +static char *optstr = "0123456789A:B:CEFGHLPSRUVZabce:f:hilnoqrsuvwxy"; -int f_debug; /* temporary debugging flag */ +struct option long_options[] = +{ + {"basic-regexp", no_argument, NULL, 'G'}, + {"extended-regexp", no_argument, NULL, 'E'}, + {"fixed-strings", no_argument, NULL, 'F'}, + {"after-context", required_argument, NULL, 'A'}, + {"before-context", required_argument, NULL, 'B'}, + {"context", optional_argument, NULL, 'C'}, + {"version", no_argument, NULL, 'V'}, + {"byte-offset", no_argument, NULL, 'b'}, + {"count", no_argument, NULL, 'c'}, + {"regexp", required_argument, NULL, 'e'}, + {"file", required_argument, NULL, 'f'}, + {"no-filename", no_argument, NULL, 'h'}, + {"ignore-case", no_argument, NULL, 'i'}, + {"files-without-match", no_argument, NULL, 'L'}, + {"files-with-matches", no_argument, NULL, 'l'}, + {"line-number", no_argument, NULL, 'n'}, + {"quiet", no_argument, NULL, 'q'}, + {"silent", no_argument, NULL, 'q'}, + {"recursive", no_argument, NULL, 'r'}, + {"no-messages", no_argument, NULL, 's'}, + {"text", no_argument, NULL, 'a'}, + {"revert-match", no_argument, NULL, 'v'}, + {"word-regexp", no_argument, NULL, 'w'}, + {"line-regexp", no_argument, NULL, 'x'}, + {"binary", no_argument, NULL, 'U'}, + {"unix-byte-offsets", no_argument, NULL, 'u'}, + {"decompress", no_argument, NULL, 'Z'}, + + {NULL, no_argument, NULL, 0} +}; + + +static void +add_pattern(char *pat, size_t len) +{ + if (len == 0 || matchall) { + matchall = 1; + return; + } + if (patterns == pattern_sz) { + pattern_sz *= 2; + pattern = grep_realloc(pattern, ++pattern_sz); + } + if (pat[len-1] == '\n') + --len; + pattern[patterns] = grep_malloc(len+1); + strncpy(pattern[patterns], pat, len); + pattern[patterns][len] = '\0'; + ++patterns; +} -#define START_PATT_SZ 8 /* start with room for 8 patterns */ -char **pattv; /* array of patterns from -e and -f */ -int pattc; /* patterns in pattern array */ -int pattn; /* patterns we have seen, including nulls */ +static void +read_patterns(char *fn) +{ + FILE *f; + char *line; + size_t len; + int nl; + + if ((f = fopen(fn, "r")) == NULL) + err(1, "%s", fn); + nl = 0; + while ((line = fgetln(f, &len)) != NULL) { + if (*line == '\n') { + ++nl; + continue; + } + if (nl) { + matchall = 1; + break; + } + nl = 0; + add_pattern(line, len); + } + if (ferror(f)) + err(1, "%s", fn); + fclose(f); +} int -main(int argc, char **argv) +main(int argc, char *argv[]) { - int c; - int ch; - int cflags; /* flags to regcomp() */ - int sawfile; /* did we see a pattern file? */ - regex_t *regexv; /* start of array of compiled patterns */ - - int (*grepf)(int regexc, regex_t *regexv, char **argv); + char *tmp; + int c, i; - sawfile = 0; - cflags = REG_BASIC|REG_NEWLINE; - grepf = grep_files; - - if (*__progname == 'e') - cflags |= REG_EXTENDED; - else if (*__progname == 'f') - cflags |= REG_NOSPEC; - - while ((ch = getopt(argc, argv, "DEFRHLPXabce:f:hilnqsvwxz")) != -1) { - switch(ch) { - case 'D': - f_debug = 1; + if ((progname = strrchr(*argv, '/')) != NULL) + ++progname; + else + progname = *argv; + + while ((c = getopt_long(argc, argv, optstr, + long_options, (int *)NULL)) != -1) { + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + tmp = argv[optind - 1]; + if (tmp[0] == '-' && tmp[1] == c && !tmp[2]) + Aflag = Bflag = strtol(++tmp, (char **)NULL, 10); + else + Aflag = Bflag = strtol(argv[optind] + 1, (char **)NULL, 10); + break; + case 'A': + Aflag = strtol(optarg, (char **)NULL, 10); + break; + case 'B': + Bflag = strtol(optarg, (char **)NULL, 10); + break; + case 'C': + if (optarg == NULL) + Aflag = Bflag = 2; + else + Aflag = Bflag = strtol(optarg, (char **)NULL, 10); break; case 'E': - cflags |= REG_EXTENDED; + Eflag++; break; case 'F': - cflags |= REG_NOSPEC; + Fflag++; + break; + case 'G': + Gflag++; break; case 'H': - f_ftsflags |= FTS_COMFOLLOW; + Hflag++; break; case 'L': - f_ftsflags |= FTS_LOGICAL; + lflag = 0; + Lflag = qflag = 1; break; case 'P': - f_ftsflags |= FTS_PHYSICAL; + Pflag++; + break; + case 'S': + Sflag++; break; case 'R': - grepf = grep_tree; - /* - * If walking the tree we don't know how many files - * we'll actually find. So assume multiple, if - * you don't want names, there's always -h .... - */ - f_multifile = 1; + case 'r': + Rflag++; + oflag++; break; - case 'X': - f_ftsflags |= FTS_XDEV; + case 'U': + case 'u': + /* these are here for compatability */ + break; + case 'V': + fprintf(stderr, "grep version %u.%u\n", VER_MAJ, VER_MIN); + fprintf(stderr, argv[0]); + usage(); + break; + case 'Z': + Zflag++; break; case 'a': - /* - * Silently eat -a; we don't use the default - * behaviour it toggles off in gnugrep. - */ + aflag = 1; break; case 'b': - f_bytecount = 1; + bflag = 1; break; case 'c': - f_countonly = 1; + cflag = 1; break; case 'e': - arg_patt(optarg); + add_pattern(optarg, strlen(optarg)); break; case 'f': - load_patt(optarg); - sawfile = 1; + read_patterns(optarg); break; case 'h': - f_nofname = 1; + oflag = 0; + hflag = 1; break; case 'i': + case 'y': cflags |= REG_ICASE; break; case 'l': - f_fnameonly = 1; + Lflag = 0; + lflag = qflag = 1; break; case 'n': - f_lineno = 1; + nflag = 1; + break; + case 'o': + hflag = 0; + oflag = 1; break; case 'q': - f_quiet = 1; + qflag = 1; break; case 's': - f_suppress = 1; + sflag = 1; break; case 'v': - f_match = REG_NOMATCH; + vflag = 1; break; case 'w': - f_wmatch = 1; + wflag = 1; break; case 'x': - f_xmatch = 1; - break; - case 'z': - f_zerobyte = 1; + xflag = 1; break; default: usage(); - break; } } - if ((cflags & REG_EXTENDED) && (cflags & REG_NOSPEC)) + argc -= optind; + argv += optind; + + if (argc == 0 && patterns == 0) usage(); - /* - * If we read one or more pattern files, and still - * didn't end up with any pattern, any pattern file - * we read was empty. This is different than failing - * to provide a pattern as an argument, and we fail - * on this case as if we had searched and found - * no matches. (At least this is what GNU grep and - * Solaris's grep do.) - */ - if (!pattn && !argv[optind]) { - if (sawfile) - exit(1); - else usage(); + if (patterns == 0) { + add_pattern(*argv, strlen(*argv)); + --argc; + ++argv; } - - if (!pattn) { - arg_patt(argv[optind]); - optind++; + + switch (*progname) { + case 'e': + Eflag++; + break; + case 'f': + Fflag++; + break; + case 'g': + Gflag++; + break; + case 'z': + Zflag++; + break; } - /* why bother ... just do nothing sooner */ - if (f_matchall && f_match == REG_NOMATCH) - exit(1); - - regexv = regcomp_patt(pattc, pattv, cflags); - - if (optind == argc) { - c = grep_file(pattc, regexv, NULL); - } else { - if (argc - optind > 1 && !f_nofname) - f_multifile = 1; - c = (*grepf)(pattc, regexv, &argv[optind]); - } - - /* XX ugh */ - if (f_error) { - if (c && f_quiet) - exit(0); - else - exit(2); - } else if (c) - exit(0); - else - exit(1); -} - -void -usage(void) -{ - fprintf(stderr, "usage: %s [-E|-F] [-abchilnqsvwx] [-RXH[-L|-P]]" - " {patt | -e patt | -f patt_file} [files]\n", - __progname); - exit(2); -} - -/* - * Patterns as arguments may have embedded newlines. - * When read from file, these are detected by fgetln(); - * in arguments we have to find and cut out the segments. - */ -void -arg_patt(char *s) -{ - size_t len; - char *sp; - - if (f_debug) - fprintf(stderr, "arg_patt(\"%s\")\n", s); - - len = strlen(s); - if (!len) { /* got "" on the command-line */ - add_patt(s, len); - return; - } - for (sp = chop_patt(s, &len); sp; sp = chop_patt(NULL, &len)) { - if (f_debug) { - fprintf(stderr, "adding pattern \""); - fwrite(sp, len, 1, stderr); - fprintf(stderr, "\", length %lu\n",(unsigned long)len); - if (pattc > 20) { - fprintf(stderr, "too many, exiting ...\n"); - exit(2); - } + cflags |= Eflag ? REG_EXTENDED : REG_BASIC; + r_pattern = grep_malloc(patterns * sizeof(regex_t)); + for (i = 0; i < patterns; ++i) { + if ((c = regcomp(&r_pattern[i], pattern[i], cflags))) { + regerror(c, &r_pattern[i], re_error, RE_ERROR_BUF); + errx(1, "%s", re_error); } - add_patt(sp, len); } -} -/* - * Kind of like strtok; pass char *, then NULL for rest. - * Call it memtok()... New size gets written into len. - */ -char * -chop_patt(char *s, size_t *len) -{ - char *cp; - static char *save_s; - static int save_n; + if ((argc == 0 || argc == 1) && !oflag) + hflag = 1; - if (s) - save_n = *len; + if (argc == 0) + exit(!procfile(NULL)); + + if (Rflag) + c = grep_tree(argv); else - s = save_s; + for (c = 0; argc--; ++argv) + c += procfile(*argv); - if (save_n <= 0) { - s = save_s = NULL; - } else if (s) { - if ((cp = memchr(s, '\n', save_n)) != NULL) { - *len = cp - s; /* returned segment */ - save_n -= *len; - save_s = ++cp; /* adjust past newline */ - save_n--; - } else { - *len = save_n; /* else return the whole string */ - save_n = 0; - } - } - - return s; -} - -/* - * Start with an array for 8 patterns, and double it - * each time we outgrow it. If pattern is empty (0 length), - * or if f_matchall is already set, set f_matchall and return. - * No use adding a pattern if all input is going to match - * anyhow. - */ -void -add_patt(char *s, size_t len) -{ - char *p; - static size_t pattmax = START_PATT_SZ; - static size_t sumlen; - - pattn++; - sumlen += len; - - if (!len || f_matchall) { - f_matchall = 1; - return; - } - - if (!pattv) { - pattv = malloc(START_PATT_SZ * sizeof(char *)); - if (!pattv) - err(2, "malloc"); - pattc = 0; - } else if (pattc >= pattmax) { - pattmax *= 2; - pattv = realloc(pattv, pattmax * sizeof(char *)); - if (!pattv) - err(2, "realloc"); - } - p = malloc(len+1); - if (!p) err(2, "malloc"); - memmove(p, s, len); - p[len] = '\0'; - pattv[pattc++] = p; -} - -/* - * Load patterns from file. - */ -void -load_patt(char *fname) -{ - char *buf; - size_t len; - FILE *fr; - - fr = fopen(fname, "r"); - if (!fr) - err(2, "%s", fname); - while ((buf = fgetln(fr, &len)) != NULL) { - if (buf[len-1] == '\n') - buf[--len] = '\0'; - add_patt(buf, len); - } - fclose(fr); -} - -/* - * Compile the collected pattern strings into an array - * of regex_t. - */ -regex_t * -regcomp_patt(int lpattc, char *lpattv[], int cflags) -{ - int i; - int r; - regex_t *rxv; - - if (f_matchall) - return NULL; - - rxv = malloc(sizeof(regex_t) * lpattc); - if (!rxv) - err(2, "malloc"); - for (i = 0; i < lpattc; i++) { - if ((r = regcomp(&rxv[i], lpattv[i], cflags)) != 0) - err_regerror(r, &rxv[i]); - } - return rxv; -} - -/* - * Print out regcomp error, and exit. - */ -void -err_regerror(int r, regex_t *rexp) -{ - size_t n; - char *buf; - - n = regerror(r, rexp, NULL, 0); - buf = malloc(n); - if (!buf) - err(2, "malloc"); - (void)regerror(r, rexp, buf, n); - errx(2, "%s", buf); -} - -/* - * Little wrapper so we can use function pointer above. - */ -int -grep_files(int regexc, regex_t *regexv, char **files) -{ - int c; - char **fname; - - c = 0; - for (fname = files; *fname; fname++) - c += grep_file(regexc, regexv, *fname); - - return c; + exit(!c); } - -/* - * Modified from James Howard and Dag-Erling Co?dan Sm?rgrav's grep: - * add FTS_D to FTS_DP (especially since D was the one being used) - * pass in regex_t array, and set fts flags above in main(). - */ -int -grep_tree(int regexc, regex_t *regexv, char **paths) -{ - int c; - FTS *fts; - FTSENT *p; - - c = 0; - - if (!(fts = fts_open(paths, f_ftsflags, (int (*) ()) NULL))) - err(2, "fts_open"); - while ((p = fts_read(fts)) != NULL) { - switch (p->fts_info) { - case FTS_D: - case FTS_DP: - case FTS_DNR: - break; - case FTS_ERR: - errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno)); - break; - default: - if (f_debug) - printf("%s\n", p->fts_path); - c += grep_file(regexc, regexv, p->fts_path); - break; - } - } - - return c; -} - -/* - * Open and grep the named file. If fname is NULL, read - * from stdin. - */ - -#define isword(x) (isalnum(x) || (x) == '_') - -int -grep_file(int regexc, regex_t *regexv, char *fname) -{ - int i; - int c; - int n; - int r; - int match; - char *buf; - size_t b; - size_t len; - FILE *fr; - regmatch_t pmatch[1]; - regoff_t so, eo; - - b = 0; /* byte count */ - c = 0; /* match count */ - n = 0; /* line count */ - - if (!fname) { - fr = stdin; - fname = "(standard input)"; - } else { - fr = fopen(fname, "r"); - if (!fr) { - if (!f_suppress) - warn("%s", fname); - f_error = 1; - return 0; - } - } - - while ((buf = fgetln(fr, &len)) != NULL) { - n++; - if (f_matchall) - goto printmatch; - match = 0; - for (i = 0; i < regexc; i++) { - pmatch[0].rm_so = 0; - pmatch[0].rm_eo = len-1; - r = regexec(®exv[i], buf, 1, pmatch, REG_STARTEND); - if (r == f_match) { - /* - * XX gnu grep allows both -w and -x; - * XX but seems bizarre. sometimes -w seems - * XX to override, at other times, not. - * XX Need to figure that out. - * XX It seems logical to go with the most - * XX restrictive argument: -x, as -x is - * XX a boundary case of -w anyhow. - */ - if (f_xmatch) { - if (pmatch[0].rm_so != 0 || - pmatch[0].rm_eo != len-1) - continue; - } else if (f_wmatch) { - so = pmatch[0].rm_so; - eo = pmatch[0].rm_eo; - if (!((so == 0 || !isword(buf[so-1])) && - (eo == len || !isword(buf[eo])))) - continue; - } - match = 1; - break; - } - /* XX test for regexec() errors ?? */ - } - if (match) { -printmatch: - c++; - if (f_fnameonly || f_quiet) - break; - if (f_countonly) - continue; - if (f_multifile && !f_nofname) - printf("%s:", fname); - if (f_lineno) - printf("%d:", n); - if (f_bytecount) - printf("%lu:", (unsigned long)b); - fwrite(buf, len, 1, stdout); - } - /* save position in stream before next line */ - b += len; - } - - if (!buf && ferror(fr)) { - warn("%s", fname); - f_error = 1; - /* - * XX or do we spit out what result we did have? - */ - } else if (!f_quiet) { - /* - * XX test -c and -l together: gnu grep - * XX allows (although ugly), do others? - */ - if (f_countonly) { - if (f_multifile) - printf("%s:", fname); - printf("%d\n", c); - } - if (c && f_fnameonly) { - fputs(fname, stdout); - if (f_zerobyte) - fputc('\0', stdout); - else - fputc('\n', stdout); - } - } - - if (fr != stdin) - fclose(fr); - - return c; -} - diff --git a/usr.bin/grep/grep.h b/usr.bin/grep/grep.h new file mode 100644 index 00000000000..ded975497c2 --- /dev/null +++ b/usr.bin/grep/grep.h @@ -0,0 +1,101 @@ +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> + +#include <regex.h> +#include <stdio.h> +#include <zlib.h> + +#define VER_MAJ 0 +#define VER_MIN 9 + +typedef struct { + size_t len; + int line_no; + int off; + char *file; + char *dat; +} str_t; + +/* Flags passed to regcomp() and regexec() */ +extern int cflags, eflags; + +/* Command line flags */ +extern int Aflag, Bflag, Hflag, Lflag, Pflag, Sflag, Rflag, Zflag, + aflag, bflag, cflag, hflag, lflag, nflag, qflag, sflag, + vflag, wflag, xflag; + +extern int first, lead, matchall, patterns, tail; +extern char **pattern; +extern regex_t *r_pattern; + +/* For regex errors */ +#define RE_ERROR_BUF 512 +extern char re_error[RE_ERROR_BUF + 1]; /* Seems big enough */ + +/* util.c */ +int procfile(char *fn); +int grep_tree(char **argv); +void *grep_malloc(size_t size); +void *grep_realloc(void *ptr, size_t size); +void printline(str_t *line, int sep); + +/* queue.c */ +void initqueue(); +void enqueue(str_t *x); +void printqueue(); +void clearqueue(); + +/* mmfile.c */ +typedef struct mmfile { + int fd; + size_t len; + char *base, *end, *ptr; +} mmf_t; + +mmf_t *mmopen(char *fn, char *mode); +void mmclose(mmf_t *mmf); +char *mmfgetln(mmf_t *mmf, size_t *l); +long mmtell(mmf_t *mmf); +void mmrewind(mmf_t *mmf); + +/* file.c */ +struct file; +typedef struct file file_t; + +file_t *grep_fdopen(int fd, char *mode); +file_t *grep_open(char *path, char *mode); +int grep_bin_file(file_t *f); +long grep_tell(file_t *f); +char *grep_fgetln(file_t *f, size_t *l); +void grep_close(file_t *f); + +/* binary.c */ +int bin_file(FILE * f); +int gzbin_file(gzFile * f); +int mmbin_file(mmf_t *f); + diff --git a/usr.bin/grep/mmfile.c b/usr.bin/grep/mmfile.c new file mode 100644 index 00000000000..045ad7b4d49 --- /dev/null +++ b/usr.bin/grep/mmfile.c @@ -0,0 +1,109 @@ +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: mmfile.c,v 1.1 2003/06/22 22:20:07 deraadt Exp $ + */ + +#include <sys/param.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include <err.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> + +#include "grep.h" + +#define MAX_MAP_LEN 1048576 + +mmf_t * +mmopen(char *fn, char *mode) +{ + mmf_t *mmf; + struct stat st; + + /* XXX ignore mode for now */ + mode = mode; + + mmf = grep_malloc(sizeof *mmf); + if ((mmf->fd = open(fn, O_RDONLY)) == -1) + goto ouch1; + if (fstat(mmf->fd, &st) == -1) + goto ouch2; + if (st.st_size > SIZE_T_MAX) /* too big to mmap */ + goto ouch2; + if ((st.st_mode & S_IFREG) == 0) /* only mmap regular files */ + goto ouch2; + mmf->len = (size_t)st.st_size; + mmf->base = mmap(NULL, mmf->len, PROT_READ, MAP_PRIVATE, mmf->fd, 0); + if (mmf->base == NULL) + goto ouch2; + mmf->ptr = mmf->base; + mmf->end = mmf->base + mmf->len; + madvise(mmf->base, mmf->len, MADV_SEQUENTIAL); + return mmf; + +ouch2: + close(mmf->fd); +ouch1: + free(mmf); + return NULL; +} + +void +mmclose(mmf_t *mmf) +{ + munmap(mmf->base, mmf->len); + close(mmf->fd); + free(mmf); +} + +char * +mmfgetln(mmf_t *mmf, size_t *l) +{ + static char *p; + + if (mmf->ptr >= mmf->end) + return NULL; + for (p = mmf->ptr; mmf->ptr < mmf->end; ++mmf->ptr) + if (*mmf->ptr == '\n') + break; + *l = mmf->ptr - p; + ++mmf->ptr; + return p; +} + +long +mmtell(mmf_t *mmf) +{ + return mmf->ptr - mmf->base; +} + +void +mmrewind(mmf_t *mmf) +{ + mmf->ptr = mmf->base; +} diff --git a/usr.bin/grep/queue.c b/usr.bin/grep/queue.c new file mode 100644 index 00000000000..1b93c615a36 --- /dev/null +++ b/usr.bin/grep/queue.c @@ -0,0 +1,128 @@ +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: queue.c,v 1.1 2003/06/22 22:20:07 deraadt Exp $ + */ + +/* + * A really poor man's queue. It does only what it has to and gets out of + * Dodge. + */ + +#include <sys/param.h> + +#include <stdlib.h> +#include <string.h> + +#include "grep.h" + +typedef struct queue { + struct queue *next; + str_t data; +} queue_t; + +static queue_t *q_head, *q_tail; +static int count; + +static queue_t *dequeue(void); + +void +initqueue(void) +{ + q_head = q_tail = NULL; +} + +static void +free_item(queue_t *item) +{ + free(item); +} + +void +enqueue(str_t *x) +{ + queue_t *item; + + item = grep_malloc(sizeof *item + x->len); + item->data.len = x->len; + item->data.line_no = x->line_no; + item->data.off = x->off; + item->data.dat = (char *)item + sizeof *item; + memcpy(item->data.dat, x->dat, x->len); + item->data.file = x->file; + item->next = NULL; + + if (!q_head) { + q_head = q_tail = item; + } else { + q_tail->next = item; + q_tail = item; + } + + if (++count > Bflag) + free_item(dequeue()); +} + +static queue_t * +dequeue(void) +{ + queue_t *item; + + if (q_head == NULL) + return NULL; + + --count; + item = q_head; + q_head = item->next; + if (q_head == NULL) + q_tail = NULL; + return item; +} + +void +printqueue(void) +{ + queue_t *item; + + while ((item = dequeue()) != NULL) { + printline(&item->data, '-'); + free_item(item); + } +} + +void +clearqueue(void) +{ + queue_t *item; + + while ((item = dequeue()) != NULL) + free_item(item); +} + +int +countqueue(void) +{ + return count; +} diff --git a/usr.bin/grep/util.c b/usr.bin/grep/util.c new file mode 100644 index 00000000000..2b8bc1d5a0a --- /dev/null +++ b/usr.bin/grep/util.c @@ -0,0 +1,256 @@ +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: util.c,v 1.1 2003/06/22 22:20:07 deraadt Exp $ + */ + +#include <sys/types.h> +#include <sys/stat.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <fts.h> +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <zlib.h> + +#include "grep.h" + +/* + * Process a file line by line... + */ + +static int linesqueued; +static int procline(str_t *l); + +int +grep_tree(char **argv) +{ + FTS *fts; + FTSENT *p; + int c, fts_flags; + + c = fts_flags = 0; + + if (Hflag) + fts_flags = FTS_COMFOLLOW; + if (Pflag) + fts_flags = FTS_PHYSICAL; + if (Sflag) + fts_flags = FTS_LOGICAL; + + fts_flags |= FTS_NOSTAT | FTS_NOCHDIR; + + if (!(fts = fts_open(argv, fts_flags, (int (*) ()) NULL))) + err(1, NULL); + while ((p = fts_read(fts)) != NULL) { + switch (p->fts_info) { + case FTS_DNR: + break; + case FTS_ERR: + errx(1, "%s: %s", p->fts_path, strerror(p->fts_errno)); + break; + case FTS_DP: + break; + default: + c += procfile(p->fts_path); + break; + } + } + + return c; +} + +int +procfile(char *fn) +{ + str_t ln; + file_t *f; + int c, t, z; + + if (fn == NULL) { + fn = "(standard input)"; + f = grep_fdopen(STDIN_FILENO, "r"); + } else { + f = grep_open(fn, "r"); + } + if (f == NULL) { + if (!sflag) + warn("%s", fn); + return 0; + } + if (aflag && grep_bin_file(f)) { + grep_close(f); + return 0; + } + + ln.file = fn; + ln.line_no = 0; + linesqueued = 0; + ln.off = -1; + + if (Bflag > 0) + initqueue(); + for (c = 0; !(lflag && c);) { + ln.off += ln.len + 1; + if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL) + break; + if (ln.len > 0 && ln.dat[ln.len - 1] == '\n') + --ln.len; + ln.line_no++; + + z = tail; + + if ((t = procline(&ln)) == 0 && Bflag > 0 && z == 0) { + enqueue(&ln); + linesqueued++; + } + c += t; + } + if (Bflag > 0) + clearqueue(); + grep_close(f); + + if (cflag) { + if (!hflag) + printf("%s:", ln.file); + printf("%u\n", c); + } + if (lflag && c != 0) + printf("%s\n", fn); + if (Lflag && c == 0) + printf("%s\n", fn); + return c; +} + + +/* + * Process an individual line in a file. Return non-zero if it matches. + */ + +#define isword(x) (isalnum(x) || (x) == '_') + +static int +procline(str_t *l) +{ + regmatch_t pmatch; + int c, i, r, t; + + if (matchall) { + c = !vflag; + goto print; + } + + t = vflag ? REG_NOMATCH : 0; + pmatch.rm_so = 0; + pmatch.rm_eo = l->len; + for (c = i = 0; i < patterns; i++) { + r = regexec(&r_pattern[i], l->dat, 0, &pmatch, eflags); + if (r == REG_NOMATCH && t == 0) + continue; + if (r == 0) { + if (wflag) { + if ((pmatch.rm_so != 0 && isword(l->dat[pmatch.rm_so - 1])) + || (pmatch.rm_eo != l->len && isword(l->dat[pmatch.rm_eo]))) + r = REG_NOMATCH; + } + if (xflag) { + if (pmatch.rm_so != 0 || pmatch.rm_eo != l->len) + r = REG_NOMATCH; + } + } + if (r == t) { + c++; + break; + } + } + +print: + if ((tail > 0 || c) && !cflag && !qflag) { + if (c) { + if (first > 0 && tail == 0 && (Bflag < linesqueued) && (Aflag || Bflag)) + printf("--\n"); + first = 1; + tail = Aflag; + if (Bflag > 0) + printqueue(); + linesqueued = 0; + printline(l, ':'); + } else { + printline(l, '-'); + tail--; + } + } + return c; +} + +void * +grep_malloc(size_t size) +{ + void *ptr; + + if ((ptr = malloc(size)) == NULL) + err(1, "malloc"); + return ptr; +} + +void * +grep_realloc(void *ptr, size_t size) +{ + if ((ptr = realloc(ptr, size)) == NULL) + err(1, "realloc"); + return ptr; +} + +void +printline(str_t *line, int sep) +{ + int n; + + n = 0; + if (!hflag) { + fputs(line->file, stdout); + ++n; + } + if (nflag) { + if (n) + putchar(sep); + printf("%d", line->line_no); + ++n; + } + if (bflag) { + if (n) + putchar(sep); + printf("%lu", (unsigned long)line->off); + } + if (n) + putchar(sep); + fwrite(line->dat, line->len, 1, stdout); + putchar('\n'); +} |