diff options
author | Theo de Raadt <deraadt@cvs.openbsd.org> | 1995-10-18 08:53:40 +0000 |
---|---|---|
committer | Theo de Raadt <deraadt@cvs.openbsd.org> | 1995-10-18 08:53:40 +0000 |
commit | d6583bb2a13f329cf0332ef2570eb8bb8fc0e39c (patch) | |
tree | ece253b876159b39c620e62b6c9b1174642e070e /gnu/usr.bin/grep/grep.c |
initial import of NetBSD tree
Diffstat (limited to 'gnu/usr.bin/grep/grep.c')
-rw-r--r-- | gnu/usr.bin/grep/grep.c | 833 |
1 files changed, 833 insertions, 0 deletions
diff --git a/gnu/usr.bin/grep/grep.c b/gnu/usr.bin/grep/grep.c new file mode 100644 index 00000000000..aad5427c77f --- /dev/null +++ b/gnu/usr.bin/grep/grep.c @@ -0,0 +1,833 @@ +/* grep.c - main driver file for grep. + Copyright (C) 1992 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Written July 1992 by Mike Haertel. */ + +#ifndef lint +static char rcsid[] = "$Id: grep.c,v 1.1 1995/10/18 08:40:17 deraadt Exp $"; +#endif /* not lint */ + +#include <errno.h> +#include <stdio.h> + +#ifndef errno +extern int errno; +#endif + +#ifdef STDC_HEADERS +#include <stdlib.h> +#else +#include <sys/types.h> +extern char *malloc(), *realloc(); +extern void free(); +#endif + +#if defined(STDC_HEADERS) || defined(HAVE_STRING_H) +#include <string.h> +#ifdef NEED_MEMORY_H +#include <memory.h> +#endif +#else +#include <strings.h> +#ifdef __STDC__ +extern void *memchr(); +#else +extern char *memchr(); +#endif +#define strrchr rindex +#endif + +#ifdef HAVE_UNISTD_H +#include <sys/types.h> +#include <fcntl.h> +#include <unistd.h> +#else +#define O_RDONLY 0 +extern int open(), read(), close(); +#endif + +#include "getpagesize.h" +#include "grep.h" + +#undef MAX +#define MAX(A,B) ((A) > (B) ? (A) : (B)) + +/* Provide missing ANSI features if necessary. */ + +#ifndef HAVE_STRERROR +extern int sys_nerr; +extern char *sys_errlist[]; +#define strerror(E) ((E) < sys_nerr ? sys_errlist[(E)] : "bogus error number") +#endif + +#ifndef HAVE_MEMCHR +#ifdef __STDC__ +#define VOID void +#else +#define VOID char +#endif +VOID * +memchr(vp, c, n) + VOID *vp; + int c; + size_t n; +{ + unsigned char *p; + + for (p = (unsigned char *) vp; n--; ++p) + if (*p == c) + return (VOID *) p; + return 0; +} +#endif + +/* Define flags declared in grep.h. */ +char *matcher; +int match_icase; +int match_words; +int match_lines; + +/* Functions we'll use to search. */ +static void (*compile)(); +static char *(*execute)(); + +/* For error messages. */ +static char *prog; +static char *filename; +static int errseen; + +/* Print a message and possibly an error string. Remember + that something awful happened. */ +static void +error(mesg, errnum) +#ifdef __STDC__ + const +#endif + char *mesg; + int errnum; +{ + if (errnum) + fprintf(stderr, "%s: %s: %s\n", prog, mesg, strerror(errnum)); + else + fprintf(stderr, "%s: %s\n", prog, mesg); + errseen = 1; +} + +/* Like error(), but die horribly after printing. */ +void +fatal(mesg, errnum) +#ifdef __STDC__ + const +#endif + char *mesg; + int errnum; +{ + error(mesg, errnum); + exit(2); +} + +/* Interface to handle errors and fix library lossage. */ +char * +xmalloc(size) + size_t size; +{ + char *result; + + result = malloc(size); + if (size && !result) + fatal("memory exhausted", 0); + return result; +} + +/* Interface to handle errors and fix some library lossage. */ +char * +xrealloc(ptr, size) + char *ptr; + size_t size; +{ + char *result; + + if (ptr) + result = realloc(ptr, size); + else + result = malloc(size); + if (size && !result) + fatal("memory exhausted", 0); + return result; +} + +#if !defined(HAVE_VALLOC) +#define valloc malloc +#else +#ifdef __STDC__ +extern void *valloc(size_t); +#else +extern char *valloc(); +#endif +#endif + +/* Hairy buffering mechanism for grep. The intent is to keep + all reads aligned on a page boundary and multiples of the + page size. */ + +static char *buffer; /* Base of buffer. */ +static size_t bufsalloc; /* Allocated size of buffer save region. */ +static size_t bufalloc; /* Total buffer size. */ +static int bufdesc; /* File descriptor. */ +static char *bufbeg; /* Beginning of user-visible stuff. */ +static char *buflim; /* Limit of user-visible stuff. */ + +#if defined(HAVE_WORKING_MMAP) +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> + +static int bufmapped; /* True for ordinary files. */ +static struct stat bufstat; /* From fstat(). */ +static off_t bufoffset; /* What read() normally remembers. */ +#endif + +/* Reset the buffer for a new file. Initialize + on the first time through. */ +void +reset(fd) + int fd; +{ + static int initialized; + + if (!initialized) + { + initialized = 1; +#ifndef BUFSALLOC + bufsalloc = MAX(8192, getpagesize()); +#else + bufsalloc = BUFSALLOC; +#endif + bufalloc = 5 * bufsalloc; + /* The 1 byte of overflow is a kludge for dfaexec(), which + inserts a sentinel newline at the end of the buffer + being searched. There's gotta be a better way... */ + buffer = valloc(bufalloc + 1); + if (!buffer) + fatal("memory exhausted", 0); + bufbeg = buffer; + buflim = buffer; + } + bufdesc = fd; +#if defined(HAVE_WORKING_MMAP) + if (fstat(fd, &bufstat) < 0 || !S_ISREG(bufstat.st_mode)) + bufmapped = 0; + else + { + bufmapped = 1; + bufoffset = lseek(fd, 0, 1); + } +#endif +} + +/* Read new stuff into the buffer, saving the specified + amount of old stuff. When we're done, 'bufbeg' points + to the beginning of the buffer contents, and 'buflim' + points just after the end. Return count of new stuff. */ +static int +fillbuf(save) + size_t save; +{ + char *nbuffer, *dp, *sp; + int cc; +#if defined(HAVE_WORKING_MMAP) + caddr_t maddr; +#endif + static int pagesize; + + if (pagesize == 0 && (pagesize = getpagesize()) == 0) + abort(); + + if (save > bufsalloc) + { + while (save > bufsalloc) + bufsalloc *= 2; + bufalloc = 5 * bufsalloc; + nbuffer = valloc(bufalloc + 1); + if (!nbuffer) + fatal("memory exhausted", 0); + } + else + nbuffer = buffer; + + sp = buflim - save; + dp = nbuffer + bufsalloc - save; + bufbeg = dp; + while (save--) + *dp++ = *sp++; + + /* We may have allocated a new, larger buffer. Since + there is no portable vfree(), we just have to forget + about the old one. Sorry. */ + buffer = nbuffer; + +#if defined(HAVE_WORKING_MMAP) + if (bufmapped && bufoffset % pagesize == 0 + && bufstat.st_size - bufoffset >= bufalloc - bufsalloc) + { + maddr = buffer + bufsalloc; + maddr = mmap(maddr, bufalloc - bufsalloc, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_FIXED, bufdesc, bufoffset); + if (maddr == (caddr_t) -1) + { + fprintf(stderr, "%s: warning: %s: %s\n", filename, + strerror(errno)); + goto tryread; + } +#if 0 + /* You might thing this (or MADV_WILLNEED) would help, + but it doesn't, at least not on a Sun running 4.1. + In fact, it actually slows us down about 30%! */ + madvise(maddr, bufalloc - bufsalloc, MADV_SEQUENTIAL); +#endif + cc = bufalloc - bufsalloc; + bufoffset += cc; + } + else + { + tryread: + /* We come here when we're not going to use mmap() any more. + Note that we need to synchronize the file offset the + first time through. */ + if (bufmapped) + { + bufmapped = 0; + lseek(bufdesc, bufoffset, 0); + } + cc = read(bufdesc, buffer + bufsalloc, bufalloc - bufsalloc); + } +#else + cc = read(bufdesc, buffer + bufsalloc, bufalloc - bufsalloc); +#endif + if (cc > 0) + buflim = buffer + bufsalloc + cc; + else + buflim = buffer + bufsalloc; + return cc; +} + +/* Flags controlling the style of output. */ +static int out_quiet; /* Suppress all normal output. */ +static int out_invert; /* Print nonmatching stuff. */ +static int out_file; /* Print filenames. */ +static int out_line; /* Print line numbers. */ +static int out_byte; /* Print byte offsets. */ +static int out_before; /* Lines of leading context. */ +static int out_after; /* Lines of trailing context. */ + +/* Internal variables to keep track of byte count, context, etc. */ +static size_t totalcc; /* Total character count before bufbeg. */ +static char *lastnl; /* Pointer after last newline counted. */ +static char *lastout; /* Pointer after last character output; + NULL if no character has been output + or if it's conceptually before bufbeg. */ +static size_t totalnl; /* Total newline count before lastnl. */ +static int pending; /* Pending lines of output. */ + +static void +nlscan(lim) + char *lim; +{ + char *beg; + + for (beg = lastnl; beg < lim; ++beg) + if (*beg == '\n') + ++totalnl; + lastnl = beg; +} + +static void +prline(beg, lim, sep) + char *beg; + char *lim; + char sep; +{ + if (out_file) + printf("%s%c", filename, sep); + if (out_line) + { + nlscan(beg); + printf("%d%c", ++totalnl, sep); + lastnl = lim; + } + if (out_byte) + printf("%lu%c", totalcc + (beg - bufbeg), sep); + fwrite(beg, 1, lim - beg, stdout); + if (ferror(stdout)) + error("writing output", errno); + lastout = lim; +} + +/* Print pending lines of trailing context prior to LIM. */ +static void +prpending(lim) + char *lim; +{ + char *nl; + + if (!lastout) + lastout = bufbeg; + while (pending > 0 && lastout < lim) + { + --pending; + if ((nl = memchr(lastout, '\n', lim - lastout)) != 0) + ++nl; + else + nl = lim; + prline(lastout, nl, '-'); + } +} + +/* Print the lines between BEG and LIM. Deal with context crap. + If NLINESP is non-null, store a count of lines between BEG and LIM. */ +static void +prtext(beg, lim, nlinesp) + char *beg; + char *lim; + int *nlinesp; +{ + static int used; /* avoid printing "--" before any output */ + char *bp, *p, *nl; + int i, n; + + if (!out_quiet && pending > 0) + prpending(beg); + + p = beg; + + if (!out_quiet) + { + /* Deal with leading context crap. */ + + bp = lastout ? lastout : bufbeg; + for (i = 0; i < out_before; ++i) + if (p > bp) + do + --p; + while (p > bp && p[-1] != '\n'); + + /* We only print the "--" separator if our output is + discontiguous from the last output in the file. */ + if ((out_before || out_after) && used && p != lastout) + puts("--"); + + while (p < beg) + { + nl = memchr(p, '\n', beg - p); + prline(p, nl + 1, '-'); + p = nl + 1; + } + } + + if (nlinesp) + { + /* Caller wants a line count. */ + for (n = 0; p < lim; ++n) + { + if ((nl = memchr(p, '\n', lim - p)) != 0) + ++nl; + else + nl = lim; + if (!out_quiet) + prline(p, nl, ':'); + p = nl; + } + *nlinesp = n; + } + else + if (!out_quiet) + prline(beg, lim, ':'); + + pending = out_after; + used = 1; +} + +/* Scan the specified portion of the buffer, matching lines (or + between matching lines if OUT_INVERT is true). Return a count of + lines printed. */ +static int +grepbuf(beg, lim) + char *beg; + char *lim; +{ + int nlines, n; + register char *p, *b; + char *endp; + + nlines = 0; + p = beg; + while ((b = (*execute)(p, lim - p, &endp)) != 0) + { + /* Avoid matching the empty line at the end of the buffer. */ + if (b == lim && ((b > beg && b[-1] == '\n') || b == beg)) + break; + if (!out_invert) + { + prtext(b, endp, (int *) 0); + nlines += 1; + } + else if (p < b) + { + prtext(p, b, &n); + nlines += n; + } + p = endp; + } + if (out_invert && p < lim) + { + prtext(p, lim, &n); + nlines += n; + } + return nlines; +} + +/* Search a given file. Return a count of lines printed. */ +static int +grep(fd) + int fd; +{ + int nlines, i; + size_t residue, save; + char *beg, *lim; + + reset(fd); + + totalcc = 0; + lastout = 0; + totalnl = 0; + pending = 0; + + nlines = 0; + residue = 0; + save = 0; + + for (;;) + { + if (fillbuf(save) < 0) + { + error(filename, errno); + return nlines; + } + lastnl = bufbeg; + if (lastout) + lastout = bufbeg; + if (buflim - bufbeg == save) + break; + beg = bufbeg + save - residue; + for (lim = buflim; lim > beg && lim[-1] != '\n'; --lim) + ; + residue = buflim - lim; + if (beg < lim) + { + nlines += grepbuf(beg, lim); + if (pending) + prpending(lim); + } + i = 0; + beg = lim; + while (i < out_before && beg > bufbeg && beg != lastout) + { + ++i; + do + --beg; + while (beg > bufbeg && beg[-1] != '\n'); + } + if (beg != lastout) + lastout = 0; + save = residue + lim - beg; + totalcc += buflim - bufbeg - save; + if (out_line) + nlscan(beg); + } + if (residue) + { + nlines += grepbuf(bufbeg + save - residue, buflim); + if (pending) + prpending(buflim); + } + return nlines; +} + +static char version[] = "GNU grep version 2.0"; + +#define USAGE \ + "usage: %s [-[[AB] ]<num>] [-[CEFGVchilnqsvwx]] [-[ef]] <expr> [<files...>]\n" + +static void +usage() +{ + fprintf(stderr, USAGE, prog); + exit(2); +} + +/* Go through the matchers vector and look for the specified matcher. + If we find it, install it in compile and execute, and return 1. */ +int +setmatcher(name) + char *name; +{ + int i; + + for (i = 0; matchers[i].name; ++i) + if (strcmp(name, matchers[i].name) == 0) + { + compile = matchers[i].compile; + execute = matchers[i].execute; + return 1; + } + return 0; +} + +int +main(argc, argv) + int argc; + char *argv[]; +{ + char *keys; + size_t keycc, oldcc, keyalloc; + int keyfound, count_matches, no_filenames, list_files, suppress_errors; + int opt, cc, desc, count, status; + FILE *fp; + extern char *optarg; + extern int optind; + + prog = argv[0]; + if (prog && strrchr(prog, '/')) + prog = strrchr(prog, '/') + 1; + + keys = NULL; + keycc = 0; + keyfound = 0; + count_matches = 0; + no_filenames = 0; + list_files = 0; + suppress_errors = 0; + matcher = NULL; + + while ((opt = getopt(argc, argv, "0123456789A:B:CEFGVX:bce:f:hiLlnoqsvwxy")) + != EOF) + switch (opt) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + out_before = 10 * out_before + opt - '0'; + out_after = 10 * out_after + opt - '0'; + break; + case 'A': + out_after = atoi(optarg); + if (out_after < 0) + usage(); + break; + case 'B': + out_before = atoi(optarg); + if (out_before < 0) + usage(); + break; + case 'C': + out_before = out_after = 2; + break; + case 'E': + if (matcher && strcmp(matcher, "egrep") != 0) + fatal("you may specify only one of -E, -F, or -G", 0); + matcher = "posix-egrep"; + break; + case 'F': + if (matcher && strcmp(matcher, "fgrep") != 0) + fatal("you may specify only one of -E, -F, or -G", 0);; + matcher = "fgrep"; + break; + case 'G': + if (matcher && strcmp(matcher, "grep") != 0) + fatal("you may specify only one of -E, -F, or -G", 0); + matcher = "grep"; + break; + case 'V': + fprintf(stderr, "%s\n", version); + break; + case 'X': + if (matcher) + fatal("matcher already specified", 0); + matcher = optarg; + break; + case 'b': + out_byte = 1; + break; + case 'c': + out_quiet = 1; + count_matches = 1; + break; + case 'e': + cc = strlen(optarg); + keys = xrealloc(keys, keycc + cc + 1); + if (keyfound) + keys[keycc++] = '\n'; + strcpy(&keys[keycc], optarg); + keycc += cc; + keyfound = 1; + break; + case 'f': + fp = strcmp(optarg, "-") != 0 ? fopen(optarg, "r") : stdin; + if (!fp) + fatal(optarg, errno); + for (keyalloc = 1; keyalloc <= keycc; keyalloc *= 2) + ; + keys = xrealloc(keys, keyalloc); + oldcc = keycc; + if (keyfound) + keys[keycc++] = '\n'; + while (!feof(fp) + && (cc = fread(keys + keycc, 1, keyalloc - keycc, fp)) > 0) + { + keycc += cc; + if (keycc == keyalloc) + keys = xrealloc(keys, keyalloc *= 2); + } + if (fp != stdin) + fclose(fp); + /* Nuke the final newline to avoid matching a null string. */ + if (keycc - oldcc > 0 && keys[keycc - 1] == '\n') + --keycc; + keyfound = 1; + break; + case 'h': + no_filenames = 1; + break; + case 'i': + case 'y': /* For old-timers . . . */ + match_icase = 1; + break; + case 'L': + /* Like -l, except list files that don't contain matches. + Inspired by the same option in Hume's gre. */ + out_quiet = 1; + list_files = -1; + break; + case 'l': + out_quiet = 1; + list_files = 1; + break; + case 'n': + out_line = 1; + break; + case 'o': + out_file = 1; + break; + case 'q': + out_quiet = 1; + break; + case 's': + suppress_errors = 1; + break; + case 'v': + out_invert = 1; + break; + case 'w': + match_words = 1; + break; + case 'x': + match_lines = 1; + break; + default: + usage(); + break; + } + + if (!keyfound) + if (optind < argc) + { + keys = argv[optind++]; + keycc = strlen(keys); + } + else + usage(); + + if (!matcher) + matcher = prog; + + if (!setmatcher(matcher) && !setmatcher("default")) + abort(); + + (*compile)(keys, keycc); + + if (argc - optind > 1 && !no_filenames) + out_file = 1; + + status = 1; + + if (optind < argc) + while (optind < argc) + { + desc = strcmp(argv[optind], "-") ? open(argv[optind], O_RDONLY) : 0; + if (desc < 0) + { + if (!suppress_errors) + error(argv[optind], errno); + } + else + { + filename = desc == 0 ? "(standard input)" : argv[optind]; + count = grep(desc); + if (count_matches) + { + if (out_file) + printf("%s:", filename); + printf("%d\n", count); + } + if (count) + { + status = 0; + if (list_files == 1) + printf("%s\n", filename); + } + else if (list_files == -1) + printf("%s\n", filename); + } + if (desc != 0) + close(desc); + ++optind; + } + else + { + filename = "(standard input)"; + count = grep(0); + if (count_matches) + printf("%d\n", count); + if (count) + { + status = 0; + if (list_files == 1) + printf("(standard input)\n"); + } + else if (list_files == -1) + printf("(standard input)\n"); + } + + exit(errseen ? 2 : status); +} |