diff options
author | Theo de Raadt <deraadt@cvs.openbsd.org> | 2003-06-22 22:20:08 +0000 |
---|---|---|
committer | Theo de Raadt <deraadt@cvs.openbsd.org> | 2003-06-22 22:20:08 +0000 |
commit | b0b0b549d5a01b84b265074c62253fbf87c11caf (patch) | |
tree | c3f7851d6a960774e96c296dce31e8df2dc76c8f /usr.bin/grep/grep.c | |
parent | 2aec8c7829a77f46b85b067420ff8ab9f2f50e94 (diff) |
freegrep 0.16
Diffstat (limited to 'usr.bin/grep/grep.c')
-rw-r--r-- | usr.bin/grep/grep.c | 761 |
1 files changed, 262 insertions, 499 deletions
diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c index 9d472dd5a81..8ce4202eff9 100644 --- a/usr.bin/grep/grep.c +++ b/usr.bin/grep/grep.c @@ -1,8 +1,6 @@ -/* $OpenBSD: grep.c,v 1.2 2003/02/16 03:46:04 cloder Exp $ */ - /*- - * Copyright (c) 2000 Carson Harding. All rights reserved. - * This code was written and contributed to OpenBSD by Carson Harding. + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -12,9 +10,6 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the author, or the names of contributors may be - * used to endorse or promote products derived from this software without - * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE @@ -27,569 +22,337 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $Id: grep.c,v 1.3 2003/06/22 22:20:07 deraadt Exp $ */ -#ifndef lint -static char rcsid[] = "$OpenBSD: grep.c,v 1.2 2003/02/16 03:46:04 cloder Exp $"; -#endif /* not lint */ - #include <sys/types.h> +#include <sys/stat.h> + +#include <err.h> +#include <errno.h> +#include <getopt.h> +#include <regex.h> #include <stdio.h> #include <stdlib.h> -#include <unistd.h> -#include <regex.h> #include <string.h> -#include <ctype.h> -#include <sys/param.h> -#include <fts.h> -#include <err.h> - -extern char *__progname; - - -void usage(void); -void err_regerror(int r, regex_t *rexp); -int grep_files(int regexc, regex_t *regexv, char **files); -int grep_tree(int regexc, regex_t *regexv, char **paths); -int grep_file(int regexc, regex_t *rexp, char *fname); -void arg_patt(char *s); -char *chop_patt(char *s, size_t *len); -void add_patt(char *s, size_t len); -void load_patt(char *fname); -regex_t *regcomp_patt(int pattc, char *pattvp[], int cflags); - +#include <unistd.h> -int f_bytecount; /* -b prepend byte count */ -int f_countonly; /* -c return only count */ -int f_nofname; /* -h do not prepend filenames on multiple */ -int f_fnameonly; /* -l only print file name with match */ -int f_suppress; /* -s suppress error messages; 1/2 -q */ -int f_lineno; /* -n prepend with line numbers */ -int f_quiet; /* -q no output, only status */ -int f_wmatch; /* -w match words */ -int f_xmatch; /* -x match line */ -int f_zerobyte; /* -z NUL character after filename with -l */ -int f_match; /* = REG_MATCH; else = REG_NOMATCH for -v */ -int f_multifile; /* multiple files: prepend file names */ -int f_matchall; /* empty pattern, matches all input */ -int f_error; /* saw error; set exit status */ +#include "grep.h" + +/* Flags passed to regcomp() and regexec() */ +int cflags; +int eflags = REG_STARTEND; + +int matchall; /* shortcut */ +int patterns, pattern_sz; +char **pattern; +regex_t *r_pattern; + +/* For regex errors */ +char re_error[RE_ERROR_BUF + 1]; + +/* Command-line flags */ +int Aflag; /* -A x: print x lines trailing each match */ +int Bflag; /* -B x: print x lines leading each match */ +int Eflag; /* -E: interpret pattern as extended regexp */ +int Fflag; /* -F: interpret pattern as list of fixed strings */ +int Gflag; /* -G: interpret pattern as basic regexp */ +int Hflag; /* -H: if -R, follow explicitly listed symlinks */ +int Lflag; /* -L: only show names of files with no matches */ +int Pflag; /* -P: if -R, no symlinks are followed */ +int Rflag; /* -R: recursively search directory trees */ +int Sflag; /* -S: if -R, follow all symlinks */ +int Vflag; /* -V: display version information */ +int Zflag; /* -Z: decompress input before processing */ +int aflag; /* -a: only search ascii files */ +int bflag; /* -b: show block numbers for each match */ +int cflag; /* -c: only show a count of matching lines */ +int hflag; /* -h: don't print filename headers */ +int iflag; /* -i: ignore case */ +int lflag; /* -l: only show names of files with matches */ +int nflag; /* -n: show line numbers in front of matching lines */ +int oflag; /* -o: always print file name */ +int qflag; /* -q: quiet mode (don't output anything) */ +int sflag; /* -s: silent mode (ignore errors) */ +int vflag; /* -v: only show non-matching lines */ +int wflag; /* -w: pattern must start and end on word boundaries */ +int xflag; /* -x: pattern must match entire line */ + +/* Housekeeping */ +int first; /* flag whether or not this is our fist match */ +int tail; /* lines left to print */ +int lead; /* number of lines in leading context queue */ + +char *progname; + +static void +usage(void) +{ + fprintf(stderr, "usage: %s %s %s\n", + progname, + "[-[AB] num] [-CEFGHLPRSVZabchilnoqsvwx]", + "[-e patttern] [-f file]"); + exit(2); +} - /* default traversal flags */ -int f_ftsflags = FTS_LOGICAL|FTS_NOCHDIR|FTS_NOSTAT; +static char *optstr = "0123456789A:B:CEFGHLPSRUVZabce:f:hilnoqrsuvwxy"; -int f_debug; /* temporary debugging flag */ +struct option long_options[] = +{ + {"basic-regexp", no_argument, NULL, 'G'}, + {"extended-regexp", no_argument, NULL, 'E'}, + {"fixed-strings", no_argument, NULL, 'F'}, + {"after-context", required_argument, NULL, 'A'}, + {"before-context", required_argument, NULL, 'B'}, + {"context", optional_argument, NULL, 'C'}, + {"version", no_argument, NULL, 'V'}, + {"byte-offset", no_argument, NULL, 'b'}, + {"count", no_argument, NULL, 'c'}, + {"regexp", required_argument, NULL, 'e'}, + {"file", required_argument, NULL, 'f'}, + {"no-filename", no_argument, NULL, 'h'}, + {"ignore-case", no_argument, NULL, 'i'}, + {"files-without-match", no_argument, NULL, 'L'}, + {"files-with-matches", no_argument, NULL, 'l'}, + {"line-number", no_argument, NULL, 'n'}, + {"quiet", no_argument, NULL, 'q'}, + {"silent", no_argument, NULL, 'q'}, + {"recursive", no_argument, NULL, 'r'}, + {"no-messages", no_argument, NULL, 's'}, + {"text", no_argument, NULL, 'a'}, + {"revert-match", no_argument, NULL, 'v'}, + {"word-regexp", no_argument, NULL, 'w'}, + {"line-regexp", no_argument, NULL, 'x'}, + {"binary", no_argument, NULL, 'U'}, + {"unix-byte-offsets", no_argument, NULL, 'u'}, + {"decompress", no_argument, NULL, 'Z'}, + + {NULL, no_argument, NULL, 0} +}; + + +static void +add_pattern(char *pat, size_t len) +{ + if (len == 0 || matchall) { + matchall = 1; + return; + } + if (patterns == pattern_sz) { + pattern_sz *= 2; + pattern = grep_realloc(pattern, ++pattern_sz); + } + if (pat[len-1] == '\n') + --len; + pattern[patterns] = grep_malloc(len+1); + strncpy(pattern[patterns], pat, len); + pattern[patterns][len] = '\0'; + ++patterns; +} -#define START_PATT_SZ 8 /* start with room for 8 patterns */ -char **pattv; /* array of patterns from -e and -f */ -int pattc; /* patterns in pattern array */ -int pattn; /* patterns we have seen, including nulls */ +static void +read_patterns(char *fn) +{ + FILE *f; + char *line; + size_t len; + int nl; + + if ((f = fopen(fn, "r")) == NULL) + err(1, "%s", fn); + nl = 0; + while ((line = fgetln(f, &len)) != NULL) { + if (*line == '\n') { + ++nl; + continue; + } + if (nl) { + matchall = 1; + break; + } + nl = 0; + add_pattern(line, len); + } + if (ferror(f)) + err(1, "%s", fn); + fclose(f); +} int -main(int argc, char **argv) +main(int argc, char *argv[]) { - int c; - int ch; - int cflags; /* flags to regcomp() */ - int sawfile; /* did we see a pattern file? */ - regex_t *regexv; /* start of array of compiled patterns */ - - int (*grepf)(int regexc, regex_t *regexv, char **argv); + char *tmp; + int c, i; - sawfile = 0; - cflags = REG_BASIC|REG_NEWLINE; - grepf = grep_files; - - if (*__progname == 'e') - cflags |= REG_EXTENDED; - else if (*__progname == 'f') - cflags |= REG_NOSPEC; - - while ((ch = getopt(argc, argv, "DEFRHLPXabce:f:hilnqsvwxz")) != -1) { - switch(ch) { - case 'D': - f_debug = 1; + if ((progname = strrchr(*argv, '/')) != NULL) + ++progname; + else + progname = *argv; + + while ((c = getopt_long(argc, argv, optstr, + long_options, (int *)NULL)) != -1) { + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + tmp = argv[optind - 1]; + if (tmp[0] == '-' && tmp[1] == c && !tmp[2]) + Aflag = Bflag = strtol(++tmp, (char **)NULL, 10); + else + Aflag = Bflag = strtol(argv[optind] + 1, (char **)NULL, 10); + break; + case 'A': + Aflag = strtol(optarg, (char **)NULL, 10); + break; + case 'B': + Bflag = strtol(optarg, (char **)NULL, 10); + break; + case 'C': + if (optarg == NULL) + Aflag = Bflag = 2; + else + Aflag = Bflag = strtol(optarg, (char **)NULL, 10); break; case 'E': - cflags |= REG_EXTENDED; + Eflag++; break; case 'F': - cflags |= REG_NOSPEC; + Fflag++; + break; + case 'G': + Gflag++; break; case 'H': - f_ftsflags |= FTS_COMFOLLOW; + Hflag++; break; case 'L': - f_ftsflags |= FTS_LOGICAL; + lflag = 0; + Lflag = qflag = 1; break; case 'P': - f_ftsflags |= FTS_PHYSICAL; + Pflag++; + break; + case 'S': + Sflag++; break; case 'R': - grepf = grep_tree; - /* - * If walking the tree we don't know how many files - * we'll actually find. So assume multiple, if - * you don't want names, there's always -h .... - */ - f_multifile = 1; + case 'r': + Rflag++; + oflag++; break; - case 'X': - f_ftsflags |= FTS_XDEV; + case 'U': + case 'u': + /* these are here for compatability */ + break; + case 'V': + fprintf(stderr, "grep version %u.%u\n", VER_MAJ, VER_MIN); + fprintf(stderr, argv[0]); + usage(); + break; + case 'Z': + Zflag++; break; case 'a': - /* - * Silently eat -a; we don't use the default - * behaviour it toggles off in gnugrep. - */ + aflag = 1; break; case 'b': - f_bytecount = 1; + bflag = 1; break; case 'c': - f_countonly = 1; + cflag = 1; break; case 'e': - arg_patt(optarg); + add_pattern(optarg, strlen(optarg)); break; case 'f': - load_patt(optarg); - sawfile = 1; + read_patterns(optarg); break; case 'h': - f_nofname = 1; + oflag = 0; + hflag = 1; break; case 'i': + case 'y': cflags |= REG_ICASE; break; case 'l': - f_fnameonly = 1; + Lflag = 0; + lflag = qflag = 1; break; case 'n': - f_lineno = 1; + nflag = 1; + break; + case 'o': + hflag = 0; + oflag = 1; break; case 'q': - f_quiet = 1; + qflag = 1; break; case 's': - f_suppress = 1; + sflag = 1; break; case 'v': - f_match = REG_NOMATCH; + vflag = 1; break; case 'w': - f_wmatch = 1; + wflag = 1; break; case 'x': - f_xmatch = 1; - break; - case 'z': - f_zerobyte = 1; + xflag = 1; break; default: usage(); - break; } } - if ((cflags & REG_EXTENDED) && (cflags & REG_NOSPEC)) + argc -= optind; + argv += optind; + + if (argc == 0 && patterns == 0) usage(); - /* - * If we read one or more pattern files, and still - * didn't end up with any pattern, any pattern file - * we read was empty. This is different than failing - * to provide a pattern as an argument, and we fail - * on this case as if we had searched and found - * no matches. (At least this is what GNU grep and - * Solaris's grep do.) - */ - if (!pattn && !argv[optind]) { - if (sawfile) - exit(1); - else usage(); + if (patterns == 0) { + add_pattern(*argv, strlen(*argv)); + --argc; + ++argv; } - - if (!pattn) { - arg_patt(argv[optind]); - optind++; + + switch (*progname) { + case 'e': + Eflag++; + break; + case 'f': + Fflag++; + break; + case 'g': + Gflag++; + break; + case 'z': + Zflag++; + break; } - /* why bother ... just do nothing sooner */ - if (f_matchall && f_match == REG_NOMATCH) - exit(1); - - regexv = regcomp_patt(pattc, pattv, cflags); - - if (optind == argc) { - c = grep_file(pattc, regexv, NULL); - } else { - if (argc - optind > 1 && !f_nofname) - f_multifile = 1; - c = (*grepf)(pattc, regexv, &argv[optind]); - } - - /* XX ugh */ - if (f_error) { - if (c && f_quiet) - exit(0); - else - exit(2); - } else if (c) - exit(0); - else - exit(1); -} - -void -usage(void) -{ - fprintf(stderr, "usage: %s [-E|-F] [-abchilnqsvwx] [-RXH[-L|-P]]" - " {patt | -e patt | -f patt_file} [files]\n", - __progname); - exit(2); -} - -/* - * Patterns as arguments may have embedded newlines. - * When read from file, these are detected by fgetln(); - * in arguments we have to find and cut out the segments. - */ -void -arg_patt(char *s) -{ - size_t len; - char *sp; - - if (f_debug) - fprintf(stderr, "arg_patt(\"%s\")\n", s); - - len = strlen(s); - if (!len) { /* got "" on the command-line */ - add_patt(s, len); - return; - } - for (sp = chop_patt(s, &len); sp; sp = chop_patt(NULL, &len)) { - if (f_debug) { - fprintf(stderr, "adding pattern \""); - fwrite(sp, len, 1, stderr); - fprintf(stderr, "\", length %lu\n",(unsigned long)len); - if (pattc > 20) { - fprintf(stderr, "too many, exiting ...\n"); - exit(2); - } + cflags |= Eflag ? REG_EXTENDED : REG_BASIC; + r_pattern = grep_malloc(patterns * sizeof(regex_t)); + for (i = 0; i < patterns; ++i) { + if ((c = regcomp(&r_pattern[i], pattern[i], cflags))) { + regerror(c, &r_pattern[i], re_error, RE_ERROR_BUF); + errx(1, "%s", re_error); } - add_patt(sp, len); } -} -/* - * Kind of like strtok; pass char *, then NULL for rest. - * Call it memtok()... New size gets written into len. - */ -char * -chop_patt(char *s, size_t *len) -{ - char *cp; - static char *save_s; - static int save_n; + if ((argc == 0 || argc == 1) && !oflag) + hflag = 1; - if (s) - save_n = *len; + if (argc == 0) + exit(!procfile(NULL)); + + if (Rflag) + c = grep_tree(argv); else - s = save_s; + for (c = 0; argc--; ++argv) + c += procfile(*argv); - if (save_n <= 0) { - s = save_s = NULL; - } else if (s) { - if ((cp = memchr(s, '\n', save_n)) != NULL) { - *len = cp - s; /* returned segment */ - save_n -= *len; - save_s = ++cp; /* adjust past newline */ - save_n--; - } else { - *len = save_n; /* else return the whole string */ - save_n = 0; - } - } - - return s; -} - -/* - * Start with an array for 8 patterns, and double it - * each time we outgrow it. If pattern is empty (0 length), - * or if f_matchall is already set, set f_matchall and return. - * No use adding a pattern if all input is going to match - * anyhow. - */ -void -add_patt(char *s, size_t len) -{ - char *p; - static size_t pattmax = START_PATT_SZ; - static size_t sumlen; - - pattn++; - sumlen += len; - - if (!len || f_matchall) { - f_matchall = 1; - return; - } - - if (!pattv) { - pattv = malloc(START_PATT_SZ * sizeof(char *)); - if (!pattv) - err(2, "malloc"); - pattc = 0; - } else if (pattc >= pattmax) { - pattmax *= 2; - pattv = realloc(pattv, pattmax * sizeof(char *)); - if (!pattv) - err(2, "realloc"); - } - p = malloc(len+1); - if (!p) err(2, "malloc"); - memmove(p, s, len); - p[len] = '\0'; - pattv[pattc++] = p; -} - -/* - * Load patterns from file. - */ -void -load_patt(char *fname) -{ - char *buf; - size_t len; - FILE *fr; - - fr = fopen(fname, "r"); - if (!fr) - err(2, "%s", fname); - while ((buf = fgetln(fr, &len)) != NULL) { - if (buf[len-1] == '\n') - buf[--len] = '\0'; - add_patt(buf, len); - } - fclose(fr); -} - -/* - * Compile the collected pattern strings into an array - * of regex_t. - */ -regex_t * -regcomp_patt(int lpattc, char *lpattv[], int cflags) -{ - int i; - int r; - regex_t *rxv; - - if (f_matchall) - return NULL; - - rxv = malloc(sizeof(regex_t) * lpattc); - if (!rxv) - err(2, "malloc"); - for (i = 0; i < lpattc; i++) { - if ((r = regcomp(&rxv[i], lpattv[i], cflags)) != 0) - err_regerror(r, &rxv[i]); - } - return rxv; -} - -/* - * Print out regcomp error, and exit. - */ -void -err_regerror(int r, regex_t *rexp) -{ - size_t n; - char *buf; - - n = regerror(r, rexp, NULL, 0); - buf = malloc(n); - if (!buf) - err(2, "malloc"); - (void)regerror(r, rexp, buf, n); - errx(2, "%s", buf); -} - -/* - * Little wrapper so we can use function pointer above. - */ -int -grep_files(int regexc, regex_t *regexv, char **files) -{ - int c; - char **fname; - - c = 0; - for (fname = files; *fname; fname++) - c += grep_file(regexc, regexv, *fname); - - return c; + exit(!c); } - -/* - * Modified from James Howard and Dag-Erling Co?dan Sm?rgrav's grep: - * add FTS_D to FTS_DP (especially since D was the one being used) - * pass in regex_t array, and set fts flags above in main(). - */ -int -grep_tree(int regexc, regex_t *regexv, char **paths) -{ - int c; - FTS *fts; - FTSENT *p; - - c = 0; - - if (!(fts = fts_open(paths, f_ftsflags, (int (*) ()) NULL))) - err(2, "fts_open"); - while ((p = fts_read(fts)) != NULL) { - switch (p->fts_info) { - case FTS_D: - case FTS_DP: - case FTS_DNR: - break; - case FTS_ERR: - errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno)); - break; - default: - if (f_debug) - printf("%s\n", p->fts_path); - c += grep_file(regexc, regexv, p->fts_path); - break; - } - } - - return c; -} - -/* - * Open and grep the named file. If fname is NULL, read - * from stdin. - */ - -#define isword(x) (isalnum(x) || (x) == '_') - -int -grep_file(int regexc, regex_t *regexv, char *fname) -{ - int i; - int c; - int n; - int r; - int match; - char *buf; - size_t b; - size_t len; - FILE *fr; - regmatch_t pmatch[1]; - regoff_t so, eo; - - b = 0; /* byte count */ - c = 0; /* match count */ - n = 0; /* line count */ - - if (!fname) { - fr = stdin; - fname = "(standard input)"; - } else { - fr = fopen(fname, "r"); - if (!fr) { - if (!f_suppress) - warn("%s", fname); - f_error = 1; - return 0; - } - } - - while ((buf = fgetln(fr, &len)) != NULL) { - n++; - if (f_matchall) - goto printmatch; - match = 0; - for (i = 0; i < regexc; i++) { - pmatch[0].rm_so = 0; - pmatch[0].rm_eo = len-1; - r = regexec(®exv[i], buf, 1, pmatch, REG_STARTEND); - if (r == f_match) { - /* - * XX gnu grep allows both -w and -x; - * XX but seems bizarre. sometimes -w seems - * XX to override, at other times, not. - * XX Need to figure that out. - * XX It seems logical to go with the most - * XX restrictive argument: -x, as -x is - * XX a boundary case of -w anyhow. - */ - if (f_xmatch) { - if (pmatch[0].rm_so != 0 || - pmatch[0].rm_eo != len-1) - continue; - } else if (f_wmatch) { - so = pmatch[0].rm_so; - eo = pmatch[0].rm_eo; - if (!((so == 0 || !isword(buf[so-1])) && - (eo == len || !isword(buf[eo])))) - continue; - } - match = 1; - break; - } - /* XX test for regexec() errors ?? */ - } - if (match) { -printmatch: - c++; - if (f_fnameonly || f_quiet) - break; - if (f_countonly) - continue; - if (f_multifile && !f_nofname) - printf("%s:", fname); - if (f_lineno) - printf("%d:", n); - if (f_bytecount) - printf("%lu:", (unsigned long)b); - fwrite(buf, len, 1, stdout); - } - /* save position in stream before next line */ - b += len; - } - - if (!buf && ferror(fr)) { - warn("%s", fname); - f_error = 1; - /* - * XX or do we spit out what result we did have? - */ - } else if (!f_quiet) { - /* - * XX test -c and -l together: gnu grep - * XX allows (although ugly), do others? - */ - if (f_countonly) { - if (f_multifile) - printf("%s:", fname); - printf("%d\n", c); - } - if (c && f_fnameonly) { - fputs(fname, stdout); - if (f_zerobyte) - fputc('\0', stdout); - else - fputc('\n', stdout); - } - } - - if (fr != stdin) - fclose(fr); - - return c; -} - |