From 7d55775e260f0840640d21e14a2856e6fb835ee1 Mon Sep 17 00:00:00 2001 From: Ted Unangst Date: Mon, 23 Jun 2003 00:55:10 +0000 Subject: better detection/handling of binary files. make -a do the right thing, doc and implement -U, -I. add --help and --mmap for compatibility. some other minor fixes. some from NetBSD. ok deraadt@ --- usr.bin/grep/binary.c | 4 ++-- usr.bin/grep/grep.1 | 10 ++++++--- usr.bin/grep/grep.c | 60 +++++++++++++++++++++++++++++++++++++-------------- usr.bin/grep/grep.h | 9 ++++++-- usr.bin/grep/util.c | 21 ++++++++++++------ 5 files changed, 75 insertions(+), 29 deletions(-) diff --git a/usr.bin/grep/binary.c b/usr.bin/grep/binary.c index 360d142f4bd..e8ff9188fd8 100644 --- a/usr.bin/grep/binary.c +++ b/usr.bin/grep/binary.c @@ -1,4 +1,4 @@ -/* $OpenBSD: binary.c,v 1.4 2003/06/22 22:38:50 deraadt Exp $ */ +/* $OpenBSD: binary.c,v 1.5 2003/06/23 00:55:09 tedu Exp $ */ /*- * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav @@ -83,7 +83,7 @@ mmbin_file(mmf_t *f) /* XXX knows too much about mmf internals */ for (i = 0; i < BUFFER_SIZE && i < f->len; i++) - if (!isprint(f->base[i])) + if (!isprint(f->base[i]) && !isspace(f->base[i])) return 1; mmrewind(f); return 0; diff --git a/usr.bin/grep/grep.1 b/usr.bin/grep/grep.1 index bd619f834c7..1e7108942cb 100644 --- a/usr.bin/grep/grep.1 +++ b/usr.bin/grep/grep.1 @@ -1,4 +1,4 @@ -.\" $OpenBSD: grep.1,v 1.9 2003/06/22 22:42:00 millert Exp $ +.\" $OpenBSD: grep.1,v 1.10 2003/06/23 00:55:09 tedu Exp $ .\" Copyright (c) 1980, 1990, 1993 .\" The Regents of the University of California. All rights reserved. .\" @@ -37,7 +37,7 @@ .Sh SYNOPSIS .Nm grep .Op Fl AB Ar num -.Op Fl CEFGHLPRSVZabchilnoqsvwx +.Op Fl CEFGHILPRSUVZabchilnoqsvwx .Op Fl e Ar pattern .Op Fl f Ar file .Op Ar @@ -122,6 +122,8 @@ If .Fl R is specified, follow symbolic links only if they were explictly listed on the command line. +.It Fl I +Ignore binary files. 
.It Fl L Only the names of files not containing selected lines are written to standard output. @@ -139,6 +141,8 @@ Recursively search subdirectories listed. If .Fl R is specified, all symbolic links are followed. +.It Fl U +Search binary files, but do not attempt to print them. .It Fl V Display version information. .It Fl Z @@ -147,7 +151,7 @@ Force to behave as .Nm zgrep . .It Fl a -Do not search in binary files. +Treat all files as text. .It Fl b The block number on the disk in which a matched pattern is located is displayed in front of the respective matched line. diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c index a955223851c..f644f97dd3c 100644 --- a/usr.bin/grep/grep.c +++ b/usr.bin/grep/grep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: grep.c,v 1.8 2003/06/22 23:51:22 tedu Exp $ */ +/* $OpenBSD: grep.c,v 1.9 2003/06/23 00:55:09 tedu Exp $ */ /*- * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav @@ -27,6 +27,7 @@ */ #include +#include #include #include @@ -67,7 +68,6 @@ int Vflag; /* -V: display version information */ #ifndef NOZ int Zflag; /* -Z: decompress input before processing */ #endif -int aflag; /* -a: only search ascii files */ int bflag; /* -b: show block numbers for each match */ int cflag; /* -c: only show a count of matching lines */ int hflag; /* -h: don't print filename headers */ @@ -81,6 +81,14 @@ int vflag; /* -v: only show non-matching lines */ int wflag; /* -w: pattern must start and end on word boundaries */ int xflag; /* -x: pattern must match entire line */ +int binbehave = BIN_FILE_BIN; + +enum { + BIN_OPT = CHAR_MAX + 1, + HELP_OPT, + MMAP_OPT +}; + /* Housekeeping */ int first; /* flag whether or not this is our fist match */ int tail; /* lines left to print */ @@ -93,29 +101,35 @@ usage(void) { fprintf(stderr, #ifdef NOZ - "usage: %s [-[AB] num] [-CEFGHLPRSVabchilnoqsvwx]" + "usage: %s [-[AB] num] [-CEFGHILPRSUVabchilnoqsvwx]" #else - "usage: %s [-[AB] num] [-CEFGHLPRSVZabchilnoqsvwx]" + "usage: %s [-[AB] num] 
[-CEFGHILPRSUVZabchilnoqsvwx]" #endif " [-e pattern] [-f file] [file ...]\n", __progname); exit(2); } #ifdef NOZ -static char *optstr = "0123456789A:B:CEFGHLPSRUVabce:f:hilnoqrsuvwxy"; +static char *optstr = "0123456789A:B:CEFGHILPSRUVabce:f:hilnoqrsuvwxy"; #else -static char *optstr = "0123456789A:B:CEFGHLPSRUVZabce:f:hilnoqrsuvwxy"; +static char *optstr = "0123456789A:B:CEFGHILPSRUVZabce:f:hilnoqrsuvwxy"; #endif struct option long_options[] = { - {"basic-regexp", no_argument, NULL, 'G'}, - {"extended-regexp", no_argument, NULL, 'E'}, - {"fixed-strings", no_argument, NULL, 'F'}, + {"binary-files", required_argument, NULL, BIN_OPT}, + {"help", no_argument, NULL, HELP_OPT}, + {"mmap", no_argument, NULL, MMAP_OPT}, {"after-context", required_argument, NULL, 'A'}, {"before-context", required_argument, NULL, 'B'}, {"context", optional_argument, NULL, 'C'}, + {"devices", required_argument, NULL, 'D'}, + {"extended-regexp", no_argument, NULL, 'E'}, + {"fixed-strings", no_argument, NULL, 'F'}, + {"basic-regexp", no_argument, NULL, 'G'}, + {"binary", no_argument, NULL, 'U'}, {"version", no_argument, NULL, 'V'}, + {"text", no_argument, NULL, 'a'}, {"byte-offset", no_argument, NULL, 'b'}, {"count", no_argument, NULL, 'c'}, {"regexp", required_argument, NULL, 'e'}, @@ -129,11 +143,9 @@ struct option long_options[] = {"silent", no_argument, NULL, 'q'}, {"recursive", no_argument, NULL, 'r'}, {"no-messages", no_argument, NULL, 's'}, - {"text", no_argument, NULL, 'a'}, {"revert-match", no_argument, NULL, 'v'}, {"word-regexp", no_argument, NULL, 'w'}, {"line-regexp", no_argument, NULL, 'x'}, - {"binary", no_argument, NULL, 'U'}, {"unix-byte-offsets", no_argument, NULL, 'u'}, #ifndef NOZ {"decompress", no_argument, NULL, 'Z'}, @@ -230,6 +242,9 @@ main(int argc, char *argv[]) case 'H': Hflag++; break; + case 'I': + binbehave = BIN_FILE_SKIP; + break; case 'L': lflag = 0; Lflag = qflag = 1; @@ -246,13 +261,11 @@ main(int argc, char *argv[]) oflag++; break; case 'U': - case 'u': - /* 
these are here for compatability */ + binbehave = BIN_FILE_BIN; break; case 'V': fprintf(stderr, "grep version %u.%u\n", VER_MAJ, VER_MIN); - fprintf(stderr, argv[0]); - usage(); + exit(0); break; #ifndef NOZ case 'Z': @@ -260,7 +273,7 @@ main(int argc, char *argv[]) break; #endif case 'a': - aflag = 1; + binbehave = BIN_FILE_TEXT; break; case 'b': bflag = 1; @@ -308,6 +321,21 @@ main(int argc, char *argv[]) case 'x': xflag = 1; break; + case BIN_OPT: + if (strcmp("binary", optarg) == 0) + binbehave = BIN_FILE_BIN; + else if (strcmp("without-match", optarg) == 0) + binbehave = BIN_FILE_SKIP; + else if (strcmp("text", optarg) == 0) + binbehave = BIN_FILE_TEXT; + else + errx(2, "Unknown binary-files option"); + break; + case 'u': + case MMAP_OPT: + /* default, compatibility */ + break; + case HELP_OPT: default: usage(); } diff --git a/usr.bin/grep/grep.h b/usr.bin/grep/grep.h index 907f6c042d9..d9981e3808b 100644 --- a/usr.bin/grep/grep.h +++ b/usr.bin/grep/grep.h @@ -1,4 +1,4 @@ -/* $OpenBSD: grep.h,v 1.2 2003/06/22 22:38:50 deraadt Exp $ */ +/* $OpenBSD: grep.h,v 1.3 2003/06/23 00:55:09 tedu Exp $ */ /*- * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav @@ -35,6 +35,10 @@ #define VER_MAJ 0 #define VER_MIN 9 +#define BIN_FILE_BIN 0 +#define BIN_FILE_SKIP 1 +#define BIN_FILE_TEXT 2 + typedef struct { size_t len; int line_no; @@ -48,8 +52,9 @@ extern int cflags, eflags; /* Command line flags */ extern int Aflag, Bflag, Hflag, Lflag, Pflag, Sflag, Rflag, Zflag, - aflag, bflag, cflag, hflag, lflag, nflag, qflag, sflag, + bflag, cflag, hflag, lflag, nflag, qflag, sflag, vflag, wflag, xflag; +extern int binbehave; extern int first, lead, matchall, patterns, tail; extern char **pattern; diff --git a/usr.bin/grep/util.c b/usr.bin/grep/util.c index 2d51301ebb4..730d39fff6d 100644 --- a/usr.bin/grep/util.c +++ b/usr.bin/grep/util.c @@ -1,4 +1,4 @@ -/* $OpenBSD: util.c,v 1.3 2003/06/22 22:38:50 deraadt Exp $ */ +/* $OpenBSD: util.c,v 1.4 2003/06/23 00:55:09 tedu 
Exp $ */ /*- * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav @@ -47,7 +47,7 @@ */ static int linesqueued; -static int procline(str_t *l); +static int procline(str_t *l, int); int grep_tree(char **argv) @@ -92,7 +92,7 @@ procfile(char *fn) { str_t ln; file_t *f; - int c, t, z; + int c, t, z, nottext; if (fn == NULL) { fn = "(standard input)"; @@ -105,7 +105,9 @@ procfile(char *fn) warn("%s", fn); return 0; } - if (aflag && grep_bin_file(f)) { + + nottext = grep_bin_file(f); + if (nottext && binbehave == BIN_FILE_SKIP) { grep_close(f); return 0; } @@ -127,7 +129,7 @@ procfile(char *fn) z = tail; - if ((t = procline(&ln)) == 0 && Bflag > 0 && z == 0) { + if ((t = procline(&ln, nottext)) == 0 && Bflag > 0 && z == 0) { enqueue(&ln); linesqueued++; } @@ -146,6 +148,10 @@ procfile(char *fn) printf("%s\n", fn); if (Lflag && c == 0) printf("%s\n", fn); + if (c && !cflag && !lflag && !Lflag && + binbehave == BIN_FILE_BIN && nottext) + printf("Binary file %s matches\n", fn); + return c; } @@ -157,7 +163,7 @@ procfile(char *fn) #define isword(x) (isalnum(x) || (x) == '_') static int -procline(str_t *l) +procline(str_t *l, int nottext) { regmatch_t pmatch; int c, i, r, t; @@ -192,6 +198,9 @@ procline(str_t *l) } print: + if (c && binbehave == BIN_FILE_BIN && nottext) + return c; /* Binary file */ + if ((tail > 0 || c) && !cflag && !qflag) { if (c) { if (first > 0 && tail == 0 && (Bflag < linesqueued) && (Aflag || Bflag)) -- cgit v1.2.3