diff options
author | Todd C. Miller <millert@cvs.openbsd.org> | 1999-02-04 03:53:49 +0000 |
---|---|---|
committer | Todd C. Miller <millert@cvs.openbsd.org> | 1999-02-04 03:53:49 +0000 |
commit | 986e4e95b9c5e0790b3a4a9918f01cb58fa5bd7b (patch) | |
tree | 52057a4fbc6e6aad43522533fec32553974cc7c4 /usr.bin | |
parent | ba8a47c2925c4499697da08dee3a471390263ed0 (diff) |
From FreeBSD:
Add new option '-p pattern' for splitting files based on matching
lines in the file with a regular expression. Useful for e.g.
'cvs diff' output. Also compile cleanly with -Wall and use
defines from <sysexits.h>.
Diffstat (limited to 'usr.bin')
-rw-r--r-- | usr.bin/split/split.1 | 17 |
-rw-r--r-- | usr.bin/split/split.c | 139 |
2 files changed, 100 insertions, 56 deletions
diff --git a/usr.bin/split/split.1 b/usr.bin/split/split.1
index 0e59fb11795..c2d0022974c 100644
--- a/usr.bin/split/split.1
+++ b/usr.bin/split/split.1
@@ -1,4 +1,4 @@
-.\" $OpenBSD: split.1,v 1.2 1996/06/26 05:39:28 deraadt Exp $
+.\" $OpenBSD: split.1,v 1.3 1999/02/04 03:53:48 millert Exp $
 .\" $NetBSD: split.1,v 1.5 1994/12/21 08:20:35 jtc Exp $
 .\"
 .\" Copyright (c) 1990, 1991, 1993, 1994
@@ -44,6 +44,7 @@
 .Nm split
 .Op Fl b Ar byte_count[k|m]
 .Op Fl l Ar line_count
+.Op Fl p Ar pattern
 .Op Ar file Op Ar name
 .Sh DESCRIPTION
 The
@@ -73,6 +74,16 @@ megabyte pieces.
 Create smaller files
 .Ar n
 lines in length.
+.It Fl p Ar pattern
+The file is split whenever an input line matches
+.Ar pattern ,
+which is interpreted as an extended regular expression.
+The matching line will be the first line of the next output file.
+This option is incompatible with the
+.Fl b
+and
+.Fl l
+options.
 .El
 .Pp
 If additional arguments are specified, the first is used as the name
@@ -95,6 +106,10 @@ For historical reasons, if you specify
 can only create 676 separate files.
 The default naming convention allows 2028 separate files.
+.Pp
+The maximum line length for matching patterns is 65536.
+.Sh SEE ALSO
+.Xr re_format 7 .
 .Sh HISTORY
 A
 .Nm split
diff --git a/usr.bin/split/split.c b/usr.bin/split/split.c
index 46ccad9999b..f7b47eb8b6f 100644
--- a/usr.bin/split/split.c
+++ b/usr.bin/split/split.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: split.c,v 1.3 1997/01/15 23:43:14 millert Exp $ */
+/* $OpenBSD: split.c,v 1.4 1999/02/04 03:53:48 millert Exp $ */
 /* $NetBSD: split.c,v 1.5 1995/08/31 22:22:05 jtc Exp $ */
 /*
@@ -43,11 +43,13 @@ static char copyright[] =
 #ifndef lint
 #if 0
 static char sccsid[] = "@(#)split.c 8.3 (Berkeley) 4/25/94";
+#else
+static char rcsid[] = "$OpenBSD: split.c,v 1.4 1999/02/04 03:53:48 millert Exp $";
 #endif
-static char rcsid[] = "$OpenBSD: split.c,v 1.3 1997/01/15 23:43:14 millert Exp $";
 #endif /* not lint */
 #include <sys/param.h>
+#include <sys/types.h>
 #include <ctype.h>
 #include <err.h>
@@ -56,6 +58,8 @@ static char rcsid[] = "$OpenBSD: split.c,v 1.3 1997/01/15 23:43:14 millert Exp $
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <regex.h>
+#include <sysexits.h>
 #define DEFLINE 1000 /* Default num lines per file. */
@@ -65,6 +69,8 @@ int file_open; /* If a file open. */
 int ifd = -1, ofd = -1; /* Input/output file descriptors. */
 char bfr[MAXBSIZE]; /* I/O buffer. */
 char fname[MAXPATHLEN]; /* File name prefix. */
+regex_t rgx;
+int pflag;
 void newfile __P((void));
 void split1 __P((void));
@@ -79,7 +85,7 @@ main(argc, argv)
 	int ch;
 	char *ep, *p;
-	while ((ch = getopt(argc, argv, "-0123456789b:l:")) != -1)
+	while ((ch = getopt(argc, argv, "-0123456789b:l:p:")) != -1)
 		switch (ch) {
 		case '0': case '1': case '2': case '3': case '4':
 		case '5': case '6': case '7': case '8': case '9':
@@ -95,8 +101,8 @@ main(argc, argv)
 					numlines =
 					    strtol(argv[optind] + 1, &ep, 10);
 				if (numlines <= 0 || *ep)
-					errx(1,
-					    "%s: illegal line count.", optarg);
+					errx(EX_USAGE,
+					    "%s: illegal line count", optarg);
 			}
 			break;
 		case '-': /* Undocumented: historic stdin flag. */
@@ -106,18 +112,25 @@ main(argc, argv)
 			break;
 		case 'b': /* Byte count. */
 			if ((bytecnt = strtol(optarg, &ep, 10)) <= 0 ||
-			    *ep != '\0' && *ep != 'k' && *ep != 'm')
-				errx(1, "%s: illegal byte count.", optarg);
+			    (*ep != '\0' && *ep != 'k' && *ep != 'm'))
+				errx(EX_USAGE,
+				    "%s: illegal byte count", optarg);
 			if (*ep == 'k')
 				bytecnt *= 1024;
 			else if (*ep == 'm')
 				bytecnt *= 1048576;
 			break;
+		case 'p' : /* pattern matching. */
+			if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
+				errx(EX_USAGE, "%s: illegal regexp", optarg);
+			pflag = 1;
+			break;
 		case 'l': /* Line count. */
 			if (numlines != 0)
 				usage();
 			if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
-				errx(1, "%s: illegal line count.", optarg);
+				errx(EX_USAGE,
+				    "%s: illegal line count", optarg);
 			break;
 		default:
 			usage();
@@ -128,7 +141,7 @@ main(argc, argv)
 	if (*argv != NULL)
 		if (ifd == -1) { /* Input file. */
 			if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
-				err(1, "%s", *argv);
+				err(EX_NOINPUT, "%s", *argv);
 			++argv;
 		}
 	if (*argv != NULL) /* File name prefix. */
@@ -136,9 +149,12 @@ main(argc, argv)
 	if (*argv != NULL)
 		usage();
+	if (pflag && (numlines != 0 || bytecnt != 0))
+		usage();
+
 	if (numlines == 0)
 		numlines = DEFLINE;
-	else if (bytecnt)
+	else if (bytecnt != 0)
 		usage();
 	if (ifd == -1) /* Stdin by default. */
@@ -149,6 +165,8 @@ main(argc, argv)
 		exit (0);
 	}
 	split2();
+	if (pflag)
+		regfree(&rgx);
 	exit(0);
 }
@@ -164,40 +182,38 @@ split1()
 	char *C;
 	for (bcnt = 0;;)
-		switch (len = read(ifd, bfr, MAXBSIZE)) {
+		switch ((len = read(ifd, bfr, MAXBSIZE))) {
 		case 0:
 			exit(0);
 		case -1:
-			err(1, "read");
+			err(EX_IOERR, "read");
 			/* NOTREACHED */
 		default:
-			if (!file_open) {
+			if (!file_open)
 				newfile();
-				file_open = 1;
-			}
 			if (bcnt + len >= bytecnt) {
 				dist = bytecnt - bcnt;
 				if (write(ofd, bfr, dist) != dist)
-					err(1, "write");
+					err(EX_IOERR, "write");
 				len -= dist;
 				for (C = bfr + dist; len >= bytecnt;
 				    len -= bytecnt, C += bytecnt) {
 					newfile();
 					if (write(ofd,
 					    C, (int)bytecnt) != bytecnt)
-						err(1, "write");
+						err(EX_IOERR, "write");
 				}
-				if (len) {
+				if (len != 0) {
 					newfile();
 					if (write(ofd, C, len) != len)
-						err(1, "write");
+						err(EX_IOERR, "write");
 				} else
 					file_open = 0;
 				bcnt = len;
 			} else {
 				bcnt += len;
 				if (write(ofd, bfr, len) != len)
-					err(1, "write");
+					err(EX_IOERR, "write");
 			}
 		}
 }
@@ -209,40 +225,49 @@ split1()
 void
 split2()
 {
-	long lcnt;
-	int len, bcnt;
-	char *Ce, *Cs;
+	long lcnt = 0;
+	FILE *infp;
-	for (lcnt = 0;;)
-		switch (len = read(ifd, bfr, MAXBSIZE)) {
-		case 0:
-			exit(0);
-		case -1:
-			err(1, "read");
-			/* NOTREACHED */
-		default:
-			if (!file_open) {
+	/* Stick a stream on top of input file descriptor */
+	if ((infp = fdopen(ifd, "r")) == NULL)
+		err(EX_NOINPUT, "fdopen");
+
+	/* Process input one line at a time */
+	while (fgets(bfr, sizeof(bfr), infp) != NULL) {
+		const int len = strlen(bfr);
+
+		/* If line is too long to deal with, just write it out */
+		if (bfr[len - 1] != '\n')
+			goto writeit;
+
+		/* Check if we need to start a new file */
+		if (pflag) {
+			regmatch_t pmatch;
+
+			pmatch.rm_so = 0;
+			pmatch.rm_eo = len - 1;
+			if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
 				newfile();
-				file_open = 1;
-			}
-			for (Cs = Ce = bfr; len--; Ce++)
-				if (*Ce == '\n' && ++lcnt == numlines) {
-					bcnt = Ce - Cs + 1;
-					if (write(ofd, Cs, bcnt) != bcnt)
-						err(1, "write");
-					lcnt = 0;
-					Cs = Ce + 1;
-					if (len)
-						newfile();
-					else
-						file_open = 0;
-				}
-			if (Cs < Ce) {
-				bcnt = Ce - Cs;
-				if (write(ofd, Cs, bcnt) != bcnt)
-					err(1, "write");
-			}
+		} else if (lcnt++ == numlines) {
+			newfile();
+			lcnt = 1;
 		}
+
+writeit:
+		/* Open output file if needed */
+		if (!file_open)
+			newfile();
+
+		/* Write out line */
+		if (write(ofd, bfr, len) != len)
+			err(EX_IOERR, "write");
+	}
+
+	/* EOF or error? */
+	if (ferror(infp))
+		err(EX_IOERR, "read");
+	else
+		exit(0);
 }
 /*
@@ -274,7 +299,7 @@ newfile()
 #define MAXFILES 676
 	if (fnum == MAXFILES) {
 		if (!defname || fname[0] == 'z')
-			errx(1, "too many files.");
+			errx(EX_DATAERR, "too many files");
 		++fname[0];
 		fnum = 0;
 	}
@@ -282,13 +307,17 @@ newfile()
 	fpnt[1] = fnum % 26 + 'a';
 	++fnum;
 	if (!freopen(fname, "w", stdout))
-		err(1, "%s", fname);
+		err(EX_IOERR, "%s", fname);
+	file_open = 1;
 }
 void
 usage()
 {
+	extern char *__progname;
+
 	(void)fprintf(stderr,
-"usage: split [-b byte_count] [-l line_count] [file [prefix]]\n");
-	exit(1);
+"usage: %s [-b byte_count] [-l line_count] [-p pattern] [file [prefix]]\n",
+__progname);
+	exit(EX_USAGE);
 }