src - OpenBSD base system

diff options


context:
space:
mode:

author	Todd C. Miller <millert@cvs.openbsd.org>	2006-07-17 16:38:03 +0000
committer	Todd C. Miller <millert@cvs.openbsd.org>	2006-07-17 16:38:03 +0000
commit	7cfd06bd2b22620766244d0230a3c02a74a76948 (patch)
tree	7f730165122e9da1e926ab469e5b1c51bb8e7fe4
parent	8c6fdeb87a7c24ef2177d320f59d8e4af845117d (diff)

Add csplit(1) from FreeBSD; with man page tweaks from jmc@.

OK deraadt@

Diffstat

-rw-r--r--

usr.bin/Makefile

-rw-r--r--

usr.bin/csplit/Makefile

-rw-r--r--

usr.bin/csplit/csplit.1

161

-rw-r--r--

usr.bin/csplit/csplit.c

469

4 files changed, 638 insertions, 3 deletions

diff --git a/usr.bin/Makefile b/usr.bin/Makefile
index 2ddccc67dde..4577abf5be6 100644
--- a/usr.bin/Makefile
+++ b/usr.bin/Makefile

@@ -1,11 +1,11 @@

-# $OpenBSD: Makefile,v 1.100 2006/04/10 16:41:10 joris Exp $

+# $OpenBSD: Makefile,v 1.101 2006/07/17 16:38:02 millert Exp $

.include <bsd.own.mk>

SUBDIR= apply apropos ar arch asa asn1_compile at aucat audioctl awk banner \

basename bc bdes \

- biff cal calendar cap_mkdb cdio checknr chpass cmp col \

- colcrt colrm column comm compile_et compress cpp crontab ctags cut \

+ biff cal calendar cap_mkdb cdio checknr chpass cmp col colcrt colrm \

+ column comm compile_et compress cpp crontab csplit ctags cut \

dc deroff diff diff3 dirname du elf2olf encrypt env expand false file \

file2c find fgen finger fmt fold from fsplit fstat ftp gencat getcap \

getconf getent getopt gprof grep gzsig head hexdump id indent \

diff --git a/usr.bin/csplit/Makefile b/usr.bin/csplit/Makefile
new file mode 100644
index 00000000000..2ae70ef226c
--- /dev/null
+++ b/usr.bin/csplit/Makefile

@@ -0,0 +1,5 @@

+# $OpenBSD: Makefile,v 1.1 2006/07/17 16:38:02 millert Exp $

+PROG= csplit

+.include <bsd.prog.mk>

diff --git a/usr.bin/csplit/csplit.1 b/usr.bin/csplit/csplit.1
new file mode 100644
index 00000000000..f55cf993b3c
--- /dev/null
+++ b/usr.bin/csplit/csplit.1

@@ -0,0 +1,161 @@

+.\" $OpenBSD: csplit.1,v 1.1 2006/07/17 16:38:02 millert Exp $

+.\"

+.\" Redistribution and use in source and binary forms, with or without

+.\" modification, are permitted provided that the following conditions

+.\" are met:

+.\" 1. Redistributions of source code must retain the above copyright

+.\" notice, this list of conditions and the following disclaimer.

+.\" 2. Redistributions in binary form must reproduce the above copyright

+.\" notice, this list of conditions and the following disclaimer in the

+.\" documentation and/or other materials provided with the distribution.

+.\"

+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND

+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE

+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS

+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

+.\" SUCH DAMAGE.

+.\"

+.\" $FreeBSD: src/usr.bin/csplit/csplit.1,v 1.11 2005/01/25 22:29:51 tjr Exp $

+.\"

+.Dd January 26, 2005

+.Dt CSPLIT 1

+.Os

+.Sh NAME

+.Nm csplit

+.Nd split files based on context

+.Sh SYNOPSIS

+.Nm

+.Op Fl ks

+.Op Fl f Ar prefix

+.Op Fl n Ar number

+.Ar file args ...

+.Sh DESCRIPTION

+The

+.Nm

+utility splits

+.Ar file

+into pieces using the patterns

+.Ar args .

+If

+.Ar file

+is

+a dash

+.Pq Sq - ,

+.Nm

+reads from standard input.

+.Pp

+The options are as follows:

+.Bl -tag -width indent

+.It Fl f Ar prefix

+Give created files names beginning with

+.Ar prefix .

+The default is

+.Dq xx .

+.It Fl k

+Do not remove output files if an error occurs or a

+.Dv HUP ,

+.Dv INT ,

+or

+.Dv TERM

+signal is received.

+.It Fl n Ar number

+Use

+.Ar number

+of decimal digits after the

+.Ar prefix

+to form the file name.

+The default is 2.

+.It Fl s

+Do not write the size of each output file to standard output as it is

+created.

+.El

+.Pp

+The

+.Ar args

+operands may be a combination of the following patterns:

+.Bl -tag -width indent

+.It Xo

+.Sm off

+.No / Ar regexp No /

+.Op Oo Cm + | - Oc Ar offset

+.Sm on

+.Xc

+Create a file containing the input from the current line to (but not including)

+the next line matching the given basic regular expression.

+An optional

+.Ar offset

+from the line that matched may be specified.

+.It Xo

+.Sm off

+.No % Ar regexp No %

+.Op Oo Cm + | - Oc Ar offset

+.Sm on

+.Xc

+Same as above but a file is not created for the output.

+.It Ar line_no

+Create a file containing the input from the current line to (but not including)

+the specified line number.

+.It { Ns Ar num Ns }

+Repeat the previous pattern the specified number of times.

+If it follows a line number pattern, a new file will be created for each

+.Ar line_no

+lines,

+.Ar num

+times.

+The first line of the file is line number 1 for historic reasons.

+.El

+.Pp

+After all the patterns have been processed, the remaining input data

+(if there is any) will be written to a new file.

+.Pp

+Requesting to split at a line before the current line number or past the

+end of the file will result in an error.

+.Pp

+.Ex -std

+.Sh ENVIRONMENT

+The

+.Ev LANG , LC_ALL , LC_COLLATE ,

+and

+.Ev LC_CTYPE

+environment variables affect the execution of

+.Nm

+as described in

+.Xr environ 7 .

+.Sh EXAMPLES

+Split the

+.Xr mdoc 7

+file

+.Pa foo.1

+into one file for each section (up to 20):

+.Pp

+.Dl "csplit -k foo.1 '%^\e.Sh%' '/^\e.Sh/' '{20}'"

+.Pp

+Split standard input after the first 99 lines and every 100 lines thereafter:

+.Pp

+.Dl "csplit -k - 100 '{19}'"

+.Sh SEE ALSO

+.Xr sed 1 ,

+.Xr split 1 ,

+.Xr re_format 7

+.Sh STANDARDS

+The

+.Nm

+utility conforms to

+.St -p1003.1-2004 .

+.Sh HISTORY

+A

+.Nm

+command appeared in PWB UNIX.

+.Sh BUGS

+Input lines are limited to

+.Dv LINE_MAX

+(2048) bytes in length.

diff --git a/usr.bin/csplit/csplit.c b/usr.bin/csplit/csplit.c
new file mode 100644
index 00000000000..9c257f836e7
--- /dev/null
+++ b/usr.bin/csplit/csplit.c

@@ -0,0 +1,469 @@

+/* $OpenBSD: csplit.c,v 1.1 2006/07/17 16:38:02 millert Exp $ */

+/* $FreeBSD: src/usr.bin/csplit/csplit.c,v 1.9 2004/03/22 11:15:03 tjr Exp $ */

+/*-

+ *

+ * Redistribution and use in source and binary forms, with or without

+ * modification, are permitted provided that the following conditions

+ * are met:

+ * 1. Redistributions of source code must retain the above copyright

+ * notice, this list of conditions and the following disclaimer.

+ * 2. Redistributions in binary form must reproduce the above copyright

+ * notice, this list of conditions and the following disclaimer in the

+ * documentation and/or other materials provided with the distribution.

+ *

+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND

+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE

+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS

+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

+ * SUCH DAMAGE.

+ */

+/*

+ * csplit -- split files based on context

+ *

+ * This utility splits its input into numbered output files by line number

+ * or by a regular expression. Regular expression matches have an optional

+ * offset with them, allowing the split to occur a specified number of

+ * lines before or after the match.

+ *

+ * To handle negative offsets, we stop reading when the match occurs and

+ * store the offset that the file should have been split at, then use

+ * this output file as input until all the "overflowed" lines have been read.

+ * The file is then closed and truncated to the correct length.

+ *

+ * We assume that the output files can be seeked upon (i.e. they cannot be

+ * symlinks to named pipes or character devices), but make no such

+ * assumption about the input.

+ */

+#include <sys/types.h>

+#include <ctype.h>

+#include <err.h>

+#include <errno.h>

+#include <limits.h>

+#include <locale.h>

+#include <regex.h>

+#include <signal.h>

+#include <stdint.h>

+#include <stdio.h>

+#include <stdlib.h>

+#include <string.h>

+#include <unistd.h>

+void cleanup(void);

+void do_lineno(const char *);

+void do_rexp(const char *);

+char *getline(void);

+void handlesig(int);

+FILE *newfile(void);

+void toomuch(FILE *, long);

+void usage(void);

+/*

+ * Command line options

+ */

+const char *prefix; /* File name prefix */

+long sufflen; /* Number of decimal digits for suffix */

+int sflag; /* Suppress output of file sizes */

+int kflag; /* Keep output if error occurs */

+/*

+ * Other miscellaneous globals (XXX too many)

+ */

+long lineno; /* Current line number in input file */

+long reps; /* Number of repetitions for this pattern */

+long nfiles; /* Number of files output so far */

+long maxfiles; /* Maximum number of files we can create */

+char currfile[PATH_MAX]; /* Current output file */

+const char *infn; /* Name of the input file */

+FILE *infile; /* Input file handle */

+FILE *overfile; /* Overflow file for toomuch() */

+off_t truncofs; /* Offset this file should be truncated at */

+int doclean; /* Should cleanup() remove output? */

+int

+main(int argc, char *argv[])

+ struct sigaction sa;

+ long i;

+ int ch;

+ const char *expr;

+ char *ep, *p;

+ FILE *ofp;

+ setlocale(LC_ALL, "");

+ kflag = sflag = 0;

+ prefix = "xx";

+ sufflen = 2;

+ while ((ch = getopt(argc, argv, "f:kn:s")) != -1) {

+ switch (ch) {

+ case 'f':

+ prefix = optarg;

+ break;

+ case 'k':

+ kflag = 1;

+ break;

+ case 'n':

+ errno = 0;

+ sufflen = strtol(optarg, &ep, 10);

+ if (sufflen <= 0 || *ep != '\0' || errno != 0)

+ errx(1, "%s: bad suffix length", optarg);

+ break;

+ case 's':

+ sflag = 1;

+ break;

+ default:

+ usage();

+ /*NOTREACHED*/

+ }

+ if (sufflen + strlen(prefix) >= PATH_MAX)

+ errx(1, "name too long");

+ argc -= optind;

+ argv += optind;

+ if ((infn = *argv++) == NULL)

+ usage();

+ if (strcmp(infn, "-") == 0) {

+ infile = stdin;

+ infn = "stdin";

+ } else if ((infile = fopen(infn, "r")) == NULL)

+ err(1, "%s", infn);

+ if (!kflag) {

+ doclean = 1;

+ atexit(cleanup);

+ sa.sa_flags = 0;

+ sa.sa_handler = handlesig;

+ sigemptyset(&sa.sa_mask);

+ sigaddset(&sa.sa_mask, SIGHUP);

+ sigaddset(&sa.sa_mask, SIGINT);

+ sigaddset(&sa.sa_mask, SIGTERM);

+ sigaction(SIGHUP, &sa, NULL);

+ sigaction(SIGINT, &sa, NULL);

+ sigaction(SIGTERM, &sa, NULL);

+ }

+ lineno = 0;

+ nfiles = 0;

+ truncofs = 0;

+ overfile = NULL;

+ /* Ensure 10^sufflen < LONG_MAX. */

+ for (maxfiles = 1, i = 0; i < sufflen; i++) {

+ if (maxfiles > LONG_MAX / 10)

+ errx(1, "%ld: suffix too long (limit %ld)",

+ sufflen, i);

+ maxfiles *= 10;

+ }

+ /* Create files based on supplied patterns. */

+ while (nfiles < maxfiles - 1 && (expr = *argv++) != NULL) {

+ /* Look ahead & see if this pattern has any repetitions. */

+ if (*argv != NULL && **argv == '{') {

+ errno = 0;

+ reps = strtol(*argv + 1, &ep, 10);

+ if (reps < 0 || *ep != '}' || errno != 0)

+ errx(1, "%s: bad repetition count", *argv + 1);

+ argv++;

+ } else

+ reps = 0;

+ if (*expr == '/' || *expr == '%') {

+ do {

+ do_rexp(expr);

+ } while (reps-- != 0 && nfiles < maxfiles - 1);

+ } else if (isdigit((unsigned char)*expr))

+ do_lineno(expr);

+ else

+ errx(1, "%s: unrecognised pattern", expr);

+ }

+ /* Copy the rest into a new file. */

+ if (!feof(infile)) {

+ ofp = newfile();

+ while ((p = getline()) != NULL && fputs(p, ofp) == 0)

+ ;

+ if (!sflag)

+ printf("%jd\n", (intmax_t)ftello(ofp));

+ if (fclose(ofp) != 0)

+ err(1, "%s", currfile);

+ }

+ toomuch(NULL, 0);

+ doclean = 0;

+ return (0);

+void

+usage(void)

+ extern char *__progname;

+ fprintf(stderr,

+ "usage: %s [-ks] [-f prefix] [-n number] file args ...\n",

+ __progname);

+ exit(1);

+void

+handlesig(int sig)

+ const char msg[] = "csplit: caught signal, cleaning up\n";

+ write(STDERR_FILENO, msg, sizeof(msg) - 1);

+ cleanup();

+ _exit(2);

+/* Create a new output file. */

+FILE *

+newfile(void)

+ FILE *fp;

+ if ((size_t)snprintf(currfile, sizeof(currfile), "%s%0*ld", prefix,

+ (int)sufflen, nfiles) >= sizeof(currfile))

+ errx(1, "%s: %s", currfile, strerror(ENAMETOOLONG));

+ if ((fp = fopen(currfile, "w+")) == NULL)

+ err(1, "%s", currfile);

+ nfiles++;

+ return (fp);

+/* Remove partial output, called before exiting. */

+void

+cleanup(void)

+ char fnbuf[PATH_MAX];

+ long i;

+ if (!doclean)

+ return;

+ /*

+ * NOTE: One cannot portably assume to be able to call snprintf()

+ * from inside a signal handler. It does, however, appear to be safe

+ * to do on FreeBSD. The solution to this problem is worse than the

+ * problem itself.

+ */

+ for (i = 0; i < nfiles; i++) {

+ snprintf(fnbuf, sizeof(fnbuf), "%s%0*ld", prefix,

+ (int)sufflen, i);

+ unlink(fnbuf);

+ }

+/* Read a line from the input into a static buffer. */

+char *

+getline(void)

+ static char lbuf[LINE_MAX];

+ FILE *src;

+ src = overfile != NULL ? overfile : infile;

+again: if (fgets(lbuf, sizeof(lbuf), src) == NULL) {

+ if (src == overfile) {

+ src = infile;

+ goto again;

+ }

+ return (NULL);

+ }

+ if (ferror(src))

+ err(1, "%s", infn);

+ lineno++;

+ return (lbuf);

+/* Conceptually rewind the input (as obtained by getline()) back `n' lines. */

+void

+toomuch(FILE *ofp, long n)

+ char buf[BUFSIZ];

+ size_t i, nread;

+ if (overfile != NULL) {

+ /*

+ * Truncate the previous file we overflowed into back to

+ * the correct length, close it.

+ */

+ if (fflush(overfile) != 0)

+ err(1, "overflow");

+ if (ftruncate(fileno(overfile), truncofs) != 0)

+ err(1, "overflow");

+ if (fclose(overfile) != 0)

+ err(1, "overflow");

+ overfile = NULL;

+ }

+ if (n == 0)

+ /* Just tidying up */

+ return;

+ lineno -= n;

+ /*

+ * Wind the overflow file backwards to `n' lines before the

+ * current one.

+ */

+ do {

+ if (ftello(ofp) < (off_t)sizeof(buf))

+ rewind(ofp);

+ else

+ fseeko(ofp, -(off_t)sizeof(buf), SEEK_CUR);

+ if (ferror(ofp))

+ errx(1, "%s: can't seek", currfile);

+ if ((nread = fread(buf, 1, sizeof(buf), ofp)) == 0)

+ errx(1, "can't read overflowed output");

+ if (fseeko(ofp, -(off_t)nread, SEEK_CUR) != 0)

+ err(1, "%s", currfile);

+ for (i = 1; i <= nread; i++)

+ if (buf[nread - i] == '\n' && n-- == 0)

+ break;

+ if (ftello(ofp) == 0)

+ break;

+ } while (n > 0);

+ if (fseeko(ofp, (off_t)(nread - i + 1), SEEK_CUR) != 0)

+ err(1, "%s", currfile);

+ /*

+ * getline() will read from here. Next call will truncate to

+ * truncofs in this file.

+ */

+ overfile = ofp;

+ truncofs = ftello(overfile);

+/* Handle splits for /regexp/ and %regexp% patterns. */

+void

+do_rexp(const char *expr)

+ regex_t cre;

+ intmax_t nwritten;

+ long ofs;

+ int first;

+ char *ecopy, *ep, *p, *pofs, *re;

+ FILE *ofp;

+ if ((ecopy = strdup(expr)) == NULL)

+ err(1, "strdup");

+ re = ecopy + 1;

+ if ((pofs = strrchr(ecopy, *expr)) == NULL || pofs[-1] == '\\')

+ errx(1, "%s: missing trailing %c", expr, *expr);

+ *pofs++ = '\0';

+ if (*pofs != '\0') {

+ errno = 0;

+ ofs = strtol(pofs, &ep, 10);

+ if (*ep != '\0' || errno != 0)

+ errx(1, "%s: bad offset", pofs);

+ } else

+ ofs = 0;

+ if (regcomp(&cre, re, REG_BASIC|REG_NOSUB) != 0)

+ errx(1, "%s: bad regular expression", re);

+ if (*expr == '/')

+ /* /regexp/: Save results to a file. */

+ ofp = newfile();

+ else {

+ /* %regexp%: Make a temporary file for overflow. */

+ if ((ofp = tmpfile()) == NULL)

+ err(1, "tmpfile");

+ }

+ /* Read and output lines until we get a match. */

+ first = 1;

+ while ((p = getline()) != NULL) {

+ if (fputs(p, ofp) != 0)

+ break;

+ if (!first && regexec(&cre, p, 0, NULL, 0) == 0)

+ break;

+ first = 0;

+ }

+ if (p == NULL)

+ errx(1, "%s: no match", re);

+ if (ofs <= 0) {

+ /*

+ * Negative (or zero) offset: throw back any lines we should

+ * not have read yet.

+ */

+ if (p != NULL) {

+ toomuch(ofp, -ofs + 1);

+ nwritten = (intmax_t)truncofs;

+ } else

+ nwritten = (intmax_t)ftello(ofp);

+ } else {

+ /*

+ * Positive offset: copy the requested number of lines

+ * after the match.

+ */

+ while (--ofs > 0 && (p = getline()) != NULL)

+ fputs(p, ofp);

+ toomuch(NULL, 0);

+ nwritten = (intmax_t)ftello(ofp);

+ if (fclose(ofp) != 0)

+ err(1, "%s", currfile);

+ }

+ if (!sflag && *expr == '/')

+ printf("%jd\n", nwritten);

+ regfree(&cre);

+ free(ecopy);

+/* Handle splits based on line number. */

+void

+do_lineno(const char *expr)

+ long lastline, tgtline;

+ char *ep, *p;

+ FILE *ofp;

+ errno = 0;

+ tgtline = strtol(expr, &ep, 10);

+ if (tgtline <= 0 || errno != 0 || *ep != '\0')

+ errx(1, "%s: bad line number", expr);

+ lastline = tgtline;

+ if (lastline <= lineno)

+ errx(1, "%s: can't go backwards", expr);

+ while (nfiles < maxfiles - 1) {

+ ofp = newfile();

+ while (lineno + 1 != lastline) {

+ if ((p = getline()) == NULL)

+ errx(1, "%ld: out of range", lastline);

+ if (fputs(p, ofp) != 0)

+ break;

+ }

+ if (!sflag)

+ printf("%jd\n", (intmax_t)ftello(ofp));

+ if (fclose(ofp) != 0)

+ err(1, "%s", currfile);

+ if (reps-- == 0)

+ break;

+ lastline += tgtline;

+ }