summaryrefslogtreecommitdiff
path: root/usr.bin/find
diff options
context:
space:
mode:
authorPascal Stumpf <pascal@cvs.openbsd.org>2012-01-02 23:19:46 +0000
committerPascal Stumpf <pascal@cvs.openbsd.org>2012-01-02 23:19:46 +0000
commit5b13b668414d89ca70d96a584fcd549f49127a25 (patch)
treee1c79067e64f6f080d3e20aa804e5043695a8bc9 /usr.bin/find
parentdae44749145c17a6a9f82a9eed5969a87b162936 (diff)
Add support for "find ... -exec foo {} \+".
From the manpage: If terminated by a plus sign (``+''), the pathnames for which the primary is evaluated are aggregated into sets, and utility will be invoked once per set, similar to xargs(1). If any invocation exits with non-zero exit status, then find will eventually do so as well, but this does not cause find to exit early. The string ``{}'' must appear, and must appear last. Each set is limited to no more than 5,000 pathnames, and is also limited such that the invocation of utility does not exceed ARG_MAX. Code comes from NetBSD, written by John Hawkinson. Tested by eivinde at terraplane dot org and myself for quite some time. ok miod@
Diffstat (limited to 'usr.bin/find')
-rw-r--r--usr.bin/find/extern.h4
-rw-r--r--usr.bin/find/find.150
-rw-r--r--usr.bin/find/find.c44
-rw-r--r--usr.bin/find/find.h19
-rw-r--r--usr.bin/find/function.c256
5 files changed, 312 insertions, 61 deletions
diff --git a/usr.bin/find/extern.h b/usr.bin/find/extern.h
index 737436b5932..476ea0fecc4 100644
--- a/usr.bin/find/extern.h
+++ b/usr.bin/find/extern.h
@@ -1,4 +1,4 @@
-/* * $OpenBSD: extern.h,v 1.16 2011/04/21 01:14:21 jacekm Exp $*/
+/* * $OpenBSD: extern.h,v 1.17 2012/01/02 23:19:45 pascal Exp $*/
/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
@@ -37,10 +37,12 @@ void *emalloc(unsigned int);
PLAN *find_create(char ***);
int find_execute(PLAN *, char **);
PLAN *find_formplan(char **);
+int find_traverse(PLAN *, int (*)(PLAN *, void *), void *);
PLAN *not_squish(PLAN *);
OPTION *option(char *);
PLAN *or_squish(PLAN *);
PLAN *paren_squish(PLAN *);
+int plan_cleanup(PLAN *, void *);
struct stat;
void printlong(char *, char *, struct stat *);
int queryuser(char **);
diff --git a/usr.bin/find/find.1 b/usr.bin/find/find.1
index fe6c637b0e3..8cdb8569171 100644
--- a/usr.bin/find/find.1
+++ b/usr.bin/find/find.1
@@ -1,4 +1,4 @@
-.\" $OpenBSD: find.1,v 1.82 2011/02/13 12:35:02 schwarze Exp $
+.\" $OpenBSD: find.1,v 1.83 2012/01/02 23:19:45 pascal Exp $
.\" Copyright (c) 1990, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
@@ -31,7 +31,7 @@
.\"
.\" from: @(#)find.1 8.1 (Berkeley) 6/6/93
.\"
-.Dd $Mdocdate: February 13 2011 $
+.Dd $Mdocdate: January 2 2012 $
.Dt FIND 1
.Os
.Sh NAME
@@ -189,28 +189,48 @@ True if the current file or directory is empty.
.Op argument ...
.No ;
.Xc
-True if the program named
-.Ar utility
-returns a zero value as its exit status.
+.It Xo
+.Ic -exec Ar utility
+.Op argument ...
+.No {}
++
+.Xc
+Execute the specified
+.Ar utility .
Optional arguments may be passed to the utility.
The expression must be terminated by a semicolon
-.Pq Ql \&; .
+.Pq Ql \&;
+or a plus sign
+.Pq Ql \&+ .
+.Pp
+If terminated by a semicolon, the
+.Ar utility
+is executed once per path.
If the string
.Qq {}
appears anywhere in the utility name or the
arguments it is replaced by the pathname of the current file.
-.Ar utility
-will be executed from the directory from which
-.Nm
-was executed.
.Pp
-Since
+If terminated by a plus sign
+.Pq Dq \&+ ,
+the pathnames for which the
+primary is evaluated are aggregated into sets, and
.Ar utility
-is executed every time a match is made,
-it is often more efficient to pipe the output of
-.Nm
-to
+will be invoked once per set, similar to
.Xr xargs 1 .
+If any invocation exits with non-zero exit status, then
+.Nm
+will eventually do so as well, but this does not cause
+.Nm
+to exit early.
+The string
+.Dq {}
+must appear, and must appear last.
+Each set is limited to no more than 5,000 pathnames,
+and is also limited such that the invocation of
+.Ar utility
+does not exceed
+.Dv ARG_MAX .
.It Xo
.Ic -execdir Ar utility
.Op argument ...
diff --git a/usr.bin/find/find.c b/usr.bin/find/find.c
index 09345aeae7c..ba8808f40fc 100644
--- a/usr.bin/find/find.c
+++ b/usr.bin/find/find.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: find.c,v 1.15 2011/04/21 01:14:21 jacekm Exp $ */
+/* $OpenBSD: find.c,v 1.16 2012/01/02 23:19:45 pascal Exp $ */
/*-
* Copyright (c) 1991, 1993
@@ -148,7 +148,7 @@ find_execute(PLAN *plan, /* search plan */
char **paths) /* array of pathnames to traverse */
{
sigset_t fullset, oset;
- int rval;
+ int r, rval;
PLAN *p;
rval = 0;
@@ -201,5 +201,45 @@ find_execute(PLAN *plan, /* search plan */
;
}
(void)fts_close(tree);
+
+ /*
+ * Cleanup any plans with leftover state.
+ * Keep the last non-zero return value.
+ */
+ if ((r = find_traverse(plan, plan_cleanup, NULL)) != 0)
+ rval = r;
return (rval);
}
+
+/*
+ * find_traverse --
+ * traverse the plan tree and execute func() on all plans. This
+ * does not evaluate each plan's eval() function; it is intended
+ * for operations that must run on all plans, such as state
+ * cleanup.
+ *
+ * If any func() returns non-zero, then so will find_traverse().
+ */
+int
+find_traverse(PLAN *plan, int (*func)(PLAN *, void *), void *arg)
+{
+ PLAN *p;
+ int r, rval;
+
+ /* rval keeps the most recent non-zero result seen; 0 if there was none. */
+ rval = 0;
+ for (p = plan; p; p = p->next) {
+ /* Visit the node itself before anything hanging off it. */
+ if ((r = func(p, arg)) != 0)
+ rval = r;
+ /*
+ * N_EXPR and N_OR nodes: recurse into p_data[0] and p_data[1]
+ * when they are set, passing func and arg through unchanged.
+ * NOTE(review): no other node type is descended into here; if
+ * any other type also stores a subplan in p_data[], func() will
+ * never see that subplan -- confirm against the node types.
+ */
+ if (p->type == N_EXPR || p->type == N_OR) {
+ if (p->p_data[0])
+ if ((r = find_traverse(p->p_data[0],
+ func, arg)) != 0)
+ rval = r;
+ if (p->p_data[1])
+ if ((r = find_traverse(p->p_data[1],
+ func, arg)) != 0)
+ rval = r;
+ }
+ }
+ return rval;
+}
diff --git a/usr.bin/find/find.h b/usr.bin/find/find.h
index dc1a4ef8679..5226a26115a 100644
--- a/usr.bin/find/find.h
+++ b/usr.bin/find/find.h
@@ -1,4 +1,4 @@
-/* * $OpenBSD: find.h,v 1.14 2004/09/15 18:43:25 deraadt Exp $*/
+/* * $OpenBSD: find.h,v 1.15 2012/01/02 23:19:45 pascal Exp $*/
/*-
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
@@ -54,6 +54,7 @@ typedef struct _plandata {
#define F_LESSTHAN 2
#define F_GREATER 3
#define F_NEEDOK 1 /* exec ok */
+#define F_PLUSSET 2 /* -exec ... {} + */
#define F_MTFLAG 1 /* fstype */
#define F_MTTYPE 2
#define F_ATLEAST 1 /* perm */
@@ -77,6 +78,13 @@ typedef struct _plandata {
char **_e_argv; /* argv array */
char **_e_orig; /* original strings */
int *_e_len; /* allocated length */
+ char **_ep_bxp; /* ptr to 1st addt'l arg */
+ char *_ep_p; /* current buffer pointer */
+ char *_ep_bbp; /* begin buffer pointer */
+ char *_ep_ebp; /* end buffer pointer */
+ int _ep_maxargs; /* max #args */
+ int _ep_narg; /* # addt'l args */
+ int _ep_rval; /* return value */
} ex;
char *_a_data[2]; /* array of char pointers */
char *_c_data; /* char pointer */
@@ -104,6 +112,15 @@ typedef struct _plandata {
#define e_argv p_un.ex._e_argv
#define e_orig p_un.ex._e_orig
#define e_len p_un.ex._e_len
+#define ep_p p_un.ex._ep_p
+#define ep_bbp p_un.ex._ep_bbp
+#define ep_ebp p_un.ex._ep_ebp
+#define ep_bxp p_un.ex._ep_bxp
+#define ep_cnt p_un.ex._ep_cnt
+#define ep_maxargs p_un.ex._ep_maxargs
+#define ep_nline p_un.ex._ep_nline
+#define ep_narg p_un.ex._ep_narg
+#define ep_rval p_un.ex._ep_rval
typedef struct _option {
char *name; /* option name */
diff --git a/usr.bin/find/function.c b/usr.bin/find/function.c
index 0d59e06ce35..59002dcc44f 100644
--- a/usr.bin/find/function.c
+++ b/usr.bin/find/function.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: function.c,v 1.36 2010/12/01 01:20:29 millert Exp $ */
+/* $OpenBSD: function.c,v 1.37 2012/01/02 23:19:45 pascal Exp $ */
/*-
* Copyright (c) 1990, 1993
@@ -46,6 +46,7 @@
#include <fts.h>
#include <grp.h>
#include <libgen.h>
+#include <limits.h>
#include <pwd.h>
#include <stdio.h>
#include <stdlib.h>
@@ -71,6 +72,7 @@
static PLAN *palloc(enum ntype, int (*)(PLAN *, FTSENT *));
static long find_parsenum(PLAN *plan, char *option, char *vp, char *endch);
+static void run_f_exec(PLAN *plan);
static PLAN *palloc(enum ntype t, int (*f)(PLAN *, FTSENT *));
int f_amin(PLAN *, FTSENT *);
@@ -339,38 +341,108 @@ c_empty(char *ignore, char ***ignored, int unused)
/*
* [-exec | -ok] utility [arg ... ] ; functions --
+ * [-exec | -ok] utility [arg ... ] {} + functions --
*
- * True if the executed utility returns a zero value as exit status.
- * The end of the primary expression is delimited by a semicolon. If
- * "{}" occurs anywhere, it gets replaced by the current pathname.
- * The current directory for the execution of utility is the same as
- * the current directory when the find utility was started.
+ * If the end of the primary expression is delimited by a
+ * semicolon: true if the executed utility returns a zero value
+ * as exit status. If "{}" occurs anywhere, it gets replaced by
+ * the current pathname.
*
- * The primary -ok is different in that it requests affirmation of the
- * user before executing the utility.
+ * If the end of the primary expression is delimited by a plus
+ * sign: always true. Pathnames for which the primary is
+ * evaluated shall be aggregated into sets. The utility will be
+ * executed once per set, with "{}" replaced by the entire set of
+ * pathnames (as if xargs). "{}" must appear last.
+ *
+ * The current directory for the execution of utility is the same
+ * as the current directory when the find utility was started.
+ *
+ * The primary -ok is different in that it requests affirmation
+ * of the user before executing the utility.
*/
int
f_exec(PLAN *plan, FTSENT *entry)
{
- int cnt;
+ int cnt, l;
pid_t pid;
int status;
- for (cnt = 0; plan->e_argv[cnt]; ++cnt)
- if (plan->e_len[cnt])
- brace_subst(plan->e_orig[cnt], &plan->e_argv[cnt],
- entry->fts_path, plan->e_len[cnt]);
+ if (plan->flags & F_PLUSSET) {
+ /*
+ * Confirm sufficient buffer space, then copy the path
+ * to the buffer.
+ */
+ l = strlen(entry->fts_path);
+ if (plan->ep_p + l < plan->ep_ebp) {
+ plan->ep_bxp[plan->ep_narg++] = plan->ep_p;
+ strlcpy(plan->ep_p, entry->fts_path, l + 1);
+ plan->ep_p += l + 1;
+
+ if (plan->ep_narg == plan->ep_maxargs)
+ run_f_exec(plan);
+ } else {
+ /*
+ * Without sufficient space to copy in the next
+ * argument, run the command to empty out the
+ * buffer before re-attempting the copy.
+ */
+ run_f_exec(plan);
+ if (plan->ep_p + l < plan->ep_ebp) {
+ plan->ep_bxp[plan->ep_narg++] = plan->ep_p;
+ strlcpy(plan->ep_p, entry->fts_path, l + 1);
+ plan->ep_p += l + 1;
+ } else
+ errx(1, "insufficient space for argument");
+ }
+ return (1);
+ } else {
+ for (cnt = 0; plan->e_argv[cnt]; ++cnt)
+ if (plan->e_len[cnt])
+ brace_subst(plan->e_orig[cnt],
+ &plan->e_argv[cnt],
+ entry->fts_path,
+ plan->e_len[cnt]);
+ if (plan->flags & F_NEEDOK && !queryuser(plan->e_argv))
+ return (0);
- if (plan->flags == F_NEEDOK && !queryuser(plan->e_argv))
- return (0);
+ /* don't mix output of command with find output */
+ fflush(stdout);
+ fflush(stderr);
+
+ switch (pid = vfork()) {
+ case -1:
+ err(1, "fork");
+ /* NOTREACHED */
+ case 0:
+ if (fchdir(dotfd)) {
+ warn("chdir");
+ _exit(1);
+ }
+ execvp(plan->e_argv[0], plan->e_argv);
+ warn("%s", plan->e_argv[0]);
+ _exit(1);
+ }
+ pid = waitpid(pid, &status, 0);
+ return (pid != -1 && WIFEXITED(status) && !WEXITSTATUS(status));
+ }
+}
- /* don't mix output of command with find output */
- fflush(stdout);
- fflush(stderr);
+static void
+run_f_exec(PLAN *plan)
+{
+ pid_t pid;
+ int rval, status;
+
+ /* Ensure arg list is null terminated. */
+ plan->ep_bxp[plan->ep_narg] = NULL;
+
+ /* Don't mix output of command with find output. */
+ fflush(stdout);
+ fflush(stderr);
switch (pid = vfork()) {
case -1:
- err(1, "fork");
+ err(1, "vfork");
/* NOTREACHED */
case 0:
if (fchdir(dotfd)) {
@@ -381,8 +453,26 @@ f_exec(PLAN *plan, FTSENT *entry)
warn("%s", plan->e_argv[0]);
_exit(1);
}
+
+ /* Clear out the argument list. */
+ plan->ep_narg = 0;
+ plan->ep_bxp[plan->ep_narg] = NULL;
+ /* As well as the argument buffer. */
+ plan->ep_p = plan->ep_bbp;
+ *plan->ep_p = '\0';
+
pid = waitpid(pid, &status, 0);
- return (pid != -1 && WIFEXITED(status) && !WEXITSTATUS(status));
+ if (WIFEXITED(status))
+ rval = WEXITSTATUS(status);
+ else
+ rval = -1;
+
+ /*
+ * If we have a non-zero exit status, preserve it so find(1) can
+ * later exit with it.
+ */
+ if (rval)
+ plan->ep_rval = rval;
}
/*
@@ -391,12 +481,16 @@ f_exec(PLAN *plan, FTSENT *entry)
* on the command line, one with (possibly duplicated) pointers to the
* argv array, and one with integer values that are lengths of the
* strings, but also flags meaning that the string has to be massaged.
+ *
+ * If -exec ... {} +, use only the first array, but make it large
+ * enough to hold 5000 args (cf. src/usr.bin/xargs/xargs.c for a
+ * discussion), and then allocate ARG_MAX - 4K of space for args.
*/
PLAN *
c_exec(char *unused, char ***argvp, int isok)
{
PLAN *new; /* node returned */
- int cnt;
+ int cnt, brace, lastbrace;
char **argv, **ap, *p;
/* make sure the current directory is readable */
@@ -407,36 +501,93 @@ c_exec(char *unused, char ***argvp, int isok)
new = palloc(N_EXEC, f_exec);
if (isok)
- new->flags = F_NEEDOK;
+ new->flags |= F_NEEDOK;
- for (ap = argv = *argvp;; ++ap) {
+ /*
+ * Terminate if we encounter an arg exactly equal to ";", or an
+ * arg exactly equal to "+" following an arg exactly equal to
+ * "{}".
+ */
+ for (ap = argv = *argvp, brace = 0;; ++ap) {
if (!*ap)
- errx(1,
- "%s: no terminating \";\"", isok ? "-ok" : "-exec");
- if (**ap == ';')
+ errx(1, "%s: no terminating \";\" or \"+\"",
+ isok ? "-ok" : "-exec");
+ lastbrace = brace;
+ brace = 0;
+ if (strcmp(*ap, "{}") == 0)
+ brace = 1;
+ if (strcmp(*ap, ";") == 0)
break;
+ if (strcmp(*ap, "+") == 0 && lastbrace) {
+ new->flags |= F_PLUSSET;
+ break;
+ }
}
- cnt = ap - *argvp + 1;
- new->e_argv = (char **)emalloc((u_int)cnt * sizeof(char *));
- new->e_orig = (char **)emalloc((u_int)cnt * sizeof(char *));
- new->e_len = (int *)emalloc((u_int)cnt * sizeof(int));
- for (argv = *argvp, cnt = 0; argv < ap; ++argv, ++cnt) {
- new->e_orig[cnt] = *argv;
- for (p = *argv; *p; ++p)
- if (p[0] == '{' && p[1] == '}') {
- new->e_argv[cnt] = emalloc((u_int)MAXPATHLEN);
- new->e_len[cnt] = MAXPATHLEN;
- break;
+ /*
+ * POSIX says -ok ... {} + "need not be supported," and it does
+ * not make much sense anyway.
+ */
+ if (new->flags & F_NEEDOK && new->flags & F_PLUSSET)
+ errx(1, "-ok: terminating \"+\" not permitted.");
+
+ if (new->flags & F_PLUSSET) {
+ u_int c, bufsize;
+
+ cnt = ap - *argvp - 1; /* units are words */
+ new->ep_maxargs = 5000;
+ new->e_argv = (char **)emalloc((u_int)(cnt + new->ep_maxargs)
+ * sizeof(char **));
+
+ /* We start stuffing arguments after the user's last one. */
+ new->ep_bxp = &new->e_argv[cnt];
+ new->ep_narg = 0;
+
+ /*
+ * Count up the space of the user's arguments, and
+ * subtract that from what we allocate.
+ */
+ for (argv = *argvp, c = 0, cnt = 0;
+ argv < ap;
+ ++argv, ++cnt) {
+ c += strlen(*argv) + 1;
+ new->e_argv[cnt] = *argv;
+ }
+ bufsize = ARG_MAX - 4 * 1024 - c;
+
+
+ /*
+ * Allocate, and then initialize current, base, and
+ * end pointers.
+ */
+ new->ep_p = new->ep_bbp = malloc(bufsize + 1);
+ new->ep_ebp = new->ep_bbp + bufsize - 1;
+ new->ep_rval = 0;
+ } else { /* !F_PLUSSET */
+ cnt = ap - *argvp + 1;
+ new->e_argv = (char **)emalloc((u_int)cnt * sizeof(char *));
+ new->e_orig = (char **)emalloc((u_int)cnt * sizeof(char *));
+ new->e_len = (int *)emalloc((u_int)cnt * sizeof(int));
+
+ for (argv = *argvp, cnt = 0; argv < ap; ++argv, ++cnt) {
+ new->e_orig[cnt] = *argv;
+ for (p = *argv; *p; ++p)
+ if (p[0] == '{' && p[1] == '}') {
+ new->e_argv[cnt] =
+ emalloc((u_int)MAXPATHLEN);
+ new->e_len[cnt] = MAXPATHLEN;
+ break;
+ }
+ if (!*p) {
+ new->e_argv[cnt] = *argv;
+ new->e_len[cnt] = 0;
}
- if (!*p) {
- new->e_argv[cnt] = *argv;
- new->e_len[cnt] = 0;
}
- }
- new->e_argv[cnt] = new->e_orig[cnt] = NULL;
+ new->e_orig[cnt] = NULL;
+ }
+ new->e_argv[cnt] = NULL;
*argvp = argv + 1;
return (new);
}
@@ -1441,6 +1592,27 @@ c_or(char *ignore, char ***ignored, int unused)
return (palloc(N_OR, f_or));
}
+
+/*
+ * plan_cleanup --
+ *	Check and see if the specified plan has any residual state,
+ *	and if so, clean it up as appropriate.
+ *
+ *	At the moment, only N_EXEC has state.  Two kinds: 1)
+ *	lists of files to feed to subprocesses 2) state on exit
+ *	statuses of past subprocesses.
+ *
+ *	plan is the node to clean up; arg is unused.  Returns the
+ *	exit status saved for an "-exec ... {} +" plan (0 if no
+ *	invocation failed), and 0 for every other kind of plan.
+ */
+/* ARGSUSED1 */
+int
+plan_cleanup(PLAN *plan, void *arg)
+{
+	/*
+	 * ep_narg and ep_rval are only set up by c_exec() when the
+	 * primary was terminated by "+" (F_PLUSSET).  For any other
+	 * plan they are not initialized by the exec code, so do not
+	 * read them: that could run the utility on a plan that has no
+	 * batching state, or report a bogus exit status.
+	 */
+	if (plan->type != N_EXEC || !(plan->flags & F_PLUSSET))
+		return (0);
+
+	/* Flush any pathnames still queued for the utility. */
+	if (plan->ep_narg)
+		run_f_exec(plan);
+
+	return (plan->ep_rval);	/* Pass saved exit status up the chain. */
+}
+
+
static PLAN *
palloc(enum ntype t, int (*f)(PLAN *, FTSENT *))
{