diff options
author | Pascal Stumpf <pascal@cvs.openbsd.org> | 2012-01-02 23:19:46 +0000 |
---|---|---|
committer | Pascal Stumpf <pascal@cvs.openbsd.org> | 2012-01-02 23:19:46 +0000 |
commit | 5b13b668414d89ca70d96a584fcd549f49127a25 (patch) | |
tree | e1c79067e64f6f080d3e20aa804e5043695a8bc9 /usr.bin/find | |
parent | dae44749145c17a6a9f82a9eed5969a87b162936 (diff) |
Add support for "find ... -exec foo {} \+".
From the manpage:
If terminated by a plus sign (``+''), the pathnames for which the
primary is evaluated are aggregated into sets, and utility will
be invoked once per set, similar to xargs(1). If any invocation
exits with non-zero exit status, then find will eventually do so
as well, but this does not cause find to exit early. The string
``{}'' must appear, and must appear last. Each set is limited to
no more than 5,000 pathnames, and is also limited such that the
invocation of utility does not exceed ARG_MAX.
Code comes from NetBSD, written by John Hawkinson. Tested by eivinde at
terraplane dot org and myself for quite some time.
ok miod@
Diffstat (limited to 'usr.bin/find')
-rw-r--r-- | usr.bin/find/extern.h | 4 | ||||
-rw-r--r-- | usr.bin/find/find.1 | 50 | ||||
-rw-r--r-- | usr.bin/find/find.c | 44 | ||||
-rw-r--r-- | usr.bin/find/find.h | 19 | ||||
-rw-r--r-- | usr.bin/find/function.c | 256 |
5 files changed, 312 insertions, 61 deletions
diff --git a/usr.bin/find/extern.h b/usr.bin/find/extern.h index 737436b5932..476ea0fecc4 100644 --- a/usr.bin/find/extern.h +++ b/usr.bin/find/extern.h @@ -1,4 +1,4 @@ -/* * $OpenBSD: extern.h,v 1.16 2011/04/21 01:14:21 jacekm Exp $*/ +/* * $OpenBSD: extern.h,v 1.17 2012/01/02 23:19:45 pascal Exp $*/ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. @@ -37,10 +37,12 @@ void *emalloc(unsigned int); PLAN *find_create(char ***); int find_execute(PLAN *, char **); PLAN *find_formplan(char **); +int find_traverse(PLAN *, int (*)(PLAN *, void *), void *); PLAN *not_squish(PLAN *); OPTION *option(char *); PLAN *or_squish(PLAN *); PLAN *paren_squish(PLAN *); +int plan_cleanup(PLAN *, void *); struct stat; void printlong(char *, char *, struct stat *); int queryuser(char **); diff --git a/usr.bin/find/find.1 b/usr.bin/find/find.1 index fe6c637b0e3..8cdb8569171 100644 --- a/usr.bin/find/find.1 +++ b/usr.bin/find/find.1 @@ -1,4 +1,4 @@ -.\" $OpenBSD: find.1,v 1.82 2011/02/13 12:35:02 schwarze Exp $ +.\" $OpenBSD: find.1,v 1.83 2012/01/02 23:19:45 pascal Exp $ .\" Copyright (c) 1990, 1993 .\" The Regents of the University of California. All rights reserved. .\" @@ -31,7 +31,7 @@ .\" .\" from: @(#)find.1 8.1 (Berkeley) 6/6/93 .\" -.Dd $Mdocdate: February 13 2011 $ +.Dd $Mdocdate: January 2 2012 $ .Dt FIND 1 .Os .Sh NAME @@ -189,28 +189,48 @@ True if the current file or directory is empty. .Op argument ... .No ; .Xc -True if the program named -.Ar utility -returns a zero value as its exit status. +.It Xo +.Ic -exec Ar utility +.Op argument ... +.No {} ++ +.Xc +Execute the specified +.Ar utility . Optional arguments may be passed to the utility. The expression must be terminated by a semicolon -.Pq Ql \&; . +.Pq Ql \&; +or a plus sign +.Pq Ql \&+ . +.Pp +If terminated by a semicolon, the +.Ar utility +is executed once per path. If the string .Qq {} appears anywhere in the utility name or the arguments it is replaced by the pathname of the current file. -.Ar utility -will be executed from the directory from which -.Nm -was executed. .Pp -Since +If terminated by a plus sign +.Pq Dq \&+ , +the pathnames for which the +primary is evaluated are aggregated into sets, and .Ar utility -is executed every time a match is made, -it is often more efficient to pipe the output of -.Nm -to +will be invoked once per set, similar to .Xr xargs 1 . +If any invocation exits with non-zero exit status, then +.Nm +will eventually do so as well, but this does not cause +.Nm +to exit early. +The string +.Dq {} +must appear, and must appear last. +Each set is limited to no more than 5,000 pathnames, +and is also limited such that the invocation of +.Ar utility +does not exceed +.Dv ARG_MAX . .It Xo .Ic -execdir Ar utility .Op argument ... diff --git a/usr.bin/find/find.c b/usr.bin/find/find.c index 09345aeae7c..ba8808f40fc 100644 --- a/usr.bin/find/find.c +++ b/usr.bin/find/find.c @@ -1,4 +1,4 @@ -/* $OpenBSD: find.c,v 1.15 2011/04/21 01:14:21 jacekm Exp $ */ +/* $OpenBSD: find.c,v 1.16 2012/01/02 23:19:45 pascal Exp $ */ /*- * Copyright (c) 1991, 1993 @@ -148,7 +148,7 @@ find_execute(PLAN *plan, /* search plan */ char **paths) /* array of pathnames to traverse */ { sigset_t fullset, oset; - int rval; + int r, rval; PLAN *p; rval = 0; @@ -201,5 +201,45 @@ find_execute(PLAN *plan, /* search plan */ ; } (void)fts_close(tree); + + /* + * Cleanup any plans with leftover state. + * Keep the last non-zero return value. + */ + if ((r = find_traverse(plan, plan_cleanup, NULL)) != 0) + rval = r; return (rval); } + +/* + * find_traverse -- + * traverse the plan tree and execute func() on all plans. This + * does not evaluate each plan's eval() function; it is intended + * for operations that must run on all plans, such as state + * cleanup. + * + * If any func() returns non-zero, then so will find_traverse(). + */ +int +find_traverse(PLAN *plan, int (*func)(PLAN *, void *), void *arg) +{ + PLAN *p; + int r, rval; + + rval = 0; + for (p = plan; p; p = p->next) { + if ((r = func(p, arg)) != 0) + rval = r; + if (p->type == N_EXPR || p->type == N_OR) { + if (p->p_data[0]) + if ((r = find_traverse(p->p_data[0], + func, arg)) != 0) + rval = r; + if (p->p_data[1]) + if ((r = find_traverse(p->p_data[1], + func, arg)) != 0) + rval = r; + } + } + return rval; +} diff --git a/usr.bin/find/find.h b/usr.bin/find/find.h index dc1a4ef8679..5226a26115a 100644 --- a/usr.bin/find/find.h +++ b/usr.bin/find/find.h @@ -1,4 +1,4 @@ -/* * $OpenBSD: find.h,v 1.14 2004/09/15 18:43:25 deraadt Exp $*/ +/* * $OpenBSD: find.h,v 1.15 2012/01/02 23:19:45 pascal Exp $*/ /*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. @@ -54,6 +54,7 @@ typedef struct _plandata { #define F_LESSTHAN 2 #define F_GREATER 3 #define F_NEEDOK 1 /* exec ok */ +#define F_PLUSSET 2 /* -exec ... {} + */ #define F_MTFLAG 1 /* fstype */ #define F_MTTYPE 2 #define F_ATLEAST 1 /* perm */ @@ -77,6 +78,13 @@ typedef struct _plandata { char **_e_argv; /* argv array */ char **_e_orig; /* original strings */ int *_e_len; /* allocated length */ + char **_ep_bxp; /* ptr to 1st addt'l arg */ + char *_ep_p; /* current buffer pointer */ + char *_ep_bbp; /* begin buffer pointer */ + char *_ep_ebp; /* end buffer pointer */ + int _ep_maxargs; /* max #args */ + int _ep_narg; /* # addt'l args */ + int _ep_rval; /* return value */ } ex; char *_a_data[2]; /* array of char pointers */ char *_c_data; /* char pointer */ @@ -104,6 +112,15 @@ typedef struct _plandata { #define e_argv p_un.ex._e_argv #define e_orig p_un.ex._e_orig #define e_len p_un.ex._e_len +#define ep_p p_un.ex._ep_p +#define ep_bbp p_un.ex._ep_bbp +#define ep_ebp p_un.ex._ep_ebp +#define ep_bxp p_un.ex._ep_bxp +#define ep_cnt p_un.ex._ep_cnt +#define ep_maxargs p_un.ex._ep_maxargs +#define ep_nline p_un.ex._ep_nline +#define ep_narg p_un.ex._ep_narg +#define ep_rval p_un.ex._ep_rval typedef struct _option { char *name; /* option name */ diff --git a/usr.bin/find/function.c b/usr.bin/find/function.c index 0d59e06ce35..59002dcc44f 100644 --- a/usr.bin/find/function.c +++ b/usr.bin/find/function.c @@ -1,4 +1,4 @@ -/* $OpenBSD: function.c,v 1.36 2010/12/01 01:20:29 millert Exp $ */ +/* $OpenBSD: function.c,v 1.37 2012/01/02 23:19:45 pascal Exp $ */ /*- * Copyright (c) 1990, 1993 @@ -46,6 +46,7 @@ #include <fts.h> #include <grp.h> #include <libgen.h> +#include <limits.h> #include <pwd.h> #include <stdio.h> #include <stdlib.h> @@ -71,6 +72,7 @@ static PLAN *palloc(enum ntype, int (*)(PLAN *, FTSENT *)); static long find_parsenum(PLAN *plan, char *option, char *vp, char *endch); +static void run_f_exec(PLAN *plan); static PLAN *palloc(enum ntype t, int (*f)(PLAN *, FTSENT *)); int f_amin(PLAN *, FTSENT *); @@ -339,38 +341,108 @@ c_empty(char *ignore, char ***ignored, int unused) /* * [-exec | -ok] utility [arg ... ] ; functions -- + * [-exec | -ok] utility [arg ... ] {} + functions -- * - * True if the executed utility returns a zero value as exit status. - * The end of the primary expression is delimited by a semicolon. If - * "{}" occurs anywhere, it gets replaced by the current pathname. - * The current directory for the execution of utility is the same as - * the current directory when the find utility was started. + * If the end of the primary expression is delimited by a + * semicolon: true if the executed utility returns a zero value + * as exit status. If "{}" occurs anywhere, it gets replaced by + * the current pathname. * - * The primary -ok is different in that it requests affirmation of the - * user before executing the utility. + * If the end of the primary expression is delimited by a plus + * sign: always true. Pathnames for which the primary is + * evaluated shall be aggregated into sets. The utility will be + * executed once per set, with "{}" replaced by the entire set of + * pathnames (as if xargs). "{}" must appear last. + * + * The current directory for the execution of utility is the same + * as the current directory when the find utility was started. + * + * The primary -ok is different in that it requests affirmation + * of the user before executing the utility. */ int f_exec(PLAN *plan, FTSENT *entry) { - int cnt; + int cnt, l; pid_t pid; int status; - for (cnt = 0; plan->e_argv[cnt]; ++cnt) - if (plan->e_len[cnt]) - brace_subst(plan->e_orig[cnt], &plan->e_argv[cnt], - entry->fts_path, plan->e_len[cnt]); + if (plan->flags & F_PLUSSET) { + /* + * Confirm sufficient buffer space, then copy the path + * to the buffer. + */ + l = strlen(entry->fts_path); + if (plan->ep_p + l < plan->ep_ebp) { + plan->ep_bxp[plan->ep_narg++] = plan->ep_p; + strlcpy(plan->ep_p, entry->fts_path, l + 1); + plan->ep_p += l + 1; + + if (plan->ep_narg == plan->ep_maxargs) + run_f_exec(plan); + } else { + /* + * Without sufficient space to copy in the next + * argument, run the command to empty out the + * buffer before re-attepting the copy. + */ + run_f_exec(plan); + if (plan->ep_p + l < plan->ep_ebp) { + plan->ep_bxp[plan->ep_narg++] = plan->ep_p; + strlcpy(plan->ep_p, entry->fts_path, l + 1); + plan->ep_p += l + 1; + } else + errx(1, "insufficient space for argument"); + } + return (1); + } else { + for (cnt = 0; plan->e_argv[cnt]; ++cnt) + if (plan->e_len[cnt]) + brace_subst(plan->e_orig[cnt], + &plan->e_argv[cnt], + entry->fts_path, + plan->e_len[cnt]); + if (plan->flags & F_NEEDOK && !queryuser(plan->e_argv)) + return (0); - if (plan->flags == F_NEEDOK && !queryuser(plan->e_argv)) - return (0); + /* don't mix output of command with find output */ + fflush(stdout); + fflush(stderr); + + switch (pid = vfork()) { + case -1: + err(1, "fork"); + /* NOTREACHED */ + case 0: + if (fchdir(dotfd)) { + warn("chdir"); + _exit(1); + } + execvp(plan->e_argv[0], plan->e_argv); + warn("%s", plan->e_argv[0]); + _exit(1); + } + pid = waitpid(pid, &status, 0); + return (pid != -1 && WIFEXITED(status) && !WEXITSTATUS(status)); + } +} - /* don't mix output of command with find output */ - fflush(stdout); - fflush(stderr); +static void +run_f_exec(PLAN *plan) +{ + pid_t pid; + int rval, status; + + /* Ensure arg list is null terminated. */ + plan->ep_bxp[plan->ep_narg] = NULL; + + /* Don't mix output of command with find output. */ + fflush(stdout); + fflush(stderr); switch (pid = vfork()) { case -1: - err(1, "fork"); + err(1, "vfork"); /* NOTREACHED */ case 0: if (fchdir(dotfd)) { @@ -381,8 +453,26 @@ f_exec(PLAN *plan, FTSENT *entry) warn("%s", plan->e_argv[0]); _exit(1); } + + /* Clear out the argument list. */ + plan->ep_narg = 0; + plan->ep_bxp[plan->ep_narg] = NULL; + /* As well as the argument buffer. */ + plan->ep_p = plan->ep_bbp; + *plan->ep_p = '\0'; + pid = waitpid(pid, &status, 0); - return (pid != -1 && WIFEXITED(status) && !WEXITSTATUS(status)); + if (WIFEXITED(status)) + rval = WEXITSTATUS(status); + else + rval = -1; + + /* + * If we have a non-zero exit status, preserve it so find(1) can + * later exit with it. + */ + if (rval) + plan->ep_rval = rval; } /* @@ -391,12 +481,16 @@ f_exec(PLAN *plan, FTSENT *entry) * on the command line, one with (possibly duplicated) pointers to the * argv array, and one with integer values that are lengths of the * strings, but also flags meaning that the string has to be massaged. + * + * If -exec ... {} +, use only the first array, but make it large + * enough to hold 5000 args (cf. src/usr.bin/xargs/xargs.c for a + * discussion), and then allocate ARG_MAX - 4K of space for args. */ PLAN * c_exec(char *unused, char ***argvp, int isok) { PLAN *new; /* node returned */ - int cnt; + int cnt, brace, lastbrace; char **argv, **ap, *p; /* make sure the current directory is readable */ @@ -407,36 +501,93 @@ c_exec(char *unused, char ***argvp, int isok) new = palloc(N_EXEC, f_exec); if (isok) - new->flags = F_NEEDOK; + new->flags |= F_NEEDOK; - for (ap = argv = *argvp;; ++ap) { + /* + * Terminate if we encounter an arg exacty equal to ";", or an + * arg exacty equal to "+" following an arg exacty equal to + * "{}". + */ + for (ap = argv = *argvp, brace = 0;; ++ap) { if (!*ap) - errx(1, - "%s: no terminating \";\"", isok ? "-ok" : "-exec"); - if (**ap == ';') + errx(1, "%s: no terminating \";\" or \"+\"", + isok ? "-ok" : "-exec"); + lastbrace = brace; + brace = 0; + if (strcmp(*ap, "{}") == 0) + brace = 1; + if (strcmp(*ap, ";") == 0) break; + if (strcmp(*ap, "+") == 0 && lastbrace) { + new->flags |= F_PLUSSET; + break; + } } - cnt = ap - *argvp + 1; - new->e_argv = (char **)emalloc((u_int)cnt * sizeof(char *)); - new->e_orig = (char **)emalloc((u_int)cnt * sizeof(char *)); - new->e_len = (int *)emalloc((u_int)cnt * sizeof(int)); - for (argv = *argvp, cnt = 0; argv < ap; ++argv, ++cnt) { - new->e_orig[cnt] = *argv; - for (p = *argv; *p; ++p) - if (p[0] == '{' && p[1] == '}') { - new->e_argv[cnt] = emalloc((u_int)MAXPATHLEN); - new->e_len[cnt] = MAXPATHLEN; - break; + /* + * POSIX says -ok ... {} + "need not be supported," and it does + * not make much sense anyway. + */ + if (new->flags & F_NEEDOK && new->flags & F_PLUSSET) + errx(1, "-ok: terminating \"+\" not permitted."); + + if (new->flags & F_PLUSSET) { + u_int c, bufsize; + + cnt = ap - *argvp - 1; /* units are words */ + new->ep_maxargs = 5000; + new->e_argv = (char **)emalloc((u_int)(cnt + new->ep_maxargs) + * sizeof(char **)); + + /* We start stuffing arguments after the user's last one. */ + new->ep_bxp = &new->e_argv[cnt]; + new->ep_narg = 0; + + /* + * Count up the space of the user's arguments, and + * subtract that from what we allocate. + */ + for (argv = *argvp, c = 0, cnt = 0; + argv < ap; + ++argv, ++cnt) { + c += strlen(*argv) + 1; + new->e_argv[cnt] = *argv; + } + bufsize = ARG_MAX - 4 * 1024 - c; + + + /* + * Allocate, and then initialize current, base, and + * end pointers. + */ + new->ep_p = new->ep_bbp = malloc(bufsize + 1); + new->ep_ebp = new->ep_bbp + bufsize - 1; + new->ep_rval = 0; + } else { /* !F_PLUSSET */ + cnt = ap - *argvp + 1; + new->e_argv = (char **)emalloc((u_int)cnt * sizeof(char *)); + new->e_orig = (char **)emalloc((u_int)cnt * sizeof(char *)); + new->e_len = (int *)emalloc((u_int)cnt * sizeof(int)); + + for (argv = *argvp, cnt = 0; argv < ap; ++argv, ++cnt) { + new->e_orig[cnt] = *argv; + for (p = *argv; *p; ++p) + if (p[0] == '{' && p[1] == '}') { + new->e_argv[cnt] = + emalloc((u_int)MAXPATHLEN); + new->e_len[cnt] = MAXPATHLEN; + break; + } + if (!*p) { + new->e_argv[cnt] = *argv; + new->e_len[cnt] = 0; } - if (!*p) { - new->e_argv[cnt] = *argv; - new->e_len[cnt] = 0; } - } - new->e_argv[cnt] = new->e_orig[cnt] = NULL; + new->e_orig[cnt] = NULL; + } + new->e_argv[cnt] = NULL; *argvp = argv + 1; return (new); } @@ -1441,6 +1592,27 @@ c_or(char *ignore, char ***ignored, int unused) return (palloc(N_OR, f_or)); } + +/* + * plan_cleanup -- + * Check and see if the specified plan has any residual state, + * and if so, clean it up as appropriate. + * + * At the moment, only N_EXEC has state. Two kinds: 1) + * lists of files to feed to subprocesses 2) State on exit + * statusses of past subprocesses. + */ +/* ARGSUSED1 */ +int +plan_cleanup(PLAN *plan, void *arg) +{ + if (plan->type==N_EXEC && plan->ep_narg) + run_f_exec(plan); + + return plan->ep_rval; /* Passed save exit-status up chain */ +} + + static PLAN * palloc(enum ntype t, int (*f)(PLAN *, FTSENT *)) { |