diff options
author | Claudio Jeker <claudio@cvs.openbsd.org> | 2021-08-29 13:43:47 +0000 |
---|---|---|
committer | Claudio Jeker <claudio@cvs.openbsd.org> | 2021-08-29 13:43:47 +0000 |
commit | 653dedf334c7d9a9cf5efe2be77b9b4b617f943b (patch) | |
tree | 82c6bb333270eb85079e2309c1c8a5814f02b9f1 /usr.bin | |
parent | 409b89b45f65cc2d68727d1e0f9ca26b0e8bc541 (diff) |
Implement --exclude/--exclude-from and --include/--include-from.
Currently only simple includes and excludes work; the advanced filters
introduced later in rsync are not implemented. It is unclear whether
per-directory filters are something we want to implement. That would require
more modern protocols, which openrsync is not able to handle right now.
This adds a special matching function to support '**' matching, which behaves
mostly like rsync's version, with the exception of how bad [] patterns are
expanded. For bad patterns openrsync follows fnmatch's behaviour rather than
the somewhat strange rsync behaviour.
Not perfect but committing now so people can test and provide feedback.
Diffstat (limited to 'usr.bin')
-rw-r--r-- | usr.bin/rsync/Makefile | 6 | ||||
-rw-r--r-- | usr.bin/rsync/charclass.h | 29 | ||||
-rw-r--r-- | usr.bin/rsync/extern.h | 26 | ||||
-rw-r--r-- | usr.bin/rsync/flist.c | 39 | ||||
-rw-r--r-- | usr.bin/rsync/main.c | 34 | ||||
-rw-r--r-- | usr.bin/rsync/receiver.c | 25 | ||||
-rw-r--r-- | usr.bin/rsync/rmatch.c | 395 | ||||
-rw-r--r-- | usr.bin/rsync/rules.c | 479 | ||||
-rw-r--r-- | usr.bin/rsync/sender.c | 23 |
9 files changed, 1015 insertions, 41 deletions
diff --git a/usr.bin/rsync/Makefile b/usr.bin/rsync/Makefile index d7af8bd0a87..f2e4d460c57 100644 --- a/usr.bin/rsync/Makefile +++ b/usr.bin/rsync/Makefile @@ -1,9 +1,9 @@ -# $OpenBSD: Makefile,v 1.10 2019/05/08 21:30:11 benno Exp $ +# $OpenBSD: Makefile,v 1.11 2021/08/29 13:43:46 claudio Exp $ PROG= openrsync SRCS= blocks.c client.c downloader.c fargs.c flist.c hash.c ids.c \ - io.c log.c mkpath.c mktemp.c receiver.c sender.c server.c session.c \ - socket.c symlinks.c uploader.c main.c misc.c + io.c log.c main.c misc.c mkpath.c mktemp.c receiver.c rmatch.c \ + rules.c sender.c server.c session.c socket.c symlinks.c uploader.c LDADD+= -lcrypto -lm DPADD+= ${LIBCRYPTO} ${LIBM} MAN= openrsync.1 diff --git a/usr.bin/rsync/charclass.h b/usr.bin/rsync/charclass.h new file mode 100644 index 00000000000..1c5ff7ee044 --- /dev/null +++ b/usr.bin/rsync/charclass.h @@ -0,0 +1,29 @@ +/* + * Public domain, 2008, Todd C. Miller <millert@openbsd.org> + * + * $OpenBSD: charclass.h,v 1.1 2021/08/29 13:43:46 claudio Exp $ + */ + +/* + * POSIX character class support for fnmatch() and glob(). 
+ */ +static const struct cclass { + const char *name; + int (*isctype)(int); +} cclasses[] = { + { "alnum", isalnum }, + { "alpha", isalpha }, + { "blank", isblank }, + { "cntrl", iscntrl }, + { "digit", isdigit }, + { "graph", isgraph }, + { "lower", islower }, + { "print", isprint }, + { "punct", ispunct }, + { "space", isspace }, + { "upper", isupper }, + { "xdigit", isxdigit }, + { NULL, NULL } +}; + +#define NCCLASSES (sizeof(cclasses) / sizeof(cclasses[0]) - 1) diff --git a/usr.bin/rsync/extern.h b/usr.bin/rsync/extern.h index d40188d1ebd..4681d47fa24 100644 --- a/usr.bin/rsync/extern.h +++ b/usr.bin/rsync/extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: extern.h,v 1.39 2021/06/30 15:24:10 claudio Exp $ */ +/* $OpenBSD: extern.h,v 1.40 2021/08/29 13:43:46 claudio Exp $ */ /* * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -131,12 +131,28 @@ struct opts { int no_motd; /* --no-motd */ int numeric_ids; /* --numeric-ids */ int one_file_system; /* -x */ + int from0; /* -0 */ char *rsync_path; /* --rsync-path */ char *ssh_prog; /* --rsh or -e */ char *port; /* --port */ char *address; /* --address */ }; +enum rule_type { + RULE_NONE, + RULE_EXCLUDE, + RULE_INCLUDE, + RULE_CLEAR, +#ifdef NOTYET + RULE_MERGE, + RULE_DIR_MERGE, + RULE_SHOW, + RULE_HIDE, + RULE_PROTECT, + RULE_RISK, +#endif +}; + /* * An individual block description for a file. * See struct blkset. 
@@ -362,6 +378,14 @@ char *mkstempnodat(int, char *, mode_t, dev_t); char *mkstempsock(const char *, char *); int mktemplate(char **, const char *, int); +int parse_rule(char *line, enum rule_type); +void parse_file(const char *, enum rule_type, int); +void send_rules(struct sess *, int); +void recv_rules(struct sess *, int); +int rules_match(const char *, int); + +int rmatch(const char *, const char *, int); + char *symlink_read(const char *); char *symlinkat_read(int, const char *); diff --git a/usr.bin/rsync/flist.c b/usr.bin/rsync/flist.c index e33f51b16d4..86cde1dd3d3 100644 --- a/usr.bin/rsync/flist.c +++ b/usr.bin/rsync/flist.c @@ -1,4 +1,4 @@ -/* $OpenBSD: flist.c,v 1.32 2021/06/30 13:10:04 claudio Exp $ */ +/* $OpenBSD: flist.c,v 1.33 2021/08/29 13:43:46 claudio Exp $ */ /* * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2019 Florian Obser <florian@openbsd.org> @@ -823,6 +823,11 @@ flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz, ERR("%s: lstat", root); return 0; } else if (S_ISREG(st.st_mode)) { + /* filter files */ + if (rules_match(root, 0) == -1) { + WARNX("%s: skipping excluded file", root); + return 1; + } if (!flist_realloc(fl, sz, max)) { ERRX1("flist_realloc"); return 0; @@ -839,7 +844,13 @@ flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz, if (!sess->opts->preserve_links) { WARNX("%s: skipping symlink", root); return 1; - } else if (!flist_realloc(fl, sz, max)) { + } + /* filter files */ + if (rules_match(root, 0) == -1) { + WARNX("%s: skipping excluded symlink", root); + return 1; + } + if (!flist_realloc(fl, sz, max)) { ERRX1("flist_realloc"); return 0; } @@ -942,6 +953,15 @@ flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz, nxdev++; } + /* filter files */ + if (rules_match(ent->fts_path + stripdir, + (ent->fts_info == FTS_D)) == -1) { + WARNX("%s: skipping excluded file", + ent->fts_path + stripdir); + fts_set(fts, ent, FTS_SKIP); + 
continue; + } + /* Allocate a new file entry. */ if (!flist_realloc(fl, sz, max)) { @@ -1073,6 +1093,11 @@ flist_gen_files(struct sess *sess, size_t argc, char **argv, continue; } + /* filter files */ + if (rules_match(argv[i], S_ISDIR(st.st_mode)) == -1) { + WARNX("%s: skipping excluded file", argv[i]); + continue; + } f = &fl[flsz++]; assert(f != NULL); @@ -1297,6 +1322,16 @@ flist_gen_dels(struct sess *sess, const char *root, struct flist **fl, continue; } + /* filter files on delete */ + /* TODO handle --delete-excluded */ + if (rules_match(ent->fts_path + stripdir, + (ent->fts_info == FTS_D)) == -1) { + WARNX("skip excluded file %s", + ent->fts_path + stripdir); + fts_set(fts, ent, FTS_SKIP); + continue; + } + /* Look up in hashtable. */ memset(&hent, 0, sizeof(ENTRY)); diff --git a/usr.bin/rsync/main.c b/usr.bin/rsync/main.c index cb4a03488e5..31174622fb4 100644 --- a/usr.bin/rsync/main.c +++ b/usr.bin/rsync/main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: main.c,v 1.56 2021/07/14 11:14:27 claudio Exp $ */ +/* $OpenBSD: main.c,v 1.57 2021/08/29 13:43:46 claudio Exp $ */ /* * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -276,6 +276,10 @@ static struct opts opts; #define OP_RSYNCPATH 1002 #define OP_TIMEOUT 1003 #define OP_VERSION 1004 +#define OP_EXCLUDE 1005 +#define OP_INCLUDE 1006 +#define OP_EXCLUDE_FROM 1007 +#define OP_INCLUDE_FROM 1008 const struct option lopts[] = { { "address", required_argument, NULL, OP_ADDRESS }, @@ -286,9 +290,15 @@ const struct option lopts[] = { { "devices", no_argument, &opts.devices, 1 }, { "no-devices", no_argument, &opts.devices, 0 }, { "dry-run", no_argument, &opts.dry_run, 1 }, + { "exclude", required_argument, NULL, OP_EXCLUDE }, + { "exclude-from", required_argument, NULL, OP_EXCLUDE_FROM }, + { "from0", no_argument, NULL, '0' }, + { "no-from0", no_argument, &opts.from0, 0 }, { "group", no_argument, &opts.preserve_gids, 1 }, { "no-group", no_argument, &opts.preserve_gids, 0 }, { "help", no_argument, NULL, 'h' }, + { 
"include", required_argument, NULL, OP_INCLUDE }, + { "include-from", required_argument, NULL, OP_INCLUDE_FROM }, { "links", no_argument, &opts.preserve_links, 1 }, { "no-links", no_argument, &opts.preserve_links, 0 }, { "no-motd", no_argument, &opts.no_motd, 1 }, @@ -324,6 +334,7 @@ main(int argc, char *argv[]) struct fargs *fargs; char **args; const char *errstr; + /* Global pledge. */ if (pledge("stdio unix rpath wpath cpath dpath inet fattr chown dns getpw proc exec unveil", @@ -333,6 +344,9 @@ main(int argc, char *argv[]) while ((c = getopt_long(argc, argv, "Dae:ghlnoprtvxz", lopts, NULL)) != -1) { switch (c) { + case '0': + opts.from0 = 1; + break; case 'D': opts.devices = 1; opts.specials = 1; @@ -398,6 +412,24 @@ main(int argc, char *argv[]) errx(ERR_SYNTAX, "timeout is %s: %s", errstr, optarg); break; + case OP_EXCLUDE: + if (parse_rule(optarg, RULE_EXCLUDE) == -1) + errx(ERR_SYNTAX, "syntax error in exclude: %s", + optarg); + break; + case OP_INCLUDE: + if (parse_rule(optarg, RULE_INCLUDE) == -1) + errx(ERR_SYNTAX, "syntax error in include: %s", + optarg); + break; + case OP_EXCLUDE_FROM: + parse_file(optarg, RULE_EXCLUDE, + opts.from0 ? '\0' : '\n' ); + break; + case OP_INCLUDE_FROM: + parse_file(optarg, RULE_INCLUDE, + opts.from0 ? 
'\0' : '\n' ); + break; case OP_VERSION: fprintf(stderr, "openrsync: protocol version %u\n", RSYNC_PROTOCOL); diff --git a/usr.bin/rsync/receiver.c b/usr.bin/rsync/receiver.c index c731211070f..6e5b01670cd 100644 --- a/usr.bin/rsync/receiver.c +++ b/usr.bin/rsync/receiver.c @@ -1,4 +1,4 @@ -/* $OpenBSD: receiver.c,v 1.28 2021/06/30 13:10:04 claudio Exp $ */ +/* $OpenBSD: receiver.c,v 1.29 2021/08/29 13:43:46 claudio Exp $ */ /* * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> @@ -172,7 +172,7 @@ int rsync_receiver(struct sess *sess, int fdin, int fdout, const char *root) { struct flist *fl = NULL, *dfl = NULL; - size_t i, flsz = 0, dflsz = 0, excl; + size_t i, flsz = 0, dflsz = 0; char *tofree; int rc = 0, dfd = -1, phase = 0, c; int32_t ioerror; @@ -184,22 +184,13 @@ rsync_receiver(struct sess *sess, int fdin, int fdout, const char *root) if (pledge("stdio unix rpath wpath cpath dpath fattr chown getpw unveil", NULL) == -1) err(ERR_IPC, "pledge"); - /* Client sends zero-length exclusions. */ + /* Client sends exclusions. */ + if (!sess->opts->server) + send_rules(sess, fdout); - if (!sess->opts->server && !io_write_int(sess, fdout, 0)) { - ERRX1("io_write_int"); - goto out; - } - - if (sess->opts->server && sess->opts->del) { - if (!io_read_size(sess, fdin, &excl)) { - ERRX1("io_read_size"); - goto out; - } else if (excl != 0) { - ERRX("exclusion list is non-empty"); - goto out; - } - } + /* Server receives exclusions if delete is on. */ + if (sess->opts->server && sess->opts->del) + recv_rules(sess, fdin); /* * Start by receiving the file list and our mystery number. 
diff --git a/usr.bin/rsync/rmatch.c b/usr.bin/rsync/rmatch.c new file mode 100644 index 00000000000..b037b80b9b6 --- /dev/null +++ b/usr.bin/rsync/rmatch.c @@ -0,0 +1,395 @@ +/* $OpenBSD: rmatch.c,v 1.1 2021/08/29 13:43:46 claudio Exp $ */ + +/* + * Copyright (c) 2021 Claudio Jeker <claudio@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Copyright (c) 1989, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Guido van Rossum. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <ctype.h> +#include <stdio.h> +#include <string.h> +#include <limits.h> + +#include "charclass.h" + +#define RANGE_MATCH 1 +#define RANGE_NOMATCH 0 +#define RANGE_ERROR (-1) + +static int +classmatch(const char *pattern, char test, const char **ep) +{ + const char *mismatch = pattern; + const struct cclass *cc; + const char *colon; + size_t len; + int rval = RANGE_NOMATCH; + + if (*pattern++ != ':') { + *ep = mismatch; + return RANGE_ERROR; + } + if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') { + *ep = mismatch; + return RANGE_ERROR; + } + *ep = colon + 2; + len = (size_t)(colon - pattern); + + for (cc = cclasses; cc->name != NULL; cc++) { + if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') { + if (cc->isctype((unsigned char)test)) + rval = RANGE_MATCH; + return rval; + } + } + + /* invalid character class, treat as normal text */ + *ep = mismatch; + return RANGE_ERROR; +} + +static int +rangematch(const char **pp, char test) +{ + const char *pattern = *pp; + int negate, ok; + char c, c2; + + /* + * A bracket expression starting with an unquoted circumflex + * character produces unspecified results (IEEE 1003.2-1992, + * 3.13.2). 
This implementation treats it like '!', for + * consistency with the regular expression syntax. + * J.T. Conklin (conklin@ngai.kaleida.com) + */ + if ((negate = (*pattern == '!' || *pattern == '^'))) + ++pattern; + + /* + * A right bracket shall lose its special meaning and represent + * itself in a bracket expression if it occurs first in the list. + * -- POSIX.2 2.8.3.2 + */ + ok = 0; + c = *pattern++; + do { + if (c == '[') { + switch (classmatch(pattern, test, &pattern)) { + case RANGE_MATCH: + ok = 1; + continue; + case RANGE_NOMATCH: + continue; + default: + /* invalid character class, treat litterally. */ + break; + } + } + if (c == '\\') + c = *pattern++; + if (c == '\0') + return RANGE_ERROR; + /* patterns can not match on '/' */ + if (c == '/') + return RANGE_NOMATCH; + if (*pattern == '-' + && (c2 = *(pattern + 1)) != '\0' && c2 != ']') { + pattern += 2; + if (c2 == '\\') + c2 = *pattern++; + if (c2 == '\0') + return RANGE_ERROR; + if (c <= test && test <= c2) + ok = 1; + } else if (c == test) + ok = 1; + } while ((c = *pattern++) != ']'); + + *pp = pattern; + return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); +} + +/* + * Single character match, advances pattern as much as needed. + * Return 0 on match and !0 (aka 1) on missmatch. + * When matched pp is advanced to the end of the pattern matched. 
+ */ +static int +matchchar(const char **pp, const char in) +{ + const char *pattern = *pp; + char c; + int rv = 0; + + switch (c = *pattern++) { + case '?': + if (in == '\0') + rv = 1; + if (in == '/') + rv = 1; + break; + case '[': + if (in == '\0') + rv = 1; + if (in == '/') + rv = 1; + if (rv == 1) + break; + + switch (rangematch(&pattern, in)) { + case RANGE_ERROR: + /* not a good range, treat as normal text */ + goto normal; + case RANGE_MATCH: + break; + case RANGE_NOMATCH: + rv = 1; + } + break; + case '\\': + if ((c = *pattern++) == '\0') { + c = '\\'; + --pattern; + } + /* FALLTHROUGH */ + default: + normal: + if (c != in) + rv = 1; + break; + } + + *pp = pattern; + return rv; +} + +/* + * Do a substring match. If wild is set then the pattern started with a '*'. + * The match will go until '*', '/' or '\0' is encountered in pattern or + * the input string is consumed up to end. + * The pattern and string handles pp and ss are updated only on success. + */ +static int +matchsub(const char **pp, const char **ss, const char *end, int wild) +{ + const char *pattern = *pp; + const char *p = pattern; + const char *string = *ss; + size_t matchlen; + + /* first calculate how many characters the submatch will consume */ + for (matchlen = 0; *p != '\0'; matchlen++) { + if (p[0] == '*') + break; + /* '/' acts as barrier */ + if (p[0] == '/' || (p[0] == '\\' && p[1] == '/')) { + if (wild) { + /* match needs to match up to end of segment */ + if (string > end - matchlen) + return 1; + string = end - matchlen; + wild = 0; + } + break; + } + /* + * skip forward one character in pattern by doing a + * dummy lookup. 
+ */ + matchchar(&p, ' '); + } + + /* not enough char to match */ + if (string > end - matchlen) + return 1; + + if (*p == '\0') { + if (wild) { + /* match needs to match up to end of segment */ + string = end - matchlen; + wild = 0; + } + } + + while (*pattern != '\0' && *pattern != '*') { + /* eat possible escape char before '/' */ + if (pattern[0] == '\\' && pattern[1] == '/') + pattern++; + if (pattern[0] == '/') + break; + + /* check if there are still characters available to compare */ + if (string >= end) + return 1; + /* Compare one char at a time. */ + if (!matchchar(&pattern, *string++)) + continue; + if (wild) { + /* skip forward one char and restart match */ + string = ++*ss; + pattern = *pp; + /* can it still match? */ + if (string > end - matchlen) + return 1; + } else { + /* failed match */ + return 1; + } + } + + *pp = pattern; + *ss = string; + return 0; +} + +/* + * File matching with the addition of the special '**'. + * Returns 0 on match and !0 for strings that do not match pattern. + */ +int +rmatch(const char *pattern, const char *string, int leading_dir) +{ + const char *segend, *segnext, *mismatch = NULL; + int wild, starstar; + + while (*pattern && *string) { + + /* handle leading '/' first */ + if (pattern[0] == '\\' && pattern[1] == '/') + pattern++; + if (*string == '/' && *pattern == '/') { + string++; + pattern++; + } + + /* match to the next '/' in string */ + segend = strchr(string, '/'); + if (segend == NULL) + segend = strchr(string, '\0'); + + while (*pattern) { + /* + * Check for '*' and '**'. For '*' reduce '*' and '?' + * sequences into n-'?' and trailing '*'. + * For '**' this optimisation can not be done + * since '**???/' will match 'a/aa/aaa/' but not + * 'a/aa/aa/' still additional '*' will be reduced. 
+ */ + wild = 0; + starstar = 0; + for ( ; *pattern == '*' || *pattern == '?'; pattern++) { + if (pattern[0] == '*') { + if (pattern[1] == '*') { + starstar = 1; + pattern++; + } + wild = 1; + } else if (!starstar) { /* pattern[0] == '?' */ + if (string < segend && *string != '/') + string++; + else + /* no match possible */ + return 1; + } else + break; + } + + /* pattern ends in '**' so it is a match */ + if (starstar && *pattern == '\0') + return 0; + + if (starstar) { + segnext = segend; + mismatch = pattern; + } + + while (string < segend) { + if (matchsub(&pattern, &string, segend, wild)) { +failed_match: + /* + * failed to match, if starstar retry + * with the next segment. + */ + if (mismatch) { + pattern = mismatch; + wild = 1; + string = segnext; + if (*string == '/') + string++; + segend = strchr(string, '/'); + if (!segend) + segend = strchr(string, + '\0'); + segnext = segend; + if (string < segend) + continue; + } + /* no match possible */ + return 1; + } + break; + } + + /* at end of string segment, eat up any extra '*' */ + if (string >= segend && *pattern != '*') + break; + } + if (*string != '\0' && *string != '/') + goto failed_match; + if (*pattern != '\0' && *pattern != '/') + goto failed_match; + } + + /* if both pattern and string are consumed it was a match */ + if (*pattern == '\0' && *string == '\0') + return 0; + /* if leading_dir is set then string can also be '/' for success */ + if (leading_dir && *pattern == '\0' && *string == '/') + return 0; + /* else failure */ + return 1; +} diff --git a/usr.bin/rsync/rules.c b/usr.bin/rsync/rules.c new file mode 100644 index 00000000000..c34e7d94365 --- /dev/null +++ b/usr.bin/rsync/rules.c @@ -0,0 +1,479 @@ +#include <err.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "extern.h" + +struct rule { + char *pattern; + enum rule_type type; +#ifdef NOTYET + unsigned int modifiers; +#endif + short numseg; + unsigned char anchored; + unsigned char fileonly; + unsigned char 
nowild; + unsigned char onlydir; + unsigned char leadingdir; +}; + +static struct rule *rules; +static size_t numrules; /* number of rules */ +static size_t rulesz; /* available size */ + +/* up to protocol 29 filter rules only support - + ! and no modifiers */ + +const struct command { + enum rule_type type; + char sopt; + const char *lopt; +} commands[] = { + { RULE_EXCLUDE, '-', "exclude" }, + { RULE_INCLUDE, '+', "include" }, + { RULE_CLEAR, '!', "clear" }, +#ifdef NOTYET + { RULE_MERGE, '.', "merge" }, + { RULE_DIR_MERGE, ':', "dir-merge" }, + { RULE_SHOW, 'S', "show" }, + { RULE_HIDE, 'H', "hide" }, + { RULE_PROTECT, 'P', "protect" }, + { RULE_RISK, 'R', "risk" }, +#endif + { 0 } +}; + +#ifdef NOTYET +#define MOD_ABSOLUTE 0x0001 +#define MOD_NEGATE 0x0002 +#define MOD_CVSEXCLUDE 0x0004 +#define MOD_SENDING 0x0008 +#define MOD_RECEIVING 0x0010 +#define MOD_PERISHABLE 0x0020 +#define MOD_XATTR 0x0040 +#define MOD_MERGE_EXCLUDE 0x0080 +#define MOD_MERGE_INCLUDE 0x0100 +#define MOD_MERGE_CVSCOMPAT 0x0200 +#define MOD_MERGE_EXCLUDE_FILE 0x0400 +#define MOD_MERGE_NO_INHERIT 0x0800 +#define MOD_MERGE_WORDSPLIT 0x1000 + +/* maybe support absolute and negate */ +const struct modifier { + unsigned int modifier; + char sopt; +} modifiers[] = { + { MOD_ABSOLUTE, '/' }, + { MOD_NEGATE, '!' }, + { MOD_CVSEXCLUDE, 'C' }, + { MOD_SENDING, 's' }, + { MOD_RECEIVING, 'r' }, + { MOD_PERISHABLE, 'p' }, + { MOD_XATTR, 'x' }, + /* for '.' 
and ':' types */ + { MOD_MERGE_EXCLUDE, '-' }, + { MOD_MERGE_INCLUDE, '+' }, + { MOD_MERGE_CVSCOMPAT, 'C' }, + { MOD_MERGE_EXCLUDE_FILE, 'e' }, + { MOD_MERGE_NO_INHERIT, 'n' }, + { MOD_MERGE_WORDSPLIT, 'w' }, + { 0 } +} +#endif + +static struct rule * +get_next_rule(void) +{ + struct rule *new; + size_t newsz; + + if (++numrules > rulesz) { + if (rulesz == 0) + newsz = 16; + else + newsz = rulesz * 2; + + new = recallocarray(rules, rulesz, newsz, sizeof(*rules)); + if (new == NULL) + err(ERR_NOMEM, NULL); + + rules = new; + rulesz = newsz; + } + + return rules + numrules - 1; +} + +static enum rule_type +parse_command(const char *command, size_t len) +{ + const char *mod; + size_t i; + + mod = memchr(command, ',', len); + if (mod != NULL) { + /* XXX modifiers not yet implemented */ + return RULE_NONE; + } + + for (i = 0; commands[i].type != RULE_NONE; i++) { + if (strncmp(commands[i].lopt, command, len) == 0) + return commands[i].type; + if (len == 1 && commands[i].sopt == *command) + return commands[i].type; + } + + return RULE_NONE; +} + +static void +parse_pattern(struct rule *r, char *pattern) +{ + size_t plen; + char *p; + short nseg = 1; + + /* + * check for / at start and end of pattern both are special and + * can bypass full path matching. + */ + if (*pattern == '/') { + pattern++; + r->anchored = 1; + } + plen = strlen(pattern); + /* + * check for patterns ending in '/' and '/'+'***' and handle them + * specially. Because of this and the check above pattern will never + * start or end with a '/'. + */ + if (plen > 1 && pattern[plen - 1] == '/') { + r->onlydir = 1; + pattern[plen - 1] = '\0'; + } + if (plen > 4 && strcmp(pattern + plen - 4, "/***") == 0) { + r->leadingdir = 1; + pattern[plen - 4] = '\0'; + } + + /* count how many segments the pattern has. 
*/ + for (p = pattern; *p != '\0'; p++) + if (*p == '/') + nseg++; + r->numseg = nseg; + + /* check if this pattern only matches against the basename */ + if (nseg == 1 && !r->anchored) + r->fileonly = 1; + + if (strpbrk(pattern, "*?[") == NULL) { + /* no wildchar matching */ + r->nowild = 1; + } else { + /* requires wildchar matching */ + if (strstr(pattern, "**") != NULL) + r->numseg = -1; + } + + r->pattern = strdup(pattern); + if (r->pattern == NULL) + err(ERR_NOMEM, NULL); +} + +int +parse_rule(char *line, enum rule_type def) +{ + enum rule_type type; + struct rule *r; + char *pattern; + size_t len; + + switch (*line) { + case '#': + case ';': + /* comment */ + return 0; + case '\0': + /* ingore empty lines */ + return 0; + default: + len = strcspn(line, " _"); + type = parse_command(line, len); + if (type == RULE_NONE) { + if (def == RULE_NONE) + return -1; + type = def; + pattern = line; + } else + pattern = line + len + 1; + + if (*pattern == '\0' && type != RULE_CLEAR) + return -1; + if (*pattern != '\0' && type == RULE_CLEAR) + return -1; + break; + } + + r = get_next_rule(); + r->type = type; + parse_pattern(r, pattern); + + return 0; +} + +void +parse_file(const char *file, enum rule_type def, int delim) +{ + FILE *fp; + char *line = NULL; + size_t linesize = 0, linenum = 0; + ssize_t linelen; + + if ((fp = fopen(file, "r")) == NULL) + err(ERR_SYNTAX, "open: %s", file); + + while ((linelen = getdelim(&line, &linesize, delim, fp)) != -1) { + linenum++; + line[linelen - 1] = '\0'; + if (parse_rule(line, def) == -1) + errx(ERR_SYNTAX, "syntax error in %s at entry %zu", + file, linenum); + } + + free(line); + if (ferror(fp)) + err(ERR_SYNTAX, "failed to parse file %s", file); + fclose(fp); +} + +static const char * +send_command(struct rule *r) +{ + static char buf[16]; + char *b = buf; + char *ep = buf + sizeof(buf); + + switch (r->type) { + case RULE_EXCLUDE: + *b++ = '-'; + break; + case RULE_INCLUDE: + *b++ = '+'; + break; + case RULE_CLEAR: + *b++ = 
'!'; + break; +#ifdef NOTYET + case RULE_MERGE: + *b++ = '.'; + break; + case RULE_DIR_MERGE: + *b++ = ':'; + break; + case RULE_SHOW: + *b++ = 'S'; + break; + case RULE_HIDE: + *b++ = 'H'; + break; + case RULE_PROTECT: + *b++ = 'P'; + break; + case RULE_RISK: + *b++ = 'R'; + break; +#endif + default: + err(ERR_SYNTAX, "unknown rule type %d", r->type); + } + +#ifdef NOTYET + for (i = 0; modifiers[i].modifier != 0; i++) { + if (rule->modifiers & modifiers[i].modifier) + *b++ = modifiers[i].sopt; + if (b >= ep - 3) + err(ERR_SYNTAX, "rule modifiers overflow"); + } +#endif + if (b >= ep - 3) + err(ERR_SYNTAX, "rule prefix overflow"); + *b++ = ' '; + + /* include the stripped root '/' for anchored patterns */ + if (r->anchored) + *b++ = '/'; + *b++ = '\0'; + return buf; +} + +static const char * +postfix_command(struct rule *r) +{ + static char buf[8]; + + buf[0] = '\0'; + if (r->onlydir) + strlcpy(buf, "/", sizeof(buf)); + if (r->leadingdir) + strlcpy(buf, "/***", sizeof(buf)); + + return buf; +} + +void +send_rules(struct sess *sess, int fd) +{ + const char *cmd; + const char *postfix; + struct rule *r; + size_t cmdlen, len, postlen, i; + + for (i = 0; i < numrules; i++) { + r = &rules[i]; + cmd = send_command(r); + if (cmd == NULL) + err(ERR_PROTOCOL, + "rules are incompatible with remote rsync"); + postfix = postfix_command(r); + cmdlen = strlen(cmd); + len = strlen(r->pattern); + postlen = strlen(postfix); + + if (!io_write_int(sess, fd, cmdlen + len + postlen)) + err(ERR_SOCK_IO, "send rules"); + if (!io_write_buf(sess, fd, cmd, cmdlen)) + err(ERR_SOCK_IO, "send rules"); + if (!io_write_buf(sess, fd, r->pattern, len)) + err(ERR_SOCK_IO, "send rules"); + /* include the '/' stripped by onlydir */ + if (postlen > 0) + if (!io_write_buf(sess, fd, postfix, postlen)) + err(ERR_SOCK_IO, "send rules"); + } + + if (!io_write_int(sess, fd, 0)) + err(ERR_SOCK_IO, "send rules"); +} + +void +recv_rules(struct sess *sess, int fd) +{ + char line[8192]; + size_t len; + + do { + 
if (!io_read_size(sess, fd, &len)) + err(ERR_SOCK_IO, "receive rules"); + + if (len == 0) + return; + if (len >= sizeof(line) - 1) + errx(ERR_SOCK_IO, "received rule too long"); + if (!io_read_buf(sess, fd, line, len)) + err(ERR_SOCK_IO, "receive rules"); + line[len] = '\0'; + if (parse_rule(line, RULE_NONE) == -1) + errx(ERR_PROTOCOL, "syntax error in received rules"); + } while (1); +} + +static inline int +rule_matched(struct rule *r) +{ + /* TODO apply negation once modifiers are added */ + + if (r->type == RULE_EXCLUDE) + return -1; + else + return 1; +} + +int +rules_match(const char *path, int isdir) +{ + const char *basename, *p = NULL; + struct rule *r; + size_t i; + + basename = strrchr(path, '/'); + if (basename != NULL) + basename += 1; + else + basename = path; + + for (i = 0; i < numrules; i++) { + r = &rules[i]; + + if (r->onlydir && !isdir) + continue; + + if (r->nowild) { + /* fileonly and anchored are mutually exclusive */ + if (r->fileonly) { + if (strcmp(basename, r->pattern) == 0) + return rule_matched(r); + } else if (r->anchored) { + /* + * assumes that neither path nor pattern + * start with a '/'. 
+ */ + if (strcmp(path, r->pattern) == 0) + return rule_matched(r); + } else if (r->leadingdir) { + size_t plen = strlen(r->pattern); + + p = strstr(path, r->pattern); + /* + * match from start or dir boundary also + * match to end or to dir boundary + */ + if (p != NULL && (p == path || p[-1] == '/') && + (p[plen] == '\0' || p[plen] == '/')) + return rule_matched(r); + } else { + size_t len = strlen(path); + size_t plen = strlen(r->pattern); + + if (len >= plen && strcmp(path + len - plen, + r->pattern) == 0) { + /* match all or start on dir boundary */ + if (len == plen || + path[len - plen - 1] == '/') + return rule_matched(r); + } + } + } else { + if (r->fileonly) { + p = basename; + } else if (r->anchored || r->numseg == -1) { + /* full path matching */ + p = path; + } else { + short nseg = 1; + + /* match against the last numseg elements */ + for (p = path; *p != '\0'; p++) + if (*p == '/') + nseg++; + if (nseg < r->numseg) { + p = NULL; + } else { + nseg -= r->numseg; + for (p = path; *p != '\0' && nseg > 0; + p++) { + if (*p == '/') + nseg--; + } + } + } + + if (p != NULL) { + if (rmatch(r->pattern, p, r->leadingdir) == 0) + return rule_matched(r); + } + } + } + + return 0; +} diff --git a/usr.bin/rsync/sender.c b/usr.bin/rsync/sender.c index 014d91ae411..e2999aa2589 100644 --- a/usr.bin/rsync/sender.c +++ b/usr.bin/rsync/sender.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sender.c,v 1.29 2021/06/30 13:10:04 claudio Exp $ */ +/* $OpenBSD: sender.c,v 1.30 2021/08/29 13:43:46 claudio Exp $ */ /* * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -358,7 +358,7 @@ rsync_sender(struct sess *sess, int fdin, { struct flist *fl = NULL; const struct flist *f; - size_t i, flsz = 0, phase = 0, excl; + size_t i, flsz = 0, phase = 0; int rc = 0, c; int32_t idx; struct pollfd pfd[3]; @@ -393,12 +393,8 @@ rsync_sender(struct sess *sess, int fdin, } /* Client sends zero-length exclusions if deleting. 
*/ - - if (!sess->opts->server && sess->opts->del && - !io_write_int(sess, fdout, 0)) { - ERRX1("io_write_int"); - goto out; - } + if (!sess->opts->server && sess->opts->del) + send_rules(sess, fdout); /* * Then the file list in any mode. @@ -427,15 +423,8 @@ rsync_sender(struct sess *sess, int fdin, * This is always 0 for now. */ - if (sess->opts->server) { - if (!io_read_size(sess, fdin, &excl)) { - ERRX1("io_read_size"); - goto out; - } else if (excl != 0) { - ERRX1("exclusion list is non-empty"); - goto out; - } - } + if (sess->opts->server) + recv_rules(sess, fdin); /* * Set up our poll events. |