src - OpenBSD base system

diff options


context:
space:
mode:

author	Claudio Jeker <claudio@cvs.openbsd.org>	2021-08-29 13:43:47 +0000
committer	Claudio Jeker <claudio@cvs.openbsd.org>	2021-08-29 13:43:47 +0000
commit	653dedf334c7d9a9cf5efe2be77b9b4b617f943b (patch)
tree	82c6bb333270eb85079e2309c1c8a5814f02b9f1 /usr.bin
parent	409b89b45f65cc2d68727d1e0f9ca26b0e8bc541 (diff)

Implement --exclude/--exclude-from and --include/--include-from.

Currently only simple includes and excludes work; the advanced filters introduced later in rsync are not implemented. It is unclear if the per-directory filters are something we want to implement. That would require more modern protocols, which openrsync is not able to handle right now. This adds a special matching function to allow the ** matching, which behaves mostly like rsync's version with the exception of how bad [] patterns are expanded. For bad patterns openrsync follows how fnmatch behaves rather than the somewhat strange rsync behaviour. Not perfect, but committing now so people can test and provide feedback.

Diffstat (limited to 'usr.bin')

-rw-r--r--

usr.bin/rsync/Makefile

-rw-r--r--

usr.bin/rsync/charclass.h

-rw-r--r--

usr.bin/rsync/extern.h

-rw-r--r--

usr.bin/rsync/flist.c

-rw-r--r--

usr.bin/rsync/main.c

-rw-r--r--

usr.bin/rsync/receiver.c

-rw-r--r--

usr.bin/rsync/rmatch.c

395

-rw-r--r--

usr.bin/rsync/rules.c

479

-rw-r--r--

usr.bin/rsync/sender.c

9 files changed, 1015 insertions, 41 deletions

diff --git a/usr.bin/rsync/Makefile b/usr.bin/rsync/Makefile
index d7af8bd0a87..f2e4d460c57 100644
--- a/usr.bin/rsync/Makefile
+++ b/usr.bin/rsync/Makefile

@@ -1,9 +1,9 @@

-# $OpenBSD: Makefile,v 1.10 2019/05/08 21:30:11 benno Exp $

+# $OpenBSD: Makefile,v 1.11 2021/08/29 13:43:46 claudio Exp $

PROG= openrsync

SRCS= blocks.c client.c downloader.c fargs.c flist.c hash.c ids.c \

- io.c log.c mkpath.c mktemp.c receiver.c sender.c server.c session.c \

- socket.c symlinks.c uploader.c main.c misc.c

+ io.c log.c main.c misc.c mkpath.c mktemp.c receiver.c rmatch.c \

+ rules.c sender.c server.c session.c socket.c symlinks.c uploader.c

LDADD+= -lcrypto -lm

DPADD+= ${LIBCRYPTO} ${LIBM}

MAN= openrsync.1

diff --git a/usr.bin/rsync/charclass.h b/usr.bin/rsync/charclass.h
new file mode 100644
index 00000000000..1c5ff7ee044
--- /dev/null
+++ b/usr.bin/rsync/charclass.h

@@ -0,0 +1,29 @@

+/*

+ * Public domain, 2008, Todd C. Miller <millert@openbsd.org>

+ *

+ * $OpenBSD: charclass.h,v 1.1 2021/08/29 13:43:46 claudio Exp $

+ */

+/*

+ * POSIX character class support for fnmatch() and glob().

+ *

+ * cclasses[] maps each class name, as written between "[:" and ":]",

+ * to the <ctype.h> predicate that implements it. The table ends with

+ * a { NULL, NULL } sentinel, which NCCLASSES does not count.

+ */

+static const struct cclass {

+ const char *name; /* class name without the surrounding "[:" and ":]" */

+ int (*isctype)(int); /* <ctype.h> membership test for this class */

+} cclasses[] = {

+ { "alnum", isalnum },

+ { "alpha", isalpha },

+ { "blank", isblank },

+ { "cntrl", iscntrl },

+ { "digit", isdigit },

+ { "graph", isgraph },

+ { "lower", islower },

+ { "print", isprint },

+ { "punct", ispunct },

+ { "space", isspace },

+ { "upper", isupper },

+ { "xdigit", isxdigit },

+ { NULL, NULL } /* sentinel: lookups stop at name == NULL */

+};

+#define NCCLASSES (sizeof(cclasses) / sizeof(cclasses[0]) - 1) /* number of real entries, sentinel excluded */

diff --git a/usr.bin/rsync/extern.h b/usr.bin/rsync/extern.h
index d40188d1ebd..4681d47fa24 100644
--- a/usr.bin/rsync/extern.h
+++ b/usr.bin/rsync/extern.h

@@ -1,4 +1,4 @@

-/* $OpenBSD: extern.h,v 1.39 2021/06/30 15:24:10 claudio Exp $ */

+/* $OpenBSD: extern.h,v 1.40 2021/08/29 13:43:46 claudio Exp $ */

@@ -131,12 +131,28 @@ struct opts {

int no_motd; /* --no-motd */

int numeric_ids; /* --numeric-ids */

int one_file_system; /* -x */

+ int from0; /* -0 */

char *rsync_path; /* --rsync-path */

char *ssh_prog; /* --rsh or -e */

char *port; /* --port */

char *address; /* --address */

};

+enum rule_type { /* kind of an include/exclude filter rule */

+ RULE_NONE, /* no rule recognised; first enumerator, so its value is 0 */

+ RULE_EXCLUDE, /* '-' or "exclude" */

+ RULE_INCLUDE, /* '+' or "include" */

+ RULE_CLEAR, /* '!' or "clear" */

+#ifdef NOTYET

+ RULE_MERGE, /* '.' or "merge" */

+ RULE_DIR_MERGE, /* ':' or "dir-merge" */

+ RULE_SHOW, /* 'S' or "show" */

+ RULE_HIDE, /* 'H' or "hide" */

+ RULE_PROTECT, /* 'P' or "protect" */

+ RULE_RISK, /* 'R' or "risk" */

+#endif

+};

* An individual block description for a file.

* See struct blkset.

@@ -362,6 +378,14 @@ char *mkstempnodat(int, char *, mode_t, dev_t);

char *mkstempsock(const char *, char *);

int mktemplate(char **, const char *, int);

+int parse_rule(char *line, enum rule_type);

+void parse_file(const char *, enum rule_type, int);

+void send_rules(struct sess *, int);

+void recv_rules(struct sess *, int);

+int rules_match(const char *, int);

+int rmatch(const char *, const char *, int);

char *symlink_read(const char *);

char *symlinkat_read(int, const char *);

diff --git a/usr.bin/rsync/flist.c b/usr.bin/rsync/flist.c
index e33f51b16d4..86cde1dd3d3 100644
--- a/usr.bin/rsync/flist.c
+++ b/usr.bin/rsync/flist.c

@@ -1,4 +1,4 @@

-/* $OpenBSD: flist.c,v 1.32 2021/06/30 13:10:04 claudio Exp $ */

+/* $OpenBSD: flist.c,v 1.33 2021/08/29 13:43:46 claudio Exp $ */

@@ -823,6 +823,11 @@ flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,

ERR("%s: lstat", root);

return 0;

} else if (S_ISREG(st.st_mode)) {

+ /* filter files */

+ if (rules_match(root, 0) == -1) {

+ WARNX("%s: skipping excluded file", root);

+ return 1;

+ }

if (!flist_realloc(fl, sz, max)) {

ERRX1("flist_realloc");

return 0;

@@ -839,7 +844,13 @@ flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,

if (!sess->opts->preserve_links) {

WARNX("%s: skipping symlink", root);

return 1;

- } else if (!flist_realloc(fl, sz, max)) {

+ }

+ /* filter files */

+ if (rules_match(root, 0) == -1) {

+ WARNX("%s: skipping excluded symlink", root);

+ return 1;

+ }

+ if (!flist_realloc(fl, sz, max)) {

ERRX1("flist_realloc");

return 0;

}

@@ -942,6 +953,15 @@ flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,

nxdev++;

}

+ /* filter files */

+ if (rules_match(ent->fts_path + stripdir,

+ (ent->fts_info == FTS_D)) == -1) {

+ WARNX("%s: skipping excluded file",

+ ent->fts_path + stripdir);

+ fts_set(fts, ent, FTS_SKIP);

+ continue;

+ }

/* Allocate a new file entry. */

if (!flist_realloc(fl, sz, max)) {

@@ -1073,6 +1093,11 @@ flist_gen_files(struct sess *sess, size_t argc, char **argv,

continue;

}

+ /* filter files */

+ if (rules_match(argv[i], S_ISDIR(st.st_mode)) == -1) {

+ WARNX("%s: skipping excluded file", argv[i]);

+ continue;

+ }

f = &fl[flsz++];

assert(f != NULL);

@@ -1297,6 +1322,16 @@ flist_gen_dels(struct sess *sess, const char *root, struct flist **fl,

continue;

}

+ /* filter files on delete */

+ /* TODO handle --delete-excluded */

+ if (rules_match(ent->fts_path + stripdir,

+ (ent->fts_info == FTS_D)) == -1) {

+ WARNX("skip excluded file %s",

+ ent->fts_path + stripdir);

+ fts_set(fts, ent, FTS_SKIP);

+ continue;

+ }

/* Look up in hashtable. */

memset(&hent, 0, sizeof(ENTRY));

diff --git a/usr.bin/rsync/main.c b/usr.bin/rsync/main.c
index cb4a03488e5..31174622fb4 100644
--- a/usr.bin/rsync/main.c
+++ b/usr.bin/rsync/main.c

@@ -1,4 +1,4 @@

-/* $OpenBSD: main.c,v 1.56 2021/07/14 11:14:27 claudio Exp $ */

+/* $OpenBSD: main.c,v 1.57 2021/08/29 13:43:46 claudio Exp $ */

@@ -276,6 +276,10 @@ static struct opts opts;

#define OP_RSYNCPATH 1002

#define OP_TIMEOUT 1003

#define OP_VERSION 1004

+#define OP_EXCLUDE 1005

+#define OP_INCLUDE 1006

+#define OP_EXCLUDE_FROM 1007

+#define OP_INCLUDE_FROM 1008

const struct option lopts[] = {

{ "address", required_argument, NULL, OP_ADDRESS },

@@ -286,9 +290,15 @@ const struct option lopts[] = {

{ "devices", no_argument, &opts.devices, 1 },

{ "no-devices", no_argument, &opts.devices, 0 },

{ "dry-run", no_argument, &opts.dry_run, 1 },

+ { "exclude", required_argument, NULL, OP_EXCLUDE },

+ { "exclude-from", required_argument, NULL, OP_EXCLUDE_FROM },

+ { "from0", no_argument, NULL, '0' },

+ { "no-from0", no_argument, &opts.from0, 0 },

{ "group", no_argument, &opts.preserve_gids, 1 },

{ "no-group", no_argument, &opts.preserve_gids, 0 },

{ "help", no_argument, NULL, 'h' },

+ { "include", required_argument, NULL, OP_INCLUDE },

+ { "include-from", required_argument, NULL, OP_INCLUDE_FROM },

{ "links", no_argument, &opts.preserve_links, 1 },

{ "no-links", no_argument, &opts.preserve_links, 0 },

{ "no-motd", no_argument, &opts.no_motd, 1 },

@@ -324,6 +334,7 @@ main(int argc, char *argv[])

struct fargs *fargs;

char **args;

const char *errstr;

/* Global pledge. */

if (pledge("stdio unix rpath wpath cpath dpath inet fattr chown dns getpw proc exec unveil",

@@ -333,6 +344,9 @@ main(int argc, char *argv[])

while ((c = getopt_long(argc, argv, "Dae:ghlnoprtvxz", lopts, NULL))

!= -1) {

switch (c) {

+ case '0':

+ opts.from0 = 1;

+ break;

case 'D':

opts.devices = 1;

opts.specials = 1;

@@ -398,6 +412,24 @@ main(int argc, char *argv[])

errx(ERR_SYNTAX, "timeout is %s: %s",

errstr, optarg);

break;

+ case OP_EXCLUDE:

+ if (parse_rule(optarg, RULE_EXCLUDE) == -1)

+ errx(ERR_SYNTAX, "syntax error in exclude: %s",

+ optarg);

+ break;

+ case OP_INCLUDE:

+ if (parse_rule(optarg, RULE_INCLUDE) == -1)

+ errx(ERR_SYNTAX, "syntax error in include: %s",

+ optarg);

+ break;

+ case OP_EXCLUDE_FROM:

+ parse_file(optarg, RULE_EXCLUDE,

+ opts.from0 ? '\0' : '\n' );

+ break;

+ case OP_INCLUDE_FROM:

+ parse_file(optarg, RULE_INCLUDE,

+ opts.from0 ? '\0' : '\n' );

+ break;

case OP_VERSION:

fprintf(stderr, "openrsync: protocol version %u\n",

RSYNC_PROTOCOL);

diff --git a/usr.bin/rsync/receiver.c b/usr.bin/rsync/receiver.c
index c731211070f..6e5b01670cd 100644
--- a/usr.bin/rsync/receiver.c
+++ b/usr.bin/rsync/receiver.c

@@ -1,4 +1,4 @@

-/* $OpenBSD: receiver.c,v 1.28 2021/06/30 13:10:04 claudio Exp $ */

+/* $OpenBSD: receiver.c,v 1.29 2021/08/29 13:43:46 claudio Exp $ */

@@ -172,7 +172,7 @@ int

rsync_receiver(struct sess *sess, int fdin, int fdout, const char *root)

{

struct flist *fl = NULL, *dfl = NULL;

- size_t i, flsz = 0, dflsz = 0, excl;

+ size_t i, flsz = 0, dflsz = 0;

char *tofree;

int rc = 0, dfd = -1, phase = 0, c;

int32_t ioerror;

@@ -184,22 +184,13 @@ rsync_receiver(struct sess *sess, int fdin, int fdout, const char *root)

if (pledge("stdio unix rpath wpath cpath dpath fattr chown getpw unveil", NULL) == -1)

err(ERR_IPC, "pledge");

- /* Client sends zero-length exclusions. */

+ /* Client sends exclusions. */

+ if (!sess->opts->server)

+ send_rules(sess, fdout);

- if (!sess->opts->server && !io_write_int(sess, fdout, 0)) {

- ERRX1("io_write_int");

- goto out;

- }

- if (sess->opts->server && sess->opts->del) {

- if (!io_read_size(sess, fdin, &excl)) {

- ERRX1("io_read_size");

- goto out;

- } else if (excl != 0) {

- ERRX("exclusion list is non-empty");

- goto out;

- }

+ /* Server receives exclusions if delete is on. */

+ if (sess->opts->server && sess->opts->del)

+ recv_rules(sess, fdin);

* Start by receiving the file list and our mystery number.

diff --git a/usr.bin/rsync/rmatch.c b/usr.bin/rsync/rmatch.c
new file mode 100644
index 00000000000..b037b80b9b6
--- /dev/null
+++ b/usr.bin/rsync/rmatch.c

@@ -0,0 +1,395 @@

+/* $OpenBSD: rmatch.c,v 1.1 2021/08/29 13:43:46 claudio Exp $ */

+/*

+ *

+ * Permission to use, copy, modify, and distribute this software for any

+ * purpose with or without fee is hereby granted, provided that the above

+ * copyright notice and this permission notice appear in all copies.

+ *

+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES

+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF

+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR

+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES

+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN

+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF

+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

+ */

+/*

+ *

+ * This code is derived from software contributed to Berkeley by

+ * Guido van Rossum.

+ *

+ * Redistribution and use in source and binary forms, with or without

+ * modification, are permitted provided that the following conditions

+ * are met:

+ * 1. Redistributions of source code must retain the above copyright

+ * notice, this list of conditions and the following disclaimer.

+ * 2. Redistributions in binary form must reproduce the above copyright

+ * notice, this list of conditions and the following disclaimer in the

+ * documentation and/or other materials provided with the distribution.

+ * 3. Neither the name of the University nor the names of its contributors

+ * may be used to endorse or promote products derived from this software

+ * without specific prior written permission.

+ *

+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND

+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE

+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS

+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

+ * SUCH DAMAGE.

+ */

+#include <ctype.h>

+#include <stdio.h>

+#include <string.h>

+#include <limits.h>

+#include "charclass.h"

+#define RANGE_MATCH 1

+#define RANGE_NOMATCH 0

+#define RANGE_ERROR (-1)

+static int /* RANGE_MATCH, RANGE_NOMATCH or RANGE_ERROR */

+classmatch(const char *pattern, char test, const char **ep) /* test against ":name:]" at pattern; *ep receives where the caller resumes */

+ const char *mismatch = pattern; /* resume point handed back when this is not a valid class */

+ const struct cclass *cc;

+ const char *colon;

+ size_t len;

+ int rval = RANGE_NOMATCH;

+ if (*pattern++ != ':') { /* the '[' itself was consumed by the caller */

+ *ep = mismatch;

+ return RANGE_ERROR;

+ }

+ if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') { /* a closing ":]" is required */

+ *ep = mismatch;

+ return RANGE_ERROR;

+ }

+ *ep = colon + 2; /* just past the ":]" */

+ len = (size_t)(colon - pattern); /* length of the class name */

+ for (cc = cclasses; cc->name != NULL; cc++) {

+ if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') { /* whole name, not just a prefix */

+ if (cc->isctype((unsigned char)test))

+ rval = RANGE_MATCH;

+ return rval; /* known class: match or no match */

+ }

+ /* invalid character class, treat as normal text */

+ *ep = mismatch;

+ return RANGE_ERROR;

+static int /* RANGE_MATCH, RANGE_NOMATCH or RANGE_ERROR */

+rangematch(const char **pp, char test) /* match test against the bracket expression at *pp, which points just past the '[' */

+ const char *pattern = *pp;

+ int negate, ok;

+ char c, c2;

+ /*

+ * A bracket expression starting with an unquoted circumflex

+ * character produces unspecified results (IEEE 1003.2-1992,

+ * 3.13.2). This implementation treats it like '!', for

+ * consistency with the regular expression syntax.

+ * J.T. Conklin (conklin@ngai.kaleida.com)

+ */

+ if ((negate = (*pattern == '!' || *pattern == '^')))

+ ++pattern;

+ /*

+ * A right bracket shall lose its special meaning and represent

+ * itself in a bracket expression if it occurs first in the list.

+ * -- POSIX.2 2.8.3.2

+ */

+ ok = 0;

+ c = *pattern++; /* first member is read before any ']' test, so a leading ']' is literal */

+ do {

+ if (c == '[') {

+ switch (classmatch(pattern, test, &pattern)) { /* possible "[:class:]" */

+ case RANGE_MATCH:

+ ok = 1;

+ continue;

+ case RANGE_NOMATCH:

+ continue;

+ default:

+ /* invalid character class, treat literally. */

+ break;

+ }

+ if (c == '\\') /* backslash quotes the next character */

+ c = *pattern++;

+ if (c == '\0') /* expression is not terminated by ']' */

+ return RANGE_ERROR;

+ /* patterns can not match on '/' */

+ if (c == '/')

+ return RANGE_NOMATCH;

+ if (*pattern == '-'

+ && (c2 = *(pattern + 1)) != '\0' && c2 != ']') { /* "c-c2" range; a '-' right before ']' is taken literally */

+ pattern += 2;

+ if (c2 == '\\')

+ c2 = *pattern++;

+ if (c2 == '\0')

+ return RANGE_ERROR;

+ if (c <= test && test <= c2)

+ ok = 1;

+ } else if (c == test)

+ ok = 1;

+ } while ((c = *pattern++) != ']');

+ *pp = pattern; /* past the closing ']'; the early returns above leave *pp untouched */

+ return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);

+/*

+ * Single character match, advances pattern as much as needed.

+ * Handles '?', bracket expressions, backslash escapes and literal

+ * characters; '?' and '[' never match '/' or the terminating NUL.

+ * Return 0 on match and !0 (aka 1) on mismatch.

+ * *pp is written back on every return, match or not, and then points

+ * past the part of the pattern that was examined.

+ */

+static int

+matchchar(const char **pp, const char in)

+ const char *pattern = *pp;

+ char c;

+ int rv = 0;

+ switch (c = *pattern++) {

+ case '?':

+ if (in == '\0')

+ rv = 1;

+ if (in == '/')

+ rv = 1;

+ break;

+ case '[':

+ if (in == '\0')

+ rv = 1;

+ if (in == '/')

+ rv = 1;

+ if (rv == 1) /* cannot match, so the range is not parsed at all */

+ break;

+ switch (rangematch(&pattern, in)) {

+ case RANGE_ERROR:

+ /* not a good range, treat as normal text */

+ goto normal;

+ case RANGE_MATCH:

+ break;

+ case RANGE_NOMATCH:

+ rv = 1;

+ }

+ break;

+ case '\\':

+ if ((c = *pattern++) == '\0') { /* a trailing backslash stands for itself */

+ c = '\\';

+ --pattern;

+ }

+ /* FALLTHROUGH */

+ default:

+ normal:

+ if (c != in)

+ rv = 1;

+ break;

+ }

+ *pp = pattern;

+ return rv;

+/*

+ * Do a substring match. If wild is set then the pattern started with a '*'.

+ * The match will go until '*', '/' or '\0' is encountered in pattern or

+ * the input string is consumed up to end.

+ * Returns 0 on success and 1 on failure.

+ * On success *pp and *ss are set to the first unmatched pattern and

+ * string position. On failure *pp is left alone, but *ss may already

+ * have been moved forward by the wild retry step.

+ */

+static int

+matchsub(const char **pp, const char **ss, const char *end, int wild)

+ const char *pattern = *pp;

+ const char *p = pattern;

+ const char *string = *ss;

+ size_t matchlen;

+ /* first calculate how many characters the submatch will consume */

+ for (matchlen = 0; *p != '\0'; matchlen++) { /* one count per pattern element */

+ if (p[0] == '*')

+ break;

+ /* '/' acts as barrier */

+ if (p[0] == '/' || (p[0] == '\\' && p[1] == '/')) {

+ if (wild) {

+ /* match needs to match up to end of segment */

+ if (string > end - matchlen)

+ return 1;

+ string = end - matchlen;

+ wild = 0;

+ }

+ break;

+ }

+ /*

+ * skip forward one character in pattern by doing a

+ * dummy lookup.

+ */

+ matchchar(&p, ' '); /* result ignored, only the advance of p matters */

+ }

+ /* not enough char to match */

+ if (string > end - matchlen)

+ return 1;

+ if (*p == '\0') {

+ if (wild) {

+ /* match needs to match up to end of segment */

+ string = end - matchlen;

+ wild = 0;

+ }

+ while (*pattern != '\0' && *pattern != '*') {

+ /* eat possible escape char before '/' */

+ if (pattern[0] == '\\' && pattern[1] == '/')

+ pattern++;

+ if (pattern[0] == '/')

+ break;

+ /* check if there are still characters available to compare */

+ if (string >= end)

+ return 1;

+ /* Compare one char at a time. */

+ if (!matchchar(&pattern, *string++))

+ continue;

+ if (wild) {

+ /* skip forward one char and restart match */

+ string = ++*ss; /* writes through ss before the overall result is known */

+ pattern = *pp;

+ /* can it still match? */

+ if (string > end - matchlen)

+ return 1;

+ } else {

+ /* failed match */

+ return 1;

+ }

+ *pp = pattern;

+ *ss = string;

+ return 0;

+/*

+ * File matching with the addition of the special '**'.

+ * Pattern and string are compared one '/'-separated segment at a

+ * time; after a '**' a failed segment match is retried against the

+ * following segments of string.

+ * When leading_dir is set the pattern may also match a leading part

+ * of string that is followed by '/'.

+ * Returns 0 on match and !0 for strings that do not match pattern.

+ */

+int

+rmatch(const char *pattern, const char *string, int leading_dir)

+ const char *segend, *segnext, *mismatch = NULL; /* mismatch: pattern position to restart from after a '**' */

+ int wild, starstar;

+ while (*pattern && *string) {

+ /* handle leading '/' first */

+ if (pattern[0] == '\\' && pattern[1] == '/')

+ pattern++;

+ if (*string == '/' && *pattern == '/') {

+ string++;

+ pattern++;

+ }

+ /* match to the next '/' in string */

+ segend = strchr(string, '/');

+ if (segend == NULL)

+ segend = strchr(string, '\0');

+ while (*pattern) {

+ /*

+ * Check for '*' and '**'. For '*' reduce '*' and '?'

+ * sequences into n-'?' and trailing '*'.

+ * For '**' this optimisation can not be done

+ * since '**???/' will match 'a/aa/aaa/' but not

+ * 'a/aa/aa/' still additional '*' will be reduced.

+ */

+ wild = 0;

+ starstar = 0;

+ for ( ; *pattern == '*' || *pattern == '?'; pattern++) {

+ if (pattern[0] == '*') {

+ if (pattern[1] == '*') {

+ starstar = 1;

+ pattern++;

+ }

+ wild = 1;

+ } else if (!starstar) { /* pattern[0] == '?' */

+ if (string < segend && *string != '/')

+ string++;

+ else

+ /* no match possible */

+ return 1;

+ } else

+ break;

+ }

+ /* pattern ends in '**' so it is a match */

+ if (starstar && *pattern == '\0')

+ return 0;

+ if (starstar) {

+ segnext = segend; /* segnext is only read once mismatch is set */

+ mismatch = pattern;

+ }

+ while (string < segend) {

+ if (matchsub(&pattern, &string, segend, wild)) {

+failed_match: /* also entered by goto from below the inner loops */

+ /*

+ * failed to match, if starstar retry

+ * with the next segment.

+ */

+ if (mismatch) {

+ pattern = mismatch;

+ wild = 1;

+ string = segnext;

+ if (*string == '/')

+ string++;

+ segend = strchr(string, '/');

+ if (!segend)

+ segend = strchr(string,

+ '\0');

+ segnext = segend;

+ if (string < segend)

+ continue;

+ }

+ /* no match possible */

+ return 1;

+ }

+ break;

+ }

+ /* at end of string segment, eat up any extra '*' */

+ if (string >= segend && *pattern != '*')

+ break;

+ }

+ if (*string != '\0' && *string != '/') /* segment of string not fully consumed */

+ goto failed_match;

+ if (*pattern != '\0' && *pattern != '/') /* segment of pattern not fully consumed */

+ goto failed_match;

+ }

+ /* if both pattern and string are consumed it was a match */

+ if (*pattern == '\0' && *string == '\0')

+ return 0;

+ /* if leading_dir is set then string can also be '/' for success */

+ if (leading_dir && *pattern == '\0' && *string == '/')

+ return 0;

+ /* else failure */

+ return 1;

diff --git a/usr.bin/rsync/rules.c b/usr.bin/rsync/rules.c
new file mode 100644
index 00000000000..c34e7d94365
--- /dev/null
+++ b/usr.bin/rsync/rules.c

@@ -0,0 +1,479 @@

+#include <err.h>

+#include <stdlib.h>

+#include <stdio.h>

+#include <string.h>

+#include "extern.h"

+struct rule { /* one parsed include/exclude rule; filled in by parse_rule() and parse_pattern() */

+ char *pattern; /* strdup()ed pattern with the anchoring '/' and a trailing '/' or "/***" removed */

+ enum rule_type type;

+#ifdef NOTYET

+ unsigned int modifiers;

+#endif

+ short numseg; /* number of '/'-separated segments, or -1 if the pattern has wildcards including "**" */

+ unsigned char anchored; /* pattern started with '/' */

+ unsigned char fileonly; /* single segment and not anchored */

+ unsigned char nowild; /* pattern contains none of '*', '?', '[' */

+ unsigned char onlydir; /* pattern ended in '/' */

+ unsigned char leadingdir; /* pattern ended in "/***" */

+};

+static struct rule *rules; /* array of rules, grown by get_next_rule() */

+static size_t numrules; /* number of rules */

+static size_t rulesz; /* available size */

+/* up to protocol 29 filter rules only support - + ! and no modifiers */

+const struct command { /* rule prefixes understood by parse_command() */

+ enum rule_type type;

+ char sopt; /* one-character prefix */

+ const char *lopt; /* long name of the prefix */

+} commands[] = {

+ { RULE_EXCLUDE, '-', "exclude" },

+ { RULE_INCLUDE, '+', "include" },

+ { RULE_CLEAR, '!', "clear" },

+#ifdef NOTYET

+ { RULE_MERGE, '.', "merge" },

+ { RULE_DIR_MERGE, ':', "dir-merge" },

+ { RULE_SHOW, 'S', "show" },

+ { RULE_HIDE, 'H', "hide" },

+ { RULE_PROTECT, 'P', "protect" },

+ { RULE_RISK, 'R', "risk" },

+#endif

+ { 0 } /* terminator: type 0 is RULE_NONE, which ends the lookup loop */

+};

+#ifdef NOTYET /* rule modifiers: everything up to the #endif is compiled only with NOTYET */

+#define MOD_ABSOLUTE 0x0001

+#define MOD_NEGATE 0x0002

+#define MOD_CVSEXCLUDE 0x0004

+#define MOD_SENDING 0x0008

+#define MOD_RECEIVING 0x0010

+#define MOD_PERISHABLE 0x0020

+#define MOD_XATTR 0x0040

+#define MOD_MERGE_EXCLUDE 0x0080

+#define MOD_MERGE_INCLUDE 0x0100

+#define MOD_MERGE_CVSCOMPAT 0x0200

+#define MOD_MERGE_EXCLUDE_FILE 0x0400

+#define MOD_MERGE_NO_INHERIT 0x0800

+#define MOD_MERGE_WORDSPLIT 0x1000

+/* Maps each MOD_* flag bit to its one-character spelling; maybe support absolute and negate */

+const struct modifier {

+ unsigned int modifier; /* one MOD_* bit */

+ char sopt; /* character that selects it */

+} modifiers[] = {

+ { MOD_ABSOLUTE, '/' },

+ { MOD_NEGATE, '!' },

+ { MOD_CVSEXCLUDE, 'C' },

+ { MOD_SENDING, 's' },

+ { MOD_RECEIVING, 'r' },

+ { MOD_PERISHABLE, 'p' },

+ { MOD_XATTR, 'x' },

+ /* for '.' and ':' types */

+ { MOD_MERGE_EXCLUDE, '-' },

+ { MOD_MERGE_INCLUDE, '+' },

+ { MOD_MERGE_CVSCOMPAT, 'C' }, /* same letter as MOD_CVSEXCLUDE above */

+ { MOD_MERGE_EXCLUDE_FILE, 'e' },

+ { MOD_MERGE_NO_INHERIT, 'n' },

+ { MOD_MERGE_WORDSPLIT, 'w' },

+ { 0 } /* terminator: modifier == 0 ends the table */

+#endif

+static struct rule * /* next unused slot of rules[] */

+get_next_rule(void) /* claims one more slot, growing the array when it is full */

+ struct rule *new;

+ size_t newsz;

+ if (++numrules > rulesz) { /* out of room: start with 16 slots, then double */

+ if (rulesz == 0)

+ newsz = 16;

+ else

+ newsz = rulesz * 2;

+ new = recallocarray(rules, rulesz, newsz, sizeof(*rules));

+ if (new == NULL)

+ err(ERR_NOMEM, NULL);

+ rules = new;

+ rulesz = newsz;

+ }

+ return rules + numrules - 1; /* points into rules[], so a later call that grows the array may move it */

+/*
+ * Map the prefix of a rule line to its rule type.
+ * command points at the prefix, which is len bytes long and is not
+ * necessarily NUL-terminated at that length.  A prefix is recognised
+ * only when it is exactly one of the long names in commands[]
+ * ("exclude") or exactly a one-character short name ('-').
+ * Returns RULE_NONE for an empty or unknown prefix, and for any prefix
+ * containing ',' because modifiers are not implemented.
+ */
+static enum rule_type
+parse_command(const char *command, size_t len)
+{
+	size_t i;
+
+	/* a zero-length comparison would match every table entry */
+	if (len == 0)
+		return RULE_NONE;
+
+	/* XXX modifiers not yet implemented */
+	if (memchr(command, ',', len) != NULL)
+		return RULE_NONE;
+
+	for (i = 0; commands[i].type != RULE_NONE; i++) {
+		/* whole-word match only: "inc" must not select "include" */
+		if (strlen(commands[i].lopt) == len &&
+		    memcmp(commands[i].lopt, command, len) == 0)
+			return commands[i].type;
+		if (len == 1 && commands[i].sopt == *command)
+			return commands[i].type;
+	}
+
+	return RULE_NONE;
+}

+static void

+parse_pattern(struct rule *r, char *pattern) /* fill in r's pattern and flags; pattern is edited in place before being copied */

+ size_t plen;

+ char *p;

+ short nseg = 1;

+ /*

+ * check for / at start and end of pattern both are special and

+ * can bypass full path matching.

+ */

+ if (*pattern == '/') {

+ pattern++;

+ r->anchored = 1;

+ }

+ plen = strlen(pattern);

+ /*

+ * check for patterns ending in '/' and '/'+'***' and handle them

+ * specially. Because of this and the check above pattern will never

+ * start or end with a '/'.

+ * NOTE(review): only one leading and one trailing '/' are removed,

+ * so this presumably assumes there are no doubled slashes.

+ */

+ if (plen > 1 && pattern[plen - 1] == '/') {

+ r->onlydir = 1;

+ pattern[plen - 1] = '\0'; /* writes into the caller's buffer */

+ }

+ if (plen > 4 && strcmp(pattern + plen - 4, "/***") == 0) { /* plen is still the length before any truncation above */

+ r->leadingdir = 1;

+ pattern[plen - 4] = '\0';

+ }

+ /* count how many segments the pattern has. */

+ for (p = pattern; *p != '\0'; p++)

+ if (*p == '/')

+ nseg++;

+ r->numseg = nseg;

+ /* check if this pattern only matches against the basename */

+ if (nseg == 1 && !r->anchored)

+ r->fileonly = 1;

+ if (strpbrk(pattern, "*?[") == NULL) {

+ /* no wildchar matching */

+ r->nowild = 1;

+ } else {

+ /* requires wildchar matching */

+ if (strstr(pattern, "**") != NULL)

+ r->numseg = -1; /* "**" can span segments, so no fixed count */

+ }

+ r->pattern = strdup(pattern); /* the rule keeps its own copy */

+ if (r->pattern == NULL)

+ err(ERR_NOMEM, NULL);

+/*
+ * Parse one rule line and append it to the rule list.
+ * line is a NUL-terminated rule such as "- *.o", "+ /src/" or "!";
+ * a line without a recognised prefix is a bare pattern of type def.
+ * Lines starting with '#' or ';' and empty lines are ignored.
+ * line may be modified, because parse_pattern() cuts suffixes off the
+ * pattern in place.
+ * Returns 0 on success (including ignored lines) and -1 on a syntax
+ * error: no usable type, a missing pattern, or "clear" with a pattern.
+ */
+int
+parse_rule(char *line, enum rule_type def)
+{
+	enum rule_type type;
+	struct rule *r;
+	char *pattern;
+	size_t len;
+
+	switch (*line) {
+	case '#':
+	case ';':
+		/* comment */
+		return 0;
+	case '\0':
+		/* ignore empty lines */
+		return 0;
+	default:
+		len = strcspn(line, " _");
+		/* a line starting with a separator has no prefix at all */
+		type = len > 0 ? parse_command(line, len) : RULE_NONE;
+		/*
+		 * A prefix only counts as a command when a separator
+		 * follows it.  "clear" is the one command that takes no
+		 * pattern and may therefore end the line.
+		 */
+		if (type != RULE_NONE && type != RULE_CLEAR &&
+		    line[len] == '\0')
+			type = RULE_NONE;
+		if (type == RULE_NONE) {
+			if (def == RULE_NONE)
+				return -1;
+			type = def;
+			pattern = line;
+		} else if (line[len] == '\0') {
+			/* do not step over the terminating NUL */
+			pattern = line + len;
+		} else
+			pattern = line + len + 1;
+
+		if (*pattern == '\0' && type != RULE_CLEAR)
+			return -1;
+		if (*pattern != '\0' && type == RULE_CLEAR)
+			return -1;
+		break;
+	}
+
+	r = get_next_rule();
+	r->type = type;
+	parse_pattern(r, pattern);
+
+	return 0;
+}

+/*
+ * Read rules from file, one per entry, and add them with parse_rule().
+ * Entries are separated by delim ('\n', or '\0' when --from0 is used);
+ * def is the rule type for entries that carry no prefix of their own.
+ * Exits through err()/errx() with ERR_SYNTAX if the file cannot be
+ * opened or read, or if an entry is not a valid rule.
+ */
+void
+parse_file(const char *file, enum rule_type def, int delim)
+{
+	FILE *fp;
+	char *line = NULL;
+	size_t linesize = 0, linenum = 0;
+	ssize_t linelen;
+
+	if ((fp = fopen(file, "r")) == NULL)
+		err(ERR_SYNTAX, "open: %s", file);
+
+	while ((linelen = getdelim(&line, &linesize, delim, fp)) != -1) {
+		linenum++;
+		/*
+		 * Strip the delimiter.  The last entry of a file may come
+		 * without one, in which case nothing must be cut off.
+		 */
+		if (linelen > 0 && line[linelen - 1] == (char)delim)
+			line[linelen - 1] = '\0';
+		if (parse_rule(line, def) == -1)
+			errx(ERR_SYNTAX, "syntax error in %s at entry %zu",
+			    file, linenum);
+	}
+
+	free(line);
+	if (ferror(fp))
+		err(ERR_SYNTAX, "failed to parse file %s", file);
+	fclose(fp);
+}

+/*
+ * Build the text that is sent in front of a rule's pattern: the
+ * one-character rule prefix, a space and, for anchored rules, the '/'
+ * that parse_pattern() removed.
+ * Returns a pointer to a static buffer that the next call overwrites.
+ * Exits through errx() on an unknown rule type or if the prefix does
+ * not fit the buffer; these are not errno failures, so errx() is used
+ * to avoid printing an unrelated strerror() text.
+ */
+static const char *
+send_command(struct rule *r)
+{
+	static char buf[16];
+	char *b = buf;
+	char *ep = buf + sizeof(buf);
+#ifdef NOTYET
+	size_t i;
+#endif
+
+	switch (r->type) {
+	case RULE_EXCLUDE:
+		*b++ = '-';
+		break;
+	case RULE_INCLUDE:
+		*b++ = '+';
+		break;
+	case RULE_CLEAR:
+		*b++ = '!';
+		break;
+#ifdef NOTYET
+	case RULE_MERGE:
+		*b++ = '.';
+		break;
+	case RULE_DIR_MERGE:
+		*b++ = ':';
+		break;
+	case RULE_SHOW:
+		*b++ = 'S';
+		break;
+	case RULE_HIDE:
+		*b++ = 'H';
+		break;
+	case RULE_PROTECT:
+		*b++ = 'P';
+		break;
+	case RULE_RISK:
+		*b++ = 'R';
+		break;
+#endif
+	default:
+		errx(ERR_SYNTAX, "unknown rule type %d", r->type);
+	}
+
+#ifdef NOTYET
+	for (i = 0; modifiers[i].modifier != 0; i++) {
+		if (r->modifiers & modifiers[i].modifier)
+			*b++ = modifiers[i].sopt;
+		if (b >= ep - 3)
+			errx(ERR_SYNTAX, "rule modifiers overflow");
+	}
+#endif
+	/* keep room for the ' ', an optional '/' and the NUL below */
+	if (b >= ep - 3)
+		errx(ERR_SYNTAX, "rule prefix overflow");
+	*b++ = ' ';
+
+	/* include the stripped root '/' for anchored patterns */
+	if (r->anchored)
+		*b++ = '/';
+	*b++ = '\0';
+	return buf;
+}

+static const char * /* points at a static buffer that the next call overwrites */

+postfix_command(struct rule *r) /* suffix that parse_pattern() cut off the pattern: "", "/" or "/***" */

+ static char buf[8];

+ buf[0] = '\0';

+ if (r->onlydir)

+ strlcpy(buf, "/", sizeof(buf));

+ if (r->leadingdir) /* checked last, so "/***" wins if both flags are set */

+ strlcpy(buf, "/***", sizeof(buf));

+ return buf;

+void

+send_rules(struct sess *sess, int fd)

+ const char *cmd;

+ const char *postfix;

+ struct rule *r;

+ size_t cmdlen, len, postlen, i;

+ for (i = 0; i < numrules; i++) {

+ r = &rules[i];

+ cmd = send_command(r);

+ if (cmd == NULL)

+ err(ERR_PROTOCOL,

+ "rules are incompatible with remote rsync");

+ postfix = postfix_command(r);

+ cmdlen = strlen(cmd);

+ len = strlen(r->pattern);

+ postlen = strlen(postfix);

+ if (!io_write_int(sess, fd, cmdlen + len + postlen))