summaryrefslogtreecommitdiff
path: root/usr.bin/mandoc/mandocdb.c
diff options
context:
space:
mode:
authorIngo Schwarze <schwarze@cvs.openbsd.org>2020-01-26 21:24:59 +0000
committerIngo Schwarze <schwarze@cvs.openbsd.org>2020-01-26 21:24:59 +0000
commite69c831c243a0bbf18dfbe1290aa68c05aee9acf (patch)
treea7844daa1171f51a3be5169154716765a64595ab /usr.bin/mandoc/mandocdb.c
parent8602a4bf21e1ee4a4488667db57bfd24cfbef78e (diff)
Repair more of the issues that i found in filescan() while investigating
the report from <Andreas dot Kahari at abc dot se> on ports@: For a symlink, use the first of the following names that is available: 1. In -t mode, the symlink itself (unchanged). 2. When the (unresolved) symlink already resides inside the manpath, just strip the manpath and use the rest (unchanged). 3. When prefix(es) of the unresolved symlink point to the manpath, strip the longest such prefix and use the rest (new); this fixes situations where the manpath or one of its parent directories is a symlink and at the same time contains symlinks to manual pages. 4. Fall back to the fully resolved symlink, with the manpath stripped (new); this may for example happen when the command line passes symlinks from outside the manpath that point to manual pages inside the manpath, or if manual page trees contain symlinks to symlinks and not all of them are given on the command line. The fallback (4) isn't perfect. You can construct symlink spaghetti in such a way that this algorithm will not enter all manual page names into the database that a human would be able to deduce. But i do not expect such spaghetti to actually occur in practice (not even in ports), and a full fix would require re-implementing realpath(3) in terms of step-by-step readlink(2) calls, repeating the complicated algorithm (3) after each step. While here, also stop using PATH_MAX as the size of a static buffer in filescan(); on some systems, it can be unreasonably large. Instead, allocate path strings dynamically.
Diffstat (limited to 'usr.bin/mandoc/mandocdb.c')
-rw-r--r--usr.bin/mandoc/mandocdb.c118
1 files changed, 84 insertions, 34 deletions
diff --git a/usr.bin/mandoc/mandocdb.c b/usr.bin/mandoc/mandocdb.c
index a06b488ecb0..7913a131678 100644
--- a/usr.bin/mandoc/mandocdb.c
+++ b/usr.bin/mandoc/mandocdb.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: mandocdb.c,v 1.214 2020/01/26 11:15:49 schwarze Exp $ */
+/* $OpenBSD: mandocdb.c,v 1.215 2020/01/26 21:24:58 schwarze Exp $ */
/*
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011-2020 Ingo Schwarze <schwarze@openbsd.org>
@@ -744,17 +744,17 @@ treescan(void)
* See treescan() for the fts(3) version of this.
*/
static void
-filescan(const char *file)
+filescan(const char *infile)
{
- char buf[PATH_MAX];
struct stat st;
struct mlink *mlink;
- char *p, *start;
+ char *linkfile, *p, *realdir, *start, *usefile;
+ size_t realdir_len;
assert(use_all);
- if (strncmp(file, "./", 2) == 0)
- file += 2;
+ if (strncmp(infile, "./", 2) == 0)
+ infile += 2;
/*
* We have to do lstat(2) before realpath(3) loses
@@ -763,13 +763,13 @@ filescan(const char *file)
* we want to use the orginal file name, while for
* regular files, we want to use the real path.
*/
- if (lstat(file, &st) == -1) {
+ if (lstat(infile, &st) == -1) {
exitcode = (int)MANDOCLEVEL_BADARG;
- say(file, "&lstat");
+ say(infile, "&lstat");
return;
} else if (S_ISREG(st.st_mode) == 0 && S_ISLNK(st.st_mode) == 0) {
exitcode = (int)MANDOCLEVEL_BADARG;
- say(file, "Not a regular file");
+ say(infile, "Not a regular file");
return;
}
@@ -777,19 +777,20 @@ filescan(const char *file)
* We have to resolve the file name to the real path
* in any case for the base directory check.
*/
- if (realpath(file, buf) == NULL) {
+ if ((usefile = realpath(infile, NULL)) == NULL) {
exitcode = (int)MANDOCLEVEL_BADARG;
- say(file, "&realpath");
+ say(infile, "&realpath");
return;
}
if (op == OP_TEST)
- start = buf;
- else if (strncmp(buf, basedir, basedir_len) == 0)
- start = buf + basedir_len;
+ start = usefile;
+ else if (strncmp(usefile, basedir, basedir_len) == 0)
+ start = usefile + basedir_len;
else {
exitcode = (int)MANDOCLEVEL_BADARG;
- say("", "%s: outside base directory", buf);
+ say("", "%s: outside base directory", infile);
+ free(usefile);
return;
}
@@ -797,25 +798,72 @@ filescan(const char *file)
* Now we are sure the file is inside our tree.
* If it is a symbolic link, ignore the real path
* and use the original name.
- * This implies passing stuff like "cat1/../man1/foo.1"
- * on the command line won't work. So don't do that.
- * Note the stat(2) can still fail if the link target
- * doesn't exist.
*/
- if (S_ISLNK(st.st_mode)) {
- if (stat(buf, &st) == -1) {
+ do {
+ if (S_ISLNK(st.st_mode) == 0)
+ break;
+
+ /*
+ * Some implementations of realpath(3) may succeed
+ * even if the target of the link does not exist,
+ * so check again for extra safety.
+ */
+ if (stat(usefile, &st) == -1) {
exitcode = (int)MANDOCLEVEL_BADARG;
- say(file, "&stat");
+ say(infile, "&stat");
+ free(usefile);
return;
}
- if (strlcpy(buf, file, sizeof(buf)) >= sizeof(buf)) {
- say(file, "Filename too long");
- return;
+ linkfile = mandoc_strdup(infile);
+ if (op == OP_TEST) {
+ free(usefile);
+ start = usefile = linkfile;
+ break;
}
- start = buf;
- if (op != OP_TEST && strncmp(buf, basedir, basedir_len) == 0)
- start += basedir_len;
- }
+ if (strncmp(infile, basedir, basedir_len) == 0) {
+ free(usefile);
+ usefile = linkfile;
+ start = usefile + basedir_len;
+ break;
+ }
+
+ /*
+ * This symbolic link points into the basedir
+ * from the outside. Let's see whether any of
+ * the parent directories resolve to the basedir.
+ */
+ p = strchr(linkfile, '\0');
+ do {
+ while (*--p != '/')
+ continue;
+ *p = '\0';
+ if ((realdir = realpath(linkfile, NULL)) == NULL) {
+ exitcode = (int)MANDOCLEVEL_BADARG;
+ say(infile, "&realpath");
+ free(linkfile);
+ free(usefile);
+ return;
+ }
+ realdir_len = strlen(realdir) + 1;
+ free(realdir);
+ *p = '/';
+ } while (realdir_len > basedir_len);
+
+ /*
+ * If one of the directories resolves to the basedir,
+ * use the rest of the original name.
+ * Otherwise, the best we can do
+ * is to use the filename pointed to.
+ */
+ if (realdir_len == basedir_len) {
+ free(usefile);
+ usefile = linkfile;
+ start = p + 1;
+ } else {
+ free(linkfile);
+ start = usefile + basedir_len;
+ }
+ } while (/* CONSTCOND */ 0);
mlink = mandoc_calloc(1, sizeof(struct mlink));
mlink->dform = FORM_NONE;
@@ -823,6 +871,7 @@ filescan(const char *file)
sizeof(mlink->file)) {
say(start, "Filename too long");
free(mlink);
+ free(usefile);
return;
}
@@ -831,13 +880,13 @@ filescan(const char *file)
* but outside our tree, guess the base directory.
*/
- if (op == OP_TEST || (start == buf && *start == '/')) {
- if (strncmp(buf, "man/", 4) == 0)
- start = buf + 4;
- else if ((start = strstr(buf, "/man/")) != NULL)
+ if (op == OP_TEST || (start == usefile && *start == '/')) {
+ if (strncmp(usefile, "man/", 4) == 0)
+ start = usefile + 4;
+ else if ((start = strstr(usefile, "/man/")) != NULL)
start += 5;
else
- start = buf;
+ start = usefile;
}
/*
@@ -887,6 +936,7 @@ filescan(const char *file)
*p = '\0';
}
mlink_add(mlink, &st);
+ free(usefile);
}
static void