summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIngo Schwarze <schwarze@cvs.openbsd.org>2017-06-25 17:42:38 +0000
committerIngo Schwarze <schwarze@cvs.openbsd.org>2017-06-25 17:42:38 +0000
commitf8105c1492f5570f72983e1559ea6a8752218890 (patch)
treedb4235f9f14864da6da7d98a3b2fd597345100e1
parent71101427e59a1379ad998ad7a420ab426a06e8f0 (diff)
Catch typos in .Sh names; suggested by jmc@.
I'm using a very simple, linear time / zero space fuzzy string matching heuristic rather than a full Levenshtein metric, to keep the code both simple and fast.
-rw-r--r--usr.bin/mandoc/mandoc.17
-rw-r--r--usr.bin/mandoc/mandoc.h3
-rw-r--r--usr.bin/mandoc/mdoc_validate.c65
-rw-r--r--usr.bin/mandoc/read.c3
4 files changed, 73 insertions, 5 deletions
diff --git a/usr.bin/mandoc/mandoc.1 b/usr.bin/mandoc/mandoc.1
index 2df74ea9cd6..931aabfa428 100644
--- a/usr.bin/mandoc/mandoc.1
+++ b/usr.bin/mandoc/mandoc.1
@@ -1,4 +1,4 @@
-.\" $OpenBSD: mandoc.1,v 1.130 2017/06/25 07:23:53 bentley Exp $
+.\" $OpenBSD: mandoc.1,v 1.131 2017/06/25 17:42:37 schwarze Exp $
.\"
.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
.\" Copyright (c) 2012, 2014-2017 Ingo Schwarze <schwarze@openbsd.org>
@@ -857,6 +857,11 @@ A single manual page contains two copies of the RCS identifier for
the same operating system.
Consider deleting the later instance and moving the first one up
to the top of the page.
+.It Sy "typo in section name"
+.Pq mdoc
+Fuzzy string matching revealed that the argument of an
+.Ic \&Sh
+macro is similar, but not identical, to a standard section name.
.It Sy "useless macro"
.Pq mdoc
A
diff --git a/usr.bin/mandoc/mandoc.h b/usr.bin/mandoc/mandoc.h
index 3ffdb5f9c0c..9515490c827 100644
--- a/usr.bin/mandoc/mandoc.h
+++ b/usr.bin/mandoc/mandoc.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: mandoc.h,v 1.177 2017/06/24 18:58:09 schwarze Exp $ */
+/* $OpenBSD: mandoc.h,v 1.178 2017/06/25 17:42:37 schwarze Exp $ */
/*
* Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
@@ -56,6 +56,7 @@ enum mandocerr {
MANDOCERR_DATE_LEGACY, /* legacy man(7) date format: Dd ... */
MANDOCERR_RCS_REP, /* duplicate RCS id: ... */
+ MANDOCERR_SEC_TYPO, /* typo in section name: Sh ... */
MANDOCERR_MACRO_USELESS, /* useless macro: macro */
MANDOCERR_BX, /* consider using OS macro: macro */
MANDOCERR_ER_ORDER, /* errnos out of order: Er ... */
diff --git a/usr.bin/mandoc/mdoc_validate.c b/usr.bin/mandoc/mdoc_validate.c
index 46f0bae730d..0b45e79303b 100644
--- a/usr.bin/mandoc/mdoc_validate.c
+++ b/usr.bin/mandoc/mdoc_validate.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: mdoc_validate.c,v 1.257 2017/06/24 18:58:09 schwarze Exp $ */
+/* $OpenBSD: mdoc_validate.c,v 1.258 2017/06/25 17:42:37 schwarze Exp $ */
/*
* Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
@@ -58,6 +58,7 @@ static void check_toptext(struct roff_man *, int, int, const char *);
static int child_an(const struct roff_node *);
static size_t macro2len(enum roff_tok);
static void rewrite_macro2len(struct roff_man *, char **);
+static int similar(const char *, const char *);
static void post_an(POST_ARGS);
static void post_an_norm(POST_ARGS);
@@ -2133,11 +2134,54 @@ post_sh_authors(POST_ARGS)
mdoc->last->line, mdoc->last->pos, NULL);
}
+/*
+ * Return an upper bound for the string distance (allowing
+ * transpositions). Not a full Levenshtein implementation
+ * because Levenshtein is quadratic in the string length
+ * and this function is called for every standard name,
+ * so the check for each custom name would be cubic.
+ * The following crude heuristic is linear, resulting
+ * in quadratic behaviour for checking one custom name,
+ * which does not cause measurable slowdown.
+ *
+ * Both strings are walked in parallel in a single pass.
+ * Characters are compared with ==, so the match is exact
+ * and case-sensitive.  At each mismatch, only the current
+ * and the next character of each string are inspected to
+ * classify the difference as one replacement, transposition,
+ * insertion, or deletion.
+ *
+ * Returns the number of edits counted (0 for equal strings)
+ * if that number does not exceed maxdist, or INT_MAX if it
+ * does or if a mismatch fits none of the four patterns.
+ */
+static int
+similar(const char *s1, const char *s2)
+{
+ const int maxdist = 3; /* largest distance still reported as similar */
+ int dist = 0; /* number of edits counted so far */
+
+ while (s1[0] != '\0' && s2[0] != '\0') {
+ if (s1[0] == s2[0]) {
+ s1++;
+ s2++;
+ continue;
+ }
+ if (++dist > maxdist) /* too many edits, give up early */
+ return INT_MAX;
+ if (s1[1] == s2[1]) { /* replacement */
+ s1++;
+ s2++;
+ } else if (s1[0] == s2[1] && s1[1] == s2[0]) {
+ s1 += 2; /* transposition */
+ s2 += 2;
+ } else if (s1[0] == s2[1]) /* insertion */
+ s2++;
+ else if (s1[1] == s2[0]) /* deletion */
+ s1++;
+ else /* no single-step edit explains this mismatch */
+ return INT_MAX;
+ }
+ dist += strlen(s1) + strlen(s2); /* each leftover character counts as one edit */
+ return dist > maxdist ? INT_MAX : dist;
+}
+
static void
post_sh_head(POST_ARGS)
{
struct roff_node *nch;
const char *goodsec;
+ const char *const *testsec;
+ int dist, mindist;
enum roff_sec sec;
/*
@@ -2175,8 +2219,25 @@ post_sh_head(POST_ARGS)
/* We don't care about custom sections after this. */
- if (sec == SEC_CUSTOM)
+ if (sec == SEC_CUSTOM) {
+ if ((nch = mdoc->last->child) == NULL ||
+ nch->type != ROFFT_TEXT || nch->next != NULL)
+ return;
+ goodsec = NULL;
+ mindist = INT_MAX;
+ for (testsec = secnames + 1; *testsec != NULL; testsec++) {
+ dist = similar(nch->string, *testsec);
+ if (dist < mindist) {
+ goodsec = *testsec;
+ mindist = dist;
+ }
+ }
+ if (goodsec != NULL)
+ mandoc_vmsg(MANDOCERR_SEC_TYPO, mdoc->parse,
+ nch->line, nch->pos, "Sh %s instead of %s",
+ nch->string, goodsec);
return;
+ }
/*
* Check whether our non-custom section is being repeated or is
diff --git a/usr.bin/mandoc/read.c b/usr.bin/mandoc/read.c
index 61466c5de74..55f24b3fdf5 100644
--- a/usr.bin/mandoc/read.c
+++ b/usr.bin/mandoc/read.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: read.c,v 1.153 2017/06/24 18:58:09 schwarze Exp $ */
+/* $OpenBSD: read.c,v 1.154 2017/06/25 17:42:37 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
@@ -94,6 +94,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"legacy man(7) date format",
"duplicate RCS id",
+ "typo in section name",
"useless macro",
"consider using OS macro",
"errnos out of order",