diff options
author | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2023-11-24 04:48:03 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2023-11-24 04:48:03 +0000 |
commit | c3e2d101e826252a4855545f04d019d98ff89ce4 (patch) | |
tree | db35e4798ba5e126f04d03052871a22d9f9317ca | |
parent | 0662da9f2b7ddc6f1fb22ea659fb2c8c07f17371 (diff) |
1. Do not put ASCII_HYPH (0x1c) into the tag file.
That happened when tagging a string containing '-' on an input text line,
most commonly in the next-line scope of a man(7) .TP macro.
2. Do not let "\-" end the tag.
In both cases, translate ASCII_HYPH and "\-" to plain '-' for output.
For example, this improves handling of unbound.conf(5).
These two bugs were found thanks to a posting by weerd@.
-rw-r--r-- | regress/usr.bin/mandoc/mdoc/Cm/tag.out_html | 2 | ||||
-rw-r--r-- | regress/usr.bin/mandoc/mdoc/Cm/tag.out_tag | 2 | ||||
-rw-r--r-- | usr.bin/mandoc/tag.c | 59 |
3 files changed, 48 insertions, 15 deletions
diff --git a/regress/usr.bin/mandoc/mdoc/Cm/tag.out_html b/regress/usr.bin/mandoc/mdoc/Cm/tag.out_html index 5141f52a81d..ceadb4b592b 100644 --- a/regress/usr.bin/mandoc/mdoc/Cm/tag.out_html +++ b/regress/usr.bin/mandoc/mdoc/Cm/tag.out_html @@ -7,7 +7,7 @@ <dd>text</dd> <dt id="hyphen"><a class="permalink" href="#hyphen"><code class="Cm">-hyphen</code></a></dt> <dd>text</dd> - <dt id="minus"><a class="permalink" href="#minus"><code class="Cm">-minus-sign</code></a></dt> + <dt id="minus-sign"><a class="permalink" href="#minus-sign"><code class="Cm">-minus-sign</code></a></dt> <dd>text</dd> <dt id="backslash"><a class="permalink" href="#backslash"><code class="Cm">\backslash</code></a></dt> <dd>text</dd> diff --git a/regress/usr.bin/mandoc/mdoc/Cm/tag.out_tag b/regress/usr.bin/mandoc/mdoc/Cm/tag.out_tag index a59da516b3f..d6bd49a5b28 100644 --- a/regress/usr.bin/mandoc/mdoc/Cm/tag.out_tag +++ b/regress/usr.bin/mandoc/mdoc/Cm/tag.out_tag @@ -4,6 +4,6 @@ one tag.mandoc_ascii 9 two tag.mandoc_ascii 9 three tag.mandoc_ascii 12 hyphen tag.mandoc_ascii 14 -minus tag.mandoc_ascii 17 +minus-sign tag.mandoc_ascii 17 backslash tag.mandoc_ascii 20 four tag.mandoc_ascii 22 diff --git a/usr.bin/mandoc/tag.c b/usr.bin/mandoc/tag.c index 8cb7bdb393c..bc3f43f78c8 100644 --- a/usr.bin/mandoc/tag.c +++ b/usr.bin/mandoc/tag.c @@ -1,6 +1,6 @@ -/* $OpenBSD: tag.c,v 1.37 2022/04/26 11:28:35 schwarze Exp $ */ +/* $OpenBSD: tag.c,v 1.38 2023/11/24 04:48:02 schwarze Exp $ */ /* - * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022 + * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023 * Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any @@ -24,11 +24,13 @@ #include <limits.h> #include <stddef.h> #include <stdint.h> +#include <stdio.h> #include <stdlib.h> #include <string.h> #include "mandoc_aux.h" #include "mandoc_ohash.h" +#include "mandoc.h" #include "roff.h" #include "mdoc.h" #include "roff_int.h" @@ -86,9 +88,11 @@ 
tag_put(const char *s, int prio, struct roff_node *n) { struct tag_entry *entry; struct roff_node *nold; - const char *se; + const char *se, *src; + char *cpy; size_t len; unsigned int slot; + int changed; assert(prio <= TAG_FALLBACK); @@ -104,6 +108,7 @@ tag_put(const char *s, int prio, struct roff_node *n) /* Determine the implicit tag. */ + changed = 1; if (s == NULL) { if (n->child == NULL || n->child->type != ROFFT_TEXT) return; @@ -120,27 +125,53 @@ tag_put(const char *s, int prio, struct roff_node *n) s += 2; break; default: - break; + return; } break; default: + changed = 0; break; } } /* + * Translate \- and ASCII_HYPH to plain '-'. * Skip whitespace and escapes and whatever follows, * and if there is any, downgrade the priority. */ - len = strcspn(s, " \t\\"); + cpy = mandoc_malloc(strlen(s) + 1); + for (src = s, len = 0; *src != '\0'; src++, len++) { + switch (*src) { + case '\t': + case ' ': + changed = 1; + break; + case ASCII_HYPH: + cpy[len] = '-'; + changed = 1; + continue; + case '\\': + if (src[1] != '-') + break; + src++; + changed = 1; + /* FALLTHROUGH */ + default: + cpy[len] = *src; + continue; + } + break; + } if (len == 0) - return; + goto out; + cpy[len] = '\0'; - se = s + len; - if (*se != '\0' && prio < TAG_WEAK) + if (*src != '\0' && prio < TAG_WEAK) prio = TAG_WEAK; + s = cpy; + se = cpy + len; slot = ohash_qlookupi(&tag_data, s, &se); entry = ohash_find(&tag_data, slot); @@ -148,8 +179,7 @@ tag_put(const char *s, int prio, struct roff_node *n) if (entry == NULL) { entry = mandoc_malloc(sizeof(*entry) + len + 1); - memcpy(entry->s, s, len); - entry->s[len] = '\0'; + memcpy(entry->s, s, len + 1); entry->nodes = NULL; entry->maxnodes = entry->nnodes = 0; ohash_insert(&tag_data, slot, entry); @@ -161,7 +191,7 @@ tag_put(const char *s, int prio, struct roff_node *n) */ else if (entry->prio < prio) - return; + goto out; /* * If the existing entry is worse, clear it. 
@@ -178,7 +208,7 @@ tag_put(const char *s, int prio, struct roff_node *n) } if (prio == TAG_FALLBACK) { entry->prio = TAG_DELETE; - return; + goto out; } } @@ -192,10 +222,13 @@ tag_put(const char *s, int prio, struct roff_node *n) entry->nodes[entry->nnodes++] = n; entry->prio = prio; n->flags |= NODE_ID; - if (n->child == NULL || n->child->string != s || *se != '\0') { + if (changed) { assert(n->tag == NULL); n->tag = mandoc_strndup(s, len); } + + out: + free(cpy); } int |