From 3962ec0010304a43d14512a8b79da75485655694 Mon Sep 17 00:00:00 2001
From: Ingo Schwarze
Date: Sat, 15 Dec 2018 19:30:21 +0000
Subject: Several improvements to escape sequence handling.

* Add the missing special character \_ (underscore).
* Partial implementations of \a (leader character)
  and \E (uninterpreted escape character).
* Parse and ignore \r (reverse line feed).
* Add a WARNING message about undefined escape sequences.
* Add an UNSUPP message about unsupported escape sequences.
* Mark \! and \? (transparent throughput)
  and \O (suppress output) as unsupported.
* Treat the various variants of zero-width spaces as one-byte escape
  sequences rather than as special characters, to avoid defining bogus
  forms with square brackets.
* For special characters with one-byte names, do not define bogus
  forms with square brackets, except for \[-], which is valid.
* In the form with square brackets, undefined special characters do not
  fall back to printing the name verbatim, not even for one-byte names.
* Starting a special character name with a blank is an error.
* Undefined escape sequences never abort formatting of the input
  string, not even in HTML output mode.
* Document the newly handled escapes, and a few that were missing.
* Regression tests for most of the above.
--- usr.bin/mandoc/term.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'usr.bin/mandoc/term.c') diff --git a/usr.bin/mandoc/term.c b/usr.bin/mandoc/term.c index e21c2622927..7b5816be28a 100644 --- a/usr.bin/mandoc/term.c +++ b/usr.bin/mandoc/term.c @@ -1,4 +1,4 @@ -/* $OpenBSD: term.c,v 1.136 2018/10/25 01:21:30 schwarze Exp $ */ +/* $OpenBSD: term.c,v 1.137 2018/12/15 19:30:19 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010-2018 Ingo Schwarze @@ -475,9 +475,6 @@ term_word(struct termp *p, const char *word) word++; esc = mandoc_escape(&word, &seq, &sz); - if (ESCAPE_ERROR == esc) - continue; - switch (esc) { case ESCAPE_UNICODE: uc = mchars_num2uc(seq + 1, sz - 1); @@ -498,6 +495,9 @@ term_word(struct termp *p, const char *word) encode1(p, uc); } continue; + case ESCAPE_UNDEF: + uc = *seq; + break; case ESCAPE_FONTBOLD: term_fontrepl(p, TERMFONT_BOLD); continue; @@ -585,6 +585,9 @@ term_word(struct termp *p, const char *word) case ESCAPE_SPECIAL: uc = mchars_spec2cp(cp, sz); break; + case ESCAPE_UNDEF: + uc = *seq; + break; default: uc = -1; break; @@ -843,12 +846,8 @@ term_strlen(const struct termp *p, const char *cp) switch (*cp) { case '\\': cp++; - esc = mandoc_escape(&cp, &seq, &ssz); - if (ESCAPE_ERROR == esc) - continue; - rhs = NULL; - + esc = mandoc_escape(&cp, &seq, &ssz); switch (esc) { case ESCAPE_UNICODE: uc = mchars_num2uc(seq + 1, ssz - 1); @@ -869,6 +868,9 @@ term_strlen(const struct termp *p, const char *cp) sz += cond_width(p, uc, &skip); } continue; + case ESCAPE_UNDEF: + uc = *seq; + break; case ESCAPE_DEVICE: if (p->type == TERMTYPE_PDF) { rhs = "pdf"; -- cgit v1.2.3