diff options
author | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2014-10-29 00:17:02 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2014-10-29 00:17:02 +0000 |
commit | a5ff6d8934e26bc098d34918dc831ee2981b5e64 (patch) | |
tree | ca0cd36f68f76f67819ea5aa07e11c881fb0e612 /usr.bin/mandoc | |
parent | 39a2da78dc8188292b51da7da1bcb4fadaa7f50f (diff) |
In terminal output, unify handling of Unicode and numbered character
escape sequences just like it was earlier implemented for -Thtml.
Do not let control characters other than ASCII 9 (horizontal tab)
propagate to the output. Groff allows them, but that really doesn't
look like a great idea.
Let mchars_num2char() return int so that we can distinguish invalid \N
syntax from \N'0'. This also reduces the danger of signed char issues
popping up.
Diffstat (limited to 'usr.bin/mandoc')
-rw-r--r-- | usr.bin/mandoc/chars.c | 10 | ||||
-rw-r--r-- | usr.bin/mandoc/html.c | 11 | ||||
-rw-r--r-- | usr.bin/mandoc/mandoc.h | 4 | ||||
-rw-r--r-- | usr.bin/mandoc/term.c | 104 |
4 files changed, 78 insertions, 51 deletions
diff --git a/usr.bin/mandoc/chars.c b/usr.bin/mandoc/chars.c index ef8c6cdc5db..de89dc9f4b8 100644 --- a/usr.bin/mandoc/chars.c +++ b/usr.bin/mandoc/chars.c @@ -1,4 +1,4 @@ -/* $OpenBSD: chars.c,v 1.34 2014/10/28 17:35:42 schwarze Exp $ */ +/* $OpenBSD: chars.c,v 1.35 2014/10/29 00:17:01 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2011, 2014 Ingo Schwarze <schwarze@openbsd.org> @@ -105,15 +105,13 @@ mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz) return(ln != NULL ? ln->unicode : sz == 1 ? (unsigned char)*p : -1); } -char +int mchars_num2char(const char *p, size_t sz) { int i; - if ((i = mandoc_strntoi(p, sz, 10)) < 0) - return('\0'); - - return(i > 0 && i < 256 && isprint(i) ? i : '\0'); + i = mandoc_strntoi(p, sz, 10); + return(i >= 0 && i < 256 ? i : -1); } int diff --git a/usr.bin/mandoc/html.c b/usr.bin/mandoc/html.c index 14fe83c629b..1e199ef807a 100644 --- a/usr.bin/mandoc/html.c +++ b/usr.bin/mandoc/html.c @@ -1,4 +1,4 @@ -/* $OpenBSD: html.c,v 1.50 2014/10/28 17:35:42 schwarze Exp $ */ +/* $OpenBSD: html.c,v 1.51 2014/10/29 00:17:01 schwarze Exp $ */ /* * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> @@ -420,9 +420,13 @@ print_encode(struct html *h, const char *p, int norecurse) break; case ESCAPE_NUMBERED: c = mchars_num2char(seq, len); + if (c < 0) + continue; break; case ESCAPE_SPECIAL: c = mchars_spec2cp(h->symtab, seq, len); + if (c <= 0) + continue; break; case ESCAPE_NOSPACE: if ('\0' == *p) @@ -431,9 +435,8 @@ print_encode(struct html *h, const char *p, int norecurse) default: continue; } - if (c <= 0) - continue; - if (c < 0x20 || (c > 0x7E && c < 0xA0)) + if ((c < 0x20 && c != 0x09) || + (c > 0x7E && c < 0xA0)) c = 0xFFFD; if (c > 0x7E) printf("&#%d;", c); diff --git a/usr.bin/mandoc/mandoc.h b/usr.bin/mandoc/mandoc.h index 487152dee5b..dad792ddc09 100644 --- 
a/usr.bin/mandoc/mandoc.h +++ b/usr.bin/mandoc/mandoc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mandoc.h,v 1.109 2014/10/28 17:35:42 schwarze Exp $ */ +/* $OpenBSD: mandoc.h,v 1.110 2014/10/29 00:17:01 schwarze Exp $ */ /* * Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org> @@ -423,7 +423,7 @@ __BEGIN_DECLS enum mandoc_esc mandoc_escape(const char **, const char **, int *); struct mchars *mchars_alloc(void); void mchars_free(struct mchars *); -char mchars_num2char(const char *, size_t); +int mchars_num2char(const char *, size_t); const char *mchars_uc2str(int); int mchars_num2uc(const char *, size_t); int mchars_spec2cp(const struct mchars *, diff --git a/usr.bin/mandoc/term.c b/usr.bin/mandoc/term.c index 31a54be5287..9193f192cda 100644 --- a/usr.bin/mandoc/term.c +++ b/usr.bin/mandoc/term.c @@ -1,4 +1,4 @@ -/* $OpenBSD: term.c,v 1.92 2014/10/28 18:48:56 schwarze Exp $ */ +/* $OpenBSD: term.c,v 1.93 2014/10/29 00:17:01 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org> @@ -389,7 +389,6 @@ term_word(struct termp *p, const char *word) { const char nbrsp[2] = { ASCII_NBRSP, 0 }; const char *seq, *cp; - char c; int sz, uc; size_t ssz; enum mandoc_esc esc; @@ -441,16 +440,11 @@ term_word(struct termp *p, const char *word) switch (esc) { case ESCAPE_UNICODE: uc = mchars_num2uc(seq + 1, sz - 1); - if (p->enc == TERMENC_ASCII) { - cp = ascii_uc2str(uc); - encode(p, cp, strlen(cp)); - } else - encode1(p, uc); break; case ESCAPE_NUMBERED: - c = mchars_num2char(seq, sz); - if ('\0' != c) - encode(p, &c, 1); + uc = mchars_num2char(seq, sz); + if (uc < 0) + continue; break; case ESCAPE_SPECIAL: if (p->enc == TERMENC_ASCII) { @@ -463,35 +457,50 @@ term_word(struct termp *p, const char *word) if (uc > 0) encode1(p, uc); } - break; + continue; case ESCAPE_FONTBOLD: term_fontrepl(p, TERMFONT_BOLD); - 
break; + continue; case ESCAPE_FONTITALIC: term_fontrepl(p, TERMFONT_UNDER); - break; + continue; case ESCAPE_FONTBI: term_fontrepl(p, TERMFONT_BI); - break; + continue; case ESCAPE_FONT: /* FALLTHROUGH */ case ESCAPE_FONTROMAN: term_fontrepl(p, TERMFONT_NONE); - break; + continue; case ESCAPE_FONTPREV: term_fontlast(p); - break; + continue; case ESCAPE_NOSPACE: if (TERMP_SKIPCHAR & p->flags) p->flags &= ~TERMP_SKIPCHAR; else if ('\0' == *word) p->flags |= TERMP_NOSPACE; - break; + continue; case ESCAPE_SKIPCHAR: p->flags |= TERMP_SKIPCHAR; - break; + continue; default: - break; + continue; + } + + /* + * Common handling for Unicode and numbered + * character escape sequences. + */ + + if (p->enc == TERMENC_ASCII) { + cp = ascii_uc2str(uc); + encode(p, cp, strlen(cp)); + } else { + if ((uc < 0x20 && uc != 0x09) || + (uc > 0x7E && uc < 0xA0)) + uc = 0xFFFD; + encode1(p, uc); } } p->flags &= ~TERMP_NBRWORD; @@ -643,7 +652,7 @@ size_t term_strlen(const struct termp *p, const char *cp) { size_t sz, rsz, i; - int ssz, skip, c; + int ssz, skip, uc; const char *seq, *rhs; enum mandoc_esc esc; static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH, @@ -673,44 +682,61 @@ term_strlen(const struct termp *p, const char *cp) switch (esc) { case ESCAPE_UNICODE: - c = mchars_num2uc(seq + 1, sz - 1); - if (p->enc == TERMENC_ASCII) { - rhs = ascii_uc2str(c); - rsz = strlen(rhs); - } else - sz += cond_width(p, c, &skip); + uc = mchars_num2uc(seq + 1, sz - 1); break; case ESCAPE_NUMBERED: - c = mchars_num2char(seq, ssz); - if ('\0' != c) - sz += cond_width(p, c, &skip); + uc = mchars_num2char(seq, ssz); + if (uc < 0) + continue; break; case ESCAPE_SPECIAL: - if (p->enc == TERMENC_ASCII) + if (p->enc == TERMENC_ASCII) { rhs = mchars_spec2str(p->symtab, seq, ssz, &rsz); - else { - c = mchars_spec2cp(p->symtab, + if (rhs != NULL) + break; + } else { + uc = mchars_spec2cp(p->symtab, seq, ssz); - if (c > 0) - sz += cond_width(p, c, &skip); + if (uc > 0) + sz += cond_width(p, uc, 
&skip); } - break; + continue; case ESCAPE_SKIPCHAR: skip = 1; - break; + continue; default: - break; + continue; } - if (NULL == rhs) - break; + /* + * Common handling for Unicode and numbered + * character escape sequences. + */ + + if (rhs == NULL) { + if (p->enc == TERMENC_ASCII) { + rhs = ascii_uc2str(uc); + rsz = strlen(rhs); + } else { + if ((uc < 0x20 && uc != 0x09) || + (uc > 0x7E && uc < 0xA0)) + uc = 0xFFFD; + sz += cond_width(p, uc, &skip); + continue; + } + } if (skip) { skip = 0; break; } + /* + * Common handling for all escape sequences + * printing more than one character. + */ + for (i = 0; i < rsz; i++) sz += (*p->width)(p, *rhs++); break; |