diff options
author | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2009-08-22 17:04:49 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2009-08-22 17:04:49 +0000 |
commit | a67d998773e0f4014c8aa9b663487eea72b38450 (patch) | |
tree | 502f13eb4459248fa286af6c1ab54c79745cf17b /usr.bin/mandoc | |
parent | 60177f1cc0b7ef61d687cf6e6aae528fca66b7b8 (diff) |
sync to 1.8.5: partial rewrite of the handling of escape sequences:
distinguish special characters and predefined strings,
and add and fix several escape sequences
Diffstat (limited to 'usr.bin/mandoc')
-rw-r--r-- | usr.bin/mandoc/ascii.c | 51 |
-rw-r--r-- | usr.bin/mandoc/ascii.in | 479 |
-rw-r--r-- | usr.bin/mandoc/term.c | 119 |
-rw-r--r-- | usr.bin/mandoc/term.h | 3 |
4 files changed, 391 insertions, 261 deletions
diff --git a/usr.bin/mandoc/ascii.c b/usr.bin/mandoc/ascii.c index 7cc89f6c66f..89816232a9d 100644 --- a/usr.bin/mandoc/ascii.c +++ b/usr.bin/mandoc/ascii.c @@ -1,4 +1,4 @@ -/* $Id: ascii.c,v 1.2 2009/06/14 23:00:57 schwarze Exp $ */ +/* $Id: ascii.c,v 1.3 2009/08/22 17:04:48 schwarze Exp $ */ /* * Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -27,9 +27,12 @@ struct line { const char *code; const char *out; - /* 32- and 64-bit alignment safe. */ size_t codesz; size_t outsz; + int type; +#define ASCII_CHAR (1 << 0) +#define ASCII_STRING (1 << 1) +#define ASCII_BOTH (0x03) }; struct linep { @@ -37,8 +40,12 @@ struct linep { struct linep *next; }; -#define LINE(w, x, y, z) \ - { (w), (y), (x), (z) }, +#define CHAR(w, x, y, z) \ + { (w), (y), (x), (z), ASCII_CHAR }, +#define STRING(w, x, y, z) \ + { (w), (y), (x), (z), ASCII_STRING }, +#define BOTH(w, x, y, z) \ + { (w), (y), (x), (z), ASCII_BOTH }, static const struct line lines[] = { #include "ascii.in" }; @@ -50,7 +57,9 @@ struct asciitab { static inline int match(const struct line *, - const char *, size_t); + const char *, size_t, int); +static const char * lookup(struct asciitab *, const char *, + size_t, size_t *, int); void @@ -125,14 +134,29 @@ term_ascii2htab(void) const char * term_a2ascii(void *arg, const char *p, size_t sz, size_t *rsz) { - struct asciitab *tab; + + return(lookup((struct asciitab *)arg, p, + sz, rsz, ASCII_CHAR)); +} + + +const char * +term_a2res(void *arg, const char *p, size_t sz, size_t *rsz) +{ + + return(lookup((struct asciitab *)arg, p, + sz, rsz, ASCII_STRING)); +} + + +static const char * +lookup(struct asciitab *tab, const char *p, + size_t sz, size_t *rsz, int type) +{ struct linep *pp, *prev; void **htab; int hash; - tab = (struct asciitab *)arg; - htab = tab->htab; - assert(p); assert(sz > 0); @@ -147,19 +171,20 @@ term_a2ascii(void *arg, const char *p, size_t sz, size_t *rsz) */ hash = (int)p[0] - ASCII_PRINT_LO; + htab = tab->htab; if (NULL == (pp = ((struct 
linep **)htab)[hash])) return(NULL); if (NULL == pp->next) { - if ( ! match(pp->line, p, sz)) + if ( ! match(pp->line, p, sz, type)) return(NULL); *rsz = pp->line->outsz; return(pp->line->out); } for (prev = NULL; pp; pp = pp->next) { - if ( ! match(pp->line, p, sz)) { + if ( ! match(pp->line, p, sz, type)) { prev = pp; continue; } @@ -181,9 +206,11 @@ term_a2ascii(void *arg, const char *p, size_t sz, size_t *rsz) static inline int -match(const struct line *line, const char *p, size_t sz) +match(const struct line *line, const char *p, size_t sz, int type) { + if ( ! (line->type & type)) + return(0); if (line->codesz != sz) return(0); return(0 == strncmp(line->code, p, sz)); diff --git a/usr.bin/mandoc/ascii.in b/usr.bin/mandoc/ascii.in index 40f190da2e0..56f72d83bf3 100644 --- a/usr.bin/mandoc/ascii.in +++ b/usr.bin/mandoc/ascii.in @@ -1,4 +1,4 @@ -/* $Id: ascii.in,v 1.4 2009/07/13 00:18:12 schwarze Exp $ */ +/* $Id: ascii.in,v 1.5 2009/08/22 17:04:48 schwarze Exp $ */ /* * Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -16,205 +16,284 @@ */ /* - * The ASCII translation table. The left-hand side corresponds to the - * escape sequence (\x, \(xx and so on) whose length is listed second - * element. The right-hand side is what's produced by the front-end, - * with the fourth element being its length. + * The ASCII translation tables. STRING corresponds to predefined + * strings (cf. mdoc_samples.7 and tmac/mdoc/doc-nroff). CHAR + * corresponds to special characters (cf. groff_char.7). BOTH contains + * sequences that are equivalent in both STRING and CHAR. * - * Be sure to escape strings. + * Either way, the left-hand side corresponds to the input sequence (\x, + * \(xx, \*(xx and so on) whose length is listed second element. The + * right-hand side is what's produced by the front-end, with the fourth + * element being its length. + * + * Be sure to C-escape strings! 
*/ -LINE("\\", 1, "\\", 1) -LINE("\'", 1, "\'", 1) -LINE("`", 1, "`", 1) -LINE("%", 1, "%", 1) -LINE("-", 1, "-", 1) -LINE(" ", 1, " ", 1) -LINE("~", 1, " ", 1) -LINE("^", 1, "", 0) -LINE("0", 1, " ", 1) -LINE(".", 1, ".", 1) -LINE("&", 1, "", 0) -LINE("e", 1, "\\", 1) -LINE("q", 1, "\"", 1) -LINE("|", 1, "", 0) -LINE("rC", 2, "}", 1) -LINE("lC", 2, "{", 1) -LINE("rB", 2, "]", 1) -LINE("lB", 2, "[", 1) -LINE("ra", 2, ">", 1) -LINE("la", 2, "<", 1) -LINE("Lq", 2, "``", 2) -LINE("lq", 2, "``", 2) -LINE("Rq", 2, "\'\'", 2) -LINE("rq", 2, "\'\'", 2) -LINE("oq", 2, "`", 1) -LINE("aq", 2, "\'", 1) -LINE("Bq", 2, ",,", 2) -LINE("bq", 2, ",,", 2) -LINE("<-", 2, "<-", 2) -LINE("->", 2, "->", 2) -LINE("<>", 2, "<>", 2) -LINE("ua", 2, "^", 1) -LINE("da", 2, "v", 1) -LINE("bu", 2, "o", 1) -LINE("ci", 2, "O", 1) -LINE("Ba", 2, "|", 1) -LINE("ba", 2, "|", 1) -LINE("bb", 2, "|", 1) -LINE("co", 2, "(C)", 3) -LINE("rg", 2, "(R)", 3) -LINE("tm", 2, "tm", 2) -LINE("Am", 2, "&", 1) -LINE("Le", 2, "<=", 2) -LINE("<=", 2, "<=", 2) -LINE("Ge", 2, ">=", 2) -LINE(">=", 2, ">=", 2) -LINE("==", 2, "==", 2) -LINE("Ne", 2, "!=", 2) -LINE("!=", 2, "!=", 2) -LINE("Pm", 2, "+-", 2) -LINE("+-", 2, "+-", 2) -LINE("If", 2, "infinity", 8) -LINE("if", 2, "oo", 2) -LINE("Na", 2, "NaN", 3) -LINE("na", 2, "NaN", 3) -LINE("**", 2, "*", 1) -LINE("Gt", 2, ">", 1) -LINE("Lt", 2, "<", 1) -LINE("aa", 2, "\'", 1) -LINE("a~", 2, "~", 1) -LINE("ga", 2, "`", 1) -LINE("en", 2, "-", 1) -LINE("em", 2, "--", 2) -LINE("hy", 2, "-", 1) -LINE("Pi", 2, "pi", 2) -LINE("Fo", 2, "<<", 2) -LINE("Fc", 2, ">>", 2) -LINE("fo", 2, "<", 1) -LINE("fc", 2, ">", 1) -LINE("lh", 2, "<=", 2) -LINE("rh", 2, "=>", 2) -LINE("ae", 2, "ae", 2) -LINE("AE", 2, "AE", 2) -LINE("oe", 2, "oe", 2) -LINE("OE", 2, "OE", 2) -LINE("ss", 2, "ss", 2) -LINE("\'A", 2, "A", 1) -LINE("\'E", 2, "E", 1) -LINE("\'I", 2, "I", 1) -LINE("\'O", 2, "O", 1) -LINE("\'U", 2, "U", 1) -LINE("\'a", 2, "a", 1) -LINE("\'e", 2, "e", 1) -LINE("\'i", 2, "i", 1) -LINE("\'o", 2, 
"o", 1) -LINE("\'u", 2, "u", 1) -LINE("`A", 2, "A", 1) -LINE("`E", 2, "E", 1) -LINE("`I", 2, "I", 1) -LINE("`O", 2, "O", 1) -LINE("`U", 2, "U", 1) -LINE("`a", 2, "a", 1) -LINE("`e", 2, "e", 1) -LINE("`i", 2, "i", 1) -LINE("`o", 2, "o", 1) -LINE("`u", 2, "u", 1) -LINE("~A", 2, "A", 1) -LINE("~N", 2, "N", 1) -LINE("~O", 2, "O", 1) -LINE("~a", 2, "a", 1) -LINE("~n", 2, "n", 1) -LINE("~o", 2, "o", 1) -LINE("lA", 2, "<=", 2) -LINE("rA", 2, "=>", 2) -LINE("uA", 2, "^", 1) -LINE("dA", 2, "v", 1) -LINE("hA", 2, "<=>", 3) -LINE(":A", 2, "A", 1) -LINE(":E", 2, "E", 1) -LINE(":I", 2, "I", 1) -LINE(":O", 2, "O", 1) -LINE(":U", 2, "U", 1) -LINE(":a", 2, "a", 1) -LINE(":e", 2, "e", 1) -LINE(":i", 2, "i", 1) -LINE(":o", 2, "o", 1) -LINE(":u", 2, "u", 1) -LINE(":y", 2, "y", 1) -LINE("^A", 2, "A", 1) -LINE("^E", 2, "E", 1) -LINE("^I", 2, "I", 1) -LINE("^O", 2, "O", 1) -LINE("^U", 2, "U", 1) -LINE("^a", 2, "a", 1) -LINE("^e", 2, "e", 1) -LINE("^i", 2, "i", 1) -LINE("^o", 2, "o", 1) -LINE("^u", 2, "u", 1) -LINE("-D", 2, "D", 1) -LINE("Sd", 2, "o", 1) -LINE("TP", 2, "b", 1) -LINE("Tp", 2, "b", 1) -LINE(",C", 2, "C", 1) -LINE(",c", 2, "c", 1) -LINE("/L", 2, "L", 1) -LINE("/l", 2, "l", 1) -LINE("/O", 2, "O", 1) -LINE("/o", 2, "o", 1) -LINE("oA", 2, "A", 1) -LINE("oa", 2, "a", 1) -LINE("a^", 2, "^", 1) -LINE("ac", 2, ",", 1) -LINE("ad", 2, "\"", 1) -LINE("ah", 2, "v", 1) -LINE("ao", 2, "o", 1) -LINE("ho", 2, ",", 1) -LINE("ab", 2, "`", 1) -LINE("a\"", 2, "\"", 1) -LINE("a-", 2, "-", 1) -LINE("Cs", 2, "x", 1) -LINE("Do", 2, "$", 1) -LINE("Po", 2, "L", 1) -LINE("Ye", 2, "Y", 1) -LINE("Fn", 2, "f", 1) -LINE("ct", 2, "c", 1) -LINE("ff", 2, "ff", 2) -LINE("fi", 2, "fi", 2) -LINE("fl", 2, "fl", 2) -LINE("Fi", 2, "ffi", 3) -LINE("Fl", 2, "ffl", 3) -LINE("r!", 2, "i", 1) -LINE("r?", 2, "c", 1) -LINE("dd", 2, "=", 1) -LINE("dg", 2, "-", 1) -LINE("ps", 2, "9|", 2) -LINE("sc", 2, "S", 1) -LINE("de", 2, "o", 1) -LINE("tf", 2, ".:.", 3) -LINE("~~", 2, "~~", 2) -LINE("~=", 2, "~=", 2) -LINE("=~", 2, 
"=~", 2) -LINE("AN", 2, "^", 1) -LINE("OR", 2, "v", 1) -LINE("no", 2, "~", 1) -LINE("fa", 2, "V", 1) -LINE("te", 2, "3", 1) -LINE("Ah", 2, "N", 1) -LINE("Im", 2, "I", 1) -LINE("Re", 2, "R", 1) -LINE("mo", 2, "E", 1) -LINE("nm", 2, "E", 1) -LINE("eq", 2, "=", 1) -LINE("pl", 2, "+", 1) -LINE("di", 2, "-:-", 3) -LINE("mu", 2, "x", 1) -LINE("(=", 2, "(=", 2) -LINE("=)", 2, "=)", 2) -LINE("ap", 2, "~", 1) -LINE("pd", 2, "a", 1) -LINE("gr", 2, "V", 1) -LINE("ca", 2, "(^)", 3) -LINE("cu", 2, "U", 1) -LINE("es", 2, "{}", 2) -LINE("st", 2, "-)", 2) +STRING("Am", 2, "&", 1) +STRING("Ba", 2, "|", 1) +STRING("Ge", 2, ">=", 2) +STRING("Gt", 2, ">", 1) +STRING("If", 2, "infinity", 8) +STRING("Le", 2, "<=", 2) +STRING("Lq", 2, "``", 2) +STRING("Lt", 2, "<", 1) +STRING("Na", 2, "NaN", 3) +STRING("Ne", 2, "!=", 2) +STRING("Pi", 2, "pi", 2) +STRING("Pm", 2, "+-", 2) +STRING("R", 1, "(R)", 3) +STRING("Rq", 2, "\'\'", 2) +STRING("Tm", 2, "tm", 2) +STRING("left-bracket", 12, "[", 1) +STRING("left-parenthesis", 16, "(", 1) +STRING("left-singlequote", 16, "`", 1) +STRING("lp", 2, "(", 1) +STRING("q", 1, "\"", 1) +STRING("quote-left", 10, "`", 1) +STRING("quote-right", 11, "\'", 1) +STRING("right-bracket", 13, "]", 1) +STRING("right-parenthesis", 17, ")", 1) +STRING("right-singlequote", 17, "\'", 1) +STRING("rp", 2, ")", 1) + +BOTH("<=", 2, "<=", 2) +BOTH(">=", 2, ">=", 2) +BOTH("aa", 2, "\'", 1) +BOTH("ga", 2, "`", 1) +BOTH("lq", 2, "``", 2) +BOTH("rq", 2, "\'\'", 2) +BOTH("ua", 2, "^", 1) + +CHAR(" ", 1, " ", 1) +CHAR("!=", 2, "!=", 2) +CHAR("%", 1, "", 0) +CHAR("&", 1, "", 0) +CHAR("(=", 2, "(=", 2) +CHAR("**", 2, "*", 1) +CHAR("*A", 2, "A", 1) +CHAR("*B", 2, "B", 1) +CHAR("*C", 2, "H", 1) +CHAR("*D", 2, "/\\", 2) +CHAR("*E", 2, "E", 1) +CHAR("*F", 2, "O_", 1) +CHAR("*G", 2, "|", 1) +CHAR("*H", 2, "O", 1) +CHAR("*I", 2, "I", 1) +CHAR("*K", 2, "K", 1) +CHAR("*L", 2, "/\\", 2) +CHAR("*M", 2, "M", 1) +CHAR("*N", 2, "N", 1) +CHAR("*O", 2, "O", 1) +CHAR("*P", 2, "TT", 2) +CHAR("*Q", 2, "Y", 
1) +CHAR("*R", 2, "P", 1) +CHAR("*S", 2, ">", 1) +CHAR("*T", 2, "T", 1) +CHAR("*U", 2, "Y", 1) +CHAR("*W", 2, "O", 1) +CHAR("*X", 2, "X", 1) +CHAR("*Y", 2, "H", 1) +CHAR("*Z", 2, "Z", 1) +CHAR("*a", 2, "a", 1) +CHAR("*b", 2, "B", 1) +CHAR("*c", 2, "E", 1) +CHAR("*d", 2, "d", 1) +CHAR("*e", 2, "e", 1) +CHAR("*f", 2, "o", 1) +CHAR("*g", 2, "y", 1) +CHAR("*h", 2, "0", 1) +CHAR("*i", 2, "i", 1) +CHAR("*k", 2, "k", 1) +CHAR("*l", 2, "\\", 1) +CHAR("*m", 2, "u", 1) +CHAR("*n", 2, "v", 1) +CHAR("*o", 2, "o", 1) +CHAR("*p", 2, "n", 1) +CHAR("*q", 2, "u", 1) +CHAR("*r", 2, "p", 1) +CHAR("*s", 2, "o", 1) +CHAR("*t", 2, "t", 1) +CHAR("*u", 2, "u", 1) +CHAR("*w", 2, "w", 1) +CHAR("*x", 2, "x", 1) +CHAR("*y", 2, "n", 1) +CHAR("*z", 2, "C", 1) +CHAR("+-", 2, "+-", 2) +CHAR("+f", 2, "o", 1) +CHAR("+h", 2, "0", 1) +CHAR("+p", 2, "w", 1) +CHAR(",C", 2, "C", 1) +CHAR(",c", 2, "c", 1) +CHAR("-", 1, "-", 1) +CHAR("->", 2, "->", 2) +CHAR("-D", 2, "D", 1) +CHAR(".", 1, ".", 1) +CHAR("/L", 2, "L", 1) +CHAR("/O", 2, "O", 1) +CHAR("/l", 2, "l", 1) +CHAR("/o", 2, "o", 1) +CHAR("0", 1, " ", 1) +CHAR(":A", 2, "A", 1) +CHAR(":E", 2, "E", 1) +CHAR(":I", 2, "I", 1) +CHAR(":O", 2, "O", 1) +CHAR(":U", 2, "U", 1) +CHAR(":a", 2, "a", 1) +CHAR(":e", 2, "e", 1) +CHAR(":i", 2, "i", 1) +CHAR(":o", 2, "o", 1) +CHAR(":u", 2, "u", 1) +CHAR(":y", 2, "y", 1) +CHAR("<-", 2, "<-", 2) +CHAR("<=", 2, "<=", 2) +CHAR("<>", 2, "<>", 2) +CHAR("=)", 2, "=)", 2) +CHAR("==", 2, "==", 2) +CHAR("=~", 2, "=~", 2) +CHAR(">=", 2, ">=", 2) +CHAR("AE", 2, "AE", 2) +CHAR("AN", 2, "^", 1) +CHAR("Ah", 2, "N", 1) +CHAR("Bq", 2, ",,", 2) +CHAR("Cs", 2, "x", 1) +CHAR("Do", 2, "$", 1) +CHAR("Eu", 2, "EUR", 3) +CHAR("Fc", 2, ">>", 2) +CHAR("Fi", 2, "ffi", 3) +CHAR("Fl", 2, "ffl", 3) +CHAR("Fn", 2, "f", 1) +CHAR("Fo", 2, "<<", 2) +CHAR("Im", 2, "I", 1) +CHAR("OE", 2, "OE", 2) +CHAR("OR", 2, "v", 1) +CHAR("Po", 2, "L", 1) +CHAR("Re", 2, "R", 1) +CHAR("Sd", 2, "o", 1) +CHAR("TP", 2, "b", 1) +CHAR("Tp", 2, "b", 1) +CHAR("Ye", 2, "Y", 1) 
+CHAR("\'", 1, "\'", 1) +CHAR("\'A", 2, "A", 1) +CHAR("\'E", 2, "E", 1) +CHAR("\'I", 2, "I", 1) +CHAR("\'O", 2, "O", 1) +CHAR("\'U", 2, "U", 1) +CHAR("\'a", 2, "a", 1) +CHAR("\'e", 2, "e", 1) +CHAR("\'i", 2, "i", 1) +CHAR("\'o", 2, "o", 1) +CHAR("\'u", 2, "u", 1) +CHAR("\\", 1, "\\", 1) +CHAR("^", 1, "", 0) +CHAR("^A", 2, "A", 1) +CHAR("^E", 2, "E", 1) +CHAR("^I", 2, "I", 1) +CHAR("^O", 2, "O", 1) +CHAR("^U", 2, "U", 1) +CHAR("^a", 2, "a", 1) +CHAR("^e", 2, "e", 1) +CHAR("^i", 2, "i", 1) +CHAR("^o", 2, "o", 1) +CHAR("^u", 2, "u", 1) +CHAR("`", 1, "`", 1) +CHAR("`A", 2, "A", 1) +CHAR("`E", 2, "E", 1) +CHAR("`I", 2, "I", 1) +CHAR("`O", 2, "O", 1) +CHAR("`U", 2, "U", 1) +CHAR("`a", 2, "a", 1) +CHAR("`e", 2, "e", 1) +CHAR("`i", 2, "i", 1) +CHAR("`o", 2, "o", 1) +CHAR("`u", 2, "u", 1) +CHAR("a-", 2, "-", 1) +CHAR("a\"", 2, "\"", 1) +CHAR("a^", 2, "^", 1) +CHAR("aa", 2, "\'", 1) +CHAR("ab", 2, "`", 1) +CHAR("ac", 2, ",", 1) +CHAR("ad", 2, "\"", 1) +CHAR("ae", 2, "ae", 2) +CHAR("ah", 2, "v", 1) +CHAR("ao", 2, "o", 1) +CHAR("ap", 2, "~", 1) +CHAR("aq", 2, "\'", 1) +CHAR("a~", 2, "~", 1) +CHAR("ba", 2, "|", 1) +CHAR("bb", 2, "|", 1) +CHAR("bq", 2, ",", 1) +CHAR("bu", 2, "o", 1) +CHAR("c", 1, "", 0) +CHAR("ca", 2, "(^)", 3) +CHAR("ci", 2, "O", 1) +CHAR("co", 2, "(C)", 3) +CHAR("ct", 2, "c", 1) +CHAR("cu", 2, "U", 1) +CHAR("dA", 2, "v", 1) +CHAR("da", 2, "v", 1) +CHAR("dd", 2, "=", 1) +CHAR("de", 2, "o", 1) +CHAR("dg", 2, "-", 1) +CHAR("di", 2, "-:-", 3) +CHAR("e", 1, "\\", 1) +CHAR("em", 2, "--", 2) +CHAR("en", 2, "-", 1) +CHAR("eq", 2, "=", 1) +CHAR("es", 2, "{}", 2) +CHAR("eu", 2, "EUR", 3) +CHAR("fa", 2, "V", 1) +CHAR("fc", 2, ">", 1) +CHAR("ff", 2, "ff", 2) +CHAR("fi", 2, "fi", 2) +CHAR("fl", 2, "fl", 2) +CHAR("fo", 2, "<", 1) +CHAR("ga", 2, "`", 1) +CHAR("gr", 2, "V", 1) +CHAR("hA", 2, "<=>", 3) +CHAR("ho", 2, ",", 1) +CHAR("hy", 2, "-", 1) +CHAR("if", 2, "oo", 2) +CHAR("lA", 2, "<=", 2) +CHAR("lB", 2, "[", 1) +CHAR("lC", 2, "{", 1) +CHAR("la", 2, "<", 1) +CHAR("lh", 2, 
"<=", 2) +CHAR("mo", 2, "E", 1) +CHAR("mu", 2, "x", 1) +CHAR("na", 2, "NaN", 3) +CHAR("nm", 2, "E", 1) +CHAR("no", 2, "~", 1) +CHAR("oA", 2, "A", 1) +CHAR("oa", 2, "a", 1) +CHAR("oe", 2, "oe", 2) +CHAR("oq", 2, "`", 1) +CHAR("pd", 2, "a", 1) +CHAR("pl", 2, "+", 1) +CHAR("ps", 2, "9|", 2) +CHAR("r!", 2, "i", 1) +CHAR("r?", 2, "c", 1) +CHAR("rA", 2, "=>", 2) +CHAR("rB", 2, "]", 1) +CHAR("rC", 2, "}", 1) +CHAR("ra", 2, ">", 1) +CHAR("rg", 2, "(R)", 3) +CHAR("rh", 2, "=>", 2) +CHAR("sc", 2, "S", 1) +CHAR("ss", 2, "ss", 2) +CHAR("st", 2, "-)", 2) +CHAR("te", 2, "3", 1) +CHAR("tf", 2, ".:.", 3) +CHAR("tm", 2, "tm", 2) +CHAR("ts", 2, "s", 1) +CHAR("uA", 2, "^", 1) +CHAR("ua", 2, "^", 1) +CHAR("|", 1, "", 0) +CHAR("~", 1, " ", 1) +CHAR("~=", 2, "~=", 2) +CHAR("~A", 2, "A", 1) +CHAR("~N", 2, "N", 1) +CHAR("~O", 2, "O", 1) +CHAR("~a", 2, "a", 1) +CHAR("~n", 2, "n", 1) +CHAR("~o", 2, "o", 1) +CHAR("~~", 2, "~~", 2) diff --git a/usr.bin/mandoc/term.c b/usr.bin/mandoc/term.c index 908040530b8..324be180478 100644 --- a/usr.bin/mandoc/term.c +++ b/usr.bin/mandoc/term.c @@ -1,4 +1,4 @@ -/* $Id: term.c,v 1.10 2009/08/09 21:28:57 schwarze Exp $ */ +/* $Id: term.c,v 1.11 2009/08/22 17:04:48 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -31,13 +31,16 @@ extern int mdoc_run(struct termp *, static struct termp *term_alloc(enum termenc); static void term_free(struct termp *); -static void term_pescape(struct termp *, const char **); -static void term_nescape(struct termp *, + +static void do_escaped(struct termp *, const char **); +static void do_special(struct termp *, + const char *, size_t); +static void do_reserved(struct termp *, const char *, size_t); -static void term_chara(struct termp *, char); -static void term_encodea(struct termp *, char); -static int term_isopendelim(const char *); -static int term_isclosedelim(const char *); +static void buffer(struct termp *, char); +static void encode(struct termp *, char); +static int 
isopendelim(const char *); +static int isclosedelim(const char *); void * @@ -110,7 +113,7 @@ term_alloc(enum termenc enc) static int -term_isclosedelim(const char *p) +isclosedelim(const char *p) { if ( ! (*p && 0 == *(p + 1))) @@ -144,7 +147,7 @@ term_isclosedelim(const char *p) static int -term_isopendelim(const char *p) +isopendelim(const char *p) { if ( ! (*p && 0 == *(p + 1))) @@ -205,12 +208,6 @@ term_isopendelim(const char *p) * Otherwise, the line will break at the right margin. Extremely long * lines will cause the system to emit a warning (TODO: hyphenate, if * possible). - * - * FIXME: newline breaks occur (in groff) also occur when a single - * space follows a NOBREAK (try `Bl -tag') - * - * FIXME: there's a newline error where a `Bl -diag' will have a - * trailing newline if the line is exactly 73 chars long. */ void term_flushln(struct termp *p) @@ -385,13 +382,8 @@ term_vspace(struct termp *p) } -/* - * Determine the symbol indicated by an escape sequences, that is, one - * starting with a backslash. Once done, we pass this value into the - * output buffer by way of the symbol table. 
- */ static void -term_nescape(struct termp *p, const char *word, size_t len) +do_special(struct termp *p, const char *word, size_t len) { const char *rhs; size_t sz; @@ -399,9 +391,40 @@ term_nescape(struct termp *p, const char *word, size_t len) rhs = term_a2ascii(p->symtab, word, len, &sz); - if (rhs) - for (i = 0; i < (int)sz; i++) - term_encodea(p, rhs[i]); + if (NULL == rhs) { +#if 0 + fputs("Unknown special character: ", stderr); + for (i = 0; i < (int)len; i++) + fputc(word[i], stderr); + fputc('\n', stderr); +#endif + return; + } + for (i = 0; i < (int)sz; i++) + encode(p, rhs[i]); +} + + +static void +do_reserved(struct termp *p, const char *word, size_t len) +{ + const char *rhs; + size_t sz; + int i; + + rhs = term_a2res(p->symtab, word, len, &sz); + + if (NULL == rhs) { +#if 0 + fputs("Unknown reserved word: ", stderr); + for (i = 0; i < (int)len; i++) + fputc(word[i], stderr); + fputc('\n', stderr); +#endif + return; + } + for (i = 0; i < (int)sz; i++) + encode(p, rhs[i]); } @@ -411,12 +434,13 @@ term_nescape(struct termp *p, const char *word, size_t len) * the escape sequence (we assert upon badly-formed escape sequences). 
*/ static void -term_pescape(struct termp *p, const char **word) +do_escaped(struct termp *p, const char **word) { - int j; + int j, type; const char *wp; wp = *word; + type = 1; if (0 == *(++wp)) { *word = wp; @@ -430,7 +454,7 @@ term_pescape(struct termp *p, const char **word) return; } - term_nescape(p, wp, 2); + do_special(p, wp, 2); *word = ++wp; return; @@ -448,13 +472,14 @@ term_pescape(struct termp *p, const char **word) return; } - term_nescape(p, wp, 2); + do_reserved(p, wp, 2); *word = ++wp; return; case ('['): + type = 0; break; default: - term_nescape(p, wp, 1); + do_reserved(p, wp, 1); *word = wp; return; } @@ -485,7 +510,7 @@ term_pescape(struct termp *p, const char **word) return; } else if ('[' != *wp) { - term_nescape(p, wp, 1); + do_special(p, wp, 1); *word = wp; return; } @@ -499,7 +524,10 @@ term_pescape(struct termp *p, const char **word) return; } - term_nescape(p, wp - j, (size_t)j); + if (type) + do_special(p, wp - j, (size_t)j); + else + do_reserved(p, wp - j, (size_t)j); *word = wp; } @@ -514,28 +542,23 @@ term_word(struct termp *p, const char *word) { const char *sv; - if (term_isclosedelim(word)) + if (isclosedelim(word)) if ( ! (TERMP_IGNDELIM & p->flags)) p->flags |= TERMP_NOSPACE; if ( ! (TERMP_NOSPACE & p->flags)) - term_chara(p, ' '); + buffer(p, ' '); if ( ! (p->flags & TERMP_NONOSPACE)) p->flags &= ~TERMP_NOSPACE; - /* - * If ANSI (word-length styling), then apply our style now, - * before the word. - */ - for (sv = word; *word; word++) if ('\\' != *word) - term_encodea(p, *word); + encode(p, *word); else - term_pescape(p, &word); + do_escaped(p, &word); - if (term_isopendelim(sv)) + if (isopendelim(sv)) p->flags |= TERMP_NOSPACE; } @@ -546,7 +569,7 @@ term_word(struct termp *p, const char *word) * size. 
*/ static void -term_chara(struct termp *p, char c) +buffer(struct termp *p, char c) { size_t s; @@ -564,18 +587,18 @@ term_chara(struct termp *p, char c) static void -term_encodea(struct termp *p, char c) +encode(struct termp *p, char c) { if (' ' != c && TERMP_STYLE & p->flags) { if (TERMP_BOLD & p->flags) { - term_chara(p, c); - term_chara(p, 8); + buffer(p, c); + buffer(p, 8); } if (TERMP_UNDER & p->flags) { - term_chara(p, '_'); - term_chara(p, 8); + buffer(p, '_'); + buffer(p, 8); } } - term_chara(p, c); + buffer(p, c); } diff --git a/usr.bin/mandoc/term.h b/usr.bin/mandoc/term.h index 9812ed1b383..3b8d7c3947e 100644 --- a/usr.bin/mandoc/term.h +++ b/usr.bin/mandoc/term.h @@ -1,4 +1,4 @@ -/* $Id: term.h,v 1.7 2009/08/09 21:59:41 schwarze Exp $ */ +/* $Id: term.h,v 1.8 2009/08/22 17:04:48 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -58,6 +58,7 @@ struct termp { void *term_ascii2htab(void); const char *term_a2ascii(void *, const char *, size_t, size_t *); +const char *term_a2res(void *, const char *, size_t, size_t *); void term_asciifree(void *); void term_newln(struct termp *); |