summaryrefslogtreecommitdiff
path: root/usr.bin/mandoc
diff options
context:
space:
mode:
authorIngo Schwarze <schwarze@cvs.openbsd.org>2009-08-22 17:04:49 +0000
committerIngo Schwarze <schwarze@cvs.openbsd.org>2009-08-22 17:04:49 +0000
commita67d998773e0f4014c8aa9b663487eea72b38450 (patch)
tree502f13eb4459248fa286af6c1ab54c79745cf17b /usr.bin/mandoc
parent60177f1cc0b7ef61d687cf6e6aae528fca66b7b8 (diff)
sync to 1.8.5: partial rewrite of the handling of escape sequences:
distinguish special characters and predefined strings, and add and fix several escape sequences
Diffstat (limited to 'usr.bin/mandoc')
-rw-r--r--usr.bin/mandoc/ascii.c51
-rw-r--r--usr.bin/mandoc/ascii.in479
-rw-r--r--usr.bin/mandoc/term.c119
-rw-r--r--usr.bin/mandoc/term.h3
4 files changed, 391 insertions, 261 deletions
diff --git a/usr.bin/mandoc/ascii.c b/usr.bin/mandoc/ascii.c
index 7cc89f6c66f..89816232a9d 100644
--- a/usr.bin/mandoc/ascii.c
+++ b/usr.bin/mandoc/ascii.c
@@ -1,4 +1,4 @@
-/* $Id: ascii.c,v 1.2 2009/06/14 23:00:57 schwarze Exp $ */
+/* $Id: ascii.c,v 1.3 2009/08/22 17:04:48 schwarze Exp $ */
/*
* Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -27,9 +27,12 @@
struct line {
const char *code;
const char *out;
- /* 32- and 64-bit alignment safe. */
size_t codesz;
size_t outsz;
+ int type;
+#define ASCII_CHAR (1 << 0)
+#define ASCII_STRING (1 << 1)
+#define ASCII_BOTH (0x03)
};
struct linep {
@@ -37,8 +40,12 @@ struct linep {
struct linep *next;
};
-#define LINE(w, x, y, z) \
- { (w), (y), (x), (z) },
+#define CHAR(w, x, y, z) \
+ { (w), (y), (x), (z), ASCII_CHAR },
+#define STRING(w, x, y, z) \
+ { (w), (y), (x), (z), ASCII_STRING },
+#define BOTH(w, x, y, z) \
+ { (w), (y), (x), (z), ASCII_BOTH },
static const struct line lines[] = {
#include "ascii.in"
};
@@ -50,7 +57,9 @@ struct asciitab {
static inline int match(const struct line *,
- const char *, size_t);
+ const char *, size_t, int);
+static const char * lookup(struct asciitab *, const char *,
+ size_t, size_t *, int);
void
@@ -125,14 +134,29 @@ term_ascii2htab(void)
const char *
term_a2ascii(void *arg, const char *p, size_t sz, size_t *rsz)
{
- struct asciitab *tab;
+
+ return(lookup((struct asciitab *)arg, p,
+ sz, rsz, ASCII_CHAR));
+}
+
+/*
+ * Resolve a predefined string: the `sz' bytes at `p' are handed to
+ * lookup() together with ASCII_STRING, so only table entries whose
+ * type includes that bit can match.  `arg' is the table and is cast
+ * to struct asciitab *.  The return value and `*rsz' are whatever
+ * lookup() produces.
+ */
+const char *
+term_a2res(void *arg, const char *p, size_t sz, size_t *rsz)
+{
+
+ return(lookup((struct asciitab *)arg, p,
+ sz, rsz, ASCII_STRING));
+}
+
+
+static const char *
+lookup(struct asciitab *tab, const char *p,
+ size_t sz, size_t *rsz, int type)
+{
struct linep *pp, *prev;
void **htab;
int hash;
- tab = (struct asciitab *)arg;
- htab = tab->htab;
-
assert(p);
assert(sz > 0);
@@ -147,19 +171,20 @@ term_a2ascii(void *arg, const char *p, size_t sz, size_t *rsz)
*/
hash = (int)p[0] - ASCII_PRINT_LO;
+ htab = tab->htab;
if (NULL == (pp = ((struct linep **)htab)[hash]))
return(NULL);
if (NULL == pp->next) {
- if ( ! match(pp->line, p, sz))
+ if ( ! match(pp->line, p, sz, type))
return(NULL);
*rsz = pp->line->outsz;
return(pp->line->out);
}
for (prev = NULL; pp; pp = pp->next) {
- if ( ! match(pp->line, p, sz)) {
+ if ( ! match(pp->line, p, sz, type)) {
prev = pp;
continue;
}
@@ -181,9 +206,11 @@ term_a2ascii(void *arg, const char *p, size_t sz, size_t *rsz)
static inline int
-match(const struct line *line, const char *p, size_t sz)
+match(const struct line *line, const char *p, size_t sz, int type)
{
+ if ( ! (line->type & type))
+ return(0);
if (line->codesz != sz)
return(0);
return(0 == strncmp(line->code, p, sz));
diff --git a/usr.bin/mandoc/ascii.in b/usr.bin/mandoc/ascii.in
index 40f190da2e0..56f72d83bf3 100644
--- a/usr.bin/mandoc/ascii.in
+++ b/usr.bin/mandoc/ascii.in
@@ -1,4 +1,4 @@
-/* $Id: ascii.in,v 1.4 2009/07/13 00:18:12 schwarze Exp $ */
+/* $Id: ascii.in,v 1.5 2009/08/22 17:04:48 schwarze Exp $ */
/*
* Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -16,205 +16,284 @@
*/
/*
- * The ASCII translation table. The left-hand side corresponds to the
- * escape sequence (\x, \(xx and so on) whose length is listed second
- * element. The right-hand side is what's produced by the front-end,
- * with the fourth element being its length.
+ * The ASCII translation tables. STRING corresponds to predefined
+ * strings (cf. mdoc_samples.7 and tmac/mdoc/doc-nroff). CHAR
+ * corresponds to special characters (cf. groff_char.7). BOTH contains
+ * sequences that are equivalent in both STRING and CHAR.
*
- * Be sure to escape strings.
+ * Either way, the left-hand side corresponds to the input sequence (\x,
+ * \(xx, \*(xx and so on) whose length is given as the second element. The
+ * right-hand side is what's produced by the front-end, with the fourth
+ * element being its length.
+ *
+ * Be sure to C-escape strings!
*/
-LINE("\\", 1, "\\", 1)
-LINE("\'", 1, "\'", 1)
-LINE("`", 1, "`", 1)
-LINE("%", 1, "%", 1)
-LINE("-", 1, "-", 1)
-LINE(" ", 1, " ", 1)
-LINE("~", 1, " ", 1)
-LINE("^", 1, "", 0)
-LINE("0", 1, " ", 1)
-LINE(".", 1, ".", 1)
-LINE("&", 1, "", 0)
-LINE("e", 1, "\\", 1)
-LINE("q", 1, "\"", 1)
-LINE("|", 1, "", 0)
-LINE("rC", 2, "}", 1)
-LINE("lC", 2, "{", 1)
-LINE("rB", 2, "]", 1)
-LINE("lB", 2, "[", 1)
-LINE("ra", 2, ">", 1)
-LINE("la", 2, "<", 1)
-LINE("Lq", 2, "``", 2)
-LINE("lq", 2, "``", 2)
-LINE("Rq", 2, "\'\'", 2)
-LINE("rq", 2, "\'\'", 2)
-LINE("oq", 2, "`", 1)
-LINE("aq", 2, "\'", 1)
-LINE("Bq", 2, ",,", 2)
-LINE("bq", 2, ",,", 2)
-LINE("<-", 2, "<-", 2)
-LINE("->", 2, "->", 2)
-LINE("<>", 2, "<>", 2)
-LINE("ua", 2, "^", 1)
-LINE("da", 2, "v", 1)
-LINE("bu", 2, "o", 1)
-LINE("ci", 2, "O", 1)
-LINE("Ba", 2, "|", 1)
-LINE("ba", 2, "|", 1)
-LINE("bb", 2, "|", 1)
-LINE("co", 2, "(C)", 3)
-LINE("rg", 2, "(R)", 3)
-LINE("tm", 2, "tm", 2)
-LINE("Am", 2, "&", 1)
-LINE("Le", 2, "<=", 2)
-LINE("<=", 2, "<=", 2)
-LINE("Ge", 2, ">=", 2)
-LINE(">=", 2, ">=", 2)
-LINE("==", 2, "==", 2)
-LINE("Ne", 2, "!=", 2)
-LINE("!=", 2, "!=", 2)
-LINE("Pm", 2, "+-", 2)
-LINE("+-", 2, "+-", 2)
-LINE("If", 2, "infinity", 8)
-LINE("if", 2, "oo", 2)
-LINE("Na", 2, "NaN", 3)
-LINE("na", 2, "NaN", 3)
-LINE("**", 2, "*", 1)
-LINE("Gt", 2, ">", 1)
-LINE("Lt", 2, "<", 1)
-LINE("aa", 2, "\'", 1)
-LINE("a~", 2, "~", 1)
-LINE("ga", 2, "`", 1)
-LINE("en", 2, "-", 1)
-LINE("em", 2, "--", 2)
-LINE("hy", 2, "-", 1)
-LINE("Pi", 2, "pi", 2)
-LINE("Fo", 2, "<<", 2)
-LINE("Fc", 2, ">>", 2)
-LINE("fo", 2, "<", 1)
-LINE("fc", 2, ">", 1)
-LINE("lh", 2, "<=", 2)
-LINE("rh", 2, "=>", 2)
-LINE("ae", 2, "ae", 2)
-LINE("AE", 2, "AE", 2)
-LINE("oe", 2, "oe", 2)
-LINE("OE", 2, "OE", 2)
-LINE("ss", 2, "ss", 2)
-LINE("\'A", 2, "A", 1)
-LINE("\'E", 2, "E", 1)
-LINE("\'I", 2, "I", 1)
-LINE("\'O", 2, "O", 1)
-LINE("\'U", 2, "U", 1)
-LINE("\'a", 2, "a", 1)
-LINE("\'e", 2, "e", 1)
-LINE("\'i", 2, "i", 1)
-LINE("\'o", 2, "o", 1)
-LINE("\'u", 2, "u", 1)
-LINE("`A", 2, "A", 1)
-LINE("`E", 2, "E", 1)
-LINE("`I", 2, "I", 1)
-LINE("`O", 2, "O", 1)
-LINE("`U", 2, "U", 1)
-LINE("`a", 2, "a", 1)
-LINE("`e", 2, "e", 1)
-LINE("`i", 2, "i", 1)
-LINE("`o", 2, "o", 1)
-LINE("`u", 2, "u", 1)
-LINE("~A", 2, "A", 1)
-LINE("~N", 2, "N", 1)
-LINE("~O", 2, "O", 1)
-LINE("~a", 2, "a", 1)
-LINE("~n", 2, "n", 1)
-LINE("~o", 2, "o", 1)
-LINE("lA", 2, "<=", 2)
-LINE("rA", 2, "=>", 2)
-LINE("uA", 2, "^", 1)
-LINE("dA", 2, "v", 1)
-LINE("hA", 2, "<=>", 3)
-LINE(":A", 2, "A", 1)
-LINE(":E", 2, "E", 1)
-LINE(":I", 2, "I", 1)
-LINE(":O", 2, "O", 1)
-LINE(":U", 2, "U", 1)
-LINE(":a", 2, "a", 1)
-LINE(":e", 2, "e", 1)
-LINE(":i", 2, "i", 1)
-LINE(":o", 2, "o", 1)
-LINE(":u", 2, "u", 1)
-LINE(":y", 2, "y", 1)
-LINE("^A", 2, "A", 1)
-LINE("^E", 2, "E", 1)
-LINE("^I", 2, "I", 1)
-LINE("^O", 2, "O", 1)
-LINE("^U", 2, "U", 1)
-LINE("^a", 2, "a", 1)
-LINE("^e", 2, "e", 1)
-LINE("^i", 2, "i", 1)
-LINE("^o", 2, "o", 1)
-LINE("^u", 2, "u", 1)
-LINE("-D", 2, "D", 1)
-LINE("Sd", 2, "o", 1)
-LINE("TP", 2, "b", 1)
-LINE("Tp", 2, "b", 1)
-LINE(",C", 2, "C", 1)
-LINE(",c", 2, "c", 1)
-LINE("/L", 2, "L", 1)
-LINE("/l", 2, "l", 1)
-LINE("/O", 2, "O", 1)
-LINE("/o", 2, "o", 1)
-LINE("oA", 2, "A", 1)
-LINE("oa", 2, "a", 1)
-LINE("a^", 2, "^", 1)
-LINE("ac", 2, ",", 1)
-LINE("ad", 2, "\"", 1)
-LINE("ah", 2, "v", 1)
-LINE("ao", 2, "o", 1)
-LINE("ho", 2, ",", 1)
-LINE("ab", 2, "`", 1)
-LINE("a\"", 2, "\"", 1)
-LINE("a-", 2, "-", 1)
-LINE("Cs", 2, "x", 1)
-LINE("Do", 2, "$", 1)
-LINE("Po", 2, "L", 1)
-LINE("Ye", 2, "Y", 1)
-LINE("Fn", 2, "f", 1)
-LINE("ct", 2, "c", 1)
-LINE("ff", 2, "ff", 2)
-LINE("fi", 2, "fi", 2)
-LINE("fl", 2, "fl", 2)
-LINE("Fi", 2, "ffi", 3)
-LINE("Fl", 2, "ffl", 3)
-LINE("r!", 2, "i", 1)
-LINE("r?", 2, "c", 1)
-LINE("dd", 2, "=", 1)
-LINE("dg", 2, "-", 1)
-LINE("ps", 2, "9|", 2)
-LINE("sc", 2, "S", 1)
-LINE("de", 2, "o", 1)
-LINE("tf", 2, ".:.", 3)
-LINE("~~", 2, "~~", 2)
-LINE("~=", 2, "~=", 2)
-LINE("=~", 2, "=~", 2)
-LINE("AN", 2, "^", 1)
-LINE("OR", 2, "v", 1)
-LINE("no", 2, "~", 1)
-LINE("fa", 2, "V", 1)
-LINE("te", 2, "3", 1)
-LINE("Ah", 2, "N", 1)
-LINE("Im", 2, "I", 1)
-LINE("Re", 2, "R", 1)
-LINE("mo", 2, "E", 1)
-LINE("nm", 2, "E", 1)
-LINE("eq", 2, "=", 1)
-LINE("pl", 2, "+", 1)
-LINE("di", 2, "-:-", 3)
-LINE("mu", 2, "x", 1)
-LINE("(=", 2, "(=", 2)
-LINE("=)", 2, "=)", 2)
-LINE("ap", 2, "~", 1)
-LINE("pd", 2, "a", 1)
-LINE("gr", 2, "V", 1)
-LINE("ca", 2, "(^)", 3)
-LINE("cu", 2, "U", 1)
-LINE("es", 2, "{}", 2)
-LINE("st", 2, "-)", 2)
+STRING("Am", 2, "&", 1)
+STRING("Ba", 2, "|", 1)
+STRING("Ge", 2, ">=", 2)
+STRING("Gt", 2, ">", 1)
+STRING("If", 2, "infinity", 8)
+STRING("Le", 2, "<=", 2)
+STRING("Lq", 2, "``", 2)
+STRING("Lt", 2, "<", 1)
+STRING("Na", 2, "NaN", 3)
+STRING("Ne", 2, "!=", 2)
+STRING("Pi", 2, "pi", 2)
+STRING("Pm", 2, "+-", 2)
+STRING("R", 1, "(R)", 3)
+STRING("Rq", 2, "\'\'", 2)
+STRING("Tm", 2, "tm", 2)
+STRING("left-bracket", 12, "[", 1)
+STRING("left-parenthesis", 16, "(", 1)
+STRING("left-singlequote", 16, "`", 1)
+STRING("lp", 2, "(", 1)
+STRING("q", 1, "\"", 1)
+STRING("quote-left", 10, "`", 1)
+STRING("quote-right", 11, "\'", 1)
+STRING("right-bracket", 13, "]", 1)
+STRING("right-parenthesis", 17, ")", 1)
+STRING("right-singlequote", 17, "\'", 1)
+STRING("rp", 2, ")", 1)
+
+BOTH("<=", 2, "<=", 2)
+BOTH(">=", 2, ">=", 2)
+BOTH("aa", 2, "\'", 1)
+BOTH("ga", 2, "`", 1)
+BOTH("lq", 2, "``", 2)
+BOTH("rq", 2, "\'\'", 2)
+BOTH("ua", 2, "^", 1)
+
+CHAR(" ", 1, " ", 1)
+CHAR("!=", 2, "!=", 2)
+CHAR("%", 1, "", 0)
+CHAR("&", 1, "", 0)
+CHAR("(=", 2, "(=", 2)
+CHAR("**", 2, "*", 1)
+CHAR("*A", 2, "A", 1)
+CHAR("*B", 2, "B", 1)
+CHAR("*C", 2, "H", 1)
+CHAR("*D", 2, "/\\", 2)
+CHAR("*E", 2, "E", 1)
+CHAR("*F", 2, "O_", 2) /* fourth field must equal the length of "O_" */
+CHAR("*G", 2, "|", 1)
+CHAR("*H", 2, "O", 1)
+CHAR("*I", 2, "I", 1)
+CHAR("*K", 2, "K", 1)
+CHAR("*L", 2, "/\\", 2)
+CHAR("*M", 2, "M", 1)
+CHAR("*N", 2, "N", 1)
+CHAR("*O", 2, "O", 1)
+CHAR("*P", 2, "TT", 2)
+CHAR("*Q", 2, "Y", 1)
+CHAR("*R", 2, "P", 1)
+CHAR("*S", 2, ">", 1)
+CHAR("*T", 2, "T", 1)
+CHAR("*U", 2, "Y", 1)
+CHAR("*W", 2, "O", 1)
+CHAR("*X", 2, "X", 1)
+CHAR("*Y", 2, "H", 1)
+CHAR("*Z", 2, "Z", 1)
+CHAR("*a", 2, "a", 1)
+CHAR("*b", 2, "B", 1)
+CHAR("*c", 2, "E", 1)
+CHAR("*d", 2, "d", 1)
+CHAR("*e", 2, "e", 1)
+CHAR("*f", 2, "o", 1)
+CHAR("*g", 2, "y", 1)
+CHAR("*h", 2, "0", 1)
+CHAR("*i", 2, "i", 1)
+CHAR("*k", 2, "k", 1)
+CHAR("*l", 2, "\\", 1)
+CHAR("*m", 2, "u", 1)
+CHAR("*n", 2, "v", 1)
+CHAR("*o", 2, "o", 1)
+CHAR("*p", 2, "n", 1)
+CHAR("*q", 2, "u", 1)
+CHAR("*r", 2, "p", 1)
+CHAR("*s", 2, "o", 1)
+CHAR("*t", 2, "t", 1)
+CHAR("*u", 2, "u", 1)
+CHAR("*w", 2, "w", 1)
+CHAR("*x", 2, "x", 1)
+CHAR("*y", 2, "n", 1)
+CHAR("*z", 2, "C", 1)
+CHAR("+-", 2, "+-", 2)
+CHAR("+f", 2, "o", 1)
+CHAR("+h", 2, "0", 1)
+CHAR("+p", 2, "w", 1)
+CHAR(",C", 2, "C", 1)
+CHAR(",c", 2, "c", 1)
+CHAR("-", 1, "-", 1)
+CHAR("->", 2, "->", 2)
+CHAR("-D", 2, "D", 1)
+CHAR(".", 1, ".", 1)
+CHAR("/L", 2, "L", 1)
+CHAR("/O", 2, "O", 1)
+CHAR("/l", 2, "l", 1)
+CHAR("/o", 2, "o", 1)
+CHAR("0", 1, " ", 1)
+CHAR(":A", 2, "A", 1)
+CHAR(":E", 2, "E", 1)
+CHAR(":I", 2, "I", 1)
+CHAR(":O", 2, "O", 1)
+CHAR(":U", 2, "U", 1)
+CHAR(":a", 2, "a", 1)
+CHAR(":e", 2, "e", 1)
+CHAR(":i", 2, "i", 1)
+CHAR(":o", 2, "o", 1)
+CHAR(":u", 2, "u", 1)
+CHAR(":y", 2, "y", 1)
+CHAR("<-", 2, "<-", 2)
+CHAR("<=", 2, "<=", 2)
+CHAR("<>", 2, "<>", 2)
+CHAR("=)", 2, "=)", 2)
+CHAR("==", 2, "==", 2)
+CHAR("=~", 2, "=~", 2)
+CHAR(">=", 2, ">=", 2)
+CHAR("AE", 2, "AE", 2)
+CHAR("AN", 2, "^", 1)
+CHAR("Ah", 2, "N", 1)
+CHAR("Bq", 2, ",,", 2)
+CHAR("Cs", 2, "x", 1)
+CHAR("Do", 2, "$", 1)
+CHAR("Eu", 2, "EUR", 3)
+CHAR("Fc", 2, ">>", 2)
+CHAR("Fi", 2, "ffi", 3)
+CHAR("Fl", 2, "ffl", 3)
+CHAR("Fn", 2, "f", 1)
+CHAR("Fo", 2, "<<", 2)
+CHAR("Im", 2, "I", 1)
+CHAR("OE", 2, "OE", 2)
+CHAR("OR", 2, "v", 1)
+CHAR("Po", 2, "L", 1)
+CHAR("Re", 2, "R", 1)
+CHAR("Sd", 2, "o", 1)
+CHAR("TP", 2, "b", 1)
+CHAR("Tp", 2, "b", 1)
+CHAR("Ye", 2, "Y", 1)
+CHAR("\'", 1, "\'", 1)
+CHAR("\'A", 2, "A", 1)
+CHAR("\'E", 2, "E", 1)
+CHAR("\'I", 2, "I", 1)
+CHAR("\'O", 2, "O", 1)
+CHAR("\'U", 2, "U", 1)
+CHAR("\'a", 2, "a", 1)
+CHAR("\'e", 2, "e", 1)
+CHAR("\'i", 2, "i", 1)
+CHAR("\'o", 2, "o", 1)
+CHAR("\'u", 2, "u", 1)
+CHAR("\\", 1, "\\", 1)
+CHAR("^", 1, "", 0)
+CHAR("^A", 2, "A", 1)
+CHAR("^E", 2, "E", 1)
+CHAR("^I", 2, "I", 1)
+CHAR("^O", 2, "O", 1)
+CHAR("^U", 2, "U", 1)
+CHAR("^a", 2, "a", 1)
+CHAR("^e", 2, "e", 1)
+CHAR("^i", 2, "i", 1)
+CHAR("^o", 2, "o", 1)
+CHAR("^u", 2, "u", 1)
+CHAR("`", 1, "`", 1)
+CHAR("`A", 2, "A", 1)
+CHAR("`E", 2, "E", 1)
+CHAR("`I", 2, "I", 1)
+CHAR("`O", 2, "O", 1)
+CHAR("`U", 2, "U", 1)
+CHAR("`a", 2, "a", 1)
+CHAR("`e", 2, "e", 1)
+CHAR("`i", 2, "i", 1)
+CHAR("`o", 2, "o", 1)
+CHAR("`u", 2, "u", 1)
+CHAR("a-", 2, "-", 1)
+CHAR("a\"", 2, "\"", 1)
+CHAR("a^", 2, "^", 1)
+CHAR("aa", 2, "\'", 1)
+CHAR("ab", 2, "`", 1)
+CHAR("ac", 2, ",", 1)
+CHAR("ad", 2, "\"", 1)
+CHAR("ae", 2, "ae", 2)
+CHAR("ah", 2, "v", 1)
+CHAR("ao", 2, "o", 1)
+CHAR("ap", 2, "~", 1)
+CHAR("aq", 2, "\'", 1)
+CHAR("a~", 2, "~", 1)
+CHAR("ba", 2, "|", 1)
+CHAR("bb", 2, "|", 1)
+CHAR("bq", 2, ",", 1)
+CHAR("bu", 2, "o", 1)
+CHAR("c", 1, "", 0)
+CHAR("ca", 2, "(^)", 3)
+CHAR("ci", 2, "O", 1)
+CHAR("co", 2, "(C)", 3)
+CHAR("ct", 2, "c", 1)
+CHAR("cu", 2, "U", 1)
+CHAR("dA", 2, "v", 1)
+CHAR("da", 2, "v", 1)
+CHAR("dd", 2, "=", 1)
+CHAR("de", 2, "o", 1)
+CHAR("dg", 2, "-", 1)
+CHAR("di", 2, "-:-", 3)
+CHAR("e", 1, "\\", 1)
+CHAR("em", 2, "--", 2)
+CHAR("en", 2, "-", 1)
+CHAR("eq", 2, "=", 1)
+CHAR("es", 2, "{}", 2)
+CHAR("eu", 2, "EUR", 3)
+CHAR("fa", 2, "V", 1)
+CHAR("fc", 2, ">", 1)
+CHAR("ff", 2, "ff", 2)
+CHAR("fi", 2, "fi", 2)
+CHAR("fl", 2, "fl", 2)
+CHAR("fo", 2, "<", 1)
+CHAR("ga", 2, "`", 1)
+CHAR("gr", 2, "V", 1)
+CHAR("hA", 2, "<=>", 3)
+CHAR("ho", 2, ",", 1)
+CHAR("hy", 2, "-", 1)
+CHAR("if", 2, "oo", 2)
+CHAR("lA", 2, "<=", 2)
+CHAR("lB", 2, "[", 1)
+CHAR("lC", 2, "{", 1)
+CHAR("la", 2, "<", 1)
+CHAR("lh", 2, "<=", 2)
+CHAR("mo", 2, "E", 1)
+CHAR("mu", 2, "x", 1)
+CHAR("na", 2, "NaN", 3)
+CHAR("nm", 2, "E", 1)
+CHAR("no", 2, "~", 1)
+CHAR("oA", 2, "A", 1)
+CHAR("oa", 2, "a", 1)
+CHAR("oe", 2, "oe", 2)
+CHAR("oq", 2, "`", 1)
+CHAR("pd", 2, "a", 1)
+CHAR("pl", 2, "+", 1)
+CHAR("ps", 2, "9|", 2)
+CHAR("r!", 2, "i", 1)
+CHAR("r?", 2, "c", 1)
+CHAR("rA", 2, "=>", 2)
+CHAR("rB", 2, "]", 1)
+CHAR("rC", 2, "}", 1)
+CHAR("ra", 2, ">", 1)
+CHAR("rg", 2, "(R)", 3)
+CHAR("rh", 2, "=>", 2)
+CHAR("sc", 2, "S", 1)
+CHAR("ss", 2, "ss", 2)
+CHAR("st", 2, "-)", 2)
+CHAR("te", 2, "3", 1)
+CHAR("tf", 2, ".:.", 3)
+CHAR("tm", 2, "tm", 2)
+CHAR("ts", 2, "s", 1)
+CHAR("uA", 2, "^", 1)
+CHAR("ua", 2, "^", 1)
+CHAR("|", 1, "", 0)
+CHAR("~", 1, " ", 1)
+CHAR("~=", 2, "~=", 2)
+CHAR("~A", 2, "A", 1)
+CHAR("~N", 2, "N", 1)
+CHAR("~O", 2, "O", 1)
+CHAR("~a", 2, "a", 1)
+CHAR("~n", 2, "n", 1)
+CHAR("~o", 2, "o", 1)
+CHAR("~~", 2, "~~", 2)
diff --git a/usr.bin/mandoc/term.c b/usr.bin/mandoc/term.c
index 908040530b8..324be180478 100644
--- a/usr.bin/mandoc/term.c
+++ b/usr.bin/mandoc/term.c
@@ -1,4 +1,4 @@
-/* $Id: term.c,v 1.10 2009/08/09 21:28:57 schwarze Exp $ */
+/* $Id: term.c,v 1.11 2009/08/22 17:04:48 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -31,13 +31,16 @@ extern int mdoc_run(struct termp *,
static struct termp *term_alloc(enum termenc);
static void term_free(struct termp *);
-static void term_pescape(struct termp *, const char **);
-static void term_nescape(struct termp *,
+
+static void do_escaped(struct termp *, const char **);
+static void do_special(struct termp *,
+ const char *, size_t);
+static void do_reserved(struct termp *,
const char *, size_t);
-static void term_chara(struct termp *, char);
-static void term_encodea(struct termp *, char);
-static int term_isopendelim(const char *);
-static int term_isclosedelim(const char *);
+static void buffer(struct termp *, char);
+static void encode(struct termp *, char);
+static int isopendelim(const char *);
+static int isclosedelim(const char *);
void *
@@ -110,7 +113,7 @@ term_alloc(enum termenc enc)
static int
-term_isclosedelim(const char *p)
+isclosedelim(const char *p)
{
if ( ! (*p && 0 == *(p + 1)))
@@ -144,7 +147,7 @@ term_isclosedelim(const char *p)
static int
-term_isopendelim(const char *p)
+isopendelim(const char *p)
{
if ( ! (*p && 0 == *(p + 1)))
@@ -205,12 +208,6 @@ term_isopendelim(const char *p)
* Otherwise, the line will break at the right margin. Extremely long
* lines will cause the system to emit a warning (TODO: hyphenate, if
* possible).
- *
- * FIXME: newline breaks occur (in groff) also occur when a single
- * space follows a NOBREAK (try `Bl -tag')
- *
- * FIXME: there's a newline error where a `Bl -diag' will have a
- * trailing newline if the line is exactly 73 chars long.
*/
void
term_flushln(struct termp *p)
@@ -385,13 +382,8 @@ term_vspace(struct termp *p)
}
-/*
- * Determine the symbol indicated by an escape sequences, that is, one
- * starting with a backslash. Once done, we pass this value into the
- * output buffer by way of the symbol table.
- */
static void
-term_nescape(struct termp *p, const char *word, size_t len)
+do_special(struct termp *p, const char *word, size_t len)
{
const char *rhs;
size_t sz;
@@ -399,9 +391,40 @@ term_nescape(struct termp *p, const char *word, size_t len)
rhs = term_a2ascii(p->symtab, word, len, &sz);
- if (rhs)
- for (i = 0; i < (int)sz; i++)
- term_encodea(p, rhs[i]);
+ if (NULL == rhs) {
+#if 0
+ fputs("Unknown special character: ", stderr);
+ for (i = 0; i < (int)len; i++)
+ fputc(word[i], stderr);
+ fputc('\n', stderr);
+#endif
+ return;
+ }
+ for (i = 0; i < (int)sz; i++)
+ encode(p, rhs[i]);
+}
+
+/*
+ * Emit the replacement text of a predefined string.  The name is the
+ * `len' bytes starting at `word'; it is resolved with term_a2res()
+ * against p->symtab, and every byte of the result is passed to
+ * encode().
+ */
+static void
+do_reserved(struct termp *p, const char *word, size_t len)
+{
+ const char *rhs;
+ size_t sz;
+ int i;
+
+ rhs = term_a2res(p->symtab, word, len, &sz);
+
+ if (NULL == rhs) {
+#if 0 /* diagnostic for unknown names, currently compiled out */
+ fputs("Unknown reserved word: ", stderr);
+ for (i = 0; i < (int)len; i++)
+ fputc(word[i], stderr);
+ fputc('\n', stderr);
+#endif
+ return; /* no table entry: produce no output */
+ }
+ for (i = 0; i < (int)sz; i++)
+ encode(p, rhs[i]);
+}
@@ -411,12 +434,13 @@ term_nescape(struct termp *p, const char *word, size_t len)
* the escape sequence (we assert upon badly-formed escape sequences).
*/
static void
-term_pescape(struct termp *p, const char **word)
+do_escaped(struct termp *p, const char **word)
{
- int j;
+ int j, type;
const char *wp;
wp = *word;
+ type = 1;
if (0 == *(++wp)) {
*word = wp;
@@ -430,7 +454,7 @@ term_pescape(struct termp *p, const char **word)
return;
}
- term_nescape(p, wp, 2);
+ do_special(p, wp, 2);
*word = ++wp;
return;
@@ -448,13 +472,14 @@ term_pescape(struct termp *p, const char **word)
return;
}
- term_nescape(p, wp, 2);
+ do_reserved(p, wp, 2);
*word = ++wp;
return;
case ('['):
+ type = 0;
break;
default:
- term_nescape(p, wp, 1);
+ do_reserved(p, wp, 1);
*word = wp;
return;
}
@@ -485,7 +510,7 @@ term_pescape(struct termp *p, const char **word)
return;
} else if ('[' != *wp) {
- term_nescape(p, wp, 1);
+ do_special(p, wp, 1);
*word = wp;
return;
}
@@ -499,7 +524,10 @@ term_pescape(struct termp *p, const char **word)
return;
}
- term_nescape(p, wp - j, (size_t)j);
+ if (type)
+ do_special(p, wp - j, (size_t)j);
+ else
+ do_reserved(p, wp - j, (size_t)j);
*word = wp;
}
@@ -514,28 +542,23 @@ term_word(struct termp *p, const char *word)
{
const char *sv;
- if (term_isclosedelim(word))
+ if (isclosedelim(word))
if ( ! (TERMP_IGNDELIM & p->flags))
p->flags |= TERMP_NOSPACE;
if ( ! (TERMP_NOSPACE & p->flags))
- term_chara(p, ' ');
+ buffer(p, ' ');
if ( ! (p->flags & TERMP_NONOSPACE))
p->flags &= ~TERMP_NOSPACE;
- /*
- * If ANSI (word-length styling), then apply our style now,
- * before the word.
- */
-
for (sv = word; *word; word++)
if ('\\' != *word)
- term_encodea(p, *word);
+ encode(p, *word);
else
- term_pescape(p, &word);
+ do_escaped(p, &word);
- if (term_isopendelim(sv))
+ if (isopendelim(sv))
p->flags |= TERMP_NOSPACE;
}
@@ -546,7 +569,7 @@ term_word(struct termp *p, const char *word)
* size.
*/
static void
-term_chara(struct termp *p, char c)
+buffer(struct termp *p, char c)
{
size_t s;
@@ -564,18 +587,18 @@ term_chara(struct termp *p, char c)
static void
-term_encodea(struct termp *p, char c)
+encode(struct termp *p, char c)
{
if (' ' != c && TERMP_STYLE & p->flags) {
if (TERMP_BOLD & p->flags) {
- term_chara(p, c);
- term_chara(p, 8);
+ buffer(p, c);
+ buffer(p, 8);
}
if (TERMP_UNDER & p->flags) {
- term_chara(p, '_');
- term_chara(p, 8);
+ buffer(p, '_');
+ buffer(p, 8);
}
}
- term_chara(p, c);
+ buffer(p, c);
}
diff --git a/usr.bin/mandoc/term.h b/usr.bin/mandoc/term.h
index 9812ed1b383..3b8d7c3947e 100644
--- a/usr.bin/mandoc/term.h
+++ b/usr.bin/mandoc/term.h
@@ -1,4 +1,4 @@
-/* $Id: term.h,v 1.7 2009/08/09 21:59:41 schwarze Exp $ */
+/* $Id: term.h,v 1.8 2009/08/22 17:04:48 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -58,6 +58,7 @@ struct termp {
void *term_ascii2htab(void);
const char *term_a2ascii(void *, const char *, size_t, size_t *);
+const char *term_a2res(void *, const char *, size_t, size_t *);
void term_asciifree(void *);
void term_newln(struct termp *);