summaryrefslogtreecommitdiff
path: root/usr.bin
diff options
context:
space:
mode:
authorIngo Schwarze <schwarze@cvs.openbsd.org>2011-05-29 21:22:19 +0000
committerIngo Schwarze <schwarze@cvs.openbsd.org>2011-05-29 21:22:19 +0000
commitd49b1d8e996d3b7d5b11ff7f6fec1308da0f4d19 (patch)
tree1b65c2913c52e78683a870fa30aacb6d0da621ec /usr.bin
parent34e3b2211040149f713e27fa1d0e45aa08dcaa93 (diff)
Merge release 1.11.3, almost all code by kristaps@:
* Unicode output support (no Unicode input yet, though). * Refactoring: completely handle predefined strings in roff.c. - New function mandoc_escape() replaces a2roffdeco() and mandoc_special(). - Start using mandoc_getarg() in mdoc_argv.c. - Clean up parsing of delimiters in mdoc(7). * And many minor fixes and lots of cleanup.
Diffstat (limited to 'usr.bin')
-rw-r--r--usr.bin/mandoc/Makefile4
-rw-r--r--usr.bin/mandoc/chars.c129
-rw-r--r--usr.bin/mandoc/chars.in77
-rw-r--r--usr.bin/mandoc/html.c341
-rw-r--r--usr.bin/mandoc/html.h18
-rw-r--r--usr.bin/mandoc/libmandoc.h4
-rw-r--r--usr.bin/mandoc/libmdoc.h30
-rw-r--r--usr.bin/mandoc/main.c23
-rw-r--r--usr.bin/mandoc/main.h6
-rw-r--r--usr.bin/mandoc/man_html.c34
-rw-r--r--usr.bin/mandoc/man_term.c11
-rw-r--r--usr.bin/mandoc/man_validate.c56
-rw-r--r--usr.bin/mandoc/mandoc.146
-rw-r--r--usr.bin/mandoc/mandoc.c485
-rw-r--r--usr.bin/mandoc/mandoc.h27
-rw-r--r--usr.bin/mandoc/mdoc_argv.c409
-rw-r--r--usr.bin/mandoc/mdoc_html.c68
-rw-r--r--usr.bin/mandoc/mdoc_macro.c16
-rw-r--r--usr.bin/mandoc/mdoc_term.c11
-rw-r--r--usr.bin/mandoc/mdoc_validate.c36
-rw-r--r--usr.bin/mandoc/out.c239
-rw-r--r--usr.bin/mandoc/out.h32
-rw-r--r--usr.bin/mandoc/predefs.in65
-rw-r--r--usr.bin/mandoc/read.c6
-rw-r--r--usr.bin/mandoc/roff.c124
-rw-r--r--usr.bin/mandoc/tbl_layout.c29
-rw-r--r--usr.bin/mandoc/term.c315
-rw-r--r--usr.bin/mandoc/term.h52
-rw-r--r--usr.bin/mandoc/term_ascii.c112
-rw-r--r--usr.bin/mandoc/term_ps.c382
30 files changed, 1597 insertions, 1590 deletions
diff --git a/usr.bin/mandoc/Makefile b/usr.bin/mandoc/Makefile
index 8e4388b2513..d507dc9af42 100644
--- a/usr.bin/mandoc/Makefile
+++ b/usr.bin/mandoc/Makefile
@@ -1,8 +1,8 @@
-# $OpenBSD: Makefile,v 1.55 2011/04/24 16:22:02 schwarze Exp $
+# $OpenBSD: Makefile,v 1.56 2011/05/29 21:22:18 schwarze Exp $
.include <bsd.own.mk>
-VERSION=1.11.1
+VERSION=1.11.3
CFLAGS+=-DVERSION=\"${VERSION}\"
CFLAGS+=-W -Wall -Wstrict-prototypes
diff --git a/usr.bin/mandoc/chars.c b/usr.bin/mandoc/chars.c
index 0446fa53ea4..7e27a3a8ff5 100644
--- a/usr.bin/mandoc/chars.c
+++ b/usr.bin/mandoc/chars.c
@@ -1,6 +1,6 @@
-/* $Id: chars.c,v 1.18 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: chars.c,v 1.19 2011/05/29 21:22:18 schwarze Exp $ */
/*
- * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
@@ -16,12 +16,13 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <assert.h>
+#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mandoc.h"
-#include "out.h"
+#include "libmandoc.h"
#define PRINT_HI 126
#define PRINT_LO 32
@@ -31,52 +32,37 @@ struct ln {
const char *code;
const char *ascii;
int unicode;
- int type;
-#define CHARS_CHAR (1 << 0)
-#define CHARS_STRING (1 << 1)
-#define CHARS_BOTH (CHARS_CHAR | CHARS_STRING)
};
-#define LINES_MAX 353
+#define LINES_MAX 325
#define CHAR(in, ch, code) \
- { NULL, (in), (ch), (code), CHARS_CHAR },
-#define STRING(in, ch, code) \
- { NULL, (in), (ch), (code), CHARS_STRING },
-#define BOTH(in, ch, code) \
- { NULL, (in), (ch), (code), CHARS_BOTH },
+ { NULL, (in), (ch), (code) },
#define CHAR_TBL_START static struct ln lines[LINES_MAX] = {
#define CHAR_TBL_END };
#include "chars.in"
-struct ctab {
- enum chars type;
+struct mchars {
struct ln **htab;
};
-static inline int match(const struct ln *,
- const char *, size_t, int);
-static const struct ln *find(struct ctab *, const char *, size_t, int);
-
+static inline int match(const struct ln *, const char *, size_t);
+static const struct ln *find(struct mchars *, const char *, size_t);
void
-chars_free(void *arg)
+mchars_free(struct mchars *arg)
{
- struct ctab *tab;
-
- tab = (struct ctab *)arg;
- free(tab->htab);
- free(tab);
+ free(arg->htab);
+ free(arg);
}
-
-void *
-chars_init(enum chars type)
+struct mchars *
+mchars_alloc(void)
{
- struct ctab *tab;
+ struct mchars *tab;
struct ln **htab;
struct ln *pp;
int i, hash;
@@ -88,7 +74,7 @@ chars_init(enum chars type)
* (they're in-line re-ordered during lookup).
*/
- tab = mandoc_malloc(sizeof(struct ctab));
+ tab = mandoc_malloc(sizeof(struct mchars));
htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **));
for (i = 0; i < LINES_MAX; i++) {
@@ -105,7 +91,6 @@ chars_init(enum chars type)
}
tab->htab = htab;
- tab->type = type;
return(tab);
}
@@ -114,79 +99,57 @@ chars_init(enum chars type)
* Special character to Unicode codepoint.
*/
int
-chars_spec2cp(void *arg, const char *p, size_t sz)
-{
- const struct ln *ln;
-
- ln = find((struct ctab *)arg, p, sz, CHARS_CHAR);
- if (NULL == ln)
- return(-1);
- return(ln->unicode);
-}
-
-
-/*
- * Reserved word to Unicode codepoint.
- */
-int
-chars_res2cp(void *arg, const char *p, size_t sz)
+mchars_spec2cp(struct mchars *arg, const char *p, size_t sz)
{
const struct ln *ln;
- ln = find((struct ctab *)arg, p, sz, CHARS_STRING);
+ ln = find(arg, p, sz);
if (NULL == ln)
return(-1);
return(ln->unicode);
}
-
/*
- * Numbered character to literal character,
- * represented as a null-terminated string for additional safety.
+ * Numbered character string to ASCII codepoint.
+ * This can only be a printable character (i.e., alnum, punct, space) so
+ * prevent the character from ruining our state (backspace, newline, and
+ * so on).
+ * If the character is illegal, returns '\0'.
*/
-const char *
-chars_num2char(const char *p, size_t sz)
+char
+mchars_num2char(const char *p, size_t sz)
{
int i;
- static char c[2];
- if (sz > 3)
- return(NULL);
- i = atoi(p);
- if (i < 0 || i > 255)
- return(NULL);
- c[0] = (char)i;
- c[1] = '\0';
- return(c);
+ if ((i = mandoc_strntou(p, sz, 10)) < 0)
+ return('\0');
+ return(isprint(i) ? i : '\0');
}
-
-/*
- * Special character to string array.
+/*
+ * Hex character string to Unicode codepoint.
+ * If the character is illegal, returns '\0'.
*/
-const char *
-chars_spec2str(void *arg, const char *p, size_t sz, size_t *rsz)
+int
+mchars_num2uc(const char *p, size_t sz)
{
- const struct ln *ln;
-
- ln = find((struct ctab *)arg, p, sz, CHARS_CHAR);
- if (NULL == ln)
- return(NULL);
+ int i;
- *rsz = strlen(ln->ascii);
- return(ln->ascii);
+ if ((i = mandoc_strntou(p, sz, 16)) < 0)
+ return('\0');
+ /* FIXME: make sure we're not in a bogus range. */
+ return(i > 0x80 && i <= 0x10FFFF ? i : '\0');
}
-
/*
- * Reserved word to string array.
+ * Special character to string array.
*/
const char *
-chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz)
+mchars_spec2str(struct mchars *arg, const char *p, size_t sz, size_t *rsz)
{
const struct ln *ln;
- ln = find((struct ctab *)arg, p, sz, CHARS_STRING);
+ ln = find(arg, p, sz);
if (NULL == ln)
return(NULL);
@@ -194,9 +157,8 @@ chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz)
return(ln->ascii);
}
-
static const struct ln *
-find(struct ctab *tab, const char *p, size_t sz, int type)
+find(struct mchars *tab, const char *p, size_t sz)
{
struct ln *pp, *prev;
struct ln **htab;
@@ -222,7 +184,7 @@ find(struct ctab *tab, const char *p, size_t sz, int type)
return(NULL);
for (prev = NULL; pp; pp = pp->next) {
- if ( ! match(pp, p, sz, type)) {
+ if ( ! match(pp, p, sz)) {
prev = pp;
continue;
}
@@ -239,13 +201,10 @@ find(struct ctab *tab, const char *p, size_t sz, int type)
return(NULL);
}
-
static inline int
-match(const struct ln *ln, const char *p, size_t sz, int type)
+match(const struct ln *ln, const char *p, size_t sz)
{
- if ( ! (ln->type & type))
- return(0);
if (strncmp(ln->code, p, sz))
return(0);
return('\0' == ln->code[(int)sz]);
diff --git a/usr.bin/mandoc/chars.in b/usr.bin/mandoc/chars.in
index 49676cd4b1e..e4b2c65aa2d 100644
--- a/usr.bin/mandoc/chars.in
+++ b/usr.bin/mandoc/chars.in
@@ -1,4 +1,4 @@
-/* $Id: chars.in,v 1.15 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: chars.in,v 1.16 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -16,15 +16,12 @@
*/
/*
- * The ASCII translation tables. STRING corresponds to predefined
- * strings (cf. mdoc_samples.7 and tmac/mdoc/doc-nroff). CHAR
- * corresponds to special characters (cf. groff_char.7). BOTH contains
- * sequences that are equivalent in both STRING and CHAR.
+ * The ASCII translation tables.
*
- * Either way, the left-hand side corresponds to the input sequence (\x,
- * \(xx, \*(xx and so on) whose length is listed second element. The
- * right-hand side is what's produced by the front-end, with the fourth
- * element being its length.
+ * The left-hand side corresponds to the input sequence (\x, \(xx, \*(xx
+ * and so on), given without its leading escape. The right-hand side
+ * is the ASCII string produced by the front-end, followed by the
+ * Unicode codepoint of the character.
*
* XXX - C-escape strings!
* XXX - update LINES_MAX if adding more!
@@ -36,25 +33,25 @@ static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' };
CHAR_TBL_START
/* Spacing. */
-CHAR("c", "", 0)
+CHAR("c", "", 8203)
CHAR("0", " ", 8194)
CHAR(" ", ascii_nbrsp, 160)
CHAR("~", ascii_nbrsp, 160)
-CHAR("%", "", 0)
-CHAR("&", "", 0)
-CHAR("^", "", 0)
-CHAR("|", "", 0)
-CHAR("}", "", 0)
+CHAR("%", "", 8203)
+CHAR("&", "", 8203)
+CHAR("^", "", 8203)
+CHAR("|", "", 8203)
+CHAR("}", "", 8203)
/* Accents. */
CHAR("a\"", "\"", 779)
CHAR("a-", "-", 175)
CHAR("a.", ".", 729)
CHAR("a^", "^", 770)
-BOTH("\'", "\'", 769)
-BOTH("aa", "\'", 769)
-BOTH("ga", "`", 768)
-BOTH("`", "`", 768)
+CHAR("\'", "\'", 769)
+CHAR("aa", "\'", 769)
+CHAR("ga", "`", 768)
+CHAR("`", "`", 768)
CHAR("ab", "`", 774)
CHAR("ac", ",", 807)
CHAR("ad", "\"", 776)
@@ -68,8 +65,8 @@ CHAR("ti", "~", 126)
/* Quotes. */
CHAR("Bq", ",,", 8222)
CHAR("bq", ",", 8218)
-BOTH("lq", "``", 8220)
-BOTH("rq", "\'\'", 8221)
+CHAR("lq", "``", 8220)
+CHAR("rq", "\'\'", 8221)
CHAR("oq", "`", 8216)
CHAR("cq", "\'", 8217)
CHAR("aq", "\'", 39)
@@ -232,8 +229,8 @@ CHAR("<-", "<-", 8592)
CHAR("->", "->", 8594)
CHAR("<>", "<>", 8596)
CHAR("da", "v", 8595)
-BOTH("ua", "^", 8593)
-BOTH("va", "^v", 8597)
+CHAR("ua", "^", 8593)
+CHAR("va", "^v", 8597)
CHAR("lA", "<=", 8656)
CHAR("rA", "=>", 8658)
CHAR("hA", "<=>", 8660)
@@ -270,8 +267,8 @@ CHAR("di", "-:-", 247)
CHAR("tdi", "-:-", 247)
CHAR("f/", "/", 8260)
CHAR("**", "*", 8727)
-BOTH("<=", "<=", 8804)
-BOTH(">=", ">=", 8805)
+CHAR("<=", "<=", 8804)
+CHAR(">=", ">=", 8805)
CHAR("<<", "<<", 8810)
CHAR(">>", ">>", 8811)
CHAR("eq", "=", 61)
@@ -348,36 +345,6 @@ CHAR("Po", "L", 163)
CHAR("Cs", "x", 164)
CHAR("Fn", "f", 402)
-/* Old style. */
-STRING("Am", "&", 38)
-STRING("Ba", "|", 124)
-STRING("Ge", ">=", 8805)
-STRING("Gt", ">", 62)
-STRING("If", "infinity", 0)
-STRING("Le", "<=", 8804)
-STRING("Lq", "``", 8220)
-STRING("Lt", "<", 60)
-STRING("Na", "NaN", 0)
-STRING("Ne", "!=", 8800)
-STRING("Pi", "pi", 960)
-STRING("Pm", "+-", 177)
-STRING("Rq", "\'\'", 8221)
-STRING("left-bracket", "[", 91)
-STRING("left-parenthesis", "(", 40)
-STRING("left-singlequote", "`", 8216)
-STRING("lp", "(", 40)
-STRING("q", "\"", 34)
-STRING("quote-left", "`", 8216)
-STRING("quote-right", "\'", 8217)
-STRING("R", "(R)", 174)
-STRING("right-bracket", "]", 93)
-STRING("right-parenthesis", ")", 41)
-STRING("right-singlequote", "\'", 8217)
-STRING("rp", ")", 41)
-STRING("Tm", "(Tm)", 8482)
-STRING("Px", "POSIX", 0)
-STRING("Ai", "ANSI", 0)
-
/* Lines. */
CHAR("ba", "|", 124)
CHAR("br", "|", 9474)
diff --git a/usr.bin/mandoc/html.c b/usr.bin/mandoc/html.c
index 45197ad76ef..5ad6860d850 100644
--- a/usr.bin/mandoc/html.c
+++ b/usr.bin/mandoc/html.c
@@ -1,4 +1,4 @@
-/* $Id: html.c,v 1.25 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: html.c,v 1.26 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -27,6 +27,7 @@
#include <unistd.h>
#include "mandoc.h"
+#include "libmandoc.h"
#include "out.h"
#include "html.h"
#include "main.h"
@@ -89,19 +90,25 @@ static const char *const htmlattrs[ATTR_MAX] = {
"colspan", /* ATTR_COLSPAN */
};
-static void print_num(struct html *, const char *, size_t);
-static void print_spec(struct html *, enum roffdeco,
- const char *, size_t);
-static void print_res(struct html *, const char *, size_t);
-static void print_ctag(struct html *, enum htmltag);
-static void print_doctype(struct html *);
-static void print_xmltype(struct html *);
-static int print_encode(struct html *, const char *, int);
-static void print_metaf(struct html *, enum roffdeco);
-static void print_attr(struct html *,
- const char *, const char *);
-static void *ml_alloc(char *, enum htmltype);
+static const char *const roffscales[SCALE_MAX] = {
+ "cm", /* SCALE_CM */
+ "in", /* SCALE_IN */
+ "pc", /* SCALE_PC */
+ "pt", /* SCALE_PT */
+ "em", /* SCALE_EM */
+ "em", /* SCALE_MM */
+ "ex", /* SCALE_EN */
+ "ex", /* SCALE_BU */
+ "em", /* SCALE_VS */
+ "ex", /* SCALE_FS */
+};
+static void bufncat(struct html *, const char *, size_t);
+static void print_ctag(struct html *, enum htmltag);
+static int print_encode(struct html *, const char *, int);
+static void print_metaf(struct html *, enum mandoc_esc);
+static void print_attr(struct html *, const char *, const char *);
+static void *ml_alloc(char *, enum htmltype);
static void *
ml_alloc(char *outopts, enum htmltype type)
@@ -119,7 +126,7 @@ ml_alloc(char *outopts, enum htmltype type)
h->type = type;
h->tags.head = NULL;
- h->symtab = chars_init(CHARS_HTML);
+ h->symtab = mchars_alloc();
while (outopts && *outopts)
switch (getsubopt(&outopts, UNCONST(toks), &v)) {
@@ -169,7 +176,7 @@ html_free(void *p)
}
if (h->symtab)
- chars_free(h->symtab);
+ mchars_free(h->symtab);
free(h);
}
@@ -205,72 +212,24 @@ print_gen_head(struct html *h)
}
}
-/* ARGSUSED */
-static void
-print_num(struct html *h, const char *p, size_t len)
-{
- const char *rhs;
-
- rhs = chars_num2char(p, len);
- if (rhs)
- putchar((int)*rhs);
-}
-
static void
-print_spec(struct html *h, enum roffdeco d, const char *p, size_t len)
-{
- int cp;
- const char *rhs;
- size_t sz;
-
- if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) {
- printf("&#%d;", cp);
- return;
- } else if (-1 == cp && DECO_SSPECIAL == d) {
- fwrite(p, 1, len, stdout);
- return;
- } else if (-1 == cp)
- return;
-
- if (NULL != (rhs = chars_spec2str(h->symtab, p, len, &sz)))
- fwrite(rhs, 1, sz, stdout);
-}
-
-
-static void
-print_res(struct html *h, const char *p, size_t len)
-{
- int cp;
- const char *rhs;
- size_t sz;
-
- if ((cp = chars_res2cp(h->symtab, p, len)) > 0) {
- printf("&#%d;", cp);
- return;
- } else if (-1 == cp)
- return;
-
- if (NULL != (rhs = chars_res2str(h->symtab, p, len, &sz)))
- fwrite(rhs, 1, sz, stdout);
-}
-
-
-static void
-print_metaf(struct html *h, enum roffdeco deco)
+print_metaf(struct html *h, enum mandoc_esc deco)
{
enum htmlfont font;
switch (deco) {
- case (DECO_PREVIOUS):
+ case (ESCAPE_FONTPREV):
font = h->metal;
break;
- case (DECO_ITALIC):
+ case (ESCAPE_FONTITALIC):
font = HTMLFONT_ITALIC;
break;
- case (DECO_BOLD):
+ case (ESCAPE_FONTBOLD):
font = HTMLFONT_BOLD;
break;
- case (DECO_ROMAN):
+ case (ESCAPE_FONT):
+ /* FALLTHROUGH */
+ case (ESCAPE_FONTROMAN):
font = HTMLFONT_NONE;
break;
default:
@@ -292,80 +251,123 @@ print_metaf(struct html *h, enum roffdeco deco)
print_otag(h, TAG_I, 0, NULL);
}
+int
+html_strlen(const char *cp)
+{
+ int ssz, sz;
+ const char *seq, *p;
+
+ /*
+ * Account for escaped sequences within string length
+ * calculations. This follows the logic in term_strlen() as we
+ * must calculate the width of produced strings.
+ * Assume that characters are always width of "1". This is
+ * hacky, but it gets the job done for approximation of widths.
+ */
+
+ sz = 0;
+ while (NULL != (p = strchr(cp, '\\'))) {
+ sz += (int)(p - cp);
+ ++cp;
+ switch (mandoc_escape(&cp, &seq, &ssz)) {
+ case (ESCAPE_ERROR):
+ return(sz);
+ case (ESCAPE_UNICODE):
+ /* FALLTHROUGH */
+ case (ESCAPE_NUMBERED):
+ /* FALLTHROUGH */
+ case (ESCAPE_SPECIAL):
+ sz++;
+ break;
+ default:
+ break;
+ }
+ }
+
+ assert(sz >= 0);
+ return(sz + strlen(cp));
+}
static int
print_encode(struct html *h, const char *p, int norecurse)
{
size_t sz;
- int len, nospace;
+ int c, len, nospace;
const char *seq;
- enum roffdeco deco;
+ enum mandoc_esc esc;
static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
nospace = 0;
- for (; *p; p++) {
+ while ('\0' != *p) {
sz = strcspn(p, rejs);
fwrite(p, 1, sz, stdout);
- p += /* LINTED */
- sz;
+ p += (int)sz;
+
+ if ('\0' == *p)
+ break;
- if ('<' == *p) {
+ switch (*p++) {
+ case ('<'):
printf("&lt;");
continue;
- } else if ('>' == *p) {
+ case ('>'):
printf("&gt;");
continue;
- } else if ('&' == *p) {
+ case ('&'):
printf("&amp;");
continue;
- } else if (ASCII_HYPH == *p) {
- /*
- * Note: "soft hyphens" aren't graphically
- * displayed when not breaking the text; we want
- * them to be displayed.
- */
- /*printf("&#173;");*/
+ case (ASCII_HYPH):
putchar('-');
continue;
- } else if ('\0' == *p)
+ default:
break;
+ }
- seq = ++p;
- len = a2roffdeco(&deco, &seq, &sz);
+ esc = mandoc_escape(&p, &seq, &len);
+ if (ESCAPE_ERROR == esc)
+ break;
- switch (deco) {
- case (DECO_NUMBERED):
- print_num(h, seq, sz);
+ switch (esc) {
+ case (ESCAPE_UNICODE):
+ /* Skip past the "u" header. */
+ c = mchars_num2uc(seq + 1, len - 1);
+ if ('\0' != c)
+ printf("&#x%x;", c);
break;
- case (DECO_RESERVED):
- print_res(h, seq, sz);
+ case (ESCAPE_NUMBERED):
+ c = mchars_num2char(seq, len);
+ if ('\0' != c)
+ putchar(c);
break;
- case (DECO_SSPECIAL):
- /* FALLTHROUGH */
- case (DECO_SPECIAL):
- print_spec(h, deco, seq, sz);
+ case (ESCAPE_SPECIAL):
+ c = mchars_spec2cp(h->symtab, seq, len);
+ if (c > 0)
+ printf("&#%d;", c);
+ else if (-1 == c && 1 == len)
+ putchar((int)*seq);
break;
- case (DECO_PREVIOUS):
+ case (ESCAPE_FONT):
+ /* FALLTHROUGH */
+ case (ESCAPE_FONTPREV):
/* FALLTHROUGH */
- case (DECO_BOLD):
+ case (ESCAPE_FONTBOLD):
/* FALLTHROUGH */
- case (DECO_ITALIC):
+ case (ESCAPE_FONTITALIC):
/* FALLTHROUGH */
- case (DECO_ROMAN):
+ case (ESCAPE_FONTROMAN):
if (norecurse)
break;
- print_metaf(h, deco);
+ print_metaf(h, esc);
+ break;
+ case (ESCAPE_NOSPACE):
+ if ('\0' == *p)
+ nospace = 1;
break;
default:
break;
}
-
- p += len - 1;
-
- if (DECO_NOSPACE == deco && '\0' == *(p + 1))
- nospace = 1;
}
return(nospace);
@@ -428,7 +430,7 @@ print_otag(struct html *h, enum htmltag tag,
print_attr(h, "lang", "en");
}
- /* Accomodate for XML "well-formed" singleton escaping. */
+ /* Accommodate for XML "well-formed" singleton escaping. */
if (HTML_AUTOCLOSE & htmltags[tag].flags)
switch (h->type) {
@@ -461,28 +463,9 @@ print_ctag(struct html *h, enum htmltag tag)
}
}
-
void
print_gen_decls(struct html *h)
{
-
- print_xmltype(h);
- print_doctype(h);
-}
-
-
-static void
-print_xmltype(struct html *h)
-{
-
- if (HTML_XHTML_1_0_STRICT == h->type)
- puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
-}
-
-
-static void
-print_doctype(struct html *h)
-{
const char *doctype;
const char *dtd;
const char *name;
@@ -494,6 +477,7 @@ print_doctype(struct html *h)
dtd = "http://www.w3.org/TR/html4/strict.dtd";
break;
default:
+ puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
name = "html";
doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
@@ -583,7 +567,6 @@ print_stagq(struct html *h, const struct tag *suntil)
}
}
-
void
bufinit(struct html *h)
{
@@ -592,28 +575,27 @@ bufinit(struct html *h)
h->buflen = 0;
}
-
void
bufcat_style(struct html *h, const char *key, const char *val)
{
bufcat(h, key);
- bufncat(h, ":", 1);
+ bufcat(h, ":");
bufcat(h, val);
- bufncat(h, ";", 1);
+ bufcat(h, ";");
}
-
void
bufcat(struct html *h, const char *p)
{
- bufncat(h, p, strlen(p));
+ h->buflen = strlcat(h->buf, p, BUFSIZ);
+ assert(h->buflen < BUFSIZ);
+ h->buflen--;
}
-
void
-buffmt(struct html *h, const char *fmt, ...)
+bufcat_fmt(struct html *h, const char *fmt, ...)
{
va_list ap;
@@ -624,19 +606,15 @@ buffmt(struct html *h, const char *fmt, ...)
h->buflen = strlen(h->buf);
}
-
-void
+static void
bufncat(struct html *h, const char *p, size_t sz)
{
- if (h->buflen + sz > BUFSIZ - 1)
- sz = BUFSIZ - 1 - h->buflen;
-
- (void)strncat(h->buf, p, sz);
+ assert(h->buflen + sz + 1 < BUFSIZ);
+ strncat(h->buf, p, sz);
h->buflen += sz;
}
-
void
buffmt_includes(struct html *h, const char *name)
{
@@ -644,6 +622,7 @@ buffmt_includes(struct html *h, const char *name)
pp = h->base_includes;
+ bufinit(h);
while (NULL != (p = strchr(pp, '%'))) {
bufncat(h, pp, (size_t)(p - pp));
switch (*(p + 1)) {
@@ -660,7 +639,6 @@ buffmt_includes(struct html *h, const char *name)
bufcat(h, pp);
}
-
void
buffmt_man(struct html *h,
const char *name, const char *sec)
@@ -669,7 +647,7 @@ buffmt_man(struct html *h,
pp = h->base_man;
- /* LINTED */
+ bufinit(h);
while (NULL != (p = strchr(pp, '%'))) {
bufncat(h, pp, (size_t)(p - pp));
switch (*(p + 1)) {
@@ -677,7 +655,7 @@ buffmt_man(struct html *h,
bufcat(h, sec ? sec : "1");
break;
case('N'):
- buffmt(h, name);
+ bufcat_fmt(h, name);
break;
default:
bufncat(h, p, 2);
@@ -689,85 +667,24 @@ buffmt_man(struct html *h,
bufcat(h, pp);
}
-
void
bufcat_su(struct html *h, const char *p, const struct roffsu *su)
{
double v;
- const char *u;
v = su->scale;
+ if (SCALE_MM == su->unit && 0.0 == (v /= 100.0))
+ v = 1.0;
- switch (su->unit) {
- case (SCALE_CM):
- u = "cm";
- break;
- case (SCALE_IN):
- u = "in";
- break;
- case (SCALE_PC):
- u = "pc";
- break;
- case (SCALE_PT):
- u = "pt";
- break;
- case (SCALE_EM):
- u = "em";
- break;
- case (SCALE_MM):
- if (0 == (v /= 100))
- v = 1;
- u = "em";
- break;
- case (SCALE_EN):
- u = "ex";
- break;
- case (SCALE_BU):
- u = "ex";
- break;
- case (SCALE_VS):
- u = "em";
- break;
- default:
- u = "ex";
- break;
- }
-
- /*
- * XXX: the CSS spec isn't clear as to which types accept
- * integer or real numbers, so we just make them all decimals.
- */
- buffmt(h, "%s: %.2f%s;", p, v, u);
+ bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]);
}
-
void
-html_idcat(char *dst, const char *src, int sz)
+bufcat_id(struct html *h, const char *src)
{
- int ssz;
-
- assert(sz > 2);
/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */
- /* We can't start with a number (bah). */
-
- if ('#' == *dst) {
- dst++;
- sz--;
- }
- if ('\0' == *dst) {
- *dst++ = 'x';
- *dst = '\0';
- sz--;
- }
-
- for ( ; *dst != '\0' && sz; dst++, sz--)
- /* Jump to end. */ ;
-
- for ( ; *src != '\0' && sz > 1; src++) {
- ssz = snprintf(dst, (size_t)sz, "%.2x", *src);
- sz -= ssz;
- dst += ssz;
- }
+ while ('\0' != *src)
+ bufcat_fmt(h, "%.2x", *src++);
}
diff --git a/usr.bin/mandoc/html.h b/usr.bin/mandoc/html.h
index 4643e81afd9..10f9a3a5787 100644
--- a/usr.bin/mandoc/html.h
+++ b/usr.bin/mandoc/html.h
@@ -1,4 +1,4 @@
-/* $Id: html.h,v 1.15 2011/01/31 03:04:26 schwarze Exp $ */
+/* $Id: html.h,v 1.16 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -120,7 +120,7 @@ struct html {
struct tagq tags; /* stack of open tags */
struct rofftbl tbl; /* current table */
struct tag *tblt; /* current open table scope */
- void *symtab; /* character-escapes */
+ struct mchars *symtab; /* character-escapes */
char *base_man; /* base for manpage href */
char *base_includes; /* base for include href */
char *style; /* style-sheet URI */
@@ -142,19 +142,19 @@ void print_text(struct html *, const char *);
void print_tblclose(struct html *);
void print_tbl(struct html *, const struct tbl_span *);
+void bufcat_fmt(struct html *, const char *, ...);
+void bufcat(struct html *, const char *);
+void bufcat_id(struct html *, const char *);
+void bufcat_style(struct html *,
+ const char *, const char *);
void bufcat_su(struct html *, const char *,
const struct roffsu *);
+void bufinit(struct html *);
void buffmt_man(struct html *,
const char *, const char *);
void buffmt_includes(struct html *, const char *);
-void buffmt(struct html *, const char *, ...);
-void bufcat(struct html *, const char *);
-void bufcat_style(struct html *,
- const char *, const char *);
-void bufncat(struct html *, const char *, size_t);
-void bufinit(struct html *);
-void html_idcat(char *, const char *, int);
+int html_strlen(const char *);
__END_DECLS
diff --git a/usr.bin/mandoc/libmandoc.h b/usr.bin/mandoc/libmandoc.h
index eaacbfccbf1..1efe5da07a5 100644
--- a/usr.bin/mandoc/libmandoc.h
+++ b/usr.bin/mandoc/libmandoc.h
@@ -1,4 +1,4 @@
-/* $Id: libmandoc.h,v 1.11 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: libmandoc.h,v 1.12 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -73,13 +73,13 @@ void mandoc_msg(enum mandocerr, struct mparse *,
int, int, const char *);
void mandoc_vmsg(enum mandocerr, struct mparse *,
int, int, const char *, ...);
-int mandoc_special(char *);
char *mandoc_strdup(const char *);
char *mandoc_getarg(struct mparse *, char **, int, int *);
char *mandoc_normdate(struct mparse *, char *, int, int);
int mandoc_eos(const char *, size_t, int);
int mandoc_hyph(const char *, const char *);
int mandoc_getcontrol(const char *, int *);
+int mandoc_strntou(const char *, size_t, int);
void mdoc_free(struct mdoc *);
struct mdoc *mdoc_alloc(struct regset *, struct mparse *);
diff --git a/usr.bin/mandoc/libmdoc.h b/usr.bin/mandoc/libmdoc.h
index ceffcb05332..ee99633aa61 100644
--- a/usr.bin/mandoc/libmdoc.h
+++ b/usr.bin/mandoc/libmdoc.h
@@ -1,4 +1,4 @@
-/* $Id: libmdoc.h,v 1.45 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: libmdoc.h,v 1.46 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -62,20 +62,20 @@ struct mdoc_macro {
enum margserr {
ARGS_ERROR,
- ARGS_EOLN,
- ARGS_WORD,
- ARGS_PUNCT,
- ARGS_QWORD,
- ARGS_PHRASE,
- ARGS_PPHRASE,
- ARGS_PEND
+ ARGS_EOLN, /* end-of-line */
+ ARGS_WORD, /* normal word */
+ ARGS_PUNCT, /* series of punctuation */
+ ARGS_QWORD, /* quoted word */
+ ARGS_PHRASE, /* Ta'd phrase (-column) */
+ ARGS_PPHRASE, /* tabbed phrase (-column) */
+ ARGS_PEND /* last phrase (-column) */
};
enum margverr {
ARGV_ERROR,
- ARGV_EOLN,
- ARGV_ARG,
- ARGV_WORD
+ ARGV_EOLN, /* end of line */
+ ARGV_ARG, /* valid argument */
+ ARGV_WORD /* normal word (or bad argument---same thing) */
};
/*
@@ -133,14 +133,8 @@ void mdoc_argv_free(struct mdoc_arg *);
enum margserr mdoc_args(struct mdoc *, int,
int *, char *, enum mdoct, char **);
enum margserr mdoc_zargs(struct mdoc *, int,
- int *, char *, int, char **);
-#define ARGS_DELIM (1 << 1)
-#define ARGS_TABSEP (1 << 2)
-#define ARGS_NOWARN (1 << 3)
-
+ int *, char *, char **);
int mdoc_macroend(struct mdoc *);
-
-#define DELIMSZ 6 /* hint: max possible size of a delimiter */
enum mdelim mdoc_isdelim(const char *);
__END_DECLS
diff --git a/usr.bin/mandoc/main.c b/usr.bin/mandoc/main.c
index 088940778ef..3b2fd636dff 100644
--- a/usr.bin/mandoc/main.c
+++ b/usr.bin/mandoc/main.c
@@ -1,4 +1,4 @@
-/* $Id: main.c,v 1.76 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: main.c,v 1.77 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -34,6 +34,8 @@ typedef void (*out_free)(void *);
enum outt {
OUTT_ASCII = 0, /* -Tascii */
+ OUTT_LOCALE, /* -Tlocale */
+ OUTT_UTF8, /* -Tutf8 */
OUTT_TREE, /* -Ttree */
OUTT_HTML, /* -Thtml */
OUTT_XHTML, /* -Txhtml */
@@ -197,9 +199,19 @@ parse(struct curparse *curp, int fd,
switch (curp->outtype) {
case (OUTT_XHTML):
curp->outdata = xhtml_alloc(curp->outopts);
+ curp->outfree = html_free;
break;
case (OUTT_HTML):
curp->outdata = html_alloc(curp->outopts);
+ curp->outfree = html_free;
+ break;
+ case (OUTT_UTF8):
+ curp->outdata = utf8_alloc(curp->outopts);
+ curp->outfree = ascii_free;
+ break;
+ case (OUTT_LOCALE):
+ curp->outdata = locale_alloc(curp->outopts);
+ curp->outfree = ascii_free;
break;
case (OUTT_ASCII):
curp->outdata = ascii_alloc(curp->outopts);
@@ -223,7 +235,6 @@ parse(struct curparse *curp, int fd,
case (OUTT_XHTML):
curp->outman = html_man;
curp->outmdoc = html_mdoc;
- curp->outfree = html_free;
break;
case (OUTT_TREE):
curp->outman = tree_man;
@@ -233,6 +244,10 @@ parse(struct curparse *curp, int fd,
/* FALLTHROUGH */
case (OUTT_ASCII):
/* FALLTHROUGH */
+ case (OUTT_UTF8):
+ /* FALLTHROUGH */
+ case (OUTT_LOCALE):
+ /* FALLTHROUGH */
case (OUTT_PS):
curp->outman = terminal_man;
curp->outmdoc = terminal_mdoc;
@@ -290,6 +305,10 @@ toptions(struct curparse *curp, char *arg)
curp->outtype = OUTT_TREE;
else if (0 == strcmp(arg, "html"))
curp->outtype = OUTT_HTML;
+ else if (0 == strcmp(arg, "utf8"))
+ curp->outtype = OUTT_UTF8;
+ else if (0 == strcmp(arg, "locale"))
+ curp->outtype = OUTT_LOCALE;
else if (0 == strcmp(arg, "xhtml"))
curp->outtype = OUTT_XHTML;
else if (0 == strcmp(arg, "ps"))
diff --git a/usr.bin/mandoc/main.h b/usr.bin/mandoc/main.h
index 2cb020dedc2..1efb9d34387 100644
--- a/usr.bin/mandoc/main.h
+++ b/usr.bin/mandoc/main.h
@@ -1,6 +1,6 @@
-/* $Id: main.h,v 1.7 2010/07/25 18:05:54 schwarze Exp $ */
+/* $Id: main.h,v 1.8 2011/05/29 21:22:18 schwarze Exp $ */
/*
- * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -41,6 +41,8 @@ void html_free(void *);
void tree_mdoc(void *, const struct mdoc *);
void tree_man(void *, const struct man *);
+void *locale_alloc(char *);
+void *utf8_alloc(char *);
void *ascii_alloc(char *);
void ascii_free(void *);
diff --git a/usr.bin/mandoc/man_html.c b/usr.bin/mandoc/man_html.c
index d805cce275e..5437cb1557f 100644
--- a/usr.bin/mandoc/man_html.c
+++ b/usr.bin/mandoc/man_html.c
@@ -1,4 +1,4 @@
-/* $Id: man_html.c,v 1.37 2011/04/21 22:59:54 schwarze Exp $ */
+/* $Id: man_html.c,v 1.38 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -63,7 +63,7 @@ static int man_ign_pre(MAN_ARGS);
static int man_in_pre(MAN_ARGS);
static int man_literal_pre(MAN_ARGS);
static void man_root_post(MAN_ARGS);
-static int man_root_pre(MAN_ARGS);
+static void man_root_pre(MAN_ARGS);
static int man_B_pre(MAN_ARGS);
static int man_HP_pre(MAN_ARGS);
static int man_I_pre(MAN_ARGS);
@@ -153,9 +153,7 @@ print_man_head(MAN_ARGS)
{
print_gen_head(h);
- bufinit(h);
- buffmt(h, "%s(%s)", m->title, m->msec);
-
+ bufcat_fmt(h, "%s(%s)", m->title, m->msec);
print_otag(h, TAG_TITLE, 0, NULL);
print_text(h, h->buf);
}
@@ -181,13 +179,16 @@ print_man_node(MAN_ARGS)
child = 1;
t = h->tags.head;
- bufinit(h);
-
switch (n->type) {
case (MAN_ROOT):
- child = man_root_pre(m, n, mh, h);
+ man_root_pre(m, n, mh, h);
break;
case (MAN_TEXT):
+ /*
+ * If we have a blank line, output a vertical space.
+ * If we have a space as the first character, break
+ * before printing the line's data.
+ */
if ('\0' == *n->string) {
print_otag(h, TAG_P, 0, NULL);
return;
@@ -196,6 +197,13 @@ print_man_node(MAN_ARGS)
print_text(h, n->string);
+ /*
+ * If we're in a literal context, make sure that words
+ * together on the same line stay together. This is a
+ * POST-printing call, so we check the NEXT word. Since
+ * -man doesn't have nested macros, we don't need to be
+ * more specific than this.
+ */
if (MANH_LITERAL & mh->fl &&
(NULL == n->next ||
n->next->line > n->line))
@@ -244,8 +252,6 @@ print_man_node(MAN_ARGS)
/* This will automatically close out any font scope. */
print_stagq(h, t);
- bufinit(h);
-
switch (n->type) {
case (MAN_ROOT):
man_root_post(m, n, mh, h);
@@ -274,7 +280,7 @@ a2width(const struct man_node *n, struct roffsu *su)
/* ARGSUSED */
-static int
+static void
man_root_pre(MAN_ARGS)
{
struct htmlpair tag[3];
@@ -328,7 +334,6 @@ man_root_pre(MAN_ARGS)
print_text(h, title);
print_tagq(h, t);
- return(1);
}
@@ -387,6 +392,7 @@ man_br_pre(MAN_ARGS)
} else
su.scale = 0;
+ bufinit(h);
bufcat_su(h, "height", &su);
PAIR_STYLE_INIT(&tag, h);
print_otag(h, TAG_DIV, 1, &tag);
@@ -555,6 +561,7 @@ man_IP_pre(MAN_ARGS)
if (MAN_BLOCK == n->type) {
print_otag(h, TAG_P, 0, NULL);
print_otag(h, TAG_TABLE, 0, NULL);
+ bufinit(h);
bufcat_su(h, "width", &su);
PAIR_STYLE_INIT(&tag, h);
print_otag(h, TAG_COL, 1, &tag);
@@ -590,6 +597,8 @@ man_HP_pre(MAN_ARGS)
struct roffsu su;
const struct man_node *np;
+ bufinit(h);
+
np = MAN_BLOCK == n->type ?
n->head->child :
n->parent->head->child;
@@ -690,6 +699,7 @@ man_RS_pre(MAN_ARGS)
if (n->head->child)
a2width(n->head->child, &su);
+ bufinit(h);
bufcat_su(h, "margin-left", &su);
PAIR_STYLE_INIT(&tag, h);
print_otag(h, TAG_DIV, 1, &tag);
diff --git a/usr.bin/mandoc/man_term.c b/usr.bin/mandoc/man_term.c
index ab5c37bd86f..56b1b010756 100644
--- a/usr.bin/mandoc/man_term.c
+++ b/usr.bin/mandoc/man_term.c
@@ -1,4 +1,4 @@
-/* $Id: man_term.c,v 1.67 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: man_term.c,v 1.68 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -152,14 +152,7 @@ terminal_man(void *arg, const struct man *man)
p->tabwidth = term_len(p, 5);
if (NULL == p->symtab)
- switch (p->enc) {
- case (TERMENC_ASCII):
- p->symtab = chars_init(CHARS_ASCII);
- break;
- default:
- abort();
- /* NOTREACHED */
- }
+ p->symtab = mchars_alloc();
n = man_node(man);
m = man_meta(man);
diff --git a/usr.bin/mandoc/man_validate.c b/usr.bin/mandoc/man_validate.c
index c062c60905e..bfa17bd77c9 100644
--- a/usr.bin/mandoc/man_validate.c
+++ b/usr.bin/mandoc/man_validate.c
@@ -1,4 +1,4 @@
-/* $Id: man_validate.c,v 1.44 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: man_validate.c,v 1.45 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
@@ -50,7 +50,7 @@ static int check_par(CHKARGS);
static int check_part(CHKARGS);
static int check_root(CHKARGS);
static int check_sec(CHKARGS);
-static int check_text(CHKARGS);
+static void check_text(CHKARGS);
static int post_AT(CHKARGS);
static int post_fi(CHKARGS);
@@ -147,7 +147,8 @@ man_valid_post(struct man *m)
switch (m->last->type) {
case (MAN_TEXT):
- return(check_text(m, m->last));
+ check_text(m, m->last);
+ return(1);
case (MAN_ROOT):
return(check_root(m, m->last));
case (MAN_EQN):
@@ -200,43 +201,48 @@ check_root(CHKARGS)
return(1);
}
-
-static int
+static void
check_text(CHKARGS)
{
- char *p;
- int pos, c;
+ char *p, *pp, *cpp;
+ int pos;
size_t sz;
- for (p = n->string, pos = n->pos + 1; *p; p++, pos++) {
- sz = strcspn(p, "\t\\");
- p += (int)sz;
+ p = n->string;
+ pos = n->pos + 1;
- if ('\0' == *p)
- break;
+ while ('\0' != *p) {
+ sz = strcspn(p, "\t\\");
+ p += (int)sz;
pos += (int)sz;
if ('\t' == *p) {
- if (MAN_LITERAL & m->flags)
- continue;
- man_pmsg(m, n->line, pos, MANDOCERR_BADTAB);
+ if ( ! (MAN_LITERAL & m->flags))
+ man_pmsg(m, n->line, pos, MANDOCERR_BADTAB);
+ p++;
+ pos++;
continue;
- }
+ } else if ('\0' == *p)
+ break;
- /* Check the special character. */
+ pos++;
+ pp = ++p;
- c = mandoc_special(p);
- if (c) {
- p += c - 1;
- pos += c - 1;
- } else
+ if (ESCAPE_ERROR == mandoc_escape
+ ((const char **)&pp, NULL, NULL)) {
man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE);
- }
+ break;
+ }
- return(1);
-}
+ cpp = p;
+ while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp)))
+ *cpp = '-';
+ pos += pp - p;
+ p = pp;
+ }
+}
#define INEQ_DEFINE(x, ineq, name) \
static int \
diff --git a/usr.bin/mandoc/mandoc.1 b/usr.bin/mandoc/mandoc.1
index 3117c92aa8c..7b2720d5b50 100644
--- a/usr.bin/mandoc/mandoc.1
+++ b/usr.bin/mandoc/mandoc.1
@@ -1,6 +1,6 @@
-.\" $OpenBSD: mandoc.1,v 1.43 2011/01/09 15:24:57 schwarze Exp $
+.\" $OpenBSD: mandoc.1,v 1.44 2011/05/29 21:22:18 schwarze Exp $
.\"
-.\" Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
+.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
.\"
.\" Permission to use, copy, modify, and distribute this software for any
.\" purpose with or without fee is hereby granted, provided that the above
@@ -14,7 +14,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: January 9 2011 $
+.Dd $Mdocdate: May 29 2011 $
.Dt MANDOC 1
.Os
.Sh NAME
@@ -158,6 +158,15 @@ utility accepts the following
.Fl T
arguments, which correspond to output modes:
.Bl -tag -width Ds
+.It Fl T Ns Cm utf8
+Encode output in the UTF-8 multi-byte format.
+See
+.Sx UTF-8 Output .
+.It Fl T Ns Cm locale
+Encode output using the current
+.Xr locale 1 .
+See
+.Sx Locale Output .
.It Fl T Ns Cm ascii
Produce 7-bit ASCII output.
This is the default.
@@ -189,6 +198,23 @@ See
.Pp
If multiple input files are specified, these will be processed by the
corresponding filter in-order.
+.Ss UTF-8 Output
+Use
+.Fl T Ns Cm utf8
+to force a UTF-8 locale.
+See
+.Sx Locale Output
+for details and options.
+.Ss Locale Output
+Locale-dependent output encoding is triggered with
+.Fl T Ns Cm locale .
+This option is not available on all systems: systems without locale
+support, or those whose internal representation is not natively UCS-4,
+will fall back to
+.Fl T Ns Cm ascii .
+See
+.Sx ASCII Output
+for font style specification and available command-line arguments.
.Ss ASCII Output
Output produced by
.Fl T Ns Cm ascii ,
@@ -209,6 +235,9 @@ Emboldened characters are rendered as
The special characters documented in
.Xr mandoc_char 7
are rendered best-effort in an ASCII equivalent.
+If no equivalent is found,
+.Sq \&?
+is used instead.
.Pp
Output width is limited to 78 visible columns unless literal input lines
exceed this limit.
@@ -460,6 +489,13 @@ Each input and output format is separately noted.
.Ss ASCII Compatibility
.Bl -bullet -compact
.It
+Unrenderable Unicode codepoints specified with
+.Sq \e[uNNNN]
+escapes are printed as
+.Sq \&?
+in mandoc.
+In GNU troff, these raise an error.
+.It
The
.Sq \&Bd \-literal
and
@@ -470,7 +506,7 @@ in
.Fl T Ns Cm ascii
are synonyms, as are \-filled and \-ragged.
.It
-In GNU troff, the
+In historic GNU troff, the
.Sq \&Pa
.Xr mdoc 7
macro does not underline when scoped under an
@@ -495,8 +531,6 @@ macro in
has no effect.
.It
Words aren't hyphenated.
-.It
-Sentences are unilaterally monospaced.
.El
.Ss HTML/XHTML Compatibility
.Bl -bullet -compact
diff --git a/usr.bin/mandoc/mandoc.c b/usr.bin/mandoc/mandoc.c
index 931ce863017..b9ec46283e8 100644
--- a/usr.bin/mandoc/mandoc.c
+++ b/usr.bin/mandoc/mandoc.c
@@ -1,4 +1,4 @@
-/* $Id: mandoc.c,v 1.25 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: mandoc.c,v 1.26 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -19,6 +19,8 @@
#include <assert.h>
#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
@@ -31,199 +33,358 @@
static int a2time(time_t *, const char *, const char *);
static char *time2a(time_t);
+static int numescape(const char *);
-int
-mandoc_special(char *p)
+/*
+ * Pass over recursive numerical expressions. The context of this
+ * function is important: it's only called within character-terminating
+ * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial
+ * recursion: we don't care about what's in these blocks.
+ * This returns the number of characters skipped or -1 if an error
+ * occurs (the caller should bail).
+ */
+static int
+numescape(const char *start)
{
- int len, i;
- char term;
- char *sv;
-
- len = 0;
- term = '\0';
- sv = p;
-
- assert('\\' == *p);
- p++;
-
- switch (*p++) {
-#if 0
- case ('Z'):
+ int i;
+ size_t sz;
+ const char *cp;
+
+ i = 0;
+
+ /* The expression consists of a subexpression. */
+
+ if ('\\' == start[i]) {
+ cp = &start[++i];
+ /*
+ * Read past the end of the subexpression.
+ * Bail immediately on errors.
+ */
+ if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
+ return(-1);
+ return(i + cp - &start[i]);
+ }
+
+ if ('(' != start[i++])
+ return(0);
+
+ /*
+ * A parenthesised subexpression. Read until the closing
+ * parenthesis, making sure to handle any nested subexpressions
+ * that might ruin our parse.
+ */
+
+ while (')' != start[i]) {
+ sz = strcspn(&start[i], ")\\");
+ i += (int)sz;
+
+ if ('\0' == start[i])
+ return(-1);
+ else if ('\\' != start[i])
+ continue;
+
+ cp = &start[++i];
+ if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
+ return(-1);
+ i += cp - &start[i];
+ }
+
+ /* Read past the terminating ')'. */
+ return(++i);
+}
+
+enum mandoc_esc
+mandoc_escape(const char **end, const char **start, int *sz)
+{
+ char c, term, numeric;
+ int i, lim, ssz, rlim;
+ const char *cp, *rstart;
+ enum mandoc_esc gly;
+
+ cp = *end;
+ rstart = cp;
+ if (start)
+ *start = rstart;
+ i = lim = 0;
+ gly = ESCAPE_ERROR;
+ term = numeric = '\0';
+
+ switch ((c = cp[i++])) {
+ /*
+ * First the glyphs. There are several different forms of
+ * these, but each eventually returns a substring of the glyph
+ * name.
+ */
+ case ('('):
+ gly = ESCAPE_SPECIAL;
+ lim = 2;
+ break;
+ case ('['):
+ gly = ESCAPE_SPECIAL;
+ /*
+ * Unicode escapes are defined in groff as \[uXXXX] to
+ * \[u10FFFF], where the contained value must be a valid
+ * Unicode codepoint. Here, however, only check whether
+ * it's not a zero-width escape.
+ */
+ if ('u' == cp[i] && ']' != cp[i + 1])
+ gly = ESCAPE_UNICODE;
+ term = ']';
+ break;
+ case ('C'):
+ if ('\'' != cp[i])
+ return(ESCAPE_ERROR);
+ gly = ESCAPE_SPECIAL;
+ term = '\'';
+ break;
+
+ /*
+ * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
+ * 'X' is the trigger. These have opaque sub-strings.
+ */
+ case ('F'):
/* FALLTHROUGH */
- case ('X'):
+ case ('g'):
/* FALLTHROUGH */
- case ('x'):
+ case ('k'):
/* FALLTHROUGH */
- case ('S'):
+ case ('M'):
/* FALLTHROUGH */
- case ('R'):
+ case ('m'):
/* FALLTHROUGH */
- case ('N'):
+ case ('n'):
/* FALLTHROUGH */
- case ('l'):
+ case ('V'):
/* FALLTHROUGH */
- case ('L'):
+ case ('Y'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_IGNORE;
/* FALLTHROUGH */
- case ('H'):
+ case ('f'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_FONT;
+
+ rstart= &cp[i];
+ if (start)
+ *start = rstart;
+
+ switch (cp[i++]) {
+ case ('('):
+ lim = 2;
+ break;
+ case ('['):
+ term = ']';
+ break;
+ default:
+ lim = 1;
+ i--;
+ break;
+ }
+ break;
+
+ /*
+ * These escapes are of the form \X'Y', where 'X' is the trigger
+ * and 'Y' is any string. These have opaque sub-strings.
+ */
+ case ('A'):
/* FALLTHROUGH */
- case ('h'):
+ case ('b'):
/* FALLTHROUGH */
case ('D'):
/* FALLTHROUGH */
- case ('C'):
- /* FALLTHROUGH */
- case ('b'):
+ case ('o'):
/* FALLTHROUGH */
- case ('B'):
+ case ('R'):
/* FALLTHROUGH */
- case ('a'):
+ case ('X'):
/* FALLTHROUGH */
- case ('A'):
- if (*p++ != '\'')
- return(0);
+ case ('Z'):
+ if ('\'' != cp[i++])
+ return(ESCAPE_ERROR);
+ gly = ESCAPE_IGNORE;
term = '\'';
break;
-#endif
+
+ /*
+ * These escapes are of the form \X'N', where 'X' is the trigger
+ * and 'N' resolves to a numerical expression.
+ */
+ case ('B'):
+ /* FALLTHROUGH */
case ('h'):
/* FALLTHROUGH */
+ case ('H'):
+ /* FALLTHROUGH */
+ case ('L'):
+ /* FALLTHROUGH */
+ case ('l'):
+ /* FALLTHROUGH */
+ case ('N'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_NUMBERED;
+ /* FALLTHROUGH */
+ case ('S'):
+ /* FALLTHROUGH */
case ('v'):
/* FALLTHROUGH */
+ case ('w'):
+ /* FALLTHROUGH */
+ case ('x'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_IGNORE;
+ if ('\'' != cp[i++])
+ return(ESCAPE_ERROR);
+ term = numeric = '\'';
+ break;
+
+ /*
+ * Sizes get a special category of their own.
+ */
case ('s'):
- if (ASCII_HYPH == *p)
- *p = '-';
+ gly = ESCAPE_IGNORE;
- i = 0;
- if ('+' == *p || '-' == *p) {
- p++;
- i = 1;
- }
+ rstart = &cp[i];
+ if (start)
+ *start = rstart;
- switch (*p++) {
+ /* Check for +/-, which counts as a sign. */
+ c = cp[i];
+ if ('+' == c || '-' == c || ASCII_HYPH == c)
+ ++i;
+
+ switch (cp[i++]) {
case ('('):
- len = 2;
+ lim = 2;
break;
case ('['):
- term = ']';
+ term = numeric = ']';
break;
case ('\''):
- term = '\'';
+ term = numeric = '\'';
break;
- case ('0'):
- i = 1;
- /* FALLTHROUGH */
default:
- len = 1;
- p--;
+ lim = 1;
+ i--;
break;
}
- if (ASCII_HYPH == *p)
- *p = '-';
- if ('+' == *p || '-' == *p) {
- if (i)
- return(0);
- p++;
- }
-
- /* Handle embedded numerical subexp or escape. */
-
- if ('(' == *p) {
- while (*p && ')' != *p)
- if ('\\' == *p++) {
- i = mandoc_special(--p);
- if (0 == i)
- return(0);
- p += i;
- }
-
- if (')' == *p++)
- break;
+ /* Check for +/-, which counts as a sign. */
+ c = cp[i];
+ if ('+' == c || '-' == c || ASCII_HYPH == c)
+ ++i;
- return(0);
- } else if ('\\' == *p) {
- if (0 == (i = mandoc_special(p)))
- return(0);
- p += i;
- }
+ break;
+ /*
+ * Anything else is assumed to be a glyph.
+ */
+ default:
+ gly = ESCAPE_SPECIAL;
+ lim = 1;
+ i--;
break;
-#if 0
- case ('Y'):
- /* FALLTHROUGH */
- case ('V'):
- /* FALLTHROUGH */
- case ('$'):
- /* FALLTHROUGH */
- case ('n'):
- /* FALLTHROUGH */
-#endif
- case ('k'):
- /* FALLTHROUGH */
- case ('M'):
- /* FALLTHROUGH */
- case ('m'):
- /* FALLTHROUGH */
- case ('f'):
- /* FALLTHROUGH */
- case ('F'):
- /* FALLTHROUGH */
- case ('*'):
- switch (*p++) {
- case ('('):
- len = 2;
+ }
+
+ assert(ESCAPE_ERROR != gly);
+
+ rstart = &cp[i];
+ if (start)
+ *start = rstart;
+
+ /*
+ * If a terminating block has been specified, we need to
+ * handle the case of recursion, which could have their
+ * own terminating blocks that mess up our parse. This, by the
+ * way, means that the "start" and "size" values will be
+ * effectively meaningless.
+ */
+
+ ssz = 0;
+ if (numeric && -1 == (ssz = numescape(&cp[i])))
+ return(ESCAPE_ERROR);
+
+ i += ssz;
+ rlim = -1;
+
+ /*
+ * We have a character terminator. Try to read up to that
+ * character. If we can't (i.e., we hit the nil), then return
+ * an error; if we can, calculate our length, read past the
+ * terminating character, and exit.
+ */
+
+ if ('\0' != term) {
+ *end = strchr(&cp[i], term);
+ if ('\0' == *end)
+ return(ESCAPE_ERROR);
+
+ rlim = *end - &cp[i];
+ if (sz)
+ *sz = rlim;
+ (*end)++;
+ goto out;
+ }
+
+ assert(lim > 0);
+
+ /*
+ * We have a numeric limit. If the string is shorter than that,
+ * stop and return an error. Else adjust our endpoint, length,
+ * and return the current glyph.
+ */
+
+ if ((size_t)lim > strlen(&cp[i]))
+ return(ESCAPE_ERROR);
+
+ rlim = lim;
+ if (sz)
+ *sz = rlim;
+
+ *end = &cp[i] + lim;
+
+out:
+ assert(rlim >= 0 && rstart);
+
+ /* Run post-processors. */
+
+ switch (gly) {
+ case (ESCAPE_FONT):
+ if (1 != rlim)
break;
- case ('['):
- term = ']';
+ switch (*rstart) {
+ case ('3'):
+ /* FALLTHROUGH */
+ case ('B'):
+ gly = ESCAPE_FONTBOLD;
break;
- default:
- len = 1;
- p--;
+ case ('2'):
+ /* FALLTHROUGH */
+ case ('I'):
+ gly = ESCAPE_FONTITALIC;
+ break;
+ case ('P'):
+ gly = ESCAPE_FONTPREV;
+ break;
+ case ('1'):
+ /* FALLTHROUGH */
+ case ('R'):
+ gly = ESCAPE_FONTROMAN;
break;
}
break;
- case ('('):
- len = 2;
- break;
- case ('['):
- term = ']';
- break;
- case ('z'):
- len = 1;
- if ('\\' == *p) {
- if (0 == (i = mandoc_special(p)))
- return(0);
- p += i;
- return(*p ? (int)(p - sv) : 0);
- }
- break;
- case ('o'):
- /* FALLTHROUGH */
- case ('w'):
- if ('\'' == *p++) {
- term = '\'';
+ case (ESCAPE_SPECIAL):
+ if (1 != rlim)
break;
- }
- /* FALLTHROUGH */
+ if ('c' == *rstart)
+ gly = ESCAPE_NOSPACE;
+ break;
default:
- len = 1;
- p--;
break;
}
- if (term) {
- for ( ; *p && term != *p; p++)
- if (ASCII_HYPH == *p)
- *p = '-';
- return(*p ? (int)(p - sv) : 0);
- }
-
- for (i = 0; *p && i < len; i++, p++)
- if (ASCII_HYPH == *p)
- *p = '-';
- return(i == len ? (int)(p - sv) : 0);
+ return(gly);
}
-
void *
mandoc_calloc(size_t num, size_t size)
{
@@ -299,11 +460,11 @@ mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
/* Quoting can only start with a new word. */
start = *cpp;
+ quoted = 0;
if ('"' == *start) {
quoted = 1;
start++;
- } else
- quoted = 0;
+ }
pairs = 0;
white = 0;
@@ -444,7 +605,7 @@ mandoc_eos(const char *p, size_t sz, int enclosed)
/*
* End-of-sentence recognition must include situations where
* some symbols, such as `)', allow prior EOS punctuation to
- * propogate outward.
+ * propagate outward.
*/
found = 0;
@@ -527,3 +688,35 @@ mandoc_getcontrol(const char *cp, int *ppos)
*ppos = pos;
return(1);
}
+
+/*
+ * Convert the first sz bytes of a string to an int that may not be <0.
+ * If the string is invalid, too long, out of range, or <0, return -1.
+ */
+int
+mandoc_strntou(const char *p, size_t sz, int base)
+{
+ char buf[32];
+ char *ep;
+ long v;
+
+ if (sz > 31)
+ return(-1);
+
+ memcpy(buf, p, sz);
+ buf[(int)sz] = '\0';
+
+ errno = 0;
+ v = strtol(buf, &ep, base);
+
+ if (buf[0] == '\0' || *ep != '\0')
+ return(-1);
+
+ if ((errno == ERANGE &&
+ (v == LONG_MAX || v == LONG_MIN)) ||
+ (v > INT_MAX || v < 0))
+ return(-1);
+
+ return((int)v);
+}
+
diff --git a/usr.bin/mandoc/mandoc.h b/usr.bin/mandoc/mandoc.h
index 70999ca7149..39c4b2e0a2c 100644
--- a/usr.bin/mandoc/mandoc.h
+++ b/usr.bin/mandoc/mandoc.h
@@ -1,4 +1,4 @@
-/* $Id: mandoc.h,v 1.37 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: mandoc.h,v 1.38 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -288,10 +288,25 @@ enum mparset {
MPARSE_MAN /* assume -man */
};
+enum mandoc_esc {
+ ESCAPE_ERROR = 0, /* bail! unparsable escape */
+ ESCAPE_IGNORE, /* escape to be ignored */
+ ESCAPE_SPECIAL, /* a regular special character */
+ ESCAPE_FONT, /* a generic font mode */
+ ESCAPE_FONTBOLD, /* bold font mode */
+ ESCAPE_FONTITALIC, /* italic font mode */
+ ESCAPE_FONTROMAN, /* roman font mode */
+ ESCAPE_FONTPREV, /* previous font mode */
+ ESCAPE_NUMBERED, /* a numbered glyph */
+ ESCAPE_UNICODE, /* a unicode codepoint */
+ ESCAPE_NOSPACE /* suppress space if the last on a line */
+};
+
typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel,
const char *, int, int, const char *);
struct mparse;
+struct mchars;
struct mdoc;
struct man;
@@ -310,6 +325,16 @@ void *mandoc_calloc(size_t, size_t);
void *mandoc_malloc(size_t);
void *mandoc_realloc(void *, size_t);
+enum mandoc_esc mandoc_escape(const char **, const char **, int *);
+
+struct mchars *mchars_alloc(void);
+char mchars_num2char(const char *, size_t);
+int mchars_num2uc(const char *, size_t);
+const char *mchars_spec2str(struct mchars *, const char *, size_t, size_t *);
+int mchars_spec2cp(struct mchars *, const char *, size_t);
+void mchars_free(struct mchars *);
+
+
__END_DECLS
#endif /*!MANDOC_H*/
diff --git a/usr.bin/mandoc/mdoc_argv.c b/usr.bin/mandoc/mdoc_argv.c
index c35fcf2517c..5bc1386f021 100644
--- a/usr.bin/mandoc/mdoc_argv.c
+++ b/usr.bin/mandoc/mdoc_argv.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc_argv.c,v 1.37 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: mdoc_argv.c,v 1.38 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -28,11 +28,25 @@
#include "libmandoc.h"
#define MULTI_STEP 5 /* pre-allocate argument values */
+#define DELIMSZ 6 /* max possible size of a delimiter */
+
+enum argsflag {
+ ARGSFL_NONE = 0,
+ ARGSFL_DELIM, /* handle delimiters of [[::delim::][ ]+]+ */
+ ARGSFL_TABSEP /* handle tab/`Ta' separated phrases */
+};
+
+enum argvflag {
+ ARGV_NONE, /* no args to flag (e.g., -split) */
+ ARGV_SINGLE, /* one arg to flag (e.g., -file xxx) */
+ ARGV_MULTI, /* multiple args (e.g., -column xxx yyy) */
+ ARGV_OPT_SINGLE /* optional arg (e.g., -offset [xxx]) */
+};
static enum mdocargt argv_a2arg(enum mdoct, const char *);
static enum margserr args(struct mdoc *, int, int *,
- char *, int, char **);
-static int args_checkpunct(const char *);
+ char *, enum argsflag, char **);
+static int args_checkpunct(const char *, int);
static int argv(struct mdoc *, int,
struct mdoc_argv *, int *, char *);
static int argv_single(struct mdoc *, int,
@@ -43,13 +57,6 @@ static int argv_multi(struct mdoc *, int,
struct mdoc_argv *, int *, char *);
static void argn_free(struct mdoc_arg *, int);
-enum argvflag {
- ARGV_NONE, /* no args to flag (e.g., -split) */
- ARGV_SINGLE, /* one arg to flag (e.g., -file xxx) */
- ARGV_MULTI, /* multiple args (e.g., -column xxx yyy) */
- ARGV_OPT_SINGLE /* optional arg (e.g., -offset [xxx]) */
-};
-
static const enum argvflag argvflags[MDOC_ARG_MAX] = {
ARGV_NONE, /* MDOC_Split */
ARGV_NONE, /* MDOC_Nosplit */
@@ -79,129 +86,129 @@ static const enum argvflag argvflags[MDOC_ARG_MAX] = {
ARGV_NONE /* MDOC_Symbolic */
};
-static const int argflags[MDOC_MAX] = {
- 0, /* Ap */
- 0, /* Dd */
- 0, /* Dt */
- 0, /* Os */
- 0, /* Sh */
- 0, /* Ss */
- 0, /* Pp */
- ARGS_DELIM, /* D1 */
- ARGS_DELIM, /* Dl */
- 0, /* Bd */
- 0, /* Ed */
- 0, /* Bl */
- 0, /* El */
- 0, /* It */
- ARGS_DELIM, /* Ad */
- ARGS_DELIM, /* An */
- ARGS_DELIM, /* Ar */
- 0, /* Cd */
- ARGS_DELIM, /* Cm */
- ARGS_DELIM, /* Dv */
- ARGS_DELIM, /* Er */
- ARGS_DELIM, /* Ev */
- 0, /* Ex */
- ARGS_DELIM, /* Fa */
- 0, /* Fd */
- ARGS_DELIM, /* Fl */
- ARGS_DELIM, /* Fn */
- ARGS_DELIM, /* Ft */
- ARGS_DELIM, /* Ic */
- 0, /* In */
- ARGS_DELIM, /* Li */
- 0, /* Nd */
- ARGS_DELIM, /* Nm */
- ARGS_DELIM, /* Op */
- 0, /* Ot */
- ARGS_DELIM, /* Pa */
- 0, /* Rv */
- ARGS_DELIM, /* St */
- ARGS_DELIM, /* Va */
- ARGS_DELIM, /* Vt */
- ARGS_DELIM, /* Xr */
- 0, /* %A */
- 0, /* %B */
- 0, /* %D */
- 0, /* %I */
- 0, /* %J */
- 0, /* %N */
- 0, /* %O */
- 0, /* %P */
- 0, /* %R */
- 0, /* %T */
- 0, /* %V */
- ARGS_DELIM, /* Ac */
- 0, /* Ao */
- ARGS_DELIM, /* Aq */
- ARGS_DELIM, /* At */
- ARGS_DELIM, /* Bc */
- 0, /* Bf */
- 0, /* Bo */
- ARGS_DELIM, /* Bq */
- ARGS_DELIM, /* Bsx */
- ARGS_DELIM, /* Bx */
- 0, /* Db */
- ARGS_DELIM, /* Dc */
- 0, /* Do */
- ARGS_DELIM, /* Dq */
- ARGS_DELIM, /* Ec */
- 0, /* Ef */
- ARGS_DELIM, /* Em */
- 0, /* Eo */
- ARGS_DELIM, /* Fx */
- ARGS_DELIM, /* Ms */
- ARGS_DELIM, /* No */
- ARGS_DELIM, /* Ns */
- ARGS_DELIM, /* Nx */
- ARGS_DELIM, /* Ox */
- ARGS_DELIM, /* Pc */
- ARGS_DELIM, /* Pf */
- 0, /* Po */
- ARGS_DELIM, /* Pq */
- ARGS_DELIM, /* Qc */
- ARGS_DELIM, /* Ql */
- 0, /* Qo */
- ARGS_DELIM, /* Qq */
- 0, /* Re */
- 0, /* Rs */
- ARGS_DELIM, /* Sc */
- 0, /* So */
- ARGS_DELIM, /* Sq */
- 0, /* Sm */
- ARGS_DELIM, /* Sx */
- ARGS_DELIM, /* Sy */
- ARGS_DELIM, /* Tn */
- ARGS_DELIM, /* Ux */
- ARGS_DELIM, /* Xc */
- 0, /* Xo */
- 0, /* Fo */
- 0, /* Fc */
- 0, /* Oo */
- ARGS_DELIM, /* Oc */
- 0, /* Bk */
- 0, /* Ek */
- 0, /* Bt */
- 0, /* Hf */
- 0, /* Fr */
- 0, /* Ud */
- 0, /* Lb */
- 0, /* Lp */
- ARGS_DELIM, /* Lk */
- ARGS_DELIM, /* Mt */
- ARGS_DELIM, /* Brq */
- 0, /* Bro */
- ARGS_DELIM, /* Brc */
- 0, /* %C */
- 0, /* Es */
- 0, /* En */
- 0, /* Dx */
- 0, /* %Q */
- 0, /* br */
- 0, /* sp */
- 0, /* %U */
- 0, /* Ta */
+static const enum argsflag argflags[MDOC_MAX] = {
+ ARGSFL_NONE, /* Ap */
+ ARGSFL_NONE, /* Dd */
+ ARGSFL_NONE, /* Dt */
+ ARGSFL_NONE, /* Os */
+ ARGSFL_NONE, /* Sh */
+ ARGSFL_NONE, /* Ss */
+ ARGSFL_NONE, /* Pp */
+ ARGSFL_DELIM, /* D1 */
+ ARGSFL_DELIM, /* Dl */
+ ARGSFL_NONE, /* Bd */
+ ARGSFL_NONE, /* Ed */
+ ARGSFL_NONE, /* Bl */
+ ARGSFL_NONE, /* El */
+ ARGSFL_NONE, /* It */
+ ARGSFL_DELIM, /* Ad */
+ ARGSFL_DELIM, /* An */
+ ARGSFL_DELIM, /* Ar */
+ ARGSFL_NONE, /* Cd */
+ ARGSFL_DELIM, /* Cm */
+ ARGSFL_DELIM, /* Dv */
+ ARGSFL_DELIM, /* Er */
+ ARGSFL_DELIM, /* Ev */
+ ARGSFL_NONE, /* Ex */
+ ARGSFL_DELIM, /* Fa */
+ ARGSFL_NONE, /* Fd */
+ ARGSFL_DELIM, /* Fl */
+ ARGSFL_DELIM, /* Fn */
+ ARGSFL_DELIM, /* Ft */
+ ARGSFL_DELIM, /* Ic */
+ ARGSFL_NONE, /* In */
+ ARGSFL_DELIM, /* Li */
+ ARGSFL_NONE, /* Nd */
+ ARGSFL_DELIM, /* Nm */
+ ARGSFL_DELIM, /* Op */
+ ARGSFL_NONE, /* Ot */
+ ARGSFL_DELIM, /* Pa */
+ ARGSFL_NONE, /* Rv */
+ ARGSFL_DELIM, /* St */
+ ARGSFL_DELIM, /* Va */
+ ARGSFL_DELIM, /* Vt */
+ ARGSFL_DELIM, /* Xr */
+ ARGSFL_NONE, /* %A */
+ ARGSFL_NONE, /* %B */
+ ARGSFL_NONE, /* %D */
+ ARGSFL_NONE, /* %I */
+ ARGSFL_NONE, /* %J */
+ ARGSFL_NONE, /* %N */
+ ARGSFL_NONE, /* %O */
+ ARGSFL_NONE, /* %P */
+ ARGSFL_NONE, /* %R */
+ ARGSFL_NONE, /* %T */
+ ARGSFL_NONE, /* %V */
+ ARGSFL_DELIM, /* Ac */
+ ARGSFL_NONE, /* Ao */
+ ARGSFL_DELIM, /* Aq */
+ ARGSFL_DELIM, /* At */
+ ARGSFL_DELIM, /* Bc */
+ ARGSFL_NONE, /* Bf */
+ ARGSFL_NONE, /* Bo */
+ ARGSFL_DELIM, /* Bq */
+ ARGSFL_DELIM, /* Bsx */
+ ARGSFL_DELIM, /* Bx */
+ ARGSFL_NONE, /* Db */
+ ARGSFL_DELIM, /* Dc */
+ ARGSFL_NONE, /* Do */
+ ARGSFL_DELIM, /* Dq */
+ ARGSFL_DELIM, /* Ec */
+ ARGSFL_NONE, /* Ef */
+ ARGSFL_DELIM, /* Em */
+ ARGSFL_NONE, /* Eo */
+ ARGSFL_DELIM, /* Fx */
+ ARGSFL_DELIM, /* Ms */
+ ARGSFL_DELIM, /* No */
+ ARGSFL_DELIM, /* Ns */
+ ARGSFL_DELIM, /* Nx */
+ ARGSFL_DELIM, /* Ox */
+ ARGSFL_DELIM, /* Pc */
+ ARGSFL_DELIM, /* Pf */
+ ARGSFL_NONE, /* Po */
+ ARGSFL_DELIM, /* Pq */
+ ARGSFL_DELIM, /* Qc */
+ ARGSFL_DELIM, /* Ql */
+ ARGSFL_NONE, /* Qo */
+ ARGSFL_DELIM, /* Qq */
+ ARGSFL_NONE, /* Re */
+ ARGSFL_NONE, /* Rs */
+ ARGSFL_DELIM, /* Sc */
+ ARGSFL_NONE, /* So */
+ ARGSFL_DELIM, /* Sq */
+ ARGSFL_NONE, /* Sm */
+ ARGSFL_DELIM, /* Sx */
+ ARGSFL_DELIM, /* Sy */
+ ARGSFL_DELIM, /* Tn */
+ ARGSFL_DELIM, /* Ux */
+ ARGSFL_DELIM, /* Xc */
+ ARGSFL_NONE, /* Xo */
+ ARGSFL_NONE, /* Fo */
+ ARGSFL_NONE, /* Fc */
+ ARGSFL_NONE, /* Oo */
+ ARGSFL_DELIM, /* Oc */
+ ARGSFL_NONE, /* Bk */
+ ARGSFL_NONE, /* Ek */
+ ARGSFL_NONE, /* Bt */
+ ARGSFL_NONE, /* Hf */
+ ARGSFL_NONE, /* Fr */
+ ARGSFL_NONE, /* Ud */
+ ARGSFL_NONE, /* Lb */
+ ARGSFL_NONE, /* Lp */
+ ARGSFL_DELIM, /* Lk */
+ ARGSFL_DELIM, /* Mt */
+ ARGSFL_DELIM, /* Brq */
+ ARGSFL_NONE, /* Bro */
+ ARGSFL_DELIM, /* Brc */
+ ARGSFL_NONE, /* %C */
+ ARGSFL_NONE, /* Es */
+ ARGSFL_NONE, /* En */
+ ARGSFL_NONE, /* Dx */
+ ARGSFL_NONE, /* %Q */
+ ARGSFL_NONE, /* br */
+ ARGSFL_NONE, /* sp */
+ ARGSFL_NONE, /* %U */
+ ARGSFL_NONE, /* Ta */
};
static const enum mdocargt args_Ex[] = {
@@ -371,18 +378,17 @@ argn_free(struct mdoc_arg *p, int iarg)
}
enum margserr
-mdoc_zargs(struct mdoc *m, int line, int *pos,
- char *buf, int flags, char **v)
+mdoc_zargs(struct mdoc *m, int line, int *pos, char *buf, char **v)
{
- return(args(m, line, pos, buf, flags, v));
+ return(args(m, line, pos, buf, ARGSFL_NONE, v));
}
enum margserr
mdoc_args(struct mdoc *m, int line, int *pos,
char *buf, enum mdoct tok, char **v)
{
- int fl;
+ enum argsflag fl;
struct mdoc_node *n;
fl = argflags[tok];
@@ -399,39 +405,21 @@ mdoc_args(struct mdoc *m, int line, int *pos,
for (n = m->last; n; n = n->parent)
if (MDOC_Bl == n->tok)
- break;
-
- if (n && LIST_column == n->norm->Bl.type) {
- fl |= ARGS_TABSEP;
- fl &= ~ARGS_DELIM;
- }
+ if (LIST_column == n->norm->Bl.type) {
+ fl = ARGSFL_TABSEP;
+ break;
+ }
return(args(m, line, pos, buf, fl, v));
}
static enum margserr
args(struct mdoc *m, int line, int *pos,
- char *buf, int fl, char **v)
+ char *buf, enum argsflag fl, char **v)
{
- int i;
char *p, *pp;
enum margserr rc;
- /*
- * Parse out the terms (like `val' in `.Xx -arg val' or simply
- * `.Xx val'), which can have all sorts of properties:
- *
- * ARGS_DELIM: use special handling if encountering trailing
- * delimiters in the form of [[::delim::][ ]+]+.
- *
- * ARGS_NOWARN: don't post warnings. This is only used when
- * re-parsing delimiters, as the warnings have already been
- * posted.
- *
- * ARGS_TABSEP: use special handling for tab/`Ta' separated
- * phrases like in `Bl -column'.
- */
-
assert(' ' != buf[*pos]);
if ('\0' == buf[*pos]) {
@@ -451,15 +439,9 @@ args(struct mdoc *m, int line, int *pos,
*v = &buf[*pos];
- if (ARGS_DELIM & fl && args_checkpunct(&buf[*pos])) {
- i = strlen(&buf[*pos]) + *pos;
- if (i && ' ' != buf[i - 1])
+ if (ARGSFL_DELIM == fl)
+ if (args_checkpunct(buf, *pos))
return(ARGS_PUNCT);
- if (ARGS_NOWARN & fl)
- return(ARGS_PUNCT);
- mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE);
- return(ARGS_PUNCT);
- }
/*
* First handle TABSEP items, restricted to `Bl -column'. This
@@ -468,7 +450,7 @@ args(struct mdoc *m, int line, int *pos,
* for arguments at a later phase.
*/
- if (ARGS_TABSEP & fl) {
+ if (ARGSFL_TABSEP == fl) {
/* Scan ahead to tab (can't be escaped). */
p = strchr(*v, '\t');
pp = NULL;
@@ -507,7 +489,7 @@ args(struct mdoc *m, int line, int *pos,
}
/* Whitespace check for eoln case... */
- if ('\0' == *p && ' ' == *(p - 1) && ! (ARGS_NOWARN & fl))
+ if ('\0' == *p && ' ' == *(p - 1))
mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE);
*pos += (int)(p - *v);
@@ -550,7 +532,7 @@ args(struct mdoc *m, int line, int *pos,
}
if ('\0' == buf[*pos]) {
- if (ARGS_NOWARN & fl || MDOC_PPHRASE & m->flags)
+ if (MDOC_PPHRASE & m->flags)
return(ARGS_QWORD);
mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE);
return(ARGS_QWORD);
@@ -565,31 +547,14 @@ args(struct mdoc *m, int line, int *pos,
while (' ' == buf[*pos])
(*pos)++;
- if (0 == buf[*pos] && ! (ARGS_NOWARN & fl))
+ if ('\0' == buf[*pos])
mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE);
return(ARGS_QWORD);
}
- /*
- * A non-quoted term progresses until either the end of line or
- * a non-escaped whitespace.
- */
-
- for ( ; buf[*pos]; (*pos)++)
- if (*pos && ' ' == buf[*pos] && '\\' != buf[*pos - 1])
- break;
-
- if ('\0' == buf[*pos])
- return(ARGS_WORD);
-
- buf[(*pos)++] = '\0';
-
- while (' ' == buf[*pos])
- (*pos)++;
-
- if ('\0' == buf[*pos] && ! (ARGS_NOWARN & fl))
- mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE);
+ p = &buf[*pos];
+ *v = mandoc_getarg(m->parse, &p, line, pos);
return(ARGS_WORD);
}
@@ -601,49 +566,47 @@ args(struct mdoc *m, int line, int *pos,
* whitespace may separate these tokens.
*/
static int
-args_checkpunct(const char *p)
+args_checkpunct(const char *buf, int i)
{
- int i, j;
- char buf[DELIMSZ];
+ int j;
+ char dbuf[DELIMSZ];
enum mdelim d;
- i = 0;
-
/* First token must be a close-delimiter. */
- for (j = 0; p[i] && ' ' != p[i] && j < DELIMSZ; j++, i++)
- buf[j] = p[i];
+ for (j = 0; buf[i] && ' ' != buf[i] && j < DELIMSZ; j++, i++)
+ dbuf[j] = buf[i];
if (DELIMSZ == j)
return(0);
- buf[j] = '\0';
- if (DELIM_CLOSE != mdoc_isdelim(buf))
+ dbuf[j] = '\0';
+ if (DELIM_CLOSE != mdoc_isdelim(dbuf))
return(0);
- while (' ' == p[i])
+ while (' ' == buf[i])
i++;
/* Remaining must NOT be open/none. */
- while (p[i]) {
+ while (buf[i]) {
j = 0;
- while (p[i] && ' ' != p[i] && j < DELIMSZ)
- buf[j++] = p[i++];
+ while (buf[i] && ' ' != buf[i] && j < DELIMSZ)
+ dbuf[j++] = buf[i++];
if (DELIMSZ == j)
return(0);
- buf[j] = '\0';
- d = mdoc_isdelim(buf);
+ dbuf[j] = '\0';
+ d = mdoc_isdelim(dbuf);
if (DELIM_NONE == d || DELIM_OPEN == d)
return(0);
- while (' ' == p[i])
+ while (' ' == buf[i])
i++;
}
- return('\0' == p[i]);
+ return('\0' == buf[i]);
}
/*
@@ -654,40 +617,40 @@ args_checkpunct(const char *p)
static enum mdocargt
argv_a2arg(enum mdoct tok, const char *p)
{
- const enum mdocargt *args;
+ const enum mdocargt *argsp;
- args = NULL;
+ argsp = NULL;
switch (tok) {
case (MDOC_An):
- args = args_An;
+ argsp = args_An;
break;
case (MDOC_Bd):
- args = args_Bd;
+ argsp = args_Bd;
break;
case (MDOC_Bf):
- args = args_Bf;
+ argsp = args_Bf;
break;
case (MDOC_Bk):
- args = args_Bk;
+ argsp = args_Bk;
break;
case (MDOC_Bl):
- args = args_Bl;
+ argsp = args_Bl;
break;
case (MDOC_Rv):
/* FALLTHROUGH */
case (MDOC_Ex):
- args = args_Ex;
+ argsp = args_Ex;
break;
default:
return(MDOC_ARG_MAX);
}
- assert(args);
+ assert(argsp);
- for ( ; MDOC_ARG_MAX != *args ; args++)
- if (0 == strcmp(p, mdoc_argnames[*args]))
- return(*args);
+ for ( ; MDOC_ARG_MAX != *argsp ; argsp++)
+ if (0 == strcmp(p, mdoc_argnames[*argsp]))
+ return(*argsp);
return(MDOC_ARG_MAX);
}
@@ -702,7 +665,7 @@ argv_multi(struct mdoc *m, int line,
for (v->sz = 0; ; v->sz++) {
if ('-' == buf[*pos])
break;
- ac = args(m, line, pos, buf, 0, &p);
+ ac = args(m, line, pos, buf, ARGSFL_NONE, &p);
if (ARGS_ERROR == ac)
return(0);
else if (ARGS_EOLN == ac)
@@ -728,7 +691,7 @@ argv_opt_single(struct mdoc *m, int line,
if ('-' == buf[*pos])
return(1);
- ac = args(m, line, pos, buf, 0, &p);
+ ac = args(m, line, pos, buf, ARGSFL_NONE, &p);
if (ARGS_ERROR == ac)
return(0);
if (ARGS_EOLN == ac)
@@ -754,7 +717,7 @@ argv_single(struct mdoc *m, int line,
ppos = *pos;
- ac = args(m, line, pos, buf, 0, &p);
+ ac = args(m, line, pos, buf, ARGSFL_NONE, &p);
if (ARGS_EOLN == ac) {
mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTARGVCOUNT);
return(0);
diff --git a/usr.bin/mandoc/mdoc_html.c b/usr.bin/mandoc/mdoc_html.c
index 47112e20804..2bbf5f6fe75 100644
--- a/usr.bin/mandoc/mdoc_html.c
+++ b/usr.bin/mandoc/mdoc_html.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc_html.c,v 1.56 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: mdoc_html.c,v 1.57 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -284,7 +284,7 @@ a2width(const char *p, struct roffsu *su)
if ( ! a2roffsu(p, su, SCALE_MAX)) {
su->unit = SCALE_BU;
- su->scale = (int)strlen(p);
+ su->scale = html_strlen(p);
}
}
@@ -351,7 +351,7 @@ a2offs(const char *p, struct roffsu *su)
SCALE_HS_INIT(su, INDENT * 2);
else if ( ! a2roffsu(p, su, SCALE_MAX)) {
su->unit = SCALE_BU;
- su->scale = (int)strlen(p);
+ su->scale = html_strlen(p);
}
}
@@ -378,13 +378,10 @@ print_mdoc_head(MDOC_ARGS)
print_gen_head(h);
bufinit(h);
- buffmt(h, "%s(%s)", m->title, m->msec);
+ bufcat_fmt(h, "%s(%s)", m->title, m->msec);
- if (m->arch) {
- bufcat(h, " (");
- bufcat(h, m->arch);
- bufcat(h, ")");
- }
+ if (m->arch)
+ bufcat_fmt(h, " (%s)", m->arch);
print_otag(h, TAG_TITLE, 0, NULL);
print_text(h, h->buf);
@@ -411,7 +408,6 @@ print_mdoc_node(MDOC_ARGS)
child = 1;
t = h->tags.head;
- bufinit(h);
switch (n->type) {
case (MDOC_ROOT):
child = mdoc_root_pre(m, n, h);
@@ -480,7 +476,6 @@ print_mdoc_node(MDOC_ARGS)
print_stagq(h, t);
- bufinit(h);
switch (n->type) {
case (MDOC_ROOT):
mdoc_root_post(m, n, h);
@@ -602,7 +597,6 @@ static int
mdoc_sh_pre(MDOC_ARGS)
{
struct htmlpair tag;
- char buf[BUFSIZ];
if (MDOC_BLOCK == n->type) {
PAIR_CLASS_INIT(&tag, "section");
@@ -611,14 +605,14 @@ mdoc_sh_pre(MDOC_ARGS)
} else if (MDOC_BODY == n->type)
return(1);
- buf[0] = '\0';
+ bufinit(h);
for (n = n->child; n; n = n->next) {
- html_idcat(buf, n->string, BUFSIZ);
+ bufcat_id(h, n->string);
if (n->next)
- html_idcat(buf, " ", BUFSIZ);
+ bufcat_id(h, " ");
}
- PAIR_ID_INIT(&tag, buf);
+ PAIR_ID_INIT(&tag, h->buf);
print_otag(h, TAG_H1, 1, &tag);
return(1);
}
@@ -629,7 +623,6 @@ static int
mdoc_ss_pre(MDOC_ARGS)
{
struct htmlpair tag;
- char buf[BUFSIZ];
if (MDOC_BLOCK == n->type) {
PAIR_CLASS_INIT(&tag, "subsection");
@@ -638,14 +631,14 @@ mdoc_ss_pre(MDOC_ARGS)
} else if (MDOC_BODY == n->type)
return(1);
- buf[0] = '\0';
+ bufinit(h);
for (n = n->child; n; n = n->next) {
- html_idcat(buf, n->string, BUFSIZ);
+ bufcat_id(h, n->string);
if (n->next)
- html_idcat(buf, " ", BUFSIZ);
+ bufcat_id(h, " ");
}
- PAIR_ID_INIT(&tag, buf);
+ PAIR_ID_INIT(&tag, h->buf);
print_otag(h, TAG_H2, 1, &tag);
return(1);
}
@@ -699,7 +692,7 @@ mdoc_nm_pre(MDOC_ARGS)
{
struct htmlpair tag;
struct roffsu su;
- size_t len;
+ int len;
switch (n->type) {
case (MDOC_ELEM):
@@ -727,12 +720,13 @@ mdoc_nm_pre(MDOC_ARGS)
for (len = 0, n = n->child; n; n = n->next)
if (MDOC_TEXT == n->type)
- len += strlen(n->string);
+ len += html_strlen(n->string);
if (0 == len && m->name)
- len = strlen(m->name);
+ len = html_strlen(m->name);
SCALE_HS_INIT(&su, (double)len);
+ bufinit(h);
bufcat_su(h, "width", &su);
PAIR_STYLE_INIT(&tag, h);
print_otag(h, TAG_COL, 1, &tag);
@@ -895,6 +889,8 @@ mdoc_it_pre(MDOC_ARGS)
assert(lists[type]);
PAIR_CLASS_INIT(&tag[0], lists[type]);
+ bufinit(h);
+
if (MDOC_HEAD == n->type) {
switch (type) {
case(LIST_bullet):
@@ -995,6 +991,8 @@ mdoc_bl_pre(MDOC_ARGS)
struct roffsu su;
char buf[BUFSIZ];
+ bufinit(h);
+
if (MDOC_BODY == n->type) {
if (LIST_column == n->norm->Bl.type)
print_otag(h, TAG_TBODY, 0, NULL);
@@ -1014,7 +1012,6 @@ mdoc_bl_pre(MDOC_ARGS)
for (i = 0; i < (int)n->norm->Bl.ncols; i++) {
a2width(n->norm->Bl.cols[i], &su);
- bufinit(h);
if (i < (int)n->norm->Bl.ncols - 1)
bufcat_su(h, "width", &su);
else
@@ -1143,6 +1140,7 @@ mdoc_d1_pre(MDOC_ARGS)
return(1);
SCALE_VS_INIT(&su, 0);
+ bufinit(h);
bufcat_su(h, "margin-top", &su);
bufcat_su(h, "margin-bottom", &su);
PAIR_STYLE_INIT(&tag[0], h);
@@ -1167,17 +1165,17 @@ static int
mdoc_sx_pre(MDOC_ARGS)
{
struct htmlpair tag[2];
- char buf[BUFSIZ];
- strlcpy(buf, "#", BUFSIZ);
+ bufinit(h);
+ bufcat(h, "#x");
for (n = n->child; n; n = n->next) {
- html_idcat(buf, n->string, BUFSIZ);
+ bufcat_id(h, n->string);
if (n->next)
- html_idcat(buf, " ", BUFSIZ);
+ bufcat_id(h, " ");
}
PAIR_CLASS_INIT(&tag[0], "link-sec");
- PAIR_HREF_INIT(&tag[1], buf);
+ PAIR_HREF_INIT(&tag[1], h->buf);
print_otag(h, TAG_I, 1, tag);
print_otag(h, TAG_A, 2, tag);
@@ -1215,7 +1213,8 @@ mdoc_bd_pre(MDOC_ARGS)
SCALE_HS_INIT(&su, 0);
if (n->norm->Bd.offs)
a2offs(n->norm->Bd.offs, &su);
-
+
+ bufinit(h);
bufcat_su(h, "margin-left", &su);
PAIR_STYLE_INIT(&tag[0], h);
@@ -1434,7 +1433,6 @@ mdoc_fd_pre(MDOC_ARGS)
buf[sz - 1] = '\0';
PAIR_CLASS_INIT(&tag[0], "link-includes");
- bufinit(h);
i = 1;
if (h->base_includes) {
@@ -1556,8 +1554,8 @@ mdoc_fn_pre(MDOC_ARGS)
print_text(h, "(");
h->flags |= HTML_NOSPACE;
- bufinit(h);
PAIR_CLASS_INIT(&tag[0], "farg");
+ bufinit(h);
bufcat_style(h, "white-space", "nowrap");
PAIR_STYLE_INIT(&tag[1], h);
@@ -1636,6 +1634,7 @@ mdoc_sp_pre(MDOC_ARGS)
} else
su.scale = 0;
+ bufinit(h);
bufcat_su(h, "height", &su);
PAIR_STYLE_INIT(&tag, h);
print_otag(h, TAG_DIV, 1, &tag);
@@ -1772,10 +1771,8 @@ mdoc_in_pre(MDOC_ARGS)
assert(MDOC_TEXT == n->type);
PAIR_CLASS_INIT(&tag[0], "link-includes");
- bufinit(h);
i = 1;
-
if (h->base_includes) {
buffmt_includes(h, n->string);
PAIR_HREF_INIT(&tag[i], h->buf);
@@ -1914,6 +1911,7 @@ mdoc_bf_pre(MDOC_ARGS)
* We want this to be inline-formatted, but needs to be div to
* accept block children.
*/
+ bufinit(h);
bufcat_style(h, "display", "inline");
SCALE_HS_INIT(&su, 1);
/* Needs a left-margin for spacing. */
diff --git a/usr.bin/mandoc/mdoc_macro.c b/usr.bin/mandoc/mdoc_macro.c
index e29fb2610d6..bc22ce7330a 100644
--- a/usr.bin/mandoc/mdoc_macro.c
+++ b/usr.bin/mandoc/mdoc_macro.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc_macro.c,v 1.67 2011/04/24 16:49:10 schwarze Exp $ */
+/* $Id: mdoc_macro.c,v 1.68 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
@@ -615,7 +615,7 @@ append_delims(struct mdoc *m, int line, int *pos, char *buf)
for (;;) {
la = *pos;
- ac = mdoc_zargs(m, line, pos, buf, ARGS_NOWARN, &p);
+ ac = mdoc_zargs(m, line, pos, buf, &p);
if (ARGS_ERROR == ac)
return(0);
@@ -628,12 +628,12 @@ append_delims(struct mdoc *m, int line, int *pos, char *buf)
* If we encounter end-of-sentence symbols, then trigger
* the double-space.
*
- * XXX: it's easy to allow this to propogate outward to
+ * XXX: it's easy to allow this to propagate outward to
* the last symbol, such that `. )' will cause the
* correct double-spacing. However, (1) groff isn't
* smart enough to do this and (2) it would require
* knowing which symbols break this behaviour, for
- * example, `. ;' shouldn't propogate the double-space.
+ * example, `. ;' shouldn't propagate the double-space.
*/
if (mandoc_eos(p, strlen(p), 0))
m->last->flags |= MDOC_EOS;
@@ -992,7 +992,7 @@ blk_full(MACRO_PROT_ARGS)
}
/*
- * This routine accomodates implicitly- and explicitly-scoped
+ * This routine accommodates implicitly- and explicitly-scoped
* macro openings. Implicit ones first close out prior scope
* (seen above). Delay opening the head until necessary to
* allow leading punctuation to print. Special consideration
@@ -1289,7 +1289,7 @@ blk_part_imp(MACRO_PROT_ARGS)
if (mandoc_eos(n->string, strlen(n->string), 1))
n->flags |= MDOC_EOS;
- /* Up-propogate the end-of-space flag. */
+ /* Up-propagate the end-of-sentence flag. */
if (n && (MDOC_EOS & n->flags)) {
body->flags |= MDOC_EOS;
@@ -1711,7 +1711,7 @@ phrase(struct mdoc *m, int line, int ppos, char *buf)
for (pos = ppos; ; ) {
la = pos;
- ac = mdoc_zargs(m, line, &pos, buf, 0, &p);
+ ac = mdoc_zargs(m, line, &pos, buf, &p);
if (ARGS_ERROR == ac)
return(0);
@@ -1756,7 +1756,7 @@ phrase_ta(MACRO_PROT_ARGS)
for (;;) {
la = *pos;
- ac = mdoc_zargs(m, line, pos, buf, 0, &p);
+ ac = mdoc_zargs(m, line, pos, buf, &p);
if (ARGS_ERROR == ac)
return(0);
diff --git a/usr.bin/mandoc/mdoc_term.c b/usr.bin/mandoc/mdoc_term.c
index a992ed09cf8..24ca2a3e485 100644
--- a/usr.bin/mandoc/mdoc_term.c
+++ b/usr.bin/mandoc/mdoc_term.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc_term.c,v 1.132 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: mdoc_term.c,v 1.133 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
@@ -260,14 +260,7 @@ terminal_mdoc(void *arg, const struct mdoc *mdoc)
p->tabwidth = term_len(p, 5);
if (NULL == p->symtab)
- switch (p->enc) {
- case (TERMENC_ASCII):
- p->symtab = chars_init(CHARS_ASCII);
- break;
- default:
- abort();
- /* NOTREACHED */
- }
+ p->symtab = mchars_alloc();
n = mdoc_node(mdoc);
m = mdoc_meta(mdoc);
diff --git a/usr.bin/mandoc/mdoc_validate.c b/usr.bin/mandoc/mdoc_validate.c
index b915a49b24a..1c08e614a80 100644
--- a/usr.bin/mandoc/mdoc_validate.c
+++ b/usr.bin/mandoc/mdoc_validate.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc_validate.c,v 1.92 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: mdoc_validate.c,v 1.93 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -176,7 +176,7 @@ static v_pre pres_sh[] = { pre_sh, NULL };
static v_pre pres_ss[] = { pre_ss, NULL };
static v_pre pres_std[] = { pre_std, NULL };
-static const struct valids mdoc_valids[MDOC_MAX] = {
+static const struct valids mdoc_valids[MDOC_MAX] = {
{ NULL, NULL }, /* Ap */
{ pres_dd, posts_dd }, /* Dd */
{ pres_dt, posts_dt }, /* Dt */
@@ -541,31 +541,39 @@ check_argv(struct mdoc *m, struct mdoc_node *n, struct mdoc_argv *v)
static void
check_text(struct mdoc *m, int ln, int pos, char *p)
{
- int c;
+ char *cpp, *pp;
size_t sz;
- for ( ; *p; p++, pos++) {
+ while ('\0' != *p) {
sz = strcspn(p, "\t\\");
- p += (int)sz;
-
- if ('\0' == *p)
- break;
+ p += (int)sz;
pos += (int)sz;
if ('\t' == *p) {
if ( ! (MDOC_LITERAL & m->flags))
mdoc_pmsg(m, ln, pos, MANDOCERR_BADTAB);
+ p++;
+ pos++;
continue;
- }
+ } else if ('\0' == *p)
+ break;
+
+ pos++;
+ pp = ++p;
- if (0 == (c = mandoc_special(p))) {
+ if (ESCAPE_ERROR == mandoc_escape
+ ((const char **)&pp, NULL, NULL)) {
mdoc_pmsg(m, ln, pos, MANDOCERR_BADESCAPE);
- continue;
+ break;
}
- p += c - 1;
- pos += c - 1;
+ cpp = p;
+ while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp)))
+ *cpp = '-';
+
+ pos += pp - p;
+ p = pp;
}
}
@@ -1523,7 +1531,7 @@ post_bl_head(POST_ARGS)
assert(0 == np->args->argv[j].sz);
/*
- * Accomodate for new-style groff column syntax. Shuffle the
+ * Accommodate for new-style groff column syntax. Shuffle the
* child nodes, all of which must be TEXT, as arguments for the
* column field. Then, delete the head children.
*/
diff --git a/usr.bin/mandoc/out.c b/usr.bin/mandoc/out.c
index 48e3b3c5c8b..eaebdc4a697 100644
--- a/usr.bin/mandoc/out.c
+++ b/usr.bin/mandoc/out.c
@@ -1,4 +1,4 @@
-/* $Id: out.c,v 1.13 2011/04/21 22:59:54 schwarze Exp $ */
+/* $Id: out.c,v 1.14 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -170,243 +170,6 @@ time2a(time_t t, char *dst, size_t sz)
(void)strftime(p, sz, "%Y", &tm);
}
-
-int
-a2roffdeco(enum roffdeco *d, const char **word, size_t *sz)
-{
- int i, j, lim;
- char term, c;
- const char *wp;
- enum roffdeco dd;
-
- *d = DECO_NONE;
- lim = i = 0;
- term = '\0';
- wp = *word;
-
- switch ((c = wp[i++])) {
- case ('('):
- *d = DECO_SPECIAL;
- lim = 2;
- break;
- case ('F'):
- /* FALLTHROUGH */
- case ('f'):
- *d = 'F' == c ? DECO_FFONT : DECO_FONT;
-
- switch (wp[i++]) {
- case ('('):
- lim = 2;
- break;
- case ('['):
- term = ']';
- break;
- case ('3'):
- /* FALLTHROUGH */
- case ('B'):
- *d = DECO_BOLD;
- return(i);
- case ('2'):
- /* FALLTHROUGH */
- case ('I'):
- *d = DECO_ITALIC;
- return(i);
- case ('P'):
- *d = DECO_PREVIOUS;
- return(i);
- case ('1'):
- /* FALLTHROUGH */
- case ('R'):
- *d = DECO_ROMAN;
- return(i);
- default:
- i--;
- lim = 1;
- break;
- }
- break;
- case ('k'):
- /* FALLTHROUGH */
- case ('M'):
- /* FALLTHROUGH */
- case ('m'):
- /* FALLTHROUGH */
- case ('*'):
- if ('*' == c)
- *d = DECO_RESERVED;
-
- switch (wp[i++]) {
- case ('('):
- lim = 2;
- break;
- case ('['):
- term = ']';
- break;
- default:
- i--;
- lim = 1;
- break;
- }
- break;
-
- case ('N'):
-
- /*
- * Sequence of characters: backslash, 'N' (i = 0),
- * starting delimiter (i = 1), character number (i = 2).
- */
-
- *word = wp + 2;
- *sz = 0;
-
- /*
- * Cannot use a digit as a starting delimiter;
- * but skip the digit anyway.
- */
-
- if (isdigit((int)wp[1]))
- return(2);
-
- /*
- * Any non-digit terminates the character number.
- * That is, the terminating delimiter need not
- * match the starting delimiter.
- */
-
- for (i = 2; isdigit((int)wp[i]); i++)
- (*sz)++;
-
- /*
- * This is only a numbered character
- * if the character number has at least one digit.
- */
-
- if (*sz)
- *d = DECO_NUMBERED;
-
- /*
- * Skip the terminating delimiter, even if it does not
- * match, and even if there is no character number.
- */
-
- return(++i);
-
- case ('h'):
- /* FALLTHROUGH */
- case ('v'):
- /* FALLTHROUGH */
- case ('s'):
- j = 0;
- if ('+' == wp[i] || '-' == wp[i]) {
- i++;
- j = 1;
- }
-
- switch (wp[i++]) {
- case ('('):
- lim = 2;
- break;
- case ('['):
- term = ']';
- break;
- case ('\''):
- term = '\'';
- break;
- case ('0'):
- j = 1;
- /* FALLTHROUGH */
- default:
- i--;
- lim = 1;
- break;
- }
-
- if ('+' == wp[i] || '-' == wp[i]) {
- if (j)
- return(i);
- i++;
- }
-
- /* Handle embedded numerical subexp or escape. */
-
- if ('(' == wp[i]) {
- while (wp[i] && ')' != wp[i])
- if ('\\' == wp[i++]) {
- /* Handle embedded escape. */
- *word = &wp[i];
- i += a2roffdeco(&dd, word, sz);
- }
-
- if (')' == wp[i++])
- break;
-
- *d = DECO_NONE;
- return(i - 1);
- } else if ('\\' == wp[i]) {
- *word = &wp[++i];
- i += a2roffdeco(&dd, word, sz);
- }
-
- break;
- case ('['):
- *d = DECO_SPECIAL;
- term = ']';
- break;
- case ('c'):
- *d = DECO_NOSPACE;
- return(i);
- case ('z'):
- *d = DECO_NONE;
- if ('\\' == wp[i]) {
- *word = &wp[++i];
- return(i + a2roffdeco(&dd, word, sz));
- } else
- lim = 1;
- break;
- case ('o'):
- /* FALLTHROUGH */
- case ('w'):
- if ('\'' == wp[i++]) {
- term = '\'';
- break;
- }
- /* FALLTHROUGH */
- default:
- *d = DECO_SSPECIAL;
- i--;
- lim = 1;
- break;
- }
-
- assert(term || lim);
- *word = &wp[i];
-
- if (term) {
- j = i;
- while (wp[i] && wp[i] != term)
- i++;
- if ('\0' == wp[i]) {
- *d = DECO_NONE;
- return(i);
- }
-
- assert(i >= j);
- *sz = (size_t)(i - j);
-
- return(i + 1);
- }
-
- assert(lim > 0);
- *sz = (size_t)lim;
-
- for (j = 0; wp[i] && j < lim; j++)
- i++;
- if (j < lim)
- *d = DECO_NONE;
-
- return(i);
-}
-
/*
* Calculate the abstract widths and decimal positions of columns in a
* table. This routine allocates the columns structures then runs over
diff --git a/usr.bin/mandoc/out.h b/usr.bin/mandoc/out.h
index 0386a989d71..76f6bafe040 100644
--- a/usr.bin/mandoc/out.h
+++ b/usr.bin/mandoc/out.h
@@ -1,4 +1,4 @@
-/* $Id: out.h,v 1.10 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: out.h,v 1.11 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -31,27 +31,6 @@ enum roffscale {
SCALE_MAX
};
-enum roffdeco {
- DECO_NONE,
- DECO_NUMBERED, /* numbered character */
- DECO_SPECIAL, /* special character */
- DECO_SSPECIAL, /* single-char special */
- DECO_RESERVED, /* reserved word */
- DECO_BOLD, /* bold font */
- DECO_ITALIC, /* italic font */
- DECO_ROMAN, /* "normal" undecorated font */
- DECO_PREVIOUS, /* revert to previous font */
- DECO_NOSPACE, /* suppress spacing */
- DECO_FONT, /* font */
- DECO_FFONT, /* font family */
- DECO_MAX
-};
-
-enum chars {
- CHARS_ASCII, /* 7-bit ascii representation */
- CHARS_HTML /* unicode values */
-};
-
struct roffcol {
size_t width; /* width of cell */
size_t decimal; /* decimal position in cell */
@@ -85,18 +64,9 @@ __BEGIN_DECLS
while (/* CONSTCOND */ 0)
int a2roffsu(const char *, struct roffsu *, enum roffscale);
-int a2roffdeco(enum roffdeco *, const char **, size_t *);
void time2a(time_t, char *, size_t);
void tblcalc(struct rofftbl *tbl, const struct tbl_span *);
-void *chars_init(enum chars);
-const char *chars_num2char(const char *, size_t);
-const char *chars_spec2str(void *, const char *, size_t, size_t *);
-int chars_spec2cp(void *, const char *, size_t);
-const char *chars_res2str(void *, const char *, size_t, size_t *);
-int chars_res2cp(void *, const char *, size_t);
-void chars_free(void *);
-
__END_DECLS
#endif /*!OUT_H*/
diff --git a/usr.bin/mandoc/predefs.in b/usr.bin/mandoc/predefs.in
new file mode 100644
index 00000000000..6713bff1976
--- /dev/null
+++ b/usr.bin/mandoc/predefs.in
@@ -0,0 +1,65 @@
+/* $Id: predefs.in,v 1.1 2011/05/29 21:22:18 schwarze Exp $ */
+/*
+ * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * The predefined-string translation table. Each entry corresponds to a
+ * predefined string from (e.g.) tmac/mdoc/doc-nroff. The left-hand
+ * side corresponds to the input sequence (\*x, \*(xx and so on). The
+ * right-hand side is what's produced by libroff.
+ *
+ * XXX - C-escape strings!
+ * XXX - update PREDEFS_MAX in roff.c if adding more!
+ */
+
+PREDEF("Am", "&")
+PREDEF("Ba", "|")
+PREDEF("Ge", "\\(>=")
+PREDEF("Gt", ">")
+PREDEF("If", "\\(if")
+PREDEF("Le", "\\(<=")
+PREDEF("Lq", "\\(lq")
+PREDEF("Lt", "<")
+PREDEF("Na", "NaN")
+PREDEF("Ne", "\\(!=")
+PREDEF("Pi", "\\(*p")
+PREDEF("Pm", "\\(+-")
+PREDEF("Rq", "\\(rq")
+PREDEF("left-bracket", "[")
+PREDEF("left-parenthesis", "(")
+PREDEF("lp", "(")
+PREDEF("left-singlequote", "\\(oq")
+PREDEF("q", "\\(dq")
+PREDEF("quote-left", "\\(oq")
+PREDEF("quote-right", "\\(cq")
+PREDEF("R", "\\(rg")
+PREDEF("right-bracket", "]")
+PREDEF("right-parenthesis", ")")
+PREDEF("rp", ")")
+PREDEF("right-singlequote", "\\(cq")
+PREDEF("Tm", "\\(tm")
+PREDEF("Px", "POSIX")
+PREDEF("Ai", "ANSI")
+PREDEF("\'", "\\\'")
+PREDEF("aa", "\\(aa")
+PREDEF("ga", "\\(ga")
+PREDEF("`", "\\`")
+PREDEF("lq", "\\(lq")
+PREDEF("rq", "\\(rq")
+PREDEF("ua", "\\(ua")
+PREDEF("va", "\\(va")
+PREDEF("<=", "\\(<=")
+PREDEF(">=", "\\(>=")
diff --git a/usr.bin/mandoc/read.c b/usr.bin/mandoc/read.c
index d3b837fe6d7..aa491c9fe34 100644
--- a/usr.bin/mandoc/read.c
+++ b/usr.bin/mandoc/read.c
@@ -1,4 +1,4 @@
-/* $Id: read.c,v 1.1 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: read.c,v 1.2 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -134,7 +134,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"tab in non-literal context",
"end of line whitespace",
"bad comment style",
- "unknown escape sequence",
+ "bad escape sequence",
"unterminated quoted string",
"generic error",
@@ -437,7 +437,7 @@ rerun:
/*
* If input parsers have not been allocated, do so now.
- * We keep these instanced betwen parsers, but set them
+ * We keep these instanced between parsers, but set them
* locally per parse routine since we can use different
* parsers with each one.
*/
diff --git a/usr.bin/mandoc/roff.c b/usr.bin/mandoc/roff.c
index 6cf1164ba45..39393ccadf0 100644
--- a/usr.bin/mandoc/roff.c
+++ b/usr.bin/mandoc/roff.c
@@ -1,4 +1,4 @@
-/* $Id: roff.c,v 1.36 2011/04/24 16:28:48 schwarze Exp $ */
+/* $Id: roff.c,v 1.37 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -16,17 +16,15 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <assert.h>
-#include <errno.h>
#include <ctype.h>
-#include <limits.h>
#include <stdlib.h>
#include <string.h>
-#include <stdio.h>
#include "mandoc.h"
#include "libroff.h"
#include "libmandoc.h"
+/* Maximum number of nested if-else conditionals. */
#define RSTACK_MAX 128
enum rofft {
@@ -59,7 +57,7 @@ enum rofft {
ROFF_EQ,
ROFF_EN,
ROFF_cblock,
- ROFF_ccond, /* FIXME: remove this. */
+ ROFF_ccond,
ROFF_USERDEF,
ROFF_MAX
};
@@ -123,6 +121,14 @@ struct roffmac {
struct roffmac *next;
};
+struct predef {
+ const char *name; /* predefined input name */
+ const char *str; /* replacement symbol */
+};
+
+#define PREDEF(__name, __str) \
+ { (__name), (__str) },
+
static enum rofferr roff_block(ROFF_ARGS);
static enum rofferr roff_block_text(ROFF_ARGS);
static enum rofferr roff_block_sub(ROFF_ARGS);
@@ -140,7 +146,7 @@ static const char *roff_getstrn(const struct roff *,
static enum rofferr roff_line_ignore(ROFF_ARGS);
static enum rofferr roff_nr(ROFF_ARGS);
static int roff_res(struct roff *,
- char **, size_t *, int);
+ char **, size_t *, int, int);
static enum rofferr roff_rm(ROFF_ARGS);
static void roff_setstr(struct roff *,
const char *, const char *, int);
@@ -194,6 +200,12 @@ static struct roffmac roffs[ROFF_MAX] = {
{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
+/* Array of injected predefined strings. */
+#define PREDEFS_MAX 38
+static const struct predef predefs[PREDEFS_MAX] = {
+#include "predefs.in"
+};
+
static void roff_free1(struct roff *);
static enum rofft roff_hash_find(const char *, size_t);
static void roff_hash_init(void);
@@ -202,7 +214,6 @@ static void roffnode_push(struct roff *, enum rofft,
const char *, int, int);
static void roffnode_pop(struct roff *);
static enum rofft roff_parse(struct roff *, const char *, int *);
-static int roff_parse_nat(const char *, unsigned int *);
/* See roff_hash_find() */
#define ROFF_HASH(p) (p[0] - ASCII_LO)
@@ -228,7 +239,6 @@ roff_hash_init(void)
}
}
-
/*
* Look up a roff token by its name. Returns ROFF_MAX if no macro by
* the nil-terminated string name could be found.
@@ -351,6 +361,7 @@ struct roff *
roff_alloc(struct regset *regs, struct mparse *parse)
{
struct roff *r;
+ int i;
r = mandoc_calloc(1, sizeof(struct roff));
r->regs = regs;
@@ -358,6 +369,10 @@ roff_alloc(struct regset *regs, struct mparse *parse)
r->rstackpos = -1;
roff_hash_init();
+
+ for (i = 0; i < PREDEFS_MAX; i++)
+ roff_setstr(r, predefs[i].name, predefs[i].str, 0);
+
return(r);
}
@@ -368,7 +383,7 @@ roff_alloc(struct regset *regs, struct mparse *parse)
* is processed.
*/
static int
-roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
+roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
const char *stesc; /* start of an escape sequence ('\\') */
const char *stnam; /* start of the name, after "[(*" */
@@ -435,8 +450,9 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
res = roff_getstrn(r, stnam, (size_t)i);
if (NULL == res) {
- cp -= maxl ? 1 : 0;
- continue;
+ /* TODO: keep track of the correct position. */
+ mandoc_msg(MANDOCERR_BADESCAPE, r->parse, ln, pos, NULL);
+ res = "";
}
/* Replace the escape sequence by the string. */
@@ -472,7 +488,7 @@ roff_parseln(struct roff *r, int ln, char **bufp,
* words to fill in.
*/
- if (r->first_string && ! roff_res(r, bufp, szp, pos))
+ if (r->first_string && ! roff_res(r, bufp, szp, ln, pos))
return(ROFF_REPARSE);
ppos = pos;
@@ -589,27 +605,6 @@ roff_parse(struct roff *r, const char *buf, int *pos)
return(t);
}
-
-static int
-roff_parse_nat(const char *buf, unsigned int *res)
-{
- char *ep;
- long lval;
-
- errno = 0;
- lval = strtol(buf, &ep, 10);
- if (buf[0] == '\0' || *ep != '\0')
- return(0);
- if ((errno == ERANGE &&
- (lval == LONG_MAX || lval == LONG_MIN)) ||
- (lval > INT_MAX || lval < 0))
- return(0);
-
- *res = (unsigned int)lval;
- return(1);
-}
-
-
/* ARGSUSED */
static enum rofferr
roff_cblock(ROFF_ARGS)
@@ -861,21 +856,29 @@ roff_cond_sub(ROFF_ARGS)
{
enum rofft t;
enum roffrule rr;
+ char *ep;
rr = r->last->rule;
+ roffnode_cleanscope(r);
- /*
- * Clean out scope. If we've closed ourselves, then don't
- * continue.
+ /*
+ * If the macro is unknown, first check if it contains a closing
+ * delimiter `\}'. If it does, close out our scope and return
+ * the currently-scoped rule (ignore or continue). Else, drop
+ * into the currently-scoped rule.
*/
- roffnode_cleanscope(r);
-
if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
- if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])
- return(roff_ccond
- (r, ROFF_ccond, bufp, szp,
- ln, pos, pos + 2, offs));
+ ep = &(*bufp)[pos];
+ for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
+ ep++;
+ if ('}' != *ep)
+ continue;
+ *ep = '&';
+ roff_ccond(r, ROFF_ccond, bufp, szp,
+ ln, pos, pos + 2, offs);
+ break;
+ }
return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
}
@@ -884,6 +887,7 @@ roff_cond_sub(ROFF_ARGS)
* if they're either structurally required (such as loops and
* conditionals) or a closing macro.
*/
+
if (ROFFRULE_DENY == rr)
if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
if (ROFF_ccond != t)
@@ -894,37 +898,28 @@ roff_cond_sub(ROFF_ARGS)
ln, ppos, pos, offs));
}
-
/* ARGSUSED */
static enum rofferr
roff_cond_text(ROFF_ARGS)
{
- char *ep, *st;
+ char *ep;
enum roffrule rr;
rr = r->last->rule;
+ roffnode_cleanscope(r);
- /*
- * We display the value of the text if out current evaluation
- * scope permits us to do so.
- */
-
- /* FIXME: use roff_ccond? */
-
- st = &(*bufp)[pos];
- if (NULL == (ep = strstr(st, "\\}"))) {
- roffnode_cleanscope(r);
- return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
+ ep = &(*bufp)[pos];
+ for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
+ ep++;
+ if ('}' != *ep)
+ continue;
+ *ep = '&';
+ roff_ccond(r, ROFF_ccond, bufp, szp,
+ ln, pos, pos + 2, offs);
}
-
- if (ep == st || (ep > st && '\\' != *(ep - 1)))
- roffnode_pop(r);
-
- roffnode_cleanscope(r);
return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
}
-
static enum roffrule
roff_evalcond(const char *v, int *pos)
{
@@ -1086,6 +1081,7 @@ roff_nr(ROFF_ARGS)
{
const char *key;
char *val;
+ int iv;
struct reg *rg;
val = *bufp + pos;
@@ -1094,8 +1090,10 @@ roff_nr(ROFF_ARGS)
if (0 == strcmp(key, "nS")) {
rg[(int)REG_nS].set = 1;
- if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u))
- rg[(int)REG_nS].v.u = 0;
+ if ((iv = mandoc_strntou(val, strlen(val), 10)) >= 0)
+ rg[REG_nS].v.u = (unsigned)iv;
+ else
+ rg[(int)REG_nS].v.u = 0u;
}
return(ROFF_IGN);
diff --git a/usr.bin/mandoc/tbl_layout.c b/usr.bin/mandoc/tbl_layout.c
index 85efa0ee561..2d1989fa4e8 100644
--- a/usr.bin/mandoc/tbl_layout.c
+++ b/usr.bin/mandoc/tbl_layout.c
@@ -1,4 +1,4 @@
-/* $Id: tbl_layout.c,v 1.8 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: tbl_layout.c,v 1.9 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -68,6 +68,23 @@ mods(struct tbl_node *tbl, struct tbl_cell *cp,
char buf[5];
int i;
+ /* Not all types accept modifiers. */
+
+ switch (cp->pos) {
+ case (TBL_CELL_DOWN):
+ /* FALLTHROUGH */
+ case (TBL_CELL_HORIZ):
+ /* FALLTHROUGH */
+ case (TBL_CELL_DHORIZ):
+ /* FALLTHROUGH */
+ case (TBL_CELL_VERT):
+ /* FALLTHROUGH */
+ case (TBL_CELL_DVERT):
+ return(1);
+ default:
+ break;
+ }
+
mod:
/*
* XXX: since, at least for now, modifiers are non-conflicting
@@ -423,19 +440,19 @@ cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos)
}
static void
-head_adjust(const struct tbl_cell *cell, struct tbl_head *head)
+head_adjust(const struct tbl_cell *cellp, struct tbl_head *head)
{
- if (TBL_CELL_VERT != cell->pos &&
- TBL_CELL_DVERT != cell->pos) {
+ if (TBL_CELL_VERT != cellp->pos &&
+ TBL_CELL_DVERT != cellp->pos) {
head->pos = TBL_HEAD_DATA;
return;
}
- if (TBL_CELL_VERT == cell->pos)
+ if (TBL_CELL_VERT == cellp->pos)
if (TBL_HEAD_DVERT != head->pos)
head->pos = TBL_HEAD_VERT;
- if (TBL_CELL_DVERT == cell->pos)
+ if (TBL_CELL_DVERT == cellp->pos)
head->pos = TBL_HEAD_DVERT;
}
diff --git a/usr.bin/mandoc/term.c b/usr.bin/mandoc/term.c
index e89f927c9a6..863e0a7a602 100644
--- a/usr.bin/mandoc/term.c
+++ b/usr.bin/mandoc/term.c
@@ -1,6 +1,6 @@
-/* $Id: term.c,v 1.58 2011/04/24 16:22:02 schwarze Exp $ */
+/* $Id: term.c,v 1.59 2011/05/29 21:22:18 schwarze Exp $ */
/*
- * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
@@ -29,13 +29,10 @@
#include "term.h"
#include "main.h"
-static void spec(struct termp *, enum roffdeco,
- const char *, size_t);
-static void res(struct termp *, const char *, size_t);
-static void bufferc(struct termp *, char);
-static void adjbuf(struct termp *p, size_t);
-static void encode(struct termp *, const char *, size_t);
-
+static void adjbuf(struct termp *p, int);
+static void bufferc(struct termp *, char);
+static void encode(struct termp *, const char *, size_t);
+static void encode1(struct termp *, int);
void
term_free(struct termp *p)
@@ -44,7 +41,7 @@ term_free(struct termp *p)
if (p->buf)
free(p->buf);
if (p->symtab)
- chars_free(p->symtab);
+ mchars_free(p->symtab);
free(p);
}
@@ -69,18 +66,6 @@ term_end(struct termp *p)
(*p->end)(p);
}
-
-struct termp *
-term_alloc(enum termenc enc)
-{
- struct termp *p;
-
- p = mandoc_calloc(1, sizeof(struct termp));
- p->enc = enc;
- return(p);
-}
-
-
/*
* Flush a line of text. A "line" is loosely defined as being something
* that should be followed by a newline, regardless of whether it's
@@ -152,12 +137,12 @@ term_flushln(struct termp *p)
vis = vend = 0;
i = 0;
- while (i < (int)p->col) {
+ while (i < p->col) {
/*
* Handle literal tab characters: collapse all
* subsequent tabs into a single huge set of spaces.
*/
- while (i < (int)p->col && '\t' == p->buf[i]) {
+ while (i < p->col && '\t' == p->buf[i]) {
vend = (vis / p->tabwidth + 1) * p->tabwidth;
vbl += vend - vis;
vis = vend;
@@ -171,7 +156,7 @@ term_flushln(struct termp *p)
* space is printed according to regular spacing rules).
*/
- for (j = i, jhy = 0; j < (int)p->col; j++) {
+ for (j = i, jhy = 0; j < p->col; j++) {
if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
break;
@@ -214,7 +199,7 @@ term_flushln(struct termp *p)
}
/* Write out the [remaining] word. */
- for ( ; i < (int)p->col; i++) {
+ for ( ; i < p->col; i++) {
if (vend > bp && jhy > 0 && i > jhy)
break;
if ('\t' == p->buf[i])
@@ -341,44 +326,6 @@ term_vspace(struct termp *p)
(*p->endline)(p);
}
-
-static void
-numbered(struct termp *p, const char *word, size_t len)
-{
- const char *rhs;
-
- rhs = chars_num2char(word, len);
- if (rhs)
- encode(p, rhs, 1);
-}
-
-
-static void
-spec(struct termp *p, enum roffdeco d, const char *word, size_t len)
-{
- const char *rhs;
- size_t sz;
-
- rhs = chars_spec2str(p->symtab, word, len, &sz);
- if (rhs)
- encode(p, rhs, sz);
- else if (DECO_SSPECIAL == d)
- encode(p, word, len);
-}
-
-
-static void
-res(struct termp *p, const char *word, size_t len)
-{
- const char *rhs;
- size_t sz;
-
- rhs = chars_res2str(p->symtab, word, len, &sz);
- if (rhs)
- encode(p, rhs, sz);
-}
-
-
void
term_fontlast(struct termp *p)
{
@@ -443,7 +390,6 @@ term_fontpop(struct termp *p)
p->fonti--;
}
-
/*
* Handle pwords, partial words, which may be either a single word or a
* phrase that cannot be broken down (such as a literal string). This
@@ -452,9 +398,11 @@ term_fontpop(struct termp *p)
void
term_word(struct termp *p, const char *word)
{
- const char *seq;
+ const char *seq, *cp;
+ char c;
+ int sz, uc;
size_t ssz;
- enum roffdeco deco;
+ enum mandoc_esc esc;
if ( ! (TERMP_NOSPACE & p->flags)) {
if ( ! (TERMP_KEEP & p->flags)) {
@@ -474,7 +422,7 @@ term_word(struct termp *p, const char *word)
p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);
- while (*word) {
+ while ('\0' != *word) {
if ((ssz = strcspn(word, "\\")) > 0)
encode(p, word, ssz);
@@ -482,45 +430,71 @@ term_word(struct termp *p, const char *word)
if ('\\' != *word)
continue;
- seq = ++word;
- word += a2roffdeco(&deco, &seq, &ssz);
+ word++;
+ esc = mandoc_escape(&word, &seq, &sz);
+ if (ESCAPE_ERROR == esc)
+ break;
+
+ if (TERMENC_ASCII != p->enc)
+ switch (esc) {
+ case (ESCAPE_UNICODE):
+ uc = mchars_num2uc(seq + 1, sz - 1);
+ if ('\0' == uc)
+ break;
+ encode1(p, uc);
+ continue;
+ case (ESCAPE_SPECIAL):
+ uc = mchars_spec2cp(p->symtab, seq, sz);
+ if (uc <= 0)
+ break;
+ encode1(p, uc);
+ continue;
+ default:
+ break;
+ }
- switch (deco) {
- case (DECO_NUMBERED):
- numbered(p, seq, ssz);
+ switch (esc) {
+ case (ESCAPE_UNICODE):
+ encode1(p, '?');
break;
- case (DECO_RESERVED):
- res(p, seq, ssz);
+ case (ESCAPE_NUMBERED):
+ c = mchars_num2char(seq, sz);
+ if ('\0' != c)
+ encode(p, &c, 1);
break;
- case (DECO_SPECIAL):
- /* FALLTHROUGH */
- case (DECO_SSPECIAL):
- spec(p, deco, seq, ssz);
+ case (ESCAPE_SPECIAL):
+ cp = mchars_spec2str(p->symtab, seq, sz, &ssz);
+ if (NULL != cp)
+ encode(p, cp, ssz);
+ else if (1 == ssz)
+ encode(p, seq, sz);
break;
- case (DECO_BOLD):
+ case (ESCAPE_FONTBOLD):
term_fontrepl(p, TERMFONT_BOLD);
break;
- case (DECO_ITALIC):
+ case (ESCAPE_FONTITALIC):
term_fontrepl(p, TERMFONT_UNDER);
break;
- case (DECO_ROMAN):
+ case (ESCAPE_FONT):
+ /* FALLTHROUGH */
+ case (ESCAPE_FONTROMAN):
term_fontrepl(p, TERMFONT_NONE);
break;
- case (DECO_PREVIOUS):
+ case (ESCAPE_FONTPREV):
term_fontlast(p);
break;
+ case (ESCAPE_NOSPACE):
+ if ('\0' == *word)
+ p->flags |= TERMP_NOSPACE;
+ break;
default:
break;
}
-
- if (DECO_NOSPACE == deco && '\0' == *word)
- p->flags |= TERMP_NOSPACE;
}
}
-
static void
-adjbuf(struct termp *p, size_t sz)
+adjbuf(struct termp *p, int sz)
{
if (0 == p->maxcols)
@@ -528,10 +502,10 @@ adjbuf(struct termp *p, size_t sz)
while (sz >= p->maxcols)
p->maxcols <<= 2;
- p->buf = mandoc_realloc(p->buf, p->maxcols);
+ p->buf = mandoc_realloc
+ (p->buf, sizeof(int) * (size_t)p->maxcols);
}
-
static void
bufferc(struct termp *p, char c)
{
@@ -539,15 +513,44 @@ bufferc(struct termp *p, char c)
if (p->col + 1 >= p->maxcols)
adjbuf(p, p->col + 1);
- p->buf[(int)p->col++] = c;
+ p->buf[p->col++] = c;
}
+/*
+ * See encode().
+ * Do this for a single (probably unicode) value.
+ * Does not check for non-decorated glyphs.
+ */
+static void
+encode1(struct termp *p, int c)
+{
+ enum termfont f;
+
+ if (p->col + 4 >= p->maxcols)
+ adjbuf(p, p->col + 4);
+
+ f = term_fonttop(p);
+
+ if (TERMFONT_NONE == f) {
+ p->buf[p->col++] = c;
+ return;
+ } else if (TERMFONT_UNDER == f) {
+ p->buf[p->col++] = '_';
+ } else
+ p->buf[p->col++] = c;
+
+ p->buf[p->col++] = 8;
+ p->buf[p->col++] = c;
+}
static void
encode(struct termp *p, const char *word, size_t sz)
{
enum termfont f;
- int i;
+ int i, len;
+
+ /* LINTED */
+ len = sz;
/*
* Encode and buffer a string of characters. If the current
@@ -556,35 +559,34 @@ encode(struct termp *p, const char *word, size_t sz)
*/
if (TERMFONT_NONE == (f = term_fonttop(p))) {
- if (p->col + sz >= p->maxcols)
- adjbuf(p, p->col + sz);
- memcpy(&p->buf[(int)p->col], word, sz);
- p->col += sz;
+ if (p->col + len >= p->maxcols)
+ adjbuf(p, p->col + len);
+ for (i = 0; i < len; i++)
+ p->buf[p->col++] = word[i];
return;
}
/* Pre-buffer, assuming worst-case. */
- if (p->col + 1 + (sz * 3) >= p->maxcols)
- adjbuf(p, p->col + 1 + (sz * 3));
+ if (p->col + 1 + (len * 3) >= p->maxcols)
+ adjbuf(p, p->col + 1 + (len * 3));
- for (i = 0; i < (int)sz; i++) {
- if ( ! isgraph((u_char)word[i])) {
- p->buf[(int)p->col++] = word[i];
+ for (i = 0; i < len; i++) {
+ if ( ! isgraph((unsigned char)word[i])) {
+ p->buf[p->col++] = word[i];
continue;
}
if (TERMFONT_UNDER == f)
- p->buf[(int)p->col++] = '_';
+ p->buf[p->col++] = '_';
else
- p->buf[(int)p->col++] = word[i];
+ p->buf[p->col++] = word[i];
- p->buf[(int)p->col++] = 8;
- p->buf[(int)p->col++] = word[i];
+ p->buf[p->col++] = 8;
+ p->buf[p->col++] = word[i];
}
}
-
size_t
term_len(const struct termp *p, size_t sz)
{
@@ -596,59 +598,99 @@ term_len(const struct termp *p, size_t sz)
size_t
term_strlen(const struct termp *p, const char *cp)
{
- size_t sz, ssz, rsz, i;
- enum roffdeco d;
+ size_t sz, rsz, i;
+ int ssz, c;
const char *seq, *rhs;
+ enum mandoc_esc esc;
+ static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' };
- for (sz = 0; '\0' != *cp; )
- /*
- * Account for escaped sequences within string length
- * calculations. This follows the logic in term_word()
- * as we must calculate the width of produced strings.
- */
- if ('\\' == *cp) {
- seq = ++cp;
- cp += a2roffdeco(&d, &seq, &ssz);
+ /*
+ * Account for escaped sequences within string length
+ * calculations. This follows the logic in term_word() as we
+ * must calculate the width of produced strings.
+ */
- switch (d) {
- case (DECO_RESERVED):
- rhs = chars_res2str
- (p->symtab, seq, ssz, &rsz);
+ sz = 0;
+ while ('\0' != *cp) {
+ rsz = strcspn(cp, rej);
+ for (i = 0; i < rsz; i++)
+ sz += (*p->width)(p, *cp++);
+
+ c = 0;
+ switch (*cp) {
+ case ('\\'):
+ cp++;
+ esc = mandoc_escape(&cp, &seq, &ssz);
+ if (ESCAPE_ERROR == esc)
+ return(sz);
+
+ if (TERMENC_ASCII != p->enc)
+ switch (esc) {
+ case (ESCAPE_UNICODE):
+ c = mchars_num2uc
+ (seq + 1, ssz - 1);
+ if ('\0' == c)
+ break;
+ sz += (*p->width)(p, c);
+ continue;
+ case (ESCAPE_SPECIAL):
+ c = mchars_spec2cp
+ (p->symtab, seq, ssz);
+ if (c <= 0)
+ break;
+ sz += (*p->width)(p, c);
+ continue;
+ default:
+ break;
+ }
+
+ rhs = NULL;
+
+ switch (esc) {
+ case (ESCAPE_UNICODE):
+ sz += (*p->width)(p, '?');
break;
- case (DECO_SPECIAL):
- /* FALLTHROUGH */
- case (DECO_SSPECIAL):
- rhs = chars_spec2str
+ case (ESCAPE_NUMBERED):
+ c = mchars_num2char(seq, ssz);
+ if ('\0' != c)
+ sz += (*p->width)(p, c);
+ break;
+ case (ESCAPE_SPECIAL):
+ rhs = mchars_spec2str
(p->symtab, seq, ssz, &rsz);
- /* Allow for one-char escapes. */
- if (DECO_SSPECIAL != d || rhs)
+ if (ssz != 1 || rhs)
break;
rhs = seq;
rsz = ssz;
break;
default:
- rhs = NULL;
break;
}
- if (rhs)
- for (i = 0; i < rsz; i++)
- sz += (*p->width)(p, *rhs++);
- } else if (ASCII_NBRSP == *cp) {
+ if (NULL == rhs)
+ break;
+
+ for (i = 0; i < rsz; i++)
+ sz += (*p->width)(p, *rhs++);
+ break;
+ case (ASCII_NBRSP):
sz += (*p->width)(p, ' ');
cp++;
- } else if (ASCII_HYPH == *cp) {
+ break;
+ case (ASCII_HYPH):
sz += (*p->width)(p, '-');
cp++;
- } else
- sz += (*p->width)(p, *cp++);
+ break;
+ default:
+ break;
+ }
+ }
return(sz);
}
-
/* ARGSUSED */
size_t
term_vspan(const struct termp *p, const struct roffsu *su)
@@ -685,7 +727,6 @@ term_vspan(const struct termp *p, const struct roffsu *su)
r);
}
-
size_t
term_hspan(const struct termp *p, const struct roffsu *su)
{
diff --git a/usr.bin/mandoc/term.h b/usr.bin/mandoc/term.h
index 1dfeecf0e1f..30d9b70a9d5 100644
--- a/usr.bin/mandoc/term.h
+++ b/usr.bin/mandoc/term.h
@@ -1,4 +1,4 @@
-/* $Id: term.h,v 1.29 2011/01/09 14:30:48 schwarze Exp $ */
+/* $Id: term.h,v 1.30 2011/05/29 21:22:18 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -22,7 +22,9 @@ __BEGIN_DECLS
struct termp;
enum termenc {
- TERMENC_ASCII
+ TERMENC_ASCII,
+ TERMENC_LOCALE,
+ TERMENC_UTF8
};
enum termtype {
@@ -42,35 +44,6 @@ enum termfont {
typedef void (*term_margin)(struct termp *, const void *);
-struct termp_ps {
- int flags;
-#define PS_INLINE (1 << 0) /* we're in a word */
-#define PS_MARGINS (1 << 1) /* we're in the margins */
-#define PS_NEWPAGE (1 << 2) /* new page, no words yet */
- size_t pscol; /* visible column (AFM units) */
- size_t psrow; /* visible row (AFM units) */
- char *psmarg; /* margin buf */
- size_t psmargsz; /* margin buf size */
- size_t psmargcur; /* cur index in margin buf */
- char last; /* character buffer */
- enum termfont lastf; /* last set font */
- size_t scale; /* font scaling factor */
- size_t pages; /* number of pages shown */
- size_t lineheight; /* line height (AFM units) */
- size_t top; /* body top (AFM units) */
- size_t bottom; /* body bottom (AFM units) */
- size_t height; /* page height (AFM units */
- size_t width; /* page width (AFM units) */
- size_t left; /* body left (AFM units) */
- size_t header; /* header pos (AFM units) */
- size_t footer; /* footer pos (AFM units) */
- size_t pdfbytes; /* current output byte */
- size_t pdflastpg; /* byte of last page mark */
- size_t pdfbody; /* start of body object */
- size_t *pdfobjs; /* table of object offsets */
- size_t pdfobjsz; /* size of pdfobjs */
-};
-
struct termp_tbl {
int width; /* width in fixed chars */
int decimal; /* decimal point position */
@@ -82,10 +55,10 @@ struct termp {
size_t defrmargin; /* Right margin of the device. */
size_t rmargin; /* Current right margin. */
size_t maxrmargin; /* Max right margin. */
- size_t maxcols; /* Max size of buf. */
+ int maxcols; /* Max size of buf. */
size_t offset; /* Margin offest. */
size_t tabwidth; /* Distance of tab positions. */
- size_t col; /* Bytes in buf. */
+ int col; /* Bytes in buf. */
size_t viscol; /* Chars on current line. */
int overstep; /* See termp_flushln(). */
int flags;
@@ -103,29 +76,26 @@ struct termp {
#define TERMP_ANPREC (1 << 13) /* See termp_an_pre(). */
#define TERMP_KEEP (1 << 14) /* Keep words together. */
#define TERMP_PREKEEP (1 << 15) /* ...starting with the next one. */
- char *buf; /* Output buffer. */
+ int *buf; /* Output buffer. */
enum termenc enc; /* Type of encoding. */
- void *symtab; /* Encoded-symbol table. */
+ struct mchars *symtab; /* Encoded-symbol table. */
enum termfont fontl; /* Last font set. */
enum termfont fontq[10]; /* Symmetric fonts. */
int fonti; /* Index of font stack. */
term_margin headf; /* invoked to print head */
term_margin footf; /* invoked to print foot */
- void (*letter)(struct termp *, char);
+ void (*letter)(struct termp *, int);
void (*begin)(struct termp *);
void (*end)(struct termp *);
void (*endline)(struct termp *);
void (*advance)(struct termp *, size_t);
- size_t (*width)(const struct termp *, char);
+ size_t (*width)(const struct termp *, int);
double (*hspan)(const struct termp *,
const struct roffsu *);
const void *argf; /* arg for headf/footf */
- union {
- struct termp_ps ps;
- } engine;
+ struct termp_ps *ps;
};
-struct termp *term_alloc(enum termenc);
void term_tbl(struct termp *, const struct tbl_span *);
void term_free(struct termp *);
void term_newln(struct termp *);
diff --git a/usr.bin/mandoc/term_ascii.c b/usr.bin/mandoc/term_ascii.c
index 5462ec5e5f7..7d70dc4a86a 100644
--- a/usr.bin/mandoc/term_ascii.c
+++ b/usr.bin/mandoc/term_ascii.c
@@ -1,6 +1,6 @@
-/* $Id: term_ascii.c,v 1.5 2011/01/31 02:36:55 schwarze Exp $ */
+/* $Id: term_ascii.c,v 1.6 2011/05/29 21:22:18 schwarze Exp $ */
/*
- * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -17,47 +17,70 @@
#include <sys/types.h>
#include <assert.h>
+#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
+#include <wchar.h>
#include "mandoc.h"
#include "out.h"
#include "term.h"
#include "main.h"
+static struct termp *ascii_init(enum termenc, char *);
static double ascii_hspan(const struct termp *,
const struct roffsu *);
-static size_t ascii_width(const struct termp *, char);
+static size_t ascii_width(const struct termp *, int);
static void ascii_advance(struct termp *, size_t);
static void ascii_begin(struct termp *);
static void ascii_end(struct termp *);
static void ascii_endline(struct termp *);
-static void ascii_letter(struct termp *, char);
+static void ascii_letter(struct termp *, int);
+static void locale_advance(struct termp *, size_t);
+static void locale_endline(struct termp *);
+static void locale_letter(struct termp *, int);
+static size_t locale_width(const struct termp *, int);
-void *
-ascii_alloc(char *outopts)
+static struct termp *
+ascii_init(enum termenc enc, char *outopts)
{
- struct termp *p;
const char *toks[2];
char *v;
+ struct termp *p;
- p = term_alloc(TERMENC_ASCII);
+ p = mandoc_calloc(1, sizeof(struct termp));
+ p->enc = enc;
p->tabwidth = 5;
p->defrmargin = 78;
- p->advance = ascii_advance;
p->begin = ascii_begin;
p->end = ascii_end;
- p->endline = ascii_endline;
p->hspan = ascii_hspan;
- p->letter = ascii_letter;
p->type = TERMTYPE_CHAR;
+
+ p->enc = TERMENC_ASCII;
+ p->advance = ascii_advance;
+ p->endline = ascii_endline;
+ p->letter = ascii_letter;
p->width = ascii_width;
+ if (TERMENC_ASCII != enc) {
+ v = TERMENC_LOCALE == enc ?
+ setlocale(LC_ALL, "") :
+ setlocale(LC_CTYPE, "UTF-8");
+ if (NULL != v && MB_CUR_MAX > 1) {
+ p->enc = enc;
+ p->advance = locale_advance;
+ p->endline = locale_endline;
+ p->letter = locale_letter;
+ p->width = locale_width;
+ }
+ }
+
toks[0] = "width";
toks[1] = NULL;
@@ -77,16 +100,36 @@ ascii_alloc(char *outopts)
return(p);
}
+void *
+ascii_alloc(char *outopts)
+{
+
+ return(ascii_init(TERMENC_ASCII, outopts));
+}
+
+void *
+utf8_alloc(char *outopts)
+{
+
+ return(ascii_init(TERMENC_UTF8, outopts));
+}
+
+
+void *
+locale_alloc(char *outopts)
+{
+
+ return(ascii_init(TERMENC_LOCALE, outopts));
+}
/* ARGSUSED */
static size_t
-ascii_width(const struct termp *p, char c)
+ascii_width(const struct termp *p, int c)
{
return(1);
}
-
void
ascii_free(void *arg)
{
@@ -94,16 +137,14 @@ ascii_free(void *arg)
term_free((struct termp *)arg);
}
-
/* ARGSUSED */
static void
-ascii_letter(struct termp *p, char c)
+ascii_letter(struct termp *p, int c)
{
putchar(c);
}
-
static void
ascii_begin(struct termp *p)
{
@@ -111,7 +152,6 @@ ascii_begin(struct termp *p)
(*p->headf)(p, p->argf);
}
-
static void
ascii_end(struct termp *p)
{
@@ -119,7 +159,6 @@ ascii_end(struct termp *p)
(*p->footf)(p, p->argf);
}
-
/* ARGSUSED */
static void
ascii_endline(struct termp *p)
@@ -128,19 +167,16 @@ ascii_endline(struct termp *p)
putchar('\n');
}
-
/* ARGSUSED */
static void
ascii_advance(struct termp *p, size_t len)
{
size_t i;
- /* Just print whitespace on the terminal. */
for (i = 0; i < len; i++)
putchar(' ');
}
-
/* ARGSUSED */
static double
ascii_hspan(const struct termp *p, const struct roffsu *su)
@@ -179,3 +215,37 @@ ascii_hspan(const struct termp *p, const struct roffsu *su)
return(r);
}
+/* ARGSUSED */
+static size_t
+locale_width(const struct termp *p, int c)
+{
+ int rc;
+
+ return((rc = wcwidth(c)) < 0 ? 0 : rc);
+}
+
+/* ARGSUSED */
+static void
+locale_advance(struct termp *p, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++)
+ putwchar(L' ');
+}
+
+/* ARGSUSED */
+static void
+locale_endline(struct termp *p)
+{
+
+ putwchar(L'\n');
+}
+
+/* ARGSUSED */
+static void
+locale_letter(struct termp *p, int c)
+{
+
+ putwchar(c);
+}
diff --git a/usr.bin/mandoc/term_ps.c b/usr.bin/mandoc/term_ps.c
index 512c602ffe0..761dc1b0423 100644
--- a/usr.bin/mandoc/term_ps.c
+++ b/usr.bin/mandoc/term_ps.c
@@ -1,6 +1,6 @@
-/* $Id: term_ps.c,v 1.16 2011/04/21 22:59:54 schwarze Exp $ */
+/* $Id: term_ps.c,v 1.17 2011/05/29 21:22:18 schwarze Exp $ */
/*
- * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -34,13 +34,16 @@
#include "main.h"
#include "term.h"
+/* These work the buffer used by the header and footer. */
+#define PS_BUFSLOP 128
+
/* Convert PostScript point "x" to an AFM unit. */
#define PNT2AFM(p, x) /* LINTED */ \
- (size_t)((double)(x) * (1000.0 / (double)(p)->engine.ps.scale))
+ (size_t)((double)(x) * (1000.0 / (double)(p)->ps->scale))
/* Convert an AFM unit "x" to a PostScript points */
#define AFM2PNT(p, x) /* LINTED */ \
- ((double)(x) / (1000.0 / (double)(p)->engine.ps.scale))
+ ((double)(x) / (1000.0 / (double)(p)->ps->scale))
struct glyph {
unsigned short wx; /* WX in AFM */
@@ -52,6 +55,54 @@ struct font {
struct glyph gly[MAXCHAR]; /* glyph metrics */
};
+struct termp_ps {
+ int flags;
+#define PS_INLINE (1 << 0) /* we're in a word */
+#define PS_MARGINS (1 << 1) /* we're in the margins */
+#define PS_NEWPAGE (1 << 2) /* new page, no words yet */
+ size_t pscol; /* visible column (AFM units) */
+ size_t psrow; /* visible row (AFM units) */
+ char *psmarg; /* margin buf */
+ size_t psmargsz; /* margin buf size */
+ size_t psmargcur; /* cur index in margin buf */
+ char last; /* character buffer */
+ enum termfont lastf; /* last set font */
+ size_t scale; /* font scaling factor */
+ size_t pages; /* number of pages shown */
+ size_t lineheight; /* line height (AFM units) */
+ size_t top; /* body top (AFM units) */
+ size_t bottom; /* body bottom (AFM units) */
+ size_t height; /* page height (AFM units */
+ size_t width; /* page width (AFM units) */
+ size_t left; /* body left (AFM units) */
+ size_t header; /* header pos (AFM units) */
+ size_t footer; /* footer pos (AFM units) */
+ size_t pdfbytes; /* current output byte */
+ size_t pdflastpg; /* byte of last page mark */
+ size_t pdfbody; /* start of body object */
+ size_t *pdfobjs; /* table of object offsets */
+ size_t pdfobjsz; /* size of pdfobjs */
+};
+
+static double ps_hspan(const struct termp *,
+ const struct roffsu *);
+static size_t ps_width(const struct termp *, int);
+static void ps_advance(struct termp *, size_t);
+static void ps_begin(struct termp *);
+static void ps_closepage(struct termp *);
+static void ps_end(struct termp *);
+static void ps_endline(struct termp *);
+static void ps_fclose(struct termp *);
+static void ps_growbuf(struct termp *, size_t);
+static void ps_letter(struct termp *, int);
+static void ps_pclose(struct termp *);
+static void ps_pletter(struct termp *, int);
+static void ps_printf(struct termp *, const char *, ...);
+static void ps_putchar(struct termp *, char);
+static void ps_setfont(struct termp *, enum termfont);
+static struct termp *pspdf_alloc(char *);
+static void pdf_obj(struct termp *, size_t);
+
/*
* We define, for the time being, three fonts: bold, oblique/italic, and
* normal (roman). The following table hard-codes the font metrics for
@@ -352,44 +403,6 @@ static const struct font fonts[TERMFONT__MAX] = {
} },
};
-/* These work the buffer used by the header and footer. */
-#define PS_BUFSLOP 128
-
-static void
-ps_growbuf(struct termp *p, size_t sz)
-{
- if (p->engine.ps.psmargcur + sz <= p->engine.ps.psmargsz)
- return;
-
- if (sz < PS_BUFSLOP)
- sz = PS_BUFSLOP;
-
- p->engine.ps.psmargsz += sz;
-
- p->engine.ps.psmarg = mandoc_realloc
- (p->engine.ps.psmarg,
- p->engine.ps.psmargsz);
-}
-
-static double ps_hspan(const struct termp *,
- const struct roffsu *);
-static size_t ps_width(const struct termp *, char);
-static void ps_advance(struct termp *, size_t);
-static void ps_begin(struct termp *);
-static void ps_closepage(struct termp *);
-static void ps_end(struct termp *);
-static void ps_endline(struct termp *);
-static void ps_fclose(struct termp *);
-static void ps_letter(struct termp *, char);
-static void ps_pclose(struct termp *);
-static void ps_pletter(struct termp *, int);
-static void ps_printf(struct termp *, const char *, ...);
-static void ps_putchar(struct termp *, char);
-static void ps_setfont(struct termp *, enum termfont);
-static struct termp *pspdf_alloc(char *);
-static void pdf_obj(struct termp *, size_t);
-
-
void *
pdf_alloc(char *outopts)
{
@@ -401,7 +414,6 @@ pdf_alloc(char *outopts)
return(p);
}
-
void *
ps_alloc(char *outopts)
{
@@ -413,7 +425,6 @@ ps_alloc(char *outopts)
return(p);
}
-
static struct termp *
pspdf_alloc(char *outopts)
{
@@ -423,7 +434,9 @@ pspdf_alloc(char *outopts)
const char *pp;
char *v;
- p = term_alloc(TERMENC_ASCII);
+ p = mandoc_calloc(1, sizeof(struct termp));
+ p->enc = TERMENC_ASCII;
+ p->ps = mandoc_calloc(1, sizeof(struct termp_ps));
p->advance = ps_advance;
p->begin = ps_begin;
@@ -482,7 +495,7 @@ pspdf_alloc(char *outopts)
* calculations occur.
*/
- p->engine.ps.scale = 11;
+ p->ps->scale = 11;
/* Remember millimetres -> AFM units. */
@@ -498,16 +511,16 @@ pspdf_alloc(char *outopts)
/* Line-height is 1.4em. */
- lineheight = PNT2AFM(p, ((double)p->engine.ps.scale * 1.4));
+ lineheight = PNT2AFM(p, ((double)p->ps->scale * 1.4));
- p->engine.ps.width = pagex;
- p->engine.ps.height = pagey;
- p->engine.ps.header = pagey - (marginy / 2) - (lineheight / 2);
- p->engine.ps.top = pagey - marginy;
- p->engine.ps.footer = (marginy / 2) - (lineheight / 2);
- p->engine.ps.bottom = marginy;
- p->engine.ps.left = marginx;
- p->engine.ps.lineheight = lineheight;
+ p->ps->width = pagex;
+ p->ps->height = pagey;
+ p->ps->header = pagey - (marginy / 2) - (lineheight / 2);
+ p->ps->top = pagey - marginy;
+ p->ps->footer = (marginy / 2) - (lineheight / 2);
+ p->ps->bottom = marginy;
+ p->ps->left = marginx;
+ p->ps->lineheight = lineheight;
p->defrmargin = pagex - (marginx * 2);
return(p);
@@ -521,11 +534,12 @@ pspdf_free(void *arg)
p = (struct termp *)arg;
- if (p->engine.ps.psmarg)
- free(p->engine.ps.psmarg);
- if (p->engine.ps.pdfobjs)
- free(p->engine.ps.pdfobjs);
+ if (p->ps->psmarg)
+ free(p->ps->psmarg);
+ if (p->ps->pdfobjs)
+ free(p->ps->pdfobjs);
+ free(p->ps);
term_free(p);
}
@@ -544,10 +558,10 @@ ps_printf(struct termp *p, const char *fmt, ...)
* into our growable margin buffer.
*/
- if ( ! (PS_MARGINS & p->engine.ps.flags)) {
+ if ( ! (PS_MARGINS & p->ps->flags)) {
len = vprintf(fmt, ap);
va_end(ap);
- p->engine.ps.pdfbytes += /* LINTED */
+ p->ps->pdfbytes += /* LINTED */
len < 0 ? 0 : (size_t)len;
return;
}
@@ -560,12 +574,12 @@ ps_printf(struct termp *p, const char *fmt, ...)
ps_growbuf(p, PS_BUFSLOP);
- pos = (int)p->engine.ps.psmargcur;
- len = vsnprintf(&p->engine.ps.psmarg[pos], PS_BUFSLOP, fmt, ap);
+ pos = (int)p->ps->psmargcur;
+ len = vsnprintf(&p->ps->psmarg[pos], PS_BUFSLOP, fmt, ap);
va_end(ap);
- p->engine.ps.psmargcur = strlen(p->engine.ps.psmarg);
+ p->ps->psmargcur = strlen(p->ps->psmarg);
}
@@ -576,18 +590,18 @@ ps_putchar(struct termp *p, char c)
/* See ps_printf(). */
- if ( ! (PS_MARGINS & p->engine.ps.flags)) {
+ if ( ! (PS_MARGINS & p->ps->flags)) {
/* LINTED */
putchar(c);
- p->engine.ps.pdfbytes++;
+ p->ps->pdfbytes++;
return;
}
ps_growbuf(p, 2);
- pos = (int)p->engine.ps.psmargcur++;
- p->engine.ps.psmarg[pos++] = c;
- p->engine.ps.psmarg[pos] = '\0';
+ pos = (int)p->ps->psmargcur++;
+ p->ps->psmarg[pos++] = c;
+ p->ps->psmarg[pos] = '\0';
}
@@ -597,18 +611,18 @@ pdf_obj(struct termp *p, size_t obj)
assert(obj > 0);
- if ((obj - 1) >= p->engine.ps.pdfobjsz) {
- p->engine.ps.pdfobjsz = obj + 128;
- p->engine.ps.pdfobjs = realloc
- (p->engine.ps.pdfobjs,
- p->engine.ps.pdfobjsz * sizeof(size_t));
- if (NULL == p->engine.ps.pdfobjs) {
+ if ((obj - 1) >= p->ps->pdfobjsz) {
+ p->ps->pdfobjsz = obj + 128;
+ p->ps->pdfobjs = realloc
+ (p->ps->pdfobjs,
+ p->ps->pdfobjsz * sizeof(size_t));
+ if (NULL == p->ps->pdfobjs) {
perror(NULL);
exit((int)MANDOCLEVEL_SYSERR);
}
}
- p->engine.ps.pdfobjs[(int)obj - 1] = p->engine.ps.pdfbytes;
+ p->ps->pdfobjs[(int)obj - 1] = p->ps->pdfbytes;
ps_printf(p, "%zu 0 obj\n", obj);
}
@@ -626,14 +640,14 @@ ps_closepage(struct termp *p)
* for the page contents.
*/
- assert(p->engine.ps.psmarg && p->engine.ps.psmarg[0]);
- ps_printf(p, "%s", p->engine.ps.psmarg);
+ assert(p->ps->psmarg && p->ps->psmarg[0]);
+ ps_printf(p, "%s", p->ps->psmarg);
if (TERMTYPE_PS != p->type) {
ps_printf(p, "ET\n");
- len = p->engine.ps.pdfbytes - p->engine.ps.pdflastpg;
- base = p->engine.ps.pages * 4 + p->engine.ps.pdfbody;
+ len = p->ps->pdfbytes - p->ps->pdflastpg;
+ base = p->ps->pages * 4 + p->ps->pdfbody;
ps_printf(p, "endstream\nendobj\n");
@@ -660,10 +674,10 @@ ps_closepage(struct termp *p)
} else
ps_printf(p, "showpage\n");
- p->engine.ps.pages++;
- p->engine.ps.psrow = p->engine.ps.top;
- assert( ! (PS_NEWPAGE & p->engine.ps.flags));
- p->engine.ps.flags |= PS_NEWPAGE;
+ p->ps->pages++;
+ p->ps->psrow = p->ps->top;
+ assert( ! (PS_NEWPAGE & p->ps->flags));
+ p->ps->flags |= PS_NEWPAGE;
}
@@ -679,15 +693,15 @@ ps_end(struct termp *p)
* well as just one.
*/
- if ( ! (PS_NEWPAGE & p->engine.ps.flags)) {
- assert(0 == p->engine.ps.flags);
- assert('\0' == p->engine.ps.last);
+ if ( ! (PS_NEWPAGE & p->ps->flags)) {
+ assert(0 == p->ps->flags);
+ assert('\0' == p->ps->last);
ps_closepage(p);
}
if (TERMTYPE_PS == p->type) {
ps_printf(p, "%%%%Trailer\n");
- ps_printf(p, "%%%%Pages: %zu\n", p->engine.ps.pages);
+ ps_printf(p, "%%%%Pages: %zu\n", p->ps->pages);
ps_printf(p, "%%%%EOF\n");
return;
}
@@ -695,18 +709,18 @@ ps_end(struct termp *p)
pdf_obj(p, 2);
ps_printf(p, "<<\n/Type /Pages\n");
ps_printf(p, "/MediaBox [0 0 %zu %zu]\n",
- (size_t)AFM2PNT(p, p->engine.ps.width),
- (size_t)AFM2PNT(p, p->engine.ps.height));
+ (size_t)AFM2PNT(p, p->ps->width),
+ (size_t)AFM2PNT(p, p->ps->height));
- ps_printf(p, "/Count %zu\n", p->engine.ps.pages);
+ ps_printf(p, "/Count %zu\n", p->ps->pages);
ps_printf(p, "/Kids [");
- for (i = 0; i < p->engine.ps.pages; i++)
+ for (i = 0; i < p->ps->pages; i++)
ps_printf(p, " %zu 0 R", i * 4 +
- p->engine.ps.pdfbody + 3);
+ p->ps->pdfbody + 3);
- base = (p->engine.ps.pages - 1) * 4 +
- p->engine.ps.pdfbody + 4;
+ base = (p->ps->pages - 1) * 4 +
+ p->ps->pdfbody + 4;
ps_printf(p, "]\n>>\nendobj\n");
pdf_obj(p, base);
@@ -714,14 +728,14 @@ ps_end(struct termp *p)
ps_printf(p, "/Type /Catalog\n");
ps_printf(p, "/Pages 2 0 R\n");
ps_printf(p, ">>\n");
- xref = p->engine.ps.pdfbytes;
+ xref = p->ps->pdfbytes;
ps_printf(p, "xref\n");
ps_printf(p, "0 %zu\n", base + 1);
ps_printf(p, "0000000000 65535 f \n");
for (i = 0; i < base; i++)
ps_printf(p, "%.10zu 00000 n \n",
- p->engine.ps.pdfobjs[(int)i]);
+ p->ps->pdfobjs[(int)i]);
ps_printf(p, "trailer\n");
ps_printf(p, "<<\n");
@@ -746,33 +760,33 @@ ps_begin(struct termp *p)
* screen yet, so we don't need to initialise the primary state.
*/
- if (p->engine.ps.psmarg) {
- assert(p->engine.ps.psmargsz);
- p->engine.ps.psmarg[0] = '\0';
+ if (p->ps->psmarg) {
+ assert(p->ps->psmargsz);
+ p->ps->psmarg[0] = '\0';
}
- /*p->engine.ps.pdfbytes = 0;*/
- p->engine.ps.psmargcur = 0;
- p->engine.ps.flags = PS_MARGINS;
- p->engine.ps.pscol = p->engine.ps.left;
- p->engine.ps.psrow = p->engine.ps.header;
+ /*p->ps->pdfbytes = 0;*/
+ p->ps->psmargcur = 0;
+ p->ps->flags = PS_MARGINS;
+ p->ps->pscol = p->ps->left;
+ p->ps->psrow = p->ps->header;
ps_setfont(p, TERMFONT_NONE);
(*p->headf)(p, p->argf);
(*p->endline)(p);
- p->engine.ps.pscol = p->engine.ps.left;
- p->engine.ps.psrow = p->engine.ps.footer;
+ p->ps->pscol = p->ps->left;
+ p->ps->psrow = p->ps->footer;
(*p->footf)(p, p->argf);
(*p->endline)(p);
- p->engine.ps.flags &= ~PS_MARGINS;
+ p->ps->flags &= ~PS_MARGINS;
- assert(0 == p->engine.ps.flags);
- assert(p->engine.ps.psmarg);
- assert('\0' != p->engine.ps.psmarg[0]);
+ assert(0 == p->ps->flags);
+ assert(p->ps->psmarg);
+ assert('\0' != p->ps->psmarg[0]);
/*
* Print header and initialise page state. Following this,
@@ -790,8 +804,8 @@ ps_begin(struct termp *p)
ps_printf(p, "%%%%PageOrder: Ascend\n");
ps_printf(p, "%%%%DocumentMedia: "
"Default %zu %zu 0 () ()\n",
- (size_t)AFM2PNT(p, p->engine.ps.width),
- (size_t)AFM2PNT(p, p->engine.ps.height));
+ (size_t)AFM2PNT(p, p->ps->width),
+ (size_t)AFM2PNT(p, p->ps->height));
ps_printf(p, "%%%%DocumentNeededResources: font");
for (i = 0; i < (int)TERMFONT__MAX; i++)
@@ -816,10 +830,10 @@ ps_begin(struct termp *p)
}
}
- p->engine.ps.pdfbody = (size_t)TERMFONT__MAX + 3;
- p->engine.ps.pscol = p->engine.ps.left;
- p->engine.ps.psrow = p->engine.ps.top;
- p->engine.ps.flags |= PS_NEWPAGE;
+ p->ps->pdfbody = (size_t)TERMFONT__MAX + 3;
+ p->ps->pscol = p->ps->left;
+ p->ps->psrow = p->ps->top;
+ p->ps->flags |= PS_NEWPAGE;
ps_setfont(p, TERMFONT_NONE);
}
@@ -834,25 +848,25 @@ ps_pletter(struct termp *p, int c)
* in a new page and make sure the font is correctly set.
*/
- if (PS_NEWPAGE & p->engine.ps.flags) {
+ if (PS_NEWPAGE & p->ps->flags) {
if (TERMTYPE_PS == p->type) {
ps_printf(p, "%%%%Page: %zu %zu\n",
- p->engine.ps.pages + 1,
- p->engine.ps.pages + 1);
+ p->ps->pages + 1,
+ p->ps->pages + 1);
ps_printf(p, "/%s %zu selectfont\n",
- fonts[(int)p->engine.ps.lastf].name,
- p->engine.ps.scale);
+ fonts[(int)p->ps->lastf].name,
+ p->ps->scale);
} else {
- pdf_obj(p, p->engine.ps.pdfbody +
- p->engine.ps.pages * 4);
+ pdf_obj(p, p->ps->pdfbody +
+ p->ps->pages * 4);
ps_printf(p, "<<\n");
ps_printf(p, "/Length %zu 0 R\n",
- p->engine.ps.pdfbody + 1 +
- p->engine.ps.pages * 4);
+ p->ps->pdfbody + 1 +
+ p->ps->pages * 4);
ps_printf(p, ">>\nstream\n");
}
- p->engine.ps.pdflastpg = p->engine.ps.pdfbytes;
- p->engine.ps.flags &= ~PS_NEWPAGE;
+ p->ps->pdflastpg = p->ps->pdfbytes;
+ p->ps->flags &= ~PS_NEWPAGE;
}
/*
@@ -860,22 +874,22 @@ ps_pletter(struct termp *p, int c)
* now at the current cursor.
*/
- if ( ! (PS_INLINE & p->engine.ps.flags)) {
+ if ( ! (PS_INLINE & p->ps->flags)) {
if (TERMTYPE_PS != p->type) {
ps_printf(p, "BT\n/F%d %zu Tf\n",
- (int)p->engine.ps.lastf,
- p->engine.ps.scale);
+ (int)p->ps->lastf,
+ p->ps->scale);
ps_printf(p, "%.3f %.3f Td\n(",
- AFM2PNT(p, p->engine.ps.pscol),
- AFM2PNT(p, p->engine.ps.psrow));
+ AFM2PNT(p, p->ps->pscol),
+ AFM2PNT(p, p->ps->psrow));
} else
ps_printf(p, "%.3f %.3f moveto\n(",
- AFM2PNT(p, p->engine.ps.pscol),
- AFM2PNT(p, p->engine.ps.psrow));
- p->engine.ps.flags |= PS_INLINE;
+ AFM2PNT(p, p->ps->pscol),
+ AFM2PNT(p, p->ps->psrow));
+ p->ps->flags |= PS_INLINE;
}
- assert( ! (PS_NEWPAGE & p->engine.ps.flags));
+ assert( ! (PS_NEWPAGE & p->ps->flags));
/*
* We need to escape these characters as per the PostScript
@@ -898,17 +912,17 @@ ps_pletter(struct termp *p, int c)
/* Write the character and adjust where we are on the page. */
- f = (int)p->engine.ps.lastf;
+ f = (int)p->ps->lastf;
if (c <= 32 || (c - 32 >= MAXCHAR)) {
ps_putchar(p, ' ');
- p->engine.ps.pscol += (size_t)fonts[f].gly[0].wx;
+ p->ps->pscol += (size_t)fonts[f].gly[0].wx;
return;
}
ps_putchar(p, (char)c);
c -= 32;
- p->engine.ps.pscol += (size_t)fonts[f].gly[c].wx;
+ p->ps->pscol += (size_t)fonts[f].gly[c].wx;
}
@@ -922,7 +936,7 @@ ps_pclose(struct termp *p)
* or anything).
*/
- if ( ! (PS_INLINE & p->engine.ps.flags))
+ if ( ! (PS_INLINE & p->ps->flags))
return;
if (TERMTYPE_PS != p->type) {
@@ -930,7 +944,7 @@ ps_pclose(struct termp *p)
} else
ps_printf(p, ") show\n");
- p->engine.ps.flags &= ~PS_INLINE;
+ p->ps->flags &= ~PS_INLINE;
}
@@ -946,16 +960,16 @@ ps_fclose(struct termp *p)
* Following this, close out any scope that's open.
*/
- if ('\0' != p->engine.ps.last) {
- if (p->engine.ps.lastf != TERMFONT_NONE) {
+ if ('\0' != p->ps->last) {
+ if (p->ps->lastf != TERMFONT_NONE) {
ps_pclose(p);
ps_setfont(p, TERMFONT_NONE);
}
- ps_pletter(p, p->engine.ps.last);
- p->engine.ps.last = '\0';
+ ps_pletter(p, p->ps->last);
+ p->ps->last = '\0';
}
- if ( ! (PS_INLINE & p->engine.ps.flags))
+ if ( ! (PS_INLINE & p->ps->flags))
return;
ps_pclose(p);
@@ -963,9 +977,12 @@ ps_fclose(struct termp *p)
static void
-ps_letter(struct termp *p, char c)
+ps_letter(struct termp *p, int arg)
{
- char cc;
+ char cc, c;
+
+ /* LINTED */
+ c = arg >= 128 || arg <= 0 ? '?' : arg;
/*
* State machine dictates whether to buffer the last character
@@ -976,33 +993,33 @@ ps_letter(struct termp *p, char c)
* regular character and a regular buffer character.
*/
- if ('\0' == p->engine.ps.last) {
+ if ('\0' == p->ps->last) {
assert(8 != c);
- p->engine.ps.last = c;
+ p->ps->last = c;
return;
- } else if (8 == p->engine.ps.last) {
+ } else if (8 == p->ps->last) {
assert(8 != c);
- p->engine.ps.last = '\0';
+ p->ps->last = '\0';
} else if (8 == c) {
- assert(8 != p->engine.ps.last);
- if ('_' == p->engine.ps.last) {
- if (p->engine.ps.lastf != TERMFONT_UNDER) {
+ assert(8 != p->ps->last);
+ if ('_' == p->ps->last) {
+ if (p->ps->lastf != TERMFONT_UNDER) {
ps_pclose(p);
ps_setfont(p, TERMFONT_UNDER);
}
- } else if (p->engine.ps.lastf != TERMFONT_BOLD) {
+ } else if (p->ps->lastf != TERMFONT_BOLD) {
ps_pclose(p);
ps_setfont(p, TERMFONT_BOLD);
}
- p->engine.ps.last = c;
+ p->ps->last = c;
return;
} else {
- if (p->engine.ps.lastf != TERMFONT_NONE) {
+ if (p->ps->lastf != TERMFONT_NONE) {
ps_pclose(p);
ps_setfont(p, TERMFONT_NONE);
}
- cc = p->engine.ps.last;
- p->engine.ps.last = c;
+ cc = p->ps->last;
+ p->ps->last = c;
c = cc;
}
@@ -1022,7 +1039,7 @@ ps_advance(struct termp *p, size_t len)
*/
ps_fclose(p);
- p->engine.ps.pscol += len;
+ p->ps->pscol += len;
}
@@ -1040,16 +1057,16 @@ ps_endline(struct termp *p)
* lines, we'll do nasty stuff.
*/
- if (PS_MARGINS & p->engine.ps.flags)
+ if (PS_MARGINS & p->ps->flags)
return;
/* Left-justify. */
- p->engine.ps.pscol = p->engine.ps.left;
+ p->ps->pscol = p->ps->left;
/* If we haven't printed anything, return. */
- if (PS_NEWPAGE & p->engine.ps.flags)
+ if (PS_NEWPAGE & p->ps->flags)
return;
/*
@@ -1057,9 +1074,9 @@ ps_endline(struct termp *p)
* showpage and restart our row.
*/
- if (p->engine.ps.psrow >= p->engine.ps.lineheight +
- p->engine.ps.bottom) {
- p->engine.ps.psrow -= p->engine.ps.lineheight;
+ if (p->ps->psrow >= p->ps->lineheight +
+ p->ps->bottom) {
+ p->ps->psrow -= p->ps->lineheight;
return;
}
@@ -1072,37 +1089,37 @@ ps_setfont(struct termp *p, enum termfont f)
{
assert(f < TERMFONT__MAX);
- p->engine.ps.lastf = f;
+ p->ps->lastf = f;
/*
* If we're still at the top of the page, let the font-setting
* be delayed until we actually have stuff to print.
*/
- if (PS_NEWPAGE & p->engine.ps.flags)
+ if (PS_NEWPAGE & p->ps->flags)
return;
if (TERMTYPE_PS == p->type)
ps_printf(p, "/%s %zu selectfont\n",
fonts[(int)f].name,
- p->engine.ps.scale);
+ p->ps->scale);
else
ps_printf(p, "/F%d %zu Tf\n",
(int)f,
- p->engine.ps.scale);
+ p->ps->scale);
}
/* ARGSUSED */
static size_t
-ps_width(const struct termp *p, char c)
+ps_width(const struct termp *p, int c)
{
if (c <= 32 || c - 32 >= MAXCHAR)
return((size_t)fonts[(int)TERMFONT_NONE].gly[0].wx);
c -= 32;
- return((size_t)fonts[(int)TERMFONT_NONE].gly[(int)c].wx);
+ return((size_t)fonts[(int)TERMFONT_NONE].gly[c].wx);
}
@@ -1141,7 +1158,7 @@ ps_hspan(const struct termp *p, const struct roffsu *su)
fonts[(int)TERMFONT_NONE].gly[110 - 32].wx;
break;
case (SCALE_VS):
- r = su->scale * p->engine.ps.lineheight;
+ r = su->scale * p->ps->lineheight;
break;
default:
r = su->scale;
@@ -1151,3 +1168,18 @@ ps_hspan(const struct termp *p, const struct roffsu *su)
return(r);
}
+static void
+ps_growbuf(struct termp *p, size_t sz)
+{
+ if (p->ps->psmargcur + sz <= p->ps->psmargsz)
+ return;
+
+ if (sz < PS_BUFSLOP)
+ sz = PS_BUFSLOP;
+
+ p->ps->psmargsz += sz;
+
+ p->ps->psmarg = mandoc_realloc
+ (p->ps->psmarg, p->ps->psmargsz);
+}
+