src - OpenBSD base system

diff options


context:
space:
mode:

author	Ingo Schwarze <schwarze@cvs.openbsd.org>	2011-05-29 21:22:19 +0000
committer	Ingo Schwarze <schwarze@cvs.openbsd.org>	2011-05-29 21:22:19 +0000
commit	d49b1d8e996d3b7d5b11ff7f6fec1308da0f4d19 (patch)
tree	1b65c2913c52e78683a870fa30aacb6d0da621ec /usr.bin
parent	34e3b2211040149f713e27fa1d0e45aa08dcaa93 (diff)

Merge release 1.11.3, almost all code by kristaps@:

* Unicode output support (no Unicode input yet, though).
* Refactoring: completely handle predefined strings in roff.c.
- New function mandoc_escape() replaces a2roffdeco() and mandoc_special().
- Start using mandoc_getarg() in mdoc_argv.c.
- Clean up parsing of delimiters in mdoc(7).
* And many minor fixes and lots of cleanup.

Diffstat (limited to 'usr.bin')

-rw-r--r--

usr.bin/mandoc/Makefile

-rw-r--r--

usr.bin/mandoc/chars.c

129

-rw-r--r--

usr.bin/mandoc/chars.in

-rw-r--r--

usr.bin/mandoc/html.c

341

-rw-r--r--

usr.bin/mandoc/html.h

-rw-r--r--

usr.bin/mandoc/libmandoc.h

-rw-r--r--

usr.bin/mandoc/libmdoc.h

-rw-r--r--

usr.bin/mandoc/main.c

-rw-r--r--

usr.bin/mandoc/main.h

-rw-r--r--

usr.bin/mandoc/man_html.c

-rw-r--r--

usr.bin/mandoc/man_term.c

-rw-r--r--

usr.bin/mandoc/man_validate.c

-rw-r--r--

usr.bin/mandoc/mandoc.1

-rw-r--r--

usr.bin/mandoc/mandoc.c

485

-rw-r--r--

usr.bin/mandoc/mandoc.h

-rw-r--r--

usr.bin/mandoc/mdoc_argv.c

409

-rw-r--r--

usr.bin/mandoc/mdoc_html.c

-rw-r--r--

usr.bin/mandoc/mdoc_macro.c

-rw-r--r--

usr.bin/mandoc/mdoc_term.c

-rw-r--r--

usr.bin/mandoc/mdoc_validate.c

-rw-r--r--

usr.bin/mandoc/out.c

239

-rw-r--r--

usr.bin/mandoc/out.h

-rw-r--r--

usr.bin/mandoc/predefs.in

-rw-r--r--

usr.bin/mandoc/read.c

-rw-r--r--

usr.bin/mandoc/roff.c

124

-rw-r--r--

usr.bin/mandoc/tbl_layout.c

-rw-r--r--

usr.bin/mandoc/term.c

315

-rw-r--r--

usr.bin/mandoc/term.h

-rw-r--r--

usr.bin/mandoc/term_ascii.c

112

-rw-r--r--

usr.bin/mandoc/term_ps.c

382

30 files changed, 1597 insertions, 1590 deletions

diff --git a/usr.bin/mandoc/Makefile b/usr.bin/mandoc/Makefile
index 8e4388b2513..d507dc9af42 100644
--- a/usr.bin/mandoc/Makefile
+++ b/usr.bin/mandoc/Makefile

@@ -1,8 +1,8 @@

-# $OpenBSD: Makefile,v 1.55 2011/04/24 16:22:02 schwarze Exp $

+# $OpenBSD: Makefile,v 1.56 2011/05/29 21:22:18 schwarze Exp $

.include <bsd.own.mk>

-VERSION=1.11.1

+VERSION=1.11.3

CFLAGS+=-DVERSION=\"${VERSION}\"

CFLAGS+=-W -Wall -Wstrict-prototypes

diff --git a/usr.bin/mandoc/chars.c b/usr.bin/mandoc/chars.c
index 0446fa53ea4..7e27a3a8ff5 100644
--- a/usr.bin/mandoc/chars.c
+++ b/usr.bin/mandoc/chars.c

@@ -1,6 +1,6 @@

-/* $Id: chars.c,v 1.18 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: chars.c,v 1.19 2011/05/29 21:22:18 schwarze Exp $ */

* Permission to use, copy, modify, and distribute this software for any

@@ -16,12 +16,13 @@

* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

#include <assert.h>

+#include <ctype.h>

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include "mandoc.h"

-#include "out.h"

+#include "libmandoc.h"

#define PRINT_HI 126

#define PRINT_LO 32

@@ -31,52 +32,37 @@ struct ln {

const char *code;

const char *ascii;

int unicode;

- int type;

-#define CHARS_CHAR (1 << 0)

-#define CHARS_STRING (1 << 1)

-#define CHARS_BOTH (CHARS_CHAR | CHARS_STRING)

};

-#define LINES_MAX 353

+#define LINES_MAX 325

#define CHAR(in, ch, code) \

- { NULL, (in), (ch), (code), CHARS_CHAR },

-#define STRING(in, ch, code) \

- { NULL, (in), (ch), (code), CHARS_STRING },

-#define BOTH(in, ch, code) \

- { NULL, (in), (ch), (code), CHARS_BOTH },

+ { NULL, (in), (ch), (code) },

#define CHAR_TBL_START static struct ln lines[LINES_MAX] = {

#define CHAR_TBL_END };

#include "chars.in"

-struct ctab {

- enum chars type;

+struct mchars {

struct ln **htab;

};

-static inline int match(const struct ln *,

- const char *, size_t, int);

-static const struct ln *find(struct ctab *, const char *, size_t, int);

+static inline int match(const struct ln *, const char *, size_t);

+static const struct ln *find(struct mchars *, const char *, size_t);

void

-chars_free(void *arg)

+mchars_free(struct mchars *arg)

{

- struct ctab *tab;

- tab = (struct ctab *)arg;

- free(tab->htab);

- free(tab);

+ free(arg->htab);

+ free(arg);

}

-void *

-chars_init(enum chars type)

+struct mchars *

+mchars_alloc(void)

{

- struct ctab *tab;

+ struct mchars *tab;

struct ln **htab;

struct ln *pp;

int i, hash;

@@ -88,7 +74,7 @@ chars_init(enum chars type)

* (they're in-line re-ordered during lookup).

- tab = mandoc_malloc(sizeof(struct ctab));

+ tab = mandoc_malloc(sizeof(struct mchars));

htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **));

for (i = 0; i < LINES_MAX; i++) {

@@ -105,7 +91,6 @@ chars_init(enum chars type)

}

tab->htab = htab;

- tab->type = type;

return(tab);

}

@@ -114,79 +99,57 @@ chars_init(enum chars type)

* Special character to Unicode codepoint.

int

-chars_spec2cp(void *arg, const char *p, size_t sz)

- const struct ln *ln;

- ln = find((struct ctab *)arg, p, sz, CHARS_CHAR);

- if (NULL == ln)

- return(-1);

- return(ln->unicode);

-/*

- * Reserved word to Unicode codepoint.

- */

-int

-chars_res2cp(void *arg, const char *p, size_t sz)

+mchars_spec2cp(struct mchars *arg, const char *p, size_t sz)

{

const struct ln *ln;

- ln = find((struct ctab *)arg, p, sz, CHARS_STRING);

+ ln = find(arg, p, sz);

if (NULL == ln)

return(-1);

return(ln->unicode);

}

- * Numbered character to literal character,

- * represented as a null-terminated string for additional safety.

+ * Numbered character string to ASCII codepoint.

+ * This can only be a printable character (i.e., alnum, punct, space) so

+ * prevent the character from ruining our state (backspace, newline, and

+ * so on).

+ * If the character is illegal, returns '\0'.

-const char *

-chars_num2char(const char *p, size_t sz)

+char

+mchars_num2char(const char *p, size_t sz)

{

int i;

- static char c[2];

- if (sz > 3)

- return(NULL);

- i = atoi(p);

- if (i < 0 || i > 255)

- return(NULL);

- c[0] = (char)i;

- c[1] = '\0';

- return(c);

+ if ((i = mandoc_strntou(p, sz, 10)) < 0)

+ return('\0');

+ return(isprint(i) ? i : '\0');

}

-/*

- * Special character to string array.

+/*

+ * Hex character string to Unicode codepoint.

+ * If the character is illegal, returns '\0'.

-const char *

-chars_spec2str(void *arg, const char *p, size_t sz, size_t *rsz)

+int

+mchars_num2uc(const char *p, size_t sz)

{

- const struct ln *ln;

- ln = find((struct ctab *)arg, p, sz, CHARS_CHAR);

- if (NULL == ln)

- return(NULL);

+ int i;

- *rsz = strlen(ln->ascii);

- return(ln->ascii);

+ if ((i = mandoc_strntou(p, sz, 16)) < 0)

+ return('\0');

+ /* FIXME: make sure we're not in a bogus range. */

+ return(i > 0x80 && i <= 0x10FFFF ? i : '\0');

}

- * Reserved word to string array.

+ * Special character to string array.

const char *

-chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz)

+mchars_spec2str(struct mchars *arg, const char *p, size_t sz, size_t *rsz)

{

const struct ln *ln;

- ln = find((struct ctab *)arg, p, sz, CHARS_STRING);

+ ln = find(arg, p, sz);

if (NULL == ln)

return(NULL);

@@ -194,9 +157,8 @@ chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz)

return(ln->ascii);

}

static const struct ln *

-find(struct ctab *tab, const char *p, size_t sz, int type)

+find(struct mchars *tab, const char *p, size_t sz)

{

struct ln *pp, *prev;

struct ln **htab;

@@ -222,7 +184,7 @@ find(struct ctab *tab, const char *p, size_t sz, int type)

return(NULL);

for (prev = NULL; pp; pp = pp->next) {

- if ( ! match(pp, p, sz, type)) {

+ if ( ! match(pp, p, sz)) {

prev = pp;

continue;

}

@@ -239,13 +201,10 @@ find(struct ctab *tab, const char *p, size_t sz, int type)

return(NULL);

}

static inline int

-match(const struct ln *ln, const char *p, size_t sz, int type)

+match(const struct ln *ln, const char *p, size_t sz)

{

- if ( ! (ln->type & type))

- return(0);

if (strncmp(ln->code, p, sz))

return(0);

return('\0' == ln->code[(int)sz]);

diff --git a/usr.bin/mandoc/chars.in b/usr.bin/mandoc/chars.in
index 49676cd4b1e..e4b2c65aa2d 100644
--- a/usr.bin/mandoc/chars.in
+++ b/usr.bin/mandoc/chars.in

@@ -1,4 +1,4 @@

-/* $Id: chars.in,v 1.15 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: chars.in,v 1.16 2011/05/29 21:22:18 schwarze Exp $ */

@@ -16,15 +16,12 @@

- * The ASCII translation tables. STRING corresponds to predefined

- * strings (cf. mdoc_samples.7 and tmac/mdoc/doc-nroff). CHAR

- * corresponds to special characters (cf. groff_char.7). BOTH contains

- * sequences that are equivalent in both STRING and CHAR.

+ * The ASCII translation tables.

- * Either way, the left-hand side corresponds to the input sequence (\x,

- * \(xx, \*(xx and so on) whose length is listed second element. The

- * right-hand side is what's produced by the front-end, with the fourth

- * element being its length.

+ * The left-hand side corresponds to the input sequence (\x, \(xx, \*(xx

+ * and so on) whose length is listed second element. The right-hand

+ * side is what's produced by the front-end, with the fourth element

+ * being its length.

* XXX - C-escape strings!

* XXX - update LINES_MAX if adding more!

@@ -36,25 +33,25 @@ static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' };

CHAR_TBL_START

/* Spacing. */

-CHAR("c", "", 0)

+CHAR("c", "", 8203)

CHAR("0", " ", 8194)

CHAR(" ", ascii_nbrsp, 160)

CHAR("~", ascii_nbrsp, 160)

-CHAR("%", "", 0)

-CHAR("&", "", 0)

-CHAR("^", "", 0)

-CHAR("|", "", 0)

-CHAR("}", "", 0)

+CHAR("%", "", 8203)

+CHAR("&", "", 8203)

+CHAR("^", "", 8203)

+CHAR("|", "", 8203)

+CHAR("}", "", 8203)

/* Accents. */

CHAR("a\"", "\"", 779)

CHAR("a-", "-", 175)

CHAR("a.", ".", 729)

CHAR("a^", "^", 770)

-BOTH("\'", "\'", 769)

-BOTH("aa", "\'", 769)

-BOTH("ga", "`", 768)

-BOTH("`", "`", 768)

+CHAR("\'", "\'", 769)

+CHAR("aa", "\'", 769)

+CHAR("ga", "`", 768)

+CHAR("`", "`", 768)

CHAR("ab", "`", 774)

CHAR("ac", ",", 807)

CHAR("ad", "\"", 776)

@@ -68,8 +65,8 @@ CHAR("ti", "~", 126)

/* Quotes. */

CHAR("Bq", ",,", 8222)

CHAR("bq", ",", 8218)

-BOTH("lq", "``", 8220)

-BOTH("rq", "\'\'", 8221)

+CHAR("lq", "``", 8220)

+CHAR("rq", "\'\'", 8221)

CHAR("oq", "`", 8216)

CHAR("cq", "\'", 8217)

CHAR("aq", "\'", 39)

@@ -232,8 +229,8 @@ CHAR("<-", "<-", 8592)

CHAR("->", "->", 8594)

CHAR("<>", "<>", 8596)

CHAR("da", "v", 8595)

-BOTH("ua", "^", 8593)

-BOTH("va", "^v", 8597)

+CHAR("ua", "^", 8593)

+CHAR("va", "^v", 8597)

CHAR("lA", "<=", 8656)

CHAR("rA", "=>", 8658)

CHAR("hA", "<=>", 8660)

@@ -270,8 +267,8 @@ CHAR("di", "-:-", 247)

CHAR("tdi", "-:-", 247)

CHAR("f/", "/", 8260)

CHAR("**", "*", 8727)

-BOTH("<=", "<=", 8804)

-BOTH(">=", ">=", 8805)

+CHAR("<=", "<=", 8804)

+CHAR(">=", ">=", 8805)

CHAR("<<", "<<", 8810)

CHAR(">>", ">>", 8811)

CHAR("eq", "=", 61)

@@ -348,36 +345,6 @@ CHAR("Po", "L", 163)

CHAR("Cs", "x", 164)

CHAR("Fn", "f", 402)

-/* Old style. */

-STRING("Am", "&", 38)

-STRING("Ba", "|", 124)

-STRING("Ge", ">=", 8805)

-STRING("Gt", ">", 62)

-STRING("If", "infinity", 0)

-STRING("Le", "<=", 8804)

-STRING("Lq", "``", 8220)

-STRING("Lt", "<", 60)

-STRING("Na", "NaN", 0)

-STRING("Ne", "!=", 8800)

-STRING("Pi", "pi", 960)

-STRING("Pm", "+-", 177)

-STRING("Rq", "\'\'", 8221)

-STRING("left-bracket", "[", 91)

-STRING("left-parenthesis", "(", 40)

-STRING("left-singlequote", "`", 8216)

-STRING("lp", "(", 40)

-STRING("q", "\"", 34)

-STRING("quote-left", "`", 8216)

-STRING("quote-right", "\'", 8217)

-STRING("R", "(R)", 174)

-STRING("right-bracket", "]", 93)

-STRING("right-parenthesis", ")", 41)

-STRING("right-singlequote", "\'", 8217)

-STRING("rp", ")", 41)

-STRING("Tm", "(Tm)", 8482)

-STRING("Px", "POSIX", 0)

-STRING("Ai", "ANSI", 0)

/* Lines. */

CHAR("ba", "|", 124)

CHAR("br", "|", 9474)

diff --git a/usr.bin/mandoc/html.c b/usr.bin/mandoc/html.c
index 45197ad76ef..5ad6860d850 100644
--- a/usr.bin/mandoc/html.c
+++ b/usr.bin/mandoc/html.c

@@ -1,4 +1,4 @@

-/* $Id: html.c,v 1.25 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: html.c,v 1.26 2011/05/29 21:22:18 schwarze Exp $ */

@@ -27,6 +27,7 @@

#include <unistd.h>

#include "mandoc.h"

+#include "libmandoc.h"

#include "out.h"

#include "html.h"

#include "main.h"

@@ -89,19 +90,25 @@ static const char *const htmlattrs[ATTR_MAX] = {

"colspan", /* ATTR_COLSPAN */

};

-static void print_num(struct html *, const char *, size_t);

-static void print_spec(struct html *, enum roffdeco,

- const char *, size_t);

-static void print_res(struct html *, const char *, size_t);

-static void print_ctag(struct html *, enum htmltag);

-static void print_doctype(struct html *);

-static void print_xmltype(struct html *);

-static int print_encode(struct html *, const char *, int);

-static void print_metaf(struct html *, enum roffdeco);

-static void print_attr(struct html *,

- const char *, const char *);

-static void *ml_alloc(char *, enum htmltype);

+static const char *const roffscales[SCALE_MAX] = {

+ "cm", /* SCALE_CM */

+ "in", /* SCALE_IN */

+ "pc", /* SCALE_PC */

+ "pt", /* SCALE_PT */

+ "em", /* SCALE_EM */

+ "em", /* SCALE_MM */

+ "ex", /* SCALE_EN */

+ "ex", /* SCALE_BU */

+ "em", /* SCALE_VS */

+ "ex", /* SCALE_FS */

+};

+static void bufncat(struct html *, const char *, size_t);

+static void print_ctag(struct html *, enum htmltag);

+static int print_encode(struct html *, const char *, int);

+static void print_metaf(struct html *, enum mandoc_esc);

+static void print_attr(struct html *, const char *, const char *);

+static void *ml_alloc(char *, enum htmltype);

static void *

ml_alloc(char *outopts, enum htmltype type)

@@ -119,7 +126,7 @@ ml_alloc(char *outopts, enum htmltype type)

h->type = type;

h->tags.head = NULL;

- h->symtab = chars_init(CHARS_HTML);

+ h->symtab = mchars_alloc();

while (outopts && *outopts)

switch (getsubopt(&outopts, UNCONST(toks), &v)) {

@@ -169,7 +176,7 @@ html_free(void *p)

}

if (h->symtab)

- chars_free(h->symtab);

+ mchars_free(h->symtab);

free(h);

}

@@ -205,72 +212,24 @@ print_gen_head(struct html *h)

}

-/* ARGSUSED */

-static void

-print_num(struct html *h, const char *p, size_t len)

- const char *rhs;

- rhs = chars_num2char(p, len);

- if (rhs)

- putchar((int)*rhs);

static void

-print_spec(struct html *h, enum roffdeco d, const char *p, size_t len)

- int cp;

- const char *rhs;

- size_t sz;

- if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) {

- printf("&#%d;", cp);

- return;

- } else if (-1 == cp && DECO_SSPECIAL == d) {

- fwrite(p, 1, len, stdout);

- return;

- } else if (-1 == cp)

- return;

- if (NULL != (rhs = chars_spec2str(h->symtab, p, len, &sz)))

- fwrite(rhs, 1, sz, stdout);

-static void

-print_res(struct html *h, const char *p, size_t len)

- int cp;

- const char *rhs;

- size_t sz;

- if ((cp = chars_res2cp(h->symtab, p, len)) > 0) {

- printf("&#%d;", cp);

- return;

- } else if (-1 == cp)

- return;

- if (NULL != (rhs = chars_res2str(h->symtab, p, len, &sz)))

- fwrite(rhs, 1, sz, stdout);

-static void

-print_metaf(struct html *h, enum roffdeco deco)

+print_metaf(struct html *h, enum mandoc_esc deco)

{

enum htmlfont font;

switch (deco) {

- case (DECO_PREVIOUS):

+ case (ESCAPE_FONTPREV):

font = h->metal;

break;

- case (DECO_ITALIC):

+ case (ESCAPE_FONTITALIC):

font = HTMLFONT_ITALIC;

break;

- case (DECO_BOLD):

+ case (ESCAPE_FONTBOLD):

font = HTMLFONT_BOLD;

break;

- case (DECO_ROMAN):

+ case (ESCAPE_FONT):

+ /* FALLTHROUGH */

+ case (ESCAPE_FONTROMAN):

font = HTMLFONT_NONE;

break;

default:

@@ -292,80 +251,123 @@ print_metaf(struct html *h, enum roffdeco deco)

print_otag(h, TAG_I, 0, NULL);

}

+int

+html_strlen(const char *cp)

+ int ssz, sz;

+ const char *seq, *p;

+ /*

+ * Account for escaped sequences within string length

+ * calculations. This follows the logic in term_strlen() as we

+ * must calculate the width of produced strings.

+ * Assume that characters are always width of "1". This is

+ * hacky, but it gets the job done for approximation of widths.

+ */

+ sz = 0;

+ while (NULL != (p = strchr(cp, '\\'))) {

+ sz += (int)(p - cp);

+ ++cp;

+ switch (mandoc_escape(&cp, &seq, &ssz)) {

+ case (ESCAPE_ERROR):

+ return(sz);

+ case (ESCAPE_UNICODE):

+ /* FALLTHROUGH */

+ case (ESCAPE_NUMBERED):

+ /* FALLTHROUGH */

+ case (ESCAPE_SPECIAL):

+ sz++;

+ break;

+ default:

+ break;

+ }

+ assert(sz >= 0);

+ return(sz + strlen(cp));

static int

print_encode(struct html *h, const char *p, int norecurse)

{

size_t sz;

- int len, nospace;

+ int c, len, nospace;

const char *seq;

- enum roffdeco deco;

+ enum mandoc_esc esc;

static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };

nospace = 0;

- for (; *p; p++) {

+ while ('\0' != *p) {

sz = strcspn(p, rejs);

fwrite(p, 1, sz, stdout);

- p += /* LINTED */

- sz;

+ p += (int)sz;

+ if ('\0' == *p)

+ break;

- if ('<' == *p) {

+ switch (*p++) {

+ case ('<'):

printf("<");

continue;

- } else if ('>' == *p) {

+ case ('>'):

printf(">");

continue;

- } else if ('&' == *p) {

+ case ('&'):

printf("&");

continue;

- } else if (ASCII_HYPH == *p) {

- /*

- * Note: "soft hyphens" aren't graphically

- * displayed when not breaking the text; we want

- * them to be displayed.

- */

- /*printf("");*/

+ case (ASCII_HYPH):

putchar('-');

continue;

- } else if ('\0' == *p)

+ default:

break;

+ }

- seq = ++p;

- len = a2roffdeco(&deco, &seq, &sz);

+ esc = mandoc_escape(&p, &seq, &len);

+ if (ESCAPE_ERROR == esc)

+ break;

- switch (deco) {

- case (DECO_NUMBERED):

- print_num(h, seq, sz);

+ switch (esc) {

+ case (ESCAPE_UNICODE):

+ /* Skip past "u" header. */

+ c = mchars_num2uc(seq + 1, len - 1);

+ if ('\0' != c)

+ printf("&#x%x;", c);

break;

- case (DECO_RESERVED):

- print_res(h, seq, sz);

+ case (ESCAPE_NUMBERED):

+ c = mchars_num2char(seq, len);

+ if ('\0' != c)

+ putchar(c);

break;

- case (DECO_SSPECIAL):

- /* FALLTHROUGH */

- case (DECO_SPECIAL):

- print_spec(h, deco, seq, sz);

+ case (ESCAPE_SPECIAL):

+ c = mchars_spec2cp(h->symtab, seq, len);

+ if (c > 0)

+ printf("&#%d;", c);

+ else if (-1 == c && 1 == len)

+ putchar((int)*seq);

break;

- case (DECO_PREVIOUS):

+ case (ESCAPE_FONT):

+ /* FALLTHROUGH */

+ case (ESCAPE_FONTPREV):

/* FALLTHROUGH */

- case (DECO_BOLD):

+ case (ESCAPE_FONTBOLD):

/* FALLTHROUGH */

- case (DECO_ITALIC):

+ case (ESCAPE_FONTITALIC):

/* FALLTHROUGH */

- case (DECO_ROMAN):

+ case (ESCAPE_FONTROMAN):

if (norecurse)

break;

- print_metaf(h, deco);

+ print_metaf(h, esc);

+ break;

+ case (ESCAPE_NOSPACE):

+ if ('\0' == *p)

+ nospace = 1;

break;

default:

break;

}

- p += len - 1;

- if (DECO_NOSPACE == deco && '\0' == *(p + 1))

- nospace = 1;

}

return(nospace);

@@ -428,7 +430,7 @@ print_otag(struct html *h, enum htmltag tag,

print_attr(h, "lang", "en");

}

- /* Accomodate for XML "well-formed" singleton escaping. */

+ /* Accommodate for XML "well-formed" singleton escaping. */

if (HTML_AUTOCLOSE & htmltags[tag].flags)

switch (h->type) {

@@ -461,28 +463,9 @@ print_ctag(struct html *h, enum htmltag tag)

}

void

print_gen_decls(struct html *h)

{

- print_xmltype(h);

- print_doctype(h);

-static void

-print_xmltype(struct html *h)

- if (HTML_XHTML_1_0_STRICT == h->type)

- puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");

-static void

-print_doctype(struct html *h)

const char *doctype;

const char *dtd;

const char *name;

@@ -494,6 +477,7 @@ print_doctype(struct html *h)

dtd = "http://www.w3.org/TR/html4/strict.dtd";

break;

default:

+ puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");

name = "html";

doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";

dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";

@@ -583,7 +567,6 @@ print_stagq(struct html *h, const struct tag *suntil)

}

void

bufinit(struct html *h)

{

@@ -592,28 +575,27 @@ bufinit(struct html *h)

h->buflen = 0;

}

void

bufcat_style(struct html *h, const char *key, const char *val)

{

bufcat(h, key);

- bufncat(h, ":", 1);

+ bufcat(h, ":");

bufcat(h, val);

- bufncat(h, ";", 1);

+ bufcat(h, ";");

}

void

bufcat(struct html *h, const char *p)

{

- bufncat(h, p, strlen(p));

+ h->buflen = strlcat(h->buf, p, BUFSIZ);

+ assert(h->buflen < BUFSIZ);

+ h->buflen--;

}

void

-buffmt(struct html *h, const char *fmt, ...)

+bufcat_fmt(struct html *h, const char *fmt, ...)

{

va_list ap;

@@ -624,19 +606,15 @@ buffmt(struct html *h, const char *fmt, ...)

h->buflen = strlen(h->buf);

}

-void

+static void

bufncat(struct html *h, const char *p, size_t sz)

{

- if (h->buflen + sz > BUFSIZ - 1)

- sz = BUFSIZ - 1 - h->buflen;

- (void)strncat(h->buf, p, sz);

+ assert(h->buflen + sz + 1 < BUFSIZ);

+ strncat(h->buf, p, sz);

h->buflen += sz;

}

void

buffmt_includes(struct html *h, const char *name)

{

@@ -644,6 +622,7 @@ buffmt_includes(struct html *h, const char *name)

pp = h->base_includes;

+ bufinit(h);

while (NULL != (p = strchr(pp, '%'))) {

bufncat(h, pp, (size_t)(p - pp));

switch (*(p + 1)) {

@@ -660,7 +639,6 @@ buffmt_includes(struct html *h, const char *name)

bufcat(h, pp);

}

void

buffmt_man(struct html *h,

const char *name, const char *sec)

@@ -669,7 +647,7 @@ buffmt_man(struct html *h,

pp = h->base_man;

- /* LINTED */

+ bufinit(h);

while (NULL != (p = strchr(pp, '%'))) {

bufncat(h, pp, (size_t)(p - pp));

switch (*(p + 1)) {

@@ -677,7 +655,7 @@ buffmt_man(struct html *h,

bufcat(h, sec ? sec : "1");

break;

case('N'):

- buffmt(h, name);

+ bufcat_fmt(h, name);

break;

default:

bufncat(h, p, 2);

@@ -689,85 +667,24 @@ buffmt_man(struct html *h,

bufcat(h, pp);

}

void

bufcat_su(struct html *h, const char *p, const struct roffsu *su)

{

double v;

- const char *u;

v = su->scale;

+ if (SCALE_MM == su->unit && 0.0 == (v /= 100.0))

+ v = 1.0;

- switch (su->unit) {

- case (SCALE_CM):

- u = "cm";

- break;

- case (SCALE_IN):

- u = "in";

- break;

- case (SCALE_PC):

- u = "pc";

- break;

- case (SCALE_PT):

- u = "pt";

- break;

- case (SCALE_EM):

- u = "em";

- break;

- case (SCALE_MM):

- if (0 == (v /= 100))

- v = 1;

- u = "em";

- break;

- case (SCALE_EN):

- u = "ex";

- break;

- case (SCALE_BU):

- u = "ex";

- break;

- case (SCALE_VS):

- u = "em";

- break;

- default:

- u = "ex";

- break;

- }

- /*

- * XXX: the CSS spec isn't clear as to which types accept

- * integer or real numbers, so we just make them all decimals.

- */

- buffmt(h, "%s: %.2f%s;", p, v, u);

+ bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]);

}

void

-html_idcat(char *dst, const char *src, int sz)

+bufcat_id(struct html *h, const char *src)

{

- int ssz;

- assert(sz > 2);

/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

- /* We can't start with a number (bah). */

- if ('#' == *dst) {

- dst++;

- sz--;

- }

- if ('\0' == *dst) {

- *dst++ = 'x';

- *dst = '\0';

- sz--;

- }

- for ( ; *dst != '\0' && sz; dst++, sz--)

- /* Jump to end. */ ;

- for ( ; *src != '\0' && sz > 1; src++) {

- ssz = snprintf(dst, (size_t)sz, "%.2x", *src);

- sz -= ssz;

- dst += ssz;

- }

+ while ('\0' != *src)

+ bufcat_fmt(h, "%.2x", *src++);

}

diff --git a/usr.bin/mandoc/html.h b/usr.bin/mandoc/html.h
index 4643e81afd9..10f9a3a5787 100644
--- a/usr.bin/mandoc/html.h
+++ b/usr.bin/mandoc/html.h

@@ -1,4 +1,4 @@

-/* $Id: html.h,v 1.15 2011/01/31 03:04:26 schwarze Exp $ */

+/* $Id: html.h,v 1.16 2011/05/29 21:22:18 schwarze Exp $ */

@@ -120,7 +120,7 @@ struct html {

struct tagq tags; /* stack of open tags */

struct rofftbl tbl; /* current table */

struct tag *tblt; /* current open table scope */

- void *symtab; /* character-escapes */

+ struct mchars *symtab; /* character-escapes */

char *base_man; /* base for manpage href */

char *base_includes; /* base for include href */

char *style; /* style-sheet URI */

@@ -142,19 +142,19 @@ void print_text(struct html *, const char *);

void print_tblclose(struct html *);

void print_tbl(struct html *, const struct tbl_span *);

+void bufcat_fmt(struct html *, const char *, ...);

+void bufcat(struct html *, const char *);

+void bufcat_id(struct html *, const char *);

+void bufcat_style(struct html *,

+ const char *, const char *);

void bufcat_su(struct html *, const char *,

const struct roffsu *);

+void bufinit(struct html *);

void buffmt_man(struct html *,

const char *, const char *);

void buffmt_includes(struct html *, const char *);

-void buffmt(struct html *, const char *, ...);

-void bufcat(struct html *, const char *);

-void bufcat_style(struct html *,

- const char *, const char *);

-void bufncat(struct html *, const char *, size_t);

-void bufinit(struct html *);

-void html_idcat(char *, const char *, int);

+int html_strlen(const char *);

__END_DECLS

diff --git a/usr.bin/mandoc/libmandoc.h b/usr.bin/mandoc/libmandoc.h
index eaacbfccbf1..1efe5da07a5 100644
--- a/usr.bin/mandoc/libmandoc.h
+++ b/usr.bin/mandoc/libmandoc.h

@@ -1,4 +1,4 @@

-/* $Id: libmandoc.h,v 1.11 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: libmandoc.h,v 1.12 2011/05/29 21:22:18 schwarze Exp $ */

@@ -73,13 +73,13 @@ void mandoc_msg(enum mandocerr, struct mparse *,

int, int, const char *);

void mandoc_vmsg(enum mandocerr, struct mparse *,

int, int, const char *, ...);

-int mandoc_special(char *);

char *mandoc_strdup(const char *);

char *mandoc_getarg(struct mparse *, char **, int, int *);

char *mandoc_normdate(struct mparse *, char *, int, int);

int mandoc_eos(const char *, size_t, int);

int mandoc_hyph(const char *, const char *);

int mandoc_getcontrol(const char *, int *);

+int mandoc_strntou(const char *, size_t, int);

void mdoc_free(struct mdoc *);

struct mdoc *mdoc_alloc(struct regset *, struct mparse *);

diff --git a/usr.bin/mandoc/libmdoc.h b/usr.bin/mandoc/libmdoc.h
index ceffcb05332..ee99633aa61 100644
--- a/usr.bin/mandoc/libmdoc.h
+++ b/usr.bin/mandoc/libmdoc.h

@@ -1,4 +1,4 @@

-/* $Id: libmdoc.h,v 1.45 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: libmdoc.h,v 1.46 2011/05/29 21:22:18 schwarze Exp $ */

@@ -62,20 +62,20 @@ struct mdoc_macro {

enum margserr {

ARGS_ERROR,

- ARGS_EOLN,

- ARGS_WORD,

- ARGS_PUNCT,

- ARGS_QWORD,

- ARGS_PHRASE,

- ARGS_PPHRASE,

- ARGS_PEND

+ ARGS_EOLN, /* end-of-line */

+ ARGS_WORD, /* normal word */

+ ARGS_PUNCT, /* series of punctuation */

+ ARGS_QWORD, /* quoted word */

+ ARGS_PHRASE, /* Ta'd phrase (-column) */

+ ARGS_PPHRASE, /* tabbed phrase (-column) */

+ ARGS_PEND /* last phrase (-column) */

};

enum margverr {

ARGV_ERROR,

- ARGV_EOLN,

- ARGV_ARG,

- ARGV_WORD

+ ARGV_EOLN, /* end of line */

+ ARGV_ARG, /* valid argument */

+ ARGV_WORD /* normal word (or bad argument---same thing) */

};

@@ -133,14 +133,8 @@ void mdoc_argv_free(struct mdoc_arg *);

enum margserr mdoc_args(struct mdoc *, int,

int *, char *, enum mdoct, char **);

enum margserr mdoc_zargs(struct mdoc *, int,

- int *, char *, int, char **);

-#define ARGS_DELIM (1 << 1)

-#define ARGS_TABSEP (1 << 2)

-#define ARGS_NOWARN (1 << 3)

+ int *, char *, char **);

int mdoc_macroend(struct mdoc *);

-#define DELIMSZ 6 /* hint: max possible size of a delimiter */

enum mdelim mdoc_isdelim(const char *);

__END_DECLS

diff --git a/usr.bin/mandoc/main.c b/usr.bin/mandoc/main.c
index 088940778ef..3b2fd636dff 100644
--- a/usr.bin/mandoc/main.c
+++ b/usr.bin/mandoc/main.c

@@ -1,4 +1,4 @@

-/* $Id: main.c,v 1.76 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: main.c,v 1.77 2011/05/29 21:22:18 schwarze Exp $ */

@@ -34,6 +34,8 @@ typedef void (*out_free)(void *);

enum outt {

OUTT_ASCII = 0, /* -Tascii */

+ OUTT_LOCALE, /* -Tlocale */

+ OUTT_UTF8, /* -Tutf8 */

OUTT_TREE, /* -Ttree */

OUTT_HTML, /* -Thtml */

OUTT_XHTML, /* -Txhtml */

@@ -197,9 +199,19 @@ parse(struct curparse *curp, int fd,

switch (curp->outtype) {

case (OUTT_XHTML):

curp->outdata = xhtml_alloc(curp->outopts);

+ curp->outfree = html_free;

break;

case (OUTT_HTML):

curp->outdata = html_alloc(curp->outopts);

+ curp->outfree = html_free;

+ break;

+ case (OUTT_UTF8):

+ curp->outdata = utf8_alloc(curp->outopts);

+ curp->outfree = ascii_free;

+ break;

+ case (OUTT_LOCALE):

+ curp->outdata = locale_alloc(curp->outopts);

+ curp->outfree = ascii_free;

break;

case (OUTT_ASCII):

curp->outdata = ascii_alloc(curp->outopts);

@@ -223,7 +235,6 @@ parse(struct curparse *curp, int fd,

case (OUTT_XHTML):

curp->outman = html_man;

curp->outmdoc = html_mdoc;

- curp->outfree = html_free;

break;

case (OUTT_TREE):

curp->outman = tree_man;

@@ -233,6 +244,10 @@ parse(struct curparse *curp, int fd,

/* FALLTHROUGH */

case (OUTT_ASCII):

/* FALLTHROUGH */

+ case (OUTT_UTF8):

+ /* FALLTHROUGH */

+ case (OUTT_LOCALE):

+ /* FALLTHROUGH */

case (OUTT_PS):

curp->outman = terminal_man;

curp->outmdoc = terminal_mdoc;

@@ -290,6 +305,10 @@ toptions(struct curparse *curp, char *arg)

curp->outtype = OUTT_TREE;

else if (0 == strcmp(arg, "html"))

curp->outtype = OUTT_HTML;

+ else if (0 == strcmp(arg, "utf8"))

+ curp->outtype = OUTT_UTF8;

+ else if (0 == strcmp(arg, "locale"))

+ curp->outtype = OUTT_LOCALE;

else if (0 == strcmp(arg, "xhtml"))

curp->outtype = OUTT_XHTML;

else if (0 == strcmp(arg, "ps"))

diff --git a/usr.bin/mandoc/main.h b/usr.bin/mandoc/main.h
index 2cb020dedc2..1efb9d34387 100644
--- a/usr.bin/mandoc/main.h
+++ b/usr.bin/mandoc/main.h

@@ -1,6 +1,6 @@

-/* $Id: main.h,v 1.7 2010/07/25 18:05:54 schwarze Exp $ */

+/* $Id: main.h,v 1.8 2011/05/29 21:22:18 schwarze Exp $ */

* Permission to use, copy, modify, and distribute this software for any

* purpose with or without fee is hereby granted, provided that the above

@@ -41,6 +41,8 @@ void html_free(void *);

void tree_mdoc(void *, const struct mdoc *);

void tree_man(void *, const struct man *);

+void *locale_alloc(char *);

+void *utf8_alloc(char *);

void *ascii_alloc(char *);

void ascii_free(void *);

diff --git a/usr.bin/mandoc/man_html.c b/usr.bin/mandoc/man_html.c
index d805cce275e..5437cb1557f 100644
--- a/usr.bin/mandoc/man_html.c
+++ b/usr.bin/mandoc/man_html.c

@@ -1,4 +1,4 @@

-/* $Id: man_html.c,v 1.37 2011/04/21 22:59:54 schwarze Exp $ */

+/* $Id: man_html.c,v 1.38 2011/05/29 21:22:18 schwarze Exp $ */

@@ -63,7 +63,7 @@ static int man_ign_pre(MAN_ARGS);

static int man_in_pre(MAN_ARGS);

static int man_literal_pre(MAN_ARGS);

static void man_root_post(MAN_ARGS);

-static int man_root_pre(MAN_ARGS);

+static void man_root_pre(MAN_ARGS);

static int man_B_pre(MAN_ARGS);

static int man_HP_pre(MAN_ARGS);

static int man_I_pre(MAN_ARGS);

@@ -153,9 +153,7 @@ print_man_head(MAN_ARGS)

{

print_gen_head(h);

- bufinit(h);

- buffmt(h, "%s(%s)", m->title, m->msec);

+ bufcat_fmt(h, "%s(%s)", m->title, m->msec);

print_otag(h, TAG_TITLE, 0, NULL);

print_text(h, h->buf);

}

@@ -181,13 +179,16 @@ print_man_node(MAN_ARGS)

child = 1;

t = h->tags.head;

- bufinit(h);

switch (n->type) {

case (MAN_ROOT):

- child = man_root_pre(m, n, mh, h);

+ man_root_pre(m, n, mh, h);

break;

case (MAN_TEXT):

+ /*

+ * If we have a blank line, output a vertical space.

+ * If we have a space as the first character, break

+ * before printing the line's data.

+ */

if ('\0' == *n->string) {

print_otag(h, TAG_P, 0, NULL);

return;

@@ -196,6 +197,13 @@ print_man_node(MAN_ARGS)

print_text(h, n->string);

+ /*

+ * If we're in a literal context, make sure that words

+ * together on the same line stay together. This is a

+ * POST-printing call, so we check the NEXT word. Since

+ * -man doesn't have nested macros, we don't need to be

+ * more specific than this.

+ */

if (MANH_LITERAL & mh->fl &&

(NULL == n->next ||

n->next->line > n->line))

@@ -244,8 +252,6 @@ print_man_node(MAN_ARGS)

/* This will automatically close out any font scope. */

print_stagq(h, t);

- bufinit(h);

switch (n->type) {

case (MAN_ROOT):

man_root_post(m, n, mh, h);

@@ -274,7 +280,7 @@ a2width(const struct man_node *n, struct roffsu *su)

/* ARGSUSED */

-static int

+static void

man_root_pre(MAN_ARGS)

{

struct htmlpair tag[3];

@@ -328,7 +334,6 @@ man_root_pre(MAN_ARGS)

print_text(h, title);

print_tagq(h, t);

- return(1);

}

@@ -387,6 +392,7 @@ man_br_pre(MAN_ARGS)

} else

su.scale = 0;

+ bufinit(h);

bufcat_su(h, "height", &su);

PAIR_STYLE_INIT(&tag, h);

print_otag(h, TAG_DIV, 1, &tag);

@@ -555,6 +561,7 @@ man_IP_pre(MAN_ARGS)

if (MAN_BLOCK == n->type) {

print_otag(h, TAG_P, 0, NULL);

print_otag(h, TAG_TABLE, 0, NULL);

+ bufinit(h);

bufcat_su(h, "width", &su);

PAIR_STYLE_INIT(&tag, h);

print_otag(h, TAG_COL, 1, &tag);

@@ -590,6 +597,8 @@ man_HP_pre(MAN_ARGS)

struct roffsu su;

const struct man_node *np;

+ bufinit(h);

np = MAN_BLOCK == n->type ?

n->head->child :

n->parent->head->child;

@@ -690,6 +699,7 @@ man_RS_pre(MAN_ARGS)

if (n->head->child)

a2width(n->head->child, &su);

+ bufinit(h);

bufcat_su(h, "margin-left", &su);

PAIR_STYLE_INIT(&tag, h);

print_otag(h, TAG_DIV, 1, &tag);

diff --git a/usr.bin/mandoc/man_term.c b/usr.bin/mandoc/man_term.c
index ab5c37bd86f..56b1b010756 100644
--- a/usr.bin/mandoc/man_term.c
+++ b/usr.bin/mandoc/man_term.c

@@ -1,4 +1,4 @@

-/* $Id: man_term.c,v 1.67 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: man_term.c,v 1.68 2011/05/29 21:22:18 schwarze Exp $ */

@@ -152,14 +152,7 @@ terminal_man(void *arg, const struct man *man)

p->tabwidth = term_len(p, 5);

if (NULL == p->symtab)

- switch (p->enc) {

- case (TERMENC_ASCII):

- p->symtab = chars_init(CHARS_ASCII);

- break;

- default:

- abort();

- /* NOTREACHED */

- }

+ p->symtab = mchars_alloc();

n = man_node(man);

m = man_meta(man);

diff --git a/usr.bin/mandoc/man_validate.c b/usr.bin/mandoc/man_validate.c
index c062c60905e..bfa17bd77c9 100644
--- a/usr.bin/mandoc/man_validate.c
+++ b/usr.bin/mandoc/man_validate.c

@@ -1,4 +1,4 @@

-/* $Id: man_validate.c,v 1.44 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: man_validate.c,v 1.45 2011/05/29 21:22:18 schwarze Exp $ */

@@ -50,7 +50,7 @@ static int check_par(CHKARGS);

static int check_part(CHKARGS);

static int check_root(CHKARGS);

static int check_sec(CHKARGS);

-static int check_text(CHKARGS);

+static void check_text(CHKARGS);

static int post_AT(CHKARGS);

static int post_fi(CHKARGS);

@@ -147,7 +147,8 @@ man_valid_post(struct man *m)

switch (m->last->type) {

case (MAN_TEXT):

- return(check_text(m, m->last));

+ check_text(m, m->last);

+ return(1);

case (MAN_ROOT):

return(check_root(m, m->last));

case (MAN_EQN):

@@ -200,43 +201,48 @@ check_root(CHKARGS)

return(1);

}

-static int

+static void

check_text(CHKARGS)

{

- char *p;

- int pos, c;

+ char *p, *pp, *cpp;

+ int pos;

size_t sz;

- for (p = n->string, pos = n->pos + 1; *p; p++, pos++) {

- sz = strcspn(p, "\t\\");

- p += (int)sz;

+ p = n->string;

+ pos = n->pos + 1;

- if ('\0' == *p)

- break;

+ while ('\0' != *p) {

+ sz = strcspn(p, "\t\\");

+ p += (int)sz;

pos += (int)sz;

if ('\t' == *p) {

- if (MAN_LITERAL & m->flags)

- continue;

- man_pmsg(m, n->line, pos, MANDOCERR_BADTAB);

+ if ( ! (MAN_LITERAL & m->flags))

+ man_pmsg(m, n->line, pos, MANDOCERR_BADTAB);

+ p++;

+ pos++;

continue;

- }

+ } else if ('\0' == *p)

+ break;

- /* Check the special character. */

+ pos++;

+ pp = ++p;

- c = mandoc_special(p);

- if (c) {

- p += c - 1;

- pos += c - 1;

- } else

+ if (ESCAPE_ERROR == mandoc_escape

+ ((const char **)&pp, NULL, NULL)) {

man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE);

- }

+ break;

+ }

- return(1);

+ cpp = p;

+ while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp)))

+ *cpp = '-';

+ pos += pp - p;

+ p = pp;

+ }

#define INEQ_DEFINE(x, ineq, name) \

static int \

diff --git a/usr.bin/mandoc/mandoc.1 b/usr.bin/mandoc/mandoc.1
index 3117c92aa8c..7b2720d5b50 100644
--- a/usr.bin/mandoc/mandoc.1
+++ b/usr.bin/mandoc/mandoc.1

@@ -1,6 +1,6 @@

-.\" $OpenBSD: mandoc.1,v 1.43 2011/01/09 15:24:57 schwarze Exp $

+.\" $OpenBSD: mandoc.1,v 1.44 2011/05/29 21:22:18 schwarze Exp $

.\"

.\" Permission to use, copy, modify, and distribute this software for any

.\" purpose with or without fee is hereby granted, provided that the above

@@ -14,7 +14,7 @@

.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF

.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

.\"

-.Dd $Mdocdate: January 9 2011 $

+.Dd $Mdocdate: May 29 2011 $

.Dt MANDOC 1

.Os

.Sh NAME

@@ -158,6 +158,15 @@ utility accepts the following

.Fl T

arguments, which correspond to output modes:

.Bl -tag -width Ds

+.It Fl T Ns Cm utf8

+Encode output in the UTF-8 multi-byte format.

+See

+.Sx UTF-8 Output .

+.It Fl T Ns Cm locale

+Encode output using the current

+.Xr locale 1 .

+See

+.Sx Locale Output .

.It Fl T Ns Cm ascii

Produce 7-bit ASCII output.

This is the default.

@@ -189,6 +198,23 @@ See

.Pp

If multiple input files are specified, these will be processed by the

corresponding filter in-order.

+.Ss UTF-8 Output

+Use

+.Fl T Ns Cm utf8

+to force a UTF-8 locale.

+See

+.Sx Locale Output

+for details and options.

+.Ss Locale Output

+Locale-dependent output encoding is triggered with

+.Fl T Ns Cm locale .

+This option is not available on all systems: systems without locale

+support, or those whose internal representation is not natively UCS-4,

+will fall back to

+.Fl T Ns Cm ascii .

+See

+.Sx ASCII Output

+for font style specification and available command-line arguments.

.Ss ASCII Output

Output produced by

.Fl T Ns Cm ascii ,

@@ -209,6 +235,9 @@ Emboldened characters are rendered as

The special characters documented in

.Xr mandoc_char 7

are rendered best-effort in an ASCII equivalent.

+If no equivalent is found,

+.Sq \&?

+is used instead.

.Pp

Output width is limited to 78 visible columns unless literal input lines

exceed this limit.

@@ -460,6 +489,13 @@ Each input and output format is separately noted.

.Ss ASCII Compatibility

.Bl -bullet -compact

.It

+Unrenderable Unicode codepoints specified with

+.Sq \e[uNNNN]

+escapes are printed as

+.Sq \&?

+in mandoc.

+In GNU troff, these raise an error.

+.It

The

.Sq \&Bd \-literal

and

@@ -470,7 +506,7 @@ in

.Fl T Ns Cm ascii

are synonyms, as are \-filled and \-ragged.

.It

-In GNU troff, the

+In historic GNU troff, the

.Sq \&Pa

.Xr mdoc 7

macro does not underline when scoped under an

@@ -495,8 +531,6 @@ macro in

has no effect.

.It

Words aren't hyphenated.

-.It

-Sentences are unilaterally monospaced.

.El

.Ss HTML/XHTML Compatibility

.Bl -bullet -compact

diff --git a/usr.bin/mandoc/mandoc.c b/usr.bin/mandoc/mandoc.c
index 931ce863017..b9ec46283e8 100644
--- a/usr.bin/mandoc/mandoc.c
+++ b/usr.bin/mandoc/mandoc.c

@@ -1,4 +1,4 @@

-/* $Id: mandoc.c,v 1.25 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: mandoc.c,v 1.26 2011/05/29 21:22:18 schwarze Exp $ */

@@ -19,6 +19,8 @@

#include <assert.h>

#include <ctype.h>

+#include <errno.h>

+#include <limits.h>

#include <stdlib.h>

#include <stdio.h>

#include <string.h>

@@ -31,199 +33,358 @@

static int a2time(time_t *, const char *, const char *);

static char *time2a(time_t);

+static int numescape(const char *);

-int

-mandoc_special(char *p)

+/*

+ * Pass over recursive numerical expressions. The context of this

+ * function is important: it's only called within character-terminating

+ * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial

+ * recursion: we don't care about what's in these blocks.

+ * This returns the number of characters skipped or -1 if an error

+ * occurs (the caller should bail).

+ */

+static int

+numescape(const char *start)

{

- int len, i;

- char term;

- char *sv;

- len = 0;

- term = '\0';

- sv = p;

- assert('\\' == *p);

- p++;

- switch (*p++) {

-#if 0

- case ('Z'):

+ int i;

+ size_t sz;

+ const char *cp;

+ i = 0;

+ /* The expression consists of a subexpression. */

+ if ('\\' == start[i]) {

+ cp = &start[++i];

+ /*

+ * Read past the end of the subexpression.

+ * Bail immediately on errors.

+ */

+ if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))

+ return(-1);

+ return(i + cp - &start[i]);

+ }

+ if ('(' != start[i++])

+ return(0);

+ /*

+ * A parenthesised subexpression. Read until the closing

+ * parenthesis, making sure to handle any nested subexpressions

+ * that might ruin our parse.

+ */

+ while (')' != start[i]) {

+ sz = strcspn(&start[i], ")\\");

+ i += (int)sz;

+ if ('\0' == start[i])

+ return(-1);

+ else if ('\\' != start[i])

+ continue;

+ cp = &start[++i];

+ if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))

+ return(-1);

+ i += cp - &start[i];

+ }

+ /* Read past the terminating ')'. */

+ return(++i);

+enum mandoc_esc

+mandoc_escape(const char **end, const char **start, int *sz)

+ char c, term, numeric;

+ int i, lim, ssz, rlim;

+ const char *cp, *rstart;

+ enum mandoc_esc gly;

+ cp = *end;

+ rstart = cp;

+ if (start)

+ *start = rstart;

+ i = lim = 0;

+ gly = ESCAPE_ERROR;

+ term = numeric = '\0';

+ switch ((c = cp[i++])) {

+ /*

+ * First the glyphs. There are several different forms of

+ * these, but each eventually returns a substring of the glyph

+ * name.

+ */

+ case ('('):

+ gly = ESCAPE_SPECIAL;

+ lim = 2;

+ break;

+ case ('['):

+ gly = ESCAPE_SPECIAL;

+ /*

+ * Unicode escapes are defined in groff as \[uXXXX] to

+ * \[u10FFFF], where the contained value must be a valid

+ * Unicode codepoint. Here, however, only check whether

+ * it's not a zero-width escape.

+ */

+ if ('u' == cp[i] && ']' != cp[i + 1])

+ gly = ESCAPE_UNICODE;

+ term = ']';

+ break;

+ case ('C'):

+ if ('\'' != cp[i])

+ return(ESCAPE_ERROR);

+ gly = ESCAPE_SPECIAL;

+ term = '\'';

+ break;

+ /*

+ * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where

+ * 'X' is the trigger. These have opaque sub-strings.

+ */

+ case ('F'):

/* FALLTHROUGH */

- case ('X'):

+ case ('g'):

/* FALLTHROUGH */

- case ('x'):

+ case ('k'):

/* FALLTHROUGH */

- case ('S'):

+ case ('M'):

/* FALLTHROUGH */

- case ('R'):

+ case ('m'):

/* FALLTHROUGH */

- case ('N'):

+ case ('n'):

/* FALLTHROUGH */

- case ('l'):

+ case ('V'):

/* FALLTHROUGH */

- case ('L'):

+ case ('Y'):

+ if (ESCAPE_ERROR == gly)

+ gly = ESCAPE_IGNORE;

/* FALLTHROUGH */

- case ('H'):

+ case ('f'):

+ if (ESCAPE_ERROR == gly)

+ gly = ESCAPE_FONT;

+ rstart= &cp[i];

+ if (start)

+ *start = rstart;

+ switch (cp[i++]) {

+ case ('('):

+ lim = 2;

+ break;

+ case ('['):

+ term = ']';

+ break;

+ default:

+ lim = 1;

+ i--;

+ break;

+ }

+ break;

+ /*

+ * These escapes are of the form \X'Y', where 'X' is the trigger

+ * and 'Y' is any string. These have opaque sub-strings.

+ */

+ case ('A'):

/* FALLTHROUGH */

- case ('h'):

+ case ('b'):

/* FALLTHROUGH */

case ('D'):

/* FALLTHROUGH */

- case ('C'):

- /* FALLTHROUGH */

- case ('b'):

+ case ('o'):

/* FALLTHROUGH */

- case ('B'):

+ case ('R'):

/* FALLTHROUGH */

- case ('a'):

+ case ('X'):

/* FALLTHROUGH */

- case ('A'):

- if (*p++ != '\'')

- return(0);

+ case ('Z'):

+ if ('\'' != cp[i++])

+ return(ESCAPE_ERROR);

+ gly = ESCAPE_IGNORE;

term = '\'';

break;

-#endif

+ /*

+ * These escapes are of the form \X'N', where 'X' is the trigger

+ * and 'N' resolves to a numerical expression.

+ */

+ case ('B'):

+ /* FALLTHROUGH */

case ('h'):

/* FALLTHROUGH */

+ case ('H'):

+ /* FALLTHROUGH */

+ case ('L'):

+ /* FALLTHROUGH */

+ case ('l'):

+ /* FALLTHROUGH */

+ case ('N'):

+ if (ESCAPE_ERROR == gly)

+ gly = ESCAPE_NUMBERED;

+ /* FALLTHROUGH */

+ case ('S'):

+ /* FALLTHROUGH */

case ('v'):

/* FALLTHROUGH */

+ case ('w'):

+ /* FALLTHROUGH */

+ case ('x'):

+ if (ESCAPE_ERROR == gly)

+ gly = ESCAPE_IGNORE;

+ if ('\'' != cp[i++])

+ return(ESCAPE_ERROR);

+ term = numeric = '\'';

+ break;

+ /*

+ * Sizes get a special category of their own.

+ */

case ('s'):

- if (ASCII_HYPH == *p)

- *p = '-';

+ gly = ESCAPE_IGNORE;

- i = 0;

- if ('+' == *p || '-' == *p) {

- p++;

- i = 1;

- }

+ rstart = &cp[i];

+ if (start)

+ *start = rstart;

- switch (*p++) {

+ /* See if +/- counts as a sign. */

+ c = cp[i];

+ if ('+' == c || '-' == c || ASCII_HYPH == c)

+ ++i;

+ switch (cp[i++]) {

case ('('):

- len = 2;

+ lim = 2;

break;

case ('['):

- term = ']';

+ term = numeric = ']';

break;

case ('\''):

- term = '\'';

+ term = numeric = '\'';

break;

- case ('0'):

- i = 1;

- /* FALLTHROUGH */

default:

- len = 1;

- p--;

+ lim = 1;

+ i--;

break;

}

- if (ASCII_HYPH == *p)

- *p = '-';

- if ('+' == *p || '-' == *p) {

- if (i)

- return(0);

- p++;

- }

- /* Handle embedded numerical subexp or escape. */

- if ('(' == *p) {

- while (*p && ')' != *p)

- if ('\\' == *p++) {

- i = mandoc_special(--p);

- if (0 == i)

- return(0);

- p += i;

- }

- if (')' == *p++)

- break;

+ /* See if +/- counts as a sign. */

+ c = cp[i];

+ if ('+' == c || '-' == c || ASCII_HYPH == c)

+ ++i;

- return(0);

- } else if ('\\' == *p) {

- if (0 == (i = mandoc_special(p)))

- return(0);

- p += i;

- }

+ break;

+ /*

+ * Anything else is assumed to be a glyph.

+ */

+ default:

+ gly = ESCAPE_SPECIAL;

+ lim = 1;

+ i--;

break;

-#if 0

- case ('Y'):

- /* FALLTHROUGH */

- case ('V'):

- /* FALLTHROUGH */

- case ('$'):

- /* FALLTHROUGH */

- case ('n'):

- /* FALLTHROUGH */

-#endif

- case ('k'):

- /* FALLTHROUGH */

- case ('M'):

- /* FALLTHROUGH */

- case ('m'):

- /* FALLTHROUGH */

- case ('f'):

- /* FALLTHROUGH */

- case ('F'):

- /* FALLTHROUGH */

- case ('*'):

- switch (*p++) {

- case ('('):

- len = 2;

+ }

+ assert(ESCAPE_ERROR != gly);

+ rstart = &cp[i];

+ if (start)

+ *start = rstart;

+ /*

+ * If a terminating block has been specified, we need to

+ * handle the case of recursion, which could have their

+ * own terminating blocks that mess up our parse. This, by the

+ * way, means that the "start" and "size" values will be

+ * effectively meaningless.

+ */

+ ssz = 0;

+ if (numeric && -1 == (ssz = numescape(&cp[i])))

+ return(ESCAPE_ERROR);

+ i += ssz;

+ rlim = -1;

+ /*

+ * We have a character terminator. Try to read up to that

+ * character. If we can't (i.e., we hit the nil), then return

+ * an error; if we can, calculate our length, read past the

+ * terminating character, and exit.

+ */

+ if ('\0' != term) {

+ *end = strchr(&cp[i], term);

+ if ('\0' == *end)

+ return(ESCAPE_ERROR);

+ rlim = *end - &cp[i];

+ if (sz)

+ *sz = rlim;

+ (*end)++;

+ goto out;

+ }

+ assert(lim > 0);

+ /*

+ * We have a numeric limit. If the string is shorter than that,

+ * stop and return an error. Else adjust our endpoint, length,

+ * and return the current glyph.

+ */

+ if ((size_t)lim > strlen(&cp[i]))

+ return(ESCAPE_ERROR);

+ rlim = lim;

+ if (sz)

+ *sz = rlim;

+ *end = &cp[i] + lim;

+out:

+ assert(rlim >= 0 && rstart);

+ /* Run post-processors. */

+ switch (gly) {

+ case (ESCAPE_FONT):

+ if (1 != rlim)

break;

- case ('['):

- term = ']';

+ switch (*rstart) {

+ case ('3'):

+ /* FALLTHROUGH */

+ case ('B'):

+ gly = ESCAPE_FONTBOLD;

break;

- default:

- len = 1;

- p--;

+ case ('2'):

+ /* FALLTHROUGH */

+ case ('I'):

+ gly = ESCAPE_FONTITALIC;

+ break;

+ case ('P'):

+ gly = ESCAPE_FONTPREV;

+ break;

+ case ('1'):

+ /* FALLTHROUGH */

+ case ('R'):

+ gly = ESCAPE_FONTROMAN;

break;

}

break;

- case ('('):

- len = 2;

- break;

- case ('['):

- term = ']';

- break;

- case ('z'):

- len = 1;

- if ('\\' == *p) {

- if (0 == (i = mandoc_special(p)))

- return(0);

- p += i;

- return(*p ? (int)(p - sv) : 0);

- }

- break;

- case ('o'):

- /* FALLTHROUGH */

- case ('w'):

- if ('\'' == *p++) {

- term = '\'';

+ case (ESCAPE_SPECIAL):

+ if (1 != rlim)

break;

- }

- /* FALLTHROUGH */

+ if ('c' == *rstart)

+ gly = ESCAPE_NOSPACE;

+ break;

default:

- len = 1;

- p--;

break;

}

- if (term) {

- for ( ; *p && term != *p; p++)

- if (ASCII_HYPH == *p)

- *p = '-';

- return(*p ? (int)(p - sv) : 0);

- }

- for (i = 0; *p && i < len; i++, p++)

- if (ASCII_HYPH == *p)

- *p = '-';

- return(i == len ? (int)(p - sv) : 0);

+ return(gly);

}

void *

mandoc_calloc(size_t num, size_t size)

{

@@ -299,11 +460,11 @@ mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)

/* Quoting can only start with a new word. */

start = *cpp;

+ quoted = 0;

if ('"' == *start) {

quoted = 1;

start++;

- } else

- quoted = 0;

+ }

pairs = 0;

white = 0;

@@ -444,7 +605,7 @@ mandoc_eos(const char *p, size_t sz, int enclosed)

* End-of-sentence recognition must include situations where

* some symbols, such as `)', allow prior EOS punctuation to

- * propogate outward.

+ * propagate outward.

found = 0;

@@ -527,3 +688,35 @@ mandoc_getcontrol(const char *cp, int *ppos)

*ppos = pos;

return(1);

}

+/*

+ * Convert a string to a non-negative int.

+ * If the string is invalid, negative, or exceeds INT_MAX, return -1.

+ */

+int

+mandoc_strntou(const char *p, size_t sz, int base)

+ char buf[32];

+ char *ep;

+ long v;

+ if (sz > 31)

+ return(-1);

+ memcpy(buf, p, sz);

+ buf[(int)sz] = '\0';

+ errno = 0;

+ v = strtol(buf, &ep, base);

+ if (buf[0] == '\0' || *ep != '\0')

+ return(-1);

+ if ((errno == ERANGE &&

+ (v == LONG_MAX || v == LONG_MIN)) ||

+ (v > INT_MAX || v < 0))

+ return(-1);

+ return((int)v);

diff --git a/usr.bin/mandoc/mandoc.h b/usr.bin/mandoc/mandoc.h
index 70999ca7149..39c4b2e0a2c 100644
--- a/usr.bin/mandoc/mandoc.h
+++ b/usr.bin/mandoc/mandoc.h

@@ -1,4 +1,4 @@

-/* $Id: mandoc.h,v 1.37 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: mandoc.h,v 1.38 2011/05/29 21:22:18 schwarze Exp $ */

@@ -288,10 +288,25 @@ enum mparset {

MPARSE_MAN /* assume -man */

};

+enum mandoc_esc {

+ ESCAPE_ERROR = 0, /* bail! unparsable escape */

+ ESCAPE_IGNORE, /* escape to be ignored */

+ ESCAPE_SPECIAL, /* a regular special character */

+ ESCAPE_FONT, /* a generic font mode */

+ ESCAPE_FONTBOLD, /* bold font mode */

+ ESCAPE_FONTITALIC, /* italic font mode */

+ ESCAPE_FONTROMAN, /* roman font mode */

+ ESCAPE_FONTPREV, /* previous font mode */

+ ESCAPE_NUMBERED, /* a numbered glyph */

+ ESCAPE_UNICODE, /* a unicode codepoint */

+ ESCAPE_NOSPACE /* suppress space if the last on a line */

+};

typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel,

const char *, int, int, const char *);

struct mparse;

+struct mchars;

struct mdoc;

struct man;

@@ -310,6 +325,16 @@ void *mandoc_calloc(size_t, size_t);

void *mandoc_malloc(size_t);

void *mandoc_realloc(void *, size_t);

+enum mandoc_esc mandoc_escape(const char **, const char **, int *);

+struct mchars *mchars_alloc(void);

+char mchars_num2char(const char *, size_t);

+int mchars_num2uc(const char *, size_t);

+const char *mchars_spec2str(struct mchars *, const char *, size_t, size_t *);

+int mchars_spec2cp(struct mchars *, const char *, size_t);

+void mchars_free(struct mchars *);

__END_DECLS

#endif /*!MANDOC_H*/

diff --git a/usr.bin/mandoc/mdoc_argv.c b/usr.bin/mandoc/mdoc_argv.c
index c35fcf2517c..5bc1386f021 100644
--- a/usr.bin/mandoc/mdoc_argv.c
+++ b/usr.bin/mandoc/mdoc_argv.c

@@ -1,4 +1,4 @@

-/* $Id: mdoc_argv.c,v 1.37 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: mdoc_argv.c,v 1.38 2011/05/29 21:22:18 schwarze Exp $ */

@@ -28,11 +28,25 @@

#include "libmandoc.h"

#define MULTI_STEP 5 /* pre-allocate argument values */

+#define DELIMSZ 6 /* max possible size of a delimiter */

+enum argsflag {

+ ARGSFL_NONE = 0,

+ ARGSFL_DELIM, /* handle delimiters of [[::delim::][ ]+]+ */

+ ARGSFL_TABSEP /* handle tab/`Ta' separated phrases */

+};

+enum argvflag {

+ ARGV_NONE, /* no args to flag (e.g., -split) */

+ ARGV_SINGLE, /* one arg to flag (e.g., -file xxx) */

+ ARGV_MULTI, /* multiple args (e.g., -column xxx yyy) */

+ ARGV_OPT_SINGLE /* optional arg (e.g., -offset [xxx]) */

+};

static enum mdocargt argv_a2arg(enum mdoct, const char *);

static enum margserr args(struct mdoc *, int, int *,

- char *, int, char **);

-static int args_checkpunct(const char *);

+ char *, enum argsflag, char **);

+static int args_checkpunct(const char *, int);

static int argv(struct mdoc *, int,

struct mdoc_argv *, int *, char *);

static int argv_single(struct mdoc *, int,

@@ -43,13 +57,6 @@ static int argv_multi(struct mdoc *, int,

struct mdoc_argv *, int *, char *);

static void argn_free(struct mdoc_arg *, int);

-enum argvflag {

- ARGV_NONE, /* no args to flag (e.g., -split) */

- ARGV_SINGLE, /* one arg to flag (e.g., -file xxx) */

- ARGV_MULTI, /* multiple args (e.g., -column xxx yyy) */

- ARGV_OPT_SINGLE /* optional arg (e.g., -offset [xxx]) */

-};

static const enum argvflag argvflags[MDOC_ARG_MAX] = {

ARGV_NONE, /* MDOC_Split */

ARGV_NONE, /* MDOC_Nosplit */

@@ -79,129 +86,129 @@ static const enum argvflag argvflags[MDOC_ARG_MAX] = {

ARGV_NONE /* MDOC_Symbolic */

};

-static const int argflags[MDOC_MAX] = {

- 0, /* Ap */

- 0, /* Dd */

- 0, /* Dt */

- 0, /* Os */

- 0, /* Sh */

- 0, /* Ss */

- 0, /* Pp */

- ARGS_DELIM, /* D1 */

- ARGS_DELIM, /* Dl */

- 0, /* Bd */

- 0, /* Ed */

- 0, /* Bl */

- 0, /* El */

- 0, /* It */

- ARGS_DELIM, /* Ad */

- ARGS_DELIM, /* An */

- ARGS_DELIM, /* Ar */

- 0, /* Cd */

- ARGS_DELIM, /* Cm */

- ARGS_DELIM, /* Dv */

- ARGS_DELIM, /* Er */

- ARGS_DELIM, /* Ev */

- 0, /* Ex */

- ARGS_DELIM, /* Fa */

- 0, /* Fd */

- ARGS_DELIM, /* Fl */

- ARGS_DELIM, /* Fn */

- ARGS_DELIM, /* Ft */

- ARGS_DELIM, /* Ic */

- 0, /* In */

- ARGS_DELIM, /* Li */

- 0, /* Nd */

- ARGS_DELIM, /* Nm */

- ARGS_DELIM, /* Op */

- 0, /* Ot */

- ARGS_DELIM, /* Pa */

- 0, /* Rv */

- ARGS_DELIM, /* St */

- ARGS_DELIM, /* Va */

- ARGS_DELIM, /* Vt */

- ARGS_DELIM, /* Xr */

- 0, /* %A */

- 0, /* %B */

- 0, /* %D */

- 0, /* %I */

- 0, /* %J */

- 0, /* %N */

- 0, /* %O */

- 0, /* %P */

- 0, /* %R */

- 0, /* %T */

- 0, /* %V */

- ARGS_DELIM, /* Ac */

- 0, /* Ao */

- ARGS_DELIM, /* Aq */

- ARGS_DELIM, /* At */

- ARGS_DELIM, /* Bc */

- 0, /* Bf */

- 0, /* Bo */

- ARGS_DELIM, /* Bq */

- ARGS_DELIM, /* Bsx */

- ARGS_DELIM, /* Bx */

- 0, /* Db */

- ARGS_DELIM, /* Dc */

- 0, /* Do */

- ARGS_DELIM, /* Dq */

- ARGS_DELIM, /* Ec */

- 0, /* Ef */

- ARGS_DELIM, /* Em */

- 0, /* Eo */

- ARGS_DELIM, /* Fx */

- ARGS_DELIM, /* Ms */

- ARGS_DELIM, /* No */

- ARGS_DELIM, /* Ns */

- ARGS_DELIM, /* Nx */

- ARGS_DELIM, /* Ox */

- ARGS_DELIM, /* Pc */

- ARGS_DELIM, /* Pf */

- 0, /* Po */

- ARGS_DELIM, /* Pq */

- ARGS_DELIM, /* Qc */

- ARGS_DELIM, /* Ql */

- 0, /* Qo */

- ARGS_DELIM, /* Qq */

- 0, /* Re */

- 0, /* Rs */

- ARGS_DELIM, /* Sc */

- 0, /* So */

- ARGS_DELIM, /* Sq */

- 0, /* Sm */

- ARGS_DELIM, /* Sx */

- ARGS_DELIM, /* Sy */

- ARGS_DELIM, /* Tn */

- ARGS_DELIM, /* Ux */

- ARGS_DELIM, /* Xc */

- 0, /* Xo */

- 0, /* Fo */

- 0, /* Fc */

- 0, /* Oo */

- ARGS_DELIM, /* Oc */

- 0, /* Bk */

- 0, /* Ek */

- 0, /* Bt */

- 0, /* Hf */

- 0, /* Fr */

- 0, /* Ud */

- 0, /* Lb */

- 0, /* Lp */

- ARGS_DELIM, /* Lk */

- ARGS_DELIM, /* Mt */

- ARGS_DELIM, /* Brq */

- 0, /* Bro */

- ARGS_DELIM, /* Brc */

- 0, /* %C */

- 0, /* Es */

- 0, /* En */

- 0, /* Dx */

- 0, /* %Q */

- 0, /* br */

- 0, /* sp */

- 0, /* %U */

- 0, /* Ta */

+static const enum argsflag argflags[MDOC_MAX] = {

+ ARGSFL_NONE, /* Ap */

+ ARGSFL_NONE, /* Dd */

+ ARGSFL_NONE, /* Dt */

+ ARGSFL_NONE, /* Os */

+ ARGSFL_NONE, /* Sh */

+ ARGSFL_NONE, /* Ss */

+ ARGSFL_NONE, /* Pp */

+ ARGSFL_DELIM, /* D1 */

+ ARGSFL_DELIM, /* Dl */

+ ARGSFL_NONE, /* Bd */

+ ARGSFL_NONE, /* Ed */

+ ARGSFL_NONE, /* Bl */

+ ARGSFL_NONE, /* El */

+ ARGSFL_NONE, /* It */

+ ARGSFL_DELIM, /* Ad */

+ ARGSFL_DELIM, /* An */

+ ARGSFL_DELIM, /* Ar */

+ ARGSFL_NONE, /* Cd */

+ ARGSFL_DELIM, /* Cm */

+ ARGSFL_DELIM, /* Dv */

+ ARGSFL_DELIM, /* Er */

+ ARGSFL_DELIM, /* Ev */

+ ARGSFL_NONE, /* Ex */

+ ARGSFL_DELIM, /* Fa */

+ ARGSFL_NONE, /* Fd */

+ ARGSFL_DELIM, /* Fl */

+ ARGSFL_DELIM, /* Fn */

+ ARGSFL_DELIM, /* Ft */

+ ARGSFL_DELIM, /* Ic */

+ ARGSFL_NONE, /* In */

+ ARGSFL_DELIM, /* Li */

+ ARGSFL_NONE, /* Nd */

+ ARGSFL_DELIM, /* Nm */

+ ARGSFL_DELIM, /* Op */

+ ARGSFL_NONE, /* Ot */

+ ARGSFL_DELIM, /* Pa */

+ ARGSFL_NONE, /* Rv */

+ ARGSFL_DELIM, /* St */

+ ARGSFL_DELIM, /* Va */

+ ARGSFL_DELIM, /* Vt */

+ ARGSFL_DELIM, /* Xr */

+ ARGSFL_NONE, /* %A */

+ ARGSFL_NONE, /* %B */

+ ARGSFL_NONE, /* %D */

+ ARGSFL_NONE, /* %I */

+ ARGSFL_NONE, /* %J */

+ ARGSFL_NONE, /* %N */

+ ARGSFL_NONE, /* %O */

+ ARGSFL_NONE, /* %P */

+ ARGSFL_NONE, /* %R */

+ ARGSFL_NONE, /* %T */

+ ARGSFL_NONE, /* %V */

+ ARGSFL_DELIM, /* Ac */

+ ARGSFL_NONE, /* Ao */

+ ARGSFL_DELIM, /* Aq */

+ ARGSFL_DELIM, /* At */

+ ARGSFL_DELIM, /* Bc */

+ ARGSFL_NONE, /* Bf */

+ ARGSFL_NONE, /* Bo */

+ ARGSFL_DELIM, /* Bq */

+ ARGSFL_DELIM, /* Bsx */

+ ARGSFL_DELIM, /* Bx */

+ ARGSFL_NONE, /* Db */

+ ARGSFL_DELIM, /* Dc */

+ ARGSFL_NONE, /* Do */

+ ARGSFL_DELIM, /* Dq */

+ ARGSFL_DELIM, /* Ec */

+ ARGSFL_NONE, /* Ef */

+ ARGSFL_DELIM, /* Em */

+ ARGSFL_NONE, /* Eo */

+ ARGSFL_DELIM, /* Fx */

+ ARGSFL_DELIM, /* Ms */

+ ARGSFL_DELIM, /* No */

+ ARGSFL_DELIM, /* Ns */

+ ARGSFL_DELIM, /* Nx */

+ ARGSFL_DELIM, /* Ox */

+ ARGSFL_DELIM, /* Pc */

+ ARGSFL_DELIM, /* Pf */

+ ARGSFL_NONE, /* Po */

+ ARGSFL_DELIM, /* Pq */

+ ARGSFL_DELIM, /* Qc */

+ ARGSFL_DELIM, /* Ql */

+ ARGSFL_NONE, /* Qo */

+ ARGSFL_DELIM, /* Qq */

+ ARGSFL_NONE, /* Re */

+ ARGSFL_NONE, /* Rs */

+ ARGSFL_DELIM, /* Sc */

+ ARGSFL_NONE, /* So */

+ ARGSFL_DELIM, /* Sq */

+ ARGSFL_NONE, /* Sm */

+ ARGSFL_DELIM, /* Sx */

+ ARGSFL_DELIM, /* Sy */

+ ARGSFL_DELIM, /* Tn */

+ ARGSFL_DELIM, /* Ux */

+ ARGSFL_DELIM, /* Xc */

+ ARGSFL_NONE, /* Xo */

+ ARGSFL_NONE, /* Fo */

+ ARGSFL_NONE, /* Fc */

+ ARGSFL_NONE, /* Oo */

+ ARGSFL_DELIM, /* Oc */

+ ARGSFL_NONE, /* Bk */

+ ARGSFL_NONE, /* Ek */

+ ARGSFL_NONE, /* Bt */

+ ARGSFL_NONE, /* Hf */

+ ARGSFL_NONE, /* Fr */

+ ARGSFL_NONE, /* Ud */

+ ARGSFL_NONE, /* Lb */

+ ARGSFL_NONE, /* Lp */

+ ARGSFL_DELIM, /* Lk */

+ ARGSFL_DELIM, /* Mt */

+ ARGSFL_DELIM, /* Brq */

+ ARGSFL_NONE, /* Bro */

+ ARGSFL_DELIM, /* Brc */

+ ARGSFL_NONE, /* %C */

+ ARGSFL_NONE, /* Es */

+ ARGSFL_NONE, /* En */

+ ARGSFL_NONE, /* Dx */

+ ARGSFL_NONE, /* %Q */

+ ARGSFL_NONE, /* br */

+ ARGSFL_NONE, /* sp */

+ ARGSFL_NONE, /* %U */

+ ARGSFL_NONE, /* Ta */

};

static const enum mdocargt args_Ex[] = {

@@ -371,18 +378,17 @@ argn_free(struct mdoc_arg *p, int iarg)

}

enum margserr

-mdoc_zargs(struct mdoc *m, int line, int *pos,

- char *buf, int flags, char **v)

+mdoc_zargs(struct mdoc *m, int line, int *pos, char *buf, char **v)

{

- return(args(m, line, pos, buf, flags, v));

+ return(args(m, line, pos, buf, ARGSFL_NONE, v));

}

enum margserr

mdoc_args(struct mdoc *m, int line, int *pos,

char *buf, enum mdoct tok, char **v)

{

- int fl;

+ enum argsflag fl;

struct mdoc_node *n;

fl = argflags[tok];

@@ -399,39 +405,21 @@ mdoc_args(struct mdoc *m, int line, int *pos,

for (n = m->last; n; n = n->parent)

if (MDOC_Bl == n->tok)

- break;

- if (n && LIST_column == n->norm->Bl.type) {

- fl |= ARGS_TABSEP;

- fl &= ~ARGS_DELIM;

- }

+ if (LIST_column == n->norm->Bl.type) {

+ fl = ARGSFL_TABSEP;

+ break;

+ }

return(args(m, line, pos, buf, fl, v));

}

static enum margserr

args(struct mdoc *m, int line, int *pos,

- char *buf, int fl, char **v)

+ char *buf, enum argsflag fl, char **v)

{

- int i;

char *p, *pp;

enum margserr rc;

- /*

- * Parse out the terms (like `val' in `.Xx -arg val' or simply

- * `.Xx val'), which can have all sorts of properties:

- *

- * ARGS_DELIM: use special handling if encountering trailing

- * delimiters in the form of [[::delim::][ ]+]+.

- *

- * ARGS_NOWARN: don't post warnings. This is only used when

- * re-parsing delimiters, as the warnings have already been

- * posted.

- *

- * ARGS_TABSEP: use special handling for tab/`Ta' separated

- * phrases like in `Bl -column'.

- */

assert(' ' != buf[*pos]);

if ('\0' == buf[*pos]) {

@@ -451,15 +439,9 @@ args(struct mdoc *m, int line, int *pos,

*v = &buf[*pos];

- if (ARGS_DELIM & fl && args_checkpunct(&buf[*pos])) {

- i = strlen(&buf[*pos]) + *pos;

- if (i && ' ' != buf[i - 1])

+ if (ARGSFL_DELIM == fl)

+ if (args_checkpunct(buf, *pos))

return(ARGS_PUNCT);

- if (ARGS_NOWARN & fl)

- return(ARGS_PUNCT);

- mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE);

- return(ARGS_PUNCT);

- }

* First handle TABSEP items, restricted to `Bl -column'. This

@@ -468,7 +450,7 @@ args(struct mdoc *m, int line, int *pos,

* for arguments at a later phase.

- if (ARGS_TABSEP & fl) {

+ if (ARGSFL_TABSEP == fl) {

/* Scan ahead to tab (can't be escaped). */

p = strchr(*v, '\t');

pp = NULL;

@@ -507,7 +489,7 @@ args(struct mdoc *m, int line, int *pos,

}

/* Whitespace check for eoln case... */

- if ('\0' == *p && ' ' == *(p - 1) && ! (ARGS_NOWARN & fl))

+ if ('\0' == *p && ' ' == *(p - 1))

mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE);

*pos += (int)(p - *v);

@@ -550,7 +532,7 @@ args(struct mdoc *m, int line, int *pos,

}

if ('\0' == buf[*pos]) {

- if (ARGS_NOWARN & fl || MDOC_PPHRASE & m->flags)

+ if (MDOC_PPHRASE & m->flags)

return(ARGS_QWORD);

mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE);

return(ARGS_QWORD);

@@ -565,31 +547,14 @@ args(struct mdoc *m, int line, int *pos,

while (' ' == buf[*pos])

(*pos)++;

- if (0 == buf[*pos] && ! (ARGS_NOWARN & fl))

+ if ('\0' == buf[*pos])

mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE);

return(ARGS_QWORD);

}

- /*

- * A non-quoted term progresses until either the end of line or

- * a non-escaped whitespace.

- */

- for ( ; buf[*pos]; (*pos)++)

- if (*pos && ' ' == buf[*pos] && '\\' != buf[*pos - 1])

- break;

- if ('\0' == buf[*pos])

- return(ARGS_WORD);

- buf[(*pos)++] = '\0';

- while (' ' == buf[*pos])

- (*pos)++;

- if ('\0' == buf[*pos] && ! (ARGS_NOWARN & fl))

- mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE);

+ p = &buf[*pos];

+ *v = mandoc_getarg(m->parse, &p, line, pos);

return(ARGS_WORD);

}

@@ -601,49 +566,47 @@ args(struct mdoc *m, int line, int *pos,

* whitespace may separate these tokens.

static int

-args_checkpunct(const char *p)

+args_checkpunct(const char *buf, int i)

{

- int i, j;

- char buf[DELIMSZ];

+ int j;

+ char dbuf[DELIMSZ];

enum mdelim d;

- i = 0;

/* First token must be a close-delimiter. */

- for (j = 0; p[i] && ' ' != p[i] && j < DELIMSZ; j++, i++)

- buf[j] = p[i];

+ for (j = 0; buf[i] && ' ' != buf[i] && j < DELIMSZ; j++, i++)

+ dbuf[j] = buf[i];

if (DELIMSZ == j)

return(0);

- buf[j] = '\0';

- if (DELIM_CLOSE != mdoc_isdelim(buf))

+ dbuf[j] = '\0';

+ if (DELIM_CLOSE != mdoc_isdelim(dbuf))

return(0);

- while (' ' == p[i])

+ while (' ' == buf[i])

i++;

/* Remaining must NOT be open/none. */

- while (p[i]) {

+ while (buf[i]) {

j = 0;

- while (p[i] && ' ' != p[i] && j < DELIMSZ)

- buf[j++] = p[i++];

+ while (buf[i] && ' ' != buf[i] && j < DELIMSZ)

+ dbuf[j++] = buf[i++];

if (DELIMSZ == j)

return(0);

- buf[j] = '\0';

- d = mdoc_isdelim(buf);

+ dbuf[j] = '\0';

+ d = mdoc_isdelim(dbuf);

if (DELIM_NONE == d || DELIM_OPEN == d)

return(0);

- while (' ' == p[i])

+ while (' ' == buf[i])

i++;

}

- return('\0' == p[i]);

+ return('\0' == buf[i]);

}

@@ -654,40 +617,40 @@ args_checkpunct(const char *p)

static enum mdocargt

argv_a2arg(enum mdoct tok, const char *p)

{

- const enum mdocargt *args;

+ const enum mdocargt *argsp;

- args = NULL;

+ argsp = NULL;

switch (tok) {

case (MDOC_An):

- args = args_An;

+ argsp = args_An;

break;

case (MDOC_Bd):

- args = args_Bd;

+ argsp = args_Bd;

break;

case (MDOC_Bf):

- args = args_Bf;

+ argsp = args_Bf;

break;

case (MDOC_Bk):

- args = args_Bk;

+ argsp = args_Bk;

break;

case (MDOC_Bl):

- args = args_Bl;

+ argsp = args_Bl;

break;

case (MDOC_Rv):

/* FALLTHROUGH */

case (MDOC_Ex):

- args = args_Ex;

+ argsp = args_Ex;

break;

default:

return(MDOC_ARG_MAX);

}

- assert(args);

+ assert(argsp);

- for ( ; MDOC_ARG_MAX != *args ; args++)

- if (0 == strcmp(p, mdoc_argnames[*args]))

- return(*args);

+ for ( ; MDOC_ARG_MAX != *argsp ; argsp++)

+ if (0 == strcmp(p, mdoc_argnames[*argsp]))

+ return(*argsp);

return(MDOC_ARG_MAX);

}

@@ -702,7 +665,7 @@ argv_multi(struct mdoc *m, int line,

for (v->sz = 0; ; v->sz++) {

if ('-' == buf[*pos])

break;

- ac = args(m, line, pos, buf, 0, &p);

+ ac = args(m, line, pos, buf, ARGSFL_NONE, &p);

if (ARGS_ERROR == ac)

return(0);

else if (ARGS_EOLN == ac)

@@ -728,7 +691,7 @@ argv_opt_single(struct mdoc *m, int line,

if ('-' == buf[*pos])

return(1);

- ac = args(m, line, pos, buf, 0, &p);

+ ac = args(m, line, pos, buf, ARGSFL_NONE, &p);

if (ARGS_ERROR == ac)

return(0);

if (ARGS_EOLN == ac)

@@ -754,7 +717,7 @@ argv_single(struct mdoc *m, int line,

ppos = *pos;

- ac = args(m, line, pos, buf, 0, &p);

+ ac = args(m, line, pos, buf, ARGSFL_NONE, &p);

if (ARGS_EOLN == ac) {

mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTARGVCOUNT);

return(0);

diff --git a/usr.bin/mandoc/mdoc_html.c b/usr.bin/mandoc/mdoc_html.c
index 47112e20804..2bbf5f6fe75 100644
--- a/usr.bin/mandoc/mdoc_html.c
+++ b/usr.bin/mandoc/mdoc_html.c

@@ -1,4 +1,4 @@

-/* $Id: mdoc_html.c,v 1.56 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: mdoc_html.c,v 1.57 2011/05/29 21:22:18 schwarze Exp $ */

@@ -284,7 +284,7 @@ a2width(const char *p, struct roffsu *su)

if ( ! a2roffsu(p, su, SCALE_MAX)) {

su->unit = SCALE_BU;

- su->scale = (int)strlen(p);

+ su->scale = html_strlen(p);

}

@@ -351,7 +351,7 @@ a2offs(const char *p, struct roffsu *su)

SCALE_HS_INIT(su, INDENT * 2);

else if ( ! a2roffsu(p, su, SCALE_MAX)) {

su->unit = SCALE_BU;

- su->scale = (int)strlen(p);

+ su->scale = html_strlen(p);

}

@@ -378,13 +378,10 @@ print_mdoc_head(MDOC_ARGS)

print_gen_head(h);

bufinit(h);

- buffmt(h, "%s(%s)", m->title, m->msec);

+ bufcat_fmt(h, "%s(%s)", m->title, m->msec);

- if (m->arch) {

- bufcat(h, " (");

- bufcat(h, m->arch);

- bufcat(h, ")");

- }

+ if (m->arch)

+ bufcat_fmt(h, " (%s)", m->arch);

print_otag(h, TAG_TITLE, 0, NULL);

print_text(h, h->buf);

@@ -411,7 +408,6 @@ print_mdoc_node(MDOC_ARGS)

child = 1;

t = h->tags.head;

- bufinit(h);

switch (n->type) {

case (MDOC_ROOT):

child = mdoc_root_pre(m, n, h);

@@ -480,7 +476,6 @@ print_mdoc_node(MDOC_ARGS)

print_stagq(h, t);

- bufinit(h);

switch (n->type) {

case (MDOC_ROOT):

mdoc_root_post(m, n, h);

@@ -602,7 +597,6 @@ static int

mdoc_sh_pre(MDOC_ARGS)

{

struct htmlpair tag;

- char buf[BUFSIZ];

if (MDOC_BLOCK == n->type) {

PAIR_CLASS_INIT(&tag, "section");

@@ -611,14 +605,14 @@ mdoc_sh_pre(MDOC_ARGS)

} else if (MDOC_BODY == n->type)

return(1);

- buf[0] = '\0';

+ bufinit(h);

for (n = n->child; n; n = n->next) {

- html_idcat(buf, n->string, BUFSIZ);

+ bufcat_id(h, n->string);

if (n->next)

- html_idcat(buf, " ", BUFSIZ);

+ bufcat_id(h, " ");

}

- PAIR_ID_INIT(&tag, buf);

+ PAIR_ID_INIT(&tag, h->buf);

print_otag(h, TAG_H1, 1, &tag);

return(1);

}

@@ -629,7 +623,6 @@ static int

mdoc_ss_pre(MDOC_ARGS)

{

struct htmlpair tag;

- char buf[BUFSIZ];

if (MDOC_BLOCK == n->type) {

PAIR_CLASS_INIT(&tag, "subsection");

@@ -638,14 +631,14 @@ mdoc_ss_pre(MDOC_ARGS)

} else if (MDOC_BODY == n->type)

return(1);

- buf[0] = '\0';

+ bufinit(h);

for (n = n->child; n; n = n->next) {

- html_idcat(buf, n->string, BUFSIZ);

+ bufcat_id(h, n->string);

if (n->next)

- html_idcat(buf, " ", BUFSIZ);

+ bufcat_id(h, " ");

}

- PAIR_ID_INIT(&tag, buf);

+ PAIR_ID_INIT(&tag, h->buf);

print_otag(h, TAG_H2, 1, &tag);

return(1);

}

@@ -699,7 +692,7 @@ mdoc_nm_pre(MDOC_ARGS)

{

struct htmlpair tag;

struct roffsu su;

- size_t len;

+ int len;

switch (n->type) {

case (MDOC_ELEM):

@@ -727,12 +720,13 @@ mdoc_nm_pre(MDOC_ARGS)

for (len = 0, n = n->child; n; n = n->next)

if (MDOC_TEXT == n->type)

- len += strlen(n->string);

+ len += html_strlen(n->string);

if (0 == len && m->name)

- len = strlen(m->name);

+ len = html_strlen(m->name);

SCALE_HS_INIT(&su, (double)len);

+ bufinit(h);

bufcat_su(h, "width", &su);

PAIR_STYLE_INIT(&tag, h);

print_otag(h, TAG_COL, 1, &tag);

@@ -895,6 +889,8 @@ mdoc_it_pre(MDOC_ARGS)

assert(lists[type]);

PAIR_CLASS_INIT(&tag[0], lists[type]);

+ bufinit(h);

if (MDOC_HEAD == n->type) {

switch (type) {

case(LIST_bullet):

@@ -995,6 +991,8 @@ mdoc_bl_pre(MDOC_ARGS)

struct roffsu su;

char buf[BUFSIZ];

+ bufinit(h);

if (MDOC_BODY == n->type) {

if (LIST_column == n->norm->Bl.type)

print_otag(h, TAG_TBODY, 0, NULL);

@@ -1014,7 +1012,6 @@ mdoc_bl_pre(MDOC_ARGS)

for (i = 0; i < (int)n->norm->Bl.ncols; i++) {

a2width(n->norm->Bl.cols[i], &su);

- bufinit(h);

if (i < (int)n->norm->Bl.ncols - 1)

bufcat_su(h, "width", &su);

else

@@ -1143,6 +1140,7 @@ mdoc_d1_pre(MDOC_ARGS)

return(1);

SCALE_VS_INIT(&su, 0);

+ bufinit(h);

bufcat_su(h, "margin-top", &su);

bufcat_su(h, "margin-bottom", &su);

PAIR_STYLE_INIT(&tag[0], h);

@@ -1167,17 +1165,17 @@ static int

mdoc_sx_pre(MDOC_ARGS)

{

struct htmlpair tag[2];

- char buf[BUFSIZ];

- strlcpy(buf, "#", BUFSIZ);

+ bufinit(h);

+ bufcat(h, "#x");

for (n = n->child; n; n = n->next) {

- html_idcat(buf, n->string, BUFSIZ);

+ bufcat_id(h, n->string);

if (n->next)

- html_idcat(buf, " ", BUFSIZ);

+ bufcat_id(h, " ");

}

PAIR_CLASS_INIT(&tag[0], "link-sec");

- PAIR_HREF_INIT(&tag[1], buf);

+ PAIR_HREF_INIT(&tag[1], h->buf);

print_otag(h, TAG_I, 1, tag);

print_otag(h, TAG_A, 2, tag);

@@ -1215,7 +1213,8 @@ mdoc_bd_pre(MDOC_ARGS)

SCALE_HS_INIT(&su, 0);

if (n->norm->Bd.offs)

a2offs(n->norm->Bd.offs, &su);

+ bufinit(h);

bufcat_su(h, "margin-left", &su);

PAIR_STYLE_INIT(&tag[0], h);

@@ -1434,7 +1433,6 @@ mdoc_fd_pre(MDOC_ARGS)

buf[sz - 1] = '\0';

PAIR_CLASS_INIT(&tag[0], "link-includes");

- bufinit(h);

i = 1;

if (h->base_includes) {

@@ -1556,8 +1554,8 @@ mdoc_fn_pre(MDOC_ARGS)

print_text(h, "(");

h->flags |= HTML_NOSPACE;

- bufinit(h);

PAIR_CLASS_INIT(&tag[0], "farg");

+ bufinit(h);

bufcat_style(h, "white-space", "nowrap");

PAIR_STYLE_INIT(&tag[1], h);

@@ -1636,6 +1634,7 @@ mdoc_sp_pre(MDOC_ARGS)

} else

su.scale = 0;

+ bufinit(h);

bufcat_su(h, "height", &su);

PAIR_STYLE_INIT(&tag, h);

print_otag(h, TAG_DIV, 1, &tag);

@@ -1772,10 +1771,8 @@ mdoc_in_pre(MDOC_ARGS)

assert(MDOC_TEXT == n->type);

PAIR_CLASS_INIT(&tag[0], "link-includes");

- bufinit(h);

i = 1;

if (h->base_includes) {

buffmt_includes(h, n->string);

PAIR_HREF_INIT(&tag[i], h->buf);

@@ -1914,6 +1911,7 @@ mdoc_bf_pre(MDOC_ARGS)

* We want this to be inline-formatted, but needs to be div to

* accept block children.

+ bufinit(h);

bufcat_style(h, "display", "inline");

SCALE_HS_INIT(&su, 1);

/* Needs a left-margin for spacing. */

diff --git a/usr.bin/mandoc/mdoc_macro.c b/usr.bin/mandoc/mdoc_macro.c
index e29fb2610d6..bc22ce7330a 100644
--- a/usr.bin/mandoc/mdoc_macro.c
+++ b/usr.bin/mandoc/mdoc_macro.c

@@ -1,4 +1,4 @@

-/* $Id: mdoc_macro.c,v 1.67 2011/04/24 16:49:10 schwarze Exp $ */

+/* $Id: mdoc_macro.c,v 1.68 2011/05/29 21:22:18 schwarze Exp $ */

@@ -615,7 +615,7 @@ append_delims(struct mdoc *m, int line, int *pos, char *buf)

for (;;) {

la = *pos;

- ac = mdoc_zargs(m, line, pos, buf, ARGS_NOWARN, &p);

+ ac = mdoc_zargs(m, line, pos, buf, &p);

if (ARGS_ERROR == ac)

return(0);

@@ -628,12 +628,12 @@ append_delims(struct mdoc *m, int line, int *pos, char *buf)

* If we encounter end-of-sentence symbols, then trigger

* the double-space.

- * XXX: it's easy to allow this to propogate outward to

+ * XXX: it's easy to allow this to propagate outward to

* the last symbol, such that `. )' will cause the

* correct double-spacing. However, (1) groff isn't

* smart enough to do this and (2) it would require

* knowing which symbols break this behaviour, for

- * example, `. ;' shouldn't propogate the double-space.

+ * example, `. ;' shouldn't propagate the double-space.

if (mandoc_eos(p, strlen(p), 0))

m->last->flags |= MDOC_EOS;

@@ -992,7 +992,7 @@ blk_full(MACRO_PROT_ARGS)

}

- * This routine accomodates implicitly- and explicitly-scoped

+ * This routine accommodates implicitly- and explicitly-scoped

* macro openings. Implicit ones first close out prior scope

* (seen above). Delay opening the head until necessary to

* allow leading punctuation to print. Special consideration

@@ -1289,7 +1289,7 @@ blk_part_imp(MACRO_PROT_ARGS)

if (mandoc_eos(n->string, strlen(n->string), 1))

n->flags |= MDOC_EOS;

- /* Up-propogate the end-of-space flag. */

+ /* Up-propagate the end-of-space flag. */

if (n && (MDOC_EOS & n->flags)) {

body->flags |= MDOC_EOS;

@@ -1711,7 +1711,7 @@ phrase(struct mdoc *m, int line, int ppos, char *buf)

for (pos = ppos; ; ) {

la = pos;

- ac = mdoc_zargs(m, line, &pos, buf, 0, &p);

+ ac = mdoc_zargs(m, line, &pos, buf, &p);

if (ARGS_ERROR == ac)

return(0);

@@ -1756,7 +1756,7 @@ phrase_ta(MACRO_PROT_ARGS)

for (;;) {

la = *pos;

- ac = mdoc_zargs(m, line, pos, buf, 0, &p);

+ ac = mdoc_zargs(m, line, pos, buf, &p);

if (ARGS_ERROR == ac)

return(0);

diff --git a/usr.bin/mandoc/mdoc_term.c b/usr.bin/mandoc/mdoc_term.c
index a992ed09cf8..24ca2a3e485 100644
--- a/usr.bin/mandoc/mdoc_term.c
+++ b/usr.bin/mandoc/mdoc_term.c

@@ -1,4 +1,4 @@

-/* $Id: mdoc_term.c,v 1.132 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: mdoc_term.c,v 1.133 2011/05/29 21:22:18 schwarze Exp $ */

@@ -260,14 +260,7 @@ terminal_mdoc(void *arg, const struct mdoc *mdoc)

p->tabwidth = term_len(p, 5);

if (NULL == p->symtab)

- switch (p->enc) {

- case (TERMENC_ASCII):

- p->symtab = chars_init(CHARS_ASCII);

- break;

- default:

- abort();

- /* NOTREACHED */

- }

+ p->symtab = mchars_alloc();

n = mdoc_node(mdoc);

m = mdoc_meta(mdoc);

diff --git a/usr.bin/mandoc/mdoc_validate.c b/usr.bin/mandoc/mdoc_validate.c
index b915a49b24a..1c08e614a80 100644
--- a/usr.bin/mandoc/mdoc_validate.c
+++ b/usr.bin/mandoc/mdoc_validate.c

@@ -1,4 +1,4 @@

-/* $Id: mdoc_validate.c,v 1.92 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: mdoc_validate.c,v 1.93 2011/05/29 21:22:18 schwarze Exp $ */

@@ -176,7 +176,7 @@ static v_pre pres_sh[] = { pre_sh, NULL };

static v_pre pres_ss[] = { pre_ss, NULL };

static v_pre pres_std[] = { pre_std, NULL };

-static const struct valids mdoc_valids[MDOC_MAX] = {

+static const struct valids mdoc_valids[MDOC_MAX] = {

{ NULL, NULL }, /* Ap */

{ pres_dd, posts_dd }, /* Dd */

{ pres_dt, posts_dt }, /* Dt */

@@ -541,31 +541,39 @@ check_argv(struct mdoc *m, struct mdoc_node *n, struct mdoc_argv *v)

static void

check_text(struct mdoc *m, int ln, int pos, char *p)

{

- int c;

+ char *cpp, *pp;

size_t sz;

- for ( ; *p; p++, pos++) {

+ while ('\0' != *p) {

sz = strcspn(p, "\t\\");

- p += (int)sz;

- if ('\0' == *p)

- break;

+ p += (int)sz;

pos += (int)sz;

if ('\t' == *p) {

if ( ! (MDOC_LITERAL & m->flags))

mdoc_pmsg(m, ln, pos, MANDOCERR_BADTAB);

+ p++;

+ pos++;

continue;

- }

+ } else if ('\0' == *p)

+ break;

+ pos++;

+ pp = ++p;

- if (0 == (c = mandoc_special(p))) {

+ if (ESCAPE_ERROR == mandoc_escape

+ ((const char **)&pp, NULL, NULL)) {

mdoc_pmsg(m, ln, pos, MANDOCERR_BADESCAPE);

- continue;

+ break;

}

- p += c - 1;

- pos += c - 1;

+ cpp = p;

+ while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp)))

+ *cpp = '-';

+ pos += pp - p;

+ p = pp;

}

@@ -1523,7 +1531,7 @@ post_bl_head(POST_ARGS)

assert(0 == np->args->argv[j].sz);

- * Accomodate for new-style groff column syntax. Shuffle the

+ * Accommodate for new-style groff column syntax. Shuffle the

* child nodes, all of which must be TEXT, as arguments for the

* column field. Then, delete the head children.

diff --git a/usr.bin/mandoc/out.c b/usr.bin/mandoc/out.c
index 48e3b3c5c8b..eaebdc4a697 100644
--- a/usr.bin/mandoc/out.c
+++ b/usr.bin/mandoc/out.c

@@ -1,4 +1,4 @@

-/* $Id: out.c,v 1.13 2011/04/21 22:59:54 schwarze Exp $ */

+/* $Id: out.c,v 1.14 2011/05/29 21:22:18 schwarze Exp $ */

@@ -170,243 +170,6 @@ time2a(time_t t, char *dst, size_t sz)

(void)strftime(p, sz, "%Y", &tm);

}

-int

-a2roffdeco(enum roffdeco *d, const char **word, size_t *sz)

- int i, j, lim;

- char term, c;

- const char *wp;

- enum roffdeco dd;

- *d = DECO_NONE;

- lim = i = 0;

- term = '\0';

- wp = *word;

- switch ((c = wp[i++])) {

- case ('('):

- *d = DECO_SPECIAL;

- lim = 2;

- break;

- case ('F'):

- /* FALLTHROUGH */

- case ('f'):

- *d = 'F' == c ? DECO_FFONT : DECO_FONT;

- switch (wp[i++]) {

- case ('('):

- lim = 2;

- break;

- case ('['):

- term = ']';

- break;

- case ('3'):

- /* FALLTHROUGH */

- case ('B'):

- *d = DECO_BOLD;

- return(i);

- case ('2'):

- /* FALLTHROUGH */

- case ('I'):

- *d = DECO_ITALIC;

- return(i);

- case ('P'):

- *d = DECO_PREVIOUS;

- return(i);

- case ('1'):

- /* FALLTHROUGH */

- case ('R'):

- *d = DECO_ROMAN;

- return(i);

- default:

- i--;

- lim = 1;

- break;

- }

- break;

- case ('k'):

- /* FALLTHROUGH */

- case ('M'):

- /* FALLTHROUGH */

- case ('m'):

- /* FALLTHROUGH */

- case ('*'):

- if ('*' == c)

- *d = DECO_RESERVED;

- switch (wp[i++]) {

- case ('('):

- lim = 2;

- break;

- case ('['):

- term = ']';

- break;

- default:

- i--;

- lim = 1;

- break;

- }

- break;

- case ('N'):

- /*

- * Sequence of characters: backslash, 'N' (i = 0),

- * starting delimiter (i = 1), character number (i = 2).

- */

- *word = wp + 2;

- *sz = 0;

- /*

- * Cannot use a digit as a starting delimiter;

- * but skip the digit anyway.

- */

- if (isdigit((int)wp[1]))

- return(2);

- /*

- * Any non-digit terminates the character number.

- * That is, the terminating delimiter need not

- * match the starting delimiter.

- */

- for (i = 2; isdigit((int)wp[i]); i++)

- (*sz)++;

- /*

- * This is only a numbered character

- * if the character number has at least one digit.

- */

- if (*sz)

- *d = DECO_NUMBERED;

- /*

- * Skip the terminating delimiter, even if it does not

- * match, and even if there is no character number.

- */

- return(++i);

- case ('h'):

- /* FALLTHROUGH */

- case ('v'):

- /* FALLTHROUGH */

- case ('s'):

- j = 0;

- if ('+' == wp[i] || '-' == wp[i]) {

- i++;

- j = 1;

- }

- switch (wp[i++]) {

- case ('('):

- lim = 2;

- break;

- case ('['):

- term = ']';

- break;

- case ('\''):

- term = '\'';

- break;

- case ('0'):

- j = 1;

- /* FALLTHROUGH */

- default:

- i--;

- lim = 1;

- break;

- }

- if ('+' == wp[i] || '-' == wp[i]) {

- if (j)

- return(i);

- i++;

- }

- /* Handle embedded numerical subexp or escape. */

- if ('(' == wp[i]) {

- while (wp[i] && ')' != wp[i])

- if ('\\' == wp[i++]) {

- /* Handle embedded escape. */

- *word = &wp[i];

- i += a2roffdeco(&dd, word, sz);

- }

- if (')' == wp[i++])

- break;

- *d = DECO_NONE;

- return(i - 1);

- } else if ('\\' == wp[i]) {

- *word = &wp[++i];

- i += a2roffdeco(&dd, word, sz);

- }

- break;

- case ('['):

- *d = DECO_SPECIAL;

- term = ']';

- break;

- case ('c'):

- *d = DECO_NOSPACE;

- return(i);

- case ('z'):

- *d = DECO_NONE;

- if ('\\' == wp[i]) {

- *word = &wp[++i];

- return(i + a2roffdeco(&dd, word, sz));

- } else

- lim = 1;

- break;

- case ('o'):

- /* FALLTHROUGH */

- case ('w'):

- if ('\'' == wp[i++]) {

- term = '\'';

- break;

- }

- /* FALLTHROUGH */

- default:

- *d = DECO_SSPECIAL;

- i--;

- lim = 1;

- break;

- }

- assert(term || lim);

- *word = &wp[i];

- if (term) {

- j = i;

- while (wp[i] && wp[i] != term)

- i++;

- if ('\0' == wp[i]) {

- *d = DECO_NONE;

- return(i);

- }

- assert(i >= j);

- *sz = (size_t)(i - j);

- return(i + 1);

- }

- assert(lim > 0);

- *sz = (size_t)lim;

- for (j = 0; wp[i] && j < lim; j++)

- i++;

- if (j < lim)

- *d = DECO_NONE;

- return(i);

* Calculate the abstract widths and decimal positions of columns in a

* table. This routine allocates the columns structures then runs over

diff --git a/usr.bin/mandoc/out.h b/usr.bin/mandoc/out.h
index 0386a989d71..76f6bafe040 100644
--- a/usr.bin/mandoc/out.h
+++ b/usr.bin/mandoc/out.h

@@ -1,4 +1,4 @@

-/* $Id: out.h,v 1.10 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: out.h,v 1.11 2011/05/29 21:22:18 schwarze Exp $ */

@@ -31,27 +31,6 @@ enum roffscale {

SCALE_MAX

};

-enum roffdeco {

- DECO_NONE,

- DECO_NUMBERED, /* numbered character */

- DECO_SPECIAL, /* special character */

- DECO_SSPECIAL, /* single-char special */

- DECO_RESERVED, /* reserved word */

- DECO_BOLD, /* bold font */

- DECO_ITALIC, /* italic font */

- DECO_ROMAN, /* "normal" undecorated font */

- DECO_PREVIOUS, /* revert to previous font */

- DECO_NOSPACE, /* suppress spacing */

- DECO_FONT, /* font */

- DECO_FFONT, /* font family */

- DECO_MAX

-};

-enum chars {

- CHARS_ASCII, /* 7-bit ascii representation */

- CHARS_HTML /* unicode values */

-};

struct roffcol {

size_t width; /* width of cell */

size_t decimal; /* decimal position in cell */

@@ -85,18 +64,9 @@ __BEGIN_DECLS

while (/* CONSTCOND */ 0)

int a2roffsu(const char *, struct roffsu *, enum roffscale);

-int a2roffdeco(enum roffdeco *, const char **, size_t *);

void time2a(time_t, char *, size_t);

void tblcalc(struct rofftbl *tbl, const struct tbl_span *);

-void *chars_init(enum chars);

-const char *chars_num2char(const char *, size_t);

-const char *chars_spec2str(void *, const char *, size_t, size_t *);

-int chars_spec2cp(void *, const char *, size_t);

-const char *chars_res2str(void *, const char *, size_t, size_t *);

-int chars_res2cp(void *, const char *, size_t);

-void chars_free(void *);

__END_DECLS

#endif /*!OUT_H*/

diff --git a/usr.bin/mandoc/predefs.in b/usr.bin/mandoc/predefs.in
new file mode 100644
index 00000000000..6713bff1976
--- /dev/null
+++ b/usr.bin/mandoc/predefs.in

@@ -0,0 +1,65 @@

+/* $Id: predefs.in,v 1.1 2011/05/29 21:22:18 schwarze Exp $ */

+/*

+ *

+ * Permission to use, copy, modify, and distribute this software for any

+ * purpose with or without fee is hereby granted, provided that the above

+ * copyright notice and this permission notice appear in all copies.

+ *

+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES

+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF

+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR

+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES

+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN

+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF

+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

+ */

+/*

+ * The predefined-string translation tables. Each entry corresponds to a

+ * predefined string from (e.g.) tmac/mdoc/doc-nroff. The left-hand

+ * side corresponds to the input sequence (\*x, \*(xx and so on). The

+ * right-hand side is what's produced by libroff.

+ *

+ * XXX - C-escape strings!

+ * XXX - update PREDEFS_MAX in roff.c if adding more!

+ */

+PREDEF("Am", "&")

+PREDEF("Ba", "|")

+PREDEF("Ge", "\\(>=")

+PREDEF("Gt", ">")

+PREDEF("If", "\\(if")

+PREDEF("Le", "\\(<=")

+PREDEF("Lq", "\\(lq")

+PREDEF("Lt", "<")

+PREDEF("Na", "NaN")

+PREDEF("Ne", "\\(!=")

+PREDEF("Pi", "\\(*p")

+PREDEF("Pm", "\\(+-")

+PREDEF("Rq", "\\(rq")

+PREDEF("left-bracket", "[")

+PREDEF("left-parenthesis", "(")

+PREDEF("lp", "(")

+PREDEF("left-singlequote", "\\(oq")

+PREDEF("q", "\\(dq")

+PREDEF("quote-left", "\\(oq")

+PREDEF("quote-right", "\\(cq")

+PREDEF("R", "\\(rg")

+PREDEF("right-bracket", "]")

+PREDEF("right-parenthesis", ")")

+PREDEF("rp", ")")

+PREDEF("right-singlequote", "\\(cq")

+PREDEF("Tm", "\\(tm")

+PREDEF("Px", "POSIX")

+PREDEF("Ai", "ANSI")

+PREDEF("\'", "\\\'")

+PREDEF("aa", "\\(aa")

+PREDEF("ga", "\\(ga")

+PREDEF("`", "\\`")

+PREDEF("lq", "\\(lq")

+PREDEF("rq", "\\(rq")

+PREDEF("ua", "\\(ua")

+PREDEF("va", "\\(va")

+PREDEF("<=", "\\(<=")

+PREDEF(">=", "\\(>=")

diff --git a/usr.bin/mandoc/read.c b/usr.bin/mandoc/read.c
index d3b837fe6d7..aa491c9fe34 100644
--- a/usr.bin/mandoc/read.c
+++ b/usr.bin/mandoc/read.c

@@ -1,4 +1,4 @@

-/* $Id: read.c,v 1.1 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: read.c,v 1.2 2011/05/29 21:22:18 schwarze Exp $ */

@@ -134,7 +134,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {

"tab in non-literal context",

"end of line whitespace",

"bad comment style",

- "unknown escape sequence",

+ "bad escape sequence",

"unterminated quoted string",

"generic error",

@@ -437,7 +437,7 @@ rerun:

* If input parsers have not been allocated, do so now.

- * We keep these instanced betwen parsers, but set them

+ * We keep these instanced between parsers, but set them

* locally per parse routine since we can use different

* parsers with each one.

diff --git a/usr.bin/mandoc/roff.c b/usr.bin/mandoc/roff.c
index 6cf1164ba45..39393ccadf0 100644
--- a/usr.bin/mandoc/roff.c
+++ b/usr.bin/mandoc/roff.c

@@ -1,4 +1,4 @@

-/* $Id: roff.c,v 1.36 2011/04/24 16:28:48 schwarze Exp $ */

+/* $Id: roff.c,v 1.37 2011/05/29 21:22:18 schwarze Exp $ */

@@ -16,17 +16,15 @@

* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

#include <assert.h>

-#include <errno.h>

#include <ctype.h>

-#include <limits.h>

#include <stdlib.h>

#include <string.h>

-#include <stdio.h>

#include "mandoc.h"

#include "libroff.h"

#include "libmandoc.h"

+/* Maximum number of nested if-else conditionals. */

#define RSTACK_MAX 128

enum rofft {

@@ -59,7 +57,7 @@ enum rofft {

ROFF_EQ,

ROFF_EN,

ROFF_cblock,

- ROFF_ccond, /* FIXME: remove this. */

+ ROFF_ccond,

ROFF_USERDEF,

ROFF_MAX

};

@@ -123,6 +121,14 @@ struct roffmac {

struct roffmac *next;

};

+struct predef {

+ const char *name; /* predefined input name */

+ const char *str; /* replacement symbol */

+};

+#define PREDEF(__name, __str) \

+ { (__name), (__str) },

static enum rofferr roff_block(ROFF_ARGS);

static enum rofferr roff_block_text(ROFF_ARGS);

static enum rofferr roff_block_sub(ROFF_ARGS);

@@ -140,7 +146,7 @@ static const char *roff_getstrn(const struct roff *,

static enum rofferr roff_line_ignore(ROFF_ARGS);

static enum rofferr roff_nr(ROFF_ARGS);

static int roff_res(struct roff *,

- char **, size_t *, int);

+ char **, size_t *, int, int);

static enum rofferr roff_rm(ROFF_ARGS);

static void roff_setstr(struct roff *,

const char *, const char *, int);

@@ -194,6 +200,12 @@ static struct roffmac roffs[ROFF_MAX] = {

{ NULL, roff_userdef, NULL, NULL, 0, NULL },

};

+/* Array of injected predefined strings. */

+#define PREDEFS_MAX 38

+static const struct predef predefs[PREDEFS_MAX] = {

+#include "predefs.in"

+};

static void roff_free1(struct roff *);

static enum rofft roff_hash_find(const char *, size_t);

static void roff_hash_init(void);

@@ -202,7 +214,6 @@ static void roffnode_push(struct roff *, enum rofft,

const char *, int, int);

static void roffnode_pop(struct roff *);

static enum rofft roff_parse(struct roff *, const char *, int *);

-static int roff_parse_nat(const char *, unsigned int *);

/* See roff_hash_find() */

#define ROFF_HASH(p) (p[0] - ASCII_LO)

@@ -228,7 +239,6 @@ roff_hash_init(void)

}

* Look up a roff token by its name. Returns ROFF_MAX if no macro by

* the nil-terminated string name could be found.

@@ -351,6 +361,7 @@ struct roff *

roff_alloc(struct regset *regs, struct mparse *parse)

{

struct roff *r;

+ int i;

r = mandoc_calloc(1, sizeof(struct roff));

r->regs = regs;

@@ -358,6 +369,10 @@ roff_alloc(struct regset *regs, struct mparse *parse)

r->rstackpos = -1;

roff_hash_init();

+ for (i = 0; i < PREDEFS_MAX; i++)

+ roff_setstr(r, predefs[i].name, predefs[i].str, 0);

return(r);

}

@@ -368,7 +383,7 @@ roff_alloc(struct regset *regs, struct mparse *parse)

* is processed.

static int

-roff_res(struct roff *r, char **bufp, size_t *szp, int pos)

+roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)

{

const char *stesc; /* start of an escape sequence ('\\') */

const char *stnam; /* start of the name, after "[(*" */

@@ -435,8 +450,9 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int pos)

res = roff_getstrn(r, stnam, (size_t)i);

if (NULL == res) {

- cp -= maxl ? 1 : 0;

- continue;

+ /* TODO: keep track of the correct position. */

+ mandoc_msg(MANDOCERR_BADESCAPE, r->parse, ln, pos, NULL);

+ res = "";

}

/* Replace the escape sequence by the string. */

@@ -472,7 +488,7 @@ roff_parseln(struct roff *r, int ln, char **bufp,

* words to fill in.

- if (r->first_string && ! roff_res(r, bufp, szp, pos))

+ if (r->first_string && ! roff_res(r, bufp, szp, ln, pos))

return(ROFF_REPARSE);

ppos = pos;

@@ -589,27 +605,6 @@ roff_parse(struct roff *r, const char *buf, int *pos)

return(t);

}

-static int

-roff_parse_nat(const char *buf, unsigned int *res)

- char *ep;

- long lval;

- errno = 0;

- lval = strtol(buf, &ep, 10);

- if (buf[0] == '\0' || *ep != '\0')

- return(0);

- if ((errno == ERANGE &&

- (lval == LONG_MAX || lval == LONG_MIN)) ||

- (lval > INT_MAX || lval < 0))

- return(0);

- *res = (unsigned int)lval;

- return(1);

/* ARGSUSED */

static enum rofferr

roff_cblock(ROFF_ARGS)

@@ -861,21 +856,29 @@ roff_cond_sub(ROFF_ARGS)

{

enum rofft t;

enum roffrule rr;

+ char *ep;

rr = r->last->rule;

+ roffnode_cleanscope(r);

- /*

- * Clean out scope. If we've closed ourselves, then don't

- * continue.

+ /*

+ * If the macro is unknown, first check if it contains a closing

+ * delimiter `\}'. If it does, close out our scope and return

+ * the currently-scoped rule (ignore or continue). Else, drop

+ * into the currently-scoped rule.

- roffnode_cleanscope(r);

if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {

- if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])

- return(roff_ccond

- (r, ROFF_ccond, bufp, szp,

- ln, pos, pos + 2, offs));

+ ep = &(*bufp)[pos];

+ for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {

+ ep++;

+ if ('}' != *ep)

+ continue;

+ *ep = '&';

+ roff_ccond(r, ROFF_ccond, bufp, szp,

+ ln, pos, pos + 2, offs);

+ break;

+ }

return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);

}

@@ -884,6 +887,7 @@ roff_cond_sub(ROFF_ARGS)

* if they're either structurally required (such as loops and

* conditionals) or a closing macro.

if (ROFFRULE_DENY == rr)

if ( ! (ROFFMAC_STRUCT & roffs[t].flags))

if (ROFF_ccond != t)

@@ -894,37 +898,28 @@ roff_cond_sub(ROFF_ARGS)

ln, ppos, pos, offs));

}

/* ARGSUSED */

static enum rofferr

roff_cond_text(ROFF_ARGS)

{

- char *ep, *st;

+ char *ep;

enum roffrule rr;

rr = r->last->rule;

+ roffnode_cleanscope(r);

- /*

- * We display the value of the text if out current evaluation

- * scope permits us to do so.

- */

- /* FIXME: use roff_ccond? */

- st = &(*bufp)[pos];

- if (NULL == (ep = strstr(st, "\\}"))) {

- roffnode_cleanscope(r);

- return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);

+ ep = &(*bufp)[pos];

+ for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {

+ ep++;

+ if ('}' != *ep)

+ continue;

+ *ep = '&';

+ roff_ccond(r, ROFF_ccond, bufp, szp,

+ ln, pos, pos + 2, offs);

}

- if (ep == st || (ep > st && '\\' != *(ep - 1)))

- roffnode_pop(r);

- roffnode_cleanscope(r);

return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);

}

static enum roffrule

roff_evalcond(const char *v, int *pos)

{

@@ -1086,6 +1081,7 @@ roff_nr(ROFF_ARGS)

{

const char *key;

char *val;

+ int iv;

struct reg *rg;

val = *bufp + pos;

@@ -1094,8 +1090,10 @@ roff_nr(ROFF_ARGS)

if (0 == strcmp(key, "nS")) {

rg[(int)REG_nS].set = 1;

- if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u))

- rg[(int)REG_nS].v.u = 0;

+ if ((iv = mandoc_strntou(val, strlen(val), 10)) >= 0)

+ rg[REG_nS].v.u = (unsigned)iv;

+ else

+ rg[(int)REG_nS].v.u = 0u;

}

return(ROFF_IGN);

diff --git a/usr.bin/mandoc/tbl_layout.c b/usr.bin/mandoc/tbl_layout.c
index 85efa0ee561..2d1989fa4e8 100644
--- a/usr.bin/mandoc/tbl_layout.c
+++ b/usr.bin/mandoc/tbl_layout.c

@@ -1,4 +1,4 @@

-/* $Id: tbl_layout.c,v 1.8 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: tbl_layout.c,v 1.9 2011/05/29 21:22:18 schwarze Exp $ */

@@ -68,6 +68,23 @@ mods(struct tbl_node *tbl, struct tbl_cell *cp,

char buf[5];

int i;

+ /* Not all types accept modifiers. */

+ switch (cp->pos) {

+ case (TBL_CELL_DOWN):

+ /* FALLTHROUGH */

+ case (TBL_CELL_HORIZ):

+ /* FALLTHROUGH */

+ case (TBL_CELL_DHORIZ):

+ /* FALLTHROUGH */

+ case (TBL_CELL_VERT):

+ /* FALLTHROUGH */

+ case (TBL_CELL_DVERT):

+ return(1);

+ default:

+ break;

+ }

mod:

* XXX: since, at least for now, modifiers are non-conflicting

@@ -423,19 +440,19 @@ cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos)

}

static void

-head_adjust(const struct tbl_cell *cell, struct tbl_head *head)

+head_adjust(const struct tbl_cell *cellp, struct tbl_head *head)

{

- if (TBL_CELL_VERT != cell->pos &&

- TBL_CELL_DVERT != cell->pos) {

+ if (TBL_CELL_VERT != cellp->pos &&

+ TBL_CELL_DVERT != cellp->pos) {

head->pos = TBL_HEAD_DATA;

return;

}

- if (TBL_CELL_VERT == cell->pos)

+ if (TBL_CELL_VERT == cellp->pos)

if (TBL_HEAD_DVERT != head->pos)

head->pos = TBL_HEAD_VERT;

- if (TBL_CELL_DVERT == cell->pos)

+ if (TBL_CELL_DVERT == cellp->pos)

head->pos = TBL_HEAD_DVERT;

}

diff --git a/usr.bin/mandoc/term.c b/usr.bin/mandoc/term.c
index e89f927c9a6..863e0a7a602 100644
--- a/usr.bin/mandoc/term.c
+++ b/usr.bin/mandoc/term.c

@@ -1,6 +1,6 @@

-/* $Id: term.c,v 1.58 2011/04/24 16:22:02 schwarze Exp $ */

+/* $Id: term.c,v 1.59 2011/05/29 21:22:18 schwarze Exp $ */

* Permission to use, copy, modify, and distribute this software for any

@@ -29,13 +29,10 @@

#include "term.h"

#include "main.h"

-static void spec(struct termp *, enum roffdeco,

- const char *, size_t);

-static void res(struct termp *, const char *, size_t);

-static void bufferc(struct termp *, char);

-static void adjbuf(struct termp *p, size_t);

-static void encode(struct termp *, const char *, size_t);

+static void adjbuf(struct termp *p, int);

+static void bufferc(struct termp *, char);

+static void encode(struct termp *, const char *, size_t);

+static void encode1(struct termp *, int);

void

term_free(struct termp *p)

@@ -44,7 +41,7 @@ term_free(struct termp *p)

if (p->buf)

free(p->buf);

if (p->symtab)

- chars_free(p->symtab);

+ mchars_free(p->symtab);

free(p);

}

@@ -69,18 +66,6 @@ term_end(struct termp *p)

(*p->end)(p);

}

-struct termp *

-term_alloc(enum termenc enc)

- struct termp *p;

- p = mandoc_calloc(1, sizeof(struct termp));

- p->enc = enc;

- return(p);

* Flush a line of text. A "line" is loosely defined as being something

* that should be followed by a newline, regardless of whether it's

@@ -152,12 +137,12 @@ term_flushln(struct termp *p)

vis = vend = 0;

i = 0;

- while (i < (int)p->col) {

+ while (i < p->col) {

* Handle literal tab characters: collapse all

* subsequent tabs into a single huge set of spaces.

- while (i < (int)p->col && '\t' == p->buf[i]) {

+ while (i < p->col && '\t' == p->buf[i]) {

vend = (vis / p->tabwidth + 1) * p->tabwidth;

vbl += vend - vis;

vis = vend;

@@ -171,7 +156,7 @@ term_flushln(struct termp *p)

* space is printed according to regular spacing rules).

- for (j = i, jhy = 0; j < (int)p->col; j++) {

+ for (j = i, jhy = 0; j < p->col; j++) {

if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])

break;

@@ -214,7 +199,7 @@ term_flushln(struct termp *p)

}

/* Write out the [remaining] word. */

- for ( ; i < (int)p->col; i++) {

+ for ( ; i < p->col; i++) {

if (vend > bp && jhy > 0 && i > jhy)

break;

if ('\t' == p->buf[i])

@@ -341,44 +326,6 @@ term_vspace(struct termp *p)

(*p->endline)(p);

}

-static void

-numbered(struct termp *p, const char *word, size_t len)

- const char *rhs;

- rhs = chars_num2char(word, len);

- if (rhs)

- encode(p, rhs, 1);

-static void

-spec(struct termp *p, enum roffdeco d, const char *word, size_t len)

- const char *rhs;

- size_t sz;

- rhs = chars_spec2str(p->symtab, word, len, &sz);

- if (rhs)

- encode(p, rhs, sz);

- else if (DECO_SSPECIAL == d)

- encode(p, word, len);

-static void

-res(struct termp *p, const char *word, size_t len)

- const char *rhs;

- size_t sz;

- rhs = chars_res2str(p->symtab, word, len, &sz);

- if (rhs)

- encode(p, rhs, sz);

void

term_fontlast(struct termp *p)

{

@@ -443,7 +390,6 @@ term_fontpop(struct termp *p)

p->fonti--;

}

* Handle pwords, partial words, which may be either a single word or a

* phrase that cannot be broken down (such as a literal string). This

@@ -452,9 +398,11 @@ term_fontpop(struct termp *p)

void

term_word(struct termp *p, const char *word)

{

- const char *seq;

+ const char *seq, *cp;

+ char c;

+ int sz, uc;

size_t ssz;

- enum roffdeco deco;

+ enum mandoc_esc esc;

if ( ! (TERMP_NOSPACE & p->flags)) {

if ( ! (TERMP_KEEP & p->flags)) {

@@ -474,7 +422,7 @@ term_word(struct termp *p, const char *word)

p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);

- while (*word) {

+ while ('\0' != *word) {

if ((ssz = strcspn(word, "\\")) > 0)

encode(p, word, ssz);

@@ -482,45 +430,71 @@ term_word(struct termp *p, const char *word)

if ('\\' != *word)

continue;

- seq = ++word;

- word += a2roffdeco(&deco, &seq, &ssz);

+ word++;

+ esc = mandoc_escape(&word, &seq, &sz);

+ if (ESCAPE_ERROR == esc)

+ break;

+ if (TERMENC_ASCII != p->enc)

+ switch (esc) {

+ case (ESCAPE_UNICODE):

+ uc = mchars_num2uc(seq + 1, sz - 1);

+ if ('\0' == uc)

+ break;

+ encode1(p, uc);

+ continue;

+ case (ESCAPE_SPECIAL):

+ uc = mchars_spec2cp(p->symtab, seq, sz);

+ if (uc <= 0)

+ break;

+ encode1(p, uc);

+ continue;

+ default:

+ break;

+ }

- switch (deco) {

- case (DECO_NUMBERED):

- numbered(p, seq, ssz);

+ switch (esc) {

+ case (ESCAPE_UNICODE):

+ encode1(p, '?');

break;

- case (DECO_RESERVED):

- res(p, seq, ssz);

+ case (ESCAPE_NUMBERED):

+ c = mchars_num2char(seq, sz);

+ if ('\0' != c)

+ encode(p, &c, 1);

break;

- case (DECO_SPECIAL):

- /* FALLTHROUGH */

- case (DECO_SSPECIAL):

- spec(p, deco, seq, ssz);

+ case (ESCAPE_SPECIAL):

+ cp = mchars_spec2str(p->symtab, seq, sz, &ssz);

+ if (NULL != cp)

+ encode(p, cp, ssz);

+ else if (1 == ssz)

+ encode(p, seq, sz);

break;

- case (DECO_BOLD):

+ case (ESCAPE_FONTBOLD):

term_fontrepl(p, TERMFONT_BOLD);

break;

- case (DECO_ITALIC):

+ case (ESCAPE_FONTITALIC):

term_fontrepl(p, TERMFONT_UNDER);

break;

- case (DECO_ROMAN):

+ case (ESCAPE_FONT):

+ /* FALLTHROUGH */

+ case (ESCAPE_FONTROMAN):

term_fontrepl(p, TERMFONT_NONE);

break;

- case (DECO_PREVIOUS):

+ case (ESCAPE_FONTPREV):

term_fontlast(p);

break;

+ case (ESCAPE_NOSPACE):

+ if ('\0' == *word)

+ p->flags |= TERMP_NOSPACE;

+ break;

default:

break;

}

- if (DECO_NOSPACE == deco && '\0' == *word)

- p->flags |= TERMP_NOSPACE;

}

static void

-adjbuf(struct termp *p, size_t sz)

+adjbuf(struct termp *p, int sz)

{

if (0 == p->maxcols)

@@ -528,10 +502,10 @@ adjbuf(struct termp *p, size_t sz)

while (sz >= p->maxcols)

p->maxcols <<= 2;

- p->buf = mandoc_realloc(p->buf, p->maxcols);

+ p->buf = mandoc_realloc

+ (p->buf, sizeof(int) * (size_t)p->maxcols);

}

static void

bufferc(struct termp *p, char c)

{

@@ -539,15 +513,44 @@ bufferc(struct termp *p, char c)

if (p->col + 1 >= p->maxcols)

adjbuf(p, p->col + 1);

- p->buf[(int)p->col++] = c;

+ p->buf[p->col++] = c;

}

+/*

+ * See encode().

+ * Do this for a single (probably unicode) value.

+ * Does not check for non-decorated glyphs.

+ * The output buffer is grown up front so that the (at most three)

+ * cells stored below always fit.

+ */

+static void

+encode1(struct termp *p, int c)

+ enum termfont f;

+ if (p->col + 4 >= p->maxcols)

+ adjbuf(p, p->col + 4);

+ f = term_fonttop(p);

+ if (TERMFONT_NONE == f) { /* no decoration: store the value as-is */

+ p->buf[p->col++] = c;

+ return;

+ } else if (TERMFONT_UNDER == f) { /* underline: '_', backspace, value */

+ p->buf[p->col++] = '_';

+ } else /* any other font: value, backspace, value (overstrike) */

+ p->buf[p->col++] = c;

+ p->buf[p->col++] = 8; /* 8 is the ASCII backspace */

+ p->buf[p->col++] = c;

static void

encode(struct termp *p, const char *word, size_t sz)

{

enum termfont f;

- int i;

+ int i, len;

+ /* LINTED */

+ len = sz;

* Encode and buffer a string of characters. If the current

@@ -556,35 +559,34 @@ encode(struct termp *p, const char *word, size_t sz)

if (TERMFONT_NONE == (f = term_fonttop(p))) {

- if (p->col + sz >= p->maxcols)

- adjbuf(p, p->col + sz);

- memcpy(&p->buf[(int)p->col], word, sz);

- p->col += sz;

+ if (p->col + len >= p->maxcols)

+ adjbuf(p, p->col + len);

+ for (i = 0; i < len; i++)

+ p->buf[p->col++] = word[i];

return;

}

/* Pre-buffer, assuming worst-case. */

- if (p->col + 1 + (sz * 3) >= p->maxcols)

- adjbuf(p, p->col + 1 + (sz * 3));

+ if (p->col + 1 + (len * 3) >= p->maxcols)

+ adjbuf(p, p->col + 1 + (len * 3));

- for (i = 0; i < (int)sz; i++) {

- if ( ! isgraph((u_char)word[i])) {

- p->buf[(int)p->col++] = word[i];

+ for (i = 0; i < len; i++) {

+ if ( ! isgraph((unsigned char)word[i])) {

+ p->buf[p->col++] = word[i];

continue;

}

if (TERMFONT_UNDER == f)

- p->buf[(int)p->col++] = '_';

+ p->buf[p->col++] = '_';

else

- p->buf[(int)p->col++] = word[i];

+ p->buf[p->col++] = word[i];

- p->buf[(int)p->col++] = 8;

- p->buf[(int)p->col++] = word[i];

+ p->buf[p->col++] = 8;

+ p->buf[p->col++] = word[i];

}

size_t

term_len(const struct termp *p, size_t sz)

{

@@ -596,59 +598,99 @@ term_len(const struct termp *p, size_t sz)

size_t

term_strlen(const struct termp *p, const char *cp)

{

- size_t sz, ssz, rsz, i;

- enum roffdeco d;

+ size_t sz, rsz, i;

+ int ssz, c;

const char *seq, *rhs;

+ enum mandoc_esc esc;

+ static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' };

- for (sz = 0; '\0' != *cp; )

- /*

- * Account for escaped sequences within string length

- * calculations. This follows the logic in term_word()

- * as we must calculate the width of produced strings.

- */

- if ('\\' == *cp) {

- seq = ++cp;

- cp += a2roffdeco(&d, &seq, &ssz);

+ /*

+ * Account for escaped sequences within string length

+ * calculations. This follows the logic in term_word() as we

+ * must calculate the width of produced strings.

+ */

- switch (d) {

- case (DECO_RESERVED):

- rhs = chars_res2str

- (p->symtab, seq, ssz, &rsz);

+ sz = 0;

+ while ('\0' != *cp) {

+ rsz = strcspn(cp, rej);

+ for (i = 0; i < rsz; i++)

+ sz += (*p->width)(p, *cp++);

+ c = 0;

+ switch (*cp) {

+ case ('\\'):

+ cp++;

+ esc = mandoc_escape(&cp, &seq, &ssz);

+ if (ESCAPE_ERROR == esc)

+ return(sz);

+ if (TERMENC_ASCII != p->enc)

+ switch (esc) {

+ case (ESCAPE_UNICODE):

+ c = mchars_num2uc

+ (seq + 1, ssz - 1);

+ if ('\0' == c)

+ break;

+ sz += (*p->width)(p, c);

+ continue;

+ case (ESCAPE_SPECIAL):

+ c = mchars_spec2cp

+ (p->symtab, seq, ssz);

+ if (c <= 0)

+ break;

+ sz += (*p->width)(p, c);

+ continue;

+ default:

+ break;

+ }

+ rhs = NULL;

+ switch (esc) {

+ case (ESCAPE_UNICODE):

+ sz += (*p->width)(p, '?');

break;

- case (DECO_SPECIAL):

- /* FALLTHROUGH */

- case (DECO_SSPECIAL):

- rhs = chars_spec2str

+ case (ESCAPE_NUMBERED):

+ c = mchars_num2char(seq, ssz);

+ if ('\0' != c)

+ sz += (*p->width)(p, c);

+ break;

+ case (ESCAPE_SPECIAL):

+ rhs = mchars_spec2str

(p->symtab, seq, ssz, &rsz);

- /* Allow for one-char escapes. */

- if (DECO_SSPECIAL != d || rhs)

+ if (ssz != 1 || rhs)

break;

rhs = seq;

rsz = ssz;

break;

default:

- rhs = NULL;

break;

}

- if (rhs)

- for (i = 0; i < rsz; i++)

- sz += (*p->width)(p, *rhs++);

- } else if (ASCII_NBRSP == *cp) {

+ if (NULL == rhs)

+ break;

+ for (i = 0; i < rsz; i++)

+ sz += (*p->width)(p, *rhs++);

+ break;

+ case (ASCII_NBRSP):

sz += (*p->width)(p, ' ');

cp++;

- } else if (ASCII_HYPH == *cp) {

+ break;

+ case (ASCII_HYPH):

sz += (*p->width)(p, '-');

cp++;

- } else

- sz += (*p->width)(p, *cp++);

+ break;

+ default:

+ break;

+ }

return(sz);

}

/* ARGSUSED */

size_t

term_vspan(const struct termp *p, const struct roffsu *su)

@@ -685,7 +727,6 @@ term_vspan(const struct termp *p, const struct roffsu *su)

r);

}

size_t

term_hspan(const struct termp *p, const struct roffsu *su)

{

diff --git a/usr.bin/mandoc/term.h b/usr.bin/mandoc/term.h
index 1dfeecf0e1f..30d9b70a9d5 100644
--- a/usr.bin/mandoc/term.h
+++ b/usr.bin/mandoc/term.h

@@ -1,4 +1,4 @@

-/* $Id: term.h,v 1.29 2011/01/09 14:30:48 schwarze Exp $ */

+/* $Id: term.h,v 1.30 2011/05/29 21:22:18 schwarze Exp $ */

@@ -22,7 +22,9 @@ __BEGIN_DECLS

struct termp;

enum termenc {

- TERMENC_ASCII

+ TERMENC_ASCII,

+ TERMENC_LOCALE,

+ TERMENC_UTF8

};

enum termtype {

@@ -42,35 +44,6 @@ enum termfont {

typedef void (*term_margin)(struct termp *, const void *);

-struct termp_ps {

- int flags;

-#define PS_INLINE (1 << 0) /* we're in a word */

-#define PS_MARGINS (1 << 1) /* we're in the margins */

-#define PS_NEWPAGE (1 << 2) /* new page, no words yet */

- size_t pscol; /* visible column (AFM units) */

- size_t psrow; /* visible row (AFM units) */

- char *psmarg; /* margin buf */

- size_t psmargsz; /* margin buf size */

- size_t psmargcur; /* cur index in margin buf */

- char last; /* character buffer */

- enum termfont lastf; /* last set font */

- size_t scale; /* font scaling factor */

- size_t pages; /* number of pages shown */

- size_t lineheight; /* line height (AFM units) */

- size_t top; /* body top (AFM units) */

- size_t bottom; /* body bottom (AFM units) */

- size_t height; /* page height (AFM units */

- size_t width; /* page width (AFM units) */

- size_t left; /* body left (AFM units) */

- size_t header; /* header pos (AFM units) */

- size_t footer; /* footer pos (AFM units) */

- size_t pdfbytes; /* current output byte */

- size_t pdflastpg; /* byte of last page mark */

- size_t pdfbody; /* start of body object */

- size_t *pdfobjs; /* table of object offsets */

- size_t pdfobjsz; /* size of pdfobjs */

-};

struct termp_tbl {

int width; /* width in fixed chars */

int decimal; /* decimal point position */

@@ -82,10 +55,10 @@ struct termp {

size_t defrmargin; /* Right margin of the device. */

size_t rmargin; /* Current right margin. */

size_t maxrmargin; /* Max right margin. */

- size_t maxcols; /* Max size of buf. */

+ int maxcols; /* Max size of buf. */

size_t offset; /* Margin offset. */

size_t tabwidth; /* Distance of tab positions. */

- size_t col; /* Bytes in buf. */

+ int col; /* Bytes in buf. */

size_t viscol; /* Chars on current line. */

int overstep; /* See termp_flushln(). */

int flags;

@@ -103,29 +76,26 @@ struct termp {

#define TERMP_ANPREC (1 << 13) /* See termp_an_pre(). */

#define TERMP_KEEP (1 << 14) /* Keep words together. */

#define TERMP_PREKEEP (1 << 15) /* ...starting with the next one. */

- char *buf; /* Output buffer. */

+ int *buf; /* Output buffer. */

enum termenc enc; /* Type of encoding. */

- void *symtab; /* Encoded-symbol table. */

+ struct mchars *symtab; /* Encoded-symbol table. */

enum termfont fontl; /* Last font set. */

enum termfont fontq[10]; /* Symmetric fonts. */

int fonti; /* Index of font stack. */

term_margin headf; /* invoked to print head */

term_margin footf; /* invoked to print foot */

- void (*letter)(struct termp *, char);

+ void (*letter)(struct termp *, int);

void (*begin)(struct termp *);

void (*end)(struct termp *);

void (*endline)(struct termp *);

void (*advance)(struct termp *, size_t);

- size_t (*width)(const struct termp *, char);

+ size_t (*width)(const struct termp *, int);

double (*hspan)(const struct termp *,

const struct roffsu *);

const void *argf; /* arg for headf/footf */

- union {

- struct termp_ps ps;

- } engine;

+ struct termp_ps *ps;

};

-struct termp *term_alloc(enum termenc);

void term_tbl(struct termp *, const struct tbl_span *);

void term_free(struct termp *);

void term_newln(struct termp *);

diff --git a/usr.bin/mandoc/term_ascii.c b/usr.bin/mandoc/term_ascii.c
index 5462ec5e5f7..7d70dc4a86a 100644
--- a/usr.bin/mandoc/term_ascii.c
+++ b/usr.bin/mandoc/term_ascii.c

@@ -1,6 +1,6 @@

-/* $Id: term_ascii.c,v 1.5 2011/01/31 02:36:55 schwarze Exp $ */

+/* $Id: term_ascii.c,v 1.6 2011/05/29 21:22:18 schwarze Exp $ */

* Permission to use, copy, modify, and distribute this software for any

* purpose with or without fee is hereby granted, provided that the above

@@ -17,47 +17,70 @@

#include <sys/types.h>

#include <assert.h>

+#include <locale.h>

#include <stdint.h>

#include <stdio.h>

#include <stdlib.h>

#include <unistd.h>

+#include <wchar.h>

#include "mandoc.h"

#include "out.h"

#include "term.h"

#include "main.h"

+static struct termp *ascii_init(enum termenc, char *);

static double ascii_hspan(const struct termp *,

const struct roffsu *);

-static size_t ascii_width(const struct termp *, char);

+static size_t ascii_width(const struct termp *, int);

static void ascii_advance(struct termp *, size_t);

static void ascii_begin(struct termp *);

static void ascii_end(struct termp *);

static void ascii_endline(struct termp *);

-static void ascii_letter(struct termp *, char);

+static void ascii_letter(struct termp *, int);

+static void locale_advance(struct termp *, size_t);

+static void locale_endline(struct termp *);

+static void locale_letter(struct termp *, int);

+static size_t locale_width(const struct termp *, int);

-void *

-ascii_alloc(char *outopts)

+static struct termp *

+ascii_init(enum termenc enc, char *outopts)

{

- struct termp *p;

const char *toks[2];

char *v;

+ struct termp *p;

- p = term_alloc(TERMENC_ASCII);

+ p = mandoc_calloc(1, sizeof(struct termp));

+ p->enc = enc;

p->tabwidth = 5;

p->defrmargin = 78;

- p->advance = ascii_advance;

p->begin = ascii_begin;

p->end = ascii_end;

- p->endline = ascii_endline;

p->hspan = ascii_hspan;

- p->letter = ascii_letter;

p->type = TERMTYPE_CHAR;

+ p->enc = TERMENC_ASCII;

+ p->advance = ascii_advance;

+ p->endline = ascii_endline;

+ p->letter = ascii_letter;

p->width = ascii_width;

+ if (TERMENC_ASCII != enc) {

+ v = TERMENC_LOCALE == enc ?

+ setlocale(LC_ALL, "") :

+ setlocale(LC_CTYPE, "UTF-8");

+ if (NULL != v && MB_CUR_MAX > 1) {

+ p->enc = enc;

+ p->advance = locale_advance;

+ p->endline = locale_endline;

+ p->letter = locale_letter;

+ p->width = locale_width;

+ }

toks[0] = "width";

toks[1] = NULL;

@@ -77,16 +100,36 @@ ascii_alloc(char *outopts)

return(p);

}

+void *

+ascii_alloc(char *outopts)

+ return(ascii_init(TERMENC_ASCII, outopts)); /* plain ASCII: ascii_init() skips setlocale() for this encoding */

+void *

+utf8_alloc(char *outopts)

+ return(ascii_init(TERMENC_UTF8, outopts)); /* ascii_init() keeps the ASCII callbacks if the "UTF-8" LC_CTYPE locale cannot be set */

+void *

+locale_alloc(char *outopts)

+ return(ascii_init(TERMENC_LOCALE, outopts)); /* uses the environment's locale (setlocale(LC_ALL, "")); ASCII callbacks stay if it is not multibyte */

/* ARGSUSED */

static size_t

-ascii_width(const struct termp *p, char c)

+ascii_width(const struct termp *p, int c)

{

return(1);

}

void

ascii_free(void *arg)

{

@@ -94,16 +137,14 @@ ascii_free(void *arg)

term_free((struct termp *)arg);

}

/* ARGSUSED */

static void

-ascii_letter(struct termp *p, char c)

+ascii_letter(struct termp *p, int c)

{

putchar(c);

}

static void

ascii_begin(struct termp *p)

{

@@ -111,7 +152,6 @@ ascii_begin(struct termp *p)

(*p->headf)(p, p->argf);

}

static void

ascii_end(struct termp *p)

{

@@ -119,7 +159,6 @@ ascii_end(struct termp *p)

(*p->footf)(p, p->argf);

}

/* ARGSUSED */

static void

ascii_endline(struct termp *p)

@@ -128,19 +167,16 @@ ascii_endline(struct termp *p)

putchar('\n');

}

/* ARGSUSED */

static void

ascii_advance(struct termp *p, size_t len)

{

size_t i;

- /* Just print whitespace on the terminal. */

for (i = 0; i < len; i++)

putchar(' ');

}

/* ARGSUSED */

static double

ascii_hspan(const struct termp *p, const struct roffsu *su)

@@ -179,3 +215,37 @@ ascii_hspan(const struct termp *p, const struct roffsu *su)

return(r);

}

+/* ARGSUSED */

+static size_t

+locale_width(const struct termp *p, int c)

+ int rc; /* raw wcwidth() result */

+ return((rc = wcwidth(c)) < 0 ? 0 : rc); /* a negative wcwidth() result is counted as zero columns */

+/* ARGSUSED */

+static void

+locale_advance(struct termp *p, size_t len)

+ size_t i;

+ for (i = 0; i < len; i++) /* emit len wide-character blanks */

+ putwchar(L' ');

+/* ARGSUSED */

+static void

+locale_endline(struct termp *p)

+ putwchar(L'\n'); /* wide-character counterpart of ascii_endline()'s putchar('\n') */

+/* ARGSUSED */

+static void

+locale_letter(struct termp *p, int c)

+ putwchar(c); /* write c as one wide character (ascii_letter() uses putchar()) */

diff --git a/usr.bin/mandoc/term_ps.c b/usr.bin/mandoc/term_ps.c
index 512c602ffe0..761dc1b0423 100644
--- a/usr.bin/mandoc/term_ps.c
+++ b/usr.bin/mandoc/term_ps.c

@@ -1,6 +1,6 @@

-/* $Id: term_ps.c,v 1.16 2011/04/21 22:59:54 schwarze Exp $ */

+/* $Id: term_ps.c,v 1.17 2011/05/29 21:22:18 schwarze Exp $ */

* Permission to use, copy, modify, and distribute this software for any

* purpose with or without fee is hereby granted, provided that the above

@@ -34,13 +34,16 @@

#include "main.h"

#include "term.h"

+/* These work the buffer used by the header and footer. */

+#define PS_BUFSLOP 128

/* Convert PostScript point "x" to an AFM unit. */

#define PNT2AFM(p, x) /* LINTED */ \

- (size_t)((double)(x) * (1000.0 / (double)(p)->engine.ps.scale))

+ (size_t)((double)(x) * (1000.0 / (double)(p)->ps->scale))

/* Convert an AFM unit "x" to PostScript points. */

#define AFM2PNT(p, x) /* LINTED */ \

- ((double)(x) / (1000.0 / (double)(p)->engine.ps.scale))

+ ((double)(x) / (1000.0 / (double)(p)->ps->scale))

struct glyph {

unsigned short wx; /* WX in AFM */

@@ -52,6 +55,54 @@ struct font {

struct glyph gly[MAXCHAR]; /* glyph metrics */

};

+struct termp_ps { /* state private to the PostScript/PDF back-end */

+ int flags; /* bitmask of the PS_* values below */

+#define PS_INLINE (1 << 0) /* we're in a word */

+#define PS_MARGINS (1 << 1) /* we're in the margins */

+#define PS_NEWPAGE (1 << 2) /* new page, no words yet */

+ size_t pscol; /* visible column (AFM units) */

+ size_t psrow; /* visible row (AFM units) */

+ char *psmarg; /* margin buf (collects header/footer output) */

+ size_t psmargsz; /* margin buf size */

+ size_t psmargcur; /* cur index in margin buf */

+ char last; /* character buffer: letter held back by ps_letter(), '\0' if none */

+ enum termfont lastf; /* last set font */

+ size_t scale; /* font scaling factor */

+ size_t pages; /* number of pages shown */

+ size_t lineheight; /* line height (AFM units) */

+ size_t top; /* body top (AFM units) */

+ size_t bottom; /* body bottom (AFM units) */

+ size_t height; /* page height (AFM units) */

+ size_t width; /* page width (AFM units) */

+ size_t left; /* body left (AFM units) */

+ size_t header; /* header pos (AFM units) */

+ size_t footer; /* footer pos (AFM units) */

+ size_t pdfbytes; /* current output byte (running count of bytes printed) */

+ size_t pdflastpg; /* byte of last page mark */

+ size_t pdfbody; /* start of body object */

+ size_t *pdfobjs; /* table of object offsets */

+ size_t pdfobjsz; /* size of pdfobjs (in entries) */

+};

+static double ps_hspan(const struct termp *,

+ const struct roffsu *);

+static size_t ps_width(const struct termp *, int);

+static void ps_advance(struct termp *, size_t);

+static void ps_begin(struct termp *);

+static void ps_closepage(struct termp *);

+static void ps_end(struct termp *);

+static void ps_endline(struct termp *);

+static void ps_fclose(struct termp *);

+static void ps_growbuf(struct termp *, size_t);

+static void ps_letter(struct termp *, int);

+static void ps_pclose(struct termp *);

+static void ps_pletter(struct termp *, int);

+static void ps_printf(struct termp *, const char *, ...);

+static void ps_putchar(struct termp *, char);

+static void ps_setfont(struct termp *, enum termfont);

+static struct termp *pspdf_alloc(char *);

+static void pdf_obj(struct termp *, size_t);

* We define, for the time being, three fonts: bold, oblique/italic, and

* normal (roman). The following table hard-codes the font metrics for

@@ -352,44 +403,6 @@ static const struct font fonts[TERMFONT__MAX] = {

} },

};

-/* These work the buffer used by the header and footer. */

-#define PS_BUFSLOP 128

-static void

-ps_growbuf(struct termp *p, size_t sz)

- if (p->engine.ps.psmargcur + sz <= p->engine.ps.psmargsz)

- return;

- if (sz < PS_BUFSLOP)

- sz = PS_BUFSLOP;

- p->engine.ps.psmargsz += sz;

- p->engine.ps.psmarg = mandoc_realloc

- (p->engine.ps.psmarg,

- p->engine.ps.psmargsz);

-static double ps_hspan(const struct termp *,

- const struct roffsu *);

-static size_t ps_width(const struct termp *, char);

-static void ps_advance(struct termp *, size_t);

-static void ps_begin(struct termp *);

-static void ps_closepage(struct termp *);

-static void ps_end(struct termp *);

-static void ps_endline(struct termp *);

-static void ps_fclose(struct termp *);

-static void ps_letter(struct termp *, char);

-static void ps_pclose(struct termp *);

-static void ps_pletter(struct termp *, int);

-static void ps_printf(struct termp *, const char *, ...);

-static void ps_putchar(struct termp *, char);

-static void ps_setfont(struct termp *, enum termfont);

-static struct termp *pspdf_alloc(char *);

-static void pdf_obj(struct termp *, size_t);

void *

pdf_alloc(char *outopts)

{

@@ -401,7 +414,6 @@ pdf_alloc(char *outopts)

return(p);

}

void *

ps_alloc(char *outopts)

{

@@ -413,7 +425,6 @@ ps_alloc(char *outopts)

return(p);

}

static struct termp *

pspdf_alloc(char *outopts)

{

@@ -423,7 +434,9 @@ pspdf_alloc(char *outopts)

const char *pp;

char *v;

- p = term_alloc(TERMENC_ASCII);

+ p = mandoc_calloc(1, sizeof(struct termp));

+ p->enc = TERMENC_ASCII;

+ p->ps = mandoc_calloc(1, sizeof(struct termp_ps));

p->advance = ps_advance;

p->begin = ps_begin;

@@ -482,7 +495,7 @@ pspdf_alloc(char *outopts)

* calculations occur.

- p->engine.ps.scale = 11;

+ p->ps->scale = 11;

/* Remember millimetres -> AFM units. */

@@ -498,16 +511,16 @@ pspdf_alloc(char *outopts)

/* Line-height is 1.4em. */

- lineheight = PNT2AFM(p, ((double)p->engine.ps.scale * 1.4));

+ lineheight = PNT2AFM(p, ((double)p->ps->scale * 1.4));

- p->engine.ps.width = pagex;

- p->engine.ps.height = pagey;

- p->engine.ps.header = pagey - (marginy / 2) - (lineheight / 2);

- p->engine.ps.top = pagey - marginy;

- p->engine.ps.footer = (marginy / 2) - (lineheight / 2);

- p->engine.ps.bottom = marginy;

- p->engine.ps.left = marginx;

- p->engine.ps.lineheight = lineheight;

+ p->ps->width = pagex;

+ p->ps->height = pagey;

+ p->ps->header = pagey - (marginy / 2) - (lineheight / 2);

+ p->ps->top = pagey - marginy;

+ p->ps->footer = (marginy / 2) - (lineheight / 2);

+ p->ps->bottom = marginy;

+ p->ps->left = marginx;

+ p->ps->lineheight = lineheight;

p->defrmargin = pagex - (marginx * 2);

return(p);

@@ -521,11 +534,12 @@ pspdf_free(void *arg)

p = (struct termp *)arg;

- if (p->engine.ps.psmarg)

- free(p->engine.ps.psmarg);

- if (p->engine.ps.pdfobjs)

- free(p->engine.ps.pdfobjs);

+ if (p->ps->psmarg)

+ free(p->ps->psmarg);

+ if (p->ps->pdfobjs)

+ free(p->ps->pdfobjs);

+ free(p->ps);

term_free(p);

}

@@ -544,10 +558,10 @@ ps_printf(struct termp *p, const char *fmt, ...)

* into our growable margin buffer.

- if ( ! (PS_MARGINS & p->engine.ps.flags)) {

+ if ( ! (PS_MARGINS & p->ps->flags)) {

len = vprintf(fmt, ap);

va_end(ap);

- p->engine.ps.pdfbytes += /* LINTED */

+ p->ps->pdfbytes += /* LINTED */

len < 0 ? 0 : (size_t)len;

return;

}

@@ -560,12 +574,12 @@ ps_printf(struct termp *p, const char *fmt, ...)

ps_growbuf(p, PS_BUFSLOP);

- pos = (int)p->engine.ps.psmargcur;

- len = vsnprintf(&p->engine.ps.psmarg[pos], PS_BUFSLOP, fmt, ap);

+ pos = (int)p->ps->psmargcur;

+ len = vsnprintf(&p->ps->psmarg[pos], PS_BUFSLOP, fmt, ap);

va_end(ap);

- p->engine.ps.psmargcur = strlen(p->engine.ps.psmarg);

+ p->ps->psmargcur = strlen(p->ps->psmarg);

}

@@ -576,18 +590,18 @@ ps_putchar(struct termp *p, char c)

/* See ps_printf(). */

- if ( ! (PS_MARGINS & p->engine.ps.flags)) {

+ if ( ! (PS_MARGINS & p->ps->flags)) {

/* LINTED */

putchar(c);

- p->engine.ps.pdfbytes++;

+ p->ps->pdfbytes++;

return;

}

ps_growbuf(p, 2);

- pos = (int)p->engine.ps.psmargcur++;

- p->engine.ps.psmarg[pos++] = c;

- p->engine.ps.psmarg[pos] = '\0';

+ pos = (int)p->ps->psmargcur++;

+ p->ps->psmarg[pos++] = c;

+ p->ps->psmarg[pos] = '\0';

}

@@ -597,18 +611,18 @@ pdf_obj(struct termp *p, size_t obj)

assert(obj > 0);

- if ((obj - 1) >= p->engine.ps.pdfobjsz) {

- p->engine.ps.pdfobjsz = obj + 128;

- p->engine.ps.pdfobjs = realloc

- (p->engine.ps.pdfobjs,

- p->engine.ps.pdfobjsz * sizeof(size_t));

- if (NULL == p->engine.ps.pdfobjs) {

+ if ((obj - 1) >= p->ps->pdfobjsz) {

+ p->ps->pdfobjsz = obj + 128;

+ p->ps->pdfobjs = realloc

+ (p->ps->pdfobjs,

+ p->ps->pdfobjsz * sizeof(size_t));

+ if (NULL == p->ps->pdfobjs) {

perror(NULL);

exit((int)MANDOCLEVEL_SYSERR);

}

- p->engine.ps.pdfobjs[(int)obj - 1] = p->engine.ps.pdfbytes;

+ p->ps->pdfobjs[(int)obj - 1] = p->ps->pdfbytes;

ps_printf(p, "%zu 0 obj\n", obj);

}

@@ -626,14 +640,14 @@ ps_closepage(struct termp *p)

* for the page contents.

- assert(p->engine.ps.psmarg && p->engine.ps.psmarg[0]);

- ps_printf(p, "%s", p->engine.ps.psmarg);

+ assert(p->ps->psmarg && p->ps->psmarg[0]);

+ ps_printf(p, "%s", p->ps->psmarg);

if (TERMTYPE_PS != p->type) {

ps_printf(p, "ET\n");

- len = p->engine.ps.pdfbytes - p->engine.ps.pdflastpg;

- base = p->engine.ps.pages * 4 + p->engine.ps.pdfbody;

+ len = p->ps->pdfbytes - p->ps->pdflastpg;

+ base = p->ps->pages * 4 + p->ps->pdfbody;

ps_printf(p, "endstream\nendobj\n");

@@ -660,10 +674,10 @@ ps_closepage(struct termp *p)

} else

ps_printf(p, "showpage\n");

- p->engine.ps.pages++;

- p->engine.ps.psrow = p->engine.ps.top;

- assert( ! (PS_NEWPAGE & p->engine.ps.flags));

- p->engine.ps.flags |= PS_NEWPAGE;

+ p->ps->pages++;

+ p->ps->psrow = p->ps->top;

+ assert( ! (PS_NEWPAGE & p->ps->flags));

+ p->ps->flags |= PS_NEWPAGE;

}

@@ -679,15 +693,15 @@ ps_end(struct termp *p)

* well as just one.

- if ( ! (PS_NEWPAGE & p->engine.ps.flags)) {

- assert(0 == p->engine.ps.flags);

- assert('\0' == p->engine.ps.last);

+ if ( ! (PS_NEWPAGE & p->ps->flags)) {

+ assert(0 == p->ps->flags);

+ assert('\0' == p->ps->last);

ps_closepage(p);

}

if (TERMTYPE_PS == p->type) {

ps_printf(p, "%%%%Trailer\n");

- ps_printf(p, "%%%%Pages: %zu\n", p->engine.ps.pages);

+ ps_printf(p, "%%%%Pages: %zu\n", p->ps->pages);

ps_printf(p, "%%%%EOF\n");

return;

}

@@ -695,18 +709,18 @@ ps_end(struct termp *p)

pdf_obj(p, 2);

ps_printf(p, "<<\n/Type /Pages\n");

ps_printf(p, "/MediaBox [0 0 %zu %zu]\n",

- (size_t)AFM2PNT(p, p->engine.ps.width),

- (size_t)AFM2PNT(p, p->engine.ps.height));

+ (size_t)AFM2PNT(p, p->ps->width),

+ (size_t)AFM2PNT(p, p->ps->height));

- ps_printf(p, "/Count %zu\n", p->engine.ps.pages);

+ ps_printf(p, "/Count %zu\n", p->ps->pages);

ps_printf(p, "/Kids [");

- for (i = 0; i < p->engine.ps.pages; i++)

+ for (i = 0; i < p->ps->pages; i++)

ps_printf(p, " %zu 0 R", i * 4 +

- p->engine.ps.pdfbody + 3);

+ p->ps->pdfbody + 3);

- base = (p->engine.ps.pages - 1) * 4 +

- p->engine.ps.pdfbody + 4;

+ base = (p->ps->pages - 1) * 4 +

+ p->ps->pdfbody + 4;

ps_printf(p, "]\n>>\nendobj\n");

pdf_obj(p, base);

@@ -714,14 +728,14 @@ ps_end(struct termp *p)

ps_printf(p, "/Type /Catalog\n");

ps_printf(p, "/Pages 2 0 R\n");

ps_printf(p, ">>\n");

- xref = p->engine.ps.pdfbytes;

+ xref = p->ps->pdfbytes;

ps_printf(p, "xref\n");

ps_printf(p, "0 %zu\n", base + 1);

ps_printf(p, "0000000000 65535 f \n");

for (i = 0; i < base; i++)

ps_printf(p, "%.10zu 00000 n \n",

- p->engine.ps.pdfobjs[(int)i]);

+ p->ps->pdfobjs[(int)i]);

ps_printf(p, "trailer\n");

ps_printf(p, "<<\n");

@@ -746,33 +760,33 @@ ps_begin(struct termp *p)

* screen yet, so we don't need to initialise the primary state.

- if (p->engine.ps.psmarg) {

- assert(p->engine.ps.psmargsz);

- p->engine.ps.psmarg[0] = '\0';

+ if (p->ps->psmarg) {

+ assert(p->ps->psmargsz);

+ p->ps->psmarg[0] = '\0';

}

- /*p->engine.ps.pdfbytes = 0;*/

- p->engine.ps.psmargcur = 0;

- p->engine.ps.flags = PS_MARGINS;

- p->engine.ps.pscol = p->engine.ps.left;

- p->engine.ps.psrow = p->engine.ps.header;

+ /*p->ps->pdfbytes = 0;*/

+ p->ps->psmargcur = 0;

+ p->ps->flags = PS_MARGINS;

+ p->ps->pscol = p->ps->left;

+ p->ps->psrow = p->ps->header;

ps_setfont(p, TERMFONT_NONE);

(*p->headf)(p, p->argf);

(*p->endline)(p);

- p->engine.ps.pscol = p->engine.ps.left;

- p->engine.ps.psrow = p->engine.ps.footer;

+ p->ps->pscol = p->ps->left;

+ p->ps->psrow = p->ps->footer;

(*p->footf)(p, p->argf);

(*p->endline)(p);

- p->engine.ps.flags &= ~PS_MARGINS;

+ p->ps->flags &= ~PS_MARGINS;

- assert(0 == p->engine.ps.flags);

- assert(p->engine.ps.psmarg);

- assert('\0' != p->engine.ps.psmarg[0]);

+ assert(0 == p->ps->flags);

+ assert(p->ps->psmarg);

+ assert('\0' != p->ps->psmarg[0]);

* Print header and initialise page state. Following this,

@@ -790,8 +804,8 @@ ps_begin(struct termp *p)

ps_printf(p, "%%%%PageOrder: Ascend\n");

ps_printf(p, "%%%%DocumentMedia: "

"Default %zu %zu 0 () ()\n",

- (size_t)AFM2PNT(p, p->engine.ps.width),

- (size_t)AFM2PNT(p, p->engine.ps.height));

+ (size_t)AFM2PNT(p, p->ps->width),

+ (size_t)AFM2PNT(p, p->ps->height));

ps_printf(p, "%%%%DocumentNeededResources: font");

for (i = 0; i < (int)TERMFONT__MAX; i++)

@@ -816,10 +830,10 @@ ps_begin(struct termp *p)

}

- p->engine.ps.pdfbody = (size_t)TERMFONT__MAX + 3;

- p->engine.ps.pscol = p->engine.ps.left;

- p->engine.ps.psrow = p->engine.ps.top;

- p->engine.ps.flags |= PS_NEWPAGE;

+ p->ps->pdfbody = (size_t)TERMFONT__MAX + 3;

+ p->ps->pscol = p->ps->left;

+ p->ps->psrow = p->ps->top;

+ p->ps->flags |= PS_NEWPAGE;

ps_setfont(p, TERMFONT_NONE);

}

@@ -834,25 +848,25 @@ ps_pletter(struct termp *p, int c)

* in a new page and make sure the font is correctly set.

- if (PS_NEWPAGE & p->engine.ps.flags) {

+ if (PS_NEWPAGE & p->ps->flags) {

if (TERMTYPE_PS == p->type) {

ps_printf(p, "%%%%Page: %zu %zu\n",

- p->engine.ps.pages + 1,

- p->engine.ps.pages + 1);

+ p->ps->pages + 1,

+ p->ps->pages + 1);

ps_printf(p, "/%s %zu selectfont\n",

- fonts[(int)p->engine.ps.lastf].name,

- p->engine.ps.scale);

+ fonts[(int)p->ps->lastf].name,

+ p->ps->scale);

} else {

- pdf_obj(p, p->engine.ps.pdfbody +

- p->engine.ps.pages * 4);

+ pdf_obj(p, p->ps->pdfbody +

+ p->ps->pages * 4);

ps_printf(p, "<<\n");

ps_printf(p, "/Length %zu 0 R\n",

- p->engine.ps.pdfbody + 1 +

- p->engine.ps.pages * 4);

+ p->ps->pdfbody + 1 +

+ p->ps->pages * 4);

ps_printf(p, ">>\nstream\n");

}

- p->engine.ps.pdflastpg = p->engine.ps.pdfbytes;

- p->engine.ps.flags &= ~PS_NEWPAGE;

+ p->ps->pdflastpg = p->ps->pdfbytes;

+ p->ps->flags &= ~PS_NEWPAGE;

}

@@ -860,22 +874,22 @@ ps_pletter(struct termp *p, int c)

* now at the current cursor.

- if ( ! (PS_INLINE & p->engine.ps.flags)) {

+ if ( ! (PS_INLINE & p->ps->flags)) {

if (TERMTYPE_PS != p->type) {

ps_printf(p, "BT\n/F%d %zu Tf\n",

- (int)p->engine.ps.lastf,

- p->engine.ps.scale);

+ (int)p->ps->lastf,

+ p->ps->scale);

ps_printf(p, "%.3f %.3f Td\n(",

- AFM2PNT(p, p->engine.ps.pscol),

- AFM2PNT(p, p->engine.ps.psrow));

+ AFM2PNT(p, p->ps->pscol),

+ AFM2PNT(p, p->ps->psrow));

} else

ps_printf(p, "%.3f %.3f moveto\n(",

- AFM2PNT(p, p->engine.ps.pscol),

- AFM2PNT(p, p->engine.ps.psrow));

- p->engine.ps.flags |= PS_INLINE;

+ AFM2PNT(p, p->ps->pscol),

+ AFM2PNT(p, p->ps->psrow));

+ p->ps->flags |= PS_INLINE;

}

- assert( ! (PS_NEWPAGE & p->engine.ps.flags));

+ assert( ! (PS_NEWPAGE & p->ps->flags));

* We need to escape these characters as per the PostScript

@@ -898,17 +912,17 @@ ps_pletter(struct termp *p, int c)

/* Write the character and adjust where we are on the page. */

- f = (int)p->engine.ps.lastf;

+ f = (int)p->ps->lastf;

if (c <= 32 || (c - 32 >= MAXCHAR)) {

ps_putchar(p, ' ');

- p->engine.ps.pscol += (size_t)fonts[f].gly[0].wx;

+ p->ps->pscol += (size_t)fonts[f].gly[0].wx;

return;

}

ps_putchar(p, (char)c);

c -= 32;

- p->engine.ps.pscol += (size_t)fonts[f].gly[c].wx;

+ p->ps->pscol += (size_t)fonts[f].gly[c].wx;

}

@@ -922,7 +936,7 @@ ps_pclose(struct termp *p)

* or anything).

- if ( ! (PS_INLINE & p->engine.ps.flags))

+ if ( ! (PS_INLINE & p->ps->flags))

return;

if (TERMTYPE_PS != p->type) {

@@ -930,7 +944,7 @@ ps_pclose(struct termp *p)

} else

ps_printf(p, ") show\n");

- p->engine.ps.flags &= ~PS_INLINE;

+ p->ps->flags &= ~PS_INLINE;

}

@@ -946,16 +960,16 @@ ps_fclose(struct termp *p)

* Following this, close out any scope that's open.

- if ('\0' != p->engine.ps.last) {

- if (p->engine.ps.lastf != TERMFONT_NONE) {

+ if ('\0' != p->ps->last) {

+ if (p->ps->lastf != TERMFONT_NONE) {

ps_pclose(p);

ps_setfont(p, TERMFONT_NONE);

}

- ps_pletter(p, p->engine.ps.last);

- p->engine.ps.last = '\0';

+ ps_pletter(p, p->ps->last);

+ p->ps->last = '\0';

}

- if ( ! (PS_INLINE & p->engine.ps.flags))

+ if ( ! (PS_INLINE & p->ps->flags))

return;

ps_pclose(p);

@@ -963,9 +977,12 @@ ps_fclose(struct termp *p)

static void

-ps_letter(struct termp *p, char c)

+ps_letter(struct termp *p, int arg)

{

- char cc;

+ char cc, c;

+ /* LINTED */

+ c = arg >= 128 || arg <= 0 ? '?' : arg;

* State machine dictates whether to buffer the last character

@@ -976,33 +993,33 @@ ps_letter(struct termp *p, char c)

* regular character and a regular buffer character.

- if ('\0' == p->engine.ps.last) {

+ if ('\0' == p->ps->last) {

assert(8 != c);

- p->engine.ps.last = c;

+ p->ps->last = c;

return;

- } else if (8 == p->engine.ps.last) {

+ } else if (8 == p->ps->last) {

assert(8 != c);

- p->engine.ps.last = '\0';

+ p->ps->last = '\0';

} else if (8 == c) {

- assert(8 != p->engine.ps.last);

- if ('_' == p->engine.ps.last) {

- if (p->engine.ps.lastf != TERMFONT_UNDER) {

+ assert(8 != p->ps->last);

+ if ('_' == p->ps->last) {

+ if (p->ps->lastf != TERMFONT_UNDER) {

ps_pclose(p);

ps_setfont(p, TERMFONT_UNDER);

}

- } else if (p->engine.ps.lastf != TERMFONT_BOLD) {

+ } else if (p->ps->lastf != TERMFONT_BOLD) {

ps_pclose(p);

ps_setfont(p, TERMFONT_BOLD);

}

- p->engine.ps.last = c;

+ p->ps->last = c;

return;

} else {

- if (p->engine.ps.lastf != TERMFONT_NONE) {

+ if (p->ps->lastf != TERMFONT_NONE) {

ps_pclose(p);

ps_setfont(p, TERMFONT_NONE);

}

- cc = p->engine.ps.last;

- p->engine.ps.last = c;

+ cc = p->ps->last;

+ p->ps->last = c;

c = cc;

}

@@ -1022,7 +1039,7 @@ ps_advance(struct termp *p, size_t len)

ps_fclose(p);

- p->engine.ps.pscol += len;

+ p->ps->pscol += len;

}

@@ -1040,16 +1057,16 @@ ps_endline(struct termp *p)

* lines, we'll do nasty stuff.

- if (PS_MARGINS & p->engine.ps.flags)

+ if (PS_MARGINS & p->ps->flags)

return;

/* Left-justify. */

- p->engine.ps.pscol = p->engine.ps.left;

+ p->ps->pscol = p->ps->left;

/* If we haven't printed anything, return. */

- if (PS_NEWPAGE & p->engine.ps.flags)

+ if (PS_NEWPAGE & p->ps->flags)

return;

@@ -1057,9 +1074,9 @@ ps_endline(struct termp *p)

* showpage and restart our row.

- if (p->engine.ps.psrow >= p->engine.ps.lineheight +

- p->engine.ps.bottom) {

- p->engine.ps.psrow -= p->engine.ps.lineheight;

+ if (p->ps->psrow >= p->ps->lineheight +

+ p->ps->bottom) {

+ p->ps->psrow -= p->ps->lineheight;

return;

}

@@ -1072,37 +1089,37 @@ ps_setfont(struct termp *p, enum termfont f)

{

assert(f < TERMFONT__MAX);

- p->engine.ps.lastf = f;

+ p->ps->lastf = f;

* If we're still at the top of the page, let the font-setting

* be delayed until we actually have stuff to print.

- if (PS_NEWPAGE & p->engine.ps.flags)

+ if (PS_NEWPAGE & p->ps->flags)

return;

if (TERMTYPE_PS == p->type)

ps_printf(p, "/%s %zu selectfont\n",

fonts[(int)f].name,

- p->engine.ps.scale);

+ p->ps->scale);

else

ps_printf(p, "/F%d %zu Tf\n",

(int)f,

- p->engine.ps.scale);

+ p->ps->scale);

}

/* ARGSUSED */

static size_t

-ps_width(const struct termp *p, char c)

+ps_width(const struct termp *p, int c)

{

if (c <= 32 || c - 32 >= MAXCHAR)

return((size_t)fonts[(int)TERMFONT_NONE].gly[0].wx);

c -= 32;

- return((size_t)fonts[(int)TERMFONT_NONE].gly[(int)c].wx);

+ return((size_t)fonts[(int)TERMFONT_NONE].gly[c].wx);

}

@@ -1141,7 +1158,7 @@ ps_hspan(const struct termp *p, const struct roffsu *su)

fonts[(int)TERMFONT_NONE].gly[110 - 32].wx;

break;

case (SCALE_VS):

- r = su->scale * p->engine.ps.lineheight;

+ r = su->scale * p->ps->lineheight;

break;

default:

r = su->scale;

@@ -1151,3 +1168,18 @@ ps_hspan(const struct termp *p, const struct roffsu *su)

return(r);

}

+static void

+ps_growbuf(struct termp *p, size_t sz)

+ if (p->ps->psmargcur + sz <= p->ps->psmargsz)

+ return;

+ if (sz < PS_BUFSLOP)

+ sz = PS_BUFSLOP;

+ p->ps->psmargsz += sz;

+ p->ps->psmarg = mandoc_realloc

+ (p->ps->psmarg, p->ps->psmargsz);