diff options
author | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2011-05-29 21:22:19 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2011-05-29 21:22:19 +0000 |
commit | d49b1d8e996d3b7d5b11ff7f6fec1308da0f4d19 (patch) | |
tree | 1b65c2913c52e78683a870fa30aacb6d0da621ec /usr.bin | |
parent | 34e3b2211040149f713e27fa1d0e45aa08dcaa93 (diff) |
Merge release 1.11.3, almost all code by kristaps@:
* Unicode output support (no Unicode input yet, though).
* Refactoring: completely handle predefined strings in roff.c.
- New function mandoc_escape() replaces a2roffdeco() and mandoc_special().
- Start using mandoc_getarg() in mdoc_argv.c.
- Clean up parsing of delimiters in mdoc(7).
* And many minor fixes and lots of cleanup.
Diffstat (limited to 'usr.bin')
30 files changed, 1597 insertions, 1590 deletions
diff --git a/usr.bin/mandoc/Makefile b/usr.bin/mandoc/Makefile index 8e4388b2513..d507dc9af42 100644 --- a/usr.bin/mandoc/Makefile +++ b/usr.bin/mandoc/Makefile @@ -1,8 +1,8 @@ -# $OpenBSD: Makefile,v 1.55 2011/04/24 16:22:02 schwarze Exp $ +# $OpenBSD: Makefile,v 1.56 2011/05/29 21:22:18 schwarze Exp $ .include <bsd.own.mk> -VERSION=1.11.1 +VERSION=1.11.3 CFLAGS+=-DVERSION=\"${VERSION}\" CFLAGS+=-W -Wall -Wstrict-prototypes diff --git a/usr.bin/mandoc/chars.c b/usr.bin/mandoc/chars.c index 0446fa53ea4..7e27a3a8ff5 100644 --- a/usr.bin/mandoc/chars.c +++ b/usr.bin/mandoc/chars.c @@ -1,6 +1,6 @@ -/* $Id: chars.c,v 1.18 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: chars.c,v 1.19 2011/05/29 21:22:18 schwarze Exp $ */ /* - * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any @@ -16,12 +16,13 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ #include <assert.h> +#include <ctype.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "mandoc.h" -#include "out.h" +#include "libmandoc.h" #define PRINT_HI 126 #define PRINT_LO 32 @@ -31,52 +32,37 @@ struct ln { const char *code; const char *ascii; int unicode; - int type; -#define CHARS_CHAR (1 << 0) -#define CHARS_STRING (1 << 1) -#define CHARS_BOTH (CHARS_CHAR | CHARS_STRING) }; -#define LINES_MAX 353 +#define LINES_MAX 325 #define CHAR(in, ch, code) \ - { NULL, (in), (ch), (code), CHARS_CHAR }, -#define STRING(in, ch, code) \ - { NULL, (in), (ch), (code), CHARS_STRING }, -#define BOTH(in, ch, code) \ - { NULL, (in), (ch), (code), CHARS_BOTH }, + { NULL, (in), (ch), (code) }, #define CHAR_TBL_START static struct ln lines[LINES_MAX] = { #define CHAR_TBL_END }; #include "chars.in" -struct ctab { - enum chars type; +struct mchars { struct ln **htab; }; -static inline int match(const struct ln *, - const char *, size_t, int); -static const struct ln *find(struct ctab *, const char *, size_t, int); - +static inline int match(const struct ln *, const char *, size_t); +static const struct ln *find(struct mchars *, const char *, size_t); void -chars_free(void *arg) +mchars_free(struct mchars *arg) { - struct ctab *tab; - - tab = (struct ctab *)arg; - free(tab->htab); - free(tab); + free(arg->htab); + free(arg); } - -void * -chars_init(enum chars type) +struct mchars * +mchars_alloc(void) { - struct ctab *tab; + struct mchars *tab; struct ln **htab; struct ln *pp; int i, hash; @@ -88,7 +74,7 @@ chars_init(enum chars type) * (they're in-line re-ordered during lookup). 
*/ - tab = mandoc_malloc(sizeof(struct ctab)); + tab = mandoc_malloc(sizeof(struct mchars)); htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **)); for (i = 0; i < LINES_MAX; i++) { @@ -105,7 +91,6 @@ chars_init(enum chars type) } tab->htab = htab; - tab->type = type; return(tab); } @@ -114,79 +99,57 @@ chars_init(enum chars type) * Special character to Unicode codepoint. */ int -chars_spec2cp(void *arg, const char *p, size_t sz) -{ - const struct ln *ln; - - ln = find((struct ctab *)arg, p, sz, CHARS_CHAR); - if (NULL == ln) - return(-1); - return(ln->unicode); -} - - -/* - * Reserved word to Unicode codepoint. - */ -int -chars_res2cp(void *arg, const char *p, size_t sz) +mchars_spec2cp(struct mchars *arg, const char *p, size_t sz) { const struct ln *ln; - ln = find((struct ctab *)arg, p, sz, CHARS_STRING); + ln = find(arg, p, sz); if (NULL == ln) return(-1); return(ln->unicode); } - /* - * Numbered character to literal character, - * represented as a null-terminated string for additional safety. + * Numbered character string to ASCII codepoint. + * This can only be a printable character (i.e., alnum, punct, space) so + * prevent the character from ruining our state (backspace, newline, and + * so on). + * If the character is illegal, returns '\0'. */ -const char * -chars_num2char(const char *p, size_t sz) +char +mchars_num2char(const char *p, size_t sz) { int i; - static char c[2]; - if (sz > 3) - return(NULL); - i = atoi(p); - if (i < 0 || i > 255) - return(NULL); - c[0] = (char)i; - c[1] = '\0'; - return(c); + if ((i = mandoc_strntou(p, sz, 10)) < 0) + return('\0'); + return(isprint(i) ? i : '\0'); } - -/* - * Special character to string array. +/* + * Hex character string to Unicode codepoint. + * If the character is illegal, returns '\0'. 
*/ -const char * -chars_spec2str(void *arg, const char *p, size_t sz, size_t *rsz) +int +mchars_num2uc(const char *p, size_t sz) { - const struct ln *ln; - - ln = find((struct ctab *)arg, p, sz, CHARS_CHAR); - if (NULL == ln) - return(NULL); + int i; - *rsz = strlen(ln->ascii); - return(ln->ascii); + if ((i = mandoc_strntou(p, sz, 16)) < 0) + return('\0'); + /* FIXME: make sure we're not in a bogus range. */ + return(i > 0x80 && i <= 0x10FFFF ? i : '\0'); } - /* - * Reserved word to string array. + * Special character to string array. */ const char * -chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz) +mchars_spec2str(struct mchars *arg, const char *p, size_t sz, size_t *rsz) { const struct ln *ln; - ln = find((struct ctab *)arg, p, sz, CHARS_STRING); + ln = find(arg, p, sz); if (NULL == ln) return(NULL); @@ -194,9 +157,8 @@ chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz) return(ln->ascii); } - static const struct ln * -find(struct ctab *tab, const char *p, size_t sz, int type) +find(struct mchars *tab, const char *p, size_t sz) { struct ln *pp, *prev; struct ln **htab; @@ -222,7 +184,7 @@ find(struct ctab *tab, const char *p, size_t sz, int type) return(NULL); for (prev = NULL; pp; pp = pp->next) { - if ( ! match(pp, p, sz, type)) { + if ( ! match(pp, p, sz)) { prev = pp; continue; } @@ -239,13 +201,10 @@ find(struct ctab *tab, const char *p, size_t sz, int type) return(NULL); } - static inline int -match(const struct ln *ln, const char *p, size_t sz, int type) +match(const struct ln *ln, const char *p, size_t sz) { - if ( ! 
(ln->type & type)) - return(0); if (strncmp(ln->code, p, sz)) return(0); return('\0' == ln->code[(int)sz]); diff --git a/usr.bin/mandoc/chars.in b/usr.bin/mandoc/chars.in index 49676cd4b1e..e4b2c65aa2d 100644 --- a/usr.bin/mandoc/chars.in +++ b/usr.bin/mandoc/chars.in @@ -1,4 +1,4 @@ -/* $Id: chars.in,v 1.15 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: chars.in,v 1.16 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -16,15 +16,12 @@ */ /* - * The ASCII translation tables. STRING corresponds to predefined - * strings (cf. mdoc_samples.7 and tmac/mdoc/doc-nroff). CHAR - * corresponds to special characters (cf. groff_char.7). BOTH contains - * sequences that are equivalent in both STRING and CHAR. + * The ASCII translation tables. * - * Either way, the left-hand side corresponds to the input sequence (\x, - * \(xx, \*(xx and so on) whose length is listed second element. The - * right-hand side is what's produced by the front-end, with the fourth - * element being its length. + * The left-hand side corresponds to the input sequence (\x, \(xx, \*(xx + * and so on) whose length is listed second element. The right-hand + * side is what's produced by the front-end, with the fourth element + * being its length. * * XXX - C-escape strings! * XXX - update LINES_MAX if adding more! @@ -36,25 +33,25 @@ static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' }; CHAR_TBL_START /* Spacing. */ -CHAR("c", "", 0) +CHAR("c", "", 8203) CHAR("0", " ", 8194) CHAR(" ", ascii_nbrsp, 160) CHAR("~", ascii_nbrsp, 160) -CHAR("%", "", 0) -CHAR("&", "", 0) -CHAR("^", "", 0) -CHAR("|", "", 0) -CHAR("}", "", 0) +CHAR("%", "", 8203) +CHAR("&", "", 8203) +CHAR("^", "", 8203) +CHAR("|", "", 8203) +CHAR("}", "", 8203) /* Accents. 
*/ CHAR("a\"", "\"", 779) CHAR("a-", "-", 175) CHAR("a.", ".", 729) CHAR("a^", "^", 770) -BOTH("\'", "\'", 769) -BOTH("aa", "\'", 769) -BOTH("ga", "`", 768) -BOTH("`", "`", 768) +CHAR("\'", "\'", 769) +CHAR("aa", "\'", 769) +CHAR("ga", "`", 768) +CHAR("`", "`", 768) CHAR("ab", "`", 774) CHAR("ac", ",", 807) CHAR("ad", "\"", 776) @@ -68,8 +65,8 @@ CHAR("ti", "~", 126) /* Quotes. */ CHAR("Bq", ",,", 8222) CHAR("bq", ",", 8218) -BOTH("lq", "``", 8220) -BOTH("rq", "\'\'", 8221) +CHAR("lq", "``", 8220) +CHAR("rq", "\'\'", 8221) CHAR("oq", "`", 8216) CHAR("cq", "\'", 8217) CHAR("aq", "\'", 39) @@ -232,8 +229,8 @@ CHAR("<-", "<-", 8592) CHAR("->", "->", 8594) CHAR("<>", "<>", 8596) CHAR("da", "v", 8595) -BOTH("ua", "^", 8593) -BOTH("va", "^v", 8597) +CHAR("ua", "^", 8593) +CHAR("va", "^v", 8597) CHAR("lA", "<=", 8656) CHAR("rA", "=>", 8658) CHAR("hA", "<=>", 8660) @@ -270,8 +267,8 @@ CHAR("di", "-:-", 247) CHAR("tdi", "-:-", 247) CHAR("f/", "/", 8260) CHAR("**", "*", 8727) -BOTH("<=", "<=", 8804) -BOTH(">=", ">=", 8805) +CHAR("<=", "<=", 8804) +CHAR(">=", ">=", 8805) CHAR("<<", "<<", 8810) CHAR(">>", ">>", 8811) CHAR("eq", "=", 61) @@ -348,36 +345,6 @@ CHAR("Po", "L", 163) CHAR("Cs", "x", 164) CHAR("Fn", "f", 402) -/* Old style. 
*/ -STRING("Am", "&", 38) -STRING("Ba", "|", 124) -STRING("Ge", ">=", 8805) -STRING("Gt", ">", 62) -STRING("If", "infinity", 0) -STRING("Le", "<=", 8804) -STRING("Lq", "``", 8220) -STRING("Lt", "<", 60) -STRING("Na", "NaN", 0) -STRING("Ne", "!=", 8800) -STRING("Pi", "pi", 960) -STRING("Pm", "+-", 177) -STRING("Rq", "\'\'", 8221) -STRING("left-bracket", "[", 91) -STRING("left-parenthesis", "(", 40) -STRING("left-singlequote", "`", 8216) -STRING("lp", "(", 40) -STRING("q", "\"", 34) -STRING("quote-left", "`", 8216) -STRING("quote-right", "\'", 8217) -STRING("R", "(R)", 174) -STRING("right-bracket", "]", 93) -STRING("right-parenthesis", ")", 41) -STRING("right-singlequote", "\'", 8217) -STRING("rp", ")", 41) -STRING("Tm", "(Tm)", 8482) -STRING("Px", "POSIX", 0) -STRING("Ai", "ANSI", 0) - /* Lines. */ CHAR("ba", "|", 124) CHAR("br", "|", 9474) diff --git a/usr.bin/mandoc/html.c b/usr.bin/mandoc/html.c index 45197ad76ef..5ad6860d850 100644 --- a/usr.bin/mandoc/html.c +++ b/usr.bin/mandoc/html.c @@ -1,4 +1,4 @@ -/* $Id: html.c,v 1.25 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: html.c,v 1.26 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> @@ -27,6 +27,7 @@ #include <unistd.h> #include "mandoc.h" +#include "libmandoc.h" #include "out.h" #include "html.h" #include "main.h" @@ -89,19 +90,25 @@ static const char *const htmlattrs[ATTR_MAX] = { "colspan", /* ATTR_COLSPAN */ }; -static void print_num(struct html *, const char *, size_t); -static void print_spec(struct html *, enum roffdeco, - const char *, size_t); -static void print_res(struct html *, const char *, size_t); -static void print_ctag(struct html *, enum htmltag); -static void print_doctype(struct html *); -static void print_xmltype(struct html *); -static int print_encode(struct html *, const char *, int); -static void print_metaf(struct html *, enum roffdeco); -static void 
print_attr(struct html *, - const char *, const char *); -static void *ml_alloc(char *, enum htmltype); +static const char *const roffscales[SCALE_MAX] = { + "cm", /* SCALE_CM */ + "in", /* SCALE_IN */ + "pc", /* SCALE_PC */ + "pt", /* SCALE_PT */ + "em", /* SCALE_EM */ + "em", /* SCALE_MM */ + "ex", /* SCALE_EN */ + "ex", /* SCALE_BU */ + "em", /* SCALE_VS */ + "ex", /* SCALE_FS */ +}; +static void bufncat(struct html *, const char *, size_t); +static void print_ctag(struct html *, enum htmltag); +static int print_encode(struct html *, const char *, int); +static void print_metaf(struct html *, enum mandoc_esc); +static void print_attr(struct html *, const char *, const char *); +static void *ml_alloc(char *, enum htmltype); static void * ml_alloc(char *outopts, enum htmltype type) @@ -119,7 +126,7 @@ ml_alloc(char *outopts, enum htmltype type) h->type = type; h->tags.head = NULL; - h->symtab = chars_init(CHARS_HTML); + h->symtab = mchars_alloc(); while (outopts && *outopts) switch (getsubopt(&outopts, UNCONST(toks), &v)) { @@ -169,7 +176,7 @@ html_free(void *p) } if (h->symtab) - chars_free(h->symtab); + mchars_free(h->symtab); free(h); } @@ -205,72 +212,24 @@ print_gen_head(struct html *h) } } -/* ARGSUSED */ -static void -print_num(struct html *h, const char *p, size_t len) -{ - const char *rhs; - - rhs = chars_num2char(p, len); - if (rhs) - putchar((int)*rhs); -} - static void -print_spec(struct html *h, enum roffdeco d, const char *p, size_t len) -{ - int cp; - const char *rhs; - size_t sz; - - if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) { - printf("&#%d;", cp); - return; - } else if (-1 == cp && DECO_SSPECIAL == d) { - fwrite(p, 1, len, stdout); - return; - } else if (-1 == cp) - return; - - if (NULL != (rhs = chars_spec2str(h->symtab, p, len, &sz))) - fwrite(rhs, 1, sz, stdout); -} - - -static void -print_res(struct html *h, const char *p, size_t len) -{ - int cp; - const char *rhs; - size_t sz; - - if ((cp = chars_res2cp(h->symtab, p, len)) > 0) { - 
printf("&#%d;", cp); - return; - } else if (-1 == cp) - return; - - if (NULL != (rhs = chars_res2str(h->symtab, p, len, &sz))) - fwrite(rhs, 1, sz, stdout); -} - - -static void -print_metaf(struct html *h, enum roffdeco deco) +print_metaf(struct html *h, enum mandoc_esc deco) { enum htmlfont font; switch (deco) { - case (DECO_PREVIOUS): + case (ESCAPE_FONTPREV): font = h->metal; break; - case (DECO_ITALIC): + case (ESCAPE_FONTITALIC): font = HTMLFONT_ITALIC; break; - case (DECO_BOLD): + case (ESCAPE_FONTBOLD): font = HTMLFONT_BOLD; break; - case (DECO_ROMAN): + case (ESCAPE_FONT): + /* FALLTHROUGH */ + case (ESCAPE_FONTROMAN): font = HTMLFONT_NONE; break; default: @@ -292,80 +251,123 @@ print_metaf(struct html *h, enum roffdeco deco) print_otag(h, TAG_I, 0, NULL); } +int +html_strlen(const char *cp) +{ + int ssz, sz; + const char *seq, *p; + + /* + * Account for escaped sequences within string length + * calculations. This follows the logic in term_strlen() as we + * must calculate the width of produced strings. + * Assume that characters are always width of "1". This is + * hacky, but it gets the job done for approximation of widths. 
+ */ + + sz = 0; + while (NULL != (p = strchr(cp, '\\'))) { + sz += (int)(p - cp); + ++cp; + switch (mandoc_escape(&cp, &seq, &ssz)) { + case (ESCAPE_ERROR): + return(sz); + case (ESCAPE_UNICODE): + /* FALLTHROUGH */ + case (ESCAPE_NUMBERED): + /* FALLTHROUGH */ + case (ESCAPE_SPECIAL): + sz++; + break; + default: + break; + } + } + + assert(sz >= 0); + return(sz + strlen(cp)); +} static int print_encode(struct html *h, const char *p, int norecurse) { size_t sz; - int len, nospace; + int c, len, nospace; const char *seq; - enum roffdeco deco; + enum mandoc_esc esc; static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' }; nospace = 0; - for (; *p; p++) { + while ('\0' != *p) { sz = strcspn(p, rejs); fwrite(p, 1, sz, stdout); - p += /* LINTED */ - sz; + p += (int)sz; + + if ('\0' == *p) + break; - if ('<' == *p) { + switch (*p++) { + case ('<'): printf("&lt;"); continue; - } else if ('>' == *p) { + case ('>'): printf("&gt;"); continue; - } else if ('&' == *p) { + case ('&'): printf("&amp;"); continue; - } else if (ASCII_HYPH == *p) { - /* - * Note: "soft hyphens" aren't graphically - * displayed when not breaking the text; we want - * them to be displayed. - */ - /*printf("&#173;");*/ + case (ASCII_HYPH): putchar('-'); continue; - } else if ('\0' == *p) + default: break; + } - seq = ++p; - len = a2roffdeco(&deco, &seq, &sz); + esc = mandoc_escape(&p, &seq, &len); + if (ESCAPE_ERROR == esc) + break; - switch (deco) { - case (DECO_NUMBERED): - print_num(h, seq, sz); + switch (esc) { + case (ESCAPE_UNICODE): + /* Skip passed "u" header.
*/ + c = mchars_num2uc(seq + 1, len - 1); + if ('\0' != c) + printf("&#x%x;", c); break; - case (DECO_RESERVED): - print_res(h, seq, sz); + case (ESCAPE_NUMBERED): + c = mchars_num2char(seq, len); + if ('\0' != c) + putchar(c); break; - case (DECO_SSPECIAL): - /* FALLTHROUGH */ - case (DECO_SPECIAL): - print_spec(h, deco, seq, sz); + case (ESCAPE_SPECIAL): + c = mchars_spec2cp(h->symtab, seq, len); + if (c > 0) + printf("&#%d;", c); + else if (-1 == c && 1 == len) + putchar((int)*seq); break; - case (DECO_PREVIOUS): + case (ESCAPE_FONT): + /* FALLTHROUGH */ + case (ESCAPE_FONTPREV): /* FALLTHROUGH */ - case (DECO_BOLD): + case (ESCAPE_FONTBOLD): /* FALLTHROUGH */ - case (DECO_ITALIC): + case (ESCAPE_FONTITALIC): /* FALLTHROUGH */ - case (DECO_ROMAN): + case (ESCAPE_FONTROMAN): if (norecurse) break; - print_metaf(h, deco); + print_metaf(h, esc); + break; + case (ESCAPE_NOSPACE): + if ('\0' == *p) + nospace = 1; break; default: break; } - - p += len - 1; - - if (DECO_NOSPACE == deco && '\0' == *(p + 1)) - nospace = 1; } return(nospace); @@ -428,7 +430,7 @@ print_otag(struct html *h, enum htmltag tag, print_attr(h, "lang", "en"); } - /* Accomodate for XML "well-formed" singleton escaping. */ + /* Accommodate for XML "well-formed" singleton escaping. 
*/ if (HTML_AUTOCLOSE & htmltags[tag].flags) switch (h->type) { @@ -461,28 +463,9 @@ print_ctag(struct html *h, enum htmltag tag) } } - void print_gen_decls(struct html *h) { - - print_xmltype(h); - print_doctype(h); -} - - -static void -print_xmltype(struct html *h) -{ - - if (HTML_XHTML_1_0_STRICT == h->type) - puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); -} - - -static void -print_doctype(struct html *h) -{ const char *doctype; const char *dtd; const char *name; @@ -494,6 +477,7 @@ print_doctype(struct html *h) dtd = "http://www.w3.org/TR/html4/strict.dtd"; break; default: + puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); name = "html"; doctype = "-//W3C//DTD XHTML 1.0 Strict//EN"; dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"; @@ -583,7 +567,6 @@ print_stagq(struct html *h, const struct tag *suntil) } } - void bufinit(struct html *h) { @@ -592,28 +575,27 @@ bufinit(struct html *h) h->buflen = 0; } - void bufcat_style(struct html *h, const char *key, const char *val) { bufcat(h, key); - bufncat(h, ":", 1); + bufcat(h, ":"); bufcat(h, val); - bufncat(h, ";", 1); + bufcat(h, ";"); } - void bufcat(struct html *h, const char *p) { - bufncat(h, p, strlen(p)); + h->buflen = strlcat(h->buf, p, BUFSIZ); + assert(h->buflen < BUFSIZ); + h->buflen--; } - void -buffmt(struct html *h, const char *fmt, ...) +bufcat_fmt(struct html *h, const char *fmt, ...) { va_list ap; @@ -624,19 +606,15 @@ buffmt(struct html *h, const char *fmt, ...) 
h->buflen = strlen(h->buf); } - -void +static void bufncat(struct html *h, const char *p, size_t sz) { - if (h->buflen + sz > BUFSIZ - 1) - sz = BUFSIZ - 1 - h->buflen; - - (void)strncat(h->buf, p, sz); + assert(h->buflen + sz + 1 < BUFSIZ); + strncat(h->buf, p, sz); h->buflen += sz; } - void buffmt_includes(struct html *h, const char *name) { @@ -644,6 +622,7 @@ buffmt_includes(struct html *h, const char *name) pp = h->base_includes; + bufinit(h); while (NULL != (p = strchr(pp, '%'))) { bufncat(h, pp, (size_t)(p - pp)); switch (*(p + 1)) { @@ -660,7 +639,6 @@ buffmt_includes(struct html *h, const char *name) bufcat(h, pp); } - void buffmt_man(struct html *h, const char *name, const char *sec) @@ -669,7 +647,7 @@ buffmt_man(struct html *h, pp = h->base_man; - /* LINTED */ + bufinit(h); while (NULL != (p = strchr(pp, '%'))) { bufncat(h, pp, (size_t)(p - pp)); switch (*(p + 1)) { @@ -677,7 +655,7 @@ buffmt_man(struct html *h, bufcat(h, sec ? sec : "1"); break; case('N'): - buffmt(h, name); + bufcat_fmt(h, name); break; default: bufncat(h, p, 2); @@ -689,85 +667,24 @@ buffmt_man(struct html *h, bufcat(h, pp); } - void bufcat_su(struct html *h, const char *p, const struct roffsu *su) { double v; - const char *u; v = su->scale; + if (SCALE_MM == su->unit && 0.0 == (v /= 100.0)) + v = 1.0; - switch (su->unit) { - case (SCALE_CM): - u = "cm"; - break; - case (SCALE_IN): - u = "in"; - break; - case (SCALE_PC): - u = "pc"; - break; - case (SCALE_PT): - u = "pt"; - break; - case (SCALE_EM): - u = "em"; - break; - case (SCALE_MM): - if (0 == (v /= 100)) - v = 1; - u = "em"; - break; - case (SCALE_EN): - u = "ex"; - break; - case (SCALE_BU): - u = "ex"; - break; - case (SCALE_VS): - u = "em"; - break; - default: - u = "ex"; - break; - } - - /* - * XXX: the CSS spec isn't clear as to which types accept - * integer or real numbers, so we just make them all decimals. 
- */ - buffmt(h, "%s: %.2f%s;", p, v, u); + bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]); } - void -html_idcat(char *dst, const char *src, int sz) +bufcat_id(struct html *h, const char *src) { - int ssz; - - assert(sz > 2); /* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */ - /* We can't start with a number (bah). */ - - if ('#' == *dst) { - dst++; - sz--; - } - if ('\0' == *dst) { - *dst++ = 'x'; - *dst = '\0'; - sz--; - } - - for ( ; *dst != '\0' && sz; dst++, sz--) - /* Jump to end. */ ; - - for ( ; *src != '\0' && sz > 1; src++) { - ssz = snprintf(dst, (size_t)sz, "%.2x", *src); - sz -= ssz; - dst += ssz; - } + while ('\0' != *src) + bufcat_fmt(h, "%.2x", *src++); } diff --git a/usr.bin/mandoc/html.h b/usr.bin/mandoc/html.h index 4643e81afd9..10f9a3a5787 100644 --- a/usr.bin/mandoc/html.h +++ b/usr.bin/mandoc/html.h @@ -1,4 +1,4 @@ -/* $Id: html.h,v 1.15 2011/01/31 03:04:26 schwarze Exp $ */ +/* $Id: html.h,v 1.16 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -120,7 +120,7 @@ struct html { struct tagq tags; /* stack of open tags */ struct rofftbl tbl; /* current table */ struct tag *tblt; /* current open table scope */ - void *symtab; /* character-escapes */ + struct mchars *symtab; /* character-escapes */ char *base_man; /* base for manpage href */ char *base_includes; /* base for include href */ char *style; /* style-sheet URI */ @@ -142,19 +142,19 @@ void print_text(struct html *, const char *); void print_tblclose(struct html *); void print_tbl(struct html *, const struct tbl_span *); +void bufcat_fmt(struct html *, const char *, ...); +void bufcat(struct html *, const char *); +void bufcat_id(struct html *, const char *); +void bufcat_style(struct html *, + const char *, const char *); void bufcat_su(struct html *, const char *, const struct roffsu *); +void bufinit(struct html *); void buffmt_man(struct html *, const char *, const char *); void 
buffmt_includes(struct html *, const char *); -void buffmt(struct html *, const char *, ...); -void bufcat(struct html *, const char *); -void bufcat_style(struct html *, - const char *, const char *); -void bufncat(struct html *, const char *, size_t); -void bufinit(struct html *); -void html_idcat(char *, const char *, int); +int html_strlen(const char *); __END_DECLS diff --git a/usr.bin/mandoc/libmandoc.h b/usr.bin/mandoc/libmandoc.h index eaacbfccbf1..1efe5da07a5 100644 --- a/usr.bin/mandoc/libmandoc.h +++ b/usr.bin/mandoc/libmandoc.h @@ -1,4 +1,4 @@ -/* $Id: libmandoc.h,v 1.11 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: libmandoc.h,v 1.12 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -73,13 +73,13 @@ void mandoc_msg(enum mandocerr, struct mparse *, int, int, const char *); void mandoc_vmsg(enum mandocerr, struct mparse *, int, int, const char *, ...); -int mandoc_special(char *); char *mandoc_strdup(const char *); char *mandoc_getarg(struct mparse *, char **, int, int *); char *mandoc_normdate(struct mparse *, char *, int, int); int mandoc_eos(const char *, size_t, int); int mandoc_hyph(const char *, const char *); int mandoc_getcontrol(const char *, int *); +int mandoc_strntou(const char *, size_t, int); void mdoc_free(struct mdoc *); struct mdoc *mdoc_alloc(struct regset *, struct mparse *); diff --git a/usr.bin/mandoc/libmdoc.h b/usr.bin/mandoc/libmdoc.h index ceffcb05332..ee99633aa61 100644 --- a/usr.bin/mandoc/libmdoc.h +++ b/usr.bin/mandoc/libmdoc.h @@ -1,4 +1,4 @@ -/* $Id: libmdoc.h,v 1.45 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: libmdoc.h,v 1.46 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -62,20 +62,20 @@ struct mdoc_macro { enum margserr { ARGS_ERROR, - ARGS_EOLN, - ARGS_WORD, - ARGS_PUNCT, - ARGS_QWORD, - ARGS_PHRASE, - ARGS_PPHRASE, - ARGS_PEND + ARGS_EOLN, /* end-of-line */ + ARGS_WORD, /* 
normal word */ + ARGS_PUNCT, /* series of punctuation */ + ARGS_QWORD, /* quoted word */ + ARGS_PHRASE, /* Ta'd phrase (-column) */ + ARGS_PPHRASE, /* tabbed phrase (-column) */ + ARGS_PEND /* last phrase (-column) */ }; enum margverr { ARGV_ERROR, - ARGV_EOLN, - ARGV_ARG, - ARGV_WORD + ARGV_EOLN, /* end of line */ + ARGV_ARG, /* valid argument */ + ARGV_WORD /* normal word (or bad argument---same thing) */ }; /* @@ -133,14 +133,8 @@ void mdoc_argv_free(struct mdoc_arg *); enum margserr mdoc_args(struct mdoc *, int, int *, char *, enum mdoct, char **); enum margserr mdoc_zargs(struct mdoc *, int, - int *, char *, int, char **); -#define ARGS_DELIM (1 << 1) -#define ARGS_TABSEP (1 << 2) -#define ARGS_NOWARN (1 << 3) - + int *, char *, char **); int mdoc_macroend(struct mdoc *); - -#define DELIMSZ 6 /* hint: max possible size of a delimiter */ enum mdelim mdoc_isdelim(const char *); __END_DECLS diff --git a/usr.bin/mandoc/main.c b/usr.bin/mandoc/main.c index 088940778ef..3b2fd636dff 100644 --- a/usr.bin/mandoc/main.c +++ b/usr.bin/mandoc/main.c @@ -1,4 +1,4 @@ -/* $Id: main.c,v 1.76 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: main.c,v 1.77 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> @@ -34,6 +34,8 @@ typedef void (*out_free)(void *); enum outt { OUTT_ASCII = 0, /* -Tascii */ + OUTT_LOCALE, /* -Tlocale */ + OUTT_UTF8, /* -Tutf8 */ OUTT_TREE, /* -Ttree */ OUTT_HTML, /* -Thtml */ OUTT_XHTML, /* -Txhtml */ @@ -197,9 +199,19 @@ parse(struct curparse *curp, int fd, switch (curp->outtype) { case (OUTT_XHTML): curp->outdata = xhtml_alloc(curp->outopts); + curp->outfree = html_free; break; case (OUTT_HTML): curp->outdata = html_alloc(curp->outopts); + curp->outfree = html_free; + break; + case (OUTT_UTF8): + curp->outdata = utf8_alloc(curp->outopts); + curp->outfree = ascii_free; + break; + case (OUTT_LOCALE): + curp->outdata = 
locale_alloc(curp->outopts); + curp->outfree = ascii_free; break; case (OUTT_ASCII): curp->outdata = ascii_alloc(curp->outopts); @@ -223,7 +235,6 @@ parse(struct curparse *curp, int fd, case (OUTT_XHTML): curp->outman = html_man; curp->outmdoc = html_mdoc; - curp->outfree = html_free; break; case (OUTT_TREE): curp->outman = tree_man; @@ -233,6 +244,10 @@ parse(struct curparse *curp, int fd, /* FALLTHROUGH */ case (OUTT_ASCII): /* FALLTHROUGH */ + case (OUTT_UTF8): + /* FALLTHROUGH */ + case (OUTT_LOCALE): + /* FALLTHROUGH */ case (OUTT_PS): curp->outman = terminal_man; curp->outmdoc = terminal_mdoc; @@ -290,6 +305,10 @@ toptions(struct curparse *curp, char *arg) curp->outtype = OUTT_TREE; else if (0 == strcmp(arg, "html")) curp->outtype = OUTT_HTML; + else if (0 == strcmp(arg, "utf8")) + curp->outtype = OUTT_UTF8; + else if (0 == strcmp(arg, "locale")) + curp->outtype = OUTT_LOCALE; else if (0 == strcmp(arg, "xhtml")) curp->outtype = OUTT_XHTML; else if (0 == strcmp(arg, "ps")) diff --git a/usr.bin/mandoc/main.h b/usr.bin/mandoc/main.h index 2cb020dedc2..1efb9d34387 100644 --- a/usr.bin/mandoc/main.h +++ b/usr.bin/mandoc/main.h @@ -1,6 +1,6 @@ -/* $Id: main.h,v 1.7 2010/07/25 18:05:54 schwarze Exp $ */ +/* $Id: main.h,v 1.8 2011/05/29 21:22:18 schwarze Exp $ */ /* - * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -41,6 +41,8 @@ void html_free(void *); void tree_mdoc(void *, const struct mdoc *); void tree_man(void *, const struct man *); +void *locale_alloc(char *); +void *utf8_alloc(char *); void *ascii_alloc(char *); void ascii_free(void *); diff --git a/usr.bin/mandoc/man_html.c b/usr.bin/mandoc/man_html.c index d805cce275e..5437cb1557f 100644 --- a/usr.bin/mandoc/man_html.c +++ b/usr.bin/mandoc/man_html.c @@ -1,4 +1,4 @@ -/* $Id: 
man_html.c,v 1.37 2011/04/21 22:59:54 schwarze Exp $ */ +/* $Id: man_html.c,v 1.38 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -63,7 +63,7 @@ static int man_ign_pre(MAN_ARGS); static int man_in_pre(MAN_ARGS); static int man_literal_pre(MAN_ARGS); static void man_root_post(MAN_ARGS); -static int man_root_pre(MAN_ARGS); +static void man_root_pre(MAN_ARGS); static int man_B_pre(MAN_ARGS); static int man_HP_pre(MAN_ARGS); static int man_I_pre(MAN_ARGS); @@ -153,9 +153,7 @@ print_man_head(MAN_ARGS) { print_gen_head(h); - bufinit(h); - buffmt(h, "%s(%s)", m->title, m->msec); - + bufcat_fmt(h, "%s(%s)", m->title, m->msec); print_otag(h, TAG_TITLE, 0, NULL); print_text(h, h->buf); } @@ -181,13 +179,16 @@ print_man_node(MAN_ARGS) child = 1; t = h->tags.head; - bufinit(h); - switch (n->type) { case (MAN_ROOT): - child = man_root_pre(m, n, mh, h); + man_root_pre(m, n, mh, h); break; case (MAN_TEXT): + /* + * If we have a blank line, output a vertical space. + * If we have a space as the first character, break + * before printing the line's data. + */ if ('\0' == *n->string) { print_otag(h, TAG_P, 0, NULL); return; @@ -196,6 +197,13 @@ print_man_node(MAN_ARGS) print_text(h, n->string); + /* + * If we're in a literal context, make sure that words + * togehter on the same line stay together. This is a + * POST-printing call, so we check the NEXT word. Since + * -man doesn't have nested macros, we don't need to be + * more specific than this. + */ if (MANH_LITERAL & mh->fl && (NULL == n->next || n->next->line > n->line)) @@ -244,8 +252,6 @@ print_man_node(MAN_ARGS) /* This will automatically close out any font scope. 
*/ print_stagq(h, t); - bufinit(h); - switch (n->type) { case (MAN_ROOT): man_root_post(m, n, mh, h); @@ -274,7 +280,7 @@ a2width(const struct man_node *n, struct roffsu *su) /* ARGSUSED */ -static int +static void man_root_pre(MAN_ARGS) { struct htmlpair tag[3]; @@ -328,7 +334,6 @@ man_root_pre(MAN_ARGS) print_text(h, title); print_tagq(h, t); - return(1); } @@ -387,6 +392,7 @@ man_br_pre(MAN_ARGS) } else su.scale = 0; + bufinit(h); bufcat_su(h, "height", &su); PAIR_STYLE_INIT(&tag, h); print_otag(h, TAG_DIV, 1, &tag); @@ -555,6 +561,7 @@ man_IP_pre(MAN_ARGS) if (MAN_BLOCK == n->type) { print_otag(h, TAG_P, 0, NULL); print_otag(h, TAG_TABLE, 0, NULL); + bufinit(h); bufcat_su(h, "width", &su); PAIR_STYLE_INIT(&tag, h); print_otag(h, TAG_COL, 1, &tag); @@ -590,6 +597,8 @@ man_HP_pre(MAN_ARGS) struct roffsu su; const struct man_node *np; + bufinit(h); + np = MAN_BLOCK == n->type ? n->head->child : n->parent->head->child; @@ -690,6 +699,7 @@ man_RS_pre(MAN_ARGS) if (n->head->child) a2width(n->head->child, &su); + bufinit(h); bufcat_su(h, "margin-left", &su); PAIR_STYLE_INIT(&tag, h); print_otag(h, TAG_DIV, 1, &tag); diff --git a/usr.bin/mandoc/man_term.c b/usr.bin/mandoc/man_term.c index ab5c37bd86f..56b1b010756 100644 --- a/usr.bin/mandoc/man_term.c +++ b/usr.bin/mandoc/man_term.c @@ -1,4 +1,4 @@ -/* $Id: man_term.c,v 1.67 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: man_term.c,v 1.68 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> @@ -152,14 +152,7 @@ terminal_man(void *arg, const struct man *man) p->tabwidth = term_len(p, 5); if (NULL == p->symtab) - switch (p->enc) { - case (TERMENC_ASCII): - p->symtab = chars_init(CHARS_ASCII); - break; - default: - abort(); - /* NOTREACHED */ - } + p->symtab = mchars_alloc(); n = man_node(man); m = man_meta(man); diff --git a/usr.bin/mandoc/man_validate.c b/usr.bin/mandoc/man_validate.c index 
c062c60905e..bfa17bd77c9 100644 --- a/usr.bin/mandoc/man_validate.c +++ b/usr.bin/mandoc/man_validate.c @@ -1,4 +1,4 @@ -/* $Id: man_validate.c,v 1.44 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: man_validate.c,v 1.45 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org> @@ -50,7 +50,7 @@ static int check_par(CHKARGS); static int check_part(CHKARGS); static int check_root(CHKARGS); static int check_sec(CHKARGS); -static int check_text(CHKARGS); +static void check_text(CHKARGS); static int post_AT(CHKARGS); static int post_fi(CHKARGS); @@ -147,7 +147,8 @@ man_valid_post(struct man *m) switch (m->last->type) { case (MAN_TEXT): - return(check_text(m, m->last)); + check_text(m, m->last); + return(1); case (MAN_ROOT): return(check_root(m, m->last)); case (MAN_EQN): @@ -200,43 +201,48 @@ check_root(CHKARGS) return(1); } - -static int +static void check_text(CHKARGS) { - char *p; - int pos, c; + char *p, *pp, *cpp; + int pos; size_t sz; - for (p = n->string, pos = n->pos + 1; *p; p++, pos++) { - sz = strcspn(p, "\t\\"); - p += (int)sz; + p = n->string; + pos = n->pos + 1; - if ('\0' == *p) - break; + while ('\0' != *p) { + sz = strcspn(p, "\t\\"); + p += (int)sz; pos += (int)sz; if ('\t' == *p) { - if (MAN_LITERAL & m->flags) - continue; - man_pmsg(m, n->line, pos, MANDOCERR_BADTAB); + if ( ! (MAN_LITERAL & m->flags)) + man_pmsg(m, n->line, pos, MANDOCERR_BADTAB); + p++; + pos++; continue; - } + } else if ('\0' == *p) + break; - /* Check the special character. 
*/ + pos++; + pp = ++p; - c = mandoc_special(p); - if (c) { - p += c - 1; - pos += c - 1; - } else + if (ESCAPE_ERROR == mandoc_escape + ((const char **)&pp, NULL, NULL)) { man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE); - } + break; + } - return(1); -} + cpp = p; + while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp))) + *cpp = '-'; + pos += pp - p; + p = pp; + } +} #define INEQ_DEFINE(x, ineq, name) \ static int \ diff --git a/usr.bin/mandoc/mandoc.1 b/usr.bin/mandoc/mandoc.1 index 3117c92aa8c..7b2720d5b50 100644 --- a/usr.bin/mandoc/mandoc.1 +++ b/usr.bin/mandoc/mandoc.1 @@ -1,6 +1,6 @@ -.\" $OpenBSD: mandoc.1,v 1.43 2011/01/09 15:24:57 schwarze Exp $ +.\" $OpenBSD: mandoc.1,v 1.44 2011/05/29 21:22:18 schwarze Exp $ .\" -.\" Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> +.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> .\" .\" Permission to use, copy, modify, and distribute this software for any .\" purpose with or without fee is hereby granted, provided that the above @@ -14,7 +14,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: January 9 2011 $ +.Dd $Mdocdate: May 29 2011 $ .Dt MANDOC 1 .Os .Sh NAME @@ -158,6 +158,15 @@ utility accepts the following .Fl T arguments, which correspond to output modes: .Bl -tag -width Ds +.It Fl T Ns Cm utf8 +Encode output in the UTF-8 multi-byte format. +See +.Xr UTF-8 Output . +.It Fl T Ns Cm locale +Encode output using the current +.Xr locale 1 . +See +.Sx Locale Output . .It Fl T Ns Cm ascii Produce 7-bit ASCII output. This is the default. @@ -189,6 +198,23 @@ See .Pp If multiple input files are specified, these will be processed by the corresponding filter in-order. +.Ss UTF-8 Output +Use +.Fl T Ns Cm utf8 +to force a UTF-8 locale. +See +.Sx Locale Output +for details and options. 
+.Ss Locale Output +Locale-depending output encoding is triggered with +.Fl T Ns Cm locale . +This option is not available on all systems: systems without locale +support, or those whose internal representation is not natively UCS-4, +will fall back to +.Fl T Ns Cm ascii . +See +.Sx ASCII Output +for font style specification and available command-line arguments. .Ss ASCII Output Output produced by .Fl T Ns Cm ascii , @@ -209,6 +235,9 @@ Emboldened characters are rendered as The special characters documented in .Xr mandoc_char 7 are rendered best-effort in an ASCII equivalent. +If no equivalent is found, +.Sq \&? +is used instead. .Pp Output width is limited to 78 visible columns unless literal input lines exceed this limit. @@ -460,6 +489,13 @@ Each input and output format is separately noted. .Ss ASCII Compatibility .Bl -bullet -compact .It +Unrenderable unicode codepoints specified with +.Sq \e[uNNNN] +escapes are printed as +.Sq \&? +in mandoc. +In GNU troff, these raise an error. +.It The .Sq \&Bd \-literal and @@ -470,7 +506,7 @@ in .Fl T Ns Cm ascii are synonyms, as are \-filled and \-ragged. .It -In GNU troff, the +In historic GNU troff, the .Sq \&Pa .Xr mdoc 7 macro does not underline when scoped under an @@ -495,8 +531,6 @@ macro in has no effect. .It Words aren't hyphenated. -.It -Sentences are unilaterally monospaced. 
.El .Ss HTML/XHTML Compatibility .Bl -bullet -compact diff --git a/usr.bin/mandoc/mandoc.c b/usr.bin/mandoc/mandoc.c index 931ce863017..b9ec46283e8 100644 --- a/usr.bin/mandoc/mandoc.c +++ b/usr.bin/mandoc/mandoc.c @@ -1,4 +1,4 @@ -/* $Id: mandoc.c,v 1.25 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: mandoc.c,v 1.26 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> @@ -19,6 +19,8 @@ #include <assert.h> #include <ctype.h> +#include <errno.h> +#include <limits.h> #include <stdlib.h> #include <stdio.h> #include <string.h> @@ -31,199 +33,358 @@ static int a2time(time_t *, const char *, const char *); static char *time2a(time_t); +static int numescape(const char *); -int -mandoc_special(char *p) +/* + * Pass over recursive numerical expressions. This context of this + * function is important: it's only called within character-terminating + * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial + * recursion: we don't care about what's in these blocks. + * This returns the number of characters skipped or -1 if an error + * occurs (the caller should bail). + */ +static int +numescape(const char *start) { - int len, i; - char term; - char *sv; - - len = 0; - term = '\0'; - sv = p; - - assert('\\' == *p); - p++; - - switch (*p++) { -#if 0 - case ('Z'): + int i; + size_t sz; + const char *cp; + + i = 0; + + /* The expression consists of a subexpression. */ + + if ('\\' == start[i]) { + cp = &start[++i]; + /* + * Read past the end of the subexpression. + * Bail immediately on errors. + */ + if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) + return(-1); + return(i + cp - &start[i]); + } + + if ('(' != start[i++]) + return(0); + + /* + * A parenthesised subexpression. Read until the closing + * parenthesis, making sure to handle any nested subexpressions + * that might ruin our parse. 
+ */ + + while (')' != start[i]) { + sz = strcspn(&start[i], ")\\"); + i += (int)sz; + + if ('\0' == start[i]) + return(-1); + else if ('\\' != start[i]) + continue; + + cp = &start[++i]; + if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) + return(-1); + i += cp - &start[i]; + } + + /* Read past the terminating ')'. */ + return(++i); +} + +enum mandoc_esc +mandoc_escape(const char **end, const char **start, int *sz) +{ + char c, term, numeric; + int i, lim, ssz, rlim; + const char *cp, *rstart; + enum mandoc_esc gly; + + cp = *end; + rstart = cp; + if (start) + *start = rstart; + i = lim = 0; + gly = ESCAPE_ERROR; + term = numeric = '\0'; + + switch ((c = cp[i++])) { + /* + * First the glyphs. There are several different forms of + * these, but each eventually returns a substring of the glyph + * name. + */ + case ('('): + gly = ESCAPE_SPECIAL; + lim = 2; + break; + case ('['): + gly = ESCAPE_SPECIAL; + /* + * Unicode escapes are defined in groff as \[uXXXX] to + * \[u10FFFF], where the contained value must be a valid + * Unicode codepoint. Here, however, only check whether + * it's not a zero-width escape. + */ + if ('u' == cp[i] && ']' != cp[i + 1]) + gly = ESCAPE_UNICODE; + term = ']'; + break; + case ('C'): + if ('\'' != cp[i]) + return(ESCAPE_ERROR); + gly = ESCAPE_SPECIAL; + term = '\''; + break; + + /* + * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where + * 'X' is the trigger. These have opaque sub-strings. 
+ */ + case ('F'): /* FALLTHROUGH */ - case ('X'): + case ('g'): /* FALLTHROUGH */ - case ('x'): + case ('k'): /* FALLTHROUGH */ - case ('S'): + case ('M'): /* FALLTHROUGH */ - case ('R'): + case ('m'): /* FALLTHROUGH */ - case ('N'): + case ('n'): /* FALLTHROUGH */ - case ('l'): + case ('V'): /* FALLTHROUGH */ - case ('L'): + case ('Y'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_IGNORE; /* FALLTHROUGH */ - case ('H'): + case ('f'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_FONT; + + rstart= &cp[i]; + if (start) + *start = rstart; + + switch (cp[i++]) { + case ('('): + lim = 2; + break; + case ('['): + term = ']'; + break; + default: + lim = 1; + i--; + break; + } + break; + + /* + * These escapes are of the form \X'Y', where 'X' is the trigger + * and 'Y' is any string. These have opaque sub-strings. + */ + case ('A'): /* FALLTHROUGH */ - case ('h'): + case ('b'): /* FALLTHROUGH */ case ('D'): /* FALLTHROUGH */ - case ('C'): - /* FALLTHROUGH */ - case ('b'): + case ('o'): /* FALLTHROUGH */ - case ('B'): + case ('R'): /* FALLTHROUGH */ - case ('a'): + case ('X'): /* FALLTHROUGH */ - case ('A'): - if (*p++ != '\'') - return(0); + case ('Z'): + if ('\'' != cp[i++]) + return(ESCAPE_ERROR); + gly = ESCAPE_IGNORE; term = '\''; break; -#endif + + /* + * These escapes are of the form \X'N', where 'X' is the trigger + * and 'N' resolves to a numerical expression. + */ + case ('B'): + /* FALLTHROUGH */ case ('h'): /* FALLTHROUGH */ + case ('H'): + /* FALLTHROUGH */ + case ('L'): + /* FALLTHROUGH */ + case ('l'): + /* FALLTHROUGH */ + case ('N'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_NUMBERED; + /* FALLTHROUGH */ + case ('S'): + /* FALLTHROUGH */ case ('v'): /* FALLTHROUGH */ + case ('w'): + /* FALLTHROUGH */ + case ('x'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_IGNORE; + if ('\'' != cp[i++]) + return(ESCAPE_ERROR); + term = numeric = '\''; + break; + + /* + * Sizes get a special category of their own. 
+ */ case ('s'): - if (ASCII_HYPH == *p) - *p = '-'; + gly = ESCAPE_IGNORE; - i = 0; - if ('+' == *p || '-' == *p) { - p++; - i = 1; - } + rstart = &cp[i]; + if (start) + *start = rstart; - switch (*p++) { + /* See +/- counts as a sign. */ + c = cp[i]; + if ('+' == c || '-' == c || ASCII_HYPH == c) + ++i; + + switch (cp[i++]) { case ('('): - len = 2; + lim = 2; break; case ('['): - term = ']'; + term = numeric = ']'; break; case ('\''): - term = '\''; + term = numeric = '\''; break; - case ('0'): - i = 1; - /* FALLTHROUGH */ default: - len = 1; - p--; + lim = 1; + i--; break; } - if (ASCII_HYPH == *p) - *p = '-'; - if ('+' == *p || '-' == *p) { - if (i) - return(0); - p++; - } - - /* Handle embedded numerical subexp or escape. */ - - if ('(' == *p) { - while (*p && ')' != *p) - if ('\\' == *p++) { - i = mandoc_special(--p); - if (0 == i) - return(0); - p += i; - } - - if (')' == *p++) - break; + /* See +/- counts as a sign. */ + c = cp[i]; + if ('+' == c || '-' == c || ASCII_HYPH == c) + ++i; - return(0); - } else if ('\\' == *p) { - if (0 == (i = mandoc_special(p))) - return(0); - p += i; - } + break; + /* + * Anything else is assumed to be a glyph. + */ + default: + gly = ESCAPE_SPECIAL; + lim = 1; + i--; break; -#if 0 - case ('Y'): - /* FALLTHROUGH */ - case ('V'): - /* FALLTHROUGH */ - case ('$'): - /* FALLTHROUGH */ - case ('n'): - /* FALLTHROUGH */ -#endif - case ('k'): - /* FALLTHROUGH */ - case ('M'): - /* FALLTHROUGH */ - case ('m'): - /* FALLTHROUGH */ - case ('f'): - /* FALLTHROUGH */ - case ('F'): - /* FALLTHROUGH */ - case ('*'): - switch (*p++) { - case ('('): - len = 2; + } + + assert(ESCAPE_ERROR != gly); + + rstart = &cp[i]; + if (start) + *start = rstart; + + /* + * If a terminating block has been specified, we need to + * handle the case of recursion, which could have their + * own terminating blocks that mess up our parse. This, by the + * way, means that the "start" and "size" values will be + * effectively meaningless. 
+ */ + + ssz = 0; + if (numeric && -1 == (ssz = numescape(&cp[i]))) + return(ESCAPE_ERROR); + + i += ssz; + rlim = -1; + + /* + * We have a character terminator. Try to read up to that + * character. If we can't (i.e., we hit the nil), then return + * an error; if we can, calculate our length, read past the + * terminating character, and exit. + */ + + if ('\0' != term) { + *end = strchr(&cp[i], term); + if ('\0' == *end) + return(ESCAPE_ERROR); + + rlim = *end - &cp[i]; + if (sz) + *sz = rlim; + (*end)++; + goto out; + } + + assert(lim > 0); + + /* + * We have a numeric limit. If the string is shorter than that, + * stop and return an error. Else adjust our endpoint, length, + * and return the current glyph. + */ + + if ((size_t)lim > strlen(&cp[i])) + return(ESCAPE_ERROR); + + rlim = lim; + if (sz) + *sz = rlim; + + *end = &cp[i] + lim; + +out: + assert(rlim >= 0 && rstart); + + /* Run post-processors. */ + + switch (gly) { + case (ESCAPE_FONT): + if (1 != rlim) break; - case ('['): - term = ']'; + switch (*rstart) { + case ('3'): + /* FALLTHROUGH */ + case ('B'): + gly = ESCAPE_FONTBOLD; break; - default: - len = 1; - p--; + case ('2'): + /* FALLTHROUGH */ + case ('I'): + gly = ESCAPE_FONTITALIC; + break; + case ('P'): + gly = ESCAPE_FONTPREV; + break; + case ('1'): + /* FALLTHROUGH */ + case ('R'): + gly = ESCAPE_FONTROMAN; break; } break; - case ('('): - len = 2; - break; - case ('['): - term = ']'; - break; - case ('z'): - len = 1; - if ('\\' == *p) { - if (0 == (i = mandoc_special(p))) - return(0); - p += i; - return(*p ? (int)(p - sv) : 0); - } - break; - case ('o'): - /* FALLTHROUGH */ - case ('w'): - if ('\'' == *p++) { - term = '\''; + case (ESCAPE_SPECIAL): + if (1 != rlim) break; - } - /* FALLTHROUGH */ + if ('c' == *rstart) + gly = ESCAPE_NOSPACE; + break; default: - len = 1; - p--; break; } - if (term) { - for ( ; *p && term != *p; p++) - if (ASCII_HYPH == *p) - *p = '-'; - return(*p ? 
(int)(p - sv) : 0); - } - - for (i = 0; *p && i < len; i++, p++) - if (ASCII_HYPH == *p) - *p = '-'; - return(i == len ? (int)(p - sv) : 0); + return(gly); } - void * mandoc_calloc(size_t num, size_t size) { @@ -299,11 +460,11 @@ mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos) /* Quoting can only start with a new word. */ start = *cpp; + quoted = 0; if ('"' == *start) { quoted = 1; start++; - } else - quoted = 0; + } pairs = 0; white = 0; @@ -444,7 +605,7 @@ mandoc_eos(const char *p, size_t sz, int enclosed) /* * End-of-sentence recognition must include situations where * some symbols, such as `)', allow prior EOS punctuation to - * propogate outward. + * propagate outward. */ found = 0; @@ -527,3 +688,35 @@ mandoc_getcontrol(const char *cp, int *ppos) *ppos = pos; return(1); } + +/* + * Convert a string to a long that may not be <0. + * If the string is invalid, or is less than 0, return -1. + */ +int +mandoc_strntou(const char *p, size_t sz, int base) +{ + char buf[32]; + char *ep; + long v; + + if (sz > 31) + return(-1); + + memcpy(buf, p, sz); + buf[(int)sz] = '\0'; + + errno = 0; + v = strtol(buf, &ep, base); + + if (buf[0] == '\0' || *ep != '\0') + return(-1); + + if ((errno == ERANGE && + (v == LONG_MAX || v == LONG_MIN)) || + (v > INT_MAX || v < 0)) + return(-1); + + return((int)v); +} + diff --git a/usr.bin/mandoc/mandoc.h b/usr.bin/mandoc/mandoc.h index 70999ca7149..39c4b2e0a2c 100644 --- a/usr.bin/mandoc/mandoc.h +++ b/usr.bin/mandoc/mandoc.h @@ -1,4 +1,4 @@ -/* $Id: mandoc.h,v 1.37 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: mandoc.h,v 1.38 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -288,10 +288,25 @@ enum mparset { MPARSE_MAN /* assume -man */ }; +enum mandoc_esc { + ESCAPE_ERROR = 0, /* bail! 
unparsable escape */ + ESCAPE_IGNORE, /* escape to be ignored */ + ESCAPE_SPECIAL, /* a regular special character */ + ESCAPE_FONT, /* a generic font mode */ + ESCAPE_FONTBOLD, /* bold font mode */ + ESCAPE_FONTITALIC, /* italic font mode */ + ESCAPE_FONTROMAN, /* roman font mode */ + ESCAPE_FONTPREV, /* previous font mode */ + ESCAPE_NUMBERED, /* a numbered glyph */ + ESCAPE_UNICODE, /* a unicode codepoint */ + ESCAPE_NOSPACE /* suppress space if the last on a line */ +}; + typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel, const char *, int, int, const char *); struct mparse; +struct mchars; struct mdoc; struct man; @@ -310,6 +325,16 @@ void *mandoc_calloc(size_t, size_t); void *mandoc_malloc(size_t); void *mandoc_realloc(void *, size_t); +enum mandoc_esc mandoc_escape(const char **, const char **, int *); + +struct mchars *mchars_alloc(void); +char mchars_num2char(const char *, size_t); +int mchars_num2uc(const char *, size_t); +const char *mchars_spec2str(struct mchars *, const char *, size_t, size_t *); +int mchars_spec2cp(struct mchars *, const char *, size_t); +void mchars_free(struct mchars *); + + __END_DECLS #endif /*!MANDOC_H*/ diff --git a/usr.bin/mandoc/mdoc_argv.c b/usr.bin/mandoc/mdoc_argv.c index c35fcf2517c..5bc1386f021 100644 --- a/usr.bin/mandoc/mdoc_argv.c +++ b/usr.bin/mandoc/mdoc_argv.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_argv.c,v 1.37 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: mdoc_argv.c,v 1.38 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -28,11 +28,25 @@ #include "libmandoc.h" #define MULTI_STEP 5 /* pre-allocate argument values */ +#define DELIMSZ 6 /* max possible size of a delimiter */ + +enum argsflag { + ARGSFL_NONE = 0, + ARGSFL_DELIM, /* handle delimiters of [[::delim::][ ]+]+ */ + ARGSFL_TABSEP /* handle tab/`Ta' separated phrases */ +}; + +enum argvflag { + ARGV_NONE, /* no args to flag (e.g., -split) */ + ARGV_SINGLE, /* one arg to flag (e.g., -file 
xxx) */ + ARGV_MULTI, /* multiple args (e.g., -column xxx yyy) */ + ARGV_OPT_SINGLE /* optional arg (e.g., -offset [xxx]) */ +}; static enum mdocargt argv_a2arg(enum mdoct, const char *); static enum margserr args(struct mdoc *, int, int *, - char *, int, char **); -static int args_checkpunct(const char *); + char *, enum argsflag, char **); +static int args_checkpunct(const char *, int); static int argv(struct mdoc *, int, struct mdoc_argv *, int *, char *); static int argv_single(struct mdoc *, int, @@ -43,13 +57,6 @@ static int argv_multi(struct mdoc *, int, struct mdoc_argv *, int *, char *); static void argn_free(struct mdoc_arg *, int); -enum argvflag { - ARGV_NONE, /* no args to flag (e.g., -split) */ - ARGV_SINGLE, /* one arg to flag (e.g., -file xxx) */ - ARGV_MULTI, /* multiple args (e.g., -column xxx yyy) */ - ARGV_OPT_SINGLE /* optional arg (e.g., -offset [xxx]) */ -}; - static const enum argvflag argvflags[MDOC_ARG_MAX] = { ARGV_NONE, /* MDOC_Split */ ARGV_NONE, /* MDOC_Nosplit */ @@ -79,129 +86,129 @@ static const enum argvflag argvflags[MDOC_ARG_MAX] = { ARGV_NONE /* MDOC_Symbolic */ }; -static const int argflags[MDOC_MAX] = { - 0, /* Ap */ - 0, /* Dd */ - 0, /* Dt */ - 0, /* Os */ - 0, /* Sh */ - 0, /* Ss */ - 0, /* Pp */ - ARGS_DELIM, /* D1 */ - ARGS_DELIM, /* Dl */ - 0, /* Bd */ - 0, /* Ed */ - 0, /* Bl */ - 0, /* El */ - 0, /* It */ - ARGS_DELIM, /* Ad */ - ARGS_DELIM, /* An */ - ARGS_DELIM, /* Ar */ - 0, /* Cd */ - ARGS_DELIM, /* Cm */ - ARGS_DELIM, /* Dv */ - ARGS_DELIM, /* Er */ - ARGS_DELIM, /* Ev */ - 0, /* Ex */ - ARGS_DELIM, /* Fa */ - 0, /* Fd */ - ARGS_DELIM, /* Fl */ - ARGS_DELIM, /* Fn */ - ARGS_DELIM, /* Ft */ - ARGS_DELIM, /* Ic */ - 0, /* In */ - ARGS_DELIM, /* Li */ - 0, /* Nd */ - ARGS_DELIM, /* Nm */ - ARGS_DELIM, /* Op */ - 0, /* Ot */ - ARGS_DELIM, /* Pa */ - 0, /* Rv */ - ARGS_DELIM, /* St */ - ARGS_DELIM, /* Va */ - ARGS_DELIM, /* Vt */ - ARGS_DELIM, /* Xr */ - 0, /* %A */ - 0, /* %B */ - 0, /* %D */ - 0, /* %I */ - 0, /* %J 
*/ - 0, /* %N */ - 0, /* %O */ - 0, /* %P */ - 0, /* %R */ - 0, /* %T */ - 0, /* %V */ - ARGS_DELIM, /* Ac */ - 0, /* Ao */ - ARGS_DELIM, /* Aq */ - ARGS_DELIM, /* At */ - ARGS_DELIM, /* Bc */ - 0, /* Bf */ - 0, /* Bo */ - ARGS_DELIM, /* Bq */ - ARGS_DELIM, /* Bsx */ - ARGS_DELIM, /* Bx */ - 0, /* Db */ - ARGS_DELIM, /* Dc */ - 0, /* Do */ - ARGS_DELIM, /* Dq */ - ARGS_DELIM, /* Ec */ - 0, /* Ef */ - ARGS_DELIM, /* Em */ - 0, /* Eo */ - ARGS_DELIM, /* Fx */ - ARGS_DELIM, /* Ms */ - ARGS_DELIM, /* No */ - ARGS_DELIM, /* Ns */ - ARGS_DELIM, /* Nx */ - ARGS_DELIM, /* Ox */ - ARGS_DELIM, /* Pc */ - ARGS_DELIM, /* Pf */ - 0, /* Po */ - ARGS_DELIM, /* Pq */ - ARGS_DELIM, /* Qc */ - ARGS_DELIM, /* Ql */ - 0, /* Qo */ - ARGS_DELIM, /* Qq */ - 0, /* Re */ - 0, /* Rs */ - ARGS_DELIM, /* Sc */ - 0, /* So */ - ARGS_DELIM, /* Sq */ - 0, /* Sm */ - ARGS_DELIM, /* Sx */ - ARGS_DELIM, /* Sy */ - ARGS_DELIM, /* Tn */ - ARGS_DELIM, /* Ux */ - ARGS_DELIM, /* Xc */ - 0, /* Xo */ - 0, /* Fo */ - 0, /* Fc */ - 0, /* Oo */ - ARGS_DELIM, /* Oc */ - 0, /* Bk */ - 0, /* Ek */ - 0, /* Bt */ - 0, /* Hf */ - 0, /* Fr */ - 0, /* Ud */ - 0, /* Lb */ - 0, /* Lp */ - ARGS_DELIM, /* Lk */ - ARGS_DELIM, /* Mt */ - ARGS_DELIM, /* Brq */ - 0, /* Bro */ - ARGS_DELIM, /* Brc */ - 0, /* %C */ - 0, /* Es */ - 0, /* En */ - 0, /* Dx */ - 0, /* %Q */ - 0, /* br */ - 0, /* sp */ - 0, /* %U */ - 0, /* Ta */ +static const enum argsflag argflags[MDOC_MAX] = { + ARGSFL_NONE, /* Ap */ + ARGSFL_NONE, /* Dd */ + ARGSFL_NONE, /* Dt */ + ARGSFL_NONE, /* Os */ + ARGSFL_NONE, /* Sh */ + ARGSFL_NONE, /* Ss */ + ARGSFL_NONE, /* Pp */ + ARGSFL_DELIM, /* D1 */ + ARGSFL_DELIM, /* Dl */ + ARGSFL_NONE, /* Bd */ + ARGSFL_NONE, /* Ed */ + ARGSFL_NONE, /* Bl */ + ARGSFL_NONE, /* El */ + ARGSFL_NONE, /* It */ + ARGSFL_DELIM, /* Ad */ + ARGSFL_DELIM, /* An */ + ARGSFL_DELIM, /* Ar */ + ARGSFL_NONE, /* Cd */ + ARGSFL_DELIM, /* Cm */ + ARGSFL_DELIM, /* Dv */ + ARGSFL_DELIM, /* Er */ + ARGSFL_DELIM, /* Ev */ + ARGSFL_NONE, /* Ex */ + 
ARGSFL_DELIM, /* Fa */ + ARGSFL_NONE, /* Fd */ + ARGSFL_DELIM, /* Fl */ + ARGSFL_DELIM, /* Fn */ + ARGSFL_DELIM, /* Ft */ + ARGSFL_DELIM, /* Ic */ + ARGSFL_NONE, /* In */ + ARGSFL_DELIM, /* Li */ + ARGSFL_NONE, /* Nd */ + ARGSFL_DELIM, /* Nm */ + ARGSFL_DELIM, /* Op */ + ARGSFL_NONE, /* Ot */ + ARGSFL_DELIM, /* Pa */ + ARGSFL_NONE, /* Rv */ + ARGSFL_DELIM, /* St */ + ARGSFL_DELIM, /* Va */ + ARGSFL_DELIM, /* Vt */ + ARGSFL_DELIM, /* Xr */ + ARGSFL_NONE, /* %A */ + ARGSFL_NONE, /* %B */ + ARGSFL_NONE, /* %D */ + ARGSFL_NONE, /* %I */ + ARGSFL_NONE, /* %J */ + ARGSFL_NONE, /* %N */ + ARGSFL_NONE, /* %O */ + ARGSFL_NONE, /* %P */ + ARGSFL_NONE, /* %R */ + ARGSFL_NONE, /* %T */ + ARGSFL_NONE, /* %V */ + ARGSFL_DELIM, /* Ac */ + ARGSFL_NONE, /* Ao */ + ARGSFL_DELIM, /* Aq */ + ARGSFL_DELIM, /* At */ + ARGSFL_DELIM, /* Bc */ + ARGSFL_NONE, /* Bf */ + ARGSFL_NONE, /* Bo */ + ARGSFL_DELIM, /* Bq */ + ARGSFL_DELIM, /* Bsx */ + ARGSFL_DELIM, /* Bx */ + ARGSFL_NONE, /* Db */ + ARGSFL_DELIM, /* Dc */ + ARGSFL_NONE, /* Do */ + ARGSFL_DELIM, /* Dq */ + ARGSFL_DELIM, /* Ec */ + ARGSFL_NONE, /* Ef */ + ARGSFL_DELIM, /* Em */ + ARGSFL_NONE, /* Eo */ + ARGSFL_DELIM, /* Fx */ + ARGSFL_DELIM, /* Ms */ + ARGSFL_DELIM, /* No */ + ARGSFL_DELIM, /* Ns */ + ARGSFL_DELIM, /* Nx */ + ARGSFL_DELIM, /* Ox */ + ARGSFL_DELIM, /* Pc */ + ARGSFL_DELIM, /* Pf */ + ARGSFL_NONE, /* Po */ + ARGSFL_DELIM, /* Pq */ + ARGSFL_DELIM, /* Qc */ + ARGSFL_DELIM, /* Ql */ + ARGSFL_NONE, /* Qo */ + ARGSFL_DELIM, /* Qq */ + ARGSFL_NONE, /* Re */ + ARGSFL_NONE, /* Rs */ + ARGSFL_DELIM, /* Sc */ + ARGSFL_NONE, /* So */ + ARGSFL_DELIM, /* Sq */ + ARGSFL_NONE, /* Sm */ + ARGSFL_DELIM, /* Sx */ + ARGSFL_DELIM, /* Sy */ + ARGSFL_DELIM, /* Tn */ + ARGSFL_DELIM, /* Ux */ + ARGSFL_DELIM, /* Xc */ + ARGSFL_NONE, /* Xo */ + ARGSFL_NONE, /* Fo */ + ARGSFL_NONE, /* Fc */ + ARGSFL_NONE, /* Oo */ + ARGSFL_DELIM, /* Oc */ + ARGSFL_NONE, /* Bk */ + ARGSFL_NONE, /* Ek */ + ARGSFL_NONE, /* Bt */ + ARGSFL_NONE, /* Hf */ + 
ARGSFL_NONE, /* Fr */ + ARGSFL_NONE, /* Ud */ + ARGSFL_NONE, /* Lb */ + ARGSFL_NONE, /* Lp */ + ARGSFL_DELIM, /* Lk */ + ARGSFL_DELIM, /* Mt */ + ARGSFL_DELIM, /* Brq */ + ARGSFL_NONE, /* Bro */ + ARGSFL_DELIM, /* Brc */ + ARGSFL_NONE, /* %C */ + ARGSFL_NONE, /* Es */ + ARGSFL_NONE, /* En */ + ARGSFL_NONE, /* Dx */ + ARGSFL_NONE, /* %Q */ + ARGSFL_NONE, /* br */ + ARGSFL_NONE, /* sp */ + ARGSFL_NONE, /* %U */ + ARGSFL_NONE, /* Ta */ }; static const enum mdocargt args_Ex[] = { @@ -371,18 +378,17 @@ argn_free(struct mdoc_arg *p, int iarg) } enum margserr -mdoc_zargs(struct mdoc *m, int line, int *pos, - char *buf, int flags, char **v) +mdoc_zargs(struct mdoc *m, int line, int *pos, char *buf, char **v) { - return(args(m, line, pos, buf, flags, v)); + return(args(m, line, pos, buf, ARGSFL_NONE, v)); } enum margserr mdoc_args(struct mdoc *m, int line, int *pos, char *buf, enum mdoct tok, char **v) { - int fl; + enum argsflag fl; struct mdoc_node *n; fl = argflags[tok]; @@ -399,39 +405,21 @@ mdoc_args(struct mdoc *m, int line, int *pos, for (n = m->last; n; n = n->parent) if (MDOC_Bl == n->tok) - break; - - if (n && LIST_column == n->norm->Bl.type) { - fl |= ARGS_TABSEP; - fl &= ~ARGS_DELIM; - } + if (LIST_column == n->norm->Bl.type) { + fl = ARGSFL_TABSEP; + break; + } return(args(m, line, pos, buf, fl, v)); } static enum margserr args(struct mdoc *m, int line, int *pos, - char *buf, int fl, char **v) + char *buf, enum argsflag fl, char **v) { - int i; char *p, *pp; enum margserr rc; - /* - * Parse out the terms (like `val' in `.Xx -arg val' or simply - * `.Xx val'), which can have all sorts of properties: - * - * ARGS_DELIM: use special handling if encountering trailing - * delimiters in the form of [[::delim::][ ]+]+. - * - * ARGS_NOWARN: don't post warnings. This is only used when - * re-parsing delimiters, as the warnings have already been - * posted. - * - * ARGS_TABSEP: use special handling for tab/`Ta' separated - * phrases like in `Bl -column'. 
- */ - assert(' ' != buf[*pos]); if ('\0' == buf[*pos]) { @@ -451,15 +439,9 @@ args(struct mdoc *m, int line, int *pos, *v = &buf[*pos]; - if (ARGS_DELIM & fl && args_checkpunct(&buf[*pos])) { - i = strlen(&buf[*pos]) + *pos; - if (i && ' ' != buf[i - 1]) + if (ARGSFL_DELIM == fl) + if (args_checkpunct(buf, *pos)) return(ARGS_PUNCT); - if (ARGS_NOWARN & fl) - return(ARGS_PUNCT); - mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE); - return(ARGS_PUNCT); - } /* * First handle TABSEP items, restricted to `Bl -column'. This @@ -468,7 +450,7 @@ args(struct mdoc *m, int line, int *pos, * for arguments at a later phase. */ - if (ARGS_TABSEP & fl) { + if (ARGSFL_TABSEP == fl) { /* Scan ahead to tab (can't be escaped). */ p = strchr(*v, '\t'); pp = NULL; @@ -507,7 +489,7 @@ args(struct mdoc *m, int line, int *pos, } /* Whitespace check for eoln case... */ - if ('\0' == *p && ' ' == *(p - 1) && ! (ARGS_NOWARN & fl)) + if ('\0' == *p && ' ' == *(p - 1)) mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE); *pos += (int)(p - *v); @@ -550,7 +532,7 @@ args(struct mdoc *m, int line, int *pos, } if ('\0' == buf[*pos]) { - if (ARGS_NOWARN & fl || MDOC_PPHRASE & m->flags) + if (MDOC_PPHRASE & m->flags) return(ARGS_QWORD); mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE); return(ARGS_QWORD); @@ -565,31 +547,14 @@ args(struct mdoc *m, int line, int *pos, while (' ' == buf[*pos]) (*pos)++; - if (0 == buf[*pos] && ! (ARGS_NOWARN & fl)) + if ('\0' == buf[*pos]) mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE); return(ARGS_QWORD); } - /* - * A non-quoted term progresses until either the end of line or - * a non-escaped whitespace. - */ - - for ( ; buf[*pos]; (*pos)++) - if (*pos && ' ' == buf[*pos] && '\\' != buf[*pos - 1]) - break; - - if ('\0' == buf[*pos]) - return(ARGS_WORD); - - buf[(*pos)++] = '\0'; - - while (' ' == buf[*pos]) - (*pos)++; - - if ('\0' == buf[*pos] && ! 
(ARGS_NOWARN & fl)) - mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE); + p = &buf[*pos]; + *v = mandoc_getarg(m->parse, &p, line, pos); return(ARGS_WORD); } @@ -601,49 +566,47 @@ args(struct mdoc *m, int line, int *pos, * whitespace may separate these tokens. */ static int -args_checkpunct(const char *p) +args_checkpunct(const char *buf, int i) { - int i, j; - char buf[DELIMSZ]; + int j; + char dbuf[DELIMSZ]; enum mdelim d; - i = 0; - /* First token must be a close-delimiter. */ - for (j = 0; p[i] && ' ' != p[i] && j < DELIMSZ; j++, i++) - buf[j] = p[i]; + for (j = 0; buf[i] && ' ' != buf[i] && j < DELIMSZ; j++, i++) + dbuf[j] = buf[i]; if (DELIMSZ == j) return(0); - buf[j] = '\0'; - if (DELIM_CLOSE != mdoc_isdelim(buf)) + dbuf[j] = '\0'; + if (DELIM_CLOSE != mdoc_isdelim(dbuf)) return(0); - while (' ' == p[i]) + while (' ' == buf[i]) i++; /* Remaining must NOT be open/none. */ - while (p[i]) { + while (buf[i]) { j = 0; - while (p[i] && ' ' != p[i] && j < DELIMSZ) - buf[j++] = p[i++]; + while (buf[i] && ' ' != buf[i] && j < DELIMSZ) + dbuf[j++] = buf[i++]; if (DELIMSZ == j) return(0); - buf[j] = '\0'; - d = mdoc_isdelim(buf); + dbuf[j] = '\0'; + d = mdoc_isdelim(dbuf); if (DELIM_NONE == d || DELIM_OPEN == d) return(0); - while (' ' == p[i]) + while (' ' == buf[i]) i++; } - return('\0' == p[i]); + return('\0' == buf[i]); } /* @@ -654,40 +617,40 @@ args_checkpunct(const char *p) static enum mdocargt argv_a2arg(enum mdoct tok, const char *p) { - const enum mdocargt *args; + const enum mdocargt *argsp; - args = NULL; + argsp = NULL; switch (tok) { case (MDOC_An): - args = args_An; + argsp = args_An; break; case (MDOC_Bd): - args = args_Bd; + argsp = args_Bd; break; case (MDOC_Bf): - args = args_Bf; + argsp = args_Bf; break; case (MDOC_Bk): - args = args_Bk; + argsp = args_Bk; break; case (MDOC_Bl): - args = args_Bl; + argsp = args_Bl; break; case (MDOC_Rv): /* FALLTHROUGH */ case (MDOC_Ex): - args = args_Ex; + argsp = args_Ex; break; default: return(MDOC_ARG_MAX); } - 
assert(args); + assert(argsp); - for ( ; MDOC_ARG_MAX != *args ; args++) - if (0 == strcmp(p, mdoc_argnames[*args])) - return(*args); + for ( ; MDOC_ARG_MAX != *argsp ; argsp++) + if (0 == strcmp(p, mdoc_argnames[*argsp])) + return(*argsp); return(MDOC_ARG_MAX); } @@ -702,7 +665,7 @@ argv_multi(struct mdoc *m, int line, for (v->sz = 0; ; v->sz++) { if ('-' == buf[*pos]) break; - ac = args(m, line, pos, buf, 0, &p); + ac = args(m, line, pos, buf, ARGSFL_NONE, &p); if (ARGS_ERROR == ac) return(0); else if (ARGS_EOLN == ac) @@ -728,7 +691,7 @@ argv_opt_single(struct mdoc *m, int line, if ('-' == buf[*pos]) return(1); - ac = args(m, line, pos, buf, 0, &p); + ac = args(m, line, pos, buf, ARGSFL_NONE, &p); if (ARGS_ERROR == ac) return(0); if (ARGS_EOLN == ac) @@ -754,7 +717,7 @@ argv_single(struct mdoc *m, int line, ppos = *pos; - ac = args(m, line, pos, buf, 0, &p); + ac = args(m, line, pos, buf, ARGSFL_NONE, &p); if (ARGS_EOLN == ac) { mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTARGVCOUNT); return(0); diff --git a/usr.bin/mandoc/mdoc_html.c b/usr.bin/mandoc/mdoc_html.c index 47112e20804..2bbf5f6fe75 100644 --- a/usr.bin/mandoc/mdoc_html.c +++ b/usr.bin/mandoc/mdoc_html.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_html.c,v 1.56 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: mdoc_html.c,v 1.57 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -284,7 +284,7 @@ a2width(const char *p, struct roffsu *su) if ( ! a2roffsu(p, su, SCALE_MAX)) { su->unit = SCALE_BU; - su->scale = (int)strlen(p); + su->scale = html_strlen(p); } } @@ -351,7 +351,7 @@ a2offs(const char *p, struct roffsu *su) SCALE_HS_INIT(su, INDENT * 2); else if ( ! 
a2roffsu(p, su, SCALE_MAX)) { su->unit = SCALE_BU; - su->scale = (int)strlen(p); + su->scale = html_strlen(p); } } @@ -378,13 +378,10 @@ print_mdoc_head(MDOC_ARGS) print_gen_head(h); bufinit(h); - buffmt(h, "%s(%s)", m->title, m->msec); + bufcat_fmt(h, "%s(%s)", m->title, m->msec); - if (m->arch) { - bufcat(h, " ("); - bufcat(h, m->arch); - bufcat(h, ")"); - } + if (m->arch) + bufcat_fmt(h, " (%s)", m->arch); print_otag(h, TAG_TITLE, 0, NULL); print_text(h, h->buf); @@ -411,7 +408,6 @@ print_mdoc_node(MDOC_ARGS) child = 1; t = h->tags.head; - bufinit(h); switch (n->type) { case (MDOC_ROOT): child = mdoc_root_pre(m, n, h); @@ -480,7 +476,6 @@ print_mdoc_node(MDOC_ARGS) print_stagq(h, t); - bufinit(h); switch (n->type) { case (MDOC_ROOT): mdoc_root_post(m, n, h); @@ -602,7 +597,6 @@ static int mdoc_sh_pre(MDOC_ARGS) { struct htmlpair tag; - char buf[BUFSIZ]; if (MDOC_BLOCK == n->type) { PAIR_CLASS_INIT(&tag, "section"); @@ -611,14 +605,14 @@ mdoc_sh_pre(MDOC_ARGS) } else if (MDOC_BODY == n->type) return(1); - buf[0] = '\0'; + bufinit(h); for (n = n->child; n; n = n->next) { - html_idcat(buf, n->string, BUFSIZ); + bufcat_id(h, n->string); if (n->next) - html_idcat(buf, " ", BUFSIZ); + bufcat_id(h, " "); } - PAIR_ID_INIT(&tag, buf); + PAIR_ID_INIT(&tag, h->buf); print_otag(h, TAG_H1, 1, &tag); return(1); } @@ -629,7 +623,6 @@ static int mdoc_ss_pre(MDOC_ARGS) { struct htmlpair tag; - char buf[BUFSIZ]; if (MDOC_BLOCK == n->type) { PAIR_CLASS_INIT(&tag, "subsection"); @@ -638,14 +631,14 @@ mdoc_ss_pre(MDOC_ARGS) } else if (MDOC_BODY == n->type) return(1); - buf[0] = '\0'; + bufinit(h); for (n = n->child; n; n = n->next) { - html_idcat(buf, n->string, BUFSIZ); + bufcat_id(h, n->string); if (n->next) - html_idcat(buf, " ", BUFSIZ); + bufcat_id(h, " "); } - PAIR_ID_INIT(&tag, buf); + PAIR_ID_INIT(&tag, h->buf); print_otag(h, TAG_H2, 1, &tag); return(1); } @@ -699,7 +692,7 @@ mdoc_nm_pre(MDOC_ARGS) { struct htmlpair tag; struct roffsu su; - size_t len; + int len; switch 
(n->type) { case (MDOC_ELEM): @@ -727,12 +720,13 @@ mdoc_nm_pre(MDOC_ARGS) for (len = 0, n = n->child; n; n = n->next) if (MDOC_TEXT == n->type) - len += strlen(n->string); + len += html_strlen(n->string); if (0 == len && m->name) - len = strlen(m->name); + len = html_strlen(m->name); SCALE_HS_INIT(&su, (double)len); + bufinit(h); bufcat_su(h, "width", &su); PAIR_STYLE_INIT(&tag, h); print_otag(h, TAG_COL, 1, &tag); @@ -895,6 +889,8 @@ mdoc_it_pre(MDOC_ARGS) assert(lists[type]); PAIR_CLASS_INIT(&tag[0], lists[type]); + bufinit(h); + if (MDOC_HEAD == n->type) { switch (type) { case(LIST_bullet): @@ -995,6 +991,8 @@ mdoc_bl_pre(MDOC_ARGS) struct roffsu su; char buf[BUFSIZ]; + bufinit(h); + if (MDOC_BODY == n->type) { if (LIST_column == n->norm->Bl.type) print_otag(h, TAG_TBODY, 0, NULL); @@ -1014,7 +1012,6 @@ mdoc_bl_pre(MDOC_ARGS) for (i = 0; i < (int)n->norm->Bl.ncols; i++) { a2width(n->norm->Bl.cols[i], &su); - bufinit(h); if (i < (int)n->norm->Bl.ncols - 1) bufcat_su(h, "width", &su); else @@ -1143,6 +1140,7 @@ mdoc_d1_pre(MDOC_ARGS) return(1); SCALE_VS_INIT(&su, 0); + bufinit(h); bufcat_su(h, "margin-top", &su); bufcat_su(h, "margin-bottom", &su); PAIR_STYLE_INIT(&tag[0], h); @@ -1167,17 +1165,17 @@ static int mdoc_sx_pre(MDOC_ARGS) { struct htmlpair tag[2]; - char buf[BUFSIZ]; - strlcpy(buf, "#", BUFSIZ); + bufinit(h); + bufcat(h, "#x"); for (n = n->child; n; n = n->next) { - html_idcat(buf, n->string, BUFSIZ); + bufcat_id(h, n->string); if (n->next) - html_idcat(buf, " ", BUFSIZ); + bufcat_id(h, " "); } PAIR_CLASS_INIT(&tag[0], "link-sec"); - PAIR_HREF_INIT(&tag[1], buf); + PAIR_HREF_INIT(&tag[1], h->buf); print_otag(h, TAG_I, 1, tag); print_otag(h, TAG_A, 2, tag); @@ -1215,7 +1213,8 @@ mdoc_bd_pre(MDOC_ARGS) SCALE_HS_INIT(&su, 0); if (n->norm->Bd.offs) a2offs(n->norm->Bd.offs, &su); - + + bufinit(h); bufcat_su(h, "margin-left", &su); PAIR_STYLE_INIT(&tag[0], h); @@ -1434,7 +1433,6 @@ mdoc_fd_pre(MDOC_ARGS) buf[sz - 1] = '\0'; PAIR_CLASS_INIT(&tag[0], 
"link-includes"); - bufinit(h); i = 1; if (h->base_includes) { @@ -1556,8 +1554,8 @@ mdoc_fn_pre(MDOC_ARGS) print_text(h, "("); h->flags |= HTML_NOSPACE; - bufinit(h); PAIR_CLASS_INIT(&tag[0], "farg"); + bufinit(h); bufcat_style(h, "white-space", "nowrap"); PAIR_STYLE_INIT(&tag[1], h); @@ -1636,6 +1634,7 @@ mdoc_sp_pre(MDOC_ARGS) } else su.scale = 0; + bufinit(h); bufcat_su(h, "height", &su); PAIR_STYLE_INIT(&tag, h); print_otag(h, TAG_DIV, 1, &tag); @@ -1772,10 +1771,8 @@ mdoc_in_pre(MDOC_ARGS) assert(MDOC_TEXT == n->type); PAIR_CLASS_INIT(&tag[0], "link-includes"); - bufinit(h); i = 1; - if (h->base_includes) { buffmt_includes(h, n->string); PAIR_HREF_INIT(&tag[i], h->buf); @@ -1914,6 +1911,7 @@ mdoc_bf_pre(MDOC_ARGS) * We want this to be inline-formatted, but needs to be div to * accept block children. */ + bufinit(h); bufcat_style(h, "display", "inline"); SCALE_HS_INIT(&su, 1); /* Needs a left-margin for spacing. */ diff --git a/usr.bin/mandoc/mdoc_macro.c b/usr.bin/mandoc/mdoc_macro.c index e29fb2610d6..bc22ce7330a 100644 --- a/usr.bin/mandoc/mdoc_macro.c +++ b/usr.bin/mandoc/mdoc_macro.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_macro.c,v 1.67 2011/04/24 16:49:10 schwarze Exp $ */ +/* $Id: mdoc_macro.c,v 1.68 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org> @@ -615,7 +615,7 @@ append_delims(struct mdoc *m, int line, int *pos, char *buf) for (;;) { la = *pos; - ac = mdoc_zargs(m, line, pos, buf, ARGS_NOWARN, &p); + ac = mdoc_zargs(m, line, pos, buf, &p); if (ARGS_ERROR == ac) return(0); @@ -628,12 +628,12 @@ append_delims(struct mdoc *m, int line, int *pos, char *buf) * If we encounter end-of-sentence symbols, then trigger * the double-space. * - * XXX: it's easy to allow this to propogate outward to + * XXX: it's easy to allow this to propagate outward to * the last symbol, such that `. )' will cause the * correct double-spacing. 
However, (1) groff isn't * smart enough to do this and (2) it would require * knowing which symbols break this behaviour, for - * example, `. ;' shouldn't propogate the double-space. + * example, `. ;' shouldn't propagate the double-space. */ if (mandoc_eos(p, strlen(p), 0)) m->last->flags |= MDOC_EOS; @@ -992,7 +992,7 @@ blk_full(MACRO_PROT_ARGS) } /* - * This routine accomodates implicitly- and explicitly-scoped + * This routine accommodates implicitly- and explicitly-scoped * macro openings. Implicit ones first close out prior scope * (seen above). Delay opening the head until necessary to * allow leading punctuation to print. Special consideration @@ -1289,7 +1289,7 @@ blk_part_imp(MACRO_PROT_ARGS) if (mandoc_eos(n->string, strlen(n->string), 1)) n->flags |= MDOC_EOS; - /* Up-propogate the end-of-space flag. */ + /* Up-propagate the end-of-space flag. */ if (n && (MDOC_EOS & n->flags)) { body->flags |= MDOC_EOS; @@ -1711,7 +1711,7 @@ phrase(struct mdoc *m, int line, int ppos, char *buf) for (pos = ppos; ; ) { la = pos; - ac = mdoc_zargs(m, line, &pos, buf, 0, &p); + ac = mdoc_zargs(m, line, &pos, buf, &p); if (ARGS_ERROR == ac) return(0); @@ -1756,7 +1756,7 @@ phrase_ta(MACRO_PROT_ARGS) for (;;) { la = *pos; - ac = mdoc_zargs(m, line, pos, buf, 0, &p); + ac = mdoc_zargs(m, line, pos, buf, &p); if (ARGS_ERROR == ac) return(0); diff --git a/usr.bin/mandoc/mdoc_term.c b/usr.bin/mandoc/mdoc_term.c index a992ed09cf8..24ca2a3e485 100644 --- a/usr.bin/mandoc/mdoc_term.c +++ b/usr.bin/mandoc/mdoc_term.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_term.c,v 1.132 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: mdoc_term.c,v 1.133 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org> @@ -260,14 +260,7 @@ terminal_mdoc(void *arg, const struct mdoc *mdoc) p->tabwidth = term_len(p, 5); if (NULL == p->symtab) - switch (p->enc) { - case (TERMENC_ASCII): - p->symtab = 
chars_init(CHARS_ASCII); - break; - default: - abort(); - /* NOTREACHED */ - } + p->symtab = mchars_alloc(); n = mdoc_node(mdoc); m = mdoc_meta(mdoc); diff --git a/usr.bin/mandoc/mdoc_validate.c b/usr.bin/mandoc/mdoc_validate.c index b915a49b24a..1c08e614a80 100644 --- a/usr.bin/mandoc/mdoc_validate.c +++ b/usr.bin/mandoc/mdoc_validate.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_validate.c,v 1.92 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: mdoc_validate.c,v 1.93 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> @@ -176,7 +176,7 @@ static v_pre pres_sh[] = { pre_sh, NULL }; static v_pre pres_ss[] = { pre_ss, NULL }; static v_pre pres_std[] = { pre_std, NULL }; -static const struct valids mdoc_valids[MDOC_MAX] = { +static const struct valids mdoc_valids[MDOC_MAX] = { { NULL, NULL }, /* Ap */ { pres_dd, posts_dd }, /* Dd */ { pres_dt, posts_dt }, /* Dt */ @@ -541,31 +541,39 @@ check_argv(struct mdoc *m, struct mdoc_node *n, struct mdoc_argv *v) static void check_text(struct mdoc *m, int ln, int pos, char *p) { - int c; + char *cpp, *pp; size_t sz; - for ( ; *p; p++, pos++) { + while ('\0' != *p) { sz = strcspn(p, "\t\\"); - p += (int)sz; - - if ('\0' == *p) - break; + p += (int)sz; pos += (int)sz; if ('\t' == *p) { if ( ! (MDOC_LITERAL & m->flags)) mdoc_pmsg(m, ln, pos, MANDOCERR_BADTAB); + p++; + pos++; continue; - } + } else if ('\0' == *p) + break; + + pos++; + pp = ++p; - if (0 == (c = mandoc_special(p))) { + if (ESCAPE_ERROR == mandoc_escape + ((const char **)&pp, NULL, NULL)) { mdoc_pmsg(m, ln, pos, MANDOCERR_BADESCAPE); - continue; + break; } - p += c - 1; - pos += c - 1; + cpp = p; + while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp))) + *cpp = '-'; + + pos += pp - p; + p = pp; } } @@ -1523,7 +1531,7 @@ post_bl_head(POST_ARGS) assert(0 == np->args->argv[j].sz); /* - * Accomodate for new-style groff column syntax. 
Shuffle the + * Accommodate for new-style groff column syntax. Shuffle the * child nodes, all of which must be TEXT, as arguments for the * column field. Then, delete the head children. */ diff --git a/usr.bin/mandoc/out.c b/usr.bin/mandoc/out.c index 48e3b3c5c8b..eaebdc4a697 100644 --- a/usr.bin/mandoc/out.c +++ b/usr.bin/mandoc/out.c @@ -1,4 +1,4 @@ -/* $Id: out.c,v 1.13 2011/04/21 22:59:54 schwarze Exp $ */ +/* $Id: out.c,v 1.14 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> @@ -170,243 +170,6 @@ time2a(time_t t, char *dst, size_t sz) (void)strftime(p, sz, "%Y", &tm); } - -int -a2roffdeco(enum roffdeco *d, const char **word, size_t *sz) -{ - int i, j, lim; - char term, c; - const char *wp; - enum roffdeco dd; - - *d = DECO_NONE; - lim = i = 0; - term = '\0'; - wp = *word; - - switch ((c = wp[i++])) { - case ('('): - *d = DECO_SPECIAL; - lim = 2; - break; - case ('F'): - /* FALLTHROUGH */ - case ('f'): - *d = 'F' == c ? DECO_FFONT : DECO_FONT; - - switch (wp[i++]) { - case ('('): - lim = 2; - break; - case ('['): - term = ']'; - break; - case ('3'): - /* FALLTHROUGH */ - case ('B'): - *d = DECO_BOLD; - return(i); - case ('2'): - /* FALLTHROUGH */ - case ('I'): - *d = DECO_ITALIC; - return(i); - case ('P'): - *d = DECO_PREVIOUS; - return(i); - case ('1'): - /* FALLTHROUGH */ - case ('R'): - *d = DECO_ROMAN; - return(i); - default: - i--; - lim = 1; - break; - } - break; - case ('k'): - /* FALLTHROUGH */ - case ('M'): - /* FALLTHROUGH */ - case ('m'): - /* FALLTHROUGH */ - case ('*'): - if ('*' == c) - *d = DECO_RESERVED; - - switch (wp[i++]) { - case ('('): - lim = 2; - break; - case ('['): - term = ']'; - break; - default: - i--; - lim = 1; - break; - } - break; - - case ('N'): - - /* - * Sequence of characters: backslash, 'N' (i = 0), - * starting delimiter (i = 1), character number (i = 2). 
- */ - - *word = wp + 2; - *sz = 0; - - /* - * Cannot use a digit as a starting delimiter; - * but skip the digit anyway. - */ - - if (isdigit((int)wp[1])) - return(2); - - /* - * Any non-digit terminates the character number. - * That is, the terminating delimiter need not - * match the starting delimiter. - */ - - for (i = 2; isdigit((int)wp[i]); i++) - (*sz)++; - - /* - * This is only a numbered character - * if the character number has at least one digit. - */ - - if (*sz) - *d = DECO_NUMBERED; - - /* - * Skip the terminating delimiter, even if it does not - * match, and even if there is no character number. - */ - - return(++i); - - case ('h'): - /* FALLTHROUGH */ - case ('v'): - /* FALLTHROUGH */ - case ('s'): - j = 0; - if ('+' == wp[i] || '-' == wp[i]) { - i++; - j = 1; - } - - switch (wp[i++]) { - case ('('): - lim = 2; - break; - case ('['): - term = ']'; - break; - case ('\''): - term = '\''; - break; - case ('0'): - j = 1; - /* FALLTHROUGH */ - default: - i--; - lim = 1; - break; - } - - if ('+' == wp[i] || '-' == wp[i]) { - if (j) - return(i); - i++; - } - - /* Handle embedded numerical subexp or escape. */ - - if ('(' == wp[i]) { - while (wp[i] && ')' != wp[i]) - if ('\\' == wp[i++]) { - /* Handle embedded escape. 
*/ - *word = &wp[i]; - i += a2roffdeco(&dd, word, sz); - } - - if (')' == wp[i++]) - break; - - *d = DECO_NONE; - return(i - 1); - } else if ('\\' == wp[i]) { - *word = &wp[++i]; - i += a2roffdeco(&dd, word, sz); - } - - break; - case ('['): - *d = DECO_SPECIAL; - term = ']'; - break; - case ('c'): - *d = DECO_NOSPACE; - return(i); - case ('z'): - *d = DECO_NONE; - if ('\\' == wp[i]) { - *word = &wp[++i]; - return(i + a2roffdeco(&dd, word, sz)); - } else - lim = 1; - break; - case ('o'): - /* FALLTHROUGH */ - case ('w'): - if ('\'' == wp[i++]) { - term = '\''; - break; - } - /* FALLTHROUGH */ - default: - *d = DECO_SSPECIAL; - i--; - lim = 1; - break; - } - - assert(term || lim); - *word = &wp[i]; - - if (term) { - j = i; - while (wp[i] && wp[i] != term) - i++; - if ('\0' == wp[i]) { - *d = DECO_NONE; - return(i); - } - - assert(i >= j); - *sz = (size_t)(i - j); - - return(i + 1); - } - - assert(lim > 0); - *sz = (size_t)lim; - - for (j = 0; wp[i] && j < lim; j++) - i++; - if (j < lim) - *d = DECO_NONE; - - return(i); -} - /* * Calculate the abstract widths and decimal positions of columns in a * table. 
This routine allocates the columns structures then runs over diff --git a/usr.bin/mandoc/out.h b/usr.bin/mandoc/out.h index 0386a989d71..76f6bafe040 100644 --- a/usr.bin/mandoc/out.h +++ b/usr.bin/mandoc/out.h @@ -1,4 +1,4 @@ -/* $Id: out.h,v 1.10 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: out.h,v 1.11 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -31,27 +31,6 @@ enum roffscale { SCALE_MAX }; -enum roffdeco { - DECO_NONE, - DECO_NUMBERED, /* numbered character */ - DECO_SPECIAL, /* special character */ - DECO_SSPECIAL, /* single-char special */ - DECO_RESERVED, /* reserved word */ - DECO_BOLD, /* bold font */ - DECO_ITALIC, /* italic font */ - DECO_ROMAN, /* "normal" undecorated font */ - DECO_PREVIOUS, /* revert to previous font */ - DECO_NOSPACE, /* suppress spacing */ - DECO_FONT, /* font */ - DECO_FFONT, /* font family */ - DECO_MAX -}; - -enum chars { - CHARS_ASCII, /* 7-bit ascii representation */ - CHARS_HTML /* unicode values */ -}; - struct roffcol { size_t width; /* width of cell */ size_t decimal; /* decimal position in cell */ @@ -85,18 +64,9 @@ __BEGIN_DECLS while (/* CONSTCOND */ 0) int a2roffsu(const char *, struct roffsu *, enum roffscale); -int a2roffdeco(enum roffdeco *, const char **, size_t *); void time2a(time_t, char *, size_t); void tblcalc(struct rofftbl *tbl, const struct tbl_span *); -void *chars_init(enum chars); -const char *chars_num2char(const char *, size_t); -const char *chars_spec2str(void *, const char *, size_t, size_t *); -int chars_spec2cp(void *, const char *, size_t); -const char *chars_res2str(void *, const char *, size_t, size_t *); -int chars_res2cp(void *, const char *, size_t); -void chars_free(void *); - __END_DECLS #endif /*!OUT_H*/ diff --git a/usr.bin/mandoc/predefs.in b/usr.bin/mandoc/predefs.in new file mode 100644 index 00000000000..6713bff1976 --- /dev/null +++ b/usr.bin/mandoc/predefs.in @@ -0,0 +1,65 @@ +/* $Id: predefs.in,v 1.1 2011/05/29 
21:22:18 schwarze Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * The predefined-string translation tables. Each corresponds to a + * predefined string from (e.g.) tmac/mdoc/doc-nroff. The left-hand + * side corresponds to the input sequence (\*x, \*(xx and so on). The + * right-hand side is what's produced by libroff. + * + * XXX - C-escape strings! + * XXX - update PREDEFS_MAX in roff.c if adding more! 
+ */ + +PREDEF("Am", "&") +PREDEF("Ba", "|") +PREDEF("Ge", "\\(>=") +PREDEF("Gt", ">") +PREDEF("If", "\\(if") +PREDEF("Le", "\\(<=") +PREDEF("Lq", "\\(lq") +PREDEF("Lt", "<") +PREDEF("Na", "NaN") +PREDEF("Ne", "\\(!=") +PREDEF("Pi", "\\(*p") +PREDEF("Pm", "\\(+-") +PREDEF("Rq", "\\(rq") +PREDEF("left-bracket", "[") +PREDEF("left-parenthesis", "(") +PREDEF("lp", "(") +PREDEF("left-singlequote", "\\(oq") +PREDEF("q", "\\(dq") +PREDEF("quote-left", "\\(oq") +PREDEF("quote-right", "\\(cq") +PREDEF("R", "\\(rg") +PREDEF("right-bracket", "]") +PREDEF("right-parenthesis", ")") +PREDEF("rp", ")") +PREDEF("right-singlequote", "\\(cq") +PREDEF("Tm", "\\(tm") +PREDEF("Px", "POSIX") +PREDEF("Ai", "ANSI") +PREDEF("\'", "\\\'") +PREDEF("aa", "\\(aa") +PREDEF("ga", "\\(ga") +PREDEF("`", "\\`") +PREDEF("lq", "\\(lq") +PREDEF("rq", "\\(rq") +PREDEF("ua", "\\(ua") +PREDEF("va", "\\(va") +PREDEF("<=", "\\(<=") +PREDEF(">=", "\\(>=") diff --git a/usr.bin/mandoc/read.c b/usr.bin/mandoc/read.c index d3b837fe6d7..aa491c9fe34 100644 --- a/usr.bin/mandoc/read.c +++ b/usr.bin/mandoc/read.c @@ -1,4 +1,4 @@ -/* $Id: read.c,v 1.1 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: read.c,v 1.2 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> @@ -134,7 +134,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { "tab in non-literal context", "end of line whitespace", "bad comment style", - "unknown escape sequence", + "bad escape sequence", "unterminated quoted string", "generic error", @@ -437,7 +437,7 @@ rerun: /* * If input parsers have not been allocated, do so now. - * We keep these instanced betwen parsers, but set them + * We keep these instanced between parsers, but set them * locally per parse routine since we can use different * parsers with each one. 
*/ diff --git a/usr.bin/mandoc/roff.c b/usr.bin/mandoc/roff.c index 6cf1164ba45..39393ccadf0 100644 --- a/usr.bin/mandoc/roff.c +++ b/usr.bin/mandoc/roff.c @@ -1,4 +1,4 @@ -/* $Id: roff.c,v 1.36 2011/04/24 16:28:48 schwarze Exp $ */ +/* $Id: roff.c,v 1.37 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> @@ -16,17 +16,15 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include <assert.h> -#include <errno.h> #include <ctype.h> -#include <limits.h> #include <stdlib.h> #include <string.h> -#include <stdio.h> #include "mandoc.h" #include "libroff.h" #include "libmandoc.h" +/* Maximum number of nested if-else conditionals. */ #define RSTACK_MAX 128 enum rofft { @@ -59,7 +57,7 @@ enum rofft { ROFF_EQ, ROFF_EN, ROFF_cblock, - ROFF_ccond, /* FIXME: remove this. */ + ROFF_ccond, ROFF_USERDEF, ROFF_MAX }; @@ -123,6 +121,14 @@ struct roffmac { struct roffmac *next; }; +struct predef { + const char *name; /* predefined input name */ + const char *str; /* replacement symbol */ +}; + +#define PREDEF(__name, __str) \ + { (__name), (__str) }, + static enum rofferr roff_block(ROFF_ARGS); static enum rofferr roff_block_text(ROFF_ARGS); static enum rofferr roff_block_sub(ROFF_ARGS); @@ -140,7 +146,7 @@ static const char *roff_getstrn(const struct roff *, static enum rofferr roff_line_ignore(ROFF_ARGS); static enum rofferr roff_nr(ROFF_ARGS); static int roff_res(struct roff *, - char **, size_t *, int); + char **, size_t *, int, int); static enum rofferr roff_rm(ROFF_ARGS); static void roff_setstr(struct roff *, const char *, const char *, int); @@ -194,6 +200,12 @@ static struct roffmac roffs[ROFF_MAX] = { { NULL, roff_userdef, NULL, NULL, 0, NULL }, }; +/* Array of injected predefined strings. 
*/ +#define PREDEFS_MAX 38 +static const struct predef predefs[PREDEFS_MAX] = { +#include "predefs.in" +}; + static void roff_free1(struct roff *); static enum rofft roff_hash_find(const char *, size_t); static void roff_hash_init(void); @@ -202,7 +214,6 @@ static void roffnode_push(struct roff *, enum rofft, const char *, int, int); static void roffnode_pop(struct roff *); static enum rofft roff_parse(struct roff *, const char *, int *); -static int roff_parse_nat(const char *, unsigned int *); /* See roff_hash_find() */ #define ROFF_HASH(p) (p[0] - ASCII_LO) @@ -228,7 +239,6 @@ roff_hash_init(void) } } - /* * Look up a roff token by its name. Returns ROFF_MAX if no macro by * the nil-terminated string name could be found. @@ -351,6 +361,7 @@ struct roff * roff_alloc(struct regset *regs, struct mparse *parse) { struct roff *r; + int i; r = mandoc_calloc(1, sizeof(struct roff)); r->regs = regs; @@ -358,6 +369,10 @@ roff_alloc(struct regset *regs, struct mparse *parse) r->rstackpos = -1; roff_hash_init(); + + for (i = 0; i < PREDEFS_MAX; i++) + roff_setstr(r, predefs[i].name, predefs[i].str, 0); + return(r); } @@ -368,7 +383,7 @@ roff_alloc(struct regset *regs, struct mparse *parse) * is processed. */ static int -roff_res(struct roff *r, char **bufp, size_t *szp, int pos) +roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos) { const char *stesc; /* start of an escape sequence ('\\') */ const char *stnam; /* start of the name, after "[(*" */ @@ -435,8 +450,9 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int pos) res = roff_getstrn(r, stnam, (size_t)i); if (NULL == res) { - cp -= maxl ? 1 : 0; - continue; + /* TODO: keep track of the correct position. */ + mandoc_msg(MANDOCERR_BADESCAPE, r->parse, ln, pos, NULL); + res = ""; } /* Replace the escape sequence by the string. */ @@ -472,7 +488,7 @@ roff_parseln(struct roff *r, int ln, char **bufp, * words to fill in. */ - if (r->first_string && ! 
roff_res(r, bufp, szp, pos)) + if (r->first_string && ! roff_res(r, bufp, szp, ln, pos)) return(ROFF_REPARSE); ppos = pos; @@ -589,27 +605,6 @@ roff_parse(struct roff *r, const char *buf, int *pos) return(t); } - -static int -roff_parse_nat(const char *buf, unsigned int *res) -{ - char *ep; - long lval; - - errno = 0; - lval = strtol(buf, &ep, 10); - if (buf[0] == '\0' || *ep != '\0') - return(0); - if ((errno == ERANGE && - (lval == LONG_MAX || lval == LONG_MIN)) || - (lval > INT_MAX || lval < 0)) - return(0); - - *res = (unsigned int)lval; - return(1); -} - - /* ARGSUSED */ static enum rofferr roff_cblock(ROFF_ARGS) @@ -861,21 +856,29 @@ roff_cond_sub(ROFF_ARGS) { enum rofft t; enum roffrule rr; + char *ep; rr = r->last->rule; + roffnode_cleanscope(r); - /* - * Clean out scope. If we've closed ourselves, then don't - * continue. + /* + * If the macro is unknown, first check if it contains a closing + * delimiter `\}'. If it does, close out our scope and return + * the currently-scoped rule (ignore or continue). Else, drop + * into the currently-scoped rule. */ - roffnode_cleanscope(r); - if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) { - if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1]) - return(roff_ccond - (r, ROFF_ccond, bufp, szp, - ln, pos, pos + 2, offs)); + ep = &(*bufp)[pos]; + for ( ; NULL != (ep = strchr(ep, '\\')); ep++) { + ep++; + if ('}' != *ep) + continue; + *ep = '&'; + roff_ccond(r, ROFF_ccond, bufp, szp, + ln, pos, pos + 2, offs); + break; + } return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); } @@ -884,6 +887,7 @@ roff_cond_sub(ROFF_ARGS) * if they're either structurally required (such as loops and * conditionals) or a closing macro. */ + if (ROFFRULE_DENY == rr) if ( ! 
(ROFFMAC_STRUCT & roffs[t].flags)) if (ROFF_ccond != t) @@ -894,37 +898,28 @@ roff_cond_sub(ROFF_ARGS) ln, ppos, pos, offs)); } - /* ARGSUSED */ static enum rofferr roff_cond_text(ROFF_ARGS) { - char *ep, *st; + char *ep; enum roffrule rr; rr = r->last->rule; + roffnode_cleanscope(r); - /* - * We display the value of the text if out current evaluation - * scope permits us to do so. - */ - - /* FIXME: use roff_ccond? */ - - st = &(*bufp)[pos]; - if (NULL == (ep = strstr(st, "\\}"))) { - roffnode_cleanscope(r); - return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); + ep = &(*bufp)[pos]; + for ( ; NULL != (ep = strchr(ep, '\\')); ep++) { + ep++; + if ('}' != *ep) + continue; + *ep = '&'; + roff_ccond(r, ROFF_ccond, bufp, szp, + ln, pos, pos + 2, offs); } - - if (ep == st || (ep > st && '\\' != *(ep - 1))) - roffnode_pop(r); - - roffnode_cleanscope(r); return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); } - static enum roffrule roff_evalcond(const char *v, int *pos) { @@ -1086,6 +1081,7 @@ roff_nr(ROFF_ARGS) { const char *key; char *val; + int iv; struct reg *rg; val = *bufp + pos; @@ -1094,8 +1090,10 @@ roff_nr(ROFF_ARGS) if (0 == strcmp(key, "nS")) { rg[(int)REG_nS].set = 1; - if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u)) - rg[(int)REG_nS].v.u = 0; + if ((iv = mandoc_strntou(val, strlen(val), 10)) >= 0) + rg[REG_nS].v.u = (unsigned)iv; + else + rg[(int)REG_nS].v.u = 0u; } return(ROFF_IGN); diff --git a/usr.bin/mandoc/tbl_layout.c b/usr.bin/mandoc/tbl_layout.c index 85efa0ee561..2d1989fa4e8 100644 --- a/usr.bin/mandoc/tbl_layout.c +++ b/usr.bin/mandoc/tbl_layout.c @@ -1,4 +1,4 @@ -/* $Id: tbl_layout.c,v 1.8 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: tbl_layout.c,v 1.9 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -68,6 +68,23 @@ mods(struct tbl_node *tbl, struct tbl_cell *cp, char buf[5]; int i; + /* Not all types accept modifiers. 
*/ + + switch (cp->pos) { + case (TBL_CELL_DOWN): + /* FALLTHROUGH */ + case (TBL_CELL_HORIZ): + /* FALLTHROUGH */ + case (TBL_CELL_DHORIZ): + /* FALLTHROUGH */ + case (TBL_CELL_VERT): + /* FALLTHROUGH */ + case (TBL_CELL_DVERT): + return(1); + default: + break; + } + mod: /* * XXX: since, at least for now, modifiers are non-conflicting @@ -423,19 +440,19 @@ cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos) } static void -head_adjust(const struct tbl_cell *cell, struct tbl_head *head) +head_adjust(const struct tbl_cell *cellp, struct tbl_head *head) { - if (TBL_CELL_VERT != cell->pos && - TBL_CELL_DVERT != cell->pos) { + if (TBL_CELL_VERT != cellp->pos && + TBL_CELL_DVERT != cellp->pos) { head->pos = TBL_HEAD_DATA; return; } - if (TBL_CELL_VERT == cell->pos) + if (TBL_CELL_VERT == cellp->pos) if (TBL_HEAD_DVERT != head->pos) head->pos = TBL_HEAD_VERT; - if (TBL_CELL_DVERT == cell->pos) + if (TBL_CELL_DVERT == cellp->pos) head->pos = TBL_HEAD_DVERT; } diff --git a/usr.bin/mandoc/term.c b/usr.bin/mandoc/term.c index e89f927c9a6..863e0a7a602 100644 --- a/usr.bin/mandoc/term.c +++ b/usr.bin/mandoc/term.c @@ -1,6 +1,6 @@ -/* $Id: term.c,v 1.58 2011/04/24 16:22:02 schwarze Exp $ */ +/* $Id: term.c,v 1.59 2011/05/29 21:22:18 schwarze Exp $ */ /* - * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any @@ -29,13 +29,10 @@ #include "term.h" #include "main.h" -static void spec(struct termp *, enum roffdeco, - const char *, size_t); -static void res(struct termp *, const char *, size_t); -static void bufferc(struct termp *, char); -static void adjbuf(struct termp *p, size_t); -static void encode(struct termp *, const char *, size_t); - +static void adjbuf(struct termp *p, int); +static void bufferc(struct termp *, char); +static 
void encode(struct termp *, const char *, size_t); +static void encode1(struct termp *, int); void term_free(struct termp *p) @@ -44,7 +41,7 @@ term_free(struct termp *p) if (p->buf) free(p->buf); if (p->symtab) - chars_free(p->symtab); + mchars_free(p->symtab); free(p); } @@ -69,18 +66,6 @@ term_end(struct termp *p) (*p->end)(p); } - -struct termp * -term_alloc(enum termenc enc) -{ - struct termp *p; - - p = mandoc_calloc(1, sizeof(struct termp)); - p->enc = enc; - return(p); -} - - /* * Flush a line of text. A "line" is loosely defined as being something * that should be followed by a newline, regardless of whether it's @@ -152,12 +137,12 @@ term_flushln(struct termp *p) vis = vend = 0; i = 0; - while (i < (int)p->col) { + while (i < p->col) { /* * Handle literal tab characters: collapse all * subsequent tabs into a single huge set of spaces. */ - while (i < (int)p->col && '\t' == p->buf[i]) { + while (i < p->col && '\t' == p->buf[i]) { vend = (vis / p->tabwidth + 1) * p->tabwidth; vbl += vend - vis; vis = vend; @@ -171,7 +156,7 @@ term_flushln(struct termp *p) * space is printed according to regular spacing rules). */ - for (j = i, jhy = 0; j < (int)p->col; j++) { + for (j = i, jhy = 0; j < p->col; j++) { if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j]) break; @@ -214,7 +199,7 @@ term_flushln(struct termp *p) } /* Write out the [remaining] word. 
*/ - for ( ; i < (int)p->col; i++) { + for ( ; i < p->col; i++) { if (vend > bp && jhy > 0 && i > jhy) break; if ('\t' == p->buf[i]) @@ -341,44 +326,6 @@ term_vspace(struct termp *p) (*p->endline)(p); } - -static void -numbered(struct termp *p, const char *word, size_t len) -{ - const char *rhs; - - rhs = chars_num2char(word, len); - if (rhs) - encode(p, rhs, 1); -} - - -static void -spec(struct termp *p, enum roffdeco d, const char *word, size_t len) -{ - const char *rhs; - size_t sz; - - rhs = chars_spec2str(p->symtab, word, len, &sz); - if (rhs) - encode(p, rhs, sz); - else if (DECO_SSPECIAL == d) - encode(p, word, len); -} - - -static void -res(struct termp *p, const char *word, size_t len) -{ - const char *rhs; - size_t sz; - - rhs = chars_res2str(p->symtab, word, len, &sz); - if (rhs) - encode(p, rhs, sz); -} - - void term_fontlast(struct termp *p) { @@ -443,7 +390,6 @@ term_fontpop(struct termp *p) p->fonti--; } - /* * Handle pwords, partial words, which may be either a single word or a * phrase that cannot be broken down (such as a literal string). This @@ -452,9 +398,11 @@ term_fontpop(struct termp *p) void term_word(struct termp *p, const char *word) { - const char *seq; + const char *seq, *cp; + char c; + int sz, uc; size_t ssz; - enum roffdeco deco; + enum mandoc_esc esc; if ( ! (TERMP_NOSPACE & p->flags)) { if ( ! 
(TERMP_KEEP & p->flags)) { @@ -474,7 +422,7 @@ term_word(struct termp *p, const char *word) p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM); - while (*word) { + while ('\0' != *word) { if ((ssz = strcspn(word, "\\")) > 0) encode(p, word, ssz); @@ -482,45 +430,71 @@ term_word(struct termp *p, const char *word) if ('\\' != *word) continue; - seq = ++word; - word += a2roffdeco(&deco, &seq, &ssz); + word++; + esc = mandoc_escape(&word, &seq, &sz); + if (ESCAPE_ERROR == esc) + break; + + if (TERMENC_ASCII != p->enc) + switch (esc) { + case (ESCAPE_UNICODE): + uc = mchars_num2uc(seq + 1, sz - 1); + if ('\0' == uc) + break; + encode1(p, uc); + continue; + case (ESCAPE_SPECIAL): + uc = mchars_spec2cp(p->symtab, seq, sz); + if (uc <= 0) + break; + encode1(p, uc); + continue; + default: + break; + } - switch (deco) { - case (DECO_NUMBERED): - numbered(p, seq, ssz); + switch (esc) { + case (ESCAPE_UNICODE): + encode1(p, '?'); break; - case (DECO_RESERVED): - res(p, seq, ssz); + case (ESCAPE_NUMBERED): + c = mchars_num2char(seq, sz); + if ('\0' != c) + encode(p, &c, 1); break; - case (DECO_SPECIAL): - /* FALLTHROUGH */ - case (DECO_SSPECIAL): - spec(p, deco, seq, ssz); + case (ESCAPE_SPECIAL): + cp = mchars_spec2str(p->symtab, seq, sz, &ssz); + if (NULL != cp) + encode(p, cp, ssz); + else if (1 == ssz) + encode(p, seq, sz); break; - case (DECO_BOLD): + case (ESCAPE_FONTBOLD): term_fontrepl(p, TERMFONT_BOLD); break; - case (DECO_ITALIC): + case (ESCAPE_FONTITALIC): term_fontrepl(p, TERMFONT_UNDER); break; - case (DECO_ROMAN): + case (ESCAPE_FONT): + /* FALLTHROUGH */ + case (ESCAPE_FONTROMAN): term_fontrepl(p, TERMFONT_NONE); break; - case (DECO_PREVIOUS): + case (ESCAPE_FONTPREV): term_fontlast(p); break; + case (ESCAPE_NOSPACE): + if ('\0' == *word) + p->flags |= TERMP_NOSPACE; + break; default: break; } - - if (DECO_NOSPACE == deco && '\0' == *word) - p->flags |= TERMP_NOSPACE; } } - static void -adjbuf(struct termp *p, size_t sz) +adjbuf(struct termp *p, int sz) { if (0 == 
p->maxcols) @@ -528,10 +502,10 @@ adjbuf(struct termp *p, size_t sz) while (sz >= p->maxcols) p->maxcols <<= 2; - p->buf = mandoc_realloc(p->buf, p->maxcols); + p->buf = mandoc_realloc + (p->buf, sizeof(int) * (size_t)p->maxcols); } - static void bufferc(struct termp *p, char c) { @@ -539,15 +513,44 @@ bufferc(struct termp *p, char c) if (p->col + 1 >= p->maxcols) adjbuf(p, p->col + 1); - p->buf[(int)p->col++] = c; + p->buf[p->col++] = c; } +/* + * See encode(). + * Do this for a single (probably unicode) value. + * Does not check for non-decorated glyphs. + */ +static void +encode1(struct termp *p, int c) +{ + enum termfont f; + + if (p->col + 4 >= p->maxcols) + adjbuf(p, p->col + 4); + + f = term_fonttop(p); + + if (TERMFONT_NONE == f) { + p->buf[p->col++] = c; + return; + } else if (TERMFONT_UNDER == f) { + p->buf[p->col++] = '_'; + } else + p->buf[p->col++] = c; + + p->buf[p->col++] = 8; + p->buf[p->col++] = c; +} static void encode(struct termp *p, const char *word, size_t sz) { enum termfont f; - int i; + int i, len; + + /* LINTED */ + len = sz; /* * Encode and buffer a string of characters. If the current @@ -556,35 +559,34 @@ encode(struct termp *p, const char *word, size_t sz) */ if (TERMFONT_NONE == (f = term_fonttop(p))) { - if (p->col + sz >= p->maxcols) - adjbuf(p, p->col + sz); - memcpy(&p->buf[(int)p->col], word, sz); - p->col += sz; + if (p->col + len >= p->maxcols) + adjbuf(p, p->col + len); + for (i = 0; i < len; i++) + p->buf[p->col++] = word[i]; return; } /* Pre-buffer, assuming worst-case. */ - if (p->col + 1 + (sz * 3) >= p->maxcols) - adjbuf(p, p->col + 1 + (sz * 3)); + if (p->col + 1 + (len * 3) >= p->maxcols) + adjbuf(p, p->col + 1 + (len * 3)); - for (i = 0; i < (int)sz; i++) { - if ( ! isgraph((u_char)word[i])) { - p->buf[(int)p->col++] = word[i]; + for (i = 0; i < len; i++) { + if ( ! 
isgraph((unsigned char)word[i])) { + p->buf[p->col++] = word[i]; continue; } if (TERMFONT_UNDER == f) - p->buf[(int)p->col++] = '_'; + p->buf[p->col++] = '_'; else - p->buf[(int)p->col++] = word[i]; + p->buf[p->col++] = word[i]; - p->buf[(int)p->col++] = 8; - p->buf[(int)p->col++] = word[i]; + p->buf[p->col++] = 8; + p->buf[p->col++] = word[i]; } } - size_t term_len(const struct termp *p, size_t sz) { @@ -596,59 +598,99 @@ term_len(const struct termp *p, size_t sz) size_t term_strlen(const struct termp *p, const char *cp) { - size_t sz, ssz, rsz, i; - enum roffdeco d; + size_t sz, rsz, i; + int ssz, c; const char *seq, *rhs; + enum mandoc_esc esc; + static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' }; - for (sz = 0; '\0' != *cp; ) - /* - * Account for escaped sequences within string length - * calculations. This follows the logic in term_word() - * as we must calculate the width of produced strings. - */ - if ('\\' == *cp) { - seq = ++cp; - cp += a2roffdeco(&d, &seq, &ssz); + /* + * Account for escaped sequences within string length + * calculations. This follows the logic in term_word() as we + * must calculate the width of produced strings. 
+ */ - switch (d) { - case (DECO_RESERVED): - rhs = chars_res2str - (p->symtab, seq, ssz, &rsz); + sz = 0; + while ('\0' != *cp) { + rsz = strcspn(cp, rej); + for (i = 0; i < rsz; i++) + sz += (*p->width)(p, *cp++); + + c = 0; + switch (*cp) { + case ('\\'): + cp++; + esc = mandoc_escape(&cp, &seq, &ssz); + if (ESCAPE_ERROR == esc) + return(sz); + + if (TERMENC_ASCII != p->enc) + switch (esc) { + case (ESCAPE_UNICODE): + c = mchars_num2uc + (seq + 1, ssz - 1); + if ('\0' == c) + break; + sz += (*p->width)(p, c); + continue; + case (ESCAPE_SPECIAL): + c = mchars_spec2cp + (p->symtab, seq, ssz); + if (c <= 0) + break; + sz += (*p->width)(p, c); + continue; + default: + break; + } + + rhs = NULL; + + switch (esc) { + case (ESCAPE_UNICODE): + sz += (*p->width)(p, '?'); break; - case (DECO_SPECIAL): - /* FALLTHROUGH */ - case (DECO_SSPECIAL): - rhs = chars_spec2str + case (ESCAPE_NUMBERED): + c = mchars_num2char(seq, ssz); + if ('\0' != c) + sz += (*p->width)(p, c); + break; + case (ESCAPE_SPECIAL): + rhs = mchars_spec2str (p->symtab, seq, ssz, &rsz); - /* Allow for one-char escapes. 
*/ - if (DECO_SSPECIAL != d || rhs) + if (ssz != 1 || rhs) break; rhs = seq; rsz = ssz; break; default: - rhs = NULL; break; } - if (rhs) - for (i = 0; i < rsz; i++) - sz += (*p->width)(p, *rhs++); - } else if (ASCII_NBRSP == *cp) { + if (NULL == rhs) + break; + + for (i = 0; i < rsz; i++) + sz += (*p->width)(p, *rhs++); + break; + case (ASCII_NBRSP): sz += (*p->width)(p, ' '); cp++; - } else if (ASCII_HYPH == *cp) { + break; + case (ASCII_HYPH): sz += (*p->width)(p, '-'); cp++; - } else - sz += (*p->width)(p, *cp++); + break; + default: + break; + } + } return(sz); } - /* ARGSUSED */ size_t term_vspan(const struct termp *p, const struct roffsu *su) @@ -685,7 +727,6 @@ term_vspan(const struct termp *p, const struct roffsu *su) r); } - size_t term_hspan(const struct termp *p, const struct roffsu *su) { diff --git a/usr.bin/mandoc/term.h b/usr.bin/mandoc/term.h index 1dfeecf0e1f..30d9b70a9d5 100644 --- a/usr.bin/mandoc/term.h +++ b/usr.bin/mandoc/term.h @@ -1,4 +1,4 @@ -/* $Id: term.h,v 1.29 2011/01/09 14:30:48 schwarze Exp $ */ +/* $Id: term.h,v 1.30 2011/05/29 21:22:18 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -22,7 +22,9 @@ __BEGIN_DECLS struct termp; enum termenc { - TERMENC_ASCII + TERMENC_ASCII, + TERMENC_LOCALE, + TERMENC_UTF8 }; enum termtype { @@ -42,35 +44,6 @@ enum termfont { typedef void (*term_margin)(struct termp *, const void *); -struct termp_ps { - int flags; -#define PS_INLINE (1 << 0) /* we're in a word */ -#define PS_MARGINS (1 << 1) /* we're in the margins */ -#define PS_NEWPAGE (1 << 2) /* new page, no words yet */ - size_t pscol; /* visible column (AFM units) */ - size_t psrow; /* visible row (AFM units) */ - char *psmarg; /* margin buf */ - size_t psmargsz; /* margin buf size */ - size_t psmargcur; /* cur index in margin buf */ - char last; /* character buffer */ - enum termfont lastf; /* last set font */ - size_t scale; /* font scaling factor */ - size_t pages; /* number of pages 
shown */ - size_t lineheight; /* line height (AFM units) */ - size_t top; /* body top (AFM units) */ - size_t bottom; /* body bottom (AFM units) */ - size_t height; /* page height (AFM units */ - size_t width; /* page width (AFM units) */ - size_t left; /* body left (AFM units) */ - size_t header; /* header pos (AFM units) */ - size_t footer; /* footer pos (AFM units) */ - size_t pdfbytes; /* current output byte */ - size_t pdflastpg; /* byte of last page mark */ - size_t pdfbody; /* start of body object */ - size_t *pdfobjs; /* table of object offsets */ - size_t pdfobjsz; /* size of pdfobjs */ -}; - struct termp_tbl { int width; /* width in fixed chars */ int decimal; /* decimal point position */ @@ -82,10 +55,10 @@ struct termp { size_t defrmargin; /* Right margin of the device. */ size_t rmargin; /* Current right margin. */ size_t maxrmargin; /* Max right margin. */ - size_t maxcols; /* Max size of buf. */ + int maxcols; /* Max size of buf. */ size_t offset; /* Margin offest. */ size_t tabwidth; /* Distance of tab positions. */ - size_t col; /* Bytes in buf. */ + int col; /* Bytes in buf. */ size_t viscol; /* Chars on current line. */ int overstep; /* See termp_flushln(). */ int flags; @@ -103,29 +76,26 @@ struct termp { #define TERMP_ANPREC (1 << 13) /* See termp_an_pre(). */ #define TERMP_KEEP (1 << 14) /* Keep words together. */ #define TERMP_PREKEEP (1 << 15) /* ...starting with the next one. */ - char *buf; /* Output buffer. */ + int *buf; /* Output buffer. */ enum termenc enc; /* Type of encoding. */ - void *symtab; /* Encoded-symbol table. */ + struct mchars *symtab; /* Encoded-symbol table. */ enum termfont fontl; /* Last font set. */ enum termfont fontq[10]; /* Symmetric fonts. */ int fonti; /* Index of font stack. 
*/ term_margin headf; /* invoked to print head */ term_margin footf; /* invoked to print foot */ - void (*letter)(struct termp *, char); + void (*letter)(struct termp *, int); void (*begin)(struct termp *); void (*end)(struct termp *); void (*endline)(struct termp *); void (*advance)(struct termp *, size_t); - size_t (*width)(const struct termp *, char); + size_t (*width)(const struct termp *, int); double (*hspan)(const struct termp *, const struct roffsu *); const void *argf; /* arg for headf/footf */ - union { - struct termp_ps ps; - } engine; + struct termp_ps *ps; }; -struct termp *term_alloc(enum termenc); void term_tbl(struct termp *, const struct tbl_span *); void term_free(struct termp *); void term_newln(struct termp *); diff --git a/usr.bin/mandoc/term_ascii.c b/usr.bin/mandoc/term_ascii.c index 5462ec5e5f7..7d70dc4a86a 100644 --- a/usr.bin/mandoc/term_ascii.c +++ b/usr.bin/mandoc/term_ascii.c @@ -1,6 +1,6 @@ -/* $Id: term_ascii.c,v 1.5 2011/01/31 02:36:55 schwarze Exp $ */ +/* $Id: term_ascii.c,v 1.6 2011/05/29 21:22:18 schwarze Exp $ */ /* - * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -17,47 +17,70 @@ #include <sys/types.h> #include <assert.h> +#include <locale.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> +#include <wchar.h> #include "mandoc.h" #include "out.h" #include "term.h" #include "main.h" +static struct termp *ascii_init(enum termenc, char *); static double ascii_hspan(const struct termp *, const struct roffsu *); -static size_t ascii_width(const struct termp *, char); +static size_t ascii_width(const struct termp *, int); static void ascii_advance(struct termp *, size_t); static void ascii_begin(struct termp *); static void ascii_end(struct termp *); static void ascii_endline(struct 
termp *); -static void ascii_letter(struct termp *, char); +static void ascii_letter(struct termp *, int); +static void locale_advance(struct termp *, size_t); +static void locale_endline(struct termp *); +static void locale_letter(struct termp *, int); +static size_t locale_width(const struct termp *, int); -void * -ascii_alloc(char *outopts) +static struct termp * +ascii_init(enum termenc enc, char *outopts) { - struct termp *p; const char *toks[2]; char *v; + struct termp *p; - p = term_alloc(TERMENC_ASCII); + p = mandoc_calloc(1, sizeof(struct termp)); + p->enc = enc; p->tabwidth = 5; p->defrmargin = 78; - p->advance = ascii_advance; p->begin = ascii_begin; p->end = ascii_end; - p->endline = ascii_endline; p->hspan = ascii_hspan; - p->letter = ascii_letter; p->type = TERMTYPE_CHAR; + + p->enc = TERMENC_ASCII; + p->advance = ascii_advance; + p->endline = ascii_endline; + p->letter = ascii_letter; p->width = ascii_width; + if (TERMENC_ASCII != enc) { + v = TERMENC_LOCALE == enc ? + setlocale(LC_ALL, "") : + setlocale(LC_CTYPE, "UTF-8"); + if (NULL != v && MB_CUR_MAX > 1) { + p->enc = enc; + p->advance = locale_advance; + p->endline = locale_endline; + p->letter = locale_letter; + p->width = locale_width; + } + } + toks[0] = "width"; toks[1] = NULL; @@ -77,16 +100,36 @@ ascii_alloc(char *outopts) return(p); } +void * +ascii_alloc(char *outopts) +{ + + return(ascii_init(TERMENC_ASCII, outopts)); +} + +void * +utf8_alloc(char *outopts) +{ + + return(ascii_init(TERMENC_UTF8, outopts)); +} + + +void * +locale_alloc(char *outopts) +{ + + return(ascii_init(TERMENC_LOCALE, outopts)); +} /* ARGSUSED */ static size_t -ascii_width(const struct termp *p, char c) +ascii_width(const struct termp *p, int c) { return(1); } - void ascii_free(void *arg) { @@ -94,16 +137,14 @@ ascii_free(void *arg) term_free((struct termp *)arg); } - /* ARGSUSED */ static void -ascii_letter(struct termp *p, char c) +ascii_letter(struct termp *p, int c) { putchar(c); } - static void 
ascii_begin(struct termp *p) { @@ -111,7 +152,6 @@ ascii_begin(struct termp *p) (*p->headf)(p, p->argf); } - static void ascii_end(struct termp *p) { @@ -119,7 +159,6 @@ ascii_end(struct termp *p) (*p->footf)(p, p->argf); } - /* ARGSUSED */ static void ascii_endline(struct termp *p) @@ -128,19 +167,16 @@ ascii_endline(struct termp *p) putchar('\n'); } - /* ARGSUSED */ static void ascii_advance(struct termp *p, size_t len) { size_t i; - /* Just print whitespace on the terminal. */ for (i = 0; i < len; i++) putchar(' '); } - /* ARGSUSED */ static double ascii_hspan(const struct termp *p, const struct roffsu *su) @@ -179,3 +215,37 @@ ascii_hspan(const struct termp *p, const struct roffsu *su) return(r); } +/* ARGSUSED */ +static size_t +locale_width(const struct termp *p, int c) +{ + int rc; + + return((rc = wcwidth(c)) < 0 ? 0 : rc); +} + +/* ARGSUSED */ +static void +locale_advance(struct termp *p, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) + putwchar(L' '); +} + +/* ARGSUSED */ +static void +locale_endline(struct termp *p) +{ + + putwchar(L'\n'); +} + +/* ARGSUSED */ +static void +locale_letter(struct termp *p, int c) +{ + + putwchar(c); +} diff --git a/usr.bin/mandoc/term_ps.c b/usr.bin/mandoc/term_ps.c index 512c602ffe0..761dc1b0423 100644 --- a/usr.bin/mandoc/term_ps.c +++ b/usr.bin/mandoc/term_ps.c @@ -1,6 +1,6 @@ -/* $Id: term_ps.c,v 1.16 2011/04/21 22:59:54 schwarze Exp $ */ +/* $Id: term_ps.c,v 1.17 2011/05/29 21:22:18 schwarze Exp $ */ /* - * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -34,13 +34,16 @@ #include "main.h" #include "term.h" +/* These work the buffer used by the header and footer. */ +#define PS_BUFSLOP 128 + /* Convert PostScript point "x" to an AFM unit. 
*/ #define PNT2AFM(p, x) /* LINTED */ \ - (size_t)((double)(x) * (1000.0 / (double)(p)->engine.ps.scale)) + (size_t)((double)(x) * (1000.0 / (double)(p)->ps->scale)) /* Convert an AFM unit "x" to a PostScript points */ #define AFM2PNT(p, x) /* LINTED */ \ - ((double)(x) / (1000.0 / (double)(p)->engine.ps.scale)) + ((double)(x) / (1000.0 / (double)(p)->ps->scale)) struct glyph { unsigned short wx; /* WX in AFM */ @@ -52,6 +55,54 @@ struct font { struct glyph gly[MAXCHAR]; /* glyph metrics */ }; +struct termp_ps { + int flags; +#define PS_INLINE (1 << 0) /* we're in a word */ +#define PS_MARGINS (1 << 1) /* we're in the margins */ +#define PS_NEWPAGE (1 << 2) /* new page, no words yet */ + size_t pscol; /* visible column (AFM units) */ + size_t psrow; /* visible row (AFM units) */ + char *psmarg; /* margin buf */ + size_t psmargsz; /* margin buf size */ + size_t psmargcur; /* cur index in margin buf */ + char last; /* character buffer */ + enum termfont lastf; /* last set font */ + size_t scale; /* font scaling factor */ + size_t pages; /* number of pages shown */ + size_t lineheight; /* line height (AFM units) */ + size_t top; /* body top (AFM units) */ + size_t bottom; /* body bottom (AFM units) */ + size_t height; /* page height (AFM units */ + size_t width; /* page width (AFM units) */ + size_t left; /* body left (AFM units) */ + size_t header; /* header pos (AFM units) */ + size_t footer; /* footer pos (AFM units) */ + size_t pdfbytes; /* current output byte */ + size_t pdflastpg; /* byte of last page mark */ + size_t pdfbody; /* start of body object */ + size_t *pdfobjs; /* table of object offsets */ + size_t pdfobjsz; /* size of pdfobjs */ +}; + +static double ps_hspan(const struct termp *, + const struct roffsu *); +static size_t ps_width(const struct termp *, int); +static void ps_advance(struct termp *, size_t); +static void ps_begin(struct termp *); +static void ps_closepage(struct termp *); +static void ps_end(struct termp *); +static void 
ps_endline(struct termp *); +static void ps_fclose(struct termp *); +static void ps_growbuf(struct termp *, size_t); +static void ps_letter(struct termp *, int); +static void ps_pclose(struct termp *); +static void ps_pletter(struct termp *, int); +static void ps_printf(struct termp *, const char *, ...); +static void ps_putchar(struct termp *, char); +static void ps_setfont(struct termp *, enum termfont); +static struct termp *pspdf_alloc(char *); +static void pdf_obj(struct termp *, size_t); + /* * We define, for the time being, three fonts: bold, oblique/italic, and * normal (roman). The following table hard-codes the font metrics for @@ -352,44 +403,6 @@ static const struct font fonts[TERMFONT__MAX] = { } }, }; -/* These work the buffer used by the header and footer. */ -#define PS_BUFSLOP 128 - -static void -ps_growbuf(struct termp *p, size_t sz) -{ - if (p->engine.ps.psmargcur + sz <= p->engine.ps.psmargsz) - return; - - if (sz < PS_BUFSLOP) - sz = PS_BUFSLOP; - - p->engine.ps.psmargsz += sz; - - p->engine.ps.psmarg = mandoc_realloc - (p->engine.ps.psmarg, - p->engine.ps.psmargsz); -} - -static double ps_hspan(const struct termp *, - const struct roffsu *); -static size_t ps_width(const struct termp *, char); -static void ps_advance(struct termp *, size_t); -static void ps_begin(struct termp *); -static void ps_closepage(struct termp *); -static void ps_end(struct termp *); -static void ps_endline(struct termp *); -static void ps_fclose(struct termp *); -static void ps_letter(struct termp *, char); -static void ps_pclose(struct termp *); -static void ps_pletter(struct termp *, int); -static void ps_printf(struct termp *, const char *, ...); -static void ps_putchar(struct termp *, char); -static void ps_setfont(struct termp *, enum termfont); -static struct termp *pspdf_alloc(char *); -static void pdf_obj(struct termp *, size_t); - - void * pdf_alloc(char *outopts) { @@ -401,7 +414,6 @@ pdf_alloc(char *outopts) return(p); } - void * ps_alloc(char *outopts) { 
@@ -413,7 +425,6 @@ ps_alloc(char *outopts) return(p); } - static struct termp * pspdf_alloc(char *outopts) { @@ -423,7 +434,9 @@ pspdf_alloc(char *outopts) const char *pp; char *v; - p = term_alloc(TERMENC_ASCII); + p = mandoc_calloc(1, sizeof(struct termp)); + p->enc = TERMENC_ASCII; + p->ps = mandoc_calloc(1, sizeof(struct termp_ps)); p->advance = ps_advance; p->begin = ps_begin; @@ -482,7 +495,7 @@ pspdf_alloc(char *outopts) * calculations occur. */ - p->engine.ps.scale = 11; + p->ps->scale = 11; /* Remember millimetres -> AFM units. */ @@ -498,16 +511,16 @@ pspdf_alloc(char *outopts) /* Line-height is 1.4em. */ - lineheight = PNT2AFM(p, ((double)p->engine.ps.scale * 1.4)); + lineheight = PNT2AFM(p, ((double)p->ps->scale * 1.4)); - p->engine.ps.width = pagex; - p->engine.ps.height = pagey; - p->engine.ps.header = pagey - (marginy / 2) - (lineheight / 2); - p->engine.ps.top = pagey - marginy; - p->engine.ps.footer = (marginy / 2) - (lineheight / 2); - p->engine.ps.bottom = marginy; - p->engine.ps.left = marginx; - p->engine.ps.lineheight = lineheight; + p->ps->width = pagex; + p->ps->height = pagey; + p->ps->header = pagey - (marginy / 2) - (lineheight / 2); + p->ps->top = pagey - marginy; + p->ps->footer = (marginy / 2) - (lineheight / 2); + p->ps->bottom = marginy; + p->ps->left = marginx; + p->ps->lineheight = lineheight; p->defrmargin = pagex - (marginx * 2); return(p); @@ -521,11 +534,12 @@ pspdf_free(void *arg) p = (struct termp *)arg; - if (p->engine.ps.psmarg) - free(p->engine.ps.psmarg); - if (p->engine.ps.pdfobjs) - free(p->engine.ps.pdfobjs); + if (p->ps->psmarg) + free(p->ps->psmarg); + if (p->ps->pdfobjs) + free(p->ps->pdfobjs); + free(p->ps); term_free(p); } @@ -544,10 +558,10 @@ ps_printf(struct termp *p, const char *fmt, ...) * into our growable margin buffer. */ - if ( ! (PS_MARGINS & p->engine.ps.flags)) { + if ( ! 
(PS_MARGINS & p->ps->flags)) { len = vprintf(fmt, ap); va_end(ap); - p->engine.ps.pdfbytes += /* LINTED */ + p->ps->pdfbytes += /* LINTED */ len < 0 ? 0 : (size_t)len; return; } @@ -560,12 +574,12 @@ ps_printf(struct termp *p, const char *fmt, ...) ps_growbuf(p, PS_BUFSLOP); - pos = (int)p->engine.ps.psmargcur; - len = vsnprintf(&p->engine.ps.psmarg[pos], PS_BUFSLOP, fmt, ap); + pos = (int)p->ps->psmargcur; + len = vsnprintf(&p->ps->psmarg[pos], PS_BUFSLOP, fmt, ap); va_end(ap); - p->engine.ps.psmargcur = strlen(p->engine.ps.psmarg); + p->ps->psmargcur = strlen(p->ps->psmarg); } @@ -576,18 +590,18 @@ ps_putchar(struct termp *p, char c) /* See ps_printf(). */ - if ( ! (PS_MARGINS & p->engine.ps.flags)) { + if ( ! (PS_MARGINS & p->ps->flags)) { /* LINTED */ putchar(c); - p->engine.ps.pdfbytes++; + p->ps->pdfbytes++; return; } ps_growbuf(p, 2); - pos = (int)p->engine.ps.psmargcur++; - p->engine.ps.psmarg[pos++] = c; - p->engine.ps.psmarg[pos] = '\0'; + pos = (int)p->ps->psmargcur++; + p->ps->psmarg[pos++] = c; + p->ps->psmarg[pos] = '\0'; } @@ -597,18 +611,18 @@ pdf_obj(struct termp *p, size_t obj) assert(obj > 0); - if ((obj - 1) >= p->engine.ps.pdfobjsz) { - p->engine.ps.pdfobjsz = obj + 128; - p->engine.ps.pdfobjs = realloc - (p->engine.ps.pdfobjs, - p->engine.ps.pdfobjsz * sizeof(size_t)); - if (NULL == p->engine.ps.pdfobjs) { + if ((obj - 1) >= p->ps->pdfobjsz) { + p->ps->pdfobjsz = obj + 128; + p->ps->pdfobjs = realloc + (p->ps->pdfobjs, + p->ps->pdfobjsz * sizeof(size_t)); + if (NULL == p->ps->pdfobjs) { perror(NULL); exit((int)MANDOCLEVEL_SYSERR); } } - p->engine.ps.pdfobjs[(int)obj - 1] = p->engine.ps.pdfbytes; + p->ps->pdfobjs[(int)obj - 1] = p->ps->pdfbytes; ps_printf(p, "%zu 0 obj\n", obj); } @@ -626,14 +640,14 @@ ps_closepage(struct termp *p) * for the page contents. 
*/ - assert(p->engine.ps.psmarg && p->engine.ps.psmarg[0]); - ps_printf(p, "%s", p->engine.ps.psmarg); + assert(p->ps->psmarg && p->ps->psmarg[0]); + ps_printf(p, "%s", p->ps->psmarg); if (TERMTYPE_PS != p->type) { ps_printf(p, "ET\n"); - len = p->engine.ps.pdfbytes - p->engine.ps.pdflastpg; - base = p->engine.ps.pages * 4 + p->engine.ps.pdfbody; + len = p->ps->pdfbytes - p->ps->pdflastpg; + base = p->ps->pages * 4 + p->ps->pdfbody; ps_printf(p, "endstream\nendobj\n"); @@ -660,10 +674,10 @@ ps_closepage(struct termp *p) } else ps_printf(p, "showpage\n"); - p->engine.ps.pages++; - p->engine.ps.psrow = p->engine.ps.top; - assert( ! (PS_NEWPAGE & p->engine.ps.flags)); - p->engine.ps.flags |= PS_NEWPAGE; + p->ps->pages++; + p->ps->psrow = p->ps->top; + assert( ! (PS_NEWPAGE & p->ps->flags)); + p->ps->flags |= PS_NEWPAGE; } @@ -679,15 +693,15 @@ ps_end(struct termp *p) * well as just one. */ - if ( ! (PS_NEWPAGE & p->engine.ps.flags)) { - assert(0 == p->engine.ps.flags); - assert('\0' == p->engine.ps.last); + if ( ! 
(PS_NEWPAGE & p->ps->flags)) { + assert(0 == p->ps->flags); + assert('\0' == p->ps->last); ps_closepage(p); } if (TERMTYPE_PS == p->type) { ps_printf(p, "%%%%Trailer\n"); - ps_printf(p, "%%%%Pages: %zu\n", p->engine.ps.pages); + ps_printf(p, "%%%%Pages: %zu\n", p->ps->pages); ps_printf(p, "%%%%EOF\n"); return; } @@ -695,18 +709,18 @@ ps_end(struct termp *p) pdf_obj(p, 2); ps_printf(p, "<<\n/Type /Pages\n"); ps_printf(p, "/MediaBox [0 0 %zu %zu]\n", - (size_t)AFM2PNT(p, p->engine.ps.width), - (size_t)AFM2PNT(p, p->engine.ps.height)); + (size_t)AFM2PNT(p, p->ps->width), + (size_t)AFM2PNT(p, p->ps->height)); - ps_printf(p, "/Count %zu\n", p->engine.ps.pages); + ps_printf(p, "/Count %zu\n", p->ps->pages); ps_printf(p, "/Kids ["); - for (i = 0; i < p->engine.ps.pages; i++) + for (i = 0; i < p->ps->pages; i++) ps_printf(p, " %zu 0 R", i * 4 + - p->engine.ps.pdfbody + 3); + p->ps->pdfbody + 3); - base = (p->engine.ps.pages - 1) * 4 + - p->engine.ps.pdfbody + 4; + base = (p->ps->pages - 1) * 4 + + p->ps->pdfbody + 4; ps_printf(p, "]\n>>\nendobj\n"); pdf_obj(p, base); @@ -714,14 +728,14 @@ ps_end(struct termp *p) ps_printf(p, "/Type /Catalog\n"); ps_printf(p, "/Pages 2 0 R\n"); ps_printf(p, ">>\n"); - xref = p->engine.ps.pdfbytes; + xref = p->ps->pdfbytes; ps_printf(p, "xref\n"); ps_printf(p, "0 %zu\n", base + 1); ps_printf(p, "0000000000 65535 f \n"); for (i = 0; i < base; i++) ps_printf(p, "%.10zu 00000 n \n", - p->engine.ps.pdfobjs[(int)i]); + p->ps->pdfobjs[(int)i]); ps_printf(p, "trailer\n"); ps_printf(p, "<<\n"); @@ -746,33 +760,33 @@ ps_begin(struct termp *p) * screen yet, so we don't need to initialise the primary state. 
*/ - if (p->engine.ps.psmarg) { - assert(p->engine.ps.psmargsz); - p->engine.ps.psmarg[0] = '\0'; + if (p->ps->psmarg) { + assert(p->ps->psmargsz); + p->ps->psmarg[0] = '\0'; } - /*p->engine.ps.pdfbytes = 0;*/ - p->engine.ps.psmargcur = 0; - p->engine.ps.flags = PS_MARGINS; - p->engine.ps.pscol = p->engine.ps.left; - p->engine.ps.psrow = p->engine.ps.header; + /*p->ps->pdfbytes = 0;*/ + p->ps->psmargcur = 0; + p->ps->flags = PS_MARGINS; + p->ps->pscol = p->ps->left; + p->ps->psrow = p->ps->header; ps_setfont(p, TERMFONT_NONE); (*p->headf)(p, p->argf); (*p->endline)(p); - p->engine.ps.pscol = p->engine.ps.left; - p->engine.ps.psrow = p->engine.ps.footer; + p->ps->pscol = p->ps->left; + p->ps->psrow = p->ps->footer; (*p->footf)(p, p->argf); (*p->endline)(p); - p->engine.ps.flags &= ~PS_MARGINS; + p->ps->flags &= ~PS_MARGINS; - assert(0 == p->engine.ps.flags); - assert(p->engine.ps.psmarg); - assert('\0' != p->engine.ps.psmarg[0]); + assert(0 == p->ps->flags); + assert(p->ps->psmarg); + assert('\0' != p->ps->psmarg[0]); /* * Print header and initialise page state. 
Following this, @@ -790,8 +804,8 @@ ps_begin(struct termp *p) ps_printf(p, "%%%%PageOrder: Ascend\n"); ps_printf(p, "%%%%DocumentMedia: " "Default %zu %zu 0 () ()\n", - (size_t)AFM2PNT(p, p->engine.ps.width), - (size_t)AFM2PNT(p, p->engine.ps.height)); + (size_t)AFM2PNT(p, p->ps->width), + (size_t)AFM2PNT(p, p->ps->height)); ps_printf(p, "%%%%DocumentNeededResources: font"); for (i = 0; i < (int)TERMFONT__MAX; i++) @@ -816,10 +830,10 @@ ps_begin(struct termp *p) } } - p->engine.ps.pdfbody = (size_t)TERMFONT__MAX + 3; - p->engine.ps.pscol = p->engine.ps.left; - p->engine.ps.psrow = p->engine.ps.top; - p->engine.ps.flags |= PS_NEWPAGE; + p->ps->pdfbody = (size_t)TERMFONT__MAX + 3; + p->ps->pscol = p->ps->left; + p->ps->psrow = p->ps->top; + p->ps->flags |= PS_NEWPAGE; ps_setfont(p, TERMFONT_NONE); } @@ -834,25 +848,25 @@ ps_pletter(struct termp *p, int c) * in a new page and make sure the font is correctly set. */ - if (PS_NEWPAGE & p->engine.ps.flags) { + if (PS_NEWPAGE & p->ps->flags) { if (TERMTYPE_PS == p->type) { ps_printf(p, "%%%%Page: %zu %zu\n", - p->engine.ps.pages + 1, - p->engine.ps.pages + 1); + p->ps->pages + 1, + p->ps->pages + 1); ps_printf(p, "/%s %zu selectfont\n", - fonts[(int)p->engine.ps.lastf].name, - p->engine.ps.scale); + fonts[(int)p->ps->lastf].name, + p->ps->scale); } else { - pdf_obj(p, p->engine.ps.pdfbody + - p->engine.ps.pages * 4); + pdf_obj(p, p->ps->pdfbody + + p->ps->pages * 4); ps_printf(p, "<<\n"); ps_printf(p, "/Length %zu 0 R\n", - p->engine.ps.pdfbody + 1 + - p->engine.ps.pages * 4); + p->ps->pdfbody + 1 + + p->ps->pages * 4); ps_printf(p, ">>\nstream\n"); } - p->engine.ps.pdflastpg = p->engine.ps.pdfbytes; - p->engine.ps.flags &= ~PS_NEWPAGE; + p->ps->pdflastpg = p->ps->pdfbytes; + p->ps->flags &= ~PS_NEWPAGE; } /* @@ -860,22 +874,22 @@ ps_pletter(struct termp *p, int c) * now at the current cursor. */ - if ( ! (PS_INLINE & p->engine.ps.flags)) { + if ( ! 
(PS_INLINE & p->ps->flags)) { if (TERMTYPE_PS != p->type) { ps_printf(p, "BT\n/F%d %zu Tf\n", - (int)p->engine.ps.lastf, - p->engine.ps.scale); + (int)p->ps->lastf, + p->ps->scale); ps_printf(p, "%.3f %.3f Td\n(", - AFM2PNT(p, p->engine.ps.pscol), - AFM2PNT(p, p->engine.ps.psrow)); + AFM2PNT(p, p->ps->pscol), + AFM2PNT(p, p->ps->psrow)); } else ps_printf(p, "%.3f %.3f moveto\n(", - AFM2PNT(p, p->engine.ps.pscol), - AFM2PNT(p, p->engine.ps.psrow)); - p->engine.ps.flags |= PS_INLINE; + AFM2PNT(p, p->ps->pscol), + AFM2PNT(p, p->ps->psrow)); + p->ps->flags |= PS_INLINE; } - assert( ! (PS_NEWPAGE & p->engine.ps.flags)); + assert( ! (PS_NEWPAGE & p->ps->flags)); /* * We need to escape these characters as per the PostScript @@ -898,17 +912,17 @@ ps_pletter(struct termp *p, int c) /* Write the character and adjust where we are on the page. */ - f = (int)p->engine.ps.lastf; + f = (int)p->ps->lastf; if (c <= 32 || (c - 32 >= MAXCHAR)) { ps_putchar(p, ' '); - p->engine.ps.pscol += (size_t)fonts[f].gly[0].wx; + p->ps->pscol += (size_t)fonts[f].gly[0].wx; return; } ps_putchar(p, (char)c); c -= 32; - p->engine.ps.pscol += (size_t)fonts[f].gly[c].wx; + p->ps->pscol += (size_t)fonts[f].gly[c].wx; } @@ -922,7 +936,7 @@ ps_pclose(struct termp *p) * or anything). */ - if ( ! (PS_INLINE & p->engine.ps.flags)) + if ( ! (PS_INLINE & p->ps->flags)) return; if (TERMTYPE_PS != p->type) { @@ -930,7 +944,7 @@ ps_pclose(struct termp *p) } else ps_printf(p, ") show\n"); - p->engine.ps.flags &= ~PS_INLINE; + p->ps->flags &= ~PS_INLINE; } @@ -946,16 +960,16 @@ ps_fclose(struct termp *p) * Following this, close out any scope that's open. */ - if ('\0' != p->engine.ps.last) { - if (p->engine.ps.lastf != TERMFONT_NONE) { + if ('\0' != p->ps->last) { + if (p->ps->lastf != TERMFONT_NONE) { ps_pclose(p); ps_setfont(p, TERMFONT_NONE); } - ps_pletter(p, p->engine.ps.last); - p->engine.ps.last = '\0'; + ps_pletter(p, p->ps->last); + p->ps->last = '\0'; } - if ( ! 
(PS_INLINE & p->engine.ps.flags)) + if ( ! (PS_INLINE & p->ps->flags)) return; ps_pclose(p); @@ -963,9 +977,12 @@ ps_fclose(struct termp *p) static void -ps_letter(struct termp *p, char c) +ps_letter(struct termp *p, int arg) { - char cc; + char cc, c; + + /* LINTED */ + c = arg >= 128 || arg <= 0 ? '?' : arg; /* * State machine dictates whether to buffer the last character @@ -976,33 +993,33 @@ ps_letter(struct termp *p, char c) * regular character and a regular buffer character. */ - if ('\0' == p->engine.ps.last) { + if ('\0' == p->ps->last) { assert(8 != c); - p->engine.ps.last = c; + p->ps->last = c; return; - } else if (8 == p->engine.ps.last) { + } else if (8 == p->ps->last) { assert(8 != c); - p->engine.ps.last = '\0'; + p->ps->last = '\0'; } else if (8 == c) { - assert(8 != p->engine.ps.last); - if ('_' == p->engine.ps.last) { - if (p->engine.ps.lastf != TERMFONT_UNDER) { + assert(8 != p->ps->last); + if ('_' == p->ps->last) { + if (p->ps->lastf != TERMFONT_UNDER) { ps_pclose(p); ps_setfont(p, TERMFONT_UNDER); } - } else if (p->engine.ps.lastf != TERMFONT_BOLD) { + } else if (p->ps->lastf != TERMFONT_BOLD) { ps_pclose(p); ps_setfont(p, TERMFONT_BOLD); } - p->engine.ps.last = c; + p->ps->last = c; return; } else { - if (p->engine.ps.lastf != TERMFONT_NONE) { + if (p->ps->lastf != TERMFONT_NONE) { ps_pclose(p); ps_setfont(p, TERMFONT_NONE); } - cc = p->engine.ps.last; - p->engine.ps.last = c; + cc = p->ps->last; + p->ps->last = c; c = cc; } @@ -1022,7 +1039,7 @@ ps_advance(struct termp *p, size_t len) */ ps_fclose(p); - p->engine.ps.pscol += len; + p->ps->pscol += len; } @@ -1040,16 +1057,16 @@ ps_endline(struct termp *p) * lines, we'll do nasty stuff. */ - if (PS_MARGINS & p->engine.ps.flags) + if (PS_MARGINS & p->ps->flags) return; /* Left-justify. */ - p->engine.ps.pscol = p->engine.ps.left; + p->ps->pscol = p->ps->left; /* If we haven't printed anything, return. 
*/ - if (PS_NEWPAGE & p->engine.ps.flags) + if (PS_NEWPAGE & p->ps->flags) return; /* @@ -1057,9 +1074,9 @@ ps_endline(struct termp *p) * showpage and restart our row. */ - if (p->engine.ps.psrow >= p->engine.ps.lineheight + - p->engine.ps.bottom) { - p->engine.ps.psrow -= p->engine.ps.lineheight; + if (p->ps->psrow >= p->ps->lineheight + + p->ps->bottom) { + p->ps->psrow -= p->ps->lineheight; return; } @@ -1072,37 +1089,37 @@ ps_setfont(struct termp *p, enum termfont f) { assert(f < TERMFONT__MAX); - p->engine.ps.lastf = f; + p->ps->lastf = f; /* * If we're still at the top of the page, let the font-setting * be delayed until we actually have stuff to print. */ - if (PS_NEWPAGE & p->engine.ps.flags) + if (PS_NEWPAGE & p->ps->flags) return; if (TERMTYPE_PS == p->type) ps_printf(p, "/%s %zu selectfont\n", fonts[(int)f].name, - p->engine.ps.scale); + p->ps->scale); else ps_printf(p, "/F%d %zu Tf\n", (int)f, - p->engine.ps.scale); + p->ps->scale); } /* ARGSUSED */ static size_t -ps_width(const struct termp *p, char c) +ps_width(const struct termp *p, int c) { if (c <= 32 || c - 32 >= MAXCHAR) return((size_t)fonts[(int)TERMFONT_NONE].gly[0].wx); c -= 32; - return((size_t)fonts[(int)TERMFONT_NONE].gly[(int)c].wx); + return((size_t)fonts[(int)TERMFONT_NONE].gly[c].wx); } @@ -1141,7 +1158,7 @@ ps_hspan(const struct termp *p, const struct roffsu *su) fonts[(int)TERMFONT_NONE].gly[110 - 32].wx; break; case (SCALE_VS): - r = su->scale * p->engine.ps.lineheight; + r = su->scale * p->ps->lineheight; break; default: r = su->scale; @@ -1151,3 +1168,18 @@ ps_hspan(const struct termp *p, const struct roffsu *su) return(r); } +static void +ps_growbuf(struct termp *p, size_t sz) +{ + if (p->ps->psmargcur + sz <= p->ps->psmargsz) + return; + + if (sz < PS_BUFSLOP) + sz = PS_BUFSLOP; + + p->ps->psmargsz += sz; + + p->ps->psmarg = mandoc_realloc + (p->ps->psmarg, p->ps->psmargsz); +} + |