diff options
author | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2017-01-19 01:00:12 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2017-01-19 01:00:12 +0000 |
commit | 3c7dd0c63b29394e0857d839ebb61ed0dc6c7e57 (patch) | |
tree | 6af9e784b53005ca7f0547fd5f47c10ac07575a4 | |
parent | 2fe4f2951c5a277ad92c2addcc71ede30c4f09cf (diff) |
Implement line breaking of the generated HTML code at space characters
in filled text. This does not affect HTML semantics, but makes the
HTML code even more humanly readable.
While here,
- collapse multiple consecutive space characters in filled text
- and insert a blank between style entries.
-rw-r--r-- | regress/usr.bin/mandoc/char/unicode/latin1.out_html | 2 |
-rw-r--r-- | usr.bin/mandoc/html.c | 236 |
-rw-r--r-- | usr.bin/mandoc/html.h | 9 |
-rw-r--r-- | usr.bin/mandoc/man_html.c | 4 |
-rw-r--r-- | usr.bin/mandoc/mdoc_html.c | 4 |
5 files changed, 180 insertions, 75 deletions
diff --git a/regress/usr.bin/mandoc/char/unicode/latin1.out_html b/regress/usr.bin/mandoc/char/unicode/latin1.out_html index 45cc7578845..5178895f17c 100644 --- a/regress/usr.bin/mandoc/char/unicode/latin1.out_html +++ b/regress/usr.bin/mandoc/char/unicode/latin1.out_html @@ -18,7 +18,7 @@ BEGINTEST <br/> ¬¬¬ NOT SIGN <br/> -­ SOFT HYPHEN +­ SOFT HYPHEN <br/> ®® REGISTERED SIGN <br/> diff --git a/usr.bin/mandoc/html.c b/usr.bin/mandoc/html.c index f42a354c3ad..e893103f335 100644 --- a/usr.bin/mandoc/html.c +++ b/usr.bin/mandoc/html.c @@ -1,4 +1,4 @@ -/* $OpenBSD: html.c,v 1.66 2017/01/18 19:22:18 schwarze Exp $ */ +/* $OpenBSD: html.c,v 1.67 2017/01/19 01:00:11 schwarze Exp $ */ /* * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org> @@ -112,10 +112,14 @@ static const char *const roffscales[SCALE_MAX] = { }; static void a2width(const char *, struct roffsu *); -static void html_endline(struct html *); -static void html_indent(struct html *); +static void print_byte(struct html *, char); +static void print_endline(struct html *); +static void print_endword(struct html *); +static void print_indent(struct html *); +static void print_word(struct html *, const char *); + static void print_ctag(struct html *, struct tag *); -static int print_escape(char); +static int print_escape(struct html *, char); static int print_encode(struct html *, const char *, const char *, int); static void print_href(struct html *, const char *, const char *, int); static void print_metaf(struct html *, enum mandoc_esc); @@ -167,15 +171,15 @@ print_gen_head(struct html *h) t = print_otag(h, TAG_STYLE, ""); print_text(h, "table.head, table.foot { width: 100%; }"); - html_endline(h); + print_endline(h); print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }"); - html_endline(h); + print_endline(h); print_text(h, "td.head-vol { text-align: center; }"); - html_endline(h); + print_endline(h); 
print_text(h, "table.foot td { width: 50%; }"); - html_endline(h); + print_endline(h); print_text(h, "table.head td { width: 33%; }"); - html_endline(h); + print_endline(h); print_text(h, "div.spacer { margin: 1em 0; }"); print_tagq(h, t); @@ -286,27 +290,27 @@ html_strlen(const char *cp) } static int -print_escape(char c) +print_escape(struct html *h, char c) { switch (c) { case '<': - printf("&lt;"); + print_word(h, "&lt;"); break; case '>': - printf("&gt;"); + print_word(h, "&gt;"); break; case '&': - printf("&amp;"); + print_word(h, "&amp;"); break; case '"': - printf("&quot;"); + print_word(h, "&quot;"); break; case ASCII_NBRSP: - printf("&nbsp;"); + print_word(h, "&nbsp;"); break; case ASCII_HYPH: - putchar('-'); + print_byte(h, '-'); break; case ASCII_BREAK: break; @@ -319,6 +323,7 @@ print_escape(char c) static int print_encode(struct html *h, const char *p, const char *pend, int norecurse) { + char numbuf[16]; size_t sz; int c, len, nospace; const char *seq; @@ -338,17 +343,16 @@ print_encode(struct html *h, const char *p, const char *pend, int norecurse) continue; } - sz = strcspn(p, rejs); - if (p + sz > pend) - sz = pend - p; - - fwrite(p, 1, sz, stdout); - p += (int)sz; + for (sz = strcspn(p, rejs); sz-- && p < pend; p++) + if (*p == ' ') + print_endword(h); + else + print_byte(h, *p); if (p >= pend) break; - if (print_escape(*p++)) + if (print_escape(h, *p++)) continue; esc = mandoc_escape(&p, &seq, &len); @@ -407,10 +411,11 @@ print_encode(struct html *h, const char *p, const char *pend, int norecurse) if ((c < 0x20 && c != 0x09) || (c > 0x7E && c < 0xA0)) c = 0xFFFD; - if (c > 0x7E) - printf("&#%d;", c); - else if ( ! 
print_escape(c)) - putchar(c); + if (c > 0x7E) { + (void)snprintf(numbuf, sizeof(numbuf), "&#%d;", c); + print_word(h, numbuf); + } else if (print_escape(h, c) == 0) + print_byte(h, c); } return nospace; @@ -426,7 +431,7 @@ print_href(struct html *h, const char *name, const char *sec, int man) print_encode(h, pp, p, 1); if (man && p[1] == 'S') { if (sec == NULL) - putchar('1'); + print_byte(h, '1'); else print_encode(h, sec, NULL, 1); } else if ((man && p[1] == 'N') || @@ -445,6 +450,7 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) { va_list ap; struct roffsu mysu, *su; + char numbuf[16]; struct tag *t; const char *attr; char *s; @@ -464,16 +470,16 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) t = NULL; if (tflags & HTML_NLBEFORE) - html_endline(h); - if (h->flags & HTML_NLDONE) - html_indent(h); + print_endline(h); + if (h->col == 0) + print_indent(h); else if ((h->flags & HTML_NOSPACE) == 0) { if (h->flags & HTML_KEEP) - printf(" "); + print_word(h, " "); else { if (h->flags & HTML_PREKEEP) h->flags |= HTML_KEEP; - putchar(' '); + print_endword(h); } } @@ -484,14 +490,15 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) /* Print out the tag name and attributes. */ - printf("<%s", htmltags[tag].name); + print_byte(h, '<'); + print_word(h, htmltags[tag].name); va_start(ap, fmt); have_style = 0; while (*fmt != '\0') { if (*fmt == 's') { - printf(" style=\""); + print_word(h, " style=\""); have_style = 1; fmt++; break; @@ -514,7 +521,10 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) default: abort(); } - printf(" %s=\"", attr); + print_byte(h, ' '); + print_word(h, attr); + print_byte(h, '='); + print_byte(h, '"'); switch (*fmt) { case 'M': print_href(h, s, va_arg(ap, char *), 1); @@ -525,14 +535,14 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) 
fmt++; break; case 'R': - putchar('#'); + print_byte(h, '#'); fmt++; /* FALLTHROUGH */ default: print_encode(h, s, NULL, 1); break; } - putchar('"'); + print_byte(h, '"'); } /* Print out styles. */ @@ -591,7 +601,13 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) attr = "min-width"; break; case '?': - printf("%s: %s;", s, va_arg(ap, char *)); + print_word(h, s); + print_byte(h, ':'); + print_byte(h, ' '); + print_word(h, va_arg(ap, char *)); + print_byte(h, ';'); + if (*fmt != '\0') + print_byte(h, ' '); continue; default: abort(); @@ -601,22 +617,30 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) v = 1.0; else if (su->unit == SCALE_BU) v /= 24.0; - printf("%s: %.2f%s;", attr, v, roffscales[su->unit]); + print_word(h, attr); + print_byte(h, ':'); + print_byte(h, ' '); + (void)snprintf(numbuf, sizeof(numbuf), "%.2f", v); + print_word(h, numbuf); + print_word(h, roffscales[su->unit]); + print_byte(h, ';'); + if (*fmt != '\0') + print_byte(h, ' '); } if (have_style) - putchar('"'); + print_byte(h, '"'); va_end(ap); /* Accommodate for "well-formed" singleton escaping. 
*/ if (HTML_AUTOCLOSE & htmltags[tag].flags) - putchar('/'); + print_byte(h, '/'); - putchar('>'); + print_byte(h, '>'); if (tflags & HTML_NLBEGIN) - html_endline(h); + print_endline(h); else h->flags |= HTML_NOSPACE; @@ -649,11 +673,14 @@ print_ctag(struct html *h, struct tag *tag) if (tflags & HTML_NOINDENT) h->noindent--; if (tflags & HTML_NLEND) - html_endline(h); - html_indent(h); - printf("</%s>", htmltags[tag->tag].name); + print_endline(h); + print_indent(h); + print_byte(h, '<'); + print_byte(h, '/'); + print_word(h, htmltags[tag->tag].name); + print_byte(h, '>'); if (tflags & HTML_NLAFTER) - html_endline(h); + print_endline(h); h->tags.head = tag->next; free(tag); @@ -662,21 +689,20 @@ print_ctag(struct html *h, struct tag *tag) void print_gen_decls(struct html *h) { - - puts("<!DOCTYPE html>"); - h->flags |= HTML_NLDONE; + print_word(h, "<!DOCTYPE html>"); + print_endline(h); } void print_text(struct html *h, const char *word) { - if ((h->flags & (HTML_NLDONE | HTML_NOSPACE)) == 0) { + if (h->col && (h->flags & HTML_NOSPACE) == 0) { if ( ! (HTML_KEEP & h->flags)) { if (HTML_PREKEEP & h->flags) h->flags |= HTML_KEEP; - putchar(' '); + print_endword(h); } else - printf(" "); + print_word(h, " "); } assert(NULL == h->metaf); @@ -692,7 +718,7 @@ print_text(struct html *h, const char *word) print_otag(h, TAG_I, ""); break; default: - html_indent(h); + print_indent(h); break; } @@ -745,18 +771,85 @@ print_paragraph(struct html *h) print_tagq(h, t); } + +/*********************************************************************** + * Low level output functions. + * They implement line breaking using a short static buffer. + ***********************************************************************/ + +/* + * Buffer one HTML output byte. + * If the buffer is full, flush and deactivate it and start a new line. + * If the buffer is inactive, print directly. 
+ */ +static void +print_byte(struct html *h, char c) +{ + if ((h->flags & HTML_BUFFER) == 0) { + putchar(c); + h->col++; + return; + } + + if (h->col + h->bufcol < sizeof(h->buf)) { + h->buf[h->bufcol++] = c; + return; + } + + putchar('\n'); + h->col = 0; + print_indent(h); + putchar(' '); + putchar(' '); + fwrite(h->buf, h->bufcol, 1, stdout); + putchar(c); + h->col = (h->indent + 1) * 2 + h->bufcol + 1; + h->bufcol = 0; + h->flags &= ~HTML_BUFFER; +} + /* * If something was printed on the current output line, end it. - * Not to be called right after html_indent(). + * Not to be called right after print_indent(). */ static void -html_endline(struct html *h) +print_endline(struct html *h) { - if (h->flags & HTML_NLDONE) + if (h->col == 0) return; + if (h->bufcol) { + putchar(' '); + fwrite(h->buf, h->bufcol, 1, stdout); + h->bufcol = 0; + } putchar('\n'); - h->flags |= HTML_NLDONE | HTML_NOSPACE; + h->col = 0; + h->flags |= HTML_NOSPACE; + h->flags &= ~HTML_BUFFER; +} + +/* + * Flush the HTML output buffer. + * If it is inactive, activate it. + */ +static void +print_endword(struct html *h) +{ + if (h->noindent) { + print_byte(h, ' '); + return; + } + + if ((h->flags & HTML_BUFFER) == 0) { + h->col++; + h->flags |= HTML_BUFFER; + } else if (h->bufcol) { + putchar(' '); + fwrite(h->buf, h->bufcol, 1, stdout); + h->col += h->bufcol + 1; + } + h->bufcol = 0; } /* @@ -766,17 +859,30 @@ html_endline(struct html *h) * but do not use print_otag() for producing it. */ static void -html_indent(struct html *h) +print_indent(struct html *h) { - int i; + size_t i; - if ((h->flags & HTML_NLDONE) == 0) + if (h->col) return; - if (h->noindent == 0) - for (i = 0; i < h->indent * 2; i++) + if (h->noindent == 0) { + h->col = h->indent * 2; + for (i = 0; i < h->col; i++) putchar(' '); - h->flags &= ~(HTML_NLDONE | HTML_NOSPACE); + } + h->flags &= ~HTML_NOSPACE; +} + +/* + * Print or buffer some characters + * depending on the current HTML output buffer state. 
+ */ +static void +print_word(struct html *h, const char *cp) +{ + while (*cp != '\0') + print_byte(h, *cp++); } /* diff --git a/usr.bin/mandoc/html.h b/usr.bin/mandoc/html.h index 8dbeab6dcc0..8434f6f7143 100644 --- a/usr.bin/mandoc/html.h +++ b/usr.bin/mandoc/html.h @@ -1,4 +1,4 @@ -/* $OpenBSD: html.h,v 1.38 2017/01/18 19:22:18 schwarze Exp $ */ +/* $OpenBSD: html.h,v 1.39 2017/01/19 01:00:11 schwarze Exp $ */ /* * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2017 Ingo Schwarze <schwarze@openbsd.org> @@ -95,9 +95,12 @@ struct html { #define HTML_NOSPLIT (1 << 7) /* do not break line before .An */ #define HTML_SPLIT (1 << 8) /* break line before .An */ #define HTML_NONEWLINE (1 << 9) /* No line break in nofill mode. */ -#define HTML_NLDONE (1 << 10) /* Just started a new line of HTML. */ - int indent; /* current output indentation level */ +#define HTML_BUFFER (1 << 10) /* Collect a word to see if it fits. */ + size_t indent; /* current output indentation level */ int noindent; /* indent disabled by <pre> */ + size_t col; /* current output byte position */ + size_t bufcol; /* current buf byte position */ + char buf[80]; /* output buffer */ struct tagq tags; /* stack of open tags */ struct rofftbl tbl; /* current table */ struct tag *tblt; /* current open table scope */ diff --git a/usr.bin/mandoc/man_html.c b/usr.bin/mandoc/man_html.c index 99b2dcaa99e..6182f74c7f4 100644 --- a/usr.bin/mandoc/man_html.c +++ b/usr.bin/mandoc/man_html.c @@ -1,4 +1,4 @@ -/* $OpenBSD: man_html.c,v 1.77 2017/01/18 19:22:18 schwarze Exp $ */ +/* $OpenBSD: man_html.c,v 1.78 2017/01/19 01:00:11 schwarze Exp $ */ /* * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2013, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> @@ -213,8 +213,6 @@ print_man_node(MAN_ARGS) print_text(h, n->string); return; case ROFFT_EQN: - if (n->flags & NODE_LINE) - putchar('\n'); print_eqn(h, n->eqn); break; case ROFFT_TBL: diff --git 
a/usr.bin/mandoc/mdoc_html.c b/usr.bin/mandoc/mdoc_html.c index 2a1f06c4552..b1f8379f80f 100644 --- a/usr.bin/mandoc/mdoc_html.c +++ b/usr.bin/mandoc/mdoc_html.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mdoc_html.c,v 1.125 2017/01/18 19:22:18 schwarze Exp $ */ +/* $OpenBSD: mdoc_html.c,v 1.126 2017/01/19 01:00:11 schwarze Exp $ */ /* * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@openbsd.org> @@ -382,8 +382,6 @@ print_mdoc_node(MDOC_ARGS) h->flags |= HTML_NOSPACE; return; case ROFFT_EQN: - if (n->flags & NODE_LINE) - putchar('\n'); print_eqn(h, n->eqn); break; case ROFFT_TBL: |