From 3c7dd0c63b29394e0857d839ebb61ed0dc6c7e57 Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Thu, 19 Jan 2017 01:00:12 +0000 Subject: Implement line breaking of the generated HTML code at space characters in filled text. This does not affect HTML semantics, but makes the HTML code even more humanly readable. While here, - collapse multiple consecutive space characters in filled text - and insert a blank between style entries. --- .../usr.bin/mandoc/char/unicode/latin1.out_html | 2 +- usr.bin/mandoc/html.c | 236 +++++++++++++++------ usr.bin/mandoc/html.h | 9 +- usr.bin/mandoc/man_html.c | 4 +- usr.bin/mandoc/mdoc_html.c | 4 +- 5 files changed, 180 insertions(+), 75 deletions(-) diff --git a/regress/usr.bin/mandoc/char/unicode/latin1.out_html b/regress/usr.bin/mandoc/char/unicode/latin1.out_html index 45cc7578845..5178895f17c 100644 --- a/regress/usr.bin/mandoc/char/unicode/latin1.out_html +++ b/regress/usr.bin/mandoc/char/unicode/latin1.out_html @@ -18,7 +18,7 @@ BEGINTEST
¬¬¬ NOT SIGN
-­ SOFT HYPHEN +­ SOFT HYPHEN
®® REGISTERED SIGN
diff --git a/usr.bin/mandoc/html.c b/usr.bin/mandoc/html.c index f42a354c3ad..e893103f335 100644 --- a/usr.bin/mandoc/html.c +++ b/usr.bin/mandoc/html.c @@ -1,4 +1,4 @@ -/* $OpenBSD: html.c,v 1.66 2017/01/18 19:22:18 schwarze Exp $ */ +/* $OpenBSD: html.c,v 1.67 2017/01/19 01:00:11 schwarze Exp $ */ /* * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons * Copyright (c) 2011-2015, 2017 Ingo Schwarze @@ -112,10 +112,14 @@ static const char *const roffscales[SCALE_MAX] = { }; static void a2width(const char *, struct roffsu *); -static void html_endline(struct html *); -static void html_indent(struct html *); +static void print_byte(struct html *, char); +static void print_endline(struct html *); +static void print_endword(struct html *); +static void print_indent(struct html *); +static void print_word(struct html *, const char *); + static void print_ctag(struct html *, struct tag *); -static int print_escape(char); +static int print_escape(struct html *, char); static int print_encode(struct html *, const char *, const char *, int); static void print_href(struct html *, const char *, const char *, int); static void print_metaf(struct html *, enum mandoc_esc); @@ -167,15 +171,15 @@ print_gen_head(struct html *h) t = print_otag(h, TAG_STYLE, ""); print_text(h, "table.head, table.foot { width: 100%; }"); - html_endline(h); + print_endline(h); print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }"); - html_endline(h); + print_endline(h); print_text(h, "td.head-vol { text-align: center; }"); - html_endline(h); + print_endline(h); print_text(h, "table.foot td { width: 50%; }"); - html_endline(h); + print_endline(h); print_text(h, "table.head td { width: 33%; }"); - html_endline(h); + print_endline(h); print_text(h, "div.spacer { margin: 1em 0; }"); print_tagq(h, t); @@ -286,27 +290,27 @@ html_strlen(const char *cp) } static int -print_escape(char c) +print_escape(struct html *h, char c) { switch (c) { case '<': - printf("<"); + print_word(h, "<"); break; case '>': - printf(">"); + print_word(h, ">"); break; case '&': - printf("&"); + print_word(h, "&"); break; case '"': - printf("""); + print_word(h, """); break; case ASCII_NBRSP: - printf(" "); + print_word(h, " "); break; case ASCII_HYPH: - putchar('-'); + print_byte(h, '-'); break; case ASCII_BREAK: break; @@ -319,6 +323,7 @@ print_escape(char c) static int print_encode(struct html *h, const char *p, const char *pend, int norecurse) { + char numbuf[16]; size_t sz; int c, len, nospace; const char *seq; @@ -338,17 +343,16 @@ print_encode(struct html *h, const char *p, const char *pend, int norecurse) continue; } - sz = strcspn(p, rejs); - if (p + sz > pend) - sz = pend - p; - - fwrite(p, 1, sz, stdout); - p += (int)sz; + for (sz = strcspn(p, rejs); sz-- && p < pend; p++) + if (*p == ' ') + print_endword(h); + else + print_byte(h, *p); if (p >= pend) break; - if (print_escape(*p++)) + if (print_escape(h, *p++)) continue; esc = mandoc_escape(&p, &seq, &len); @@ -407,10 +411,11 @@ print_encode(struct html *h, const char *p, const char *pend, int norecurse) if ((c < 0x20 && c != 0x09) || (c > 0x7E && c < 0xA0)) c = 0xFFFD; - if (c > 0x7E) - printf("&#%d;", c); - else if ( ! print_escape(c)) - putchar(c); + if (c > 0x7E) { + (void)snprintf(numbuf, sizeof(numbuf), "&#%d;", c); + print_word(h, numbuf); + } else if (print_escape(h, c) == 0) + print_byte(h, c); } return nospace; @@ -426,7 +431,7 @@ print_href(struct html *h, const char *name, const char *sec, int man) print_encode(h, pp, p, 1); if (man && p[1] == 'S') { if (sec == NULL) - putchar('1'); + print_byte(h, '1'); else print_encode(h, sec, NULL, 1); } else if ((man && p[1] == 'N') || @@ -445,6 +450,7 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) { va_list ap; struct roffsu mysu, *su; + char numbuf[16]; struct tag *t; const char *attr; char *s; @@ -464,16 +470,16 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) t = NULL; if (tflags & HTML_NLBEFORE) - html_endline(h); - if (h->flags & HTML_NLDONE) - html_indent(h); + print_endline(h); + if (h->col == 0) + print_indent(h); else if ((h->flags & HTML_NOSPACE) == 0) { if (h->flags & HTML_KEEP) - printf(" "); + print_word(h, " "); else { if (h->flags & HTML_PREKEEP) h->flags |= HTML_KEEP; - putchar(' '); + print_endword(h); } } @@ -484,14 +490,15 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) /* Print out the tag name and attributes. */ - printf("<%s", htmltags[tag].name); + print_byte(h, '<'); + print_word(h, htmltags[tag].name); va_start(ap, fmt); have_style = 0; while (*fmt != '\0') { if (*fmt == 's') { - printf(" style=\""); + print_word(h, " style=\""); have_style = 1; fmt++; break; @@ -514,7 +521,10 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) default: abort(); } - printf(" %s=\"", attr); + print_byte(h, ' '); + print_word(h, attr); + print_byte(h, '='); + print_byte(h, '"'); switch (*fmt) { case 'M': print_href(h, s, va_arg(ap, char *), 1); @@ -525,14 +535,14 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) fmt++; break; case 'R': - putchar('#'); + print_byte(h, '#'); fmt++; /* FALLTHROUGH */ default: print_encode(h, s, NULL, 1); break; } - putchar('"'); + print_byte(h, '"'); } /* Print out styles. */ @@ -591,7 +601,13 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) attr = "min-width"; break; case '?': - printf("%s: %s;", s, va_arg(ap, char *)); + print_word(h, s); + print_byte(h, ':'); + print_byte(h, ' '); + print_word(h, va_arg(ap, char *)); + print_byte(h, ';'); + if (*fmt != '\0') + print_byte(h, ' '); continue; default: abort(); @@ -601,22 +617,30 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) v = 1.0; else if (su->unit == SCALE_BU) v /= 24.0; - printf("%s: %.2f%s;", attr, v, roffscales[su->unit]); + print_word(h, attr); + print_byte(h, ':'); + print_byte(h, ' '); + (void)snprintf(numbuf, sizeof(numbuf), "%.2f", v); + print_word(h, numbuf); + print_word(h, roffscales[su->unit]); + print_byte(h, ';'); + if (*fmt != '\0') + print_byte(h, ' '); } if (have_style) - putchar('"'); + print_byte(h, '"'); va_end(ap); /* Accommodate for "well-formed" singleton escaping. */ if (HTML_AUTOCLOSE & htmltags[tag].flags) - putchar('/'); + print_byte(h, '/'); - putchar('>'); + print_byte(h, '>'); if (tflags & HTML_NLBEGIN) - html_endline(h); + print_endline(h); else h->flags |= HTML_NOSPACE; @@ -649,11 +673,14 @@ print_ctag(struct html *h, struct tag *tag) if (tflags & HTML_NOINDENT) h->noindent--; if (tflags & HTML_NLEND) - html_endline(h); - html_indent(h); - printf("", htmltags[tag->tag].name); + print_endline(h); + print_indent(h); + print_byte(h, '<'); + print_byte(h, '/'); + print_word(h, htmltags[tag->tag].name); + print_byte(h, '>'); if (tflags & HTML_NLAFTER) - html_endline(h); + print_endline(h); h->tags.head = tag->next; free(tag); @@ -662,21 +689,20 @@ print_ctag(struct html *h, struct tag *tag) void print_gen_decls(struct html *h) { - - puts(""); - h->flags |= HTML_NLDONE; + print_word(h, ""); + print_endline(h); } void print_text(struct html *h, const char *word) { - if ((h->flags & (HTML_NLDONE | HTML_NOSPACE)) == 0) { + if (h->col && (h->flags & HTML_NOSPACE) == 0) { if ( ! (HTML_KEEP & h->flags)) { if (HTML_PREKEEP & h->flags) h->flags |= HTML_KEEP; - putchar(' '); + print_endword(h); } else - printf(" "); + print_word(h, " "); } assert(NULL == h->metaf); @@ -692,7 +718,7 @@ print_text(struct html *h, const char *word) print_otag(h, TAG_I, ""); break; default: - html_indent(h); + print_indent(h); break; } @@ -745,18 +771,85 @@ print_paragraph(struct html *h) print_tagq(h, t); } + +/*********************************************************************** + * Low level output functions. + * They implement line breaking using a short static buffer. + ***********************************************************************/ + +/* + * Buffer one HTML output byte. + * If the buffer is full, flush and deactivate it and start a new line. + * If the buffer is inactive, print directly. + */ +static void +print_byte(struct html *h, char c) +{ + if ((h->flags & HTML_BUFFER) == 0) { + putchar(c); + h->col++; + return; + } + + if (h->col + h->bufcol < sizeof(h->buf)) { + h->buf[h->bufcol++] = c; + return; + } + + putchar('\n'); + h->col = 0; + print_indent(h); + putchar(' '); + putchar(' '); + fwrite(h->buf, h->bufcol, 1, stdout); + putchar(c); + h->col = (h->indent + 1) * 2 + h->bufcol + 1; + h->bufcol = 0; + h->flags &= ~HTML_BUFFER; +} + /* * If something was printed on the current output line, end it. - * Not to be called right after html_indent(). + * Not to be called right after print_indent(). */ static void -html_endline(struct html *h) +print_endline(struct html *h) { - if (h->flags & HTML_NLDONE) + if (h->col == 0) return; + if (h->bufcol) { + putchar(' '); + fwrite(h->buf, h->bufcol, 1, stdout); + h->bufcol = 0; + } putchar('\n'); - h->flags |= HTML_NLDONE | HTML_NOSPACE; + h->col = 0; + h->flags |= HTML_NOSPACE; + h->flags &= ~HTML_BUFFER; +} + +/* + * Flush the HTML output buffer. + * If it is inactive, activate it. + */ +static void +print_endword(struct html *h) +{ + if (h->noindent) { + print_byte(h, ' '); + return; + } + + if ((h->flags & HTML_BUFFER) == 0) { + h->col++; + h->flags |= HTML_BUFFER; + } else if (h->bufcol) { + putchar(' '); + fwrite(h->buf, h->bufcol, 1, stdout); + h->col += h->bufcol + 1; + } + h->bufcol = 0; } /* @@ -766,17 +859,30 @@ html_endline(struct html *h) * but do not use print_otag() for producing it. */ static void -html_indent(struct html *h) +print_indent(struct html *h) { - int i; + size_t i; - if ((h->flags & HTML_NLDONE) == 0) + if (h->col) return; - if (h->noindent == 0) - for (i = 0; i < h->indent * 2; i++) + if (h->noindent == 0) { + h->col = h->indent * 2; + for (i = 0; i < h->col; i++) putchar(' '); - h->flags &= ~(HTML_NLDONE | HTML_NOSPACE); + } + h->flags &= ~HTML_NOSPACE; +} + +/* + * Print or buffer some characters + * depending on the current HTML output buffer state. + */ +static void +print_word(struct html *h, const char *cp) +{ + while (*cp != '\0') + print_byte(h, *cp++); } /* diff --git a/usr.bin/mandoc/html.h b/usr.bin/mandoc/html.h index 8dbeab6dcc0..8434f6f7143 100644 --- a/usr.bin/mandoc/html.h +++ b/usr.bin/mandoc/html.h @@ -1,4 +1,4 @@ -/* $OpenBSD: html.h,v 1.38 2017/01/18 19:22:18 schwarze Exp $ */ +/* $OpenBSD: html.h,v 1.39 2017/01/19 01:00:11 schwarze Exp $ */ /* * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons * Copyright (c) 2017 Ingo Schwarze @@ -95,9 +95,12 @@ struct html { #define HTML_NOSPLIT (1 << 7) /* do not break line before .An */ #define HTML_SPLIT (1 << 8) /* break line before .An */ #define HTML_NONEWLINE (1 << 9) /* No line break in nofill mode. */ -#define HTML_NLDONE (1 << 10) /* Just started a new line of HTML. */ - int indent; /* current output indentation level */ +#define HTML_BUFFER (1 << 10) /* Collect a word to see if it fits. */ + size_t indent; /* current output indentation level */ int noindent; /* indent disabled by
 */
+	size_t		  col; /* current output byte position */
+	size_t		  bufcol; /* current buf byte position */
+	char		  buf[80]; /* output buffer */
 	struct tagq	  tags; /* stack of open tags */
 	struct rofftbl	  tbl; /* current table */
 	struct tag	 *tblt; /* current open table scope */
diff --git a/usr.bin/mandoc/man_html.c b/usr.bin/mandoc/man_html.c
index 99b2dcaa99e..6182f74c7f4 100644
--- a/usr.bin/mandoc/man_html.c
+++ b/usr.bin/mandoc/man_html.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: man_html.c,v 1.77 2017/01/18 19:22:18 schwarze Exp $ */
+/*	$OpenBSD: man_html.c,v 1.78 2017/01/19 01:00:11 schwarze Exp $ */
 /*
  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons 
  * Copyright (c) 2013, 2014, 2015, 2017 Ingo Schwarze 
@@ -213,8 +213,6 @@ print_man_node(MAN_ARGS)
 		print_text(h, n->string);
 		return;
 	case ROFFT_EQN:
-		if (n->flags & NODE_LINE)
-			putchar('\n');
 		print_eqn(h, n->eqn);
 		break;
 	case ROFFT_TBL:
diff --git a/usr.bin/mandoc/mdoc_html.c b/usr.bin/mandoc/mdoc_html.c
index 2a1f06c4552..b1f8379f80f 100644
--- a/usr.bin/mandoc/mdoc_html.c
+++ b/usr.bin/mandoc/mdoc_html.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: mdoc_html.c,v 1.125 2017/01/18 19:22:18 schwarze Exp $ */
+/*	$OpenBSD: mdoc_html.c,v 1.126 2017/01/19 01:00:11 schwarze Exp $ */
 /*
  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons 
  * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze 
@@ -382,8 +382,6 @@ print_mdoc_node(MDOC_ARGS)
 			h->flags |= HTML_NOSPACE;
 		return;
 	case ROFFT_EQN:
-		if (n->flags & NODE_LINE)
-			putchar('\n');
 		print_eqn(h, n->eqn);
 		break;
 	case ROFFT_TBL:
-- 
cgit v1.2.3