summaryrefslogtreecommitdiff
path: root/usr.bin/mandoc
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@cvs.openbsd.org> 2010-05-26 02:39:59 +0000
committer Ingo Schwarze <schwarze@cvs.openbsd.org> 2010-05-26 02:39:59 +0000
commit 71c147c136372d4309b120eda4306853560b848a (patch)
tree 1b4efa482538f8fc7431ab27c2147a028931f03e /usr.bin/mandoc
parent c03090c978c31a26b24683e4bbdef4271bbf5b3a (diff)
When a word does not fully fit onto the output line, but it contains
at least one hyphen, we already had support for breaking the line at
the last fitting hyphen. This patch improves this functionality by
only breaking at hyphens in free-form text, and by not breaking at
hyphens
 * at the beginning or end of a word or
 * immediately preceded or followed by another hyphen or
 * escaped by a preceding backslash.

Before this patch, differences in break-at-hyphen support were one of
the major sources of noise in automatic comparisons to mdoc(7) groff
output. Now, the remaining differences are hard to find among the
noise coming from other sources. Where there are still differences,
what we do seems to be better than what groff does, see e.g. the
chio(1) exchange and position commands for one of the now rare
examples.

idea and coding by kristaps@

Besides, this was the last substantial code difference left between
bsd.lv and openbsd.org. We are now in full sync.
Diffstat (limited to 'usr.bin/mandoc')
-rw-r--r-- usr.bin/mandoc/chars.c 3
-rw-r--r-- usr.bin/mandoc/chars.h 4
-rw-r--r-- usr.bin/mandoc/html.c 31
-rw-r--r-- usr.bin/mandoc/libmandoc.h 3
-rw-r--r-- usr.bin/mandoc/man_validate.c 4
-rw-r--r-- usr.bin/mandoc/mandoc.c 30
-rw-r--r-- usr.bin/mandoc/mandoc.h 6
-rw-r--r-- usr.bin/mandoc/mdoc.c 6
-rw-r--r-- usr.bin/mandoc/mdoc_validate.c 4
-rw-r--r-- usr.bin/mandoc/term.c 18
10 files changed, 74 insertions, 35 deletions
diff --git a/usr.bin/mandoc/chars.c b/usr.bin/mandoc/chars.c
index 0e8348bc13a..3f839261af4 100644
--- a/usr.bin/mandoc/chars.c
+++ b/usr.bin/mandoc/chars.c
@@ -1,4 +1,4 @@
-/* $Id: chars.c,v 1.6 2010/03/26 01:22:05 schwarze Exp $ */
+/* $Id: chars.c,v 1.7 2010/05/26 02:39:58 schwarze Exp $ */
/*
* Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -19,6 +19,7 @@
#include <stdlib.h>
#include <string.h>
+#include "mandoc.h"
#include "chars.h"
#define PRINT_HI 126
diff --git a/usr.bin/mandoc/chars.h b/usr.bin/mandoc/chars.h
index 3685451762f..8e55ed2c013 100644
--- a/usr.bin/mandoc/chars.h
+++ b/usr.bin/mandoc/chars.h
@@ -1,4 +1,4 @@
-/* $Id: chars.h,v 1.3 2010/05/14 19:52:43 schwarze Exp $ */
+/* $Id: chars.h,v 1.4 2010/05/26 02:39:58 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -17,8 +17,6 @@
#ifndef CHARS_H
#define CHARS_H
-#define ASCII_NBRSP 31 /* non-breaking space */
-
__BEGIN_DECLS
enum chars {
diff --git a/usr.bin/mandoc/html.c b/usr.bin/mandoc/html.c
index ac71994c12a..02221b64a8c 100644
--- a/usr.bin/mandoc/html.c
+++ b/usr.bin/mandoc/html.c
@@ -1,4 +1,4 @@
-/* $Id: html.c,v 1.8 2010/05/14 01:54:37 schwarze Exp $ */
+/* $Id: html.c,v 1.9 2010/05/26 02:39:58 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -25,6 +25,7 @@
#include <string.h>
#include <unistd.h>
+#include "mandoc.h"
#include "out.h"
#include "chars.h"
#include "html.h"
@@ -292,11 +293,12 @@ print_encode(struct html *h, const char *p, int norecurse)
int len, nospace;
const char *seq;
enum roffdeco deco;
+ static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
nospace = 0;
for (; *p; p++) {
- sz = strcspn(p, "\\<>&");
+ sz = strcspn(p, rejs);
fwrite(p, 1, sz, stdout);
p += /* LINTED */
@@ -311,6 +313,15 @@ print_encode(struct html *h, const char *p, int norecurse)
} else if ('&' == *p) {
printf("&amp;");
continue;
+ } else if (ASCII_HYPH == *p) {
+ /*
+ * Note: "soft hyphens" aren't graphically
+ * displayed when not breaking the text; we want
+ * them to be displayed.
+ */
+ /*printf("&#173;");*/
+ putchar('-');
+ continue;
} else if ('\0' == *p)
break;
@@ -439,21 +450,9 @@ print_gen_decls(struct html *h)
static void
print_xmltype(struct html *h)
{
- const char *decl;
-
- switch (h->type) {
- case (HTML_XHTML_1_0_STRICT):
- decl = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
- break;
- default:
- decl = NULL;
- break;
- }
-
- if (NULL == decl)
- return;
- printf("%s\n", decl);
+ if (HTML_XHTML_1_0_STRICT == h->type)
+ printf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
}
diff --git a/usr.bin/mandoc/libmandoc.h b/usr.bin/mandoc/libmandoc.h
index 7b43a25e329..4ceac4bfe37 100644
--- a/usr.bin/mandoc/libmandoc.h
+++ b/usr.bin/mandoc/libmandoc.h
@@ -1,4 +1,4 @@
-/* $Id: libmandoc.h,v 1.4 2010/05/14 19:52:43 schwarze Exp $ */
+/* $Id: libmandoc.h,v 1.5 2010/05/26 02:39:58 schwarze Exp $ */
/*
* Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -30,6 +30,7 @@ time_t mandoc_a2time(int, const char *);
#define MTIME_MDOCDATE (1 << 2)
#define MTIME_ISO_8601 (1 << 3)
int mandoc_eos(const char *, size_t);
+int mandoc_hyph(const char *, const char *);
__END_DECLS
diff --git a/usr.bin/mandoc/man_validate.c b/usr.bin/mandoc/man_validate.c
index f0ff2b49c70..b36b9c1d9e2 100644
--- a/usr.bin/mandoc/man_validate.c
+++ b/usr.bin/mandoc/man_validate.c
@@ -1,4 +1,4 @@
-/* $Id: man_validate.c,v 1.25 2010/05/23 22:45:00 schwarze Exp $ */
+/* $Id: man_validate.c,v 1.26 2010/05/26 02:39:58 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -219,7 +219,7 @@ check_text(CHKARGS)
return(c);
}
- if ('\t' == *p || isprint((u_char)*p))
+ if ('\t' == *p || isprint((u_char)*p) || ASCII_HYPH == *p)
continue;
if ( ! man_pmsg(m, n->line, pos, MANDOCERR_BADCHAR))
return(0);
diff --git a/usr.bin/mandoc/mandoc.c b/usr.bin/mandoc/mandoc.c
index 92c65e9d2e1..4f534f1589b 100644
--- a/usr.bin/mandoc/mandoc.c
+++ b/usr.bin/mandoc/mandoc.c
@@ -1,4 +1,4 @@
-/* $Id: mandoc.c,v 1.11 2010/05/15 15:37:53 schwarze Exp $ */
+/* $Id: mandoc.c,v 1.12 2010/05/26 02:39:58 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -336,3 +336,31 @@ mandoc_eos(const char *p, size_t sz)
return(0);
}
+
+
+int
+mandoc_hyph(const char *start, const char *c)
+{
+
+ /*
+ * Choose whether to break at a hyphenated character. We only
+ * do this if it's free-standing within a word.
+ */
+
+ /* Skip first/last character of buffer. */
+ if (c == start || '\0' == *(c + 1))
+ return(0);
+ /* Skip first/last character of word. */
+ if ('\t' == *(c + 1) || '\t' == *(c - 1))
+ return(0);
+ if (' ' == *(c + 1) || ' ' == *(c - 1))
+ return(0);
+ /* Skip double invocations. */
+ if ('-' == *(c + 1) || '-' == *(c - 1))
+ return(0);
+ /* Skip escapes. */
+ if ('\\' == *(c - 1))
+ return(0);
+
+ return(1);
+}
diff --git a/usr.bin/mandoc/mandoc.h b/usr.bin/mandoc/mandoc.h
index 4c2986c37a7..fb9911c46a7 100644
--- a/usr.bin/mandoc/mandoc.h
+++ b/usr.bin/mandoc/mandoc.h
@@ -1,4 +1,4 @@
-/* $Id: mandoc.h,v 1.4 2010/05/23 23:35:26 schwarze Exp $ */
+/* $Id: mandoc.h,v 1.5 2010/05/26 02:39:58 schwarze Exp $ */
/*
* Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -17,6 +17,10 @@
#ifndef MANDOC_H
#define MANDOC_H
+#define ASCII_NBRSP 31 /* non-breaking space */
+#define ASCII_HYPH 30 /* breakable hyphen */
+
+
__BEGIN_DECLS
enum mandocerr {
diff --git a/usr.bin/mandoc/mdoc.c b/usr.bin/mandoc/mdoc.c
index 52c4f982734..2544ebb9113 100644
--- a/usr.bin/mandoc/mdoc.c
+++ b/usr.bin/mandoc/mdoc.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc.c,v 1.54 2010/05/23 22:45:00 schwarze Exp $ */
+/* $Id: mdoc.c,v 1.55 2010/05/26 02:39:58 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -559,6 +559,10 @@ mdoc_ptext(struct mdoc *m, int line, char *buf, int offs)
ws = NULL;
for (c = end = buf + offs; *c; c++) {
switch (*c) {
+ case '-':
+ if (mandoc_hyph(buf + offs, c))
+ *c = ASCII_HYPH;
+ break;
case ' ':
if (NULL == ws)
ws = c;
diff --git a/usr.bin/mandoc/mdoc_validate.c b/usr.bin/mandoc/mdoc_validate.c
index 9a7fa033ee4..7ac6c642a67 100644
--- a/usr.bin/mandoc/mdoc_validate.c
+++ b/usr.bin/mandoc/mdoc_validate.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc_validate.c,v 1.57 2010/05/24 12:48:11 schwarze Exp $ */
+/* $Id: mdoc_validate.c,v 1.58 2010/05/26 02:39:58 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -459,7 +459,7 @@ check_text(struct mdoc *mdoc, int line, int pos, const char *p)
if ( ! (MDOC_LITERAL & mdoc->flags))
if ( ! mdoc_pmsg(mdoc, line, pos, MANDOCERR_BADCHAR))
return(0);
- } else if ( ! isprint((u_char)*p))
+ } else if ( ! isprint((u_char)*p) && ASCII_HYPH != *p)
if ( ! mdoc_pmsg(mdoc, line, pos, MANDOCERR_BADCHAR))
return(0);
diff --git a/usr.bin/mandoc/term.c b/usr.bin/mandoc/term.c
index 75fa28e3396..28799b3d0ac 100644
--- a/usr.bin/mandoc/term.c
+++ b/usr.bin/mandoc/term.c
@@ -1,4 +1,4 @@
-/* $Id: term.c,v 1.34 2010/05/23 22:45:01 schwarze Exp $ */
+/* $Id: term.c,v 1.35 2010/05/26 02:39:58 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -190,14 +190,13 @@ term_flushln(struct termp *p)
for (jhy = 0; j < (int)p->col; j++) {
if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
break;
- if (8 == p->buf[j])
- vend--;
- else {
+ if (8 != p->buf[j]) {
if (vend > vis && vend < bp &&
- '-' == p->buf[j])
+ ASCII_HYPH == p->buf[j])
jhy = j;
vend++;
- }
+ } else
+ vend--;
}
/*
@@ -259,7 +258,12 @@ term_flushln(struct termp *p)
p->viscol += vbl;
vbl = 0;
}
- putchar(p->buf[i]);
+
+ if (ASCII_HYPH == p->buf[i])
+ putchar('-');
+ else
+ putchar(p->buf[i]);
+
p->viscol += 1;
}
vend += vbl;