diff options
author | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2010-03-02 00:39:00 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2010-03-02 00:39:00 +0000 |
commit | 038ab018e83805ffd7295eb1d96d2eba5f39b351 (patch) | |
tree | 40615593ece0dbe15d0236d2a533dd64649fdfed /usr.bin | |
parent | ff35626491e806b44f055b355277d2e9beeb458b (diff) |
Proper inter-sentence spacing for mdoc(7).
When a text line or a non-block macro line in the mdoc(7) input ends
with one of the characters '.', '!' or '?', consider that an end of
sentence (EOS).
Because EOS is only detected at the end of an input line, this makes
Jason's rule "new sentence, new line" even more important.
Let the parser detect the EOS and insert a token into the AST.
Let the -Tascii frontend render the EOS token as a double space before
the next word.
Diffstat (limited to 'usr.bin')
-rw-r--r-- | usr.bin/mandoc/chars.h | 5 | ||||
-rw-r--r-- | usr.bin/mandoc/chars.in | 4 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc.c | 36 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc.h | 5 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc_action.c | 3 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc_argv.c | 3 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc_hash.c | 18 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc_html.c | 3 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc_macro.c | 3 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc_term.c | 16 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc_validate.c | 6 | ||||
-rw-r--r-- | usr.bin/mandoc/term.c | 20 |
12 files changed, 92 insertions, 30 deletions
diff --git a/usr.bin/mandoc/chars.h b/usr.bin/mandoc/chars.h index fa1608a3b58..88ece13ec5a 100644 --- a/usr.bin/mandoc/chars.h +++ b/usr.bin/mandoc/chars.h @@ -1,4 +1,4 @@ -/* $Id: chars.h,v 1.1 2009/10/19 09:16:58 schwarze Exp $ */ +/* $Id: chars.h,v 1.2 2010/03/02 00:38:59 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -17,6 +17,9 @@ #ifndef CHARS_H #define CHARS_H +#define ASCII_EOS 30 /* end of sentence marker */ +#define ASCII_NBRSP 31 /* non-breaking space */ + __BEGIN_DECLS enum chars { diff --git a/usr.bin/mandoc/chars.in b/usr.bin/mandoc/chars.in index 5129c8e43d3..75b6601ee09 100644 --- a/usr.bin/mandoc/chars.in +++ b/usr.bin/mandoc/chars.in @@ -1,4 +1,4 @@ -/* $Id: chars.in,v 1.4 2010/02/18 02:11:25 schwarze Exp $ */ +/* $Id: chars.in,v 1.5 2010/03/02 00:38:59 schwarze Exp $ */ /* * Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -31,7 +31,7 @@ */ /* Non-breaking, non-collapsing space uses unit separator. */ -static const char ascii_nbrsp[2] = { 31, 0 }; +static const char ascii_nbrsp[2] = { ASCII_NBRSP, 0 }; CHAR_TBL_START diff --git a/usr.bin/mandoc/mdoc.c b/usr.bin/mandoc/mdoc.c index 8d2cf52b80c..707f9bd6364 100644 --- a/usr.bin/mandoc/mdoc.c +++ b/usr.bin/mandoc/mdoc.c @@ -1,4 +1,4 @@ -/* $Id: mdoc.c,v 1.34 2010/02/18 02:11:26 schwarze Exp $ */ +/* $Id: mdoc.c,v 1.35 2010/03/02 00:38:59 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -118,7 +118,7 @@ const char *const __mdoc_macronames[MDOC_MAX] = { /* LINTED */ "Dx", "%Q", "br", "sp", /* LINTED */ - "%U" + "%U", "eos" }; const char *const __mdoc_argnames[MDOC_ARG_MAX] = { @@ -641,6 +641,16 @@ parsetext(struct mdoc *m, int line, char *buf) if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j))) return(0); + /* + * Mark the end of a sentence. Only works when you respect + * Jason's rule: "new sentence, new line". + */ + if ('.' == buf[i-1] || '!' == buf[i-1] || '?' 
== buf[i-1]) { + m->next = MDOC_NEXT_SIBLING; + if ( ! mdoc_elem_alloc(m, line, i, MDOC_eos, NULL)) + return(0); + } + m->next = MDOC_NEXT_SIBLING; return(1); } @@ -668,6 +678,8 @@ parsemacro(struct mdoc *m, int ln, char *buf) { int i, j, c; char mac[5]; + struct mdoc_node *n; + char *t; /* Empty lines are ignored. */ @@ -733,6 +745,26 @@ parsemacro(struct mdoc *m, int ln, char *buf) if ( ! mdoc_macro(m, c, ln, 1, &i, buf)) goto err; + /* + * Mark the end of a sentence, but be careful not to insert + * markers into reference blocks. + */ + n = m->last; + if (n->child) + n = n->child; + while (n->next) + n = n->next; + if (MDOC_TEXT == n->type && m->last->parent->tok != MDOC_Rs) { + t = n->string; + while (t && t[1]) + t++; + if ('.' == *t || '!' == *t || '?' == *t) { + if ( ! mdoc_elem_alloc(m, ln, i, MDOC_eos, NULL)) + return(0); + m->next = MDOC_NEXT_SIBLING; + } + } + return(1); err: /* Error out. */ diff --git a/usr.bin/mandoc/mdoc.h b/usr.bin/mandoc/mdoc.h index 92c7534a180..14f2d8bf967 100644 --- a/usr.bin/mandoc/mdoc.h +++ b/usr.bin/mandoc/mdoc.h @@ -1,4 +1,4 @@ -/* $Id: mdoc.h,v 1.17 2010/02/26 12:12:24 schwarze Exp $ */ +/* $Id: mdoc.h,v 1.18 2010/03/02 00:38:59 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -148,7 +148,8 @@ #define MDOC_br 118 #define MDOC_sp 119 #define MDOC__U 120 -#define MDOC_MAX 121 +#define MDOC_eos 121 +#define MDOC_MAX 122 /* What follows is a list of ALL possible macro arguments. 
*/ diff --git a/usr.bin/mandoc/mdoc_action.c b/usr.bin/mandoc/mdoc_action.c index 5c9e1f30bd8..a65b6801616 100644 --- a/usr.bin/mandoc/mdoc_action.c +++ b/usr.bin/mandoc/mdoc_action.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_action.c,v 1.26 2009/12/23 22:30:17 schwarze Exp $ */ +/* $Id: mdoc_action.c,v 1.27 2010/03/02 00:38:59 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -188,6 +188,7 @@ static const struct actions mdoc_actions[MDOC_MAX] = { { NULL, NULL }, /* br */ { NULL, NULL }, /* sp */ { NULL, NULL }, /* %U */ + { NULL, NULL }, /* eos */ }; #define RSORD_MAX 14 diff --git a/usr.bin/mandoc/mdoc_argv.c b/usr.bin/mandoc/mdoc_argv.c index c1aac1c1c86..fd614b4c54e 100644 --- a/usr.bin/mandoc/mdoc_argv.c +++ b/usr.bin/mandoc/mdoc_argv.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_argv.c,v 1.20 2010/03/02 00:13:57 schwarze Exp $ */ +/* $Id: mdoc_argv.c,v 1.21 2010/03/02 00:38:59 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -205,6 +205,7 @@ static int mdoc_argflags[MDOC_MAX] = { 0, /* br */ 0, /* sp */ 0, /* %U */ + 0, /* eos */ }; diff --git a/usr.bin/mandoc/mdoc_hash.c b/usr.bin/mandoc/mdoc_hash.c index 4af63bea72c..19122466d46 100644 --- a/usr.bin/mandoc/mdoc_hash.c +++ b/usr.bin/mandoc/mdoc_hash.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_hash.c,v 1.6 2009/09/21 21:11:37 schwarze Exp $ */ +/* $Id: mdoc_hash.c,v 1.7 2010/03/02 00:38:59 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -25,7 +25,7 @@ #include "libmdoc.h" -static u_char table[27 * 12]; +static u_char table[27 * 13]; /* * XXX - this hash has global scope, so if intended for use as a library @@ -43,17 +43,17 @@ mdoc_hash_init(void) p = mdoc_macronames[i]; if (isalpha((u_char)p[1])) - major = 12 * (tolower((u_char)p[1]) - 97); + major = 13 * (tolower((u_char)p[1]) - 97); else - major = 12 * 26; + major = 13 * 26; - for (j = 0; j < 12; j++) + for (j = 0; j < 13; j++) if (UCHAR_MAX == table[major + j]) { table[major 
+ j] = (u_char)i; break; } - assert(j < 12); + assert(j < 13); } } @@ -68,16 +68,16 @@ mdoc_hash_find(const char *p) return(MDOC_MAX); if (isalpha((u_char)p[1])) - major = 12 * (tolower((u_char)p[1]) - 97); + major = 13 * (tolower((u_char)p[1]) - 97); else if ('1' == p[1]) - major = 12 * 26; + major = 13 * 26; else return(MDOC_MAX); if (p[2] && p[3]) return(MDOC_MAX); - for (j = 0; j < 12; j++) { + for (j = 0; j < 13; j++) { if (UCHAR_MAX == (i = table[major + j])) break; if (0 == strcmp(p, mdoc_macronames[i])) diff --git a/usr.bin/mandoc/mdoc_html.c b/usr.bin/mandoc/mdoc_html.c index d3eefba017b..ab1250ee032 100644 --- a/usr.bin/mandoc/mdoc_html.c +++ b/usr.bin/mandoc/mdoc_html.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_html.c,v 1.7 2010/02/18 02:11:26 schwarze Exp $ */ +/* $Id: mdoc_html.c,v 1.8 2010/03/02 00:38:59 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -251,6 +251,7 @@ static const struct htmlmdoc mdocs[MDOC_MAX] = { {mdoc_sp_pre, NULL}, /* br */ {mdoc_sp_pre, NULL}, /* sp */ {mdoc__x_pre, mdoc__x_post}, /* %U */ + {NULL, NULL}, /* eos */ }; diff --git a/usr.bin/mandoc/mdoc_macro.c b/usr.bin/mandoc/mdoc_macro.c index b0c04c55387..5897c5c9472 100644 --- a/usr.bin/mandoc/mdoc_macro.c +++ b/usr.bin/mandoc/mdoc_macro.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_macro.c,v 1.30 2010/03/02 00:13:57 schwarze Exp $ */ +/* $Id: mdoc_macro.c,v 1.31 2010/03/02 00:38:59 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -177,6 +177,7 @@ const struct mdoc_macro __mdoc_macros[MDOC_MAX] = { { in_line_eoln, 0 }, /* br */ { in_line_eoln, 0 }, /* sp */ { in_line_eoln, 0 }, /* %U */ + { NULL, 0 }, /* eos */ }; const struct mdoc_macro * const mdoc_macros = __mdoc_macros; diff --git a/usr.bin/mandoc/mdoc_term.c b/usr.bin/mandoc/mdoc_term.c index d6cbb0d1568..dba0d2db54f 100644 --- a/usr.bin/mandoc/mdoc_term.c +++ b/usr.bin/mandoc/mdoc_term.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_term.c,v 1.69 2010/02/18 02:11:26 schwarze Exp $ 
*/ +/* $Id: mdoc_term.c,v 1.70 2010/03/02 00:38:59 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -131,6 +131,7 @@ static int termp_ud_pre(DECL_ARGS); static int termp_vt_pre(DECL_ARGS); static int termp_xr_pre(DECL_ARGS); static int termp_xx_pre(DECL_ARGS); +static int termp_eos_pre(DECL_ARGS); static const struct termact termacts[MDOC_MAX] = { { termp_ap_pre, NULL }, /* Ap */ @@ -254,6 +255,7 @@ static const struct termact termacts[MDOC_MAX] = { { termp_sp_pre, NULL }, /* br */ { termp_sp_pre, NULL }, /* sp */ { termp_under_pre, termp____post }, /* %U */ + { termp_eos_pre, NULL }, /* eos */ }; @@ -2128,3 +2130,15 @@ termp__t_pre(DECL_ARGS) p->flags |= TERMP_NOSPACE; return(1); } + + +/* ARGSUSED */ +static int +termp_eos_pre(DECL_ARGS) +{ + const char ascii_eos[2] = { ASCII_EOS, 0 }; + + term_word(p, ascii_eos); + p->flags |= TERMP_NOSPACE; + return(1); +} diff --git a/usr.bin/mandoc/mdoc_validate.c b/usr.bin/mandoc/mdoc_validate.c index 1c3b1b71a56..34cc96c9423 100644 --- a/usr.bin/mandoc/mdoc_validate.c +++ b/usr.bin/mandoc/mdoc_validate.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_validate.c,v 1.42 2010/02/18 02:11:26 schwarze Exp $ */ +/* $Id: mdoc_validate.c,v 1.43 2010/03/02 00:38:59 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -269,6 +269,7 @@ const struct valids mdoc_valids[MDOC_MAX] = { { NULL, posts_notext }, /* br */ { NULL, posts_sp }, /* sp */ { NULL, posts_text1 }, /* %U */ + { NULL, NULL }, /* eos */ }; @@ -904,7 +905,8 @@ post_vt(POST_ARGS) return(1); for (n = mdoc->last->child; n; n = n->next) - if (MDOC_TEXT != n->type) + if (MDOC_TEXT != n->type && + (MDOC_ELEM != n->type || MDOC_eos != n->tok)) if ( ! 
mdoc_nwarn(mdoc, n, EBADCHILD)) return(0); diff --git a/usr.bin/mandoc/term.c b/usr.bin/mandoc/term.c index 0f62eba372e..31de5a40886 100644 --- a/usr.bin/mandoc/term.c +++ b/usr.bin/mandoc/term.c @@ -1,4 +1,4 @@ -/* $Id: term.c,v 1.20 2009/12/24 02:08:14 schwarze Exp $ */ +/* $Id: term.c,v 1.21 2010/03/02 00:38:59 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -184,16 +184,24 @@ term_flushln(struct termp *p) break; else if (8 == p->buf[j]) vsz--; - else + else if (ASCII_EOS != p->buf[j]) vsz++; } /* + * Skip empty words. This happens due to the ASCII_EOS + * after the end of the final sentence of a paragraph. + */ + if (vsz == 0 && j == (int)p->col) + break; + + /* * Choose the number of blanks to prepend: no blank at the * beginning of a line, one between words -- but do not * actually write them yet. */ - vbl = (size_t)(0 == vis ? 0 : 1); + vbl = (size_t)(ASCII_EOS == p->buf[i] ? 2 : + (0 == vis ? 0 : 1)); /* * Find out whether we would exceed the right margin. @@ -227,11 +235,9 @@ term_flushln(struct termp *p) for ( ; i < (int)p->col; i++) { if (' ' == p->buf[i]) break; - - /* The unit sep. is a non-breaking space. */ - if (31 == p->buf[i]) + if (ASCII_NBRSP == p->buf[i]) putchar(' '); - else + else if (ASCII_EOS != p->buf[i]) putchar(p->buf[i]); } vis += vsz; |