From e0f3ca1e45ac39a9d861bcdc913ff8d889157334 Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Sat, 22 Aug 2009 20:14:38 +0000 Subject: Kristaps@ significantly overhauled libman. I'm committing this in one large chunk because in contrast to -mdoc, -man is mostly untested in OpenBSD anyway, so any fallout can be fixed in-tree. Among others, improved support for .IP, .HP, and .TP. Now in sync with release 1.9.0. --- usr.bin/mandoc/Makefile | 7 +- usr.bin/mandoc/libman.h | 36 ++- usr.bin/mandoc/man.3 | 12 +- usr.bin/mandoc/man.7 | 541 +++++++++++++++++++++++++++++------------ usr.bin/mandoc/man.c | 163 +++++++++---- usr.bin/mandoc/man.h | 14 +- usr.bin/mandoc/man_action.c | 7 +- usr.bin/mandoc/man_macro.c | 380 +++++++++++++++++------------ usr.bin/mandoc/man_term.c | 544 +++++++++++++++++++++++++++++++++++------- usr.bin/mandoc/man_validate.c | 221 ++++++++++++----- usr.bin/mandoc/tree.c | 17 +- 11 files changed, 1425 insertions(+), 517 deletions(-) diff --git a/usr.bin/mandoc/Makefile b/usr.bin/mandoc/Makefile index c928311ba7b..e6cf2ef04a7 100644 --- a/usr.bin/mandoc/Makefile +++ b/usr.bin/mandoc/Makefile @@ -1,8 +1,8 @@ -# $OpenBSD: Makefile,v 1.15 2009/08/22 17:23:13 schwarze Exp $ +# $OpenBSD: Makefile,v 1.16 2009/08/22 20:14:37 schwarze Exp $ .include -VERSION=1.8.5 +VERSION=1.9.0 CFLAGS+=-DVERSION=\"${VERSION}\" CFLAGS+=-W -Wall -Wstrict-prototypes .if ${USE_GCC3:L} != "no" @@ -12,7 +12,8 @@ CFLAGS+=-Wno-unused-parameter SRCS= mandoc.c mdoc_macro.c mdoc.c mdoc_hash.c mdoc_strings.c \ mdoc_argv.c mdoc_validate.c mdoc_action.c lib.c att.c \ arch.c vol.c msec.c st.c -SRCS+= man_macro.c man.c man_hash.c man_validate.c man_action.c +SRCS+= man_macro.c man.c man_hash.c man_validate.c \ + man_action.c man_argv.c SRCS+= main.c mdoc_term.c ascii.c term.c tree.c man_term.c PROG= mandoc diff --git a/usr.bin/mandoc/libman.h b/usr.bin/mandoc/libman.h index 32ca61a0d44..2f23ca70323 100644 --- a/usr.bin/mandoc/libman.h +++ b/usr.bin/mandoc/libman.h @@ -1,4 +1,4 @@ -/* $Id: libman.h,v 1.6 2009/08/22 15:15:37 schwarze Exp $ */ +/* $Id: libman.h,v 1.7 2009/08/22 20:14:37 schwarze Exp $ */ /* * Copyright (c) 2009 Kristaps Dzonsons * @@ -31,7 +31,8 @@ struct man { int pflags; int flags; #define MAN_HALT (1 << 0) -#define MAN_NLINE (1 << 1) +#define MAN_ELINE (1 << 1) /* Next-line element scope. */ +#define MAN_BLINE (1 << 2) /* Next-line block scope. */ enum man_next next; struct man_node *last; struct man_node *first; @@ -50,9 +51,26 @@ enum merr { WNOTITLE, WESCAPE, WNUMFMT, + WHEADARGS, + WBODYARGS, + WNHEADARGS, + WMACRO, + WMACROFORM, + WEXITSCOPE, WERRMAX }; +#define MACRO_PROT_ARGS struct man *m, int tok, int line, \ + int ppos, int *pos, char *buf + +struct man_macro { + int (*fp)(MACRO_PROT_ARGS); + int flags; +#define MAN_SCOPED (1 << 0) +}; + +extern const struct man_macro *const man_macros; + __BEGIN_DECLS #define man_perr(m, l, p, t) \ @@ -64,21 +82,29 @@ __BEGIN_DECLS #define man_nwarn(m, n, t) \ man_err((m), (n)->line, (n)->pos, 0, (t)) -int man_err(struct man *, int, int, int, enum merr); int man_word_alloc(struct man *, int, int, const char *); +int man_block_alloc(struct man *, int, int, int); +int man_head_alloc(struct man *, int, int, int); +int man_body_alloc(struct man *, int, int, int); int man_elem_alloc(struct man *, int, int, int); void man_node_free(struct man_node *); void man_node_freelist(struct man_node *); void *man_hash_alloc(void); -int man_macro(struct man *, int, - int, int, int *, char *); int man_hash_find(const void *, const char *); void man_hash_free(void *); int man_macroend(struct man *); +int man_args(struct man *, int, int *, char *, char **); +#define ARGS_ERROR (-1) +#define ARGS_EOLN (0) +#define ARGS_WORD (1) +#define ARGS_QWORD (1) +int man_err(struct man *, int, int, int, enum merr); int man_vwarn(struct man *, int, int, const char *, ...); int man_verr(struct man *, int, int, const char *, ...); int man_valid_post(struct man *); +int man_valid_pre(struct man *, const struct man_node *); int man_action_post(struct man *); +int man_unscope(struct man *, const struct man_node *); __END_DECLS diff --git a/usr.bin/mandoc/man.3 b/usr.bin/mandoc/man.3 index f9b20f69f5d..a7db356ff1c 100644 --- a/usr.bin/mandoc/man.3 +++ b/usr.bin/mandoc/man.3 @@ -1,4 +1,4 @@ -.\" $Id: man.3,v 1.4 2009/08/09 17:20:17 schwarze Exp $ +.\" $Id: man.3,v 1.5 2009/08/22 20:14:37 schwarze Exp $ .\" .\" Copyright (c) 2009 Kristaps Dzonsons .\" @@ -14,7 +14,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: August 9 2009 $ +.Dd $Mdocdate: August 22 2009 $ .Dt MAN 3 .Os .\" SECTION @@ -220,7 +220,13 @@ where capitalised non-terminals represent nodes. .It ROOT \(<- mnode+ .It mnode -\(<- ELEMENT | TEXT +\(<- ELEMENT | TEXT | BLOCK +.It BLOCK +\(<- HEAD BODY +.It HEAD +\(<- mnode* +.It BODY +\(<- mnode* .It ELEMENT \(<- ELEMENT | TEXT* .It TEXT diff --git a/usr.bin/mandoc/man.7 b/usr.bin/mandoc/man.7 index 49a49b0236f..ab7ebca76c4 100644 --- a/usr.bin/mandoc/man.7 +++ b/usr.bin/mandoc/man.7 @@ -1,4 +1,4 @@ -.\" $Id: man.7,v 1.8 2009/08/22 16:41:45 schwarze Exp $ +.\" $Id: man.7,v 1.9 2009/08/22 20:14:37 schwarze Exp $ .\" .\" Copyright (c) 2009 Kristaps Dzonsons .\" @@ -17,228 +17,451 @@ .Dd $Mdocdate: August 22 2009 $ .Dt MAN 7 .Os -.\" SECTION +. +. .Sh NAME -.Nm man -.Nd man language reference -.\" SECTION +. Nm man +. Nd man language reference +. +. .Sh DESCRIPTION The -.Nm man +. Nm man language was historically used to format -.Ux +. Ux manuals. This reference document describes its syntax, structure, and usage. -.Pp -.Bf -emphasis +. Pp +. Bf -emphasis Do not use -.Nm +. Nm to write your manuals. -.Ef +. Ef Use the -.Xr mdoc 7 +. Xr mdoc 7 language, instead. -.\" PARAGRAPH -.Pp +. Pp An -.Nm +. Nm document follows simple rules: lines beginning with the control character -.Sq \&. +. Sq \&. are parsed for macros. Other lines are interpreted within the scope of prior macros: -.Bd -literal -offset indent +. Bd -literal -offset indent \&.SH Macro lines change control state. Other lines are interpreted within the current state. -.Ed -.\" SECTION +. Ed +. +. .Sh INPUT ENCODING -.Nm +. Nm documents may contain only graphable 7-bit ASCII characters, the space character, and the tabs character. All manuals must have -.Ux +. Ux line termination. -.Pp +. Pp Blank lines are acceptable; where found, the output will assert a vertical space. -.Pp +. Pp The -.Sq \ec +. Sq \ec escape is common in historical -.Nm +. Nm documents; if encountered at the end of a word, it ensures that the subsequent word isn't off-set by whitespace. -.\" SUB-SECTION -.Ss Comments +. +. +. Ss Comments Text following a -.Sq \e" , +. Sq \e\*" , whether in a macro or free-form text line, is ignored to the end of line. A macro line with only a control character and comment escape, -.Sq \&.\e" , -is also ignored. -.\" SUB-SECTION -.Ss Special Characters +. Sq \&.\e" , +is also ignored. Macro lines with only a control charater and +optionally whitespace are stripped from input. +. +. +. Ss Special Characters Special characters may occur in both macro and free-form lines. Sequences begin with the escape character -.Sq \e +. Sq \e followed by either an open-parenthesis -.Sq \&( +. Sq \&( for two-character sequences; an open-bracket -.Sq \&[ +. Sq \&[ for n-character sequences (terminated at a close-bracket -.Sq \&] ) ; +. Sq \&] ) ; or a single one-character sequence. See -.Xr mandoc_char 7 +. Xr mandoc_char 7 for a complete list. Examples include -.Sq \e(em -.Pq em-dash +. Sq \e(em +. Pq em-dash and -.Sq \ee -.Pq back-slash . -.\" SUB-SECTION---------------------- -.Ss Text Decoration +. Sq \ee +. Pq back-slash . +. +. +. Ss Text Decoration Terms may be text-decorated using the -.Sq \ef +. Sq \ef escape followed by an indicator: B (bold), I, (italic), or P and R (Roman, or reset). -.\" SUB-SECTION---------------------- -.Ss Whitespace +. +. +. Ss Whitespace Unless specifically escaped, consecutive blocks of whitespace are pruned from input. These are later re-added, if applicable, by a front-end utility such as -.Xr mandoc 1 . -.\" SECTION -.Sh STRUCTURE +. Xr mandoc 1 . +. +. +.Sh MANUAL STRUCTURE Each -.Nm +. Nm document must contain contains at least the -.Sq \&.TH +. Sq \&TH macro describing the document's section and title. It may occur anywhere in the document, although conventionally, it appears as the first macro. -.Pp -Beyond the -.Sq \&.TH , -at least one macro or text node must appear in the document. -.\" SECTION -.Sh SYNTAX +. Pp +Beyond +. Sq \&TH , +at least one macro or text node must appear in the document. Documents +are generally structured as follows: +. Bd -literal -offset indent +\&.TH FOO 1 "13 Aug 2009" +\&. +\&.SH NAME +foo \e- a description goes here +\&. +\&.SH SYNOPSIS +\efBfoo\efR [\efB\e-options\efR] arguments... +\&. +\&.SH DESCRIPTION +The \efBfoo\efR utility does... +\&. +\&.\e\*q .SH RETURN VALUES +\&.\e\*q .SH ENVIRONMENT +\&.\e\*q .SH FILES +\&.\e\*q .SH EXAMPLES +\&.\e\*q .SH DIAGNOSTICS +\&.\e\*q .SH ERRORS +\&.\e\*q .SH SEE ALSO +\&.\e\*q \efBbar\efR(1) +\&.\e\*q .SH STANDARDS +\&.\e\*q .SH HISTORY +\&.\e\*q .SH AUTHORS +\&.\e\*q .SH CAVEATS +\&.\e\*q .SH BUGS +. Ed +. +. +.Sh MACRO SYNTAX Macros are one to three three characters in length and begin with a control character , -.Sq \&. , +. Sq \&. , at the beginning of the line. An arbitrary amount of whitespace may sit between the control character and the macro name. Thus, -.Sq \&.PP +. Sq \&.PP and -.Sq \&.\ \ \ \&PP +. Sq \&.\ \ \ \&PP are equivalent. -.Pp -All -.Nm -macros follow the same structural rules: -.Bd -literal -offset indent -\&.YO \(lBbody...\(rB -.Ed -.Pp +. Pp The -.Dq body -consists of zero or more arguments to the macro. -.Pp -.Nm -has a primitive notion of multi-line scope for the following macros: -.Sq \&.TM , -.Sq \&.SM , -.Sq \&.SB , -.Sq \&.BI , -.Sq \&.IB , -.Sq \&.BR , -.Sq \&.RB , -.Sq \&.R , -.Sq \&.B , -.Sq \&.I , -.Sq \&.IR -and -.Sq \&.RI . -When these macros are invoked without arguments, the subsequent line is -considered a continuation of the macro. Thus: -.Bd -literal -offset indent +. Nm +macros are classified by scope: line scope or block scope. Line-scoped +macros are only scoped to the current line (and, in some situations, +the subsequent line). Block macros are scoped to the current line and +subsequent lines until closed by another block macro. +. +. +. Ss Line Macros +Line-macros are scoped to the current line, with the body consisting of +zero or more arguments. If a macro is next-line scoped and the line +arguments are empty, the next line is used instead. Thus: +. Bd -literal -offset indent \&.RI foo -.Ed -.Pp +. Ed +. Pp is equivalent to -.Sq \&.RI foo . -If two consecutive lines exhibit the latter behaviour, -an error is raised. Thus, the following is not acceptable: -.Bd -literal -offset indent -\&.RI -\&.I -Hello, world. -.Ed -.Pp +. Sq \&.RI foo . +.\" PARAGRAPH +Consecutive next-line invocations are disallowed. +. Bd -literal -offset indent +\&.YO \(lBbody...\(rB +\(lBbody...\(rB +. Ed +. Pp +. Bl -column -compact -offset indent "MacroX" "ArgumentsX" "ScopeXXXXX" +. It Em Macro Ta Em Arguments Ta Em Scope +. It \&B Ta n Ta next-line +. It \&BI Ta n Ta current +. It \&BR Ta n Ta current +. It \&I Ta n Ta next-line +. It \&IB Ta n Ta current +. It \&IR Ta n Ta current +. It \&R Ta n Ta next-line +. It \&RB Ta n Ta current +. It \&RI Ta n Ta current +. It \&SB Ta n Ta next-line +. It \&SM Ta n Ta next-line +. It \&TH Ta >1, <6 Ta current +. It \&br Ta 0 Ta current +. It \&fi Ta 0 Ta current +. It \&i Ta n Ta current +. It \&na Ta 0 Ta current +. It \&nf Ta 0 Ta current +. It \&r Ta 0 Ta current +. It \&sp Ta 1 Ta current +. El +. Pp +The lower-case +. Sq \&br , +. Sq \&fi , +. Sq \&i , +. Sq \&na , +. Sq \&nf , +. Sq \&r , +and +. Sq \&sp +macros aren't historically part of +. Nm +and should not be used. They're included for compatibility. +. +. +. Ss Block Macros +Block macros are comprised of a head and body. The head is scoped to +the current line and, in one circumstance, the next line; the body is +scoped to subsequent lines and is closed out by a subsequent block macro +invocation. +. Bd -literal -offset indent +\&.YO \(lBhead...\(rB +\(lBhead...\(rB +\(lBbody...\(rB +. Ed +. Pp +If a block macro is next-line scoped, it may only be followed by in-line +macros (excluding +. Sq br , +. Sq na , +. Sq sp , +. Sq nf , +. Sq fi , +and +. Sq TH ) . +. Pp +. Bl -column "MacroX" "Arguments" "ScopeXXXX" -compact -offset indent +. It Em Macro Ta Em Arguments Ta Em Scope +. It \&HP Ta <2 Ta current +. It \&IP Ta <3 Ta current +. It \&LP Ta 0 Ta current +. It \&P Ta 0 Ta current +. It \&PP Ta 0 Ta current +. It \&SH Ta >0 Ta current +. It \&SS Ta >0 Ta current +. It \&TP Ta n Ta next-line +. El +. +. +.Sh REFERENCE +This section is a canonical reference to all macros, arranged +alphabetically. For the scoping of individual macros, see +. Sx MACRO SYNTAX . +. +. +. Ss Terms +In this reference, a numerical width may be either a standalone natural +number (such as 3, 4, 10, etc.) or a natural number followed by a width +multiplier +. Qq n , +corresponding to the width of the formatted letter n, or +. Qq m , +corresponding to the width of the formatted letter m. The latter is the +default, if unspecified. Thus, +. Bd -literal -offset indent +\&.HP 12n +. Ed +. Pp +indicates an offset of 12 +. Qq n +. Ns -sized +letters. +. +. +. Ss Macro Reference +. Bl -tag -width Ds +. It \&B +Text is rendered in bold face. +. It \&BI +Text is rendered alternately in bold face and italic. Thus, +. Sq \&.BI this word and that +causes +. Sq this +and +. Sq and +to render in bold face, while +. Sq word +and +. Sq that +render in italics. Whitespace between arguments is omitted in output. +. It \&BR +Text is rendered alternately in bold face and roman (the default font). +Whitespace between arguments is omitted in output. +. It \&HP +Begin a paragraph whose initial output line is left-justified, but +subsequent output lines are indented, with the following syntax: +. Bd -literal -offset indent +\&.HP [width] +. Ed +. Pp +If +. Va width +is specified, it's saved for later paragraph left-margins; if +unspecified, the saved or default width is used. +. It \&I +Text is rendered in italics. +. It \&IB +Text is rendered alternately in italics and bold face. Whitespace +between arguments is omitted in output. +. It \&IP +Begin a paragraph with the following syntax: +. Bd -literal -offset indent +\&.IP [head [width]] +. Ed +. Pp +This follows the behaviour of the +. Sq \&TP +except for the macro syntax (all arguments on the line, instead of +having next-line scope). If +. Va width +is specified, it's saved for later paragraph left-margins; if +unspecified, the saved or default width is used. +. It \&IR +Text is rendered alternately in italics and roman (the default font). +Whitespace between arguments is omitted in output. +. It \&LP, \&P, \&PP +Begin an undecorated paragraph. The scope of a paragraph is closed by a +subsequent paragraph, sub-section, section, or end of file. The saved +paragraph left-margin width is re-set to the default. +. It \&R +Text is rendered in roman (the default font). +. It \&RB +Text is rendered alternately in roman (the default font) and bold face. +Whitespace between arguments is omitted in output. +. It \&RI +Text is rendered alternately in roman (the default font) and italics. +Whitespace between arguments is omitted in output. +. It \&SB +Text is rendered in small size (one point smaller than the default font) +bold face. +. It \&SH +Begin a section. The scope of a section is only closed by another +section or the end of file. The paragraph left-margin width is re-set +to the default. +. It \&SM +Text is rendered in small size (one point smaller than the default +font). +. It \&SS +Begin a sub-section. The scope of a sub-section is closed by a +subsequent sub-section, section, or end of file. The paragraph +left-margin width is re-set to the default. +. It \&TH +Sets the title of the manual page with the following syntax: +. Bd -literal -offset indent +\&.TH title section date source volume +. Ed +. Pp +At least the +. Va title +and +. Va section +arguments must be provided. The +. Va date +argument should be formatted as +. Qq %b [%d] %Y +format, described in +. Xr strptime 3 . The -.Sq \&.TP -macro is similar, but does not need an empty argument line to trigger -the behaviour. -.\" SECTION -.Sh MACROS -This section contains a complete list of all -.Nm -macros and corresponding number of arguments. -.Pp -.Bl -column "MacroX" "Arguments" -compact -offset indent -.It Em Macro Ta Em Arguments -.It \&.TH Ta >1, <6 -.It \&.SH Ta >0 -.It \&.SS Ta >0 -.It \&.TP Ta n -.It \&.LP Ta 0 -.It \&.PP Ta 0 -.It \&.P Ta 0 -.It \&.IP Ta <3 -.It \&.HP Ta <2 -.It \&.SM Ta n -.It \&.SB Ta n -.It \&.BI Ta n -.It \&.IB Ta n -.It \&.BR Ta n -.It \&.RB Ta n -.It \&.R Ta n -.It \&.B Ta n -.It \&.I Ta n -.It \&.IR Ta n -.It \&.RI Ta n -.El -.Pp -Although not historically part of the -.Nm -system, the following macros are also supported: -.Pp -.Bl -column "MacroX" "Arguments" -compact -offset indent -.It Em Macro Ta Em Arguments -.It \&.br Ta 0 -.It \&.i Ta n -.El -.Pp -These follow the same calling conventions as the above -.Nm -macros. -.\" SECTION +. Va source +string specifies the organisation providing the utility. The +. Va volume +replaces the default rendered volume as dictated by the manual section. +. It \&TP +Begin a paragraph where the head, if exceeding the indentation width, is +followed by a newline; if not, the body follows on the same line after a +buffer to the indentation width. Subsequent output lines are indented. +. Pp +The indentation width may be set as follows: +. Bd -literal -offset indent +\&.TP [width] +. Ed +. Pp +Where +. Va width +must be a properly-formed numeric width. If +. Va width +is specified, it's saved for later paragraph left-margins; if +unspecified, the saved or default width is used. +. It \&br +Breaks the current line. Consecutive invocations have no further effect. +. It \&fi +End literal mode begun by +. Sq \&nf . +. It \&i +Italicise arguments. If no arguments are specified, all subsequent text +is italicised. +. It \&na +No alignment to the right margin. +. It \&nf +Begin literal mode: all subsequent free-form lines have their end of +line boundaries preserved. May be ended by +. Sq \&fi . +. It \&r +Fonts and styles (bold face, italics) reset to roman (default font). +. It \&sp +Insert n spaces, where n is the macro's positive numeric argument. If +0, this is equivalent to the +. Sq br +macro. +. El +. +. .Sh COMPATIBILITY -See -.Xr mdoc 7 -for groff compatibility notes. -.\" SECTION +This section documents compatibility with other roff implementations, at +this time limited to +. Xr groff 1 . +. Bl -hyphen +. It +In quoted literals, groff allowed pair-wise double-quotes to produce a +standalone double-quote in formatted output. This idiosyncratic +behaviour is no longer applicable. +. It +The +. Sq \&sp +macro does not accept negative numbers. +. It +Blocks of whitespace are stripped from both macro and free-form text +lines (except when in literal mode), while groff would retain whitespace +in free-form text lines. +. El +. +. .Sh SEE ALSO -.Xr mandoc 1 , -.Xr mandoc_char 7 -.\" SECTION +. Xr mandoc 1 , +. Xr mandoc_char 7 +. +. .Sh AUTHORS The -.Nm -utility was written by -.An Kristaps Dzonsons Aq kristaps@kth.se . -.\" SECTION +. Nm +reference was written by +. An Kristaps Dzonsons Aq kristaps@kth.se . +. +. .Sh CAVEATS Do not use this language. Use -.Xr mdoc 7 , +. Xr mdoc 7 , instead. +. diff --git a/usr.bin/mandoc/man.c b/usr.bin/mandoc/man.c index abd8a02fd9b..a35d914b6d4 100644 --- a/usr.bin/mandoc/man.c +++ b/usr.bin/mandoc/man.c @@ -1,4 +1,4 @@ -/* $Id: man.c,v 1.8 2009/08/22 15:15:37 schwarze Exp $ */ +/* $Id: man.c,v 1.9 2009/08/22 20:14:37 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -35,6 +35,12 @@ const char *const __man_merrnames[WERRMAX] = { "document has no title/section", /* WNOTITLE */ "invalid escape sequence", /* WESCAPE */ "invalid number format", /* WNUMFMT */ + "expected block head arguments", /* WHEADARGS */ + "expected block body arguments", /* WBODYARGS */ + "expected empty block head", /* WNHEADARGS */ + "unknown macro", /* WMACRO */ + "ill-formed macro", /* WMACROFORM */ + "scope open on exit" /* WEXITSCOPE */ }; const char *const __man_macronames[MAN_MAX] = { @@ -43,7 +49,8 @@ const char *const __man_macronames[MAN_MAX] = { "IP", "HP", "SM", "SB", "BI", "IB", "BR", "RB", "R", "B", "I", "IR", - "RI", "na", "i", "sp" + "RI", "na", "i", "sp", + "nf", "fi", "r" }; const char * const *man_macronames = __man_macronames; @@ -199,6 +206,22 @@ man_node_append(struct man *man, struct man_node *p) p->parent->nchild++; + if ( ! man_valid_pre(man, p)) + return(0); + + switch (p->type) { + case (MAN_HEAD): + assert(MAN_BLOCK == p->parent->type); + p->parent->head = p; + break; + case (MAN_BODY): + assert(MAN_BLOCK == p->parent->type); + p->parent->body = p; + break; + default: + break; + } + man->last = p; switch (p->type) { @@ -245,6 +268,51 @@ man_elem_alloc(struct man *man, int line, int pos, int tok) } +int +man_head_alloc(struct man *m, int line, int pos, int tok) +{ + struct man_node *p; + + p = man_node_alloc(line, pos, MAN_HEAD, tok); + if (NULL == p) + return(0); + if ( ! man_node_append(m, p)) + return(0); + m->next = MAN_NEXT_CHILD; + return(1); +} + + +int +man_body_alloc(struct man *m, int line, int pos, int tok) +{ + struct man_node *p; + + p = man_node_alloc(line, pos, MAN_BODY, tok); + if (NULL == p) + return(0); + if ( ! man_node_append(m, p)) + return(0); + m->next = MAN_NEXT_CHILD; + return(1); +} + + +int +man_block_alloc(struct man *m, int line, int pos, int tok) +{ + struct man_node *p; + + p = man_node_alloc(line, pos, MAN_BLOCK, tok); + if (NULL == p) + return(0); + if ( ! man_node_append(m, p)) + return(0); + m->next = MAN_NEXT_CHILD; + return(1); +} + + int man_word_alloc(struct man *man, int line, int pos, const char *word) @@ -290,29 +358,33 @@ static int man_ptext(struct man *m, int line, char *buf) { + /* First allocate word. */ + if ( ! man_word_alloc(m, line, 0, buf)) return(0); m->next = MAN_NEXT_SIBLING; /* - * If this is one of the zany NLINE macros that consumes the - * next line of input as being influenced, then close out the - * existing macro "scope" and continue processing. + * Co-ordinate what happens with having a next-line scope open: + * first close out the element scope (if applicable), then close + * out the block scope (also if applicable). */ - if ( ! (MAN_NLINE & m->flags)) - return(1); + /* XXX - this should be in man_action.c. */ - m->flags &= ~MAN_NLINE; - m->last = m->last->parent; + if (MAN_ELINE & m->flags) { + m->flags &= ~MAN_ELINE; + if ( ! man_unscope(m, m->last->parent)) + return(0); + } - assert(MAN_ROOT != m->last->type); - if ( ! man_valid_post(m)) - return(0); - if ( ! man_action_post(m)) - return(0); + if ( ! (MAN_BLINE & m->flags)) + return(1); + m->flags &= ~MAN_BLINE; - return(1); + if ( ! man_unscope(m, m->last->parent)) + return(0); + return(man_body_alloc(m, line, 0, m->last->tok)); } @@ -321,12 +393,10 @@ man_pmacro(struct man *m, int ln, char *buf) { int i, j, c, ppos, fl; char mac[5]; - struct man_node *n; /* Comments and empties are quickly ignored. */ - n = m->last; - fl = MAN_NLINE & m->flags; + fl = m->flags; if (0 == buf[1]) goto out; @@ -356,24 +426,20 @@ man_pmacro(struct man *m, int ln, char *buf) if (j == 4 || j < 1) { if ( ! (MAN_IGN_MACRO & m->pflags)) { - (void)man_verr(m, ln, ppos, - "ill-formed macro: %s", mac); + (void)man_perr(m, ln, ppos, WMACROFORM); goto err; } - if ( ! man_vwarn(m, ln, ppos, - "ill-formed macro: %s", mac)) + if ( ! man_pwarn(m, ln, ppos, WMACROFORM)) goto err; return(1); } if (MAN_MAX == (c = man_hash_find(m->htab, mac))) { if ( ! (MAN_IGN_MACRO & m->pflags)) { - (void)man_verr(m, ln, ppos, - "unknown macro: %s", mac); + (void)man_perr(m, ln, ppos, WMACRO); goto err; } - if ( ! man_vwarn(m, ln, ppos, - "unknown macro: %s", mac)) + if ( ! man_pwarn(m, ln, ppos, WMACRO)) goto err; return(1); } @@ -385,32 +451,33 @@ man_pmacro(struct man *m, int ln, char *buf) /* Begin recursive parse sequence. */ - if ( ! man_macro(m, c, ln, ppos, &i, buf)) + assert(man_macros[c].fp); + + if ( ! (*man_macros[c].fp)(m, c, ln, ppos, &i, buf)) goto err; out: - if (fl) { - /* - * A NLINE macro has been immediately followed with - * another. Close out the preceding macro's scope, and - * continue. - */ - assert(MAN_ROOT != m->last->type); - assert(m->last->parent); - assert(MAN_ROOT != m->last->parent->type); - - if (n != m->last) - m->last = m->last->parent; - - if ( ! man_valid_post(m)) - return(0); - if ( ! man_action_post(m)) - return(0); - m->next = MAN_NEXT_SIBLING; - m->flags &= ~MAN_NLINE; - } + if ( ! (MAN_BLINE & fl)) + return(1); - return(1); + /* + * If we've opened a new next-line element scope, then return + * now, as the next line will close out the block scope. + */ + + if (MAN_ELINE & m->flags) + return(1); + + /* Close out the block scope opened in the prior line. */ + + /* XXX - this should be in man_action.c. */ + + assert(MAN_BLINE & m->flags); + m->flags &= ~MAN_BLINE; + + if ( ! man_unscope(m, m->last->parent)) + return(0); + return(man_body_alloc(m, ln, 0, m->last->tok)); err: /* Error out. */ diff --git a/usr.bin/mandoc/man.h b/usr.bin/mandoc/man.h index 2c100eb2c4a..fd49874369a 100644 --- a/usr.bin/mandoc/man.h +++ b/usr.bin/mandoc/man.h @@ -1,4 +1,4 @@ -/* $Id: man.h,v 1.6 2009/08/22 15:15:37 schwarze Exp $ */ +/* $Id: man.h,v 1.7 2009/08/22 20:14:37 schwarze Exp $ */ /* * Copyright (c) 2009 Kristaps Dzonsons * @@ -43,12 +43,18 @@ #define MAN_na 21 #define MAN_i 22 #define MAN_sp 23 -#define MAN_MAX 24 +#define MAN_nf 24 +#define MAN_fi 25 +#define MAN_r 26 +#define MAN_MAX 27 enum man_type { MAN_TEXT, MAN_ELEM, - MAN_ROOT + MAN_ROOT, + MAN_BLOCK, + MAN_HEAD, + MAN_BODY }; struct man_meta { @@ -73,6 +79,8 @@ struct man_node { #define MAN_ACTED (1 << 1) enum man_type type; char *string; + struct man_node *head; + struct man_node *body; }; #define MAN_IGN_MACRO (1 << 0) diff --git a/usr.bin/mandoc/man_action.c b/usr.bin/mandoc/man_action.c index 5dda9537a6f..ec612f39b06 100644 --- a/usr.bin/mandoc/man_action.c +++ b/usr.bin/mandoc/man_action.c @@ -1,4 +1,4 @@ -/* $Id: man_action.c,v 1.5 2009/08/22 15:15:37 schwarze Exp $ */ +/* $Id: man_action.c,v 1.6 2009/08/22 20:14:37 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -56,6 +56,9 @@ const struct actions man_actions[MAN_MAX] = { { NULL }, /* na */ { NULL }, /* i */ { NULL }, /* sp */ + { NULL }, /* nf */ + { NULL }, /* fi */ + { NULL }, /* r*/ }; @@ -147,12 +150,10 @@ post_TH(struct man *m) */ if (m->last->parent->child == m->last) { - assert(MAN_ROOT == m->last->parent->type); m->last->parent->child = NULL; n = m->last; m->last = m->last->parent; m->next = MAN_NEXT_CHILD; - assert(m->last == m->first); } else { assert(m->last->prev); m->last->prev->next = NULL; diff --git a/usr.bin/mandoc/man_macro.c b/usr.bin/mandoc/man_macro.c index ae292d3213a..12d1d45ef74 100644 --- a/usr.bin/mandoc/man_macro.c +++ b/usr.bin/mandoc/man_macro.c @@ -1,4 +1,4 @@ -/* $Id: man_macro.c,v 1.5 2009/08/22 15:15:37 schwarze Exp $ */ +/* $Id: man_macro.c,v 1.6 2009/08/22 20:14:37 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -21,204 +21,290 @@ #include "libman.h" -#define FL_NLINE (1 << 0) -#define FL_TLINE (1 << 1) - -static int man_args(struct man *, int, - int *, char *, char **); - -static int man_flags[MAN_MAX] = { - 0, /* br */ - 0, /* TH */ - 0, /* SH */ - 0, /* SS */ - FL_TLINE, /* TP */ - 0, /* LP */ - 0, /* PP */ - 0, /* P */ - 0, /* IP */ - 0, /* HP */ - FL_NLINE, /* SM */ - FL_NLINE, /* SB */ - FL_NLINE, /* BI */ - FL_NLINE, /* IB */ - FL_NLINE, /* BR */ - FL_NLINE, /* RB */ - FL_NLINE, /* R */ - FL_NLINE, /* B */ - FL_NLINE, /* I */ - FL_NLINE, /* IR */ - FL_NLINE, /* RI */ - 0, /* na */ - FL_NLINE, /* i */ - 0, /* sp */ +#define REW_REWIND (0) /* See rew_scope(). */ +#define REW_NOHALT (1) /* See rew_scope(). */ +#define REW_HALT (2) /* See rew_scope(). */ + +static int in_line_eoln(MACRO_PROT_ARGS); +static int blk_imp(MACRO_PROT_ARGS); + +static int rew_scope(enum man_type, struct man *, int); +static int rew_dohalt(int, enum man_type, + const struct man_node *); + +const struct man_macro __man_macros[MAN_MAX] = { + { in_line_eoln, 0 }, /* br */ + { in_line_eoln, 0 }, /* TH */ + { blk_imp, 0 }, /* SH */ + { blk_imp, 0 }, /* SS */ + { blk_imp, MAN_SCOPED }, /* TP */ + { blk_imp, 0 }, /* LP */ + { blk_imp, 0 }, /* PP */ + { blk_imp, 0 }, /* P */ + { blk_imp, 0 }, /* IP */ + { blk_imp, 0 }, /* HP */ + { in_line_eoln, MAN_SCOPED }, /* SM */ + { in_line_eoln, MAN_SCOPED }, /* SB */ + { in_line_eoln, 0 }, /* BI */ + { in_line_eoln, 0 }, /* IB */ + { in_line_eoln, 0 }, /* BR */ + { in_line_eoln, 0 }, /* RB */ + { in_line_eoln, MAN_SCOPED }, /* R */ + { in_line_eoln, MAN_SCOPED }, /* B */ + { in_line_eoln, MAN_SCOPED }, /* I */ + { in_line_eoln, 0 }, /* IR */ + { in_line_eoln, 0 }, /* RI */ + { in_line_eoln, 0 }, /* na */ + { in_line_eoln, 0 }, /* i */ + { in_line_eoln, 0 }, /* sp */ + { in_line_eoln, 0 }, /* nf */ + { in_line_eoln, 0 }, /* fi */ + { in_line_eoln, 0 }, /* r */ }; +const struct man_macro * const man_macros = __man_macros; + + int -man_macro(struct man *man, int tok, int line, - int ppos, int *pos, char *buf) +man_unscope(struct man *m, const struct man_node *n) { - int w, la; - char *p; - struct man_node *n; - - if ( ! man_elem_alloc(man, line, ppos, tok)) - return(0); - n = man->last; - man->next = MAN_NEXT_CHILD; - for (;;) { - la = *pos; - w = man_args(man, line, pos, buf, &p); + assert(n); + m->next = MAN_NEXT_SIBLING; - if (-1 == w) + /* LINTED */ + while (m->last != n) { + if ( ! man_valid_post(m)) return(0); - if (0 == w) - break; - - if ( ! man_word_alloc(man, line, la, p)) + if ( ! man_action_post(m)) return(0); - man->next = MAN_NEXT_SIBLING; + m->last = m->last->parent; + assert(m->last); } - if (n == man->last && (FL_NLINE & man_flags[tok])) { - if (MAN_NLINE & man->flags) - return(man_perr(man, line, ppos, WLNSCOPE)); - man->flags |= MAN_NLINE; - return(1); - } + if ( ! man_valid_post(m)) + return(0); + return(man_action_post(m)); +} - if (FL_TLINE & man_flags[tok]) { - if (MAN_NLINE & man->flags) - return(man_perr(man, line, ppos, WLNSCOPE)); - man->flags |= MAN_NLINE; - return(1); + +/* + * There are three scope levels: scoped to the root (all), scoped to the + * section (all less sections), and scoped to subsections (all less + * sections and subsections). + */ +static int +rew_dohalt(int tok, enum man_type type, const struct man_node *n) +{ + + if (MAN_ROOT == n->type) + return(REW_HALT); + assert(n->parent); + if (MAN_ROOT == n->parent->type) + return(REW_REWIND); + if (MAN_VALID & n->flags) + return(REW_NOHALT); + + switch (tok) { + case (MAN_SH): + /* Rewind to ourselves. */ + if (type == n->type && tok == n->tok) + return(REW_REWIND); + break; + case (MAN_SS): + /* Rewind to ourselves. */ + if (type == n->type && tok == n->tok) + return(REW_REWIND); + /* Rewind to a section, if a block. */ + if (MAN_BLOCK == type && MAN_SH == n->parent->tok && + MAN_BODY == n->parent->type) + return(REW_REWIND); + /* Don't go beyond a section. */ + if (MAN_SH == n->tok) + return(REW_HALT); + break; + default: + /* Rewind to ourselves. */ + if (type == n->type && tok == n->tok) + return(REW_REWIND); + /* Rewind to a subsection, if a block. */ + if (MAN_BLOCK == type && MAN_SS == n->parent->tok && + MAN_BODY == n->parent->type) + return(REW_REWIND); + /* Don't go beyond a subsection. */ + if (MAN_SS == n->tok) + return(REW_HALT); + /* Rewind to a section, if a block. */ + if (MAN_BLOCK == type && MAN_SH == n->parent->tok && + MAN_BODY == n->parent->type) + return(REW_REWIND); + /* Don't go beyond a section. */ + if (MAN_SH == n->tok) + return(REW_HALT); + break; } - /* - * Note that when TH is pruned, we'll be back at the root, so - * make sure that we don't clobber as its sibling. - */ + return(REW_NOHALT); +} - for ( ; man->last; man->last = man->last->parent) { - if (man->last == n) - break; - if (man->last->type == MAN_ROOT) + +/* + * Rewinding entails ascending the parse tree until a coherent point, + * for example, the `SH' macro will close out any intervening `SS' + * scopes. When a scope is closed, it must be validated and actioned. + */ +static int +rew_scope(enum man_type type, struct man *m, int tok) +{ + struct man_node *n; + int c; + + /* LINTED */ + for (n = m->last; n; n = n->parent) { + /* + * Whether we should stop immediately (REW_HALT), stop + * and rewind until this point (REW_REWIND), or keep + * rewinding (REW_NOHALT). + */ + c = rew_dohalt(tok, type, n); + if (REW_HALT == c) + return(1); + if (REW_REWIND == c) break; - if ( ! man_valid_post(man)) - return(0); - if ( ! man_action_post(man)) - return(0); } - assert(man->last); + /* Rewind until the current point. */ + + assert(n); + return(man_unscope(m, n)); +} - /* - * Same here regarding whether we're back at the root. - */ - if (man->last->type != MAN_ROOT && ! man_valid_post(man)) +/* + * Parse an implicit-block macro. These contain a MAN_HEAD and a + * MAN_BODY contained within a MAN_BLOCK. Rules for closing out other + * scopes, such as `SH' closing out an `SS', are defined in the rew + * routines. + */ +int +blk_imp(MACRO_PROT_ARGS) +{ + int w, la; + char *p; + + /* Close out prior scopes. */ + + if ( ! rew_scope(MAN_BODY, m, tok)) return(0); - if (man->last->type != MAN_ROOT && ! man_action_post(man)) + if ( ! rew_scope(MAN_BLOCK, m, tok)) return(0); - if (man->last->type != MAN_ROOT) - man->next = MAN_NEXT_SIBLING; - return(1); -} + /* Allocate new block & head scope. */ + if ( ! man_block_alloc(m, line, ppos, tok)) + return(0); + if ( ! man_head_alloc(m, line, ppos, tok)) + return(0); -int -man_macroend(struct man *m) -{ + /* Add line arguments. */ - for ( ; m->last && m->last != m->first; - m->last = m->last->parent) { - if ( ! man_valid_post(m)) + for (;;) { + la = *pos; + w = man_args(m, line, pos, buf, &p); + + if (-1 == w) return(0); - if ( ! man_action_post(m)) + if (0 == w) + break; + + if ( ! man_word_alloc(m, line, la, p)) return(0); + m->next = MAN_NEXT_SIBLING; } - assert(m->last == m->first); - if ( ! man_valid_post(m)) - return(0); - if ( ! man_action_post(m)) + /* Close out head and open body (unless MAN_SCOPE). */ + + if (MAN_SCOPED & man_macros[tok].flags) { + m->flags |= MAN_BLINE; + return(1); + } else if ( ! rew_scope(MAN_HEAD, m, tok)) return(0); - return(1); + return(man_body_alloc(m, line, ppos, tok)); } -/* ARGSUSED */ -static int -man_args(struct man *m, int line, - int *pos, char *buf, char **v) +int +in_line_eoln(MACRO_PROT_ARGS) { + int w, la; + char *p; + struct man_node *n; - if (0 == buf[*pos]) + if ( ! man_elem_alloc(m, line, ppos, tok)) return(0); - /* First parse non-quoted strings. */ - - if ('\"' != buf[*pos]) { - *v = &buf[*pos]; - - while (buf[*pos]) { - if (' ' == buf[*pos]) - if ('\\' != buf[*pos - 1]) - break; - (*pos)++; - } - - if (0 == buf[*pos]) - return(1); - - buf[(*pos)++] = 0; + n = m->last; + m->next = MAN_NEXT_CHILD; - if (0 == buf[*pos]) - return(1); - - while (buf[*pos] && ' ' == buf[*pos]) - (*pos)++; + for (;;) { + la = *pos; + w = man_args(m, line, pos, buf, &p); - if (buf[*pos]) - return(1); + if (-1 == w) + return(0); + if (0 == w) + break; - if ( ! man_pwarn(m, line, *pos, WTSPACE)) - return(-1); + if ( ! man_word_alloc(m, line, la, p)) + return(0); + m->next = MAN_NEXT_SIBLING; + } + if (n == m->last && (MAN_SCOPED & man_macros[tok].flags)) { + m->flags |= MAN_ELINE; return(1); - } + } /* - * If we're a quoted string (and quoted strings are allowed), - * then parse ahead to the next quote. If none's found, it's an - * error. After, parse to the next word. + * Note that when TH is pruned, we'll be back at the root, so + * make sure that we don't clobber as its sibling. */ - *v = &buf[++(*pos)]; - - while (buf[*pos] && '\"' != buf[*pos]) - (*pos)++; + /* FIXME: clean this to use man_unscope(). */ - if (0 == buf[*pos]) { - if ( ! man_pwarn(m, line, *pos, WTQUOTE)) - return(-1); - return(1); + for ( ; m->last; m->last = m->last->parent) { + if (m->last == n) + break; + if (m->last->type == MAN_ROOT) + break; + if ( ! man_valid_post(m)) + return(0); + if ( ! man_action_post(m)) + return(0); } - buf[(*pos)++] = 0; - if (0 == buf[*pos]) - return(1); + assert(m->last); - while (buf[*pos] && ' ' == buf[*pos]) - (*pos)++; + /* + * Same here regarding whether we're back at the root. + */ - if (buf[*pos]) - return(1); + if (m->last->type != MAN_ROOT && ! man_valid_post(m)) + return(0); + if (m->last->type != MAN_ROOT && ! man_action_post(m)) + return(0); + if (m->last->type != MAN_ROOT) + m->next = MAN_NEXT_SIBLING; - if ( ! man_pwarn(m, line, *pos, WTSPACE)) - return(-1); return(1); } + + +int +man_macroend(struct man *m) +{ + + return(man_unscope(m, m->first)); +} + diff --git a/usr.bin/mandoc/man_term.c b/usr.bin/mandoc/man_term.c index 83bd34269e8..f5a62be77f1 100644 --- a/usr.bin/mandoc/man_term.c +++ b/usr.bin/mandoc/man_term.c @@ -1,4 +1,4 @@ -/* $Id: man_term.c,v 1.10 2009/08/22 18:10:02 schwarze Exp $ */ +/* $Id: man_term.c,v 1.11 2009/08/22 20:14:37 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -15,6 +15,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include +#include #include #include #include @@ -26,7 +27,14 @@ #define INDENT 7 #define HALFINDENT 3 +struct mtermp { + int fl; +#define MANT_LITERAL (1 << 0) + int lmargin; +}; + #define DECL_ARGS struct termp *p, \ + struct mtermp *mt, \ const struct man_node *n, \ const struct man_meta *m @@ -38,7 +46,7 @@ struct termact { static int pre_B(DECL_ARGS); static int pre_BI(DECL_ARGS); static int pre_BR(DECL_ARGS); -static int pre_br(DECL_ARGS); +static int pre_HP(DECL_ARGS); static int pre_I(DECL_ARGS); static int pre_IB(DECL_ARGS); static int pre_IP(DECL_ARGS); @@ -49,23 +57,32 @@ static int pre_RI(DECL_ARGS); static int pre_SH(DECL_ARGS); static int pre_SS(DECL_ARGS); static int pre_TP(DECL_ARGS); +static int pre_br(DECL_ARGS); +static int pre_fi(DECL_ARGS); +static int pre_nf(DECL_ARGS); +static int pre_r(DECL_ARGS); +static int pre_sp(DECL_ARGS); static void post_B(DECL_ARGS); static void post_I(DECL_ARGS); +static void post_IP(DECL_ARGS); +static void post_HP(DECL_ARGS); static void post_SH(DECL_ARGS); static void post_SS(DECL_ARGS); +static void post_TP(DECL_ARGS); +static void post_i(DECL_ARGS); static const struct termact termacts[MAN_MAX] = { { pre_br, NULL }, /* br */ { NULL, NULL }, /* TH */ { pre_SH, post_SH }, /* SH */ { pre_SS, post_SS }, /* SS */ - { pre_TP, NULL }, /* TP */ + { pre_TP, post_TP }, /* TP */ { pre_PP, NULL }, /* LP */ { pre_PP, NULL }, /* PP */ { pre_PP, NULL }, /* P */ - { pre_IP, NULL }, /* IP */ - { pre_PP, NULL }, /* HP */ /* FIXME */ + { pre_IP, post_IP }, /* IP */ + { pre_HP, post_HP }, /* HP */ { NULL, NULL }, /* SM */ { pre_B, post_B }, /* SB */ { pre_BI, NULL }, /* BI */ @@ -77,35 +94,96 @@ static const struct termact termacts[MAN_MAX] = { { pre_I, post_I }, /* I */ { pre_IR, NULL }, /* IR */ { pre_RI, NULL }, /* RI */ - { NULL, NULL }, /* na */ - { pre_I, post_I }, /* i */ - { NULL, NULL }, /* sp */ + { NULL, NULL }, /* na */ /* TODO: document that has no effect */ + { pre_I, post_i }, /* i */ + { pre_sp, NULL }, /* sp */ + { pre_nf, NULL }, /* nf */ + { pre_fi, NULL }, /* fi */ + { pre_r, NULL }, /* r */ }; +#ifdef __linux__ +extern size_t strlcpy(char *, const char *, size_t); +extern size_t strlcat(char *, const char *, size_t); +#endif + static void print_head(struct termp *, const struct man_meta *); static void print_body(DECL_ARGS); static void print_node(DECL_ARGS); static void print_foot(struct termp *, const struct man_meta *); +static void fmt_block_vspace(struct termp *, + const struct man_node *); +static int arg_width(const struct man_node *); int man_run(struct termp *p, const struct man *m) { + struct mtermp mt; print_head(p, man_meta(m)); p->flags |= TERMP_NOSPACE; assert(man_node(m)); assert(MAN_ROOT == man_node(m)->type); + + mt.fl = 0; + mt.lmargin = INDENT; + if (man_node(m)->child) - print_body(p, man_node(m)->child, man_meta(m)); + print_body(p, &mt, man_node(m)->child, man_meta(m)); print_foot(p, man_meta(m)); return(1); } +static void +fmt_block_vspace(struct termp *p, const struct man_node *n) +{ + term_newln(p); + + if (NULL == n->prev) + return; + + if (MAN_SS == n->prev->tok) + return; + if (MAN_SH == n->prev->tok) + return; + + term_vspace(p); +} + + +static int +arg_width(const struct man_node *n) +{ + int i, len; + const char *p; + + assert(MAN_TEXT == n->type); + assert(n->string); + + p = n->string; + + if (0 == (len = (int)strlen(p))) + return(-1); + + for (i = 0; i < len; i++) + if ( ! isdigit((u_char)p[i])) + break; + + if (i == len - 1) { + if ('n' == p[len - 1] || 'm' == p[len - 1]) + return(atoi(p)); + } else if (i == len) + return(atoi(p)); + + return(-1); +} + + /* ARGSUSED */ static int pre_I(DECL_ARGS) @@ -116,6 +194,27 @@ pre_I(DECL_ARGS) } +/* ARGSUSED */ +static int +pre_r(DECL_ARGS) +{ + + p->flags &= ~TERMP_UNDER; + p->flags &= ~TERMP_BOLD; + return(1); +} + + +/* ARGSUSED */ +static void +post_i(DECL_ARGS) +{ + + if (n->nchild) + p->flags &= ~TERMP_UNDER; +} + + /* ARGSUSED */ static void post_I(DECL_ARGS) @@ -125,6 +224,27 @@ post_I(DECL_ARGS) } +/* ARGSUSED */ +static int +pre_fi(DECL_ARGS) +{ + + mt->fl &= ~MANT_LITERAL; + return(1); +} + + +/* ARGSUSED */ +static int +pre_nf(DECL_ARGS) +{ + + term_newln(p); + mt->fl |= MANT_LITERAL; + return(1); +} + + /* ARGSUSED */ static int pre_IR(DECL_ARGS) @@ -137,7 +257,7 @@ pre_IR(DECL_ARGS) p->flags |= TERMP_UNDER; if (i > 0) p->flags |= TERMP_NOSPACE; - print_node(p, nn, m); + print_node(p, mt, nn, m); if ( ! (i % 2)) p->flags &= ~TERMP_UNDER; } @@ -156,7 +276,7 @@ pre_IB(DECL_ARGS) p->flags |= i % 2 ? TERMP_BOLD : TERMP_UNDER; if (i > 0) p->flags |= TERMP_NOSPACE; - print_node(p, nn, m); + print_node(p, mt, nn, m); p->flags &= i % 2 ? ~TERMP_BOLD : ~TERMP_UNDER; } return(0); @@ -175,7 +295,7 @@ pre_RB(DECL_ARGS) p->flags |= TERMP_BOLD; if (i > 0) p->flags |= TERMP_NOSPACE; - print_node(p, nn, m); + print_node(p, mt, nn, m); if (i % 2) p->flags &= ~TERMP_BOLD; } @@ -195,7 +315,7 @@ pre_RI(DECL_ARGS) p->flags |= TERMP_UNDER; if (i > 0) p->flags |= TERMP_NOSPACE; - print_node(p, nn, m); + print_node(p, mt, nn, m); if ( ! (i % 2)) p->flags &= ~TERMP_UNDER; } @@ -215,7 +335,7 @@ pre_BR(DECL_ARGS) p->flags |= TERMP_BOLD; if (i > 0) p->flags |= TERMP_NOSPACE; - print_node(p, nn, m); + print_node(p, mt, nn, m); if ( ! (i % 2)) p->flags &= ~TERMP_BOLD; } @@ -234,7 +354,7 @@ pre_BI(DECL_ARGS) p->flags |= i % 2 ? TERMP_UNDER : TERMP_BOLD; if (i > 0) p->flags |= TERMP_NOSPACE; - print_node(p, nn, m); + print_node(p, mt, nn, m); p->flags &= i % 2 ? ~TERMP_UNDER : ~TERMP_BOLD; } return(0); @@ -260,6 +380,27 @@ post_B(DECL_ARGS) } +/* ARGSUSED */ +static int +pre_sp(DECL_ARGS) +{ + int i, len; + + if (NULL == n->child) { + term_vspace(p); + return(0); + } + + len = atoi(n->child->string); + if (0 == len) + term_newln(p); + for (i = 0; i < len; i++) + term_vspace(p); + + return(0); +} + + /* ARGSUSED */ static int pre_br(DECL_ARGS) @@ -272,47 +413,173 @@ pre_br(DECL_ARGS) /* ARGSUSED */ static int -pre_PP(DECL_ARGS) +pre_HP(DECL_ARGS) { + size_t len; + int ival; + const struct man_node *nn; + + switch (n->type) { + case (MAN_BLOCK): + fmt_block_vspace(p, n); + return(1); + case (MAN_BODY): + p->flags |= TERMP_NOBREAK; + p->flags |= TERMP_TWOSPACE; + break; + default: + return(0); + } + + len = (size_t)mt->lmargin; + ival = -1; + + /* Calculate offset. */ + + if (NULL != (nn = n->parent->head->child)) + if ((ival = arg_width(nn)) >= 0) + len = (size_t)ival; + + if (0 == len) + len = 1; - term_vspace(p); p->offset = INDENT; - return(0); + p->rmargin = INDENT + len; + + if (ival >= 0) + mt->lmargin = ival; + + return(1); +} + + +/* ARGSUSED */ +static void +post_HP(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_BLOCK): + term_flushln(p); + break; + case (MAN_BODY): + term_flushln(p); + p->flags &= ~TERMP_NOBREAK; + p->flags &= ~TERMP_TWOSPACE; + p->offset = INDENT; + p->rmargin = p->maxrmargin; + break; + default: + break; + } } /* ARGSUSED */ static int -pre_IP(DECL_ARGS) +pre_PP(DECL_ARGS) { -#if 0 - const struct man_node *nn; - size_t offs; -#endif - term_vspace(p); - p->offset = INDENT; + switch (n->type) { + case (MAN_BLOCK): + mt->lmargin = INDENT; + fmt_block_vspace(p, n); + break; + default: + p->offset = INDENT; + break; + } + + return(1); +} + -#if 0 - if (NULL == (nn = n->child)) +/* ARGSUSED */ +static int +pre_IP(DECL_ARGS) +{ + const struct man_node *nn; + size_t len; + int ival; + + switch (n->type) { + case (MAN_BODY): + p->flags |= TERMP_NOLPAD; + p->flags |= TERMP_NOSPACE; + break; + case (MAN_HEAD): + p->flags |= TERMP_NOBREAK; + p->flags |= TERMP_TWOSPACE; + break; + case (MAN_BLOCK): + fmt_block_vspace(p, n); + /* FALLTHROUGH */ + default: return(1); - if (MAN_TEXT != nn->type) - errx(1, "expected text line argument"); + } - if (nn->next) { - if (MAN_TEXT != nn->next->type) - errx(1, "expected text line argument"); - offs = (size_t)atoi(nn->next->string); - } else - offs = strlen(nn->string); + len = (size_t)mt->lmargin; + ival = -1; - p->flags |= TERMP_NOSPACE; - /* FIXME */ - if ((p->offset += offs) > p->rmargin) - errx(1, "line too long"); -#endif + /* Calculate offset. */ - return(0); + if (NULL != (nn = n->parent->head->child)) + if (NULL != (nn = nn->next)) { + for ( ; nn->next; nn = nn->next) + /* Do nothing. */ ; + if ((ival = arg_width(nn)) >= 0) + len = (size_t)ival; + } + + switch (n->type) { + case (MAN_HEAD): + /* Handle zero-width lengths. */ + if (0 == len) + len = 1; + + p->offset = INDENT; + p->rmargin = INDENT + len; + if (ival < 0) + break; + + /* Set the saved left-margin. */ + mt->lmargin = ival; + + /* Don't print the length value. */ + for (nn = n->child; nn->next; nn = nn->next) + print_node(p, mt, nn, m); + return(0); + case (MAN_BODY): + p->offset = INDENT + len; + p->rmargin = p->maxrmargin; + break; + default: + break; + } + + return(1); +} + + +/* ARGSUSED */ +static void +post_IP(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_HEAD): + term_flushln(p); + p->flags &= ~TERMP_NOBREAK; + p->flags &= ~TERMP_TWOSPACE; + p->rmargin = p->maxrmargin; + break; + case (MAN_BODY): + term_flushln(p); + p->flags &= ~TERMP_NOLPAD; + break; + default: + break; + } } @@ -320,31 +587,85 @@ pre_IP(DECL_ARGS) static int pre_TP(DECL_ARGS) { - const struct man_node *nn; - size_t offs; + const struct man_node *nn; + size_t len; + int ival; - term_vspace(p); + switch (n->type) { + case (MAN_HEAD): + p->flags |= TERMP_NOBREAK; + p->flags |= TERMP_TWOSPACE; + break; + case (MAN_BODY): + p->flags |= TERMP_NOLPAD; + p->flags |= TERMP_NOSPACE; + break; + case (MAN_BLOCK): + fmt_block_vspace(p, n); + /* FALLTHROUGH */ + default: + return(1); + } - p->offset = INDENT; + len = (size_t)mt->lmargin; + ival = -1; - if (NULL == (nn = n->child)) - return(1); + /* Calculate offset. */ - if (nn->line == n->line) { - if (MAN_TEXT != nn->type) - errx(1, "expected text line argument"); - offs = (size_t)atoi(nn->string); - nn = nn->next; - } else - offs = INDENT; + if (NULL != (nn = n->parent->head->child)) + if (NULL != nn->next) + if ((ival = arg_width(nn)) >= 0) + len = (size_t)ival; - for ( ; nn; nn = nn->next) - print_node(p, nn, m); + switch (n->type) { + case (MAN_HEAD): + /* Handle zero-length properly. */ + if (0 == len) + len = 1; + + p->offset = INDENT; + p->rmargin = INDENT + len; + + /* Don't print same-line elements. */ + for (nn = n->child; nn; nn = nn->next) + if (nn->line > n->line) + print_node(p, mt, nn, m); + + if (ival >= 0) + mt->lmargin = ival; + + return(0); + case (MAN_BODY): + p->offset = INDENT + len; + p->rmargin = p->maxrmargin; + break; + default: + break; + } - term_flushln(p); - p->flags |= TERMP_NOSPACE; - p->offset += offs; - return(0); + return(1); +} + + +/* ARGSUSED */ +static void +post_TP(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_HEAD): + term_flushln(p); + p->flags &= ~TERMP_NOBREAK; + p->flags &= ~TERMP_TWOSPACE; + p->rmargin = p->maxrmargin; + break; + case (MAN_BODY): + term_flushln(p); + p->flags &= ~TERMP_NOLPAD; + break; + default: + break; + } } @@ -353,8 +674,28 @@ static int pre_SS(DECL_ARGS) { - term_vspace(p); - p->flags |= TERMP_BOLD; + switch (n->type) { + case (MAN_BLOCK): + mt->lmargin = INDENT; + /* If following a prior empty `SS', no vspace. */ + if (n->prev && MAN_SS == n->prev->tok) + if (NULL == n->prev->body->child) + break; + if (NULL == n->prev) + break; + term_vspace(p); + break; + case (MAN_HEAD): + p->flags |= TERMP_BOLD; + p->offset = HALFINDENT; + break; + case (MAN_BODY): + p->offset = INDENT; + break; + default: + break; + } + return(1); } @@ -364,9 +705,17 @@ static void post_SS(DECL_ARGS) { - term_flushln(p); - p->flags &= ~TERMP_BOLD; - p->flags |= TERMP_NOSPACE; + switch (n->type) { + case (MAN_HEAD): + term_newln(p); + p->flags &= ~TERMP_BOLD; + break; + case (MAN_BODY): + term_newln(p); + break; + default: + break; + } } @@ -375,9 +724,26 @@ static int pre_SH(DECL_ARGS) { - term_vspace(p); - p->offset = 0; - p->flags |= TERMP_BOLD; + switch (n->type) { + case (MAN_BLOCK): + mt->lmargin = INDENT; + /* If following a prior empty `SH', no vspace. */ + if (n->prev && MAN_SH == n->prev->tok) + if (NULL == n->prev->body->child) + break; + term_vspace(p); + break; + case (MAN_HEAD): + p->flags |= TERMP_BOLD; + p->offset = 0; + break; + case (MAN_BODY): + p->offset = INDENT; + break; + default: + break; + } + return(1); } @@ -387,10 +753,17 @@ static void post_SH(DECL_ARGS) { - term_flushln(p); - p->offset = INDENT; - p->flags &= ~TERMP_BOLD; - p->flags |= TERMP_NOSPACE; + switch (n->type) { + case (MAN_HEAD): + term_newln(p); + p->flags &= ~TERMP_BOLD; + break; + case (MAN_BODY): + term_newln(p); + break; + default: + break; + } } @@ -402,10 +775,6 @@ print_node(DECL_ARGS) c = 1; switch (n->type) { - case(MAN_ELEM): - if (termacts[n->tok].pre) - c = (*termacts[n->tok].pre)(p, n, m); - break; case(MAN_TEXT): if (0 == *n->string) { term_vspace(p); @@ -422,32 +791,35 @@ print_node(DECL_ARGS) if (sz >= 2 && n->string[sz - 1] == 'c' && n->string[sz - 2] == '\\') p->flags |= TERMP_NOSPACE; + /* FIXME: this means that macro lines are munged! */ + if (MANT_LITERAL & mt->fl) { + p->flags |= TERMP_NOSPACE; + term_flushln(p); + } break; default: + if (termacts[n->tok].pre) + c = (*termacts[n->tok].pre)(p, mt, n, m); break; } if (c && n->child) - print_body(p, n->child, m); + print_body(p, mt, n->child, m); - switch (n->type) { - case (MAN_ELEM): + if (MAN_TEXT != n->type) if (termacts[n->tok].post) - (*termacts[n->tok].post)(p, n, m); - break; - default: - break; - } + (*termacts[n->tok].post)(p, mt, n, m); } static void print_body(DECL_ARGS) { - print_node(p, n, m); + + print_node(p, mt, n, m); if ( ! n->next) return; - print_body(p, n->next, m); + print_body(p, mt, n->next, m); } diff --git a/usr.bin/mandoc/man_validate.c b/usr.bin/mandoc/man_validate.c index 5cc8a914009..0b015a2e552 100644 --- a/usr.bin/mandoc/man_validate.c +++ b/usr.bin/mandoc/man_validate.c @@ -1,4 +1,4 @@ -/* $Id: man_validate.c,v 1.6 2009/08/22 15:15:37 schwarze Exp $ */ +/* $Id: man_validate.c,v 1.7 2009/08/22 20:14:37 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -26,64 +26,89 @@ #include "libman.h" #include "libmandoc.h" -#define POSTARGS struct man *m, const struct man_node *n +#define CHKARGS struct man *m, const struct man_node *n -typedef int (*v_post)(POSTARGS); +typedef int (*v_check)(CHKARGS); struct man_valid { - v_post *posts; + v_check *pres; + v_check *posts; }; -static int check_eq0(POSTARGS); -static int check_eq1(POSTARGS); -static int check_ge1(POSTARGS); -static int check_ge2(POSTARGS); -static int check_le1(POSTARGS); -static int check_le2(POSTARGS); -static int check_le5(POSTARGS); -static int check_root(POSTARGS); -static int check_sp(POSTARGS); -static int check_text(POSTARGS); - -static v_post posts_eq0[] = { check_eq0, NULL }; -static v_post posts_ge1[] = { check_ge1, NULL }; -static v_post posts_ge2_le5[] = { check_ge2, check_le5, NULL }; -static v_post posts_le1[] = { check_le1, NULL }; -static v_post posts_le2[] = { check_le2, NULL }; -static v_post posts_sp[] = { check_sp, NULL }; +static int check_bline(CHKARGS); +static int check_eline(CHKARGS); +static int check_eq0(CHKARGS); +static int check_eq1(CHKARGS); +static int check_ge2(CHKARGS); +static int check_le5(CHKARGS); +static int check_par(CHKARGS); +static int check_root(CHKARGS); +static int check_sec(CHKARGS); +static int check_sp(CHKARGS); +static int check_text(CHKARGS); + +static v_check posts_eq0[] = { check_eq0, NULL }; +static v_check posts_ge2_le5[] = { check_ge2, check_le5, NULL }; +static v_check posts_par[] = { check_par, NULL }; +static v_check posts_sec[] = { check_sec, NULL }; +static v_check posts_sp[] = { check_sp, NULL }; +static v_check pres_eline[] = { check_eline, NULL }; +static v_check pres_bline[] = { check_bline, NULL }; static const struct man_valid man_valids[MAN_MAX] = { - { posts_eq0 }, /* br */ - { posts_ge2_le5 }, /* TH */ - { posts_ge1 }, /* SH */ - { posts_ge1 }, /* SS */ - { NULL }, /* TP */ - { posts_eq0 }, /* LP */ - { posts_eq0 }, /* PP */ - { posts_eq0 }, /* P */ - { posts_le2 }, /* IP */ - { posts_le1 }, /* HP */ - { NULL }, /* SM */ - { NULL }, /* SB */ - { NULL }, /* BI */ - { NULL }, /* IB */ - { NULL }, /* BR */ - { NULL }, /* RB */ - { NULL }, /* R */ - { NULL }, /* B */ - { NULL }, /* I */ - { NULL }, /* IR */ - { NULL }, /* RI */ - { posts_eq0 }, /* na */ - { NULL }, /* i */ - { posts_sp }, /* sp */ + { pres_bline, posts_eq0 }, /* br */ + { pres_bline, posts_ge2_le5 }, /* TH */ + { pres_bline, posts_sec }, /* SH */ + { pres_bline, posts_sec }, /* SS */ + { pres_bline, posts_par }, /* TP */ + { pres_bline, posts_par }, /* LP */ + { pres_bline, posts_par }, /* PP */ + { pres_bline, posts_par }, /* P */ + { pres_bline, posts_par }, /* IP */ + { pres_bline, posts_par }, /* HP */ + { pres_eline, NULL }, /* SM */ + { pres_eline, NULL }, /* SB */ + { NULL, NULL }, /* BI */ + { NULL, NULL }, /* IB */ + { NULL, NULL }, /* BR */ + { NULL, NULL }, /* RB */ + { pres_eline, NULL }, /* R */ + { pres_eline, NULL }, /* B */ + { pres_eline, NULL }, /* I */ + { NULL, NULL }, /* IR */ + { NULL, NULL }, /* RI */ + { pres_bline, posts_eq0 }, /* na */ + { NULL, NULL }, /* i */ + { pres_bline, posts_sp }, /* sp */ + { pres_bline, posts_eq0 }, /* nf */ + { pres_bline, posts_eq0 }, /* fi */ + { NULL, NULL }, /* r */ }; +int +man_valid_pre(struct man *m, const struct man_node *n) +{ + v_check *cp; + + if (MAN_TEXT == n->type) + return(1); + if (MAN_ROOT == n->type) + return(1); + + if (NULL == (cp = man_valids[n->tok].pres)) + return(1); + for ( ; *cp; cp++) + if ( ! (*cp)(m, n)) + return(0); + return(1); +} + + int man_valid_post(struct man *m) { - v_post *cp; + v_check *cp; if (MAN_VALID & m->last->flags) return(1); @@ -109,9 +134,16 @@ man_valid_post(struct man *m) static int -check_root(POSTARGS) +check_root(CHKARGS) { - + + /* XXX - make this into a warning? */ + if (MAN_BLINE & m->flags) + return(man_nerr(m, n, WEXITSCOPE)); + /* XXX - make this into a warning? */ + if (MAN_ELINE & m->flags) + return(man_nerr(m, n, WEXITSCOPE)); + if (NULL == m->first->child) return(man_nerr(m, n, WNODATA)); if (NULL == m->meta.title) @@ -122,7 +154,7 @@ check_root(POSTARGS) static int -check_text(POSTARGS) +check_text(CHKARGS) { const char *p; int pos, c; @@ -158,7 +190,7 @@ check_text(POSTARGS) #define INEQ_DEFINE(x, ineq, name) \ static int \ -check_##name(POSTARGS) \ +check_##name(CHKARGS) \ { \ if (n->nchild ineq (x)) \ return(1); \ @@ -169,37 +201,108 @@ check_##name(POSTARGS) \ INEQ_DEFINE(0, ==, eq0) INEQ_DEFINE(1, ==, eq1) -INEQ_DEFINE(1, >=, ge1) INEQ_DEFINE(2, >=, ge2) -INEQ_DEFINE(1, <=, le1) -INEQ_DEFINE(2, <=, le2) INEQ_DEFINE(5, <=, le5) static int -check_sp(POSTARGS) +check_sp(CHKARGS) { long lval; char *ep, *buf; - if (NULL == m->last->child) + if (NULL == n->child) return(1); else if ( ! check_eq1(m, n)) return(0); - assert(MAN_TEXT == m->last->child->type); - buf = m->last->child->string; + assert(MAN_TEXT == n->child->type); + buf = n->child->string; assert(buf); /* From OpenBSD's strtol(3). */ + errno = 0; lval = strtol(buf, &ep, 10); if (buf[0] == '\0' || *ep != '\0') - return(man_nerr(m, m->last->child, WNUMFMT)); + return(man_nerr(m, n->child, WNUMFMT)); if ((errno == ERANGE && (lval == LONG_MAX || lval == LONG_MIN)) || (lval > INT_MAX || lval < 0)) - return(man_nerr(m, m->last->child, WNUMFMT)); + return(man_nerr(m, n->child, WNUMFMT)); + + return(1); +} + + +static int +check_sec(CHKARGS) +{ + if (MAN_BODY == n->type && 0 == n->nchild) + return(man_nwarn(m, n, WBODYARGS)); + if (MAN_HEAD == n->type && 0 == n->nchild) + return(man_nerr(m, n, WHEADARGS)); return(1); } + + +static int +check_par(CHKARGS) +{ + + if (MAN_BODY == n->type) + switch (n->tok) { + case (MAN_IP): + /* FALLTHROUGH */ + case (MAN_HP): + /* FALLTHROUGH */ + case (MAN_TP): + /* Body-less lists are ok. */ + break; + default: + if (n->nchild) + break; + return(man_nwarn(m, n, WBODYARGS)); + } + if (MAN_HEAD == n->type) + switch (n->tok) { + case (MAN_PP): + /* FALLTHROUGH */ + case (MAN_P): + /* FALLTHROUGH */ + case (MAN_LP): + if (0 == n->nchild) + break; + return(man_nwarn(m, n, WNHEADARGS)); + default: + if (n->nchild) + break; + return(man_nwarn(m, n, WHEADARGS)); + } + + return(1); +} + + +static int +check_eline(CHKARGS) +{ + + if (MAN_ELINE & m->flags) + return(man_nerr(m, n, WLNSCOPE)); + return(1); +} + + +static int +check_bline(CHKARGS) +{ + + if (MAN_BLINE & m->flags) + return(man_nerr(m, n, WLNSCOPE)); + if (MAN_ELINE & m->flags) + return(man_nerr(m, n, WLNSCOPE)); + return(1); +} + diff --git a/usr.bin/mandoc/tree.c b/usr.bin/mandoc/tree.c index 82265a4c6a2..d4c6adc91e5 100644 --- a/usr.bin/mandoc/tree.c +++ b/usr.bin/mandoc/tree.c @@ -1,4 +1,4 @@ -/* $Id: tree.c,v 1.2 2009/06/14 23:00:57 schwarze Exp $ */ +/* $Id: tree.c,v 1.3 2009/08/22 20:14:37 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -163,6 +163,15 @@ print_man(const struct man_node *n, int indent) case (MAN_TEXT): t = "text"; break; + case (MAN_BLOCK): + t = "block"; + break; + case (MAN_HEAD): + t = "block-head"; + break; + case (MAN_BODY): + t = "block-body"; + break; default: abort(); /* NOTREACHED */ @@ -173,6 +182,12 @@ print_man(const struct man_node *n, int indent) p = n->string; break; case (MAN_ELEM): + /* FALLTHROUGH */ + case (MAN_BLOCK): + /* FALLTHROUGH */ + case (MAN_HEAD): + /* FALLTHROUGH */ + case (MAN_BODY): p = man_macronames[n->tok]; break; case (MAN_ROOT): -- cgit v1.2.3