summary refs log tree commit diff
path: root/usr.bin
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@cvs.openbsd.org> 2010-11-25 22:23:32 +0000
committer Ingo Schwarze <schwarze@cvs.openbsd.org> 2010-11-25 22:23:32 +0000
commit e943394d2019543ed24f032a4496533e4e7e59a2 (patch)
tree 7c6c5de0305d3314366a7169a1e851ad7d2a9f05 /usr.bin
parent a36726dfb327c2a2b2d1124c057b2ecd598ed1ec (diff)
Implement the .de (define macro) roff instruction.
This fixes various Xenocara manuals. Do not define your own macros in new manuals, though: this code exists purely to cope with existing and old stuff. Like in both traditional and GNU roff, the .de and .ds (define string) roff instructions share the same string table, so one can abuse strings as macros and vice versa. This implementation supports multi-line user-defined macros and user-defined macros taking up to 9 arguments. Project started near the end of p2k10, now mature for production, but there is still room for future improvements in various respects.
Diffstat (limited to 'usr.bin')
-rw-r--r-- usr.bin/mandoc/main.c 112
-rw-r--r-- usr.bin/mandoc/roff.c 247
-rw-r--r-- usr.bin/mandoc/roff.h 4
3 files changed, 253 insertions, 110 deletions
diff --git a/usr.bin/mandoc/main.c b/usr.bin/mandoc/main.c
index 2e31b3fd7cb..ed3d1ba85fa 100644
--- a/usr.bin/mandoc/main.c
+++ b/usr.bin/mandoc/main.c
@@ -1,4 +1,4 @@
-/* $Id: main.c,v 1.54 2010/10/26 23:34:38 schwarze Exp $ */
+/* $Id: main.c,v 1.55 2010/11/25 22:23:31 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
@@ -64,6 +64,7 @@ enum outt {
struct curparse {
const char *file; /* Current parse. */
int fd; /* Current parse. */
+ int line; /* Line number in the file. */
enum mandoclevel wlevel; /* Ignore messages below this. */
int wstop; /* Stop after a file with a warning. */
enum intt inttype; /* which parser to use */
@@ -190,10 +191,11 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"static buffer exhausted",
};
+static void parsebuf(struct curparse *, struct buf, int);
static void pdesc(struct curparse *);
static void fdesc(struct curparse *);
static void ffile(const char *, struct curparse *);
-static int pfile(const char *, struct curparse *, int);
+static int pfile(const char *, struct curparse *);
static int moptions(enum intt *, char *);
static int mmsg(enum mandocerr, void *,
int, int, const char *);
@@ -320,7 +322,7 @@ ffile(const char *file, struct curparse *curp)
}
static int
-pfile(const char *file, struct curparse *curp, int ln)
+pfile(const char *file, struct curparse *curp)
{
const char *savefile;
int fd, savefd;
@@ -552,20 +554,8 @@ fdesc(struct curparse *curp)
static void
pdesc(struct curparse *curp)
{
- struct buf ln, blk;
- int i, pos, lnn, lnn_start, with_mmap, of;
- enum rofferr re;
- unsigned char c;
- struct man *man;
- struct mdoc *mdoc;
- struct roff *roff;
-
- memset(&ln, 0, sizeof(struct buf));
-
- /*
- * Two buffers: ln and buf. buf is the input file and may be
- * memory mapped. ln is a line buffer and grows on-demand.
- */
+ struct buf blk;
+ int with_mmap;
if ( ! read_whole_file(curp, &blk, &with_mmap)) {
exit_status = MANDOCLEVEL_SYSERR;
@@ -575,14 +565,42 @@ pdesc(struct curparse *curp)
if (NULL == curp->roff)
curp->roff = roff_alloc(&curp->regs, curp, mmsg);
assert(curp->roff);
- roff = curp->roff;
- mdoc = curp->mdoc;
+
+ curp->line = 1;
+ parsebuf(curp, blk, 1);
+
+ if (with_mmap)
+ munmap(blk.buf, blk.sz);
+ else
+ free(blk.buf);
+}
+
+static void
+parsebuf(struct curparse *curp, struct buf blk, int start)
+{
+ struct buf ln;
+ int i, pos, lnn, of;
+ unsigned char c;
+ struct man *man;
+ struct mdoc *mdoc;
+ struct roff *roff;
+
man = curp->man;
+ mdoc = curp->mdoc;
+ roff = curp->roff;
- for (i = 0, lnn = 1; i < (int)blk.sz;) {
- pos = 0;
- lnn_start = lnn;
- while (i < (int)blk.sz) {
+ memset(&ln, 0, sizeof(struct buf));
+
+ lnn = curp->line; /* line number in the real file */
+ pos = 0; /* byte number in the ln buffer */
+
+ for (i = 0; i < (int)blk.sz;) {
+ if (0 == pos && '\0' == blk.buf[i])
+ break;
+ if (start)
+ curp->line = lnn;
+
+ while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
if ('\n' == blk.buf[i]) {
++i;
++lnn;
@@ -601,7 +619,7 @@ pdesc(struct curparse *curp)
c = (unsigned char) blk.buf[i];
if ( ! (isascii(c) && (isgraph(c) || isblank(c)))) {
mmsg(MANDOCERR_BADCHAR, curp,
- lnn_start, pos, "ignoring byte");
+ curp->line, pos, "ignoring byte");
i++;
continue;
}
@@ -661,21 +679,32 @@ pdesc(struct curparse *curp)
*/
of = 0;
- do {
- re = roff_parseln(roff, lnn_start,
- &ln.buf, &ln.sz, of, &of);
- } while (ROFF_RERUN == re);
-
- if (ROFF_IGN == re) {
+rerun:
+ switch (roff_parseln(roff, curp->line, &ln.buf, &ln.sz,
+ of, &of)) {
+ case (ROFF_REPARSE):
+ parsebuf(curp, ln, 0);
+ pos = 0;
continue;
- } else if (ROFF_ERR == re) {
+ case (ROFF_APPEND):
+ pos = strlen(ln.buf);
+ continue;
+ case (ROFF_RERUN):
+ goto rerun;
+ case (ROFF_IGN):
+ pos = 0;
+ continue;
+ case (ROFF_ERR):
assert(MANDOCLEVEL_FATAL <= exit_status);
break;
- } else if (ROFF_SO == re) {
- if (pfile(ln.buf + of, curp, lnn_start))
+ case (ROFF_SO):
+ if (pfile(ln.buf + of, curp)) {
+ pos = 0;
continue;
- else
+ } else
break;
+ case (ROFF_CONT):
+ break;
}
/*
@@ -690,21 +719,24 @@ pdesc(struct curparse *curp)
/* Lastly, push down into the parsers themselves. */
- if (man && ! man_parseln(man, lnn_start, ln.buf, of)) {
+ if (man && ! man_parseln(man, curp->line, ln.buf, of)) {
assert(MANDOCLEVEL_FATAL <= exit_status);
break;
}
- if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf, of)) {
+ if (mdoc && ! mdoc_parseln(mdoc, curp->line, ln.buf, of)) {
assert(MANDOCLEVEL_FATAL <= exit_status);
break;
}
+
+ /* Temporary buffers typically are not full. */
+ if (0 == start && '\0' == blk.buf[i])
+ break;
+
+ /* Start the next input line. */
+ pos = 0;
}
free(ln.buf);
- if (with_mmap)
- munmap(blk.buf, blk.sz);
- else
- free(blk.buf);
}
diff --git a/usr.bin/mandoc/roff.c b/usr.bin/mandoc/roff.c
index aadc1a6d2ad..23bf4e41226 100644
--- a/usr.bin/mandoc/roff.c
+++ b/usr.bin/mandoc/roff.c
@@ -1,4 +1,4 @@
-/* $Id: roff.c,v 1.15 2010/10/26 23:34:38 schwarze Exp $ */
+/* $Id: roff.c,v 1.16 2010/11/25 22:23:31 schwarze Exp $ */
/*
* Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
@@ -7,9 +7,9 @@
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
@@ -54,6 +54,7 @@ enum rofft {
ROFF_tr,
ROFF_cblock,
ROFF_ccond, /* FIXME: remove this. */
+ ROFF_USERDEF,
ROFF_MAX
};
@@ -76,7 +77,8 @@ struct roff {
enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
int rstackpos; /* position in rstack */
struct regset *regs; /* read/writable registers */
- struct roffstr *first_string;
+ struct roffstr *first_string; /* user-defined strings & macros */
+ const char *current_string; /* value of last called user macro */
};
struct roffnode {
@@ -84,6 +86,7 @@ struct roffnode {
struct roffnode *parent; /* up one in stack */
int line; /* parse line */
int col; /* parse col */
+ char *name; /* node name, e.g. macro name */
char *end; /* end-rules: custom token */
int endspan; /* end-rules: next-line or infty */
enum roffrule rule; /* current evaluation rule */
@@ -128,9 +131,9 @@ static enum rofferr roff_nr(ROFF_ARGS);
static int roff_res(struct roff *,
char **, size_t *, int);
static void roff_setstr(struct roff *,
- const char *, const char *);
+ const char *, const char *, int);
static enum rofferr roff_so(ROFF_ARGS);
-static char *roff_strdup(const char *);
+static enum rofferr roff_userdef(ROFF_ARGS);
/* See roff_hash_find() */
@@ -158,16 +161,17 @@ static struct roffmac roffs[ROFF_MAX] = {
{ "tr", roff_line, NULL, NULL, 0, NULL },
{ ".", roff_cblock, NULL, NULL, 0, NULL },
{ "\\}", roff_ccond, NULL, NULL, 0, NULL },
+ { NULL, roff_userdef, NULL, NULL, 0, NULL },
};
static void roff_free1(struct roff *);
-static enum rofft roff_hash_find(const char *);
+static enum rofft roff_hash_find(const char *, size_t);
static void roff_hash_init(void);
static void roffnode_cleanscope(struct roff *);
-static void roffnode_push(struct roff *,
- enum rofft, int, int);
+static void roffnode_push(struct roff *, enum rofft,
+ const char *, int, int);
static void roffnode_pop(struct roff *);
-static enum rofft roff_parse(const char *, int *);
+static enum rofft roff_parse(struct roff *, const char *, int *);
static int roff_parse_nat(const char *, unsigned int *);
/* See roff_hash_find() */
@@ -179,7 +183,7 @@ roff_hash_init(void)
struct roffmac *n;
int buc, i;
- for (i = 0; i < (int)ROFF_MAX; i++) {
+ for (i = 0; i < (int)ROFF_USERDEF; i++) {
assert(roffs[i].name[0] >= ASCII_LO);
assert(roffs[i].name[0] <= ASCII_HI);
@@ -200,7 +204,7 @@ roff_hash_init(void)
* the nil-terminated string name could be found.
*/
static enum rofft
-roff_hash_find(const char *p)
+roff_hash_find(const char *p, size_t s)
{
int buc;
struct roffmac *n;
@@ -220,7 +224,7 @@ roff_hash_find(const char *p)
if (NULL == (n = hash[buc]))
return(ROFF_MAX);
for ( ; n; n = n->next)
- if (0 == strcmp(n->name, p))
+ if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
return((enum rofft)(n - roffs));
return(ROFF_MAX);
@@ -244,8 +248,8 @@ roffnode_pop(struct roff *r)
r->rstackpos--;
r->last = r->last->parent;
- if (p->end)
- free(p->end);
+ free(p->name);
+ free(p->end);
free(p);
}
@@ -255,12 +259,15 @@ roffnode_pop(struct roff *r)
* removed with roffnode_pop().
*/
static void
-roffnode_push(struct roff *r, enum rofft tok, int line, int col)
+roffnode_push(struct roff *r, enum rofft tok, const char *name,
+ int line, int col)
{
struct roffnode *p;
p = mandoc_calloc(1, sizeof(struct roffnode));
p->tok = tok;
+ if (name)
+ p->name = mandoc_strdup(name);
p->parent = r->last;
p->line = line;
p->col = col;
@@ -392,7 +399,7 @@ roff_parseln(struct roff *r, int ln, char **bufp,
*/
if (r->first_string && ! roff_res(r, bufp, szp, pos))
- return(ROFF_RERUN);
+ return(ROFF_REPARSE);
/*
* First, if a scope is open and we're not a macro, pass the
@@ -429,7 +436,7 @@ roff_parseln(struct roff *r, int ln, char **bufp,
*/
ppos = pos;
- if (ROFF_MAX == (t = roff_parse(*bufp, &pos)))
+ if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
return(ROFF_CONT);
assert(roffs[t].proc);
@@ -455,35 +462,28 @@ roff_endparse(struct roff *r)
* form of ".foo xxx" in the usual way.
*/
static enum rofft
-roff_parse(const char *buf, int *pos)
+roff_parse(struct roff *r, const char *buf, int *pos)
{
- int j;
- char mac[5];
+ const char *mac;
+ size_t maclen;
enum rofft t;
assert(ROFF_CTL(buf[*pos]));
(*pos)++;
- while (buf[*pos] && (' ' == buf[*pos] || '\t' == buf[*pos]))
+ while (' ' == buf[*pos] || '\t' == buf[*pos])
(*pos)++;
if ('\0' == buf[*pos])
return(ROFF_MAX);
- for (j = 0; j < 4; j++, (*pos)++)
- if ('\0' == (mac[j] = buf[*pos]))
- break;
- else if (' ' == buf[*pos] || (j && '\\' == buf[*pos]))
- break;
-
- if (j == 4 || j < 1)
- return(ROFF_MAX);
+ mac = buf + *pos;
+ maclen = strcspn(mac, " \\\t\0");
- mac[j] = '\0';
-
- if (ROFF_MAX == (t = roff_hash_find(mac)))
- return(t);
+ t = (r->current_string = roff_getstrn(r, mac, maclen))
+ ? ROFF_USERDEF : roff_hash_find(mac, maclen);
+ *pos += maclen;
while (buf[*pos] && ' ' == buf[*pos])
(*pos)++;
@@ -617,19 +617,32 @@ roff_block(ROFF_ARGS)
{
int sv;
size_t sz;
+ char *name;
- if (ROFF_ig != tok && '\0' == (*bufp)[pos]) {
- if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
- return(ROFF_ERR);
- return(ROFF_IGN);
- } else if (ROFF_ig != tok) {
+ name = NULL;
+
+ if (ROFF_ig != tok) {
+ if ('\0' == (*bufp)[pos]) {
+ (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
+ return(ROFF_IGN);
+ }
+ if (ROFF_de == tok)
+ name = *bufp + pos;
while ((*bufp)[pos] && ' ' != (*bufp)[pos])
pos++;
while (' ' == (*bufp)[pos])
- pos++;
+ (*bufp)[pos++] = '\0';
}
- roffnode_push(r, tok, ln, ppos);
+ roffnode_push(r, tok, name, ln, ppos);
+
+ /*
+ * At the beginning of a `de' macro, clear the existing string
+ * with the same name, if there is one. New content will be
+ * added from roff_block_text() in multiline mode.
+ */
+ if (ROFF_de == tok)
+ roff_setstr(r, name, NULL, 0);
if ('\0' == (*bufp)[pos])
return(ROFF_IGN);
@@ -696,7 +709,7 @@ roff_block_sub(ROFF_ARGS)
roffnode_pop(r);
roffnode_cleanscope(r);
- if (ROFF_MAX != roff_parse(*bufp, &pos))
+ if (ROFF_MAX != roff_parse(r, *bufp, &pos))
return(ROFF_RERUN);
return(ROFF_IGN);
}
@@ -708,11 +721,17 @@ roff_block_sub(ROFF_ARGS)
*/
ppos = pos;
- t = roff_parse(*bufp, &pos);
+ t = roff_parse(r, *bufp, &pos);
- /* If we're not a comment-end, then throw it away. */
- if (ROFF_cblock != t)
+ /*
+ * Macros other than block-end are only significant
+ * in `de' blocks; elsewhere, simply throw them away.
+ */
+ if (ROFF_cblock != t) {
+ if (ROFF_de == tok)
+ roff_setstr(r, r->last->name, *bufp + ppos, 1);
return(ROFF_IGN);
+ }
assert(roffs[t].proc);
return((*roffs[t].proc)(r, t, bufp, szp,
@@ -725,6 +744,9 @@ static enum rofferr
roff_block_text(ROFF_ARGS)
{
+ if (ROFF_de == tok)
+ roff_setstr(r, r->last->name, *bufp + pos, 1);
+
return(ROFF_IGN);
}
@@ -746,7 +768,7 @@ roff_cond_sub(ROFF_ARGS)
roffnode_cleanscope(r);
- if (ROFF_MAX == (t = roff_parse(*bufp, &pos))) {
+ if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])
return(roff_ccond
(r, ROFF_ccond, bufp, szp,
@@ -880,7 +902,7 @@ roff_cond(ROFF_ARGS)
return(ROFF_ERR);
}
- roffnode_push(r, tok, ln, ppos);
+ roffnode_push(r, tok, NULL, ln, ppos);
r->last->rule = rule;
@@ -967,7 +989,7 @@ roff_ds(ROFF_ARGS)
string++;
/* The rest is the value. */
- roff_setstr(r, name, string);
+ roff_setstr(r, name, string, 0);
return(ROFF_IGN);
}
@@ -1030,48 +1052,135 @@ roff_so(ROFF_ARGS)
}
-static char *
-roff_strdup(const char *name)
+/* ARGSUSED */
+static enum rofferr
+roff_userdef(ROFF_ARGS)
{
- char *namecopy, *sv;
+ const char *arg[9];
+ char *cp, *n1, *n2;
+ int i;
- /*
- * This isn't a nice simple mandoc_strdup() because we must
- * handle roff's stupid double-escape rule.
+ /*
+ * Collect pointers to macro argument strings
+ * and null-terminate them.
*/
- sv = namecopy = mandoc_malloc(strlen(name) + 1);
- while (*name) {
- if ('\\' == *name && '\\' == *(name + 1))
- name++;
- *namecopy++ = *name++;
+ cp = *bufp + pos;
+ for (i = 0; i < 9; i++) {
+ arg[i] = cp;
+ while ('\0' != *cp && ' ' != *cp)
+ cp++;
+ if ('\0' == *cp)
+ continue;
+ *cp++ = '\0';
+ while (' ' == *cp)
+ cp++;
}
- *namecopy = '\0';
- return(sv);
-}
+ /*
+ * Expand macro arguments.
+ */
+ *szp = 0;
+ n1 = cp = mandoc_strdup(r->current_string);
+ while (NULL != (cp = strstr(cp, "\\$"))) {
+ i = cp[2] - '1';
+ if (0 > i || 8 < i) {
+ /* Not an argument invocation. */
+ cp += 2;
+ continue;
+ }
+ *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
+ n2 = mandoc_malloc(*szp);
+
+ strlcpy(n2, n1, (size_t)(cp - n1 + 1));
+ strlcat(n2, arg[i], *szp);
+ strlcat(n2, cp + 3, *szp);
+
+ cp = n2 + (cp - n1);
+ free(n1);
+ n1 = n2;
+ }
+ /*
+ * Replace the macro invocation
+ * by the expanded macro.
+ */
+ free(*bufp);
+ *bufp = n1;
+ if (0 == *szp)
+ *szp = strlen(*bufp) + 1;
+
+ return(*szp && '\n' == (*bufp)[(int)*szp - 2] ?
+ ROFF_REPARSE : ROFF_APPEND);
+}
+
+/*
+ * Store *string into the user-defined string called *name.
+ * In multiline mode, append to an existing entry and append '\n';
+ * else replace the existing entry, if there is one.
+ * To clear an existing entry, call with (*r, *name, NULL, 0).
+ */
static void
-roff_setstr(struct roff *r, const char *name, const char *string)
+roff_setstr(struct roff *r, const char *name, const char *string,
+ int multiline)
{
struct roffstr *n;
- char *namecopy;
+ char *c;
+ size_t oldch, newch;
+ /* Search for an existing string with the same name. */
n = r->first_string;
while (n && strcmp(name, n->name))
n = n->next;
if (NULL == n) {
- namecopy = mandoc_strdup(name);
+ /* Create a new string table entry. */
n = mandoc_malloc(sizeof(struct roffstr));
- n->name = namecopy;
+ n->name = mandoc_strdup(name);
+ n->string = NULL;
n->next = r->first_string;
r->first_string = n;
- } else
+ } else if (0 == multiline) {
+ /* In multiline mode, append; else replace. */
free(n->string);
+ n->string = NULL;
+ }
+
+ if (NULL == string)
+ return;
+
+ /*
+ * One additional byte for the '\n' in multiline mode,
+ * and one for the terminating '\0'.
+ */
+ newch = strlen(string) + (multiline ? 2 : 1);
+ if (NULL == n->string) {
+ n->string = mandoc_malloc(newch);
+ *n->string = '\0';
+ oldch = 0;
+ } else {
+ oldch = strlen(n->string);
+ n->string = mandoc_realloc(n->string, oldch + newch);
+ }
+
+ /* Skip existing content in the destination buffer. */
+ c = n->string + oldch;
+
+ /* Append new content to the destination buffer. */
+ while (*string) {
+ /*
+ * Rudimentary roff copy mode:
+ * Handle escaped backslashes.
+ */
+ if ('\\' == *string && '\\' == *(string + 1))
+ string++;
+ *c++ = *string++;
+ }
- /* Don't use mandoc_strdup: clean out double-escapes. */
- n->string = string ? roff_strdup(string) : NULL;
+ /* Append terminating bytes. */
+ if (multiline)
+ *c++ = '\n';
+ *c = '\0';
}
diff --git a/usr.bin/mandoc/roff.h b/usr.bin/mandoc/roff.h
index 9992c42126e..938c300f32f 100644
--- a/usr.bin/mandoc/roff.h
+++ b/usr.bin/mandoc/roff.h
@@ -1,4 +1,4 @@
-/* $Id: roff.h,v 1.5 2010/10/26 22:28:57 schwarze Exp $ */
+/* $Id: roff.h,v 1.6 2010/11/25 22:23:31 schwarze Exp $ */
/*
* Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -20,6 +20,8 @@
enum rofferr {
ROFF_CONT, /* continue processing line */
ROFF_RERUN, /* re-run roff interpreter with offset */
+ ROFF_APPEND, /* re-run main parser, appending next line */
+ ROFF_REPARSE, /* re-run main parser on the result */
ROFF_SO, /* include another file */
ROFF_IGN, /* ignore current line */
ROFF_ERR /* badness: puke and stop */