summaryrefslogtreecommitdiff
path: root/usr.bin/mandoc/roff.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr.bin/mandoc/roff.c')
-rw-r--r--usr.bin/mandoc/roff.c432
1 files changed, 345 insertions, 87 deletions
diff --git a/usr.bin/mandoc/roff.c b/usr.bin/mandoc/roff.c
index 9efb8fd385b..5921a141785 100644
--- a/usr.bin/mandoc/roff.c
+++ b/usr.bin/mandoc/roff.c
@@ -1,4 +1,4 @@
-/* $Id: roff.c,v 1.41 2011/09/18 10:25:28 schwarze Exp $ */
+/* $Id: roff.c,v 1.42 2011/09/18 15:54:48 schwarze Exp $ */
/*
* Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -73,14 +73,25 @@ enum roffrule {
* Registers are assumed to be unsigned ints for now.
*/
struct reg {
- int set; /* whether set or not */
- unsigned int u; /* unsigned integer */
+ int set; /* whether set or not */
+ unsigned int u; /* unsigned integer */
};
+/*
+ * An incredibly-simple string buffer.
+ */
struct roffstr {
- char *name; /* key of symbol */
- char *string; /* current value */
- struct roffstr *next; /* next in list */
+ char *p; /* nil-terminated buffer */
+ size_t sz; /* saved strlen(p) */
+};
+
+/*
+ * A key-value roffstr pair as part of a singly-linked list.
+ */
+struct roffkv {
+ struct roffstr key;
+ struct roffstr val;
+ struct roffkv *next; /* next in list */
};
struct roff {
@@ -89,7 +100,9 @@ struct roff {
enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
int rstackpos; /* position in rstack */
struct reg regs[REG__MAX];
- struct roffstr *first_string; /* user-defined strings & macros */
+ struct roffkv *strtab; /* user-defined strings & macros */
+ struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
+ struct roffstr *xtab; /* single-byte trans table (`tr') */
const char *current_string; /* value of last called user macro */
struct tbl_node *first_tbl; /* first table parsed */
struct tbl_node *last_tbl; /* last table parsed */
@@ -139,6 +152,12 @@ struct predef {
#define PREDEF(__name, __str) \
{ (__name), (__str) },
+static enum rofft roffhash_find(const char *, size_t);
+static void roffhash_init(void);
+static void roffnode_cleanscope(struct roff *);
+static void roffnode_pop(struct roff *);
+static void roffnode_push(struct roff *, enum rofft,
+ const char *, int, int);
static enum rofferr roff_block(ROFF_ARGS);
static enum rofferr roff_block_text(ROFF_ARGS);
static enum rofferr roff_block_sub(ROFF_ARGS);
@@ -149,7 +168,8 @@ static enum rofferr roff_cond_text(ROFF_ARGS);
static enum rofferr roff_cond_sub(ROFF_ARGS);
static enum rofferr roff_ds(ROFF_ARGS);
static enum roffrule roff_evalcond(const char *, int *);
-static void roff_freestr(struct roff *);
+static void roff_free1(struct roff *);
+static void roff_freestr(struct roffkv *);
static char *roff_getname(struct roff *, char **, int, int);
static const char *roff_getstrn(const struct roff *,
const char *, size_t);
@@ -157,12 +177,17 @@ static enum rofferr roff_line_ignore(ROFF_ARGS);
static enum rofferr roff_nr(ROFF_ARGS);
static void roff_openeqn(struct roff *, const char *,
int, int, const char *);
-static int roff_res(struct roff *,
+static enum rofft roff_parse(struct roff *, const char *, int *);
+static enum rofferr roff_parsetext(char *);
+static void roff_res(struct roff *,
char **, size_t *, int, int);
static enum rofferr roff_rm(ROFF_ARGS);
static void roff_setstr(struct roff *,
const char *, const char *, int);
+static void roff_setstrn(struct roffkv **, const char *,
+ size_t, const char *, size_t, int);
static enum rofferr roff_so(ROFF_ARGS);
+static enum rofferr roff_tr(ROFF_ARGS);
static enum rofferr roff_TE(ROFF_ARGS);
static enum rofferr roff_TS(ROFF_ARGS);
static enum rofferr roff_EQ(ROFF_ARGS);
@@ -170,7 +195,7 @@ static enum rofferr roff_EN(ROFF_ARGS);
static enum rofferr roff_T_(ROFF_ARGS);
static enum rofferr roff_userdef(ROFF_ARGS);
-/* See roff_hash_find() */
+/* See roffhash_find() */
#define ASCII_HI 126
#define ASCII_LO 33
@@ -201,7 +226,7 @@ static struct roffmac roffs[ROFF_MAX] = {
{ "rm", roff_rm, NULL, NULL, 0, NULL },
{ "so", roff_so, NULL, NULL, 0, NULL },
{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
- { "tr", roff_line_ignore, NULL, NULL, 0, NULL },
+ { "tr", roff_tr, NULL, NULL, 0, NULL },
{ "TS", roff_TS, NULL, NULL, 0, NULL },
{ "TE", roff_TE, NULL, NULL, 0, NULL },
{ "T&", roff_T_, NULL, NULL, 0, NULL },
@@ -218,20 +243,11 @@ static const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
-static void roff_free1(struct roff *);
-static enum rofft roff_hash_find(const char *, size_t);
-static void roff_hash_init(void);
-static void roffnode_cleanscope(struct roff *);
-static void roffnode_push(struct roff *, enum rofft,
- const char *, int, int);
-static void roffnode_pop(struct roff *);
-static enum rofft roff_parse(struct roff *, const char *, int *);
-
-/* See roff_hash_find() */
+/* See roffhash_find() */
#define ROFF_HASH(p) (p[0] - ASCII_LO)
static void
-roff_hash_init(void)
+roffhash_init(void)
{
struct roffmac *n;
int buc, i;
@@ -256,7 +272,7 @@ roff_hash_init(void)
* the nil-terminated string name could be found.
*/
static enum rofft
-roff_hash_find(const char *p, size_t s)
+roffhash_find(const char *p, size_t s)
{
int buc;
struct roffmac *n;
@@ -330,6 +346,7 @@ roff_free1(struct roff *r)
{
struct tbl_node *t;
struct eqn_node *e;
+ int i;
while (NULL != (t = r->first_tbl)) {
r->first_tbl = t->next;
@@ -348,9 +365,18 @@ roff_free1(struct roff *r)
while (r->last)
roffnode_pop(r);
- roff_freestr(r);
-}
+ roff_freestr(r->strtab);
+ roff_freestr(r->xmbtab);
+ r->strtab = r->xmbtab = NULL;
+
+ if (r->xtab)
+ for (i = 0; i < 128; i++)
+ free(r->xtab[i].p);
+
+ free(r->xtab);
+ r->xtab = NULL;
+}
void
roff_reset(struct roff *r)
@@ -385,7 +411,7 @@ roff_alloc(struct mparse *parse)
r->parse = parse;
r->rstackpos = -1;
- roff_hash_init();
+ roffhash_init();
for (i = 0; i < PREDEFS_MAX; i++)
roff_setstr(r, predefs[i].name, predefs[i].str, 0);
@@ -393,15 +419,16 @@ roff_alloc(struct mparse *parse)
return(r);
}
-
/*
* Pre-filter each and every line for reserved words (one beginning with
* `\*', e.g., `\*(ab'). These must be handled before the actual line
* is processed.
+ * This also checks the syntax of regular escapes.
*/
-static int
+static void
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
+ enum mandoc_esc esc;
const char *stesc; /* start of an escape sequence ('\\') */
const char *stnam; /* start of the name, after "[(*" */
const char *cp; /* end of the name, e.g. before ']' */
@@ -410,8 +437,7 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
size_t nsz;
char *n;
- /* Search for a leading backslash and save a pointer to it. */
-
+again:
cp = *bufp + pos;
while (NULL != (cp = strchr(cp, '\\'))) {
stesc = cp++;
@@ -423,9 +449,21 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
*/
if ('\0' == *cp)
- return(1);
- if ('*' != *cp++)
- continue;
+ return;
+
+ if ('*' != *cp) {
+ res = cp;
+ esc = mandoc_escape(&cp, NULL, NULL);
+ if (ESCAPE_ERROR != esc)
+ continue;
+ cp = res;
+ mandoc_msg
+ (MANDOCERR_BADESCAPE, r->parse,
+ ln, (int)(stesc - *bufp), NULL);
+ return;
+ }
+
+ cp++;
/*
* The third character decides the length
@@ -435,7 +473,7 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
switch (*cp) {
case ('\0'):
- return(1);
+ return;
case ('('):
cp++;
maxl = 2;
@@ -453,8 +491,13 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
/* Advance to the end of the name. */
for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
- if ('\0' == *cp)
- return(1); /* Error. */
+ if ('\0' == *cp) {
+ mandoc_msg
+ (MANDOCERR_BADESCAPE,
+ r->parse, ln,
+ (int)(stesc - *bufp), NULL);
+ return;
+ }
if (0 == maxl && ']' == *cp)
break;
}
@@ -467,13 +510,16 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
res = roff_getstrn(r, stnam, (size_t)i);
if (NULL == res) {
- /* TODO: keep track of the correct position. */
- mandoc_msg(MANDOCERR_BADESCAPE, r->parse, ln, pos, NULL);
+ mandoc_msg
+ (MANDOCERR_BADESCAPE, r->parse,
+ ln, (int)(stesc - *bufp), NULL);
res = "";
}
/* Replace the escape sequence by the string. */
+ pos = stesc - *bufp;
+
nsz = *szp + strlen(res) + 1;
n = mandoc_malloc(nsz);
@@ -485,12 +531,57 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
*bufp = n;
*szp = nsz;
- return(0);
+ goto again;
}
-
- return(1);
}
+/*
+ * Process text streams: convert all breakable hyphens into ASCII_HYPH.
+ */
+static enum rofferr
+roff_parsetext(char *p)
+{
+ char l, r;
+ size_t sz;
+ const char *start;
+ enum mandoc_esc esc;
+
+ start = p;
+
+ while ('\0' != *p) {
+ sz = strcspn(p, "-\\");
+ p += sz;
+
+ if ('\0' == *p)
+ break;
+
+ if ('\\' == *p) {
+ /* Skip over escapes. */
+ p++;
+ esc = mandoc_escape
+ ((const char **)&p, NULL, NULL);
+ if (ESCAPE_ERROR == esc)
+ break;
+ continue;
+ } else if (p == start) {
+ p++;
+ continue;
+ }
+
+ l = *(p - 1);
+ r = *(p + 1);
+ if ('\\' != l &&
+ '\t' != r && '\t' != l &&
+ ' ' != r && ' ' != l &&
+ '-' != r && '-' != l &&
+ ! isdigit((unsigned char)l) &&
+ ! isdigit((unsigned char)r))
+ *p = ASCII_HYPH;
+ p++;
+ }
+
+ return(ROFF_CONT);
+}
enum rofferr
roff_parseln(struct roff *r, int ln, char **bufp,
@@ -505,8 +596,7 @@ roff_parseln(struct roff *r, int ln, char **bufp,
* words to fill in.
*/
- if (r->first_string && ! roff_res(r, bufp, szp, ln, pos))
- return(ROFF_REPARSE);
+ roff_res(r, bufp, szp, ln, pos);
ppos = pos;
ctl = mandoc_getcontrol(*bufp, &pos);
@@ -531,13 +621,13 @@ roff_parseln(struct roff *r, int ln, char **bufp,
return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
if (r->tbl)
return(tbl_read(r->tbl, ln, *bufp, pos));
- return(ROFF_CONT);
+ return(roff_parsetext(*bufp + pos));
} else if ( ! ctl) {
if (r->eqn)
return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
if (r->tbl)
return(tbl_read(r->tbl, ln, *bufp, pos));
- return(ROFF_CONT);
+ return(roff_parsetext(*bufp + pos));
} else if (r->eqn)
return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
@@ -617,7 +707,7 @@ roff_parse(struct roff *r, const char *buf, int *pos)
maclen = strcspn(mac + 1, " \\\t\0") + 1;
t = (r->current_string = roff_getstrn(r, mac, maclen))
- ? ROFF_USERDEF : roff_hash_find(mac, maclen);
+ ? ROFF_USERDEF : roffhash_find(mac, maclen);
*pos += (int)maclen;
@@ -1270,6 +1360,71 @@ roff_TS(ROFF_ARGS)
/* ARGSUSED */
static enum rofferr
+roff_tr(ROFF_ARGS)
+{
+ const char *p, *first, *second;
+ size_t fsz, ssz;
+ enum mandoc_esc esc;
+
+ p = *bufp + pos;
+
+ if ('\0' == *p) {
+ mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
+ return(ROFF_IGN);
+ }
+
+ while ('\0' != *p) {
+ fsz = ssz = 1;
+
+ first = p++;
+ if ('\\' == *first) {
+ esc = mandoc_escape(&p, NULL, NULL);
+ if (ESCAPE_ERROR == esc) {
+ mandoc_msg
+ (MANDOCERR_BADESCAPE, r->parse,
+ ln, (int)(p - *bufp), NULL);
+ return(ROFF_IGN);
+ }
+ fsz = (size_t)(p - first);
+ }
+
+ second = p++;
+ if ('\\' == *second) {
+ esc = mandoc_escape(&p, NULL, NULL);
+ if (ESCAPE_ERROR == esc) {
+ mandoc_msg
+ (MANDOCERR_BADESCAPE, r->parse,
+ ln, (int)(p - *bufp), NULL);
+ return(ROFF_IGN);
+ }
+ ssz = (size_t)(p - second);
+ } else if ('\0' == *second) {
+ mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
+ ln, (int)(p - *bufp), NULL);
+ second = " ";
+ p--;
+ }
+
+ if (fsz > 1) {
+ roff_setstrn(&r->xmbtab, first,
+ fsz, second, ssz, 0);
+ continue;
+ }
+
+ if (NULL == r->xtab)
+ r->xtab = mandoc_calloc
+ (128, sizeof(struct roffstr));
+
+ free(r->xtab[(int)*first].p);
+ r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
+ r->xtab[(int)*first].sz = ssz;
+ }
+
+ return(ROFF_IGN);
+}
+
+/* ARGSUSED */
+static enum rofferr
roff_so(ROFF_ARGS)
{
char *name;
@@ -1391,32 +1546,40 @@ static void
roff_setstr(struct roff *r, const char *name, const char *string,
int multiline)
{
- struct roffstr *n;
- char *c;
- size_t oldch, newch;
-
- /* XXX workaround for the Perl preamble until we get .tr */
- if ( ! strcmp(name, "--")) {
- string = "--";
- multiline = 0;
- }
+
+ roff_setstrn(&r->strtab, name, strlen(name), string,
+ string ? strlen(string) : 0, multiline);
+}
+
+static void
+roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
+ const char *string, size_t stringsz, int multiline)
+{
+ struct roffkv *n;
+ char *c;
+ int i;
+ size_t oldch, newch;
/* Search for an existing string with the same name. */
- n = r->first_string;
- while (n && strcmp(name, n->name))
+ n = *r;
+
+ while (n && strcmp(name, n->key.p))
n = n->next;
if (NULL == n) {
/* Create a new string table entry. */
- n = mandoc_malloc(sizeof(struct roffstr));
- n->name = mandoc_strdup(name);
- n->string = NULL;
- n->next = r->first_string;
- r->first_string = n;
+ n = mandoc_malloc(sizeof(struct roffkv));
+ n->key.p = mandoc_strndup(name, namesz);
+ n->key.sz = namesz;
+ n->val.p = NULL;
+ n->val.sz = 0;
+ n->next = *r;
+ *r = n;
} else if (0 == multiline) {
/* In multiline mode, append; else replace. */
- free(n->string);
- n->string = NULL;
+ free(n->val.p);
+ n->val.p = NULL;
+ n->val.sz = 0;
}
if (NULL == string)
@@ -1426,61 +1589,64 @@ roff_setstr(struct roff *r, const char *name, const char *string,
* One additional byte for the '\n' in multiline mode,
* and one for the terminating '\0'.
*/
- newch = strlen(string) + (multiline ? 2u : 1u);
- if (NULL == n->string) {
- n->string = mandoc_malloc(newch);
- *n->string = '\0';
+ newch = stringsz + (multiline ? 2u : 1u);
+
+ if (NULL == n->val.p) {
+ n->val.p = mandoc_malloc(newch);
+ *n->val.p = '\0';
oldch = 0;
} else {
- oldch = strlen(n->string);
- n->string = mandoc_realloc(n->string, oldch + newch);
+ oldch = n->val.sz;
+ n->val.p = mandoc_realloc(n->val.p, oldch + newch);
}
/* Skip existing content in the destination buffer. */
- c = n->string + (int)oldch;
+ c = n->val.p + (int)oldch;
/* Append new content to the destination buffer. */
- while (*string) {
+ i = 0;
+ while (i < (int)stringsz) {
/*
* Rudimentary roff copy mode:
* Handle escaped backslashes.
*/
- if ('\\' == *string && '\\' == *(string + 1))
- string++;
- *c++ = *string++;
+ if ('\\' == string[i] && '\\' == string[i + 1])
+ i++;
+ *c++ = string[i++];
}
/* Append terminating bytes. */
if (multiline)
*c++ = '\n';
+
*c = '\0';
+ n->val.sz = (int)(c - n->val.p);
}
static const char *
roff_getstrn(const struct roff *r, const char *name, size_t len)
{
- const struct roffstr *n;
+ const struct roffkv *n;
- n = r->first_string;
- while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
- n = n->next;
+ for (n = r->strtab; n; n = n->next)
+ if (0 == strncmp(name, n->key.p, len) &&
+ '\0' == n->key.p[(int)len])
+ return(n->val.p);
- return(n ? n->string : NULL);
+ return(NULL);
}
static void
-roff_freestr(struct roff *r)
+roff_freestr(struct roffkv *r)
{
- struct roffstr *n, *nn;
+ struct roffkv *n, *nn;
- for (n = r->first_string; n; n = nn) {
- free(n->name);
- free(n->string);
+ for (n = r; n; n = nn) {
+ free(n->key.p);
+ free(n->val.p);
nn = n->next;
free(n);
}
-
- r->first_string = NULL;
}
const struct tbl_span *
@@ -1496,3 +1662,95 @@ roff_eqn(const struct roff *r)
return(r->last_eqn ? &r->last_eqn->eqn : NULL);
}
+
+/*
+ * Duplicate an input string, making the appropriate character
+ * conversations (as stipulated by `tr') along the way.
+ * Returns a heap-allocated string with all the replacements made.
+ */
+char *
+roff_strdup(const struct roff *r, const char *p)
+{
+ const struct roffkv *cp;
+ char *res;
+ const char *pp;
+ size_t ssz, sz;
+ enum mandoc_esc esc;
+
+ if (NULL == r->xmbtab && NULL == r->xtab)
+ return(mandoc_strdup(p));
+ else if ('\0' == *p)
+ return(mandoc_strdup(""));
+
+ /*
+ * Step through each character looking for term matches
+ * (remember that a `tr' can be invoked with an escape, which is
+ * a glyph but the escape is multi-character).
+ * We only do this if the character hash has been initialised
+ * and the string is >0 length.
+ */
+
+ res = NULL;
+ ssz = 0;
+
+ while ('\0' != *p) {
+ if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
+ sz = r->xtab[(int)*p].sz;
+ res = mandoc_realloc(res, ssz + sz + 1);
+ memcpy(res + ssz, r->xtab[(int)*p].p, sz);
+ ssz += sz;
+ p++;
+ continue;
+ } else if ('\\' != *p) {
+ res = mandoc_realloc(res, ssz + 2);
+ res[ssz++] = *p++;
+ continue;
+ }
+
+ /* Search for term matches. */
+ for (cp = r->xmbtab; cp; cp = cp->next)
+ if (0 == strncmp(p, cp->key.p, cp->key.sz))
+ break;
+
+ if (NULL != cp) {
+ /*
+ * A match has been found.
+ * Append the match to the array and move
+ * forward by its keysize.
+ */
+ res = mandoc_realloc
+ (res, ssz + cp->val.sz + 1);
+ memcpy(res + ssz, cp->val.p, cp->val.sz);
+ ssz += cp->val.sz;
+ p += (int)cp->key.sz;
+ continue;
+ }
+
+ /*
+ * Handle escapes carefully: we need to copy
+ * over just the escape itself, or else we might
+ * do replacements within the escape itself.
+ * Make sure to pass along the bogus string.
+ */
+ pp = p++;
+ esc = mandoc_escape(&p, NULL, NULL);
+ if (ESCAPE_ERROR == esc) {
+ sz = strlen(pp);
+ res = mandoc_realloc(res, ssz + sz + 1);
+ memcpy(res + ssz, pp, sz);
+ break;
+ }
+ /*
+ * We bail out on bad escapes.
+ * No need to warn: we already did so when
+ * roff_res() was called.
+ */
+ sz = (int)(p - pp);
+ res = mandoc_realloc(res, ssz + sz + 1);
+ memcpy(res + ssz, pp, sz);
+ ssz += sz;
+ }
+
+ res[(int)ssz] = '\0';
+ return(res);
+}