summaryrefslogtreecommitdiff
path: root/usr.bin/mandoc
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@cvs.openbsd.org> 2015-01-26 18:41:47 +0000
committer Ingo Schwarze <schwarze@cvs.openbsd.org> 2015-01-26 18:41:47 +0000
commit fdcd3d21832c83c6ea3965777b6a70d1d311ab3f (patch)
tree 6168d393d345d69e29a9311221f3a67017d67c5f /usr.bin/mandoc
parent 962371a7a40258a1f1047908d55e725532879bff (diff)
Rework tbl(7) layout parsing:
* Continue parsing even if part of the input is invalid.
* Do not require whitespace between cell specifications.
* Allow tabs as well as blanks between modifiers.
* Mark the 'm' modifier as unsupported.
* Parse and ignore the 'p' and 'v' modifiers.
* Better warning and error messages.
* Get rid of a static buffer.

Improved functionality but minus 50 lines of code.
Diffstat (limited to 'usr.bin/mandoc')
-rw-r--r-- usr.bin/mandoc/mandoc.h 13
-rw-r--r-- usr.bin/mandoc/read.c 13
-rw-r--r-- usr.bin/mandoc/tbl_layout.c 238
3 files changed, 103 insertions, 161 deletions
diff --git a/usr.bin/mandoc/mandoc.h b/usr.bin/mandoc/mandoc.h
index 639dd6ac36e..53f93f202c4 100644
--- a/usr.bin/mandoc/mandoc.h
+++ b/usr.bin/mandoc/mandoc.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: mandoc.h,v 1.130 2015/01/26 13:02:53 schwarze Exp $ */
+/* $OpenBSD: mandoc.h,v 1.131 2015/01/26 18:41:45 schwarze Exp $ */
/*
* Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -127,6 +127,11 @@ enum mandocerr {
MANDOCERR_ESC_BAD, /* invalid escape sequence: esc */
MANDOCERR_STR_UNDEF, /* undefined string, using "": name */
+ /* related to tables */
+ MANDOCERR_TBLLAYOUT_SPAN, /* tbl line starts with span */
+ MANDOCERR_TBLLAYOUT_DOWN, /* tbl column starts with span */
+ MANDOCERR_TBLLAYOUT_VERT, /* skipping vertical bar in tbl layout */
+
MANDOCERR_ERROR, /* ===== start of errors ===== */
/* related to equations */
@@ -140,7 +145,9 @@ enum mandocerr {
MANDOCERR_TBLOPT_BAD, /* skipping unknown tbl option: option */
MANDOCERR_TBLOPT_NOARG, /* missing tbl option argument */
MANDOCERR_TBLOPT_ARGSZ, /* wrong tbl option argument size */
- MANDOCERR_TBLNOLAYOUT, /* no table layout cells specified */
+ MANDOCERR_TBLLAYOUT_NONE, /* empty tbl layout */
+ MANDOCERR_TBLLAYOUT_CHAR, /* invalid character in tbl layout: char */
+ MANDOCERR_TBLLAYOUT_PAR, /* unmatched parenthesis in tbl layout */
MANDOCERR_TBLNODATA, /* no table data cells specified */
MANDOCERR_TBLIGNDATA, /* ignore data in cell */
MANDOCERR_TBLBLOCK, /* data block still open */
@@ -179,7 +186,7 @@ enum mandocerr {
MANDOCERR_TOOLARGE, /* input too large */
MANDOCERR_CHAR_UNSUPP, /* unsupported control character: number */
MANDOCERR_REQ_UNSUPP, /* unsupported roff request: request */
- MANDOCERR_TBLLAYOUT, /* unsupported table layout */
+ MANDOCERR_TBLLAYOUT_MOD, /* unsupported tbl layout modifier: m */
MANDOCERR_TBLMACRO, /* ignoring macro in table: macro */
MANDOCERR_TBLEQN, /* eqn in tbl */
diff --git a/usr.bin/mandoc/read.c b/usr.bin/mandoc/read.c
index f38fbc62083..18e25e9e87c 100644
--- a/usr.bin/mandoc/read.c
+++ b/usr.bin/mandoc/read.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: read.c,v 1.92 2015/01/26 13:02:53 schwarze Exp $ */
+/* $OpenBSD: read.c,v 1.93 2015/01/26 18:41:45 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -166,6 +166,11 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"invalid escape sequence",
"undefined string, using \"\"",
+ /* related to tables */
+ "tbl line starts with span",
+ "tbl column starts with span",
+ "skipping vertical bar in tbl layout",
+
"generic error",
/* related to equations */
@@ -179,7 +184,9 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"skipping unknown tbl option",
"missing tbl option argument",
"wrong tbl option argument size",
- "no table layout cells specified",
+ "empty tbl layout",
+ "invalid character in tbl layout",
+ "unmatched parenthesis in tbl layout",
"no table data cells specified",
"ignore data in cell",
"data block still open",
@@ -217,7 +224,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"input too large",
"unsupported control character",
"unsupported roff request",
- "unsupported table layout",
+ "unsupported tbl layout modifier",
"ignoring macro in table",
"eqn in tbl",
};
diff --git a/usr.bin/mandoc/tbl_layout.c b/usr.bin/mandoc/tbl_layout.c
index 31506fb98cb..daa49a26f82 100644
--- a/usr.bin/mandoc/tbl_layout.c
+++ b/usr.bin/mandoc/tbl_layout.c
@@ -1,7 +1,7 @@
-/* $OpenBSD: tbl_layout.c,v 1.18 2015/01/14 22:44:51 schwarze Exp $ */
+/* $OpenBSD: tbl_layout.c,v 1.19 2015/01/26 18:41:45 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2012, 2014 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2012, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -32,15 +32,7 @@ struct tbl_phrase {
enum tbl_cellt key;
};
-/*
- * FIXME: we can make this parse a lot nicer by, when an error is
- * encountered in a layout key, bailing to the next key (i.e. to the
- * next whitespace then continuing).
- */
-
-#define KEYS_MAX 11
-
-static const struct tbl_phrase keys[KEYS_MAX] = {
+static const struct tbl_phrase keys[] = {
{ 'c', TBL_CELL_CENTRE },
{ 'r', TBL_CELL_RIGHT },
{ 'l', TBL_CELL_LEFT },
@@ -53,57 +45,30 @@ static const struct tbl_phrase keys[KEYS_MAX] = {
{ '=', TBL_CELL_DHORIZ }
};
-static int mods(struct tbl_node *, struct tbl_cell *,
+#define KEYS_MAX ((int)(sizeof(keys)/sizeof(keys[0])))
+
+static void mods(struct tbl_node *, struct tbl_cell *,
int, const char *, int *);
-static int cell(struct tbl_node *, struct tbl_row *,
+static void cell(struct tbl_node *, struct tbl_row *,
int, const char *, int *);
static struct tbl_cell *cell_alloc(struct tbl_node *, struct tbl_row *,
enum tbl_cellt, int vert);
-static int
+static void
mods(struct tbl_node *tbl, struct tbl_cell *cp,
int ln, const char *p, int *pos)
{
- char buf[5];
- int i;
+ char *endptr;
- /* Not all types accept modifiers. */
+mod:
+ while (p[*pos] == ' ' || p[*pos] == '\t')
+ (*pos)++;
- switch (cp->pos) {
- case TBL_CELL_DOWN:
- /* FALLTHROUGH */
- case TBL_CELL_HORIZ:
- /* FALLTHROUGH */
- case TBL_CELL_DHORIZ:
- return(1);
- default:
- break;
- }
+ /* Row delimiters and cell specifiers end modifier lists. */
-mod:
- /*
- * XXX: since, at least for now, modifiers are non-conflicting
- * (are separable by value, regardless of position), we let
- * modifiers come in any order. The existing tbl doesn't let
- * this happen.
- */
- switch (p[*pos]) {
- case '\0':
- /* FALLTHROUGH */
- case ' ':
- /* FALLTHROUGH */
- case '\t':
- /* FALLTHROUGH */
- case ',':
- /* FALLTHROUGH */
- case '.':
- /* FALLTHROUGH */
- case '|':
- return(1);
- default:
- break;
- }
+ if (strchr(".,-=^_ACLNRSaclnrs|", p[*pos]) != NULL)
+ return;
/* Throw away parenthesised expression. */
@@ -115,72 +80,65 @@ mod:
(*pos)++;
goto mod;
}
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
+ mandoc_msg(MANDOCERR_TBLLAYOUT_PAR, tbl->parse,
ln, *pos, NULL);
- return(0);
+ return;
}
/* Parse numerical spacing from modifier string. */
if (isdigit((unsigned char)p[*pos])) {
- for (i = 0; i < 4; i++) {
- if ( ! isdigit((unsigned char)p[*pos + i]))
- break;
- buf[i] = p[*pos + i];
- }
- buf[i] = '\0';
-
- /* No greater than 4 digits. */
-
- if (4 == i) {
- mandoc_msg(MANDOCERR_TBLLAYOUT,
- tbl->parse, ln, *pos, NULL);
- return(0);
- }
-
- *pos += i;
- cp->spacing = (size_t)atoi(buf);
-
+ cp->spacing = strtoull(p + *pos, &endptr, 10);
+ *pos = endptr - p;
goto mod;
- /* NOTREACHED */
}
- /* TODO: GNU has many more extensions. */
-
switch (tolower((unsigned char)p[(*pos)++])) {
- case 'z':
- cp->flags |= TBL_CELL_WIGN;
- goto mod;
- case 'u':
- cp->flags |= TBL_CELL_UP;
+ case 'b':
+ /* FALLTHROUGH */
+ case 'i':
+ /* FALLTHROUGH */
+ case 'r':
+ (*pos)--;
+ break;
+ case 'd':
+ cp->flags |= TBL_CELL_BALIGN;
goto mod;
case 'e':
cp->flags |= TBL_CELL_EQUAL;
goto mod;
+ case 'f':
+ break;
+ case 'm':
+ mandoc_msg(MANDOCERR_TBLLAYOUT_MOD, tbl->parse,
+ ln, *pos, "m");
+ goto mod;
+ case 'p':
+ /* FALLTHROUGH */
+ case 'v':
+ if (p[*pos] == '-' || p[*pos] == '+')
+ (*pos)++;
+ while (isdigit((unsigned char)p[*pos]))
+ (*pos)++;
+ goto mod;
case 't':
cp->flags |= TBL_CELL_TALIGN;
goto mod;
- case 'd':
- cp->flags |= TBL_CELL_BALIGN;
+ case 'u':
+ cp->flags |= TBL_CELL_UP;
goto mod;
case 'w': /* XXX for now, ignore minimal column width */
goto mod;
case 'x':
cp->flags |= TBL_CELL_WMAX;
goto mod;
- case 'f':
- break;
- case 'r':
- /* FALLTHROUGH */
- case 'b':
- /* FALLTHROUGH */
- case 'i':
- (*pos)--;
- break;
+ case 'z':
+ cp->flags |= TBL_CELL_WIGN;
+ goto mod;
default:
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
- ln, *pos - 1, NULL);
- return(0);
+ mandoc_vmsg(MANDOCERR_TBLLAYOUT_CHAR, tbl->parse,
+ ln, *pos - 1, "%c", p[*pos - 1]);
+ goto mod;
}
switch (tolower((unsigned char)p[(*pos)++])) {
@@ -199,20 +157,13 @@ mod:
case 'r':
goto mod;
default:
- break;
- }
- if (isalnum((unsigned char)p[*pos - 1])) {
mandoc_vmsg(MANDOCERR_FT_BAD, tbl->parse,
ln, *pos - 1, "TS f%c", p[*pos - 1]);
goto mod;
}
-
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
- ln, *pos - 1, NULL);
- return(0);
}
-static int
+static void
cell(struct tbl_node *tbl, struct tbl_row *rp,
int ln, const char *p, int *pos)
{
@@ -221,16 +172,24 @@ cell(struct tbl_node *tbl, struct tbl_row *rp,
/* Handle vertical lines. */
- for (vert = 0; '|' == p[*pos]; ++*pos)
- vert++;
- while (' ' == p[*pos])
+ vert = 0;
+again:
+ while (p[*pos] == ' ' || p[*pos] == '\t' || p[*pos] == '|') {
+ if (p[*pos] == '|') {
+ if (vert < 2)
+ vert++;
+ else
+ mandoc_msg(MANDOCERR_TBLLAYOUT_VERT,
+ tbl->parse, ln, *pos, NULL);
+ }
(*pos)++;
+ }
/* Handle trailing vertical lines */
if ('.' == p[*pos] || '\0' == p[*pos]) {
rp->vert = vert;
- return(1);
+ return;
}
/* Parse the column position (`c', `l', `r', ...). */
@@ -239,62 +198,32 @@ cell(struct tbl_node *tbl, struct tbl_row *rp,
if (tolower((unsigned char)p[*pos]) == keys[i].name)
break;
- if (KEYS_MAX == i) {
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
- ln, *pos, NULL);
- return(0);
+ if (i == KEYS_MAX) {
+ mandoc_vmsg(MANDOCERR_TBLLAYOUT_CHAR, tbl->parse,
+ ln, *pos, "%c", p[*pos]);
+ (*pos)++;
+ goto again;
}
-
c = keys[i].key;
- /*
- * If a span cell is found first, raise a warning and abort the
- * parse. If a span cell is found and the last layout element
- * isn't a "normal" layout, bail.
- *
- * FIXME: recover from this somehow?
- */
-
- if (TBL_CELL_SPAN == c) {
- if (NULL == rp->first) {
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
- ln, *pos, NULL);
- return(0);
- } else if (rp->last)
- switch (rp->last->pos) {
- case TBL_CELL_HORIZ:
- /* FALLTHROUGH */
- case TBL_CELL_DHORIZ:
- mandoc_msg(MANDOCERR_TBLLAYOUT,
- tbl->parse, ln, *pos, NULL);
- return(0);
- default:
- break;
- }
- }
+ /* Special cases of spanners. */
- /*
- * If a vertical spanner is found, we may not be in the first
- * row.
- */
-
- if (TBL_CELL_DOWN == c && rp == tbl->first_row) {
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos, NULL);
- return(0);
- }
+ if (c == TBL_CELL_SPAN) {
+ if (rp->last == NULL)
+ mandoc_msg(MANDOCERR_TBLLAYOUT_SPAN,
+ tbl->parse, ln, *pos, NULL);
+ else if (rp->last->pos == TBL_CELL_HORIZ ||
+ rp->last->pos == TBL_CELL_DHORIZ)
+ c = rp->last->pos;
+ } else if (c == TBL_CELL_DOWN && rp == tbl->first_row)
+ mandoc_msg(MANDOCERR_TBLLAYOUT_DOWN,
+ tbl->parse, ln, *pos, NULL);
(*pos)++;
- /* Disallow adjacent spacers. */
-
- if (vert > 2) {
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos - 1, NULL);
- return(0);
- }
-
/* Allocate cell then parse its modifiers. */
- return(mods(tbl, cell_alloc(tbl, rp, c, vert), ln, p, pos));
+ mods(tbl, cell_alloc(tbl, rp, c, vert), ln, p, pos);
}
void
@@ -309,7 +238,7 @@ tbl_layout(struct tbl_node *tbl, int ln, const char *p)
for (;;) {
/* Skip whitespace before and after each cell. */
- while (isspace((unsigned char)p[pos]))
+ while (p[pos] == ' ' || p[pos] == '\t')
pos++;
switch (p[pos]) {
@@ -324,7 +253,7 @@ tbl_layout(struct tbl_node *tbl, int ln, const char *p)
tbl->part = TBL_PART_DATA;
if (tbl->first_row != NULL)
return;
- mandoc_msg(MANDOCERR_TBLNOLAYOUT,
+ mandoc_msg(MANDOCERR_TBLLAYOUT_NONE,
tbl->parse, ln, pos, NULL);
rp = mandoc_calloc(1, sizeof(*rp));
cell_alloc(tbl, rp, TBL_CELL_LEFT, 0);
@@ -342,8 +271,7 @@ tbl_layout(struct tbl_node *tbl, int ln, const char *p)
tbl->first_row = rp;
tbl->last_row = rp;
}
- if ( ! cell(tbl, rp, ln, p, &pos))
- return;
+ cell(tbl, rp, ln, p, &pos);
}
}