/* $OpenBSD: checknr.c,v 1.14 2005/03/29 23:46:19 jaredy Exp $ */ /* $NetBSD: checknr.c,v 1.4 1995/03/26 04:10:19 glass Exp $ */ /* * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static const char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93"; #else static const char rcsid[] = "$OpenBSD: checknr.c,v 1.14 2005/03/29 23:46:19 jaredy Exp $"; #endif #endif /* not lint */ /* * checknr: check an nroff/troff input file for matching macro calls. * we also attempt to match size and font changes, but only the embedded * kind. These must end in \s0 and \fP resp. Maybe more sophistication * later but for now think of these restrictions as contributions to * structured typesetting. */ #include #include #include #include #include #include #define MAXSTK 100 /* Stack size */ #define MAXBR 100 /* Max number of bracket pairs known */ #define MAXCMDS 500 /* Max number of commands known */ /* * The stack on which we remember what we've seen so far. */ struct stkstr { int opno; /* number of opening bracket */ int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */ int parm; /* parm to size, font, etc */ int lno; /* line number the thing came in in */ } stk[MAXSTK]; int stktop; void usage(void); void addmac(char *); void process(FILE *); void pe(int); int eq(char *, char *); void complain(int); void prop(int); void chkcmd(char *, char *); void addcmd(char *); void nomatch(char *); void checkknown(char *); int binsrch(char *); /* * The kinds of opening and closing brackets. */ struct brstr { char *opbr; char *clbr; } br[MAXBR] = { /* A few bare bones troff commands */ #define SZ 0 { "sz", "sz" }, /* also \s */ #define FT 1 { "ft", "ft" }, /* also \f */ /* the -mm package */ { "AL", "LE" }, { "AS", "AE" }, { "BL", "LE" }, { "BS", "BE" }, { "DF", "DE" }, { "DL", "LE" }, { "DS", "DE" }, { "FS", "FE" }, { "ML", "LE" }, { "NS", "NE" }, { "RL", "LE" }, { "VL", "LE" }, /* the -ms package */ { "AB", "AE" }, { "BD", "DE" }, { "CD", "DE" }, { "DS", "DE" }, { "FS", "FE" }, { "ID", "DE" }, { "KF", "KE" }, { "KS", "KE" }, { "LD", "DE" }, { "LG", "NL" }, { "QS", "QE" }, { "RS", "RE" }, { "SM", "NL" }, { "XA", "XE" }, { "XS", "XE" }, /* The -me package */ { "(b", ")b" }, { "(c", ")c" }, { "(d", ")d" }, { "(f", ")f" }, { "(l", ")l" }, { "(q", ")q" }, { "(x", ")x" }, { "(z", ")z" }, /* Things needed by preprocessors */ { "EQ", "EN" }, { "TS", "TE" }, /* Refer */ { "[", "]" }, { 0, } }; /* * All commands known to nroff, plus macro packages. * Used so we can complain about unrecognized commands. */ char *knowncmds[MAXCMDS] = { "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT", "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM", "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID", "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB", "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P", "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL", "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i", "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln", "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps", "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp", "yr", 0 }; int lineno; /* current line number in input file */ char line[256]; /* the current line */ char *cfilename; /* name of current file */ int nfiles; /* number of files to process */ int fflag; /* -f: ignore \f */ int sflag; /* -s: ignore \s */ int ncmds; /* size of knowncmds */ int slot; /* slot in knowncmds found by binsrch */ int main(int argc, char *argv[]) { FILE *f; int i; char *cp; char b1[4]; /* Figure out how many known commands there are */ while (knowncmds[ncmds]) ncmds++; while (argc > 1 && argv[1][0] == '-') { switch(argv[1][1]) { /* -a: add pairs of macros */ case 'a': i = strlen(argv[1]) - 2; if (i % 6 != 0) usage(); /* look for empty macro slots */ for (i=0; br[i].opbr; i++) ; for (cp=argv[1]+3; cp[-1]; cp += 6) { if (i >= MAXBR) errx(1, "too many pairs"); if ((br[i].opbr = malloc(3)) == NULL) err(1, "malloc"); strlcpy(br[i].opbr, cp, 3); if ((br[i].clbr = malloc(3)) == NULL) err(1, "malloc"); strlcpy(br[i].clbr, cp+3, 3); addmac(br[i].opbr); /* knows pairs are also known cmds */ addmac(br[i].clbr); i++; } break; /* -c: add known commands */ case 'c': i = strlen(argv[1]) - 2; if (i % 3 != 0) usage(); for (cp=argv[1]+3; cp[-1]; cp += 3) { if (cp[2] && cp[2] != '.') usage(); strncpy(b1, cp, 2); addmac(b1); } break; /* -f: ignore font changes */ case 'f': fflag = 1; break; /* -s: ignore size changes */ case 's': sflag = 1; break; default: usage(); } argc--; argv++; } nfiles = argc - 1; if (nfiles > 0) { for (i=1; i=0; i--) { complain(i); } } void complain(int i) { pe(stk[i].lno); printf("Unmatched "); prop(i); printf("\n"); } void prop(int i) { if (stk[i].pl == 0) printf(".%s", br[stk[i].opno].opbr); else switch(stk[i].opno) { case SZ: printf("\\s%c%d", stk[i].pl, stk[i].parm); break; case FT: printf("\\f%c", stk[i].parm); break; default: printf("Bug: stk[%d].opno = %d = .%s, .%s", i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr); } } void chkcmd(char *line, char *mac) { int i; /* * Check to see if it matches top of stack. */ if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) stktop--; /* OK. Pop & forget */ else { /* No. Maybe it's an opener */ for (i=0; br[i].opbr; i++) { if (eq(mac, br[i].opbr)) { /* Found. Push it. */ stktop++; stk[stktop].opno = i; stk[stktop].pl = 0; stk[stktop].parm = 0; stk[stktop].lno = lineno; break; } /* * Maybe it's an unmatched closer. * NOTE: this depends on the fact * that none of the closers can be * openers too. */ if (eq(mac, br[i].clbr)) { nomatch(mac); break; } } } } void nomatch(char *mac) { int i, j; /* * Look for a match further down on stack * If we find one, it suggests that the stuff in * between is supposed to match itself. */ for (j=stktop; j>=0; j--) if (eq(mac,br[stk[j].opno].clbr)) { /* Found. Make a good diagnostic. */ if (j == stktop-2) { /* * Check for special case \fx..\fR and don't * complain. */ if (stk[j+1].opno==FT && stk[j+1].parm!='R' && stk[j+2].opno==FT && stk[j+2].parm=='R') { stktop = j -1; return; } /* * We have two unmatched frobs. Chances are * they were intended to match, so we mention * them together. */ pe(stk[j+1].lno); prop(j+1); printf(" does not match %d: ", stk[j+2].lno); prop(j+2); printf("\n"); } else for (i=j+1; i <= stktop; i++) { complain(i); } stktop = j-1; return; } /* Didn't find one. Throw this away. */ pe(lineno); printf("Unmatched .%s\n", mac); } /* eq: are two strings equal? */ int eq(char *s1, char *s2) { return (strcmp(s1, s2) == 0); } /* print the first part of an error message, given the line number */ void pe(int lineno) { if (nfiles > 1) printf("%s: ", cfilename); printf("%d: ", lineno); } void checkknown(char *mac) { if (eq(mac, ".")) return; if (binsrch(mac) >= 0) return; if (mac[0] == '\\' && mac[1] == '"') /* comments */ return; pe(lineno); printf("Unknown command: .%s\n", mac); } /* * We have a .de xx line in "line". Add xx to the list of known commands. */ void addcmd(char *line) { char *mac; /* grab the macro being defined */ mac = line+4; while (isspace(*mac)) mac++; if (*mac == 0) { pe(lineno); printf("illegal define: %s\n", line); return; } mac[2] = 0; if (isspace(mac[1]) || mac[1] == '\\') mac[1] = 0; if (ncmds >= MAXCMDS) { printf("Only %d known commands allowed\n", MAXCMDS); exit(1); } addmac(mac); } /* * Add mac to the list. We should really have some kind of tree * structure here but this is a quick-and-dirty job and I just don't * have time to mess with it. (I wonder if this will come back to haunt * me someday?) Anyway, I claim that .de is fairly rare in user * nroff programs, and the register loop below is pretty fast. */ void addmac(char *mac) { char **src, **dest, **loc; if (binsrch(mac) >= 0){ /* it's OK to redefine something */ #ifdef DEBUG printf("binsrch(%s) -> already in table\n", mac); #endif /* DEBUG */ return; } /* binsrch sets slot as a side effect */ #ifdef DEBUG printf("binsrch(%s) -> %d\n", mac, slot); #endif loc = &knowncmds[slot]; src = &knowncmds[ncmds-1]; dest = src+1; while (dest > loc) *dest-- = *src--; if ((*loc = strdup(mac)) == NULL) err(1, "strdup"); ncmds++; #ifdef DEBUG printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds); #endif } /* * Do a binary search in knowncmds for mac. * If found, return the index. If not, return -1. */ int binsrch(char *mac) { char *p; /* pointer to current cmd in list */ int d; /* difference if any */ int mid; /* mid point in binary search */ int top, bot; /* boundaries of bin search, inclusive */ top = ncmds-1; bot = 0; while (top >= bot) { mid = (top+bot)/2; p = knowncmds[mid]; d = p[0] - mac[0]; if (d == 0) d = p[1] - mac[1]; if (d == 0) return mid; if (d < 0) bot = mid + 1; else top = mid - 1; } slot = bot; /* place it would have gone */ return -1; }