summary refs log tree commit diff
path: root/usr.bin/unifdef
diff options
context:
space:
mode:
author Theo de Raadt <deraadt@cvs.openbsd.org> 2003-01-22 18:26:16 +0000
committer Theo de Raadt <deraadt@cvs.openbsd.org> 2003-01-22 18:26:16 +0000
commit a8f93a88a8a119d94c225817ad5706c7c9604837 (patch)
tree ede774dd65c7e0d5c7bd27dc45ba389e5c166034 /usr.bin/unifdef
parent 18c397d8a8ce974998380a8fd88671f84248deb8 (diff)
more sync from freebsd; tedu@Stanford.EDU
Diffstat (limited to 'usr.bin/unifdef')
-rw-r--r-- usr.bin/unifdef/unifdef.1 68
-rw-r--r-- usr.bin/unifdef/unifdef.c 210
2 files changed, 206 insertions, 72 deletions
diff --git a/usr.bin/unifdef/unifdef.1 b/usr.bin/unifdef/unifdef.1
index 4bfbc60f717..13ce3508232 100644
--- a/usr.bin/unifdef/unifdef.1
+++ b/usr.bin/unifdef/unifdef.1
@@ -1,4 +1,4 @@
-.\" $OpenBSD: unifdef.1,v 1.10 2002/12/02 07:18:50 deraadt Exp $
+.\" $OpenBSD: unifdef.1,v 1.11 2003/01/22 18:26:15 deraadt Exp $
.\" Copyright (c) 1985, 1991, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
@@ -45,7 +45,7 @@
.Nd remove preprocessor conditionals from code
.Sh SYNOPSIS
.Nm
-.Op Fl cklst
+.Op Fl ceklst
.Oo
.Fl I Ns Ar path
.Fl D Ns Ar sym
@@ -111,13 +111,9 @@ utility also understands just enough about C
to know when one of the directives is inactive
because it is inside
a comment,
-or a single or double quote.
-Parsing for quotes is very simplistic:
-when it finds an open quote,
-it ignores everything (except escaped quotes)
-until it finds a close quote, and
-it will not complain if it gets
-to the end of a line and finds no backslash for continuation.
+or affected by a backslash-continued line.
+It spots unusually-formatted preprocessor directives
+and knows when the layout is too odd to handle.
.Pp
Available options:
.Bl -tag -width indent -compact
@@ -146,6 +142,24 @@ is complemented,
i.e., the lines that would have been removed or blanked
are retained and vice versa.
.Pp
+.It Fl e
+Because
+.Nm
+processes its input one line at a time,
+it cannot remove preprocessor directives that span more than one line.
+The most common example of this is a directive with a multi-line
+comment hanging off its right hand end.
+By default,
+if
+.Nm
+has to process such a directive,
+it will complain that the line is too obfuscated.
+The
+.Fl e
+option changes the behavior so that,
+where possible,
+such lines are left unprocessed instead of reporting an error.
+.Pp
.It Fl k
Process
.Ic #if
@@ -179,7 +193,9 @@ for creating
command lines.
.Pp
.It Fl t
-Disables parsing for C comments and quotes, which is useful
+Disables parsing for C comments
+and line continuations,
+which is useful
for plain text.
.Pp
.It Fl iD Ns Ar sym
@@ -195,7 +211,7 @@ or code which is under construction,
then you must tell
.Nm
which symbols are used for that purpose so that it will not try to parse
-for quotes and comments
+comments and line continuations
inside those
.Ic #ifdef Ns s .
One specifies ignored symbols with
@@ -233,12 +249,23 @@ option of
.Sh DIAGNOSTICS
.Bl -item
.It
-Inappropriate elif, else or endif.
+Too many levels of nesting.
+.It
+Inappropriate
+.Ic #elif ,
+.Ic #else
+or
+.Ic #endif .
+.It
+Obfuscated preprocessor control line.
.It
Premature
.Tn EOF
-with line numbers of the unterminated
-.Ic #ifdef Ns s .
+(with the line number of the most recent unterminated
+.Ic #if ) .
+.It
+.Tn EOF
+in comment.
.El
.Pp
The
@@ -248,7 +275,18 @@ utility exits 0 if the output is an exact copy of the input,
.Sh BUGS
Expression evaluation is very limited.
.Pp
-Does not work correctly if input contains nul characters.
+Preprocessor control lines split across more than one physical line
+(because of comments or backslash-newline)
+cannot be handled in every situation.
+.Pp
+Trigraphs are not recognized.
+.Pp
+There is no support for symbols with different definitions at
+different points in the source file.
+.Pp
+The text-mode and ignore functionality doesn't correspond to modern
+.Xr cpp 1
+behaviour.
.Sh HISTORY
The
.Nm
diff --git a/usr.bin/unifdef/unifdef.c b/usr.bin/unifdef/unifdef.c
index 06bfde3b848..aa0d8a05ed6 100644
--- a/usr.bin/unifdef/unifdef.c
+++ b/usr.bin/unifdef/unifdef.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: unifdef.c,v 1.8 2003/01/18 23:42:51 deraadt Exp $ */
+/* $OpenBSD: unifdef.c,v 1.9 2003/01/22 18:26:15 deraadt Exp $ */
/*
* Copyright (c) 1985, 1993
* The Regents of the University of California. All rights reserved.
@@ -43,7 +43,7 @@ static const char copyright[] =
#if 0
static char sccsid[] = "@(#)unifdef.c 8.1 (Berkeley) 6/6/93";
#endif
-static const char rcsid[] = "$OpenBSD: unifdef.c,v 1.8 2003/01/18 23:42:51 deraadt Exp $";
+static const char rcsid[] = "$OpenBSD: unifdef.c,v 1.9 2003/01/22 18:26:15 deraadt Exp $";
#endif
/*
@@ -56,6 +56,9 @@ static const char rcsid[] = "$OpenBSD: unifdef.c,v 1.8 2003/01/18 23:42:51 deraa
* #else's and #endif's to see that they match their
* corresponding #ifdef or #ifndef
* generate #line directives in place of deleted code
+ *
+ * The first two items above require better buffer handling, which would
+ * also make it possible to handle all "dodgy" directives correctly.
*/
#include <ctype.h>
@@ -69,7 +72,6 @@ static const char rcsid[] = "$OpenBSD: unifdef.c,v 1.8 2003/01/18 23:42:51 deraa
/* types of input lines: */
typedef enum {
- LT_PLAIN, /* ordinary line */
LT_TRUEI, /* a true #if with ignore flag */
LT_FALSEI, /* a false #if with ignore flag */
LT_IF, /* an unknown #if */
@@ -80,13 +82,21 @@ typedef enum {
LT_ELFALSE, /* a false #elif */
LT_ELSE, /* #else */
LT_ENDIF, /* #endif */
+ LT_DODGY, /* flag: directive is not on one line */
+ LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
+ LT_PLAIN, /* ordinary line */
LT_EOF, /* end of file */
LT_COUNT
} Linetype;
static char const * const linetype_name[] = {
- "PLAIN", "TRUEI", "FALSEI", "IF", "TRUE", "FALSE",
- "ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF", "EOF"
+ "TRUEI", "FALSEI", "IF", "TRUE", "FALSE",
+ "ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF",
+ "DODGY TRUEI", "DODGY FALSEI",
+ "DODGY IF", "DODGY TRUE", "DODGY FALSE",
+ "DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE",
+ "DODGY ELSE", "DODGY ENDIF",
+ "PLAIN", "EOF"
};
/* state of #if processing */
@@ -143,11 +153,18 @@ static char const * const linestate_name[] = {
#define MAXSYMS 4096 /* maximum number of symbols */
/*
+ * Sometimes when editing a keyword the replacement text is longer, so
+ * we leave some space at the end of the tline buffer to accommodate this.
+ */
+#define EDITSLOP 10
+
+/*
* Globals.
*/
static bool complement; /* -c: do the complement */
static bool debugging; /* -d: debugging reports */
+static bool iocccok; /* -e: fewer IOCCC errors */
static bool killconsts; /* -k: eval constant #ifs */
static bool lnblank; /* -l: blank deleted lines */
static bool symlist; /* -s: output symbol list */
@@ -162,9 +179,7 @@ static FILE *input; /* input file pointer */
static const char *filename; /* input file name */
static int linenum; /* current line number */
-static char tline[MAXLINE + 10]; /* input buffer plus space */
-static const char *endtline = &tline[MAXLINE + 9]; /* tline ends here */
-
+static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */
static char *keyword; /* used for editing #elif's */
static Comment_state incomment; /* comment parser state */
@@ -184,13 +199,15 @@ static int findsym(const char *);
static void flushline(bool);
static Linetype getline(void);
static Linetype ifeval(const char **);
+static void ignoreoff(void);
+static void ignoreon(void);
+static void keywordedit(const char *);
static void nest(void);
static void process(void);
static const char *skipcomment(const char *);
static const char *skipsym(const char *);
static void state(Ifstate);
static int strlcmp(const char *, const char *, size_t);
-static void unignore(void);
static void usage(void);
#define endsym(c) (!isalpha((unsigned char)c) && !isdigit((unsigned char)c) && c != '_')
@@ -203,7 +220,7 @@ main(int argc, char *argv[])
{
int opt;
- while ((opt = getopt(argc, argv, "i:D:U:I:cdklst")) != -1)
+ while ((opt = getopt(argc, argv, "i:D:U:cdeklst")) != -1)
switch (opt) {
case 'i': /* treat stuff controlled by these symbols as text */
/*
@@ -225,15 +242,15 @@ main(int argc, char *argv[])
case 'U': /* undef a symbol */
addsym(false, false, optarg);
break;
- case 'I':
- /* no-op for compatibility with cpp */
- break;
case 'c': /* treat -D as -U and vice versa */
complement = true;
break;
case 'd':
debugging = true;
break;
+ case 'e': /* fewer errors from dodgy lines */
+ iocccok = true;
+ break;
case 'k': /* process constant #ifs */
killconsts = true;
break;
@@ -276,8 +293,8 @@ main(int argc, char *argv[])
static void
usage(void)
{
- fprintf(stderr, "usage: unifdef [-cdklst] [[-Dsym[=val]]"
- " [-Usym] [-iDsym[=val]] [-iUsym]] ... [file]\n");
+ fprintf(stderr, "usage: unifdef [-cdeklst]"
+ " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n");
exit(2);
}
@@ -285,22 +302,31 @@ usage(void)
* A state transition function alters the global #if processing state
* in a particular way. The table below is indexed by the current
* processing state and the type of the current line. A NULL entry
- * indicate that processing is complete.
+ * indicates that processing is complete.
*
* Nesting is handled by keeping a stack of states; some transition
- * functions increase or decrease the depth. They also maintin the
+ * functions increase or decrease the depth. They also maintain the
* ignore state on a stack. In some complicated cases they have to
* alter the preprocessor directive, as follows.
*
* When we have processed a group that starts off with a known-false
* #if/#elif sequence (which has therefore been deleted) followed by a
- * #elif that we don't understand and therefore must keep, we turn the
+ * #elif that we don't understand and therefore must keep, we edit the
* latter into a #if to keep the nesting correct.
*
* When we find a true #elif in a group, the following block will
* always be kept and the rest of the sequence after the next #elif or
- * #else will be discarded. We change the #elif to #else and the
+ * #else will be discarded. We edit the #elif into a #else and the
* following directive to #endif since this has the desired behaviour.
+ *
+ * "Dodgy" directives are split across multiple lines, the most common
+ * example being a multi-line comment hanging off the right of the
+ * directive. We can handle them correctly only if there is no change
+ * from printing to dropping (or vice versa) caused by that directive.
+ * If the directive is the first of a group we have a choice between
+ * failing with an error, or passing it through unchanged instead of
+ * evaluating it. The latter is not the default to avoid questions from
+ * users about unifdef unexpectedly leaving behind preprocessor directives.
*/
typedef void state_fn(void);
@@ -353,7 +379,7 @@ static void
Strue(void)
{
drop();
- unignore();
+ ignoreoff();
state(IS_TRUE_PREFIX);
}
@@ -361,7 +387,7 @@ static void
Sfalse(void)
{
drop();
- unignore();
+ ignoreoff();
state(IS_FALSE_PREFIX);
}
@@ -377,7 +403,7 @@ static void
Pelif(void)
{
print();
- unignore();
+ ignoreoff();
state(IS_PASS_MIDDLE);
}
@@ -400,7 +426,7 @@ static void
Dfalse(void)
{
drop();
- unignore();
+ ignoreoff();
state(IS_FALSE_TRAILER);
}
@@ -408,7 +434,7 @@ static void
Delif(void)
{
drop();
- unignore();
+ ignoreoff();
state(IS_FALSE_MIDDLE);
}
@@ -455,28 +481,58 @@ Ffalse(void)
Sfalse();
}
+/* variable pedantry for obfuscated lines */
+static void
+Oiffy(void)
+{
+ if (iocccok)
+ Fpass();
+ else
+ Eioccc();
+ ignoreon();
+}
+
+static void
+Oif(void)
+{
+ if (iocccok)
+ Fpass();
+ else
+ Eioccc();
+}
+
+static void
+Oelif(void)
+{
+ if (iocccok)
+ Pelif();
+ else
+ Eioccc();
+}
+
/* ignore comments in this block */
static void
Idrop(void)
{
Fdrop();
- ignore[depth] = true;
+ ignoreon();
}
static void
-Itrue(void) {
+Itrue(void)
+{
Ftrue();
- ignore[depth] = true;
+ ignoreon();
}
static void
Ifalse(void)
{
Ffalse();
- ignore[depth] = true;
+ ignoreon();
}
-/* modify this line */
+/* edit this line */
static void
Mpass (void)
{
@@ -487,55 +543,93 @@ Mpass (void)
static void
Mtrue (void)
{
- strlcpy(keyword, "else\n", endtline - keyword);
- print();
+ keywordedit("else\n");
state(IS_TRUE_MIDDLE);
}
static void
Melif (void)
{
- strlcpy(keyword, "endif\n", endtline - keyword);
- print();
+ keywordedit("endif\n");
state(IS_FALSE_TRAILER);
}
static void
Melse (void)
{
- strlcpy(keyword, "endif\n", endtline - keyword);
- print();
+ keywordedit("endif\n");
state(IS_FALSE_ELSE);
}
static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
/* IS_OUTSIDE */
-{print,Itrue,Ifalse,Fpass,Ftrue,Ffalse,Eelif, Eelif, Eelif, Eelse,Eendif,NULL},
+{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
+ Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif,
+ print, NULL },
/* IS_FALSE_PREFIX */
-{drop, Idrop,Idrop, Fdrop,Fdrop,Fdrop, Mpass, Strue, Sfalse,Selse,Dendif,Eeof},
+{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
+ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
+ drop, Eeof },
/* IS_TRUE_PREFIX */
-{print,Itrue,Ifalse,Fpass,Ftrue,Ffalse,Dfalse,Dfalse,Dfalse,Delse,Dendif,Eeof},
+{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
+ Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
+ print, Eeof },
/* IS_PASS_MIDDLE */
-{print,Itrue,Ifalse,Fpass,Ftrue,Ffalse,Pelif, Mtrue, Delif, Pelse,Pendif,Eeof},
+{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
+ Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif,
+ print, Eeof },
/* IS_FALSE_MIDDLE */
-{drop, Idrop,Idrop, Fdrop,Fdrop,Fdrop, Pelif, Mtrue, Delif, Pelse,Pendif,Eeof},
+{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
+ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
+ drop, Eeof },
/* IS_TRUE_MIDDLE */
-{print,Itrue,Ifalse,Fpass,Ftrue,Ffalse,Melif, Melif, Melif, Melse,Pendif,Eeof},
+{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
+ Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
+ print, Eeof },
/* IS_PASS_ELSE */
-{print,Itrue,Ifalse,Fpass,Ftrue,Ffalse,Eelif, Eelif, Eelif, Eelse,Pendif,Eeof},
+{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
+ Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif,
+ print, Eeof },
/* IS_FALSE_ELSE */
-{drop, Idrop,Idrop, Fdrop,Fdrop,Fdrop, Eelif, Eelif, Eelif, Eelse,Dendif,Eeof},
+{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
+ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
+ drop, Eeof },
/* IS_TRUE_ELSE */
-{print,Itrue,Ifalse,Fpass,Ftrue,Ffalse,Eelif, Eelif, Eelif, Eelse,Dendif,Eeof},
+{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
+ Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc,
+ print, Eeof },
/* IS_FALSE_TRAILER */
-{drop, Idrop,Idrop, Fdrop,Fdrop,Fdrop, Dfalse,Dfalse,Dfalse,Delse,Dendif,Eeof}
-/*PLAIN TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF EOF*/
+{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
+ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
+ drop, Eeof }
+/*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF
+ TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY)
+ PLAIN EOF */
};
/*
* State machine utility functions
*/
static void
+ignoreoff(void)
+{
+ ignoring[depth] = ignoring[depth-1];
+}
+
+static void
+ignoreon(void)
+{
+ ignoring[depth] = true;
+}
+
+static void
+keywordedit(const char *replacement)
+{
+ strlcpy(keyword, replacement, tline + sizeof(tline) - keyword);
+ print();
+}
+
+static void
nest(void)
{
depth += 1;
@@ -550,12 +644,6 @@ state(Ifstate is)
ifstate[depth] = is;
}
-static void
-unignore(void)
-{
- ignore[depth] = ignore[depth-1];
-}
-
/*
* Write a line to the output or not, according to command line options.
*/
@@ -626,6 +714,7 @@ getline(void)
keyword = tline + (cp - tline);
cp = skipsym(cp);
kwlen = cp - keyword;
+ /* no way can we deal with a continuation inside a keyword */
if (strncmp(cp, "\\\n", 2) == 0)
Eioccc();
if (strlcmp("ifdef", keyword, kwlen) == 0 ||
@@ -665,8 +754,12 @@ getline(void)
if (retval == LT_ELTRUE || retval == LT_ELFALSE)
retval = LT_ELIF;
}
- if (retval != LT_PLAIN && (wascomment || incomment))
- Eioccc();
+ if (retval != LT_PLAIN && (wascomment || incomment)) {
+ retval += LT_DODGY;
+ if (incomment)
+ linestate = LS_DIRTY;
+ }
+ /* skipcomment should have changed the state */
if (linestate == LS_HASH)
abort(); /* bug */
}
@@ -680,7 +773,9 @@ getline(void)
}
/*
- * These are the operators that are supported by the expression evaluator.
+ * These are the operators that are supported by the expression
+ * evaluator. Note that if support for division is added then we also
+ * need short-circuiting booleans because of divide-by-zero.
*/
static int
op_lt(int a, int b)
@@ -1070,8 +1165,9 @@ static void
error(const char *msg)
{
if (depth == 0)
- errx(2, "%s: %d: %s", filename, linenum, msg);
+ warnx("%s: %d: %s", filename, linenum, msg);
else
- errx(2, "%s: %d: %s (#if line %d depth %d)",
+ warnx("%s: %d: %s (#if line %d depth %d)",
filename, linenum, msg, stifline[depth], depth);
+ errx(2, "output may be truncated");
}