diff options
author | Theo de Raadt <deraadt@cvs.openbsd.org> | 2002-12-02 07:16:24 +0000 |
---|---|---|
committer | Theo de Raadt <deraadt@cvs.openbsd.org> | 2002-12-02 07:16:24 +0000 |
commit | 01986fd70f4f1073a22fe5046a608a2a6c2e6e30 (patch) | |
tree | 80f8ecb5d4774b5fbe0ab77cb839174ac8f540ff /usr.bin | |
parent | bbb67bb04a170bdc451e6121f8d95d7248c85090 (diff) |
freebsd diffs; merged by tedu@Stanford.EDU
Diffstat (limited to 'usr.bin')
-rw-r--r-- | usr.bin/unifdef/Makefile | 8 | ||||
-rw-r--r-- | usr.bin/unifdef/unifdef.1 | 234 | ||||
-rw-r--r-- | usr.bin/unifdef/unifdef.c | 1170 |
3 files changed, 920 insertions, 492 deletions
diff --git a/usr.bin/unifdef/Makefile b/usr.bin/unifdef/Makefile index 3c17eaaf066..35f0b597fe6 100644 --- a/usr.bin/unifdef/Makefile +++ b/usr.bin/unifdef/Makefile @@ -1,5 +1,11 @@ -# $OpenBSD: Makefile,v 1.3 1997/09/21 11:51:32 deraadt Exp $ +# $OpenBSD: Makefile,v 1.4 2002/12/02 07:16:23 deraadt Exp $ PROG= unifdef +COPTS+= -Wall -Werror +#MLINKS= unifdef.1 unifdefall.1 + +#beforeinstall: +# ${INSTALL} ${INSTALL_COPY} -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \ +# ${.CURDIR}/unifdefall.sh ${DESTDIR}${BINDIR}/unifdefall .include <bsd.prog.mk> diff --git a/usr.bin/unifdef/unifdef.1 b/usr.bin/unifdef/unifdef.1 index 8b0e490fdae..037ace25b2e 100644 --- a/usr.bin/unifdef/unifdef.1 +++ b/usr.bin/unifdef/unifdef.1 @@ -1,11 +1,9 @@ -.\" $OpenBSD: unifdef.1,v 1.8 2000/03/11 21:40:06 aaron Exp $ -.\" $NetBSD: unifdef.1,v 1.4 1994/12/07 00:33:48 jtc Exp $ -.\" +.\" $OpenBSD: unifdef.1,v 1.9 2002/12/02 07:16:23 deraadt Exp $ .\" Copyright (c) 1985, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" This code is derived from software contributed to Berkeley by -.\" Dave Yost. +.\" Dave Yost. Support for #if and #elif was added by Tony Finch. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions @@ -36,33 +34,85 @@ .\" SUCH DAMAGE. .\" .\" @(#)unifdef.1 8.2 (Berkeley) 4/1/94 +.\" $dotat: things/unifdef.1,v 1.26 2002/09/24 19:44:12 fanf2 Exp $ +.\" $FreeBSD: src/usr.bin/unifdef/unifdef.1,v 1.15 2002/09/24 19:48:39 fanf Exp $ .\" -.Dd April 1, 1994 +.Dd September 24, 2002 .Dt UNIFDEF 1 .Os .Sh NAME -.Nm unifdef -.Nd remove ifdef'ed lines +.Nm unifdef , unifdefall +.Nd remove preprocessor conditionals from code .Sh SYNOPSIS -.Nm unifdef -.Op Fl clt +.Nm +.Op Fl cklst .Oo +.Fl I Ns Ar path .Fl D Ns Ar sym +.Ns Op = Ns Ar val .Fl U Ns Ar sym .Fl iD Ns Ar sym -.Fl iD Ns Ar sym +.Ns Op = Ns Ar val +.Fl iU Ns Ar sym .Oc .Ar ... .Op Ar file +.Nm unifdefall +.Op Fl I Ns Ar path +.Ar ... +.Ar file .Sh DESCRIPTION +The +.Nm +utility selectively processes conditional +.Xr cpp 1 +directives. +It removes from a file +both the directives +and any additional text that they specify should be removed, +while otherwise leaving the file alone. +.Pp +The .Nm -is useful for removing ifdef'ed lines -from a file while otherwise leaving the file alone. +utility acts on +.Ic #if , #ifdef , #ifndef , #elif , #else , +and +.Ic #endif +lines, +and it understands only the commonly-used subset +of the expression syntax for +.Ic #if +and +.Ic #elif +lines. +It handles +integer values of symbols defined on the command line, +the +.Fn defined +operator applied to symbols defined or undefined on the command line, +the operators +.Ic \&! , < , > , <= , >= , == , != , && , || , +and parenthesized expressions. +Anything that it does not understand is passed through unharmed. +It only processes +.Ic #ifdef +and +.Ic #ifndef +directives if the symbol is specified on the command line, +otherwise they are also passed through unchanged. +By default, it ignores +.Ic #if +and +.Ic #elif +lines with constant expressions, +or they may be processed by specifying the +.Fl k +flag on the command line. +.Pp +The .Nm -acts on -#ifdef, #ifndef, #else, and #endif lines, -and it knows only enough about C -to know when one of these is inactive +utility also understands just enough about C +to know when one of the directives is inactive because it is inside a comment, or a single or double quote. @@ -73,27 +123,38 @@ until it finds a close quote, and it will not complain if it gets to the end of a line and finds no backslash for continuation. .Pp -The options are as follows: -.Bl -tag -width Ds -.It Xo Fl D Ns Ar sym , -.Fl U Ns Ar sym -.Xc -Specify which symbols to define or undefine, -and the lines inside those ifdefs will be copied to the output or removed as -appropriate. -The ifdef, ifndef, else, and endif lines associated with -.Ar sym -will also be removed. -ifdefs involving symbols you don't specify +A script called +.Nm unifdefall +can be used to remove all conditional +.Xr cpp 1 +directives from a file. +It uses +.Nm Fl s and -.Dq #if -control lines are untouched and copied out -along with their associated -ifdef, else, and endif lines. -If an ifdef X occurs nested inside another ifdef X, then the -inside ifdef is treated as if it were an unrecognized symbol. +.Nm cpp Fl dM +to get lists of all the controlling symbols +and their definitions (or lack thereof), +then invokes +.Nm +with appropriate arguments to process the file. +.Pp +Available options: +.Bl -tag -width indent -compact +.It Fl D Ns Ar sym +.Ns Op = Ns Ar val +Specify that a symbol is defined, +and optionally specify what value to give it +for the purpose of handling +.Ic #if +and +.Ic #elif +directives. +.Pp +.It Fl U Ns Ar sym +Specify that a symbol is undefined. If the same symbol appears in more than one argument, the last occurrence dominates. +.Pp .It Fl c If the .Fl c @@ -103,37 +164,88 @@ then the operation of is complemented, i.e., the lines that would have been removed or blanked are retained and vice versa. +.Pp +.It Fl k +Process +.Ic #if +and +.Ic #elif +lines with constant expressions. +By default, sections controlled by such lines are passed through unchanged +because they typically start +.Li #if 0 +and are used as a kind of comment to sketch out future or past development. +It would be rude to strip them out, just as it would be for normal comments. +.Pp .It Fl l Replace removed lines with blank lines instead of deleting them. +.Pp +.It Fl s +Instead of processing the input file as usual, +this option causes +.Nm +to produce a list of symbols that appear in expressions +that +.Nm +understands. +It is useful in conjunction with the +.Fl dM +option of +.Xr cpp 1 +for creating +.Nm +command lines. +.Pp .It Fl t Disables parsing for C comments and quotes, which is useful for plain text. -.It Xo Fl iD Ns Ar sym , -.Fl iU Ns Ar sym -.Xc -Ignore ifdefs. -If your C code uses ifdefs to delimit non-C lines, +.Pp +.It Fl iD Ns Ar sym +.Ns Op = Ns Ar val +.It Fl iU Ns Ar sym +Ignore +.Ic #ifdef Ns s . +If your C code uses +.Ic #ifdef Ns s +to delimit non-C lines, such as comments or code which is under construction, then you must tell .Nm -which symbols are used for that purpose so that it won't try to parse +which symbols are used for that purpose so that it will not try to parse for quotes and comments -inside those ifdefs. -One specifies ignored ifdefs with +inside those +.Ic #ifdef Ns s . +One specifies ignored symbols with .Fl iD Ns Ar sym +.Ns Oo = Ns Ar val Oc and .Fl iU Ns Ar sym similar to .Fl D Ns Ar sym +.Ns Op = Ns Ar val and .Fl U Ns Ar sym above. +.Pp +.It Fl I Ns Ar path +Specifies to +.Nm unifdefall +an additional place to look for +.Ic #include +files. +This option is ignored by +.Nm +for compatibility with +.Xr cpp 1 +and to simplify the implementation of +.Nm unifdefall . .El .Pp +The .Nm -copies its output to +utility copies its output to .Em stdout and will take its input from .Em stdin @@ -141,30 +253,36 @@ if no .Ar file argument is given. .Pp +The .Nm -works nicely with the +utility works nicely with the .Fl D Ns Ar sym -option added to +option of +.Xr diff 1 . +.Sh SEE ALSO +.Xr cpp 1 , .Xr diff 1 -as of the 4.1 Berkeley Software Distribution. .Sh DIAGNOSTICS -Inappropriate else or endif. -.br +.Bl -item +.It +Inappropriate elif, else or endif. +.It Premature .Tn EOF -with line numbers of the unterminated #ifdefs. +with line numbers of the unterminated +.Ic #ifdef Ns s . +.El .Pp -Exit status is 0 if output is exact copy of input, 1 if not, 2 if trouble. -.Sh SEE ALSO -.Xr diff 1 +The +.Nm +utility exits 0 if the output is an exact copy of the input, +1 if not, and 2 if in trouble. +.Sh BUGS +Expression evaluation is very limited. +.Pp +Does not work correctly if input contains nul characters. .Sh HISTORY The .Nm command appeared in .Bx 4.3 . -.Sh BUGS -Should try to deal with -.Dq #if -lines. -.Pp -Doesn't work correctly if input contains null characters. diff --git a/usr.bin/unifdef/unifdef.c b/usr.bin/unifdef/unifdef.c index 4d95c39107d..0501e06a7ce 100644 --- a/usr.bin/unifdef/unifdef.c +++ b/usr.bin/unifdef/unifdef.c @@ -1,12 +1,10 @@ -/* $OpenBSD: unifdef.c,v 1.6 2002/10/04 20:27:16 deraadt Exp $ */ -/* $NetBSD: unifdef.c,v 1.6 1998/10/08 01:31:59 wsanchez Exp $ */ - +/* $OpenBSD: unifdef.c,v 1.7 2002/12/02 07:16:23 deraadt Exp $ */ /* * Copyright (c) 1985, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by - * Dave Yost. + * Dave Yost. Support for #if and #elif was added by Tony Finch. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -38,22 +36,20 @@ */ #ifndef lint -static char copyright[] = +static const char copyright[] = "@(#) Copyright (c) 1985, 1993\n\ The Regents of the University of California. All rights reserved.\n"; -#endif /* not lint */ -#ifndef lint #if 0 static char sccsid[] = "@(#)unifdef.c 8.1 (Berkeley) 6/6/93"; #endif -static char rcsid[] = "$OpenBSD: unifdef.c,v 1.6 2002/10/04 20:27:16 deraadt Exp $"; -#endif /* not lint */ +static const char rcsid[] = "$OpenBSD: unifdef.c,v 1.7 2002/12/02 07:16:23 deraadt Exp $"; +#endif /* * unifdef - remove ifdef'ed lines * - * Warning: will not work correctly if input contains null characters. + * Warning: will not work correctly if input contains nul characters. * * Wishlist: * provide an option which will append the name of the @@ -61,398 +57,669 @@ static char rcsid[] = "$OpenBSD: unifdef.c,v 1.6 2002/10/04 20:27:16 deraadt Exp * provide an option which will check symbols after * #else's and #endif's to see that they match their * corresponding #ifdef or #ifndef + * generate #line directives in place of deleted code */ -#include <stdio.h> #include <ctype.h> +#include <err.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> -FILE *input; -#ifndef YES -#define YES 1 -#define NO 0 -#endif /* YES */ -#define C_COMMENT 1 -#define CXX_COMMENT 2 -typedef int Bool; - -char *progname; -char *filename; -char text; /* -t option in effect: this is a text file */ -char lnblank; /* -l option in effect: blank deleted lines */ -char complement; /* -c option in effect: complement the - * operation */ - -#define MAXSYMS 100 -char *symname[MAXSYMS]; /* symbol name */ -char true[MAXSYMS]; /* -Dsym */ -char ignore[MAXSYMS]; /* -iDsym or -iUsym */ -char insym[MAXSYMS]; /* state: false, inactive, true */ -#define SYM_INACTIVE 0 /* symbol is currently inactive */ -#define SYM_FALSE 1 /* symbol is currently false */ -#define SYM_TRUE 2 /* symbol is currently true */ - -char nsyms; -char incomment; /* inside C comment */ - -#define QUOTE_NONE 0 -#define QUOTE_SINGLE 1 -#define QUOTE_DOUBLE 2 -char inquote; /* inside single or double quotes */ -int exitstat; - -int error(int, int, int); -int findsym(char *); -void flushline(Bool); -int getlin(char *, int, FILE *, int); -void pfile(void); -void prname(void); -char *skipcomment(char *); -char *skipquote(char *, int); - -int -main(argc, argv) - int argc; - char **argv; -{ - char **curarg; - char *cp; - char *cp1; - char ignorethis; - - progname = argv[0][0] ? argv[0] : "unifdef"; - - for (curarg = &argv[1]; --argc > 0; curarg++) { - if (*(cp1 = cp = *curarg) != '-') - break; - if (*++cp1 == 'i') { - ignorethis = YES; - cp1++; - } else - ignorethis = NO; - if ((*cp1 == 'D' - || *cp1 == 'U' - ) - && cp1[1] != '\0' - ) { - int symind; - - if ((symind = findsym(&cp1[1])) < 0) { - if (nsyms >= MAXSYMS) { - prname(); - fprintf(stderr, "too many symbols.\n"); - exit(2); - } - symind = nsyms++; - symname[symind] = &cp1[1]; - insym[symind] = SYM_INACTIVE; - } - ignore[symind] = ignorethis; - true[symind] = *cp1 == 'D' ? YES : NO; - } else - if (ignorethis) - goto unrec; - else - if (strcmp(&cp[1], "t") == 0) - text = YES; - else - if (strcmp(&cp[1], "l") == 0) - lnblank = YES; - else - if (strcmp(&cp[1], "c") == 0) - complement = YES; - else { - unrec: - prname(); - fprintf(stderr, "unrecognized option: %s\n", cp); - goto usage; - } - } - if (nsyms == 0) { -usage: - fprintf(stderr, "\ -Usage: %s [-l] [-t] [-c] [[-Dsym] [-Usym] [-iDsym] [-iUsym]]... [file]\n\ - At least one arg from [-D -U -iD -iU] is required\n", progname); - exit(2); - } - if (argc > 1) { - prname(); - fprintf(stderr, "can only do one file.\n"); - } else - if (argc == 1) { - filename = *curarg; - if ((input = fopen(filename, "r")) != NULL) { - pfile(); - (void) fclose(input); - } else { - prname(); - fprintf(stderr, "can't open "); - perror(*curarg); - } - } else { - filename = "[stdin]"; - input = stdin; - pfile(); - } - - (void) fflush(stdout); - exit(exitstat); -} /* types of input lines: */ -typedef int Linetype; -#define LT_PLAIN 0 /* ordinary line */ -#define LT_TRUE 1 /* a true #ifdef of a symbol known to us */ -#define LT_FALSE 2 /* a false #ifdef of a symbol known to us */ -#define LT_OTHER 3 /* an #ifdef of a symbol not known to us */ -#define LT_IF 4 /* an #ifdef of a symbol not known to us */ -#define LT_ELSE 5 /* #else */ -#define LT_ENDIF 6 /* #endif */ -#define LT_LEOF 7 /* end of file */ -Linetype checkline(int *); - -typedef int Reject_level; -Reject_level reject; /* 0 or 1: pass thru; 1 or 2: ignore comments */ -#define REJ_NO 0 -#define REJ_IGNORE 1 -#define REJ_YES 2 -int doif(int, int, Reject_level, int); - -int linenum; /* current line number */ -int stqcline; /* start of current coment or quote */ -char *errs[] = { +typedef enum { + LT_PLAIN, /* ordinary line */ + LT_TRUE, /* a true #if */ + LT_FALSE, /* a false #if */ + LT_ELTRUE, /* a true #elif */ + LT_ELFALSE, /* a false #elif */ + LT_IF, /* an unknown #if */ + LT_ELIF, /* an unknown #elif */ + LT_ELSE, /* #else */ + LT_ENDIF, /* #endif */ + LT_EOF /* end of file */ +} Linetype; + +typedef enum { /* 0 or 1: pass thru; 1 or 2: ignore comments */ + REJ_NO, + REJ_IGNORE, + REJ_YES +} Reject_level; + +typedef enum { + NO_COMMENT = false, + C_COMMENT, + CXX_COMMENT +} Comment_state; + +typedef enum { + QUOTE_NONE = false, + QUOTE_SINGLE, + QUOTE_DOUBLE +} Quote_state; + +const char *const errs[] = { #define NO_ERR 0 "", #define END_ERR 1 "", -#define ELSE_ERR 2 +#define ELIF_ERR 2 + "Inappropriate elif", +#define ELSE_ERR 3 "Inappropriate else", -#define ENDIF_ERR 3 +#define ENDIF_ERR 4 "Inappropriate endif", -#define IEOF_ERR 4 +#define IEOF_ERR 5 "Premature EOF in ifdef", -#define CEOF_ERR 5 +#define CEOF_ERR 6 "Premature EOF in comment", -#define Q1EOF_ERR 6 +#define Q1EOF_ERR 7 "Premature EOF in quoted character", -#define Q2EOF_ERR 7 +#define Q2EOF_ERR 8 "Premature EOF in quoted string" }; -/* States for inif arg to doif */ -#define IN_NONE 0 -#define IN_IF 1 -#define IN_ELSE 2 -void -pfile() +/* + * These are the operators that are supported by the expression evaluator. + */ +static int op_lt(int a, int b) { return a < b; } +static int op_gt(int a, int b) { return a > b; } +static int op_le(int a, int b) { return a <= b; } +static int op_ge(int a, int b) { return a >= b; } +static int op_eq(int a, int b) { return a == b; } +static int op_ne(int a, int b) { return a != b; } +static int op_or(int a, int b) { return a || b; } +static int op_and(int a, int b) { return a && b; } + +struct ops; + +/* + * An evaluation function takes three arguments, as follows: (1) a pointer to + * an element of the precedence table which lists the operators at the current + * level of precedence; (2) a pointer to an integer which will receive the + * value of the expression; and (3) a pointer to a char* that points to the + * expression to be evaluated and that is updated to the end of the expression + * when evaluation is complete. The function returns LT_FALSE if the value of + * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the + * expression could not be evaluated. + */ +typedef Linetype eval_fn(struct ops *, int *, const char **); + +eval_fn eval_table, eval_unary; + +/* + * The precedence table. Expressions involving binary operators are evaluated + * in a table-driven way by eval_table. When it evaluates a subexpression it + * calls the inner function with its first argument pointing to the next + * element of the table. Innermost expressions have special non-table-driven + * handling. + */ +struct ops { + eval_fn *inner; + struct op { + const char *str; + int (*fn)(int, int); + } op[5]; +} eval_ops[] = { + { eval_table, { { "||", op_or } } }, + { eval_table, { { "&&", op_and } } }, + { eval_table, { { "==", op_eq }, + { "!=", op_ne } } }, + { eval_unary, { { "<=", op_le }, + { ">=", op_ge }, + { "<", op_lt }, + { ">", op_gt } } } +}; + +FILE *input; +const char *filename; +int linenum; /* current line number */ +int stifline; /* start of current #if */ +int stqcline; /* start of current coment or quote */ +bool keepthis; /* ignore this #if's value 'cause it's const */ + +#define MAXLINE 1024 +#define KWSIZE 8 +/* tline has extra space so that it isn't overflowed when editing #elifs */ +char tline[MAXLINE+KWSIZE]; /* input buffer */ +char *keyword; /* used for editing #elif's */ + +bool complement; /* -c option in effect: do the complement */ +bool debugging; /* -d option in effect: debugging reports */ +bool killconsts; /* -k option in effect: eval constant #ifs */ +bool lnblank; /* -l option in effect: blank deleted lines */ +bool symlist; /* -s option in effect: output symbol list */ +bool text; /* -t option in effect: this is a text file */ + +#define MAXSYMS 1000 +const char *symname[MAXSYMS]; /* symbol name */ +const char *value[MAXSYMS]; /* -Dsym=value */ +bool ignore[MAXSYMS]; /* -iDsym or -iUsym */ + +int nsyms = 1; /* symbol 0 is used for tracking #ifs */ + +Reject_level reject; /* what kind of filtering we are doing */ +Comment_state incomment; /* inside C comment */ +Quote_state inquote; /* inside single or double quotes */ + +Linetype checkline(int *); +void debug(const char *, ...); +Linetype process(int); +void doif(int, Linetype, bool); +void elif2if(void); +void elif2endif(void); +void error(int, int); +void addsym(bool, bool, char *); +int findsym(const char *); +void flushline(bool); +#if 0 +int getline(char *, int, FILE *, bool); +#endif +Linetype ifeval(const char **); +const char *skipcomment(const char *); +const char *skipquote(const char *, Quote_state); +const char *skipsym(const char *); +void usage(void); + +#define endsym(c) (!isalpha((unsigned char)c) && !isdigit((unsigned char)c) && c != '_') + +int +main(int argc, char *argv[]) { - reject = REJ_NO; - (void) doif(-1, IN_NONE, reject, 0); - return; + int opt; + + while ((opt = getopt(argc, argv, "i:D:U:I:cdklst")) != -1) + switch (opt) { + case 'i': /* treat stuff controlled by these symbols as text */ + /* + * For strict backwards-compatibility the U or D + * should be immediately after the -i but it doesn't + * matter much if we relax that requirement. + */ + opt = *optarg++; + if (opt == 'D') + addsym(true, true, optarg); + else if (opt == 'U') + addsym(true, false, optarg); + else + usage(); + break; + case 'D': /* define a symbol */ + addsym(false, true, optarg); + break; + case 'U': /* undef a symbol */ + addsym(false, false, optarg); + break; + case 'I': + /* ignore for compatibility with cpp */ + break; + case 'c': /* treat -D as -U and vice versa */ + complement = true; + break; + case 'k': /* process constant #ifs */ + killconsts = true; + break; + case 'd': + debugging = true; + break; + case 'l': /* blank deleted lines instead of omitting them */ + lnblank = true; + break; + case 's': /* only output list of symbols that control #ifs */ + symlist = true; + break; + case 't': /* don't parse C comments or strings */ + text = true; + break; + default: + usage(); + } + argc -= optind; + argv += optind; + if (nsyms == 1 && !symlist) { + warnx("must -D or -U at least one symbol"); + usage(); + } + if (argc > 1) { + errx(2, "can only do one file"); + } else if (argc == 1 && strcmp(*argv, "-") != 0) { + filename = *argv; + if ((input = fopen(filename, "r")) != NULL) { + (void) process(0); + (void) fclose(input); + } else + err(2, "can't open %s", *argv); + } else { + filename = "[stdin]"; + input = stdin; + (void) process(0); + } + + exit(0); } -int -doif(thissym, inif, prevreject, depth) - int thissym; /* index of the symbol who was last ifdef'ed */ - int inif; /* YES or NO we are inside an ifdef */ - Reject_level prevreject;/* previous value of reject */ - int depth; /* depth of ifdef's */ +void +usage(void) { - Linetype lineval; - Reject_level thisreject; - int doret; /* tmp return value of doif */ - int cursym; /* index of the symbol returned by checkline */ - int stline; /* line number when called this time */ + fprintf (stderr, "usage: unifdef [-cdklst] [[-Dsym[=val]]" + "[-Usym] [-iDsym[=val]] [-iUsym]] ... [file]\n"); + exit (2); +} - stline = linenum; +/* + * This function processes #if lines and alters the pass-through + * state accordingly. All the complicated state transition suff is + * dealt with in this function, as well as checking that the + * #if/#elif/#else/#endif lines happen in the correct order. Lines + * between #if lines are handled by a recursive call to process(). + */ +void +doif(int depth, Linetype lineval, bool ignoring) +{ + Reject_level savereject; + bool active; + bool donetrue; + bool inelse; + int saveline; + + debug("#if line %d code %d depth %d", + linenum, lineval, depth); + saveline = stifline; + stifline = linenum; + savereject = reject; + inelse = false; + donetrue = false; + if (lineval == LT_IF || reject != REJ_NO) { + active = false; + ignoring = false; + flushline(true); + } else if (ignoring) { + active = false; + flushline(true); + if (lineval == LT_FALSE) + reject = REJ_IGNORE; + else + donetrue = true; + } else { + active = true; + flushline(false); + if (lineval == LT_FALSE) + reject = REJ_YES; + else + donetrue = true; + } + debug("active %d ignore %d", active, ignoring); for (;;) { - switch (lineval = checkline(&cursym)) { - case LT_PLAIN: - flushline(YES); + switch (lineval = process(depth)) { + case LT_ELIF: + debug("#elif start %d line %d code %d depth %d", + stifline, linenum, lineval, depth); + if (inelse) + error(ELIF_ERR, depth); + donetrue = false; + reject = savereject; + if (active) { + active = false; + elif2if(); + flushline(true); + } else { + ignoring = false; + flushline(true); + } + debug("active %d ignore %d", active, ignoring); break; - - case LT_TRUE: - case LT_FALSE: - thisreject = reject; - if (lineval == LT_TRUE) - insym[cursym] = SYM_TRUE; + case LT_ELTRUE: + case LT_ELFALSE: + debug("#elif start %d line %d code %d depth %d", + stifline, linenum, lineval, depth); + if (inelse) + error(ELIF_ERR, depth); + if (active) + flushline(false); else { - if (reject != REJ_YES) - reject = ignore[cursym] ? REJ_IGNORE : REJ_YES; - insym[cursym] = SYM_FALSE; + ignoring = false; + active = true; + elif2endif(); + flushline(true); } - if (ignore[cursym]) - flushline(YES); + if (lineval == LT_ELFALSE) + reject = REJ_YES; else { - exitstat = 1; - flushline(NO); + reject = REJ_NO; + donetrue = true; } - if ((doret = doif(cursym, IN_IF, thisreject, depth + 1)) != NO_ERR) - return error(doret, stline, depth); - break; - - case LT_IF: - case LT_OTHER: - flushline(YES); - if ((doret = doif(-1, IN_IF, reject, depth + 1)) != NO_ERR) - return error(doret, stline, depth); + debug("active %d ignore %d", active, ignoring); break; - case LT_ELSE: - if (inif != IN_IF) - return error(ELSE_ERR, linenum, depth); - inif = IN_ELSE; - if (thissym >= 0) { - if (insym[thissym] == SYM_TRUE) { - reject = ignore[thissym] ? REJ_IGNORE : REJ_YES; - insym[thissym] = SYM_FALSE; - } else { /* (insym[thissym] == - * SYM_FALSE) */ - reject = prevreject; - insym[thissym] = SYM_TRUE; - } - if (!ignore[thissym]) { - flushline(NO); - break; + debug("#else start %d line %d code %d depth %d", + stifline, linenum, lineval, depth); + if (inelse) + error(ELSE_ERR, depth); + if (active) { + flushline(false); + reject = REJ_YES; + if (reject == REJ_YES && !donetrue) + reject = REJ_NO; + } else { + flushline(true); + if (ignoring) { + if (reject == REJ_IGNORE) + reject = REJ_NO; } } - flushline(YES); + inelse = true; + debug("active %d ignore %d", active, ignoring); break; - case LT_ENDIF: - if (inif == IN_NONE) - return error(ENDIF_ERR, linenum, depth); - if (thissym >= 0) { - insym[thissym] = SYM_INACTIVE; - reject = prevreject; - if (!ignore[thissym]) { - flushline(NO); - return NO_ERR; - } - } - flushline(YES); - return NO_ERR; - - case LT_LEOF:{ - int err; - err = incomment - ? CEOF_ERR - : inquote == QUOTE_SINGLE - ? Q1EOF_ERR - : inquote == QUOTE_DOUBLE - ? Q2EOF_ERR - : NO_ERR; - if (inif != IN_NONE) { - if (err != NO_ERR) - (void) error(err, stqcline, depth); - return error(IEOF_ERR, stline, depth); - } else - if (err != NO_ERR) - return error(err, stqcline, depth); - else - return NO_ERR; - } + debug("#endif start %d line %d code %d depth %d", + stifline, linenum, lineval, depth); + if (active) + flushline(false); + else + flushline(true); + reject = savereject; + stifline = saveline; + return; + default: + /* bug */ + abort(); } } } -#define endsym(c) (!isalpha (c) && !isdigit (c) && c != '_') -#define MAXLINE 256 -char tline[MAXLINE]; +/* + * The main file processing routine. This function deals with passing + * through normal non-#if lines, correct nesting of #if sections, and + * checking that things terminate correctly at the end of file. The + * complicated stuff is delegated to doif(). + */ +Linetype +process(int depth) +{ + Linetype lineval; + int cursym; + + for (;;) { + linenum++; + if (fgets(tline, MAXLINE, input) == NULL) { + if (incomment) + error(CEOF_ERR, depth); + if (inquote == QUOTE_SINGLE) + error(Q1EOF_ERR, depth); + if (inquote == QUOTE_DOUBLE) + error(Q2EOF_ERR, depth); + if (depth != 0) + error(IEOF_ERR, depth); + return (LT_EOF); + } + switch (lineval = checkline(&cursym)) { + case LT_PLAIN: + flushline(true); + break; + case LT_IF: + case LT_TRUE: + case LT_FALSE: + doif(depth + 1, lineval, ignore[cursym]); + break; + case LT_ELIF: + case LT_ELTRUE: + case LT_ELFALSE: + case LT_ELSE: + case LT_ENDIF: + if (depth != 0) + return (lineval); + if (lineval == LT_ENDIF) + error(ENDIF_ERR, depth); + if (lineval == LT_ELSE) + error(ELSE_ERR, depth); + error(ELIF_ERR, depth); + default: + /* bug */ + abort(); + } + } +} +/* + * Parse a line and determine its type. + */ Linetype -checkline(cursym) - int *cursym; /* if LT_TRUE or LT_FALSE returned, set this - * to sym index */ +checkline(int *cursym) { - char *cp; + const char *cp; char *symp; - char *scp; Linetype retval; -#define KWSIZE 8 - char keyword[KWSIZE]; - - linenum++; - if (getlin(tline, sizeof tline, input, NO) == EOF) - return LT_LEOF; + char kw[KWSIZE]; retval = LT_PLAIN; - if (*(cp = tline) != '#' - || incomment - || inquote == QUOTE_SINGLE - || inquote == QUOTE_DOUBLE - ) + cp = skipcomment(tline); + if (*cp != '#' || incomment || inquote == QUOTE_SINGLE || + inquote == QUOTE_DOUBLE) goto eol; cp = skipcomment(++cp); - symp = keyword; + keyword = (char *)cp; + symp = kw; while (!endsym(*cp)) { *symp = *cp++; - if (++symp >= &keyword[KWSIZE]) + if (++symp >= &kw[KWSIZE]) goto eol; } *symp = '\0'; - if (strcmp(keyword, "ifdef") == 0) { - retval = YES; + if (strcmp(kw, "ifdef") == 0) { + retval = LT_TRUE; goto ifdef; - } else - if (strcmp(keyword, "ifndef") == 0) { - retval = NO; - ifdef: - scp = cp = skipcomment(++cp); - if (incomment) { - retval = LT_PLAIN; - goto eol; - } { - int symind; - - if ((symind = findsym(scp)) >= 0) - retval = (retval ^ true[*cursym = symind]) - ? LT_FALSE : LT_TRUE; - else - retval = LT_OTHER; - } - } else - if (strcmp(keyword, "if") == 0) - retval = LT_IF; - else - if (strcmp(keyword, "else") == 0) - retval = LT_ELSE; - else - if (strcmp(keyword, "endif") == 0) - retval = LT_ENDIF; + } else if (strcmp(kw, "ifndef") == 0) { + retval = LT_FALSE; +ifdef: + cp = skipcomment(++cp); + if (incomment) { + retval = LT_PLAIN; + goto eol; + } + if ((*cursym = findsym(cp)) == 0) + retval = LT_IF; + else if (value[*cursym] == NULL) + retval = (retval == LT_TRUE) + ? LT_FALSE : LT_TRUE; + } else if (strcmp(kw, "if") == 0) { + retval = ifeval(&cp); + cp = skipcomment(cp); + if (*cp != '\n' || keepthis) + retval = LT_IF; + *cursym = 0; + } else if (strcmp(kw, "elif") == 0) { + retval = ifeval(&cp); + cp = skipcomment(cp); + if (*cp != '\n' || keepthis) + retval = LT_ELIF; + if (retval == LT_IF) + retval = LT_ELIF; + if (retval == LT_TRUE) + retval = LT_ELTRUE; + if (retval == LT_FALSE) + retval = LT_ELFALSE; + *cursym = 0; + } else if (strcmp(kw, "else") == 0) + retval = LT_ELSE; + else if (strcmp(kw, "endif") == 0) + retval = LT_ENDIF; eol: - if (!text && reject != REJ_IGNORE) + if (!text && reject != REJ_IGNORE) { for (; *cp;) { if (incomment) cp = skipcomment(cp); + else if (inquote == QUOTE_SINGLE) + cp = skipquote(cp, QUOTE_SINGLE); + else if (inquote == QUOTE_DOUBLE) + cp = skipquote(cp, QUOTE_DOUBLE); + else if (*cp == '/' && (cp[1] == '*' || cp[1] == '/')) + cp = skipcomment(cp); + else if (*cp == '\'') + cp = skipquote(cp, QUOTE_SINGLE); + else if (*cp == '"') + cp = skipquote(cp, QUOTE_DOUBLE); else - if (inquote == QUOTE_SINGLE) - cp = skipquote(cp, QUOTE_SINGLE); - else - if (inquote == QUOTE_DOUBLE) - cp = skipquote(cp, QUOTE_DOUBLE); - else - if (*cp == '/' && (cp[1] == '*' || cp[1] == '/')) - cp = skipcomment(cp); - else - if (*cp == '\'') - cp = skipquote(cp, QUOTE_SINGLE); - else - if (*cp == '"') - cp = skipquote(cp, QUOTE_DOUBLE); - else - cp++; + cp++; } - return retval; + } + return (retval); } + /* - * Skip over comments and stop at the next charaacter - * position that is not whitespace. + * Turn a #elif line into a #if. This function is used when we are + * processing a #if/#elif/#else/#endif sequence that starts off with a + * #if that we understand (and therefore it has been deleted) which is + * followed by a #elif that we don't understand and therefore must be + * kept. We turn it into a #if to keep the nesting correct. */ -char * -skipcomment(cp) - char *cp; +void +elif2if(void) +{ + strncpy(keyword, "if ", 4); +} + +/* + * Turn a #elif line into a #endif. This is used in the opposite + * situation to elif2if, i.e. a #if that we don't understand is + * followed by a #elif that we do; rather than deleting the #elif (as + * we would for a #if) we turn it into a #endif to keep the nesting + * correct. + */ +void +elif2endif(void) +{ + strcpy(keyword, "endif\n"); +} + +/* + * Function for evaluating the innermost parts of expressions, + * viz. !expr (expr) defined(symbol) symbol number + * We reset the keepthis flag when we find a non-constant subexpression. + */ +Linetype +eval_unary(struct ops *ops, int *valp, const char **cpp) +{ + const char *cp; + char *ep; + int sym; + + cp = skipcomment(*cpp); + if(*cp == '!') { + debug("eval%d !", ops - eval_ops); + cp++; + if (eval_unary(ops, valp, &cp) == LT_IF) + return (LT_IF); + *valp = !*valp; + } else if (*cp == '(') { + cp++; + debug("eval%d (", ops - eval_ops); + if (eval_table(eval_ops, valp, &cp) == LT_IF) + return (LT_IF); + cp = skipcomment(cp); + if (*cp++ != ')') + return (LT_IF); + } else if (isdigit((unsigned char)*cp)) { + debug("eval%d number", ops - eval_ops); + *valp = strtol(cp, &ep, 0); + cp = skipsym(cp); + } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) { + cp = skipcomment(cp+7); + debug("eval%d defined", ops - eval_ops); + if (*cp++ != '(') + return (LT_IF); + cp = skipcomment(cp); + sym = findsym(cp); + if (sym == 0 && !symlist) + return (LT_IF); + *valp = (value[sym] != NULL); + cp = skipsym(cp); + cp = skipcomment(cp); + if (*cp++ != ')') + return (LT_IF); + keepthis = false; + } else if (!endsym(*cp)) { + debug("eval%d symbol", ops - eval_ops); + sym = findsym(cp); + if (sym == 0 && !symlist) + return (LT_IF); + if (value[sym] == NULL) + *valp = 0; + else { + *valp = strtol(value[sym], &ep, 0); + if (*ep != '\0' || ep == value[sym]) + return (LT_IF); + } + cp = skipsym(cp); + keepthis = false; + } else + return (LT_IF); + + *cpp = cp; + debug("eval%d = %d", ops - eval_ops, *valp); + return (*valp ? LT_TRUE : LT_FALSE); +} + +/* + * Table-driven evaluation of binary operators. + */ +Linetype +eval_table(struct ops *ops, int *valp, const char **cpp) +{ + const char *cp; + struct op *op; + int val; + + debug("eval%d", ops - eval_ops); + cp = *cpp; + if (ops->inner(ops+1, valp, &cp) == LT_IF) + return (LT_IF); + for (;;) { + cp = skipcomment(cp); + for (op = ops->op; op->str != NULL; op++) + if (strncmp(cp, op->str, strlen(op->str)) == 0) + break; + if (op->str == NULL) + break; + cp += strlen(op->str); + debug("eval%d %s", ops - eval_ops, op->str); + if (ops->inner(ops+1, &val, &cp) == LT_IF) + return LT_IF; + *valp = op->fn(*valp, val); + } + + *cpp = cp; + debug("eval%d = %d", ops - eval_ops, *valp); + return (*valp ? LT_TRUE : LT_FALSE); +} + +/* + * Evaluate the expression on a #if or #elif line. If we can work out + * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we + * return just a generic LT_IF. If the expression is constant and + * we are not processing constant #ifs then the keepthis flag is true. + */ +Linetype +ifeval(const char **cpp) +{ + int val; + + debug("eval %s", *cpp); + keepthis = killconsts ? false : true; + return (eval_table(eval_ops, &val, cpp)); +} + +/* + * Skip over comments and stop at the next character position that is + * not whitespace. + */ +const char * +skipcomment(const char *cp) { if (incomment) goto inside; @@ -460,9 +727,9 @@ skipcomment(cp) while (*cp == ' ' || *cp == '\t') cp++; if (text) - return cp; + return (cp); if (cp[0] != '/') - return cp; + return (cp); if (cp[1] == '*') { if (!incomment) { @@ -475,7 +742,7 @@ skipcomment(cp) stqcline = linenum; } } else - return cp; + return (cp); cp += 2; inside: @@ -483,29 +750,27 @@ inside: for (;;) { for (; *cp != '*'; cp++) if (*cp == '\0') - return cp; + return (cp); if (*++cp == '/') { - incomment = NO; + incomment = NO_COMMENT; break; } } - } - else if (incomment == CXX_COMMENT) { + } else if (incomment == CXX_COMMENT) { for (; *cp != '\n'; cp++) if (*cp == '\0') - return cp; - incomment = NO; + return (cp); + incomment = NO_COMMENT; } } } + /* - * Skip over a quoted string or character and stop at the next charaacter - * position that is not whitespace. + * Skip over a quoted string or character and stop at the next charaacter + * position that is not whitespace. */ -char * -skipquote(cp, type) - char *cp; - int type; +const char * +skipquote(const char *cp, Quote_state type) { char qchar; @@ -515,7 +780,7 @@ skipquote(cp, type) goto inside; for (;; cp++) { if (*cp != qchar) - return cp; + return (cp); cp++; inquote = type; stqcline = linenum; @@ -524,78 +789,124 @@ inside: if (*cp == qchar) break; if (*cp == '\0' || (*cp == '\\' && *++cp == '\0')) - return cp; + return (cp); } inquote = QUOTE_NONE; } } + +/* + * Skip over an identifier. + */ +const char * +skipsym(const char *cp) +{ + while (!endsym(*cp)) + ++cp; + return (cp); +} + /* - * findsym - look for the symbol in the symbol table. - * if found, return symbol table index, - * else return -1. + * Look for the symbol in the symbol table. If is is found, we return + * the symbol table index, else we return 0. */ int -findsym(str) - char *str; +findsym(const char *str) { - char *cp; - char *symp; + const char *cp; + const char *symp; int symind; - char chr; - - for (symind = 0; symind < nsyms; ++symind) { - if (insym[symind] == SYM_INACTIVE) { - for (symp = symname[symind], cp = str - ; *symp && *cp == *symp - ; cp++, symp++ - ) - continue; - chr = *cp; - if (*symp == '\0' && endsym(chr)) - return symind; + + if (symlist) { + for (cp = str; !endsym(*cp); cp++) + continue; + printf("%.*s\n", (int)(cp-str), str); + } + for (symind = 1; symind < nsyms; ++symind) { + for (cp = str, symp = symname[symind]; + *cp && *symp && *cp == *symp; cp++, symp++) + continue; + if (*symp == '\0' && endsym(*cp)) { + debug("findsym %s %s", symname[symind], + value[symind] ? value[symind] : ""); + return (symind); } } - return -1; + return (0); +} + +/* + * Add a symbol to the symbol table. + */ +void +addsym(bool ignorethis, bool definethis, char *sym) +{ + int symind; + char *val; + + symind = findsym(sym); + if (symind == 0) { + if (nsyms >= MAXSYMS) + errx(2, "too many symbols"); + symind = nsyms++; + } + symname[symind] = sym; + ignore[symind] = ignorethis; + val = (char *)skipsym(sym); + if (definethis) { + if (*val == '=') { + value[symind] = val+1; + *val = '\0'; + } else if (*val == '\0') + value[symind] = ""; + else + usage(); + } else { + if (*val != '\0') + usage(); + value[symind] = NULL; + } } + +#if 0 /* - * getlin - expands tabs if asked for - * and (if compiled in) treats form-feed as an end-of-line + * Read a line from the input and expand tabs if requested and (if + * compiled in) treats form-feed as an end-of-line. */ int -getlin(line, maxline, inp, expandtabs) - char *line; - int maxline; - FILE *inp; - int expandtabs; +getline(char *line, int maxline, FILE *inp, bool expandtabs) { - int tmp; + int tmp; int num; int chr; #ifdef FFSPECIAL - static char havechar = NO; /* have leftover char from last time */ + static bool havechar = false; /* have leftover char from last time */ static char svchar; -#endif /* FFSPECIAL */ +#endif /* FFSPECIAL */ num = 0; #ifdef FFSPECIAL if (havechar) { - havechar = NO; + havechar = false; chr = svchar; goto ent; } -#endif /* FFSPECIAL */ +#endif /* FFSPECIAL */ while (num + 8 < maxline) { /* leave room for tab */ chr = getc(inp); if (chr == EOF) - return EOF; - if (isprint(chr)) { + return (EOF); + if (0 && isprint(chr)) { #ifdef FFSPECIAL - ent: -#endif /* FFSPECIAL */ +ent: +#endif /* FFSPECIAL */ *line++ = chr; num++; } else switch (chr) { + case EOF: + return (EOF); + case '\t': if (expandtabs) { num += tmp = 8 - (num & 7); @@ -604,10 +915,6 @@ getlin(line, maxline, inp, expandtabs) while (--tmp); break; } - default: - *line++ = chr; - num++; - break; case '\n': *line = '\n'; @@ -620,61 +927,58 @@ getlin(line, maxline, inp, expandtabs) *line = '\f'; else { *line = '\n'; - havechar = YES; + havechar = true; svchar = chr; } goto end; -#endif /* FFSPECIAL */ +#endif /* FFSPECIAL */ + default: + *line++ = chr; + num++; + break; } } end: *++line = '\0'; - return num; + return (num); } +#endif +/* + * Write a line to the output or not, according to the current + * filtering state. + */ void -flushline(keep) - Bool keep; +flushline(bool keep) { - if ((keep && reject != REJ_YES) ^ complement) { - char *line = tline; - FILE *out = stdout; - char chr; - - while ((chr = *line++)) - putc(chr, out); - } else - if (lnblank) - putc('\n', stdout); - return; + if (symlist) + return; + if ((keep && reject != REJ_YES) ^ complement) + fputs(tline, stdout); + else if (lnblank) + putc('\n', stdout); } void -prname() +debug(const char *msg, ...) { - fprintf(stderr, "%s: ", progname); - return; + va_list ap; + + if (debugging) { + va_start(ap, msg); + vwarnx(msg, ap); + va_end(ap); + } } -int -error(err, line, depth) - int err; /* type of error & index into error string - * array */ - int line; /* line number */ - int depth; /* how many ifdefs we are inside */ +void +error(int code, int depth) { - if (err == END_ERR) - return err; - - prname(); - -#ifndef TESTING - fprintf(stderr, "Error in %s line %d: %s.\n", filename, line, errs[err]); -#else /* TESTING */ - fprintf(stderr, "Error in %s line %d: %s. ", filename, line, errs[err]); - fprintf(stderr, "ifdef depth: %d\n", depth); -#endif /* TESTING */ - - exitstat = 2; - return depth > 1 ? IEOF_ERR : END_ERR; + if (incomment || inquote) + errx(2, "error in %s line %d: %s (#if depth %d)", + filename, stqcline, errs[code], depth); + else + errx(2, "error in %s line %d: %s" + " (#if depth %d start line %d)", + filename, linenum, errs[code], depth, stifline); } |