summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthieu Herrb <matthieu@cvs.openbsd.org>2014-07-12 16:23:46 +0000
committerMatthieu Herrb <matthieu@cvs.openbsd.org>2014-07-12 16:23:46 +0000
commit30586300cf458635b5a017a2f2e7c7345538d628 (patch)
tree9452c6d042c7b9aa7ffe674908c212648b58cf81
parent7ae399a8c52c29893555a95704091044d8ee9b50 (diff)
Import a copy of ucpp, lightweight cpp that doesn't depend
on the 'comp' set. Moving from xenocara with a new name. ok deraadt@.
-rw-r--r--libexec/auxcpp/CHANGELOG21
-rw-r--r--libexec/auxcpp/Makefile115
-rw-r--r--libexec/auxcpp/README877
-rw-r--r--libexec/auxcpp/arith.c1462
-rw-r--r--libexec/auxcpp/arith.h255
-rw-r--r--libexec/auxcpp/assert.c420
-rw-r--r--libexec/auxcpp/atest.c236
-rw-r--r--libexec/auxcpp/config.h352
-rw-r--r--libexec/auxcpp/cpp.c2565
-rw-r--r--libexec/auxcpp/cpp.h317
-rw-r--r--libexec/auxcpp/eval.c699
-rw-r--r--libexec/auxcpp/hash.c329
-rw-r--r--libexec/auxcpp/hash.h58
-rw-r--r--libexec/auxcpp/lexer.c1020
-rw-r--r--libexec/auxcpp/macro.c1921
-rw-r--r--libexec/auxcpp/mem.c328
-rw-r--r--libexec/auxcpp/mem.h155
-rw-r--r--libexec/auxcpp/nhash.c481
-rw-r--r--libexec/auxcpp/nhash.h132
-rw-r--r--libexec/auxcpp/sample.c114
-rw-r--r--libexec/auxcpp/tune.h422
-rw-r--r--libexec/auxcpp/ucpp.1212
-rw-r--r--libexec/auxcpp/ucppi.h196
23 files changed, 12687 insertions, 0 deletions
diff --git a/libexec/auxcpp/CHANGELOG b/libexec/auxcpp/CHANGELOG
new file mode 100644
index 00000000000..071b4682db4
--- /dev/null
+++ b/libexec/auxcpp/CHANGELOG
@@ -0,0 +1,21 @@
+ucpp-1.3.2
+* Fixed Issue 8, Included files missing a "terminating carriage
+ return character" will interrupt preprocessing in sample.c/LEXER
+ mode.
+ (http://code.google.com/p/ucpp/issues/detail?id=8)
+
+ucpp-1.3.1
+* Fixed Issue 5, "\r\n" carriage return characters are double
+ counted.
+ (http://code.google.com/p/ucpp/issues/detail?id=5)
+* Fixed Issue 6, Included files missing a "terminating carriage
+ return character" will interrupt preprocessing in ucpp
+ (STAND_ALONE mode).
+ (http://code.google.com/p/ucpp/issues/detail?id=6)
+* Fixed Issue 7, STD_MACROS & STD_ASSERTS undefined when trying to
+ build ucpp -DSTAND_ALONE.
+ (http://code.google.com/p/ucpp/issues/detail?id=7)
+* Build ucpp & libucpp with 'make'.
+
+ucpp-1.3
+* Original import into svn at code.google.com/p/ucpp
diff --git a/libexec/auxcpp/Makefile b/libexec/auxcpp/Makefile
new file mode 100644
index 00000000000..c37f0b438c7
--- /dev/null
+++ b/libexec/auxcpp/Makefile
@@ -0,0 +1,115 @@
+# Makefile for ucpp
+#
+# (c) Thomas Pornin 1999 - 2002
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 4. The name of the authors may not be used to endorse or promote
+# products derived from this software without specific prior written
+# permission.
+#
+# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+.POSIX:
+
+# ----- user configurable part -----
+
+# Edit the variables to suit your system.
+#
+# use -DAUDIT to enable some internal sanity checks
+# use -DMEM_CHECK to check the return value of malloc()
+# (superseded by AUDIT)
+# use -DMEM_DEBUG to enable memory leak research (warning: this
+# slows down ucpp a bit, and greatly increases memory consumption)
+# use -DINLINE=foobar to enable use of the 'foobar'
+# non standard qualifier, as an equivalent to the C99 'inline'
+# qualifier. See tune.h for details.
+#
+# Two FLAGS lines are given for each system type; choose the first one for
+# debug, the second one for a fast binary.
+
+# for a generic compiler called cc
+#CC = cc
+#FLAGS = -DAUDIT
+#FLAGS = -O -DMEM_CHECK
+
+# for Minix-86
+#CC = cc
+#LDFLAGS = -i
+#FLAGS = -m -DAUDIT
+#FLAGS = -O -m -DMEM_CHECK
+
+# for gcc
+CC = gcc
+FLAGS = -O3 -W -Wall -ansi
+#FLAGS = -g -W -Wall -ansi -DAUDIT -DMEM_DEBUG
+#FLAGS = -O3 -mcpu=pentiumpro -fomit-frame-pointer -W -Wall -ansi -DMEM_CHECK
+#FLAGS = -O -pg -W -Wall -ansi -DMEM_CHECK
+#LDFLAGS = -pg
+
+# for the Compaq C compiler on Alpha/Linux
+#CC = ccc
+#FLAGS = -w0 -g -DAUDIT
+#FLAGS = -w0 -fast -DMEM_CHECK
+
+# for the Sun Workshop C Compiler
+#CC = cc
+#FLAGS = -g -Xa -DAUDIT
+#FLAGS = -Xa -fast -DMEM_CHECK
+
+# flags for the link step
+LIBS =
+#LIBS = libefence.a
+#LIBS = -lgc_dbg
+
+STAND_ALONE = -DSTAND_ALONE
+
+ifdef STAND_ALONE
+ CSRC = mem.c nhash.c cpp.c lexer.c assert.c macro.c eval.c
+ FINAL_STEP = $(CC) $(LDFLAGS) -DUCPP_CONFIG $(STAND_ALONE) -o ucpp $(CSRC) $(LIBS)
+endif
+
+# ----- nothing should be changed below this line -----
+
+COBJ = mem.o nhash.o cpp.o lexer.o assert.o macro.o eval.o
+CFLAGS = $(FLAGS)
+
+all: ucpp
+ @ar cq libucpp.a *.o
+
+clean:
+ @rm -f *.o ucpp core *.a
+
+ucpp: $(COBJ)
+ @$(FINAL_STEP)
+
+assert.o: tune.h ucppi.h cpp.h nhash.h mem.h
+ @$(CC) $(CFLAGS) -c assert.c
+cpp.o: tune.h ucppi.h cpp.h nhash.h mem.h
+ @$(CC) $(CFLAGS) -c cpp.c
+eval.o: tune.h ucppi.h cpp.h nhash.h mem.h arith.c arith.h
+ @$(CC) $(CFLAGS) -c eval.c
+lexer.o: tune.h ucppi.h cpp.h nhash.h mem.h
+ @$(CC) $(CFLAGS) -c lexer.c
+macro.o: tune.h ucppi.h cpp.h nhash.h mem.h
+ @$(CC) $(CFLAGS) -c macro.c
+mem.o: mem.h
+ @$(CC) $(CFLAGS) -c mem.c
+nhash.o: nhash.h mem.h
+ @$(CC) $(CFLAGS) -c nhash.c
diff --git a/libexec/auxcpp/README b/libexec/auxcpp/README
new file mode 100644
index 00000000000..f47ba9f3e71
--- /dev/null
+++ b/libexec/auxcpp/README
@@ -0,0 +1,877 @@
+ucpp-1.3 is a C preprocessor compliant to ISO-C99.
+
+Author: Thomas Pornin <pornin@bolet.org>
+Main site: http://pornin.nerim.net/ucpp/
+
+
+
+INTRODUCTION
+------------
+
+A C preprocessor is a part of a C compiler responsible for macro
+replacement, conditional compilation and inclusion of header files.
+It is often found as a stand-alone program on Unix systems.
+
+ucpp is such a preprocessor; it is designed to be quick and light,
+but anyway fully compliant to the ISO standard 9899:1999, also known
+as C99. ucpp can be compiled as a stand-alone program, or linked to
+some other code; in the latter case, ucpp will output tokens, one
+at a time, on demand, as an integrated lexer.
+
+ucpp operates in two modes:
+-- lexer mode: ucpp is linked to some other code and outputs a stream of
+tokens (each call to the lex() function will yield one token)
+-- non-lexer mode: ucpp preprocesses text and outputs the resulting text
+to a file descriptor; if linked to some other code, the cpp() function
+must be called repeatedly, otherwise ucpp is a stand-alone binary.
+
+
+
+INSTALLATION
+------------
+
+1. Uncompress the archive file and extract the source files.
+
+2. Edit tune.h. Here is a short explanation of compile-time options:
+
+ LOW_MEM
+ Enable memory-saving functions; this is for low-end and old systems,
+ but seems to be good for larger systems too. Keep it.
+ NO_LIBC_BUF
+ NO_UCPP_BUF
+ Two options used to disable the two bufferings inside ucpp. Define
+ both options for maximum memory savings but you will probably want
+ to keep libc buffering for decent performance. Define none on large
+ systems (modern 32 or 64-bit systems).
+ UCPP_MMAP
+ With this option, if ucpp internal buffering is active, ucpp will
+ try to mmap() the input files. This might yield a slight performance
+ improvement, but will work only on a limited set of architectures.
+ PRAGMA_TOKENIZE
+ Make ucpp generate tokenized PRAGMA tokens on #pragma and _Pragma();
+ tokenization is made this way: tokens are assembled as a null
+ terminated array of unsigned chars; if a token has a string value
+ (as defined by the STRING_TOKEN macro), the value follows the token,
+ terminated by PRAGMA_TOKEN_END (by default, a newline character cast
+ to unsigned char). Whitespace tokens are skipped. The "name" value
+ of the PRAGMA token is a pointer to that array. This setting is
+ irrelevant in non-lexer mode.
+ PRAGMA_DUMP
+ In non-lexer mode, keep #pragma in output; non-void _Pragma() are
+ translated to the equivalent #pragma. Irrelevant in lexer mode.
+ NO_PRAGMA_IN_DIRECTIVE
+ Do not evaluate _Pragma() inside #if, #include, #include_next and #line
+ directives; instead, emit an error (since the remaining _Pragma will
+ surely imply a syntax error).
+ DSHARP_TOKEN_MERGE
+ When two tokens are to be merged with the `##' operator, but fail
+ because they do not merge into a single valid token, ucpp keeps those
+ two tokens separate by adding an extra space between them in text
+ output. With this option on, that extra space is not added, which means
+ that some tokens may merge partially if the text output is preprocessed
+ again. See tune.h for details.
+ INMACRO_FLAG
+ In lexer mode, set the inmacro flag to 1 if the current token comes
+ from a macro replacement, 0 otherwise. macro_count maintains an
+ increasing counter of such replacements. CONTEXT tokens count as
+ one macro replacement each. #pragma, and _Pragma() that do not come
+ from a macro replacement, also count as one macro replacement each.
+ This setting is irrelevant in non-lexer mode.
+ STD_INCLUDE_PATH
+ Default include path in stand-alone ucpp.
+ STD_MACROS
+ Default predefined macros in stand-alone ucpp.
+ STD_ASSERT
+ Default assertions in stand-alone ucpp.
+ NATIVE_SIGNED
+ NATIVE_UNSIGNED
+ NATIVE_UNSIGNED_BITS
+ NATIVE_SIGNED_MIN
+ NATIVE_SIGNED_MAX
+ SIMUL_ARITH_SUBTYPE
+ SIMUL_SUBTYPE_BITS
+ SIMUL_NUMBITS
+ WCHAR_SIGNEDNESS
+ Those options define how #if expressions are evaluated; see the
+ cross-compilation section of this file for more info, and the
+ comments in tune.h. Extra info is found in arith.h and arith.c,
+ at the possible expense of your mental health.
+ DEFAULT_LEXER_FLAGS
+ DEFAULT_CPP_FLAGS
+ Default flags in respectively lexer and non-lexer modes.
+ POSIX_JMP
+ Define this if your architecture defines sigsetjmp() and
+ siglongjmp(); it is known to (very slightly) improve performance
+ on AIX systems.
+ MAX_CHAR_VAL
+ ucpp will consider characters whose value is equal or above
+ MAX_CHAR_VAL as outside the C source charset (so they will be
+ treated just like '@', for instance). For ASCII systems, 128
+ is fine. 256 is a safer value, but uses more (static) memory.
+ For performance reasons, use a power of two. If MAX_CHAR_VAL is
+ correctly adjusted, ucpp should be compatible with any character
+ set.
+ UNBREAKABLE_SPACE
+ If you want an extra-whitespace character, define this macro to that
+ character. For instance, define this to 160 on an ISO-8859-1 system
+ if you want the 'unbreakable space' to be considered as whitespace.
+ SEMPER_FIDELIS
+ With this option set, ucpp, when used as a lexer, will pass
+ whitespace tokens to its caller, and those tokens will have their
+ true content; this is intended for reconstruction of the source
+ line. Beware that some comments may have embedded newlines.
+ COPY_LINE_LENGTH
+ ucpp can maintain a copy of the current source line, up to that
+ length. Irrelevant to stand-alone version.
+ *_MEMG
+ Those settings modify ucpp behaviour, wrt memory allocations. With
+ higher values, ucpp will perform less malloc() calls and will run
+ faster, but it will use more memory. Reduce INPUT_BUF_MEMG and
+ OUTPUT_BUF_MEMG on low-memory systems, if you kept ucpp buffering
+ (see NO_UCPP_BUF option).
+
+3. Edit the Makefile. You should define the variables CC and FLAGS;
+ there are the following options:
+
+ -DAUDIT
+ Enable internal sanity checks; this slows ucpp down a bit. Do not
+ define unless you plan to debug ucpp.
+ -DMEM_CHECK
+ With this setting, ucpp will check for the return value of malloc()
+ and exit with a diagnostic when out of memory. MEM_CHECK is implied
+ by AUDIT.
+ -DMEM_DEBUG
+ Enable memory debug code. This will track memory leaks and several
+ occurrences of memory management errors; it will also slow down
+ things and increase memory consumption, so you probably do not
+ want to use this option.
+ -DINLINE=foobar
+ The ucpp code uses "inline" qualifier for some functions; by
+ default, that qualifier is macro-replaced with nothing. Define
+ INLINE to the correct replacement for your compiler, if supported.
+ Note that all "inline" functions in ucpp are also "static". For any
+ C99-compliant compiler, the GNU compiler (gcc), and the Compaq C
+ compiler under Linux/Alpha, no -DINLINE is needed (see tune.h for
+ details).
+
+4. Compile by typing "make". This should produce the ucpp executable
+ file. You might see some warning messages, especially with gcc:
+ gcc believes some variables might be used prior to their
+ initialization; ignore those messages.
+
+5. Install wherever you want the binary and the man page ucpp.1. I
+ have not provided an install sequence because I didn't bother.
+
+6. If you do not have the make utility, compile each file separately
+ and link them together. The exact details depend on your compiler.
+ You must define the macro STAND_ALONE when compiling cpp.c (there
+ is such a definition, commented out, in cpp.c, line 34).
+
+There is no "configure" script because:
+-- I do not like the very idea of a "configure" script.
+-- ucpp is written in ANSI-C and should be fairly portable.
+-- There is no such thing as "standard" settings for a C preprocessor.
+ The predefined system macros, standard assertions,... must be tuned
+ by the sysadmin.
+-- The primary goal of ucpp is to be included in compilers. The
+ stand-alone version is mainly a debugging tool.
+
+Please note that you need an ISO-C90 (formerly ANSI) C compiler suite
+(including the standard library) to compile ucpp. If your compiler is
+not C99 (or later), read the cross-compilation section in this README
+file.
+
+The C90 and C99 standards state that external linkage names might be
+considered equal or different based upon only their first 6 characters;
+this rule might make ucpp not compile on a conformant C implementation.
+I have yet to see such an implementation, however.
+
+If you want to use ucpp as an integrated preprocessor and lexer, see the
+section REUSE. Compiling ucpp as a library is an exercise left to the
+reader.
+
+With the LOW_MEM code enabled, ucpp can run on a Minix-i86 or Msdos
+16-bit small-memory-model machine. It will not be fully compliant
+on such an architecture to C99, since C99 states that at least one
+source code with 4095 simultaneously defined macros must be processed;
+ucpp will be limited to about 1500 macros (at most) due to memory
+restrictions. At least ucpp can preprocess its own code in these
+conditions. LOW_MEM is on by default because it seems to improve
+performance on large systems.
+
+
+
+LICENSE
+-------
+
+The copyright notice and license is at the beginning of the Makefile and
+each source file. It is basically a BSD license, without the advertising
+subclause (which BSD dropped recently anyway) and with no reference to
+Berkeley (since the code is all mine, written from scratch). Informally,
+this means that you can reuse and redistribute the code as you want,
+provided that you state in the documentation (or any substantial part of
+the software) of redistributed code that I am the original author. (If
+you press a cdrom with 200 software packages, I do not insist on having
+my name on the cover of the cdrom -- just keep a Readme file somewhere
+on the cdrom, with the copyright notice included.)
+
+As a courteous gesture, if you reuse my code, please drop me a mail.
+It raises my self-esteem.
+
+
+
+REUSE
+-----
+
+The code was designed as part of a bigger project; it might be
+used as an integrated lexer, that will read files, process them as a
+C preprocessor, and output a stream of C tokens. To include this code
+into a project, compile with STAND_ALONE undefined.
+
+To use the preprocessor and lexer, several steps should be performed.
+See the file 'sample.c' for an example.
+
+1. call init_cpp(). This function initializes the lexer automaton.
+
+2. set the following global variables:
+ no_special_macros
+ non-zero if the special macros (__FILE__ and others)
+ should not be defined. This is a global flag since
+ it affects the redefinition of such macros (which are
+ allowed if the special macros are not defined)
+ c99_compliant
+ if non-zero, define __STDC_VERSION__ to 199901L; this
+ is the default; otherwise, do not define __STDC_VERSION__.
+ Note that ucpp will allow __STDC_VERSION__ to be undefined
+ with a #undef directive.
+ c99_hosted
+ if strictly positive, define __STDC_HOSTED__ to 1.
+ If zero, define __STDC_HOSTED__ to 0. If negative,
+ do not define __STDC_HOSTED__. The default is 1.
+ emit_defines and emit_assertions should be set to 0 for
+ the step 3.
+
+3. call init_tables(). This function initializes the macro table
+ and other things; it will initialize assertions if it has a non-zero
+ argument.
+
+4. call init_include_path(). This function will reset the include
+ path to the list of paths given as argument.
+
+5. set the following global variables
+ emit_dependencies
+ set to 1 if dependencies should be emitted during
+ preprocessing
+ set to 2 if dependencies should also be emitted for
+ system include files
+ emit_defines
+ set to non-zero if #define macro definitions should be
+ emitted when macros are defined
+ emit_assertions
+ set to non-zero if #assert assertions should be
+ emitted when assertions are made
+ emit_output
+ the FILE * where the above items are sent if one of the
+ three emit_ variables is set to non zero
+ transient_characters
+ this is for some cross-compilation; see the relevant
+ part in this README file for details
+
+6. call set_init_filename() with the initial filename as argument;
+ the second argument indicates whether the filename is real or
+ conventional ("real" means "an fopen() on it will work").
+
+7. initialize your struct lexer_state:
+ call init_lexer_state()
+ call init_lexer_mode() if the preprocessor is supposed to
+ output a list of tokens, otherwise set the flags field
+ to DEFAULT_CPP_FLAGS and set the output field to the
+ FILE * where output should be sent
+ (init_lexer_mode(), if called at all, must be called after
+ init_lexer_state())
+ adjust the flags field; here is the meaning of flags:
+
+WARN_STANDARD
+ emit the standard warnings
+WARN_ANNOYING
+ emit the useless and annoying warnings
+WARN_TRIGRAPHS
+ count trigraphs encountered; it is up to the caller to emit
+ a warning if some trigraphs were indeed encountered; the count
+ is stored in the count_trigraphs field of the struct lexer_state
+WARN_TRIGRAPHS_MORE
+ emit a warning for each trigraph encountered
+WARN_PRAGMA
+ emit a warning for each non-void _Pragma encountered in non-lexer
+ mode (because these are dumped as #pragma in the output) and for each
+ #pragma too, if ucpp was compiled without PRAGMA_DUMP
+FAIL_SHARP
+ emit errors on '#' tokens beginning a line and not followed
+ by a valid cpp directive
+CCHARSET
+ emit errors when non-C characters are encountered; if this flag
+ is not set, each non-C character will be considered as a BUNCH
+ token (since C99 states that non-C characters are allowed as
+ long as they "disappear" during preprocessing [through macro
+ replacement and stringification for instance], this flag must
+ not be set, for maximum C99 compliance)
+DISCARD_COMMENTS
+ do not keep comments in output (irrelevant in lexer mode)
+CPLUSPLUS_COMMENTS
+ understand new style comments (//) (mandatory for C99)
+LINE_NUM
+ emit #line directives when entering a file, if not in lexer mode;
+ emit CONTEXT token in lexer mode for #line and new files
+GCC_LINE_NUM
+ if LINE_NUM is set, emit gcc-like directives instead of #line
+HANDLE_ASSERTIONS
+ understand assertions in #if expressions (and #assert, #unassert)
+HANDLE_PRAGMA
+ make PRAGMA tokens for #pragma; irrelevant in non-lexer mode
+ (handling of some pragmas is required in C99 but is not of
+ the competence of the preprocessor; without this flag, ucpp will
+ ignore the contents of #pragma and _Pragma directives)
+MACRO_VAARG
+ understand macros with a variable number of arguments (mandatory
+ for C99)
+UTF8_SOURCE
+ understand UTF-8 encoding: multibyte characters are considered
+ equivalent to letters as far as syntax is concerned (they can
+ be used in identifiers)
+LEXER
+ act as a lexer, outputting tokens
+TEXT_OUTPUT
+ this flag should be set to 0 if ucpp works as a lexer, 1 otherwise.
+ It is somehow redundant with the LEXER flag, but the presence of
+ those two different flags is needed in ucpp.
+KEEP_OUTPUT
+ in non-lexer mode, emit the result of preprocessing
+COPY_LINE
+ maintain a copy of the last read line in the copy_line field of
+ the struct lexer_state ; see below for how to use this buffer
+HANDLE_TRIGRAPHS
+ understand trigraphs, such as ??/ for \. This option should be
+ set by default, except for some legacy code.
+
+ There are other flags, but they are for private usage of ucpp.
+
+8. adjust the input field in the lexer_state to the FILE * from where
+ source file is read. If you use the UCPP_MMAP compile-time option,
+ and your input file is eligible to mmap(), then you can call
+ fopen_mmap_file() to open it, then set_input_file() to set ls->input
+ and some other internal options. Do not call set_input_file() unless
+ you called fopen_mmap_file() immediately before on the same file.
+
+9. call add_incpath() to add an include path, define_macro() and
+ undef_macro() to add or remove macros, make_assertion() and
+ destroy_assertion() to add or remove assertions.
+
+10. call enter_file() (this is needed only in non-lexer mode, or if
+ LINE_NUM is set).
+
+
+Afterwards:
+
+-- if you are in lexer mode, call lex(); each call will make the ctok
+ field point to the next token. A non-zero return value is an error.
+ lex() skips whitespace tokens. The memory used by the string value
+ of some tokens (identifiers, numbers...) is automatically freed,
+ so copy the contents of each such token if you want to keep it
+ (tokens with a string content are identified by the STRING_TOKEN
+ macro applied to their type).
+ When lex() returned a non-zero value: if it is CPPERR_EOF, then
+ end-of-input was reached. Otherwise, it is a genuine error and
+ ls->ctok is an undefined token; skip it and call lex() again to
+ ignore the error.
+
+-- otherwise, call cpp(); each call will analyze one or more tokens
+ (one token if it found neither a cpp directive nor a macro name).
+ A positive return value is an error.
+
+For both functions, if the return value is CPPERR_EOF (which is a
+strictly positive value), then it means that the end of file was
+reached. Call check_cpp_errors() after end of file for pending errors
+(unfinished #if constructions for instance). In non-lexer mode,
+call flush_output().
+
+In the struct lexer_state, the following fields might be read:
+ line the current input line number
+ oline the current output line number (in non-lexer mode)
+ flags the flags described above
+ count_trigraphs the number of trigraphs encountered
+ inmacro the current token comes from a macro
+ macro_count the current macro counter
+"flags" is an unsigned long and might be modified; the three others
+are of long type.
+
+
+To perform another preprocessing: use free_lexer_state() to release
+memory used by the buffers referenced in lexer_state, and go back to
+step 2. The different tables (macros, assertions...) should be reset to
+their respective initial contents.
+
+There is also the wipeout() function: when called, it should release
+(almost) all memory blocks allocated dynamically. After a wipeout(),
+ucpp should be back to its state at step 2 (init_cpp() initializes only
+static tables, that are never freed nor modified afterwards).
+
+
+The COPY_LINE buffer: the struct lexer_state contains two interesting
+fields, copy_line[] and cli. If the COPY_LINE flag is on, each read
+line is stored in this buffer, up to (at most) COPY_LINE_LENGTH - 1
+characters (COPY_LINE_LENGTH is defined in tune.h). The last character
+of the buffer is always a zero, and if the line was read entirely, it is
+zero terminated; the trailing newline is not included.
+
+The purpose of this buffer is error-reporting. When an error occurs
+(cpp() returns a strictly positive value, or lex() returns a non-zero
+value), if your struct lexer_state is called ls, use this code:
+
+ if (ls.cli != 0) ls.copy_line[ls.cli] = 0;
+
+This will add a trailing 0 if the line was not read entirely.
+
+
+ucpp may be configured at runtime to accept alternate characters as
+possible parts of identifiers. Typical intended usage is for the '$'
+and '@' characters. The two relevant functions are set_identifier_char()
+and unset_identifier_char(). When this call is issued:
+ set_identifier_char('$');
+then for all the remaining input, the '$' character will be considered
+as just another letter, as far as identifier tokenizing is concerned. This
+is for identifiers only; numeric constants are not modified by that setting.
+This call resets things back:
+ unset_identifier_char('$');
+Those two functions modify the static table which is initialized by
+init_cpp(). You may call init_cpp() at any time to restore the table
+to its standard state.
+
+When using this feature, take care of the following points:
+
+-- Do NOT use a character whose numeric value (as an `unsigned char'
+cast into an `int') is greater than or equal to MAX_CHAR_VAL (in tune.h).
+This would lead to unpredictable results, including an abrupt crash of
+ucpp. ucpp makes absolutely no check whatsoever on that matter: this is
+the programmer's responsibility.
+
+-- If you use a standard character such as '+' or '{', tokens which
+begin with those characters cease to exist. This can be troublesome.
+If you use set_identifier_char() on the '<' character, the handling of
+#include directives will be greatly disturbed. Therefore the use of any
+standard C character in set_identifier_char() or unset_identifier_char()
+is declared unsupported, forbidden and altogether unwise.
+
+-- Stricto sensu, when an extra character is declared as part of an
+identifier, ucpp behaviour ceases to conform to C99, which mandates that
+characters such as '$' or '@' must be treated as independent tokens of
+their own. Therefore, if your purpose is to use ucpp in a conformant
+C implementation, the use of set_identifier_char() should be made at
+least a runtime option.
+
+-- When enabling a new character in the middle of a macro replacement,
+the effect of that replacement may be delayed up to the end of that
+macro (but this is a "may" !). If you wish to trigger this feature with
+a custom #pragma or _Pragma(), you should remember it (for instance,
+using _Pragma() in a macro replacement, and then the extra character
+in the same macro replacement, is not reliable).
+
+
+
+COMPATIBILITY NOTES
+-------------------
+
+The C language has a lengthening history. Nowadays, C comes in three
+flavours:
+
+-- Traditional C, aka "K&R". This is the language first described by
+Brian Kernighan and Dennis Ritchie, and implemented in the first C
+compiler that was ever coded. There are actually several dialects of
+K&R, and all of them are considered deprecated.
+
+-- ISO 9899:1990, aka C90, aka C89, aka ANSI-C. Formalized by ANSI
+in 1989 and adopted by ISO the next year, it is the C flavour many C
+compilers understand. It is mostly backward compatible with K&R C, but
+with enhancements, clarifications and several new features.
+
+-- ISO 9899:1999, aka C99. This is an evolution on C90, almost fully
+backward compatible with C90. C99 introduces many new and useful
+features, however, including in the preprocessor.
+
+There was also a normative addendum in 1995, that added a few features
+to C90 (for instance, digraphs) that are also present in C99. It is
+sometimes referred to as "C95" or "AMD 1".
+
+
+ucpp implements the C99 standard, but can be used in a stricter mode,
+to enforce C90 compatibility (it will, however, still recognize some
+constructions that are not in plain C90).
+
+ucpp also knows about several extensions to C99:
+
+-- Assertions: this is an extension to the defined() operator, with
+ its own namespace. Assertions seem to be used in several places,
+ therefore ucpp knows about them. It is recommended to enable
+ assertions by default on Solaris systems.
+-- Unicode: the C99 norm specifies that extended characters, from
+ the ISO-10646 charset (aka "unicode") can be used in identifiers
+ with the notations \u and \U. ucpp also accepts (with the proper
+ flag) the UTF-8 encoding in the source file for such characters.
+-- #include_next directive: it works as a #include, but will look
+ for files only in the directories specified in the include path
+ after the one the current file was found. This is a GNU-ism that
+ is useful for writing transparent wrappers around header files.
+
+Assertions and unicode are activated by specific flags; the #include_next
+support is always active.
+
+The ucpp code itself should be compatible with any ISO-C90 compiler.
+The cpp.c file is rather big (~ 64kB), it might confuse old 16-bit C
+compilers; the macro.c file is somewhat large also (~ 47kB).
+
+The evaluation of #if expressions is subject to some subtleties, see the
+section "cross-compilation".
+
+The lexer code makes no assumption about the source character set, but
+the following: source characters (those which have a syntactic value in
+C; comment and string literal contents are not concerned) must have a
+strictly positive value that is strictly lower than MAX_CHAR_VAL. The
+strict positivity is already assured by the C standard, so you just need
+to adjust MAX_CHAR_VAL.
+
+ucpp has been tested successfully on ASCII/ISO-8859-1 and EBCDIC systems.
+Beware that UTF-8 is NOT compatible with EBCDIC.
+
+Pragma handling: when used in non-lexer mode, ucpp tries to output a
+source text that, when read again, will yield the exact same stream of
+tokens. This is not completely true with regards to line numbering in
+some tricky macro replacements, but it should work correctly otherwise,
+especially with pragma directives if the compile-time option PRAGMA_DUMP
+was set: #pragma are dumped, non-void _Pragma() are converted to the
+corresponding #pragma and dumped also.
+
+ucpp does not macro-replace the contents of #pragma and _Pragma();
+If you want a macro-replaced pragma, use this:
+
+#define pragma_(x) _Pragma(#x)
+#define pragma(x) pragma_(x)
+
+Anyway, pragmas do not nest (an _Pragma() cannot be evaluated if it is
+inside a #pragma or another _Pragma).
+
+
+I wrote ucpp according to what is found in "The C Programming Language"
+from Brian Kernighan and Dennis Ritchie (2nd edition) and the C99
+standard; but I could have misinterpreted some points. On some tricky
+points I got help from the helpful people from the comp.std.c newsgroup.
+For assertions and #include_next, I mimicked the behaviour of GNU cpp,
+as is stated in the GNU cpp info documentation. An open question is
+related to the following code:
+
+#define undefined !
+#define makeun(x) un ## x
+#if makeun(defined foo)
+qux
+#else
+bar
+#endif
+
+ucpp will replace 'defined foo' with 0 first (since foo is not defined),
+then it will replace the macro makeun, and the expression will become
+'un0', which is replaced by 0 since this is a remaining identifier. The
+expression evaluates to false, and 'bar' is emitted.
+However, some other preprocessors will replace makeun first, considering
+that it is not part of a 'defined' operator application; this will
+produce the macro 'undefined', which is replaced, and the expression
+becomes '!foo'. 'foo' is replaced by 0, the expression evaluates to
+true, and 'qux' is emitted.
+
+My opinion is that the behaviour is undefined, because use of the
+'defined' operator does not match an allowed form prior to macro
+replacement (I mean, its syntax matches, but its use is then converted
+into something nonexistent and therefore no longer matches). Other people
+think that the behaviour is well-specified, and contrary to what ucpp
+does. The only thing clear to me is that the wording of the standard
+(paragraph 6.10.1.3) is unclear.
+
+Since the ucpp behaviour makes ucpp code simpler and cleaner, and
+that it is unlikely that any real-life code would ever be disturbed
+by that interpretation of the standard, ucpp will keep its current
+behaviour until convincing evidence of my misinterpretation of the
+standard is given to me. The problem can only occur if one uses ## to
+make a 'defined' operator disappear from a #if expression (everybody
+agrees that the generation of a 'defined' operator triggers undefined
+behaviour).
+
+
+Another point about macro replacement has been discussed at length on
+several occasions. It is about the following code:
+
+#define CAT(a, b) CAT_(a, b)
+#define CAT_(a, b) a ## b
+#define AB(x, y) CAT(x, y)
+CAT(A, B)(X, Y)
+
+ucpp will produce `CAT(X,Y)' as replacement for the last line, whereas
+some other preprocessors output `XY'. The answer to the question
+"which behaviour is correct" seems to be "this is not defined by the
+C standard". It is the answer that has been actually given by the C
+standardization committee in 1992, to the defect report #017, question
+23, which asked that very same question. Since the wording of the
+standard has not changed in these parts from the 1990 to the 1999
+version, the preprocessor behaviour on the above-stated code should
+still be considered as undefined.
+
+It seems, however, that there used to be a time (around 1988) when the
+committee members agreed upon a precise macro-replacement algorithm,
+which specified quite clearly the preprocessor behaviour in such
+situations. ucpp behaviour is occasionally claimed to be "incorrect" with
+regards to that algorithm. Since that macro replacement algorithm has
+never been published, and the committee itself backed out from it in
+1992, I decided to disregard those feeble claims.
+
+It is possible, however, that at some point in the future I rewrite the
+ucpp macro replacement code, since that code is a bit messy and might be
+made to use less memory on some occasions. It is then possible that, in
+the aftermath of such a rewrite, the ucpp behaviour for the above-stated
+code becomes tunable. Don't hold your breath, though.
+
+
+About _Pragma: the standard is not clear about when this operator is
+evaluated, and if it is allowed inside #if directives and such. For
+ucpp, I coded _Pragma as a special macro with lazy replacement: it will
+be evaluated wherever a macro could be replaced, and only at the end of
+the macro replacement (for practical purposes, _Pragma can be considered
+as a macro taking one argument, and being replaced by nothing, except
+for some tricky uses of the # and ## operators). This means that, by
+default, ucpp will evaluate _Pragma inside some directives (mainly, #if,
+#include, #include_next and #line), but it can be taught not to do so by
+defining NO_PRAGMA_IN_DIRECTIVE in tune.h.
+
+
+
+CROSS-COMPILATION
+-----------------
+
+If compiled with a C99 development suite, ucpp should be fully
+C99-compliant on the host platform (up to my own understanding of the
+standard -- remember that this software is distributed as-is, without
+any guarantee). However, if a pre-C99 compiler is used, or if the
+target machine is not the host machine (for instance when you build a
+cross-compiler), the evaluation of #if expressions is subject to some
+cross-compiling issues:
+
+
+-- character constants: when evaluating expressions, character constants
+are interpreted in the source character set context; this is allowed
+by the standard but this can lead to problems with code that expects
+this interpretation to match the one made in the C code. To ease
+cross-compilation, you can define a conversion array, and make the
+global variable transient_characters point to it. The array should
+contain 256 int; transient_characters[x] is the value of the character
+whose value is x in the source character set.
+
+This facility is provided for inclusion of ucpp inside another code;
+if you want a stand-alone ucpp with that conversion, hard-code the
+conversion table into eval.c and make transient_characters[] statically
+point to it. Alternatively, you could provide an option syntax to
+provide such a table on command-line, if you feel like it.
+
+
+-- wide character constants signedness: by default, ucpp makes wide
+characters as signed as what plain chars are on the build host. To
+force wide character constant signedness, define WCHAR_SIGNEDNESS to 0
+(for unsigned) or 1 (for signed). Beware, however, that "native" wide
+character constants, even signed, are considered positive. Non-wide
+character constants are, according to the C99 standard, of type int, and
+therefore always signed.
+
+
+-- evaluation type: C90 states that all constants in #if expressions
+are considered as either long or unsigned long, and that the evaluation
+is performed with operands of that size. In C99, the situation is
+equivalent, except that the types used are intmax_t and uintmax_t, as
+defined in <stdint.h>.
+
+ucpp can use two expression evaluators: one uses native integer types
+(one signed and one unsigned), the other evaluator emulates big integer
+numbers by representing them with two values of some unsigned type. The
+emulated type handles signed values in two's complement representation,
+and can be any width ranging from 2 bits to twice the size of the
+underlying native unsigned type used. An odd width is allowed. When
+right shifting an emulated signed negative value, it is left-padded with
+bits set to 1 (this is sign extension).
+
+When the ARITHMETIC_CHECKS macro is defined in tune.h, all occurrences
+of implementation-defined or undefined behaviour during arithmetic
+evaluation are reported as errors or warned upon. This includes all
+overflows and underflows on signed quantities, constants too large,
+and so on. Errors (which terminate immediately evaluation) are emitted
+for division by 0 (on / and % operators) and overflow (on / operator);
+otherwise, warnings are emitted and the faulty evaluation takes place.
+This prevents ucpp from crashing on typical x86 machines, while still
+allowing to use some extensions.
+
+
+
+FUTURE EVOLUTIONS
+-----------------
+
+ucpp is quite complete now. There was a longstanding project of
+"traditional" preprocessing, but I dropped it because it would not
+map cleanly on the token-based ucpp structure. Maybe I will code a
+string-based preprocessor one day; it would certainly use some of the
+code from lexer.c, eval.c, mem.c and nhash.c. However, making such a
+tool is almost irrelevant nowadays. If one wants to handle such a project,
+using ucpp as code base, I would happily provide some help, if needed.
+
+
+
+CHANGES
+-------
+
+From 1.2 to 1.3:
+
+* brand new integer evaluation code, with precise evaluation and checks
+* new hash table implementation, with binary trees
+* relaxed attitude on failed `##' operators
+* bugfix on macro definition on command-line wrt nesting macros
+* support for up to 32766 macro arguments in LOW_MEM code
+* support for optional additional "identifier" characters such as '$' or '@'
+* bugfix: memory leak on void #assert
+
+From 1.1 to 1.2:
+
+* bugfix: numerous memory leaks
+* new function: wipeout(); this should release all malloc() blocks
+* bugfix: missing "newline" and trailing "context" tokens
+* improved included files name caching
+* included memory leak detection code
+
+From 1.0 to 1.1:
+
+* bugfix: missing newline when exiting from a non-newline-terminated file
+* bugfix: crash when resetting due to definition of the _Pragma pseudo-macro
+* bugfix: handling of additional "optional" whitespace with SEMPER_FIDELIS
+* improved handling of unreplaced arg macros wrt output line
+* tricky handling of utterly tricky #include
+* bugfix: spurious token `~=' eliminated
+
+From 0.9 to 1.0:
+
+* bugfix: crash after erroneous #assert
+* changed ERR_SHARP to FAIL_SHARP, EMUL_UINTMAX to SIMUL_UINTMAX
+* made "inline" default on gcc and DEC ccc (Linux/Alpha)
+* semantic of -I is now Unix-like (added directories are looked first)
+* added -J flag (to add include directories after the system ones)
+* cleaned up non-ascii issues
+* bugfix: missing brace in no-LOW_MEM code
+* bugfix: argument number check in variadic macros
+* bugfix: crash in non-lexer mode after some cases of unreplaced macro
+* bugfix: _Pragma() handling wrt # and ##
+* made evaluation of _Pragma() optional in #if, #include and #line
+* bugfix: re-dump of multiline #pragma
+* added the inmacro and macro_count flags
+* added mmap() support
+* added option to retain whitespace content in lexer mode
+
+From 0.8 to 0.9:
+
+* added check for division by 0 in #if evaluation
+* added check for non-standard line numbers
+* added check for trailing garbage in most directives
+* corrected signedness of char constants (always int, therefore always signed)
+* made LOW_MEM code, so that ucpp runs smoothly on low memory architectures
+* multiple bugfixes (using the GNU cpp testsuite)
+* added handling of _Pragma (as a macro)
+* added tokenization of pragma directives
+* added conservation of pragma directives in text output
+* produced Msdos 16-bit small memory model executable
+* produced Minix-86 executable
+
+From 0.7 to 0.8:
+
+* added some support for Amiga systems
+* fixed extra spacing in stringified tokens
+* fixed bug related to %:% and tolerated rogue sharps
+* namespace cleanup
+* bugfix for macro redefinition
+* added warning for evaluated comma operators in #if (ISO requirement)
+* -Dfoo now defines foo with content 1 (and not void content)
+* trigraphs can be disabled (for incorrect but legacy code)
+* fixed semantics for #include "file" (local directory)
+* fixed detection of protected files
+* produced a Msdos 16-bit executable
+
+From 0.6 to 0.7:
+
+* officially changed the goal to full C99 compliance
+* added the CONTEXT token and let NEWLINE tokens go
+* added report_context() for error reporting
+* enforced matching of #if/#endif (file-global nesting level = 0)
+* added support of C99 digraphs
+* added UTF-8 encoding support
+* added universal character names
+* rewrote #if expressions (sizes fixed, bignum, signed/unsigned fixed)
+* fixed incomplete evaluation of #if expressions
+* added transient_characters[]
+
+From 0.5 to 0.6:
+
+* disappearance of error_nonl()
+* added extra optional warnings for trigraphs
+* some bugfixes, especially in lexer mode
+* handled MacIntosh files correctly
+
+From 0.4 to 0.5:
+
+* nicer #pragma handling (a token can be emitted)
+* bugfix in lexer mode after #line and #error
+* sample.c an example of code linked with ucpp
+* made #if expressions conforming to standard signed/unsigned handling
+* added the copy_line[] buffer feature
+
+From 0.3 to 0.4:
+
+* relaxed interpretation of '#include foo' when foo ends up, after macro
+ substitution, with a '<bar>' content
+* corrected the 'double-dot' bug
+* corrected two bugs related to the treatment of macro aborted calls (due
+ to lack of arguments)
+* some namespaces cleanup, to ease integration into other code
+* documented the way to include ucpp into another program
+* made newlines embedded into strings illegal (and reported as such)
+
+From 0.2 to 0.3:
+
+* added support for system predefined macros
+* made several bugfixes
+* checked C99 compliance for most of the features
+* ucpp now accepts non-C characters on standard when used stand-alone
+* removed many useless spaces in the output
+
+From 0.1 to 0.2:
+
+* added support for assertions
+* added support for macros with variable arguments
+* split the pharaonic cpp.c file into many
+* made several bugfixes
+* relaxed the behaviour with regards to the void arguments
+* made C++-like comments an option
+
+
+
+THANKS TO
+---------
+
+Volker Barthelmann, Neil Booth, Stephen Davies, Stéphane Ecolivet,
+Marc Espie, Marcus Holland-Moritz, Antoine Leca, Cyrille Lefevre,
+Dave Rivers, Loic Tortay and Laurent Wacrenier, for suggestions and
+beta-testing.
+
+Paul Eggert, Douglas A. Gwyn, Clive D.W. Feather, and the other guys from
+comp.std.c, for explanations about the standard.
+
+Dave Brolley, Jamie Lokier and Neil Booth, for discussion about tricky
+points on nesting macros.
+
+Brian Kernighan and Dennis Ritchie, for bringing C to mortal Men.
diff --git a/libexec/auxcpp/arith.c b/libexec/auxcpp/arith.c
new file mode 100644
index 00000000000..bef258052a1
--- /dev/null
+++ b/libexec/auxcpp/arith.c
@@ -0,0 +1,1462 @@
+/*
+ * Integer arithmetic evaluation.
+ *
+ * (c) Thomas Pornin 2002
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <limits.h>
+#include "arith.h"
+
+/*
+ * Digit classification and conversion helpers. Each predicate
+ * (ARITH_OCTAL, ARITH_DECIM, ARITH_HEXAD) tests whether the character x
+ * is a digit in the corresponding base; the matching value macro
+ * (ARITH_OVAL, ARITH_DVAL, ARITH_HVAL) yields the numerical value of the
+ * digit and is meaningful only when the predicate holds (ARITH_HVAL
+ * yields 15 for anything it does not recognize). Hexadecimal letters are
+ * compared one by one, so no assumption is made on their encoding.
+ * These macros may evaluate their argument several times.
+ */
+#define ARITH_OCTAL(x) ((x) >= '0' && (x) <= '7')
+#define ARITH_OVAL(x) ((x) - '0')
+#define ARITH_DECIM(x) ((x) >= '0' && (x) <= '9')
+#define ARITH_DVAL(x) ((x) - '0')
+#define ARITH_HEXAD(x) (ARITH_DECIM(x) \
+ || (x) == 'a' || (x) == 'A' \
+ || (x) == 'b' || (x) == 'B' \
+ || (x) == 'c' || (x) == 'C' \
+ || (x) == 'd' || (x) == 'D' \
+ || (x) == 'e' || (x) == 'E' \
+ || (x) == 'f' || (x) == 'F')
+#define ARITH_HVAL(x) (ARITH_DECIM(x) ? ARITH_DVAL(x) \
+ : (x) == 'a' || (x) == 'A' ? 10 \
+ : (x) == 'b' || (x) == 'B' ? 11 \
+ : (x) == 'c' || (x) == 'C' ? 12 \
+ : (x) == 'd' || (x) == 'D' ? 13 \
+ : (x) == 'e' || (x) == 'E' ? 14 : 15)
+
+#ifdef NATIVE_SIGNED
+/* ====================================================================== */
+/* Arithmetics with native types */
+/* ====================================================================== */
+
+/*
+ * The following properties are imposed by the C standard:
+ *
+ * -- Arithmetics on the unsigned type should never overflow; every
+ * result is reduced modulo some power of 2. The macro NATIVE_UNSIGNED_BITS
+ * should have been defined to that specific exponent.
+ *
+ * -- The signed type should use either two's complement, one's complement
+ * or a sign bit and a magnitude. There should be an integer N such that
+ * the maximum signed value is (2^N)-1 and the minimum signed value is
+ * either -(2^N) or -((2^N)-1). -(2^N) is possible only for two's complement.
+ *
+ * -- The maximum signed value is at most equal to the maximum unsigned
+ * value.
+ *
+ * -- Trap representations can only be:
+ * ** In two's complement, 1 as sign bit and 0 for all value bits.
+ * This can happen only if the minimum signed value is -((2^N)-1).
+ * ** In one's complement, all bits set to 1.
+ * ** In mantissa + sign, sign bit to 1 and 0 for all value bits.
+ * Unsigned values have no trap representation achievable with numerical
+ * operators. Only signed values can have such representations, with
+ * operators &, |, ^, ~, << and >>. If trap representations are possible,
+ * such occurrences are reported as warnings.
+ *
+ * -- The operators +, -, * and << may overflow or underflow on signed
+ * quantities, which is potentially an error. A warning is emitted.
+ *
+ * -- The operator >> yields an implementation-defined result on
+ * signed negative quantities. Usually, the sign is extended, but this
+ * is not guaranteed. A warning is emitted.
+ *
+ * -- The operators / and % used with a second operand of 0 cannot work.
+ * An error is emitted when such a call is performed. Furthermore, in
+ * two's complement representation, with NATIVE_SIGNED_MIN == -(2^N)
+ * for some N, the expression `NATIVE_SIGNED_MIN / (-1)' yields an
+ * unrepresentable result, which is also an error.
+ *
+ *
+ * For the value checks, we need to consider those different cases. So
+ * we calculate the following macros:
+ * -- TWOS_COMPLEMENT: is 1 if representation is two's complement, 0
+ * otherwise.
+ * -- ONES_COMPLEMENT: is 1 if representation is one's complement, 0
+ * otherwise.
+ * -- SIGNED_IS_BIGGER: 1 if the maximum signed value is equal to the
+ * maximum unsigned value, 0 otherwise. NATIVE_SIGNED_MAX cannot
+ * exceed the maximum unsigned value. If SIGNED_IS_BIGGER is 0, then
+ * the maximum unsigned value is strictly superior to twice the
+ * value of NATIVE_SIGNED_MAX (e.g. 65535 to 32767).
+ * -- TRAP_REPRESENTATION: 1 if a trap representation is possible, 0
+ * otherwise. The only way trap representations are guaranteed
+ * impossible is when TWOS_COMPLEMENT is set, and NATIVE_SIGNED_MIN
+ * is equal to -NATIVE_SIGNED_MAX - 1.
+ *
+ * Those macros are calculated by some preprocessor directives. This
+ * supposes that the implementation conforms to C99. Rules on preprocessing
+ * were quite looser in C90, and it could be that an old compiler, used
+ * for a cross-compiling task, does not get those right. Therefore, if
+ * ARCH_DEFINED is defined prior to the inclusion of this file, those
+ * four macros are supposed to be already defined. Otherwise they are
+ * (re)defined. The macro ARCH_TRAP_DEFINED has the same meaning, but
+ * is limited to the TRAP_REPRESENTATION macro (if ARCH_TRAP_DEFINED is
+ * defined, the macro TRAP_REPRESENTATION is supposed to be already
+ * defined; the three other macros are recalculated).
+ *
+ *
+ * To sum up:
+ * -- Whenever a division operator (/ or %) is invoked and would yield
+ * an unrepresentable result, ARITH_ERROR() is invoked.
+ * -- With ARITHMETIC_CHECKS undefined, ARITH_WARNING() is never invoked.
+ * -- With ARITHMETIC_CHECKS defined:
+ * ** If ARCH_DEFINED is defined, the including context must provide
+ * the macros TWOS_COMPLEMENT, ONES_COMPLEMENT, SIGNED_IS_BIGGER
+ * and TRAP_REPRESENTATION.
+ * ** Otherwise, if ARCH_TRAP_DEFINED is defined, the including context
+ * must provide the macro TRAP_REPRESENTATION.
+ * The code then detects all operator invocations that would yield an
+ * overflow, underflow, trap representation, or any implementation
+ * defined result or undefined behaviour. The macro ARITH_WARNING() is
+ * invoked for each detection.
+ * -- Trap representation detection code supposes that the operands are
+ * _not_ trap representation.
+ */
+
+#ifndef ARCH_DEFINED
+
+#undef TWOS_COMPLEMENT
+#undef ONES_COMPLEMENT
+#undef SIGNED_IS_BIGGER
+#ifndef ARCH_TRAP_DEFINED
+#undef TRAP_REPRESENTATION
+#endif
+
+/*
+ * The representation of negative values is deduced from the two lowest
+ * bits of -1: 11 in two's complement, 10 in one's complement, 01 with
+ * sign and magnitude. The parentheses around the `&' operation are
+ * mandatory: `==' binds tighter than `&', so `(-1) & 3 == 3' would be
+ * parsed as `(-1) & (3 == 3)' and would only test the lowest bit.
+ */
+#if ((-1) & 3) == 3
+/*
+ * Two's complement.
+ */
+#define TWOS_COMPLEMENT 1
+#define ONES_COMPLEMENT 0
+#ifndef ARCH_TRAP_DEFINED
+/* No trap representation when the minimum value uses the extra pattern. */
+#if NATIVE_SIGNED_MIN < -NATIVE_SIGNED_MAX
+#define TRAP_REPRESENTATION 0
+#else
+#define TRAP_REPRESENTATION 1
+#endif
+#endif
+
+#elif ((-1) & 3) == 2
+/*
+ * One's complement.
+ */
+#define TWOS_COMPLEMENT 0
+#define ONES_COMPLEMENT 1
+#ifndef ARCH_TRAP_DEFINED
+#define TRAP_REPRESENTATION 1
+#endif
+
+#else
+/*
+ * Mantissa + sign.
+ */
+#define TWOS_COMPLEMENT 0
+#define ONES_COMPLEMENT 0
+#ifndef ARCH_TRAP_DEFINED
+#define TRAP_REPRESENTATION 1
+#endif
+
+#endif
+
+/*
+ * Maximum native unsigned value. The first macro is for #if directives,
+ * the second macro is for use as constant expression in C code.
+ * The value is built in two steps so that no shift count reaches
+ * NATIVE_UNSIGNED_BITS.
+ */
+#define NATIVE_UNSIGNED_MAX ((((1U << (NATIVE_UNSIGNED_BITS - 1)) - 1U) \
+	<< 1) + 1U)
+#define NATIVE_UNSIGNED_MAX_A (((((arith_u)1 << (NATIVE_UNSIGNED_BITS - 1)) \
+	- (arith_u)1) << 1) + (arith_u)1)
+
+#if NATIVE_SIGNED_MAX == NATIVE_UNSIGNED_MAX
+#define SIGNED_IS_BIGGER 1
+#else
+#define SIGNED_IS_BIGGER 0
+#endif
+
+#endif
+
+/*
+ * NEGATIVE_IS_BIGGER is 1 when the minimum signed value is strictly
+ * lower than the opposite of the maximum signed value, 0 otherwise.
+ */
+#undef NEGATIVE_IS_BIGGER
+#if -NATIVE_SIGNED_MAX > NATIVE_SIGNED_MIN
+#define NEGATIVE_IS_BIGGER 1
+#else
+#define NEGATIVE_IS_BIGGER 0
+#endif
+
+/*
+ * Consistency check: a signed type whose minimum value is lower than
+ * -NATIVE_SIGNED_MAX uses every bit pattern for a value, hence it
+ * cannot have a trap representation.
+ */
+#if NEGATIVE_IS_BIGGER && TRAP_REPRESENTATION
+#error Impossible to get trap representations.
+#endif
+
+/* operations on the unsigned type */
+
+/* to_u: converts its operand to the unsigned type with a plain cast. */
+ARITH_DECL_MONO_S_U(to_u)
+{
+	return (arith_u)x;
+}
+
+/* fromint: converts its operand to the unsigned type with a plain cast. */
+ARITH_DECL_MONO_I_U(fromint)
+{
+	return (arith_u)x;
+}
+
+/* fromulong: converts its operand to the unsigned type with a plain cast. */
+ARITH_DECL_MONO_L_U(fromulong)
+{
+	return (arith_u)x;
+}
+
+/*
+ * toint: converts an unsigned value to an int. When the preprocessor
+ * test finds that NATIVE_UNSIGNED_MAX exceeds INT_MAX, values greater
+ * than INT_MAX are clamped to INT_MAX.
+ */
+ARITH_DECL_MONO_U_I(toint)
+{
+#if NATIVE_UNSIGNED_MAX > INT_MAX
+	return (x > (arith_u)INT_MAX) ? INT_MAX : (int)x;
+#else
+	return (int)x;
+#endif
+}
+
+/*
+ * toulong: converts an unsigned value through a cast to long. When the
+ * preprocessor test finds that NATIVE_UNSIGNED_MAX exceeds LONG_MAX,
+ * values greater than LONG_MAX are clamped to LONG_MAX.
+ *
+ * NOTE(review): the name suggests an unsigned long result, in which case
+ * the bound would presumably be ULONG_MAX -- confirm against the
+ * ARITH_DECL_MONO_U_L declaration in arith.h before changing anything.
+ */
+ARITH_DECL_MONO_U_L(toulong)
+{
+#if NATIVE_UNSIGNED_MAX > LONG_MAX
+	return (x > (arith_u)LONG_MAX) ? LONG_MAX : (long)x;
+#else
+	return (long)x;
+#endif
+}
+
+/* neg: arithmetic negation of an unsigned value. */
+ARITH_DECL_MONO_U_U(neg)
+{
+	return -x;
+}
+
+/* not: bitwise complement of an unsigned value. */
+ARITH_DECL_MONO_U_U(not)
+{
+	return ~x;
+}
+
+/* lnot: logical negation; non-zero result if and only if x is zero. */
+ARITH_DECL_MONO_U_I(lnot)
+{
+	return !x;
+}
+
+/* lval: logical value; 1 if x is non-zero, 0 otherwise. */
+ARITH_DECL_MONO_U_I(lval)
+{
+	return x != 0;
+}
+
+/*
+ * Binary operators on unsigned operands x and y. Each function is a
+ * direct application of the corresponding C operator; no check is
+ * performed.
+ */
+
+/* plus: x + y */
+ARITH_DECL_BI_UU_U(plus)
+{
+	return x + y;
+}
+
+/* minus: x - y */
+ARITH_DECL_BI_UU_U(minus)
+{
+	return x - y;
+}
+
+/* lt: x < y */
+ARITH_DECL_BI_UU_I(lt)
+{
+	return x < y;
+}
+
+/* leq: x <= y */
+ARITH_DECL_BI_UU_I(leq)
+{
+	return x <= y;
+}
+
+/* gt: x > y */
+ARITH_DECL_BI_UU_I(gt)
+{
+	return x > y;
+}
+
+/* geq: x >= y */
+ARITH_DECL_BI_UU_I(geq)
+{
+	return x >= y;
+}
+
+/* same: x == y */
+ARITH_DECL_BI_UU_I(same)
+{
+	return x == y;
+}
+
+/* neq: x != y */
+ARITH_DECL_BI_UU_I(neq)
+{
+	return x != y;
+}
+
+/* and: x & y */
+ARITH_DECL_BI_UU_U(and)
+{
+	return x & y;
+}
+
+/* xor: x ^ y */
+ARITH_DECL_BI_UU_U(xor)
+{
+	return x ^ y;
+}
+
+/* or: x | y */
+ARITH_DECL_BI_UU_U(or)
+{
+	return x | y;
+}
+
+/* star: x * y */
+ARITH_DECL_BI_UU_U(star)
+{
+	return x * y;
+}
+
+/*
+ * lsh: left shift of the unsigned value x by y. With ARITHMETIC_CHECKS
+ * defined, a count not lower than NATIVE_UNSIGNED_BITS is reported with
+ * ARITH_EXCEP_LSH_W and a negative count with ARITH_EXCEP_LSH_C; the
+ * shift is evaluated afterwards.
+ */
+ARITH_DECL_BI_UI_U(lsh)
+{
+#ifdef ARITHMETIC_CHECKS
+	if (y < NATIVE_UNSIGNED_BITS) {
+		if (y < 0) {
+			ARITH_WARNING(ARITH_EXCEP_LSH_C);
+		}
+	} else {
+		ARITH_WARNING(ARITH_EXCEP_LSH_W);
+	}
+#endif
+	return x << y;
+}
+
+/*
+ * rsh: right shift of the unsigned value x by y. With ARITHMETIC_CHECKS
+ * defined, a count not lower than NATIVE_UNSIGNED_BITS is reported with
+ * ARITH_EXCEP_RSH_W and a negative count with ARITH_EXCEP_RSH_C; the
+ * shift is evaluated afterwards.
+ */
+ARITH_DECL_BI_UI_U(rsh)
+{
+#ifdef ARITHMETIC_CHECKS
+	if (y < NATIVE_UNSIGNED_BITS) {
+		if (y < 0) {
+			ARITH_WARNING(ARITH_EXCEP_RSH_C);
+		}
+	} else {
+		ARITH_WARNING(ARITH_EXCEP_RSH_W);
+	}
+#endif
+	return x >> y;
+}
+
+/*
+ * slash: unsigned division x / y. A null divisor is reported through
+ * ARITH_ERROR(ARITH_EXCEP_SLASH_D) before the division is attempted.
+ */
+ARITH_DECL_BI_UU_U(slash)
+{
+	if (y == 0) {
+		ARITH_ERROR(ARITH_EXCEP_SLASH_D);
+	}
+	return x / y;
+}
+
+/*
+ * pct: unsigned remainder x % y. A null divisor is reported through
+ * ARITH_ERROR(ARITH_EXCEP_PCT_D) before the operation is attempted.
+ */
+ARITH_DECL_BI_UU_U(pct)
+{
+	if (y == 0) {
+		ARITH_ERROR(ARITH_EXCEP_PCT_D);
+	}
+	return x % y;
+}
+
+/* operations on the signed type */
+
+/*
+ * to_s: converts an unsigned value to the signed type with a cast. With
+ * ARITHMETIC_CHECKS defined and SIGNED_IS_BIGGER equal to 0, a value
+ * greater than NATIVE_SIGNED_MAX is reported with ARITH_EXCEP_CONV_O.
+ */
+ARITH_DECL_MONO_U_S(to_s)
+{
+#if defined(ARITHMETIC_CHECKS) && !SIGNED_IS_BIGGER
+	if ((arith_u)NATIVE_SIGNED_MAX < x) {
+		ARITH_WARNING(ARITH_EXCEP_CONV_O);
+	}
+#endif
+	return (arith_s)x;
+}
+
+/* fromint: converts its operand to the signed type with a plain cast. */
+ARITH_DECL_MONO_I_S(fromint)
+{
+	return (arith_s)x;
+}
+
+/* fromlong: converts its operand to the signed type with a plain cast. */
+ARITH_DECL_MONO_L_S(fromlong)
+{
+	return (arith_s)x;
+}
+
+/*
+ * toint: converts a signed value to an int, clamping it to
+ * [INT_MIN, INT_MAX]. Each bound is tested only when the preprocessor
+ * finds that the signed type extends beyond it.
+ */
+ARITH_DECL_MONO_S_I(toint)
+{
+#if NATIVE_SIGNED_MAX > INT_MAX
+	if (x > (arith_s)INT_MAX) {
+		return INT_MAX;
+	}
+#endif
+#if NATIVE_SIGNED_MIN < INT_MIN
+	if (x < (arith_s)INT_MIN) {
+		return INT_MIN;
+	}
+#endif
+	return (int)x;
+}
+
+/*
+ * tolong: converts a signed value to a long, clamping it to
+ * [LONG_MIN, LONG_MAX]. Each bound is tested only when the preprocessor
+ * finds that the signed type extends beyond it.
+ */
+ARITH_DECL_MONO_S_L(tolong)
+{
+#if NATIVE_SIGNED_MAX > LONG_MAX
+	if (x > (arith_s)LONG_MAX) {
+		return LONG_MAX;
+	}
+#endif
+#if NATIVE_SIGNED_MIN < LONG_MIN
+	if (x < (arith_s)LONG_MIN) {
+		return LONG_MIN;
+	}
+#endif
+	return (long)x;
+}
+
+/*
+ * neg: arithmetic negation of a signed value. With ARITHMETIC_CHECKS
+ * defined and NEGATIVE_IS_BIGGER set, negating NATIVE_SIGNED_MIN is
+ * reported with ARITH_EXCEP_NEG_O since the result is not representable.
+ */
+ARITH_DECL_MONO_S_S(neg)
+{
+#if defined(ARITHMETIC_CHECKS) && NEGATIVE_IS_BIGGER
+	if (NATIVE_SIGNED_MIN == x) {
+		ARITH_WARNING(ARITH_EXCEP_NEG_O);
+	}
+#endif
+	return -x;
+}
+
+/*
+ * not: bitwise complement of a signed value. With ARITHMETIC_CHECKS
+ * defined and TRAP_REPRESENTATION set, the operand whose complement is
+ * the trap representation is reported with ARITH_EXCEP_NOT_T: that
+ * operand is 0 in one's complement, NATIVE_SIGNED_MAX otherwise.
+ */
+ARITH_DECL_MONO_S_S(not)
+{
+#if defined(ARITHMETIC_CHECKS) && TRAP_REPRESENTATION
+#if ONES_COMPLEMENT && !TWOS_COMPLEMENT
+	if (x == 0)
+		ARITH_WARNING(ARITH_EXCEP_NOT_T);
+#else
+	if (x == NATIVE_SIGNED_MAX)
+		ARITH_WARNING(ARITH_EXCEP_NOT_T);
+#endif
+#endif
+	return ~x;
+}
+
+/* lnot: logical negation; non-zero result if and only if x is zero. */
+ARITH_DECL_MONO_S_I(lnot)
+{
+	return !x;
+}
+
+/* lval: logical value; 1 if x is non-zero, 0 otherwise. */
+ARITH_DECL_MONO_S_I(lval)
+{
+	return x != 0;
+}
+
+/*
+ * Addition of signed values:
+ * -- overflows occur only when both operands are strictly positive
+ * -- underflows occur only when both operands are strictly negative
+ * -- overflow check (both operands > 0):
+ * ** if SIGNED_IS_BIGGER == 1, overflows are kept as such in the
+ * unsigned world (if the signed addition overflows, so does the
+ * unsigned, and vice versa)
+ * ** if SIGNED_IS_BIGGER == 0, no overflow can happen in the unsigned
+ * world
+ * -- underflow check (both operands < 0):
+ * ** if NEGATIVE_IS_BIGGER == 1 (must be two's complement)
+ * ++ we have a guaranteed underflow if one of the operands is equal
+ * to NATIVE_SIGNED_MIN; otherwise, -x and -y are valid integers,
+ * and we cast them into the unsigned world
+ * ++ if SIGNED_IS_BIGGER == 1, underflows become unsigned overflows
+ * with a non-zero result
+ * ++ if SIGNED_IS_BIGGER == 0, no overflow happens in the unsigned
+ * world; we use the fact that -NATIVE_SIGNED_MIN is then
+ * exactly 1 more than NATIVE_SIGNED_MAX
+ * ** if NEGATIVE_IS_BIGGER == 0, underflow check is identical to
+ * overflow check on (signed) -x and -y.
+ *
+ * The checks are compiled in only when ARITHMETIC_CHECKS is defined.
+ * An overflow is reported through ARITH_WARNING(ARITH_EXCEP_PLUS_O), an
+ * underflow through ARITH_WARNING(ARITH_EXCEP_PLUS_U). The function
+ * returns x + y.
+ */
+ARITH_DECL_BI_SS_S(plus)
+{
+#ifdef ARITHMETIC_CHECKS
+ /* overflow: both operands strictly positive */
+ if (x > 0 && y > 0 && (
+#if SIGNED_IS_BIGGER
+ ((arith_u)((arith_u)x + (arith_u)y) < (arith_u)x)
+#else
+ (((arith_u)x + (arith_u)y) > (arith_u)NATIVE_SIGNED_MAX)
+#endif
+ )) ARITH_WARNING(ARITH_EXCEP_PLUS_O);
+ /* underflow: both operands strictly negative */
+ else if (x < 0 && y < 0 && (
+#if NEGATIVE_IS_BIGGER
+ /* tested first, so that -x and -y below are never evaluated on
+ NATIVE_SIGNED_MIN */
+ (x == NATIVE_SIGNED_MIN || y == NATIVE_SIGNED_MIN) ||
+#if SIGNED_IS_BIGGER
+ (((arith_u)(-x) + (arith_u)(-y) != 0)
+ && (arith_u)((arith_u)(-x) + (arith_u)(-y))
+ < (arith_u)(-x))
+#else
+ (((arith_u)(-x) + (arith_u)(-y))
+ > ((arith_u)1 + (arith_u)NATIVE_SIGNED_MAX))
+#endif
+#else
+#if SIGNED_IS_BIGGER
+ ((arith_u)((arith_u)(-x) + (arith_u)(-y)) < (arith_u)(-x))
+#else
+ (((arith_u)(-x) + (arith_u)(-y))
+ > (arith_u)NATIVE_SIGNED_MAX)
+#endif
+#endif
+ )) ARITH_WARNING(ARITH_EXCEP_PLUS_U);
+#endif
+ return x + y;
+}
+
+/*
+ * Subtraction of signed values:
+ * -- overflow: only if x > 0 and y < 0
+ * ** if NEGATIVE_IS_BIGGER == 1 (must be two's complement) and
+ * y == NATIVE_SIGNED_MIN then overflow
+ * ** otherwise, cast x and -y to unsigned, then add and check
+ * for overflows
+ * -- underflow: only if x < 0 and y > 0
+ * ** if NEGATIVE_IS_BIGGER == 1 (must be two's complement):
+ * ++ if x == NATIVE_SIGNED_MIN then underflow
+ * ++ cast -x and y to unsigned, then add. If SIGNED_IS_BIGGER == 0,
+ * just check. Otherwise, check for overflow with non-zero result.
+ * ** if NEGATIVE_IS_BIGGER == 0: cast -x and y to unsigned, then
+ * add. Overflow check as in addition.
+ *
+ * The checks are compiled in only when ARITHMETIC_CHECKS is defined.
+ * An overflow is reported through ARITH_WARNING(ARITH_EXCEP_MINUS_O), an
+ * underflow through ARITH_WARNING(ARITH_EXCEP_MINUS_U). The function
+ * returns x - y.
+ */
+ARITH_DECL_BI_SS_S(minus)
+{
+#ifdef ARITHMETIC_CHECKS
+ /* overflow: positive minus negative */
+ if (x > 0 && y < 0 && (
+#if NEGATIVE_IS_BIGGER
+ /* tested first, so that -y below is never evaluated on
+ NATIVE_SIGNED_MIN */
+ (y == NATIVE_SIGNED_MIN) ||
+#endif
+#if SIGNED_IS_BIGGER
+ ((arith_u)((arith_u)x + (arith_u)(-y)) < (arith_u)x)
+#else
+ (((arith_u)x + (arith_u)(-y)) > (arith_u)NATIVE_SIGNED_MAX)
+#endif
+ )) ARITH_WARNING(ARITH_EXCEP_MINUS_O);
+ /* underflow: negative minus positive */
+ else if (x < 0 && y > 0 && (
+#if NEGATIVE_IS_BIGGER
+ /* tested first, so that -x below is never evaluated on
+ NATIVE_SIGNED_MIN */
+ (x == NATIVE_SIGNED_MIN) ||
+#if SIGNED_IS_BIGGER
+ ((((arith_u)(-x) + (arith_u)y) != 0) &&
+ ((arith_u)((arith_u)(-x) + (arith_u)y) < (arith_u)(-x)))
+#else
+ (((arith_u)(-x) + (arith_u)y) >
+ ((arith_u)1 + (arith_u)NATIVE_SIGNED_MAX))
+#endif
+#else
+#if SIGNED_IS_BIGGER
+ ((arith_u)((arith_u)(-x) + (arith_u)y) < (arith_u)(-x))
+#else
+ (((arith_u)(-x) + (arith_u)y) > (arith_u)NATIVE_SIGNED_MAX)
+#endif
+#endif
+ )) ARITH_WARNING(ARITH_EXCEP_MINUS_U);
+#endif
+ return x - y;
+}
+
+/*
+ * Comparisons of signed operands x and y. Each function is a direct
+ * application of the corresponding C operator.
+ */
+
+/* lt: x < y */
+ARITH_DECL_BI_SS_I(lt)
+{
+	return x < y;
+}
+
+/* leq: x <= y */
+ARITH_DECL_BI_SS_I(leq)
+{
+	return x <= y;
+}
+
+/* gt: x > y */
+ARITH_DECL_BI_SS_I(gt)
+{
+	return x > y;
+}
+
+/* geq: x >= y */
+ARITH_DECL_BI_SS_I(geq)
+{
+	return x >= y;
+}
+
+/* same: x == y */
+ARITH_DECL_BI_SS_I(same)
+{
+	return x == y;
+}
+
+/* neq: x != y */
+ARITH_DECL_BI_SS_I(neq)
+{
+	return x != y;
+}
+
+/*
+ * Bitwise AND of signed values. Provided neither x nor y is a trap
+ * representation:
+ * -- one's complement: impossible to get a trap representation
+ * -- two's complement and sign + mantissa: the trap representation has
+ * the sign bit set and all value bits cleared, so it is obtained if
+ * and only if x and y are strictly negative and share no value bit:
+ * ** sign + mantissa: the value bits of a negative x are those of
+ * -x, hence the test ((-x) & (-y)) == 0
+ * ** two's complement: negation does not preserve the value bits
+ * (e.g. on 4 bits, -7 & -6 is the trap pattern 1000 although
+ * 7 & 6 != 0), so the value bits are extracted by masking with
+ * NATIVE_SIGNED_MAX; that intermediate result is non-negative
+ * and thus cannot be a trap representation itself.
+ *
+ * The check is compiled in only when ARITHMETIC_CHECKS is defined and a
+ * trap representation is possible; detection is reported through
+ * ARITH_WARNING(ARITH_EXCEP_AND_T). The function returns x & y.
+ */
+ARITH_DECL_BI_SS_S(and)
+{
+#ifdef ARITHMETIC_CHECKS
+#if TRAP_REPRESENTATION && !ONES_COMPLEMENT
+	if (x < 0 && y < 0 &&
+#if TWOS_COMPLEMENT
+	    ((x & (arith_s)NATIVE_SIGNED_MAX) & y) == 0
+#else
+	    ((-x) & (-y)) == 0
+#endif
+	    )
+		ARITH_WARNING(ARITH_EXCEP_AND_T);
+#endif
+#endif
+	return x & y;
+}
+
+/*
+ * Bitwise XOR of signed values. Provided neither x nor y is a trap
+ * representation:
+ * -- two's complement: the trap representation has the sign bit set and
+ * all value bits cleared, so it is obtained if and only if x and y
+ * have opposite signs and identical value bits (testing ~x == y would
+ * instead detect a result with all bits set, which is the valid
+ * value -1)
+ * -- one's complement: trap if and only if x != 0 && ~x == y
+ * -- mantissa + sign: trap if and only if x != 0 && -x == y
+ *
+ * The check is compiled in only when ARITHMETIC_CHECKS is defined and a
+ * trap representation is possible; detection is reported through
+ * ARITH_WARNING(ARITH_EXCEP_XOR_T). The function returns x ^ y.
+ */
+ARITH_DECL_BI_SS_S(xor)
+{
+#ifdef ARITHMETIC_CHECKS
+#if TRAP_REPRESENTATION
+	if (
+#if TWOS_COMPLEMENT
+	    /* masking with NATIVE_SIGNED_MAX keeps the value bits only */
+	    ((x < 0) != (y < 0)
+	    && (x & (arith_s)NATIVE_SIGNED_MAX)
+	    == (y & (arith_s)NATIVE_SIGNED_MAX))
+#elif ONES_COMPLEMENT
+	    /* x != 0 is tested first: ~0 is itself the trap pattern */
+	    (x != 0 && ~x == y)
+#else
+	    (x != 0 && -x == y)
+#endif
+	    )
+		ARITH_WARNING(ARITH_EXCEP_XOR_T);
+#endif
+#endif
+	return x ^ y;
+}
+
+/*
+ * Bitwise OR of signed values. Assuming that neither operand is a trap
+ * representation, only one's complement can produce one (all bits set),
+ * which happens exactly when x != 0, y != 0 and (~x & ~y) == 0. Two's
+ * complement and mantissa + sign cannot trap here.
+ *
+ * The check is compiled in only when ARITHMETIC_CHECKS is defined, and
+ * reports through ARITH_WARNING(ARITH_EXCEP_OR_T). The function returns
+ * x | y.
+ */
+ARITH_DECL_BI_SS_S(or)
+{
+#if defined(ARITHMETIC_CHECKS) && TRAP_REPRESENTATION && ONES_COMPLEMENT
+	/* zero operands are excluded first: ~0 is itself the trap pattern */
+	if (x != 0 && y != 0) {
+		if ((~x & ~y) == 0) {
+			ARITH_WARNING(ARITH_EXCEP_OR_T);
+		}
+	}
+#endif
+	return x | y;
+}
+
+/*
+ * Left-shifting by a negative or greater than type width count is
+ * forbidden. Left-shifting a negative value is forbidden (underflow).
+ * Left-shifting a positive value can trigger an overflow. We check it
+ * by casting into the unsigned world and simulating a truncation.
+ *
+ * If SIGNED_IS_BIGGER is set, then the signed type width is 1 more
+ * than the unsigned type width (the sign bit is included in the width);
+ * otherwise, if W is the signed type width, 1U << (W-1) is equal to
+ * NATIVE_SIGNED_MAX + 1.
+ *
+ * The checks are compiled in only when ARITHMETIC_CHECKS is defined and
+ * are tried in this order, at most one of them being reported through
+ * ARITH_WARNING(): negative count (ARITH_EXCEP_LSH_C), count too large
+ * (ARITH_EXCEP_LSH_W), negative x (ARITH_EXCEP_LSH_U), overflow
+ * (ARITH_EXCEP_LSH_O). The function returns x << y.
+ */
+ARITH_DECL_BI_SI_S(lsh)
+{
+#ifdef ARITHMETIC_CHECKS
+ if (y < 0) ARITH_WARNING(ARITH_EXCEP_LSH_C);
+ else if (
+#if SIGNED_IS_BIGGER
+ y > NATIVE_UNSIGNED_BITS
+#else
+ y >= NATIVE_UNSIGNED_BITS
+ || (y > 0 && (((arith_u)1 << (y - 1))
+ > (arith_u)NATIVE_SIGNED_MAX))
+#endif
+ ) ARITH_WARNING(ARITH_EXCEP_LSH_W);
+ else if (x < 0) ARITH_WARNING(ARITH_EXCEP_LSH_U);
+ /* overflow: shift in the unsigned type, drop the bits above the
+ signed value bits, shift back and compare with the operand */
+ else if (x > 0 && ((((arith_u)x << y) & NATIVE_SIGNED_MAX) >> y)
+ != (arith_u)x) ARITH_WARNING(ARITH_EXCEP_LSH_O);
+#endif
+ return x << y;
+}
+
+/*
+ * Right-shifting is handled as left-shifting, except that the problem
+ * is somehow simpler: there is no possible overflow or underflow. Only
+ * right-shifting a negative value yields an implementation defined
+ * result (_not_ an undefined behaviour).
+ *
+ * The checks are compiled in only when ARITHMETIC_CHECKS is defined and
+ * are tried in this order, at most one of them being reported through
+ * ARITH_WARNING(): negative count (ARITH_EXCEP_RSH_C), count too large
+ * (ARITH_EXCEP_RSH_W), negative x (ARITH_EXCEP_RSH_N). The function
+ * returns x >> y.
+ */
+ARITH_DECL_BI_SI_S(rsh)
+{
+#ifdef ARITHMETIC_CHECKS
+ if (y < 0) ARITH_WARNING(ARITH_EXCEP_RSH_C);
+ else if (
+#if SIGNED_IS_BIGGER
+ y > NATIVE_UNSIGNED_BITS
+#else
+ y >= NATIVE_UNSIGNED_BITS
+ || (y > 0 && (((arith_u)1 << (y - 1))
+ > (arith_u)NATIVE_SIGNED_MAX))
+#endif
+ ) ARITH_WARNING(ARITH_EXCEP_RSH_W);
+ else if (x < 0) ARITH_WARNING(ARITH_EXCEP_RSH_N);
+#endif
+ return x >> y;
+}
+
+/*
+ * Overflow can happen only if both operands have the same sign.
+ * Underflow can happen only if both operands have opposite signs.
+ *
+ * Overflow checking: this is done quite inefficiently by performing
+ * a division on the result and checking whether it matches the initial operand.
+ */
+ARITH_DECL_BI_SS_S(star)
+{
+ /*
+  * Multiply two signed native values.
+  *
+  * With ARITHMETIC_CHECKS defined, a zero operand returns 0 at once.
+  * Otherwise the product of the magnitudes is formed in the unsigned
+  * type, restricted to the value bits and divided back; a mismatch
+  * raises STAR_O (operands of the same sign) or STAR_U (operands of
+  * opposite signs). The native product is returned in every case.
+  */
+#ifdef ARITHMETIC_CHECKS
+ if (x == 0 || y == 0) return 0;
+ if (x > 0 && y > 0) {
+ if ((((arith_u)x * (arith_u)y) & (arith_u)NATIVE_SIGNED_MAX)
+ / (arith_u)y != (arith_u)x)
+ ARITH_WARNING(ARITH_EXCEP_STAR_O);
+ } else if (x < 0 && y < 0) {
+ if (
+#if NEGATIVE_IS_BIGGER
+ /* the minimum value is reported without attempting the negation */
+ (x == NATIVE_SIGNED_MIN || y == NATIVE_SIGNED_MIN) ||
+#endif
+ (((arith_u)(-x) * (arith_u)(-y))
+ & (arith_u)NATIVE_SIGNED_MAX) / (arith_u)(-y)
+ != (arith_u)(-x))
+ ARITH_WARNING(ARITH_EXCEP_STAR_O);
+ } else if (x > 0 && y < 0) {
+ /* the check is skipped when the positive operand is 1 */
+ if ((arith_u)x > (arith_u)1 && (
+#if NEGATIVE_IS_BIGGER
+ y == NATIVE_SIGNED_MIN ||
+#endif
+ (((arith_u)x * (arith_u)(-y)) & (arith_u)NATIVE_SIGNED_MAX)
+ / (arith_u)(-y) != (arith_u)x))
+ ARITH_WARNING(ARITH_EXCEP_STAR_U);
+ } else {
+ /* remaining case: x < 0 and y > 0, mirror image of the previous one */
+ if ((arith_u)y > (arith_u)1 && (
+#if NEGATIVE_IS_BIGGER
+ x == NATIVE_SIGNED_MIN ||
+#endif
+ (((arith_u)y * (arith_u)(-x)) & (arith_u)NATIVE_SIGNED_MAX)
+ / (arith_u)(-x) != (arith_u)y))
+ ARITH_WARNING(ARITH_EXCEP_STAR_U);
+ }
+#endif
+ return x * y;
+}
+
+/*
+ * Division by 0 is an error. The only other possible problem is an
+ * overflow of the result. Such an overflow can only happen in two's
+ * complement representation, when NEGATIVE_IS_BIGGER is set, and
+ * one attempts to divide NATIVE_SIGNED_MIN by -1: the result is then
+ * -NATIVE_SIGNED_MIN, which is not representable by the type. This is
+ * considered as an error, not a warning, because it actually triggers
+ * an exception on modern Pentium-based PC.
+ */
+ARITH_DECL_BI_SS_S(slash)
+{
+	/*
+	 * Signed native division. A zero divisor is reported as an error;
+	 * when NEGATIVE_IS_BIGGER is set, so is NATIVE_SIGNED_MIN / -1.
+	 * These reports do not depend on ARITHMETIC_CHECKS.
+	 */
+	if (y == 0) {
+		ARITH_ERROR(ARITH_EXCEP_SLASH_D);
+	}
+#if NEGATIVE_IS_BIGGER
+	else if (y == (arith_s)(-1) && x == NATIVE_SIGNED_MIN) {
+		ARITH_ERROR(ARITH_EXCEP_SLASH_O);
+	}
+#endif
+	return x / y;
+}
+
+/*
+ * Only division by 0 needs to be checked.
+ */
+ARITH_DECL_BI_SS_S(pct)
+{
+ if (y == 0) ARITH_ERROR(ARITH_EXCEP_PCT_D);
+ return x % y;
+}
+
+ARITH_DECL_MONO_ST_US(octconst)
+{
+	/*
+	 * Convert the run of octal digits starting at c.
+	 *
+	 * *ru receives the unsigned value. *sp is set to 1 and *rs to the
+	 * value when it fits the signed type; otherwise *sp is set to 0 and
+	 * *rs is left untouched. The returned pointer designates the first
+	 * character that is not an octal digit. A value too large for the
+	 * unsigned type is reported through ARITH_ERROR().
+	 */
+	arith_u acc = 0;
+
+	while (ARITH_OCTAL(*c)) {
+		arith_u digit = ARITH_OVAL(*c);
+
+		if (acc > (NATIVE_UNSIGNED_MAX_A / 8)) {
+			ARITH_ERROR(ARITH_EXCEP_CONST_O);
+		}
+		/*
+		 * No second check is made for the addition: after the
+		 * multiplication the three low bits are clear and the
+		 * digit is at most 7.
+		 */
+		acc = acc * 8 + digit;
+		c ++;
+	}
+	*ru = acc;
+#if SIGNED_IS_BIGGER
+	*sp = 1;
+	*rs = acc;
+#else
+	if (acc > NATIVE_SIGNED_MAX) {
+		*sp = 0;
+	} else {
+		*sp = 1;
+		*rs = acc;
+	}
+#endif
+	return c;
+}
+
+ARITH_DECL_MONO_ST_US(decconst)
+{
+	/*
+	 * Convert the run of decimal digits starting at c.
+	 *
+	 * *ru receives the unsigned value. *sp is set to 1 and *rs to the
+	 * value when it fits the signed type; otherwise *sp is set to 0 and
+	 * *rs is left untouched. The returned pointer designates the first
+	 * character that is not a decimal digit. A value too large for the
+	 * unsigned type is reported through ARITH_ERROR().
+	 */
+	arith_u acc = 0;
+
+	while (ARITH_DECIM(*c)) {
+		arith_u digit = ARITH_DVAL(*c);
+
+		/* first make sure the multiplication by ten fits... */
+		if (acc > (NATIVE_UNSIGNED_MAX_A / 10)) {
+			ARITH_ERROR(ARITH_EXCEP_CONST_O);
+		}
+		acc *= 10;
+		/* ...then that there is room left for the new digit */
+		if (acc > (NATIVE_UNSIGNED_MAX_A - digit)) {
+			ARITH_ERROR(ARITH_EXCEP_CONST_O);
+		}
+		acc += digit;
+		c ++;
+	}
+	*ru = acc;
+#if SIGNED_IS_BIGGER
+	*sp = 1;
+	*rs = acc;
+#else
+	if (acc > NATIVE_SIGNED_MAX) {
+		*sp = 0;
+	} else {
+		*sp = 1;
+		*rs = acc;
+	}
+#endif
+	return c;
+}
+
+ARITH_DECL_MONO_ST_US(hexconst)
+{
+	/*
+	 * Convert the run of hexadecimal digits starting at c.
+	 *
+	 * *ru receives the unsigned value. *sp is set to 1 and *rs to the
+	 * value when it fits the signed type; otherwise *sp is set to 0 and
+	 * *rs is left untouched. The returned pointer designates the first
+	 * character that is not a hexadecimal digit. A value too large for
+	 * the unsigned type is reported through ARITH_ERROR().
+	 */
+	arith_u acc = 0;
+
+	while (ARITH_HEXAD(*c)) {
+		arith_u digit = ARITH_HVAL(*c);
+
+		if (acc > (NATIVE_UNSIGNED_MAX_A / 16)) {
+			ARITH_ERROR(ARITH_EXCEP_CONST_O);
+		}
+		/*
+		 * No second check is made for the addition: after the
+		 * multiplication the four low bits are clear and the
+		 * digit is at most 15.
+		 */
+		acc = acc * 16 + digit;
+		c ++;
+	}
+	*ru = acc;
+#if SIGNED_IS_BIGGER
+	*sp = 1;
+	*rs = acc;
+#else
+	if (acc > NATIVE_SIGNED_MAX) {
+		*sp = 0;
+	} else {
+		*sp = 1;
+		*rs = acc;
+	}
+#endif
+	return c;
+}
+
+#else
+/* ====================================================================== */
+/* Arithmetics with a simple simulated type */
+/* ====================================================================== */
+
+/*
+ * We simulate a type with the following characteristics:
+ * -- the signed type width is equal to the unsigned type width (which
+ * means that there is one less value bit in the signed type);
+ * -- the signed type uses two's complement representation;
+ * -- there is no trap representation;
+ * -- overflows and underflows are truncated (but a warning is emitted
+ * if ARITHMETIC_CHECKS is defined);
+ * -- overflow on integer division is still an error;
+ * -- right-shifting of a negative value extends the sign;
+ * -- the shift count value is first cast to unsigned, then reduced modulo
+ * the type size.
+ *
+ * These characteristics follow what is usually found on modern
+ * architectures.
+ *
+ * The maximum emulated type size is twice the size of the unsigned native
+ * type which is used to emulate the type.
+ */
+
+/*
+ * Helper macros for the simulated type. They are #undef'ed first, so
+ * earlier definitions of the same names do not conflict with these.
+ */
+#undef SIMUL_ONE_TMP
+#undef SIMUL_MSW_TMP1
+#undef SIMUL_MSW_MASK
+#undef SIMUL_LSW_TMP1
+#undef SIMUL_LSW_MASK
+
+/* the value 1 in the native subtype */
+#define SIMUL_ONE_TMP ((SIMUL_ARITH_SUBTYPE)1)
+/*
+ * SIMUL_xSW_TMP1 is the top bit of the corresponding word and
+ * SIMUL_xSW_MASK has every bit of that word set. The mask is assembled
+ * as "top bit | (top bit - 1)", so the largest shift count used is
+ * the word width minus one.
+ */
+#define SIMUL_MSW_TMP1 (SIMUL_ONE_TMP << (SIMUL_MSW_WIDTH - 1))
+#define SIMUL_MSW_MASK (SIMUL_MSW_TMP1 | (SIMUL_MSW_TMP1 - SIMUL_ONE_TMP))
+#define SIMUL_LSW_TMP1 (SIMUL_ONE_TMP << (SIMUL_LSW_WIDTH - 1))
+#define SIMUL_LSW_MASK (SIMUL_LSW_TMP1 | (SIMUL_LSW_TMP1 - SIMUL_ONE_TMP))
+
+#undef TMSW
+#undef TLSW
+
+/* truncate a subtype value to the width of the upper / lower word */
+#define TMSW(x) ((x) & SIMUL_MSW_MASK)
+#define TLSW(x) ((x) & SIMUL_LSW_MASK)
+
+#undef SIMUL_ZERO
+#undef SIMUL_ONE
+
+/* per-type object names: <ARITH_TYPENAME>_zero and <ARITH_TYPENAME>_one */
+#define SIMUL_ZERO arith_strc(ARITH_TYPENAME, _zero)
+#define SIMUL_ONE arith_strc(ARITH_TYPENAME, _one)
+
+/* initializers are given in member order: { msw, lsw } */
+static arith_u SIMUL_ZERO = { 0, 0 };
+static arith_u SIMUL_ONE = { 0, 1 };
+
+/*
+ * We use the fact that both the signed and unsigned type are the same
+ * structure. The difference between the signed and the unsigned type
+ * is a type information, and, as such, is considered compile-time and
+ * not maintained in the value structure itself. This is a job for
+ * the programmer / compiler.
+ */
+ARITH_DECL_MONO_S_U(to_u)
+{
+	/* signed to unsigned conversion: the value is handed back unchanged */
+	return x;
+}
+
+ARITH_DECL_MONO_I_U(fromint)
+{
+	/*
+	 * Build a simulated value from a native int. A negative x yields
+	 * the simulated negation of its magnitude.
+	 *
+	 * The magnitude is computed in unsigned arithmetic: negating x as
+	 * an int overflows for the most negative int, and a recursive call
+	 * on -x would then receive a negative argument again.
+	 */
+	arith_u z;
+	unsigned mag = (x < 0) ? 0U - (unsigned)x : (unsigned)x;
+
+	/*
+	 * The upper word is obtained with two shifts (width - 1, then 1)
+	 * so that no single shift count reaches the lower word width.
+	 * This works because types smaller than int are promoted by the
+	 * C compiler before evaluating the >> operator.
+	 */
+	z.msw = TMSW(((SIMUL_ARITH_SUBTYPE)mag >> (SIMUL_LSW_WIDTH - 1)) >> 1);
+	z.lsw = TLSW((SIMUL_ARITH_SUBTYPE)mag);
+	if (x < 0) return arith_op_u(neg)(z);
+	return z;
+}
+
+ARITH_DECL_MONO_L_U(fromulong)
+{
+	/*
+	 * Build a simulated value from an unsigned long. The preprocessor
+	 * test selects the simple path when ULONG_MAX has no bit at or
+	 * above position SIMUL_LSW_WIDTH.
+	 */
+	arith_u res;
+
+#if ((ULONG_MAX >> (SIMUL_LSW_WIDTH - 1)) >> 1) == 0
+	res.lsw = x;
+	res.msw = 0;
+#else
+	res.lsw = TLSW((SIMUL_ARITH_SUBTYPE)x);
+	res.msw = TMSW(x >> SIMUL_LSW_WIDTH);
+#endif
+	return res;
+}
+
+ARITH_DECL_MONO_U_I(toint)
+{
+ /*
+  * Convert an unsigned simulated value to int, returning INT_MAX when
+  * the range tests below find the value too large.
+  */
+#if ((INT_MAX >> (SIMUL_LSW_WIDTH - 1)) >> 1) == 0
+ /* INT_MAX has no bit at or above SIMUL_LSW_WIDTH: only lsw matters */
+ if (x.msw != 0 || x.lsw > (SIMUL_ARITH_SUBTYPE)INT_MAX)
+ return INT_MAX;
+ return (int)x.lsw;
+#else
+#if (INT_MAX >> (SIMUL_SUBTYPE_BITS - 1)) == 0
+ /* range test on the upper word, compiled in only for this int width */
+ if (x.msw > (SIMUL_ARITH_SUBTYPE)(INT_MAX >> SIMUL_LSW_WIDTH))
+ return INT_MAX;
+#endif
+ return ((int)x.msw << SIMUL_LSW_WIDTH) | (int)x.lsw;
+#endif
+}
+
+ARITH_DECL_MONO_U_L(toulong)
+{
+ /*
+  * Convert an unsigned simulated value to unsigned long, returning
+  * ULONG_MAX when the range tests below find the value too large.
+  */
+#if ((ULONG_MAX >> (SIMUL_LSW_WIDTH - 1)) >> 1) == 0
+ /* ULONG_MAX has no bit at or above SIMUL_LSW_WIDTH: only lsw matters */
+ if (x.msw != 0 || x.lsw > (SIMUL_ARITH_SUBTYPE)ULONG_MAX)
+ return ULONG_MAX;
+ return (unsigned long)x.lsw;
+#else
+#if (ULONG_MAX >> (SIMUL_SUBTYPE_BITS - 1)) == 0
+ /* range test on the upper word, compiled in only for this long width */
+ if (x.msw > (SIMUL_ARITH_SUBTYPE)(ULONG_MAX >> SIMUL_LSW_WIDTH))
+ return ULONG_MAX;
+#endif
+ return ((unsigned long)x.msw << SIMUL_LSW_WIDTH) | (unsigned long)x.lsw;
+#endif
+}
+
+ARITH_DECL_MONO_U_U(neg)
+{
+	/* arithmetic negation: bitwise complement, then add one */
+	return arith_op_u(plus)(arith_op_u(not)(x), SIMUL_ONE);
+}
+
+ARITH_DECL_MONO_U_U(not)
+{
+	/* bitwise complement, each word truncated to its own width */
+	arith_u inv;
+
+	inv.lsw = TLSW(~x.lsw);
+	inv.msw = TMSW(~x.msw);
+	return inv;
+}
+
+ARITH_DECL_MONO_U_I(lnot)
+{
+	/* logical negation: 1 when both words are zero, 0 otherwise */
+	return !(x.msw != 0 || x.lsw != 0);
+}
+
+ARITH_DECL_MONO_U_I(lval)
+{
+	/* truth value: 1 when any word is non-zero, 0 otherwise */
+	return !(x.msw == 0 && x.lsw == 0);
+}
+
+ARITH_DECL_BI_UU_U(plus)
+{
+	/*
+	 * Word-wise addition. A carry out of the lower word is detected
+	 * by the truncated lower sum being smaller than an operand.
+	 */
+	arith_u sum;
+
+	sum.lsw = TLSW(x.lsw + y.lsw);
+	sum.msw = TMSW(x.msw + y.msw);
+	if (sum.lsw < y.lsw) {
+		sum.msw = TMSW(sum.msw + 1);
+	}
+	return sum;
+}
+
+ARITH_DECL_BI_UU_U(minus)
+{
+	/* subtraction is addition of the negated second operand */
+	arith_u negated = arith_op_u(neg)(y);
+
+	return arith_op_u(plus)(x, negated);
+}
+
+ARITH_DECL_BI_UI_U(lsh)
+{
+	/*
+	 * Left shift of an unsigned simulated value. The count is cast to
+	 * unsigned and reduced modulo SIMUL_NUMBITS; with ARITHMETIC_CHECKS
+	 * a warning is raised first for a negative or too large count.
+	 */
+#ifdef ARITHMETIC_CHECKS
+	if (y < 0) ARITH_WARNING(ARITH_EXCEP_LSH_C);
+	else if (y >= SIMUL_NUMBITS) ARITH_WARNING(ARITH_EXCEP_LSH_W);
+#endif
+	y = (unsigned)y % SIMUL_NUMBITS;
+	/*
+	 * The zero test must follow the reduction: a count which is a
+	 * multiple of SIMUL_NUMBITS reduces to 0, and the general path
+	 * below would then shift the lower word by its full width.
+	 */
+	if (y == 0) return x;
+	if (y >= SIMUL_LSW_WIDTH) {
+		/*
+		 * We use here the fact that the LSW size is always
+		 * equal to or greater than the MSW size.
+		 */
+		x.msw = TMSW(x.lsw << (y - SIMUL_LSW_WIDTH));
+		x.lsw = 0;
+		return x;
+	}
+	/* 0 < y < SIMUL_LSW_WIDTH: the top bits of lsw move into msw */
+	x.msw = TMSW((x.msw << y) | (x.lsw >> (SIMUL_LSW_WIDTH - y)));
+	x.lsw = TLSW(x.lsw << y);
+	return x;
+}
+
+ARITH_DECL_BI_UI_U(rsh)
+{
+	/*
+	 * Logical right shift of an unsigned simulated value. The count is
+	 * cast to unsigned and reduced modulo SIMUL_NUMBITS; with
+	 * ARITHMETIC_CHECKS a warning is raised first for a negative or
+	 * too large count.
+	 */
+#ifdef ARITHMETIC_CHECKS
+	if (y < 0) ARITH_WARNING(ARITH_EXCEP_RSH_C);
+	else if (y >= SIMUL_NUMBITS) ARITH_WARNING(ARITH_EXCEP_RSH_W);
+#endif
+	y = (unsigned)y % SIMUL_NUMBITS;
+	/*
+	 * A zero count must return here: the general path below would
+	 * shift the upper word left by the full lower word width.
+	 */
+	if (y == 0) return x;
+	if (y >= SIMUL_LSW_WIDTH) {
+		x.lsw = x.msw >> (y - SIMUL_LSW_WIDTH);
+		x.msw = 0;
+		return x;
+	}
+	/* 0 < y < SIMUL_LSW_WIDTH: the low bits of msw move into lsw */
+	x.lsw = TLSW((x.lsw >> y) | (x.msw << (SIMUL_LSW_WIDTH - y)));
+	x.msw >>= y;
+	return x;
+}
+
+ARITH_DECL_BI_UU_I(lt)
+{
+	/* unsigned x < y: the upper words decide unless they are equal */
+	if (x.msw != y.msw) {
+		return x.msw < y.msw;
+	}
+	return x.lsw < y.lsw;
+}
+
+ARITH_DECL_BI_UU_I(leq)
+{
+	/* unsigned x <= y: the upper words decide unless they are equal */
+	if (x.msw != y.msw) {
+		return x.msw < y.msw;
+	}
+	return x.lsw <= y.lsw;
+}
+
+ARITH_DECL_BI_UU_I(gt)
+{
+	/* x > y is y < x */
+	int res = arith_op_u(lt)(y, x);
+
+	return res;
+}
+
+ARITH_DECL_BI_UU_I(geq)
+{
+	/* x >= y is y <= x */
+	int res = arith_op_u(leq)(y, x);
+
+	return res;
+}
+
+ARITH_DECL_BI_UU_I(same)
+{
+	/* equality: both words must match */
+	return !(x.msw != y.msw || x.lsw != y.lsw);
+}
+
+ARITH_DECL_BI_UU_I(neq)
+{
+	/* inequality: logical negation of the equality test */
+	return arith_op_u(same)(x, y) ? 0 : 1;
+}
+
+ARITH_DECL_BI_UU_U(and)
+{
+	/* bitwise AND, word by word */
+	arith_u res;
+
+	res.msw = x.msw & y.msw;
+	res.lsw = x.lsw & y.lsw;
+	return res;
+}
+
+ARITH_DECL_BI_UU_U(xor)
+{
+	/* bitwise exclusive OR, word by word */
+	arith_u res;
+
+	res.msw = x.msw ^ y.msw;
+	res.lsw = x.lsw ^ y.lsw;
+	return res;
+}
+
+ARITH_DECL_BI_UU_U(or)
+{
+	/* bitwise inclusive OR, word by word */
+	arith_u res;
+
+	res.msw = x.msw | y.msw;
+	res.lsw = x.lsw | y.lsw;
+	return res;
+}
+
+#undef SIMUL_LSW_ODDLEN
+#undef SIMUL_LSW_HALFLEN
+#undef SIMUL_LSW_HALFMASK
+
+/*
+ * SIMUL_LSW_ODDLEN is 1 when the lower word has an odd number of bits,
+ * SIMUL_LSW_HALFLEN is half that width (rounded down) and
+ * SIMUL_LSW_HALFMASK has the SIMUL_LSW_HALFLEN low bits set.
+ */
+#define SIMUL_LSW_ODDLEN (SIMUL_LSW_WIDTH & 1)
+#define SIMUL_LSW_HALFLEN (SIMUL_LSW_WIDTH / 2)
+#define SIMUL_LSW_HALFMASK (~(~(SIMUL_ARITH_SUBTYPE)0 << SIMUL_LSW_HALFLEN))
+
+/*
+ * Unsigned multiplication, truncated to the simulated width. The lower
+ * words are split into halves and the four partial products are summed;
+ * c counts the carries out of the lower word. The products involving an
+ * upper word only contribute to the upper word of the result.
+ */
+ARITH_DECL_BI_UU_U(star)
+{
+ arith_u z;
+ SIMUL_ARITH_SUBTYPE a = x.lsw, b = y.lsw, t00, t01, t10, t11, c = 0, t;
+#if SIMUL_LSW_ODDLEN
+ /* odd width: the top bit of b is set aside in bms and cleared from b */
+ SIMUL_ARITH_SUBTYPE bms = b & (SIMUL_ONE_TMP << (SIMUL_LSW_WIDTH - 1));
+
+ b &= ~(SIMUL_ONE_TMP << (SIMUL_LSW_WIDTH - 1));
+#endif
+
+ /* tXY = (half X of a) * (half Y of b), 0 = low half, 1 = high half */
+ t00 = (a & SIMUL_LSW_HALFMASK) * (b & SIMUL_LSW_HALFMASK);
+ t01 = (a & SIMUL_LSW_HALFMASK) * (b >> SIMUL_LSW_HALFLEN);
+ t10 = (a >> SIMUL_LSW_HALFLEN) * (b & SIMUL_LSW_HALFMASK);
+ t11 = (a >> SIMUL_LSW_HALFLEN) * (b >> SIMUL_LSW_HALFLEN);
+ /* after each truncated addition, a sum smaller than before means a carry */
+ t = z.lsw = t00;
+ z.lsw = TLSW(z.lsw + (t01 << SIMUL_LSW_HALFLEN));
+ if (t > z.lsw) c ++;
+ t = z.lsw;
+ z.lsw = TLSW(z.lsw + (t10 << SIMUL_LSW_HALFLEN));
+ if (t > z.lsw) c ++;
+#if SIMUL_LSW_ODDLEN
+ t = z.lsw;
+ z.lsw = TLSW(z.lsw + (t11 << (2 * SIMUL_LSW_HALFLEN)));
+ if (t > z.lsw) c ++;
+ /*
+  * NOTE(review): this adds b (already stripped of its top bit) rather
+  * than a term derived from bms -- confirm this is the intended
+  * contribution of the set-aside bit for odd widths.
+  */
+ if (bms && (a & SIMUL_ONE_TMP)) {
+ t = z.lsw;
+ z.lsw = TLSW(z.lsw + b);
+ if (t > z.lsw) c ++;
+ }
+#endif
+ /* upper word: cross products, carries, and the high parts of t01/t10/t11 */
+ z.msw = TMSW(x.lsw * y.msw + x.msw * y.lsw + c
+ + (t01 >> (SIMUL_LSW_WIDTH - SIMUL_LSW_HALFLEN))
+ + (t10 >> (SIMUL_LSW_WIDTH - SIMUL_LSW_HALFLEN))
+ + (t11 >> (SIMUL_LSW_WIDTH - (2 * SIMUL_LSW_HALFLEN))));
+ return z;
+}
+
+/*
+ * This function calculates the unsigned integer division, yielding
+ * both quotient and remainder. The divider (y) MUST be non-zero.
+ */
+/*
+ * Shift-and-subtract division: x is the dividend, y the divider; the
+ * quotient is stored in *q and the remainder in *r.
+ */
+static void arith_op_u(udiv)(arith_u x, arith_u y, arith_u *q, arith_u *r)
+{
+ int i, j;
+ arith_u a;
+
+ *q = SIMUL_ZERO;
+ /* find i, the position of the highest set bit of y */
+ for (i = SIMUL_NUMBITS - 1; i >= 0; i --) {
+ if (i >= (int)SIMUL_LSW_WIDTH
+ && (y.msw & (SIMUL_ONE_TMP << (i - SIMUL_LSW_WIDTH))))
+ break;
+ if (i < (int)SIMUL_LSW_WIDTH && (y.lsw & (SIMUL_ONE_TMP << i)))
+ break;
+ }
+ /* align the highest set bit of the divider on the top bit of the type */
+ a = arith_op_u(lsh)(y, SIMUL_NUMBITS - 1 - i);
+ /* quotient bits that belong to the upper word */
+ for (j = SIMUL_NUMBITS - 1 - i; j >= SIMUL_LSW_WIDTH; j --) {
+ if (arith_op_u(leq)(a, x)) {
+ x = arith_op_u(minus)(x, a);
+ q->msw |= SIMUL_ONE_TMP << (j - SIMUL_LSW_WIDTH);
+ }
+ a = arith_op_u(rsh)(a, 1);
+ }
+ /* quotient bits that belong to the lower word */
+ for (; j >= 0; j --) {
+ if (arith_op_u(leq)(a, x)) {
+ x = arith_op_u(minus)(x, a);
+ q->lsw |= SIMUL_ONE_TMP << j;
+ }
+ a = arith_op_u(rsh)(a, 1);
+ }
+ /* what is left of the dividend is the remainder */
+ *r = x;
+}
+
+ARITH_DECL_BI_UU_U(slash)
+{
+	/* unsigned quotient; a zero divider is reported as an error */
+	arith_u quot, rem;
+
+	if (arith_op_u(lnot)(y)) {
+		ARITH_ERROR(ARITH_EXCEP_SLASH_D);
+	}
+	arith_op_u(udiv)(x, y, &quot, &rem);
+	return quot;
+}
+
+ARITH_DECL_BI_UU_U(pct)
+{
+	/* unsigned remainder; a zero divider is reported as an error */
+	arith_u quot, rem;
+
+	if (arith_op_u(lnot)(y)) {
+		ARITH_ERROR(ARITH_EXCEP_PCT_D);
+	}
+	arith_op_u(udiv)(x, y, &quot, &rem);
+	return rem;
+}
+
+#undef SIMUL_TRAP
+#undef SIMUL_TRAPL
+/*
+ * SIMUL_TRAP is the top bit of the upper word; the signed functions
+ * below test it as the sign bit. SIMUL_TRAPL is the top bit of the
+ * lower word.
+ */
+#define SIMUL_TRAP (SIMUL_ONE_TMP << (SIMUL_MSW_WIDTH - 1))
+#define SIMUL_TRAPL (SIMUL_ONE_TMP << (SIMUL_LSW_WIDTH - 1))
+
+/*
+ * Unsigned to signed conversion. The value is returned unchanged; with
+ * ARITHMETIC_CHECKS a warning is raised when its top bit is set.
+ */
+ARITH_DECL_MONO_U_S(to_s)
+{
+#ifdef ARITHMETIC_CHECKS
+ if (x.msw & SIMUL_TRAP) ARITH_WARNING(ARITH_EXCEP_CONV_O);
+#endif
+ return x;
+}
+
+/* signed value from an int: same construction as the unsigned one */
+ARITH_DECL_MONO_I_S(fromint) { return arith_op_u(fromint)(x); }
+
+/*
+ * Signed value from a long. For a negative x, the magnitude is computed
+ * in unsigned long arithmetic (negating x as a long would overflow for
+ * the most negative long), converted, then negated in the simulated type.
+ */
+ARITH_DECL_MONO_L_S(fromlong)
+{
+	if (x < 0) return arith_op_u(neg)(
+		arith_op_u(fromulong)(0UL - (unsigned long)x));
+	return arith_op_u(fromulong)((unsigned long)x);
+}
+
+ARITH_DECL_MONO_S_I(toint)
+{
+	/*
+	 * Signed value to int: a negative value is converted through its
+	 * magnitude, and the native result is negated.
+	 */
+	int negative = (x.msw & SIMUL_TRAP) != 0;
+
+	if (!negative) {
+		return arith_op_u(toint)(x);
+	}
+	return -arith_op_u(toint)(arith_op_u(neg)(x));
+}
+
+ARITH_DECL_MONO_S_L(tolong)
+{
+	/*
+	 * Signed value to long: a negative value is converted through its
+	 * magnitude, and the native result is negated.
+	 */
+	int negative = (x.msw & SIMUL_TRAP) != 0;
+
+	if (!negative) {
+		return (long)arith_op_u(toulong)(x);
+	}
+	return -(long)arith_op_u(toulong)(arith_op_u(neg)(x));
+}
+
+ARITH_DECL_MONO_S_S(neg)
+{
+	/*
+	 * Signed negation. With ARITHMETIC_CHECKS, negating the value
+	 * whose only set bit is the sign bit raises a warning first.
+	 */
+#ifdef ARITHMETIC_CHECKS
+	if (x.msw == SIMUL_TRAP && x.lsw == 0) {
+		ARITH_WARNING(ARITH_EXCEP_NEG_O);
+	}
+#endif
+	return arith_op_u(neg)(x);
+}
+
+/* the three operators below simply defer to their unsigned versions */
+ARITH_DECL_MONO_S_S(not)
+{
+	return arith_op_u(not)(x);
+}
+
+ARITH_DECL_MONO_S_I(lnot)
+{
+	return arith_op_u(lnot)(x);
+}
+
+ARITH_DECL_MONO_S_I(lval)
+{
+	return arith_op_u(lval)(x);
+}
+
+ARITH_DECL_BI_SS_S(plus)
+{
+	/*
+	 * Signed addition, truncated. With ARITHMETIC_CHECKS: two negative
+	 * operands with a non-negative sum raise PLUS_U, two non-negative
+	 * operands with a negative sum raise PLUS_O.
+	 */
+	arith_s sum = arith_op_u(plus)(x, y);
+
+#ifdef ARITHMETIC_CHECKS
+	{
+		int xs = (x.msw & SIMUL_TRAP) != 0;
+		int ys = (y.msw & SIMUL_TRAP) != 0;
+		int zs = (sum.msw & SIMUL_TRAP) != 0;
+
+		if (xs && ys && !zs) {
+			ARITH_WARNING(ARITH_EXCEP_PLUS_U);
+		} else if (!xs && !ys && zs) {
+			ARITH_WARNING(ARITH_EXCEP_PLUS_O);
+		}
+	}
+#endif
+	return sum;
+}
+
+ARITH_DECL_BI_SS_S(minus)
+{
+	/*
+	 * Signed subtraction, truncated. With ARITHMETIC_CHECKS: negative
+	 * minus non-negative giving a non-negative result raises MINUS_U,
+	 * non-negative minus negative giving a negative result raises
+	 * MINUS_O.
+	 */
+	arith_s diff = arith_op_u(minus)(x, y);
+
+#ifdef ARITHMETIC_CHECKS
+	{
+		int xs = (x.msw & SIMUL_TRAP) != 0;
+		int ys = (y.msw & SIMUL_TRAP) != 0;
+		int zs = (diff.msw & SIMUL_TRAP) != 0;
+
+		if (xs && !ys && !zs) {
+			ARITH_WARNING(ARITH_EXCEP_MINUS_U);
+		} else if (!xs && ys && zs) {
+			ARITH_WARNING(ARITH_EXCEP_MINUS_O);
+		}
+	}
+#endif
+	return diff;
+}
+
+/*
+ * Since signed and unsigned widths are equal for the simulated type,
+ * we can use the unsigned left shift function, which performs the
+ * checks on the type width.
+ */
+ARITH_DECL_BI_SI_S(lsh)
+{
+ arith_s z = arith_op_u(lsh)(x, y);
+
+#ifdef ARITHMETIC_CHECKS
+ if (x.msw & SIMUL_TRAP) ARITH_WARNING(ARITH_EXCEP_LSH_U);
+ else {
+ /*
+ * To check for possible overflow, we right shift the
+ * result. We need to make the shift count proper so that
+ * we do not emit a double-warning. Besides, the left shift
+ * could have been untruncated but yet affect the sign bit,
+ * so we must test this explicitly.
+ */
+ arith_s w = arith_op_u(rsh)(z, (unsigned)y % SIMUL_NUMBITS);
+
+ /* overflow: sign bit set in the result, or the round trip lost bits */
+ if ((z.msw & SIMUL_TRAP) || w.msw != x.msw || w.lsw != x.lsw)
+ ARITH_WARNING(ARITH_EXCEP_LSH_O);
+ }
+#endif
+ return z;
+}
+
+/*
+ * We define that right shifting a negative value, besides being worth a
+ * warning, duplicates the sign bit. This is the most useful and most
+ * usually encountered behaviour, and the standard allows it.
+ */
+ARITH_DECL_BI_SI_S(rsh)
+{
+	/*
+	 * Arithmetic right shift: the unsigned (logical) shift is done
+	 * first, then, for a negative x, the gy vacated top bits are set.
+	 * gy is the count actually applied (cast to unsigned and reduced
+	 * modulo SIMUL_NUMBITS, as the unsigned shift does).
+	 */
+	int xn = (x.msw & SIMUL_TRAP) != 0;
+	arith_s z = arith_op_u(rsh)(x, y);
+	int gy = (unsigned)y % SIMUL_NUMBITS;
+
+#ifdef ARITHMETIC_CHECKS
+	if (xn) ARITH_WARNING(ARITH_EXCEP_RSH_N);
+#endif
+	if (xn && gy > 0) {
+		if (gy < SIMUL_MSW_WIDTH) {
+			/* the vacated bits all lie in the upper word */
+			z.msw |= TMSW(~(SIMUL_MSW_MASK >> gy));
+		} else {
+			/*
+			 * The whole upper word is vacated, plus the top
+			 * (gy - SIMUL_MSW_WIDTH) bits of the lower word.
+			 * gy == SIMUL_MSW_WIDTH is handled here rather than
+			 * above, so that the upper mask is never shifted
+			 * by its full width.
+			 */
+			z.msw = SIMUL_MSW_MASK;
+			z.lsw |= TLSW(~(SIMUL_LSW_MASK
+				>> (gy - SIMUL_MSW_WIDTH)));
+		}
+	}
+	return z;
+}
+
+ARITH_DECL_BI_SS_I(lt)
+{
+	/*
+	 * Signed x < y. With different signs the negative operand is the
+	 * smaller one; with equal signs the words compare as unsigned.
+	 */
+	int x_neg = (x.msw & SIMUL_TRAP) != 0;
+	int y_neg = (y.msw & SIMUL_TRAP) != 0;
+
+	if (x_neg != y_neg) {
+		return x_neg;
+	}
+	if (x.msw != y.msw) {
+		return x.msw < y.msw;
+	}
+	return x.lsw < y.lsw;
+}
+
+ARITH_DECL_BI_SS_I(leq)
+{
+	/*
+	 * Signed x <= y. With different signs the negative operand is the
+	 * smaller one; with equal signs the words compare as unsigned.
+	 */
+	int x_neg = (x.msw & SIMUL_TRAP) != 0;
+	int y_neg = (y.msw & SIMUL_TRAP) != 0;
+
+	if (x_neg != y_neg) {
+		return x_neg;
+	}
+	if (x.msw != y.msw) {
+		return x.msw < y.msw;
+	}
+	return x.lsw <= y.lsw;
+}
+
+ARITH_DECL_BI_SS_I(gt)
+{
+	/* x > y is y < x */
+	int res = arith_op_s(lt)(y, x);
+
+	return res;
+}
+
+ARITH_DECL_BI_SS_I(geq)
+{
+	/* x >= y is y <= x */
+	int res = arith_op_s(leq)(y, x);
+
+	return res;
+}
+
+ARITH_DECL_BI_SS_I(same)
+{
+	/* equality: both words must match */
+	return !(x.msw != y.msw || x.lsw != y.lsw);
+}
+
+ARITH_DECL_BI_SS_I(neq)
+{
+	/* inequality: logical negation of the equality test */
+	return arith_op_s(same)(x, y) ? 0 : 1;
+}
+
+ARITH_DECL_BI_SS_S(and)
+{
+	/* bitwise AND, word by word (same as the unsigned operator) */
+	x.msw &= y.msw;
+	x.lsw &= y.lsw;
+	return x;
+}
+
+ARITH_DECL_BI_SS_S(xor)
+{
+	/* bitwise exclusive OR, word by word (same as the unsigned operator) */
+	x.msw ^= y.msw;
+	x.lsw ^= y.lsw;
+	return x;
+}
+
+ARITH_DECL_BI_SS_S(or)
+{
+	/* bitwise inclusive OR, word by word (same as the unsigned operator) */
+	x.msw |= y.msw;
+	x.lsw |= y.lsw;
+	return x;
+}
+
+/*
+ * This function calculates the signed integer division, yielding
+ * both quotient and remainder. The divider (y) MUST be non-zero.
+ *
+ * The division is performed on the magnitudes. The quotient is negative
+ * when the operands have different signs; the remainder takes the sign
+ * of the dividend (x), so that q * y + r == x always holds.
+ */
+static void arith_op_s(sdiv)(arith_s x, arith_s y, arith_s *q, arith_s *r)
+{
+	arith_u a = x, b = y, c, d;
+	int xn = 0, yn = 0;
+
+	if (x.msw & SIMUL_TRAP) { a = arith_op_u(neg)(x); xn = 1; }
+	if (y.msw & SIMUL_TRAP) { b = arith_op_u(neg)(y); yn = 1; }
+	arith_op_u(udiv)(a, b, &c, &d);
+	if (xn != yn) *q = arith_op_u(neg)(c); else *q = c;
+	/* e.g. 7 % -2 is 1 and -7 % -2 is -1: only the sign of x matters */
+	if (xn) *r = arith_op_u(neg)(d); else *r = d;
+}
+
+/*
+ * Overflow/underflow check: the truncated product z is divided back by
+ * x and compared with y (this is quite computationally expensive, but
+ * exact). If the true product does not fit, z differs from it by a
+ * non-zero multiple of 2^SIMUL_NUMBITS, and z / x cannot be equal to y.
+ *
+ * Two cases are handled apart:
+ * -- a zero operand never overflows (and x == 0 cannot be a divider);
+ * -- x == -1 overflows only for the most negative y, and dividing the
+ *    resulting z by -1 would itself be reported as a division overflow.
+ */
+ARITH_DECL_BI_SS_S(star)
+{
+#ifdef ARITHMETIC_CHECKS
+	arith_s z = arith_op_u(star)(x, y);
+	int warn = 0;
+
+	if (arith_op_u(lval)(x) && arith_op_u(lval)(y)) {
+		if (x.msw == SIMUL_MSW_MASK && x.lsw == SIMUL_LSW_MASK) {
+			/* x is -1 */
+			if (y.msw == SIMUL_TRAP && y.lsw == 0) warn = 1;
+		} else if (!arith_op_s(same)(arith_op_s(slash)(z, x), y)) {
+			warn = 1;
+		}
+	}
+	/* operands of opposite signs: underflow; same sign: overflow */
+	if (warn) ARITH_WARNING(((x.msw ^ y.msw) & SIMUL_TRAP)
+		? ARITH_EXCEP_STAR_U : ARITH_EXCEP_STAR_O);
+	return z;
+#else
+	return arith_op_u(star)(x, y);
+#endif
+}
+
+ARITH_DECL_BI_SS_S(slash)
+{
+	/*
+	 * Signed quotient. Errors are reported for a zero divider and for
+	 * the division of the value whose only set bit is the sign bit by
+	 * the all-ones value (-1).
+	 */
+	arith_s quot, rem;
+	int zero_divider = arith_op_s(lnot)(y);
+	int min_by_minus_one = x.msw == SIMUL_TRAP && x.lsw == 0
+		&& y.msw == SIMUL_MSW_MASK && y.lsw == SIMUL_LSW_MASK;
+
+	if (zero_divider) {
+		ARITH_ERROR(ARITH_EXCEP_SLASH_D);
+	} else if (min_by_minus_one) {
+		ARITH_ERROR(ARITH_EXCEP_SLASH_O);
+	}
+	arith_op_s(sdiv)(x, y, &quot, &rem);
+	return quot;
+}
+
+ARITH_DECL_BI_SS_S(pct)
+{
+	/* signed remainder; a zero divider is reported as an error */
+	arith_s quot, rem;
+
+	if (arith_op_s(lnot)(y)) {
+		ARITH_ERROR(ARITH_EXCEP_PCT_D);
+	}
+	arith_op_s(sdiv)(x, y, &quot, &rem);
+	return rem;
+}
+
+ARITH_DECL_MONO_ST_US(octconst)
+{
+	/*
+	 * Convert the run of octal digits starting at c.
+	 *
+	 * *ru receives the unsigned value. *sp is set to 1 and *rs to the
+	 * value when its top bit is clear; otherwise *sp is set to 0 and
+	 * *rs is left untouched. The returned pointer designates the first
+	 * character that is not an octal digit.
+	 */
+	arith_u acc = { 0, 0 };
+
+	while (ARITH_OCTAL(*c)) {
+		unsigned digit = ARITH_OVAL(*c);
+
+		/* the three top bits must be clear before shifting them out */
+		if (acc.msw > (SIMUL_MSW_MASK / 8)) {
+			ARITH_ERROR(ARITH_EXCEP_CONST_O);
+		}
+		acc = arith_op_u(lsh)(acc, 3);
+		acc.lsw |= digit;
+		c ++;
+	}
+	*ru = acc;
+	if ((acc.msw & SIMUL_TRAP) == 0) {
+		*sp = 1;
+		*rs = acc;
+	} else {
+		*sp = 0;
+	}
+	return c;
+}
+
+/*
+ * Convert the run of decimal digits starting at c. *ru receives the
+ * unsigned value; *sp is set to 1 and *rs to the value when its top bit
+ * is clear, otherwise *sp is set to 0 and *rs is left untouched. The
+ * returned pointer designates the first character that is not a decimal
+ * digit.
+ */
+ARITH_DECL_MONO_ST_US(decconst)
+{
+/*
+ * ARITH_ALPHA works out to 2^SIMUL_MSW_WIDTH modulo 10, computed without
+ * forming 2^SIMUL_MSW_WIDTH itself. It is built from plain unsigned
+ * constants so that it can be used in #if; ARITH_ALPHA_A is the same
+ * expression built from the subtype-typed macros.
+ */
+#define ARITH_ALPHA_TRAP (1U << (SIMUL_MSW_WIDTH - 1))
+#define ARITH_ALPHA_MASK (ARITH_ALPHA_TRAP | (ARITH_ALPHA_TRAP - 1))
+#define ARITH_ALPHA ((ARITH_ALPHA_MASK - 10 * (ARITH_ALPHA_TRAP / 5)) + 1)
+#define ARITH_ALPHA_A ((SIMUL_MSW_MASK - 10 * (SIMUL_TRAP / 5)) + 1)
+
+ arith_u z = { 0, 0 };
+
+ for (; ARITH_DECIM(*c); c ++) {
+ unsigned w = ARITH_DVAL(*c);
+ SIMUL_ARITH_SUBTYPE t;
+
+ /*
+  * Range check before multiplying by ten: the upper word is compared
+  * with SIMUL_MSW_MASK / 10 and, when equal, the lower word with a
+  * threshold derived from ARITH_ALPHA.
+  */
+ if (z.msw > (SIMUL_MSW_MASK / 10)
+ || (z.msw == (SIMUL_MSW_MASK / 10) &&
+/* ARITH_ALPHA is between 1 and 9, inclusive. */
+#if ARITH_ALPHA == 5
+ z.lsw >= SIMUL_TRAPL
+#else
+ z.lsw > ((SIMUL_TRAPL / 5) * ARITH_ALPHA_A
+ + ((SIMUL_TRAPL % 5) * ARITH_ALPHA_A) / 5)
+#endif
+ )) ARITH_ERROR(ARITH_EXCEP_CONST_O);
+ /* z * 10 computed as (z << 3) + (z << 1) */
+ z = arith_op_u(plus)(arith_op_u(lsh)(z, 3),
+ arith_op_u(lsh)(z, 1));
+ /* add the digit to the lower word, carrying into the upper word */
+ t = TLSW(z.lsw + w);
+ if (t < z.lsw) z.msw ++;
+ z.lsw = t;
+ }
+ *ru = z;
+ if (z.msw & SIMUL_TRAP) {
+ *sp = 0;
+ } else {
+ *rs = z;
+ *sp = 1;
+ }
+ return c;
+
+#undef ARITH_ALPHA_A
+#undef ARITH_ALPHA
+#undef ARITH_ALPHA_TRAP
+#undef ARITH_ALPHA_MASK
+}
+
+ARITH_DECL_MONO_ST_US(hexconst)
+{
+	/*
+	 * Convert the run of hexadecimal digits starting at c.
+	 *
+	 * *ru receives the unsigned value. *sp is set to 1 and *rs to the
+	 * value when its top bit is clear; otherwise *sp is set to 0 and
+	 * *rs is left untouched. The returned pointer designates the first
+	 * character that is not a hexadecimal digit.
+	 */
+	arith_u acc = { 0, 0 };
+
+	while (ARITH_HEXAD(*c)) {
+		unsigned digit = ARITH_HVAL(*c);
+
+		/* the four top bits must be clear before shifting them out */
+		if (acc.msw > (SIMUL_MSW_MASK / 16)) {
+			ARITH_ERROR(ARITH_EXCEP_CONST_O);
+		}
+		acc = arith_op_u(lsh)(acc, 4);
+		acc.lsw |= digit;
+		c ++;
+	}
+	*ru = acc;
+	if ((acc.msw & SIMUL_TRAP) == 0) {
+		*sp = 1;
+		*rs = acc;
+	} else {
+		*sp = 0;
+	}
+	return c;
+}
+
+#endif
+
+#undef ARITH_HVAL
+#undef ARITH_HEXAD
+#undef ARITH_DVAL
+#undef ARITH_DECIM
+#undef ARITH_OVAL
+#undef ARITH_OCTAL
diff --git a/libexec/auxcpp/arith.h b/libexec/auxcpp/arith.h
new file mode 100644
index 00000000000..ae64e5cdabc
--- /dev/null
+++ b/libexec/auxcpp/arith.h
@@ -0,0 +1,255 @@
+/*
+ * Integer arithmetic evaluation, header file.
+ *
+ * (c) Thomas Pornin 2002
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This arithmetic evaluator uses two files: this header file (arith.h)
+ * and the source file (arith.c). To use this code, the source file should
+ * be included from another .c file which defines some macros (see below).
+ * Then the functions defined in the arith.c file become available to the
+ * including source file. If those functions are defined with external
+ * linkage (that is, `ARITH_FUNCTION_HEADER' does not contain `static'),
+ * it is possible for other source files to use the arithmetic functions
+ * by including the arith.h header only. The source file which includes
+ * arith.c should *not* include arith.h.
+ *
+ * If the #include is for arith.h, the following macros should be
+ * defined:
+ *
+ * -- If the evaluator is supposed to use a native type:
+ * NATIVE_SIGNED the native signed integer type
+ * NATIVE_UNSIGNED the native unsigned integer type
+ *
+ * -- If the evaluator is supposed to use an emulated type:
+ * SIMUL_ARITH_SUBTYPE the native unsigned type used for the simulation
+ * SIMUL_SUBTYPE_BITS the native unsigned type size
+ * SIMUL_NUMBITS the emulated type size
+ *
+ * -- For both cases:
+ * ARITH_TYPENAME the central arithmetic type name
+ * ARITH_FUNCTION_HEADER the qualifiers to add to function definitions
+ *
+ * The presence (respectively absence) of the NATIVE_SIGNED macro triggers
+ * the use of the native type evaluator (respectively simulated type
+ * evaluator).
+ *
+ * If the #include is for arith.c, the macros for arith.h should be defined,
+ * and the following should be defined as well:
+ *
+ * -- If the evaluator is supposed to use a native type:
+ * NATIVE_UNSIGNED_BITS the native unsigned type size
+ * NATIVE_SIGNED_MIN the native signed minimum value
+ * NATIVE_SIGNED_MAX the native signed maximum value
+ * (the last two macros must evaluate to signed constant expressions)
+ *
+ * -- For both cases:
+ * ARITH_WARNING(type) code to perform on warning
+ * ARITH_ERROR(type) code to perform on error
+ *
+ * The macro ARITH_WARNING() and ARITH_ERROR() are invoked with a
+ * numerical argument which is one of the enumeration constants
+ * defined below (ARITH_EXCEP_*) that identifies the specific problem.
+ *
+ * If the #include is for arith.c, the macro ARITHMETIC_CHECKS may be
+ * defined. When this macro is defined, checks are performed so that all
+ * operations which would lead to undefined or implementation-defined
+ * behaviour are first reported through ARITH_WARNING(). Code is smaller
+ * and faster without these checks, of course. Regardless of the status
+ * of that macro, divisions by 0 and overflows on signed division are
+ * reported as errors through ARITH_ERROR().
+ *
+ */
+
+#ifndef ARITH_H__
+#define ARITH_H__
+
+/*
+ * Problem identifiers handed to ARITH_WARNING() and ARITH_ERROR().
+ * The first group is used for warnings, the second one for errors.
+ */
+enum {
+ /* Warnings */
+ ARITH_EXCEP_CONV_O, /* overflow on conversion */
+ ARITH_EXCEP_NEG_O, /* overflow on unary minus */
+ ARITH_EXCEP_NOT_T, /* trap representation on bitwise inversion */
+ ARITH_EXCEP_PLUS_O, /* overflow on addition */
+ ARITH_EXCEP_PLUS_U, /* underflow on addition */
+ ARITH_EXCEP_MINUS_O, /* overflow on subtraction */
+ ARITH_EXCEP_MINUS_U, /* underflow on subtraction */
+ ARITH_EXCEP_AND_T, /* trap representation on bitwise and */
+ ARITH_EXCEP_XOR_T, /* trap representation on bitwise xor */
+ ARITH_EXCEP_OR_T, /* trap representation on bitwise or */
+ ARITH_EXCEP_LSH_W, /* left shift by type width or more */
+ ARITH_EXCEP_LSH_C, /* left shift by negative count */
+ ARITH_EXCEP_LSH_O, /* overflow on left shift */
+ ARITH_EXCEP_LSH_U, /* underflow on left shift */
+ ARITH_EXCEP_RSH_W, /* right shift by type width or more */
+ ARITH_EXCEP_RSH_C, /* right shift by negative count */
+ ARITH_EXCEP_RSH_N, /* right shift of negative value */
+ ARITH_EXCEP_STAR_O, /* overflow on multiplication */
+ ARITH_EXCEP_STAR_U, /* underflow on multiplication */
+
+ /* Errors */
+ ARITH_EXCEP_SLASH_D, /* division by 0 */
+ ARITH_EXCEP_SLASH_O, /* overflow on division */
+ ARITH_EXCEP_PCT_D, /* division by 0 on modulus operator */
+ ARITH_EXCEP_CONST_O /* constant too large */
+};
+
+/*
+ * Token pasting in two steps, so that macro arguments (ARITH_TYPENAME
+ * in particular) are expanded before being pasted.
+ */
+#define arith_strc_(x, y) x ## y
+#define arith_strc(x, y) arith_strc_(x, y)
+
+/*
+ * Names derived from ARITH_TYPENAME (written T below):
+ *   arith_u         -> u_T        the unsigned type
+ *   arith_s         -> s_T        the signed type
+ *   arith_op_u(op)  -> T_u_op     function working on the unsigned type
+ *   arith_op_s(op)  -> T_s_op     function working on the signed type
+ */
+#define arith_u arith_strc(u_, ARITH_TYPENAME)
+#define arith_s arith_strc(s_, ARITH_TYPENAME)
+#define arith_op_u(op) arith_strc(ARITH_TYPENAME, arith_strc(_u_, op))
+#define arith_op_s(op) arith_strc(ARITH_TYPENAME, arith_strc(_s_, op))
+
+/*
+ * Function headers. ARITH_DECL_MONO_<arg>_<ret> declares a function with
+ * one operand, ARITH_DECL_BI_<args>_<ret> a function with two operands.
+ * Letters: U = arith_u, S = arith_s, I = int, L = long (unsigned long
+ * where the other side is U). The function name is built with
+ * arith_op_s() when arith_s appears in the header and with arith_op_u()
+ * otherwise. ARITH_DECL_MONO_ST_US is the header of the
+ * string-to-constant functions.
+ */
+#define ARITH_DECL_MONO_U_U(op) ARITH_FUNCTION_HEADER arith_u \
+ arith_op_u(op)(arith_u x)
+#define ARITH_DECL_MONO_U_S(op) ARITH_FUNCTION_HEADER arith_s \
+ arith_op_u(op)(arith_u x)
+#define ARITH_DECL_MONO_U_I(op) ARITH_FUNCTION_HEADER int \
+ arith_op_u(op)(arith_u x)
+#define ARITH_DECL_MONO_U_L(op) ARITH_FUNCTION_HEADER unsigned long \
+ arith_op_u(op)(arith_u x)
+#define ARITH_DECL_MONO_S_U(op) ARITH_FUNCTION_HEADER arith_u \
+ arith_op_s(op)(arith_s x)
+#define ARITH_DECL_MONO_S_S(op) ARITH_FUNCTION_HEADER arith_s \
+ arith_op_s(op)(arith_s x)
+#define ARITH_DECL_MONO_S_I(op) ARITH_FUNCTION_HEADER int \
+ arith_op_s(op)(arith_s x)
+#define ARITH_DECL_MONO_S_L(op) ARITH_FUNCTION_HEADER long \
+ arith_op_s(op)(arith_s x)
+#define ARITH_DECL_MONO_I_U(op) ARITH_FUNCTION_HEADER arith_u \
+ arith_op_u(op)(int x)
+#define ARITH_DECL_MONO_L_U(op) ARITH_FUNCTION_HEADER arith_u \
+ arith_op_u(op)(unsigned long x)
+#define ARITH_DECL_MONO_I_S(op) ARITH_FUNCTION_HEADER arith_s \
+ arith_op_s(op)(int x)
+#define ARITH_DECL_MONO_L_S(op) ARITH_FUNCTION_HEADER arith_s \
+ arith_op_s(op)(long x)
+#define ARITH_DECL_MONO_ST_US(op) ARITH_FUNCTION_HEADER char *arith_op_u(op) \
+ (char *c, arith_u *ru, arith_s *rs, int *sp)
+
+#define ARITH_DECL_BI_UU_U(op) ARITH_FUNCTION_HEADER arith_u \
+ arith_op_u(op)(arith_u x, arith_u y)
+#define ARITH_DECL_BI_UI_U(op) ARITH_FUNCTION_HEADER arith_u \
+ arith_op_u(op)(arith_u x, int y)
+#define ARITH_DECL_BI_UU_I(op) ARITH_FUNCTION_HEADER int \
+ arith_op_u(op)(arith_u x, arith_u y)
+#define ARITH_DECL_BI_SS_S(op) ARITH_FUNCTION_HEADER arith_s \
+ arith_op_s(op)(arith_s x, arith_s y)
+#define ARITH_DECL_BI_SI_S(op) ARITH_FUNCTION_HEADER arith_s \
+ arith_op_s(op)(arith_s x, int y)
+#define ARITH_DECL_BI_SS_I(op) ARITH_FUNCTION_HEADER int \
+ arith_op_s(op)(arith_s x, arith_s y)
+
+#endif
+
+/*
+ * Type definitions: plain typedefs of the native types when
+ * NATIVE_SIGNED is defined, a two-word structure otherwise.
+ */
+#ifdef NATIVE_SIGNED
+
+typedef NATIVE_SIGNED arith_s;
+typedef NATIVE_UNSIGNED arith_u;
+
+#else
+
+/* each of the two words is held in one object of the native subtype */
+#if SIMUL_NUMBITS > (2 * SIMUL_SUBTYPE_BITS)
+#error Native subtype too small for arithmetic simulation.
+#endif
+
+/* for an odd SIMUL_NUMBITS, the lower word gets the extra bit */
+#define SIMUL_MSW_WIDTH (SIMUL_NUMBITS / 2)
+#define SIMUL_LSW_WIDTH ((SIMUL_NUMBITS + 1) / 2)
+
+/* msw: upper word, lsw: lower word; same structure for both signednesses */
+typedef struct {
+ SIMUL_ARITH_SUBTYPE msw, lsw;
+} arith_u, arith_s;
+
+#endif
+
+/* functions with the unsigned type */
+
+ARITH_DECL_MONO_S_U(to_u);
+ARITH_DECL_MONO_I_U(fromint);
+ARITH_DECL_MONO_L_U(fromulong);
+ARITH_DECL_MONO_U_I(toint);
+ARITH_DECL_MONO_U_L(toulong);
+
+ARITH_DECL_MONO_U_U(neg);
+ARITH_DECL_MONO_U_U(not);
+ARITH_DECL_MONO_U_I(lnot);
+ARITH_DECL_MONO_U_I(lval);
+
+ARITH_DECL_BI_UU_U(plus);
+ARITH_DECL_BI_UU_U(minus);
+ARITH_DECL_BI_UI_U(lsh);
+ARITH_DECL_BI_UI_U(rsh);
+ARITH_DECL_BI_UU_I(lt);
+ARITH_DECL_BI_UU_I(leq);
+ARITH_DECL_BI_UU_I(gt);
+ARITH_DECL_BI_UU_I(geq);
+ARITH_DECL_BI_UU_I(same);
+ARITH_DECL_BI_UU_I(neq);
+ARITH_DECL_BI_UU_U(and);
+ARITH_DECL_BI_UU_U(xor);
+ARITH_DECL_BI_UU_U(or);
+ARITH_DECL_BI_UU_U(star);
+ARITH_DECL_BI_UU_U(slash);
+ARITH_DECL_BI_UU_U(pct);
+
+/* functions with the signed type */
+
+ARITH_DECL_MONO_U_S(to_s);
+ARITH_DECL_MONO_I_S(fromint);
+ARITH_DECL_MONO_L_S(fromlong);
+ARITH_DECL_MONO_S_I(toint);
+ARITH_DECL_MONO_S_L(tolong);
+
+ARITH_DECL_MONO_S_S(neg);
+ARITH_DECL_MONO_S_S(not);
+ARITH_DECL_MONO_S_I(lnot);
+ARITH_DECL_MONO_S_I(lval);
+
+ARITH_DECL_BI_SS_S(plus);
+ARITH_DECL_BI_SS_S(minus);
+ARITH_DECL_BI_SI_S(lsh);
+ARITH_DECL_BI_SI_S(rsh);
+ARITH_DECL_BI_SS_I(lt);
+ARITH_DECL_BI_SS_I(leq);
+ARITH_DECL_BI_SS_I(gt);
+ARITH_DECL_BI_SS_I(geq);
+ARITH_DECL_BI_SS_I(same);
+ARITH_DECL_BI_SS_I(neq);
+ARITH_DECL_BI_SS_S(and);
+ARITH_DECL_BI_SS_S(xor);
+ARITH_DECL_BI_SS_S(or);
+ARITH_DECL_BI_SS_S(star);
+ARITH_DECL_BI_SS_S(slash);
+ARITH_DECL_BI_SS_S(pct);
+
+/* conversions from string */
+ARITH_DECL_MONO_ST_US(octconst);
+ARITH_DECL_MONO_ST_US(hexconst);
+ARITH_DECL_MONO_ST_US(decconst);
diff --git a/libexec/auxcpp/assert.c b/libexec/auxcpp/assert.c
new file mode 100644
index 00000000000..579d47e0a0a
--- /dev/null
+++ b/libexec/auxcpp/assert.c
@@ -0,0 +1,420 @@
+/*
+ * (c) Thomas Pornin 1999 - 2002
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "tune.h"
+#include <stdio.h>
+#include <string.h>
+#include <stddef.h>
+#include <limits.h>
+#include <time.h>
+#include "ucppi.h"
+#include "mem.h"
+#include "nhash.h"
+
+/*
+ * Assertion support. Each assertion is indexed by its predicate, and
+ * the list of 'questions' which yield a true answer.
+ */
+
+static HTT assertions;
+static int assertions_init_done = 0;
+
+static struct assert *new_assertion(void)
+{
+	struct assert *fresh = getmem(sizeof *fresh);
+	/* starts with no value lists; only the counter is initialised */
+	fresh->nbval = 0;
+	return fresh;
+}
+
+static void del_token_fifo(struct token_fifo *tf)
+{
+	size_t k;
+	/* free each string token's name, then the array; tf itself is kept */
+	for (k = 0; k != tf->nt; k ++)
+		if (S_TOKEN(tf->t[k].type)) freemem(tf->t[k].name);
+	if (tf->nt != 0) freemem(tf->t);
+}
+
+static void del_assertion(void *va)
+{
+	struct assert *victim = va;
+	size_t idx = 0;
+	/* this function is what init_assertions() hands to HTT_init() */
+	while (idx < victim->nbval) del_token_fifo(&victim->val[idx ++]);
+	if (victim->nbval != 0) freemem(victim->val);
+	freemem(victim);
+}
+
+/*
+ * Write a token list to emit_output; every whitespace token prints as ' '.
+ */
+static void print_token_fifo(struct token_fifo *tf)
+{
+	size_t pos;
+	for (pos = 0; pos != tf->nt; pos ++) {
+		if (ttMWS(tf->t[pos].type)) fputc(' ', emit_output);
+		else fputs(token_name(&tf->t[pos]), emit_output);
+	}
+}
+
+/*
+ * Emit one "#assert name(answer)" line on emit_output per stored answer.
+ */
+static void print_assert(void *va)
+{
+	struct assert *entry = va;
+	size_t n = 0;
+
+	while (n < entry->nbval) {
+		fprintf(emit_output, "#assert %s(", HASH_ITEM_NAME(entry));
+		print_token_fifo(&entry->val[n ++]);
+		fprintf(emit_output, ")\n");
+	}
+}
+
+/*
+ * Compare two token lists: 0 when equivalent, 1 otherwise. Any two
+ * whitespace tokens match each other, but runs of whitespace are not
+ * collapsed, so both lists must hold the same number of tokens.
+ */
+int cmp_token_list(struct token_fifo *f1, struct token_fifo *f2)
+{
+	size_t k, count = f1->nt;
+
+	if (count != f2->nt) return 1;
+	for (k = 0; k < count; k ++) {
+		struct token *a = f1->t + k, *b = f2->t + k;
+		if (ttMWS(a->type) && ttMWS(b->type)) continue;
+		if (a->type != b->type) return 1;
+		/* for MACROARG tokens the line field has to agree as well */
+		if (a->type == MACROARG && a->line != b->line) return 1;
+		if (S_TOKEN(a->type) && strcmp(a->name, b->name)) return 1;
+	}
+	return 0;
+}
+
+/*
+ * Handle "#assert name(answer)": parse the rest of the line from ls and store
+ * the answer under name. Returns 0 on success (also if already stored), -1 on
+ * error. Assertions are not ISO C89 but occur e.g. in Solaris system headers.
+ */
+int handle_assert(struct lexer_state *ls)
+{
+ int ina = 0, ltww; /* ina: a/aname were created here; ltww: last token was whitespace */
+ struct token t;
+ struct token_fifo *atl = 0; /* answer tokens, allocated at handle_assert_next2 */
+ struct assert *a;
+ char *aname;
+ int ret = -1;
+ long l = ls->line; /* line passed to error() and warning() */
+ int nnp; /* open-parenthesis depth within the answer */
+ size_t i;
+
+ while (!next_token(ls)) { /* step 1: the predicate name */
+ if (ls->ctok->type == NEWLINE) break;
+ if (ttMWS(ls->ctok->type)) continue;
+ if (ls->ctok->type == NAME) {
+ if (!(a = HTT_get(&assertions, ls->ctok->name))) {
+ a = new_assertion(); /* unknown predicate: inserted only on success */
+ aname = sdup(ls->ctok->name);
+ ina = 1;
+ }
+ goto handle_assert_next;
+ }
+ error(l, "illegal assertion name for #assert");
+ goto handle_assert_warp_ign;
+ }
+ goto handle_assert_trunc; /* ran out of tokens before a name */
+
+handle_assert_next: /* step 2: the opening '(' */
+ while (!next_token(ls)) {
+ if (ls->ctok->type == NEWLINE) break;
+ if (ttMWS(ls->ctok->type)) continue;
+ if (ls->ctok->type != LPAR) {
+ error(l, "syntax error in #assert");
+ goto handle_assert_warp_ign;
+ }
+ goto handle_assert_next2;
+ }
+ goto handle_assert_trunc;
+
+handle_assert_next2: /* step 3: collect tokens up to the matching ')' */
+ atl = getmem(sizeof(struct token_fifo));
+ atl->art = atl->nt = 0;
+ for (nnp = 1, ltww = 1; nnp && !next_token(ls);) {
+ if (ls->ctok->type == NEWLINE) break;
+ if (ltww && ttMWS(ls->ctok->type)) continue; /* skip leading and repeated whitespace */
+ ltww = ttMWS(ls->ctok->type);
+ if (ls->ctok->type == LPAR) nnp ++;
+ else if (ls->ctok->type == RPAR) {
+ if (!(-- nnp)) goto handle_assert_next3; /* the closing ')' is not stored */
+ }
+ t.type = ls->ctok->type;
+ if (S_TOKEN(t.type)) t.name = sdup(ls->ctok->name);
+ aol(atl->t, atl->nt, t, TOKEN_LIST_MEMG);
+ }
+ goto handle_assert_trunc;
+
+handle_assert_next3: /* step 4: consume the rest of the line, then store the answer */
+ while (!next_token(ls) && ls->ctok->type != NEWLINE) {
+ if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) {
+ warning(l, "trailing garbage in #assert");
+ }
+ }
+ if (atl->nt && ttMWS(atl->t[atl->nt - 1].type) && (-- atl->nt) == 0)
+ freemem(atl->t); /* trailing whitespace dropped; array freed if that emptied the list */
+ if (atl->nt == 0) {
+ error(l, "void assertion in #assert");
+ goto handle_assert_error;
+ }
+ for (i = 0; i < a->nbval && cmp_token_list(atl, a->val + i); i ++); /* look for an identical answer */
+ if (i != a->nbval) {
+ /* we already have it */
+ ret = 0;
+ goto handle_assert_error; /* not an error: only releases the duplicate */
+ }
+
+ /* This is a new assertion. Let's keep it. */
+ aol(a->val, a->nbval, *atl, TOKEN_LIST_MEMG);
+ if (ina) {
+ HTT_put(&assertions, a, aname);
+ freemem(aname);
+ }
+ if (emit_assertions) {
+ fprintf(emit_output, "#assert %s(", HASH_ITEM_NAME(a));
+ print_token_fifo(atl);
+ fputs(")\n", emit_output);
+ }
+ freemem(atl); /* *atl was copied into a->val; free only the header */
+ return 0;
+
+handle_assert_trunc:
+ error(l, "unfinished #assert");
+handle_assert_error: /* release the collected answer and any record created here */
+ if (atl) {
+ del_token_fifo(atl);
+ freemem(atl);
+ }
+ if (ina) {
+ freemem(aname);
+ freemem(a);
+ }
+ return ret;
+handle_assert_warp_ign: /* syntax error: skip the remainder of the line */
+ while (!next_token(ls) && ls->ctok->type != NEWLINE);
+ if (ina) {
+ freemem(aname);
+ freemem(a);
+ }
+ return ret;
+}
+
+/*
+ * Handle "#unassert name" (whole predicate) or "#unassert name(answer)" (one
+ * answer); returns 0 on success or when nothing matched, -1 on error. */
+int handle_unassert(struct lexer_state *ls)
+{
+ int ltww; /* last collected token was whitespace */
+ struct token t;
+ struct token_fifo atl; /* answer to remove, collected from the directive */
+ struct assert *a;
+ int ret = -1;
+ long l = ls->line; /* line passed to error() and warning() */
+ int nnp; /* open-parenthesis depth within the answer */
+ size_t i;
+
+ atl.art = atl.nt = 0;
+ while (!next_token(ls)) { /* step 1: the predicate name */
+ if (ls->ctok->type == NEWLINE) break;
+ if (ttMWS(ls->ctok->type)) continue;
+ if (ls->ctok->type == NAME) {
+ if (!(a = HTT_get(&assertions, ls->ctok->name))) {
+ ret = 0; /* unknown predicate: nothing to remove, not an error */
+ goto handle_unassert_warp;
+ }
+ goto handle_unassert_next;
+ }
+ error(l, "illegal assertion name for #unassert");
+ goto handle_unassert_warp;
+ }
+ goto handle_unassert_trunc;
+
+handle_unassert_next: /* step 2: an optional '(' */
+ while (!next_token(ls)) {
+ if (ls->ctok->type == NEWLINE) break;
+ if (ttMWS(ls->ctok->type)) continue;
+ if (ls->ctok->type != LPAR) {
+ error(l, "syntax error in #unassert");
+ goto handle_unassert_warp;
+ }
+ goto handle_unassert_next2;
+ }
+ if (emit_assertions)
+ fprintf(emit_output, "#unassert %s\n", HASH_ITEM_NAME(a));
+ HTT_del(&assertions, HASH_ITEM_NAME(a)); /* no '(' followed: drop the whole predicate */
+ return 0;
+
+handle_unassert_next2: /* step 3: collect tokens up to the matching ')' */
+ for (nnp = 1, ltww = 1; nnp && !next_token(ls);) {
+ if (ls->ctok->type == NEWLINE) break;
+ if (ltww && ttMWS(ls->ctok->type)) continue; /* skip leading and repeated whitespace */
+ ltww = ttMWS(ls->ctok->type);
+ if (ls->ctok->type == LPAR) nnp ++;
+ else if (ls->ctok->type == RPAR) {
+ if (!(-- nnp)) goto handle_unassert_next3;
+ }
+ t.type = ls->ctok->type;
+ if (S_TOKEN(t.type)) t.name = sdup(ls->ctok->name);
+ aol(atl.t, atl.nt, t, TOKEN_LIST_MEMG);
+ }
+ goto handle_unassert_trunc;
+
+handle_unassert_next3: /* step 4: consume the rest of the line, then remove the answer */
+ while (!next_token(ls) && ls->ctok->type != NEWLINE) {
+ if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) {
+ warning(l, "trailing garbage in #unassert");
+ }
+ }
+ if (atl.nt && ttMWS(atl.t[atl.nt - 1].type) && (-- atl.nt) == 0)
+ freemem(atl.t); /* trailing whitespace dropped; array freed if that emptied the list */
+ if (atl.nt == 0) {
+ error(l, "void assertion in #unassert");
+ return ret;
+ }
+ for (i = 0; i < a->nbval && cmp_token_list(&atl, a->val + i); i ++); /* look for the answer */
+ if (i != a->nbval) {
+ /* we have it, undefine it */
+ del_token_fifo(a->val + i);
+ if (i < (a->nbval - 1))
+ mmvwo(a->val + i, a->val + i + 1, (a->nbval - i - 1)
+ * sizeof(struct token_fifo));
+ if ((-- a->nbval) == 0) freemem(a->val); /* last answer gone; no HTT_del() here, so a stays in the table */
+ if (emit_assertions) {
+ fprintf(emit_output, "#unassert %s(",
+ HASH_ITEM_NAME(a));
+ print_token_fifo(&atl);
+ fputs(")\n", emit_output);
+ }
+ }
+ ret = 0; /* also reached when the answer was not present */
+ goto handle_unassert_finish;
+
+handle_unassert_trunc:
+ error(l, "unfinished #unassert");
+handle_unassert_finish: /* release the tokens collected for the comparison */
+ if (atl.nt) del_token_fifo(&atl);
+ return ret;
+handle_unassert_warp: /* skip the remainder of the line */
+ while (!next_token(ls) && ls->ctok->type != NEWLINE);
+ return ret;
+}
+
+/*
+ * Record the assertion spelled by aval by running handle_assert() over it.
+ */
+int make_assertion(char *aval)
+{
+	struct lexer_state lex;
+	char *text = sdup(aval);
+	size_t len = strlen(aval);
+	int status;
+
+	text[len] = '\n';	/* the copy's terminating NUL becomes a newline */
+	init_buf_lexer_state(&lex, 0);
+	lex.flags = DEFAULT_LEXER_FLAGS;
+	lex.input = 0;
+	lex.input_string = (unsigned char *)text;
+	lex.pbuf = 0;
+	lex.ebuf = len + 1;
+	lex.line = -1;
+	status = handle_assert(&lex);
+	freemem(text);
+	free_lexer_state(&lex);
+	return status;
+}
+
+/*
+ * Remove the assertion spelled by aval by running handle_unassert() over it.
+ */
+int destroy_assertion(char *aval)
+{
+	struct lexer_state lex;
+	char *text = sdup(aval);
+	size_t len = strlen(aval);
+	int status;
+
+	text[len] = '\n';	/* the copy's terminating NUL becomes a newline */
+	init_buf_lexer_state(&lex, 0);
+	lex.flags = DEFAULT_LEXER_FLAGS;
+	lex.input = 0;
+	lex.input_string = (unsigned char *)text;
+	lex.pbuf = 0;
+	lex.ebuf = len + 1;
+	lex.line = -1;
+	status = handle_unassert(&lex);
+	freemem(text);
+	free_lexer_state(&lex);
+	return status;
+}
+
+/* Tear down the assertion table if one is live; otherwise do nothing. */
+void wipe_assertions(void)
+{
+	if (!assertions_init_done) return;
+
+	HTT_kill(&assertions);
+	assertions_init_done = 0;
+}
+
+/*
+ * (Re)initialize the assertion table, discarding any previous contents.
+ */
+void init_assertions(void)
+{
+ wipe_assertions(); /* kills an already-initialized table first */
+ HTT_init(&assertions, del_assertion);
+ assertions_init_done = 1;
+}
+
+/*
+ * Look up the record stored under name; hands back HTT_get()'s result as is.
+ */
+struct assert *get_assertion(char *name)
+{
+ return HTT_get(&assertions, name); /* tests false when name is unknown (see handle_assert) */
+}
+
+/*
+ * Print the stored assertions as "#assert" lines on emit_output.
+ */
+void print_assertions(void)
+{
+ HTT_scan(&assertions, print_assert); /* presumably calls print_assert per table item; confirm in nhash.c */
+}
diff --git a/libexec/auxcpp/atest.c b/libexec/auxcpp/atest.c
new file mode 100644
index 00000000000..7137d930f00
--- /dev/null
+++ b/libexec/auxcpp/atest.c
@@ -0,0 +1,236 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <setjmp.h>
+
+#if defined TEST_NATIVE
+/* native test: evaluate with plain int / unsigned (assumed 32 bits wide) */
+#define NATIVE_SIGNED int
+#define NATIVE_UNSIGNED unsigned
+
+#define NATIVE_UNSIGNED_BITS 32
+#define NATIVE_SIGNED_MIN INT_MIN /* limits of NATIVE_SIGNED itself: long may be wider than int */
+#define NATIVE_SIGNED_MAX INT_MAX
+
+#elif defined TEST_SIMUL
+/* simulated test: a 31-bit type built on unsigned short words */
+#define SIMUL_ARITH_SUBTYPE unsigned short
+#define SIMUL_SUBTYPE_BITS 16
+#define SIMUL_NUMBITS 31
+
+#else
+
+#error ====== Either TEST_NATIVE or TEST_SIMUL must be defined.
+
+#endif
+
+#define ARITH_TYPENAME zoinx
+#define ARITH_FUNCTION_HEADER static inline
+
+#define ARITH_WARNING(type) z_warn(type)
+#define ARITH_ERROR(type) z_error(type)
+
+void z_warn(int type);
+void z_error(int type);
+
+#include "arith.c"
+
+#if defined TEST_NATIVE
+
+static inline u_zoinx unsigned_to_uz(unsigned x)
+{
+ return (u_zoinx)x; /* TEST_NATIVE build: a plain cast; presumably u_zoinx is NATIVE_UNSIGNED (confirm in arith.c) */
+}
+
+static inline s_zoinx int_to_sz(int x)
+{
+ return (s_zoinx)x; /* TEST_NATIVE build: a plain cast; presumably s_zoinx is NATIVE_SIGNED (confirm in arith.c) */
+}
+
+static inline void print_uz(u_zoinx x)
+{
+ printf("%u", x); /* "%u" is only right while u_zoinx is unsigned int; TODO confirm against arith.c */
+}
+
+static inline void print_sz(s_zoinx x)
+{
+ printf("%d", x); /* "%d" is only right while s_zoinx is int; TODO confirm against arith.c */
+}
+
+#else
+
+static inline u_zoinx unsigned_to_uz(unsigned x)
+{
+	u_zoinx split;	/* 31-bit value: 15 high bits in msw, 16 low bits in lsw */
+	split.lsw = x & 0xFFFFU;
+	split.msw = (x >> 16) & 0x7FFFU;
+	return split;
+}
+
+static inline s_zoinx int_to_sz(int x)
+{
+ return unsigned_to_uz((unsigned)x); /* reuses the unsigned split; needs s_zoinx and u_zoinx to be the same struct type */
+}
+
+static inline void print_uz(u_zoinx x)
+{
+ printf("%u", ((unsigned)(x.msw) << 16) + (unsigned)(x.lsw)); /* rejoin the words: msw shifted above the 16 bits of lsw */
+}
+
+static inline void print_sz(s_zoinx x)
+{
+	int negative = (x.msw & 0x4000U) != 0;
+
+	/* print the magnitude, with a leading '-' when bit 30 (the sign) is set */
+	if (negative) { putchar('-'); x = zoinx_u_neg(x); }
+	print_uz(x);
+}
+
+#endif
+
+static inline void print_int(int x)
+{
+ printf("%d", x); /* printer for operations whose result is a plain int (OPTRY_UU_I, OPTRY_SS_I) */
+}
+
+static jmp_buf jbuf;
+
+void z_warn(int type)
+{
+	const char *note;	/* bracketed tag for this warning code */
+	switch (type) {
+	case ARITH_EXCEP_CONV_O:
+		note = "[overflow on conversion] "; break;
+	case ARITH_EXCEP_NEG_O:
+		note = "[overflow on unary minus] "; break;
+	case ARITH_EXCEP_NOT_T:
+		note = "[trap representation on bitwise inversion] "; break;
+	case ARITH_EXCEP_PLUS_O:
+		note = "[overflow on addition] "; break;
+	case ARITH_EXCEP_PLUS_U:
+		note = "[underflow on addition] "; break;
+	case ARITH_EXCEP_MINUS_O:
+		note = "[overflow on subtraction] "; break;
+	case ARITH_EXCEP_MINUS_U:
+		note = "[underflow on subtraction] "; break;
+	case ARITH_EXCEP_AND_T:
+		note = "[trap representation on bitwise and] "; break;
+	case ARITH_EXCEP_XOR_T:
+		note = "[trap representation on bitwise xor] "; break;
+	case ARITH_EXCEP_OR_T:
+		note = "[trap representation on bitwise or] "; break;
+	case ARITH_EXCEP_LSH_W:
+		note = "[left shift by type width or more] "; break;
+	case ARITH_EXCEP_LSH_C:
+		note = "[left shift by negative count] "; break;
+	case ARITH_EXCEP_LSH_O:
+		note = "[overflow on left shift] "; break;
+	case ARITH_EXCEP_LSH_U:
+		note = "[underflow on left shift] "; break;
+	case ARITH_EXCEP_RSH_W:
+		note = "[right shift by type width or more] "; break;
+	case ARITH_EXCEP_RSH_C:
+		note = "[right shift by negative count] "; break;
+	case ARITH_EXCEP_RSH_N:
+		note = "[right shift of negative value] "; break;
+	case ARITH_EXCEP_STAR_O:
+		note = "[overflow on multiplication] "; break;
+	case ARITH_EXCEP_STAR_U:
+		note = "[underflow on multiplication] "; break;
+	default: fprintf(stdout, "UNKNOWN WARNING TYPE: %d\n", type);
+		exit(EXIT_FAILURE);
+	}
+	fputs(note, stdout);
+}
+
+void z_error(int type)
+{
+	const char *what = NULL;
+
+	if (type == ARITH_EXCEP_SLASH_D)
+		what = "division by 0\n";
+	else if (type == ARITH_EXCEP_SLASH_O)
+		what = "overflow on division\n";
+	else if (type == ARITH_EXCEP_PCT_D)
+		what = "division by 0 on modulus operator\n";
+	if (what == NULL) {
+		fprintf(stdout, "UNKNOWN ERROR TYPE: %d\n", type);
+		exit(EXIT_FAILURE);
+	}
+	fputs(what, stdout);
+	/* abandon the operation: control resumes at the setjmp(jbuf) call */
+	longjmp(jbuf, 1);
+}
+
+int main(void)
+{
+ /* Each OPTRY_* line prints "x op y -> " and then the result, or the text written by z_error(). */
+#define OPTRY_GEN(op, x, y, convx, convy, printz) do { \
+ printf("%s %s %s -> ", #x, #op, #y); \
+ if (!setjmp(jbuf)) { \
+ printz(zoinx_ ## op (convx(x), convy(y))); \
+ putchar('\n'); \
+ } \
+ } while (0)
+
+#define IDENT(x) x /* pass-through for operands that stay plain int (shift counts) */
+
+#define OPTRY_UU_U(op, x, y) \
+ OPTRY_GEN(op, x, y, unsigned_to_uz, unsigned_to_uz, print_uz)
+
+#define OPTRY_UI_U(op, x, y) \
+ OPTRY_GEN(op, x, y, unsigned_to_uz, IDENT, print_uz)
+
+#define OPTRY_UU_I(op, x, y) \
+ OPTRY_GEN(op, x, y, unsigned_to_uz, unsigned_to_uz, print_int)
+
+#define OPTRY_SS_S(op, x, y) \
+ OPTRY_GEN(op, x, y, int_to_sz, int_to_sz, print_sz)
+
+#define OPTRY_SI_S(op, x, y) \
+ OPTRY_GEN(op, x, y, int_to_sz, IDENT, print_sz)
+
+#define OPTRY_SS_I(op, x, y) \
+ OPTRY_GEN(op, x, y, int_to_sz, int_to_sz, print_int)
+ /* unsigned operations; the last shift count (38) exceeds the 31/32-bit width */
+ OPTRY_UU_U(u_plus, 3, 4);
+ OPTRY_UU_U(u_plus, 1549587182, 1790478233);
+ OPTRY_UU_U(u_minus, 1549587182, 1790478233);
+ OPTRY_UU_U(u_minus, 1790478233, 1549587182);
+ OPTRY_UU_U(u_star, 432429875, 347785487);
+ OPTRY_UU_U(u_slash, 432429875, 34487);
+ OPTRY_UU_U(u_pct, 432429875, 34487);
+ OPTRY_UI_U(u_lsh, 1783, 19);
+ OPTRY_UI_U(u_lsh, 1783, 20);
+ OPTRY_UI_U(u_lsh, 1783, 21);
+ OPTRY_UI_U(u_rsh, 475902857, 7);
+ OPTRY_UI_U(u_rsh, 475902857, 17);
+ OPTRY_UI_U(u_rsh, 475902857, 38);
+ /* signed operations, including division and modulus by 0 and -2147483648 / -1 */
+ OPTRY_SS_S(s_plus, 3, 4);
+ OPTRY_SS_S(s_plus, 1549587182, 1790478233);
+ OPTRY_SS_S(s_plus, -1549587182, -1790478233);
+ OPTRY_SS_S(s_minus, 1549587182, 1790478233);
+ OPTRY_SS_S(s_minus, 1790478233, 1549587182);
+ OPTRY_SS_S(s_minus, -1790478233, -1549587182);
+ OPTRY_SS_S(s_minus, -1790478233, 1549587182);
+ OPTRY_SS_S(s_star, 432429875, 347785487);
+ OPTRY_SS_S(s_star, 432429875, -347785487);
+ OPTRY_SS_S(s_slash, 432429875, 34487);
+ OPTRY_SS_S(s_slash, -432429875, 34487);
+ OPTRY_SS_S(s_slash, 432429875, -34487);
+ OPTRY_SS_S(s_slash, -432429875, -34487);
+ OPTRY_SS_S(s_slash, 432429875, 0);
+ OPTRY_SS_S(s_slash, -2147483647 - 1, -1);
+ OPTRY_SS_S(s_pct, 432429875, 34487);
+ OPTRY_SS_S(s_pct, 432429875, 0);
+ OPTRY_SI_S(s_lsh, -1, 10);
+ OPTRY_SI_S(s_lsh, 1783, 19);
+ OPTRY_SI_S(s_lsh, 1783, 20);
+ OPTRY_SI_S(s_lsh, 1783, 21);
+ OPTRY_SI_S(s_rsh, -1024, 8);
+ OPTRY_SI_S(s_rsh, 475902857, 7);
+ OPTRY_SI_S(s_rsh, 475902857, 17);
+
+ return 0;
+}
diff --git a/libexec/auxcpp/config.h b/libexec/auxcpp/config.h
new file mode 100644
index 00000000000..a1bbe993bc5
--- /dev/null
+++ b/libexec/auxcpp/config.h
@@ -0,0 +1,352 @@
+/*
+ * (c) Thomas Pornin 1999 - 2002
+ * (c) Louis P. Santillan 2011
+ * This file is derived from tune.h
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/* ====================================================================== */
+/*
+ * The LOW_MEM macro triggers the use of macro storage which uses less
+ * memory. It actually also improves performance on large, modern machines
+ * (due to less cache pressure). This option implies no limitation (except
+ * on the number of arguments a macro may have, which is then limited to 32766)
+ * so it is on by default. Non-LOW_MEM code is considered deprecated.
+ */
+#define LOW_MEM
+
+/* ====================================================================== */
+/*
+ * Define AMIGA for systems using "drive letters" at the beginning of
+ * some paths; define MSDOS on systems with drive letters and using
+ * backslashes to separate directory components.
+ */
+/* #define AMIGA */
+/* #define MSDOS */
+
+/* ====================================================================== */
+/*
+ * Define this if your compiler does not know the strftime() function;
+ * TurboC 2.01 under Msdos does not know strftime().
+ */
+/* #define NOSTRFTIME */
+
+/* ====================================================================== */
+/*
+ * Buffering: there are two levels of buffering on input and output streams:
+ * the standard libc buffering (manageable with setbuf() and setvbuf())
+ * and some buffering provided by ucpp itself. The ucpp buffering uses
+ * two buffers, of size respectively INPUT_BUF_MEMG and OUTPUT_BUF_MEMG
+ * (as defined below).
+ * You can disable one or both of these bufferings by defining the macros
+ * NO_LIBC_BUF and NO_UCPP_BUF.
+ */
+/* #define NO_LIBC_BUF */
+/* #define NO_UCPP_BUF */
+
+/*
+ * On Unix stations, the system call mmap() might be used on input files.
+ * This option is a subclause of ucpp internal buffering. On one station,
+ * a 10% speed improvement was observed. Do not define this unless the
+ * host architecture has the following characteristics:
+ * -- Posix / Single Unix compliance
+ * -- Text files correspond one to one with memory representation
+ * If a file is not seekable or not mmapable, ucpp will revert to the
+ * standard fread() solution.
+ *
+ * This feature is still considered beta quality. On some systems where
+ * files can be bigger than memory address space (mainly, 32-bit systems
+ * with files bigger than 4 GB), this option makes ucpp fail to operate
+ * on those extremely large files.
+ */
+#define UCPP_MMAP
+
+/*
+ * Performance issues:
+ * -- On memory-starved systems, such as Minix-i86, do not use ucpp
+ * buffering; keep only libc buffering.
+ * -- If you do not use libc buffering, activate the UCPP_MMAP option.
+ * Note that the UCPP_MMAP option is ignored if ucpp buffering is not
+ * activated.
+ *
+ * On an Athlon 1200 running FreeBSD 4.7, the best performances are
+ * achieved when libc buffering is activated and/or UCPP_MMAP is on.
+ */
+
+/* ====================================================================== */
+/*
+ * Define this if you want ucpp to generate tokenized PRAGMA tokens;
+ * otherwise, it will generate raw string contents. This setting is
+ * irrelevant to the stand-alone version of ucpp.
+ */
+#define PRAGMA_TOKENIZE
+
+/*
+ * Define this to the special character that marks the end of tokens with
+ * a string value inside a tokenized PRAGMA token. The #pragma and _Pragma()
+ * directives which use this character will be a bit more difficult to
+ * decode (but ucpp will not mind). 0 cannot be used. '\n' is fine because
+ * it cannot appear inside a #pragma or _Pragma(), since newlines cannot be
+ * embedded inside tokens, neither directly nor by macro substitution and
+ * stringization. Besides, '\n' is portable.
+ */
+#define PRAGMA_TOKEN_END ((unsigned char)'\n')
+
+/*
+ * Define this if you want ucpp to include encountered #pragma directives
+ * in its output in non-lexer mode; _Pragma() are translated to equivalent
+ * #pragma directives.
+ */
+#define PRAGMA_DUMP
+
+/*
+ * According to my interpretation of the C99 standard, _Pragma() are
+ * evaluated wherever macro expansion could take place. However, Neil Booth,
+ * whose mother language is English (contrary to me) and who is well aware
+ * of the C99 standard (and especially the C preprocessor) told me that
+ * it was unclear whether _Pragma() are evaluated inside directives such
+ * as #if, #include and #line. If you want to disable the evaluation of
+ * _Pragma() inside such directives, define the following macro.
+ */
+/* #define NO_PRAGMA_IN_DIRECTIVE */
+
+/*
+ * The C99 standard mandates that the operator `##' must yield a single,
+ * valid token, lest undefined behaviour befall upon thy head. Hence,
+ * for instance, `+ ## +=' is forbidden, because `++=' is not a valid
+ * token (although it is a valid list of two tokens, `++' and `=').
+ * However, ucpp only emits a warning for such sin, and unmerges the
+ * tokens (thus emitting `+' then `+=' for that example). When ucpp
+ * produces text output, those two tokens will be separated by a space
+ * character so that the basic rule of text output is preserved: when
+ * parsed again, text output yields the exact same stream of tokens.
+ * That extra space is virtual: it does not count as a true whitespace
+ * token for stringization.
+ *
+ * However, it might be desirable, for some uses other than preprocessing
+ * C source code, not to emit that extra space at all. To make ucpp behave
+ * that way, define the DSHARP_TOKEN_MERGE macro. Please note that this
+ * can trigger spurious token merging. For instance, with that macro
+ * activated, `+ ## +=' will be output as `++=' which, if preprocessed
+ * again, will read as `++' followed by `='.
+ *
+ * All this is irrelevant to lexer mode; and trying to merge incompatible
+ * tokens is a shooting offence, anyway.
+ */
+/* #define DSHARP_TOKEN_MERGE */
+
+/* ====================================================================== */
+/*
+ * Define INMACRO_FLAG to include two flags to the structure lexer_state,
+ * that tell whether tokens come from a macro-replacement, and count those
+ * macro-replacements.
+ */
+/* #define INMACRO_FLAG */
+
+/* ====================================================================== */
+/*
+ * Paths where files are looked for by default, when #include is used.
+ * Typical path is /usr/local/include and /usr/include, in that order.
+ * If you want to set up no path, define the macro to 0.
+ *
+ * For Linux, get gcc includes too, or you will miss things like stddef.h.
+ * The exact path varies much, depending on the distribution.
+ */
+#define STD_INCLUDE_PATH "/usr/local/include", "/usr/include"
+
+/* ====================================================================== */
+/*
+ * Arithmetic code for evaluation of #if expressions. Evaluation
+ * uses either a native machine type, or an emulated two's complement
+ * type. Division by 0 and overflow on division are considered as errors
+ * and reported as such. If ARITHMETIC_CHECKS is defined, all other
+ * operations that imply undefined or implementation-defined behaviour
+ * are reported as warnings but otherwise performed nonetheless.
+ *
+ * For native type evaluation, the following macros should be defined:
+ * NATIVE_SIGNED the native signed type
+ * NATIVE_UNSIGNED the native corresponding unsigned type
+ * NATIVE_UNSIGNED_BITS the native unsigned type width, in bits
+ * NATIVE_SIGNED_MIN the native signed type minimum value
+ * NATIVE_SIGNED_MAX the native signed type maximum value
+ *
+ * The code in the arith.c file performs some tricky detection
+ * operations on the native type representation and possible existence
+ * of a trap representation. These operations assume a C99-compliant
+ * compiler; on a C90-only compiler, the operations are valid but may
+ * yield incorrect results. You may force those settings with some
+ * more macros: see the comments in arith.c (look for "ARCH_DEFINED").
+ * Remember that this is mostly a non-issue, unless you are building
+ * ucpp with a pre-C99 cross-compiler and either the host or target
+ * architecture uses a non-two's complement representation of signed
+ * integers. Such a combination is pretty rare nowadays, so the best
+ * you can do is forget this paragraph completely and live in peace.
+ *
+ *
+ * If you do not have a handy native type (for instance, you compile ucpp
+ * with a C90 compiler which lacks the "long long" type, or you compile
+ * ucpp for a cross-compiler which should support an evaluation integer
+ * type of a size that is not available on the host machine), you may use
+ * a simulated type. The type uses two's complement representation and
+ * may have any width from 2 bits to twice the underlying native type
+ * width, inclusive (odd widths are allowed). To use an emulated type,
+ * make sure that NATIVE_SIGNED is not defined, and define the following
+ * macros:
+ * SIMUL_ARITH_SUBTYPE the native underlying type to use
+ * SIMUL_SUBTYPE_BITS the native underlying type width
+ * SIMUL_NUMBITS the emulated type width
+ *
+ * Undefined and implementation-defined behaviours are warned upon, if
+ * ARITHMETIC_CHECKS is defined. Results are truncated to the type
+ * width; shift count for the << and >> operators is reduced modulo the
+ * emulated type width; right shifting of a signed negative value performs
+ * sign extension (the result is left-padded with bits set to 1).
+ */
+
+/*
+ * For native type evaluation with a 64-bit "long long" type.
+ */
+#define NATIVE_SIGNED long long
+#define NATIVE_UNSIGNED unsigned long long
+#define NATIVE_UNSIGNED_BITS 64
+#define NATIVE_SIGNED_MIN (-9223372036854775807LL - 1)
+#define NATIVE_SIGNED_MAX 9223372036854775807LL
+
+/*
+ * For emulation of a 64-bit type using a native 32-bit "unsigned long"
+ * type.
+#undef NATIVE_SIGNED
+#define SIMUL_ARITH_SUBTYPE unsigned long
+#define SIMUL_SUBTYPE_BITS 32
+#define SIMUL_NUMBITS 64
+ */
+
+/*
+ * Comment out the following line if you want to deactivate arithmetic
+ * checks (warnings upon undefined and implementation-defined
+ * behaviour). Arithmetic checks slow down a bit arithmetic operations,
+ * especially multiplications, but this should not be an issue with
+ * typical C source code.
+ */
+#define ARITHMETIC_CHECKS
+
+/* ====================================================================== */
+/*
+ * To force signedness of wide character constants, define WCHAR_SIGNEDNESS
+ * to 0 for unsigned, 1 for signed. By default, wide character constants
+ * are signed if the native `char' type is signed, and unsigned otherwise.
+#define WCHAR_SIGNEDNESS 0
+ */
+
+/*
+ * Standard assertions. They should include one cpu() assertion, one machine()
+ * assertion (identical to cpu()), and one or more system() assertions.
+ *
+ * for Linux/PC: cpu(i386), machine(i386), system(unix), system(linux)
+ * for Linux/Alpha: cpu(alpha), machine(alpha), system(unix), system(linux)
+ * for Sparc/Solaris: cpu(sparc), machine(sparc), system(unix), system(solaris)
+ *
+ * These are only suggestions. On Solaris, machine() should be defined
+ * for i386 or sparc (standard system header use such an assertion). For
+ * cross-compilation, define assertions related to the target architecture.
+ *
+ * If you want no standard assertion, define STD_ASSERT to 0.
+ */
+#define STD_ASSERT 0
+/*
+#define STD_ASSERT "cpu(i386)", "machine(i386)", "system(unix)", \
+ "system(freebsd)"
+*/
+
+/* ====================================================================== */
+/*
+ * System predefined macros. Nothing really mandatory, but some programs
+ * might rely on those.
+ * Each string must be either "name" or "name=token-list". If you want
+ * no predefined macro, define STD_MACROS to 0.
+ */
+#define STD_MACROS 0
+/*
+#define STD_MACROS "__FreeBSD=4", "__unix", "__i386", \
+ "__FreeBSD__=4", "__unix__", "__i386__"
+*/
+
+/* ====================================================================== */
+/*
+ * Default flags; HANDLE_ASSERTIONS is required for Solaris system headers.
+ * See cpp.h for the definition of these flags.
+ */
+#define DEFAULT_CPP_FLAGS (DISCARD_COMMENTS | WARN_STANDARD \
+ | WARN_PRAGMA | FAIL_SHARP | MACRO_VAARG \
+ | CPLUSPLUS_COMMENTS | LINE_NUM | TEXT_OUTPUT \
+ | KEEP_OUTPUT | HANDLE_TRIGRAPHS \
+ | HANDLE_ASSERTIONS)
+#define DEFAULT_LEXER_FLAGS (DISCARD_COMMENTS | WARN_STANDARD | FAIL_SHARP \
+ | MACRO_VAARG | CPLUSPLUS_COMMENTS | LEXER \
+ | HANDLE_TRIGRAPHS | HANDLE_ASSERTIONS)
+
+/* ====================================================================== */
+/*
+ * Define this to use sigsetjmp()/siglongjmp() instead of setjmp()/longjmp().
+ * This is non-ANSI, but it improves performance on some POSIX systems.
+ * On typical C source code, such improvement is completely negligible.
+ */
+/* #define POSIX_JMP */
+
+/* ====================================================================== */
+/*
+ * Maximum value (plus one) of a character handled by the lexer; 128 is
+ * alright for ASCII native source code, but 256 is needed for EBCDIC.
+ * 256 is safe in both cases; you will have big problems if you set
+ * this value to INT_MAX or above. On Minix-i86 or Msdos (small memory
+ * model), define MAX_CHAR_VAL to 128.
+ *
+ * Set MAX_CHAR_VAL to a power of two to increase lexing speed. Beware
+ * that lexer.c defines a static array of size MSTATE * MAX_CHAR_VAL
+ * values of type int (MSTATE is defined in lexer.c and is about 40).
+ */
+#define MAX_CHAR_VAL 128
+
+/*
+ * If you want some extra character to be considered as whitespace,
+ * define this macro to that space. On ISO-8859-1 machines, 160 is
+ * the code for the unbreakable space.
+ */
+/* #define UNBREAKABLE_SPACE 160 */
+
+/*
+ * If you want whitespace tokens contents to be recorded (making them
+ * tokens with a string content), define this. The macro STRING_TOKEN
+ * will be adjusted accordingly.
+ * Without this option, whitespace tokens are not even returned by the
+ * lex() function. This is irrelevant for the non-lexer mode (almost --
+ * it might slow down a bit ucpp, and with this option, comments will be
+ * kept inside #pragma directives).
+ */
+/* #define SEMPER_FIDELIS */
+
+/* End of options overridable by UCPP_CONFIG and config.h */
diff --git a/libexec/auxcpp/cpp.c b/libexec/auxcpp/cpp.c
new file mode 100644
index 00000000000..7cdc358cac6
--- /dev/null
+++ b/libexec/auxcpp/cpp.c
@@ -0,0 +1,2565 @@
+/*
+ * C and T preprocessor, and integrated lexer
+ * (c) Thomas Pornin 1999 - 2002
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#define VERS_MAJ 1
+#define VERS_MIN 3
+/* uncomment the following if you cannot set it with a compiler flag */
+/* #define STAND_ALONE */
+
+#include "tune.h"
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <setjmp.h>
+#include <stddef.h>
+#include <limits.h>
+#include <time.h>
+#include "ucppi.h"
+#include "mem.h"
+#include "nhash.h"
+#ifdef UCPP_MMAP
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#endif
+
+/*
+ * The standard path where includes are looked for.
+ * include_path holds include_path_nb directory names; find_file() and
+ * find_file_next() try them in increasing index order.
+ */
+#ifdef STAND_ALONE
+static char *include_path_std[] = { STD_INCLUDE_PATH, 0 };
+#endif
+static char **include_path;
+static size_t include_path_nb = 0;
+
+/*
+ * emit_dependencies selects which newly found files get their name
+ * printed on emit_output: with 1, find_file() prints only files opened
+ * by the local (non include-path) lookup while current_incdir is -1;
+ * with 2, find_file() and find_file_next() print every newly found file.
+ */
+int no_special_macros = 0;
+int emit_dependencies = 0, emit_defines = 0, emit_assertions = 0;
+FILE *emit_output;
+
+#ifdef STAND_ALONE
+static char *system_macros_def[] = { STD_MACROS, 0 };
+static char *system_assertions_def[] = { STD_ASSERT, 0 };
+#endif
+
+/*
+ * current_filename is the name printed in diagnostics.
+ * current_long_filename, when not 0, is preferred over current_filename
+ * for line directives and for locating files included with "".
+ * current_incdir is the index in include_path of the directory in which
+ * the current file was found, or -1.
+ */
+char *current_filename = 0, *current_long_filename = 0;
+static int current_incdir = -1;
+
+#ifndef NO_UCPP_ERROR_FUNCTIONS
+/*
+ * ucpp_ouch() reports an internal ucpp error (an "ouch"): the message,
+ * built from the printf-like format, is written on stderr after the
+ * prefix "<current_filename>: ouch, ", and then die() is called.
+ *
+ * If AUDIT is not defined, no code calling this function will be
+ * generated; an "ouch" may still be emitted by getmem() (in mem.c) if
+ * MEM_CHECK is defined, but that one does not go through this function.
+ */
+void ucpp_ouch(char *fmt, ...)
+{
+	va_list args;
+
+	fprintf(stderr, "%s: ouch, ", current_filename);
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+	fprintf(stderr, "\n");
+	die();
+}
+
+/*
+ * Report an error on stderr, with printf-like syntax.
+ *   line > 0    prefix is "<current_filename>: line <line>: "
+ *   line == 0   prefix is "<current_filename>: "
+ *   line < 0    no prefix at all
+ * When line >= 0, the chain of including files returned by
+ * report_context() is listed after the message, one per line.
+ */
+void ucpp_error(long line, char *fmt, ...)
+{
+	va_list args;
+
+	if (line > 0) {
+		fprintf(stderr, "%s: line %ld: ", current_filename, line);
+	} else if (line == 0) {
+		fprintf(stderr, "%s: ", current_filename);
+	}
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+	fprintf(stderr, "\n");
+	if (line < 0) return;
+	{
+		struct stack_context *includers = report_context();
+		struct stack_context *p;
+
+		/* the list ends with an entry whose line is negative */
+		for (p = includers; p->line >= 0; p ++) {
+			fprintf(stderr, "\tincluded from %s:%ld\n",
+				p->long_name ? p->long_name : p->name,
+				p->line);
+		}
+		freemem(includers);
+	}
+}
+
+/*
+ * Report a warning on stderr, with printf-like syntax.
+ *   line > 0    prefix is "<current_filename>: warning: line <line>: "
+ *   line == 0   prefix is "<current_filename>: warning: "
+ *   line < 0    prefix is "warning: "
+ * When line >= 0, the chain of including files returned by
+ * report_context() is listed after the message, one per line.
+ */
+void ucpp_warning(long line, char *fmt, ...)
+{
+	va_list args;
+
+	if (line < 0) {
+		fprintf(stderr, "warning: ");
+	} else if (line == 0) {
+		fprintf(stderr, "%s: warning: ", current_filename);
+	} else {
+		fprintf(stderr, "%s: warning: line %ld: ",
+			current_filename, line);
+	}
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+	fprintf(stderr, "\n");
+	if (line < 0) return;
+	{
+		struct stack_context *includers = report_context();
+		struct stack_context *p;
+
+		/* the list ends with an entry whose line is negative */
+		for (p = includers; p->line >= 0; p ++) {
+			fprintf(stderr, "\tincluded from %s:%ld\n",
+				p->long_name ? p->long_name : p->name,
+				p->line);
+		}
+		freemem(includers);
+	}
+}
+#endif /* NO_UCPP_ERROR_FUNCTIONS */
+
+/*
+ * Some memory allocations are manually garbage-collected; essentially,
+ * strings duplicated in the process of macro replacement. Each such
+ * string is referenced in the garbage_fifo, which is cleared when all
+ * nested macros have been resolved.
+ */
+
+struct garbage_fifo {
+ /* array of pointers to the strings awaiting release */
+ char **garbage;
+ /*
+  * ngarb: number of strings currently recorded;
+  * memgarb: set to GARBAGE_LIST_MEMG at creation -- presumably the
+  * current capacity of the array, maintained by wan() (TODO confirm).
+  */
+ size_t ngarb, memgarb;
+};
+
+/*
+ * throw_away() marks a string to be collected later: n is handed to the
+ * wan() macro together with the fifo array, its count and its memgarb
+ * field (presumably wan() appends n and grows the array as needed --
+ * the macro is defined elsewhere, confirm there). The string is released
+ * by the next garbage_collect() on the same fifo.
+ */
+void throw_away(struct garbage_fifo *gf, char *n)
+{
+ wan(gf->garbage, gf->ngarb, n, gf->memgarb);
+}
+
+/*
+ * Release every string recorded in the fifo, then mark the fifo as
+ * empty. The pointer array itself is kept for reuse.
+ */
+void garbage_collect(struct garbage_fifo *gf)
+{
+	size_t k = 0;
+
+	while (k < gf->ngarb) {
+		freemem(gf->garbage[k]);
+		k ++;
+	}
+	gf->ngarb = 0;
+}
+
+/*
+ * Prepare an empty fifo: memgarb is set to GARBAGE_LIST_MEMG and an
+ * array of that many string pointers is obtained from getmem().
+ */
+static void init_garbage_fifo(struct garbage_fifo *gf)
+{
+	gf->memgarb = GARBAGE_LIST_MEMG;
+	gf->garbage = getmem(gf->memgarb * sizeof(char *));
+	gf->ngarb = 0;
+}
+
+/*
+ * Destroy a fifo: release the strings still recorded in it, then the
+ * pointer array, then the structure itself. gf must therefore have been
+ * obtained from getmem(), and must not be used after this call.
+ */
+static void free_garbage_fifo(struct garbage_fifo *gf)
+{
+ garbage_collect(gf);
+ freemem(gf->garbage);
+ freemem(gf);
+}
+
+/*
+ * Textual form of each token type, indexed by the token type value
+ * (see tname() below, which uses this table for tokens that carry no
+ * string of their own).
+ * order is important: it must match the token-constants declared as an
+ * enum in the header file. The "=>" entry exists only when CAST_OP is
+ * defined.
+ */
+char *operators_name[] = {
+ " ", "\n", " ",
+ "0000", "name", "bunch", "pragma", "context",
+ "\"dummy string\"", "'dummy char'",
+ "/", "/=", "-", "--", "-=", "->", "+", "++", "+=", "<", "<=", "<<",
+ "<<=", ">", ">=", ">>", ">>=", "=", "==",
+#ifdef CAST_OP
+ "=>",
+#endif
+ "~", "!=", "&", "&&", "&=", "|", "||", "|=", "%", "%=", "*", "*=",
+ "^", "^=", "!",
+ "{", "}", "[", "]", "(", ")", ",", "?", ";",
+ ":", ".", "...", "#", "##", " ", "ouch", "<:", ":>", "<%", "%>",
+ "%:", "%:%:"
+};
+
+/*
+ * the ascii representation of a token: its own name field when
+ * S_TOKEN() holds for its type, the matching operators_name[] entry
+ * otherwise. With SEMPER_FIDELIS, a token whose type satisfies ttWHI()
+ * is rendered as a single space, whatever its name field contains.
+ * Note that the macro evaluates its argument several times.
+ */
+#ifdef SEMPER_FIDELIS
+#define tname(x) (ttWHI((x).type) ? " " : S_TOKEN((x).type) \
+ ? (x).name : operators_name[(x).type])
+#else
+#define tname(x) (S_TOKEN((x).type) ? (x).name \
+ : operators_name[(x).type])
+#endif
+
+/*
+ * Function form of tname(), for callers holding a pointer to a token.
+ * The returned string is not a copy: it is either t->name or a string
+ * from operators_name[].
+ */
+char *token_name(struct token *t)
+{
+ return tname(*t);
+}
+
+/*
+ * To speed up deeply nested and repeated inclusions, we:
+ * -- use a hash table to remember where we found each file
+ * -- remember when the file is protected by a #ifndef/#define/#endif
+ * construction; we can then avoid including several times a file
+ * when this is not necessary.
+ * -- remember in which directory, in the include path, the file was found.
+ */
+struct found_file {
+ /* hash table linkage; HASH_ITEM_NAME() on the item yields the name
+    under which the file is opened again by find_file() */
+ hash_item_header head; /* first field */
+ /* getmem-allocated copy of a file name, or 0; released by
+    del_found_file() */
+ char *name;
+ /* name of the guardian macro protecting the file, or 0 when none
+    is known; released by del_found_file() */
+ char *protect;
+};
+
+/*
+ * For files from system include path: an entry is stored under the name
+ * given in the #include directive and points to the found_file entry
+ * of the actual file.
+ */
+struct found_file_sys {
+ hash_item_header head; /* first field */
+ /* the found_file entry of the file; not owned by this structure
+    (del_found_file_sys() does not release it) */
+ struct found_file *rff;
+ /* index, in the include path, of the directory where the file was
+    found; -1 until set */
+ int incdir;
+};
+
+/*
+ * found_files holds struct found_file items, found_files_sys holds
+ * struct found_file_sys items. The *_init_done flags tell
+ * init_found_files() whether a previous table must be killed first.
+ */
+static HTT found_files, found_files_sys;
+static int found_files_init_done = 0, found_files_sys_init_done = 0;
+
+/*
+ * Allocate a found_file entry with no name and no guardian macro.
+ * The hash header is left for HTT_put() to fill.
+ */
+static struct found_file *new_found_file(void)
+{
+	struct found_file *fresh;
+
+	fresh = getmem(sizeof(struct found_file));
+	fresh->protect = 0;
+	fresh->name = 0;
+	return fresh;
+}
+
+/*
+ * Release a found_file entry together with the strings it owns; this
+ * is the deletion callback given to HTT_init() for found_files.
+ */
+static void del_found_file(void *m)
+{
+	struct found_file *victim = (struct found_file *)m;
+
+	if (victim->name != 0) {
+		freemem(victim->name);
+	}
+	if (victim->protect != 0) {
+		freemem(victim->protect);
+	}
+	freemem(victim);
+}
+
+/*
+ * Allocate a found_file_sys entry which references no file yet and has
+ * no include directory (-1).
+ */
+static struct found_file_sys *new_found_file_sys(void)
+{
+	struct found_file_sys *fresh;
+
+	fresh = getmem(sizeof(struct found_file_sys));
+	fresh->incdir = -1;
+	fresh->rff = 0;
+	return fresh;
+}
+
+/*
+ * Release a found_file_sys entry; this is the deletion callback given
+ * to HTT_init() for found_files_sys. The found_file it points to is
+ * left untouched.
+ */
+static void del_found_file_sys(void *m)
+{
+	struct found_file_sys *victim = (struct found_file_sys *)m;
+
+	freemem(victim);
+}
+
+/*
+ * To keep up with the #ifndef/#define/#endif protection mechanism
+ * detection.
+ */
+struct protect protect_detect;
+static struct protect *protect_detect_stack = 0;
+
+/*
+ * Declare x as the name of the file about to be processed: a copy of x
+ * replaces current_filename, and the long file name and include
+ * directory are reset.
+ * If real_file is non-zero, a found_file entry is registered under x
+ * in found_files and protect_detect.state is set to 1; otherwise
+ * protect_detect.state is set to 0.
+ */
+void set_init_filename(char *x, int real_file)
+{
+	if (current_filename) freemem(current_filename);
+	current_filename = sdup(x);
+	current_long_filename = 0;
+	current_incdir = -1;
+	if (!real_file) {
+		protect_detect.state = 0;
+		return;
+	}
+	protect_detect.macro = 0;
+	protect_detect.state = 1;
+	protect_detect.ff = new_found_file();
+	protect_detect.ff->name = sdup(x);
+	HTT_put(&found_files, protect_detect.ff, x);
+}
+
+/*
+ * (Re)create the two file caches. A table which was already
+ * initialized by a previous call is killed before being set up again.
+ */
+static void init_found_files(void)
+{
+	if (found_files_init_done) {
+		HTT_kill(&found_files);
+	}
+	HTT_init(&found_files, del_found_file);
+	found_files_init_done = 1;
+
+	if (found_files_sys_init_done) {
+		HTT_kill(&found_files_sys);
+	}
+	HTT_init(&found_files_sys, del_found_file_sys);
+	found_files_sys_init_done = 1;
+}
+
+/*
+ * Set the lexer state at the beginning of a file.
+ * Unless NO_UCPP_BUF is defined, the input buffer is either obtained
+ * from getmem(INPUT_BUF_MEMG) (wb non-zero) or cleared (wb zero).
+ */
+static void reinit_lexer_state(struct lexer_state *ls, int wb)
+{
+#ifndef NO_UCPP_BUF
+	if (wb) ls->input_buf = getmem(INPUT_BUF_MEMG);
+	else ls->input_buf = 0;
+#ifdef UCPP_MMAP
+	ls->from_mmap = 0;
+#endif
+#endif
+	ls->input = 0;
+	ls->pbuf = 0;
+	ls->ebuf = 0;
+	ls->nlka = 0;
+	ls->macfile = 0;
+	ls->discard = 1;
+	ls->last = 0;	/* we suppose '\n' is not 0 */
+
+	/* line-related fields all start at 1 */
+	ls->line = 1;
+	ls->ltwnl = 1;
+	ls->oline = 1;
+
+	ls->pending_token = 0;
+	ls->cli = 0;
+	ls->copy_line[COPY_LINE_LENGTH - 1] = 0;
+	ls->ifnest = 0;
+	ls->condf[1] = 0;
+	ls->condf[0] = 0;
+}
+
+/*
+ * Initialize the struct lexer_state, with optional input and output
+ * buffers: when wb is non-zero (and NO_UCPP_BUF is not defined) both
+ * buffers are allocated, otherwise both pointers are cleared.
+ * The current token, its name buffer (TOKEN_NAME_MEMG bytes requested
+ * from getmem()) and the garbage fifo are always allocated.
+ */
+void init_buf_lexer_state(struct lexer_state *ls, int wb)
+{
+	reinit_lexer_state(ls, wb);
+#ifndef NO_UCPP_BUF
+	if (wb) ls->output_buf = getmem(OUTPUT_BUF_MEMG);
+	else ls->output_buf = 0;
+#endif
+	ls->sbuf = 0;
+	ls->output_fifo = 0;
+
+	/* current token and its name buffer */
+	ls->ctok = getmem(sizeof(struct token));
+	ls->tknl = TOKEN_NAME_MEMG;
+	ls->ctok->name = getmem(ls->tknl);
+	ls->pending_token = 0;
+
+	ls->flags = 0;
+	ls->count_trigraphs = 0;
+
+	/* garbage fifo for strings duplicated during macro replacement */
+	ls->gf = getmem(sizeof(struct garbage_fifo));
+	init_garbage_fifo(ls->gf);
+
+	ls->condcomp = 1;
+	ls->condnest = 0;
+#ifdef INMACRO_FLAG
+	ls->inmacro = 0;
+	ls->macro_count = 0;
+#endif
+}
+
+/*
+ * Initialize the (complex) struct lexer_state, with buffers: this is
+ * init_buf_lexer_state() with wb set to 1. No input stream is attached
+ * (ls->input is 0 on return).
+ */
+void init_lexer_state(struct lexer_state *ls)
+{
+ init_buf_lexer_state(ls, 1);
+ ls->input = 0;
+}
+
+/*
+ * Restore what is needed from a lexer_state. This is used for #include:
+ * ls is the live state, which was used for the included file, and lsbak
+ * is the copy of the includer's state saved by push_file_context().
+ * Only the input-related and conditional-nesting fields listed below
+ * are copied back; every other field of ls is kept as it is.
+ */
+static void restore_lexer_state(struct lexer_state *ls,
+ struct lexer_state *lsbak)
+{
+#ifndef NO_UCPP_BUF
+ /* the current input buffer is released before the includer's
+    buffer pointer is put back */
+ freemem(ls->input_buf);
+ ls->input_buf = lsbak->input_buf;
+#ifdef UCPP_MMAP
+ ls->from_mmap = lsbak->from_mmap;
+ ls->input_buf_sav = lsbak->input_buf_sav;
+#endif
+#endif
+ ls->input = lsbak->input;
+ ls->ebuf = lsbak->ebuf;
+ ls->pbuf = lsbak->pbuf;
+ ls->nlka = lsbak->nlka;
+ ls->discard = lsbak->discard;
+ ls->line = lsbak->line;
+ ls->oline = lsbak->oline;
+ ls->ifnest = lsbak->ifnest;
+ ls->condf[0] = lsbak->condf[0];
+ ls->condf[1] = lsbak->condf[1];
+}
+
+/*
+ * Close the input of a struct lexer_state. With UCPP_MMAP, a mapped
+ * input is unmapped first (ls->ebuf bytes) and the buffer pointer saved
+ * in input_buf_sav is put back. The input stream, if any, is then
+ * closed and ls->input cleared.
+ */
+static void close_input(struct lexer_state *ls)
+{
+#ifdef UCPP_MMAP
+	if (ls->from_mmap) {
+		munmap((void *)ls->input_buf, ls->ebuf);
+		ls->input_buf = ls->input_buf_sav;
+		ls->from_mmap = 0;
+	}
+#endif
+	if (!ls->input) return;
+	fclose(ls->input);
+	ls->input = 0;
+}
+
+/*
+ * file_context (and the two functions push_ and pop_) are used to save
+ * all that is needed when including a file.
+ *
+ * ls_stack is the stack of saved contexts and ls_depth the number of
+ * entries it holds; protect_detect_stack is a parallel stack using the
+ * same depth counter.
+ */
+static struct file_context {
+ struct lexer_state ls; /* raw copy of the includer's lexer state */
+ char *name, *long_name; /* saved current_filename and
+ current_long_filename */
+ int incdir; /* saved current_incdir */
+} *ls_stack;
+static size_t ls_depth = 0;
+
+/*
+ * Save the current file context (file names, include directory, a raw
+ * copy of *ls, and protect_detect) before entering an included file.
+ * The saved pointers are not duplicated: the stack entry simply takes
+ * over current_filename and current_long_filename.
+ */
+static void push_file_context(struct lexer_state *ls)
+{
+ struct file_context fc;
+
+ fc.name = current_filename;
+ fc.long_name = current_long_filename;
+ fc.incdir = current_incdir;
+ mmv(&(fc.ls), ls, sizeof(struct lexer_state));
+ aol(ls_stack, ls_depth, fc, LS_STACK_MEMG);
+ /*
+  * NOTE(review): aol() is defined elsewhere; the decrement below only
+  * makes sense if aol() increments its counter argument. Both stacks
+  * share ls_depth, so it is stepped back here to let the second aol()
+  * store protect_detect at the same index and advance it again.
+  */
+ ls_depth --;
+ aol(protect_detect_stack, ls_depth, protect_detect, LS_STACK_MEMG);
+ /* the saved copy now holds the guardian macro name, if any */
+ protect_detect.macro = 0;
+}
+
+/*
+ * Leave an included file: close its input, then restore from the top of
+ * the stack the lexer fields handled by restore_lexer_state(), the
+ * protect_detect structure, the file names and the include directory.
+ * Both stacks are released when the last entry has been popped.
+ */
+static void pop_file_context(struct lexer_state *ls)
+{
+#ifdef AUDIT
+ if (ls_depth <= 0) ouch("prepare to meet thy creator");
+#endif
+ close_input(ls);
+ /* ls_depth is decremented here; from now on it indexes the entry
+    being restored */
+ restore_lexer_state(ls, &(ls_stack[-- ls_depth].ls));
+ if (protect_detect.macro) freemem(protect_detect.macro);
+ protect_detect = protect_detect_stack[ls_depth];
+ /* the included file's name is released; the includer's name pointer
+    saved by push_file_context() becomes current again */
+ if (current_filename) freemem(current_filename);
+ current_filename = ls_stack[ls_depth].name;
+ current_long_filename = ls_stack[ls_depth].long_name;
+ current_incdir = ls_stack[ls_depth].incdir;
+ if (ls_depth == 0) {
+ freemem(ls_stack);
+ freemem(protect_detect_stack);
+ }
+}
+
+/*
+ * report_context() builds the list of the successive includers of the
+ * current file, innermost includer first. Each entry gives the file
+ * names and the saved line number minus one. The list ends with a dummy
+ * entry whose line number is negative.
+ * The caller is responsible for freeing the returned pointer.
+ */
+struct stack_context *report_context(void)
+{
+	struct stack_context *list;
+	size_t k;
+
+	list = getmem((ls_depth + 1) * sizeof(struct stack_context));
+	for (k = 0; k < ls_depth; k ++) {
+		/* walk the stack from its top down to its bottom */
+		struct file_context *fc = ls_stack + (ls_depth - k - 1);
+
+		list[k].name = fc->name;
+		list[k].long_name = fc->long_name;
+		list[k].line = fc->ls.line - 1;
+	}
+	list[ls_depth].line = -1;
+	return list;
+}
+
+/*
+ * init_lexer_mode() is used to end initialization of a struct
+ * lexer_state if it must be used for a lexer: the flags are set to
+ * DEFAULT_LEXER_FLAGS, an empty output token fifo is allocated (and
+ * also recorded as toplevel_of), and the current token pointer is saved
+ * in save_ctok.
+ */
+void init_lexer_mode(struct lexer_state *ls)
+{
+	struct token_fifo *fifo;
+
+	ls->flags = DEFAULT_LEXER_FLAGS;
+	fifo = getmem(sizeof(struct token_fifo));
+	fifo->nt = 0;
+	fifo->art = 0;
+	ls->output_fifo = fifo;
+	ls->toplevel_of = fifo;
+	ls->save_ctok = ls->ctok;
+}
+
+/*
+ * release memory used by a struct lexer_state; this implies closing
+ * any input stream held by this structure.
+ * Each released pointer is reset to 0, so the function may be called
+ * again on the same structure. The structure itself is not freed.
+ */
+void free_lexer_state(struct lexer_state *ls)
+{
+ close_input(ls);
+#ifndef NO_UCPP_BUF
+ if (ls->input_buf) {
+ freemem(ls->input_buf);
+ ls->input_buf = 0;
+ }
+ if (ls->output_buf) {
+ freemem(ls->output_buf);
+ ls->output_buf = 0;
+ }
+#endif
+ /*
+  * The current token is released only when there is no output fifo
+  * or the fifo holds no token.
+  * NOTE(review): presumably ctok may otherwise point into the fifo
+  * -- confirm against the lexer code.
+  */
+ if (ls->ctok && (!ls->output_fifo || ls->output_fifo->nt == 0)) {
+ freemem(ls->ctok->name);
+ freemem(ls->ctok);
+ ls->ctok = 0;
+ }
+ if (ls->gf) {
+ free_garbage_fifo(ls->gf);
+ ls->gf = 0;
+ }
+ /* only the fifo structure is released here, not its token array */
+ if (ls->output_fifo) {
+ freemem(ls->output_fifo);
+ ls->output_fifo = 0;
+ }
+}
+
+/*
+ * Print line information: emit through put_char() a directive giving
+ * ls->line and the current file name (the long name when available).
+ * The directive starts with "# " if GCC_LINE_NUM is set in flags, with
+ * "#line " otherwise.
+ */
+static void print_line_info(struct lexer_state *ls, unsigned long flags)
+{
+	char *file = current_long_filename;
+	char *text, *p;
+
+	if (!file) file = current_filename;
+	/* 50 extra bytes for the fixed part and the line number */
+	text = getmem(50 + strlen(file));
+	if (!(flags & GCC_LINE_NUM)) {
+		sprintf(text, "#line %ld \"%s\"\n", ls->line, file);
+	} else {
+		sprintf(text, "# %ld \"%s\"\n", ls->line, file);
+	}
+	p = text;
+	while (*p) {
+		put_char(ls, (unsigned char)(*p));
+		p ++;
+	}
+	freemem(text);
+}
+
+/*
+ * Enter a file; this implies the possible emission of a #line directive.
+ * The flags used are passed as second parameter instead of being
+ * extracted from the struct lexer_state.
+ *
+ * Nothing is emitted unless LINE_NUM is set in flags. In lexer mode
+ * without text output (LEXER set, TEXT_OUTPUT clear), a CONTEXT token
+ * carrying the file name and ls->line is sent to print_token();
+ * otherwise a textual directive is printed and ls->oline adjusted.
+ *
+ * As a command-line option, gcc-like directives (with only a '#',
+ * without 'line') may be produced.
+ *
+ * enter_file() returns 1 if a (CONTEXT) token was produced, 0 otherwise.
+ */
+int enter_file(struct lexer_state *ls, unsigned long flags)
+{
+	struct token ctx;
+
+	if (!(flags & LINE_NUM)) return 0;
+	if (!(flags & LEXER) || (flags & TEXT_OUTPUT)) {
+		print_line_info(ls, flags);
+		ls->oline --;	/* emitted #line troubled oline */
+		return 0;
+	}
+	ctx.type = CONTEXT;
+	ctx.line = ls->line;
+	ctx.name = current_long_filename ?
+		current_long_filename : current_filename;
+	print_token(ls, &ctx, 0);
+	return 1;
+}
+
+#ifdef UCPP_MMAP
+/*
+ * We open() the file, lseek() to its end to learn its length, then
+ * fdopen() it. If the lseek() worked, we try to mmap() the file, up to
+ * the point where we arrived.
+ * On an architecture where end-of-lines are multibytes and translated
+ * into single '\n', bad things could happen. We strongly hope that, if
+ * we could seek to the end but could not mmap(), then we can get back.
+ *
+ * On return, find_file_map is the mapping address (with map_length its
+ * length) or 0 when the file was not mapped; set_input_file() reads
+ * both. The return value is the stream, or 0 if the file could not be
+ * opened or could not be rewound after a failed mmap().
+ */
+static void *find_file_map;
+static size_t map_length;
+
+FILE *fopen_mmap_file(char *name)
+{
+ FILE *f;
+ int fd;
+ long l;
+
+ find_file_map = 0;
+ fd = open(name, O_RDONLY, 0);
+ if (fd < 0) return 0;
+ l = lseek(fd, 0, SEEK_END);
+ f = fdopen(fd, "r");
+ if (!f) {
+ /* fdopen() failed, so the descriptor is still ours to close */
+ close(fd);
+ return 0;
+ }
+ if (l < 0) return f; /* not seekable */
+ map_length = l;
+ if ((find_file_map = mmap(0, map_length, PROT_READ,
+ MAP_PRIVATE, fd, 0)) == MAP_FAILED) {
+ /* we could not mmap() the file; get back */
+ find_file_map = 0;
+ if (fseek(f, 0, SEEK_SET)) {
+ /* bwaah... can't get back. This file is cursed. */
+ fclose(f);
+ return 0;
+ }
+ }
+ return f;
+}
+
+/*
+ * Attach the stream f to the lexer state. If the last call to
+ * fopen_mmap_file() produced a mapping, the mapping becomes the input
+ * buffer (the previous buffer pointer is kept in input_buf_sav) and the
+ * buffer bounds are set to cover the whole mapping; otherwise the state
+ * is merely flagged as not mapped.
+ */
+void set_input_file(struct lexer_state *ls, FILE *f)
+{
+	ls->input = f;
+	if (!find_file_map) {
+		ls->from_mmap = 0;
+		return;
+	}
+	ls->from_mmap = 1;
+	ls->input_buf_sav = ls->input_buf;
+	ls->input_buf = find_file_map;
+	ls->ebuf = map_length;
+	ls->pbuf = 0;
+}
+#endif
+
+/*
+ * Find a file by looking through the include path.
+ * name is the file name as written in the directive; if localdir is
+ * non-zero, the file is first looked for relative to the directory of
+ * the current file (or under its bare name) before the include path
+ * is scanned.
+ * return value: a FILE * on the file, opened in "r" mode, or 0.
+ *
+ * On success, current_long_filename and current_incdir are updated and
+ * protect_detect.ff points to the cache entry of the file.
+ *
+ * find_file_error will contain:
+ * FF_ERROR on error (file not found or impossible to read)
+ * FF_PROTECT file is protected and therefore useless to read
+ * FF_KNOWN file is already known
+ * FF_UNKNOWN file was not already known
+ */
+static int find_file_error;
+
+enum { FF_ERROR, FF_PROTECT, FF_KNOWN, FF_UNKNOWN };
+
+static FILE *find_file(char *name, int localdir)
+{
+ FILE *f;
+ int i, incdir = -1;
+ size_t nl = strlen(name);
+ char *s = 0; /* candidate path built here; owned by this function */
+ struct found_file *ff = 0, *nff;
+ int lf = 0; /* set to 1 when the file is opened by the local lookup */
+ int nffa = 0; /* set to 1 while protect_detect.ff is a fresh,
+ unregistered entry that must be deleted on failure */
+
+ find_file_error = FF_ERROR;
+ protect_detect.state = -1;
+ protect_detect.macro = 0;
+ if (localdir) {
+ int i;
+ char *rfn = current_long_filename ? current_long_filename
+ : current_filename;
+
+ /* locate the last directory separator in the current file name */
+ for (i = strlen(rfn) - 1; i >= 0; i --)
+#ifdef MSDOS
+ if (rfn[i] == '\\') break;
+#else
+ if (rfn[i] == '/') break;
+#endif
+#if defined MSDOS
+ if (i >= 0 && *name != '\\' && (nl < 2 || name[1] != ':'))
+#elif defined AMIGA
+ if (i >= 0 && *name != '/' && (nl < 2 || name[1] != ':'))
+#else
+ if (i >= 0 && *name != '/')
+#endif
+ {
+ /*
+ * current file is somewhere else, and the provided
+ * file name is not absolute, so we must adjust the
+ * base for looking for the file.
+ * NOTE(review): the original comment went on to say that
+ * "found_files and found_files_loc are irrelevant for
+ * this search"; no found_files_loc is visible here and
+ * found_files is in fact queried just below with the
+ * adjusted name.
+ */
+ s = getmem(i + 2 + nl);
+ mmv(s, rfn, i);
+#ifdef MSDOS
+ s[i] = '\\';
+#else
+ s[i] = '/';
+#endif
+ mmv(s + i + 1, name, nl);
+ s[i + 1 + nl] = 0;
+ ff = HTT_get(&found_files, s);
+ } else ff = HTT_get(&found_files, name);
+ }
+ if (!ff) {
+ /* not cached as a plain file: try the system include cache,
+    keyed by the name as written in the directive */
+ struct found_file_sys *ffs = HTT_get(&found_files_sys, name);
+
+ if (ffs) {
+ ff = ffs->rff;
+ incdir = ffs->incdir;
+ }
+ }
+ /*
+ * At that point: if the file was found in the cache, ff points to
+ * the cached descriptive structure; its name is s if s is not 0,
+ * name otherwise.
+ */
+ if (ff) goto found_file_cache;
+
+ /*
+ * This is the first time we find the file, or it was not protected.
+ */
+ protect_detect.ff = new_found_file();
+ nffa = 1;
+ if (localdir &&
+#ifdef UCPP_MMAP
+ (f = fopen_mmap_file(s ? s : name))
+#else
+ (f = fopen(s ? s : name, "r"))
+#endif
+ ) {
+ lf = 1;
+ goto found_file;
+ }
+ /*
+ * If s contains a name, that name is now irrelevant: it was a
+ * filename for a search in the current directory, and the file
+ * was not found.
+ */
+ if (s) {
+ freemem(s);
+ s = 0;
+ }
+ /* scan the include path, building "<directory>/<name>" in s */
+ for (i = 0; (size_t)i < include_path_nb; i ++) {
+ size_t ni = strlen(include_path[i]);
+
+ s = getmem(ni + nl + 2);
+ mmv(s, include_path[i], ni);
+#ifdef AMIGA
+ /* contributed by Volker Barthelmann */
+ if (ni == 1 && *s == '.') {
+ *s = 0;
+ ni = 0;
+ }
+ if (ni > 0 && s[ni - 1] != ':' && s[ni - 1] != '/') {
+ s[ni] = '/';
+ mmv(s + ni + 1, name, nl + 1);
+ } else {
+ mmv(s + ni, name, nl + 1);
+ }
+#else
+ s[ni] = '/';
+ mmv(s + ni + 1, name, nl + 1);
+#endif
+#ifdef MSDOS
+ /* on msdos systems, replace all / by \ */
+ {
+ char *c;
+
+ for (c = s; *c; c ++) if (*c == '/') *c = '\\';
+ }
+#endif
+ incdir = i;
+ if ((ff = HTT_get(&found_files, s)) != 0) {
+ /*
+ * The file is known, but not as a system include
+ * file under the name provided.
+ */
+ struct found_file_sys *ffs = new_found_file_sys();
+
+ ffs->rff = ff;
+ ffs->incdir = incdir;
+ HTT_put(&found_files_sys, ffs, name);
+ freemem(s);
+ s = 0;
+ /* the fresh entry allocated above is not needed after all */
+ if (nffa) {
+ del_found_file(protect_detect.ff);
+ protect_detect.ff = 0;
+ nffa = 0;
+ }
+ goto found_file_cache;
+ }
+#ifdef UCPP_MMAP
+ f = fopen_mmap_file(s);
+#else
+ f = fopen(s, "r");
+#endif
+ if (f) goto found_file;
+ freemem(s);
+ s = 0;
+ }
+ /* common failure exit: release s and the unregistered entry, if any */
+zero_out:
+ if (s) freemem(s);
+ if (nffa) {
+ del_found_file(protect_detect.ff);
+ protect_detect.ff = 0;
+ nffa = 0;
+ }
+ return 0;
+
+ /*
+ * This part is invoked when the file was found in the
+ * cache.
+ */
+found_file_cache:
+ if (ff->protect) {
+ if (get_macro(ff->protect)) {
+ /* file is protected, do not include it */
+ find_file_error = FF_PROTECT;
+ goto zero_out;
+ }
+ /* file is protected but the guardian macro is
+ not available; disable guardian detection. */
+ protect_detect.state = 0;
+ }
+ protect_detect.ff = ff;
+ /* the file is reopened under the name it is registered with */
+#ifdef UCPP_MMAP
+ f = fopen_mmap_file(HASH_ITEM_NAME(ff));
+#else
+ f = fopen(HASH_ITEM_NAME(ff), "r");
+#endif
+ if (!f) goto zero_out;
+ find_file_error = FF_KNOWN;
+ goto found_file_2;
+
+ /*
+ * This part is invoked when we found a new file, which was not
+ * yet referenced. If lf == 1, then the file was found directly,
+ * otherwise it was found in some system include directory.
+ * A new found_file structure has been allocated and is in
+ * protect_detect.ff
+ */
+found_file:
+ if (f && ((emit_dependencies == 1 && lf && current_incdir == -1)
+ || emit_dependencies == 2)) {
+ fprintf(emit_output, " %s", s ? s : name);
+ }
+ nff = protect_detect.ff;
+ nff->name = sdup(name);
+ /* register the entry under the path actually opened; with AUDIT a
+    non-zero return from HTT_put() is reported as an internal error */
+#ifdef AUDIT
+ if (
+#endif
+ HTT_put(&found_files, nff, s ? s : name)
+#ifdef AUDIT
+ ) ouch("filename collided with a wraith")
+#endif
+ ;
+ if (!lf) {
+ /* found through the include path: also remember it under the
+    name as written, together with its directory index */
+ struct found_file_sys *ffs = new_found_file_sys();
+
+ ffs->rff = nff;
+ ffs->incdir = incdir;
+ HTT_put(&found_files_sys, ffs, name);
+ }
+ if (s) freemem(s);
+ s = 0;
+ find_file_error = FF_UNKNOWN;
+ ff = nff;
+
+ /* common success exit, for cached and new files alike */
+found_file_2:
+ if (s) freemem(s);
+ current_long_filename = HASH_ITEM_NAME(ff);
+#ifdef NO_LIBC_BUF
+ setbuf(f, 0);
+#endif
+ current_incdir = incdir;
+ return f;
+}
+
+/*
+ * Find the named file by looking through the end of the include path.
+ * This is for #include_next directives.
+ * #include_next <foo> and #include_next "foo" are considered identical,
+ * for all practical purposes.
+ *
+ * The scan starts at the include path entry following current_incdir.
+ * return value: a FILE * on the file, or 0; find_file_error is set to
+ * FF_ERROR, FF_PROTECT, FF_KNOWN or FF_UNKNOWN. On success,
+ * current_long_filename and current_incdir are updated.
+ */
+static FILE *find_file_next(char *name)
+{
+ int i;
+ size_t nl = strlen(name);
+ FILE *f;
+ struct found_file *ff;
+
+ find_file_error = FF_ERROR;
+ protect_detect.state = -1;
+ protect_detect.macro = 0;
+ for (i = current_incdir + 1; (size_t)i < include_path_nb; i ++) {
+ char *s;
+ size_t ni = strlen(include_path[i]);
+
+ /* build "<directory>/<name>" */
+ s = getmem(ni + nl + 2);
+ mmv(s, include_path[i], ni);
+ s[ni] = '/';
+ mmv(s + ni + 1, name, nl + 1);
+#ifdef MSDOS
+ /* on msdos systems, replace all / by \ */
+ {
+ char *c;
+
+ for (c = s; *c; c ++) if (*c == '/') *c = '\\';
+ }
+#endif
+ ff = HTT_get(&found_files, s);
+ if (ff) {
+ /* file was found in the cache */
+ if (ff->protect) {
+ if (get_macro(ff->protect)) {
+ find_file_error = FF_PROTECT;
+ freemem(s);
+ return 0;
+ }
+ /* file is protected but the guardian macro is
+ not available; disable guardian detection. */
+ protect_detect.state = 0;
+ }
+ protect_detect.ff = ff;
+#ifdef UCPP_MMAP
+ f = fopen_mmap_file(HASH_ITEM_NAME(ff));
+#else
+ f = fopen(HASH_ITEM_NAME(ff), "r");
+#endif
+ if (!f) {
+ /* file is referenced but yet unavailable. */
+ freemem(s);
+ return 0;
+ }
+ find_file_error = FF_KNOWN;
+ /* from here on s points to the name held by the hash item,
+    which this function must not release */
+ freemem(s);
+ s = HASH_ITEM_NAME(ff);
+ } else {
+#ifdef UCPP_MMAP
+ f = fopen_mmap_file(s);
+#else
+ f = fopen(s, "r");
+#endif
+ if (f) {
+ if (emit_dependencies == 2) {
+ fprintf(emit_output, " %s", s);
+ }
+ /* new file: register it under its full path */
+ ff = protect_detect.ff = new_found_file();
+ ff->name = sdup(s);
+#ifdef AUDIT
+ if (
+#endif
+ HTT_put(&found_files, ff, s)
+#ifdef AUDIT
+ ) ouch("filename collided with a wraith")
+#endif
+ ;
+ find_file_error = FF_UNKNOWN;
+ /* as above, s now refers to the hash item's name */
+ freemem(s);
+ s = HASH_ITEM_NAME(ff);
+ }
+ }
+ if (f) {
+ current_long_filename = s;
+ current_incdir = i;
+ return f;
+ }
+ /* not in this directory (s is still our own buffer here) */
+ freemem(s);
+ }
+ return 0;
+}
+
+/*
+ * The #if directive. This function parses the expression, performs macro
+ * expansion (and handles the "defined" operator), and calls eval_expr.
+ * return value: 1 if the expression is true, 0 if it is false, -1 on error.
+ *
+ * The line goes through four successive token lists:
+ *   tf   the raw tokens of the line, runs of whitespace squeezed
+ *   tf1  tf with each "defined X" / "defined(X)" replaced by 1L or 0L
+ *   tf2  tf1 after macro substitution and assertion evaluation
+ *   tf3  tf2 without whitespace, remaining identifiers replaced by 0
+ * Token names duplicated here are handed to the garbage fifo of ls and
+ * are not released by this function.
+ */
+static int handle_if(struct lexer_state *ls)
+{
+	struct token_fifo tf, tf1, tf2, tf3, *save_tf;
+	long l = ls->line;
+	unsigned long z;
+	int ret = 0, ltww = 1;
+
+	/*
+	 * first, get the whole line; ltww is non-zero when the previous
+	 * token satisfied ttMWS(), so that leading and repeated such
+	 * tokens are skipped
+	 */
+	tf.art = tf.nt = 0;
+	while (!next_token(ls) && ls->ctok->type != NEWLINE) {
+		struct token t;
+
+		if (ltww && ttMWS(ls->ctok->type)) continue;
+		ltww = ttMWS(ls->ctok->type);
+		t.type = ls->ctok->type;
+		t.line = l;
+		if (S_TOKEN(ls->ctok->type)) {
+			t.name = sdup(ls->ctok->name);
+			throw_away(ls->gf, t.name);
+		}
+		aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
+	}
+	/* drop the trailing whitespace token, if any */
+	if (ltww && tf.nt) if ((-- tf.nt) == 0) freemem(tf.t);
+	if (tf.nt == 0) {
+		error(l, "void condition for a #if/#elif");
+		return -1;
+	}
+	/*
+	 * handle the "defined" operator; nidx is the index of the tested
+	 * name and eidx the index of the last token of the construct.
+	 * Anything that does not match is copied unchanged.
+	 */
+	tf1.art = tf1.nt = 0;
+	while (tf.art < tf.nt) {
+		struct token *ct, rt;
+		struct macro *m;
+		size_t nidx, eidx;
+
+		ct = tf.t + (tf.art ++);
+		if (ct->type == NAME && !strcmp(ct->name, "defined")) {
+			if (tf.art >= tf.nt) goto store_token;
+			nidx = tf.art;
+			if (ttMWS(tf.t[nidx].type))
+				if (++ nidx >= tf.nt) goto store_token;
+			if (tf.t[nidx].type == NAME) {
+				eidx = nidx;
+				goto check_macro;
+			}
+			if (tf.t[nidx].type != LPAR) goto store_token;
+			if (++ nidx >= tf.nt) goto store_token;
+			if (ttMWS(tf.t[nidx].type))
+				if (++ nidx >= tf.nt) goto store_token;
+			if (tf.t[nidx].type != NAME) goto store_token;
+			eidx = nidx + 1;
+			if (eidx >= tf.nt) goto store_token;
+			if (ttMWS(tf.t[eidx].type))
+				if (++ eidx >= tf.nt) goto store_token;
+			if (tf.t[eidx].type != RPAR) goto store_token;
+			goto check_macro;
+		}
+	store_token:
+		aol(tf1.t, tf1.nt, *ct, TOKEN_LIST_MEMG);
+		continue;
+
+	check_macro:
+		m = get_macro(tf.t[nidx].name);
+		rt.type = NUMBER;
+		rt.name = m ? "1L" : "0L";
+		aol(tf1.t, tf1.nt, rt, TOKEN_LIST_MEMG);
+		tf.art = eidx + 1;
+	}
+	freemem(tf.t);
+	if (tf1.nt == 0) {
+		error(l, "void condition (after expansion) for a #if/#elif");
+		return -1;
+	}
+
+	/*
+	 * perform all macro substitutions; the output fifo of ls is
+	 * temporarily redirected to tf2 and must be restored on every
+	 * exit path
+	 */
+	tf2.art = tf2.nt = 0;
+	save_tf = ls->output_fifo;
+	ls->output_fifo = &tf2;
+	while (tf1.art < tf1.nt) {
+		struct token *ct;
+
+		ct = tf1.t + (tf1.art ++);
+		if (ct->type == NAME) {
+			struct macro *m = get_macro(ct->name);
+
+			if (m) {
+				if (substitute_macro(ls, m, &tf1, 0,
+#ifdef NO_PRAGMA_IN_DIRECTIVE
+					1,
+#else
+					0,
+#endif
+					ct->line)) {
+					ls->output_fifo = save_tf;
+					goto error1;
+				}
+				continue;
+			}
+		} else if ((ct->type == SHARP || ct->type == DIG_SHARP)
+			&& (ls->flags & HANDLE_ASSERTIONS)) {
+			/* we have an assertion; parse it */
+			int nnp, ltww = 1;
+			size_t i = tf1.art;
+			struct token_fifo atl;
+			char *aname;
+			struct assert *a;
+			int av = 0;
+			struct token rt;
+
+			atl.art = atl.nt = 0;
+			while (i < tf1.nt && ttMWS(tf1.t[i].type)) i ++;
+			if (i >= tf1.nt) goto assert_error;
+			if (tf1.t[i].type != NAME) goto assert_error;
+			aname = tf1.t[i ++].name;
+			while (i < tf1.nt && ttMWS(tf1.t[i].type)) i ++;
+			if (i >= tf1.nt) goto assert_generic;
+			if (tf1.t[i].type != LPAR) goto assert_generic;
+			i ++;
+			/*
+			 * collect the tokens between the parentheses into
+			 * atl; nnp is the current nesting depth
+			 */
+			for (nnp = 1; nnp && i < tf1.nt; i ++) {
+				if (ltww && ttMWS(tf1.t[i].type)) continue;
+				if (tf1.t[i].type == LPAR) nnp ++;
+				else if (tf1.t[i].type == RPAR
+					&& (-- nnp) == 0) {
+					tf1.art = i + 1;
+					break;
+				}
+				ltww = ttMWS(tf1.t[i].type);
+				aol(atl.t, atl.nt, tf1.t[i], TOKEN_LIST_MEMG);
+			}
+			if (nnp) {
+				/*
+				 * unbalanced parentheses: release the
+				 * partial list, which nothing else would
+				 * free on the error path
+				 */
+				if (atl.nt) freemem(atl.t);
+				goto assert_error;
+			}
+			if (ltww && atl.nt && (-- atl.nt) == 0) freemem(atl.t);
+			if (atl.nt == 0) goto assert_error;
+
+			/* the assertion is in aname and atl; check it */
+			a = get_assertion(aname);
+			if (a) for (i = 0; i < a->nbval; i ++)
+				if (!cmp_token_list(&atl, a->val + i)) {
+					av = 1;
+					break;
+				}
+			rt.type = NUMBER;
+			rt.name = av ? "1" : "0";
+			aol(tf2.t, tf2.nt, rt, TOKEN_LIST_MEMG);
+			if (atl.nt) freemem(atl.t);
+			continue;
+
+			/* "#name" alone: true if the assertion exists */
+		assert_generic:
+			tf1.art = i;
+			rt.type = NUMBER;
+			rt.name = get_assertion(aname) ? "1" : "0";
+			aol(tf2.t, tf2.nt, rt, TOKEN_LIST_MEMG);
+			continue;
+
+		assert_error:
+			error(l, "syntax error for assertion in #if");
+			ls->output_fifo = save_tf;
+			goto error1;
+		}
+		aol(tf2.t, tf2.nt, *ct, TOKEN_LIST_MEMG);
+	}
+	ls->output_fifo = save_tf;
+	freemem(tf1.t);
+	if (tf2.nt == 0) {
+		error(l, "void condition (after expansion) for a #if/#elif");
+		return -1;
+	}
+
+	/*
+	 * suppress whitespace and replace rogue identifiers by 0
+	 */
+	tf3.art = tf3.nt = 0;
+	while (tf2.art < tf2.nt) {
+		struct token *ct = tf2.t + (tf2.art ++);
+
+		if (ttMWS(ct->type)) continue;
+		if (ct->type == NAME) {
+			/*
+			 * a rogue identifier; we replace it with "0".
+			 */
+			struct token rt;
+
+			rt.type = NUMBER;
+			rt.name = "0";
+			aol(tf3.t, tf3.nt, rt, TOKEN_LIST_MEMG);
+			continue;
+		}
+		aol(tf3.t, tf3.nt, *ct, TOKEN_LIST_MEMG);
+	}
+	freemem(tf2.t);
+
+	if (tf3.nt == 0) {
+		error(l, "void condition (after expansion) for a #if/#elif");
+		return -1;
+	}
+	/* evaluate; eval_expr() reports failure through ret */
+	eval_line = l;
+	z = eval_expr(&tf3, &ret, (ls->flags & WARN_STANDARD) != 0);
+	freemem(tf3.t);
+	if (ret) return -1;
+	return (z != 0);
+
+	/* failure during substitution: release the lists still alive */
+error1:
+	if (tf1.nt) freemem(tf1.t);
+	if (tf2.nt) freemem(tf2.t);
+	return -1;
+}
+
+/*
+ * A #include was found; parse the end of line, replace macros if
+ * necessary.
+ *
+ * If nex is set to non-zero, the directive is considered as a #include_next
+ * (extension to C99, mimicked from GNU)
+ */
+static int handle_include(struct lexer_state *ls, unsigned long flags, int nex)
+{
+ int c, string_fname = 0;
+ char *fname;
+ unsigned char *fname2;
+ size_t fname_ptr = 0;
+ long l = ls->line;
+ int x, y;
+ FILE *f;
+ struct token_fifo tf, tf2, *save_tf;
+ size_t nl;
+ int tgd;
+ struct lexer_state alt_ls;
+
+#define left_angle(t) ((t) == LT || (t) == LEQ || (t) == LSH \
+ || (t) == ASLSH || (t) == DIG_LBRK || (t) == LBRA)
+#define right_angle(t) ((t) == GT || (t) == RSH || (t) == ARROW \
+ || (t) == DIG_RBRK || (t) == DIG_RBRA)
+
+ while ((c = grap_char(ls)) >= 0 && c != '\n') {
+ if (space_char(c)) {
+ discard_char(ls);
+ continue;
+ }
+ if (c == '<') {
+ discard_char(ls);
+ while ((c = grap_char(ls)) >= 0) {
+ discard_char(ls);
+ if (c == '\n') goto include_last_chance;
+ if (c == '>') break;
+ aol(fname, fname_ptr, (char)c, FNAME_MEMG);
+ }
+ aol(fname, fname_ptr, (char)0, FNAME_MEMG);
+ string_fname = 0;
+ goto do_include;
+ } else if (c == '"') {
+ discard_char(ls);
+ while ((c = grap_char(ls)) >= 0) {
+ discard_char(ls);
+ if (c == '\n') {
+ /* macro replacements won't save that one */
+ if (fname_ptr) freemem(fname);
+ goto include_error;
+ }
+ if (c == '"') break;
+ aol(fname, fname_ptr, (char)c, FNAME_MEMG);
+ }
+ aol(fname, fname_ptr, (char)0, FNAME_MEMG);
+ string_fname = 1;
+ goto do_include;
+ }
+ goto include_macro;
+ }
+
+include_last_chance:
+ /*
+ * We found a '<' but not the trailing '>'; so we tokenize the
+ * line, and try to act upon it. The standard lets us free in that
+ * matter, and no sane programmer would use such a construct, but
+ * it is no reason not to support it.
+ */
+ if (fname_ptr == 0) goto include_error;
+ fname2 = getmem(fname_ptr + 1);
+ mmv(fname2 + 1, fname, fname_ptr);
+ fname2[0] = '<';
+ /*
+ * We merely copy the lexer_state structure; this should be ok,
+ * since we do want to share the memory structure (garbage_fifo),
+ * and do not touch any other context-full thing.
+ */
+ alt_ls = *ls;
+ alt_ls.input = 0;
+ alt_ls.input_string = fname2;
+ alt_ls.pbuf = 0;
+ alt_ls.ebuf = fname_ptr + 1;
+ tf.art = tf.nt = 0;
+ while (!next_token(&alt_ls)) {
+ if (!ttMWS(alt_ls.ctok->type)) {
+ struct token t;
+
+ t.type = alt_ls.ctok->type;
+ t.line = l;
+ if (S_TOKEN(alt_ls.ctok->type)) {
+ t.name = sdup(alt_ls.ctok->name);
+ throw_away(alt_ls.gf, t.name);
+ }
+ aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
+ }
+ }
+ freemem(fname2);
+ if (alt_ls.pbuf < alt_ls.ebuf) goto include_error;
+ /* tokenizing failed */
+ goto include_macro2;
+
+include_error:
+ error(l, "invalid '#include'");
+ return 1;
+
+include_macro:
+ tf.art = tf.nt = 0;
+ while (!next_token(ls) && ls->ctok->type != NEWLINE) {
+ if (!ttMWS(ls->ctok->type)) {
+ struct token t;
+
+ t.type = ls->ctok->type;
+ t.line = l;
+ if (S_TOKEN(ls->ctok->type)) {
+ t.name = sdup(ls->ctok->name);
+ throw_away(ls->gf, t.name);
+ }
+ aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
+ }
+ }
+include_macro2:
+ tf2.art = tf2.nt = 0;
+ save_tf = ls->output_fifo;
+ ls->output_fifo = &tf2;
+ while (tf.art < tf.nt) {
+ struct token *ct;
+
+ ct = tf.t + (tf.art ++);
+ if (ct->type == NAME) {
+ struct macro *m = get_macro(ct->name);
+ if (m) {
+ if (substitute_macro(ls, m, &tf, 0,
+#ifdef NO_PRAGMA_IN_DIRECTIVE
+ 1,
+#else
+ 0,
+#endif
+ ct->line)) {
+ ls->output_fifo = save_tf;
+ return -1;
+ }
+ continue;
+ }
+ }
+ aol(tf2.t, tf2.nt, *ct, TOKEN_LIST_MEMG);
+ }
+ freemem(tf.t);
+ ls->output_fifo = save_tf;
+ for (x = 0; (size_t)x < tf2.nt && ttWHI(tf2.t[x].type); x ++);
+ for (y = tf2.nt - 1; y >= 0 && ttWHI(tf2.t[y].type); y --);
+ if ((size_t)x >= tf2.nt) goto include_macro_err;
+ if (tf2.t[x].type == STRING) {
+ if (y != x) goto include_macro_err;
+ if (tf2.t[x].name[0] == 'L') {
+ if (ls->flags & WARN_STANDARD)
+ warning(l, "wide string for #include");
+ fname = sdup(tf2.t[x].name);
+ nl = strlen(fname);
+ *(fname + nl - 1) = 0;
+ mmvwo(fname, fname + 2, nl - 2);
+ } else {
+ fname = sdup(tf2.t[x].name);
+ nl = strlen(fname);
+ *(fname + nl - 1) = 0;
+ mmvwo(fname, fname + 1, nl - 1);
+ }
+ string_fname = 1;
+ } else if (left_angle(tf2.t[x].type) && right_angle(tf2.t[y].type)) {
+ int i, j;
+
+ if (ls->flags & WARN_ANNOYING) warning(l, "reconstruction "
+ "of <foo> in #include");
+ for (j = 0, i = x; i <= y; i ++) if (!ttWHI(tf2.t[i].type))
+ j += strlen(tname(tf2.t[i]));
+ fname = getmem(j + 1);
+ for (j = 0, i = x; i <= y; i ++) {
+ if (ttWHI(tf2.t[i].type)) continue;
+ strcpy(fname + j, tname(tf2.t[i]));
+ j += strlen(tname(tf2.t[i]));
+ }
+ *(fname + j - 1) = 0;
+ mmvwo(fname, fname + 1, j);
+ string_fname = 0;
+ } else goto include_macro_err;
+ freemem(tf2.t);
+ goto do_include_next;
+
+include_macro_err:
+ error(l, "macro expansion did not produce a valid filename "
+ "for #include");
+ if (tf2.nt) freemem(tf2.t);
+ return 1;
+
+do_include:
+ tgd = 1;
+ while (!next_token(ls)) {
+ if (tgd && !ttWHI(ls->ctok->type)
+ && (ls->flags & WARN_STANDARD)) {
+ warning(l, "trailing garbage in #include");
+ tgd = 0;
+ }
+ if (ls->ctok->type == NEWLINE) break;
+ }
+
+ /* the increment of ls->line is intended so that the line
+ numbering is reported correctly in report_context() even if
+ the #include is at the end of the file with no trailing newline */
+ if (ls->ctok->type != NEWLINE) ls->line ++;
+do_include_next:
+ if (!(ls->flags & LEXER) && (ls->flags & KEEP_OUTPUT))
+ put_char(ls, '\n');
+ push_file_context(ls);
+ reinit_lexer_state(ls, 1);
+#ifdef MSDOS
+ /* on msdos systems, replace all / by \ */
+ {
+ char *d;
+
+ for (d = fname; *d; d ++) if (*d == '/') *d = '\\';
+ }
+#endif
+ f = nex ? find_file_next(fname) : find_file(fname, string_fname);
+ if (!f) {
+ current_filename = 0;
+ pop_file_context(ls);
+ if (find_file_error == FF_ERROR) {
+ error(l, "file '%s' not found", fname);
+ freemem(fname);
+ return 1;
+ }
+ /* file was found, but it is useless to include it again */
+ freemem(fname);
+ return 0;
+ }
+#ifdef UCPP_MMAP
+ set_input_file(ls, f);
+#else
+ ls->input = f;
+#endif
+ current_filename = fname;
+ enter_file(ls, flags);
+ return 0;
+
+#undef left_angle
+#undef right_angle
+}
+
+/*
+ * Handle a #line directive.
+ *
+ * The rest of the line is tokenized and macro-replaced. The result must
+ * begin with a NUMBER (or CHAR) token giving the new line number, and
+ * may be followed by a string literal giving a new file name. On
+ * success ls->line and ls->oline receive the new number,
+ * current_filename is replaced when a name was given, and enter_file()
+ * is called with "flags".
+ *
+ * return value: 0 on success, 1 when the directive is invalid (an error
+ * has been reported), -1 when a macro substitution failed.
+ */
+static int handle_line(struct lexer_state *ls, unsigned long flags)
+{
+	char *fname;
+	long l = ls->line;
+	struct token_fifo tf, tf2, *save_tf;
+	size_t nl, j;
+	unsigned long z;
+
+	/* collect the tokens of the line; those matching ttMWS() are skipped */
+	tf.art = tf.nt = 0;
+	while (!next_token(ls) && ls->ctok->type != NEWLINE) {
+		if (!ttMWS(ls->ctok->type)) {
+			struct token t;
+
+			t.type = ls->ctok->type;
+			t.line = l;
+			if (S_TOKEN(ls->ctok->type)) {
+				t.name = sdup(ls->ctok->name);
+				throw_away(ls->gf, t.name);
+			}
+			aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
+		}
+	}
+
+	/* macro-replace tf into tf2, with tf2 as temporary output fifo */
+	tf2.art = tf2.nt = 0;
+	save_tf = ls->output_fifo;
+	ls->output_fifo = &tf2;
+	while (tf.art < tf.nt) {
+		struct token *ct;
+
+		ct = tf.t + (tf.art ++);
+		if (ct->type == NAME) {
+			struct macro *m = get_macro(ct->name);
+			if (m) {
+				if (substitute_macro(ls, m, &tf, 0,
+#ifdef NO_PRAGMA_IN_DIRECTIVE
+					1,
+#else
+					0,
+#endif
+					ct->line)) {
+					ls->output_fifo = save_tf;
+					/* do not leak the token arrays */
+					if (tf.nt) freemem(tf.t);
+					if (tf2.nt) freemem(tf2.t);
+					return -1;
+				}
+				continue;
+			}
+		}
+		aol(tf2.t, tf2.nt, *ct, TOKEN_LIST_MEMG);
+	}
+	/*
+	 * tf.t is never assigned in this function unless aol() appended
+	 * a token, so it must not be released for an empty line (a bare
+	 * "#line"); this is the same guard as used for tf2 below.
+	 */
+	if (tf.nt) freemem(tf.t);
+	for (tf2.art = 0; tf2.art < tf2.nt && ttWHI(tf2.t[tf2.art].type);
+		tf2.art ++);
+	ls->output_fifo = save_tf;
+	if (tf2.art == tf2.nt || (tf2.t[tf2.art].type != NUMBER
+		&& tf2.t[tf2.art].type != CHAR)) {
+		error(l, "not a valid number for #line");
+		goto line_macro_err;
+	}
+	/* after this loop, j is the length of the number token */
+	for (j = 0; tf2.t[tf2.art].name[j]; j ++)
+		if (tf2.t[tf2.art].name[j] < '0'
+			|| tf2.t[tf2.art].name[j] > '9')
+			if (ls->flags & WARN_STANDARD)
+				warning(l, "non-standard line number in #line");
+	/* NOTE(review): presumably taken when strtoconst() fails -- confirm */
+	if (catch(eval_exception)) goto line_macro_err;
+	z = strtoconst(tf2.t[tf2.art].name);
+	if (j > 10 || z > 2147483647U) {
+		error(l, "out-of-bound line number for #line");
+		goto line_macro_err;
+	}
+	ls->oline = ls->line = z;
+	if ((++ tf2.art) < tf2.nt) {
+		size_t i;
+
+		for (i = tf2.art; i < tf2.nt && ttMWS(tf2.t[i].type); i ++);
+		if (i < tf2.nt) {
+			if (tf2.t[i].type != STRING) {
+				error(l, "not a valid filename for #line");
+				goto line_macro_err;
+			}
+			/*
+			 * Duplicate the literal, cut its last character
+			 * and shift out the opening quote (and the 'L'
+			 * prefix of a wide string).
+			 */
+			if (tf2.t[i].name[0] == 'L') {
+				if (ls->flags & WARN_STANDARD) {
+					warning(l, "wide string for #line");
+				}
+				fname = sdup(tf2.t[i].name);
+				nl = strlen(fname);
+				*(fname + nl - 1) = 0;
+				mmvwo(fname, fname + 2, nl - 2);
+			} else {
+				fname = sdup(tf2.t[i].name);
+				nl = strlen(fname);
+				*(fname + nl - 1) = 0;
+				mmvwo(fname, fname + 1, nl - 1);
+			}
+			if (current_filename) freemem(current_filename);
+			current_filename = fname;
+		}
+		for (i ++; i < tf2.nt && ttMWS(tf2.t[i].type); i ++);
+		if (i < tf2.nt && (ls->flags & WARN_STANDARD)) {
+			warning(l, "trailing garbage in #line");
+		}
+	}
+	freemem(tf2.t);
+	enter_file(ls, flags);
+	return 0;
+
+line_macro_err:
+	if (tf2.nt) freemem(tf2.t);
+	return 1;
+}
+
+/*
+ * #error: the remainder of the line is reported verbatim as an error
+ * message (only backslash+newline and trigraph processing have been
+ * applied to it).
+ */
+static void handle_error(struct lexer_state *ls)
+{
+	long line = ls->line;
+	size_t len = 0, cap = 128;
+	unsigned char *msg = getmem(cap);
+
+	/* copy characters up to, but not including, the newline */
+	for (;;) {
+		int ch = grap_char(ls);
+
+		if (ch < 0 || ch == '\n') break;
+		discard_char(ls);
+		wan(msg, len, (unsigned char)ch, cap);
+	}
+	wan(msg, len, 0, cap);
+	error(line, "#error%s", msg);
+	freemem(msg);
+}
+
+/*
+ * Map a digraph token type to the type of the equivalent standard
+ * token. The table is indexed by the distance from DIG_LBRK and has
+ * six entries; no range check is done here.
+ */
+static int undig(int type)
+{
+	static int plain[6] = {
+		LBRK, RBRK, LBRA, RBRA, SHARP, DSHARP
+	};
+	int idx = type - DIG_LBRK;
+
+	return plain[idx];
+}
+
+#ifdef PRAGMA_TOKENIZE
+/*
+ * Pack a token list into one byte string and release the list.
+ *
+ * Each token contributes one byte holding its type; a token for which
+ * S_TOKEN() holds is followed by its name and a PRAGMA_TOKEN_END byte,
+ * and the name is freed. A type equal to 0 is stored as
+ * PRAGMA_TOKEN_END, digraph types are stored as their standard
+ * equivalent, and a final 0 byte is appended so that the result can be
+ * handled like a string. tf->art is left equal to tf->nt.
+ */
+struct comp_token_fifo compress_token_list(struct token_fifo *tf)
+{
+	struct comp_token_fifo packed;
+	size_t pos;
+
+	/* first pass: compute the packed length */
+	pos = 0;
+	tf->art = 0;
+	while (tf->art < tf->nt) {
+		pos ++;
+		if (S_TOKEN(tf->t[tf->art].type))
+			pos += strlen(tf->t[tf->art].name) + 1;
+		tf->art ++;
+	}
+	packed.t = getmem((packed.length = pos) + 1);
+
+	/* second pass: emit type bytes and names */
+	pos = 0;
+	tf->art = 0;
+	while (tf->art < tf->nt) {
+		int code = tf->t[tf->art].type;
+
+		if (code == 0) code = PRAGMA_TOKEN_END;
+		if (code > DIGRAPH_TOKENS && code < DIGRAPH_TOKENS_END)
+			code = undig(code);
+		packed.t[pos ++] = code;
+		if (S_TOKEN(code)) {
+			char *name = tf->t[tf->art].name;
+			size_t name_len = strlen(name);
+
+			mmv(packed.t + pos, name, name_len);
+			pos += name_len;
+			packed.t[pos ++] = PRAGMA_TOKEN_END;
+			freemem(name);
+		}
+		tf->art ++;
+	}
+	packed.t[pos] = 0;
+	if (tf->nt) freemem(tf->t);
+	packed.rp = 0;
+	return packed;
+}
+#endif
+
+/*
+ * A #pragma directive: we make a PRAGMA token containing the rest of
+ * the line and append it to ls->output_fifo.
+ *
+ * With PRAGMA_TOKENIZE the rest of the line is tokenized and stored in
+ * the compressed form built by compress_token_list(); otherwise the raw
+ * characters are stored, without leading and trailing whitespace.
+ * A #pragma with nothing after it produces no token.
+ *
+ * We strongly hope that we are called only in LEXER mode.
+ */
+static void handle_pragma(struct lexer_state *ls)
+{
+	unsigned char *buf;
+	struct token t;
+	long l = ls->line;
+
+#ifdef PRAGMA_TOKENIZE
+	struct token_fifo tf;
+
+	tf.art = tf.nt = 0;
+	/* skip leading whitespace tokens */
+	while (!next_token(ls) && ls->ctok->type != NEWLINE)
+		if (!ttMWS(ls->ctok->type)) break;
+	if (ls->ctok->type != NEWLINE) {
+		do {
+			struct token pt;
+
+			pt.type = ls->ctok->type;
+			if (ttMWS(pt.type)) continue;
+			/* the copy is released by compress_token_list() */
+			if (S_TOKEN(pt.type)) pt.name = sdup(ls->ctok->name);
+			aol(tf.t, tf.nt, pt, TOKEN_LIST_MEMG);
+		} while (!next_token(ls) && ls->ctok->type != NEWLINE);
+	}
+	if (tf.nt == 0) {
+		/* void pragma are silently ignored */
+		return;
+	}
+	buf = (compress_token_list(&tf)).t;
+#else
+	int c, x = 1, y = 32;
+
+	/* skip leading whitespace; stop on the first other character */
+	while ((c = grap_char(ls)) >= 0 && c != '\n') {
+		discard_char(ls);
+		if (!space_char(c)) break;
+	}
+	/*
+	 * void #pragma are ignored; this includes the case where the
+	 * input ended right after the directive name (c < 0), which
+	 * must not be stored as the first character of the pragma.
+	 */
+	if (c < 0 || c == '\n') return;
+	buf = getmem(y);
+	buf[0] = c;
+	while ((c = grap_char(ls)) >= 0 && c != '\n') {
+		discard_char(ls);
+		wan(buf, x, c, y);
+	}
+	/* drop trailing whitespace, then terminate the string */
+	for (x --; x >= 0 && space_char(buf[x]); x --);
+	x ++;
+	wan(buf, x, 0, y);
+#endif
+	t.type = PRAGMA;
+	t.line = l;
+	t.name = (char *)buf;
+	aol(ls->output_fifo->t, ls->output_fifo->nt, t, TOKEN_LIST_MEMG);
+	throw_away(ls->gf, (char *)buf);
+}
+
+/*
+ * We saw a # at the beginning of a line (or preceded only by whitespace).
+ * We check the directive name and act accordingly.
+ *
+ * "sharp_type" is the token type of that '#'; it is only used to print
+ * the '#' again when the line is dumped to the output instead of being
+ * handled. The LEXER flag is forced while the directive is read; the
+ * caller's flags are restored on every exit through the handle_exit
+ * labels.
+ *
+ * return value: 0 when the directive was handled without error,
+ * otherwise the error status of the directive handler (1 in most
+ * cases); 1 is also returned if the input ends before a directive name
+ * is found.
+ */
+static int handle_cpp(struct lexer_state *ls, int sharp_type)
+{
+/* one bit per nesting level: set once the #else of that level was seen */
+#define condfset(x)	do { \
+		ls->condf[(x) / 32] |= 1UL << ((x) % 32); \
+	} while (0)
+#define condfclr(x)	do { \
+		ls->condf[(x) / 32] &= ~(1UL << ((x) % 32)); \
+	} while (0)
+#define condfval(x)	((ls->condf[(x) / 32] & (1UL << ((x) % 32))) != 0)
+
+	long l = ls->line;
+	unsigned long save_flags = ls->flags;
+	int ret = 0;
+
+	ls->flags |= LEXER;
+	while (!next_token(ls)) {
+		int t = ls->ctok->type;
+
+		switch (t) {
+		case COMMENT:
+			if (ls->flags & WARN_ANNOYING) {
+				warning(l, "comment in the middle of "
+					"a cpp directive");
+			}
+			/* fall through */
+		case NONE:
+			continue;
+		case NEWLINE:
+			/* null directive */
+			if (ls->flags & WARN_ANNOYING) {
+				/* truly an annoying warning; null directives
+				   are rare but may increase readability of
+				   some source files, and they are legal */
+				warning(l, "null cpp directive");
+			}
+			if (!(ls->flags & LEXER)) put_char(ls, '\n');
+			goto handle_exit2;
+		case NAME:
+			break;
+		default:
+			if (ls->flags & FAIL_SHARP) {
+				/* LPS 20050602 - ignores '#!' if on the first line */
+				if (l == 1 && ls->condcomp) {
+					ret = 1;
+				} else if (ls->condcomp) {
+					error(l, "rogue '#'");
+					ret = 1;
+				} else if (ls->flags & WARN_STANDARD) {
+					warning(l, "rogue '#' in code "
+						"compiled out");
+					ret = 0;
+				}
+				ls->flags = save_flags;
+				goto handle_warp_ign;
+			} else {
+				struct token u;
+
+				/* dump the '#' and the token that follows it */
+				u.type = sharp_type;
+				u.line = l;
+				ls->flags = save_flags;
+				print_token(ls, &u, 0);
+				print_token(ls, ls->ctok, 0);
+				if (ls->flags & WARN_ANNOYING) {
+					warning(l, "rogue '#' dumped");
+				}
+				goto handle_exit3;
+			}
+		}
+		if (ls->condcomp) {
+			/* we are in code that is compiled */
+			if (!strcmp(ls->ctok->name, "define")) {
+				ret = handle_define(ls);
+				goto handle_exit;
+			} else if (!strcmp(ls->ctok->name, "undef")) {
+				ret = handle_undef(ls);
+				goto handle_exit;
+			} else if (!strcmp(ls->ctok->name, "if")) {
+				if ((++ ls->ifnest) > 63) goto too_many_if;
+				condfclr(ls->ifnest - 1);
+				ret = handle_if(ls);
+				if (ret > 0) ret = 0;
+				else if (ret == 0) {
+					ls->condcomp = 0;
+					ls->condmet = 0;
+					ls->condnest = ls->ifnest - 1;
+				}
+				else ret = 1;
+				goto handle_exit;
+			} else if (!strcmp(ls->ctok->name, "ifdef")) {
+				if ((++ ls->ifnest) > 63) goto too_many_if;
+				condfclr(ls->ifnest - 1);
+				ret = handle_ifdef(ls);
+				if (ret > 0) ret = 0;
+				else if (ret == 0) {
+					ls->condcomp = 0;
+					ls->condmet = 0;
+					ls->condnest = ls->ifnest - 1;
+				}
+				else ret = 1;
+				goto handle_exit;
+			} else if (!strcmp(ls->ctok->name, "ifndef")) {
+				if ((++ ls->ifnest) > 63) goto too_many_if;
+				condfclr(ls->ifnest - 1);
+				ret = handle_ifndef(ls);
+				if (ret > 0) ret = 0;
+				else if (ret == 0) {
+					ls->condcomp = 0;
+					ls->condmet = 0;
+					ls->condnest = ls->ifnest - 1;
+				}
+				else ret = 1;
+				goto handle_exit;
+			} else if (!strcmp(ls->ctok->name, "else")) {
+				if (ls->ifnest == 0
+					|| condfval(ls->ifnest - 1)) {
+					error(l, "rogue #else");
+					ret = 1;
+					goto handle_warp;
+				}
+				condfset(ls->ifnest - 1);
+				if (ls->ifnest == 1) protect_detect.state = 0;
+				ls->condcomp = 0;
+				ls->condmet = 1;
+				ls->condnest = ls->ifnest - 1;
+				goto handle_warp;
+			} else if (!strcmp(ls->ctok->name, "elif")) {
+				if (ls->ifnest == 0
+					|| condfval(ls->ifnest - 1)) {
+					error(l, "rogue #elif");
+					ret = 1;
+					goto handle_warp_ign;
+				}
+				if (ls->ifnest == 1) protect_detect.state = 0;
+				ls->condcomp = 0;
+				ls->condmet = 1;
+				ls->condnest = ls->ifnest - 1;
+				goto handle_warp_ign;
+			} else if (!strcmp(ls->ctok->name, "endif")) {
+				if (ls->ifnest == 0) {
+					error(l, "unmatched #endif");
+					ret = 1;
+					goto handle_warp;
+				}
+				if ((-- ls->ifnest) == 0
+					&& protect_detect.state == 2) {
+					protect_detect.state = 3;
+				}
+				goto handle_warp;
+			} else if (!strcmp(ls->ctok->name, "include")) {
+				ret = handle_include(ls, save_flags, 0);
+				goto handle_exit3;
+			} else if (!strcmp(ls->ctok->name, "include_next")) {
+				ret = handle_include(ls, save_flags, 1);
+				goto handle_exit3;
+			} else if (!strcmp(ls->ctok->name, "pragma")) {
+				if (!(save_flags & LEXER)) {
+#ifdef PRAGMA_DUMP
+					/* dump #pragma in output */
+					struct token u;
+
+					u.type = sharp_type;
+					u.line = l;
+					ls->flags = save_flags;
+					print_token(ls, &u, 0);
+					print_token(ls, ls->ctok, 0);
+					/* LEXER is set only around next_token() */
+					while (ls->flags |= LEXER,
+						!next_token(ls)) {
+						long save_line;
+
+						ls->flags &= ~LEXER;
+						save_line = ls->line;
+						ls->line = l;
+						print_token(ls, ls->ctok, 0);
+						ls->line = save_line;
+						if (ls->ctok->type == NEWLINE)
+							break;
+					}
+					goto handle_exit3;
+#else
+					if (ls->flags & WARN_PRAGMA)
+						warning(l, "#pragma ignored "
+							"and not dumped");
+					goto handle_warp_ign;
+#endif
+				}
+				if (!(ls->flags & HANDLE_PRAGMA))
+					goto handle_warp_ign;
+				handle_pragma(ls);
+				goto handle_exit;
+			} else if (!strcmp(ls->ctok->name, "error")) {
+				ret = 1;
+				handle_error(ls);
+				goto handle_exit;
+			} else if (!strcmp(ls->ctok->name, "line")) {
+				ret = handle_line(ls, save_flags);
+				goto handle_exit;
+			} else if ((ls->flags & HANDLE_ASSERTIONS)
+				&& !strcmp(ls->ctok->name, "assert")) {
+				ret = handle_assert(ls);
+				goto handle_exit;
+			} else if ((ls->flags & HANDLE_ASSERTIONS)
+				&& !strcmp(ls->ctok->name, "unassert")) {
+				ret = handle_unassert(ls);
+				goto handle_exit;
+			}
+		} else {
+			/*
+			 * We are in code that is compiled out: only the
+			 * conditional directives matter, everything else
+			 * is skipped.
+			 */
+			if (!strcmp(ls->ctok->name, "else")) {
+				if (condfval(ls->ifnest - 1)
+					&& (ls->flags & WARN_STANDARD)) {
+					warning(l, "rogue #else in code "
+						"compiled out");
+				}
+				if (ls->condnest == ls->ifnest - 1) {
+					if (!ls->condmet) ls->condcomp = 1;
+				}
+				condfset(ls->ifnest - 1);
+				if (ls->ifnest == 1) protect_detect.state = 0;
+				goto handle_warp;
+			} else if (!strcmp(ls->ctok->name, "elif")) {
+				if (condfval(ls->ifnest - 1)
+					&& (ls->flags & WARN_STANDARD)) {
+					warning(l, "rogue #elif in code "
+						"compiled out");
+				}
+				if (ls->condnest != ls->ifnest - 1
+					|| ls->condmet)
+					goto handle_warp_ign;
+				if (ls->ifnest == 1) protect_detect.state = 0;
+				ret = handle_if(ls);
+				if (ret > 0) {
+					ls->condcomp = 1;
+					ls->condmet = 1;
+					ret = 0;
+				} else if (ret < 0) ret = 1;
+				goto handle_exit;
+			} else if (!strcmp(ls->ctok->name, "endif")) {
+				if ((-- ls->ifnest) == ls->condnest) {
+					if (ls->ifnest == 0 &&
+						protect_detect.state == 2)
+						protect_detect.state = 3;
+					ls->condcomp = 1;
+				}
+				goto handle_warp;
+			} else if (!strcmp(ls->ctok->name, "if")
+				|| !strcmp(ls->ctok->name, "ifdef")
+				|| !strcmp(ls->ctok->name, "ifndef")) {
+				if ((++ ls->ifnest) > 63) goto too_many_if;
+				condfclr(ls->ifnest - 1);
+			}
+			goto handle_warp_ign;
+		}
+		/*
+		 * Unrecognized directive. We emit either an error or
+		 * an annoying warning, depending on a command-line switch.
+		 */
+		if (ls->flags & FAIL_SHARP) {
+			error(l, "unknown cpp directive '#%s'",
+				ls->ctok->name);
+			goto handle_warp_ign;
+		} else {
+			struct token u;
+
+			u.type = sharp_type;
+			u.line = l;
+			ls->flags = save_flags;
+			print_token(ls, &u, 0);
+			print_token(ls, ls->ctok, 0);
+			if (ls->flags & WARN_ANNOYING) {
+				warning(l, "rogue '#' dumped");
+			}
+			/*
+			 * Leave exactly like the other "rogue '#' dumped"
+			 * case above. Without this jump the loop went on
+			 * and treated every following token of the line
+			 * as a new directive name.
+			 */
+			goto handle_exit3;
+		}
+	}
+	/* next_token() reported the end of input before any directive name */
+	return 1;
+
+handle_warp_ign:
+	/* skip the rest of the line silently */
+	while (!next_token(ls)) if (ls->ctok->type == NEWLINE) break;
+	goto handle_exit;
+handle_warp:
+	/* skip the rest of the line, complaining about non-whitespace */
+	while (!next_token(ls)) {
+		if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) {
+			warning(l, "trailing garbage in "
+				"preprocessing directive");
+		}
+		if (ls->ctok->type == NEWLINE) break;
+	}
+handle_exit:
+	if (!(ls->flags & LEXER)) put_char(ls, '\n');
+handle_exit3:
+	if (protect_detect.state == 1) {
+		protect_detect.state = 0;
+	} else if (protect_detect.state == -1) {
+		/* just after the #include */
+		protect_detect.state = 1;
+	}
+handle_exit2:
+	ls->flags = save_flags;
+	return ret;
+too_many_if:
+	error(l, "too many levels of conditional inclusion (max 63)");
+	ret = 1;
+	goto handle_warp;
+#undef condfset
+#undef condfclr
+#undef condfval
+}
+
+/*
+ * This is the main entry function. It maintains count of #, and call the
+ * appropriate functions when it encounters a cpp directive or a macro
+ * name.
+ * return value: positive on error; CPPERR_EOF means "end of input reached"
+ *
+ * As the code below shows: -1 is returned when an ordinary token was
+ * read and no error is pending; after a directive or a macro
+ * replacement the pending error code is returned if there is one,
+ * otherwise the value returned by handle_cpp() or substitute_macro().
+ */
+int cpp(struct lexer_state *ls)
+{
+ int r = 0; /* pending error code (CPPERR_NEST), 0 if none */
+
+ while (next_token(ls)) { /* non-zero: presumably the current input is exhausted */
+ if (protect_detect.state == 3) {
+ /*
+ * At that point, protect_detect.ff->protect might
+ * be non-zero, if the file has been recursively
+ * included, and a guardian detected.
+ */
+ if (!protect_detect.ff->protect) {
+ /* Cool ! A new guardian has been detected. */
+ protect_detect.ff->protect =
+ protect_detect.macro;
+ } else if (protect_detect.macro) {
+ /* We found a guardian but an old one. */
+ freemem(protect_detect.macro);
+ }
+ protect_detect.macro = 0;
+ }
+ if (ls->ifnest) {
+ error(ls->line, "unterminated #if construction "
+ "(depth %ld)", ls->ifnest);
+ r = CPPERR_NEST;
+ }
+ if (ls_depth == 0) return CPPERR_EOF; /* no including file to go back to */
+ close_input(ls);
+ if (!(ls->flags & LEXER) && !ls->ltwnl) {
+ put_char(ls, '\n');
+ ls->ltwnl = 1;
+ }
+ pop_file_context(ls);
+ ls->oline ++;
+ if (enter_file(ls, ls->flags)) {
+ ls->ctok->type = NEWLINE; /* pretend a newline was read and leave the loop */
+ ls->ltwnl = 1;
+ break;
+ }
+ }
+ if (!(ls->ltwnl && (ls->ctok->type == SHARP
+ || ls->ctok->type == DIG_SHARP))
+ && protect_detect.state == 1 && !ttWHI(ls->ctok->type)) {
+ /* the first non-whitespace token encountered is not
+ a sharp introducing a cpp directive */
+ protect_detect.state = 0;
+ }
+ if (protect_detect.state == 3 && !ttWHI(ls->ctok->type)) {
+ /* a non-whitespace token encountered after the #endif */
+ protect_detect.state = 0;
+ }
+ if (ls->condcomp) {
+ if (ls->ltwnl && (ls->ctok->type == SHARP
+ || ls->ctok->type == DIG_SHARP)) { /* '#' first on its line: a directive */
+ int x = handle_cpp(ls, ls->ctok->type);
+
+ ls->ltwnl = 1;
+ return r ? r : x;
+ }
+ if (ls->ctok->type == NAME) {
+ struct macro *m;
+
+ if ((m = get_macro(ls->ctok->name)) != 0) {
+ int x;
+
+ x = substitute_macro(ls, m, 0, 1, 0,
+ ls->ctok->line);
+ if (!(ls->flags & LEXER))
+ garbage_collect(ls->gf);
+ return r ? r : x;
+ }
+ if (!(ls->flags & LEXER))
+ print_token(ls, ls->ctok, 0);
+ }
+ } else { /* compiled-out code: directives are still handled, other tokens are not */
+ if (ls->ltwnl && (ls->ctok->type == SHARP
+ || ls->ctok->type == DIG_SHARP)) {
+ int x = handle_cpp(ls, ls->ctok->type);
+
+ ls->ltwnl = 1;
+ return r ? r : x;
+ }
+ }
+ if (ls->ctok->type == NEWLINE) ls->ltwnl = 1; /* ltwnl is set by a newline, cleared by any non-whitespace token */
+ else if (!ttWHI(ls->ctok->type)) ls->ltwnl = 0;
+ return r ? r : -1;
+}
+
+#ifndef STAND_ALONE
+/*
+ * llex() and lex() are the lexing functions, when the preprocessor is
+ * linked to another code. llex() should be called only by lex().
+ *
+ * llex() first hands out the tokens waiting in ls->output_fifo, one
+ * per call. When that fifo is used up it is released and cpp() is
+ * called, again and again while cpp() returns 0. Digraph tokens are
+ * converted to their standard equivalent.
+ *
+ * return value: 0 when a token is available in ls->ctok, otherwise the
+ * positive code returned by cpp().
+ */
+static int llex(struct lexer_state *ls)
+{
+	/*
+	 * This is a loop rather than a self-call, so that a long run of
+	 * cpp() calls returning 0 does not deepen the call stack.
+	 */
+	for (;;) {
+		struct token_fifo *tf = ls->output_fifo;
+		int r;
+
+		if (tf->nt != 0) {
+			if (tf->art < tf->nt) {
+#ifdef INMACRO_FLAG
+				if (!ls->inmacro) {
+					ls->inmacro = 1;
+					ls->macro_count ++;
+				}
+#endif
+				ls->ctok = tf->t + (tf->art ++);
+				if (ls->ctok->type > DIGRAPH_TOKENS
+					&& ls->ctok->type < DIGRAPH_TOKENS_END) {
+					ls->ctok->type = undig(ls->ctok->type);
+				}
+				return 0;
+			}
+			/* the fifo has been read entirely: release it */
+#ifdef INMACRO_FLAG
+			ls->inmacro = 0;
+#endif
+			freemem(tf->t);
+			tf->art = tf->nt = 0;
+			garbage_collect(ls->gf);
+			ls->ctok = ls->save_ctok;
+		}
+		r = cpp(ls);
+		/*
+		 * Same range test as above: undig() only has entries for
+		 * the types strictly between DIGRAPH_TOKENS and
+		 * DIGRAPH_TOKENS_END.
+		 */
+		if (ls->ctok->type > DIGRAPH_TOKENS
+			&& ls->ctok->type < DIGRAPH_TOKENS_END) {
+			ls->ctok->type = undig(ls->ctok->type);
+		}
+		if (r > 0) return r;
+		if (r < 0) return 0;
+		/* r == 0: nothing to hand out yet, try again */
+	}
+}
+
+/*
+ * lex() fetches the next token of the preprocessed stream into
+ * ls->ctok, calling llex() until it has something to hand out. Tokens
+ * obtained while ls->condcomp is zero are skipped. Unless
+ * SEMPER_FIDELIS is defined, whitespace tokens are skipped as well,
+ * except NEWLINE tokens when the LINE_NUM flag is set.
+ * return value: non zero on error (including CPPERR_EOF, which is not
+ * quite an error)
+ */
+int lex(struct lexer_state *ls)
+{
+	for (;;) {
+		int status = llex(ls);
+
+		if (status) return status;
+		if (!ls->condcomp) continue;
+#ifndef SEMPER_FIDELIS
+		if (ttWHI(ls->ctok->type)
+			&& (!(ls->flags & LINE_NUM)
+				|| ls->ctok->type != NEWLINE))
+			continue;
+#endif
+		return status;
+	}
+}
+#endif
+
+/*
+ * check_cpp_errors() is to be called once the end of input has been
+ * reached. It terminates the output with a newline (when KEEP_OUTPUT
+ * is set), terminates the dependency line if dependencies are emitted,
+ * flushes the output buffer outside LEXER mode, and prints the final
+ * trigraph count when asked to. There is currently no pending error to
+ * check, so the return value is always 0.
+ */
+int check_cpp_errors(struct lexer_state *ls)
+{
+	if (ls->flags & KEEP_OUTPUT)
+		put_char(ls, '\n');
+	if (emit_dependencies)
+		fputc('\n', emit_output);
+#ifndef NO_UCPP_BUF
+	if (!(ls->flags & LEXER))
+		flush_output(ls);
+#endif
+	if (ls->flags & WARN_TRIGRAPHS) {
+		if (ls->count_trigraphs) {
+			warning(0, "%ld trigraph(s) encountered",
+				ls->count_trigraphs);
+		}
+	}
+	return 0;
+}
+
+/*
+ * init_cpp() sets up the static tables of ucpp; all the work is done
+ * by init_cppm(). Calling it once is enough.
+ */
+void init_cpp(void)
+{
+	init_cppm();
+}
+
+/*
+ * (re)init the global tables: the two internal lexer states, the
+ * compile_time and compile_date strings (quoted, from the current
+ * local time), the macro table and the found-files tables.
+ * If with_assertions is non 0, init the assertions table too.
+ */
+void init_tables(int with_assertions)
+{
+	time_t t;
+	struct tm *ct;
+
+	init_buf_lexer_state(&dsharp_lexer, 0);
+#ifdef PRAGMA_TOKENIZE
+	init_buf_lexer_state(&tokenize_lexer, 0);
+#endif
+	time(&t);
+	ct = localtime(&t);
+	if (!ct) {
+		/*
+		 * localtime() may return a null pointer; the date is
+		 * then unknown and we use placeholders of the same shape
+		 * as the regular strings (terminating 0 included in the
+		 * copied length).
+		 */
+		mmv(compile_time, "\"??:??:??\"", 11);
+		mmv(compile_date, "\"??? ?? ????\"", 14);
+	} else {
+#ifdef NOSTRFTIME
+		/* we have a quite old compiler, that does not know the
+		   (standard since 1990) strftime() function. */
+		char *c = asctime(ct);
+
+		/* asctime() gives "Www Mmm dd hh:mm:ss yyyy\n" */
+		compile_time[0] = '"';
+		mmv(compile_time + 1, c + 11, 8);
+		compile_time[9] = '"';
+		compile_time[10] = 0;
+		compile_date[0] = '"';
+		mmv(compile_date + 1, c + 4, 7);
+		mmv(compile_date + 8, c + 20, 4);
+		compile_date[12] = '"';
+		compile_date[13] = 0;
+#else
+		strftime(compile_time, 12, "\"%H:%M:%S\"", ct);
+		strftime(compile_date, 24, "\"%b %d %Y\"", ct);
+#endif
+	}
+	init_macros();
+	if (with_assertions) init_assertions();
+	init_found_files();
+}
+
+/*
+ * Reset the include path: the current entries (if any) are released,
+ * then a copy of each string of "incpath" is added. "incpath" is an
+ * array terminated by a null pointer; it may itself be a null pointer,
+ * in which case the include path is simply left empty.
+ */
+void init_include_path(char *incpath[])
+{
+	if (include_path_nb) {
+		size_t k = 0;
+
+		while (k < include_path_nb) {
+			freemem(include_path[k]);
+			k ++;
+		}
+		freemem(include_path);
+		include_path_nb = 0;
+	}
+	if (incpath) {
+		char **dir;
+
+		for (dir = incpath; *dir; dir ++) {
+			aol(include_path, include_path_nb,
+				sdup(*dir), INCPATH_MEMG);
+		}
+	}
+}
+
+/*
+ * add_incpath() appends a copy of "path" to the include path.
+ */
+void add_incpath(char *path)
+{
+	char *copy = sdup(path);
+
+	aol(include_path, include_path_nb, copy, INCPATH_MEMG);
+}
+
+/*
+ * Release everything: include path, current file name, include-guard
+ * detection state, stacked file contexts, the internal lexer states,
+ * the found-files tables, macros and assertions. This may be called
+ * even if the current pre-processing is not finished or reported an
+ * error.
+ */
+void wipeout()
+{
+	struct lexer_state scratch;
+
+	if (include_path_nb > 0) {
+		size_t k = 0;
+
+		while (k < include_path_nb) {
+			freemem(include_path[k]);
+			k ++;
+		}
+		freemem(include_path);
+		include_path = 0;
+		include_path_nb = 0;
+	}
+
+	if (current_filename) {
+		freemem(current_filename);
+		current_filename = 0;
+	}
+	current_long_filename = 0;
+	current_incdir = -1;
+
+	protect_detect.state = 0;
+	if (protect_detect.macro) {
+		freemem(protect_detect.macro);
+		protect_detect.macro = 0;
+	}
+	protect_detect.ff = 0;
+
+	/* a throw-away lexer state is used to unstack the file contexts */
+	init_lexer_state(&scratch);
+	while (ls_depth > 0) {
+		pop_file_context(&scratch);
+	}
+	free_lexer_state(&scratch);
+
+	free_lexer_state(&dsharp_lexer);
+#ifdef PRAGMA_TOKENIZE
+	free_lexer_state(&tokenize_lexer);
+#endif
+
+	if (found_files_init_done) {
+		HTT_kill(&found_files);
+		found_files_init_done = 0;
+	}
+	if (found_files_sys_init_done) {
+		HTT_kill(&found_files_sys);
+		found_files_sys_init_done = 0;
+	}
+	wipe_macros();
+	wipe_assertions();
+}
+
+#ifdef STAND_ALONE
+/*
+ * Print the command-line help on stderr; "command_name" is shown as
+ * the program name in the first line.
+ */
+static void usage(char *command_name)
+{
+	fprintf(stderr,
+	"Usage: %s [options] [file]\n"
+	"language options:\n"
+	" -C keep comments in output\n"
+	" -s keep '#' when no cpp directive is recognized\n"
+	" -l do not emit line numbers\n"
+	" -lg emit gcc-like line numbers\n"
+	" -CC disable C++-like comments\n"
+	" -a, -na, -a0 handle (or not) assertions\n"
+	" -V disable macros with extra arguments\n"
+	" -u understand UTF-8 in source\n"
+	" -X enable -a, -u and -Y\n"
+	" -c90 mimic C90 behaviour\n"
+	" -t disable trigraph support\n"
+	"warning options:\n"
+	" -wt emit a final warning when trigraphs are encountered\n"
+	" -wtt emit warnings for each trigraph encountered\n"
+	" -wa emit warnings that are usually useless\n"
+	" -w0 disable standard warnings\n"
+	"directory options:\n"
+	" -I directory add 'directory' before the standard include path\n"
+	" -J directory add 'directory' after the standard include path\n"
+	" -zI do not use the standard include path\n"
+	" -M emit Makefile-like dependencies instead of normal "
+	"output\n"
+	" -Ma emit also dependencies for system files\n"
+	" -o file store output in file\n"
+	"macro and assertion options:\n"
+	" -Dmacro predefine 'macro'\n"
+	" -Dmacro=def predefine 'macro' with 'def' content\n"
+	" -Umacro undefine 'macro'\n"
+	" -Afoo(bar) assert foo(bar)\n"
+	" -Bfoo(bar) unassert foo(bar)\n"
+	" -Y predefine system-dependent macros\n"
+	" -Z do not predefine special macros\n"
+	" -d emit defined macros\n"
+	" -e emit assertions\n"
+	"misc options:\n"
+	" -v print version number and settings\n"
+	" -h show this help\n",
+	command_name);
+}
+
+/*
+ * Print on stderr the version number, followed by the include search
+ * path as currently configured, one directory per line.
+ */
+static void version(void)
+{
+	size_t k = 0;
+
+	fprintf(stderr, "ucpp version %d.%d\n", VERS_MAJ, VERS_MIN);
+	fprintf(stderr, "search path:\n");
+	while (k < include_path_nb) {
+		fprintf(stderr, " %s\n", include_path[k]);
+		k ++;
+	}
+}
+
+/*
+ * parse_opt() initializes many things according to the command-line
+ * options: the lexer state "ls" and its flags, input and output files,
+ * the global tables, the include path, and the macros and assertions
+ * given on the command line.
+ *
+ * The options are scanned several times: a first pass handles the
+ * flags, the output file and the input file name; -I, -D, -U, -A, -B
+ * and -J are then applied by separate passes, in that order, once the
+ * tables have been initialized.
+ *
+ * Return values:
+ * 0 on success
+ * 1 on semantic error (redefinition of a special macro, for instance);
+ *   also returned when the input file cannot be opened, and after the
+ *   version has been printed for -v
+ * 2 on syntaxic error (unknown options for instance); also returned
+ *   for -h
+ */
+static int parse_opt(int argc, char *argv[], struct lexer_state *ls)
+{
+	int i, ret = 0;
+	char *filename = 0;
+	int with_std_incpath = 1;
+	int print_version = 0, print_defs = 0, print_asserts = 0;
+	int system_macros = 0, standard_assertions = 1;
+
+	init_lexer_state(ls);
+	ls->flags = DEFAULT_CPP_FLAGS;
+	emit_output = ls->output = stdout;
+	for (i = 1; i < argc; i ++) if (argv[i][0] == '-') {
+		if (!strcmp(argv[i], "-h")) {
+			return 2;
+		} else if (!strcmp(argv[i], "-C")) {
+			ls->flags &= ~DISCARD_COMMENTS;
+		} else if (!strcmp(argv[i], "-CC")) {
+			ls->flags &= ~CPLUSPLUS_COMMENTS;
+		} else if (!strcmp(argv[i], "-a")) {
+			ls->flags |= HANDLE_ASSERTIONS;
+		} else if (!strcmp(argv[i], "-na")) {
+			ls->flags |= HANDLE_ASSERTIONS;
+			standard_assertions = 0;
+		} else if (!strcmp(argv[i], "-a0")) {
+			ls->flags &= ~HANDLE_ASSERTIONS;
+		} else if (!strcmp(argv[i], "-V")) {
+			ls->flags &= ~MACRO_VAARG;
+		} else if (!strcmp(argv[i], "-u")) {
+			ls->flags |= UTF8_SOURCE;
+		} else if (!strcmp(argv[i], "-X")) {
+			ls->flags |= HANDLE_ASSERTIONS;
+			ls->flags |= UTF8_SOURCE;
+			system_macros = 1;
+		} else if (!strcmp(argv[i], "-c90")) {
+			ls->flags &= ~MACRO_VAARG;
+			ls->flags &= ~CPLUSPLUS_COMMENTS;
+			c99_compliant = 0;
+			c99_hosted = -1;
+		} else if (!strcmp(argv[i], "-t")) {
+			ls->flags &= ~HANDLE_TRIGRAPHS;
+		} else if (!strcmp(argv[i], "-wt")) {
+			ls->flags |= WARN_TRIGRAPHS;
+		} else if (!strcmp(argv[i], "-wtt")) {
+			ls->flags |= WARN_TRIGRAPHS_MORE;
+		} else if (!strcmp(argv[i], "-wa")) {
+			ls->flags |= WARN_ANNOYING;
+		} else if (!strcmp(argv[i], "-w0")) {
+			ls->flags &= ~WARN_STANDARD;
+			ls->flags &= ~WARN_PRAGMA;
+		} else if (!strcmp(argv[i], "-s")) {
+			ls->flags &= ~FAIL_SHARP;
+		} else if (!strcmp(argv[i], "-l")) {
+			ls->flags &= ~LINE_NUM;
+		} else if (!strcmp(argv[i], "-lg")) {
+			ls->flags |= GCC_LINE_NUM;
+		} else if (!strcmp(argv[i], "-M")) {
+			ls->flags &= ~KEEP_OUTPUT;
+			emit_dependencies = 1;
+		} else if (!strcmp(argv[i], "-Ma")) {
+			ls->flags &= ~KEEP_OUTPUT;
+			emit_dependencies = 2;
+		} else if (!strcmp(argv[i], "-Y")) {
+			system_macros = 1;
+		} else if (!strcmp(argv[i], "-Z")) {
+			no_special_macros = 1;
+		} else if (!strcmp(argv[i], "-d")) {
+			ls->flags &= ~KEEP_OUTPUT;
+			print_defs = 1;
+		} else if (!strcmp(argv[i], "-e")) {
+			ls->flags &= ~KEEP_OUTPUT;
+			print_asserts = 1;
+		} else if (!strcmp(argv[i], "-zI")) {
+			with_std_incpath = 0;
+		} else if (!strcmp(argv[i], "-I") || !strcmp(argv[i], "-J")) {
+			/*
+			 * The directory is the next argument; it is added
+			 * to the path by a later pass. It must exist,
+			 * otherwise that pass would read past the last
+			 * argument.
+			 */
+			if ((++ i) >= argc) {
+				error(-1, "missing directory after %s",
+					argv[i - 1]);
+				return 2;
+			}
+		} else if (!strcmp(argv[i], "-o")) {
+			if ((++ i) >= argc) {
+				error(-1, "missing filename after -o");
+				return 2;
+			}
+			if (argv[i][0] == '-' && argv[i][1] == 0) {
+				/* "-o -" means standard output */
+				emit_output = ls->output = stdout;
+			} else {
+				ls->output = fopen(argv[i], "w");
+				if (!ls->output) {
+					error(-1, "failed to open for "
+						"writing: %s", argv[i]);
+					return 2;
+				}
+				emit_output = ls->output;
+			}
+		} else if (!strcmp(argv[i], "-v")) {
+			print_version = 1;
+		} else if (argv[i][1] != 'I' && argv[i][1] != 'J'
+			&& argv[i][1] != 'D' && argv[i][1] != 'U'
+			&& argv[i][1] != 'A' && argv[i][1] != 'B')
+			warning(-1, "unknown option '%s'", argv[i]);
+	} else {
+		if (filename != 0) {
+			error(-1, "spurious filename '%s'", argv[i]);
+			return 2;
+		}
+		filename = argv[i];
+	}
+	init_tables(ls->flags & HANDLE_ASSERTIONS);
+	init_include_path(0);
+	if (filename) {
+#ifdef UCPP_MMAP
+		FILE *f = fopen_mmap_file(filename);
+
+		ls->input = 0;
+		if (f) set_input_file(ls, f);
+#else
+		ls->input = fopen(filename, "r");
+#endif
+		if (!ls->input) {
+			error(-1, "file '%s' not found", filename);
+			return 1;
+		}
+#ifdef NO_LIBC_BUF
+		setbuf(ls->input, 0);
+#endif
+		set_init_filename(filename, 1);
+	} else {
+		ls->input = stdin;
+		set_init_filename("<stdin>", 0);
+	}
+	/* -I directories come before the standard include path */
+	for (i = 1; i < argc; i ++)
+		if (argv[i][0] == '-' && argv[i][1] == 'I') {
+			char *dir = argv[i][2] ? argv[i] + 2 : argv[i + 1];
+
+			/* dir is null only when the bare option is the
+			   last argument (argv[argc] is a null pointer) */
+			if (dir) add_incpath(dir);
+		}
+	if (system_macros) for (i = 0; system_macros_def[i]; i ++)
+		ret = ret || define_macro(ls, system_macros_def[i]);
+	for (i = 1; i < argc; i ++)
+		if (argv[i][0] == '-' && argv[i][1] == 'D')
+			ret = ret || define_macro(ls, argv[i] + 2);
+	for (i = 1; i < argc; i ++)
+		if (argv[i][0] == '-' && argv[i][1] == 'U')
+			ret = ret || undef_macro(ls, argv[i] + 2);
+	if (ls->flags & HANDLE_ASSERTIONS) {
+		if (standard_assertions)
+			for (i = 0; system_assertions_def[i]; i ++)
+				make_assertion(system_assertions_def[i]);
+		for (i = 1; i < argc; i ++)
+			if (argv[i][0] == '-' && argv[i][1] == 'A')
+				ret = ret || make_assertion(argv[i] + 2);
+		for (i = 1; i < argc; i ++)
+			if (argv[i][0] == '-' && argv[i][1] == 'B')
+				ret = ret || destroy_assertion(argv[i] + 2);
+	} else {
+		for (i = 1; i < argc; i ++)
+			if (argv[i][0] == '-'
+				&& (argv[i][1] == 'A' || argv[i][1] == 'B'))
+				warning(-1, "assertions disabled");
+	}
+	if (with_std_incpath) {
+		for (i = 0; include_path_std[i]; i ++)
+			add_incpath(include_path_std[i]);
+	}
+	/* -J directories come after the standard include path */
+	for (i = 1; i < argc; i ++)
+		if (argv[i][0] == '-' && argv[i][1] == 'J') {
+			char *dir = argv[i][2] ? argv[i] + 2 : argv[i + 1];
+
+			if (dir) add_incpath(dir);
+		}
+
+	if (print_version) {
+		version();
+		return 1;
+	}
+	if (print_defs) {
+		print_defines();
+		emit_defines = 1;
+	}
+	if (print_asserts && (ls->flags & HANDLE_ASSERTIONS)) {
+		print_assertions();
+		emit_assertions = 1;
+	}
+	return ret;
+}
+
+/*
+ * Stand-alone preprocessor: parse the options, preprocess the input
+ * until cpp() reports CPPERR_EOF (or a greater value), then finalize
+ * the output and release the memory.
+ * The exit status is EXIT_FAILURE if option parsing returned non-zero
+ * or if any error was reported during preprocessing.
+ */
+int main(int argc, char *argv[])
+{
+	struct lexer_state ls;
+	int r, fr = 0;
+
+	init_cpp();
+	if ((r = parse_opt(argc, argv, &ls)) != 0) {
+		if (r == 2) usage(argv[0]);
+		return EXIT_FAILURE;
+	}
+	enter_file(&ls, ls.flags);
+	while ((r = cpp(&ls)) < CPPERR_EOF) fr = fr || (r > 0);
+	/*
+	 * check_cpp_errors() also writes the final newline and flushes
+	 * the output, so it must be called even when an error has
+	 * already been recorded ("fr || check_cpp_errors()" skipped it
+	 * in that case).
+	 */
+	if (check_cpp_errors(&ls)) fr = 1;
+	free_lexer_state(&ls);
+	wipeout();
+#ifdef MEM_DEBUG
+	report_leaks();
+#endif
+	return fr ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+#endif
diff --git a/libexec/auxcpp/cpp.h b/libexec/auxcpp/cpp.h
new file mode 100644
index 00000000000..2bb707a324c
--- /dev/null
+++ b/libexec/auxcpp/cpp.h
@@ -0,0 +1,317 @@
+/*
+ * (c) Thomas Pornin 1999 - 2002
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef UCPP__CPP__
+#define UCPP__CPP__
+
+/*
+ * Uncomment the following if you want ucpp to use externally provided
+ * error-reporting functions (ucpp_warning(), ucpp_error() and ucpp_ouch())
+ */
+/* #define NO_UCPP_ERROR_FUNCTIONS */
+
+/*
+ * Tokens (do not change the order unless checking operators_name[] in cpp.c)
+ *
+ * It is important that the token NONE is 0
+ * Check the STRING_TOKEN macro
+ *
+ * UPLUS and UMINUS do not continue the sequence: they are numbered
+ * from CPPERR. eval_expr() in eval.c rewrites PLUS and MINUS tokens
+ * into them when they are used as unary operators.
+ */
+#define CPPERR 512
+enum {
+ NONE, /* whitespace */
+ NEWLINE, /* newline */
+ COMMENT, /* comment */
+ NUMBER, /* number constant */
+ NAME, /* identifier */
+ BUNCH, /* non-C characters */
+ PRAGMA, /* a #pragma directive */
+ CONTEXT, /* new file or #line */
+ STRING, /* constant "xxx" */
+ CHAR, /* constant 'xxx' */
+ SLASH, /* / */
+ ASSLASH, /* /= */
+ MINUS, /* - */
+ MMINUS, /* -- */
+ ASMINUS, /* -= */
+ ARROW, /* -> */
+ PLUS, /* + */
+ PPLUS, /* ++ */
+ ASPLUS, /* += */
+ LT, /* < */
+ LEQ, /* <= */
+ LSH, /* << */
+ ASLSH, /* <<= */
+ GT, /* > */
+ GEQ, /* >= */
+ RSH, /* >> */
+ ASRSH, /* >>= */
+ ASGN, /* = */
+ SAME, /* == */
+#ifdef CAST_OP
+ CAST, /* => */
+#endif
+ NOT, /* ~ */
+ NEQ, /* != */
+ AND, /* & */
+ LAND, /* && */
+ ASAND, /* &= */
+ OR, /* | */
+ LOR, /* || */
+ ASOR, /* |= */
+ PCT, /* % */
+ ASPCT, /* %= */
+ STAR, /* * */
+ ASSTAR, /* *= */
+ CIRC, /* ^ */
+ ASCIRC, /* ^= */
+ LNOT, /* ! */
+ LBRA, /* { */
+ RBRA, /* } */
+ LBRK, /* [ */
+ RBRK, /* ] */
+ LPAR, /* ( */
+ RPAR, /* ) */
+ COMMA, /* , */
+ QUEST, /* ? */
+ SEMIC, /* ; */
+ COLON, /* : */
+ DOT, /* . */
+ MDOTS, /* ... */
+ SHARP, /* # */
+ DSHARP, /* ## */
+
+ OPT_NONE, /* optional space to separate tokens in text output */
+
+ DIGRAPH_TOKENS, /* there begin digraph tokens */
+
+ /* for DIG_*, do not change order, unless checking undig() in cpp.c */
+ DIG_LBRK, /* <: */
+ DIG_RBRK, /* :> */
+ DIG_LBRA, /* <% */
+ DIG_RBRA, /* %> */
+ DIG_SHARP, /* %: */
+ DIG_DSHARP, /* %:%: */
+
+ DIGRAPH_TOKENS_END, /* digraph tokens end here */
+
+ LAST_MEANINGFUL_TOKEN, /* reserved words will go there */
+
+ MACROARG, /* special token for representing macro arguments */
+
+ /* numbering restarts at CPPERR for the two unary pseudo-tokens */
+ UPLUS = CPPERR, /* unary + */
+ UMINUS /* unary - */
+};
+
+#include "tune.h"
+#include <stdio.h>
+#include <setjmp.h>
+
+/*
+ * A single token: its type (a value of the token enumeration above),
+ * a line number and its text.
+ */
+struct token {
+ int type;
+ long line; /* presumably the source line of the token -- TODO confirm */
+ char *name; /* only relevant when STRING_TOKEN(type) holds */
+};
+
+/*
+ * An array of tokens with a read cursor. The expression evaluator in
+ * eval.c takes t[art] as the next token to consume and treats
+ * art == nt as the end of the list.
+ */
+struct token_fifo {
+ struct token *t;
+ size_t nt, art;
+};
+
+/*
+ * State of one lexer/preprocessor instance, passed to most of the
+ * public functions declared further down. Fields are grouped into
+ * input control, output control, token control and lexer options;
+ * some of them exist only under NO_UCPP_BUF, UCPP_MMAP or
+ * INMACRO_FLAG build settings.
+ */
+struct lexer_state {
+ /* input control */
+ FILE *input;
+#ifndef NO_UCPP_BUF
+ unsigned char *input_buf;
+#ifdef UCPP_MMAP
+ int from_mmap;
+ unsigned char *input_buf_sav;
+#endif
+#endif
+ unsigned char *input_string;
+ size_t ebuf;
+ size_t pbuf;
+ int lka[2];
+ int nlka;
+ int macfile;
+ int last;
+ int discard;
+ unsigned long utf8;
+ unsigned char copy_line[COPY_LINE_LENGTH];
+ int cli;
+
+ /* output control */
+ FILE *output;
+ struct token_fifo *output_fifo, *toplevel_of;
+#ifndef NO_UCPP_BUF
+ unsigned char *output_buf;
+#endif
+ size_t sbuf;
+
+ /* token control */
+ struct token *ctok;
+ struct token *save_ctok;
+ size_t tknl;
+ int ltwnl;
+ int pending_token;
+#ifdef INMACRO_FLAG
+ int inmacro;
+ long macro_count;
+#endif
+
+ /* lexer options */
+ long line;
+ long oline;
+ unsigned long flags; /* combination of the flag bits defined below */
+ long count_trigraphs;
+ struct garbage_fifo *gf;
+ int ifnest;
+ int condnest;
+ int condcomp;
+ int condmet;
+ unsigned long condf[2];
+};
+
+/*
+ * Flags for struct lexer_state
+ */
+/* warning flags */
+#define WARN_STANDARD 0x000001UL /* emit standard warnings */
+#define WARN_ANNOYING 0x000002UL /* emit annoying warnings */
+#define WARN_TRIGRAPHS 0x000004UL /* warn when trigraphs are used */
+#define WARN_TRIGRAPHS_MORE 0x000008UL /* extra-warn for trigraphs */
+#define WARN_PRAGMA 0x000010UL /* warn for pragmas in non-lexer mode */
+
+/* error flags */
+#define FAIL_SHARP 0x000020UL /* emit errors on rogue '#' */
+#define CCHARSET 0x000040UL /* emit errors on non-C characters */
+
+/* emission flags */
+#define DISCARD_COMMENTS 0x000080UL /* discard comments from text output */
+#define CPLUSPLUS_COMMENTS 0x000100UL /* understand C++-like comments */
+#define LINE_NUM 0x000200UL /* emit #line directives in output */
+#define GCC_LINE_NUM 0x000400UL /* same as #line, with gcc-syntax */
+
+/* language flags */
+#define HANDLE_ASSERTIONS 0x000800UL /* understand assertions */
+#define HANDLE_PRAGMA 0x001000UL /* emit PRAGMA tokens in lexer mode */
+#define MACRO_VAARG 0x002000UL /* understand macros with '...' */
+#define UTF8_SOURCE 0x004000UL /* identifiers are in UTF8 encoding */
+#define HANDLE_TRIGRAPHS 0x008000UL /* handle trigraphs */
+
+/* global ucpp behaviour */
+#define LEXER 0x010000UL /* behave as a lexer */
+#define KEEP_OUTPUT 0x020000UL /* emit the result of preprocessing */
+#define COPY_LINE 0x040000UL /* make a copy of the parsed line */
+
+/* internal flags */
+#define READ_AGAIN 0x080000UL /* emit again the last token */
+#define TEXT_OUTPUT 0x100000UL /* output text */
+
+/*
+ * Public function prototypes
+ */
+
+#ifndef NO_UCPP_BUF
+void flush_output(struct lexer_state *);
+#endif
+
+void init_assertions(void);
+int make_assertion(char *);
+int destroy_assertion(char *);
+void print_assertions(void);
+
+void init_macros(void);
+int define_macro(struct lexer_state *, char *);
+int undef_macro(struct lexer_state *, char *);
+void print_defines(void);
+
+void set_init_filename(char *, int);
+void init_cpp(void);
+void init_include_path(char *[]);
+void init_lexer_state(struct lexer_state *);
+void init_lexer_mode(struct lexer_state *);
+void free_lexer_state(struct lexer_state *);
+void wipeout(void);
+int lex(struct lexer_state *);
+int check_cpp_errors(struct lexer_state *);
+void add_incpath(char *);
+void init_tables(int);
+int enter_file(struct lexer_state *, unsigned long);
+int cpp(struct lexer_state *);
+void set_identifier_char(int c);
+void unset_identifier_char(int c);
+
+#ifdef UCPP_MMAP
+FILE *fopen_mmap_file(char *);
+void set_input_file(struct lexer_state *, FILE *);
+#endif
+
+/*
+ * Type of the object(s) returned by report_context(). The meaning of
+ * the fields is not visible in this header -- presumably a file name
+ * in long and short form plus a line number; TODO confirm in cpp.c.
+ */
+struct stack_context {
+ char *long_name, *name;
+ long line;
+};
+struct stack_context *report_context(void);
+
+extern int no_special_macros, system_macros,
+ emit_dependencies, emit_defines, emit_assertions;
+extern int c99_compliant, c99_hosted;
+extern FILE *emit_output;
+extern char *current_filename, *current_long_filename;
+extern char *operators_name[];
+/*
+ * Global state shared through protect_detect.
+ * NOTE(review): field semantics are not visible from this header;
+ * presumably used to detect include-guard macros -- confirm in cpp.c.
+ */
+extern struct protect {
+ char *macro;
+ int state;
+ struct found_file *ff;
+} protect_detect;
+
+void ucpp_ouch(char *, ...);
+void ucpp_error(long, char *, ...);
+void ucpp_warning(long, char *, ...);
+
+extern int *transient_characters;
+
+/*
+ * Errors from CPPERR_EOF and above are not real errors, only show-stoppers.
+ * Errors below CPPERR_EOF are real ones.
+ */
+#define CPPERR_NEST 900
+#define CPPERR_EOF 1000
+
+/*
+ * This macro tells whether the name field of a given token type is
+ * relevant, or not. Irrelevant name field means that it might point
+ * to outerspace.
+ */
+#ifdef SEMPER_FIDELIS
+#define STRING_TOKEN(x) ((x) == NONE || ((x) >= COMMENT && (x) <= CHAR))
+#else
+#define STRING_TOKEN(x) ((x) >= NUMBER && (x) <= CHAR)
+#endif
+
+#endif
diff --git a/libexec/auxcpp/eval.c b/libexec/auxcpp/eval.c
new file mode 100644
index 00000000000..82971973f75
--- /dev/null
+++ b/libexec/auxcpp/eval.c
@@ -0,0 +1,699 @@
+/*
+ * (c) Thomas Pornin 1999 - 2002
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "tune.h"
+#include <stdio.h>
+#include <string.h>
+#include <setjmp.h>
+#include <limits.h>
+#include "ucppi.h"
+#include "mem.h"
+
+JMP_BUF eval_exception;
+long eval_line;
+static int emit_eval_warnings;
+
+/*
+ * If you want to hardcode a conversion table, define a static array
+ * of 256 int, and make transient_characters point to it.
+ */
+int *transient_characters = 0;
+
+/*
+ * Character classification (OCTAL, DECIM, HEXAD) and digit value
+ * (OVAL, DVAL, HVAL) helpers. OCTAL, DECIM, HEXAD and HVAL evaluate
+ * their argument several times, so it must be free of side effects.
+ * HVAL is only meaningful for a character accepted by HEXAD: anything
+ * that is neither a decimal digit nor one of a-e/A-E yields 15.
+ */
+#define OCTAL(x) ((x) >= '0' && (x) <= '7')
+#define DECIM(x) ((x) >= '0' && (x) <= '9')
+#define HEXAD(x) (DECIM(x) \
+ || (x) == 'a' || (x) == 'b' || (x) == 'c' \
+ || (x) == 'd' || (x) == 'e' || (x) == 'f' \
+ || (x) == 'A' || (x) == 'B' || (x) == 'C' \
+ || (x) == 'D' || (x) == 'E' || (x) == 'F')
+#define OVAL(x) ((int)((x) - '0'))
+#define DVAL(x) ((int)((x) - '0'))
+#define HVAL(x) (DECIM(x) ? DVAL(x) \
+ : (x) == 'a' || (x) == 'A' ? 10 \
+ : (x) == 'b' || (x) == 'B' ? 11 \
+ : (x) == 'c' || (x) == 'C' ? 12 \
+ : (x) == 'd' || (x) == 'D' ? 13 \
+ : (x) == 'e' || (x) == 'E' ? 14 : 15)
+
+/*
+ * Configuration for the inclusion of arith.c below. Errors raised by
+ * the arithmetic code are routed to z_error() and, when
+ * ARITHMETIC_CHECKS is defined, warnings to z_warn().
+ * NOTE(review): ARITH_TYPENAME presumably yields the big_u_*, big_s_*,
+ * u_big and s_big names used in this file -- confirm in arith.h.
+ */
+#define ARITH_TYPENAME big
+#define ARITH_FUNCTION_HEADER static inline
+
+#define ARITH_ERROR(type) z_error(type)
+static void z_error(int type);
+
+#ifdef ARITHMETIC_CHECKS
+#define ARITH_WARNING(type) z_warn(type)
+static void z_warn(int type);
+#endif
+
+#include "arith.c"
+
+/*
+ * Handler installed as ARITH_ERROR for arith.c: report the arithmetic
+ * failure identified by type on line eval_line, then leave the
+ * evaluation by throwing eval_exception. A type that is not listed
+ * produces no message (an ouch() when AUDIT is defined).
+ */
+static void z_error(int type)
+{
+	char *msg = 0;
+
+	switch (type) {
+	case ARITH_EXCEP_SLASH_D:
+		msg = "division by 0";
+		break;
+	case ARITH_EXCEP_SLASH_O:
+		msg = "overflow on division";
+		break;
+	case ARITH_EXCEP_PCT_D:
+		msg = "division by 0 on modulus operator";
+		break;
+	case ARITH_EXCEP_CONST_O:
+		msg = "constant too large for destination type";
+		break;
+#ifdef AUDIT
+	default:
+		ouch("erroneous integer error: %d", type);
+#endif
+	}
+	if (msg) error(eval_line, "%s", msg);
+	throw(eval_exception);
+}
+
+#ifdef ARITHMETIC_CHECKS
+/*
+ * Handler installed as ARITH_WARNING for arith.c: emit, on line
+ * eval_line, the warning matching the arithmetic condition type.
+ * Evaluation continues afterwards. A type that is not listed produces
+ * no message (an ouch() when AUDIT is defined).
+ */
+static void z_warn(int type)
+{
+	char *msg = 0;
+
+	switch (type) {
+	case ARITH_EXCEP_CONV_O:
+		msg = "overflow on integer conversion";
+		break;
+	case ARITH_EXCEP_NEG_O:
+		msg = "overflow on unary minus";
+		break;
+	case ARITH_EXCEP_NOT_T:
+		msg = "bitwise inversion yields trap representation";
+		break;
+	case ARITH_EXCEP_PLUS_O:
+		msg = "overflow on addition";
+		break;
+	case ARITH_EXCEP_PLUS_U:
+		msg = "underflow on addition";
+		break;
+	case ARITH_EXCEP_MINUS_O:
+		msg = "overflow on subtraction";
+		break;
+	case ARITH_EXCEP_MINUS_U:
+		msg = "underflow on subtraction";
+		break;
+	case ARITH_EXCEP_AND_T:
+		msg = "bitwise AND yields trap representation";
+		break;
+	case ARITH_EXCEP_XOR_T:
+		msg = "bitwise XOR yields trap representation";
+		break;
+	case ARITH_EXCEP_OR_T:
+		msg = "bitwise OR yields trap representation";
+		break;
+	case ARITH_EXCEP_LSH_W:
+		msg = "left shift count greater than "
+			"or equal to type width";
+		break;
+	case ARITH_EXCEP_LSH_C:
+		msg = "left shift count negative";
+		break;
+	case ARITH_EXCEP_LSH_O:
+		msg = "overflow on left shift";
+		break;
+	case ARITH_EXCEP_RSH_W:
+		msg = "right shift count greater than "
+			"or equal to type width";
+		break;
+	case ARITH_EXCEP_RSH_C:
+		msg = "right shift count negative";
+		break;
+	case ARITH_EXCEP_RSH_N:
+		msg = "right shift of negative value";
+		break;
+	case ARITH_EXCEP_STAR_O:
+		msg = "overflow on multiplication";
+		break;
+	case ARITH_EXCEP_STAR_U:
+		msg = "underflow on multiplication";
+		break;
+#ifdef AUDIT
+	default:
+		ouch("erroneous integer warning: %d", type);
+#endif
+	}
+	if (msg) warning(eval_line, "%s", msg);
+}
+#endif
+
+/*
+ * A value handled by the #if expression evaluator. sign selects the
+ * active union member: non-zero for the signed value u.sv, zero for
+ * the unsigned value u.uv.
+ */
+typedef struct {
+ int sign;
+ union {
+ u_big uv;
+ s_big sv;
+ } u;
+} ppval;
+
+/*
+ * Logical value of x, as computed by big_s_lval() or big_u_lval() on
+ * the union member selected by x.sign.
+ */
+static int boolval(ppval x)
+{
+	if (x.sign) return big_s_lval(x.u.sv);
+	return big_u_lval(x.u.uv);
+}
+
+/*
+ * Signedness given to wide character constants (L'x') unless the build
+ * already defines WCHAR_SIGNEDNESS: it follows the signedness of plain
+ * char, deduced from CHAR_MIN.
+ */
+#if !defined(WCHAR_SIGNEDNESS)
+# if CHAR_MIN == 0
+# define WCHAR_SIGNEDNESS 0
+# else
+# define WCHAR_SIGNEDNESS 1
+# endif
+#endif
+
+/*
+ * Decode the suffix d of an integer constant whose full text is refc
+ * (used for the diagnostic only). Returns 1 when the constant is
+ * signed (no suffix, l, L, ll, LL) and 0 when it is unsigned (u or U
+ * combined, in either order, with an optional l/L or ll/LL). The two
+ * letters of a "long long" suffix must have the same case. Any other
+ * suffix is reported with error() and eval_exception is thrown.
+ */
+static int pp_suffix(char *d, char *refc)
+{
+	char first = *d;
+
+	if (first == '\0') return 1;
+	if (first == 'u' || first == 'U') {
+		char *l = d + 1;
+
+		if (*l == '\0') return 0;
+		if (*l == 'l' || *l == 'L') {
+			/* "ul" or "ull"-like; both l must match in case */
+			if (l[1] == '\0') return 0;
+			if (l[1] == l[0] && l[2] == '\0') return 0;
+		}
+	} else if (first == 'l' || first == 'L') {
+		char *p = d + 1;
+
+		if (*p == '\0') return 1;
+		if (*p == first) {
+			p ++;
+			if (*p == '\0') return 1;
+		}
+		/* a trailing u/U must end the suffix */
+		if ((*p == 'u' || *p == 'U') && p[1] == '\0') return 0;
+	}
+	error(eval_line, "invalid integer constant '%s'", refc);
+	throw(eval_exception);
+	return 666;
+}
+
+/*
+ * Compute the value of the character constant whose opening quote is
+ * at c; refc is the complete token text, used only in diagnostics.
+ * Only the first character (or escape sequence) is converted. If it
+ * is not followed by a closing quote, a "multicharacter constant"
+ * warning is emitted when evaluation warnings are enabled. Malformed
+ * constants are reported with error() and eval_exception is thrown.
+ */
+static unsigned long pp_char(char *c, char *refc)
+{
+ unsigned long r = 0;
+
+ /* skip the opening quote */
+ c ++;
+ if (*c == '\\') {
+ int i;
+
+ c ++;
+ switch (*c) {
+ case 'n': r = '\n'; c ++; break;
+ case 't': r = '\t'; c ++; break;
+ case 'v': r = '\v'; c ++; break;
+ case 'b': r = '\b'; c ++; break;
+ case 'r': r = '\r'; c ++; break;
+ case 'f': r = '\f'; c ++; break;
+ case 'a': r = '\a'; c ++; break;
+ case '\\': r = '\\'; c ++; break;
+ case '\?': r = '\?'; c ++; break;
+ case '\'': r = '\''; c ++; break;
+ case '\"': r = '\"'; c ++; break;
+ case 'u':
+ /* \u must be followed by exactly four hexadecimal digits */
+ for (i = 0, c ++; i < 4 && HEXAD(*c); i ++, c ++) {
+ r = (r * 16) + HVAL(*c);
+ }
+ if (i != 4) {
+ error(eval_line, "malformed UCN in %s", refc);
+ throw(eval_exception);
+ }
+ break;
+ case 'U':
+ /* \U must be followed by exactly eight hexadecimal digits */
+ for (i = 0, c ++; i < 8 && HEXAD(*c); i ++, c ++) {
+ r = (r * 16) + HVAL(*c);
+ }
+ if (i != 8) {
+ error(eval_line, "malformed UCN in %s", refc);
+ throw(eval_exception);
+ }
+ break;
+ case 'x':
+ /* \x takes every hexadecimal digit that follows, possibly none */
+ for (c ++; HEXAD(*c); c ++) r = (r * 16) + HVAL(*c);
+ break;
+ default:
+ /* octal escape: one to three octal digits */
+ if (OCTAL(*c)) {
+ r = OVAL(*(c ++));
+ if (OCTAL(*c)) r = (r * 8) + OVAL(*(c ++));
+ if (OCTAL(*c)) r = (r * 8) + OVAL(*(c ++));
+ } else {
+ error(eval_line, "invalid escape sequence "
+ "'\\%c'", *c);
+ throw(eval_exception);
+ }
+ }
+ } else if (*c == '\'') {
+ error(eval_line, "empty character constant");
+ throw(eval_exception);
+ } else {
+ r = *((unsigned char *)(c ++));
+ }
+
+ /* optional translation table, applied to values below 256 only */
+ if (transient_characters && r < 256) {
+ r = transient_characters[(size_t)r];
+ }
+
+ /* c now points just after the first character of the constant */
+ if (*c != '\'' && emit_eval_warnings) {
+ warning(eval_line, "multicharacter constant");
+ }
+ return r;
+}
+
+/*
+ * Convert the text refc of a numeric or character constant into a
+ * ppval. Character constants (optionally prefixed with L) are handled
+ * by pp_char(): a plain one is signed, a wide one follows
+ * WCHAR_SIGNEDNESS. Integer constants are parsed by the
+ * big_u_hexconst/octconst/decconst helpers according to their prefix,
+ * and their signedness comes from the suffix through pp_suffix().
+ * Errors are reported with error() and eval_exception is thrown.
+ */
+static ppval pp_strtoconst(char *refc)
+{
+ ppval q;
+ char *c = refc, *d;
+ u_big ru;
+ s_big rs;
+ int sp, dec;
+
+ if (*c == '\'' || *c == 'L') {
+ q.sign = (*c == 'L') ? WCHAR_SIGNEDNESS : 1;
+ /* an L prefix must be immediately followed by the opening quote */
+ if (*c == 'L' && *(++ c) != '\'') {
+ error(eval_line,
+ "invalid wide character constant: %s", refc);
+ throw(eval_exception);
+ }
+ if (q.sign) {
+ q.u.sv = big_s_fromlong(pp_char(c, refc));
+ } else {
+ q.u.uv = big_u_fromulong(pp_char(c, refc));
+ }
+ return q;
+ }
+ if (*c == '0') {
+ /* octal or hexadecimal */
+ dec = 0;
+ c ++;
+ if (*c == 'x' || *c == 'X') {
+ c ++;
+ d = big_u_hexconst(c, &ru, &rs, &sp);
+ } else {
+ d = big_u_octconst(c, &ru, &rs, &sp);
+ }
+ } else {
+ dec = 1;
+ d = big_u_decconst(c, &ru, &rs, &sp);
+ }
+ /* d is handed to pp_suffix() as the start of the suffix */
+ q.sign = pp_suffix(d, refc);
+ if (q.sign) {
+ /* sp == 0: presumably the value does not fit the signed type
+ -- TODO confirm against arith.c */
+ if (!sp) {
+ if (dec) {
+ error(eval_line, "constant too large "
+ "for destination type");
+ throw(eval_exception);
+ } else {
+ warning(eval_line, "constant is so large "
+ "that it is unsigned");
+ }
+ q.u.uv = ru;
+ q.sign = 0;
+ } else {
+ q.u.sv = rs;
+ }
+ } else {
+ q.u.uv = ru;
+ }
+ return q;
+}
+
+/*
+ * Convert the constant c to an unsigned long. Used by #line
+ * directives: anything beyond what fits in an unsigned long is
+ * considered absurd. A signed result is first converted to the
+ * unsigned big type.
+ */
+unsigned long strtoconst(char *c)
+{
+	ppval q = pp_strtoconst(c);
+	u_big magnitude;
+
+	if (q.sign) magnitude = big_s_to_u(q.u.sv);
+	else magnitude = q.u.uv;
+	return big_u_toulong(magnitude);
+}
+
+/* True when x is one of the unary operator tokens. */
+#define OP_UN(x) ((x) == LNOT || (x) == NOT || (x) == UPLUS \
+ || (x) == UMINUS)
+
+/*
+ * Apply the unary operator op (LNOT, NOT, UPLUS or UMINUS) to v and
+ * return the result. The result of LNOT is always signed; the other
+ * operators keep the signedness of v. Any other op leaves v unchanged.
+ */
+static ppval eval_opun(int op, ppval v)
+{
+ if (op == LNOT) {
+ /* NOTE(review): u.sv is read even when v is unsigned (v.sign == 0);
+ this relies on s_big and u_big sharing their representation of
+ zero -- confirm against arith.c */
+ v.sign = 1;
+ v.u.sv = big_s_fromint(big_s_lnot(v.u.sv));
+ return v;
+ }
+ if (v.sign) {
+ switch (op) {
+ case NOT: v.u.sv = big_s_not(v.u.sv); break;
+ case UPLUS: break;
+ case UMINUS: v.u.sv = big_s_neg(v.u.sv); break;
+ }
+ } else {
+ switch (op) {
+ case NOT: v.u.uv = big_u_not(v.u.uv); break;
+ case UPLUS: break;
+ case UMINUS: v.u.uv = big_u_neg(v.u.uv); break;
+ }
+ }
+ return v;
+}
+
+/* True when x is one of the binary operator tokens (comma included). */
+#define OP_BIN(x) ((x) == STAR || (x) == SLASH || (x) == PCT \
+ || (x) == PLUS || (x) == MINUS || (x) == LSH \
+ || (x) == RSH || (x) == LT || (x) == LEQ \
+ || (x) == GT || (x) == GEQ || (x) == SAME \
+ || (x) == NEQ || (x) == AND || (x) == CIRC \
+ || (x) == OR || (x) == LAND || (x) == LOR \
+ || (x) == COMMA)
+
+/*
+ * Apply the binary operator op to v1 and v2 and return the result.
+ * A first switch decides the signedness of the result and converts
+ * the operands where needed; a second switch performs the operation
+ * through the big_* arithmetic functions.
+ */
+static ppval eval_opbin(int op, ppval v1, ppval v2)
+{
+ ppval r;
+ int iv2 = 0;
+
+ switch (op) {
+ case STAR: case SLASH: case PCT:
+ case PLUS: case MINUS: case AND:
+ case CIRC: case OR:
+ /* promote operands, adjust signedness of result */
+ if (!v1.sign || !v2.sign) {
+ if (v1.sign) {
+ v1.u.uv = big_s_to_u(v1.u.sv);
+ v1.sign = 0;
+ } else if (v2.sign) {
+ v2.u.uv = big_s_to_u(v2.u.sv);
+ v2.sign = 0;
+ }
+ r.sign = 0;
+ } else {
+ r.sign = 1;
+ }
+ break;
+ case LT: case LEQ: case GT:
+ case GEQ: case SAME: case NEQ:
+ /* promote operands */
+ if (!v1.sign || !v2.sign) {
+ if (v1.sign) {
+ v1.u.uv = big_s_to_u(v1.u.sv);
+ v1.sign = 0;
+ } else if (v2.sign) {
+ v2.u.uv = big_s_to_u(v2.u.sv);
+ v2.sign = 0;
+ }
+ }
+ /* fall through */
+ case LAND:
+ case LOR:
+ /* result is signed anyway */
+ r.sign = 1;
+ break;
+ case LSH:
+ case RSH:
+ /* result is as signed as left operand; convert right
+ operand to int */
+ r.sign = v1.sign;
+ if (v2.sign) {
+ iv2 = big_s_toint(v2.u.sv);
+ } else {
+ iv2 = big_u_toint(v2.u.uv);
+ }
+ break;
+ case COMMA:
+ if (emit_eval_warnings) {
+ warning(eval_line, "ISO C forbids evaluated comma "
+ "operators in #if expressions");
+ }
+ r.sign = v2.sign;
+ break;
+#ifdef AUDIT
+ default: ouch("a good operator is a dead operator");
+#endif
+ }
+
+/* operation whose result has the signedness computed in r.sign */
+#define SBINOP(x) if (r.sign) r.u.sv = big_s_ ## x (v1.u.sv, v2.u.sv); \
+ else r.u.uv = big_u_ ## x (v1.u.uv, v2.u.uv);
+
+/* comparison: selected by the signedness of v1, result stored as signed */
+#define NSSBINOP(x) if (v1.sign) r.u.sv = big_s_fromint(big_s_ ## x \
+ (v1.u.sv, v2.u.sv)); else r.u.sv = big_s_fromint( \
+ big_u_ ## x (v1.u.uv, v2.u.uv));
+
+/* logical operator x applied to the logical values of both operands;
+ both operands are read according to the signedness of v1 */
+#define LBINOP(x) if (v1.sign) r.u.sv = big_s_fromint( \
+ big_s_lval(v1.u.sv) x big_s_lval(v2.u.sv)); \
+ else r.u.sv = big_s_fromint( \
+ big_u_lval(v1.u.uv) x big_u_lval(v2.u.uv));
+
+/* shift of v1 by the int count iv2 */
+#define ABINOP(x) if (r.sign) r.u.sv = big_s_ ## x (v1.u.sv, iv2); \
+ else r.u.uv = big_u_ ## x (v1.u.uv, iv2);
+
+ switch (op) {
+ case STAR: SBINOP(star); break;
+ case SLASH: SBINOP(slash); break;
+ case PCT: SBINOP(pct); break;
+ case PLUS: SBINOP(plus); break;
+ case MINUS: SBINOP(minus); break;
+ case LSH: ABINOP(lsh); break;
+ case RSH: ABINOP(rsh); break;
+ case LT: NSSBINOP(lt); break;
+ case LEQ: NSSBINOP(leq); break;
+ case GT: NSSBINOP(gt); break;
+ case GEQ: NSSBINOP(geq); break;
+ case SAME: NSSBINOP(same); break;
+ case NEQ: NSSBINOP(neq); break;
+ case AND: SBINOP(and); break;
+ case CIRC: SBINOP(xor); break;
+ case OR: SBINOP(or); break;
+ case LAND: LBINOP(&&); break;
+ case LOR: LBINOP(||); break;
+ case COMMA: r = v2; break;
+ }
+ return r;
+}
+
+#define ttOP(x) (OP_UN(x) || OP_BIN(x) || (x) == QUEST || (x) == COLON)
+
+/*
+ * Precedence of the operator token op: from 13 for the unary
+ * operators down to 1 for the comma; a higher value binds tighter.
+ * An operator that is not listed yields 666 (after an ouch() when
+ * AUDIT is defined).
+ */
+static int op_prec(int op)
+{
+	int prec;
+
+	switch (op) {
+	case LNOT: case NOT: case UPLUS: case UMINUS:
+		prec = 13; break;
+	case STAR: case SLASH: case PCT:
+		prec = 12; break;
+	case PLUS: case MINUS:
+		prec = 11; break;
+	case LSH: case RSH:
+		prec = 10; break;
+	case LT: case LEQ: case GT: case GEQ:
+		prec = 9; break;
+	case SAME: case NEQ:
+		prec = 8; break;
+	case AND:
+		prec = 7; break;
+	case CIRC:
+		prec = 6; break;
+	case OR:
+		prec = 5; break;
+	case LAND:
+		prec = 4; break;
+	case LOR:
+		prec = 3; break;
+	case QUEST:
+		prec = 2; break;
+	case COMMA:
+		prec = 1; break;
+	default:
+#ifdef AUDIT
+		ouch("an unknown species should have a higher precedence");
+#endif
+		prec = 666;
+		break;
+	}
+	return prec;
+}
+
+/*
+ * Perform the hard work of evaluation.
+ *
+ * This function works because:
+ * -- all unary operators are right to left associative, and with
+ * identical precedence
+ * -- all binary operators are left to right associative
+ * -- there is only one non-unary and non-binary operator: the quest-colon
+ *
+ * If do_eval is 0, the evaluation of operators is not done. This is
+ * for sequence point operators (&&, || and ?:).
+ *
+ * tf is read from its cursor tf->art, which is advanced past every
+ * token consumed. A binary operator is consumed only when its
+ * precedence is strictly greater than minprec; the quest-colon is
+ * consumed when its precedence is greater than or equal to minprec.
+ * The value of the sub-expression is returned. Syntax errors are
+ * reported with error() and eval_exception is thrown.
+ */
+static ppval eval_shrd(struct token_fifo *tf, int minprec, int do_eval)
+{
+ ppval top;
+ struct token *ct;
+
+ top.sign = 1;
+ if (tf->art == tf->nt) goto trunc_err;
+ ct = tf->t + (tf->art ++);
+ /* first, read a primary: parenthesis, constant or unary operator */
+ if (ct->type == LPAR) {
+ top = eval_shrd(tf, 0, do_eval);
+ if (tf->art == tf->nt) goto trunc_err;
+ ct = tf->t + (tf->art ++);
+ if (ct->type != RPAR) {
+ error(eval_line, "a right parenthesis was expected");
+ throw(eval_exception);
+ }
+ } else if (ct->type == NUMBER || ct->type == CHAR) {
+ top = pp_strtoconst(ct->name);
+ } else if (OP_UN(ct->type)) {
+ /* the unary operator is applied whatever the value of do_eval */
+ top = eval_opun(ct->type, eval_shrd(tf,
+ op_prec(ct->type), do_eval));
+ goto eval_loop;
+ } else if (ttOP(ct->type)) goto rogue_op_err;
+ else {
+ goto invalid_token_err;
+ }
+
+eval_loop:
+ /* then, absorb the operators that bind tightly enough */
+ if (tf->art == tf->nt) {
+ return top;
+ }
+ ct = tf->t + (tf->art ++);
+ if (OP_BIN(ct->type)) {
+ int bp = op_prec(ct->type);
+
+ if (bp > minprec) {
+ ppval tr;
+
+ if ((ct->type == LOR && boolval(top))
+ || (ct->type == LAND && !boolval(top))) {
+ /* short circuit: parse the right operand without
+ evaluating its operators */
+ tr = eval_shrd(tf, bp, 0);
+ if (do_eval) {
+ top.sign = 1;
+ if (ct->type == LOR)
+ top.u.sv = big_s_fromint(1);
+ if (ct->type == LAND)
+ top.u.sv = big_s_fromint(0);
+ }
+ } else {
+ tr = eval_shrd(tf, bp, do_eval);
+ if (do_eval)
+ top = eval_opbin(ct->type, top, tr);
+ }
+ goto eval_loop;
+ }
+ } else if (ct->type == QUEST) {
+ int bp = op_prec(QUEST);
+ ppval r1, r2;
+
+ if (bp >= minprec) {
+ int qv = boolval(top);
+
+ /* only the selected branch has its operators evaluated */
+ r1 = eval_shrd(tf, bp, qv ? do_eval : 0);
+ if (tf->art == tf->nt) goto trunc_err;
+ ct = tf->t + (tf->art ++);
+ if (ct->type != COLON) {
+ error(eval_line, "a colon was expected");
+ throw(eval_exception);
+ }
+ r2 = eval_shrd(tf, bp, qv ? 0 : do_eval);
+ if (do_eval) {
+ if (qv) top = r1; else top = r2;
+ }
+ goto eval_loop;
+ }
+ }
+ /* the token was not consumed: give it back to the caller */
+ tf->art --;
+ return top;
+
+trunc_err:
+ error(eval_line, "truncated constant integral expression");
+ throw(eval_exception);
+rogue_op_err:
+ error(eval_line, "rogue operator '%s' in constant integral "
+ "expression", operators_name[ct->type]);
+ throw(eval_exception);
+invalid_token_err:
+ error(eval_line, "invalid token in constant integral expression");
+ throw(eval_exception);
+}
+
+/*
+ * True when a + or - that follows a token of type x must be taken as
+ * a unary operator.
+ */
+#define UNARY(x) ((x) != NUMBER && (x) != NAME && (x) != CHAR \
+ && (x) != RPAR)
+
+/*
+ * Evaluate the integer expression held in tf, starting at its cursor,
+ * and return its logical value. *ret is set to 0 on success and to 1
+ * on error, in which case 0 is returned. ew enables the evaluation
+ * warnings.
+ *
+ * Evaluation is made by precedence of operators, as described in the
+ * Dragon Book. The unary + and - are distinguished from their binary
+ * counterparts using the Fortran way: a + or a - is considered unary
+ * if it does not follow a constant, an identifier or a right
+ * parenthesis.
+ */
+unsigned long eval_expr(struct token_fifo *tf, int *ret, int ew)
+{
+	size_t first, i;
+	ppval result;
+
+	emit_eval_warnings = ew;
+	if (catch(eval_exception)) goto eval_err;
+	/* first pass: retag the unary + and - tokens in place */
+	first = tf->art;
+	for (i = first; i < tf->nt; i ++) {
+		struct token *tk = tf->t + i;
+
+		if (tk->type != PLUS && tk->type != MINUS) continue;
+		if (i == first || UNARY(tf->t[i - 1].type))
+			tk->type = (tk->type == PLUS) ? UPLUS : UMINUS;
+	}
+	/* second pass: the evaluation itself */
+	result = eval_shrd(tf, 0, 1);
+	if (tf->art < tf->nt) {
+		error(eval_line, "trailing garbage in constant integral "
+			"expression");
+		goto eval_err;
+	}
+	*ret = 0;
+	return boolval(result);
+eval_err:
+	*ret = 1;
+	return 0;
+}
diff --git a/libexec/auxcpp/hash.c b/libexec/auxcpp/hash.c
new file mode 100644
index 00000000000..60bd2124137
--- /dev/null
+++ b/libexec/auxcpp/hash.c
@@ -0,0 +1,329 @@
+/*
+ * Generic hash table routines.
+ * (c) Thomas Pornin 1998, 1999, 2000
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <string.h>
+#include "hash.h"
+#include "mem.h"
+#include "tune.h"
+
+/*
+ * hash_string() is a sample hash function for strings. With FAST_HASH
+ * the result lies between 0 and 127; otherwise it is the XOR of all
+ * the bytes of the string, between 0 and 255.
+ */
+int hash_string(char *s)
+{
+	const unsigned char *p = (const unsigned char *)s;
+#ifdef FAST_HASH
+	unsigned acc = 0;
+
+	for (; *p; p ++) {
+		unsigned high;
+
+		acc = (acc << 4) + *p;
+		high = acc & 0xF000U;
+		if (high != 0) acc ^= (high >> 12);
+		acc &= ~high;
+	}
+	return (acc ^ (acc >> 9)) & 127U;
+#else
+	unsigned char x = 0;
+
+	while (*p) x ^= *(p ++);
+	return (int)x;
+#endif
+}
+
+/*
+ * struct hash_item is the basic data type to internally handle hash tables:
+ * one node of the singly linked list of a bucket.
+ */
+struct hash_item {
+ void *data; /* the stored item */
+ struct hash_item *next; /* next node of the bucket, or 0 */
+};
+
+/*
+ * Allocate a node holding data, link it in front of the list blist
+ * and return the new head of the list.
+ */
+static struct hash_item *add_entry(struct hash_item *blist, void *data)
+{
+	struct hash_item *node = getmem(sizeof *node);
+
+	node->next = blist;
+	node->data = data;
+	return node;
+}
+
+/*
+ * Look up data in the list blist. cmpdata(data, item) must return
+ * non-zero when the two are to be considered identical. The first
+ * matching node is returned, or 0 if there is none.
+ */
+static struct hash_item *get_entry(struct hash_item *blist, void *data,
+	int (*cmpdata)(void *, void *))
+{
+	struct hash_item *cur;
+
+	for (cur = blist; cur; cur = cur->next) {
+		if ((*cmpdata)(data, cur->data)) break;
+	}
+	return cur;
+}
+
+/*
+ * This function acts like get_entry but deletes the found item, using
+ * the provided function deldata() (which may be 0) on its data.
+ *
+ * It returns the head of the list after the operation. When the given
+ * data is not found the list is left untouched and its head is
+ * returned unchanged, so that the caller can always store the result
+ * back into the bucket. (It used to return 0 in that case, which made
+ * delHT() drop and leak the whole bucket when asked to delete a
+ * missing item.)
+ */
+static struct hash_item *del_entry(struct hash_item *blist, void *data,
+	int (*cmpdata)(void *, void *), void (*deldata)(void *))
+{
+	struct hash_item *prev = 0, *cur;
+
+	for (cur = blist; cur; prev = cur, cur = cur->next) {
+		if (!(*cmpdata)(data, cur->data)) continue;
+		if (deldata) (*deldata)(cur->data);
+		/* unlink: either from its predecessor or from the head */
+		if (prev) prev->next = cur->next;
+		else blist = cur->next;
+		freemem(cur);
+		break;
+	}
+	return blist;
+}
+
+/*
+ * Create a new hash table with n empty buckets. cmpdata, hash and
+ * deldata are the comparison, hashing and item-destruction functions
+ * stored in the table for the other routines.
+ */
+struct HT *newHT(int n, int (*cmpdata)(void *, void *), int (*hash)(void *),
+	void (*deldata)(void *))
+{
+	struct HT *table = getmem(sizeof *table);
+	int i = 0;
+
+	table->nb_lists = n;
+	table->cmpdata = cmpdata;
+	table->hash = hash;
+	table->deldata = deldata;
+	table->lists = getmem(n * sizeof *table->lists);
+	while (i < n) table->lists[i ++] = 0;
+	return table;
+}
+
+/*
+ * Add data to the hash table ht unless an equal item is already
+ * stored. Returns 0 when data was inserted, or the item already
+ * present otherwise (ht is then left unchanged).
+ */
+void *putHT(struct HT *ht, void *data)
+{
+	struct hash_item *found;
+	int slot = (*ht->hash)(data);
+
+#ifndef FAST_HASH
+	slot %= ht->nb_lists;
+#endif
+	found = get_entry(ht->lists[slot], data, ht->cmpdata);
+	if (found != 0) return found->data;
+	ht->lists[slot] = add_entry(ht->lists[slot], data);
+	return 0;
+}
+
+/*
+ * Add data to the hash table ht even if an equal entry is already
+ * there. Exercise caution !
+ * The new entry is linked in front of its bucket and thus "hides" the
+ * old one: lookup and delete find the new entry, not the old one.
+ * Always returns 0.
+ */
+void *forceputHT(struct HT *ht, void *data)
+{
+	struct hash_item **bucket;
+	int slot = (*ht->hash)(data);
+
+#ifndef FAST_HASH
+	slot %= ht->nb_lists;
+#endif
+	bucket = ht->lists + slot;
+	*bucket = add_entry(*bucket, data);
+	return 0;
+}
+
+/*
+ * Find the item equal to *data in the hash table ht, according to the
+ * comparison function given to newHT(). Returns the stored item, or 0
+ * if there is none.
+ */
+void *getHT(struct HT *ht, void *data)
+{
+	struct hash_item *found;
+	int slot = (*ht->hash)(data);
+
+#ifndef FAST_HASH
+	slot %= ht->nb_lists;
+#endif
+	found = get_entry(ht->lists[slot], data, ht->cmpdata);
+	return found ? found->data : 0;
+}
+
+/*
+ * Find and delete the entry corresponding to *data in the hash table
+ * ht (using the comparison function given as argument to newHT): the
+ * bucket of data is handed to del_entry() and replaced by the list
+ * head it returns. Always returns 1.
+ */
+
+int delHT(struct HT *ht, void *data)
+{
+	struct hash_item **bucket;
+	int slot = (*ht->hash)(data);
+
+#ifndef FAST_HASH
+	slot %= ht->nb_lists;
+#endif
+	bucket = ht->lists + slot;
+	*bucket = del_entry(*bucket, data, ht->cmpdata, ht->deldata);
+	return 1;
+}
+
+/*
+ * Completely eradicate a hash table from memory: every item is passed
+ * to the deldata function (when there is one), then all the nodes,
+ * the bucket array and the table itself are released.
+ */
+void killHT(struct HT *ht)
+{
+	void (*destroy)(void *) = ht->deldata;
+	int slot;
+
+	for (slot = 0; slot < ht->nb_lists; slot ++) {
+		struct hash_item *cur = ht->lists[slot];
+
+		while (cur) {
+			struct hash_item *next = cur->next;
+
+			if (destroy) (*destroy)(cur->data);
+			freemem(cur);
+			cur = next;
+		}
+	}
+	freemem(ht->lists);
+	freemem(ht);
+}
+
+/*
+ * Store a backup of the hash table, for context stacking: the
+ * nb_lists bucket heads of ht are copied into buffer, which must have
+ * room for them.
+ */
+void saveHT(struct HT *ht, void **buffer)
+{
+	struct hash_item **dest = (struct hash_item **)buffer;
+	size_t len = ht->nb_lists * sizeof(struct hash_item *);
+
+	mmv(dest, ht->lists, len);
+}
+
+/*
+ * This function restores the saved state of the hash table: in each
+ * bucket, every item linked in front of the head recorded in buffer
+ * by saveHT() is destroyed and released.
+ * Do NOT use if some of the entries that were present before the backup
+ * have been removed (even temporarily).
+ *
+ * The deldata function is optional, as in killHT() and del_entry():
+ * when the table has none, the items are simply unlinked.
+ */
+void restoreHT(struct HT *ht, void **buffer)
+{
+	struct hash_item **saved = (struct hash_item **)buffer;
+	void (*destroy)(void *) = ht->deldata;
+	int i;
+
+	for (i = 0; i < ht->nb_lists; i ++) {
+		struct hash_item *t = ht->lists[i], *n;
+
+		/* drop every node pushed in front of the saved head */
+		while (t != saved[i]) {
+			n = t->next;
+			if (destroy) (*destroy)(t->data);
+			freemem(t);
+			t = n;
+		}
+		ht->lists[i] = saved[i];
+	}
+}
+
+/*
+ * This function is evil. It inserts a new item in a saved hash table,
+ * tweaking the save buffer and the hash table in order to keep things
+ * stable. There are no checks.
+ *
+ * The new node is linked in front of the saved head buffer[h], that is
+ * behind the nodes added to the bucket since the backup, and becomes
+ * the new saved head.
+ */
+void tweakHT(struct HT *ht, void **buffer, void *data)
+{
+ int h;
+ struct hash_item *d, *e;
+
+ h = ((*(ht->hash))(data));
+#ifndef FAST_HASH
+ h %= ht->nb_lists;
+#endif
+ /* walk from the current head to the saved head; d is overwritten
+ right after, so this loop only traverses the chain */
+ for (d = ht->lists[h]; d != buffer[h]; d = d->next);
+ d = add_entry(buffer[h], data);
+ /* nothing was added since the backup: the new node heads both */
+ if (buffer[h] == ht->lists[h]) {
+ buffer[h] = ht->lists[h] = d;
+ return;
+ }
+ /* otherwise find the last node added since the backup and link
+ the new node after it */
+ for (e = ht->lists[h]; e->next != buffer[h]; e = e->next);
+ e->next = d;
+ buffer[h] = d;
+}
+
+/*
+ * Scan the whole table, bucket after bucket, and call action on the
+ * data of each entry.
+ */
+void scanHT(struct HT *ht, void (*action)(void *))
+{
+	struct hash_item *cur;
+	int slot;
+
+	for (slot = 0; slot < ht->nb_lists; slot ++) {
+		for (cur = ht->lists[slot]; cur; cur = cur->next)
+			(*action)(cur->data);
+	}
+}
+
+/*
+ * The two following functions are generic for storing structures
+ * uniquely identified by their name, which must be the first
+ * field of the structure.
+ */
+/*
+ * Hash a structure whose first field is its name (a char *): the
+ * result is hash_string() of that name, kept between 0 and 127.
+ */
+int hash_struct(void *m)
+{
+	char **name_field = m;
+	int h = hash_string(*name_field);
+
+#ifndef FAST_HASH
+	h &= 127;
+#endif
+	return h;
+}
+
+/*
+ * Compare two structures whose first field is their name (a char *):
+ * returns non-zero when both names are equal strings.
+ */
+int cmp_struct(void *m1, void *m2)
+{
+	char **a = m1, **b = m2;
+
+	return strcmp(*a, *b) == 0;
+}
diff --git a/libexec/auxcpp/hash.h b/libexec/auxcpp/hash.h
new file mode 100644
index 00000000000..3adbb777df9
--- /dev/null
+++ b/libexec/auxcpp/hash.h
@@ -0,0 +1,58 @@
+/*
+ * (c) Thomas Pornin 1998, 1999, 2000
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef UCPP__HASH__
+#define UCPP__HASH__
+
+/* list item type; only used through pointers in this header */
+struct hash_item;
+
+/*
+ * A hash table: an array of nb_lists item lists, plus the callbacks
+ * used to handle the (opaque) data attached to each item.
+ */
+struct HT {
+ /* heads of the item lists, indexed from 0 to nb_lists - 1 */
+ struct hash_item **lists;
+ int nb_lists;
+ /* NOTE(review): presumably returns non-zero when both data match
+    (cmp_struct() does so) -- confirm against the users in hash.c */
+ int (*cmpdata)(void *, void *);
+ /* hash value of a data item; it selects the list holding the item */
+ int (*hash)(void *);
+ /* called on the data of an item when that item is released */
+ void (*deldata)(void *);
+};
+
+/* hash value of a character string */
+int hash_string(char *);
+/*
+ * table creation; the function pointers have the same types as the
+ * cmpdata, hash and deldata members of struct HT, in that order
+ */
+struct HT *newHT(int, int (*)(void *, void *), int (*)(void *),
+ void (*)(void *));
+void *putHT(struct HT *, void *);
+void *forceputHT(struct HT *, void *);
+void *getHT(struct HT *, void *);
+int delHT(struct HT *, void *);
+void killHT(struct HT *);
+/*
+ * backup support: the void ** argument is a save buffer holding one
+ * list head per list of the table
+ */
+void saveHT(struct HT *, void **);
+void restoreHT(struct HT *, void **);
+void tweakHT(struct HT *, void **, void *);
+/* call the given function on the data of every item of the table */
+void scanHT(struct HT *, void (*)(void *));
+/*
+ * generic hash / compare functions for structures whose first field
+ * is a pointer to their name
+ */
+int hash_struct(void *);
+int cmp_struct(void *, void *);
+
+#endif
diff --git a/libexec/auxcpp/lexer.c b/libexec/auxcpp/lexer.c
new file mode 100644
index 00000000000..38125676318
--- /dev/null
+++ b/libexec/auxcpp/lexer.c
@@ -0,0 +1,1020 @@
+/*
+ * (c) Thomas Pornin 1999 - 2002
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "tune.h"
+#include <stdio.h>
+#include <string.h>
+#include <stddef.h>
+#include <limits.h>
+#include "ucppi.h"
+#include "mem.h"
+#ifdef UCPP_MMAP
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#endif
+
+/*
+ * Character classes for the description of the automaton.
+ * A character used to stand for a class must not appear explicitly
+ * (as itself) in an automaton rule.
+ */
+#define SPC	' '	/* whitespace characters */
+#define ALP	'Z'	/* A-Z, a-z, _ */
+#define NUM	'9'	/* 0-9 */
+#define ANY	'Y'	/* any character */
+#define VCH	'F'	/* void character (for end of input) */
+
+/*
+ * Modifier flags, stored above the low eight bits of an automaton
+ * action, and the macros setting (XXX) and testing (ttXXX) them:
+ *   STO: the currently read string is a complete token
+ *   FRZ: the currently read character must be kept and read again
+ *   PUT: the currently read character must be added to the string
+ * noMOD() strips all the flags and yields the bare state or token type.
+ */
+#define MOD_MK		0xff
+#define noMOD(x)	((x) & 0xff)
+#define STO(x)		((x) | 0x100)
+#define ttSTO(x)	((x) & 0x100)
+#define FRZ(x)		((x) | 0x200)
+#define ttFRZ(x)	((x) & 0x200)
+#define PUT(x)		((x) | 0x400)
+#define ttPUT(x)	((x) & 0x400)
+
+/*
+ * Automaton states. Order is important:
+ *  -- the values before MSTATE are the real states; they index the
+ *     rows of cppm[][] and cppm_vch[], whose size is MSTATE;
+ *  -- S_COMMENT to S_COMMENT5 must remain contiguous (see CMT());
+ *  -- the values after MSTATE are pseudo-states: read_token() does
+ *     not jump to them but converts them into an error, a warning or
+ *     a token. S_OUCH is the filler for transitions which have no rule.
+ */
+enum {
+ S_START, S_SPACE, S_BANG, S_STRING, S_STRING2, S_COLON,
+ S_SHARP, S_PCT, S_PCT2, S_PCT3, S_AMPER, S_CHAR, S_CHAR2, S_STAR,
+ S_PLUS, S_MINUS, S_DOT, S_DOT2, S_SLASH, S_NUMBER, S_NUMBER2, S_LT,
+ S_LT2, S_EQ, S_GT, S_GT2, S_CIRC, S_PIPE, S_BACKSLASH,
+ S_COMMENT, S_COMMENT2, S_COMMENT3, S_COMMENT4, S_COMMENT5,
+ S_NAME, S_NAME_BS, S_LCHAR,
+ MSTATE,
+ S_ILL, S_DDOT, S_DDSHARP, S_BS, S_ROGUE_BS, S_BEHEAD, S_DECAY,
+ S_TRUNC, S_TRUNCC, S_OUCH
+};
+
+/* true if x is one of the "inside a comment" states */
+#define CMT(x) ((x) >= S_COMMENT && (x) <= S_COMMENT5)
+
+/* maximum number of input characters (or classes) listed in one rule */
+#define CMCR 2
+
+/*
+ * This is the description of the automaton. It is not used "as is"
+ * but copied at execution time into a table.
+ *
+ * To my utmost displeasure, there are a few hacks in read_token()
+ * (which uses the transformed automaton) about the special handling
+ * of slashes, sharps, and the letter L.
+ *
+ * Each rule reads: in state `state', on any of the characters (or
+ * character classes) listed in `input', apply `new_state', which is
+ * either a state or a token type, possibly combined with the STO, FRZ
+ * and PUT flags. Unused `input' slots are left to zero.
+ *
+ * init_cppm() applies the rules in table order, a later rule replacing
+ * an earlier one for the characters they share; this is why the ANY
+ * rule of each state comes first and the specific rules follow.
+ */
+static struct machine_state {
+ int state;
+ unsigned char input[CMCR];
+ int new_state;
+} cppms[] = {
+ /* S_START is the generic beginning state */
+ { S_START, { ANY }, S_ILL },
+#ifdef SEMPER_FIDELIS
+ { S_START, { SPC }, PUT(S_SPACE) },
+#else
+ { S_START, { SPC }, S_SPACE },
+#endif
+ { S_START, { '\n' }, STO(NEWLINE) },
+ { S_START, { '!' }, S_BANG },
+ { S_START, { '"' }, PUT(S_STRING) },
+ { S_START, { '#' }, S_SHARP },
+ { S_START, { '%' }, S_PCT },
+ { S_START, { '&' }, S_AMPER },
+ { S_START, { '\'' }, PUT(S_CHAR) },
+ { S_START, { '(' }, STO(LPAR) },
+ { S_START, { ')' }, STO(RPAR) },
+ { S_START, { '*' }, S_STAR },
+ { S_START, { '+' }, S_PLUS },
+ { S_START, { ',' }, STO(COMMA) },
+ { S_START, { '-' }, S_MINUS },
+ { S_START, { '.' }, PUT(S_DOT) },
+#ifdef SEMPER_FIDELIS
+ { S_START, { '/' }, PUT(S_SLASH) },
+#else
+ { S_START, { '/' }, S_SLASH },
+#endif
+ { S_START, { NUM }, PUT(S_NUMBER) },
+ { S_START, { ':' }, S_COLON },
+ { S_START, { ';' }, STO(SEMIC) },
+ { S_START, { '<' }, S_LT },
+ { S_START, { '=' }, S_EQ },
+ { S_START, { '>' }, S_GT },
+ { S_START, { '?' }, STO(QUEST) },
+ { S_START, { ALP }, PUT(S_NAME) },
+ /* must come after the ALP rule, which also covers 'L' */
+ { S_START, { 'L' }, PUT(S_LCHAR) },
+ { S_START, { '[' }, STO(LBRK) },
+ { S_START, { ']' }, STO(RBRK) },
+ { S_START, { '^' }, S_CIRC },
+ { S_START, { '{' }, STO(LBRA) },
+ { S_START, { '|' }, S_PIPE },
+ { S_START, { '}' }, STO(RBRA) },
+ { S_START, { '~' }, STO(NOT) },
+ { S_START, { '\\' }, S_BACKSLASH },
+
+ /* after a space */
+ { S_SPACE, { ANY }, FRZ(STO(NONE)) },
+#ifdef SEMPER_FIDELIS
+ { S_SPACE, { SPC }, PUT(S_SPACE) },
+#else
+ { S_SPACE, { SPC }, S_SPACE },
+#endif
+
+ /* after a ! */
+ { S_BANG, { ANY }, FRZ(STO(LNOT)) },
+ { S_BANG, { '=' }, STO(NEQ) },
+
+ /* after a " */
+ { S_STRING, { ANY }, PUT(S_STRING) },
+ { S_STRING, { VCH }, FRZ(S_TRUNC) },
+ { S_STRING, { '\n' }, FRZ(S_BEHEAD) },
+ { S_STRING, { '\\' }, PUT(S_STRING2) },
+ { S_STRING, { '"' }, PUT(STO(STRING)) },
+
+ /* after a \ in a string: the next character is taken verbatim */
+ { S_STRING2, { ANY }, PUT(S_STRING) },
+ { S_STRING2, { VCH }, FRZ(S_TRUNC) },
+
+ /* after a # */
+ { S_SHARP, { ANY }, FRZ(STO(SHARP)) },
+ { S_SHARP, { '#' }, STO(DSHARP) },
+
+ /* after a : */
+ { S_COLON, { ANY }, FRZ(STO(COLON)) },
+ { S_COLON, { '>' }, STO(DIG_RBRK) },
+
+ /* after a % */
+ { S_PCT, { ANY }, FRZ(STO(PCT)) },
+ { S_PCT, { '=' }, STO(ASPCT) },
+ { S_PCT, { '>' }, STO(DIG_RBRA) },
+ { S_PCT, { ':' }, S_PCT2 },
+
+ /* after a %: */
+ { S_PCT2, { ANY }, FRZ(STO(DIG_SHARP)) },
+ { S_PCT2, { '%' }, S_PCT3 },
+
+ /* after a %:% */
+ { S_PCT3, { ANY }, FRZ(S_DDSHARP) },
+ { S_PCT3, { ':' }, STO(DIG_DSHARP) },
+
+ /* after a & */
+ { S_AMPER, { ANY }, FRZ(STO(AND)) },
+ { S_AMPER, { '=' }, STO(ASAND) },
+ { S_AMPER, { '&' }, STO(LAND) },
+
+ /* after a ' */
+ { S_CHAR, { ANY }, PUT(S_CHAR) },
+ { S_CHAR, { VCH }, FRZ(S_TRUNC) },
+ { S_CHAR, { '\'' }, PUT(STO(CHAR)) },
+ { S_CHAR, { '\\' }, PUT(S_CHAR2) },
+
+ /* after a \ in a character constant
+    useful only for '\'' */
+ { S_CHAR2, { ANY }, PUT(S_CHAR) },
+ { S_CHAR2, { VCH }, FRZ(S_TRUNC) },
+
+ /* after a * */
+ { S_STAR, { ANY }, FRZ(STO(STAR)) },
+ { S_STAR, { '=' }, STO(ASSTAR) },
+
+ /* after a + */
+ { S_PLUS, { ANY }, FRZ(STO(PLUS)) },
+ { S_PLUS, { '+' }, STO(PPLUS) },
+ { S_PLUS, { '=' }, STO(ASPLUS) },
+
+ /* after a - */
+ { S_MINUS, { ANY }, FRZ(STO(MINUS)) },
+ { S_MINUS, { '-' }, STO(MMINUS) },
+ { S_MINUS, { '=' }, STO(ASMINUS) },
+ { S_MINUS, { '>' }, STO(ARROW) },
+
+ /* after a . */
+ { S_DOT, { ANY }, FRZ(STO(DOT)) },
+ { S_DOT, { NUM }, PUT(S_NUMBER) },
+ { S_DOT, { '.' }, S_DOT2 },
+
+ /* after .. */
+ { S_DOT2, { ANY }, FRZ(S_DDOT) },
+ { S_DOT2, { '.' }, STO(MDOTS) },
+
+ /* after a / */
+ { S_SLASH, { ANY }, FRZ(STO(SLASH)) },
+ { S_SLASH, { '=' }, STO(ASSLASH) },
+#ifdef SEMPER_FIDELIS
+ { S_SLASH, { '*' }, PUT(S_COMMENT) },
+ { S_SLASH, { '/' }, PUT(S_COMMENT5) },
+#else
+ { S_SLASH, { '*' }, S_COMMENT },
+ { S_SLASH, { '/' }, S_COMMENT5 },
+#endif
+ /*
+  * There is a little hack in read_token() to disable
+  * this last rule, if C++ (C99) comments are not enabled.
+  */
+
+ /* after a number */
+ { S_NUMBER, { ANY }, FRZ(STO(NUMBER)) },
+ { S_NUMBER, { ALP, NUM }, PUT(S_NUMBER) },
+ { S_NUMBER, { '.' }, PUT(S_NUMBER) },
+ { S_NUMBER, { 'E', 'e' }, PUT(S_NUMBER2) },
+ { S_NUMBER, { 'P', 'p' }, PUT(S_NUMBER2) },
+
+ /* in a number, after an exponent letter: a sign is accepted */
+ { S_NUMBER2, { ANY }, FRZ(STO(NUMBER)) },
+ { S_NUMBER2, { ALP, NUM }, PUT(S_NUMBER) },
+ { S_NUMBER2, { '+', '-' }, PUT(S_NUMBER) },
+
+ /* after a < */
+ { S_LT, { ANY }, FRZ(STO(LT)) },
+ { S_LT, { '=' }, STO(LEQ) },
+ { S_LT, { '<' }, S_LT2 },
+ { S_LT, { ':' }, STO(DIG_LBRK) },
+ { S_LT, { '%' }, STO(DIG_LBRA) },
+
+ /* after << */
+ { S_LT2, { ANY }, FRZ(STO(LSH)) },
+ { S_LT2, { '=' }, STO(ASLSH) },
+
+ /* after a > */
+ { S_GT, { ANY }, FRZ(STO(GT)) },
+ { S_GT, { '=' }, STO(GEQ) },
+ { S_GT, { '>' }, S_GT2 },
+
+ /* after >> */
+ { S_GT2, { ANY }, FRZ(STO(RSH)) },
+ { S_GT2, { '=' }, STO(ASRSH) },
+
+ /* after a = */
+ { S_EQ, { ANY }, FRZ(STO(ASGN)) },
+ { S_EQ, { '=' }, STO(SAME) },
+#ifdef CAST_OP
+ { S_EQ, { '>' }, STO(CAST) },
+#endif
+
+ /* after a \ */
+ { S_BACKSLASH, { ANY }, FRZ(S_BS) },
+ { S_BACKSLASH, { 'U', 'u' }, FRZ(S_NAME_BS) },
+
+ /* after a letter */
+ { S_NAME, { ANY }, FRZ(STO(NAME)) },
+ { S_NAME, { ALP, NUM }, PUT(S_NAME) },
+ { S_NAME, { '\\' }, S_NAME_BS },
+
+ /* after a \ in an identifier */
+ { S_NAME_BS, { ANY }, FRZ(S_ROGUE_BS) },
+ { S_NAME_BS, { 'u', 'U' }, PUT(S_NAME) },
+
+ /* after a L */
+ { S_LCHAR, { ANY }, FRZ(S_NAME) },
+ { S_LCHAR, { '"' }, PUT(S_STRING) },
+ { S_LCHAR, { '\'' }, PUT(S_CHAR) },
+
+ /* after a ^ */
+ { S_CIRC, { ANY }, FRZ(STO(CIRC)) },
+ { S_CIRC, { '=' }, STO(ASCIRC) },
+
+ /* after a | */
+ { S_PIPE, { ANY }, FRZ(STO(OR)) },
+ { S_PIPE, { '=' }, STO(ASOR) },
+ { S_PIPE, { '|' }, STO(LOR) },
+
+ /*
+  * after a / and *
+  * S_COMMENT: inside the comment; S_COMMENT2: after a * which might
+  * begin the end of the comment; S_COMMENT5: inside a comment which
+  * began with two slashes and runs up to the end of the line
+  */
+#ifdef SEMPER_FIDELIS
+ { S_COMMENT, { ANY }, PUT(S_COMMENT) },
+ { S_COMMENT, { VCH }, FRZ(S_TRUNCC) },
+ { S_COMMENT, { '*' }, PUT(S_COMMENT2) },
+
+ { S_COMMENT2, { ANY }, FRZ(S_COMMENT) },
+ { S_COMMENT2, { VCH }, FRZ(S_TRUNCC) },
+ { S_COMMENT2, { '*' }, PUT(S_COMMENT2) },
+ { S_COMMENT2, { '/' }, STO(PUT(COMMENT)) },
+
+ { S_COMMENT5, { ANY }, PUT(S_COMMENT5) },
+ { S_COMMENT5, { VCH }, FRZ(S_DECAY) },
+ { S_COMMENT5, { '\n' }, FRZ(STO(COMMENT)) },
+#else
+ { S_COMMENT, { ANY }, S_COMMENT },
+ { S_COMMENT, { VCH }, FRZ(S_TRUNCC) },
+ { S_COMMENT, { '*' }, S_COMMENT2 },
+
+ { S_COMMENT2, { ANY }, FRZ(S_COMMENT) },
+ { S_COMMENT2, { VCH }, FRZ(S_TRUNCC) },
+ { S_COMMENT2, { '*' }, S_COMMENT2 },
+ { S_COMMENT2, { '/' }, STO(COMMENT) },
+
+ { S_COMMENT5, { ANY }, S_COMMENT5 },
+ { S_COMMENT5, { VCH }, FRZ(S_DECAY) },
+ { S_COMMENT5, { '\n' }, FRZ(STO(COMMENT)) },
+#endif
+
+ /* dummy end of machine description */
+ { 0, { 0 }, 0 }
+};
+
+/*
+ * cppm is the table used to store the automaton: if we are in state s
+ * and we read character c, we apply the action cppm[s][c] (jumping to
+ * another state, or emitting a token).
+ * cppm_vch is the table for the special virtual character "end of input"
+ *
+ * Only the real states (those below MSTATE) have a row. Both tables
+ * are filled by init_cppm(); entries for which no rule exists hold
+ * S_OUCH.
+ */
+static int cppm[MSTATE][MAX_CHAR_VAL];
+static int cppm_vch[MSTATE];
+
+/*
+ * init_cppm() expands the rules listed in cppms[] into the cppm[][]
+ * and cppm_vch[] transition tables.
+ * It must be called before beginning the lexing process.
+ */
+void init_cppm(void)
+{
+	static unsigned char letters[] =
+		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+		"abcdefghijklmnopqrstuvwxyz"
+		"_";
+	struct machine_state *rule;
+	unsigned char *p;
+	int st, ch, slot;
+
+	/* every transition is invalid until a rule says otherwise */
+	for (st = 0; st < MSTATE; st ++) {
+		cppm_vch[st] = S_OUCH;
+		for (ch = 0; ch < MAX_CHAR_VAL; ch ++) cppm[st][ch] = S_OUCH;
+	}
+
+	/* rules are applied in table order; later rules override earlier */
+	for (rule = cppms; rule->input[0]; rule ++) {
+		int from = rule->state;
+		int to = rule->new_state;
+
+		for (slot = 0; slot < CMCR; slot ++) {
+			int sym = rule->input[slot];
+
+			if (sym == 0) {
+				/* unused input slot */
+				continue;
+			} else if (sym == SPC) {
+				/* see space_char() also */
+				cppm[from][' '] = to;
+				cppm[from]['\t'] = to;
+				cppm[from]['\v'] = to;
+				cppm[from]['\f'] = to;
+#ifdef UNBREAKABLE_SPACE
+				if (MAX_CHAR_VAL > UNBREAKABLE_SPACE)
+					cppm[from][UNBREAKABLE_SPACE] = to;
+#endif
+			} else if (sym == ALP) {
+				for (p = letters; *p; p ++)
+					cppm[from][(int)*p] = to;
+			} else if (sym == NUM) {
+				for (ch = '0'; ch <= '9'; ch ++)
+					cppm[from][ch] = to;
+			} else if (sym == ANY) {
+				for (ch = 0; ch < MAX_CHAR_VAL; ch ++)
+					cppm[from][ch] = to;
+				cppm_vch[from] = to;
+			} else if (sym == VCH) {
+				cppm_vch[from] = to;
+			} else {
+				/* plain character */
+				cppm[from][sym] = to;
+			}
+		}
+	}
+}
+
+/*
+ * Make some character as equivalent to a letter for identifiers:
+ * it may then begin an identifier and appear inside one.
+ *
+ * c is used as an index into cppm[][]; a value outside the range
+ * 0 .. MAX_CHAR_VAL - 1 has no column of its own (read_token() maps
+ * such characters to column 0) and is therefore ignored instead of
+ * being written out of the table bounds.
+ */
+void set_identifier_char(int c)
+{
+	if (c < 0 || c >= MAX_CHAR_VAL) return;
+	cppm[S_START][c] = PUT(S_NAME);
+	cppm[S_NAME][c] = PUT(S_NAME);
+}
+
+/*
+ * Remove the "identifier" status from a character: it becomes illegal
+ * at the beginning of a token, and it terminates an identifier.
+ *
+ * c is used as an index into cppm[][]; a value outside the range
+ * 0 .. MAX_CHAR_VAL - 1 is ignored instead of being written out of
+ * the table bounds.
+ */
+void unset_identifier_char(int c)
+{
+	if (c < 0 || c >= MAX_CHAR_VAL) return;
+	cppm[S_START][c] = S_ILL;
+	cppm[S_NAME][c] = FRZ(STO(NAME));
+}
+
+/*
+ * Returns 1 if c is a whitespace character (space, horizontal or
+ * vertical tabulation, form feed and, when it is defined, the
+ * unbreakable space), 0 otherwise. Newline is not whitespace here.
+ */
+int space_char(int c)
+{
+#ifdef UNBREAKABLE_SPACE
+	if (c == UNBREAKABLE_SPACE) return 1;
+#endif
+	switch (c) {
+	case ' ':
+	case '\t':
+	case '\v':
+	case '\f':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+#ifndef NO_UCPP_BUF
+/*
+ * our output buffer is full, flush it
+ *
+ * The ls->sbuf bytes held in ls->output_buf are written to ls->output,
+ * then the buffer is marked empty. fwrite() is called again as long as
+ * it makes progress; if some bytes are still unwritten when it stops
+ * making progress, the output is incomplete: this is reported as an
+ * error and the process is terminated through die().
+ */
+void flush_output(struct lexer_state *ls)
+{
+	size_t left = ls->sbuf, done = 0, n;
+
+	if (left == 0) return;
+	do {
+		n = fwrite(ls->output_buf + done, 1, left, ls->output);
+		left -= n;
+		done += n;
+	} while (n && left > 0);
+	/* a partial write is a failure too: data would be silently lost */
+	if (left > 0) {
+		error(ls->line, "could not flush output (disk full ?)");
+		die();
+	}
+	ls->sbuf = 0;
+}
+#endif
+
+/*
+ * Send one character to the output, either directly or through the
+ * output buffer (which is flushed when it becomes full), and keep
+ * count of the lines output so far.
+ * This function should not be called, except by put_char().
+ */
+static inline void write_char(struct lexer_state *ls, unsigned char c)
+{
+#ifdef NO_UCPP_BUF
+	if (putc((int)c, ls->output) == EOF) {
+		error(ls->line, "output write error (disk full ?)");
+		die();
+	}
+#else
+	ls->output_buf[ls->sbuf] = c;
+	ls->sbuf ++;
+	if (ls->sbuf == OUTPUT_BUF_MEMG) flush_output(ls);
+#endif
+	if (c == '\n') ls->oline ++;
+}
+
+/*
+ * schedule a character for output; nothing happens unless the
+ * KEEP_OUTPUT flag is set
+ */
+void put_char(struct lexer_state *ls, unsigned char c)
+{
+	if (!(ls->flags & KEEP_OUTPUT)) return;
+	write_char(ls, c);
+}
+
+/*
+ * get next raw input character
+ *
+ * Returns the character, or -1 when there is no more input.
+ * When ls->input is null, characters come unmodified from
+ * ls->input_string. Otherwise they come from the ls->input stream and
+ * a '\r' is turned into a '\n', a '\n' directly following a '\r'
+ * being dropped (ls->macfile remembers that the previous character
+ * was a '\r').
+ */
+static inline int read_char(struct lexer_state *ls)
+{
+ unsigned char c;
+
+ if (!ls->input) {
+ /* string input: pbuf is the read index, ebuf the string length */
+ return ((ls->pbuf ++) < ls->ebuf) ?
+ ls->input_string[ls->pbuf - 1] : -1;
+ }
+ while (1) {
+#ifndef NO_UCPP_BUF
+ if (ls->pbuf == ls->ebuf) {
+ /* input buffer exhausted: refill it from the stream */
+#ifdef UCPP_MMAP
+ if (ls->from_mmap) {
+ /* the buffer was a mapping: drop it and go back
+    to the saved regular buffer */
+ munmap((void *)ls->input_buf, ls->ebuf);
+ ls->from_mmap = 0;
+ ls->input_buf = ls->input_buf_sav;
+ }
+#endif
+ ls->ebuf = fread(ls->input_buf, 1,
+ INPUT_BUF_MEMG, ls->input);
+ ls->pbuf = 0;
+ }
+ /* nothing could be read: end of input (or read error) */
+ if (ls->ebuf == 0) return -1;
+ c = ls->input_buf[ls->pbuf ++];
+#else
+ int x = getc(ls->input);
+
+ if (x == EOF) return -1;
+ c = x;
+#endif
+ if (ls->flags & COPY_LINE) {
+ /* keep a copy of the current line in ls->copy_line;
+    characters beyond COPY_LINE_LENGTH - 1 are dropped */
+ if (c == '\n') {
+ ls->copy_line[ls->cli] = 0;
+ ls->cli = 0;
+ } else if (ls->cli < (COPY_LINE_LENGTH - 1)) {
+ ls->copy_line[ls->cli ++] = c;
+ }
+ }
+ if (ls->macfile && c == '\n') {
+ /* second half of a "\r\n" pair: skip it */
+ ls->macfile = 0;
+ continue;
+ }
+ ls->macfile = 0;
+ if (c == '\r') {
+ /*
+  * We found a '\r'; we handle it as a newline
+  * and ignore the next newline. This should work
+  * with all combinations of Msdos, MacIntosh and
+  * Unix files on these three platforms. On other
+  * platforms, native file formats are always
+  * supported.
+  */
+ ls->macfile = 1;
+ c = '\n';
+ }
+ break;
+ }
+ return c;
+}
+
+/*
+ * next_fifo_char(), char_lka1() and char_lka2() give a two character
+ * look-ahead on the input stream; this is needed for trigraphs
+ *
+ * next_fifo_char() consumes and returns the next character: the oldest
+ * look-ahead character if there is one, a freshly read one otherwise.
+ */
+static inline int next_fifo_char(struct lexer_state *ls)
+{
+	int head;
+
+	if (ls->nlka == 0) return read_char(ls);
+	head = ls->lka[0];
+	ls->lka[0] = ls->lka[1];
+	ls->nlka --;
+	return head;
+}
+
+/*
+ * Returns the next input character without consuming it; it is read
+ * into the look-ahead buffer if it is not there yet.
+ */
+static inline int char_lka1(struct lexer_state *ls)
+{
+	if (ls->nlka != 0) return ls->lka[0];
+	ls->lka[0] = read_char(ls);
+	ls->nlka ++;
+	return ls->lka[0];
+}
+
+/*
+ * Returns the input character after the next one, without consuming
+ * anything. The first look-ahead character must already have been
+ * fetched with char_lka1().
+ */
+static inline int char_lka2(struct lexer_state *ls)
+{
+#ifdef AUDIT
+	if (ls->nlka == 0) ouch("always in motion future is");
+#endif
+	if (ls->nlka != 1) return ls->lka[1];
+	ls->lka[1] = read_char(ls);
+	ls->nlka ++;
+	return ls->lka[1];
+}
+
+/*
+ * Trigraph table, used by next_char(): two question marks followed by
+ * the character `old' are replaced with the character `new'.
+ */
+static struct trigraph {
+ int old, new;
+} trig[9] = {
+ { '=', '#' },
+ { '/', '\\' },
+ { '\'', '^' },
+ { '(', '[' },
+ { ')', ']' },
+ { '!', '|' },
+ { '<', '{' },
+ { '>', '}' },
+ { '-', '~' }
+};
+
+/*
+ * Returns the next character, after treatment of trigraphs and terminating
+ * backslashes. Return value is -1 if there is no more input.
+ *
+ * The returned character is also stored in ls->last, and it is returned
+ * again by every further call until discard_char() is called.
+ *
+ * A "\r\n" pair reaching this function is delivered as a single '\n'.
+ * That '\n' is recorded in ls->last like any other character, so that
+ * it is not lost when the automaton asks to read it again, and so that
+ * the line is counted once, by discard_char(), when it is consumed.
+ */
+static inline int next_char(struct lexer_state *ls)
+{
+	int c;
+
+	if (!ls->discard) return ls->last;
+	ls->discard = 0;
+	for (;;) {
+		c = next_fifo_char(ls);
+		/* check trigraphs */
+		if (c == '?' && char_lka1(ls) == '?'
+			&& (ls->flags & HANDLE_TRIGRAPHS)) {
+			int i, d;
+
+			d = char_lka2(ls);
+			for (i = 0; i < 9; i ++) if (d == trig[i].old) {
+				if (ls->flags & WARN_TRIGRAPHS) {
+					ls->count_trigraphs ++;
+				}
+				if (ls->flags & WARN_TRIGRAPHS_MORE) {
+					warning(ls->line, "trigraph ?""?%c "
+						"encountered", d);
+				}
+				/* drop the second '?' and the third character */
+				next_fifo_char(ls);
+				next_fifo_char(ls);
+				c = trig[i].new;
+				break;
+			}
+		}
+		if (c == '\\' && char_lka1(ls) == '\n') {
+			/* line continuation: swallow both, read further */
+			ls->line ++;
+			next_fifo_char(ls);
+			continue;
+		}
+		if (c == '\r' && char_lka1(ls) == '\n') {
+			/* "\r\n" pair: swallow the '\n', deliver one newline */
+			next_fifo_char(ls);
+			c = '\n';
+		}
+		ls->last = c;
+		return c;
+	}
+}
+
+/*
+ * wrapper for next_char(), to be called from outside
+ * (used by #error, #include directives)
+ *
+ * Like next_char(), it returns -1 when there is no more input, and
+ * it does not consume the character: use discard_char() for that.
+ */
+int grap_char(struct lexer_state *ls)
+{
+ return next_char(ls);
+}
+
+/*
+ * Consume the current character: the next call to next_char() will
+ * step into the input stream instead of returning it again.
+ * The line counter is updated when the consumed character is a
+ * newline, and the pending UTF-8 value is reset.
+ */
+void discard_char(struct lexer_state *ls)
+{
+#ifdef AUDIT
+	if (ls->discard) ouch("overcollecting garbage");
+#endif
+	if (ls->last == '\n') ls->line ++;
+	ls->utf8 = 0;
+	ls->discard = 1;
+}
+
+/*
+ * Convert an UTF-8 encoded character to a Universal Character Name
+ * using \u (or \U when appropriate).
+ *
+ * utf8 holds the bytes of the encoded character, the first byte being
+ * the most significant one and the last byte the least significant one
+ * (this is how read_token() accumulates them). buf receives the
+ * result, as a zero-terminated string: the character itself when its
+ * value is below 128, "\uxxxx" when the value fits in 16 bits, and
+ * "\U00xxxxxx" otherwise. buf must have room for 11 bytes.
+ *
+ * Returns the length of the string written (1, 6 or 10).
+ */
+static int utf8_to_string(unsigned char buf[], unsigned long utf8)
+{
+	unsigned long val = 0;
+	static char hex[16] = "0123456789abcdef";
+
+	if (utf8 & 0x80UL) {
+		unsigned long x1, x2, x3, x4;
+
+		x1 = (utf8 >> 24) & 0x7fUL;
+		x2 = (utf8 >> 16) & 0x7fUL;
+		x3 = (utf8 >> 8) & 0x7fUL;
+		x4 = (utf8) & 0x3fUL;
+		/*
+		 * Strip the length marker from whichever byte is the lead
+		 * byte; continuation bytes already hold 6 payload bits.
+		 */
+		x1 &= 0x07UL;
+		if (x2 & 0x40UL) x2 &= 0x0fUL;
+		if (x3 & 0x40UL) x3 &= 0x1fUL;
+		/* 6 payload bits per continuation byte: 0, 6, 12, 18 */
+		val = x4 | (x3 << 6) | (x2 << 12) | (x1 << 18);
+	} else val = utf8;
+	if (val < 128) {
+		buf[0] = val;
+		buf[1] = 0;
+		return 1;
+	} else if (val <= 0xffffUL) {
+		buf[0] = '\\';
+		buf[1] = 'u';
+		buf[2] = hex[(size_t)(val >> 12)];
+		buf[3] = hex[(size_t)((val >> 8) & 0xfU)];
+		buf[4] = hex[(size_t)((val >> 4) & 0xfU)];
+		buf[5] = hex[(size_t)(val & 0xfU)];
+		buf[6] = 0;
+		return 6;
+	}
+	buf[0] = '\\';
+	buf[1] = 'U';
+	buf[2] = '0';
+	buf[3] = '0';
+	buf[4] = hex[(size_t)(val >> 20)];
+	buf[5] = hex[(size_t)((val >> 16) & 0xfU)];
+	buf[6] = hex[(size_t)((val >> 12) & 0xfU)];
+	buf[7] = hex[(size_t)((val >> 8) & 0xfU)];
+	buf[8] = hex[(size_t)((val >> 4) & 0xfU)];
+	buf[9] = hex[(size_t)(val & 0xfU)];
+	buf[10] = 0;
+	return 10;
+}
+
+/*
+ * Turn an upper case hexadecimal letter into its lower case
+ * counterpart; any other character is returned unchanged.
+ */
+static char ucn_lower_hex(char x)
+{
+	switch (x) {
+	case 'A': return 'a';
+	case 'B': return 'b';
+	case 'C': return 'c';
+	case 'D': return 'd';
+	case 'E': return 'e';
+	case 'F': return 'f';
+	default: return x;
+	}
+}
+
+/*
+ * Scan the identifier and put it in canonical form:
+ * -- transform \U0000xxxx into \uxxxx
+ * -- inside \u and \U, make letters low case
+ * -- report (some) incorrect use of UCN
+ *
+ * The identifier is rewritten in place; the result is never longer
+ * than the original. When a malformed UCN is found, what precedes it
+ * stays canonized, the remainder is kept as is, and a warning is
+ * emitted if WARN_STANDARD is set.
+ */
+static void canonize_id(struct lexer_state *ls, char *id)
+{
+	/* c is the read position, d the write position; d never passes c */
+	char *c, *d;
+
+	for (c = d = id; *c;) {
+		int i, ndigits;
+
+		if (*c != '\\') {
+			*(d ++) = *(c ++);
+			continue;
+		}
+		if (*(c + 1) == 'U') {
+			/* eight characters must follow \U */
+			for (i = 0; i < 8 && *(c + i + 2); i ++);
+			if (i != 8) goto canon_error;
+			c += 2;
+			for (i = 0; i < 4 && *(c + i) == '0'; i ++);
+			*(d ++) = '\\';
+			if (i == 4) {
+				/* four leading zeros: use the short form */
+				*(d ++) = 'u';
+				c += 4;
+				ndigits = 4;
+			} else {
+				*(d ++) = 'U';
+				ndigits = 8;
+			}
+		} else if (*(c + 1) == 'u') {
+			/* four characters must follow \u */
+			for (i = 0; i < 4 && *(c + i + 2); i ++);
+			if (i != 4) goto canon_error;
+			*(d ++) = '\\';
+			*(d ++) = 'u';
+			c += 2;
+			ndigits = 4;
+		} else {
+			/* backslash followed by something else, or by nothing */
+			goto canon_error;
+		}
+		for (; ndigits > 0; ndigits --)
+			*(d ++) = ucn_lower_hex(*(c ++));
+	}
+	*d = 0;
+	return;
+
+canon_error:
+	for (; *c; *(d ++) = *(c ++));
+	/* terminate first, so that the warning shows the final string */
+	*d = 0;
+	if (ls->flags & WARN_STANDARD) {
+		warning(ls->line, "malformed identifier with UCN: '%s'", id);
+	}
+}
+
+/*
+ * Run the automaton, in order to get the next token.
+ * This function should not be called, except by next_token()
+ *
+ * The token type is stored in ls->ctok->type; the token text, for the
+ * token types which have one (see S_TOKEN()), is stored in
+ * ls->ctok->name. Unless the LEXER flag is set, the characters read
+ * are also copied to the output while ls->condcomp is non-zero.
+ *
+ * return value: 1 on error, 2 on end-of-file, 0 otherwise.
+ */
+static inline int read_token(struct lexer_state *ls)
+{
+ int cstat = S_START, nstat;
+ size_t ltok = 0;
+ int c, outc = 0, ucn_in_id = 0;
+ int shift_state;
+ unsigned long utf8;
+ long l = ls->line;
+
+ ls->ctok->line = l;
+ /* a token queued by the previous call is delivered first */
+ if (ls->pending_token) {
+ if ((ls->ctok->type = ls->pending_token) == BUNCH) {
+ ls->ctok->name[0] = '\\';
+ ls->ctok->name[1] = 0;
+ }
+ ls->pending_token = 0;
+ return 0;
+ }
+ /* utf8 and shift_state are used only when UTF8_SOURCE is set */
+ if (ls->flags & UTF8_SOURCE) {
+ utf8 = ls->utf8;
+ shift_state = 0;
+ }
+ /* output newlines until the output line number catches up */
+ if (!(ls->flags & LEXER) && (ls->flags & KEEP_OUTPUT))
+ for (; ls->line > ls->oline;) put_char(ls, '\n');
+ do {
+ c = next_char(ls);
+ if (c < 0) {
+ /* end of input */
+ if ((ls->flags & UTF8_SOURCE) && shift_state) {
+ if (ls->flags & WARN_STANDARD)
+ warning(ls->line, "truncated UTF-8 "
+ "character");
+ shift_state = 0;
+ utf8 = 0;
+ }
+ if (cstat == S_START) return 2;
+ nstat = cppm_vch[cstat];
+ } else {
+ if (ls->flags & UTF8_SOURCE) {
+ /*
+  * shift_state is the number of continuation
+  * bytes still expected; the bytes are piled up
+  * in utf8 and, once the character is complete,
+  * it is given to the automaton as a '_'
+  */
+ if (shift_state) {
+ if ((c & 0xc0) != 0x80) {
+ if (ls->flags & WARN_STANDARD)
+ warning(ls->line,
+ "truncated "
+ "UTF-8 "
+ "character");
+ shift_state = 0;
+ utf8 = 0;
+ c = '_';
+ } else {
+ utf8 = (utf8 << 8) | c;
+ if (-- shift_state) {
+ ls->discard = 1;
+ continue;
+ }
+ c = '_';
+ }
+ } else if ((c & 0xc0) == 0xc0) {
+ /* lead byte: it tells how many bytes follow */
+ if ((c & 0x30) == 0x30) {
+ shift_state = 3;
+ } else if (c & 0x20) {
+ shift_state = 2;
+ } else {
+ shift_state = 1;
+ }
+ utf8 = c;
+ ls->discard = 1;
+ continue;
+ } else utf8 = 0;
+ }
+ /* characters beyond the table use column 0 */
+ nstat = cppm[cstat][c < MAX_CHAR_VAL ? c : 0];
+ }
+#ifdef AUDIT
+ if (nstat == S_OUCH) {
+ ouch("bad move...");
+ }
+#endif
+ /*
+  * disable C++-like comments
+  */
+ if (nstat == S_COMMENT5 && !(ls->flags & CPLUSPLUS_COMMENTS))
+ nstat = FRZ(STO(SLASH));
+
+ /*
+  * pseudo-states: each of them is turned into an error,
+  * or into the emission of a token
+  */
+ if (noMOD(nstat) >= MSTATE && !ttSTO(nstat))
+ switch (noMOD(nstat)) {
+ case S_ILL:
+ if (ls->flags & CCHARSET) {
+ error(ls->line, "illegal character '%c'", c);
+ return 1;
+ }
+ nstat = PUT(STO(BUNCH));
+ break;
+ case S_BS:
+ ls->ctok->name[0] = '\\';
+ ltok ++;
+ nstat = FRZ(STO(BUNCH));
+ if (!(ls->flags & LEXER)) put_char(ls, '\\');
+ break;
+ case S_ROGUE_BS:
+ /* the name ends here; the backslash comes next */
+ ls->pending_token = BUNCH;
+ nstat = FRZ(STO(NAME));
+ break;
+ case S_DDOT:
+ /* two dots, not three: two DOT tokens */
+ ls->pending_token = DOT;
+ nstat = FRZ(STO(DOT));
+ break;
+ case S_DDSHARP:
+ /* "%:%" not followed by ':': DIG_SHARP, then PCT */
+ ls->pending_token = PCT;
+ nstat = FRZ(STO(DIG_SHARP));
+ break;
+ case S_BEHEAD:
+ error(l, "unfinished string at end of line");
+ return 1;
+ case S_DECAY:
+ warning(l, "unterminated // comment");
+ nstat = FRZ(STO(COMMENT));
+ break;
+ case S_TRUNC:
+ error(l, "truncated token");
+ return 1;
+ case S_TRUNCC:
+ error(l, "truncated comment");
+ return 1;
+#ifdef AUDIT
+ case S_OUCH:
+ ouch("machine went out of control");
+ break;
+#endif
+ }
+ if (!ttFRZ(nstat)) {
+ /* the character is consumed by this transition */
+ discard_char(ls);
+ if (!(ls->flags & LEXER) && ls->condcomp) {
+ int z = ttSTO(nstat) ? S_ILL : noMOD(nstat);
+
+ /*
+  * outc holds what has been read but not output
+  * yet: 0 for nothing, a character for that
+  * character, -1 for "%:" and -2 for "%:%"
+  */
+ if (cstat == S_NAME || z == S_NAME
+ || ((CMT(cstat) || CMT(z))
+ && (ls->flags & DISCARD_COMMENTS))) {
+ outc = 0;
+ } else if (z == S_LCHAR || z == S_SLASH
+ || (z == S_SHARP && ls->ltwnl)
+ || (z == S_PCT && ls->ltwnl)
+ || (z == S_BACKSLASH)) {
+ outc = c;
+ } else if (z == S_PCT2 && ls->ltwnl) {
+ outc = -1;
+ } else if (z == S_PCT3 && ls->ltwnl) {
+ /* we have %:% but this still might
+    not be a %:%: */
+ outc = -2;
+ } else {
+ if (outc < 0) {
+ put_char(ls, '%');
+ put_char(ls, ':');
+ if (outc == -2)
+ put_char(ls, '%');
+ outc = 0;
+ } else if (outc) {
+ put_char(ls, outc);
+ outc = 0;
+ }
+ put_char(ls, c);
+ }
+ }
+ } else if (outc == '/' && !(ls->flags & LEXER)
+ && ls->condcomp) {
+ /* this is a hack: we need to dump a pending slash */
+ put_char(ls, outc);
+ outc = 0;
+ }
+ if (ttPUT(nstat)) {
+ /* the character is part of the token text */
+ if (cstat == S_NAME_BS) {
+ ucn_in_id = 1;
+ wan(ls->ctok->name, ltok, '\\', ls->tknl);
+ }
+ if ((ls->flags & UTF8_SOURCE) && utf8) {
+ unsigned char buf[11];
+ int i, j;
+
+ /* store the string built by utf8_to_string() */
+ for (i = 0, j = utf8_to_string(buf, utf8);
+ i < j; i ++)
+ wan(ls->ctok->name, ltok, buf[i],
+ ls->tknl);
+ /* if (j > 1) ucn_in_id = 1; */
+ } else wan(ls->ctok->name, ltok,
+ (unsigned char)c, ls->tknl);
+ }
+ if (ttSTO(nstat)) {
+ /* the token is complete */
+ if (S_TOKEN(noMOD(nstat))) {
+ wan(ls->ctok->name, ltok,
+ (unsigned char)0, ls->tknl);
+ }
+ ls->ctok->type = noMOD(nstat);
+ break;
+ }
+ cstat = noMOD(nstat);
+ } while (1);
+ /* a discarded comment is replaced with one space in the output */
+ if (!(ls->flags & LEXER) && (ls->flags & DISCARD_COMMENTS)
+ && ls->ctok->type == COMMENT) put_char(ls, ' ');
+ if (ucn_in_id && ls->ctok->type == NAME)
+ canonize_id(ls, ls->ctok->name);
+ return 0;
+}
+
+/*
+ * fills ls->ctok with the next token
+ *
+ * When the READ_AGAIN flag is set, the token already held in ls->ctok
+ * is delivered once more (and the flag is cleared) instead of reading
+ * a new one; unless the LEXER flag is set, the text of that token is
+ * then sent to the output, except for names and for a # (or %:) seen
+ * while ls->ltwnl is set.
+ *
+ * The return value is that of read_token(), or 0 for a token which is
+ * delivered again.
+ */
+int next_token(struct lexer_state *ls)
+{
+	char *text;
+
+	if (!(ls->flags & READ_AGAIN)) return read_token(ls);
+
+	ls->flags &= ~READ_AGAIN;
+	if (ls->flags & LEXER) return 0;
+
+	if (S_TOKEN(ls->ctok->type)) {
+		text = ls->ctok->name;
+	} else {
+		text = token_name(ls->ctok);
+	}
+	if (ls->ctok->type == OPT_NONE) {
+		/* an optional whitespace becomes a real one */
+		ls->ctok->type = NONE;
+#ifdef SEMPER_FIDELIS
+		ls->ctok->name[0] = ' ';
+		ls->ctok->name[1] = 0;
+#endif
+		put_char(ls, ' ');
+	} else if (ls->ctok->type == NAME) {
+		/* names are not output from here */
+	} else if (ls->ltwnl && (ls->ctok->type == SHARP
+		|| ls->ctok->type == DIG_SHARP)) {
+		/* neither is a sharp seen while ls->ltwnl is set */
+	} else {
+		while (*text) {
+			put_char(ls, *text);
+			text ++;
+		}
+	}
+	return 0;
+}
diff --git a/libexec/auxcpp/macro.c b/libexec/auxcpp/macro.c
new file mode 100644
index 00000000000..5b9540c67c2
--- /dev/null
+++ b/libexec/auxcpp/macro.c
@@ -0,0 +1,1921 @@
+/*
+ * (c) Thomas Pornin 1999 - 2002
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "tune.h"
+#include <stdio.h>
+#include <string.h>
+#include <stddef.h>
+#include <limits.h>
+#include "ucppi.h"
+#include "mem.h"
+#include "nhash.h"
+
+/*
+ * we store macros in a hash table, and retrieve them using their name
+ * as identifier.
+ */
+static HTT macros;
+static int macros_init_done = 0;
+
+/* Release a macro: its argument names, its replacement list, then itself. */
+static void del_macro(void *m)
+{
+ struct macro *mac = m;
+ int a;
+ for (a = 0; a < mac->narg; a ++) freemem(mac->arg[a]);
+ if (mac->narg > 0) freemem(mac->arg);
+#ifdef LOW_MEM
+ if (mac->cval.length) freemem(mac->cval.t);
+#else
+ if (mac->val.nt) {
+ struct token *tk = mac->val.t, *end = tk + mac->val.nt;
+ for (; tk < end; tk ++)
+ if (S_TOKEN(tk->type)) freemem(tk->name);
+ freemem(mac->val.t);
+ }
+#endif
+ freemem(mac);
+}
+
+/* Allocate an empty, non-variadic macro with no parameter list (narg -1). */
+static inline struct macro *new_macro(void)
+{
+ struct macro *fresh = getmem(sizeof *fresh);
+ fresh->vaarg = 0;
+ fresh->nest = 0;
+ fresh->narg = -1;
+#ifdef LOW_MEM
+ fresh->cval.length = 0;
+#else
+ fresh->val.art = 0; fresh->val.nt = 0;
+#endif
+ return fresh;
+}
+
+/*
+ * codes returned by check_special_macro(): special macros, and the "defined" operator
+ */
+enum {
+ MAC_NONE, MAC_DEFINED,
+ MAC_LINE, MAC_FILE, MAC_DATE, MAC_TIME, MAC_STDC, MAC_PRAGMA
+};
+#define MAC_SPECIAL MAC_LINE /* codes >= MAC_SPECIAL are handled in substitute_macro() */
+
+/*
+ * Classify a name: MAC_DEFINED for "defined", a code >= MAC_SPECIAL for a
+ * special macro such as __FILE__ or _Pragma, MAC_NONE otherwise. The
+ * double-underscore macros are not recognized when no_special_macros is set.
+ */
+static inline int check_special_macro(char *name)
+{
+ static const struct { const char *id; int code; } dunder[] = {
+ { "__LINE__", MAC_LINE }, { "__FILE__", MAC_FILE },
+ { "__DATE__", MAC_DATE }, { "__TIME__", MAC_TIME },
+ { "__STDC__", MAC_STDC }
+ };
+ size_t k;
+ if (strcmp(name, "defined") == 0) return MAC_DEFINED;
+ if (name[0] != '_') return MAC_NONE;
+ if (name[1] == 'P') return strcmp(name, "_Pragma") ? MAC_NONE : MAC_PRAGMA;
+ if (name[1] != '_' || no_special_macros) return MAC_NONE;
+ for (k = 0; k < sizeof dunder / sizeof dunder[0]; k ++)
+ if (strcmp(name, dunder[k].id) == 0) return dunder[k].code;
+ return MAC_NONE;
+}
+
+int c99_compliant = 1; /* non-zero: add_special_macros() defines __STDC_VERSION__ */
+int c99_hosted = 1; /* non-zero: add_special_macros() defines __STDC_HOSTED__ */
+
+/*
+ * add the special macros (and the enabled C99 macros) to the macro table
+ */
+static void add_special_macros(void)
+{
+ struct macro *m;
+
+ HTT_put(&macros, new_macro(), "__LINE__"); /* empty entries: no replacement list is stored */
+ HTT_put(&macros, new_macro(), "__FILE__");
+ HTT_put(&macros, new_macro(), "__DATE__");
+ HTT_put(&macros, new_macro(), "__TIME__");
+ HTT_put(&macros, new_macro(), "__STDC__");
+ m = new_macro(); m->narg = 1; /* _Pragma is registered with one argument */
+ m->arg = getmem(sizeof(char *)); m->arg[0] = sdup("foo"); /* placeholder argument name */
+ HTT_put(&macros, m, "_Pragma");
+ if (c99_compliant) {
+#ifndef LOW_MEM
+ struct token t;
+#endif
+
+ m = new_macro();
+#ifdef LOW_MEM
+ m->cval.t = getmem(9); /* 1 type byte + "199901L" + terminating 0 */
+ m->cval.t[0] = NUMBER;
+ mmv(m->cval.t + 1, "199901L", 8);
+ m->cval.length = 9;
+#else
+ t.type = NUMBER;
+ t.line = 0;
+ t.name = sdup("199901L");
+ aol(m->val.t, m->val.nt, t, TOKEN_LIST_MEMG);
+#endif
+ HTT_put(&macros, m, "__STDC_VERSION__");
+ }
+ if (c99_hosted) {
+#ifndef LOW_MEM
+ struct token t;
+#endif
+
+ m = new_macro();
+#ifdef LOW_MEM
+ m->cval.t = getmem(3); /* 1 type byte + "1" + terminating 0 */
+ m->cval.t[0] = NUMBER;
+ mmv(m->cval.t + 1, "1", 2);
+ m->cval.length = 3;
+#else
+ t.type = NUMBER;
+ t.line = 0;
+ t.name = sdup("1");
+ aol(m->val.t, m->val.nt, t, TOKEN_LIST_MEMG);
+#endif
+ HTT_put(&macros, m, "__STDC_HOSTED__");
+ }
+}
+
+#ifdef LOW_MEM
+/*
+ * We store macro arguments as a single-byte token MACROARG, followed
+ * by the argument number as a one or two-byte value. If the argument
+ * number is between 0 and 127 (inclusive), it is stored as such in
+ * a single byte. Otherwise, it is supposed to be a 15-bit number, with
+ * the 7 upper bits stored in the first byte (with the high bit set to 1)
+ * and the 8 lower bits in the second byte.
+ */
+#endif
+
+/*
+ * print macro vm to emit_output in #define form (special names as a comment)
+ */
+static void print_macro(void *vm)
+{
+ struct macro *m = vm;
+ char *mname = HASH_ITEM_NAME(m);
+ int x = check_special_macro(mname);
+ size_t i;
+
+ if (x != MAC_NONE) {
+ fprintf(emit_output, "/* #define %s */ /* special */\n",
+ mname);
+ return;
+ }
+ fprintf(emit_output, "#define %s", mname);
+ if (m->narg >= 0) { /* the macro has a parameter list (possibly empty) */
+ fprintf(emit_output, "(");
+ for (i = 0; i < (size_t)(m->narg); i ++) {
+ fprintf(emit_output, i ? ", %s" : "%s", m->arg[i]);
+ }
+ if (m->vaarg) {
+ fputs(m->narg ? ", ..." : "...", emit_output);
+ }
+ fprintf(emit_output, ")");
+ }
+#ifdef LOW_MEM
+ if (m->cval.length == 0) {
+ fputc('\n', emit_output);
+ return;
+ }
+ fputc(' ', emit_output);
+ for (i = 0; i < m->cval.length;) { /* i is advanced inside the body, by token size */
+ int tt = m->cval.t[i ++];
+
+ if (tt == MACROARG) {
+ unsigned anum = m->cval.t[i]; /* one- or two-byte argument number */
+
+ if (anum >= 128) anum = ((anum & 127U) << 8)
+ | m->cval.t[++ i];
+ if (anum == (unsigned)m->narg) /* index narg stands for __VA_ARGS__ */
+ fputs("__VA_ARGS__", emit_output);
+ else
+ fputs(m->arg[anum], emit_output);
+ i ++;
+ }
+ else if (S_TOKEN(tt)) {
+ fputs((char *)(m->cval.t + i), emit_output);
+ i += 1 + strlen((char *)(m->cval.t + i)); /* skip the text and its 0 */
+ } else fputs(operators_name[tt], emit_output);
+ }
+#else
+ if (m->val.nt == 0) {
+ fputc('\n', emit_output);
+ return;
+ }
+ fputc(' ', emit_output);
+ for (i = 0; i < m->val.nt; i ++) {
+ if (m->val.t[i].type == MACROARG) {
+ if (m->val.t[i].line == m->narg) /* the line field holds the argument index */
+ fputs("__VA_ARGS__", emit_output);
+ else
+ fputs(m->arg[(size_t)(m->val.t[i].line)],
+ emit_output);
+ } else fputs(token_name(m->val.t + i), emit_output);
+ }
+#endif
+ fputc('\n', emit_output);
+}
+
+/*
+ * Emit token t: a copy is appended to ls->output_fifo when LEXER is set,
+ * otherwise its text is written character by character with put_char().
+ * A negative t->line is replaced by uz_line when uz_line is non-zero.
+ */
+void print_token(struct lexer_state *ls, struct token *t, long uz_line)
+{
+ char *text;
+
+ if (uz_line != 0 && t->line < 0) t->line = uz_line;
+ if (!(ls->flags & LEXER)) {
+ if (ls->flags & KEEP_OUTPUT)
+ while (ls->oline < ls->line) put_char(ls, '\n');
+ text = S_TOKEN(t->type) ? t->name : operators_name[t->type];
+ while (*text) put_char(ls, *(text ++));
+ return;
+ } else {
+ struct token copy = *t;
+
+ if (S_TOKEN(copy.type)) {
+ copy.name = sdup(copy.name);
+ throw_away(ls->gf, copy.name);
+ }
+ aol(ls->output_fifo->t, ls->output_fifo->nt, copy,
+ TOKEN_LIST_MEMG);
+ }
+}
+
+/*
+ * Variant of print_token() pinned to a line: in text mode, newlines are
+ * emitted until ls->oline reaches nail_line (instead of ls->line). Used
+ * for text output and macro names left unreplaced for lack of arguments.
+ */
+static void print_token_nailed(struct lexer_state *ls, struct token *t,
+ long nail_line)
+{
+ char *text;
+
+ if (ls->flags & LEXER) {
+ print_token(ls, t, 0);
+ return;
+ }
+ if (ls->flags & KEEP_OUTPUT)
+ while (ls->oline < nail_line) put_char(ls, '\n');
+ text = S_TOKEN(t->type) ? t->name : operators_name[t->type];
+ while (*text) put_char(ls, *(text ++));
+}
+
+/*
+ * send a reduced whitespace token (OPT_NONE) to the output; lt.name is left unset
+ */
+#define print_space(ls) do { \
+ struct token lt; \
+ lt.type = OPT_NONE; \
+ lt.line = (ls)->line; \
+ print_token((ls), &lt, 0); \
+ } while (0)
+
+/*
+ * We found a #define directive; parse the end of the line, perform
+ * sanity checks, store the new macro into the "macros" hash table.
+ * Returns 0 on success, 1 after an error has been reported.
+ *
+ * In case of a redefinition of a macro: we enforce the rule that a
+ * macro should be redefined identically, including the number and
+ * spelling of parameters; offending code is an error (use #undef first).
+ */
+int handle_define(struct lexer_state *ls)
+{
+ struct macro *m = 0, *n;
+#ifdef LOW_MEM
+ struct token_fifo mv;
+#endif
+ int ltwws = 1, redef = 0;
+ char *mname = 0;
+ int narg;
+ size_t nt = 0;
+ long l = ls->line;
+
+#ifdef LOW_MEM
+ mv.art = mv.nt = 0;
+#endif
+ /* find the next non-white token on the line, this should be
+ the macro name */
+ while (!next_token(ls) && ls->ctok->type != NEWLINE) {
+ if (ttMWS(ls->ctok->type)) continue;
+ if (ls->ctok->type == NAME) mname = sdup(ls->ctok->name);
+ break;
+ }
+ if (mname == 0) {
+ error(l, "missing macro name");
+ return 1;
+ }
+ if (check_special_macro(mname)) {
+ error(l, "trying to redefine the special macro %s", mname);
+ goto warp_error;
+ }
+ /*
+ * A macro with this name may already exist: the new definition must
+ * then be identical to the old one, otherwise we emit an error.
+ */
+ if ((n = HTT_get(&macros, mname)) != 0) {
+ /* redefinition of a macro: we must check that we define
+ it identical */
+ redef = 1;
+#ifdef LOW_MEM
+ n->cval.rp = 0;
+#endif
+ freemem(mname);
+ mname = 0;
+ }
+ if (!redef) {
+ m = new_macro();
+ m->narg = -1;
+#ifdef LOW_MEM
+#define mval mv
+#else
+#define mval (m->val)
+#endif
+ }
+ if (next_token(ls)) {
+ /* input ended right after the name: object-like, empty body */
+ if (!redef) goto define_end;
+ if (n->narg >= 0) goto redef_error_2;
+ goto redef_check;
+ }
+ /*
+ * Check if the token immediately following the macro name is
+ * a left parenthesis; if so, then this is a macro with arguments.
+ * Collect their names and try to match the next parenthesis.
+ */
+ if (ls->ctok->type == LPAR) {
+ int i, j;
+ int need_comma = 0, saw_mdots = 0;
+
+ narg = 0;
+ while (!next_token(ls)) {
+ if (ls->ctok->type == NEWLINE) {
+ error(l, "truncated macro definition");
+ goto define_error;
+ }
+ if (ls->ctok->type == COMMA) {
+ if (saw_mdots) {
+ error(l, "'...' must end the macro "
+ "argument list");
+ goto warp_error;
+ }
+ if (!need_comma) {
+ error(l, "void macro argument");
+ goto warp_error;
+ }
+ need_comma = 0;
+ continue;
+ } else if (ls->ctok->type == NAME) {
+ if (saw_mdots) {
+ error(l, "'...' must end the macro "
+ "argument list");
+ goto warp_error;
+ }
+ if (need_comma) {
+ error(l, "missing comma in "
+ "macro argument list");
+ goto warp_error;
+ }
+ if (!redef) {
+ aol(m->arg, narg,
+ sdup(ls->ctok->name), 8);
+ /* we must keep track of m->narg
+ so that cleanup in case of
+ error works. */
+ m->narg = narg;
+ if (narg == 128
+ && (ls->flags & WARN_STANDARD))
+ warning(l, "more arguments to "
+ "macro than the ISO "
+ "limit (127)");
+#ifdef LOW_MEM
+ if (narg == 32767) {
+ error(l, "too many arguments "
+ "in macro definition "
+ "(max 32766)");
+ goto warp_error;
+ }
+#endif
+ } else {
+ /* this is a redefinition of the
+ macro; check equality between
+ old and new definitions */
+ if (narg >= n->narg) goto redef_error;
+ if (strcmp(ls->ctok->name,
+ n->arg[narg ++]))
+ goto redef_error;
+ }
+ need_comma = 1;
+ continue;
+ } else if ((ls->flags & MACRO_VAARG)
+ && ls->ctok->type == MDOTS) {
+ if (need_comma) {
+ error(l, "missing comma before '...'");
+ goto warp_error;
+ }
+ if (redef && !n->vaarg) goto redef_error;
+ if (!redef) m->vaarg = 1;
+ saw_mdots = 1;
+ need_comma = 1;
+ continue;
+ } else if (ls->ctok->type == RPAR) {
+ if (narg > 0 && !need_comma) {
+ error(l, "void macro argument");
+ goto warp_error;
+ }
+ if (redef && (narg != n->narg /* parameter count differs */
+ || (n->vaarg && !saw_mdots)))
+ goto redef_error;
+ break;
+ } else if (ttMWS(ls->ctok->type)) {
+ continue;
+ }
+ error(l, "invalid macro argument");
+ goto warp_error;
+ }
+ if (!redef) {
+ for (i = 1; i < narg; i ++) for (j = 0; j < i; j ++)
+ if (!strcmp(m->arg[i], m->arg[j])) {
+ error(l, "duplicate macro "
+ "argument");
+ goto warp_error;
+ }
+ }
+ if (!redef) m->narg = narg;
+ } else {
+ if (redef && n->narg >= 0) goto redef_error; /* old one had a parameter list */
+ if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD))
+ warning(ls->line, "identifier not followed by "
+ "whitespace in #define");
+ ls->flags |= READ_AGAIN;
+ narg = 0;
+ }
+
+ /* now, we have the arguments. Let's get the macro contents. */
+ while (!next_token(ls) && ls->ctok->type != NEWLINE) {
+ struct token t;
+
+ t.type = ls->ctok->type;
+ if (ltwws && ttMWS(t.type)) continue;
+ t.line = 0;
+ if (t.type == NAME) {
+ int i;
+
+ if ((ls->flags & MACRO_VAARG)
+ && !strcmp(ls->ctok->name, "__VA_ARGS__")) {
+ if (redef) {
+ if (!n->vaarg) goto redef_error;
+ } else if (!m->vaarg) {
+ error(l, "'__VA_ARGS__' is forbidden "
+ "in macros with a fixed "
+ "number of arguments");
+ goto warp_error;
+ }
+ t.type = MACROARG;
+ t.line = redef ? n->narg : m->narg;
+ }
+ for (i = 0; i < narg; i ++)
+ if (!strcmp(redef ? n->arg[i] : m->arg[i],
+ ls->ctok->name)) {
+ t.type = MACROARG;
+ /* this is a hack: we store the
+ argument number in the line field */
+ t.line = i;
+ break;
+ }
+ }
+ if (!redef && S_TOKEN(t.type)) t.name = sdup(ls->ctok->name);
+ if (ttMWS(t.type)) {
+ if (ltwws) continue;
+#ifdef SEMPER_FIDELIS
+ t.type = OPT_NONE;
+#else
+ t.type = NONE;
+#endif
+ ltwws = 1;
+ } else ltwws = 0;
+ if (!redef) {
+ /* we ensure that each macro token has a correct
+ line number */
+ if (t.type != MACROARG) t.line = 1;
+ aol(mval.t, mval.nt, t, TOKEN_LIST_MEMG);
+ } else {
+#ifdef LOW_MEM
+ int tt;
+
+ if (n->cval.rp >= n->cval.length) {
+#ifdef SEMPER_FIDELIS
+ if (t.type != OPT_NONE) goto redef_error;
+#else
+ if (t.type != NONE) goto redef_error;
+#endif
+ } else if (t.type != n->cval.t[n->cval.rp]) {
+ goto redef_error;
+ } else if (t.type == MACROARG) {
+ unsigned anum = n->cval.t[n->cval.rp + 1];
+
+ if (anum >= 128U) anum = ((anum & 127U) << 8)
+ | n->cval.t[n->cval.rp + 2];
+ if (anum != (unsigned)t.line) goto redef_error;
+ } else if (S_TOKEN(t.type) && strcmp(ls->ctok->name,
+ (char *)(n->cval.t + n->cval.rp + 1))) {
+ goto redef_error;
+ }
+ tt = n->cval.t[n->cval.rp ++];
+ if (S_TOKEN(tt)) n->cval.rp += 1
+ + strlen((char *)(n->cval.t + n->cval.rp));
+ else if (tt == MACROARG) {
+ if (n->cval.t[++ n->cval.rp] >= 128)
+ n->cval.rp ++;
+ }
+#else
+ if (nt >= n->val.nt) {
+#ifdef SEMPER_FIDELIS
+ if (t.type != OPT_NONE) goto redef_error;
+#else
+ if (t.type != NONE) goto redef_error;
+#endif
+ } else if (t.type != n->val.t[nt].type
+ || (t.type == MACROARG
+ && t.line != n->val.t[nt].line)
+ || (S_TOKEN(t.type) && strcmp(ls->ctok->name,
+ n->val.t[nt].name))) {
+ goto redef_error;
+ }
+#endif
+ nt ++;
+ }
+ }
+
+redef_check:
+ if (redef) {
+#ifdef LOW_MEM
+ if (n->cval.rp < n->cval.length) goto redef_error_2;
+#else
+ if (nt < n->val.nt) goto redef_error_2;
+#endif
+ return 0;
+ }
+
+ /* now we have the complete macro; perform some checks about
+ the operators # and ##, and, if everything is ok,
+ store the macro into the hash table */
+define_end:
+#ifdef SEMPER_FIDELIS
+ if (mval.nt && mval.t[mval.nt - 1].type == OPT_NONE) {
+#else
+ if (mval.nt && mval.t[mval.nt - 1].type == NONE) {
+#endif
+ mval.nt --;
+ if (mval.nt == 0) freemem(mval.t);
+ }
+ if (mval.nt != 0) {
+ size_t i;
+
+ /* some checks about the macro */
+ if (mval.t[0].type == DSHARP
+ || mval.t[0].type == DIG_DSHARP
+ || mval.t[mval.nt - 1].type == DSHARP
+ || mval.t[mval.nt - 1].type == DIG_DSHARP) {
+ error(l, "operator '##' may neither begin "
+ "nor end a macro");
+ goto define_error;
+ }
+ if (m->narg >= 0) for (i = 0; i < mval.nt; i ++)
+ if ((mval.t[i].type == SHARP
+ || mval.t[i].type == DIG_SHARP) &&
+ (i == (mval.nt - 1)
+ || (ttMWS(mval.t[i + 1].type) &&
+ (i == mval.nt - 2
+ || mval.t[i + 2].type != MACROARG))
+ || (!ttMWS(mval.t[i + 1].type)
+ && mval.t[i + 1].type != MACROARG))) {
+ error(l, "operator '#' not followed "
+ "by a macro argument");
+ goto define_error;
+ }
+ }
+#ifdef LOW_MEM
+ {
+ size_t i, l;
+
+ for (i = 0, l = 0; i < mval.nt; i ++) {
+ l ++;
+ if (S_TOKEN(mval.t[i].type))
+ l += 1 + strlen(mval.t[i].name);
+ else if (mval.t[i].type == MACROARG) {
+ l ++;
+ if (mval.t[i].line >= 128) l ++;
+ }
+ }
+ m->cval.length = l;
+ if (l) m->cval.t = getmem(l);
+ for (i = 0, l = 0; i < mval.nt; i ++) {
+ m->cval.t[l ++] = mval.t[i].type;
+ if (S_TOKEN(mval.t[i].type)) {
+ size_t x = 1 + strlen(mval.t[i].name);
+
+ mmv(m->cval.t + l, mval.t[i].name, x);
+ l += x;
+ freemem(mval.t[i].name);
+ }
+ else if (mval.t[i].type == MACROARG) {
+ unsigned anum = mval.t[i].line;
+
+ if (anum >= 128) {
+ m->cval.t[l ++] = 128 | (anum >> 8);
+ m->cval.t[l ++] = anum & 0xFF;
+ } else {
+ m->cval.t[l ++] = anum;
+ }
+ }
+ }
+ if (mval.nt) freemem(mval.t);
+ }
+#endif
+ HTT_put(&macros, m, mname);
+ freemem(mname);
+ if (emit_defines) print_macro(m);
+ return 0;
+
+redef_error:
+ while (ls->ctok->type != NEWLINE && !next_token(ls));
+redef_error_2:
+ error(l, "macro '%s' redefined unidentically", HASH_ITEM_NAME(n));
+ return 1;
+warp_error:
+ while (ls->ctok->type != NEWLINE && !next_token(ls));
+define_error:
+ if (m) del_macro(m);
+ if (mname) freemem(mname);
+#ifdef LOW_MEM
+ if (mv.nt) {
+ size_t i;
+
+ for (i = 0; i < mv.nt; i ++)
+ if (S_TOKEN(mv.t[i].type)) freemem(mv.t[i].name);
+ freemem(mv.t);
+ }
+#endif
+ return 1;
+#undef mval
+}
+
+/*
+ * Get the arguments for a macro. This code is tricky because there can
+ * be multiple sources for these arguments, if we are in the middle of
+ * a macro replacement; arguments are macro-replaced before inclusion
+ * into the macro replacement. *wr is set to 1 if whitespace was skipped
+ * before the opening parenthesis (or before the token found instead).
+ * return value:
+ * 0 arguments collected into atl[0 .. narg (+ 1 if vaarg) - 1]
+ * 1 no argument (last token read from next_token())
+ * 2 no argument (last token read from tfi)
+ * 3 no argument (nothing read)
+ * 4 error (already reported)
+ * Void arguments are allowed in C99.
+ */
+static int collect_arguments(struct lexer_state *ls, struct token_fifo *tfi,
+ int penury, struct token_fifo *atl, int narg, int vaarg, int *wr)
+{
+ int ltwws = 1, npar = 0, i;
+ struct token *ct = 0;
+ int read_from_fifo = 0;
+ long begin_line = ls->line;
+
+#define unravel(ls) (read_from_fifo = 0, !((tfi && tfi->art < tfi->nt \
+ && (read_from_fifo = 1) != 0 && (ct = tfi->t + (tfi->art ++))) \
+ || ((!tfi || penury) && !next_token(ls) && (ct = (ls)->ctok))))
+
+ /*
+ * unravel() fetches the next token into ct (from tfi first, then from
+ * ls when allowed) and is non-zero when none is left; atl is reset here.
+ */
+ for (i = 0; i < narg; i ++) atl[i].art = atl[i].nt = 0;
+ if (vaarg) atl[narg].art = atl[narg].nt = 0;
+ *wr = 0;
+ while (!unravel(ls)) { /* skip whitespace, look for the opening parenthesis */
+ if (!read_from_fifo && ct->type == NEWLINE) ls->ltwnl = 1;
+ if (ttWHI(ct->type)) {
+ *wr = 1;
+ continue;
+ }
+ if (ct->type == LPAR) {
+ npar = 1;
+ }
+ break;
+ }
+ if (!npar) { /* no '(' found: tell the caller where the last token came from */
+ if (ct == ls->ctok) return 1;
+ if (read_from_fifo) return 2;
+ return 3;
+ }
+ if (!read_from_fifo && ct == ls->ctok) ls->ltwnl = 0;
+ i = 0;
+ if ((narg + vaarg) == 0) { /* no parameter at all: only whitespace may precede ')' */
+ while(!unravel(ls)) {
+ if (ttWHI(ct->type)) continue;
+ if (ct->type == RPAR) goto harvested;
+ npar = 1;
+ goto too_many_args;
+ }
+ }
+ while (!unravel(ls)) {
+ struct token t;
+
+ if (ct->type == LPAR) npar ++;
+ else if (ct->type == RPAR && (-- npar) == 0) {
+ if (atl[i].nt != 0
+ && ttMWS(atl[i].t[atl[i].nt - 1].type))
+ atl[i].nt --;
+ i ++;
+ /*
+ * C99 standard states that at least one argument
+ * should be present for the ... part; to relax
+ * this behaviour, change 'narg + vaarg' to 'narg'.
+ */
+ if (i < (narg + vaarg)) {
+ error(begin_line, "not enough arguments "
+ "to macro");
+ return 4;
+ }
+ if (i > narg) {
+ if (!(ls->flags & MACRO_VAARG) || !vaarg)
+ goto too_many_args;
+ }
+ goto harvested;
+ } else if (ct->type == COMMA && npar <= 1 && i < narg) { /* top-level argument separator */
+ if (atl[i].nt != 0
+ && ttMWS(atl[i].t[atl[i].nt - 1].type))
+ atl[i].nt --;
+ if (++ i == narg) {
+ if (!(ls->flags & MACRO_VAARG) || !vaarg)
+ goto too_many_args;
+ }
+ if (i > 30000) goto too_many_args;
+ ltwws = 1;
+ continue;
+ } else if (ltwws && ttWHI(ct->type)) continue;
+
+ t.type = ct->type;
+ if (!read_from_fifo) t.line = ls->line; else t.line = ct->line;
+ /*
+ * Stringification applies only to macro arguments;
+ * so we handle here OPT_NONE.
+ * OPT_NONE is kept, but does not count as whitespace,
+ * and merges with other whitespace to give a fully
+ * qualified NONE token. Two OPT_NONE tokens merge.
+ * Initial and final OPT_NONE are discarded (initial
+ * is already done, as OPT_NONE is matched by ttWHI).
+ */
+ if (ttWHI(t.type)) {
+ if (t.type != OPT_NONE) {
+ t.type = NONE;
+#ifdef SEMPER_FIDELIS
+ t.name = sdup(" ");
+ throw_away(ls->gf, t.name);
+#endif
+ ltwws = 1;
+ }
+ if (atl[i].nt > 0
+ && atl[i].t[atl[i].nt - 1].type == OPT_NONE)
+ atl[i].nt --;
+ } else {
+ ltwws = 0;
+ if (S_TOKEN(t.type)) {
+ t.name = ct->name;
+ if (ct == (ls)->ctok) { /* copy the text when the token is ls->ctok itself */
+ t.name = sdup(t.name);
+ throw_away(ls->gf, t.name);
+ }
+ }
+ }
+ aol(atl[i].t, atl[i].nt, t, TOKEN_LIST_MEMG);
+ }
+ error(begin_line, "unfinished macro call");
+ return 4;
+too_many_args:
+ error(begin_line, "too many arguments to macro");
+ while (npar && !unravel(ls)) { /* consume tokens up to the matching ')' */
+ if (ct->type == LPAR) npar ++;
+ else if (ct->type == RPAR) npar --;
+ }
+ return 4;
+harvested:
+ if (i > 127 && (ls->flags & WARN_STANDARD))
+ warning(begin_line, "macro call with %d arguments (ISO "
+ "specifies 127 max)", i);
+ return 0;
+#undef unravel
+}
+
+/*
+ * concat_token() implements the ## operator: the spellings of t1 and t2
+ * are glued together and the result is lexed with the struct
+ * lexer_state dsharp_lexer.
+ *
+ * Law enforcement: the result is non-zero (error) unless the glued
+ * text is consumed entirely as a valid single token.
+ */
+struct lexer_state dsharp_lexer;
+
+static inline int concat_token(struct token *t1, struct token *t2)
+{
+ struct lexer_state *dl = &dsharp_lexer;
+ char *head = token_name(t1), *tail = token_name(t2);
+ size_t hlen = strlen(head), tlen = strlen(tail), total = hlen + tlen;
+ unsigned char *glued = getmem(total + 1);
+ int status;
+
+ mmv(glued, head, hlen);
+ mmv(glued + hlen, tail, tlen);
+ glued[total] = 0;
+ dl->input = 0;
+ dl->input_string = glued;
+ dl->pbuf = 0;
+ dl->ebuf = total;
+ dl->discard = 1;
+ dl->flags = DEFAULT_LEXER_FLAGS;
+ dl->pending_token = 0;
+ status = next_token(dl);
+ freemem(glued);
+ if (status == 1 || dl->pbuf < total || dl->pending_token) return 1;
+ return dl->pbuf == total && !dl->discard;
+}
+
+#ifdef PRAGMA_TOKENIZE
+/*
+ * tokenize_string() splits buf into tokens (whitespace dropped) and returns
+ * them as a single compressed string generated by compress_token_list(),
+ * or 0 if buf could not be lexed entirely; used for _Pragma processing.
+ */
+struct lexer_state tokenize_lexer;
+
+static char *tokenize_string(struct lexer_state *ls, char *buf)
+{
+ struct token_fifo tf;
+ size_t bl = strlen(buf);
+ int r; /* last next_token() status; only used as the loop condition */
+
+ tokenize_lexer.input = 0;
+ tokenize_lexer.input_string = (unsigned char *)buf;
+ tokenize_lexer.pbuf = 0;
+ tokenize_lexer.ebuf = bl;
+ tokenize_lexer.discard = 1;
+ tokenize_lexer.flags = ls->flags | LEXER; /* caller's flags, forced into lexer mode */
+ tokenize_lexer.pending_token = 0;
+ tf.art = tf.nt = 0;
+ while (!(r = next_token(&tokenize_lexer))) {
+ struct token t, *ct = tokenize_lexer.ctok;
+
+ if (ttWHI(ct->type)) continue;
+ t = *ct;
+ if (S_TOKEN(t.type)) t.name = sdup(t.name); /* private copy of the token text */
+ aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
+ }
+ if (tokenize_lexer.pbuf < bl) goto tokenize_error; /* lexing stopped before the end of buf */
+ return (char *)((compress_token_list(&tf)).t);
+
+tokenize_error:
+ if (tf.nt) {
+ for (tf.art = 0; tf.art < tf.nt; tf.art ++)
+ if (S_TOKEN(tf.t[tf.art].type))
+ freemem(tf.t[tf.art].name);
+ freemem(tf.t);
+ }
+ return 0;
+}
+#endif
+
+/*
+ * stringify_string() applies the # operator to the spelling x of a
+ * token list: the text is enclosed in double quotes, and a backslash
+ * is inserted before every '"' and '\' found inside a string literal or
+ * character constant (the delimiting '"' of a string included). The
+ * result is allocated with getmem().
+ *
+ * The text is scanned twice (size, then copy); the scanner state is
+ * reset for each pass, and an escape only protects the character that
+ * follows it, so both passes always agree on the output length.
+ */
+static inline char *stringify_string(char *x)
+{
+ size_t l = 0;
+ int pass;
+ char *y, *d = 0;
+
+ for (pass = 0; pass < 2; pass ++) {
+ int delim = 0; /* 0, or the quote that opened the literal */
+ int escaped = 0; /* previous character was an active '\' */
+
+ if (pass) d[0] = '"';
+ for (l = 1, y = x; *y; y ++, l ++) {
+ int must_quote = 0;
+
+ if (delim) {
+ must_quote = (*y == '"' || *y == '\\');
+ if (escaped) escaped = 0;
+ else if (*y == '\\') escaped = 1;
+ else if (*y == delim) delim = 0;
+ } else if (*y == '"' || *y == '\'') {
+ delim = *y;
+ must_quote = (*y == '"');
+ }
+ if (must_quote) {
+ if (pass) d[l] = '\\';
+ l ++;
+ }
+ if (pass) d[l] = *y;
+ }
+ if (!pass) d = getmem(l + 2);
+ }
+ d[l] = '"';
+ d[l + 1] = 0;
+ return d;
+}
+
+/*
+ * stringify() builds the constant string produced by the # operator
+ * for the token list tf; OPT_NONE and types >= CPPERR are skipped.
+ */
+static char *stringify(struct token_fifo *tf)
+{
+ size_t total = 0, k;
+ char *flat, *wr, *res;
+
+ for (k = 0; k < tf->nt; k ++) {
+ struct token *tk = tf->t + k;
+ if (tk->type >= CPPERR || tk->type == OPT_NONE) continue;
+ total += strlen(token_name(tk));
+ }
+ if (!total) return sdup("\"\"");
+ wr = flat = getmem(total + 1);
+ for (k = 0; k < tf->nt; k ++) {
+ struct token *tk = tf->t + k;
+ if (tk->type >= CPPERR || tk->type == OPT_NONE) continue;
+ /* strcpy() also writes the trailing 0 (the string is not empty) */
+ strcpy(wr, token_name(tk));
+ wr += strlen(wr);
+ }
+ res = stringify_string(flat);
+ freemem(flat);
+ return res;
+}
+
+/*
+ * Two strings evaluated at initialization time, to handle the __TIME__
+ * and __DATE__ special macros.
+ * substitute_macro() emits them as the text of STRING tokens.
+ * C99 specifies that these macros should remain constant throughout
+ * the whole preprocessing.
+ */
+char compile_time[12], compile_date[24];
+
+/*
+ * substitute_macro() performs the macro substitution. It is called when
+ * an identifier recognized as a macro name has been found; this function
+ * tries to collect the arguments (if needed), applies # and ## operators
+ * and perform recursive and nested macro expansions.
+ *
+ * In the substitution of a macro, we remove all newlines that were in the
+ * arguments. This might confuse error reporting (which could report
+ * erroneous line numbers) or have worse effects if the preprocessor is
+ * used for another language pickier than C. Since the interface between
+ * the preprocessor and the compiler is not fully specified, I believe
+ * that this is no violation of the standard. Comments welcome.
+ *
+ * We take tokens from tfi. If tfi has no more tokens to give: we may
+ * take some tokens from ls to complete a call (fetch arguments) if
+ * and only if penury is non zero.
+ */
+int substitute_macro(struct lexer_state *ls, struct macro *m,
+ struct token_fifo *tfi, int penury, int reject_nested, long l)
+{
+ char *mname = HASH_ITEM_NAME(m);
+ struct token_fifo *atl, etl;
+ struct token t, *ct;
+ int i, save_nest = m->nest;
+ size_t save_art, save_tfi, etl_limit;
+ int ltwds, ntwds, ltwws;
+ int pragma_op = 0;
+
+ /*
+ * Reject the replacement, if we are already inside the macro.
+ */
+ if (m->nest > reject_nested) {
+ t.type = NAME;
+ t.line = ls->line;
+ t.name = mname;
+ print_token(ls, &t, 0);
+ return 0;
+ }
+
+ /*
+ * put a separation from preceeding tokens
+ */
+ print_space(ls);
+
+ /*
+ * Check if the macro is a special one.
+ */
+ if ((i = check_special_macro(mname)) >= MAC_SPECIAL) {
+ /* we have a special macro */
+ switch (i) {
+ char buf[30], *bbuf, *cfn;
+
+ case MAC_LINE:
+ t.type = NUMBER;
+ t.line = l;
+ sprintf(buf, "%ld", l);
+ t.name = buf;
+ print_space(ls);
+ print_token(ls, &t, 0);
+ break;
+ case MAC_FILE:
+ t.type = STRING;
+ t.line = l;
+ cfn = current_long_filename ?
+ current_long_filename : current_filename;
+ bbuf = getmem(2 * strlen(cfn) + 3);
+ {
+ char *c, *d;
+ int lcwb = 0;
+
+ bbuf[0] = '"';
+ for (c = cfn, d = bbuf + 1; *c; c ++) {
+ if (*c == '\\') {
+ if (lcwb) continue;
+ *(d ++) = '\\';
+ lcwb = 1;
+ } else lcwb = 0;
+ *(d ++) = *c;
+ }
+ *(d ++) = '"';
+ *(d ++) = 0;
+ }
+ t.name = bbuf;
+ print_space(ls);
+ print_token(ls, &t, 0);
+ freemem(bbuf);
+ break;
+ case MAC_DATE:
+ t.type = STRING;
+ t.line = l;
+ t.name = compile_date;
+ print_space(ls);
+ print_token(ls, &t, 0);
+ break;
+ case MAC_TIME:
+ t.type = STRING;
+ t.line = l;
+ t.name = compile_time;
+ print_space(ls);
+ print_token(ls, &t, 0);
+ break;
+ case MAC_STDC:
+ t.type = NUMBER;
+ t.line = l;
+ t.name = "1";
+ print_space(ls);
+ print_token(ls, &t, 0);
+ break;
+ case MAC_PRAGMA:
+ if (reject_nested > 0) {
+ /* do not replace _Pragma() unless toplevel */
+ t.type = NAME;
+ t.line = ls->line;
+ t.name = mname;
+ print_token(ls, &t, 0);
+ return 0;
+ }
+ pragma_op = 1;
+ goto collect_args;
+#ifdef AUDIT
+ default:
+ ouch("unbekanntes fliegendes macro");
+#endif
+ }
+ return 0;
+ }
+
+ /*
+ * If the macro has arguments, collect them.
+ */
+collect_args:
+ if (m->narg >= 0) {
+ unsigned long save_flags = ls->flags;
+ int wr = 0;
+
+ ls->flags |= LEXER;
+ if (m->narg > 0 || m->vaarg)
+ atl = getmem((m->narg + m->vaarg)
+ * sizeof(struct token_fifo));
+ switch (collect_arguments(ls, tfi, penury, atl,
+ m->narg, m->vaarg, &wr)) {
+ case 1:
+ /* the macro expected arguments, but we did not
+ find any; the last read token should be read
+ again. */
+ ls->flags = save_flags | READ_AGAIN;
+ goto no_argument_next;
+ case 2:
+ tfi->art --;
+ /* fall through */
+ case 3:
+ ls->flags = save_flags;
+ no_argument_next:
+ t.type = NAME;
+ t.line = l;
+ t.name = mname;
+ print_token_nailed(ls, &t, l);
+ if (wr) {
+ t.type = NONE;
+ t.line = l;
+#ifdef SEMPER_FIDELIS
+ t.name = " ";
+#endif
+ print_token(ls, &t, 0);
+ goto exit_macro_2;
+ }
+ goto exit_macro_1;
+ case 4:
+ ls->flags = save_flags;
+ goto exit_error_1;
+ }
+ ls->flags = save_flags;
+ }
+
+ /*
+ * If the macro is _Pragma, and we got here, then we have
+ * exactly one argument. We check it, unstringize it, and
+ * emit a PRAGMA token.
+ */
+ if (pragma_op) {
+ char *pn;
+
+ if (atl[0].nt != 1 || atl[0].t[0].type != STRING) {
+ error(ls->line, "invalid argument to _Pragma");
+ if (atl[0].nt) freemem(atl[0].t);
+ freemem(atl);
+ goto exit_error;
+ }
+ pn = atl[0].t[0].name;
+ if ((pn[0] == '"' && pn[1] == '"') || (pn[0] == 'L'
+ && pn[1] == '"' && pn[2] == '"')) {
+ /* void pragma -- just ignore it */
+ freemem(atl[0].t);
+ freemem(atl);
+ return 0;
+ }
+ if (ls->flags & TEXT_OUTPUT) {
+#ifdef PRAGMA_DUMP
+ /*
+ * This code works because we actually evaluate arguments in a
+ * lazy way: we scan a macro argument only if it appears in the
+ * output, and exactly as many times as it appears. Therefore,
+ * _Pragma() will get evaluated just like they should.
+ */
+ char *c = atl[0].t[0].name, *d;
+
+ for (d = "\n#pragma "; *d; d ++) put_char(ls, *d);
+ d = (*c == 'L') ? c + 2 : c + 1;
+ for (; *d != '"'; d ++) {
+ if (*d == '\\' && (*(d + 1) == '\\'
+ || *(d + 1) == '"')) {
+ d ++;
+ }
+ put_char(ls, *d);
+ }
+ put_char(ls, '\n');
+ ls->oline = ls->line;
+ enter_file(ls, ls->flags);
+#else
+ if (ls->flags & WARN_PRAGMA)
+ warning(ls->line,
+ "_Pragma() ignored and not dumped");
+#endif
+ } else if (ls->flags & HANDLE_PRAGMA) {
+ char *c = atl[0].t[0].name, *d, *buf;
+ struct token t;
+
+ /* a wide string is a string */
+ if (*c == 'L') c ++;
+ c ++;
+ for (buf = d = getmem(strlen(c)); *c != '"'; c ++) {
+ if (*c == '\\' && (*(c + 1) == '\\'
+ || *(c + 1) == '"')) {
+ *(d ++) = *(++ c);
+ } else *(d ++) = *c;
+ }
+ *d = 0;
+ t.type = PRAGMA;
+ t.line = ls->line;
+#ifdef PRAGMA_TOKENIZE
+ t.name = tokenize_string(ls, buf);
+ freemem(buf);
+ buf = t.name;
+ if (!buf) {
+ freemem(atl[0].t);
+ freemem(atl);
+ goto exit_error;
+ }
+#else
+ t.name = buf;
+#endif
+ aol(ls->toplevel_of->t, ls->toplevel_of->nt,
+ t, TOKEN_LIST_MEMG);
+ throw_away(ls->gf, buf);
+ }
+ freemem(atl[0].t);
+ freemem(atl);
+ return 0;
+ }
+
+ /*
+ * Now we expand and replace the arguments in the macro; we
+ * also handle '#' and '##'. If we find an argument, that has
+ * to be replaced, we expand it in its own token list, then paste
+ * it. Tricky point: when we paste an argument, we must scan
+ * again the resulting list for further replacements. This
+ * implies problems with regards to nesting self-referencing
+ * macros.
+ *
+ * We do then YAUH (yet another ugly hack): if a macro is replaced,
+ * and nested replacement exhibit the same macro, we mark it with
+ * a negative line number. All produced negative line numbers
+ * must be cleaned in the end.
+ */
+
+#define ZAP_LINE(t) do { \
+ if ((t).type == NAME) { \
+ struct macro *zlm = HTT_get(&macros, (t).name); \
+ if (zlm && zlm->nest > reject_nested) \
+ (t).line = -1 - (t).line; \
+ } \
+ } while (0)
+
+#ifdef LOW_MEM
+ save_art = m->cval.rp;
+ m->cval.rp = 0;
+#else
+ save_art = m->val.art;
+ m->val.art = 0;
+#endif
+ etl.art = etl.nt = 0;
+ m->nest = reject_nested + 1;
+ ltwds = ntwds = 0;
+#ifdef LOW_MEM
+ while (m->cval.rp < m->cval.length) {
+#else
+ while (m->val.art < m->val.nt) {
+#endif
+ size_t next, z;
+#ifdef LOW_MEM
+ struct token uu;
+
+ ct = &uu;
+ ct->line = 1;
+ t.type = ct->type = m->cval.t[m->cval.rp ++];
+ if (ct->type == MACROARG) {
+ unsigned anum = m->cval.t[m->cval.rp ++];
+
+ if (anum >= 128U) anum = ((anum & 127U) << 8)
+ | (unsigned)m->cval.t[m->cval.rp ++];
+ ct->line = anum;
+ } else if (S_TOKEN(ct->type)) {
+ t.name = ct->name = (char *)(m->cval.t + m->cval.rp);
+ m->cval.rp += 1 + strlen(ct->name);
+ }
+#ifdef SEMPER_FIDELIS
+ else if (ct->type == OPT_NONE) {
+ t.type = ct->type = NONE;
+ t.name = ct->name = " ";
+ }
+#endif
+ t.line = ls->line;
+ next = m->cval.rp;
+ if ((next < m->cval.length && (m->cval.t[z = next] == DSHARP
+ || m->cval.t[z = next] == DIG_DSHARP))
+ || ((next + 1) < m->cval.length
+ && ttWHI(m->cval.t[next])
+ && (m->cval.t[z = next + 1] == DSHARP
+ || m->cval.t[z = next + 1] == DIG_DSHARP))) {
+ ntwds = 1;
+ m->cval.rp = z;
+ } else ntwds = 0;
+#else
+ ct = m->val.t + (m->val.art ++);
+ next = m->val.art;
+ t.type = ct->type;
+ t.line = ls->line;
+#ifdef SEMPER_FIDELIS
+ if (t.type == OPT_NONE) {
+ t.type = NONE;
+ t.name = " ";
+ } else
+#endif
+ t.name = ct->name;
+ if ((next < m->val.nt && (m->val.t[z = next].type == DSHARP
+ || m->val.t[z = next].type == DIG_DSHARP))
+ || ((next + 1) < m->val.nt
+ && ttWHI(m->val.t[next].type)
+ && (m->val.t[z = next + 1].type == DSHARP
+ || m->val.t[z = next + 1].type == DIG_DSHARP))) {
+ ntwds = 1;
+ m->val.art = z;
+ } else ntwds = 0;
+#endif
+ if (ct->type == MACROARG) {
+#ifdef DSHARP_TOKEN_MERGE
+ int need_opt_space = 1;
+#endif
+ z = ct->line; /* the argument number is there */
+ if (ltwds && atl[z].nt != 0 && etl.nt) {
+ if (concat_token(etl.t + (-- etl.nt),
+ atl[z].t)) {
+ warning(ls->line, "operator '##' "
+ "produced the invalid token "
+ "'%s%s'",
+ token_name(etl.t + etl.nt),
+ token_name(atl[z].t));
+#if 0
+/* obsolete */
+#ifdef LOW_MEM
+ m->cval.rp = save_art;
+#else
+ m->val.art = save_art;
+#endif
+ etl.nt ++;
+ goto exit_error_2;
+#endif
+ etl.nt ++;
+ atl[z].art = 0;
+#ifdef DSHARP_TOKEN_MERGE
+ need_opt_space = 0;
+#endif
+ } else {
+ if (etl.nt == 0) freemem(etl.t);
+ else if (!ttWHI(etl.t[etl.nt - 1]
+ .type)) {
+ t.type = OPT_NONE;
+ t.line = ls->line;
+ aol(etl.t, etl.nt, t,
+ TOKEN_LIST_MEMG);
+ }
+ t.type = dsharp_lexer.ctok->type;
+ t.line = ls->line;
+ if (S_TOKEN(t.type)) {
+ t.name = sdup(dsharp_lexer
+ .ctok->name);
+ throw_away(ls->gf, t.name);
+ }
+ ZAP_LINE(t);
+ aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG);
+ atl[z].art = 1;
+ }
+ } else atl[z].art = 0;
+ if (
+#ifdef DSHARP_TOKEN_MERGE
+ need_opt_space &&
+#endif
+ atl[z].art < atl[z].nt && (!etl.nt
+ || !ttWHI(etl.t[etl.nt - 1].type))) {
+ t.type = OPT_NONE;
+ t.line = ls->line;
+ aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG);
+ }
+ if (ltwds || ntwds) {
+ while (atl[z].art < atl[z].nt) {
+ t = atl[z].t[atl[z].art ++];
+ t.line = ls->line;
+ ZAP_LINE(t);
+ aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG);
+ }
+ } else {
+ struct token_fifo *save_tf;
+ unsigned long save_flags;
+ int ret = 0;
+
+ atl[z].art = 0;
+ save_tf = ls->output_fifo;
+ ls->output_fifo = &etl;
+ save_flags = ls->flags;
+ ls->flags |= LEXER;
+ while (atl[z].art < atl[z].nt) {
+ struct macro *nm;
+ struct token *cct;
+
+ cct = atl[z].t + (atl[z].art ++);
+ if (cct->type == NAME
+ && cct->line >= 0
+ && (nm = HTT_get(&macros,
+ cct->name))
+ && nm->nest <=
+ (reject_nested + 1)) {
+ ret |= substitute_macro(ls,
+ nm, atl + z, 0,
+ reject_nested + 1, l);
+ continue;
+ }
+ t = *cct;
+ ZAP_LINE(t);
+ aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG);
+ }
+ ls->output_fifo = save_tf;
+ ls->flags = save_flags;
+ if (ret) {
+#ifdef LOW_MEM
+ m->cval.rp = save_art;
+#else
+ m->val.art = save_art;
+#endif
+ goto exit_error_2;
+ }
+ }
+ if (!ntwds && (!etl.nt
+ || !ttWHI(etl.t[etl.nt - 1].type))) {
+ t.type = OPT_NONE;
+ t.line = ls->line;
+ aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG);
+ }
+ ltwds = 0;
+ continue;
+ }
+ /*
+ * This code is definitely cursed.
+ *
+ * For the extremely brave reader who tries to understand
+ * what is happening: ltwds is a flag meaning "last token
+ * was double-sharp" and ntwds means "next token will be
+ * double-sharp". The tokens are from the macro definition,
+ * and scanned from left to right. Arguments that are
+ * not involved in a #/## construction are macro-expanded
+ * separately, then included into the token stream.
+ */
+ if (ct->type == DSHARP || ct->type == DIG_DSHARP) {
+ if (ltwds) {
+ error(ls->line, "quad sharp");
+#ifdef LOW_MEM
+ m->cval.rp = save_art;
+#else
+ m->val.art = save_art;
+#endif
+ goto exit_error_2;
+ }
+#ifdef LOW_MEM
+ if (m->cval.rp < m->cval.length
+ && ttMWS(m->cval.t[m->cval.rp]))
+ m->cval.rp ++;
+#else
+ if (m->val.art < m->val.nt
+ && ttMWS(m->val.t[m->val.art].type))
+ m->val.art ++;
+#endif
+ ltwds = 1;
+ continue;
+ } else if (ltwds && etl.nt != 0) {
+ if (concat_token(etl.t + (-- etl.nt), ct)) {
+ warning(ls->line, "operator '##' produced "
+ "the invalid token '%s%s'",
+ token_name(etl.t + etl.nt),
+ token_name(ct));
+#if 0
+/* obsolete */
+#ifdef LOW_MEM
+ m->cval.rp = save_art;
+#else
+ m->val.art = save_art;
+#endif
+ etl.nt ++;
+ goto exit_error_2;
+#endif
+ etl.nt ++;
+ } else {
+ if (etl.nt == 0) freemem(etl.t);
+ t.type = dsharp_lexer.ctok->type;
+ t.line = ls->line;
+ if (S_TOKEN(t.type)) {
+ t.name = sdup(dsharp_lexer.ctok->name);
+ throw_away(ls->gf, t.name);
+ }
+ ct = &t;
+ }
+ }
+ ltwds = 0;
+#ifdef LOW_MEM
+ if ((ct->type == SHARP || ct->type == DIG_SHARP)
+ && next < m->cval.length
+ && (m->cval.t[next] == MACROARG
+ || (ttMWS(m->cval.t[next])
+ && (next + 1) < m->cval.length
+ && m->cval.t[next + 1] == MACROARG))) {
+
+ unsigned anum;
+#else
+ if ((ct->type == SHARP || ct->type == DIG_SHARP)
+ && next < m->val.nt
+ && (m->val.t[next].type == MACROARG
+ || (ttMWS(m->val.t[next].type)
+ && (next + 1) < m->val.nt
+ && m->val.t[next + 1].type == MACROARG))) {
+#endif
+ /*
+ * We have a # operator followed by (an optional
+ * whitespace and) a macro argument; this means
+ * stringification. So be it.
+ */
+#ifdef LOW_MEM
+ if (ttMWS(m->cval.t[next])) m->cval.rp ++;
+#else
+ if (ttMWS(m->val.t[next].type)) m->val.art ++;
+#endif
+ t.type = STRING;
+#ifdef LOW_MEM
+ anum = m->cval.t[++ m->cval.rp];
+ if (anum >= 128U) anum = ((anum & 127U) << 8)
+ | (unsigned)m->cval.t[++ m->cval.rp];
+ t.name = stringify(atl + anum);
+ m->cval.rp ++;
+#else
+ t.name = stringify(atl +
+ (size_t)(m->val.t[m->val.art ++].line));
+#endif
+ throw_away(ls->gf, t.name);
+ ct = &t;
+ /*
+ * There is no need for extra spaces here.
+ */
+ }
+ t = *ct;
+ ZAP_LINE(t);
+ aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG);
+ }
+#ifdef LOW_MEM
+ m->cval.rp = save_art;
+#else
+ m->val.art = save_art;
+#endif
+
+ /*
+ * Now etl contains the expanded macro, to be parsed again for
+ * further expansions -- much easier, since '#' and '##' have
+ * already been handled.
+ * However, we might need some input from tfi. So, we paste
+ * the contents of tfi after etl, and we put back what was
+ * not used.
+ *
+ * Some adjacent spaces are merged; only unique NONE, or sequences
+ * OPT_NONE NONE are emitted.
+ */
+ etl_limit = etl.nt;
+ if (tfi) {
+ save_tfi = tfi->art;
+ while (tfi->art < tfi->nt) aol(etl.t, etl.nt,
+ tfi->t[tfi->art ++], TOKEN_LIST_MEMG);
+ }
+ ltwws = 0;
+ while (etl.art < etl_limit) {
+ struct macro *nm;
+
+ ct = etl.t + (etl.art ++);
+ if (ct->type == NAME && ct->line >= 0
+ && (nm = HTT_get(&macros, ct->name))) {
+ if (substitute_macro(ls, nm, &etl,
+ penury, reject_nested, l)) {
+ m->nest = save_nest;
+ goto exit_error_2;
+ }
+ ltwws = 0;
+ continue;
+ }
+ if (ttMWS(ct->type)) {
+ if (ltwws == 1) {
+ if (ct->type == OPT_NONE) continue;
+ ltwws = 2;
+ } else if (ltwws == 2) continue;
+ else if (ct->type == OPT_NONE) ltwws = 1;
+ else ltwws = 2;
+ } else ltwws = 0;
+ if (ct->line >= 0) ct->line = l;
+ print_token(ls, ct, reject_nested ? 0 : l);
+ }
+ if (etl.nt) freemem(etl.t);
+ if (tfi) {
+ tfi->art = save_tfi + (etl.art - etl_limit);
+ }
+
+exit_macro_1:
+ print_space(ls);
+exit_macro_2:
+ for (i = 0; i < (m->narg + m->vaarg); i ++)
+ if (atl[i].nt) freemem(atl[i].t);
+ if (m->narg > 0 || m->vaarg) freemem(atl);
+ m->nest = save_nest;
+ return 0;
+
+exit_error_2:
+ if (etl.nt) freemem(etl.t);
+exit_error_1:
+ for (i = 0; i < (m->narg + m->vaarg); i ++)
+ if (atl[i].nt) freemem(atl[i].t);
+ if (m->narg > 0 || m->vaarg) freemem(atl);
+ m->nest = save_nest;
+exit_error:
+ return 1;
+}
+
+/*
+ * print already defined macros
+ *
+ * Scans the global macro table with HTT_scan(), handing it print_macro
+ * as the per-entry callback.
+ */
+void print_defines(void)
+{
+ HTT_scan(&macros, print_macro);
+}
+
+/*
+ * define_macro() defines a new macro from a string in the command-line
+ * syntax "macro" or "macro=def". When the "=def" part is absent, the
+ * macro gets the single NUMBER token "1" as replacement.
+ *
+ * The string is duplicated first; the caller's buffer is not modified.
+ *
+ * It returns non-zero on error.
+ */
+int define_macro(struct lexer_state *ls, char *def)
+{
+	char *copy = sdup(def), *eq = copy;
+	int status = 0;
+
+	/* find the '=' (if any) that terminates the macro name */
+	while (*eq && *eq != '=') eq ++;
+
+	if (*eq) {
+		/*
+		 * "macro=def": rewrite the copy as "macro def\n" and hand it
+		 * to handle_define() through a temporary buffer lexer.
+		 */
+		size_t n;
+
+		*eq = ' ';
+		n = strlen(copy) + 1;
+		if (eq == copy) {
+			error(-1, "void macro name");
+			status = 1;
+		} else {
+			struct lexer_state lls;
+
+			/* the newline takes the place of the final NUL */
+			copy[n - 1] = '\n';
+			init_buf_lexer_state(&lls, 0);
+			lls.flags = ls->flags | LEXER;
+			lls.input = 0;
+			lls.input_string = (unsigned char *)copy;
+			lls.pbuf = 0;
+			lls.ebuf = n;
+			lls.line = -1;
+			status = handle_define(&lls);
+			free_lexer_state(&lls);
+		}
+	} else if (!*copy) {
+		error(-1, "void macro name");
+		status = 1;
+	} else {
+		struct macro *m = HTT_get(&macros, copy);
+		int clash = 0;
+
+		/* an existing macro is tolerated only if it is already "1" */
+		if (m) {
+#ifdef LOW_MEM
+			clash = (m->cval.length != 3
+				|| m->cval.t[0] != NUMBER
+				|| strcmp((char *)(m->cval.t + 1), "1"));
+#else
+			clash = (m->val.nt != 1
+				|| m->val.t[0].type != NUMBER
+				|| strcmp(m->val.t[0].name, "1"));
+#endif
+		}
+		if (clash) {
+			error(-1, "macro %s already defined", copy);
+			status = 1;
+		} else {
+#ifndef LOW_MEM
+			struct token t;
+#endif
+
+			m = new_macro();
+#ifdef LOW_MEM
+			m->cval.length = 3;
+			m->cval.t = getmem(3);
+			m->cval.t[0] = NUMBER;
+			m->cval.t[1] = '1';
+			m->cval.t[2] = 0;
+#else
+			t.type = NUMBER;
+			t.name = sdup("1");
+			aol(m->val.t, m->val.nt, t, TOKEN_LIST_MEMG);
+#endif
+			HTT_put(&macros, m, copy);
+		}
+	}
+	freemem(copy);
+	return status;
+}
+
+/*
+ * undef_macro() removes the macro whose name is given as "def" from
+ * the macro table. Asking for a macro that does not exist is not an
+ * error.
+ *
+ * It returns non-zero on error (empty macro name, or an attempt to
+ * undefine a special macro), 0 otherwise.
+ */
+int undef_macro(struct lexer_state *ls, char *def)
+{
+	if (!*def) {
+		error(-1, "void macro name");
+		return 1;
+	}
+	/* nothing to do when the macro is not defined */
+	if (!HTT_get(&macros, def)) return 0;
+
+	if (check_special_macro(def)) {
+		error(-1, "trying to undef special macro %s", def);
+		return 1;
+	}
+	HTT_del(&macros, def);
+	return 0;
+}
+
+/*
+ * We saw a #ifdef directive. Parse the rest of the line.
+ * return value: 1 if the macro is defined, 0 if it is not, -1 on error
+ */
+int handle_ifdef(struct lexer_state *ls)
+{
+	int verdict;
+	int warn_once = 1;
+
+	/* skip leading whitespace; the line must hold something else */
+	for (;;) {
+		if (next_token(ls) || ls->ctok->type == NEWLINE) {
+			error(ls->line, "unfinished #ifdef");
+			return -1;
+		}
+		if (!ttMWS(ls->ctok->type)) break;
+	}
+
+	if (ls->ctok->type == NAME) {
+		verdict = (HTT_get(&macros, ls->ctok->name) != 0);
+	} else {
+		error(ls->line, "illegal macro name for #ifdef");
+		verdict = -1;
+	}
+
+	/* consume the rest of the line, warning once about garbage */
+	while (!next_token(ls) && ls->ctok->type != NEWLINE) {
+		if (warn_once && !ttWHI(ls->ctok->type)
+			&& (ls->flags & WARN_STANDARD)) {
+			warning(ls->line, "trailing garbage in #ifdef");
+			warn_once = 0;
+		}
+	}
+	return verdict;
+}
+
+/*
+ * for #undef
+ * return value: 1 on error, 0 on success. Undefining a macro that was
+ * already not defined is not an error; undefining a special macro is.
+ */
+int handle_undef(struct lexer_state *ls)
+{
+	int failed = 0;
+	int warn_once = 1;
+
+	/* skip leading whitespace; the line must hold something else */
+	for (;;) {
+		if (next_token(ls) || ls->ctok->type == NEWLINE) {
+			error(ls->line, "unfinished #undef");
+			return 1;
+		}
+		if (!ttMWS(ls->ctok->type)) break;
+	}
+
+	if (ls->ctok->type != NAME) {
+		error(ls->line, "illegal macro name for #undef");
+		failed = 1;
+	} else if (HTT_get(&macros, ls->ctok->name) != 0) {
+		if (check_special_macro(ls->ctok->name)) {
+			error(ls->line, "trying to undef special macro %s",
+				ls->ctok->name);
+			failed = 1;
+		} else {
+			if (emit_defines)
+				fprintf(emit_output, "#undef %s\n",
+					ls->ctok->name);
+			HTT_del(&macros, ls->ctok->name);
+		}
+	}
+
+	if (failed) {
+		/* error path: drop the rest of the line silently */
+		while (!next_token(ls) && ls->ctok->type != NEWLINE);
+		return 1;
+	}
+
+	/* success path: drop the rest of the line, warning once */
+	while (!next_token(ls) && ls->ctok->type != NEWLINE) {
+		if (warn_once && !ttWHI(ls->ctok->type)
+			&& (ls->flags & WARN_STANDARD)) {
+			warning(ls->line, "trailing garbage in #undef");
+			warn_once = 0;
+		}
+	}
+	return 0;
+}
+
+/*
+ * for #ifndef
+ * return value: 0 if the macro is defined, 1 if it is not, -1 on error.
+ *
+ * When protect_detect.state is 1, the tested macro name is duplicated
+ * into protect_detect.macro and the state moves to 2 (presumably part
+ * of include-guard detection -- confirm against the users of
+ * protect_detect).
+ */
+int handle_ifndef(struct lexer_state *ls)
+{
+	while (!next_token(ls)) {
+		int tgd = 1;
+
+		if (ls->ctok->type == NEWLINE) break;
+		if (ttMWS(ls->ctok->type)) continue;
+		if (ls->ctok->type == NAME) {
+			int x = (HTT_get(&macros, ls->ctok->name) == 0);
+
+			/*
+			 * Record the name now, while ls->ctok is still the
+			 * NAME token: the loop below advances the lexer, and
+			 * any trailing token would otherwise be recorded
+			 * instead of the macro name.
+			 */
+			if (protect_detect.state == 1) {
+				protect_detect.state = 2;
+				protect_detect.macro = sdup(ls->ctok->name);
+			}
+			/* consume the rest of the line, warning once */
+			while (!next_token(ls) && ls->ctok->type != NEWLINE)
+				if (tgd && !ttWHI(ls->ctok->type)
+					&& (ls->flags & WARN_STANDARD)) {
+					warning(ls->line, "trailing garbage "
+						"in #ifndef");
+					tgd = 0;
+				}
+			return x;
+		}
+		error(ls->line, "illegal macro name for #ifndef");
+		while (!next_token(ls) && ls->ctok->type != NEWLINE)
+			if (tgd && !ttWHI(ls->ctok->type)
+				&& (ls->flags & WARN_STANDARD)) {
+				warning(ls->line, "trailing garbage in "
+					"#ifndef");
+				tgd = 0;
+			}
+		return -1;
+	}
+	error(ls->line, "unfinished #ifndef");
+	return -1;
+}
+
+/*
+ * erase the macro table.
+ *
+ * HTT_kill() is only called when macros_init_done is set, so calling
+ * this before init_macros(), or twice in a row, leaves the table
+ * untouched. The flag is cleared in every case.
+ */
+void wipe_macros(void)
+{
+ if (macros_init_done) HTT_kill(&macros);
+ macros_init_done = 0;
+}
+
+/*
+ * initialize the macro table
+ *
+ * A previously initialized table is wiped first. The table is then set
+ * up through HTT_init() with del_macro as its second argument, and the
+ * special macros are added unless no_special_macros is set.
+ */
+void init_macros(void)
+{
+ wipe_macros();
+ HTT_init(&macros, del_macro);
+ macros_init_done = 1;
+ if (!no_special_macros) add_special_macros();
+}
+
+/*
+ * find a macro from its name
+ *
+ * Returns the result of HTT_get() on the global macro table; the other
+ * lookups in this file treat a null result as "not defined".
+ */
+struct macro *get_macro(char *name)
+{
+ return HTT_get(&macros, name);
+}
diff --git a/libexec/auxcpp/mem.c b/libexec/auxcpp/mem.c
new file mode 100644
index 00000000000..dabde952e32
--- /dev/null
+++ b/libexec/auxcpp/mem.c
@@ -0,0 +1,328 @@
+/*
+ * Memory manipulation routines
+ * (c) Thomas Pornin 1998 - 2002
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "mem.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Shifting a pointer by this many bytes is supposed to preserve
+ * alignment requirements. This is *not* guaranteed by the standard
+ * but should work everywhere anyway.
+ *
+ * The value is the larger of sizeof(long) and sizeof(long double).
+ */
+#define ALIGNSHIFT (sizeof(long) > sizeof(long double) \
+ ? sizeof(long) : sizeof(long double))
+
+#ifdef AUDIT
+/*
+ * AUDIT build: terminate the program through abort().
+ */
+void die(void)
+{
+ abort();
+}
+
+/*
+ * Called when a block header does not hold the expected 0xdeadbeef
+ * marker; e is the value found instead. Prints it and calls die().
+ */
+static void suicide(unsigned long e)
+{
+ fprintf(stderr, "ouch: Schrodinger's beef is not dead ! %lx\n", e);
+ die();
+}
+#else
+/*
+ * Non-AUDIT build: terminate the program with exit(EXIT_FAILURE).
+ */
+void die(void)
+{
+ exit(EXIT_FAILURE);
+}
+#endif
+
+#if defined AUDIT || defined MEM_CHECK || defined MEM_DEBUG
+/*
+ * malloc() replacement that never returns a null pointer: when the
+ * memory is not available, an error message is printed and the program
+ * exits through die().
+ *
+ * Under AUDIT the block is ALIGNSHIFT bytes larger than requested; the
+ * extra space at its start receives the 0xdeadbeef marker and the
+ * returned pointer is placed just after it.
+ */
+#ifdef MEM_DEBUG
+static void *getmem_raw(size_t x)
+#else
+void *(getmem)(size_t x)
+#endif
+{
+	size_t total = x;
+	char *raw;
+
+#ifdef AUDIT
+	total += ALIGNSHIFT;
+#endif
+	raw = malloc(total);
+	if (raw == 0) {
+		fprintf(stderr, "ouch: malloc() failed\n");
+		die();
+	}
+#ifdef AUDIT
+	/* stamp the header, then hide it from the caller */
+	*((unsigned long *)raw) = 0xdeadbeefUL;
+	raw += ALIGNSHIFT;
+#endif
+	return raw;
+}
+#endif
+
+#ifndef MEM_DEBUG
+/*
+ * This function is equivalent to a realloc(); if the realloc() call
+ * fails, it will try a malloc() and a memcpy(). If not enough memory is
+ * available, the program exits with an error message.
+ *
+ * m  -- block previously obtained from getmem()/incmem()
+ * x  -- current size of the block, in bytes
+ * nx -- wanted size, in bytes
+ *
+ * Returns the (possibly moved) block. Under AUDIT the 0xdeadbeef
+ * header located ALIGNSHIFT bytes before m is checked first.
+ */
+void *(incmem)(void *m, size_t x, size_t nx)
+{
+	void *nm;
+
+#ifdef AUDIT
+	/* step back to the real start of the allocation (the header) */
+	m = (void *)(((char *)m) - ALIGNSHIFT);
+	if (*((unsigned long *)m) != 0xdeadbeefUL)
+		suicide(*((unsigned long *)m));
+	x += ALIGNSHIFT; nx += ALIGNSHIFT;
+#endif
+	if (!(nm = realloc(m, nx))) {
+		if (x > nx) x = nx;
+		/*
+		 * Use malloc() directly, not getmem(): at this point m, x
+		 * and nx already include the AUDIT header, so getmem()
+		 * would add a second header and shift. Without AUDIT and
+		 * MEM_CHECK, getmem is plain malloc and would not be
+		 * checked either; the check is therefore done here.
+		 */
+		nm = malloc(nx);
+		if (nm == 0) {
+			fprintf(stderr, "ouch: malloc() failed\n");
+			die();
+		}
+		memcpy(nm, m, x);
+		/* free() and not freemem(), because of the Schrodinger beef */
+		free(m);
+	}
+#ifdef AUDIT
+	return (void *)(((char *)nm) + ALIGNSHIFT);
+#else
+	return nm;
+#endif
+}
+#endif
+
+#if defined AUDIT || defined MEM_DEBUG
+/*
+ * Release a block obtained from getmem() / incmem().
+ *
+ * Under AUDIT the header located ALIGNSHIFT bytes before x must still
+ * hold 0xdeadbeef (otherwise suicide() is called); it is overwritten
+ * with 0xfeedbabe before the real allocation is freed.
+ */
+#ifdef MEM_DEBUG
+static void freemem_raw(void *x)
+#else
+void (freemem)(void *x)
+#endif
+{
+#ifdef AUDIT
+	unsigned long *hdr = (unsigned long *)(((char *)x) - ALIGNSHIFT);
+
+	if (*hdr != 0xdeadbeefUL)
+		suicide(*hdr);
+	*hdr = 0xfeedbabeUL;
+	free(hdr);
+#else
+	free(x);
+#endif
+}
+#endif
+
+#ifdef AUDIT
+/*
+ * This function copies n bytes from src to dest, using memcpy(); the
+ * two areas must therefore not overlap. Returns memcpy()'s result.
+ */
+void *mmv(void *dest, void *src, size_t n)
+{
+ return memcpy(dest, src, n);
+}
+
+/*
+ * This function copies n bytes from src to dest, using memmove(); the
+ * two areas may overlap. Returns memmove()'s result.
+ */
+void *mmvwo(void *dest, void *src, size_t n)
+{
+ return memmove(dest, src, n);
+}
+#endif
+
+#ifndef MEM_DEBUG
+/*
+ * Return a copy of the string src, stored in a block freshly obtained
+ * from getmem().
+ */
+char *(sdup)(char *src)
+{
+	size_t len = strlen(src);
+	char *copy = getmem(len + 1);
+
+	/* len + 1: the terminating NUL is copied as well */
+	mmv(copy, src, len + 1);
+	return copy;
+}
+#endif
+
+#ifdef MEM_DEBUG
+/*
+ * We include here special versions of getmem(), freemem() and incmem()
+ * that track allocations and are used to detect memory leaks.
+ *
+ * Each allocation is referenced in a list, with a serial number.
+ */
+
+/*
+ * Define "true" functions for applications that need pointers
+ * to such functions.
+ *
+ * The parenthesized name in each definition is not followed by '(' and
+ * so is not macro-expanded, which yields a real function. The call in
+ * each body does expand to the function-like macro that mem.h defines
+ * under MEM_DEBUG, i.e. to the matching *_debug() routine with this
+ * file's __FILE__ and __LINE__.
+ */
+void *(getmem)(size_t n)
+{
+ return getmem(n);
+}
+
+void (freemem)(void *x)
+{
+ freemem(x);
+}
+
+void *(incmem)(void *x, size_t s, size_t ns)
+{
+ return incmem(x, s, ns);
+}
+
+char *(sdup)(char *s)
+{
+ return sdup(s);
+}
+
+/* serial number given to the next tracked allocation */
+static long current_serial = 0L;
+
+/* must be a power of two */
+/* (initial number of slots in the tracking table) */
+#define MEMDEBUG_MEMG 128U
+
+/*
+ * Tracking table: one slot per live allocation. A slot whose block
+ * member is null is free.
+ */
+static struct mem_track {
+ void *block; /* start of the raw allocation, 0 if the slot is unused */
+ long serial; /* serial number given at allocation time */
+ char *file; /* file name passed by the allocating call */
+ int line; /* line number passed by the allocating call */
+} *mem = 0;
+
+/* number of slots currently available in mem */
+static size_t meml = 0;
+
+/* slot index at which find_free_block() starts scanning */
+static unsigned int current_ptr = 0;
+
+/*
+ * Resize the block x from old_size to new_size bytes, without any
+ * tracking. realloc() is tried first; if it fails, a new block is
+ * obtained with malloc(), the common part is copied and the old block
+ * is freed. The program exits through die() when malloc() fails too.
+ */
+static void *true_incmem(void *x, size_t old_size, size_t new_size)
+{
+	void *y = realloc(x, new_size);
+	size_t keep;
+
+	if (y != 0) return y;
+
+	y = malloc(new_size);
+	if (y == 0) {
+		fprintf(stderr, "ouch: malloc() failed\n");
+		die();
+	}
+	/* copy no more than what both blocks can hold */
+	keep = (new_size <= old_size) ? new_size : old_size;
+	mmv(y, x, keep);
+	free(x);
+	return y;
+}
+
+/*
+ * Return the index of an unused slot of the tracking table mem.
+ *
+ * The table is scanned circularly, starting at current_ptr; the
+ * wrap-around is done with a mask, which is why the table size must
+ * remain a power of two. When no slot is free, the table is created
+ * (MEMDEBUG_MEMG slots) or doubled, and the index of the first new
+ * slot is returned. current_ptr is left on the returned slot.
+ *
+ * The program exits through die() if the table cannot be allocated.
+ */
+static long find_free_block(void)
+{
+	unsigned int n;
+	size_t i;
+
+	for (i = 0, n = current_ptr; i < meml; i ++) {
+		if (mem[n].block == 0) {
+			current_ptr = n;
+			return n;
+		}
+		n = (n + 1) & (meml - 1U);
+	}
+	if (meml == 0) {
+		size_t j;
+
+		meml = MEMDEBUG_MEMG;
+		mem = malloc(meml * sizeof(struct mem_track));
+		/* the table is written to right below: do not go on without it */
+		if (mem == 0) {
+			fprintf(stderr, "ouch: malloc() failed\n");
+			die();
+		}
+		current_ptr = 0;
+		for (j = 0; j < meml ; j ++) mem[j].block = 0;
+	} else {
+		size_t j;
+
+		/* true_incmem() exits by itself on failure */
+		mem = true_incmem(mem, meml * sizeof(struct mem_track),
+			2 * meml * sizeof(struct mem_track));
+		current_ptr = meml;
+		for (j = meml; j < 2 * meml ; j ++) mem[j].block = 0;
+		meml *= 2;
+	}
+	return current_ptr;
+}
+
+/*
+ * Tracked allocation of n bytes on behalf of file:line.
+ *
+ * The raw block is ALIGNSHIFT bytes larger than requested; its start
+ * stores the index of the tracking slot, and the pointer handed to the
+ * caller lies just after it.
+ */
+void *getmem_debug(size_t n, char *file, int line)
+{
+	void *raw = getmem_raw(n + ALIGNSHIFT);
+	long slot = find_free_block();
+	struct mem_track *entry = mem + slot;
+
+	*(long *)raw = slot;
+	entry->block = raw;
+	entry->serial = current_serial ++;
+	entry->file = file;
+	entry->line = line;
+	return (void *)((unsigned char *)raw + ALIGNSHIFT);
+}
+
+/*
+ * Tracked release of a block returned by getmem_debug().
+ *
+ * The slot index stored ALIGNSHIFT bytes before x must designate a
+ * slot that still references this very block; otherwise a message
+ * naming file:line is printed and the program exits through die().
+ */
+void freemem_debug(void *x, char *file, int line)
+{
+	void *raw = (unsigned char *)x - ALIGNSHIFT;
+	long slot = *(long *)raw;
+	int known = (slot >= 0 && (size_t)slot < meml
+		&& mem[slot].block == raw);
+
+	if (!known) {
+		fprintf(stderr, "ouch: freeing free people (from %s:%d)\n",
+			file, line);
+		die();
+	}
+	mem[slot].block = 0;
+	freemem_raw(raw);
+}
+
+/*
+ * Tracked resize: always allocates a new tracked block of nl bytes,
+ * copies the smaller of ol and nl bytes into it, then releases the
+ * old block.
+ */
+void *incmem_debug(void *x, size_t ol, size_t nl, char *file, int line)
+{
+	size_t keep = (nl <= ol) ? nl : ol;
+	void *fresh = getmem_debug(nl, file, line);
+
+	mmv(fresh, x, keep);
+	freemem_debug(x, file, line);
+	return fresh;
+}
+
+/*
+ * Tracked string duplication: the copy of src lives in a block
+ * obtained from getmem_debug() on behalf of file:line.
+ */
+char *sdup_debug(char *src, char *file, int line)
+{
+	size_t len = strlen(src);
+	char *copy = getmem_debug(len + 1, file, line);
+
+	/* len + 1: the terminating NUL is copied as well */
+	mmv(copy, src, len + 1);
+	return copy;
+}
+
+/*
+ * Print on stderr one line per tracking slot that still references a
+ * block, with its serial number and the file:line that allocated it.
+ */
+void report_leaks(void)
+{
+	size_t slot;
+
+	for (slot = 0; slot < meml; slot ++) {
+		if (!mem[slot].block) continue;
+		fprintf(stderr, "leak: serial %ld, %s:%d\n",
+			mem[slot].serial, mem[slot].file, mem[slot].line);
+	}
+}
+
+#endif
diff --git a/libexec/auxcpp/mem.h b/libexec/auxcpp/mem.h
new file mode 100644
index 00000000000..4403c2fc476
--- /dev/null
+++ b/libexec/auxcpp/mem.h
@@ -0,0 +1,155 @@
+/*
+ * (c) Thomas Pornin 1998 - 2002
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef UCPP__MEM__
+#define UCPP__MEM__
+
+#include <stdlib.h>
+
+/* terminate the program; defined in mem.c */
+void die(void);
+
+/*
+ * getmem: a real function when AUDIT, MEM_CHECK or MEM_DEBUG is
+ * defined, a plain alias for malloc otherwise.
+ */
+#if defined AUDIT || defined MEM_CHECK || defined MEM_DEBUG
+void *getmem(size_t);
+#else
+#define getmem malloc
+#endif
+
+/* MEM_DEBUG: route getmem() calls to the tracking version */
+#if defined MEM_DEBUG
+void *getmem_debug(size_t, char *, int);
+#undef getmem
+#define getmem(x) getmem_debug(x, __FILE__, __LINE__)
+#endif
+
+/*
+ * freemem: a real function when AUDIT or MEM_DEBUG is defined, a plain
+ * alias for free otherwise.
+ */
+#if defined AUDIT || defined MEM_DEBUG
+void freemem(void *);
+#else
+#define freemem free
+#endif
+
+/* MEM_DEBUG: route freemem() calls to the tracking version */
+#if defined MEM_DEBUG
+void freemem_debug(void *, char *, int);
+#undef freemem
+#define freemem(x) freemem_debug(x, __FILE__, __LINE__)
+#endif
+
+/* incmem(block, old size, new size) and sdup(string) are always functions */
+void *incmem(void *, size_t, size_t);
+char *sdup(char *);
+
+/* MEM_DEBUG: route incmem() and sdup() calls to the tracking versions */
+#if defined MEM_DEBUG
+void *incmem_debug(void *, size_t, size_t, char *, int);
+#undef incmem
+#define incmem(x, y, z) incmem_debug(x, y, z, __FILE__, __LINE__)
+void report_leaks(void);
+char *sdup_debug(char *, char *, int);
+#define sdup(x) sdup_debug(x, __FILE__, __LINE__)
+#endif
+
+/*
+ * mmv / mmvwo: functions under AUDIT, aliases for memcpy / memmove
+ * otherwise.
+ */
+#ifdef AUDIT
+void *mmv(void *, void *, size_t);
+void *mmvwo(void *, void *, size_t);
+#else
+#define mmv memcpy
+#define mmvwo memmove
+#endif
+
+/*
+ * this macro adds the object obj at the end of the array list, handling
+ * memory allocation when needed; ptr contains the number of elements in
+ * the array, and memg is the granularity of memory allocations (a power
+ * of 2 is recommended, for optimization reasons).
+ *
+ * list and ptr may be updated, and thus need to be lvalues.
+ *
+ * The array is (re)allocated whenever ptr is a multiple of memg, so it
+ * must only ever be grown through these macros with the same memg. All
+ * arguments are evaluated more than once.
+ */
+#define aol(list, ptr, obj, memg) do { \
+ if (((ptr) % (memg)) == 0) { \
+ if ((ptr) != 0) { \
+ (list) = incmem((list), (ptr) * sizeof(obj), \
+ ((ptr) + (memg)) * sizeof(obj)); \
+ } else { \
+ (list) = getmem((memg) * sizeof(obj)); \
+ } \
+ } \
+ (list)[(ptr) ++] = (obj); \
+ } while (0)
+
+/*
+ * bol() does the same as aol(), but adds the new item at the beginning
+ * of the list; beware, the computational cost is greater.
+ *
+ * The existing elements are shifted up by one position with mmvwo()
+ * (the source and destination areas overlap) before obj is stored at
+ * index 0.
+ */
+#define bol(list, ptr, obj, memg) do { \
+ if (((ptr) % (memg)) == 0) { \
+ if ((ptr) != 0) { \
+ (list) = incmem((list), (ptr) * sizeof(obj), \
+ ((ptr) + (memg)) * sizeof(obj)); \
+ } else { \
+ (list) = getmem((memg) * sizeof(obj)); \
+ } \
+ } \
+ if ((ptr) != 0) \
+ mmvwo((list) + 1, (list), (ptr) * sizeof(obj)); \
+ (ptr) ++; \
+ (list)[0] = (obj); \
+ } while (0)
+
+/*
+ * mbol() does the same as bol(), but adds the new item at the given
+ * position n; bol() is equivalent to mbol() with 0 as last argument.
+ *
+ * Elements from index n onwards are shifted up by one position with
+ * mmvwo() before obj is stored at index n. n is parenthesized at each
+ * use so that an expression can be passed safely; like the other
+ * arguments, it is evaluated more than once.
+ */
+#define mbol(list, ptr, obj, memg, n) do { \
+		if (((ptr) % (memg)) == 0) { \
+			if ((ptr) != 0) { \
+				(list) = incmem((list), (ptr) * sizeof(obj), \
+					((ptr) + (memg)) * sizeof(obj)); \
+			} else { \
+				(list) = getmem((memg) * sizeof(obj)); \
+			} \
+		} \
+		if ((ptr) > (n)) \
+			mmvwo((list) + (n) + 1, (list) + (n), \
+				((ptr) - (n)) * sizeof(obj)); \
+		(ptr) ++; \
+		(list)[(n)] = (obj); \
+	} while (0)
+
+/*
+ * this macro adds the object obj at the end of the array list, doubling
+ * the size of list when needed; as for aol(), ptr and list must be
+ * lvalues, and so must be llng
+ *
+ * llng holds the current capacity of list, in elements. Unlike aol(),
+ * this macro never performs the initial allocation, and doubling a
+ * capacity of 0 leaves it at 0: list must already be allocated with
+ * llng >= 1 before the first use.
+ */
+
+#define wan(list, ptr, obj, llng) do { \
+ if ((ptr) == (llng)) { \
+ (llng) += (llng); \
+ (list) = incmem((list), (ptr) * sizeof(obj), \
+ (llng) * sizeof(obj)); \
+ } \
+ (list)[(ptr) ++] = (obj); \
+ } while (0)
+
+#endif
diff --git a/libexec/auxcpp/nhash.c b/libexec/auxcpp/nhash.c
new file mode 100644
index 00000000000..6e5e4f7f8eb
--- /dev/null
+++ b/libexec/auxcpp/nhash.c
@@ -0,0 +1,481 @@
+/*
+ * Mixed hash table / binary tree code.
+ * (c) Thomas Pornin 2002
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <limits.h>
+#include "nhash.h"
+#include "mem.h"
+
+/*
+ * Compute the hash of the NUL-terminated string `name'. The algorithm
+ * follows the hash function of the ELF object file format: each byte
+ * is folded into an accumulator shifted left by four bits, and the top
+ * nibble is mixed back into the low bits then cleared. The result fits
+ * in 28 bits when `unsigned' can hold 32-bit values, in 12 bits
+ * otherwise.
+ */
+static unsigned hash_string(char *name)
+{
+	const unsigned char *p = (const unsigned char *)name;
+	unsigned acc = 0;
+
+	while (*p != 0) {
+		unsigned top;
+
+		acc = (acc << 4) + *p;
+#if UINT_MAX >= 0xffffffffU
+		top = acc & 0xF0000000U;
+		acc ^= (top >> 24);
+#else
+		top = acc & 0xF000U;
+		acc ^= (top >> 12);
+#endif
+		/* drop the nibble that was just folded back in */
+		acc &= ~top;
+		p ++;
+	}
+	return acc;
+}
+
+/*
+ * Each item in the table is a structure beginning with a `hash_item_header'
+ * structure. Those headers define binary trees such that all left-descendants
+ * (respectively right-descendants) of a given tree node have an associated
+ * hash value strictly smaller (respectively greater) than the hash value
+ * associated with this node.
+ *
+ * The `ident' field points to an array of char. The `sizeof(unsigned)'
+ * first `char' contain a copy of an `unsigned' value which is the hashed
+ * string, except the least significant bit. When this bit is set to 0,
+ * the node contains the unique item using that hash value. If the bit
+ * is set to 1, then there are several items with that hash value.
+ *
+ * When several items share the same hash value, they are linked together
+ * in a linked list by their `left' field. The node contains no data;
+ * it is a "fake item".
+ *
+ * The `char' following the hash value encode the item name for true items.
+ * For fake items, they contain the pointer to the first true item of the
+ * corresponding link list (suitably aligned).
+ *
+ * There are HTT_NUM_TREES trees; the items are sorted among trees by the
+ * least significant bits of their hash value.
+ */
+
+/*
+ * Common initialization code for HTT and HTT2 tables: record the item
+ * destructor and empty every tree. `reduced' is non-zero when `htt'
+ * actually points to an HTT2 (two trees only).
+ */
+static void internal_init(HTT *htt, void (*deldata)(void *), int reduced)
+{
+	hash_item_header **root;
+	unsigned count, idx;
+
+	htt->deldata = deldata;
+	if (reduced) {
+		root = ((HTT2 *)htt)->tree;
+		count = 2;
+	} else {
+		root = htt->tree;
+		count = HTT_NUM_TREES;
+	}
+	for (idx = 0; idx < count; idx ++) root[idx] = NULL;
+}
+
+/*
+ * see nhash.h
+ * Full-size table: delegates to internal_init() with `reduced' set to 0.
+ */
+void HTT_init(HTT *htt, void (*deldata)(void *))
+{
+ internal_init(htt, deldata, 0);
+}
+
+/*
+ * see nhash.h
+ * Reduced table: the HTT2 pointer is passed as an HTT pointer and
+ * internal_init() is told so through `reduced' set to 1.
+ */
+void HTT2_init(HTT2 *htt, void (*deldata)(void *))
+{
+ internal_init((HTT *)htt, deldata, 1);
+}
+
+/*
+ * Offset, in bytes, of the pointer stored in a fake item identifier:
+ * sizeof(unsigned) rounded up to a multiple of the pointer size, so
+ * that the pointer following the hash value is suitably aligned.
+ */
+#define PTR_SHIFT (sizeof(hash_item_header *) * \
+ ((sizeof(unsigned) + sizeof(hash_item_header *) - 1) / \
+ sizeof(hash_item_header *)))
+
+/*
+ * Root (as an lvalue) of the tree selected by the low bits of the hash
+ * value u. This macro uses the variables `htt' and `reduced' of the
+ * enclosing function: with `reduced' non-zero, `htt' is accessed as an
+ * HTT2 and only bit 0 of u is used.
+ */
+#define TREE(u) (*(reduced ? ((HTT2 *)htt)->tree + ((u) & 1) \
+ : htt->tree + ((u) & (HTT_NUM_TREES - 1))))
+
+/*
+ * Find a node for the given hash value. If `father' is not NULL, fill
+ * `*father' with a pointer to the node's father.
+ * If the return value is NULL, then no existing node was found; if `*father'
+ * is also NULL, the tree is empty. If the return value is not NULL but
+ * `*father' is NULL, then the found node is the tree root.
+ *
+ * If `father' is not NULL, then `*leftson' is filled with 1 if the node
+ * was looked for as the father left son, 0 otherwise. When `*father' is
+ * NULL (empty tree, or node found at the root), `*leftson' is set to 0.
+ *
+ * The least significant bit of `u' selects nothing here beyond the tree
+ * (see TREE()); it is cleared before comparison, since stored hash
+ * values use that bit as the "fake item" flag.
+ */
+static hash_item_header *find_node(HTT *htt, unsigned u,
+	hash_item_header **father, int *leftson, int reduced)
+{
+	hash_item_header *node = TREE(u);
+	hash_item_header *nodef = NULL;
+	/*
+	 * Initialized so that `*leftson' never receives an indeterminate
+	 * value when the loop below does not descend.
+	 */
+	int ls = 0;
+
+	u &= ~1U;
+	while (node != NULL) {
+		unsigned v = *(unsigned *)(node->ident);
+		unsigned w = v & ~1U;
+
+		if (u == w) break;
+		nodef = node;
+		if (u < w) {
+			node = node->left;
+			ls = 1;
+		} else {
+			node = node->right;
+			ls = 0;
+		}
+	}
+	if (father != NULL) {
+		*father = nodef;
+		*leftson = ls;
+	}
+	return node;
+}
+
+/*
+ * Look up `name' in the table. The node bearing the hash value of
+ * `name' is located first; a real node is the only candidate, whereas
+ * a fake node leads to a list (linked through `left') whose items are
+ * all compared by name. Returns the matching item, or NULL.
+ */
+static void *internal_get(HTT *htt, char *name, int reduced)
+{
+	hash_item_header *cur = find_node(htt, hash_string(name),
+		NULL, NULL, reduced);
+
+	if (cur == NULL) return NULL;
+	if ((*(unsigned *)(cur->ident) & 1U) == 0) {
+		/* real node: a single item uses that hash value */
+		if (strcmp(HASH_ITEM_NAME(cur), name) != 0) return NULL;
+		return cur;
+	}
+	/* fake node: scan the items sharing that hash value */
+	for (cur = *(hash_item_header **)(cur->ident + PTR_SHIFT);
+		cur != NULL; cur = cur->left) {
+		if (strcmp(HASH_ITEM_NAME(cur), name) == 0) return cur;
+	}
+	return NULL;
+}
+
+/*
+ * see nhash.h
+ * Full-size table lookup: internal_get() with `reduced' set to 0.
+ */
+void *HTT_get(HTT *htt, char *name)
+{
+ return internal_get(htt, name, 0);
+}
+
+/*
+ * see nhash.h
+ * Reduced table lookup: internal_get() with `reduced' set to 1.
+ */
+void *HTT2_get(HTT2 *htt, char *name)
+{
+ return internal_get((HTT *)htt, name, 1);
+}
+
+/*
+ * Build the identifier of a true item: a freshly allocated buffer
+ * holding the hash value `u' with its least significant bit cleared,
+ * immediately followed by a copy of `name' (terminating NUL included).
+ */
+static char *make_ident(char *name, unsigned u)
+{
+	size_t name_bytes = strlen(name) + 1;
+	char *buf = getmem(name_bytes + sizeof(unsigned));
+	char *name_dst = buf + sizeof(unsigned);
+
+	*(unsigned *)buf = u & ~1U;
+	memcpy(name_dst, name, name_bytes);
+	return buf;
+}
+
+/*
+ * Build the identifier of a fake item: a freshly allocated buffer
+ * holding the hash value `u' with its least significant bit set, and,
+ * at offset PTR_SHIFT, a pointer to `next', the first true item of the
+ * list.
+ */
+static char *make_fake_ident(unsigned u, hash_item_header *next)
+{
+	char *buf = getmem(PTR_SHIFT + sizeof(hash_item_header *));
+	hash_item_header **head = (hash_item_header **)(buf + PTR_SHIFT);
+
+	*(unsigned *)buf = u | 1U;
+	*head = next;
+	return buf;
+}
+
+/*
+ * Insertion of an item under a given name:
+ * 1. locate the node bearing the hash value of the name
+ * 2. no such node: the item becomes a new tree node
+ * 3. the node is real: if its name is the requested one, return it;
+ *    otherwise replace it in the tree with a new fake node heading a
+ *    two-element list (old item, then new item)
+ * 4. the node is fake: if the list contains the name, return the
+ *    matching item; otherwise append the new item to the list
+ *
+ * The return value is NULL when the item was inserted, or the already
+ * present item of identical name (nothing is modified in that case).
+ */
+static void *internal_put(HTT *htt, void *item, char *name, int reduced)
+{
+	unsigned u = hash_string(name);
+	int ls;
+	hash_item_header *father;
+	hash_item_header *found = find_node(htt, u, &father, &ls, reduced);
+	hash_item_header *fresh = item;
+	hash_item_header **slot;
+	hash_item_header *fake, *cur, *tail;
+
+	/* the link through which the tree reaches the node's position */
+	if (father == NULL) slot = &TREE(u);
+	else if (ls) slot = &father->left;
+	else slot = &father->right;
+
+	if (found == NULL) {
+		fresh->left = fresh->right = NULL;
+		fresh->ident = make_ident(name, u);
+		*slot = fresh;
+		return NULL;
+	}
+
+	if ((*(unsigned *)(found->ident) & 1U) == 0) {
+		if (strcmp(HASH_ITEM_NAME(found), name) == 0) return found;
+		/* the fake node inherits the tree links of the old item */
+		fake = getmem(sizeof *fake);
+		fake->left = found->left;
+		fake->right = found->right;
+		fake->ident = make_fake_ident(u, found);
+		/* from now on, `left' of the old item is its list link */
+		found->left = fresh;
+		found->right = NULL;
+		fresh->left = fresh->right = NULL;
+		fresh->ident = make_ident(name, u);
+		*slot = fake;
+		return NULL;
+	}
+
+	tail = NULL;
+	for (cur = *(hash_item_header **)(found->ident + PTR_SHIFT);
+		cur != NULL; cur = cur->left) {
+		if (strcmp(HASH_ITEM_NAME(cur), name) == 0) return cur;
+		tail = cur;
+	}
+	fresh->left = fresh->right = NULL;
+	fresh->ident = make_ident(name, u);
+	tail->left = fresh;
+	return NULL;
+}
+
+/*
+ * see nhash.h
+ * Full-size table insertion: internal_put() with `reduced' set to 0.
+ */
+void *HTT_put(HTT *htt, void *item, char *name)
+{
+ return internal_put(htt, item, name, 0);
+}
+
+/*
+ * see nhash.h
+ * Reduced table insertion: internal_put() with `reduced' set to 1.
+ */
+void *HTT2_put(HTT2 *htt, void *item, char *name)
+{
+ return internal_put((HTT *)htt, item, name, 1);
+}
+
+/*
+ * The list attached to a fake node now holds a single item: put that
+ * item in the tree in place of the fake node, and release the fake
+ * node together with its identifier.
+ * fnode the fake node
+ * node the last remaining item
+ * father the fake node father (NULL if the fake node is root)
+ * leftson 1 if the fake node is a left son, 0 otherwise
+ * u the hash value for this node
+ */
+static void shrink_node(HTT *htt, hash_item_header *fnode,
+	hash_item_header *node, hash_item_header *father, int leftson,
+	unsigned u, int reduced)
+{
+	hash_item_header **slot;
+
+	/* the surviving item takes over the tree links of the fake node */
+	node->left = fnode->left;
+	node->right = fnode->right;
+
+	if (father == NULL) slot = &TREE(u);
+	else if (leftson) slot = &father->left;
+	else slot = &father->right;
+	*slot = node;
+
+	freemem(fnode->ident);
+	freemem(fnode);
+}
+
+/*
+ * Deletion algorithm:
+ * 1. look for the node; if not found, exit
+ * 2. if the node is real:
+ * 2.1. check for equality; exit otherwise
+ * 2.2. delete the node
+ * 2.3. promote the leftest of right descendants or rightest of left
+ * descendants
+ * 3. if the node is fake:
+ * 3.1. check the list items for equality; exit otherwise
+ * 3.2. delete the correct item
+ * 3.3. if there remains only one item, suppress the fake node
+ *
+ * Returns 1 if an item was removed, 0 if no item bears that name. The
+ * removed item is handed to htt->deldata(); its identifier is released
+ * here afterwards.
+ */
+static int internal_del(HTT *htt, char *name, int reduced)
+{
+ unsigned u = hash_string(name), v;
+ int ls;
+ hash_item_header *father;
+ hash_item_header *node = find_node(htt, u, &father, &ls, reduced);
+ hash_item_header *pnode, *fnode, *znode;
+ char *tmp;
+
+ if (node == NULL) return 0;
+ v = *(unsigned *)(node->ident);
+ if ((v & 1U) != 0) {
+ /*
+ * Fake node: walk the list (linked through `left'); znode
+ * remembers the list head, pnode the item preceding node.
+ */
+ fnode = node;
+ node = znode = *(hash_item_header **)(node->ident + PTR_SHIFT);
+ pnode = NULL;
+ while (node != NULL) {
+ if (strcmp(HASH_ITEM_NAME(node), name) == 0) break;
+ pnode = node;
+ node = node->left;
+ }
+ if (node == NULL) return 0;
+ if (pnode == NULL) {
+ /*
+ * We suppress the first item in the list. The code
+ * relies on the list of a fake node holding at least
+ * two items, hence node->left is not NULL here.
+ */
+ *(hash_item_header **)(fnode->ident + PTR_SHIFT) =
+ node->left;
+ if (node->left->left == NULL) {
+ shrink_node(htt, fnode, node->left,
+ father, ls, u, reduced);
+ }
+ } else {
+ pnode->left = node->left;
+ /*
+ * Only the list head is left: the fake node is not
+ * needed anymore.
+ */
+ if (pnode->left == NULL && znode == pnode) {
+ shrink_node(htt, fnode, pnode,
+ father, ls, u, reduced);
+ }
+ }
+ } else {
+ if (strcmp(HASH_ITEM_NAME(node), name) != 0) return 0;
+ if (node->left != NULL) {
+ /*
+ * pnode: rightest node of the left subtree; znode: its
+ * father (or node itself if no step right was taken).
+ */
+ for (znode = node, pnode = node->left; pnode->right;
+ znode = pnode, pnode = pnode->right);
+ if (znode != node) {
+ znode->right = pnode->left;
+ pnode->left = node->left;
+ }
+ pnode->right = node->right;
+ } else if (node->right != NULL) {
+ /* symmetric case: leftest node of the right subtree */
+ for (znode = node, pnode = node->right; pnode->left;
+ znode = pnode, pnode = pnode->left);
+ if (znode != node) {
+ znode->left = pnode->right;
+ pnode->right = node->right;
+ }
+ pnode->left = node->left;
+ } else pnode = NULL;
+ /* pnode (possibly NULL) replaces the deleted node in the tree */
+ if (father == NULL) {
+ TREE(u) = pnode;
+ } else if (ls) {
+ father->left = pnode;
+ } else {
+ father->right = pnode;
+ }
+ }
+ /* the identifier pointer is saved before deldata() gets the item */
+ tmp = node->ident;
+ htt->deldata(node);
+ freemem(tmp);
+ return 1;
+}
+
+/*
+ * see nhash.h
+ * Full-size table deletion: internal_del() with `reduced' set to 0.
+ */
+int HTT_del(HTT *htt, char *name)
+{
+ return internal_del(htt, name, 0);
+}
+
+/*
+ * see nhash.h
+ * Reduced table deletion: internal_del() with `reduced' set to 1.
+ */
+int HTT2_del(HTT2 *htt, char *name)
+{
+ return internal_del((HTT *)htt, name, 1);
+}
+
+/*
+ * Recursively visit the tree rooted at `node' (left subtree, right
+ * subtree, then the node itself) and call `action()' on every true
+ * item, including those linked from fake nodes. If `wipe' is non-zero,
+ * the identifier of each item is released after the call to
+ * `action()', and fake nodes are released as well.
+ */
+static void scan_node(hash_item_header *node, void (*action)(void *), int wipe)
+{
+	char *ident;
+
+	if (node == NULL) return;
+	scan_node(node->left, action, wipe);
+	scan_node(node->right, action, wipe);
+
+	ident = node->ident;
+	if ((*(unsigned *)ident & 1U) == 0) {
+		/* real node; `ident' was read before action() runs */
+		action(node);
+		if (wipe) freemem(ident);
+		return;
+	}
+
+	{
+		hash_item_header *cur =
+			*(hash_item_header **)(ident + PTR_SHIFT);
+
+		while (cur != NULL) {
+			/* fetch everything needed before action() runs */
+			char *cur_ident = cur->ident;
+			hash_item_header *next = cur->left;
+
+			action(cur);
+			if (wipe) freemem(cur_ident);
+			cur = next;
+		}
+	}
+	if (wipe) {
+		freemem(ident);
+		freemem(node);
+	}
+}
+
+/*
+ * see nhash.h
+ * Every tree of the table is visited in turn, without wiping.
+ */
+void HTT_scan(HTT *htt, void (*action)(void *))
+{
+	unsigned idx = 0;
+
+	while (idx < HTT_NUM_TREES) {
+		scan_node(htt->tree[idx], action, 0);
+		idx ++;
+	}
+}
+
+/*
+ * see nhash.h
+ * Both trees of the reduced table are visited in turn, without wiping.
+ */
+void HTT2_scan(HTT2 *htt, void (*action)(void *))
+{
+	unsigned idx;
+
+	for (idx = 0; idx < 2; idx ++) {
+		scan_node(htt->tree[idx], action, 0);
+	}
+}
+
+/*
+ * see nhash.h
+ * Every tree is wiped (items are handed to htt->deldata, identifiers
+ * and fake nodes are released), then its root is reset to NULL so that
+ * the table does not keep pointers to released nodes and can accept
+ * new items right away, as stated in nhash.h.
+ */
+void HTT_kill(HTT *htt)
+{
+	unsigned u;
+
+	for (u = 0; u < HTT_NUM_TREES; u ++) {
+		scan_node(htt->tree[u], htt->deldata, 1);
+		htt->tree[u] = NULL;
+	}
+}
+
+/*
+ * see nhash.h
+ * Both trees are wiped (items are handed to htt->deldata, identifiers
+ * and fake nodes are released), then their roots are reset to NULL so
+ * that the table does not keep pointers to released nodes and can
+ * accept new items right away.
+ */
+void HTT2_kill(HTT2 *htt)
+{
+	unsigned u;
+
+	for (u = 0; u < 2; u ++) {
+		scan_node(htt->tree[u], htt->deldata, 1);
+		htt->tree[u] = NULL;
+	}
+}
diff --git a/libexec/auxcpp/nhash.h b/libexec/auxcpp/nhash.h
new file mode 100644
index 00000000000..00156f57b8e
--- /dev/null
+++ b/libexec/auxcpp/nhash.h
@@ -0,0 +1,132 @@
+/*
+ * (c) Thomas Pornin 2002
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef UCPP__NHASH__
+#define UCPP__NHASH__
+
+/*
+ * Each item stored in the hash table should be a structure beginning
+ * with the following header. The header fields are filled and
+ * maintained by the HTT_*() functions.
+ */
+typedef struct hash_item_header_ {
+ /* an `unsigned' value, followed by the name: see HASH_ITEM_NAME() */
+ char *ident;
+ /* links to other items of the table */
+ struct hash_item_header_ *left, *right;
+} hash_item_header;
+
+/*
+ * This macro takes as argument a pointer to a hash table item (a
+ * structure beginning with `hash_item_header') and returns a pointer to
+ * the item name. This name should be considered as read-only. The
+ * retrieved pointer can become invalid whenever a new item is inserted
+ * in or removed from the table.
+ */
+#define HASH_ITEM_NAME(s) (((hash_item_header *)(s))->ident + sizeof(unsigned))
+
+/*
+ * Number of lists for the primary hash step. Can be reduced to save more
+ * memory, or increased to speed things up. It should be a power of 2
+ * greater than or equal to 2 and smaller than UINT_MAX.
+ */
+#define HTT_NUM_TREES 128
+
+/*
+ * Type for a hash table.
+ */
+typedef struct {
+ /* function invoked on items removed from the table (see HTT_init()) */
+ void (*deldata)(void *);
+ /* one tree root per primary hash value */
+ hash_item_header *tree[HTT_NUM_TREES];
+} HTT;
+
+/*
+ * Type for a reduced version of HTT with only two binary trees. That
+ * version has a lower initialization time and is suitable for situations
+ * where only a limited number of elements will be stored, but new tables
+ * need frequent initializations.
+ */
+typedef struct {
+ /* function invoked on items removed from the table */
+ void (*deldata)(void *);
+ /* the two tree roots */
+ hash_item_header *tree[2];
+} HTT2;
+
+/*
+ * Initialize a hash table. The `deldata' parameter should point to a
+ * function which will be invoked on any item removed from the table;
+ * that function should take care of the release of memory allocated for
+ * that item (except the hash_item_header contents, which are handled
+ * internally).
+ */
+void HTT_init(HTT *htt, void (*deldata)(void *));
+
+/*
+ * Link an item into the hash table under the given name. If another
+ * item of identical name is already present in the table, a pointer to
+ * that item is returned; otherwise, the new item is linked into the
+ * table and NULL is returned. The object pointed to by `item' is
+ * linked from the table, but not the string pointed to by `name'.
+ */
+void *HTT_put(HTT *htt, void *item, char *name);
+
+/*
+ * Retrieve an item by name from the hash table. NULL is returned if
+ * the object is not found.
+ */
+void *HTT_get(HTT *htt, char *name);
+
+/*
+ * Remove an item from the hash table. 1 is returned if the item was
+ * removed, 0 if it was not found.
+ */
+int HTT_del(HTT *htt, char *name);
+
+/*
+ * For all items stored within the hash table, invoke the provided
+ * function with the item as parameter. The function may abort the
+ * scan by performing a longjmp() to a context encapsulating the
+ * call to that function.
+ */
+void HTT_scan(HTT *htt, void (*action)(void *));
+
+/*
+ * Release the whole table contents. After a call to this function,
+ * the table is ready to accept new items.
+ */
+void HTT_kill(HTT *htt);
+
+/*
+ * The following functions are identical to the HTT_*() functions, except
+ * that they operate on the reduced HTT2 tables.
+ */
+void HTT2_init(HTT2 *htt, void (*deldata)(void *));
+void *HTT2_put(HTT2 *htt, void *item, char *name);
+void *HTT2_get(HTT2 *htt, char *name);
+int HTT2_del(HTT2 *htt, char *name);
+void HTT2_scan(HTT2 *htt, void (*action)(void *));
+void HTT2_kill(HTT2 *htt);
+
+#endif
diff --git a/libexec/auxcpp/sample.c b/libexec/auxcpp/sample.c
new file mode 100644
index 00000000000..f94f5c9d7ec
--- /dev/null
+++ b/libexec/auxcpp/sample.c
@@ -0,0 +1,114 @@
+/*
+ * Sample code showing how to use ucpp as an integrated lexer.
+ * This file is public domain.
+ */
+
+/*
+ * This is an example of how to use ucpp as a preprocessor and lexer
+ * into another project. The steps are those described in ucpp README
+ * file. To use this code, compile the ucpp source files with
+ * STAND_ALONE not defined, and link them with this code. The resulting
+ * binary will take a C source file as standard input, preprocess it,
+ * and output each non-whitespace token on stdout, with its numerical
+ * value (defined as an enum in cpp.h) and its contents. This code
+ * defines no system include path.
+ *
+ * This code supposes that the ucpp files are compiled with PRAGMA_TOKENIZE
+ * enabled (see the tune.h file).
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mem.h"
+#include "cpp.h"
+
+/*
+ * Entry point of the sample lexer: preprocess standard input with ucpp
+ * in lexer mode and print one line per retrieved token on stdout.
+ * Every command-line argument is added as an include path. The
+ * returned value is always 0.
+ */
+int main(int argc, char *argv[])
+{
+ int i, r;
+ struct lexer_state ls;
+
+ /* step 1 */
+ init_cpp();
+
+ /* step 2 */
+ no_special_macros = 0;
+ emit_defines = emit_assertions = 0;
+
+ /* step 3 -- with assertions */
+ init_tables(1);
+
+ /* step 4 -- no default include path */
+ init_include_path(0);
+
+ /* step 5 -- no need to reset the two emit_* variables set in 2 */
+ emit_dependencies = 0;
+
+ /* step 6 -- we work with stdin, this is not a real filename */
+ set_init_filename("[stdin]", 0);
+
+ /* step 7 -- we make sure that assertions are on, and pragmas are
+ handled */
+ init_lexer_state(&ls);
+ init_lexer_mode(&ls);
+ ls.flags |= HANDLE_ASSERTIONS | HANDLE_PRAGMA | LINE_NUM;
+
+ /* step 8 -- input is from stdin */
+ ls.input = stdin;
+
+ /* step 9 -- we do not have any macro to define, but we add any
+ argument as an include path */
+ for (i = 1; i < argc; i ++) add_incpath(argv[i]);
+
+ /* step 10 -- we are a lexer and we want CONTEXT tokens */
+ enter_file(&ls, ls.flags);
+
+ /* read tokens until end-of-input is reached -- errors (non-zero
+ return values different from CPPERR_EOF) are ignored */
+ while ((r = lex(&ls)) < CPPERR_EOF) {
+ if (r) {
+ /* error condition -- no token was retrieved */
+ continue;
+ }
+ /* we print each token: its numerical value, and its
+ string content; if this is a PRAGMA token, the
+ string content is in fact a compressed token list,
+ that we uncompress and print. */
+ if (ls.ctok->type == PRAGMA) {
+ unsigned char *c = (unsigned char *)(ls.ctok->name);
+
+ printf("line %ld: <#pragma>\n", ls.line);
+ /* each byte is a token type; the list ends with a 0 byte */
+ for (; *c; c ++) {
+ int t = *c;
+
+ if (STRING_TOKEN(t)) {
+ /* the token text follows, up to
+ PRAGMA_TOKEN_END (not printed) */
+ printf(" <%2d> ", t);
+ for (c ++; *c != PRAGMA_TOKEN_END;
+ c ++) putchar(*c);
+ putchar('\n');
+ } else {
+ printf(" <%2d> `%s'\n", t,
+ operators_name[t]);
+ }
+ }
+ } else if (ls.ctok->type == CONTEXT) {
+ printf("new context: file '%s', line %ld\n",
+ ls.ctok->name, ls.ctok->line);
+ } else if (ls.ctok->type == NEWLINE) {
+ printf("[newline]\n");
+ } else {
+ /* tokens without a string value are printed through
+ the operators_name[] table */
+ printf("line %ld: <%2d> `%s'\n", ls.ctok->line,
+ ls.ctok->type,
+ STRING_TOKEN(ls.ctok->type) ? ls.ctok->name
+ : operators_name[ls.ctok->type]);
+ }
+ }
+
+ /* give back memory and exit */
+ wipeout();
+ free_lexer_state(&ls);
+#ifdef MEM_DEBUG
+ report_leaks();
+#endif
+ return 0;
+}
diff --git a/libexec/auxcpp/tune.h b/libexec/auxcpp/tune.h
new file mode 100644
index 00000000000..e4afc31f9c4
--- /dev/null
+++ b/libexec/auxcpp/tune.h
@@ -0,0 +1,422 @@
+/*
+ * (c) Thomas Pornin 1999 - 2002
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef UCPP__TUNE__
+#define UCPP__TUNE__
+
+#ifdef UCPP_CONFIG
+#include "config.h"
+#else
+
+/* ====================================================================== */
+/*
+ * The LOW_MEM macro triggers the use of macro storage which uses less
+ * memory. It actually also improves performance on large, modern machines
+ * (due to less cache pressure). This option implies no limitation (except
+ * on the number of arguments a macro may have, which is then limited to 32766)
+ * so it is on by default. Non-LOW_MEM code is considered deprecated.
+ */
+#define LOW_MEM
+
+/* ====================================================================== */
+/*
+ * Define AMIGA for systems using "drive letters" at the beginning of
+ * some paths; define MSDOS on systems with drive letters and using
+ * backslashes to separate directory components.
+ */
+/* #define AMIGA */
+/* #define MSDOS */
+
+/* ====================================================================== */
+/*
+ * Define this if your compiler does not know the strftime() function;
+ * TurboC 2.01 under Msdos does not know strftime().
+ */
+/* #define NOSTRFTIME */
+
+/* ====================================================================== */
+/*
+ * Buffering: there are two levels of buffering on input and output streams:
+ * the standard libc buffering (manageable with setbuf() and setvbuf())
+ * and some buffering provided by ucpp itself. The ucpp buffering uses
+ * two buffers, of size respectively INPUT_BUF_MEMG and OUTPUT_BUF_MEMG
+ * (as defined below).
+ * You can disable one or both of these bufferings by defining the macros
+ * NO_LIBC_BUF and NO_UCPP_BUF.
+ */
+/* #define NO_LIBC_BUF */
+/* #define NO_UCPP_BUF */
+
+/*
+ * On Unix stations, the system call mmap() might be used on input files.
+ * This option is a subclause of ucpp internal buffering. On one station,
+ * a 10% speed improvement was observed. Do not define this unless the
+ * host architecture has the following characteristics:
+ * -- Posix / Single Unix compliance
+ * -- Text files correspond one to one with memory representation
+ * If a file is not seekable or not mmapable, ucpp will revert to the
+ * standard fread() solution.
+ *
+ * This feature is still considered beta quality. On some systems where
+ * files can be bigger than memory address space (mainly, 32-bit systems
+ * with files bigger than 4 GB), this option makes ucpp fail to operate
+ * on those extremely large files.
+ */
+#define UCPP_MMAP
+
+/*
+ * Performance issues:
+ * -- On memory-starved systems, such as Minix-i86, do not use ucpp
+ * buffering; keep only libc buffering.
+ * -- If you do not use libc buffering, activate the UCPP_MMAP option.
+ * Note that the UCPP_MMAP option is ignored if ucpp buffering is not
+ * activated.
+ *
+ * On an Athlon 1200 running FreeBSD 4.7, the best performances are
+ * achieved when libc buffering is activated and/or UCPP_MMAP is on.
+ */
+
+/* ====================================================================== */
+/*
+ * Define this if you want ucpp to generate tokenized PRAGMA tokens;
+ * otherwise, it will generate raw string contents. This setting is
+ * irrelevant to the stand-alone version of ucpp.
+ */
+#define PRAGMA_TOKENIZE
+
+/*
+ * Define this to the special character that marks the end of tokens with
+ * a string value inside a tokenized PRAGMA token. The #pragma and _Pragma()
+ * directives which use this character will be a bit more difficult to
+ * decode (but ucpp will not mind). 0 cannot be used. '\n' is fine because
+ * it cannot appear inside a #pragma or _Pragma(), since newlines cannot be
+ * embedded inside tokens, neither directly nor by macro substitution and
+ * stringization. Besides, '\n' is portable.
+ */
+#define PRAGMA_TOKEN_END ((unsigned char)'\n')
+
+/*
+ * Define this if you want ucpp to include encountered #pragma directives
+ * in its output in non-lexer mode; _Pragma() are translated to equivalent
+ * #pragma directives.
+ */
+#define PRAGMA_DUMP
+
+/*
+ * According to my interpretation of the C99 standard, _Pragma() are
+ * evaluated wherever macro expansion could take place. However, Neil Booth,
+ * whose mother language is English (contrary to me) and who is well aware
+ * of the C99 standard (and especially the C preprocessor) told me that
+ * it was unclear whether _Pragma() are evaluated inside directives such
+ * as #if, #include and #line. If you want to disable the evaluation of
+ * _Pragma() inside such directives, define the following macro.
+ */
+/* #define NO_PRAGMA_IN_DIRECTIVE */
+
+/*
+ * The C99 standard mandates that the operator `##' must yield a single,
+ * valid token, lest undefined behaviour befall upon thy head. Hence,
+ * for instance, `+ ## +=' is forbidden, because `++=' is not a valid
+ * token (although it is a valid list of two tokens, `++' and `=').
+ * However, ucpp only emits a warning for such sin, and unmerges the
+ * tokens (thus emitting `+' then `+=' for that example). When ucpp
+ * produces text output, those two tokens will be separated by a space
+ * character so that the basic rule of text output is preserved: when
+ * parsed again, text output yields the exact same stream of tokens.
+ * That extra space is virtual: it does not count as a true whitespace
+ * token for stringization.
+ *
+ * However, it might be desirable, for some uses other than preprocessing
+ * C source code, not to emit that extra space at all. To make ucpp behave
+ * that way, define the DSHARP_TOKEN_MERGE macro. Please note that this
+ * can trigger spurious token merging. For instance, with that macro
+ * activated, `+ ## +=' will be output as `++=' which, if preprocessed
+ * again, will read as `++' followed by `='.
+ *
+ * All this is irrelevant to lexer mode; and trying to merge incompatible
+ * tokens is a shooting offence, anyway.
+ */
+/* #define DSHARP_TOKEN_MERGE */
+
+/* ====================================================================== */
+/*
+ * Define INMACRO_FLAG to include two flags to the structure lexer_state,
+ * that tell whether tokens come from a macro-replacement, and count those
+ * macro-replacements.
+ */
+/* #define INMACRO_FLAG */
+
+/* ====================================================================== */
+/*
+ * Paths where files are looked for by default, when #include is used.
+ * Typical path is /usr/local/include and /usr/include, in that order.
+ * If you want to set up no path, define the macro to 0.
+ *
+ * For Linux, get gcc includes too, or you will miss things like stddef.h.
+ * The exact path varies much, depending on the distribution.
+ */
+#define STD_INCLUDE_PATH "/usr/local/include", "/usr/include"
+
+/* ====================================================================== */
+/*
+ * Arithmetic code for evaluation of #if expressions. Evaluation
+ * uses either a native machine type, or an emulated two's complement
+ * type. Division by 0 and overflow on division are considered as errors
+ * and reported as such. If ARITHMETIC_CHECKS is defined, all other
+ * operations that imply undefined or implementation-defined behaviour
+ * are reported as warnings but otherwise performed nonetheless.
+ *
+ * For native type evaluation, the following macros should be defined:
+ * NATIVE_SIGNED the native signed type
+ * NATIVE_UNSIGNED the native corresponding unsigned type
+ * NATIVE_UNSIGNED_BITS the native unsigned type width, in bits
+ * NATIVE_SIGNED_MIN the native signed type minimum value
+ * NATIVE_SIGNED_MAX the native signed type maximum value
+ *
+ * The code in the arith.c file performs some tricky detection
+ * operations on the native type representation and possible existence
+ * of a trap representation. These operations assume a C99-compliant
+ * compiler; on a C90-only compiler, the operations are valid but may
+ * yield incorrect results. You may force those settings with some
+ * more macros: see the comments in arith.c (look for "ARCH_DEFINED").
+ * Remember that this is mostly a non-issue, unless you are building
+ * ucpp with a pre-C99 cross-compiler and either the host or target
+ * architecture uses a non-two's complement representation of signed
+ * integers. Such a combination is pretty rare nowadays, so the best
+ * you can do is forgetting completely this paragraph and live in peace.
+ *
+ *
+ * If you do not have a handy native type (for instance, you compile ucpp
+ * with a C90 compiler which lacks the "long long" type, or you compile
+ * ucpp for a cross-compiler which should support an evaluation integer
+ * type of a size that is not available on the host machine), you may use
+ * a simulated type. The type uses two's complement representation and
+ * may have any width from 2 bits to twice the underlying native type
+ * width, inclusive (odd widths are allowed). To use an emulated type,
+ * make sure that NATIVE_SIGNED is not defined, and define the following
+ * macros:
+ * SIMUL_ARITH_SUBTYPE the native underlying type to use
+ * SIMUL_SUBTYPE_BITS the native underlying type width
+ * SIMUL_NUMBITS the emulated type width
+ *
+ * Undefined and implementation-defined behaviours are warned upon, if
+ * ARITHMETIC_CHECKS is defined. Results are truncated to the type
+ * width; shift count for the << and >> operators is reduced modulo the
+ * emulated type width; right shifting of a signed negative value performs
+ * sign extension (the result is left-padded with bits set to 1).
+ */
+
+/*
+ * For native type evaluation with a 64-bit "long long" type.
+ */
+#define NATIVE_SIGNED long long
+#define NATIVE_UNSIGNED unsigned long long
+#define NATIVE_UNSIGNED_BITS 64
+#define NATIVE_SIGNED_MIN (-9223372036854775807LL - 1)
+#define NATIVE_SIGNED_MAX 9223372036854775807LL
+
+/*
+ * For emulation of a 64-bit type using a native 32-bit "unsigned long"
+ * type.
+#undef NATIVE_SIGNED
+#define SIMUL_ARITH_SUBTYPE unsigned long
+#define SIMUL_SUBTYPE_BITS 32
+#define SIMUL_NUMBITS 64
+ */
+
+/*
+ * Comment out the following line if you want to deactivate arithmetic
+ * checks (warnings upon undefined and implementation-defined
+ * behaviour). Arithmetic checks slow down arithmetic operations a bit,
+ * especially multiplications, but this should not be an issue with
+ * typical C source code.
+ */
+#define ARITHMETIC_CHECKS
+
+/* ====================================================================== */
+/*
+ * To force signedness of wide character constants, define WCHAR_SIGNEDNESS
+ * to 0 for unsigned, 1 for signed. By default, wide character constants
+ * are signed if the native `char' type is signed, and unsigned otherwise.
+#define WCHAR_SIGNEDNESS 0
+ */
+
+/*
+ * Standard assertions. They should include one cpu() assertion, one machine()
+ * assertion (identical to cpu()), and one or more system() assertions.
+ *
+ * for Linux/PC: cpu(i386), machine(i386), system(unix), system(linux)
+ * for Linux/Alpha: cpu(alpha), machine(alpha), system(unix), system(linux)
+ * for Sparc/Solaris: cpu(sparc), machine(sparc), system(unix), system(solaris)
+ *
+ * These are only suggestions. On Solaris, machine() should be defined
+ * for i386 or sparc (standard system headers use such an assertion). For
+ * cross-compilation, define assertions related to the target architecture.
+ *
+ * If you want no standard assertion, define STD_ASSERT to 0.
+ */
+/*
+#define STD_ASSERT "cpu(i386)", "machine(i386)", "system(unix)", \
+ "system(freebsd)"
+*/
+
+/* ====================================================================== */
+/*
+ * System predefined macros. Nothing really mandatory, but some programs
+ * might rely on those.
+ * Each string must be either "name" or "name=token-list". If you want
+ * no predefined macro, define STD_MACROS to 0.
+ */
+/*
+#define STD_MACROS "__FreeBSD=4", "__unix", "__i386", \
+ "__FreeBSD__=4", "__unix__", "__i386__"
+*/
+
+/* ====================================================================== */
+/*
+ * Default flags; HANDLE_ASSERTIONS is required for Solaris system headers.
+ * See cpp.h for the definition of these flags.
+ */
+#define DEFAULT_CPP_FLAGS (DISCARD_COMMENTS | WARN_STANDARD \
+ | WARN_PRAGMA | FAIL_SHARP | MACRO_VAARG \
+ | CPLUSPLUS_COMMENTS | LINE_NUM | TEXT_OUTPUT \
+ | KEEP_OUTPUT | HANDLE_TRIGRAPHS \
+ | HANDLE_ASSERTIONS)
+#define DEFAULT_LEXER_FLAGS (DISCARD_COMMENTS | WARN_STANDARD | FAIL_SHARP \
+ | MACRO_VAARG | CPLUSPLUS_COMMENTS | LEXER \
+ | HANDLE_TRIGRAPHS | HANDLE_ASSERTIONS)
+
+/* ====================================================================== */
+/*
+ * Define this to use sigsetjmp()/siglongjmp() instead of setjmp()/longjmp().
+ * This is non-ANSI, but it improves performance on some POSIX systems.
+ * On typical C source code, such improvement is completely negligible.
+ */
+/* #define POSIX_JMP */
+
+/* ====================================================================== */
+/*
+ * Maximum value (plus one) of a character handled by the lexer; 128 is
+ * alright for ASCII native source code, but 256 is needed for EBCDIC.
+ * 256 is safe in both cases; you will have big problems if you set
+ * this value to INT_MAX or above. On Minix-i86 or Msdos (small memory
+ * model), define MAX_CHAR_VAL to 128.
+ *
+ * Set MAX_CHAR_VAL to a power of two to increase lexing speed. Beware
+ * that lexer.c defines a static array of size MSTATE * MAX_CHAR_VAL
+ * values of type int (MSTATE is defined in lexer.c and is about 40).
+ */
+#define MAX_CHAR_VAL 128
+
+/*
+ * If you want some extra character to be considered as whitespace,
+ * define this macro to that character's code. On ISO-8859-1 machines, 160 is
+ * the code for the unbreakable space.
+ */
+/* #define UNBREAKABLE_SPACE 160 */
+
+/*
+ * If you want whitespace tokens contents to be recorded (making them
+ * tokens with a string content), define this. The macro STRING_TOKEN
+ * will be adjusted accordingly.
+ * Without this option, whitespace tokens are not even returned by the
+ * lex() function. This is irrelevant for the non-lexer mode (almost --
+ * it might slow down a bit ucpp, and with this option, comments will be
+ * kept inside #pragma directives).
+ */
+/* #define SEMPER_FIDELIS */
+
+#endif
+/* End of options overridable by UCPP_CONFIG and config.h */
+
+/* ====================================================================== */
+/*
+ * Some constants used for memory increment granularity. Increasing these
+ * values reduces the number of calls to malloc() but increases memory
+ * consumption.
+ *
+ * Values should be powers of 2.
+ */
+
+/* for cpp.c */
+#define COPY_LINE_LENGTH 80
+#define INPUT_BUF_MEMG 8192
+#define OUTPUT_BUF_MEMG 8192
+#define TOKEN_NAME_MEMG 64 /* must be at least 4 */
+#define TOKEN_LIST_MEMG 32
+#define INCPATH_MEMG 16
+#define GARBAGE_LIST_MEMG 32
+#define LS_STACK_MEMG 4
+#define FNAME_MEMG 32
+
+/* ====================================================================== */
+
+/* To protect the innocent. */
+#if defined(NO_UCPP_BUF) && defined(UCPP_MMAP)
+#undef UCPP_MMAP
+#endif
+
+#if defined(UCPP_MMAP) || defined(POSIX_JMP)
+#ifndef _POSIX_SOURCE
+#define _POSIX_SOURCE 1
+#endif
+#endif
+
+/*
+ * C90 does not know about the "inline" keyword, but C99 does know,
+ * and some C90 compilers know it as an extension. This part detects
+ * these occurrences.
+ */
+
+#ifndef INLINE
+
+#if __STDC__ && __STDC_VERSION__ >= 199901L
+/* this is a C99 compiler, keep inline unchanged */
+#elif defined(__GNUC__)
+/* this is GNU gcc; modify inline. The semantics is not identical to C99
+ but the differences are irrelevant as long as inline functions are static */
+#undef inline
+#define inline __inline__
+#elif defined(__DECC) && defined(__linux__)
+/* this is Compaq C under Linux, use __inline__ */
+#undef inline
+#define inline __inline__
+#else
+/* unknown compiler -> deactivate inline */
+#undef inline
+#define inline
+#endif
+
+#else
+/* INLINE has been set, use its value */
+#undef inline
+#define inline INLINE
+#endif
+
+#endif
diff --git a/libexec/auxcpp/ucpp.1 b/libexec/auxcpp/ucpp.1
new file mode 100644
index 00000000000..c6c30515056
--- /dev/null
+++ b/libexec/auxcpp/ucpp.1
@@ -0,0 +1,212 @@
+.TH UCPP 1 "Oct 21 2000"
+.SH NAME
+ucpp \- C preprocessor
+.SH SYNOPSIS
+.B ucpp
+[
+.I options
+]
+[
+.I file
+]
+.SH DESCRIPTION
+.LP
+.B ucpp
+is a C preprocessor mostly compatible with ISO-C99.
+It is rather strict and uses only a small amount of memory. It uses
+standard input as primary input if no file argument is given.
+.SH OPTIONS
+There are several classes of options.
+.TP
+.B Language Options
+.TP
+.BI \-C
+keep comments in the output.
+.TP
+.BI \-s
+if a rogue '#' is encountered, do not emit an error and keep it in
+the output.
+.TP
+.BI \-l
+suppress the emission of '#line' directives in the output.
+.TP
+.BI \-lg
+convert the '#line' to the gcc-style equivalent.
+.TP
+.BI \-CC
+disable C++-like comments (a '//' begins a comment, up to the end
+of the line). Use this option to get closer to C90 behaviour.
+.TP
+.B \-a, \-na
+handle assertions (defined with #assert);
+.B \-a
+also defines the standard assertions
+.I #machine
+,
+.I #cpu
+and
+.I #system
+(see
+.B \-e
+to get the local definition of such assertions).
+.TP
+.BI \-a0
+disable assertion support.
+.TP
+.BI \-V
+disable support for macros with a variable number of arguments: in C99,
+a macro may be declared with
+.I ...
+as the last argument; inside the replacement list,
+.I __VA_ARGS__
+is replaced with the optional extra arguments given in the call to the macro.
+Use this option to get closer to C90 behaviour.
+.TP
+.BI \-u
+enable UTF-8 support: with this option, the source is considered as
+an ISO/10646 source, encoded in UTF-8. Characters represented as two bytes
+or more are considered as alphabetic characters, like letters, and
+therefore usable in identifiers. These characters hold the same
+syntactic value as the corresponding Universal Character Names.
+.TP
+.BI \-X
+enable
+.B \-a, \-u
+and
+.B \-Y.
+This should make
+.B ucpp
+behave closer to what is requested from a "modern" C preprocessor.
+.TP
+.BI \-c90
+enable
+.B \-V
+and
+.B \-CC,
+and do not define
+.B __STDC_VERSION__.
+This should make
+.B ucpp
+mimic older C90 behaviour.
+.TP
+.BI \-t
+disable trigraph support; this seems to be required for some legacy code.
+.TP
+.B Warning Options
+.TP
+.BI \-wt
+emit a final warning when trigraphs are encountered.
+.TP
+.BI \-wtt
+emit warnings for each trigraph encountered.
+.TP
+.BI \-wa
+emit annoying warnings (these are usually useless).
+.TP
+.BI \-w0
+suppress standard warnings.
+.TP
+.B Directory Options
+.TP
+.BI \-I directory
+.TP
+.BI "\-I " directory
+add
+.I directory
+to the include path, before the standard include path.
+.TP
+.BI \-J directory
+.TP
+.BI "\-J " directory
+add
+.I directory
+to the include path, after the standard include path.
+.TP
+.BI \-zI
+do not use the standard (compile-time) include path.
+.TP
+.BI \-M
+emit only the names of encountered files, separated by spaces; this is
+intended for automatic generation of Makefile dependencies.
+.TP
+.BI \-Ma
+do the same as
+.B \-M
+but also for system files.
+.TP
+.BI "\-o " file
+direct the output to
+.I file
+instead of standard output.
+.TP
+.B Macro Options
+.TP
+.BI \-D macro
+predefine
+.I macro
+with content
+.B 1.
+.TP
+.BI \-D macro=def
+predefine
+.I macro
+with the content
+.I def.
+.TP
+.BI \-U macro
+undefine
+.I macro.
+.TP
+.BI \-Y
+predefine system-dependent macros.
+.TP
+.BI \-Z
+do not predefine special macros such as
+.B __TIME__.
+.TP
+.BI \-A foo(bar)
+add
+.I foo(bar)
+to the list of assertions.
+.TP
+.BI \-B foo(bar)
+remove
+.I foo(bar)
+from the list of assertions; you may also use
+.BI \-B foo
+to remove all
+.I foo(xxx)
+from the list of assertions.
+.TP
+.BI \-d
+instead of normal output, emit '#define' directives representing all
+macros defined during processing.
+.TP
+.BI \-e
+instead of normal output, emit '#assert' directives representing all
+assertions defined during processing.
+.TP
+.B Miscellaneous Options
+.TP
+.BI \-v
+print version number, include path and (optionally) defined assertions.
+.TP
+.BI \-h
+print some help.
+.SH ENVIRONMENT
+.PP
+.B ucpp
+is not itself affected by environment variables. However, it uses
+library functions that might be affected, depending on the system.
+.SH AUTHOR
+Thomas Pornin <pornin@bolet.org>
+.SH BUGS
+.PP
+.B ucpp
+is considered stable software. However improbable it is, please report
+bugs to the author (possibly with a file that exhibits the problem) if
+the latest version, available from this site:
+.TP
+http://pornin.nerim.net/ucpp/
+.PP
+has the bug.
diff --git a/libexec/auxcpp/ucppi.h b/libexec/auxcpp/ucppi.h
new file mode 100644
index 00000000000..ce4df74be52
--- /dev/null
+++ b/libexec/auxcpp/ucppi.h
@@ -0,0 +1,196 @@
+/*
+ * (c) Thomas Pornin 1999 - 2002
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef UCPP__UCPPI__
+#define UCPP__UCPPI__
+
+#include "tune.h"
+#include "cpp.h"
+#include "nhash.h"
+
+/*
+ * A macro represented in a compact form; simple tokens are represented
+ * by one byte, containing their number. Tokens with a string value are
+ * followed by the value (string finished by a 0). Macro arguments are
+ * followed by the argument number (in one byte -- thus implying a hard
+ * limit of 254 arguments; number 255 is for __VA_ARGS__).
+ */
+struct comp_token_fifo {
+ size_t length; /* presumably the number of bytes stored in t -- TODO confirm */
+ size_t rp; /* presumably the current read position in t -- TODO confirm */
+ unsigned char *t; /* the compact byte encoding described above */
+};
+
+/* These declarations are used only internally by ucpp */
+
+/*
+ * S_TOKEN(x) checks whether x is a token type with an embedded string
+ * ttMWS(x) checks whether x is macro whitespace (space, comment...)
+ * ttWHI(x) checks whether x is whitespace (MWS or newline)
+ */
+#define S_TOKEN(x) STRING_TOKEN(x)
+#define ttMWS(x) ((x) == NONE || (x) == COMMENT || (x) == OPT_NONE)
+#define ttWHI(x) (ttMWS(x) || (x) == NEWLINE)
+
+/*
+ * Function prototypes
+ */
+/*
+ * from lexer.c
+ */
+#define init_cppm ucpp_init_cppm
+#define put_char ucpp_put_char
+#define discard_char ucpp_discard_char
+#define next_token ucpp_next_token
+#define grap_char ucpp_grap_char
+#define space_char ucpp_space_char
+
+void init_cppm(void);
+void put_char(struct lexer_state *, unsigned char);
+void discard_char(struct lexer_state *);
+int next_token(struct lexer_state *);
+int grap_char(struct lexer_state *);
+int space_char(int);
+
+/*
+ * from assert.c
+ */
+struct assert {
+ hash_item_header head; /* first field */
+ size_t nbval;
+ struct token_fifo *val;
+};
+
+#define cmp_token_list ucpp_cmp_token_list
+#define handle_assert ucpp_handle_assert
+#define handle_unassert ucpp_handle_unassert
+#define get_assertion ucpp_get_assertion
+#define wipe_assertions ucpp_wipe_assertions
+
+int cmp_token_list(struct token_fifo *, struct token_fifo *);
+int handle_assert(struct lexer_state *);
+int handle_unassert(struct lexer_state *);
+struct assert *get_assertion(char *);
+void wipe_assertions(void);
+
+/*
+ * from macro.c
+ */
+struct macro {
+ hash_item_header head; /* first field */
+ int narg; /* presumably the number of entries in arg -- TODO confirm */
+ char **arg; /* presumably the macro argument names -- TODO confirm */
+ int nest;
+ int vaarg; /* presumably non-zero for a variadic macro -- TODO confirm */
+#ifdef LOW_MEM
+ struct comp_token_fifo cval; /* compact form, see struct comp_token_fifo */
+#else
+ struct token_fifo val; /* plain token list, used when LOW_MEM is not defined */
+#endif
+};
+
+#define print_token ucpp_print_token
+#define handle_define ucpp_handle_define
+#define handle_undef ucpp_handle_undef
+#define handle_ifdef ucpp_handle_ifdef
+#define handle_ifndef ucpp_handle_ifndef
+#define substitute_macro ucpp_substitute_macro
+#define get_macro ucpp_get_macro
+#define wipe_macros ucpp_wipe_macros
+#define dsharp_lexer ucpp_dsharp_lexer
+#define compile_time ucpp_compile_time
+#define compile_date ucpp_compile_date
+#ifdef PRAGMA_TOKENIZE
+#define tokenize_lexer ucpp_tokenize_lexer
+#endif
+
+void print_token(struct lexer_state *, struct token *, long);
+int handle_define(struct lexer_state *);
+int handle_undef(struct lexer_state *);
+int handle_ifdef(struct lexer_state *);
+int handle_ifndef(struct lexer_state *);
+int substitute_macro(struct lexer_state *, struct macro *,
+ struct token_fifo *, int, int, long);
+struct macro *get_macro(char *);
+void wipe_macros(void);
+
+extern struct lexer_state dsharp_lexer;
+extern char compile_time[], compile_date[];
+#ifdef PRAGMA_TOKENIZE
+extern struct lexer_state tokenize_lexer;
+#endif
+
+/*
+ * from eval.c
+ */
+#define strtoconst ucpp_strtoconst
+#define eval_expr ucpp_eval_expr
+#define eval_line ucpp_eval_line
+
+unsigned long strtoconst(char *);
+unsigned long eval_expr(struct token_fifo *, int *, int);
+extern long eval_line;
+
+#define eval_exception ucpp_eval_exception
+
+#ifdef POSIX_JMP /* POSIX variant; savemask is 0, so the signal mask is not saved */
+#define JMP_BUF sigjmp_buf
+#define catch(x) sigsetjmp((x), 0)
+#define throw(x) siglongjmp((x), 1)
+#else /* ISO C variant */
+#define JMP_BUF jmp_buf
+#define catch(x) setjmp((x))
+#define throw(x) longjmp((x), 1)
+#endif /* in both variants catch() yields 1 when reached through throw() */
+extern JMP_BUF eval_exception;
+
+/*
+ * from cpp.c
+ */
+#define token_name ucpp_token_name
+#define throw_away ucpp_throw_away
+#define garbage_collect ucpp_garbage_collect
+#define init_buf_lexer_state ucpp_init_buf_lexer_state
+#ifdef PRAGMA_TOKENIZE
+#define compress_token_list ucpp_compress_token_list
+#endif
+
+char *token_name(struct token *);
+void throw_away(struct garbage_fifo *, char *);
+void garbage_collect(struct garbage_fifo *);
+void init_buf_lexer_state(struct lexer_state *, int);
+#ifdef PRAGMA_TOKENIZE
+struct comp_token_fifo compress_token_list(struct token_fifo *);
+#endif
+
+#define ouch ucpp_ouch
+#define error ucpp_error
+#define warning ucpp_warning
+
+#endif