summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--usr.bin/Makefile4
-rw-r--r--usr.bin/awk/FIXES460
-rw-r--r--usr.bin/awk/Makefile23
-rw-r--r--usr.bin/awk/README69
-rw-r--r--usr.bin/awk/awk.1539
-rw-r--r--usr.bin/awk/awk.h247
-rw-r--r--usr.bin/awk/awkgram.y476
-rw-r--r--usr.bin/awk/awklex.l405
-rw-r--r--usr.bin/awk/b.c841
-rw-r--r--usr.bin/awk/lib.c636
-rw-r--r--usr.bin/awk/main.c180
-rw-r--r--usr.bin/awk/maketab.c168
-rw-r--r--usr.bin/awk/parse.c255
-rw-r--r--usr.bin/awk/proctab.c207
-rw-r--r--usr.bin/awk/proto.h181
-rw-r--r--usr.bin/awk/run.c1849
-rw-r--r--usr.bin/awk/tran.c419
17 files changed, 6957 insertions, 2 deletions
diff --git a/usr.bin/Makefile b/usr.bin/Makefile
index 4899fd60f86..09ea63aabe3 100644
--- a/usr.bin/Makefile
+++ b/usr.bin/Makefile
@@ -1,8 +1,8 @@
-# $OpenBSD: Makefile,v 1.10 1996/06/23 04:22:35 tholo Exp $
+# $OpenBSD: Makefile,v 1.11 1996/07/04 20:34:34 tholo Exp $
# $NetBSD: Makefile,v 1.62 1996/03/10 05:45:43 thorpej Exp $
# from: @(#)Makefile 5.8.1.1 (Berkeley) 5/8/91
-SUBDIR= apply apropos arch asa at banner basename bdes biff cal calendar \
+SUBDIR= apply apropos arch asa at awk banner basename bdes biff cal calendar \
cap_mkdb checknr chflags chpass cksum cmp col colcrt colrm column \
comm compress cpp crontab ctags cut dirname du \
env error expand false file find finger fmt fold fpr from \
diff --git a/usr.bin/awk/FIXES b/usr.bin/awk/FIXES
new file mode 100644
index 00000000000..9ac0b21ac16
--- /dev/null
+++ b/usr.bin/awk/FIXES
@@ -0,0 +1,460 @@
+/****************************************************************
+Copyright (C) AT&T and Lucent Technologies 1996
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the names of AT&T or Lucent Technologies
+or any of their entities not be used in advertising or publicity
+pertaining to distribution of the software without specific,
+written prior permission.
+
+AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
+ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+****************************************************************/
+
+This file lists all bug fixes, changes, etc., made since the AWK book
+was sent to the printers in August, 1987.
+
+Sep 12, 1987:
+ Very long printf strings caused core dump;
+ fixed aprintf, asprintf, format to catch them.
+ Can still get a core dump in printf itself.
+
+Sep 17, 1987:
+ Error-message printer had printf(s) instead of
+ printf("%s",s); got core dumps when the message
+ included a %.
+
+Oct xx, 1987:
+ Reluctantly added toupper and tolower functions.
+ Subject to rescinding without notice.
+
+Dec 2, 1987:
+ Newer C compilers apply a strict scope rule to extern
+ declarations within functions. Two extern declarations in
+ lib.c and tran.c have been moved to obviate this problem.
+
+Mar 25, 1988:
+ main.c fixed to recognize -- as terminator of command-
+ line options. Illegal options flagged.
+ Error reporting slightly cleaned up.
+
+May 10, 1988:
+ Fixed lib.c to permit _ in commandline variable names.
+
+May 22, 1988:
+ Removed limit on depth of function calls.
+
+May 28, 1988:
+ srand returns seed value it's using.
+ see 1/18/90
+
+June 1, 1988:
+ check error status on close
+
+July 2, 1988:
+ performance bug in b.c/cgoto(): not freeing some sets of states.
+ partial fix only right now, and the number of states increased
+ to make it less obvious.
+
+July 2, 1988:
+ flush stdout before opening file or pipe
+
+July 24, 1988:
+ fixed egregious error in toupper/tolower functions.
+ still subject to rescinding, however.
+
+Aug 23, 1988:
+ setting FILENAME in BEGIN caused core dump, apparently
+ because it was freeing space not allocated by malloc.
+
+Sep 30, 1988:
+ Now guarantees to evaluate all arguments of built-in
+ functions, as in C; the appearance is that arguments
+ are evaluated before the function is called. Places
+ affected are sub (gsub was ok), substr, printf, and
+ all the built-in arithmetic functions in bltin().
+ A warning is generated if a bltin() is called with
+ the wrong number of arguments.
+
+ This requires changing makeprof on p167 of the book.
+
+Oct 12, 1988:
+ Fixed bug in call() that freed local arrays twice.
+
+ Fixed to handle deletion of non-existent array right;
+ complains about attempt to delete non-array element.
+
+Oct 20, 1988:
+ Fixed %c: if expr is numeric, use numeric value;
+ otherwise print 1st char of string value. still
+ doesn't work if the value is 0 -- won't print \0.
+
+ Added a few more checks for running out of malloc.
+
+Oct 30, 1988:
+ Fixed bug in call() that failed to recover storage.
+
+ A warning is now generated if there are more arguments
+ in the call than in the definition (in lieu of fixing
+ another storage leak).
+
+Nov 27, 1988:
+ With fear and trembling, modified the grammar to permit
+ multiple pattern-action statements on one line without
+ an explicit separator. By definition, this capitulation
+ to the ghost of ancient implementations remains undefined
+ and thus subject to change without notice or apology.
+ DO NOT COUNT ON IT.
+
+Dec 7, 1988:
+ Added a bit of code to error printing to avoid printing nulls.
+ (Not clear that it actually would.)
+
+Dec 17, 1988:
+ Catches some more commandline errors in main.
+ Removed redundant decl of modf in run.c (confuses some compilers).
+ Warning: there's no single declaration of malloc, etc., in awk.h
+ that seems to satisfy all compilers.
+
+Jan 9, 1989:
+ Fixed bug that caused tempcell list to contain a duplicate.
+ The fix is kludgy.
+
+Apr 9, 1989:
+ Changed grammar to prohibit constants as 3rd arg of sub and gsub;
+ prevents class of overwriting-a-constant errors. (Last one?)
+ This invalidates the "banana" example on page 43 of the book.
+
+ Added \a ("alert"), \v (vertical tab), \xhhh (hexadecimal),
+ as in ANSI, for strings. Rescinded the sloppiness that permitted
+ non-octal digits in \ooo. Warning: not all compilers and libraries
+ will be able to deal with \x correctly.
+
+Apr 26, 1989:
+ Debugging output now includes a version date,
+ if one compiles it into the source each time.
+
+Apr 27, 1989:
+ Line number now accumulated correctly for comment lines.
+
+Jun 4, 1989:
+ ENVIRON array contains environment: if shell variable V=thing,
+ ENVIRON["V"] is "thing"
+
+ multiple -f arguments permitted. error reporting is naive.
+ (they were permitted before, but only the last was used.)
+
+ fixed a really stupid botch in the debugging macro dprintf
+
+ fixed order of evaluation of commandline assignments to match
+ what the book claims: an argument of the form x=e is evaluated
+ at the time it would have been opened if it were a filename (p 63).
+ this invalidates the suggested answer to ex 4-1 (p 195).
+
+ removed some code that permitted -F (space) fieldseparator,
+ since it didn't quite work right anyway. (restored aug 2)
+
+Jun 14, 1989:
+ added some missing ansi printf conversion letters: %i %X %E %G.
+ no sensible meaning for h or L, so they may not do what one expects.
+
+ made %* conversions work.
+
+ changed x^y so that if y is a positive integer, it's done
+ by explicit multiplication, thus achieving maximum accuracy.
+ (this should be done by pow() but it seems not to be locally.)
+ done to x ^= y as well.
+
+Jun 23, 1989:
+ add newline to usage message.
+
+Jul 10, 1989:
+ fixed ref-thru-zero bug in environment code in tran.c
+
+Jul 30, 1989:
+ added -v x=1 y=2 ... for immediate commandline variable assignment;
+ done before the BEGIN block for sure. they have to precede the
+ program if the program is on the commandline.
+ Modified Aug 2 to require a separate -v for each assignment.
+
+Aug 2, 1989:
+ restored -F (space) separator
+
+Aug 11, 1989:
+ fixed bug: commandline variable assignment has to look like
+ var=something. (consider the man page for =, in file =.1)
+
+ changed number of arguments to functions to static arrays
+ to avoid repeated malloc calls.
+
+Aug 24, 1989:
+ removed redundant relational tests against nullnode if parse
+ tree already had a relational at that point.
+
+Oct 11, 1989:
+ FILENAME is now defined in the BEGIN block -- too many old
+ programs broke.
+
+ "-" means stdin in getline as well as on the commandline.
+
+ added a bunch of casts to the code to tell the truth about
+ char * vs. unsigned char *, a right royal pain. added a
+ setlocale call to the front of main, though probably no one
+ has it usefully implemented yet.
+
+Oct 18, 1989:
+ another try to get the max number of open files set with
+ relatively machine-independent code.
+
+ small fix to input() in case of multiple reads after EOF.
+
+Jan 5, 1990:
+ fix potential problem in tran.c -- something was freed,
+ then used in freesymtab.
+
+Jan 18, 1990:
+ srand now returns previous seed value (0 to start).
+
+Feb 9, 1990:
+ fixed null pointer dereference bug in main.c: -F[nothing]. sigh.
+
+ restored srand behavior: it returns the current seed.
+
+May 6, 1990:
+ AVA fixed the grammar so that ! is uniformly of the same precedence as
+ unary + and -. This renders illegal some constructs like !x=y, which
+ now has to be parenthesized as !(x=y), and makes others work properly:
+ !x+y is (!x)+y, and x!y is x !y, not two pattern-action statements.
+ (These problems were pointed out by Bob Lenk of Posix.)
+
+ Added \x to regular expressions (already in strings).
+ Limited octal to octal digits; \8 and \9 are not octal.
+ Centralized the code for parsing escapes in regular expressions.
+ Added a bunch of tests to T.re and T.sub to verify some of this.
+
+Jun 26, 1990:
+ changed struct rrow (awk.h) to use long instead of int for lval,
+ since cfoll() stores a pointer in it. now works better when int's
+ are smaller than pointers!
+
+Aug 24, 1990:
+ changed NCHARS to 256 to handle 8-bit characters in strings
+ presented to match(), etc.
+
+Oct 8, 1990:
+ fixed horrible bug: types and values were not preserved in
+ some kinds of self-assignment. (in assign().)
+
+Oct 14, 1990:
+ fixed the bug on p. 198 in which it couldn't deduce that an
+ argument was an array in some contexts. replaced the error
+ message in intest() by code that damn well makes it an array.
+
+Oct 29, 1990:
+ fixed sleazy buggy code in lib.c that looked (incorrectly) for
+ too long input lines.
+
+Nov 2, 1990:
+ fixed sleazy test for integrality in getsval; use modf.
+
+Jan 11, 1991:
+ failed to set numeric state on $0 in cmd|getline context in run.c.
+
+Jan 28, 1991:
+ awk -f - reads the program from stdin.
+
+Feb 10, 1991:
+ check error status on all writes, to avoid banging on full disks.
+
+May 6, 1991:
+ fixed silly bug in hex parsing in hexstr().
+ removed an apparently unnecessary test in isnumber().
+ warn about weird printf conversions.
+ fixed unchecked array overwrite in relex().
+
+ changed for (i in array) to access elements in sorted order.
+ then unchanged it -- it really does run slower in too many cases.
+ left the code in place, commented out.
+
+May 13, 1991:
+ removed extra arg on gettemp, tempfree. minor error message rewording.
+
+Jun 2, 1991:
+ better defense against very long printf strings.
+ made break and continue illegal outside of loops.
+
+Jun 30, 1991:
+ better test for detecting too-long output record.
+
+Jul 21, 1991:
+ fixed so that in self-assignment like $1=$1, side effects
+ like recomputing $0 take place. (this is getting subtle.)
+
+Jul 27, 1991:
+ allow newline after ; in for statements.
+
+Aug 18, 1991:
+ enforce variable name syntax for commandline variables: has to
+ start with letter or _.
+
+Sep 24, 1991:
+ increased buffer in gsub. a very crude fix to a general problem.
+ and again on Sep 26.
+
+Nov 12, 1991:
+ cranked up some fixed-size arrays in b.c, and added a test for
+ overflow in penter. thanks to mark larsen.
+
+Nov 19, 1991:
+ use RAND_MAX instead of literal in builtin().
+
+Nov 30, 1991:
+ fixed storage leak in freefa, failing to recover [N]CCL.
+ thanks to Bill Jones (jones@skorpio.usask.ca)
+
+Dec 2, 1991:
+ die-casting time: converted to ansi C, installed that.
+
+Feb 20, 1992:
+ recompile after abortive changes; should be unchanged.
+
+Apr 12, 1992:
+ added explicit check for /dev/std(in,out,err) in redirection.
+ unlike gawk, no /dev/fd/n yet.
+
+ added fflush(file/pipe) builtin. hard to test satisfactorily.
+ not posix.
+
+Apr 24, 1992:
+ remove redundant close of stdin when using -f -.
+
+ got rid of core dump with -d; awk -d just prints date.
+
+May 31, 1992:
+ added -mr N and -mf N options: more record and fields.
+ these really ought to adjust automatically.
+
+ cleaned up some error messages; "out of space" now means
+ malloc returned NULL in all cases.
+
+ changed rehash so that if it runs out, it just returns;
+ things will continue to run slow, but maybe a bit longer.
+
+Nov 28, 1992:
+ deleted yyunput and yyoutput from proto.h;
+ different versions of lex give these different declarations.
+
+Jul 23, 1993:
+ cosmetic changes: increased sizes of some arrays,
+ reworded some error messages.
+
+ added CONVFMT as in posix (just replaced OFMT in getsval)
+
+ FILENAME is now "" until the first thing that causes a file
+ to be opened.
+
+Feb 2, 1994:
+ changed error() to print line number as %d, not %g.
+
+Apr 22, 1994:
+ fixed yet another subtle self-assignment problem:
+ $1 = $2; $1 = $1 clobbered $1.
+
+ Regression tests now use private echo, to avoid quoting problems.
+
+May 11, 1994:
+ trivial fix to printf to limit string size in sub().
+
+Aug 24, 1994:
+ detect duplicate arguments in function definitions (mdm).
+
+Jul 17, 1995:
+ added dynamically growing strings to awk.lx.l and b.c
+ to permit regular expressions to be much bigger.
+ the state arrays can still overflow.
+
+Aug 15, 1995:
+ initialized Cells in setsymtab more carefully; some fields
+ were not set. (thanks to purify, all of whose complaints i
+ think i now understand.)
+
+ fixed at least one error in gsub that looked at -1-th element
+ of an array when substituting for a null match (e.g., $).
+
+ delete arrayname is now legal; it clears the elements but leaves
+ the array, which may not be the right behavior.
+
+ modified makefile: my current make can't cope with the test used
+ to avoid unnecessary yacc invocations.
+
+Apr 29, 1996:
+ replaced uchar by uschar everywhere; apparently some compilers
+ usurp this name and this causes conflicts.
+
+ fixed call to time in run.c (bltin); arg is time_t *.
+
+ replaced horrible pointer/long punning in b.c by a legitimate
+ union. should be safer on 64-bit machines and cleaner everywhere.
+ (thanks to nelson beebe for pointing out some of these problems.)
+
+ replaced nested comments by #if 0...#endif in run.c, lib.c.
+
+ removed getsval, setsval, execute macros from run.c and lib.c.
+ machines are 100x faster than they were when these macros were
+ first used.
+
+ revised filenames: awk.g.y => awkgram.y, awk.lx.l => awklex.l,
+ y.tab.[ch] => ytab.[ch], lex.yy.c => lexyy.c, all in the aid of
+ portability to nameless systems.
+
+ "make bundle" now includes yacc and lex output files for recipients
+ who don't have yacc or lex.
+
+May 2, 1996:
+ removed all register declarations.
+
+ enhanced split(), as in gawk, etc: split(s, a, "") splits s into
+ a[1]...a[length(s)] with each character a single element.
+
+ made the same changes for field-splitting if FS is "".
+
+ added nextfile, as in gawk: causes immediate advance to next
+ input file. (thanks to arnold robbins for inspiration and code).
+
+ small fixes to regexpr code: can now handle []], [[], and
+ variants; [] is now a syntax error, rather than matching
+ everything; [z-a] is now empty, not z. far from complete
+ or correct, however. (thanks to jeffrey friedl for pointing out
+ some awful behaviors.)
+
+May 26, 1996:
+ an attempt to rationalize the (unsigned) char issue. almost all
+ instances of unsigned char have been removed; the handful of places
+ in b.c where chars are used as table indices have been hand-crafted.
+ added some latin-1 tests to the regression, but i'm not confident;
+ none of my compilers seem to care much. thanks to nelson beebe for
+ pointing out some others that do care.
+
+May 27, 1996:
+ cleaned up some declarations so gcc -Wall is now almost silent.
+
+ makefile now includes backup copies of ytab.c and lexyy.c in case
+ one makes before looking; it also avoids recreating lexyy.c unless
+ really needed.
+
+ s/aprintf/awkprintf/, s/asprintf/awksprintf/ to avoid some name clashes
+ with unwisely-written header files.
+
+ thanks to jeffrey friedl for several of these.
+
diff --git a/usr.bin/awk/Makefile b/usr.bin/awk/Makefile
new file mode 100644
index 00000000000..936941b7d71
--- /dev/null
+++ b/usr.bin/awk/Makefile
@@ -0,0 +1,23 @@
+PROG= awk
+SRCS= awkgram.c awklex.c b.c main.c parse.c proctab.c tran.c lib.c run.c
+LDADD= -ll -lm
+DPADD= ${LIBL} ${LIBM}
+CLEANFILES+=awkgram.c awkgram.h awklex.c awklex.h proctab.c maketab prevawkgram.h
+CFLAGS+=-I. -I${.CURDIR}
+
+awkgram.c awkgram.h prevawkgram.h: awkgram.y # parser source and token header, generated from the grammar
+ ${YACC} -d ${.IMPSRC}
+ mv y.tab.c awkgram.c
+ mv y.tab.h awkgram.h
+ cmp -s awkgram.h prevawkgram.h || cp awkgram.h prevawkgram.h # refresh the copy only when the header content differs
+
+awklex.c: awklex.l # scanner source, generated from the lex specification
+ lex ${.IMPSRC} # NOTE(review): lex is hard-coded here while yacc is run through ${YACC} -- confirm this is intended
+ mv lex.yy.c awklex.c
+
+proctab.c: maketab # proctab.c is whatever the maketab helper writes to stdout
+ ./maketab >proctab.c
+
+maketab.o: prevawkgram.h # maketab.o is rebuilt when prevawkgram.h is refreshed
+
+.include <bsd.prog.mk>
diff --git a/usr.bin/awk/README b/usr.bin/awk/README
new file mode 100644
index 00000000000..ffb736b6b48
--- /dev/null
+++ b/usr.bin/awk/README
@@ -0,0 +1,69 @@
+/****************************************************************
+Copyright (C) AT&T and Lucent Technologies 1996
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the names of AT&T or Lucent Technologies
+or any of their entities not be used in advertising or publicity
+pertaining to distribution of the software without specific,
+written prior permission.
+
+AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
+ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+****************************************************************/
+
+This is the version of awk described in "The AWK Programming Language",
+by A. V. Aho, B. W. Kernighan, and P. J. Weinberger
+(Addison-Wesley, 1988, ISBN 0-201-07981-X).
+Changes, mostly bug fixes and occasional enhancements, are listed
+in FIXES. If you distribute this code further, please please please
+distribute FIXES with it. If you find errors, please report them
+to bwk@bell-labs.com. Thanks.
+
+The program itself is created by
+ make
+which should produce a longish sequence of messages roughly like this:
+
+ yacc -d awkgram.y
+
+conflicts: 43 shift/reduce, 85 reduce/reduce
+ mv y.tab.c ytab.c
+ mv y.tab.h ytab.h
+ cc -O -c ytab.c
+ cmp -s ytab.h prevytab.h || (cp ytab.h prevytab.h; echo change maketab)
+change maketab
+ cc -O -c b.c
+ cc -O -c main.c
+ cc -O -c parse.c
+ cc -O maketab.c -o maketab
+ ./maketab >proctab.c
+ cc -O -c proctab.c
+ cc -O -c tran.c
+ cc -O -c lib.c
+ cc -O -c run.c
+ lex awklex.l
+ mv lex.yy.c lexyy.c
+ cc -O -c lexyy.c
+ cc -O ytab.o lexyy.o b.o main.o parse.o proctab.o tran.o lib.o run.o -lm
+
+This produces an executable a.out; you will eventually
+want to move this to some place like /usr/bin/awk.
+
+If your system is missing some of the components, e.g., yacc or lex,
+you should be able to compile the pieces manually. We have included
+yacc output in ytab.c and ytab.h, and lex output in lexyy.c.
+
+NOTE: This version uses ANSI C, as you should also.
+
+The version of malloc that comes with some systems (e.g., IRIX) is
+astonishingly slow. If awk seems slow, you might try fixing that.
diff --git a/usr.bin/awk/awk.1 b/usr.bin/awk/awk.1
new file mode 100644
index 00000000000..0097beebd94
--- /dev/null
+++ b/usr.bin/awk/awk.1
@@ -0,0 +1,539 @@
+.de EX
+.nf
+.ft CW
+..
+.de EE
+.br
+.fi
+.ft 1
+..
+awk
+.TH AWK 1
+.CT 1 files prog_other
+.SH NAME
+awk \- pattern-directed scanning and processing language
+.SH SYNOPSIS
+.B awk
+[
+.BI \-F
+.I fs
+]
+[
+.BI \-v
+.I var=value
+]
+[
+.BI \-mr n
+]
+[
+.BI \-mf n
+]
+[
+.I 'prog'
+|
+.BI \-f
+.I progfile
+]
+[
+.I file ...
+]
+.SH DESCRIPTION
+.I Awk
+scans each input
+.I file
+for lines that match any of a set of patterns specified literally in
+.IR prog
+or in one or more files
+specified as
+.B \-f
+.IR progfile .
+With each pattern
+there can be an associated action that will be performed
+when a line of a
+.I file
+matches the pattern.
+Each line is matched against the
+pattern portion of every pattern-action statement;
+the associated action is performed for each matched pattern.
+The file name
+.B \-
+means the standard input.
+Any
+.IR file
+of the form
+.I var=value
+is treated as an assignment, not a filename,
+and is executed at the time it would have been opened if it were a filename.
+The option
+.B \-v
+followed by
+.I var=value
+is an assignment to be done before
+.I prog
+is executed;
+any number of
+.B \-v
+options may be present.
+The
+.B \-F
+.IR fs
+option defines the input field separator to be the regular expression
+.IR fs .
+.PP
+An input line is normally made up of fields separated by white space,
+or by regular expression
+.BR FS .
+The fields are denoted
+.BR $1 ,
+.BR $2 ,
+\&..., while
+.B $0
+refers to the entire line.
+If
+.BR FS
+is null, the input line is split into one field per character.
+.PP
+To compensate for inadequate implementation of storage management,
+the
+.B \-mr
+option can be used to set the maximum size of the input record,
+and the
+.B \-mf
+option to set the maximum number of fields.
+.PP
+A pattern-action statement has the form
+.IP
+.IB pattern " { " action " }
+.PP
+A missing
+.BI { " action " }
+means print the line;
+a missing pattern always matches.
+Pattern-action statements are separated by newlines or semicolons.
+.PP
+An action is a sequence of statements.
+A statement can be one of the following:
+.PP
+.EX
+.ta \w'\f(CWdelete array[expression]'u
+.RS
+.nf
+.ft CW
+if(\fI expression \fP)\fI statement \fP\fR[ \fPelse\fI statement \fP\fR]\fP
+while(\fI expression \fP)\fI statement\fP
+for(\fI expression \fP;\fI expression \fP;\fI expression \fP)\fI statement\fP
+for(\fI var \fPin\fI array \fP)\fI statement\fP
+do\fI statement \fPwhile(\fI expression \fP)
+break
+continue
+{\fR [\fP\fI statement ... \fP\fR] \fP}
+\fIexpression\fP #\fR commonly\fP\fI var = expression\fP
+print\fR [ \fP\fIexpression-list \fP\fR] \fP\fR[ \fP>\fI expression \fP\fR]\fP
+printf\fI format \fP\fR[ \fP,\fI expression-list \fP\fR] \fP\fR[ \fP>\fI expression \fP\fR]\fP
+return\fR [ \fP\fIexpression \fP\fR]\fP
+next #\fR skip remaining patterns on this input line\fP
+nextfile #\fR skip rest of this file, open next, start at top\fP
+delete\fI array\fP[\fI expression \fP] #\fR delete an array element\fP
+delete\fI array\fP #\fR delete all elements of array\fP
+exit\fR [ \fP\fIexpression \fP\fR]\fP #\fR exit immediately; status is \fP\fIexpression\fP
+.fi
+.RE
+.EE
+.DT
+.PP
+Statements are terminated by
+semicolons, newlines or right braces.
+An empty
+.I expression-list
+stands for
+.BR $0 .
+String constants are quoted \&\f(CW"\ "\fR,
+with the usual C escapes recognized within.
+Expressions take on string or numeric values as appropriate,
+and are built using the operators
+.B + \- * / % ^
+(exponentiation), and concatenation (indicated by white space).
+The operators
+.B
+! ++ \-\- += \-= *= /= %= ^= > >= < <= == != ?:
+are also available in expressions.
+Variables may be scalars, array elements
+(denoted
+.IB x [ i ] )
+or fields.
+Variables are initialized to the null string.
+Array subscripts may be any string,
+not necessarily numeric;
+this allows for a form of associative memory.
+Multiple subscripts such as
+.B [i,j,k]
+are permitted; the constituents are concatenated,
+separated by the value of
+.BR SUBSEP .
+.PP
+The
+.B print
+statement prints its arguments on the standard output
+(or on a file if
+.BI > file
+or
+.BI >> file
+is present or on a pipe if
+.BI | cmd
+is present), separated by the current output field separator,
+and terminated by the output record separator.
+.I file
+and
+.I cmd
+may be literal names or parenthesized expressions;
+identical string values in different statements denote
+the same open file.
+The
+.B printf
+statement formats its expression list according to the format
+(see
+.IR printf (3)) .
+The built-in function
+.BI close( expr )
+closes the file or pipe
+.IR expr .
+The built-in function
+.BI fflush( expr )
+flushes any buffered output for the file or pipe
+.IR expr .
+.PP
+The mathematical functions
+.BR exp ,
+.BR log ,
+.BR sqrt ,
+.BR sin ,
+.BR cos ,
+and
+.BR atan2
+are built in.
+Other built-in functions:
+.TF length
+.TP
+.B length
+the length of its argument
+taken as a string,
+or of
+.B $0
+if no argument.
+.TP
+.B rand
+random number on (0,1)
+.TP
+.B srand
+sets seed for
+.B rand
+and returns the previous seed.
+.TP
+.B int
+truncates to an integer value
+.TP
+.BI substr( s , " m" , " n\fB)
+the
+.IR n -character
+substring of
+.I s
+that begins at position
+.IR m
+counted from 1.
+.TP
+.BI index( s , " t" )
+the position in
+.I s
+where the string
+.I t
+occurs, or 0 if it does not.
+.TP
+.BI match( s , " r" )
+the position in
+.I s
+where the regular expression
+.I r
+occurs, or 0 if it does not.
+The variables
+.B RSTART
+and
+.B RLENGTH
+are set to the position and length of the matched string.
+.TP
+.BI split( s , " a" , " fs\fB)
+splits the string
+.I s
+into array elements
+.IB a [1] ,
+.IB a [2] ,
+\&...,
+.IB a [ n ] ,
+and returns
+.IR n .
+The separation is done with the regular expression
+.I fs
+or with the field separator
+.B FS
+if
+.I fs
+is not given.
+An empty string as field separator splits the string
+into one array element per character.
+.TP
+.BI sub( r , " t" , " s\fB)
+substitutes
+.I t
+for the first occurrence of the regular expression
+.I r
+in the string
+.IR s .
+If
+.I s
+is not given,
+.B $0
+is used.
+.TP
+.B gsub
+same as
+.B sub
+except that all occurrences of the regular expression
+are replaced;
+.B sub
+and
+.B gsub
+return the number of replacements.
+.TP
+.BI sprintf( fmt , " expr" , " ...\fB )
+the string resulting from formatting
+.I expr ...
+according to the
+.IR printf (3)
+format
+.I fmt
+.TP
+.BI system( cmd )
+executes
+.I cmd
+and returns its exit status
+.TP
+.BI tolower( str )
+returns a copy of
+.I str
+with all upper-case characters translated to their
+corresponding lower-case equivalents.
+.TP
+.BI toupper( str )
+returns a copy of
+.I str
+with all lower-case characters translated to their
+corresponding upper-case equivalents.
+.PD
+.PP
+The ``function''
+.B getline
+sets
+.B $0
+to the next input record from the current input file;
+.B getline
+.BI < file
+sets
+.B $0
+to the next record from
+.IR file .
+.B getline
+.I x
+sets variable
+.I x
+instead.
+Finally,
+.IB cmd " | getline
+pipes the output of
+.I cmd
+into
+.BR getline ;
+each call of
+.B getline
+returns the next line of output from
+.IR cmd .
+In all cases,
+.B getline
+returns 1 for a successful input,
+0 for end of file, and \-1 for an error.
+.PP
+Patterns are arbitrary Boolean combinations
+(with
+.BR "! || &&" )
+of regular expressions and
+relational expressions.
+Regular expressions are as in
+.IR egrep ;
+see
+.IR grep (1).
+Isolated regular expressions
+in a pattern apply to the entire line.
+Regular expressions may also occur in
+relational expressions, using the operators
+.BR ~
+and
+.BR !~ .
+.BI / re /
+is a constant regular expression;
+any string (constant or variable) may be used
+as a regular expression, except in the position of an isolated regular expression
+in a pattern.
+.PP
+A pattern may consist of two patterns separated by a comma;
+in this case, the action is performed for all lines
+from an occurrence of the first pattern
+through an occurrence of the second.
+.PP
+A relational expression is one of the following:
+.IP
+.I expression matchop regular-expression
+.br
+.I expression relop expression
+.br
+.IB expression " in " array-name
+.br
+.BI ( expr , expr,... ") in " array-name
+.PP
+where a relop is any of the six relational operators in C,
+and a matchop is either
+.B ~
+(matches)
+or
+.B !~
+(does not match).
+A conditional is an arithmetic expression,
+a relational expression,
+or a Boolean combination
+of these.
+.PP
+The special patterns
+.B BEGIN
+and
+.B END
+may be used to capture control before the first input line is read
+and after the last.
+.B BEGIN
+and
+.B END
+do not combine with other patterns.
+.PP
+Variable names with special meanings:
+.TF FILENAME
+.TP
+.B CONVFMT
+conversion format used when converting numbers (default
+.BR "%.6g" )
+.TP
+.B FS
+regular expression used to separate fields; also settable
+by option
+.BI \-F fs .
+.TP
+.BR NF
+number of fields in the current record
+.TP
+.B NR
+ordinal number of the current record
+.TP
+.B FNR
+ordinal number of the current record in the current file
+.TP
+.B FILENAME
+the name of the current input file
+.TP
+.B RS
+input record separator (default newline)
+.TP
+.B OFS
+output field separator (default blank)
+.TP
+.B ORS
+output record separator (default newline)
+.TP
+.B OFMT
+output format for numbers (default
+.BR "%.6g" )
+.TP
+.B SUBSEP
+separates multiple subscripts (default 034)
+.TP
+.B ARGC
+argument count, assignable
+.TP
+.B ARGV
+argument array, assignable;
+non-null members are taken as filenames
+.TP
+.B ENVIRON
+array of environment variables; subscripts are names.
+.PD
+.PP
+Functions may be defined (at the position of a pattern-action statement) thus:
+.IP
+.B
+function foo(a, b, c) { ...; return x }
+.PP
+Parameters are passed by value if scalar and by reference if array name;
+functions may be called recursively.
+Parameters are local to the function; all other variables are global.
+Thus local variables may be created by providing excess parameters in
+the function definition.
+.SH EXAMPLES
+.TP
+.B
+length($0) > 72
+Print lines longer than 72 characters.
+.TP
+.B
+{ print $2, $1 }
+Print first two fields in opposite order.
+.PP
+.EX
+BEGIN { FS = ",[ \et]*|[ \et]+" }
+ { print $2, $1 }
+.EE
+.ns
+.IP
+Same, with input fields separated by comma and/or blanks and tabs.
+.PP
+.EX
+.nf
+ { s += $1 }
+END { print "sum is", s, " average is", s/NR }
+.fi
+.EE
+.ns
+.IP
+Add up first column, print sum and average.
+.TP
+.B
+/start/, /stop/
+Print all lines between start/stop pairs.
+.PP
+.EX
+.nf
+BEGIN { # Simulate echo(1)
+ for (i = 1; i < ARGC; i++) printf "%s ", ARGV[i]
+ printf "\en"
+ exit }
+.fi
+.EE
+.SH SEE ALSO
+.IR lex (1),
+.IR sed (1)
+.br
+A. V. Aho, B. W. Kernighan, P. J. Weinberger,
+.I
+The AWK Programming Language,
+Addison-Wesley, 1988. ISBN 0-201-07981-X
+.SH BUGS
+There are no explicit conversions between numbers and strings.
+To force an expression to be treated as a number add 0 to it;
+to force it to be treated as a string concatenate
+\&\f(CW""\fP to it.
+.br
+The scope rules for variables in functions are a botch;
+the syntax is worse.
diff --git a/usr.bin/awk/awk.h b/usr.bin/awk/awk.h
new file mode 100644
index 00000000000..731f6f13cab
--- /dev/null
+++ b/usr.bin/awk/awk.h
@@ -0,0 +1,247 @@
+/****************************************************************
+Copyright (C) AT&T and Lucent Technologies 1996
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the names of AT&T or Lucent Technologies
+or any of their entities not be used in advertising or publicity
+pertaining to distribution of the software without specific,
+written prior permission.
+
+AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
+ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+****************************************************************/
+
+typedef double Awkfloat;
+
+/* unsigned char is more trouble than it's worth */
+
+typedef unsigned char uschar;
+
+#define xfree(a) { if ((a) != NULL) { free((char *) a); a = NULL; } } /* free a if it is non-NULL, then reset it to NULL; a must be an assignable lvalue. NOTE(review): expands to a bare { } block, so "if (c) xfree(p); else ..." will not parse */
+
+#define DEBUG
+#ifdef DEBUG
+ /* uses have to be doubly parenthesized */
+# define dprintf(x) if (dbg) printf x
+#else
+# define dprintf(x)
+#endif
+
+extern char errbuf[200]; /* buffer the macros below format into; sprintf is unbounded, so a message must fit in 200 bytes */
+#define ERROR sprintf(errbuf, /* opens the call: ERROR "fmt", args FATAL; expands to sprintf(errbuf, "fmt", args ), error(1, errbuf); */
+#define FATAL ), error(1, errbuf) /* closes the sprintf opened by ERROR, then calls error(1, errbuf) */
+#define WARNING ), error(0, errbuf) /* closes the sprintf opened by ERROR, then calls error(0, errbuf) */
+#define SYNTAX ), yyerror(errbuf) /* closes the sprintf opened by ERROR, then calls yyerror(errbuf) */
+
+extern int compile_time; /* 1 if compiling, 0 if running */
+
+#define RECSIZE (3 * 1024) /* sets limit on records, fields, etc., etc. */
+extern int recsize; /* variable version */
+
+extern char **FS;
+extern char **RS;
+extern char **ORS;
+extern char **OFS;
+extern char **OFMT;
+extern Awkfloat *NR;
+extern Awkfloat *FNR;
+extern Awkfloat *NF;
+extern char **FILENAME;
+extern char **SUBSEP;
+extern Awkfloat *RSTART;
+extern Awkfloat *RLENGTH;
+
+extern char *record; /* points to $0 */
+extern int lineno; /* line number in awk program */
+extern int errorflag; /* 1 if error has occurred */
+extern int donefld; /* 1 if record broken into fields */
+extern int donerec; /* 1 if record is valid (no fld has changed) */
+
+extern int dbg;
+
+typedef struct {
+ char *cbuf; /* heap buffer holding the characters; not NUL-terminated unless the caller adds a 0 */
+ int clen; /* number of characters stored so far, i.e. index of the next write */
+ int cmax; /* allocated size of cbuf in bytes */
+} Gstring; /* a string that grows */
+
+extern Gstring *newGstring(void); /* constructor */
+extern void delGstring(Gstring *); /* destructor */
+extern char *cadd(Gstring *gs, int c); /* function to grow with */
+extern void caddreset(Gstring *gs); /* set cbuf empty */
+extern void cunadd(Gstring *gs); /* back up one char in cbuf */
+
+extern Gstring *gs; /* used by lex */
+
+extern char *patbeg; /* beginning of pattern matched */
+extern int patlen; /* length of pattern matched. set in b.c */
+
+/* Cell: all information about a variable or constant */
+
+typedef struct Cell {
+ uschar ctype; /* OCELL, OBOOL, OJUMP, etc. */
+ uschar csub; /* CCON, CTEMP, CFLD, etc. */
+ char *nval; /* name, for variables only */
+ char *sval; /* string value */
+ Awkfloat fval; /* value as number */
+ unsigned tval; /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE */
+ struct Cell *cnext; /* ptr to next if chained */
+} Cell;
+
+typedef struct { /* symbol table array */
+ int nelem; /* elements in table right now */
+ int size; /* size of tab */
+ Cell **tab; /* hash table pointers */
+} Array;
+
+#define NSYMTAB 50 /* initial size of a symbol table */
+extern Array *symtab;
+
+extern Cell *recloc; /* location of input record */
+extern Cell *nrloc; /* NR */
+extern Cell *fnrloc; /* FNR */
+extern Cell *nfloc; /* NF */
+extern Cell *rstartloc; /* RSTART */
+extern Cell *rlengthloc; /* RLENGTH */
+
+/* Cell.tval values: */
+#define NUM 01 /* number value is valid */
+#define STR 02 /* string value is valid */
+#define DONTFREE 04 /* string space is not freeable */
+#define CON 010 /* this is a constant */
+#define ARR 020 /* this is an array */
+#define FCN 040 /* this is a function name */
+#define FLD 0100 /* this is a field $1, $2, ... */
+#define REC 0200 /* this is $0 */
+
+
+/* function types */
+#define FLENGTH 1
+#define FSQRT 2
+#define FEXP 3
+#define FLOG 4
+#define FINT 5
+#define FSYSTEM 6
+#define FRAND 7
+#define FSRAND 8
+#define FSIN 9
+#define FCOS 10
+#define FATAN 11
+#define FTOUPPER 12
+#define FTOLOWER 13
+#define FFLUSH 14
+
+/* Node: parse tree is made of nodes, with Cell's at bottom */
+
+typedef struct Node {
+ int ntype; /* one of the node types NVALUE, NSTAT, NEXPR, NFIELD (tested by isvalue, isexpr) */
+ struct Node *nnext; /* link to the next node when nodes are chained into a list */
+ int lineno; /* presumably the program line this node came from -- TODO confirm where nodes are built */
+ int nobj; /* token code for the node, e.g. ARG, LE, AND (tested by isargument and notnull) */
+ struct Node *narg[1]; /* variable: actual size set by calling malloc */
+} Node;
+
+#define NIL ((Node *) 0)
+
+extern Node *winner;
+extern Node *nullstat;
+extern Node *nullnode;
+
+/* ctypes */
+#define OCELL 1
+#define OBOOL 2
+#define OJUMP 3
+
+/* Cell subtypes: csub */
+#define CFREE 7
+#define CCOPY 6
+#define CCON 5
+#define CTEMP 4
+#define CNAME 3
+#define CVAR 2
+#define CFLD 1
+#define CUNK 0
+
+/* bool subtypes */
+#define BTRUE 11
+#define BFALSE 12
+
+/* jump subtypes */
+#define JEXIT 21
+#define JNEXT 22
+#define JBREAK 23
+#define JCONT 24
+#define JRET 25
+#define JNEXTFILE 26
+
+/* node types */
+#define NVALUE 1
+#define NSTAT 2
+#define NEXPR 3
+#define NFIELD 4
+
+
+extern int pairstack[], paircnt;
+
+#define notlegal(n) ((n) <= FIRSTTOKEN || (n) >= LASTTOKEN || proctab[(n)-FIRSTTOKEN] == nullproc) /* true if n is outside the token range or its proctab entry is nullproc; n is evaluated up to three times */
+#define isvalue(n) ((n)->ntype == NVALUE)
+#define isexpr(n) ((n)->ntype == NEXPR)
+#define isjump(n) ((n)->ctype == OJUMP)
+#define isexit(n) ((n)->csub == JEXIT)
+#define isbreak(n) ((n)->csub == JBREAK)
+#define iscont(n) ((n)->csub == JCONT)
+#define isnext(n) ((n)->csub == JNEXT)
+#define isnextfile(n) ((n)->csub == JNEXTFILE)
+#define isret(n) ((n)->csub == JRET)
+#define isstr(n) ((n)->tval & STR)
+#define isnum(n) ((n)->tval & NUM)
+#define isarr(n) ((n)->tval & ARR)
+#define isfunc(n) ((n)->tval & FCN)
+#define istrue(n) ((n)->csub == BTRUE)
+#define istemp(n) ((n)->csub == CTEMP)
+#define isargument(n) ((n)->nobj == ARG)
+#define freeable(p) (!((p)->tval & DONTFREE))
+
+/* structures used by regular expression matching machinery, mostly b.c: */
+
+#define NCHARS (256+1) /* 256 handles 8-bit chars; 128 does 7-bit */
+ /* watch out in match(), etc. */
+#define NSTATES 32
+
+typedef struct rrow {
+ int ltype;
+ union {
+ int i;
+ Node *np;
+ char *up;
+ } lval; /* because Al stores a pointer in it! */
+ int *lfollow;
+} rrow;
+
+typedef struct fa {
+ char *restr;
+ int anchor;
+ int use;
+ uschar gototab[NSTATES][NCHARS];
+ int *posns[NSTATES];
+ uschar out[NSTATES];
+ int initstat;
+ int curstat;
+ int accept;
+ int reset;
+ struct rrow re[1];
+} fa;
+
+
+#include "proto.h"
diff --git a/usr.bin/awk/awkgram.y b/usr.bin/awk/awkgram.y
new file mode 100644
index 00000000000..e1cb9f6e6fe
--- /dev/null
+++ b/usr.bin/awk/awkgram.y
@@ -0,0 +1,476 @@
+/****************************************************************
+Copyright (C) AT&T and Lucent Technologies 1996
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the names of AT&T or Lucent Technologies
+or any of their entities not be used in advertising or publicity
+pertaining to distribution of the software without specific,
+written prior permission.
+
+AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
+ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+****************************************************************/
+
+%{
+#include <stdio.h>
+#include <string.h>
+#include "awk.h"
+
+void checkdup(Node *list, Cell *item);
+int yywrap(void) { return(1); }
+
+Node *beginloc = 0;
+Node *endloc = 0;
+int infunc = 0; /* = 1 if in arglist or body of func */
+int inloop = 0; /* = 1 if in while, for, do */
+char *curfname = 0; /* current function name */
+Node *arglist = 0; /* list of args for current function */
+%}
+
+%union {
+ Node *p;
+ Cell *cp;
+ int i;
+ char *s;
+}
+
+%token <i> FIRSTTOKEN /* must be first */
+%token <p> PROGRAM PASTAT PASTAT2 XBEGIN XEND
+%token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
+%token <i> ARRAY
+%token <i> MATCH NOTMATCH MATCHOP
+%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS
+%token <i> AND BOR APPEND EQ GE GT LE LT NE IN
+%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
+%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
+%token <i> ADD MINUS MULT DIVIDE MOD
+%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
+%token <i> PRINT PRINTF SPRINTF
+%token <p> ELSE INTEST CONDEXPR
+%token <i> POSTINCR PREINCR POSTDECR PREDECR
+%token <cp> VAR IVAR VARNF CALL NUMBER STRING FIELD
+%token <s> REGEXPR
+
+%type <p> pas pattern ppattern plist pplist patlist prarg term re
+%type <p> pa_pat pa_stat pa_stats
+%type <s> reg_expr
+%type <p> simple_stmt opt_simple_stmt stmt stmtlist
+%type <p> var varname funcname varlist
+%type <p> for if else while
+%type <i> do st
+%type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
+%type <i> subop print
+
+%right ASGNOP
+%right '?'
+%right ':'
+%left BOR
+%left AND
+%left GETLINE
+%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
+%left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FIELD FUNC
+%left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
+%left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
+%left REGEXPR VAR VARNF IVAR WHILE '('
+%left CAT
+%left '+' '-'
+%left '*' '/' '%'
+%left NOT UMINUS
+%right POWER
+%right DECR INCR
+%left INDIRECT
+%token LASTTOKEN /* must be last */
+
+%%
+
+program:
+ pas { if (errorflag==0)
+ winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
+ | error { yyclearin; bracecheck(); ERROR "bailing out" SYNTAX; }
+ ;
+
+and:
+ AND | and NL
+ ;
+
+bor:
+ BOR | bor NL
+ ;
+
+comma:
+ ',' | comma NL
+ ;
+
+do:
+ DO | do NL
+ ;
+
+else:
+ ELSE | else NL
+ ;
+
+for: /* each mid-rule {inloop++;} action takes a $n slot of its own, so the loop body stmt is $12, $10 and $8 below; inloop is what the BREAK and CONTINUE rules test */
+ FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
+ { --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); } /* for (init; cond; incr) stmt */
+ | FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
+ { --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); } /* for (init; ; incr) stmt: condition slot is NIL */
+ | FOR '(' varname IN varname rparen {inloop++;} stmt
+ { --inloop; $$ = stat3(IN, $3, makearr($5), $8); } /* for (var in array) stmt */
+ ;
+
+funcname:
+ VAR { setfname($1); }
+ | CALL { setfname($1); }
+ ;
+
+if:
+ IF '(' pattern rparen { $$ = notnull($3); }
+ ;
+
+lbrace:
+ '{' | lbrace NL
+ ;
+
+nl:
+ NL | nl NL
+ ;
+
+opt_nl:
+ /* empty */ { $$ = 0; }
+ | nl
+ ;
+
+opt_pst:
+ /* empty */ { $$ = 0; }
+ | pst
+ ;
+
+
+opt_simple_stmt:
+ /* empty */ { $$ = 0; }
+ | simple_stmt
+ ;
+
+pas:
+ opt_pst { $$ = 0; }
+ | opt_pst pa_stats opt_pst { $$ = $2; }
+ ;
+
+pa_pat:
+ pattern { $$ = notnull($1); }
+ ;
+
+pa_stat:
+ pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
+ | pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); }
+ | pa_pat ',' pa_pat { $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
+ | pa_pat ',' pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $3, $5); }
+ | lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); }
+ | XBEGIN lbrace stmtlist '}'
+ { beginloc = linkum(beginloc, $3); $$ = 0; }
+ | XEND lbrace stmtlist '}'
+ { endloc = linkum(endloc, $3); $$ = 0; }
+ | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
+ { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
+ ;
+
+pa_stats:
+ pa_stat
+ | pa_stats opt_pst pa_stat { $$ = linkum($1, $3); }
+ ;
+
+patlist:
+ pattern
+ | patlist comma pattern { $$ = linkum($1, $3); }
+ ;
+
+ppattern:
+ var ASGNOP ppattern { $$ = op2($2, $1, $3); }
+ | ppattern '?' ppattern ':' ppattern %prec '?'
+ { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
+ | ppattern bor ppattern %prec BOR
+ { $$ = op2(BOR, notnull($1), notnull($3)); }
+ | ppattern and ppattern %prec AND
+ { $$ = op2(AND, notnull($1), notnull($3)); }
+ | ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
+ | ppattern MATCHOP ppattern
+ { if (constnode($3))
+ $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
+ else
+ $$ = op3($2, (Node *)1, $1, $3); }
+ | ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
+ | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
+ | ppattern term %prec CAT { $$ = op2(CAT, $1, $2); }
+ | re
+ | term
+ ;
+
+pattern:
+ var ASGNOP pattern { $$ = op2($2, $1, $3); }
+ | pattern '?' pattern ':' pattern %prec '?'
+ { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
+ | pattern bor pattern %prec BOR
+ { $$ = op2(BOR, notnull($1), notnull($3)); }
+ | pattern and pattern %prec AND
+ { $$ = op2(AND, notnull($1), notnull($3)); }
+ | pattern EQ pattern { $$ = op2($2, $1, $3); }
+ | pattern GE pattern { $$ = op2($2, $1, $3); }
+ | pattern GT pattern { $$ = op2($2, $1, $3); }
+ | pattern LE pattern { $$ = op2($2, $1, $3); }
+ | pattern LT pattern { $$ = op2($2, $1, $3); }
+ | pattern NE pattern { $$ = op2($2, $1, $3); }
+ | pattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
+ | pattern MATCHOP pattern
+ { if (constnode($3))
+ $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
+ else
+ $$ = op3($2, (Node *)1, $1, $3); }
+ | pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
+ | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
+ | pattern '|' GETLINE var { $$ = op3(GETLINE, $4, (Node*)$2, $1); }
+ | pattern '|' GETLINE { $$ = op3(GETLINE, (Node*)0, (Node*)$2, $1); }
+ | pattern term %prec CAT { $$ = op2(CAT, $1, $2); }
+ | re
+ | term
+ ;
+
+plist:
+ pattern comma pattern { $$ = linkum($1, $3); }
+ | plist comma pattern { $$ = linkum($1, $3); }
+ ;
+
+pplist:
+ ppattern
+ | pplist comma ppattern { $$ = linkum($1, $3); }
+ ;
+
+prarg:
+ /* empty */ { $$ = rectonode(); }
+ | pplist
+ | '(' plist ')' { $$ = $2; }
+ ;
+
+print:
+ PRINT | PRINTF
+ ;
+
+pst:
+ NL | ';' | pst NL | pst ';'
+ ;
+
+rbrace:
+ '}' | rbrace NL
+ ;
+
+re:
+ reg_expr
+ { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
+ | NOT re { $$ = op1(NOT, notnull($2)); }
+ ;
+
+reg_expr:
+ '/' {startreg();} REGEXPR '/' { $$ = $3; } /* the mid-rule action is $2: it calls startreg() to put the scanner into its reg state before the expression text is read, so REGEXPR is $3; the scanner's reg rule for "/" unputs the slash, which is then seen as the closing '/' token */
+ ;
+
+rparen:
+ ')' | rparen NL
+ ;
+
+simple_stmt:
+ print prarg '|' term { $$ = stat3($1, $2, (Node *) $3, $4); }
+ | print prarg APPEND term { $$ = stat3($1, $2, (Node *) $3, $4); }
+ | print prarg GT term { $$ = stat3($1, $2, (Node *) $3, $4); }
+ | print prarg { $$ = stat3($1, $2, NIL, NIL); }
+ | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
+ | DELETE varname { $$ = stat2(DELETE, makearr($2), 0); }
+ | pattern { $$ = exptostat($1); }
+ | error { yyclearin; ERROR "illegal statement" SYNTAX; }
+ ;
+
+st:
+ nl
+ | ';' opt_nl
+ ;
+
+stmt:
+ BREAK st { if (!inloop) ERROR "break illegal outside of loops" SYNTAX;
+ $$ = stat1(BREAK, NIL); }
+ | CLOSE pattern st { $$ = stat1(CLOSE, $2); }
+ | CONTINUE st { if (!inloop) ERROR "continue illegal outside of loops" SYNTAX;
+ $$ = stat1(CONTINUE, NIL); }
+ | do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
+ { $$ = stat2(DO, $3, notnull($7)); }
+ | EXIT pattern st { $$ = stat1(EXIT, $2); }
+ | EXIT st { $$ = stat1(EXIT, NIL); }
+ | for
+ | if stmt else stmt { $$ = stat3(IF, $1, $2, $4); }
+ | if stmt { $$ = stat3(IF, $1, $2, NIL); }
+ | lbrace stmtlist rbrace { $$ = $2; }
+ | NEXT st { if (infunc)
+ ERROR "next is illegal inside a function" SYNTAX;
+ $$ = stat1(NEXT, NIL); }
+ | NEXTFILE st { if (infunc)
+ ERROR "nextfile is illegal inside a function" SYNTAX;
+ $$ = stat1(NEXTFILE, NIL); }
+ | RETURN pattern st { $$ = stat1(RETURN, $2); }
+ | RETURN st { $$ = stat1(RETURN, NIL); }
+ | simple_stmt st
+ | while {inloop++;} stmt { --inloop; $$ = stat2(WHILE, $1, $3); }
+ | ';' opt_nl { $$ = 0; }
+ ;
+
+stmtlist:
+ stmt
+ | stmtlist stmt { $$ = linkum($1, $2); }
+ ;
+
+subop:
+ SUB | GSUB
+ ;
+
+term:
+ term '+' term { $$ = op2(ADD, $1, $3); }
+ | term '-' term { $$ = op2(MINUS, $1, $3); }
+ | term '*' term { $$ = op2(MULT, $1, $3); }
+ | term '/' term { $$ = op2(DIVIDE, $1, $3); }
+ | term '%' term { $$ = op2(MOD, $1, $3); }
+ | term POWER term { $$ = op2(POWER, $1, $3); }
+ | '-' term %prec UMINUS { $$ = op1(UMINUS, $2); }
+ | '+' term %prec UMINUS { $$ = $2; }
+ | NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); }
+ | BLTIN '(' ')' { $$ = op2(BLTIN, (Node *) $1, rectonode()); }
+ | BLTIN '(' patlist ')' { $$ = op2(BLTIN, (Node *) $1, $3); }
+ | BLTIN { $$ = op2(BLTIN, (Node *) $1, rectonode()); }
+ | CALL '(' ')' { $$ = op2(CALL, valtonode($1,CVAR), NIL); }
+ | CALL '(' patlist ')' { $$ = op2(CALL, valtonode($1,CVAR), $3); }
+ | DECR var { $$ = op1(PREDECR, $2); }
+ | INCR var { $$ = op1(PREINCR, $2); }
+ | var DECR { $$ = op1(POSTDECR, $1); }
+ | var INCR { $$ = op1(POSTINCR, $1); }
+ | GETLINE var LT term { $$ = op3(GETLINE, $2, (Node *)$3, $4); }
+ | GETLINE LT term { $$ = op3(GETLINE, NIL, (Node *)$2, $3); }
+ | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
+ | GETLINE { $$ = op3(GETLINE, NIL, NIL, NIL); }
+ | INDEX '(' pattern comma pattern ')'
+ { $$ = op2(INDEX, $3, $5); }
+ | INDEX '(' pattern comma reg_expr ')'
+ { ERROR "index() doesn't permit regular expressions" SYNTAX;
+ $$ = op2(INDEX, $3, (Node*)$5); }
+ | '(' pattern ')' { $$ = $2; }
+ | MATCHFCN '(' pattern comma reg_expr ')'
+ { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
+ | MATCHFCN '(' pattern comma pattern ')'
+ { if (constnode($5))
+ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
+ else
+ $$ = op3(MATCHFCN, (Node *)1, $3, $5); }
+ | NUMBER { $$ = valtonode($1, CCON); }
+ | SPLIT '(' pattern comma varname comma pattern ')' /* string */
+ { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
+ | SPLIT '(' pattern comma varname comma reg_expr ')' /* const /regexp/ */
+ { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
+ | SPLIT '(' pattern comma varname ')'
+ { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */
+ | SPRINTF '(' patlist ')' { $$ = op1($1, $3); }
+ | STRING { $$ = valtonode($1, CCON); }
+ | subop '(' reg_expr comma pattern ')'
+ { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
+ | subop '(' pattern comma pattern ')'
+ { if (constnode($3))
+ $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
+ else
+ $$ = op4($1, (Node *)1, $3, $5, rectonode()); }
+ | subop '(' reg_expr comma pattern comma var ')'
+ { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
+ | subop '(' pattern comma pattern comma var ')'
+ { if (constnode($3))
+ $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
+ else
+ $$ = op4($1, (Node *)1, $3, $5, $7); }
+ | SUBSTR '(' pattern comma pattern comma pattern ')'
+ { $$ = op3(SUBSTR, $3, $5, $7); }
+ | SUBSTR '(' pattern comma pattern ')'
+ { $$ = op3(SUBSTR, $3, $5, NIL); }
+ | var
+ ;
+
+var:
+ varname
+ | varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); }
+ | FIELD { $$ = valtonode($1, CFLD); }
+ | IVAR { $$ = op1(INDIRECT, valtonode($1, CVAR)); }
+ | INDIRECT term { $$ = op1(INDIRECT, $2); }
+ ;
+
+varlist:
+ /* nothing */ { arglist = $$ = 0; }
+ | VAR { arglist = $$ = valtonode($1,CVAR); }
+ | varlist comma VAR {
+ checkdup($1, $3);
+ arglist = $$ = linkum($1,valtonode($3,CVAR)); }
+ ;
+
+varname:
+ VAR { $$ = valtonode($1, CVAR); }
+ | ARG { $$ = op1(ARG, (Node *) $1); }
+ | VARNF { $$ = op1(VARNF, (Node *) $1); }
+ ;
+
+
+while:
+ WHILE '(' pattern rparen { $$ = notnull($3); }
+ ;
+
+%%
+
+void setfname(Cell *p)	/* record p as the function being defined, rejecting reuse of a name */
+{
+	char *fname = p->nval;
+
+	if (isarr(p)) {
+		ERROR "%s is an array, not a function", fname SYNTAX;
+	} else if (isfunc(p)) {
+		ERROR "you can't define function %s more than once", fname SYNTAX;
+	}
+	/* the name is recorded even after a complaint, as before */
+	curfname = fname;
+}
+
+int constnode(Node *p)	/* 1 if p is a value node whose Cell has subtype CCON, else 0 */
+{
+	Cell *leaf;
+
+	if (!isvalue(p))
+		return 0;
+	leaf = (Cell *) p->narg[0];
+	return leaf->csub == CCON;
+}
+
+char *strnode(Node *p)	/* sval of the Cell stored in p->narg[0] */
+{
+	Cell *leaf = (Cell *) p->narg[0];
+
+	return leaf->sval;
+}
+
+Node *notnull(Node *n)	/* return n as a test: wrap it in (n != nullnode) unless it is a comparison or logical op */
+{
+	int op = n->nobj;
+
+	/* relational operators are returned untouched */
+	if (op == LE || op == LT || op == EQ || op == NE || op == GT || op == GE)
+		return n;
+	/* so are the logical operators */
+	if (op == BOR || op == AND || op == NOT)
+		return n;
+	return op2(NE, n, nullnode);
+}
+
+void checkdup(Node *vl, Cell *cp) /* check if name already in list */
+{
+ char *s = cp->nval;
+ for ( ; vl; vl = vl->nnext) {
+ if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
+ ERROR "duplicate argument %s", s SYNTAX;
+ break;
+ }
+ }
+}
diff --git a/usr.bin/awk/awklex.l b/usr.bin/awk/awklex.l
new file mode 100644
index 00000000000..d7ea2357e08
--- /dev/null
+++ b/usr.bin/awk/awklex.l
@@ -0,0 +1,405 @@
+%Start A strng sc reg comment
+
+%{
+/****************************************************************
+Copyright (C) AT&T and Lucent Technologies 1996
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the names of AT&T or Lucent Technologies
+or any of their entities not be used in advertising or publicity
+pertaining to distribution of the software without specific,
+written prior permission.
+
+AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
+ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+****************************************************************/
+
+/* some of this depends on behavior of lex that
+ may not be preserved in other implementations of lex.
+*/
+
+#ifndef FLEX_SCANNER
+#undef input /* defeat lex */
+#undef unput
+#endif /* !FLEX_SCANNER */
+
+#include <stdlib.h>
+#include <string.h>
+#include "awk.h"
+#include "awkgram.h"
+
+extern YYSTYPE yylval;
+extern int infunc;
+
+int lineno = 1;
+int bracecnt = 0;
+int brackcnt = 0;
+int parencnt = 0;
+
+#define DEBUG
+#ifdef DEBUG
+# define RET(x) {if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); }
+#else
+# define RET(x) return(x)
+#endif
+
+#define CADD if (cadd(gs, yytext[0]) == 0) { \
+ ERROR "string/reg expr %.30s... too long", gs->cbuf SYNTAX; \
+ BEGIN A; \
+ }
+
+char *s;
+Gstring *gs = 0; /* initialized in main() */
+int cflag;
+
+#ifdef FLEX_SCANNER
+static int my_input( YY_CHAR *buf, int max_size );
+
+#undef YY_INPUT
+#define YY_INPUT(buf,result,max_size) result = my_input(buf, max_size);
+
+#undef YY_USER_INIT
+#define YY_USER_INIT init_input_source();
+
+#define FIRST ((yy_start - 1) / 2)
+#else /* FLEX_SCANNER */
+#define FIRST (yybgin - yysvec - 1)
+#endif /* FLEX_SCANNER */
+%}
+
+A [a-zA-Z_]
+B [a-zA-Z0-9_]
+D [0-9]
+O [0-7]
+H [0-9a-fA-F]
+WS [ \t]
+
+%%
+ switch (FIRST) { /* witchcraft */ /* runs at the top of every yylex() call; FIRST is the macro defined above that yields the current start condition */
+ case 0: /* no start condition selected yet */
+ BEGIN A;
+ break;
+ case sc: /* the <A>"}" rule returned ';' and switched to sc on the previous call */
+ BEGIN A;
+ RET('}'); /* now deliver the '}' itself */
+ }
+
+<A>\n { lineno++; RET(NL); }
+<A>#.* { ; } /* strip comments */
+<A>{WS}+ { ; }
+<A>; { RET(';'); }
+
+<A>"\\"\n { lineno++; }
+<A>BEGIN { RET(XBEGIN); }
+<A>END { RET(XEND); }
+<A>func(tion)? { if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }
+<A>return { if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
+<A>"&&" { RET(AND); }
+<A>"||" { RET(BOR); }
+<A>"!" { RET(NOT); }
+<A>"!=" { yylval.i = NE; RET(NE); }
+<A>"~" { yylval.i = MATCH; RET(MATCHOP); }
+<A>"!~" { yylval.i = NOTMATCH; RET(MATCHOP); }
+<A>"<" { yylval.i = LT; RET(LT); }
+<A>"<=" { yylval.i = LE; RET(LE); }
+<A>"==" { yylval.i = EQ; RET(EQ); }
+<A>">=" { yylval.i = GE; RET(GE); }
+<A>">" { yylval.i = GT; RET(GT); }
+<A>">>" { yylval.i = APPEND; RET(APPEND); }
+<A>"++" { yylval.i = INCR; RET(INCR); }
+<A>"--" { yylval.i = DECR; RET(DECR); }
+<A>"+=" { yylval.i = ADDEQ; RET(ASGNOP); }
+<A>"-=" { yylval.i = SUBEQ; RET(ASGNOP); }
+<A>"*=" { yylval.i = MULTEQ; RET(ASGNOP); }
+<A>"/=" { yylval.i = DIVEQ; RET(ASGNOP); }
+<A>"%=" { yylval.i = MODEQ; RET(ASGNOP); }
+<A>"^=" { yylval.i = POWEQ; RET(ASGNOP); }
+<A>"**=" { yylval.i = POWEQ; RET(ASGNOP); }
+<A>"=" { yylval.i = ASSIGN; RET(ASGNOP); }
+<A>"**" { RET(POWER); }
+<A>"^" { RET(POWER); }
+
+<A>"$"{D}+ { yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }
+<A>"$NF" { unputstr("(NF)"); return(INDIRECT); }
+<A>"$"{A}{B}* {
+ int c;
+ char *yytext_copy = strdup(yytext);
+ c = input(); unput(c); /* look for '(' or '[' */
+ if (c == '(' || c == '[' ||
+ infunc && isarg(yytext_copy+1) >= 0) {
+ unputstr(yytext_copy+1);
+ free(yytext_copy);
+ return(INDIRECT);
+ } else {
+ yylval.cp =
+ setsymtab(yytext_copy+1,"",0.0,STR|NUM,symtab);
+ free(yytext_copy);
+ RET(IVAR);
+ }
+ }
+<A>"$" { RET(INDIRECT); }
+<A>NF { yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }
+
+<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)? {
+ yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab);
+ /* should this also have STR set? */
+ RET(NUMBER); }
+
+<A>while { RET(WHILE); }
+<A>for { RET(FOR); }
+<A>do { RET(DO); }
+<A>if { RET(IF); }
+<A>else { RET(ELSE); }
+<A>next { RET(NEXT); }
+<A>nextfile { RET(NEXTFILE); }
+<A>exit { RET(EXIT); }
+<A>break { RET(BREAK); }
+<A>continue { RET(CONTINUE); }
+<A>print { yylval.i = PRINT; RET(PRINT); }
+<A>printf { yylval.i = PRINTF; RET(PRINTF); }
+<A>sprintf { yylval.i = SPRINTF; RET(SPRINTF); }
+<A>split { yylval.i = SPLIT; RET(SPLIT); }
+<A>substr { RET(SUBSTR); }
+<A>sub { yylval.i = SUB; RET(SUB); }
+<A>gsub { yylval.i = GSUB; RET(GSUB); }
+<A>index { RET(INDEX); }
+<A>match { RET(MATCHFCN); }
+<A>in { RET(IN); }
+<A>getline { RET(GETLINE); }
+<A>close { RET(CLOSE); }
+<A>delete { RET(DELETE); }
+<A>length { yylval.i = FLENGTH; RET(BLTIN); }
+<A>log { yylval.i = FLOG; RET(BLTIN); }
+<A>int { yylval.i = FINT; RET(BLTIN); }
+<A>exp { yylval.i = FEXP; RET(BLTIN); }
+<A>sqrt { yylval.i = FSQRT; RET(BLTIN); }
+<A>sin { yylval.i = FSIN; RET(BLTIN); }
+<A>cos { yylval.i = FCOS; RET(BLTIN); }
+<A>atan2 { yylval.i = FATAN; RET(BLTIN); }
+<A>system { yylval.i = FSYSTEM; RET(BLTIN); }
+<A>rand { yylval.i = FRAND; RET(BLTIN); }
+<A>srand { yylval.i = FSRAND; RET(BLTIN); }
+<A>toupper { yylval.i = FTOUPPER; RET(BLTIN); }
+<A>tolower { yylval.i = FTOLOWER; RET(BLTIN); }
+<A>fflush { yylval.i = FFLUSH; RET(BLTIN); }
+
+<A>{A}{B}* { int n, c;
+ char *yytext_copy = strdup(yytext);
+ c = input(); unput(c); /* look for '(' */
+ if (c != '(' && infunc && (n=isarg(yytext_copy)) >= 0) {
+ yylval.i = n;
+ free(yytext_copy);
+ RET(ARG);
+ } else {
+ yylval.cp = setsymtab(yytext_copy, "", 0.0, STR|NUM, symtab);
+ free(yytext_copy);
+ if (c == '(') {
+ RET(CALL);
+ } else {
+ RET(VAR);
+ }
+ }
+ }
+<A>\" { BEGIN strng; caddreset(gs); }
+
+<A>"}" { if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }
+<A>"]" { if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
+<A>")" { if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }
+
+<A>. { if (yytext[0] == '{') bracecnt++;
+ else if (yytext[0] == '[') brackcnt++;
+ else if (yytext[0] == '(') parencnt++;
+ RET(yylval.i = yytext[0]); /* everything else */ }
+
+<reg>\\. { cadd(gs, '\\'); cadd(gs, yytext[1]); }
+<reg>\n { ERROR "newline in regular expression %.10s...", gs->cbuf SYNTAX; lineno++; BEGIN A; }
+<reg>"/" { BEGIN A;
+ cadd(gs, 0);
+ yylval.s = tostring(gs->cbuf);
+ unput('/');
+ RET(REGEXPR); }
+<reg>. { CADD; }
+
+<strng>\" { BEGIN A;
+ cadd(gs, 0); s = tostring(gs->cbuf);
+ cunadd(gs);
+ cadd(gs, ' '); cadd(gs, 0);
+ yylval.cp = setsymtab(gs->cbuf, s, 0.0, CON|STR, symtab);
+ RET(STRING); }
+<strng>\n { ERROR "newline in string %.10s...", gs->cbuf SYNTAX; lineno++; BEGIN A; }
+<strng>"\\\"" { cadd(gs, '"'); }
+<strng>"\\"n { cadd(gs, '\n'); }
+<strng>"\\"t { cadd(gs, '\t'); }
+<strng>"\\"f { cadd(gs, '\f'); }
+<strng>"\\"r { cadd(gs, '\r'); }
+<strng>"\\"b { cadd(gs, '\b'); }
+<strng>"\\"v { cadd(gs, '\v'); } /* these ANSIisms may not be known by */
+<strng>"\\"a { cadd(gs, '\007'); } /* your compiler. hence 007 for bell */
+<strng>"\\\\" { cadd(gs, '\\'); }
+<strng>"\\"({O}{O}{O}|{O}{O}|{O}) { int n;
+ sscanf(yytext+1, "%o", &n); cadd(gs, n); }
+<strng>"\\"x({H}+) { int n; /* ANSI permits any number! */
+ sscanf(yytext+2, "%x", &n); cadd(gs, n); }
+<strng>"\\". { cadd(gs, yytext[1]); }
+<strng>. { CADD; }
+
+%%
+
+void startreg(void)	/* called by the grammar after the opening '/' of a regular expression */
+{
+	caddreset(gs);	/* empty the shared growing string */
+	BEGIN reg;	/* following input is matched by the <reg> rules */
+}
+
+#ifdef FLEX_SCANNER
+/*
+ * my_input: back end of the YY_INPUT macro defined above (flex only).
+ * Stores up to max_size bytes of program text in buf and returns how
+ * many were stored; 0 tells the caller there is no more input.
+ * When lexprog is set (awk '...') the text is taken from that string;
+ * otherwise (awk -f ...) one character per call comes from pgetc().
+ */
+static int my_input( YY_CHAR *buf, int max_size )
+{
+	extern uschar *lexprog;
+	int c;
+
+	if ( lexprog ) {		/* awk '...' */
+		size_t left = strlen( (char *) lexprog );
+		int num_chars = max_size;
+
+		if ( left < (size_t) max_size )
+			num_chars = (int) left;
+		/*
+		 * Copy exactly num_chars bytes and do not NUL-terminate:
+		 * a terminator would land one past the end of buf when
+		 * exactly max_size bytes of program text remain.
+		 */
+		memcpy( buf, lexprog, num_chars );
+		lexprog += num_chars;
+		return num_chars;
+	}
+
+	/* awk -f ... */
+	c = pgetc();
+	if (c == EOF)
+		return 0;
+	buf[0] = c;
+	return 1;
+}
+#else /* FLEX_SCANNER */
+/* input() and unput() are transcriptions of the standard lex
+ macros for input and output with additions for error message
+ printing. God help us all if someone changes how lex works.
+*/
+
+char ebuf[300];
+char *ep = ebuf;
+
+int input(void) /* get next lexical input character */
+{
+ int c;
+ extern char *lexprog;
+
+ if (yysptr > yysbuf) /* characters pushed back by unput() are returned first */
+ c = U(*--yysptr);
+ else if (lexprog != NULL) { /* awk '...' */
+ if ((c = *lexprog) != 0) /* do not step past the terminating NUL, so later calls keep returning 0 */
+ lexprog++;
+ } else /* awk -f ... */
+ c = pgetc();
+ if (c == '\n')
+ yylineno++;
+ else if (c == EOF)
+ c = 0; /* end of input is returned as 0 */
+ if (ep >= ebuf + sizeof ebuf) /* ebuf is used circularly: wrap to its start */
+ ep = ebuf;
+ return *ep++ = c; /* save c in ebuf (the error-message addition mentioned above) and return it */
+}
+
+void unput(int c) /* put lexical character back on input */
+{
+ yytchar = c;
+ if (yytchar == '\n') /* undo the line count that input() made for this newline */
+ yylineno--;
+ *yysptr++ = yytchar; /* push onto the stack that input() pops first */
+ if (--ep < ebuf) /* step back one slot in ebuf, wrapping to its last byte; NOTE(review): ep is decremented before the comparison, so it can briefly point before ebuf */
+ ep = ebuf + sizeof(ebuf) - 1;
+}
+#endif /* FLEX_SCANNER */
+
+void unputstr(char *s)	/* put a string back on input */
+{
+	size_t n = strlen(s);
+
+	/*
+	 * Push back the last character first, so the string is read
+	 * again in its original order.  Counting down in size_t makes an
+	 * empty string a plain no-op instead of depending on strlen(s)-1
+	 * wrapping around and converting to -1.
+	 */
+	while (n > 0) {
+		n--;
+		unput(s[n]);
+	}
+}
+
+int lex_input()	/* thin wrapper: returns whatever input() returns */
+{
+	int c = input();
+
+	return c;
+}
+
+/* growing-string code */
+
+const int CBUFLEN = 400;
+
+Gstring *newGstring()	/* allocate an empty Gstring with a CBUFLEN-byte buffer */
+{
+	Gstring *ng = malloc(sizeof *ng);
+	char *buf = malloc(CBUFLEN);
+
+	/* both allocations are attempted before either result is checked */
+	if (!(ng != 0 && buf != 0))
+		ERROR "Out of space for strings" FATAL;
+	ng->clen = 0;
+	ng->cmax = CBUFLEN;
+	ng->cbuf = buf;
+	return ng;
+}
+
+/*
+ * cadd: append character c to gs, quadrupling the buffer when it is full.
+ * Returns gs->cbuf on success.  Returns 0 if the buffer could not be
+ * grown; in that case c is not stored and gs keeps its previous buffer,
+ * length and capacity, so gs->cbuf is still safe to read (the CADD macro
+ * prints it in its error message) and nothing is leaked.
+ */
+char *cadd(Gstring *gs, int c)	/* add one char to gs->cbuf, grow as needed */
+{
+	if (gs->clen >= gs->cmax) {	/* need to grow */
+		int newmax = gs->cmax * 4;
+		char *newbuf = (char *) realloc((void *) gs->cbuf, newmax);
+
+		/* commit the new size only once realloc has succeeded */
+		if (newbuf == 0)
+			return 0;
+		gs->cbuf = newbuf;
+		gs->cmax = newmax;
+	}
+	if (gs->cbuf != 0)
+		gs->cbuf[gs->clen++] = c;
+	return gs->cbuf;
+}
+
+void caddreset(Gstring *gs) /* make gs empty again; the buffer and its capacity are kept */
+{
+ gs->clen = 0; /* old contents stay in cbuf and are overwritten by later cadd() calls */
+}
+
+void cunadd(Gstring *gs)	/* drop the most recently added char, if there is one */
+{
+	if (gs->clen <= 0)
+		return;
+	gs->clen -= 1;
+}
+
+void delGstring(Gstring *gs)	/* release both the character buffer and the Gstring itself */
+{
+	char *buf = gs->cbuf;
+
+	free(gs);
+	free(buf);
+}
+
+#ifdef FLEX_SCANNER
+void init_input_source(void)	/* YY_USER_INIT hook: point yyin at pfile[curpfile] unless yyin is already set */
+{
+	extern int curpfile;
+	extern char *pfile[];
+	char *name;
+
+	if (yyin != NULL)
+		return;
+	name = (char *) pfile[curpfile];
+	if (name == 0)
+		return;
+	if (strcmp(name, "-") == 0) {	/* "-" selects standard input */
+		yyin = stdin;
+		return;
+	}
+	yyin = fopen(name, "r");
+	if (yyin == NULL)
+		ERROR "can't open file %s", name FATAL;
+}
+#endif
diff --git a/usr.bin/awk/b.c b/usr.bin/awk/b.c
new file mode 100644
index 00000000000..3baab2ac071
--- /dev/null
+++ b/usr.bin/awk/b.c
@@ -0,0 +1,841 @@
+/****************************************************************
+Copyright (C) AT&T and Lucent Technologies 1996
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the names of AT&T or Lucent Technologies
+or any of their entities not be used in advertising or publicity
+pertaining to distribution of the software without specific,
+written prior permission.
+
+AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
+ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+****************************************************************/
+
+/* lasciate ogne speranza, voi ch'entrate. */
+
+#define DEBUG
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "awk.h"
+#include "awkgram.h"
+
+#define HAT (NCHARS-1) /* matches ^ in regular expr */
+ /* NCHARS is 2**n */
+#define MAXLIN 22
+
+#define type(v) (v)->nobj
+#define left(v) (v)->narg[0]
+#define right(v) (v)->narg[1]
+#define parent(v) (v)->nnext
+
+#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
+#define UNARY case STAR: case PLUS: case QUEST:
+
+/* encoding in tree Nodes:
+ leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
+ left is index, right contains value or pointer to value
+ unary (STAR, PLUS, QUEST): left is child, right is null
+ binary (CAT, OR): left and right are children
+ parent contains pointer to parent
+*/
+
+
+int *setvec;
+int *tmpset;
+int maxsetvec = 0;
+
+int rtok; /* next token in current re */
+int rlxval;
+char *rlxstr;
+char *prestr; /* current position in current re */
+char *lastre; /* origin of last re */
+
+static int setcnt;
+static int poscnt;
+
+char *patbeg;
+int patlen;
+
+#define NFA 20 /* cache this many dynamic fa's */
+fa *fatab[NFA];
+int nfatab = 0; /* entries in fatab */
+
+fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s; run-time patterns are cached in fatab[] */
+{
+ int i, use, nuse;
+ fa *pfa;
+
+ if (setvec == 0) { /* first time through any RE */
+ maxsetvec = MAXLIN;
+ setvec = (int *) malloc(maxsetvec * sizeof(int));
+ tmpset = (int *) malloc(maxsetvec * sizeof(int));
+ if (setvec == 0 || tmpset == 0)
+ overflo("out of space initializing makedfa");
+ }
+
+ if (compile_time) /* a constant for sure */
+ return mkdfa(s, anchor); /* built directly; not entered in fatab[] */
+ for (i = 0; i < nfatab; i++) /* is it there already? */
+ if (fatab[i]->anchor == anchor
+ && strcmp(fatab[i]->restr, s) == 0) {
+ fatab[i]->use++; /* count the hit; the count drives eviction below */
+ return fatab[i];
+ }
+ pfa = mkdfa(s, anchor);
+ if (nfatab < NFA) { /* room for another */
+ fatab[nfatab] = pfa;
+ fatab[nfatab]->use = 1;
+ nfatab++;
+ return pfa;
+ }
+ use = fatab[0]->use; /* table full: replace the entry with the smallest use count */
+ nuse = 0;
+ for (i = 1; i < nfatab; i++)
+ if (fatab[i]->use < use) {
+ use = fatab[i]->use;
+ nuse = i;
+ }
+ freefa(fatab[nuse]);
+ fatab[nuse] = pfa;
+ pfa->use = 1;
+ return pfa;
+}
+
+fa *mkdfa(char *s, int anchor) /* does the real work of making a dfa */
+ /* anchor = 1 for anchored matches, else 0 */
+{
+ Node *p, *p1;
+ fa *f;
+
+ p = reparse(s); /* parse tree for s; reparse() reports syntax errors as fatal */
+ p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
+ /* put ALL STAR in front of reg. exp. */
+ p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
+ /* put FINAL after reg. exp. */
+
+ poscnt = 0;
+ penter(p1); /* enter parent pointers and leaf indices */
+ if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) /* one rrow per leaf counted by penter */
+ overflo("out of space for fa");
+ f->accept = poscnt-1; /* penter has computed number of positions in re */
+ cfoll(f, p1); /* set up follow sets */
+ freetr(p1); /* the tree is no longer needed once the follow sets exist */
+ if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
+ overflo("out of space in makedfa");
+ if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
+ overflo("out of space in makedfa");
+ *f->posns[1] = 0; /* state 1 has an empty position set */
+ f->initstat = makeinit(f, anchor);
+ f->anchor = anchor;
+ f->restr = tostring(s); /* kept so makedfa() can find this dfa again by its text */
+ return f;
+}
+
+int makeinit(fa *f, int anchor) /* (re)build state 2 from leaf 0's follow set; returns the initial state */
+{
+ int i, k;
+
+ f->curstat = 2;
+ f->out[2] = 0;
+ f->reset = 0;
+ k = *(f->re[0].lfollow); /* element 0 of a follow set is its length */
+ xfree(f->posns[2]);
+ if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
+ overflo("out of space in makeinit");
+ for (i=0; i <= k; i++) {
+ (f->posns[2])[i] = (f->re[0].lfollow)[i];
+ }
+ if ((f->posns[2])[1] == f->accept) /* first listed position is the FINAL leaf */
+ f->out[2] = 1;
+ for (i=0; i < NCHARS; i++)
+ f->gototab[2][i] = 0; /* 0 means "transition not computed yet" to the matchers */
+ f->curstat = cgoto(f, 2, HAT); /* state reached from 2 on the HAT pseudo-character */
+ if (anchor) {
+ *f->posns[2] = k-1; /* leave out position 0 */
+ for (i=0; i < k; i++) {
+ (f->posns[0])[i] = (f->posns[2])[i];
+ }
+
+ f->out[0] = f->out[2];
+ if (f->curstat != 2)
+ --(*f->posns[f->curstat]);
+ }
+ return f->curstat;
+}
+
+void penter(Node *p) /* set up parent pointers and leaf indices; leaves are counted in poscnt */
+{
+ switch (type(p)) {
+ LEAF
+ left(p) = (Node *) poscnt; /* a leaf's left slot holds its index, not a child */
+ poscnt++;
+ break;
+ UNARY
+ penter(left(p));
+ parent(left(p)) = p;
+ break;
+ case CAT:
+ case OR:
+ penter(left(p)); /* left subtree first, so leaf indices follow left-to-right order */
+ penter(right(p));
+ parent(left(p)) = p;
+ parent(right(p)) = p;
+ break;
+ default: /* can't happen */
+ ERROR "can't happen: unknown type %d in penter", type(p) FATAL;
+ break;
+ }
+}
+
+void freetr(Node *p) /* free parse tree: children first, then the node itself */
+{
+ switch (type(p)) {
+ LEAF
+ xfree(p); /* only the node is freed; whatever right(p) refers to is left alone */
+ break;
+ UNARY
+ freetr(left(p));
+ xfree(p);
+ break;
+ case CAT:
+ case OR:
+ freetr(left(p));
+ freetr(right(p));
+ xfree(p);
+ break;
+ default: /* can't happen */
+ ERROR "can't happen: unknown type %d in freetr", type(p) FATAL;
+ break;
+ }
+}
+
+/* in the parsing of regular expressions, metacharacters like . have */
+/* to be seen literally; \056 is not a metacharacter. */
+
+/* Evaluate the run of hexadecimal digits that starts at *pp.  Returns the
+ * value (0 when there are no digits) and advances *pp past the digits. */
+int hexstr(char **pp)
+{
+	char *p;
+	int n = 0;
+
+	/* the <ctype.h> tests need an unsigned char value; a plain char with
+	 * the high bit set would otherwise be passed as a negative int */
+	for (p = *pp; isxdigit((unsigned char) *p); p++) {
+		if (isdigit((unsigned char) *p))
+			n = 16 * n + *p - '0';
+		else if (*p >= 'a' && *p <= 'f')
+			n = 16 * n + *p - 'a' + 10;
+		else if (*p >= 'A' && *p <= 'F')
+			n = 16 * n + *p - 'A' + 10;
+	}
+	*pp = p;
+	return n;
+}
+
+/* octal digits are 0 through 7 only; '8' must not be accepted */
+#define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
+
+/* Pick up the next thing after a backslash and increment *pp past it.
+ * Handles \t \n \f \r \b \\, \x followed by hex digits, and one to three
+ * octal digits; any other character stands for itself.  Returns the
+ * character value the escape denotes. */
+int quoted(char **pp)
+{
+	char *p = *pp;
+	int c;
+
+	if ((c = *p++) == 't')
+		c = '\t';
+	else if (c == 'n')
+		c = '\n';
+	else if (c == 'f')
+		c = '\f';
+	else if (c == 'r')
+		c = '\r';
+	else if (c == 'b')
+		c = '\b';
+	else if (c == '\\')
+		c = '\\';
+	else if (c == 'x') {	/* hexadecimal goo follows */
+		c = hexstr(&p);	/* this adds a null if number is invalid */
+	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
+		int n = c - '0';
+		if (isoctdigit(*p)) {
+			n = 8 * n + *p++ - '0';
+			if (isoctdigit(*p))
+				n = 8 * n + *p++ - '0';
+		}
+		c = n;
+	} /* else */
+		/* c = c; */
+	*pp = p;
+	return c;
+}
+
+char *cclenter(char *p) /* add a character class: expand escapes and a-b ranges in p into
+ an explicit character list; p is freed and tostring() of the list returned */
+{
+ int i, c, c2;
+ char *op;
+ static Gstring *cgp = 0; /* scratch buffer, allocated once and reused on every call */
+
+ op = p;
+ if (cgp == 0)
+ cgp = newGstring();
+ caddreset(cgp);
+ i = 0; /* number of characters placed in cgp so far */
+ while ((c = *p++) != 0) {
+ if (c == '\\') {
+ c = quoted(&p);
+ } else if (c == '-' && i > 0 && cgp->cbuf[i-1] != 0) { /* '-' after some char: maybe a range */
+ if (*p != 0) { /* a trailing '-' falls through and is taken literally */
+ c = cgp->cbuf[i-1];
+ c2 = *p++;
+ if (c2 == '\\')
+ c2 = quoted(&p);
+ if (c > c2) { /* empty; ignore */
+ cunadd(cgp);
+ i--;
+ continue;
+ }
+ while (c < c2) { /* the low end is already stored; add the rest up to c2 */
+ cadd(cgp, ++c);
+ i++;
+ }
+ continue;
+ }
+ }
+ cadd(cgp, c);
+ i++;
+ }
+ cadd(cgp, 0);
+ dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, cgp->cbuf) );
+ xfree(op);
+ return(tostring(cgp->cbuf));
+}
+
+void overflo(char *s) /* fatal "regular expression too big" error; at most 30 chars of s are shown */
+{
+ ERROR "regular expression too big: %.30s...", s FATAL;
+}
+
+/* Enter the follow set of each leaf of vertex v into f->re[leaf].lfollow.
+ * For every leaf the type and value are recorded too.  A follow set is an
+ * int array whose element 0 is the number of positions, followed by the
+ * positions in decreasing order. */
+void cfoll(fa *f, Node *v)
+{
+	int i;
+	int *p;
+
+	switch (type(v)) {
+	LEAF
+		f->re[(int) left(v)].ltype = type(v);
+		f->re[(int) left(v)].lval.np = right(v);
+		while (f->accept >= maxsetvec) {	/* guessing here! */
+			maxsetvec *= 4;
+			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
+			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
+			/* report the failure; a stray abort() used to make this unreachable */
+			if (setvec == 0 || tmpset == 0)
+				overflo("out of space in cfoll()");
+		}
+		for (i = 0; i <= f->accept; i++)
+			setvec[i] = 0;
+		setcnt = 0;
+		follow(v);	/* computes setvec and setcnt */
+		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
+			overflo("out of space building follow set");
+		f->re[(int) left(v)].lfollow = p;
+		*p = setcnt;
+		for (i = f->accept; i >= 0; i--)
+			if (setvec[i] == 1)
+				*++p = i;
+		break;
+	UNARY
+		cfoll(f,left(v));
+		break;
+	case CAT:
+	case OR:
+		cfoll(f,left(v));
+		cfoll(f,right(v));
+		break;
+	default:	/* can't happen */
+		ERROR "can't happen: unknown type %d in cfoll", type(v) FATAL;
+	}
+}
+
+/* Collect the initially active leaves of p into setvec, counting newly
+ * added ones in setcnt.  Returns 0 if p can match the empty string and 1
+ * if it cannot (STAR and QUEST return 0, an ordinary leaf returns 1). */
+int first(Node *p)
+{
+	int b, lp;
+
+	switch (type(p)) {
+	LEAF
+		lp = (int) left(p);	/* look for high-water mark of subscripts */
+		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
+			maxsetvec *= 4;
+			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
+			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
+			/* report the failure; a stray abort() used to make this unreachable */
+			if (setvec == 0 || tmpset == 0)
+				overflo("out of space in first()");
+		}
+		if (setvec[lp] != 1) {
+			setvec[lp] = 1;
+			setcnt++;
+		}
+		if (type(p) == CCL && (*(char *) right(p)) == '\0')
+			return(0);	/* empty CCL */
+		else return(1);
+	case PLUS:
+		if (first(left(p)) == 0) return(0);
+		return(1);
+	case STAR:
+	case QUEST:
+		first(left(p));
+		return(0);
+	case CAT:
+		/* the right operand is only examined when the left one can be empty */
+		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
+		return(1);
+	case OR:
+		/* both operands are always examined, so both contribute leaves */
+		b = first(right(p));
+		if (first(left(p)) == 0 || b == 0) return(0);
+		return(1);
+	}
+	ERROR "can't happen: unknown type %d in first", type(p) FATAL;	/* can't happen */
+	return(-1);
+}
+
+void follow(Node *v) /* collects leaves that can follow v into setvec */
+{
+ Node *p;
+
+ if (type(v) == FINAL) /* nothing is collected for the FINAL leaf */
+ return;
+ p = parent(v);
+ switch (type(p)) {
+ case STAR:
+ case PLUS:
+ first(v); /* repetition: v can be followed by its own first leaves */
+ follow(p);
+ return;
+
+ case OR:
+ case QUEST:
+ follow(p);
+ return;
+
+ case CAT:
+ if (v == left(p)) { /* v is left child of p */
+ if (first(right(p)) == 0) { /* first() returned 0: also take what follows p */
+ follow(p);
+ return;
+ }
+ } else /* v is right child */
+ follow(p);
+ return;
+ }
+}
+
+/* Is c one of the characters of the NUL-terminated string s?
+ * Returns 1 if so, 0 otherwise; the terminator itself never matches. */
+int member(int c, char *s)
+{
+	for (; *s != '\0'; s++)
+		if (*s == c)
+			return(1);
+	return(0);
+}
+
+int match(fa *f, char *p0) /* shortest match ? returns 1 as soon as an accepting state is reached, else 0 */
+{
+ int s, ns;
+ uschar *p = (uschar *) p0; /* unsigned, so characters are usable as gototab subscripts */
+
+ s = f->reset ? makeinit(f,0) : f->initstat; /* rebuild the start state if cgoto() reset the state table */
+ if (f->out[s])
+ return(1);
+ do {
+ if ((ns = f->gototab[s][*p]) != 0) /* nonzero entry: transition already computed */
+ s = ns;
+ else
+ s = cgoto(f, s, *p); /* compute the transition on demand */
+ if (f->out[s])
+ return(1);
+ } while (*p++ != 0); /* the terminating NUL is fed to the dfa as well */
+ return(0);
+}
+
+int pmatch(fa *f, char *p0) /* longest match, for sub; on success returns 1 with patbeg/patlen set, else 0 */
+{
+ int s, ns;
+ uschar *p = (uschar *) p0;
+ uschar *q;
+ int i, k;
+
+ s = f->reset ? makeinit(f,1) : f->initstat;
+ patbeg = (char *) p;
+ patlen = -1; /* -1 means no match seen yet; 0 is a valid (empty) match length */
+ do { /* try each starting position p in turn */
+ q = p;
+ do {
+ if (f->out[s]) /* final state */
+ patlen = q-p; /* keep scanning: a later accepting state gives a longer match */
+ if ((ns = f->gototab[s][*q]) != 0)
+ s = ns;
+ else
+ s = cgoto(f, s, *q);
+ if (s == 1) /* no transition */
+ if (patlen >= 0) {
+ patbeg = (char *) p;
+ return(1);
+ }
+ else
+ goto nextin; /* no match */
+ } while (*q++ != 0);
+ if (f->out[s])
+ patlen = q-p-1; /* don't count $ */
+ if (patlen >= 0) {
+ patbeg = (char *) p;
+ return(1);
+ }
+ nextin:
+ s = 2; /* restart from state 2 for the next starting position */
+ if (f->reset) { /* state table was reset by cgoto(): rebuild state 2 from posns[0] */
+ for (i = 2; i <= f->curstat; i++)
+ xfree(f->posns[i]);
+ k = *f->posns[0];
+ if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
+ overflo("out of space in pmatch");
+ for (i = 0; i <= k; i++)
+ (f->posns[2])[i] = (f->posns[0])[i];
+ f->initstat = f->curstat = 2;
+ f->out[2] = f->out[0];
+ for (i = 0; i < NCHARS; i++)
+ f->gototab[2][i] = 0;
+ }
+ } while (*p++ != 0);
+ return (0);
+}
+
+int nematch(fa *f, char *p0) /* non-empty match, for sub; like pmatch() but a match of length 0 is rejected */
+{
+ int s, ns;
+ uschar *p = (uschar *) p0;
+ uschar *q;
+ int i, k;
+
+ s = f->reset ? makeinit(f,1) : f->initstat;
+ patlen = -1;
+ while (*p) { /* unlike pmatch(), no attempt is made at the terminating NUL */
+ q = p;
+ do {
+ if (f->out[s]) /* final state */
+ patlen = q-p;
+ if ((ns = f->gototab[s][*q]) != 0)
+ s = ns;
+ else
+ s = cgoto(f, s, *q);
+ if (s == 1) /* no transition */
+ if (patlen > 0) {
+ patbeg = (char *) p;
+ return(1);
+ } else
+ goto nnextin; /* no nonempty match */
+ } while (*q++ != 0);
+ if (f->out[s])
+ patlen = q-p-1; /* don't count $ */
+ if (patlen > 0 ) {
+ patbeg = (char *) p;
+ return(1);
+ }
+ nnextin:
+ s = 2; /* restart from state 2 for the next starting position */
+ if (f->reset) { /* state table was reset by cgoto(): rebuild state 2 from posns[0] */
+ for (i = 2; i <= f->curstat; i++)
+ xfree(f->posns[i]);
+ k = *f->posns[0];
+ if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
+ overflo("out of state space");
+ for (i = 0; i <= k; i++)
+ (f->posns[2])[i] = (f->posns[0])[i];
+ f->initstat = f->curstat = 2;
+ f->out[2] = f->out[0];
+ for (i = 0; i < NCHARS; i++)
+ f->gototab[2][i] = 0;
+ }
+ p++;
+ }
+ return (0);
+}
+
+Node *reparse(char *p) /* parses regular expression pointed to by p */
+{ /* uses relex() to scan regular expression */
+ Node *np;
+
+ dprintf( ("reparse <%s>\n", p) );
+ lastre = prestr = p; /* prestr points to string to be parsed */
+ rtok = relex(); /* prime the one-token lookahead */
+ if (rtok == '\0')
+ ERROR "empty regular expression" FATAL;
+ np = regexp();
+ if (rtok != '\0') /* tokens left over after a complete expression */
+ ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL;
+ return(np);
+}
+
+Node *regexp(void) /* top-level parse of reg expr: a primary, extended by concat() and then alt() */
+{
+ return (alt(concat(primary())));
+}
+
+Node *primary(void) /* parse one primary at rtok plus any postfix operators, via unary() */
+{
+ Node *np;
+
+ switch (rtok) {
+ case CHAR:
+ np = op2(CHAR, NIL, (Node *) rlxval); /* the character value travels in the right slot */
+ rtok = relex();
+ return (unary(np));
+ case ALL:
+ rtok = relex();
+ return (unary(op2(ALL, NIL, NIL)));
+ case DOT:
+ rtok = relex();
+ return (unary(op2(DOT, NIL, NIL)));
+ case CCL:
+ np = op2(CCL, NIL, (Node*) cclenter(rlxstr));
+ rtok = relex();
+ return (unary(np));
+ case NCCL:
+ np = op2(NCCL, NIL, (Node *) cclenter(rlxstr));
+ rtok = relex();
+ return (unary(np));
+ case '^':
+ rtok = relex();
+ return (unary(op2(CHAR, NIL, (Node *) HAT))); /* ^ becomes the HAT pseudo-character */
+ case '$':
+ rtok = relex();
+ return (unary(op2(CHAR, NIL, NIL))); /* $ becomes CHAR 0, the string terminator */
+ case '(':
+ rtok = relex();
+ if (rtok == ')') { /* special pleading for () */
+ rtok = relex();
+ return unary(op2(CCL, NIL, (Node *) tostring("")));
+ }
+ np = regexp();
+ if (rtok == ')') {
+ rtok = relex();
+ return (unary(np));
+ }
+ else
+ ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL;
+ default:
+ ERROR "illegal primary in regular expression %s at %s", lastre, prestr FATAL;
+ }
+ return 0; /*NOTREACHED*/
+}
+
+Node *concat(Node *np) /* while rtok can start a primary, append it to np with CAT (left-associative) */
+{
+ switch (rtok) {
+ case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
+ return (concat(op2(CAT, np, primary())));
+ }
+ return (np);
+}
+
+Node *alt(Node *np) /* while rtok is OR, join np with the next concatenation (left-associative) */
+{
+ if (rtok == OR) {
+ rtok = relex();
+ return (alt(op2(OR, np, concat(primary()))));
+ }
+ return (np);
+}
+
+Node *unary(Node *np) /* wrap np in each postfix *, + or ? that follows; operators may be stacked */
+{
+ switch (rtok) {
+ case STAR:
+ rtok = relex();
+ return (unary(op2(STAR, np, NIL)));
+ case PLUS:
+ rtok = relex();
+ return (unary(op2(PLUS, np, NIL)));
+ case QUEST:
+ rtok = relex();
+ return (unary(op2(QUEST, np, NIL)));
+ default:
+ return (np);
+ }
+}
+
+int relex(void) /* lexical analyzer for reparse: returns the next token at prestr;
+ CHAR leaves its value in rlxval, CCL/NCCL leave the class text in rlxstr */
+{
+ int c;
+ int cflag;
+ static Gstring *gp = 0; /* scratch buffer for class text, allocated once */
+
+ switch (c = *prestr++) {
+ case '|': return OR;
+ case '*': return STAR;
+ case '+': return PLUS;
+ case '?': return QUEST;
+ case '.': return DOT;
+ case '\0': prestr--; return '\0'; /* stay on the terminator so repeated calls keep returning it */
+ case '^':
+ case '$':
+ case '(':
+ case ')':
+ return c;
+ case '\\':
+ rlxval = quoted(&prestr);
+ return CHAR;
+ default:
+ rlxval = c;
+ return CHAR;
+ case '[':
+ if (gp == 0)
+ gp = newGstring();
+ caddreset(gp);
+ if (*prestr == '^') { /* leading ^ negates the class */
+ cflag = 1;
+ prestr++;
+ }
+ else
+ cflag = 0;
+ for (; ; ) {
+ if ((c = *prestr++) == '\\') { /* keep the escape as written; cclenter() decodes it later */
+ cadd(gp, '\\');
+ if ((c = *prestr++) == '\0')
+ ERROR "nonterminated character class %.20s...", lastre FATAL;
+ cadd(gp, c);
+ } else if (c == '\n') {
+ ERROR "newline in character class %.20s...", lastre FATAL;
+ } else if (c == '\0') {
+ ERROR "nonterminated character class %.20s", lastre FATAL;
+ } else if (gp->clen == 0) { /* 1st char is special */
+ cadd(gp, c); /* so a ']' in first position is taken literally */
+ } else if (c == ']') {
+ cadd(gp, 0);
+ rlxstr = tostring(gp->cbuf);
+ if (cflag == 0)
+ return CCL;
+ else
+ return NCCL;
+ } else
+ cadd(gp, c);
+ }
+ }
+}
+
+/* Compute the state reached from state s on character c, record it in
+ * f->gototab[s][c] and return it.  The target is the set of positions that
+ * follow every position of s matching c; if that set equals an existing
+ * state the existing state is reused, otherwise a new state is added.
+ * When the state table is full, all states from 2 up are discarded and
+ * f->reset is set so that the matchers rebuild their start state. */
+int cgoto(fa *f, int s, int c)
+{
+	int i, j, k;
+	int *p, *q;
+
+	if (c < 0)
+		ERROR "can't happen: neg char %d in cgoto", c FATAL;
+	while (f->accept >= maxsetvec) {	/* guessing here! */
+		maxsetvec *= 4;
+		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
+		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
+		/* report the failure; a stray abort() used to make this unreachable */
+		if (setvec == 0 || tmpset == 0)
+			overflo("out of space in cgoto()");
+	}
+	for (i = 0; i <= f->accept; i++)
+		setvec[i] = 0;
+	setcnt = 0;
+	/* compute positions of gototab[s,c] into setvec */
+	p = f->posns[s];
+	for (i = 1; i <= *p; i++) {
+		if ((k = f->re[p[i]].ltype) != FINAL) {
+			if ((k == CHAR && c == f->re[p[i]].lval.i)
+			 || (k == DOT && c != 0 && c != HAT)
+			 || (k == ALL && c != 0)
+			 || (k == CCL && member(c, f->re[p[i]].lval.up))
+			 || (k == NCCL && !member(c, f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
+				q = f->re[p[i]].lfollow;
+				for (j = 1; j <= *q; j++) {
+					if (q[j] >= maxsetvec) {
+						maxsetvec *= 4;
+						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
+						/* grow tmpset from tmpset; reallocating setvec a
+						 * second time here left both names on one buffer */
+						tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
+						if (setvec == 0 || tmpset == 0)
+							overflo("cgoto overflow");
+					}
+					if (setvec[q[j]] == 0) {
+						setcnt++;
+						setvec[q[j]] = 1;
+					}
+				}
+			}
+		}
+	}
+	/* determine if setvec is a previous state */
+	tmpset[0] = setcnt;
+	j = 1;
+	for (i = f->accept; i >= 0; i--)
+		if (setvec[i]) {
+			tmpset[j++] = i;
+		}
+	/* tmpset == previous state? */
+	for (i = 1; i <= f->curstat; i++) {
+		p = f->posns[i];
+		if ((k = tmpset[0]) != p[0])
+			goto different;
+		for (j = 1; j <= k; j++)
+			if (tmpset[j] != p[j])
+				goto different;
+		/* setvec is state i */
+		f->gototab[s][c] = i;
+		return i;
+	  different:;
+	}
+
+	/* add tmpset to current set of states */
+	if (f->curstat >= NSTATES-1) {
+		f->curstat = 2;
+		f->reset = 1;
+		for (i = 2; i < NSTATES; i++)
+			xfree(f->posns[i]);
+	} else
+		++(f->curstat);
+	for (i = 0; i < NCHARS; i++)
+		f->gototab[f->curstat][i] = 0;
+	xfree(f->posns[f->curstat]);
+	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
+		overflo("out of space in cgoto");
+
+	f->posns[f->curstat] = p;
+	f->gototab[s][c] = f->curstat;
+	for (i = 0; i <= setcnt; i++)
+		p[i] = tmpset[i];
+	if (setvec[f->accept])
+		f->out[f->curstat] = 1;
+	else
+		f->out[f->curstat] = 0;
+	return f->curstat;
+}
+
+
+void freefa(fa *f) /* free a finite automaton; a null f is ignored */
+{
+ int i;
+
+ if (f == NULL)
+ return;
+ for (i = 0; i <= f->curstat; i++) /* position sets of the states in use */
+ xfree(f->posns[i]);
+ for (i = 0; i <= f->accept; i++) { /* per-leaf data */
+ xfree(f->re[i].lfollow);
+ if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
+ xfree((f->re[i].lval.np)); /* class leaves carry a string made by cclenter()/tostring() */
+ }
+ xfree(f->restr);
+ xfree(f);
+}
diff --git a/usr.bin/awk/lib.c b/usr.bin/awk/lib.c
new file mode 100644
index 00000000000..6f72be9133a
--- /dev/null
+++ b/usr.bin/awk/lib.c
@@ -0,0 +1,636 @@
+/****************************************************************
+Copyright (C) AT&T and Lucent Technologies 1996
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the names of AT&T or Lucent Technologies
+or any of their entities not be used in advertising or publicity
+pertaining to distribution of the software without specific,
+written prior permission.
+
+AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
+ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+****************************************************************/
+
+#define DEBUG
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdlib.h>
+#include "awk.h"
+#include "awkgram.h"
+
+FILE *infile = NULL;
+char *file = "";
+int recsize = RECSIZE;
+char *recdata;
+char *record;
+char *fields;
+Cell *fldtab;
+
+#define MAXFLD 200
+int nfields = MAXFLD; /* can be set from commandline in main */
+
+int donefld; /* 1 = implies rec broken into fields */
+int donerec; /* 1 = record is valid (no flds have changed) */
+
+int maxfld = 0; /* last used field */
+int argno = 1; /* current input argument number */
+extern Awkfloat *ARGC;
+
+void recinit(unsigned int n) /* allocate the record and field buffers (n bytes each) and nfields field cells */
+{
+ static Cell dollar0 = {
+ OCELL, CFLD, "$0", /*recdata*/0, 0.0, REC|STR|DONTFREE };
+ static Cell dollar1 = {
+ OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE };
+ int i;
+
+ record = recdata = (char *) malloc(n);
+ fields = (char *) malloc(n);
+ fldtab = (Cell *) malloc(nfields * sizeof(Cell));
+ if (recdata == NULL || fields == NULL || fldtab == NULL)
+ ERROR "out of space for $0 and fields" FATAL;
+ fldtab[0] = dollar0;
+ fldtab[0].sval = recdata; /* fldtab[0] reads straight from the record buffer */
+ for (i = 1; i < nfields; i++)
+ fldtab[i] = dollar1; /* every other field starts as a copy of the empty template */
+}
+
+void initgetrec(void) /* one-time setup for getrec(): apply leading var=value args, find the first file */
+{
+ int i;
+ char *p;
+
+ for (i = 1; i < *ARGC; i++) {
+ if (!isclvar(p = getargv(i))) { /* find 1st real filename */
+ setsval(lookup("FILENAME", symtab), getargv(i));
+ return; /* infile stays NULL, so getrec() opens the file itself */
+ }
+ setclvar(p); /* a commandline assignment before filename */
+ argno++;
+ }
+ infile = stdin; /* no filenames, so use stdin */
+}
+
+int getrec(char *buf) /* get next input record from whatever source; returns 1, or 0 at true end of input */
+{ /* note: tests whether buf == record */
+ int c;
+ static int firsttime = 1;
+
+ if (firsttime) {
+ firsttime = 0;
+ initgetrec();
+ }
+ dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
+ *RS, *FS, *ARGC, *FILENAME) );
+ donefld = 0; /* fields of the previous record are stale */
+ donerec = 1;
+ buf[0] = 0;
+ while (argno < *ARGC || infile == stdin) {
+ dprintf( ("argno=%d, file=|%s|\n", argno, file) );
+ if (infile == NULL) { /* have to open a new file */
+ file = getargv(argno);
+ if (*file == '\0') { /* it's been zapped */
+ argno++;
+ continue;
+ }
+ if (isclvar(file)) { /* a var=value arg */
+ setclvar(file);
+ argno++;
+ continue;
+ }
+ *FILENAME = file;
+ dprintf( ("opening file %s\n", file) );
+ if (*file == '-' && *(file+1) == '\0') /* "-" names standard input */
+ infile = stdin;
+ else if ((infile = fopen((char *)file, "r")) == NULL)
+ ERROR "can't open file %s", file FATAL;
+ setfval(fnrloc, 0.0); /* fnrloc restarts from 0 for each file opened */
+ }
+ c = readrec(buf, recsize, infile);
+ if (c != 0 || buf[0] != '\0') { /* normal record */
+ if (buf == record) { /* only a read into the record buffer updates recloc */
+ if (!(recloc->tval & DONTFREE))
+ xfree(recloc->sval);
+ recloc->sval = record;
+ recloc->tval = REC | STR | DONTFREE;
+ if (isnumber(recloc->sval)) {
+ recloc->fval = atof(recloc->sval);
+ recloc->tval |= NUM;
+ }
+ }
+ setfval(nrloc, nrloc->fval+1);
+ setfval(fnrloc, fnrloc->fval+1);
+ return 1;
+ }
+ /* EOF arrived on this file; set up next */
+ if (infile != stdin)
+ fclose(infile);
+ infile = NULL;
+ argno++;
+ }
+ return 0; /* true end of file */
+}
+
+/* Abandon the current input file: close it (unless it is stdin), mark no
+ * file as open and advance to the next command-line argument, so that the
+ * following getrec() opens a new source. */
+void nextfile(void)
+{
+	/* infile is NULL whenever no file is open yet; passing NULL to
+	 * fclose() is undefined, so only close a real, non-stdin stream */
+	if (infile != NULL && infile != stdin)
+		fclose(infile);
+	infile = NULL;
+	argno++;
+}
+
+/* Read one record from inf into buf, which holds bufsize bytes including
+ * the terminating NUL.  The separator is the first character of RS.  If RS
+ * is empty, leading newlines are skipped and the record ends at two
+ * newlines in a row, single newlines being kept in the record.  Returns 0
+ * only when end of file is hit with nothing read, otherwise 1.  A record
+ * that does not fit in buf is a fatal error. */
+int readrec(char *buf, int bufsize, FILE *inf)
+{
+	int sep, c;
+	char *rr;
+	int nrr;	/* bytes still free in buf, including the one kept for the NUL */
+
+	if ((sep = **RS) == 0) {
+		sep = '\n';
+		while ((c=getc(inf)) == '\n' && c != EOF)	/* skip leading \n's */
+			;
+		if (c != EOF)
+			ungetc(c, inf);
+	}
+	for (rr = buf, nrr = bufsize; ; ) {
+		for (; (c=getc(inf)) != sep && c != EOF; *rr++ = c)
+			if (--nrr < 1) {	/* always leave room for the NUL */
+				*rr = 0;	/* terminate buf for the message */
+				ERROR "input record `%.30s...' too long; try -mr n", buf FATAL;
+			}
+		if (**RS == sep || c == EOF)
+			break;
+		if ((c = getc(inf)) == '\n' || c == EOF)	/* 2 in a row */
+			break;
+		if ((nrr -= 2) < 1) {	/* the newline and c take two bytes */
+			*rr = 0;
+			ERROR "input record `%.30s...' too long; try -mr n", buf FATAL;
+		}
+		*rr++ = '\n';
+		*rr++ = c;
+	}
+	*rr = 0;
+	dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
+	return c == EOF && rr == buf ? 0 : 1;
+}
+
+/* Get ARGV[n]: look up the element of ARGVtab whose subscript is the
+ * decimal text of n and return its string value.  setsymtab() is handed ""
+ * as the value, presumably used when the element does not exist yet. */
+char *getargv(int n)
+{
+	Cell *x;
+	char *s, temp[50];	/* ample for any int; the old 10 bytes could overflow */
+	extern Array *ARGVtab;
+
+	snprintf(temp, sizeof(temp), "%d", n);
+	x = setsymtab(temp, "", 0.0, STR, ARGVtab);
+	s = getsval(x);
+	dprintf( ("getargv(%d) returns |%s|\n", n, s) );
+	return s;
+}
+
+void setclvar(char *s) /* set var=value from s; s must contain '=' (callers check with isclvar first) */
+{
+ char *p;
+ Cell *q;
+
+ for (p=s; *p != '='; p++) /* find the '='; there is no check for end of string */
+ ;
+ *p++ = 0; /* split in place: s is now the name, p the value text */
+ p = qstring(p, '\0');
+ q = setsymtab(s, p, 0.0, STR, symtab);
+ setsval(q, p);
+ if (isnumber(q->sval)) { /* a value that looks numeric is flagged NUM as well */
+ q->fval = atof(q->sval);
+ q->tval |= NUM;
+ }
+ dprintf( ("command line set %s to |%s|\n", s, p) );
+}
+
+
+/* Create fields from the current record, unless that is already done.
+ * How the record is split depends on FS:
+ *   longer than one char  - FS is a regular expression (refldbld)
+ *   a single blank        - runs of blanks, tabs and newlines separate
+ *   empty                 - every character is a field of its own
+ *   any other char        - that char, or a newline, separates fields
+ * Field text is copied into the fields buffer, except in the empty-FS case
+ * where each field gets its own string.  Fields left over from a longer
+ * previous record are cleared, numeric-looking fields are flagged NUM and
+ * the field count is stored through nfloc.  Reaching nfields fields is a
+ * fatal error. */
+void fldbld(void)
+{
+	char *r, *fr, sep;
+	Cell *p;
+	int i;
+
+	if (donefld)
+		return;
+	if (!(recloc->tval & STR))
+		getsval(recloc);
+	r = recloc->sval;
+	fr = fields;
+	i = 0;	/* number of fields accumulated here */
+	if (strlen(*FS) > 1) {	/* it's a regular expression */
+		i = refldbld(r, *FS);
+	} else if ((sep = **FS) == ' ') {	/* default whitespace */
+		for (i = 0; ; ) {
+			while (*r == ' ' || *r == '\t' || *r == '\n')
+				r++;
+			if (*r == 0)
+				break;
+			i++;
+			if (i >= nfields)
+				break;
+			if (!(fldtab[i].tval & DONTFREE))
+				xfree(fldtab[i].sval);
+			fldtab[i].sval = fr;
+			fldtab[i].tval = FLD | STR | DONTFREE;
+			do
+				*fr++ = *r++;
+			while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
+			*fr++ = 0;
+		}
+		*fr = 0;
+	} else if ((sep = **FS) == 0) {	/* new: FS="" => 1 char/field */
+		for (i = 0; *r != 0; r++) {
+			char buf[2];
+			i++;
+			if (i >= nfields)
+				break;
+			if (!(fldtab[i].tval & DONTFREE))
+				xfree(fldtab[i].sval);
+			buf[0] = *r;
+			buf[1] = 0;
+			fldtab[i].sval = tostring(buf);
+			fldtab[i].tval = FLD | STR;	/* no DONTFREE: this string is freed later */
+		}
+		*fr = 0;
+	} else if (*r != 0) {	/* if 0, it's a null field */
+		for (;;) {
+			i++;
+			if (i >= nfields)
+				break;
+			if (!(fldtab[i].tval & DONTFREE))
+				xfree(fldtab[i].sval);
+			fldtab[i].sval = fr;
+			fldtab[i].tval = FLD | STR | DONTFREE;
+			while (*r != sep && *r != '\n' && *r != '\0')	/* \n is always a separator */
+				*fr++ = *r++;
+			*fr++ = 0;
+			if (*r++ == 0)
+				break;
+		}
+		*fr = 0;
+	}
+	if (i >= nfields)
+		ERROR "record `%.30s...' has too many fields; try -mf n", record FATAL;
+	/* clean out junk from previous record */
+	cleanfld(i, maxfld);
+	maxfld = i;
+	donefld = 1;
+	for (p = fldtab+1; p <= fldtab+maxfld; p++) {
+		if(isnumber(p->sval)) {
+			p->fval = atof(p->sval);
+			p->tval |= NUM;
+		}
+	}
+	setfval(nfloc, (Awkfloat) maxfld);
+	if (dbg)
+		for (p = fldtab; p <= fldtab+maxfld; p++)
+			/* a pointer difference is not an int; convert it for %d */
+			printf("field %d: |%s|\n", (int) (p-fldtab), p->sval);
+}
+
+void cleanfld(int n1, int n2) /* clean out fields n1+1..n2; field n1 itself is left alone */
+{
+ static char *nullstat = "";
+ Cell *p, *q;
+
+ for (p = &fldtab[n2], q = &fldtab[n1]; p > q; p--) { /* walks down from n2 and stops before n1 */
+ if (!(p->tval & DONTFREE))
+ xfree(p->sval);
+ p->tval = FLD | STR | DONTFREE;
+ p->sval = nullstat; /* all cleared fields share one static empty string */
+ }
+}
+
+void newfld(int n) /* add field n (after end): fields maxfld+1..n become empty and the count becomes n */
+{
+ if (n >= nfields)
+ ERROR "creating too many fields (%d); try -mf n", n FATAL;
+ cleanfld(maxfld, n);
+ maxfld = n;
+ setfval(nfloc, (Awkfloat) n);
+}
+
+int refldbld(char *rec, char *fs) /* build fields from reg expr in FS; returns the number of fields,
+ or nfields when the loop runs out of field cells */
+{
+ char *fr;
+ int i, tempstat;
+ fa *pfa;
+
+ fr = fields;
+ *fr = '\0';
+ if (*rec == '\0')
+ return 0;
+ pfa = makedfa(fs, 1);
+ dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
+ tempstat = pfa->initstat; /* saved so it can be put back when no more separators match */
+ for (i = 1; i < nfields; i++) {
+ if (!(fldtab[i].tval & DONTFREE))
+ xfree(fldtab[i].sval);
+ fldtab[i].tval = FLD | STR | DONTFREE;
+ fldtab[i].sval = fr;
+ dprintf( ("refldbld: i=%d\n", i) );
+ if (nematch(pfa, rec)) { /* a non-empty separator at patbeg, patlen chars long */
+ pfa->initstat = 2; /* horrible coupling */
+ dprintf( ("match %s (%d chars)\n", patbeg, patlen) );
+ strncpy(fr, rec, patbeg-rec); /* the text before the separator is the field */
+ fr += patbeg - rec + 1;
+ *(fr-1) = '\0'; /* strncpy added no terminator; add it here */
+ rec = patbeg + patlen; /* continue after the separator */
+ } else {
+ dprintf( ("no match %s\n", rec) );
+ strcpy(fr, rec); /* the remainder of the record is the last field */
+ pfa->initstat = tempstat;
+ break;
+ }
+ }
+ return i;
+}
+
+void recbld(void) /* create $0 from $1..$NF if necessary */
+{
+ int i;
+ char *r, *p;
+ static char *rec = 0; /* private buffer of recsize bytes, allocated on first use */
+
+ if (donerec == 1)
+ return;
+ if (rec == 0) {
+ rec = (char *) malloc(recsize);
+ if (rec == 0)
+ ERROR "out of space building $0, record size %d", recsize FATAL;
+ }
+ r = rec;
+ for (i = 1; i <= *NF; i++) {
+ p = getsval(&fldtab[i]);
+ while (r < rec+recsize-1 && (*r = *p++)) /* copy the field, stopping when the buffer is full */
+ r++;
+ if (i < *NF)
+ for (p = *OFS; r < rec+recsize-1 && (*r = *p++); ) /* OFS goes between fields only */
+ r++;
+ }
+ if (r > rec + recsize - 1) /* NOTE(review): both copy loops stop at rec+recsize-1, so this looks unreachable -- confirm */
+ ERROR "built giant record `%.30s...'; try -mr n", record FATAL;
+ *r = '\0';
+ dprintf( ("in recbld FS=%o, recloc=%p\n", **FS, recloc) );
+ recloc->tval = REC | STR | DONTFREE;
+ recloc->sval = record = rec; /* from now on the record lives in the private buffer */
+ dprintf( ("in recbld FS=%o, recloc=%p\n", **FS, recloc) );
+ dprintf( ("recbld = |%s|\n", record) );
+ donerec = 1;
+}
+
+Cell *fieldadr(int n) /* address of field n; n outside 0..nfields-1 is a fatal error */
+{
+ if (n < 0 || n >= nfields)
+ ERROR "trying to access field %d; try -mf n", n FATAL;
+ return(&fldtab[n]);
+}
+
+int errorflag = 0;
+char errbuf[200];
+
+void yyerror(char *s) /* report syntax error s with source line and function; sets errorflag */
+{
+ extern char *cmdname, *curfname;
+ static int been_here = 0;
+
+ if (been_here++ > 2) /* only the first three calls print anything */
+ return;
+ fprintf(stderr, "%s: %s", cmdname, s);
+ fprintf(stderr, " at source line %d", lineno);
+ if (curfname != NULL)
+ fprintf(stderr, " in function %s", curfname);
+ fprintf(stderr, "\n");
+ errorflag = 2;
+ eprint();
+}
+
+void fpecatch(int n) /* fatal error for floating point exception n; presumably installed as a signal handler -- confirm in main */
+{
+ ERROR "floating point exception %d", n FATAL;
+}
+
+extern int bracecnt, brackcnt, parencnt;
+
+void bracecheck(void) /* read the rest of the program text and report unbalanced {} [] () */
+{
+ int c;
+ static int beenhere = 0;
+
+ if (beenhere++) /* only the first call does anything */
+ return;
+ while ((c = lex_input()) != EOF && c != '\0')
+ bclass(c); /* keep the three counters up to date for the unread text */
+ bcheck2(bracecnt, '{', '}');
+ bcheck2(brackcnt, '[', ']');
+ bcheck2(parencnt, '(', ')');
+}
+
+/* Report an imbalance of n for one bracket kind on stderr: a positive n
+ * means that many closers (c2) are missing, a negative n that many are
+ * extra, and zero prints nothing.  c1, the opening character, is unused. */
+void bcheck2(int n, int c1, int c2)
+{
+	if (n == 0)
+		return;
+	if (n > 0) {
+		if (n == 1)
+			fprintf(stderr, "\tmissing %c\n", c2);
+		else
+			fprintf(stderr, "\t%d missing %c's\n", n, c2);
+	} else {
+		if (n == -1)
+			fprintf(stderr, "\textra %c\n", c2);
+		else
+			fprintf(stderr, "\t%d extra %c's\n", -n, c2);
+	}
+}
+
+void error(int f, char *s) /* print message s with input and source context; a nonzero f makes it fatal */
+{
+ extern Node *curnode;
+ extern char *cmdname;
+
+ fflush(stdout); /* flush pending output before the message is written to stderr */
+ fprintf(stderr, "%s: ", cmdname);
+ fprintf(stderr, "%s", s);
+ fprintf(stderr, "\n");
+ if (compile_time != 2 && NR && *NR > 0) { /* some input has been read: say which record */
+ fprintf(stderr, " input record number %d", (int) (*FNR));
+ if (strcmp(*FILENAME, "-") != 0)
+ fprintf(stderr, ", file %s", *FILENAME);
+ fprintf(stderr, "\n");
+ }
+ if (compile_time != 2 && curnode)
+ fprintf(stderr, " source line number %d\n", curnode->lineno);
+ else if (compile_time != 2 && lineno)
+ fprintf(stderr, " source line number %d\n", lineno);
+ eprint();
+ if (f) {
+ if (dbg > 1) /* core dump if serious debugging on */
+ abort();
+ exit(2);
+ }
+}
+
+void eprint(void) /* try to print context around error; the whole body is under #if 0, so this does nothing */
+{
+#if 0
+ char *p, *q;
+ int c;
+ static int been_here = 0;
+ extern char ebuf[], *ep;
+
+ if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
+ return;
+ p = ep - 1;
+ if (p > ebuf && *p == '\n')
+ p--;
+ for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--) /* back up to the start of the current line */
+ ;
+ while (*p == '\n')
+ p++;
+ fprintf(stderr, " context is\n\t");
+ for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--) /* q: start of the last token read */
+ ;
+ for ( ; p < q; p++)
+ if (*p)
+ putc(*p, stderr);
+ fprintf(stderr, " >>> ");
+ for ( ; p < ep; p++)
+ if (*p)
+ putc(*p, stderr);
+ fprintf(stderr, " <<< ");
+ if (*ep)
+ while ((c = input()) != '\n' && c != '\0' && c != EOF) { /* echo the rest of the line */
+ putc(c, stderr);
+ bclass(c);
+ }
+ putc('\n', stderr);
+ ep = ebuf;
+#endif
+}
+
+/* Keep the bracket counters current for character c: an opener raises
+ * the counter of its kind, a closer lowers it, anything else is ignored. */
+void bclass(int c)
+{
+	if (c == '{')
+		bracecnt++;
+	else if (c == '}')
+		bracecnt--;
+	else if (c == '[')
+		brackcnt++;
+	else if (c == ']')
+		brackcnt--;
+	else if (c == '(')
+		parencnt++;
+	else if (c == ')')
+		parencnt--;
+}
+
+double errcheck(double x, char *s)
+{
+ extern int errno;
+
+ if (errno == EDOM) {
+ errno = 0;
+ ERROR "%s argument out of domain", s WARNING;
+ x = 1;
+ } else if (errno == ERANGE) {
+ errno = 0;
+ ERROR "%s result out of range", s WARNING;
+ x = 1;
+ }
+ return x;
+}
+
+/* Is s of the form var=something?  var must start with a letter or
+ * underscore and continue with letters, digits or underscores, and the
+ * '=' must not be followed by another '='.  Returns 1 if so, else 0. */
+int isclvar(char *s)
+{
+	char *os = s;
+
+	/* the <ctype.h> tests need an unsigned char value; a plain char with
+	 * the high bit set would otherwise be passed as a negative int */
+	if (!isalpha((unsigned char) *s) && *s != '_')
+		return 0;
+	for ( ; *s; s++)
+		if (!(isalnum((unsigned char) *s) || *s == '_'))
+			break;
+	return *s == '=' && s > os && *(s+1) != '=';
+}
+
+#define MAXEXPON 38	/* maximum exponent for fp number. should be IEEE */
+
+/* Does s look like a number?  Accepted: optional surrounding blanks, tabs
+ * and newlines, an optional sign, digits with an optional decimal point
+ * (at least one digit somewhere), and an optional exponent of one or two
+ * digits whose value is below MAXEXPON.  Returns 1 if so, else 0.
+ * (should be done by a library function) */
+int isnumber(char *s)
+{
+	int d1, d2;
+	int point;
+	char *es;
+
+	/* every isdigit() argument is converted to unsigned char, as the
+	 * <ctype.h> functions require */
+	d1 = d2 = point = 0;
+	while (*s == ' ' || *s == '\t' || *s == '\n')
+		s++;
+	if (*s == '\0')
+		return(0);	/* empty stuff isn't a number */
+	if (*s == '+' || *s == '-')
+		s++;
+	if (!isdigit((unsigned char) *s) && *s != '.')
+		return(0);
+	if (isdigit((unsigned char) *s)) {
+		do {
+			d1++;
+			s++;
+		} while (isdigit((unsigned char) *s));
+	}
+	if (*s == '.') {
+		point++;
+		s++;
+	}
+	if (isdigit((unsigned char) *s)) {
+		d2++;
+		do {
+			s++;
+		} while (isdigit((unsigned char) *s));
+	}
+	if (!(d1 || (point && d2)))
+		return(0);
+	if (*s == 'e' || *s == 'E') {
+		s++;
+		if (*s == '+' || *s == '-')
+			s++;
+		if (!isdigit((unsigned char) *s))
+			return(0);
+		es = s;
+		do {
+			s++;
+		} while (isdigit((unsigned char) *s));
+		if (s - es > 2)
+			return(0);
+		else if (s - es == 2 && (int)(10 * (*es-'0') + *(es+1)-'0') >= MAXEXPON)
+			return(0);
+	}
+	while (*s == ' ' || *s == '\t' || *s == '\n')
+		s++;
+	if (*s == '\0')
+		return(1);
+	else
+		return(0);
+}
+
+#if 0
+ /* THIS IS AN EXPERIMENT THAT'S NOT DONE. */
+ /* strtod ought to provide a better test of what's */
+ /* a valid number, but it doesn't work according to */
+ /* the standard on any machine near me! */
+
+ #include <math.h>
+ isnumber(char *s)
+ {
+ double r;
+ char *ep;
+ errno = 0;
+ r = strtod(s, &ep);
+ if (r == HUGE_VAL || errno == ERANGE)
+ return 0;
+ while (*ep == ' ' || *ep == '\t' || *ep == '\n')
+ ep++;
+ if (*ep == '\0')
+ return 1;
+ else
+ return 0;
+ }
+#endif
diff --git a/usr.bin/awk/main.c b/usr.bin/awk/main.c
new file mode 100644
index 00000000000..02c01ebcf4b
--- /dev/null
+++ b/usr.bin/awk/main.c
@@ -0,0 +1,180 @@
+/****************************************************************
+Copyright (C) AT&T and Lucent Technologies 1996
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the names of AT&T or Lucent Technologies
+or any of their entities not be used in advertising or publicity
+pertaining to distribution of the software without specific,
+written prior permission.
+
+AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
+ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+****************************************************************/
+
+/* release identification; main() prints it as "awk <version>" for the -d option */
+char *version = "version May 27, 1996";
+
+#define DEBUG
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include "awk.h"
+#include "awkgram.h"
+
+/*
+ * File-scope state used by main() and pgetc() below.
+ * environ is handed to envinit(); nfields is set by the -mf option.
+ */
+extern char **environ;
+extern int nfields;
+
+int dbg = 0;
+char *cmdname; /* gets argv[0] for error messages */
+extern FILE *yyin; /* lex input file */
+char *lexprog; /* points to program argument if it exists */
+extern int errorflag; /* non-zero if any syntax errors; set by yyerror */
+int compile_time = 2; /* for error printing: */
+ /* 2 = cmdline, 1 = compile, 0 = running */
+
+/* main() appends one entry per -f option; pgetc() reads them in order. */
+/* The table has room for 20 names. */
+char *pfile[20]; /* program filenames from -f's */
+int npfile = 0; /* number of filenames */
+int curpfile = 0; /* current filename */
+
+/*
+ * main: entry point of awk.
+ *
+ * Walks the leading "-x" arguments (-f progfile, -F fs, -v var=value,
+ * -mr n / -mf n, -d[n], and "--" to stop), takes the program text from the
+ * first remaining argument when no -f was given, initialises the record
+ * buffer and symbol tables, parses the program and, if parsing raised no
+ * errors, runs it.
+ *
+ * Returns errorflag, which becomes the exit status.
+ *
+ * NOTE(review): the code relies on ERROR ... FATAL not returning (see the
+ * "no program given" path); confirm against error() in lib.c.
+ */
+int main(int argc, char *argv[])
+{
+	char *fs = NULL, *marg;
+	int temp;
+
+	if ((cmdname = strrchr(argv[0], '/')) != NULL)
+		cmdname++;
+	else
+		cmdname = argv[0];
+	if (argc == 1) {
+		fprintf(stderr, "Usage: %s [-f programfile | 'program'] [-Ffieldsep] [-v var=value] [-mf n] [-mr n] [files]\n", cmdname);
+		exit(1);
+	}
+	gs = newGstring();	/* for lex */
+	signal(SIGFPE, fpecatch);
+	yyin = NULL;
+	symtab = makesymtab(NSYMTAB);
+	while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
+		if (strcmp((char *) argv[1], "--") == 0) {	/* explicit end of args */
+			argc--;
+			argv++;
+			break;
+		}
+		switch (argv[1][1]) {
+		case 'f':	/* next argument is program filename */
+			argc--;
+			argv++;
+			if (argc <= 1)
+				ERROR "no program filename" FATAL;
+			/* pfile[] is a fixed-size table: refuse to write past its end */
+			if (npfile >= (int) (sizeof(pfile) / sizeof(pfile[0])))
+				ERROR "too many -f options" FATAL;
+			pfile[npfile++] = argv[1];
+			break;
+		case 'F':	/* set field separator */
+			if (argv[1][2] != 0) {	/* arg is -Fsomething */
+				if (argv[1][2] == 't' && argv[1][3] == 0)	/* wart: t=>\t */
+					fs = (char *) "\t";
+				else if (argv[1][2] != 0)
+					fs = &argv[1][2];
+			} else {		/* arg is -F something */
+				argc--; argv++;
+				if (argc > 1 && argv[1][0] == 't' && argv[1][1] == 0)	/* wart: t=>\t */
+					fs = (char *) "\t";
+				else if (argc > 1 && argv[1][0] != 0)
+					fs = &argv[1][0];
+			}
+			if (fs == NULL || *fs == '\0')
+				ERROR "field separator FS is empty" WARNING;
+			break;
+		case 'v':	/* -v a=1 to be done NOW.  one -v for each */
+			/*
+			 * Only the detached form "-v var=value" is acted on; an
+			 * attached "-vvar=value" or a malformed assignment is
+			 * skipped without a message.
+			 */
+			if (argv[1][2] == '\0' && --argc > 1 && isclvar((++argv)[1]))
+				setclvar(argv[1]);
+			break;
+		case 'm':	/* more memory: -mr=record, -mf=fields */
+			marg = argv[1];
+			/*
+			 * Look at marg[3] only when marg[2] exists; a bare "-m"
+			 * ends at index 2 and index 3 would be out of bounds.
+			 */
+			if (marg[2] != '\0' && marg[3] != '\0')
+				temp = atoi(&marg[3]);
+			else {
+				argv++; argc--;
+				/* the value must be a separate argument; it may be missing */
+				if (argc <= 1)
+					ERROR "no value for option %s", marg FATAL;
+				temp = atoi(&argv[1][0]);
+			}
+			switch (marg[2]) {
+			case 'r':	recsize = temp; break;
+			case 'f':	nfields = temp; break;
+			default: ERROR "unknown option %s\n", marg FATAL;
+			}
+			break;
+		case 'd':
+			dbg = atoi(&argv[1][2]);
+			if (dbg == 0)
+				dbg = 1;
+			printf("awk %s\n", version);
+			break;
+		default:
+			ERROR "unknown option %s ignored", argv[1] WARNING;
+			break;
+		}
+		argc--;
+		argv++;
+	}
+	/* argv[1] is now the first argument */
+	if (npfile == 0) {	/* no -f; first argument is program */
+		if (argc <= 1) {
+			if (dbg)
+				exit(0);
+			ERROR "no program given" FATAL;
+		}
+		dprintf( ("program = |%s|\n", argv[1]) );
+		lexprog = argv[1];
+		argc--;
+		argv++;
+	}
+	recinit(recsize);
+	syminit();
+	compile_time = 1;
+	argv[0] = cmdname;	/* put prog name at front of arglist */
+	dprintf( ("argc=%d, argv[0]=%s\n", argc, argv[0]) );
+	arginit(argc, argv);
+	envinit(environ);
+	yyparse();
+	/* -F is applied after parsing and before the program runs */
+	if (fs)
+		*FS = tostring(qstring(fs, '\0'));
+	dprintf( ("errorflag=%d\n", errorflag) );
+	if (errorflag == 0) {
+		compile_time = 0;
+		run(winner);
+	} else
+		bracecheck();
+	return(errorflag);
+}
+
+/*
+ * pgetc: return the next character of the awk program text held in the
+ * -f files, moving on to the next file in pfile[] whenever the current
+ * one is exhausted.  A file name of "-" means standard input.  Returns
+ * EOF once every file has been consumed.
+ */
+int pgetc(void)		/* get 1 character from awk program */
+{
+	int ch;
+
+	while (1) {
+		if (yyin == NULL) {
+			/* no file open: start the next one, if any remain */
+			if (curpfile >= npfile)
+				return EOF;
+			if (strcmp((char *) pfile[curpfile], "-") != 0) {
+				yyin = fopen((char *) pfile[curpfile], "r");
+				if (yyin == NULL)
+					ERROR "can't open file %s", pfile[curpfile] FATAL;
+			} else
+				yyin = stdin;
+		}
+		ch = getc(yyin);
+		if (ch != EOF)
+			return ch;
+		/* current file is finished; stdin is left open */
+		if (yyin != stdin)
+			fclose(yyin);
+		yyin = NULL;
+		curpfile++;
+	}
+}
diff --git a/usr.bin/awk/maketab.c b/usr.bin/awk/maketab.c
new file mode 100644
index 00000000000..05dfbf4ada4
--- /dev/null
+++ b/usr.bin/awk/maketab.c
@@ -0,0 +1,168 @@
+/****************************************************************
+Copyright (C) AT&T and Lucent Technologies 1996
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the names of AT&T or Lucent Technologies
+or any of their entities not be used in advertising or publicity
+pertaining to distribution of the software without specific,
+written prior permission.
+
+AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
+ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+****************************************************************/
+
+/*
+ * this program makes the table to link function names
+ * and type indices that is used by execute() in run.c.
+ * it finds the indices in awkgram.h, produced by yacc.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "awk.h"
+#include "awkgram.h"
+
+/*
+ * proc[]: one entry per grammar token that has an interpreter routine.
+ * token is the token value from awkgram.h, name is the identifier that
+ * main() below writes into the generated proctab[] table for that token,
+ * and pname is a printable form of the operator.
+ * The list ends at the entry whose token is 0 (main's loop stops there).
+ * NOTE(review): pname is not read by main() in this file.
+ */
+struct xx
+{ int token;
+ char *name;
+ char *pname;
+} proc[] = {
+ { PROGRAM, "program", NULL },
+ { BOR, "boolop", " || " },
+ { AND, "boolop", " && " },
+ { NOT, "boolop", " !" },
+ { NE, "relop", " != " },
+ { EQ, "relop", " == " },
+ { LE, "relop", " <= " },
+ { LT, "relop", " < " },
+ { GE, "relop", " >= " },
+ { GT, "relop", " > " },
+ { ARRAY, "array", NULL },
+ { INDIRECT, "indirect", "$(" },
+ { SUBSTR, "substr", "substr" },
+ { SUB, "sub", "sub" },
+ { GSUB, "gsub", "gsub" },
+ { INDEX, "sindex", "sindex" },
+ { SPRINTF, "awksprintf", "sprintf " },
+ { ADD, "arith", " + " },
+ { MINUS, "arith", " - " },
+ { MULT, "arith", " * " },
+ { DIVIDE, "arith", " / " },
+ { MOD, "arith", " % " },
+ { UMINUS, "arith", " -" },
+ { POWER, "arith", " **" },
+ { PREINCR, "incrdecr", "++" },
+ { POSTINCR, "incrdecr", "++" },
+ { PREDECR, "incrdecr", "--" },
+ { POSTDECR, "incrdecr", "--" },
+ { CAT, "cat", " " },
+ { PASTAT, "pastat", NULL },
+ { PASTAT2, "dopa2", NULL },
+ { MATCH, "matchop", " ~ " },
+ { NOTMATCH, "matchop", " !~ " },
+ { MATCHFCN, "matchop", "matchop" },
+ { INTEST, "intest", "intest" },
+ { PRINTF, "awkprintf", "printf" },
+ { PRINT, "printstat", "print" },
+ { CLOSE, "closefile", "closefile" },
+ { DELETE, "adelete", "adelete" },
+ { SPLIT, "split", "split" },
+ { ASSIGN, "assign", " = " },
+ { ADDEQ, "assign", " += " },
+ { SUBEQ, "assign", " -= " },
+ { MULTEQ, "assign", " *= " },
+ { DIVEQ, "assign", " /= " },
+ { MODEQ, "assign", " %= " },
+ { POWEQ, "assign", " ^= " },
+ { CONDEXPR, "condexpr", " ?: " },
+ { IF, "ifstat", "if(" },
+ { WHILE, "whilestat", "while(" },
+ { FOR, "forstat", "for(" },
+ { DO, "dostat", "do" },
+ { IN, "instat", "instat" },
+ { NEXT, "jump", "next" },
+ { NEXTFILE, "jump", "nextfile" },
+ { EXIT, "jump", "exit" },
+ { BREAK, "jump", "break" },
+ { CONTINUE, "jump", "continue" },
+ { RETURN, "jump", "ret" },
+ { BLTIN, "bltin", "bltin" },
+ { CALL, "call", "call" },
+ { ARG, "arg", "arg" },
+ { VARNF, "getnf", "NF" },
+ { GETLINE, "getline", "getline" },
+ { 0, "", "" },
+};
+
+#define SIZE	(LASTTOKEN - FIRSTTOKEN + 1)
+char *table[SIZE];	/* routine name per token, filled from proc[]; NULL if none */
+char *names[SIZE];	/* token name per token, filled from awkgram.h */
+
+/*
+ * main: write the C source of the token tables to standard output.
+ *
+ * Reads "#define NAME value" lines from awkgram.h and emits
+ *   - printname[], the token names in the order they appear in the header,
+ *   - proctab[], the routine for each token (nullproc where proc[] has none),
+ *   - tokname(), which maps a token value to its name.
+ *
+ * Exits with status 1 if awkgram.h cannot be opened, if a #define carries a
+ * value outside FIRSTTOKEN..LASTTOKEN, or if memory runs out; returns 0
+ * otherwise.  argc and argv are not used.
+ */
+int main(int argc, char *argv[])
+{
+	struct xx *p;
+	int i, n, tok;
+	char c;
+	FILE *fp;
+	char buf[200], name[200], def[200];
+
+	printf("#include <stdio.h>\n");
+	printf("#include \"awk.h\"\n");
+	printf("#include \"awkgram.h\"\n\n");
+	for (i = SIZE; --i >= 0; )
+		names[i] = "";
+
+	if ((fp = fopen("awkgram.h", "r")) == NULL) {
+		fprintf(stderr, "maketab can't open awkgram.h!\n");
+		exit(1);
+	}
+	printf("static char *printname[%d] = {\n", SIZE);
+	while (fgets(buf, sizeof buf, fp) != NULL) {
+		n = sscanf(buf, "%1c %199s %199s %d", &c, def, name, &tok);
+		/*
+		 * Accept the line only when all four fields were converted
+		 * AND it is a #define.  Testing n first also keeps c, def and
+		 * tok from being read when sscanf did not fill them in.
+		 */
+		if (n != 4 || c != '#' || strcmp(def, "define") != 0)
+			continue;	/* not a valid #define */
+		if (tok < FIRSTTOKEN || tok > LASTTOKEN) {
+			fprintf(stderr, "maketab funny token %d %s\n", tok, buf);
+			exit(1);
+		}
+		names[tok-FIRSTTOKEN] = (char *) malloc(strlen(name)+1);
+		if (names[tok-FIRSTTOKEN] == NULL) {
+			fprintf(stderr, "maketab out of memory\n");
+			exit(1);
+		}
+		strcpy(names[tok-FIRSTTOKEN], name);
+		printf("\t(char *) \"%s\",\t/* %d */\n", name, tok);
+	}
+	fclose(fp);
+	printf("};\n\n");
+
+	for (p=proc; p->token!=0; p++)
+		table[p->token-FIRSTTOKEN] = p->name;
+	printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
+	for (i=0; i<SIZE; i++)
+		if (table[i]==0)
+			printf("\tnullproc,\t/* %s */\n", names[i]);
+		else
+			printf("\t%s,\t/* %s */\n", table[i], names[i]);
+	printf("};\n\n");
+
+	printf("char *tokname(int n)\n");	/* print a tokname() function */
+	printf("{\n");
+	printf("	static char buf[100];\n\n");
+	printf("	if (n < FIRSTTOKEN || n > LASTTOKEN) {\n");
+	printf("		sprintf(buf, \"token %%d\", n);\n");
+	printf("		return buf;\n");
+	printf("	}\n");
+	printf("	return printname[n-FIRSTTOKEN];\n");
+	printf("}\n");
+	return 0;
+}
diff --git a/usr.bin/awk/parse.c b/usr.bin/awk/parse.c
new file mode 100644
index 00000000000..e4b005c8dac
--- /dev/null
+++ b/usr.bin/awk/parse.c
@@ -0,0 +1,255 @@
+/****************************************************************
+Copyright (C) AT&T and Lucent Technologies 1996
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the names of AT&T or Lucent Technologies
+or any of their entities not be used in advertising or publicity
+pertaining to distribution of the software without specific,
+written prior permission.
+
+AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
+ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+****************************************************************/
+
+#define DEBUG
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "awk.h"
+#include "awkgram.h"
+
+/*
+ * nodealloc: allocate a Node with room for n narg[] slots, stamp it with
+ * the current lineno and clear its nnext link.  Running out of memory is
+ * reported through ERROR ... FATAL.
+ */
+Node *nodealloc(int n)
+{
+	Node *np = (Node *) malloc(sizeof(Node) + (n-1)*sizeof(Node *));
+
+	if (np == NULL)
+		ERROR "out of space in nodealloc" FATAL;
+	np->lineno = lineno;
+	np->nnext = NULL;
+	return np;
+}
+
+/* exptostat: retag node a as a statement (NSTAT) in place and return it */
+Node *exptostat(Node *a)
+{
+	a->ntype = NSTAT;
+	return a;
+}
+
+/* node1: build a node for object a with the single argument b */
+Node *node1(int a, Node *b)
+{
+	Node *np = nodealloc(1);
+
+	np->narg[0] = b;
+	np->nobj = a;
+	return np;
+}
+
+/* node2: build a node for object a with the two arguments b and c */
+Node *node2(int a, Node *b, Node *c)
+{
+	Node *np = nodealloc(2);
+
+	np->narg[0] = b;
+	np->narg[1] = c;
+	np->nobj = a;
+	return np;
+}
+
+/* node3: build a node for object a with the three arguments b, c and d */
+Node *node3(int a, Node *b, Node *c, Node *d)
+{
+	Node *np = nodealloc(3);
+
+	np->narg[0] = b;
+	np->narg[1] = c;
+	np->narg[2] = d;
+	np->nobj = a;
+	return np;
+}
+
+/* node4: build a node for object a with the four arguments b, c, d and e */
+Node *node4(int a, Node *b, Node *c, Node *d, Node *e)
+{
+	Node *np = nodealloc(4);
+
+	np->narg[0] = b;
+	np->narg[1] = c;
+	np->narg[2] = d;
+	np->narg[3] = e;
+	np->nobj = a;
+	return np;
+}
+
+/* stat1: one-argument node for object a, tagged as a statement (NSTAT) */
+Node *stat1(int a, Node *b)
+{
+	return exptostat(node1(a, b));
+}
+
+/* stat2: two-argument node for object a, tagged as a statement (NSTAT) */
+Node *stat2(int a, Node *b, Node *c)
+{
+	return exptostat(node2(a, b, c));
+}
+
+/* stat3: three-argument node for object a, tagged as a statement (NSTAT) */
+Node *stat3(int a, Node *b, Node *c, Node *d)
+{
+	return exptostat(node3(a, b, c, d));
+}
+
+/* stat4: four-argument node for object a, tagged as a statement (NSTAT) */
+Node *stat4(int a, Node *b, Node *c, Node *d, Node *e)
+{
+	return exptostat(node4(a, b, c, d, e));
+}
+
+/* op1: one-argument node for object a, tagged as an expression (NEXPR) */
+Node *op1(int a, Node *b)
+{
+	Node *expr = node1(a, b);
+
+	expr->ntype = NEXPR;
+	return expr;
+}
+
+/* op2: two-argument node for object a, tagged as an expression (NEXPR) */
+Node *op2(int a, Node *b, Node *c)
+{
+	Node *expr = node2(a, b, c);
+
+	expr->ntype = NEXPR;
+	return expr;
+}
+
+/* op3: three-argument node for object a, tagged as an expression (NEXPR) */
+Node *op3(int a, Node *b, Node *c, Node *d)
+{
+	Node *expr = node3(a, b, c, d);
+
+	expr->ntype = NEXPR;
+	return expr;
+}
+
+/* op4: four-argument node for object a, tagged as an expression (NEXPR) */
+Node *op4(int a, Node *b, Node *c, Node *d, Node *e)
+{
+	Node *expr = node4(a, b, c, d, e);
+
+	expr->ntype = NEXPR;
+	return expr;
+}
+
+/*
+ * valtonode: wrap Cell a in a value node (NVALUE).  The cell itself is
+ * marked as OCELL with sub-type b, and is stored, cast to Node *, in the
+ * node's narg[0]; the node's nobj is 0.
+ */
+Node *valtonode(Cell *a, int b)
+{
+	Node *vn;
+
+	a->csub = b;
+	a->ctype = OCELL;
+	vn = node1(0, (Node *) a);
+	vn->ntype = NVALUE;
+	return vn;
+}
+
+/* rectonode: value node for the record cell recloc ($0), sub-type CFLD */
+Node *rectonode(void)
+{
+	Node *rec = valtonode(recloc, CFLD);
+
+	return rec;
+}
+
+/*
+ * makearr: if p is a value node, make the Cell it carries an array.
+ * A cell that is a function is reported as a syntax error; a cell that is
+ * not yet an array has its string value freed and replaced by a fresh
+ * symbol table, and its type set to ARR.  p is returned in every case.
+ */
+Node *makearr(Node *p)
+{
+	Cell *cp;
+
+	if (!isvalue(p))
+		return p;
+	cp = (Cell *) (p->narg[0]);
+	if (isfunc(cp)) {
+		ERROR "%s is a function, not an array", cp->nval SYNTAX;
+	} else if (!isarr(cp)) {
+		xfree(cp->sval);
+		cp->sval = (char *) makesymtab(NSYMTAB);
+		cp->tval = ARR;
+	}
+	return p;
+}
+
+/*
+ * pa2stat: statement node for a range pattern "pat, pat {...}".
+ * The current value of paircnt is stored (cast to Node *) as the fourth
+ * argument, and paircnt is then advanced for the next range pattern.
+ */
+Node *pa2stat(Node *a, Node *b, Node *c)
+{
+	Node *range = node4(PASTAT2, a, b, c, (Node *) paircnt);
+
+	range->ntype = NSTAT;
+	paircnt++;
+	return range;
+}
+
+/*
+ * linkum: append list b to the end of list a (lists are chained through
+ * nnext) and return the head of the result.  If either list is NULL the
+ * other is returned; once errorflag is set nothing is linked and a is
+ * returned unchanged.
+ */
+Node *linkum(Node *a, Node *b)
+{
+	Node *tail;
+
+	if (errorflag)	/* don't link things that are wrong */
+		return a;
+	if (a == NULL)
+		return b;
+	if (b == NULL)
+		return a;
+	tail = a;
+	while (tail->nnext != NULL)
+		tail = tail->nnext;
+	tail->nnext = b;
+	return a;
+}
+
+/*
+ * defn: record a function definition in cell v.
+ * v gets type FCN, its sval points at the body st, and its fval holds the
+ * number of entries in the argument list vl.  A name that is already an
+ * array is rejected with a syntax error and v is left untouched.
+ */
+void defn(Cell *v, Node *vl, Node *st)
+{
+	Node *formal;
+	int nargs = 0;
+
+	if (isarr(v)) {
+		ERROR "`%s' is an array name and a function name", v->nval SYNTAX;
+		return;
+	}
+	v->tval = FCN;
+	v->sval = (char *) st;
+	/* count arguments */
+	for (formal = vl; formal; formal = formal->nnext)
+		nargs++;
+	v->fval = nargs;
+	dprintf( ("defining func %s (%d args)\n", v->nval, nargs) );
+}
+
+/*
+ * isarg: look s up in arglist, the argument list of the function being
+ * defined.  Returns its zero-based position, or -1 if s is not in the list.
+ */
+int isarg(char *s)
+{
+	extern Node *arglist;
+	Node *p;
+	int pos = 0;
+
+	for (p = arglist; p != 0; p = p->nnext) {
+		if (strcmp(((Cell *)(p->narg[0]))->nval, s) == 0)
+			return pos;
+		pos++;
+	}
+	return -1;
+}
diff --git a/usr.bin/awk/proctab.c b/usr.bin/awk/proctab.c
new file mode 100644
index 00000000000..aacbf49ebed
--- /dev/null
+++ b/usr.bin/awk/proctab.c
@@ -0,0 +1,207 @@
+#include <stdio.h>
+#include "awk.h"
+#include "ytab.h"
+
+/*
+ * printname[]: token names, indexed by (token value - FIRSTTOKEN);
+ * tokname() below reads it that way.  The trailing comment on each entry
+ * is the token value.
+ * NOTE(review): this table has the shape of maketab's output, but this
+ * file includes "ytab.h" where maketab.c emits "awkgram.h" -- presumably
+ * produced by an earlier maketab; confirm it is regenerated at build time.
+ */
+static char *printname[93] = {
+ (char *) "FIRSTTOKEN", /* 257 */
+ (char *) "PROGRAM", /* 258 */
+ (char *) "PASTAT", /* 259 */
+ (char *) "PASTAT2", /* 260 */
+ (char *) "XBEGIN", /* 261 */
+ (char *) "XEND", /* 262 */
+ (char *) "NL", /* 263 */
+ (char *) "ARRAY", /* 264 */
+ (char *) "MATCH", /* 265 */
+ (char *) "NOTMATCH", /* 266 */
+ (char *) "MATCHOP", /* 267 */
+ (char *) "FINAL", /* 268 */
+ (char *) "DOT", /* 269 */
+ (char *) "ALL", /* 270 */
+ (char *) "CCL", /* 271 */
+ (char *) "NCCL", /* 272 */
+ (char *) "CHAR", /* 273 */
+ (char *) "OR", /* 274 */
+ (char *) "STAR", /* 275 */
+ (char *) "QUEST", /* 276 */
+ (char *) "PLUS", /* 277 */
+ (char *) "AND", /* 278 */
+ (char *) "BOR", /* 279 */
+ (char *) "APPEND", /* 280 */
+ (char *) "EQ", /* 281 */
+ (char *) "GE", /* 282 */
+ (char *) "GT", /* 283 */
+ (char *) "LE", /* 284 */
+ (char *) "LT", /* 285 */
+ (char *) "NE", /* 286 */
+ (char *) "IN", /* 287 */
+ (char *) "ARG", /* 288 */
+ (char *) "BLTIN", /* 289 */
+ (char *) "BREAK", /* 290 */
+ (char *) "CLOSE", /* 291 */
+ (char *) "CONTINUE", /* 292 */
+ (char *) "DELETE", /* 293 */
+ (char *) "DO", /* 294 */
+ (char *) "EXIT", /* 295 */
+ (char *) "FOR", /* 296 */
+ (char *) "FUNC", /* 297 */
+ (char *) "SUB", /* 298 */
+ (char *) "GSUB", /* 299 */
+ (char *) "IF", /* 300 */
+ (char *) "INDEX", /* 301 */
+ (char *) "LSUBSTR", /* 302 */
+ (char *) "MATCHFCN", /* 303 */
+ (char *) "NEXT", /* 304 */
+ (char *) "NEXTFILE", /* 305 */
+ (char *) "ADD", /* 306 */
+ (char *) "MINUS", /* 307 */
+ (char *) "MULT", /* 308 */
+ (char *) "DIVIDE", /* 309 */
+ (char *) "MOD", /* 310 */
+ (char *) "ASSIGN", /* 311 */
+ (char *) "ASGNOP", /* 312 */
+ (char *) "ADDEQ", /* 313 */
+ (char *) "SUBEQ", /* 314 */
+ (char *) "MULTEQ", /* 315 */
+ (char *) "DIVEQ", /* 316 */
+ (char *) "MODEQ", /* 317 */
+ (char *) "POWEQ", /* 318 */
+ (char *) "PRINT", /* 319 */
+ (char *) "PRINTF", /* 320 */
+ (char *) "SPRINTF", /* 321 */
+ (char *) "ELSE", /* 322 */
+ (char *) "INTEST", /* 323 */
+ (char *) "CONDEXPR", /* 324 */
+ (char *) "POSTINCR", /* 325 */
+ (char *) "PREINCR", /* 326 */
+ (char *) "POSTDECR", /* 327 */
+ (char *) "PREDECR", /* 328 */
+ (char *) "VAR", /* 329 */
+ (char *) "IVAR", /* 330 */
+ (char *) "VARNF", /* 331 */
+ (char *) "CALL", /* 332 */
+ (char *) "NUMBER", /* 333 */
+ (char *) "STRING", /* 334 */
+ (char *) "FIELD", /* 335 */
+ (char *) "REGEXPR", /* 336 */
+ (char *) "GETLINE", /* 337 */
+ (char *) "RETURN", /* 338 */
+ (char *) "SPLIT", /* 339 */
+ (char *) "SUBSTR", /* 340 */
+ (char *) "WHILE", /* 341 */
+ (char *) "CAT", /* 342 */
+ (char *) "NOT", /* 343 */
+ (char *) "UMINUS", /* 344 */
+ (char *) "POWER", /* 345 */
+ (char *) "DECR", /* 346 */
+ (char *) "INCR", /* 347 */
+ (char *) "INDIRECT", /* 348 */
+ (char *) "LASTTOKEN", /* 349 */
+};
+
+
+/*
+ * proctab[]: one routine per token, in the same token order as printname[]
+ * (the trailing comment names the token for each slot).  Tokens without a
+ * routine of their own are given nullproc.
+ * NOTE(review): presumably indexed by (token - FIRSTTOKEN) by the
+ * interpreter in run.c -- the indexing code is not visible here.
+ */
+Cell *(*proctab[93])(Node **, int) = {
+ nullproc, /* FIRSTTOKEN */
+ program, /* PROGRAM */
+ pastat, /* PASTAT */
+ dopa2, /* PASTAT2 */
+ nullproc, /* XBEGIN */
+ nullproc, /* XEND */
+ nullproc, /* NL */
+ array, /* ARRAY */
+ matchop, /* MATCH */
+ matchop, /* NOTMATCH */
+ nullproc, /* MATCHOP */
+ nullproc, /* FINAL */
+ nullproc, /* DOT */
+ nullproc, /* ALL */
+ nullproc, /* CCL */
+ nullproc, /* NCCL */
+ nullproc, /* CHAR */
+ nullproc, /* OR */
+ nullproc, /* STAR */
+ nullproc, /* QUEST */
+ nullproc, /* PLUS */
+ boolop, /* AND */
+ boolop, /* BOR */
+ nullproc, /* APPEND */
+ relop, /* EQ */
+ relop, /* GE */
+ relop, /* GT */
+ relop, /* LE */
+ relop, /* LT */
+ relop, /* NE */
+ instat, /* IN */
+ arg, /* ARG */
+ bltin, /* BLTIN */
+ jump, /* BREAK */
+ closefile, /* CLOSE */
+ jump, /* CONTINUE */
+ adelete, /* DELETE */
+ dostat, /* DO */
+ jump, /* EXIT */
+ forstat, /* FOR */
+ nullproc, /* FUNC */
+ sub, /* SUB */
+ gsub, /* GSUB */
+ ifstat, /* IF */
+ sindex, /* INDEX */
+ nullproc, /* LSUBSTR */
+ matchop, /* MATCHFCN */
+ jump, /* NEXT */
+ jump, /* NEXTFILE */
+ arith, /* ADD */
+ arith, /* MINUS */
+ arith, /* MULT */
+ arith, /* DIVIDE */
+ arith, /* MOD */
+ assign, /* ASSIGN */
+ nullproc, /* ASGNOP */
+ assign, /* ADDEQ */
+ assign, /* SUBEQ */
+ assign, /* MULTEQ */
+ assign, /* DIVEQ */
+ assign, /* MODEQ */
+ assign, /* POWEQ */
+ printstat, /* PRINT */
+ awkprintf, /* PRINTF */
+ awksprintf, /* SPRINTF */
+ nullproc, /* ELSE */
+ intest, /* INTEST */
+ condexpr, /* CONDEXPR */
+ incrdecr, /* POSTINCR */
+ incrdecr, /* PREINCR */
+ incrdecr, /* POSTDECR */
+ incrdecr, /* PREDECR */
+ nullproc, /* VAR */
+ nullproc, /* IVAR */
+ getnf, /* VARNF */
+ call, /* CALL */
+ nullproc, /* NUMBER */
+ nullproc, /* STRING */
+ nullproc, /* FIELD */
+ nullproc, /* REGEXPR */
+ getline, /* GETLINE */
+ jump, /* RETURN */
+ split, /* SPLIT */
+ substr, /* SUBSTR */
+ whilestat, /* WHILE */
+ cat, /* CAT */
+ boolop, /* NOT */
+ arith, /* UMINUS */
+ arith, /* POWER */
+ nullproc, /* DECR */
+ nullproc, /* INCR */
+ indirect, /* INDIRECT */
+ nullproc, /* LASTTOKEN */
+};
+
+/*
+ * tokname: printable name of token n.
+ * Tokens in FIRSTTOKEN..LASTTOKEN come from printname[]; any other value
+ * is formatted as "token <n>" into a static buffer that the next such
+ * call overwrites.
+ */
+char *tokname(int n)
+{
+	static char buf[100];
+
+	if (n >= FIRSTTOKEN && n <= LASTTOKEN)
+		return printname[n-FIRSTTOKEN];
+	sprintf(buf, "token %d", n);
+	return buf;
+}
diff --git a/usr.bin/awk/proto.h b/usr.bin/awk/proto.h
new file mode 100644
index 00000000000..9a818b29fa1
--- /dev/null
+++ b/usr.bin/awk/proto.h
@@ -0,0 +1,181 @@
+/****************************************************************
+Copyright (C) AT&T and Lucent Technologies 1996
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the names of AT&T or Lucent Technologies
+or any of their entities not be used in advertising or publicity
+pertaining to distribution of the software without specific,
+written prior permission.
+
+AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
+ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+****************************************************************/
+
+/* parser entry point and helpers; yyparse() is called from main().  */
+/* NOTE(review): presumably defined by the grammar (awkgram.y) -- confirm. */
+extern void setfname(Cell *);
+extern int constnode(Node *);
+extern char *strnode(Node *);
+extern Node *notnull(Node *);
+extern int yyparse(void);
+
+/* lexical analyser interface.  NOTE(review): presumably awklex.l -- confirm. */
+extern int yylex(void);
+extern void startreg(void);
+extern int lex_input(void);
+extern void unputstr(char *);
+
+/* regular-expression parsing and matching on `fa' automata. */
+/* NOTE(review): presumably defined in b.c -- confirm. */
+extern fa *makedfa(char *, int);
+extern fa *mkdfa(char *, int);
+extern int makeinit(fa *, int);
+extern void penter(Node *);
+extern void freetr(Node *);
+extern int hexstr(char **);
+extern int quoted(char **);
+extern char *cclenter(char *);
+extern void overflo(char *);
+extern void cfoll(fa *, Node *);
+extern int first(Node *);
+extern void follow(Node *);
+extern int member(int, char *);
+extern int match(fa *, char *);
+extern int pmatch(fa *, char *);
+extern int nematch(fa *, char *);
+extern Node *reparse(char *);
+extern Node *regexp(void);
+extern Node *primary(void);
+extern Node *concat(Node *);
+extern Node *alt(Node *);
+extern Node *unary(Node *);
+extern int relex(void);
+extern int cgoto(fa *, int, int);
+extern void freefa(fa *);
+
+/* program-text input; pgetc() is defined in main.c. */
+/* NOTE(review): main.c contains no definition of init_input_source() -- */
+/* confirm that it is defined elsewhere or drop the declaration. */
+extern int pgetc(void);
+extern void init_input_source(void);
+
+/* parse-tree construction (parse.c) and the token tables (proctab.c) */
+extern Node *nodealloc(int);
+extern Node *exptostat(Node *);
+extern Node *node1(int, Node *);
+extern Node *node2(int, Node *, Node *);
+extern Node *node3(int, Node *, Node *, Node *);
+extern Node *node4(int, Node *, Node *, Node *, Node *);
+extern Node *stat3(int, Node *, Node *, Node *);
+extern Node *op2(int, Node *, Node *);
+extern Node *op1(int, Node *);
+extern Node *stat1(int, Node *);
+extern Node *op3(int, Node *, Node *, Node *);
+extern Node *op4(int, Node *, Node *, Node *, Node *);
+extern Node *stat2(int, Node *, Node *);
+extern Node *stat4(int, Node *, Node *, Node *, Node *);
+extern Node *valtonode(Cell *, int);
+extern Node *rectonode(void);
+extern Node *makearr(Node *);
+extern Node *pa2stat(Node *, Node *, Node *);
+extern Node *linkum(Node *, Node *);
+extern void defn(Cell *, Node *, Node *);
+extern int isarg(char *);
+extern char *tokname(int);
+extern Cell *(*proctab[])(Node **, int);
+
+/* symbol tables and Cell value access; syminit(), arginit(), envinit() */
+/* and makesymtab() are called from main(). */
+/* NOTE(review): presumably defined in tran.c -- confirm. */
+extern void syminit(void);
+extern void arginit(int, char **);
+extern void envinit(char **);
+extern Array *makesymtab(int);
+extern void freesymtab(Cell *);
+extern void freeelem(Cell *, char *);
+extern Cell *setsymtab(char *, char *, double, unsigned int, Array *);
+extern int hash(char *, int);
+extern void rehash(Array *);
+extern Cell *lookup(char *, Array *);
+extern double setfval(Cell *, double);
+extern void funnyvar(Cell *, char *);
+extern char *setsval(Cell *, char *);
+extern double getfval(Cell *);
+extern char *getsval(Cell *);
+extern char *tostring(char *);
+extern char *qstring(char *, int);
+
+/* record and field handling, error reporting, and the isclvar()/isnumber() */
+/* string tests.  NOTE(review): presumably defined in lib.c -- confirm. */
+extern void recinit(unsigned int);
+extern void initgetrec(void);
+extern int getrec(char *);
+extern void nextfile(void);
+extern int readrec(char *buf, int bufsize, FILE *inf);
+extern char *getargv(int);
+extern void setclvar(char *);
+extern void fldbld(void);
+extern void cleanfld(int, int);
+extern void newfld(int);
+extern int refldbld(char *, char *);
+extern void recbld(void);
+extern Cell *fieldadr(int);
+extern void yyerror(char *);
+extern void fpecatch(int);
+extern void bracecheck(void);
+extern void bcheck2(int, int, int);
+extern void error(int, char *);
+extern void eprint(void);
+extern void bclass(int);
+extern double errcheck(double, char *);
+extern int isclvar(char *);
+extern int isnumber(char *);
+
+/* interpreter.  The functions of type Cell *(Node **, int) have the element */
+/* type of proctab[]; run() is called from main(). */
+/* NOTE(review): presumably defined in run.c -- confirm. */
+extern void run(Node *);
+extern Cell *execute(Node *);
+extern Cell *program(Node **, int);
+extern Cell *call(Node **, int);
+extern Cell *copycell(Cell *);
+extern Cell *arg(Node **, int);
+extern Cell *jump(Node **, int);
+extern Cell *getline(Node **, int);
+extern Cell *getnf(Node **, int);
+extern Cell *array(Node **, int);
+extern Cell *adelete(Node **, int);
+extern Cell *intest(Node **, int);
+extern Cell *matchop(Node **, int);
+extern Cell *boolop(Node **, int);
+extern Cell *relop(Node **, int);
+extern void tfree(Cell *);
+extern Cell *gettemp(void);
+extern Cell *field(Node **, int);
+extern Cell *indirect(Node **, int);
+extern Cell *substr(Node **, int);
+extern Cell *sindex(Node **, int);
+extern int format(char *, int, char *, Node *);
+extern Cell *awksprintf(Node **, int);
+extern Cell *awkprintf(Node **, int);
+extern Cell *arith(Node **, int);
+extern double ipow(double, int);
+extern Cell *incrdecr(Node **, int);
+extern Cell *assign(Node **, int);
+extern Cell *cat(Node **, int);
+extern Cell *pastat(Node **, int);
+extern Cell *dopa2(Node **, int);
+extern Cell *split(Node **, int);
+extern Cell *condexpr(Node **, int);
+extern Cell *ifstat(Node **, int);
+extern Cell *whilestat(Node **, int);
+extern Cell *dostat(Node **, int);
+extern Cell *forstat(Node **, int);
+extern Cell *instat(Node **, int);
+extern Cell *bltin(Node **, int);
+extern Cell *printstat(Node **, int);
+extern Cell *nullproc(Node **, int);
+extern FILE *redirect(int, Node *);
+extern FILE *openfile(int, char *);
+extern char *filename(FILE *);
+extern Cell *closefile(Node **, int);
+extern void closeall(void);
+extern Cell *sub(Node **, int);
+extern Cell *gsub(Node **, int);
+
+/* local declarations of the popen()/pclose() pipe functions. */
+/* NOTE(review): presumably for systems whose <stdio.h> omits them -- confirm. */
+extern FILE *popen(const char *, const char *);
+extern int pclose(FILE *);
diff --git a/usr.bin/awk/run.c b/usr.bin/awk/run.c
new file mode 100644
index 00000000000..480173e8539
--- /dev/null
+++ b/usr.bin/awk/run.c
@@ -0,0 +1,1849 @@
+/****************************************************************
+Copyright (C) AT&T and Lucent Technologies 1996
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the names of AT&T or Lucent Technologies
+or any of their entities not be used in advertising or publicity
+pertaining to distribution of the software without specific,
+written prior permission.
+
+AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
+ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+****************************************************************/
+
+#define DEBUG
+#include <stdio.h>
+#include <ctype.h>
+#include <setjmp.h>
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include "awk.h"
+#include "awkgram.h"
+
+/*
+ * tempfree(x): hand x back to tfree() only if istemp(x) says it is a
+ * temporary.  The expansion ends in a dangling "else", so the caller's
+ * trailing ';' completes the statement: always write "tempfree(x);".
+ * Note that x is evaluated twice.
+ */
+#define tempfree(x) if (istemp(x)) tfree(x); else
+
+/*
+ * Debugging alternative, kept for reference: a real function that also
+ * warns when an OCELL's csub is outside the CUNK..CFREE range.
+#undef tempfree
+
+void tempfree(Cell *p) {
+ if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
+ ERROR "bad csub %d in Cell %d %s",
+ p->csub, p->ctype, p->sval WARNING;
+ }
+ if (istemp(p))
+ tfree(p);
+}
+*/
+
+/* fall back to _NFILE for FOPEN_MAX when <stdio.h> supplies only the former */
+#ifdef _NFILE
+#ifndef FOPEN_MAX
+#define FOPEN_MAX _NFILE
+#endif
+#endif
+
+/* last resort when neither FOPEN_MAX nor _NFILE is defined */
+#ifndef FOPEN_MAX
+#define FOPEN_MAX 40 /* max number of open files */
+#endif
+
+#ifndef RAND_MAX
+#define RAND_MAX 32767 /* all that ansi guarantees */
+#endif
+
+/* setjmp target armed in program(); jump() longjmps to it on "exit" */
+jmp_buf env;
+
+#define PA2NUM 29 /* max number of pat,pat patterns allowed */
+int paircnt; /* number of them in use */
+int pairstack[PA2NUM]; /* state of each pat,pat */
+
+Node *winner = NULL; /* root of parse tree */
+Cell *tmps; /* free temporary cells for execution */
+
+/*
+ * Statically allocated constant cells.  Routines return pointers to these
+ * (true/false for boolean results, j* for control-flow signals) instead of
+ * allocating a temporary.
+ */
+static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM };
+Cell *true = &truecell;
+static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM };
+Cell *false = &falsecell;
+static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM };
+Cell *jbreak = &breakcell;
+static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM };
+Cell *jcont = &contcell;
+static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM };
+Cell *jnext = &nextcell;
+static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM };
+Cell *jnextfile = &nextfilecell;
+static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM };
+Cell *jexit = &exitcell;
+static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM };
+Cell *jret = &retcell;
+/* template copied into every cell handed out by gettemp() */
+static Cell tempcell ={ OCELL, CTEMP, 0, 0, 0.0, NUM };
+
+Node *curnode = NULL; /* the node being executed, for debugging */
+
+/* run: entry point of execution; interpret the tree at a, then call closeall() */
+void run(Node *a)
+{
+    (void) execute(a);    /* the resulting cell is not needed at top level */
+    closeall();
+}
+
+/*
+ * execute: run the statement/expression list starting at u.
+ *
+ * A NULL list yields `true`.  Value nodes return their stored Cell
+ * directly; every other node is dispatched through proctab[].  After each
+ * node, fields or the record are rebuilt if the result refers to them and
+ * they are stale.  Walking stops at the first expression node, at the
+ * first jump result, or at the end of the list; intermediate statement
+ * results are released with tempfree().
+ */
+Cell *execute(Node *u)
+{
+    Node *cur = u;
+    Cell *res;
+
+    if (cur == NULL)
+        return(true);
+    for (;;) {
+        curnode = cur;
+        if (isvalue(cur)) {
+            res = (Cell *)(cur->narg[0]);
+        } else {
+            if (notlegal(cur->nobj))    /* probably a Cell* but too risky to print */
+                ERROR "illegal statement" FATAL;
+            res = (*proctab[cur->nobj-FIRSTTOKEN])(cur->narg, cur->nobj);
+        }
+        /* bring $0 / $n up to date before anyone looks at the result */
+        if ((res->tval & FLD) && !donefld)
+            fldbld();
+        else if ((res->tval & REC) && !donerec)
+            recbld();
+        if (isvalue(cur) || isexpr(cur) || isjump(res) || cur->nnext == NULL)
+            return(res);
+        tempfree(res);
+        cur = cur->nnext;
+    }
+}
+
+
+/*
+ * program: execute a whole awk program.
+ *   a[0] = BEGIN actions, a[1] = main body, a[2] = END actions.
+ *
+ * "exit" is implemented by jump() as longjmp(env, 1).  The first setjmp
+ * routes an exit from BEGIN or the main loop to the END section; the
+ * second catches an exit raised inside END itself.  Always returns `true`.
+ */
+Cell *program(Node **a, int n)
+{
+    Cell *x;
+
+    if (setjmp(env) != 0)
+        goto ex;
+    if (a[0]) {        /* BEGIN */
+        x = execute(a[0]);
+        if (isexit(x))
+            return(true);
+        if (isjump(x))
+            ERROR "illegal break, continue, next or nextfile from BEGIN" FATAL;
+        tempfree(x);
+    }
+    if (a[1] || a[2])
+        while (getrec(record) > 0) {
+            x = execute(a[1]);
+            if (isexit(x))
+                break;
+            tempfree(x);
+        }
+  ex:
+    if (setjmp(env) != 0)    /* handles exit within END */
+        goto ex1;
+    if (a[2]) {        /* END */
+        x = execute(a[2]);
+        /* nextfile is rejected here too, as the message has always claimed */
+        if (isbreak(x) || isnext(x) || isnextfile(x) || iscont(x))
+            ERROR "illegal break, continue, next or nextfile from END" FATAL;
+        tempfree(x);
+    }
+  ex1:
+    return(true);
+}
+
+/* One entry per active user-function call; filled in and popped by call(). */
+struct Frame { /* stack frame for awk function calls */
+ int nargs; /* number of arguments in this call */
+ Cell *fcncell; /* pointer to Cell for function */
+ Cell **args; /* pointer to array of arguments after execute */
+ Cell *retval; /* return value */
+};
+
+#define NARGS 50 /* max args in a call */
+
+/* call() allocates the frame array on first use and grows it 100 at a time */
+struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
+int nframe = 0; /* number of frames allocated */
+struct Frame *fp = NULL; /* frame pointer. bottom level unused */
+
+/*
+ * call: invoke a user-defined function.
+ *   a[0] evaluates to the function's Cell, a[1] is the list of actual
+ *   argument expressions.
+ *
+ * Actual arguments are evaluated left to right; arrays are passed by
+ * reference, everything else through copycell().  Parameters declared but
+ * not supplied become fresh temporaries initialised from newcopycell (they
+ * serve as locals).  A new Frame is pushed, the body (fcn->sval) is
+ * executed, argument cells are released, and fp->retval is returned.
+ */
+Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
+{
+ static Cell newcopycell = { OCELL, CCOPY, 0, (char *) "", 0.0, NUM|STR|DONTFREE };
+ int i, ncall, ndef;
+ Node *x;
+ Cell *args[NARGS], *oargs[NARGS], *y, *z, *fcn;
+ char *s;
+
+ fcn = execute(a[0]); /* the function itself */
+ s = fcn->nval;
+ if (!isfunc(fcn))
+ ERROR "calling undefined function %s", s FATAL;
+ /* first call ever: allocate the initial block of 100 frames */
+ if (frame == NULL) {
+ fp = frame = (struct Frame *) calloc(nframe += 100, sizeof(struct Frame));
+ if (frame == NULL)
+ ERROR "out of space for stack frames calling %s", s FATAL;
+ }
+ for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
+ ncall++;
+ ndef = (int) fcn->fval; /* args in defn */
+ dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, fp-frame) );
+ if (ncall > ndef)
+ ERROR "function %s called with %d args, uses only %d",
+ s, ncall, ndef WARNING;
+ /* args[] and oargs[] are fixed-size; the sum is a conservative bound */
+ if (ncall + ndef > NARGS)
+ ERROR "function %s has %d arguments, limit %d", s, ncall+ndef, NARGS FATAL;
+ for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */
+ dprintf( ("evaluate args[%d], fp=%d:\n", i, fp-frame) );
+ y = execute(x);
+ oargs[i] = y;
+ dprintf( ("args[%d]: %s %f <%s>, t=%o\n",
+ i, y->nval, y->fval, isarr(y) ? "(array)" : (char*) y->sval, y->tval) );
+ if (isfunc(y))
+ ERROR "can't use function %s as argument in %s", y->nval, s FATAL;
+ if (isarr(y))
+ args[i] = y; /* arrays by ref */
+ else
+ args[i] = copycell(y);
+ tempfree(y);
+ }
+ for ( ; i < ndef; i++) { /* add null args for ones not provided */
+ args[i] = gettemp();
+ *args[i] = newcopycell;
+ }
+ fp++; /* now ok to up frame */
+ if (fp >= frame + nframe) {
+ int dfp = fp - frame; /* old index */
+ /* realloc may move the block, so fp is recomputed from its index */
+ frame = (struct Frame *)
+ realloc((char *) frame, (nframe += 100) * sizeof(struct Frame));
+ if (frame == NULL)
+ ERROR "out of space for stack frames in %s", s FATAL;
+ fp = frame + dfp;
+ }
+ fp->fcncell = fcn;
+ fp->args = args;
+ fp->nargs = ndef; /* number defined with (excess are locals) */
+ fp->retval = gettemp();
+
+ dprintf( ("start exec of %s, fp=%d\n", s, fp-frame) );
+ y = execute((Node *)(fcn->sval)); /* execute body */
+ dprintf( ("finished exec of %s, fp=%d\n", s, fp-frame) );
+
+ for (i = 0; i < ndef; i++) {
+ Cell *t = fp->args[i];
+ if (isarr(t)) {
+ /* a CCOPY cell that is now an array was turned into one by the body */
+ if (t->csub == CCOPY) {
+ if (i >= ncall) {
+ /* it was a local: discard the table */
+ freesymtab(t);
+ t->csub = CTEMP;
+ } else {
+ /* it was a caller argument: hand the new array back to it */
+ oargs[i]->tval = t->tval;
+ oargs[i]->tval &= ~(STR|NUM|DONTFREE);
+ oargs[i]->sval = t->sval;
+ tempfree(t);
+ }
+ }
+ } else if (t != y) { /* kludge to prevent freeing twice */
+ t->csub = CTEMP;
+ tempfree(t);
+ }
+ }
+ tempfree(fcn);
+ /* NOTE(review): this early return leaves fp un-decremented */
+ if (isexit(y) || isnext(y) || isnextfile(y))
+ return y;
+ tempfree(y); /* this can free twice! */
+ z = fp->retval; /* return value */
+ dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) );
+ fp--;
+ return(z);
+}
+
+/*
+ * copycell: duplicate x into a fresh temporary.
+ * The copy is tagged CCOPY (so tempfree() leaves it alone until call()
+ * retags it), owns its own string, and drops the CON/FLD/REC/DONTFREE
+ * flags of the original.
+ */
+Cell *copycell(Cell *x)
+{
+    Cell *dup = gettemp();
+
+    dup->csub = CCOPY;    /* prevents freeing until call is over */
+    dup->nval = x->nval;
+    dup->fval = x->fval;
+    /* copy is not constant or field; is DONTFREE right? */
+    dup->tval = x->tval & ~(CON|FLD|REC|DONTFREE);
+    if (x->sval != NULL)
+        dup->sval = tostring(x->sval);
+    else
+        dup->sval = NULL;
+    return dup;
+}
+
+/*
+ * arg: fetch an argument of the currently executing function.
+ * a[0] holds the zero-based argument number, stored as an integer in the
+ * pointer slot; the incoming n is ignored.
+ */
+Cell *arg(Node **a, int n)
+{
+    int argno = (int) a[0];    /* argument number, counting from 0 */
+
+    dprintf( ("arg(%d), fp->nargs=%d\n", argno, fp->nargs) );
+    if (argno+1 > fp->nargs)
+        ERROR "argument #%d of function %s was not supplied",
+            argno+1, fp->fcncell->nval FATAL;
+    return fp->args[argno];
+}
+
+/*
+ * jump: implement the control-transfer statements; n selects which one.
+ * Except for EXIT, the result is one of the static jump cells (jret,
+ * jnext, jnextfile, jbreak, jcont), which callers recognise and propagate.
+ */
+Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
+{
+ Cell *y;
+
+ switch (n) {
+ case EXIT:
+ /* optional exit status goes into errorflag, then unwind to program() */
+ if (a[0] != NULL) {
+ y = execute(a[0]);
+ errorflag = getfval(y);
+ tempfree(y);
+ }
+ longjmp(env, 1);
+ case RETURN:
+ /* copy the value into the frame's retval, keeping its string/number nature */
+ if (a[0] != NULL) {
+ y = execute(a[0]);
+ if ((y->tval & (STR|NUM)) == (STR|NUM)) {
+ setsval(fp->retval, getsval(y));
+ fp->retval->fval = getfval(y);
+ fp->retval->tval |= NUM;
+ }
+ else if (y->tval & STR)
+ setsval(fp->retval, getsval(y));
+ else if (y->tval & NUM)
+ setfval(fp->retval, getfval(y));
+ else /* can't happen */
+ ERROR "bad type variable %d", y->tval FATAL;
+ tempfree(y);
+ }
+ return(jret);
+ case NEXT:
+ return(jnext);
+ case NEXTFILE:
+ /* nextfile() is called right here, before the jump cell propagates */
+ nextfile();
+ return(jnextfile);
+ case BREAK:
+ return(jbreak);
+ case CONTINUE:
+ return(jcont);
+ default: /* can't happen */
+ ERROR "illegal jump type %d", n FATAL;
+ }
+ return 0; /* not reached */
+}
+
+/*
+ * getline: all forms of the getline statement.
+ * Returns a temporary holding the count n produced by readrec()/getrec(),
+ * or -1 when the file or pipe cannot be opened.
+ * Note that the local `fp` below shadows the global frame pointer `fp`.
+ */
+Cell *getline(Node **a, int n) /* get next line from specific input */
+{ /* a[0] is variable, a[1] is operator, a[2] is filename */
+ Cell *r, *x;
+ char buf[RECSIZE];
+ FILE *fp;
+
+ fflush(stdout); /* in case someone is waiting for a prompt */
+ r = gettemp();
+ if (a[1] != NULL) { /* getline < file */
+ x = execute(a[2]); /* filename */
+ /* the parse-tree slot itself is rewritten, so this happens once per node */
+ if ((int) a[1] == '|') /* input pipe */
+ a[1] = (Node *) LE; /* arbitrary flag */
+ if ((x->tval & STR) == 0)
+ x = copycell(x);
+ fp = openfile((int) a[1], getsval(x));
+ tempfree(x);
+ if (fp == NULL)
+ n = -1;
+ else
+ n = readrec(buf, sizeof(buf), fp);
+ if (n <= 0) {
+ ;
+ } else if (a[0] != NULL) { /* getline var <file */
+ setsval(execute(a[0]), buf);
+ } else { /* getline <file */
+ /* install buf as the new $0 and mark the fields as stale */
+ if (!(recloc->tval & DONTFREE))
+ xfree(recloc->sval);
+ strcpy(record, buf);
+ recloc->sval = record;
+ recloc->tval = REC | STR | DONTFREE;
+ if (isnumber(recloc->sval)) {
+ recloc->fval = atof(recloc->sval);
+ recloc->tval |= NUM;
+ }
+ donerec = 1; donefld = 0;
+ }
+ } else { /* bare getline; use current input */
+ if (a[0] == NULL) /* getline */
+ n = getrec(record);
+ else { /* getline var */
+ /* NOTE(review): var is assigned even when getrec() reports n <= 0 */
+ n = getrec(buf);
+ setsval(execute(a[0]), buf);
+ }
+ }
+ setfval(r, (Awkfloat) n);
+ return r;
+}
+
+/* getnf: return the NF cell kept in a[0], splitting the record into fields first if needed */
+Cell *getnf(Node **a, int n)
+{
+    if (!donefld)
+        fldbld();
+    return (Cell *) a[0];
+}
+
+/*
+ * array: reference an array element, creating it if necessary.
+ *   a[0] evaluates to the Cell holding the symbol table,
+ *   a[1] is the list of subscript expressions.
+ *
+ * The subscripts are joined with SUBSEP into one key.  The key buffer is
+ * RECSIZE bytes; a key that would not fit is a fatal error instead of an
+ * unchecked strcat() past the end of the buffer.  A non-array Cell is
+ * converted into an (empty) array first.  Returns the element's Cell.
+ */
+Cell *array(Node **a, int n)
+{
+    Cell *x, *y, *z;
+    char *s;
+    Node *np;
+    char buf[RECSIZE];
+    size_t len = 0, slen, seplen;
+
+    x = execute(a[0]);    /* Cell* for symbol table */
+    buf[0] = 0;
+    for (np = a[1]; np; np = np->nnext) {
+        y = execute(np);    /* subscript */
+        if ((y->tval & STR) == 0)
+            y = copycell(y);
+        s = getsval(y);
+        slen = strlen(s);
+        seplen = np->nnext ? strlen(*SUBSEP) : 0;
+        /* len < sizeof(buf) always holds, so the subtraction cannot wrap */
+        if (slen + seplen >= sizeof(buf) - len)
+            ERROR "array subscript %.30s... too long", s FATAL;
+        strcpy(buf + len, s);
+        len += slen;
+        if (np->nnext) {
+            strcpy(buf + len, *SUBSEP);
+            len += seplen;
+        }
+        tempfree(y);
+    }
+    if (!isarr(x)) {
+        dprintf( ("making %s into an array\n", x->nval) );
+        if (freeable(x))
+            xfree(x->sval);
+        x->tval &= ~(STR|NUM|DONTFREE);
+        x->tval |= ARR;
+        x->sval = (char *) makesymtab(NSYMTAB);
+    }
+    z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
+    z->ctype = OCELL;
+    z->csub = CVAR;
+    tempfree(x);
+    return(z);
+}
+
+/*
+ * adelete: the delete statement.
+ *   a[0] evaluates to the Cell holding the symbol table,
+ *   a[1] is the subscript list, or 0 to delete every element.
+ *
+ * Deleting from something that is not an array is a no-op.  With
+ * subscripts, they are joined with SUBSEP into a key of at most
+ * RECSIZE-1 bytes; a longer key is a fatal error rather than a buffer
+ * overrun.  Always returns `true`.
+ */
+Cell *adelete(Node **a, int n)
+{
+    Cell *x, *y;
+    Node *np;
+    char buf[RECSIZE], *s;
+    size_t len = 0, slen, seplen;
+
+    x = execute(a[0]);    /* Cell* for symbol table */
+    if (!isarr(x))
+        return true;
+    if (a[1] == 0) {    /* delete the elements, not the table */
+        freesymtab(x);
+        x->tval &= ~STR;
+        x->tval |= ARR;
+        x->sval = (char *) makesymtab(NSYMTAB);
+    } else {
+        buf[0] = 0;
+        for (np = a[1]; np; np = np->nnext) {
+            y = execute(np);    /* subscript */
+            if ((y->tval & STR) == 0)
+                y = copycell(y);
+            s = getsval(y);
+            slen = strlen(s);
+            seplen = np->nnext ? strlen(*SUBSEP) : 0;
+            /* len < sizeof(buf) always holds, so the subtraction cannot wrap */
+            if (slen + seplen >= sizeof(buf) - len)
+                ERROR "array subscript %.30s... too long", s FATAL;
+            strcpy(buf + len, s);
+            len += slen;
+            if (np->nnext) {
+                strcpy(buf + len, *SUBSEP);
+                len += seplen;
+            }
+            tempfree(y);
+        }
+        freeelem(x, buf);
+    }
+    tempfree(x);
+    return true;
+}
+
+/*
+ * intest: the "(subscripts) in array" membership test.
+ *   a[0] is the subscript list, a[1] evaluates to the array's Cell.
+ *
+ * A non-array operand is first turned into an empty array.  Subscripts
+ * are joined with SUBSEP into a key of at most RECSIZE-1 bytes; a longer
+ * key is a fatal error rather than a buffer overrun.  Returns `true` if
+ * lookup() finds the key, `false` otherwise.
+ */
+Cell *intest(Node **a, int n)
+{
+    Cell *x, *ap, *k;
+    Node *p;
+    char buf[RECSIZE];
+    char *s;
+    size_t len = 0, slen, seplen;
+
+    ap = execute(a[1]);    /* array name */
+    if (!isarr(ap)) {
+        dprintf( ("making %s into an array\n", ap->nval) );
+        if (freeable(ap))
+            xfree(ap->sval);
+        ap->tval &= ~(STR|NUM|DONTFREE);
+        ap->tval |= ARR;
+        ap->sval = (char *) makesymtab(NSYMTAB);
+    }
+    buf[0] = 0;
+    for (p = a[0]; p; p = p->nnext) {
+        x = execute(p);    /* expr */
+        if ((x->tval & STR) == 0)
+            x = copycell(x);
+        s = getsval(x);
+        slen = strlen(s);
+        seplen = p->nnext ? strlen(*SUBSEP) : 0;
+        /* len < sizeof(buf) always holds, so the subtraction cannot wrap */
+        if (slen + seplen >= sizeof(buf) - len)
+            ERROR "array subscript %.30s... too long", s FATAL;
+        strcpy(buf + len, s);
+        len += slen;
+        tempfree(x);
+        if (p->nnext) {
+            strcpy(buf + len, *SUBSEP);
+            len += seplen;
+        }
+    }
+    k = lookup(buf, (Array *) ap->sval);
+    tempfree(ap);
+    if (k == NULL)
+        return(false);
+    else
+        return(true);
+}
+
+
+/*
+ * matchop: the ~ and !~ operators and the match() builtin.
+ *   a[1] is the text to search; a[2] is either a precompiled fa* (when
+ *   a[0] == 0) or an expression yielding the regular expression string.
+ * For MATCHFCN the matcher is pmatch(); RSTART and RLENGTH are set from
+ * patbeg/patlen and the start position is returned in a temporary.
+ * For MATCH/NOTMATCH the result is `true` or `false`.
+ */
+Cell *matchop(Node **a, int n) /* ~ and match() */
+{
+ Cell *x, *y;
+ char *s, *t;
+ int i;
+ fa *pfa;
+ int (*mf)(fa *, char *) = match, mode = 0;
+
+ if (n == MATCHFCN) {
+ mf = pmatch;
+ mode = 1;
+ }
+ x = execute(a[1]); /* a[1] = target text */
+ if ((x->tval & STR) == 0)
+ x = copycell(x);
+ s = getsval(x);
+ if (a[0] == 0) /* a[0] == 0: a[2] is an already-compiled reg expr */
+ i = (*mf)((fa *) a[2], s);
+ else {
+ y = execute(a[2]); /* a[2] = regular expr */
+ if ((y->tval & STR) == 0)
+ y = copycell(y);
+ t = getsval(y);
+ pfa = makedfa(t, mode);
+ i = (*mf)(pfa, s);
+ tempfree(y);
+ }
+ tempfree(x);
+ if (n == MATCHFCN) {
+ /* NOTE(review): s is used for pointer arithmetic after tempfree(x) */
+ int start = patbeg - s + 1;
+ if (patlen < 0)
+ start = 0;
+ setfval(rstartloc, (Awkfloat) start);
+ setfval(rlengthloc, (Awkfloat) patlen);
+ x = gettemp();
+ x->tval = NUM;
+ x->fval = start;
+ return x;
+ } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
+ return(true);
+ else
+ return(false);
+}
+
+
+/*
+ * boolop: a[0] || a[1], a[0] && a[1], !a[0].
+ * The left operand is always evaluated; the right one only when the left
+ * does not already decide the result.  Returns `true` or `false`.
+ */
+Cell *boolop(Node **a, int n)
+{
+    Cell *lhs, *rhs;
+    int truth;
+
+    lhs = execute(a[0]);
+    truth = istrue(lhs);
+    tempfree(lhs);
+
+    if (n == NOT)
+        return truth ? false : true;
+    if (n != BOR && n != AND) {    /* can't happen */
+        ERROR "unknown boolean operator %d", n FATAL;
+        return 0;    /*NOTREACHED*/
+    }
+    /* short circuit: || with a true left side, && with a false one */
+    if (n == BOR && truth)
+        return(true);
+    if (n == AND && !truth)
+        return(false);
+
+    rhs = execute(a[1]);
+    truth = istrue(rhs);
+    tempfree(rhs);
+    return truth ? true : false;
+}
+
+/*
+ * relop: a[0] < a[1], a[0] <= a[1], etc.
+ * Operands that both carry NUM are compared numerically; otherwise both
+ * are compared as strings with strcmp().  Returns `true` or `false`.
+ */
+Cell *relop(Node **a, int n)
+{
+    Cell *lhs, *rhs;
+    Awkfloat diff;
+    int cmp, holds;
+
+    lhs = execute(a[0]);
+    rhs = execute(a[1]);
+    if ((lhs->tval & NUM) && (rhs->tval & NUM)) {
+        diff = lhs->fval - rhs->fval;
+        if (diff < 0)
+            cmp = -1;
+        else if (diff > 0)
+            cmp = 1;
+        else
+            cmp = 0;
+    } else {
+        if ((lhs->tval & STR) == 0)
+            lhs = copycell(lhs);
+        if ((rhs->tval & STR) == 0)
+            rhs = copycell(rhs);
+        cmp = strcmp(getsval(lhs), getsval(rhs));
+    }
+    tempfree(lhs);
+    tempfree(rhs);
+
+    switch (n) {
+    case LT: holds = (cmp < 0);  break;
+    case LE: holds = (cmp <= 0); break;
+    case NE: holds = (cmp != 0); break;
+    case EQ: holds = (cmp == 0); break;
+    case GE: holds = (cmp >= 0); break;
+    case GT: holds = (cmp > 0);  break;
+    default:    /* can't happen */
+        ERROR "unknown relational operator %d", n FATAL;
+        return 0;    /*NOTREACHED*/
+    }
+    return holds ? true : false;
+}
+
+/*
+ * tfree: return a temporary cell to the free list headed by tmps,
+ * releasing its string first when it is freeable.  Finding the cell
+ * already at the head of the list means it is being freed twice.
+ */
+void tfree(Cell *a)
+{
+    if (freeable(a)) {
+        xfree(a->sval);
+    }
+    if (tmps == a) {
+        ERROR "tempcell list is curdled" FATAL;
+    }
+    /* push onto the free list */
+    a->cnext = tmps;
+    tmps = a;
+}
+
+/*
+ * gettemp: pop a cell off the free list and initialise it from tempcell.
+ * When the list is empty a batch of 100 cells is allocated and chained
+ * together through cnext.
+ */
+Cell *gettemp(void)
+{
+    Cell *cell;
+    int k;
+
+    if (tmps == NULL) {
+        tmps = (Cell *) calloc(100, sizeof(Cell));
+        if (tmps == NULL)
+            ERROR "out of space for temporaries" FATAL;
+        for (k = 0; k < 99; k++)
+            tmps[k].cnext = &tmps[k+1];
+        tmps[99].cnext = 0;    /* end of the chain */
+    }
+    cell = tmps;
+    tmps = cell->cnext;
+    *cell = tempcell;
+    return(cell);
+}
+
+/*
+ * indirect: evaluate $(expr).
+ * The expression is converted to an int field number m and the Cell
+ * returned by fieldadr(m) is handed back, tagged OCELL/CFLD.  A value of 0
+ * that did not come from a numeric-looking string is rejected as an
+ * illegal field.
+ */
+Cell *indirect(Node **a, int n) /* $( a[0] ) */
+{
+ Cell *x;
+ int m;
+ char *s;
+
+ x = execute(a[0]);
+ if ((x->tval & STR) == 0)
+ x = copycell(x);
+ m = getfval(x);
+ /* s is assigned only when m == 0, which is also the only place it is read */
+ if (m == 0 && !isnumber(s = getsval(x))) /* suspicion! */
+ ERROR "illegal field $(%s), name \"%s\"", s, x->nval FATAL;
+ /* can x->nval ever be null??? */
+ /* ERROR "illegal field $(%s)", s FATAL; */
+ tempfree(x);
+ x = fieldadr(m);
+ x->ctype = OCELL;
+ x->csub = CFLD;
+ return(x);
+}
+
+/*
+ * substr: substr(a[0], a[1], a[2]); a[2] (the length) is optional.
+ * k is strlen(s)+1.  The start m is clamped to 1..k and the length n to
+ * 0..k-m, so the slice always lies inside the string.  An empty source
+ * string yields an empty result immediately.  The result is a temporary.
+ */
+Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
+{
+ int k, m, n;
+ char *s;
+ int temp;
+ Cell *x, *y, *z = 0;
+
+ x = execute(a[0]);
+ y = execute(a[1]);
+ if (a[2] != 0)
+ z = execute(a[2]);
+ if ((x->tval & STR) == 0)
+ x = copycell(x);
+ s = getsval(x);
+ k = strlen(s) + 1;
+ if (k <= 1) {
+ tempfree(x);
+ tempfree(y);
+ if (a[2] != 0)
+ tempfree(z);
+ x = gettemp();
+ setsval(x, "");
+ return(x);
+ }
+ /* the double-to-int conversions below truncate toward zero */
+ m = getfval(y);
+ if (m <= 0)
+ m = 1;
+ else if (m > k)
+ m = k;
+ tempfree(y);
+ if (a[2] != 0) {
+ n = getfval(z);
+ tempfree(z);
+ } else
+ n = k - 1;
+ if (n < 0)
+ n = 0;
+ else if (n > k - m)
+ n = k - m;
+ dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) );
+ y = gettemp();
+ /* cut the string in place just past the slice, copy it, then restore the byte */
+ temp = s[n+m-1]; /* with thanks to John Linderman */
+ s[n+m-1] = '\0';
+ setsval(y, s + m - 1);
+ s[n+m-1] = temp;
+ tempfree(x);
+ return(y);
+}
+
+/*
+ * sindex: index(a[0], a[1]).
+ * Returns, in a temporary, the 1-based position of the first occurrence
+ * of a[1] in a[0], or 0 when there is none.  An empty a[0] always yields
+ * 0, even for an empty a[1].
+ */
+Cell *sindex(Node **a, int nnn)
+{
+    Cell *subject, *target, *res;
+    char *text, *pat, *hit;
+    Awkfloat pos = 0.0;
+
+    subject = execute(a[0]);
+    if ((subject->tval & STR) == 0)
+        subject = copycell(subject);
+    text = getsval(subject);
+    target = execute(a[1]);
+    if ((target->tval & STR) == 0)
+        target = copycell(target);
+    pat = getsval(target);
+
+    res = gettemp();
+    /* strstr("", "") would report a hit; an empty subject must give 0 */
+    if (*text != '\0' && (hit = strstr(text, pat)) != NULL)
+        pos = (Awkfloat) (hit - text + 1);    /* origin 1 */
+    tempfree(subject);
+    tempfree(target);
+    setfval(res, pos);
+    return(res);
+}
+
+/*
+ * format: printf-like conversions.
+ *   buf, bufsize  output buffer and its size
+ *   s             awk format string
+ *   a             list of argument expressions
+ * Returns 0 on success, -1 when the output position reaches bufsize.
+ *
+ * Each conversion spec is copied into fmt[] (with '*' replaced by the
+ * evaluated width/precision), classified by its conversion letter, and
+ * handed to sprintf() with the argument coerced to the matching C type.
+ *
+ * Hardening relative to the original:
+ *  - a '*' with no argument left is a fatal "not enough args" error
+ *    instead of a NULL dereference;
+ *  - an over-long spec is diagnosed before fmt[] is overrun, not after;
+ *  - the %s size check accounts for what is already in buf;
+ *  - a spec cut short by the end of the format string no longer steps
+ *    s past the terminating NUL;
+ *  - isalpha() receives an unsigned char value.
+ */
+int format(char *buf, int bufsize, char *s, Node *a)
+{
+    char fmt[RECSIZE];
+    char *p, *t, *os;
+    Cell *x;
+    int flag = 0, n;
+
+    os = s;
+    p = buf;
+    while (*s) {
+        if (p - buf >= bufsize)
+            return -1;
+        if (*s != '%') {
+            *p++ = *s++;
+            continue;
+        }
+        if (*(s+1) == '%') {
+            *p++ = '%';
+            s += 2;
+            continue;
+        }
+        for (t=fmt; (*t++ = *s) != '\0'; s++) {
+            /* keep slack for one '*' expansion plus the "ld" rewrite below */
+            if (t >= fmt + sizeof(fmt) - 20)
+                ERROR "format item %.30s... too long", os FATAL;
+            if (isalpha((unsigned char) *s) && *s != 'l' && *s != 'h' && *s != 'L')
+                break;    /* the ansi panoply */
+            if (*s == '*') {
+                if (a == NULL)
+                    ERROR "not enough args in printf(%s)", os FATAL;
+                x = execute(a);
+                a = a->nnext;
+                /* overwrite the '*' just stored with the number */
+                sprintf((char *)t-1, "%d", (int) getfval(x));
+                t = fmt + strlen(fmt);
+                tempfree(x);
+            }
+        }
+        *t = '\0';
+        if (t >= fmt + sizeof(fmt))
+            ERROR "format item %.30s... too long", os FATAL;
+        switch (*s) {
+        case 'f': case 'e': case 'g': case 'E': case 'G':
+            flag = 1;
+            break;
+        case 'd': case 'i':
+            flag = 2;
+            if(*(s-1) == 'l') break;
+            /* force "%ld" so the (long) argument below matches */
+            *(t-1) = 'l';
+            *t = 'd';
+            *++t = '\0';
+            break;
+        case 'o': case 'x': case 'X': case 'u':
+            flag = *(s-1) == 'l' ? 2 : 3;
+            break;
+        case 's':
+            flag = 4;
+            break;
+        case 'c':
+            flag = 5;
+            break;
+        default:
+            ERROR "weird printf conversion %s", fmt WARNING;
+            flag = 0;
+            break;
+        }
+        if (a == NULL)
+            ERROR "not enough args in printf(%s)", os FATAL;
+        x = execute(a);
+        if ((x->tval & STR) == 0)
+            x = copycell(x);
+        a = a->nnext;
+        switch (flag) {
+        case 0: sprintf((char *)p, "%s", fmt);    /* unknown, so dump it too */
+            p += strlen(p);
+            sprintf((char *)p, "%s", getsval(x));
+            break;
+        case 1: sprintf((char *)p, (char *)fmt, getfval(x)); break;
+        case 2: sprintf((char *)p, (char *)fmt, (long) getfval(x)); break;
+        case 3: sprintf((char *)p, (char *)fmt, (int) getfval(x)); break;
+        case 4:
+            t = getsval(x);
+            n = strlen(t);
+            /* compare against the space left, not the whole buffer */
+            if (n >= bufsize - (p - buf))
+                ERROR "huge string (%d chars) in printf %.30s...",
+                    n, t FATAL;
+            sprintf((char *)p, (char *)fmt, t);
+            break;
+        case 5:
+            isnum(x) ? sprintf((char *)p, (char *)fmt, (int) getfval(x))
+                 : sprintf((char *)p, (char *)fmt, getsval(x)[0]);
+            break;
+        }
+        tempfree(x);
+        p += strlen(p);
+        if (*s != '\0')    /* spec may have been cut short by end of string */
+            s++;
+    }
+    *p = '\0';
+    for ( ; a; a = a->nnext)    /* evaluate any remaining args */
+        execute(a);
+    return 0;
+}
+
+/*
+ * awksprintf: sprintf(a[0], ...).
+ * a[0] is the format expression; the remaining arguments hang off its
+ * nnext link.  The text produced by format() is returned as a new string
+ * temporary.
+ */
+Cell *awksprintf(Node **a, int n)
+{
+    char out[3*RECSIZE];
+    Node *rest = a[0]->nnext;
+    Cell *fmtcell, *res;
+
+    fmtcell = execute(a[0]);
+    if ((fmtcell->tval & STR) == 0)
+        fmtcell = copycell(fmtcell);
+    if (format(out, sizeof out, getsval(fmtcell), rest) == -1)
+        ERROR "sprintf string %.30s... too long", out FATAL;
+    tempfree(fmtcell);
+
+    res = gettemp();
+    res->sval = tostring(out);
+    res->tval = STR;
+    return(res);
+}
+
+/*
+ * awkprintf: the printf statement.
+ *   a[0] is the argument list, starting with the format string;
+ *   a[1] is the redirection operator (NULL for stdout);
+ *   a[2] is the redirection target.
+ * Redirected output is flushed after every call.  Returns `true`.
+ */
+Cell *awkprintf(Node **a, int n)
+{
+    char out[3*RECSIZE];
+    Node *rest = a[0]->nnext;
+    Cell *fmtcell;
+    FILE *dest;
+
+    fmtcell = execute(a[0]);
+    if ((fmtcell->tval & STR) == 0)
+        fmtcell = copycell(fmtcell);
+    if (format(out, sizeof out, getsval(fmtcell), rest) == -1)
+        ERROR "printf string %.30s... too long", out FATAL;
+    tempfree(fmtcell);
+
+    if (a[1] != NULL) {
+        dest = redirect((int)a[1], a[2]);
+        fputs((char *)out, dest);
+        fflush(dest);
+        if (ferror(dest))
+            ERROR "write error on %s", filename(dest) FATAL;
+        return(true);
+    }
+    fputs((char *)out, stdout);
+    if (ferror(stdout))
+        ERROR "write error on stdout" FATAL;
+    return(true);
+}
+
+/*
+ * arith: binary arithmetic on a[0] and a[1], and unary minus on a[0].
+ * Both operands are reduced to Awkfloat values and released before the
+ * operation; the result is returned in a fresh temporary.
+ */
+Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
+{
+ Awkfloat i, j = 0;
+ double v;
+ Cell *x, *y, *z;
+
+ x = execute(a[0]);
+ if ((x->tval & NUM) == 0)
+ x = copycell(x);
+ i = getfval(x);
+ tempfree(x);
+ if (n != UMINUS) {
+ y = execute(a[1]);
+ if ((y->tval & NUM) == 0)
+ y = copycell(y);
+ j = getfval(y);
+ tempfree(y);
+ }
+ z = gettemp();
+ switch (n) {
+ case ADD:
+ i += j;
+ break;
+ case MINUS:
+ i -= j;
+ break;
+ case MULT:
+ i *= j;
+ break;
+ case DIVIDE:
+ if (j == 0)
+ ERROR "division by zero" FATAL;
+ i /= j;
+ break;
+ case MOD:
+ if (j == 0)
+ ERROR "division by zero in mod" FATAL;
+ /* v = integer part of i/j, so the remainder is i - j*v */
+ modf(i/j, &v);
+ i = i - j * v;
+ break;
+ case UMINUS:
+ i = -i;
+ break;
+ case POWER:
+ /* exponents with no fractional part go through ipow(), the rest through pow() */
+ if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
+ i = ipow(i, (int) j);
+ else
+ i = errcheck(pow(i, j), "pow");
+ break;
+ default: /* can't happen */
+ ERROR "illegal arithmetic operator %d", n FATAL;
+ }
+ setfval(z, i);
+ return(z);
+}
+
+/*
+ * ipow: x**n for integer n by repeated squaring.  ought to be done by
+ * pow, but isn't always.  Any n <= 0 yields 1.
+ *
+ * The bits of n are consumed from the most significant end, which gives
+ * the same sequence of multiplications as the recursive formulation
+ * ipow(x, n) = [x *] ipow(x, n/2) * ipow(x, n/2).
+ */
+double ipow(double x, int n)
+{
+    double acc = 1;
+    int bit;
+
+    if (n <= 0)
+        return 1;
+    /* locate the highest set bit of n */
+    for (bit = 0; (n >> bit) > 1; bit++)
+        ;
+    for ( ; bit >= 0; bit--) {
+        if ((n >> bit) & 1)
+            acc = x * acc * acc;
+        else
+            acc = acc * acc;
+    }
+    return acc;
+}
+
+/*
+ * incrdecr: a[0]++, a[0]--, ++a[0], --a[0].
+ * The prefix forms return the updated variable itself; the postfix forms
+ * return a temporary holding the value from before the update.
+ */
+Cell *incrdecr(Node **a, int n)
+{
+    Cell *var, *old;
+    Awkfloat before;
+    int delta;
+
+    var = execute(a[0]);
+    before = getfval(var);
+    if (n == PREINCR || n == POSTINCR)
+        delta = 1;
+    else
+        delta = -1;
+
+    switch (n) {
+    case PREINCR:
+    case PREDECR:
+        setfval(var, before + delta);
+        return(var);
+    }
+    /* postfix: capture the old value before changing the variable */
+    old = gettemp();
+    setfval(old, before);
+    setfval(var, before + delta);
+    tempfree(var);
+    return(old);
+}
+
+/*
+ * assign: plain and compound assignment.
+ * The right-hand side a[1] is evaluated before the target a[0].  Plain
+ * assignment copies the string and/or numeric value according to the
+ * source's type bits; the compound forms operate on numeric values only.
+ * Returns the target cell.
+ */
+Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
+{ /* this is subtle; don't muck with it. */
+ Cell *x, *y;
+ Awkfloat xf, yf;
+ double v;
+
+ y = execute(a[1]);
+ x = execute(a[0]);
+ if (n == ASSIGN) { /* ordinary assignment */
+ if (x == y && !(x->tval & (FLD|REC))) /* self-assignment: */
+ ; /* leave alone unless it's a field */
+ else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
+ /* source is both string and number: carry both over */
+ setsval(x, getsval(y));
+ x->fval = getfval(y);
+ x->tval |= NUM;
+ }
+ else if (y->tval & STR)
+ setsval(x, getsval(y));
+ else if (y->tval & NUM)
+ setfval(x, getfval(y));
+ else
+ funnyvar(y, "read value of");
+ tempfree(y);
+ return(x);
+ }
+ xf = getfval(x);
+ yf = getfval(y);
+ switch (n) {
+ case ADDEQ:
+ xf += yf;
+ break;
+ case SUBEQ:
+ xf -= yf;
+ break;
+ case MULTEQ:
+ xf *= yf;
+ break;
+ case DIVEQ:
+ if (yf == 0)
+ ERROR "division by zero in /=" FATAL;
+ xf /= yf;
+ break;
+ case MODEQ:
+ if (yf == 0)
+ ERROR "division by zero in %%=" FATAL;
+ /* same remainder computation as the MOD case of arith() */
+ modf(xf/yf, &v);
+ xf = xf - yf * v;
+ break;
+ case POWEQ:
+ if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */
+ xf = ipow(xf, (int) yf);
+ else
+ xf = errcheck(pow(xf, yf), "pow");
+ break;
+ default:
+ ERROR "illegal assignment operator %d", n FATAL;
+ break;
+ }
+ tempfree(y);
+ setfval(x, xf);
+ return(x);
+}
+
+/*
+ * cat: string concatenation, a[0] followed by a[1].
+ * Both operands are evaluated before either is converted to a string.
+ * The joined text is malloc'ed and handed to a new string temporary.
+ */
+Cell *cat(Node **a, int q)
+{
+    Cell *left, *right, *res;
+    int llen, rlen;
+    char *joined;
+
+    left = execute(a[0]);
+    right = execute(a[1]);
+    if ((left->tval & STR) == 0)
+        left = copycell(left);
+    if ((right->tval & STR) == 0)
+        right = copycell(right);
+    /* called for effect: make sure ->sval holds the string form */
+    getsval(left);
+    getsval(right);
+    llen = strlen(left->sval);
+    rlen = strlen(right->sval);
+    joined = (char *) malloc(llen + rlen + 1);
+    if (joined == NULL)
+        ERROR "out of space concatenating %.15s... and %.15s...",
+            left->sval, right->sval FATAL;
+    memcpy(joined, left->sval, llen);
+    memcpy(joined + llen, right->sval, rlen + 1);    /* includes the NUL */
+    tempfree(right);
+    res = gettemp();
+    res->sval = joined;
+    res->tval = STR;
+    tempfree(left);
+    return(res);
+}
+
+/*
+ * pastat: pattern-action statement, a[0] { a[1] }.
+ * With no pattern the action always runs.  Otherwise the action runs only
+ * when the pattern is true; when it is false, the pattern's own result
+ * cell is returned.
+ */
+Cell *pastat(Node **a, int n)
+{
+    Cell *x;
+
+    if (a[0] != 0) {
+        x = execute(a[0]);
+        if (!istrue(x))
+            return x;
+        tempfree(x);
+    }
+    return execute(a[1]);
+}
+
+/*
+ * dopa2: range pattern, a[0], a[1] { a[2] }.
+ * a[3] holds this pattern's index into pairstack[], whose entry is 0
+ * outside the range and 1 inside it.  The start pattern is tested only
+ * while outside; once inside, the end pattern is tested and the action is
+ * run for the current record (so the closing record is included).
+ * Returns the action's result, or `false` when outside the range.
+ */
+Cell *dopa2(Node **a, int n)
+{
+    Cell *x;
+    int *state = &pairstack[(int) a[3]];
+
+    if (*state == 0) {
+        x = execute(a[0]);
+        if (istrue(x))
+            *state = 1;
+        tempfree(x);
+    }
+    if (*state != 1)
+        return(false);
+
+    x = execute(a[1]);
+    if (istrue(x))
+        *state = 0;
+    tempfree(x);
+    return(execute(a[2]));
+}
+
+/*
+ * split: split(a[0], a[1], a[2]); a[3] tells how a[2] is to be read.
+ *   a[0] source string, a[1] destination array, a[2] separator (absent
+ *   means FS; a string expression when a[3] == STRING; a precompiled fa*
+ *   when a[3] == REGEXPR).
+ *
+ * The destination is emptied and rebuilt with keys "1", "2", ...; pieces
+ * for which isnumber() holds are stored as STR|NUM.  Four strategies:
+ *   - regular expression (REGEXPR, or a separator longer than one char);
+ *   - a single blank: pieces are runs of non-blank/tab/newline characters;
+ *   - empty separator: one element per character;
+ *   - any other single character (newline also ends a piece).
+ * The source string is cut in place with temporary NULs that are restored
+ * afterwards.  Returns the element count in a temporary.
+ */
+Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
+{
+ Cell *x = 0, *y, *ap;
+ char *s;
+ int sep;
+ /* NOTE(review): num[10] holds at most a 9-digit element index plus NUL */
+ char *t, temp, num[10], *fs = 0;
+ int n, tempstat;
+
+ y = execute(a[0]); /* source string */
+ if ((y->tval & STR) == 0)
+ y = copycell(y);
+ s = getsval(y);
+ if (a[2] == 0) /* fs string */
+ fs = *FS;
+ else if ((int) a[3] == STRING) { /* split(str,arr,"string") */
+ x = execute(a[2]);
+ if ((x->tval & STR) == 0)
+ x = copycell(x);
+ fs = getsval(x);
+ } else if ((int) a[3] == REGEXPR)
+ fs = (char*) "(regexpr)"; /* split(str,arr,/regexpr/) */
+ else
+ ERROR "illegal type of split()" FATAL;
+ sep = *fs;
+ ap = execute(a[1]); /* array name */
+ freesymtab(ap);
+ dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, ap->nval, fs) );
+ ap->tval &= ~STR;
+ ap->tval |= ARR;
+ ap->sval = (char *) makesymtab(NSYMTAB);
+
+ n = 0;
+ if ((*s != '\0' && strlen(fs) > 1) || (int) a[3] == REGEXPR) { /* reg expr */
+ fa *pfa;
+ if ((int) a[3] == REGEXPR) { /* it's ready already */
+ pfa = (fa *) a[2];
+ } else {
+ pfa = makedfa(fs, 1);
+ }
+ if (nematch(pfa,s)) {
+ /* initstat is overridden for the rescans and restored on the early exit below */
+ tempstat = pfa->initstat;
+ pfa->initstat = 2;
+ do {
+ n++;
+ sprintf((char *)num, "%d", n);
+ temp = *patbeg;
+ *patbeg = '\0';
+ if (isnumber(s))
+ setsymtab(num, s, atof((char *)s), STR|NUM, (Array *) ap->sval);
+ else
+ setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
+ *patbeg = temp;
+ s = patbeg + patlen;
+ /* separator reached the end of the string: emit a final empty piece */
+ if (*(patbeg+patlen-1) == 0 || *s == 0) {
+ n++;
+ sprintf((char *)num, "%d", n);
+ setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
+ pfa->initstat = tempstat;
+ goto spdone;
+ }
+ } while (nematch(pfa,s));
+ }
+ /* whatever follows the last separator (or the whole string if none matched) */
+ n++;
+ sprintf((char *)num, "%d", n);
+ if (isnumber(s))
+ setsymtab(num, s, atof((char *)s), STR|NUM, (Array *) ap->sval);
+ else
+ setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
+ spdone:
+ pfa = NULL;
+ } else if (sep == ' ') {
+ for (n = 0; ; ) {
+ while (*s == ' ' || *s == '\t' || *s == '\n')
+ s++;
+ if (*s == 0)
+ break;
+ n++;
+ t = s;
+ do
+ s++;
+ while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0');
+ temp = *s;
+ *s = '\0';
+ sprintf((char *)num, "%d", n);
+ if (isnumber(t))
+ setsymtab(num, t, atof((char *)t), STR|NUM, (Array *) ap->sval);
+ else
+ setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
+ *s = temp;
+ if (*s != 0)
+ s++;
+ }
+ } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
+ for (n = 0; *s != 0; s++) {
+ char buf[2];
+ n++;
+ sprintf((char *)num, "%d", n);
+ buf[0] = *s;
+ buf[1] = 0;
+ if (isdigit(buf[0]))
+ setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
+ else
+ setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
+ }
+ } else if (*s != 0) {
+ for (;;) {
+ n++;
+ t = s;
+ while (*s != sep && *s != '\n' && *s != '\0')
+ s++;
+ temp = *s;
+ *s = '\0';
+ sprintf((char *)num, "%d", n);
+ if (isnumber(t))
+ setsymtab(num, t, atof((char *)t), STR|NUM, (Array *) ap->sval);
+ else
+ setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
+ *s = temp;
+ if (*s++ == 0)
+ break;
+ }
+ }
+ tempfree(ap);
+ tempfree(y);
+ /* x was evaluated only in the STRING separator case */
+ if (a[2] != 0 && (int) a[3] == STRING)
+ tempfree(x);
+ x = gettemp();
+ x->tval = NUM;
+ x->fval = n;
+ return(x);
+}
+
+/* condexpr: a[0] ? a[1] : a[2] -- evaluate the condition, then exactly one branch */
+Cell *condexpr(Node **a, int n)
+{
+    Cell *cond = execute(a[0]);
+    Node *branch = istrue(cond) ? a[1] : a[2];
+
+    tempfree(cond);
+    return(execute(branch));
+}
+
+/*
+ * ifstat: if (a[0]) a[1]; else a[2].
+ * Returns the result of whichever branch ran.  When the condition is
+ * false and there is no else part, the condition's own cell is returned.
+ */
+Cell *ifstat(Node **a, int n)
+{
+    Cell *x = execute(a[0]);
+
+    if (istrue(x)) {
+        tempfree(x);
+        return(execute(a[1]));
+    }
+    if (a[2] == 0)
+        return(x);
+    tempfree(x);
+    return(execute(a[2]));
+}
+
+/*
+ * whilestat: while (a[0]) a[1].
+ * Returns the (false) condition cell when the loop ends normally, `true`
+ * after a break, or the jump cell itself for next, nextfile, exit and
+ * return so that enclosing constructs can propagate it.  nextfile is
+ * propagated like next, matching dostat(); without that the loop would
+ * simply keep running after a nextfile in its body.
+ */
+Cell *whilestat(Node **a, int n)
+{
+    Cell *x;
+
+    for (;;) {
+        x = execute(a[0]);
+        if (!istrue(x))
+            return(x);
+        tempfree(x);
+        x = execute(a[1]);
+        if (isbreak(x)) {
+            x = true;
+            return(x);
+        }
+        if (isnext(x) || isnextfile(x) || isexit(x) || isret(x))
+            return(x);
+        tempfree(x);
+    }
+}
+
+/*
+ * dostat: do a[0]; while (a[1]).
+ * The body runs before the condition is first tested.  Returns `true`
+ * after a break, the jump cell for next, nextfile, exit and return, or
+ * the (false) condition cell when the loop ends normally.
+ */
+Cell *dostat(Node **a, int n)
+{
+    Cell *body, *cond;
+
+    for (;;) {
+        body = execute(a[0]);
+        if (isbreak(body))
+            return true;
+        if (isnext(body) || isnextfile(body) || isexit(body) || isret(body))
+            return(body);
+        tempfree(body);
+
+        cond = execute(a[1]);
+        if (!istrue(cond))
+            return(cond);
+        tempfree(cond);
+    }
+}
+
+/*
+ * forstat: for (a[0]; a[1]; a[2]) a[3].
+ * A missing condition (a[1] == 0) means "loop forever".  Returns the
+ * (false) condition cell when the loop ends normally, `true` after a
+ * break, or the jump cell for next, nextfile, exit and return.  nextfile
+ * is propagated like next, matching dostat(); without that the loop would
+ * keep iterating after a nextfile in its body.
+ */
+Cell *forstat(Node **a, int n)
+{
+    Cell *x;
+
+    x = execute(a[0]);
+    tempfree(x);
+    for (;;) {
+        if (a[1]!=0) {
+            x = execute(a[1]);
+            if (!istrue(x)) return(x);
+            else tempfree(x);
+        }
+        x = execute(a[3]);
+        if (isbreak(x))        /* turn off break */
+            return true;
+        if (isnext(x) || isnextfile(x) || isexit(x) || isret(x))
+            return(x);
+        tempfree(x);
+        x = execute(a[2]);
+        tempfree(x);
+    }
+}
+
+/*
+ * instat: for (a[0] in a[1]) a[2].
+ * Walks every hash chain of the array's symbol table, assigning each
+ * element's name to the loop variable and running the body.  The next
+ * chain link is saved before the body runs, so the body may delete the
+ * current element.  Iterating over a non-array does nothing.
+ *
+ * Returns `true` normally or after a break, and the jump cell for next,
+ * nextfile, exit and return.  nextfile is propagated like next, matching
+ * dostat(); without that the loop would keep iterating after a nextfile.
+ */
+Cell *instat(Node **a, int n)
+{
+    Cell *x, *vp, *arrayp, *cp, *ncp;
+    Array *tp;
+    int i;
+
+    vp = execute(a[0]);
+    arrayp = execute(a[1]);
+    if (!isarr(arrayp)) {
+        return true;
+    }
+    tp = (Array *) arrayp->sval;
+    tempfree(arrayp);
+    for (i = 0; i < tp->size; i++) {    /* this routine knows too much */
+        for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
+            setsval(vp, cp->nval);
+            ncp = cp->cnext;
+            x = execute(a[2]);
+            if (isbreak(x)) {
+                tempfree(vp);
+                return true;
+            }
+            if (isnext(x) || isnextfile(x) || isexit(x) || isret(x)) {
+                tempfree(vp);
+                return(x);
+            }
+            tempfree(x);
+        }
+    }
+    return true;
+}
+
+/*
+ * Disabled alternative to instat(): collects the element names, sorts
+ * them with qsort()/strcmp(), then iterates in that order.  Not compiled.
+ */
+#if 0
+ /* if someone ever wants to run over the arrays in sorted order, */
+ /* here it is. but it will likely run slower, not faster. */
+
+ /* NOTE(review): old-style definition; qsort() expects const void * parameters */
+ int qstrcmp(p, q)
+ char **p, **q;
+ {
+ return strcmp(*p, *q);
+ }
+
+ Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
+ {
+ Cell *x, *vp, *arrayp, *cp, *ncp, *ret;
+ Array *tp;
+ int i, ne;
+ #define BIGENOUGH 1000
+ char *elems[BIGENOUGH], **ep;
+
+ vp = execute(a[0]);
+ arrayp = execute(a[1]);
+ if (!isarr(arrayp))
+ ERROR "%s is not an array", arrayp->nval FATAL;
+ tp = (Array *) arrayp->sval;
+ tempfree(arrayp);
+ ep = elems;
+ ret = true;
+ /* NOTE(review): the malloc result is not checked */
+ if (tp->nelem >= BIGENOUGH)
+ ep = (char **) malloc(tp->nelem * sizeof(char *));
+
+ for (i = ne = 0; i < tp->size; i++)
+ for (cp = tp->tab[i]; cp != NULL; cp = cp->cnext)
+ ep[ne++] = cp->nval;
+ if (ne != tp->nelem)
+ ERROR "can't happen: lost elems %d vs. %d", ne, tp->nelem FATAL;
+ qsort(ep, ne, sizeof(char *), qstrcmp);
+ for (i = 0; i < ne; i++) {
+ setsval(vp, ep[i]);
+ x = execute(a[2]);
+ if (isbreak(x)) {
+ tempfree(vp);
+ break;
+ }
+ if (isnext(x) || isnextfile(x) || isexit(x) || isret(x)) {
+ tempfree(vp);
+ ret = x;
+ break;
+ }
+ tempfree(x);
+ }
+ if (ep != elems)
+ free(ep);
+ return ret;
+ }
+#endif
+
+
+/*
+ * bltin: builtin functions.
+ *   a[0] is the function code (an integer stored in the pointer slot),
+ *   a[1] is the argument list.
+ *
+ * The first argument is always evaluated.  Numeric builtins leave their
+ * result in u and return it in a temporary; toupper/tolower return a
+ * string temporary directly.  Surplus arguments draw a warning and are
+ * still evaluated.
+ *
+ * Hardening relative to the original: toupper/tolower refuse (fatally) a
+ * string that does not fit the RECSIZE work buffer instead of overrunning
+ * it, and the <ctype.h> calls are given unsigned char values.
+ */
+Cell *bltin(Node **a, int n)
+{
+    Cell *x, *y;
+    Awkfloat u;
+    int t;
+    char *p, *s, buf[RECSIZE];
+    Node *nextarg;
+    FILE *fp;
+
+    t = (int) a[0];
+    x = execute(a[1]);
+    nextarg = a[1]->nnext;
+    switch (t) {
+    case FLENGTH:
+        if ((x->tval & STR) == 0)
+            x = copycell(x);
+        u = strlen(getsval(x)); break;
+    case FLOG:
+        u = errcheck(log(getfval(x)), "log"); break;
+    case FINT:
+        modf(getfval(x), &u); break;
+    case FEXP:
+        u = errcheck(exp(getfval(x)), "exp"); break;
+    case FSQRT:
+        u = errcheck(sqrt(getfval(x)), "sqrt"); break;
+    case FSIN:
+        u = sin(getfval(x)); break;
+    case FCOS:
+        u = cos(getfval(x)); break;
+    case FATAN:
+        if (nextarg == 0) {
+            ERROR "atan2 requires two arguments; returning 1.0" WARNING;
+            u = 1.0;
+        } else {
+            y = execute(a[1]->nnext);
+            u = atan2(getfval(x), getfval(y));
+            tempfree(y);
+            nextarg = nextarg->nnext;
+        }
+        break;
+    case FSYSTEM:
+        fflush(stdout);        /* in case something is buffered already */
+        if ((x->tval & STR) == 0)
+            x = copycell(x);
+        u = (Awkfloat) system((char *)getsval(x)) / 256;   /* 256 is unix-dep */
+        break;
+    case FRAND:
+        /* in principle, rand() returns something in 0..RAND_MAX */
+        u = (Awkfloat) (rand() % RAND_MAX) / RAND_MAX;
+        break;
+    case FSRAND:
+        if (x->tval & REC)    /* no argument provided */
+            u = time((time_t *)0);
+        else
+            u = getfval(x);
+        srand((int) u); u = (int) u;
+        break;
+    case FTOUPPER:
+    case FTOLOWER:
+        if ((x->tval & STR) == 0)
+            x = copycell(x);
+        s = getsval(x);
+        /* buf is a fixed RECSIZE array: reject what cannot fit */
+        if (strlen(s) >= sizeof(buf))
+            ERROR "string %.30s... too long for toupper/tolower", s FATAL;
+        strcpy(buf, s);
+        if (t == FTOUPPER) {
+            for (p = buf; *p; p++)
+                if (islower((unsigned char) *p))
+                    *p = toupper((unsigned char) *p);
+        } else {
+            for (p = buf; *p; p++)
+                if (isupper((unsigned char) *p))
+                    *p = tolower((unsigned char) *p);
+        }
+        tempfree(x);
+        x = gettemp();
+        setsval(x, buf);
+        return x;
+    case FFLUSH:
+        if ((x->tval & STR) == 0)
+            x = copycell(x);
+        if ((fp = openfile(GT, getsval(x))) == NULL)
+            u = EOF;
+        else
+            u = fflush(fp);
+        break;
+    default:    /* can't happen */
+        ERROR "illegal function type %d", t FATAL;
+        break;
+    }
+    tempfree(x);
+    x = gettemp();
+    setfval(x, u);
+    if (nextarg != 0) {
+        ERROR "warning: function has too many arguments" WARNING;
+        for ( ; nextarg; nextarg = nextarg->nnext)
+            execute(nextarg);
+    }
+    return(x);
+}
+
+Cell *printstat(Node **a, int n)	/* print a[0] */
+{
+	/*
+	 * a[0] is the expression list, a[1] the redirection operator
+	 * (0 for none) and a[2] the redirection target.  Items are
+	 * separated by OFS and the last one is followed by ORS.
+	 */
+	extern char **OFMT, **CONVFMT;
+	char **saved_convfmt;
+	Node *arg;
+	Cell *val;
+	FILE *out;
+
+	out = (a[1] == 0) ? stdout : redirect((int)a[1], a[2]);
+	arg = a[0];
+	while (arg != NULL) {
+		val = execute(arg);
+		if ((val->tval & STR) == 0)
+			val = copycell(val);
+		/* numbers are converted with OFMT while printing */
+		saved_convfmt = CONVFMT;
+		CONVFMT = OFMT;
+		fputs((char *)getsval(val), out);
+		CONVFMT = saved_convfmt;
+		tempfree(val);
+		fputs((char *)(arg->nnext == NULL ? *ORS : *OFS), out);
+		arg = arg->nnext;
+	}
+	if (a[1] != 0)		/* redirected output is flushed right away */
+		fflush(out);
+	if (ferror(out))
+		ERROR "write error on %s", filename(out) FATAL;
+	return(true);
+}
+
+Cell *nullproc(Node **a, int n)
+{
+	/* placeholder action: ignores both arguments and yields a null cell */
+	(void) a;
+	(void) n;
+	return NULL;
+}
+
+
+FILE *redirect(int a, Node *b)	/* set up all i/o redirections */
+{
+	/*
+	 * a is the redirection operator, b the expression naming the
+	 * file or command.  Returns the stream from openfile(); failure
+	 * to open is reported as a fatal error.
+	 */
+	Cell *target;
+	char *path;
+	FILE *stream;
+
+	target = execute(b);
+	if (!(target->tval & STR))
+		target = copycell(target);
+	path = getsval(target);
+	if ((stream = openfile(a, path)) == NULL)
+		ERROR "can't open file %s", path FATAL;
+	tempfree(target);
+	return stream;
+}
+
+struct files { /* table of streams awk has open, one slot per stream;
+  * openfile() treats a slot whose fp is 0 as free */
+ FILE *fp; /* the open stream */
+ char *fname; /* name it was opened under; compared with strcmp on lookup */
+ int mode; /* '|', 'a', 'w' => LE/LT, GT */
+} files[FOPEN_MAX] ={
+ { stdin, "/dev/stdin", LT }, /* watch out: don't free this! */
+ { stdout, "/dev/stdout", GT }, /* the three preset names are string literals; */
+ { stderr, "/dev/stderr", GT } /* closefile() skips freeing fname for slots 0..2 */
+};
+
+FILE *openfile(int a, char *us)
+{
+	/*
+	 * Return the stream for name `us` opened with redirection `a`,
+	 * reusing an entry of files[] when one with the same name and a
+	 * compatible mode exists, otherwise opening it and recording it
+	 * in the first free slot.  Returns NULL when the open fails.
+	 */
+	char *name = us;
+	int slot, mode;
+	FILE *stream = 0;
+
+	if (*name == '\0')
+		ERROR "null file name in print or getline" FATAL;
+
+	/* already open?  >> may share a stream that was opened with > */
+	for (slot = 0; slot < FOPEN_MAX; slot++) {
+		if (files[slot].fname == 0 || strcmp(name, files[slot].fname) != 0)
+			continue;
+		if (a == files[slot].mode || (a==APPEND && files[slot].mode==GT))
+			return files[slot].fp;
+	}
+
+	/* find the first unused slot */
+	slot = 0;
+	while (slot < FOPEN_MAX && files[slot].fp != 0)
+		slot++;
+	if (slot >= FOPEN_MAX)
+		ERROR "%s makes too many open files", name FATAL;
+
+	fflush(stdout);	/* force a semblance of order */
+	mode = a;
+	switch (a) {
+	case GT:
+		stream = fopen(name, "w");
+		break;
+	case APPEND:
+		stream = fopen(name, "a");
+		mode = GT;	/* so can mix > and >> */
+		break;
+	case '|':	/* output pipe */
+		stream = popen(name, "w");
+		break;
+	case LE:	/* input pipe */
+		stream = popen(name, "r");
+		break;
+	case LT:	/* getline <file */
+		if (strcmp(name, "-") == 0)	/* "-" is stdin */
+			stream = stdin;
+		else
+			stream = fopen(name, "r");
+		break;
+	default:	/* can't happen */
+		ERROR "illegal redirection %d", a FATAL;
+		break;
+	}
+	if (stream != NULL) {
+		files[slot].fname = tostring(name);
+		files[slot].fp = stream;
+		files[slot].mode = mode;
+	}
+	return stream;
+}
+
+char *filename(FILE *fp)
+{
+	/* map an open stream back to the name recorded in files[];
+	 * "???" when the stream is not in the table */
+	struct files *f;
+
+	for (f = files; f < files + FOPEN_MAX; f++)
+		if (f->fp == fp)
+			return f->fname;
+	return "???";
+}
+
+Cell *closefile(Node **a, int n)
+{
+	/*
+	 * close(a[0]): close every files[] entry whose recorded name
+	 * equals the string value of a[0].  Pipes go through pclose,
+	 * everything else through fclose; problems are only warned about.
+	 */
+	Cell *x;
+	struct files *f;
+	int idx, rc;
+
+	n = 0;
+	x = execute(a[0]);
+	if (!(x->tval & STR))
+		x = copycell(x);
+	getsval(x);
+	for (idx = 0; idx < FOPEN_MAX; idx++) {
+		f = &files[idx];
+		if (f->fname == NULL || strcmp(x->sval, f->fname) != 0)
+			continue;
+		if (ferror(f->fp))
+			ERROR "i/o error occurred on %s", f->fname WARNING;
+		rc = (f->mode == '|' || f->mode == LE) ? pclose(f->fp) : fclose(f->fp);
+		if (rc == EOF)
+			ERROR "i/o error occurred closing %s", f->fname WARNING;
+		if (idx > 2)	/* don't do /dev/std... */
+			xfree(f->fname);
+		f->fname = NULL;	/* watch out for ref thru this */
+		f->fp = NULL;
+	}
+	tempfree(x);
+	return(true);
+}
+
+void closeall(void)
+{
+	/* close every stream still recorded in files[], warning about
+	 * pending or closing errors; the table itself is left as is */
+	struct files *f;
+	int rc;
+
+	for (f = files; f < files + FOPEN_MAX; f++) {
+		if (f->fp == 0)
+			continue;
+		if (ferror(f->fp))
+			ERROR "i/o error occurred on %s", f->fname WARNING;
+		rc = (f->mode == '|' || f->mode == LE) ? pclose(f->fp) : fclose(f->fp);
+		if (rc == EOF)
+			ERROR "i/o error occurred while closing %s", f->fname WARNING;
+	}
+}
+
+#define SUBSIZE (20 * RECSIZE)
+
+Cell *sub(Node **a, int nnn)	/* substitute command */
+{
+	/*
+	 * sub(re, repl, target): replace the first match of the regular
+	 * expression in the target string.
+	 *   a[0]  0 when a[1] is an already-compiled regexpr, else a[1]
+	 *         is an expression yielding the pattern text
+	 *   a[2]  replacement; & stands for the matched text, \& for a
+	 *         literal &
+	 *   a[3]  target, assigned the new string on success
+	 * Returns the true cell when a substitution was made, else false.
+	 *
+	 * Every copy into buf[] is bounded; a result that does not fit
+	 * is reported as a fatal error instead of being silently
+	 * truncated or written past the end of the buffer.
+	 */
+	char *sptr, *pb, *q;
+	Cell *x, *y, *result;
+	char buf[SUBSIZE], *t;
+	fa *pfa;
+
+	x = execute(a[3]);	/* target string */
+	if ((x->tval & STR) == 0)
+		x = copycell(x);
+	t = getsval(x);
+	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
+		pfa = (fa *) a[1];	/* regular expression */
+	else {
+		y = execute(a[1]);
+		pfa = makedfa(getsval(y), 1);
+		tempfree(y);
+	}
+	y = execute(a[2]);	/* replacement string */
+	if ((y->tval & STR) == 0)
+		y = copycell(y);
+	result = false;
+	if (pmatch(pfa, t)) {
+		pb = buf;
+		sptr = t;
+		/* text in front of the match */
+		while (sptr < patbeg && pb < buf + SUBSIZE - 1)
+			*pb++ = *sptr++;
+		sptr = getsval(y);
+		while (*sptr != 0 && pb < buf + SUBSIZE - 1)
+			if (*sptr == '\\' && *(sptr+1) == '&') {
+				sptr++;		/* skip \, */
+				*pb++ = *sptr++; /* add & */
+			} else if (*sptr == '&') {
+				sptr++;
+				for (q = patbeg; q < patbeg+patlen && pb < buf + SUBSIZE - 1; )
+					*pb++ = *q++;
+			} else
+				*pb++ = *sptr++;
+		*pb = '\0';
+		/* a completely full buffer means one of the bounded loops
+		 * above had to stop early */
+		if (pb >= buf + SUBSIZE - 1)
+			ERROR "sub() result %.30s too big", buf FATAL;
+		sptr = patbeg + patlen;
+		if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1)))
+			/* text after the match, including its NUL */
+			while (pb < buf + SUBSIZE && (*pb++ = *sptr++) != 0)
+				;
+		if (pb >= buf + SUBSIZE)
+			ERROR "sub() result %.30s too big", buf FATAL;
+		setsval(x, buf);
+		result = true;
+	}
+	tempfree(x);
+	tempfree(y);
+	return result;
+}
+
+Cell *gsub(Node **a, int nnn)	/* global substitute */
+{
+	/*
+	 * gsub(re, repl, target): replace every match of the regular
+	 * expression in the target string.  Arguments are laid out as
+	 * for sub(): a[0]/a[1] pattern, a[2] replacement (& is the
+	 * matched text, \& a literal &), a[3] target.
+	 * Returns a temporary numeric cell holding the number of
+	 * replacements made.
+	 *
+	 * The & expansion and the final tail copy are bounded by the
+	 * size of buf[], so an oversized result reaches the
+	 * "too big" fatal error instead of overrunning the buffer.
+	 */
+	Cell *x, *y;
+	char *rptr, *sptr, *t, *pb;
+	char buf[SUBSIZE];
+	fa *pfa;
+	int mflag, tempstat, num;
+
+	mflag = 0;	/* if mflag == 0, can replace empty string */
+	num = 0;
+	x = execute(a[3]);	/* target string */
+	if ((x->tval & STR) == 0)
+		x = copycell(x);
+	t = getsval(x);
+	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
+		pfa = (fa *) a[1];	/* regular expression */
+	else {
+		y = execute(a[1]);
+		if ((y->tval & STR) == 0)
+			y = copycell(y);
+		pfa = makedfa(getsval(y), 1);
+		tempfree(y);
+	}
+	y = execute(a[2]);	/* replacement string */
+	if ((y->tval & STR) == 0)
+		y = copycell(y);
+	if (pmatch(pfa, t)) {
+		tempstat = pfa->initstat;	/* restored once the loop is over */
+		pfa->initstat = 2;
+		pb = buf;
+		rptr = getsval(y);
+		do {
+			if (patlen == 0 && *patbeg != 0) {	/* matched empty string */
+				if (mflag == 0) {	/* can replace empty */
+					num++;
+					sptr = rptr;
+					while (*sptr != 0 && pb < buf + SUBSIZE-1)
+						if (*sptr == '\\' && *(sptr+1) == '&') {
+							sptr++;
+							*pb++ = *sptr++;
+						} else if (*sptr == '&') {
+							char *q;
+							sptr++;
+							for (q = patbeg; q < patbeg+patlen && pb < buf + SUBSIZE-1; )
+								*pb++ = *q++;
+						} else
+							*pb++ = *sptr++;
+				}
+				if (*t == 0)	/* at end */
+					goto done;
+				*pb++ = *t++;
+				if (pb >= buf + SUBSIZE-1)
+					ERROR "gsub() result %.30s too big", buf FATAL;
+				mflag = 0;
+			}
+			else {	/* matched nonempty string */
+				num++;
+				sptr = t;
+				while (sptr < patbeg && pb < buf + SUBSIZE-1)
+					*pb++ = *sptr++;
+				sptr = rptr;
+				while (*sptr != 0 && pb < buf + SUBSIZE-1)
+					if (*sptr == '\\' && *(sptr+1) == '&') {
+						sptr++;
+						*pb++ = *sptr++;
+					} else if (*sptr == '&') {
+						char *q;
+						sptr++;
+						for (q = patbeg; q < patbeg+patlen && pb < buf + SUBSIZE-1; )
+							*pb++ = *q++;
+					} else
+						*pb++ = *sptr++;
+				t = patbeg + patlen;
+				if (patlen == 0 || *t == 0 || *(t-1) == 0)
+					goto done;
+				if (pb >= buf + SUBSIZE-1)
+					ERROR "gsub() result %.30s too big", buf FATAL;
+				mflag = 1;	/* an empty match right here must not be replaced */
+			}
+		} while (pmatch(pfa,t));
+		/* copy what is left of the target, NUL included; stopping at
+		 * the buffer limit leaves pb where the check below fires */
+		sptr = t;
+		while (pb < buf + SUBSIZE-1 && (*pb++ = *sptr++) != 0)
+			;
+	done:	if (pb >= buf + SUBSIZE-1)
+			ERROR "gsub() result %.30s too big", buf FATAL;
+		*pb = '\0';
+		setsval(x, buf);
+		pfa->initstat = tempstat;
+	}
+	tempfree(x);
+	tempfree(y);
+	x = gettemp();
+	x->tval = NUM;
+	x->fval = num;
+	return(x);
+}
diff --git a/usr.bin/awk/tran.c b/usr.bin/awk/tran.c
new file mode 100644
index 00000000000..973e355765f
--- /dev/null
+++ b/usr.bin/awk/tran.c
@@ -0,0 +1,419 @@
+/****************************************************************
+Copyright (C) AT&T and Lucent Technologies 1996
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the names of AT&T or Lucent Technologies
+or any of their entities not be used in advertising or publicity
+pertaining to distribution of the software without specific,
+written prior permission.
+
+AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
+ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+****************************************************************/
+
+#define DEBUG
+#include <stdio.h>
+#include <math.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+#include "awk.h"
+#include "awkgram.h"
+
+#define FULLTAB 2 /* rehash when table gets this x full */
+#define GROWTAB 4 /* grow table by this factor */
+
+Array *symtab; /* main symbol table */
+
+char **FS; /* initial field sep */
+char **RS; /* initial record sep */
+char **OFS; /* output field sep */
+char **ORS; /* output record sep */
+char **OFMT; /* output format for numbers */
+char **CONVFMT; /* format for conversions in getsval */
+Awkfloat *NF; /* number of fields in current record */
+Awkfloat *NR; /* number of current record */
+Awkfloat *FNR; /* number of current record in current file */
+char **FILENAME; /* current filename argument */
+Awkfloat *ARGC; /* number of arguments from command line */
+char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
+Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */
+Awkfloat *RLENGTH; /* length of same */
+
+Cell *recloc; /* location of record */
+Cell *nrloc; /* NR */
+Cell *nfloc; /* NF */
+Cell *fnrloc; /* FNR */
+Array *ARGVtab; /* symbol table containing ARGV[...] */
+Array *ENVtab; /* symbol table containing ENVIRON[...] */
+Cell *rstartloc; /* RSTART */
+Cell *rlengthloc; /* RLENGTH */
+Cell *symtabloc; /* SYMTAB */
+
+Cell *nullloc; /* a guaranteed empty cell */
+Node *nullnode; /* zero&null, converted into a node for comparisons */
+
+extern Cell *fldtab;
+
+void syminit(void)	/* initialize symbol table with builtin vars */
+{
+	/*
+	 * Enter each builtin variable into symtab and point the
+	 * matching global at the value slot of its cell.
+	 */
+	Cell *cp;
+
+	setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
+	/* this is used for if(x)... tests: */
+	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
+	nullnode = valtonode(nullloc, CCON);
+
+	/* the $0 cell is not entered here; it is the first field slot */
+	recloc = &fldtab[0];
+
+	/* string-valued builtins */
+	cp = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
+	FS = &cp->sval;
+	cp = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);
+	RS = &cp->sval;
+	cp = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);
+	OFS = &cp->sval;
+	cp = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);
+	ORS = &cp->sval;
+	cp = setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab);
+	OFMT = &cp->sval;
+	cp = setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab);
+	CONVFMT = &cp->sval;
+	cp = setsymtab("FILENAME", "-", 0.0, STR|DONTFREE, symtab);
+	FILENAME = &cp->sval;
+
+	/* numeric builtins keep their cell as well as the value pointer */
+	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
+	NF = &nfloc->fval;
+	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
+	NR = &nrloc->fval;
+	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
+	FNR = &fnrloc->fval;
+
+	cp = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab);
+	SUBSEP = &cp->sval;
+
+	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
+	RSTART = &rstartloc->fval;
+	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
+	RLENGTH = &rlengthloc->fval;
+
+	/* SYMTAB is an array cell whose value is the symbol table itself */
+	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
+	symtabloc->sval = (char *) symtab;
+}
+
+void arginit(int ac, char *av[])	/* set up ARGV and ARGC */
+{
+	/*
+	 * Create ARGC (value ac) and the ARGV array, entering av[i]
+	 * under the subscript "i".  Arguments that look like numbers
+	 * are stored as both string and number.
+	 */
+	Cell *cp;
+	int i;
+	/* big enough for the decimal form of any int plus sign and NUL;
+	 * a 5-byte buffer overflowed from index 10000 on */
+	char temp[3 * sizeof(int) + 2];
+
+	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
+	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
+	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
+	cp->sval = (char *) ARGVtab;
+	for (i = 0; i < ac; i++) {
+		sprintf(temp, "%d", i);
+		if (isnumber(*av))
+			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
+		else
+			setsymtab(temp, *av, 0.0, STR, ARGVtab);
+		av++;
+	}
+}
+
+void envinit(char **envp)	/* set up ENVIRON variable */
+{
+	/*
+	 * Enter each "name=value" string of envp into the ENVIRON
+	 * array; strings without '=' are skipped.
+	 */
+	Cell *cp;
+	char *eq, *val;
+
+	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
+	ENVtab = makesymtab(NSYMTAB);
+	cp->sval = (char *) ENVtab;
+	while (*envp != NULL) {
+		eq = (char *) strchr((char *) *envp, '=');
+		if (eq != NULL) {
+			*eq = 0;	/* split into two strings at = */
+			val = eq + 1;
+			if (isnumber(val))
+				setsymtab(*envp, val, atof(val), STR|NUM, ENVtab);
+			else
+				setsymtab(*envp, val, 0.0, STR, ENVtab);
+			*eq = '=';	/* restore in case env is passed down to a shell */
+		}
+		envp++;
+	}
+}
+
+Array *makesymtab(int n)	/* make a new symbol table */
+{
+	/* allocate an empty table with n zeroed hash buckets;
+	 * running out of memory is a fatal error */
+	Array *table = (Array *) malloc(sizeof(Array));
+	Cell **buckets = (Cell **) calloc(n, sizeof(Cell *));
+
+	if (table == NULL || buckets == NULL)
+		ERROR "out of space in makesymtab" FATAL;
+	table->tab = buckets;
+	table->size = n;
+	table->nelem = 0;
+	return(table);
+}
+
+void freesymtab(Cell *ap)	/* free a symbol table */
+{
+	/*
+	 * Release the table hanging off array cell ap: every element
+	 * (name, value when freeable, cell), the bucket vector and the
+	 * Array header.  Does nothing when ap is not an array or has no
+	 * table.
+	 */
+	Cell *cp, *next;
+	Array *tp;
+	int i;
+
+	if (!isarr(ap))
+		return;
+	if ((tp = (Array *) ap->sval) == NULL)
+		return;
+	for (i = 0; i < tp->size; i++) {
+		cp = tp->tab[i];
+		while (cp != NULL) {
+			next = cp->cnext;	/* avoids freeing then using */
+			xfree(cp->nval);
+			if (freeable(cp))
+				xfree(cp->sval);
+			free((char *) cp);
+			cp = next;
+		}
+		tp->tab[i] = 0;
+	}
+	free((char *) (tp->tab));
+	free((char *) tp);
+}
+
+void freeelem(Cell *ap, char *s)	/* free elem s from ap (i.e., ap["s"]) */
+{
+	/*
+	 * Unlink and release the element named s from the table of
+	 * array cell ap; nothing happens when no such element exists.
+	 */
+	Array *tp;
+	Cell **link, *p;
+
+	tp = (Array *) ap->sval;
+	/* link always addresses the pointer that leads to p, so the head
+	 * of the chain and its interior are unlinked the same way */
+	link = &tp->tab[hash(s, tp->size)];
+	for ( ; (p = *link) != NULL; link = &p->cnext) {
+		if (strcmp((char *) s, (char *) p->nval) != 0)
+			continue;
+		*link = p->cnext;
+		if (freeable(p))
+			xfree(p->sval);
+		free(p->nval);
+		free((char *) p);
+		tp->nelem--;
+		return;
+	}
+}
+
+Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp)
+{
+	/*
+	 * Return the cell named n in table tp.  An existing cell is
+	 * returned untouched; otherwise a new one is created with string
+	 * value s (a copy, "" when s is null), number f and type bits t,
+	 * and pushed on the front of its hash chain.  The table is
+	 * rehashed first once it holds more than FULLTAB cells per bucket.
+	 */
+	int h;
+	Cell *p = NULL;
+
+	if (n != NULL)
+		p = lookup(n, tp);
+	if (p != NULL) {
+		dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
+			p, p->nval, p->sval, p->fval, p->tval) );
+		return(p);
+	}
+
+	p = (Cell *) malloc(sizeof(Cell));
+	if (p == NULL)
+		ERROR "out of space for symbol table at %s", n FATAL;
+	p->nval = tostring(n);
+	p->sval = tostring(s ? s : "");
+	p->fval = f;
+	p->tval = t;
+	p->csub = CUNK;
+	p->ctype = OCELL;
+
+	tp->nelem++;
+	if (tp->nelem > FULLTAB * tp->size)
+		rehash(tp);
+	h = hash(n, tp->size);	/* computed after a possible resize */
+	p->cnext = tp->tab[h];
+	tp->tab[h] = p;
+	dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
+		p, p->nval, p->sval, p->fval, p->tval) );
+	return(p);
+}
+
+int hash(char *s, int n)	/* form hash value for string s */
+{
+	/* multiply-by-31 string hash in unsigned arithmetic, reduced to
+	 * a bucket index in 0..n-1 */
+	unsigned hashval = 0;
+
+	while (*s != '\0') {
+		hashval = 31 * hashval + *s;
+		s++;
+	}
+	return hashval % n;
+}
+
+void rehash(Array *tp)	/* rehash items in small table into big one */
+{
+	/*
+	 * Grow the bucket vector by a factor of GROWTAB and move every
+	 * cell to its new chain.  When the bigger vector cannot be
+	 * allocated the table is left unchanged.
+	 */
+	int i, slot, newsize;
+	Cell *cp, *next, **newtab;
+
+	newsize = GROWTAB * tp->size;
+	newtab = (Cell **) calloc(newsize, sizeof(Cell *));
+	if (newtab == NULL)		/* can't do it, but can keep running. */
+		return;		/* someone else will run out later. */
+	for (i = 0; i < tp->size; i++) {
+		cp = tp->tab[i];
+		while (cp != NULL) {
+			next = cp->cnext;	/* saved before the cell is relinked */
+			slot = hash(cp->nval, newsize);
+			cp->cnext = newtab[slot];
+			newtab[slot] = cp;
+			cp = next;
+		}
+	}
+	free((char *) (tp->tab));
+	tp->tab = newtab;
+	tp->size = newsize;
+}
+
+Cell *lookup(char *s, Array *tp)	/* look for s in tp */
+{
+	/* walk the hash chain for s; returns the cell whose name equals
+	 * s, or NULL when there is none */
+	Cell *p = tp->tab[hash(s, tp->size)];
+
+	while (p != NULL) {
+		if (strcmp(s, p->nval) == 0)
+			return(p);	/* found it */
+		p = p->cnext;
+	}
+	return(NULL);	/* not found */
+}
+
+Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
+{
+	/*
+	 * Store f as the numeric value of vp, mark its string value
+	 * stale and return f.  Assigning to a field invalidates $0;
+	 * assigning to the record invalidates the fields.
+	 *
+	 * The debug traces pass the field index as int and the cell
+	 * address as void *, matching their %d and %p conversions.
+	 */
+	if ((vp->tval & (NUM | STR)) == 0)
+		funnyvar(vp, "assign to");
+	if (vp->tval & FLD) {
+		donerec = 0;	/* mark $0 invalid */
+		if (vp-fldtab > *NF)
+			newfld(vp-fldtab);
+		dprintf( ("setting field %d to %g\n", (int) (vp-fldtab), f) );
+	} else if (vp->tval & REC) {
+		donefld = 0;	/* mark $1... invalid */
+		donerec = 1;
+	}
+	vp->tval &= ~STR;	/* mark string invalid */
+	vp->tval |= NUM;	/* mark number ok */
+	dprintf( ("setfval %p: %s = %g, t=%o\n", (void *) vp, vp->nval, f, vp->tval) );
+	return vp->fval = f;
+}
+
+void funnyvar(Cell *vp, char *rw)
+{
+	/*
+	 * Diagnose use of a cell that is neither number nor string;
+	 * rw names the attempted operation and is spliced into the
+	 * message.
+	 */
+	int is_array = (vp->tval & ARR) != 0;
+	int is_function = (vp->tval & FCN) != 0;
+
+	if (is_array)
+		ERROR "can't %s %s; it's an array name.", rw, vp->nval FATAL;
+	if (is_function)
+		ERROR "can't %s %s; it's a function.", rw, vp->nval FATAL;
+	ERROR "funny variable %p: n=%s s=\"%s\" f=%g t=%o",
+		vp, vp->nval, vp->sval, vp->fval, vp->tval WARNING;
+}
+
+char *setsval(Cell *vp, char *s)	/* set string val of a Cell */
+{
+	/*
+	 * Store a private copy of s as the string value of vp, mark its
+	 * numeric value stale and return the copy.  Assigning to a field
+	 * invalidates $0; assigning to the record invalidates the fields.
+	 *
+	 * The debug traces pass the field index as int and addresses as
+	 * void *, matching their %d and %p conversions.
+	 */
+	char *t;
+
+	if ((vp->tval & (NUM | STR)) == 0)
+		funnyvar(vp, "assign to");
+	if (vp->tval & FLD) {
+		donerec = 0;	/* mark $0 invalid */
+		if (vp-fldtab > *NF)
+			newfld(vp-fldtab);
+		dprintf( ("setting field %d to %s (%p)\n", (int) (vp-fldtab), s, (void *) s) );
+	} else if (vp->tval & REC) {
+		donefld = 0;	/* mark $1... invalid */
+		donerec = 1;
+	}
+	t = tostring(s);	/* in case it's self-assign */
+	vp->tval &= ~NUM;
+	vp->tval |= STR;
+	if (freeable(vp))
+		xfree(vp->sval);
+	vp->tval &= ~DONTFREE;	/* the new value is heap-allocated */
+	dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", (void *) vp, vp->nval, t, (void *) t, vp->tval) );
+	return(vp->sval = t);
+}
+
+Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
+{
+	/*
+	 * Return the numeric value of vp, rebuilding stale fields or a
+	 * stale record first.  A cell that is not yet numeric gets its
+	 * number from atof() of its string.
+	 */
+	int stale_fields, stale_record;
+
+	if (!(vp->tval & (NUM | STR)))
+		funnyvar(vp, "read value of");
+
+	stale_fields = (vp->tval & FLD) && donefld == 0;
+	stale_record = (vp->tval & REC) && donerec == 0;
+	if (stale_fields)
+		fldbld();
+	else if (stale_record)
+		recbld();
+
+	if (!isnum(vp)) {	/* not a number */
+		vp->fval = atof(vp->sval);	/* best guess */
+		if (isnumber(vp->sval) && !(vp->tval&CON))
+			vp->tval |= NUM;	/* make NUM only sparingly */
+	}
+	dprintf( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
+	return(vp->fval);
+}
+
+char *getsval(Cell *vp)	/* get string val of a Cell */
+{
+	/*
+	 * Return the string value of vp, rebuilding stale fields or a
+	 * stale record first.  A cell without a valid string gets one
+	 * formatted from its number: integral values with "%.20g", all
+	 * others with the current CONVFMT.
+	 *
+	 * CONVFMT is set by the awk program, so the conversion is
+	 * bounded by the size of the scratch buffer; an over-long
+	 * result is truncated instead of overrunning the stack.
+	 */
+	char s[100];
+	double dtemp;
+
+	if ((vp->tval & (NUM | STR)) == 0)
+		funnyvar(vp, "read value of");
+	if ((vp->tval & FLD) && donefld == 0)
+		fldbld();
+	else if ((vp->tval & REC) && donerec == 0)
+		recbld();
+	if ((vp->tval & STR) == 0) {
+		if (!(vp->tval&DONTFREE))
+			xfree(vp->sval);
+		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
+			snprintf(s, sizeof s, "%.20g", vp->fval);
+		else
+			snprintf(s, sizeof s, (char *)*CONVFMT, vp->fval);
+		vp->sval = tostring(s);
+		vp->tval &= ~DONTFREE;	/* the new string is heap-allocated */
+		vp->tval |= STR;
+	}
+	dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
+	return(vp->sval);
+}
+
+char *tostring(char *s)	/* make a copy of string s */
+{
+	/* heap-allocated duplicate of s; out of memory is a fatal error */
+	size_t nbytes = strlen(s) + 1;	/* text plus terminating NUL */
+	char *copy = (char *) malloc(nbytes);
+
+	if (copy == NULL)
+		ERROR "out of space in tostring on %s", s FATAL;
+	memcpy(copy, s, nbytes);
+	return(copy);
+}
+
+char *qstring(char *s, int delim)	/* collect string up to next delim */
+{
+	/*
+	 * Copy s into the gs buffer up to (not including) delim,
+	 * translating backslash escapes: \\ \n \t \b \f \r, up to three
+	 * digits as an octal character code, and any other escaped
+	 * character as itself.  Returns the buffer's text.
+	 *
+	 * Scanning also stops at the terminating NUL, so input without a
+	 * closing delimiter, or ending in a lone backslash, is no longer
+	 * read past its end.  A trailing backslash is kept literally.
+	 */
+	int c, n;
+
+	for (caddreset(gs); (c = *s) != delim && c != '\0'; s++) {
+		if (c == '\n')
+			ERROR "newline in string %.10s...", gs->cbuf SYNTAX;
+		else if (c != '\\')
+			cadd(gs, c);
+		else	/* \something */
+			switch (c = *++s) {
+			case '\0':	/* \ at end of input */
+				cadd(gs, '\\');
+				s--;	/* the loop's s++ then lands on the NUL and stops */
+				break;
+			case '\\':	cadd(gs, '\\'); break;
+			case 'n':	cadd(gs, '\n'); break;
+			case 't':	cadd(gs, '\t'); break;
+			case 'b':	cadd(gs, '\b'); break;
+			case 'f':	cadd(gs, '\f'); break;
+			case 'r':	cadd(gs, '\r'); break;
+			default:
+				/* <ctype.h> needs an unsigned char value */
+				if (!isdigit((unsigned char) c)) {
+					cadd(gs, c);
+					break;
+				}
+				n = c - '0';
+				if (isdigit((unsigned char) s[1])) {
+					n = 8 * n + *++s - '0';
+					if (isdigit((unsigned char) s[1]))
+						n = 8 * n + *++s - '0';
+				}
+				cadd(gs, n);
+				break;
+			}
+	}
+	cadd(gs, 0);
+	return gs->cbuf;
+}