diff options
| | | |
|---|---|---|
| author | Todd C. Miller <millert@cvs.openbsd.org> | 2020-06-10 21:02:34 +0000 |
| committer | Todd C. Miller <millert@cvs.openbsd.org> | 2020-06-10 21:02:34 +0000 |
| commit | 8e2cc9d648485b85db60e59bccee4c99723830fe (patch) | |
| tree | bc12bdf4629111b6e1d890e190ac3cdd242da095 | |
| parent | 00c62b2c6a571e929a14fec3d0a42da1292a653a (diff) | |
Update awk to Sep 10, 2019 version.
-rw-r--r-- | usr.bin/awk/FIXES | 71 | ||||
-rw-r--r-- | usr.bin/awk/README | 6 | ||||
-rw-r--r-- | usr.bin/awk/awk.h | 9 | ||||
-rw-r--r-- | usr.bin/awk/awkgram.y | 22 | ||||
-rw-r--r-- | usr.bin/awk/b.c | 36 | ||||
-rw-r--r-- | usr.bin/awk/lex.c | 43 | ||||
-rw-r--r-- | usr.bin/awk/lib.c | 31 | ||||
-rw-r--r-- | usr.bin/awk/main.c | 8 | ||||
-rw-r--r-- | usr.bin/awk/maketab.c | 4 | ||||
-rw-r--r-- | usr.bin/awk/parse.c | 4 | ||||
-rw-r--r-- | usr.bin/awk/proto.h | 4 | ||||
-rw-r--r-- | usr.bin/awk/run.c | 49 | ||||
-rw-r--r-- | usr.bin/awk/tran.c | 38 |
13 files changed, 196 insertions, 129 deletions
diff --git a/usr.bin/awk/FIXES b/usr.bin/awk/FIXES index 901eb42f74a..109eda9c71b 100644 --- a/usr.bin/awk/FIXES +++ b/usr.bin/awk/FIXES @@ -1,4 +1,4 @@ -/* $OpenBSD: FIXES,v 1.24 2020/06/10 21:02:19 millert Exp $ */ +/* $OpenBSD: FIXES,v 1.25 2020/06/10 21:02:33 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -26,6 +26,41 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. +September 10, 2019: + Fixes for various array / memory overruns found via gcc's + -fsanitize=unknown. Thanks to Alexander Richardson (Github + user arichardson). Merges PRs 47 and 48. + +July 28, 2019: + Import grammar optimization from NetBSD: Two string constants + concatenated together get turned into a single string. + +July 26, 2019: + Support POSIX-specified C-style escape sequences "\a" (alarm) + and "\v" (vertical tab) in command line arguments and regular + expressions, further to the support for them in strings added on + Apr 9, 1989. These now no longer match as literal "a" and "v" + characters (as they don't on other awk implementations). + Thanks to Martijn Dekker. + +July 17, 2019: + Pull in a number of code cleanups and minor fixes from + Warner Losh's bsd-ota branch. The only user visible change + is the use of random(3) as the random number generator. + Thanks to Warner Losh for collecting all these fixes in + one easy place to get them from. + +July 16, 2019: + Fix field splitting to use FS value as of the time a record + was read or assigned to. Thanks to GitHub user Cody Mello (melloc) + for the fix. (Merged from his branch, via PR #42.) Updated + testdir/T.split per said PR as well. + +June 24, 2019: + Extract awktest.tar into testdir directory. Add some very + simple mechanics to the makefile for running the tests and + for cleaning up. No changes to awk itself. 
+ June 17, 2019: Disallow deleting SYMTAB and its elements, which creates use-after-free bugs. Thanks to GitHub user Cody Mello (melloc) @@ -171,10 +206,10 @@ Jun 12, 2011: /pat/, \n /pat/ {...} is now legal, though bad style to use. added checks to new -v code that permits -vnospace; thanks to - ruslan ermilov for spotting this and providing the patch. + ruslan ermilov for spotting this and providing the patch. removed fixed limit on number of open files; thanks to aleksey - cheusov and christos zoulos. + cheusov and christos zoulos. fixed day 1 bug that resurrected deleted elements of ARGV when used as filenames (in lib.c). @@ -192,10 +227,10 @@ May 1, 2011: and arnold robbins, changed srand() to return the previous seed (which is 1 on the first call of srand). the seed is an Awkfloat internally though converted to unsigned int to - pass to the library srand(). thanks, everyone. + pass to the library srand(). thanks, everyone. fixed a subtle (and i hope low-probability) overflow error - in fldbld, by adding space for one extra \0. thanks to + in fldbld, by adding space for one extra \0. thanks to robert bassett for spotting this one and providing a fix. removed the files related to compilation on windows. i no @@ -232,7 +267,7 @@ Oct 8, 2008: Oct 23, 2007: minor fix in lib.c: increase inputFS to 100, change malloc - for fields to n+1. + for fields to n+1. fixed memory fault caused by out of order test in setsval. @@ -279,7 +314,7 @@ Jan 17, 2006: core dump on linux with BEGIN {nextfile}, now fixed. - removed some #ifdef's in run.c and lex.c that appear to no + removed some #ifdef's in run.c and lex.c that appear to no longer be necessary. Apr 24, 2005: @@ -293,8 +328,8 @@ Jan 14, 2005: rethinking it. 
Dec 31, 2004: - prevent overflow of -f array in main, head off potential error in - call of SYNTAX(), test malloc return in lib.c, all with thanks to + prevent overflow of -f array in main, head off potential error in + call of SYNTAX(), test malloc return in lib.c, all with thanks to todd miller. Dec 22, 2004: @@ -322,8 +357,8 @@ Nov 22, 2003: code known to man. fixed a storage leak in call() that appears to have been there since - 1983 or so -- a function without an explicit return that assigns a - string to a parameter leaked a Cell. thanks to moinak ghosh for + 1983 or so -- a function without an explicit return that assigns a + string to a parameter leaked a Cell. thanks to moinak ghosh for spotting this very subtle one. Jul 31, 2003: @@ -345,7 +380,7 @@ Jul 28, 2003: radix character in programs and command line arguments regardless of the locale; otherwise, the locale should prevail for input and output of numbers. so it's intended to work that way. - + i have rescinded the attempt to use strcoll in expanding shorthands in regular expressions (cclenter). its properties are much too surprising; for example [a-c] matches aAbBc in locale en_US but abBcC @@ -409,7 +444,7 @@ Nov 29, 2002: Jun 28, 2002: modified run/format() and tran/getsval() to do a slightly better job on using OFMT for output from print and CONVFMT for other - number->string conversions, as promised by posix and done by + number->string conversions, as promised by posix and done by gawk and mawk. there are still places where it doesn't work right if CONVFMT is changed; by then the STR attribute of the variable has been irrevocably set. thanks to arnold robbins for @@ -441,7 +476,7 @@ Feb 10, 2002: Jan 1, 2002: fflush() or fflush("") flushes all files and pipes. - length(arrayname) returns number of elements; thanks to + length(arrayname) returns number of elements; thanks to arnold robbins for suggestion. added a makefile.win to make it easier to build on windows. 
@@ -491,7 +526,7 @@ July 5, 2000: May 25, 2000: yet another attempt at making 8-bit input work, with another - band-aid in b.c (member()), and some (uschar) casts to head + band-aid in b.c (member()), and some (uschar) casts to head off potential errors in subscripts (like isdigit). also changed HAT to NCHARS-2. thanks again to santiago vila. @@ -538,7 +573,7 @@ Apr 21, 1999: the test case.) Apr 16, 1999: - with code kindly provided by Bruce Lilly, awk now parses + with code kindly provided by Bruce Lilly, awk now parses /=/ and similar constructs more sensibly in more places. Bruce also provided some helpful test cases. @@ -595,7 +630,7 @@ Jan 13, 1999: Oct 19, 1998: fixed a couple of bugs in getrec: could fail to update $0 - after a getline var; because inputFS wasn't initialized, + after a getline var; because inputFS wasn't initialized, could split $0 on every character, a misleading diversion. fixed caching bug in makedfa: LRU was actually removing @@ -743,7 +778,7 @@ May 2, 1996: input file. (thanks to arnold robbins for inspiration and code). small fixes to regexpr code: can now handle []], [[], and - variants; [] is now a syntax error, rather than matching + variants; [] is now a syntax error, rather than matching everything; [z-a] is now empty, not z. far from complete or correct, however. (thanks to jeffrey friedl for pointing out some awful behaviors.) diff --git a/usr.bin/awk/README b/usr.bin/awk/README index b3cb0540eb1..2df93cb5722 100644 --- a/usr.bin/awk/README +++ b/usr.bin/awk/README @@ -1,4 +1,4 @@ -/* $OpenBSD: README,v 1.7 2011/09/28 19:27:18 millert Exp $ */ +/* $OpenBSD: README,v 1.8 2020/06/10 21:02:33 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -85,10 +85,6 @@ interpreted within double quotes. This compiles without change on Macintosh OS X using gcc and the standard developer tools. 
-This is also said to compile on Macintosh OS 9 systems, using the -file "buildmac" provided by Dan Allen (danallen@microsoft.com), -to whom many thanks. - The version of malloc that comes with some systems is sometimes astonishly slow. If awk seems slow, you might try fixing that. More generally, turning on optimization can significantly improve diff --git a/usr.bin/awk/awk.h b/usr.bin/awk/awk.h index ffb30eb9360..6a84b743907 100644 --- a/usr.bin/awk/awk.h +++ b/usr.bin/awk/awk.h @@ -1,4 +1,4 @@ -/* $OpenBSD: awk.h,v 1.17 2020/06/10 21:02:19 millert Exp $ */ +/* $OpenBSD: awk.h,v 1.18 2020/06/10 21:02:33 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -33,7 +33,7 @@ typedef unsigned char uschar; #define xfree(a) { if ((a) != NULL) { free((void *) (a)); (a) = NULL; } } -#define NN(p) ((p) ? (p) : "(null)") /* guaranteed non-null for DPRINTF +#define NN(p) ((p) ? (p) : "(null)") /* guaranteed non-null for DPRINTF */ #define DEBUG #ifdef DEBUG @@ -169,7 +169,7 @@ extern Node *nullnode; #define CCOPY 6 #define CCON 5 #define CTEMP 4 -#define CNAME 3 +#define CNAME 3 #define CVAR 2 #define CFLD 1 #define CUNK 0 @@ -219,6 +219,7 @@ extern int pairstack[], paircnt; #define NCHARS (256+3) /* 256 handles 8-bit chars; 128 does 7-bit */ /* watch out in match(), etc. 
*/ +#define HAT (NCHARS+2) /* matches ^ in regular expr */ #define NSTATES 32 typedef struct rrow { @@ -232,7 +233,7 @@ typedef struct rrow { } rrow; typedef struct fa { - uschar gototab[NSTATES][NCHARS]; + uschar gototab[NSTATES][HAT + 1]; uschar out[NSTATES]; uschar *restr; int *posns[NSTATES]; diff --git a/usr.bin/awk/awkgram.y b/usr.bin/awk/awkgram.y index 24e110bc9ae..cf131a4388a 100644 --- a/usr.bin/awk/awkgram.y +++ b/usr.bin/awk/awkgram.y @@ -1,4 +1,4 @@ -/* $OpenBSD: awkgram.y,v 1.10 2020/06/10 21:00:01 millert Exp $ */ +/* $OpenBSD: awkgram.y,v 1.11 2020/06/10 21:02:33 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -53,7 +53,7 @@ Node *arglist = 0; /* list of args for current function */ %token <i> MATCH NOTMATCH MATCHOP %token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE %token <i> AND BOR APPEND EQ GE GT LE LT NE IN -%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC +%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC %token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE %token <i> ADD MINUS MULT DIVIDE MOD %token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ @@ -72,6 +72,7 @@ Node *arglist = 0; /* list of args for current function */ %type <i> do st %type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor %type <i> subop print +%type <cp> string %right ASGNOP %right '?' 
@@ -80,7 +81,7 @@ Node *arglist = 0; /* list of args for current function */ %left AND %left GETLINE %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|' -%left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC +%left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC %left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER %left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR %left REGEXPR VAR VARNF IVAR WHILE '(' @@ -239,10 +240,10 @@ pattern: $$ = op3($2, (Node *)1, $1, $3); } | pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); } | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); } - | pattern '|' GETLINE var { + | pattern '|' GETLINE var { if (safe) SYNTAX("cmd | getline is unsafe"); else $$ = op3(GETLINE, $4, itonp($2), $1); } - | pattern '|' GETLINE { + | pattern '|' GETLINE { if (safe) SYNTAX("cmd | getline is unsafe"); else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); } | pattern term %prec CAT { $$ = op2(CAT, $1, $2); } @@ -293,7 +294,7 @@ rparen: ; simple_stmt: - print prarg '|' term { + print prarg '|' term { if (safe) SYNTAX("print | is unsafe"); else $$ = stat3($1, $2, itonp($3), $4); } | print prarg APPEND term { @@ -349,6 +350,11 @@ subop: SUB | GSUB ; +string: + STRING + | string STRING { $$ = catstr($1, $2); } + ; + term: term '/' ASGNOP term { $$ = op2(DIVEQ, $1, $4); } | term '+' term { $$ = op2(ADD, $1, $3); } @@ -395,7 +401,7 @@ term: | SPLIT '(' pattern comma varname ')' { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */ | SPRINTF '(' patlist ')' { $$ = op1($1, $3); } - | STRING { $$ = celltonode($1, CCON); } + | string { $$ = celltonode($1, CCON); } | subop '(' reg_expr comma pattern ')' { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); } | subop '(' pattern comma pattern ')' @@ -422,7 +428,7 @@ var: | varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); } | IVAR { $$ = op1(INDIRECT, celltonode($1, CVAR)); } | INDIRECT term { $$ = op1(INDIRECT, $2); } - ; + ; varlist: /* 
nothing */ { arglist = $$ = 0; } diff --git a/usr.bin/awk/b.c b/usr.bin/awk/b.c index 5ce0b6d74d2..73ff3da84ca 100644 --- a/usr.bin/awk/b.c +++ b/usr.bin/awk/b.c @@ -1,4 +1,4 @@ -/* $OpenBSD: b.c,v 1.24 2020/06/10 21:02:19 millert Exp $ */ +/* $OpenBSD: b.c,v 1.25 2020/06/10 21:02:33 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -35,8 +35,6 @@ THIS SOFTWARE. #include "awk.h" #include "ytab.h" -#define HAT (NCHARS+2) /* matches ^ in regular expr */ - /* NCHARS is 2**n */ #define MAXLIN 22 #define type(v) (v)->nobj /* badly overloaded here */ @@ -89,11 +87,11 @@ fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ fa *pfa; static int now = 1; - if (setvec == 0) { /* first time through any RE */ + if (setvec == NULL) { /* first time through any RE */ maxsetvec = MAXLIN; setvec = (int *) calloc(maxsetvec, sizeof(int)); tmpset = (int *) calloc(maxsetvec, sizeof(int)); - if (setvec == 0 || tmpset == 0) + if (setvec == NULL || tmpset == NULL) overflo("out of space initializing makedfa"); } @@ -169,7 +167,7 @@ int makeinit(fa *f, int anchor) f->out[2] = 0; f->reset = 0; k = *(f->re[0].lfollow); - xfree(f->posns[2]); + xfree(f->posns[2]); if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) overflo("out of space in makeinit"); for (i=0; i <= k; i++) { @@ -272,8 +270,6 @@ int quoted(uschar **pp) /* pick up next thing after a \\ */ if ((c = *p++) == 't') c = '\t'; - else if (c == 'v') - c = '\v'; else if (c == 'n') c = '\n'; else if (c == 'f') @@ -282,8 +278,10 @@ int quoted(uschar **pp) /* pick up next thing after a \\ */ c = '\r'; else if (c == 'b') c = '\b'; + else if (c == 'v') + c = '\v'; else if (c == 'a') - c = '\007'; + c = '\a'; else if (c == '\\') c = '\\'; else if (c == 'x') { /* hexadecimal goo follows */ @@ -307,11 +305,11 @@ char *cclenter(const char *argp) /* add a character class */ int i, c, c2; uschar *p = (uschar *) argp; uschar 
*op, *bp; - static uschar *buf = 0; + static uschar *buf = NULL; static int bufsz = 100; op = p; - if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) + if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL) FATAL("out of space for character class [%.10s...] 1", p); bp = buf; for (i = 0; (c = *p++) != 0; ) { @@ -368,7 +366,7 @@ void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfo 4 * sizeof(int)); tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(int)); - if (setvec == 0 || tmpset == 0) + if (setvec == NULL || tmpset == NULL) overflo("out of space in cfoll()"); maxsetvec *= 4; } @@ -411,7 +409,7 @@ int first(Node *p) /* collects initially active leaves of p into setvec */ 4 * sizeof(int)); tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(int)); - if (setvec == 0 || tmpset == 0) + if (setvec == NULL || tmpset == NULL) overflo("out of space in first()"); maxsetvec *= 4; } @@ -551,7 +549,7 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */ if (f->reset) { for (i = 2; i <= f->curstat; i++) xfree(f->posns[i]); - k = *f->posns[0]; + k = *f->posns[0]; if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) overflo("out of space in pmatch"); for (i = 0; i <= k; i++) @@ -608,7 +606,7 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */ if (f->reset) { for (i = 2; i <= f->curstat; i++) xfree(f->posns[i]); - k = *f->posns[0]; + k = *f->posns[0]; if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) overflo("out of state space"); for (i = 0; i <= k; i++) @@ -913,7 +911,7 @@ int relex(void) /* lexical analyzer for reparse */ { int c, n; int cflag; - static uschar *buf = 0; + static uschar *buf = NULL; static int bufsz = 100; uschar *bp; struct charclass *cc; @@ -952,8 +950,8 @@ rescan: default: rlxval = c; return CHAR; - case '[': - if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) + case '[': + if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL) FATAL("out of space in reg 
expr %.10s..", lastre); bp = buf; if (*prestr == '^') { @@ -1128,7 +1126,7 @@ int cgoto(fa *f, int s, int c) while (f->accept >= maxsetvec) { /* guessing here! */ setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(int)); tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(int)); - if (setvec == 0 || tmpset == 0) + if (setvec == NULL || tmpset == NULL) overflo("out of space in cgoto()"); maxsetvec *= 4; } diff --git a/usr.bin/awk/lex.c b/usr.bin/awk/lex.c index ce5f185c979..0dd2aaeadae 100644 --- a/usr.bin/awk/lex.c +++ b/usr.bin/awk/lex.c @@ -1,4 +1,4 @@ -/* $OpenBSD: lex.c,v 1.13 2020/06/10 21:01:32 millert Exp $ */ +/* $OpenBSD: lex.c,v 1.14 2020/06/10 21:02:33 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -147,7 +147,7 @@ int gettok(char **pbuf, int *psz) /* get next input token */ if (bp-buf >= sz) if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok")) FATAL( "out of space for number %.10s...", buf ); - if (isdigit(c) || c == 'e' || c == 'E' + if (isdigit(c) || c == 'e' || c == 'E' || c == '.' || c == '+' || c == '-') *bp++ = c; else { @@ -181,10 +181,10 @@ int reg = 0; /* 1 => return a REGEXPR now */ int yylex(void) { int c; - static char *buf = 0; + static char *buf = NULL; static int bufsize = 5; /* BUG: setting this small causes core dump! */ - if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL) + if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL) FATAL( "out of space in yylex" ); if (sc) { sc = 0; @@ -205,7 +205,7 @@ int yylex(void) /* should this also have STR set? 
*/ RET(NUMBER); } - + yylval.i = c; switch (c) { case '\n': /* {EOL} */ @@ -236,7 +236,7 @@ int yylex(void) case '&': if (peek() == '&') { input(); RET(AND); - } else + } else RET('&'); case '|': if (peek() == '|') { @@ -334,7 +334,7 @@ int yylex(void) unputstr(buf); RET(INDIRECT); } - + case '}': if (--bracecnt < 0) SYNTAX( "extra }" ); @@ -357,10 +357,10 @@ int yylex(void) case '(': parencnt++; RET('('); - + case '"': return string(); /* BUG: should be like tran.c ? */ - + default: RET(c); } @@ -371,10 +371,10 @@ int string(void) { int c, n; char *s, *bp; - static char *buf = 0; + static char *buf = NULL; static int bufsz = 500; - if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) + if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL) FATAL("out of space for strings"); for (bp = buf; (c = input()) != '"'; ) { if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string")) @@ -393,7 +393,7 @@ int string(void) c = input(); switch (c) { case '"': *bp++ = '"'; break; - case 'n': *bp++ = '\n'; break; + case 'n': *bp++ = '\n'; break; case 't': *bp++ = '\t'; break; case 'f': *bp++ = '\f'; break; case 'r': *bp++ = '\r'; break; @@ -430,7 +430,7 @@ int string(void) break; } - default: + default: *bp++ = c; break; } @@ -440,7 +440,7 @@ int string(void) break; } } - *bp = 0; + *bp = 0; s = tostring(buf); *bp++ = ' '; *bp++ = 0; yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab); @@ -466,15 +466,14 @@ int binsearch(char *w, Keyword *kp, int n) return -1; } -int word(char *w) +int word(char *w) { Keyword *kp; int c, n; n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0])); -/* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */ - kp = keywords + n; if (n != -1) { /* found in table */ + kp = keywords + n; yylval.i = kp->sub; switch (kp->type) { /* special handling */ case BLTIN: @@ -518,11 +517,11 @@ void startreg(void) /* next call to yylex will return a regular expression */ int regexpr(void) { int c, openclass = 0; - static 
char *buf = 0; + static char *buf = NULL; static int bufsz = 500; char *bp; - if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) + if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL) FATAL("out of space for rex expr"); bp = buf; for ( ; ((c = input()) != '/' || openclass == 1) && c != 0; ) { @@ -530,11 +529,11 @@ int regexpr(void) FATAL("out of space for reg expr %.10s...", buf); if (c == '\n') { *bp = '\0'; - SYNTAX( "newline in regular expression %.10s...", buf ); + SYNTAX( "newline in regular expression %.10s...", buf ); unput('\n'); break; } else if (c == '\\') { - *bp++ = '\\'; + *bp++ = '\\'; *bp++ = input(); } else { if (c == '[') @@ -558,7 +557,7 @@ char ebuf[300]; char *ep = ebuf; char yysbuf[100]; /* pushback buffer */ char *yysptr = yysbuf; -FILE *yyin = 0; +FILE *yyin = NULL; int input(void) /* get next lexical input character */ { diff --git a/usr.bin/awk/lib.c b/usr.bin/awk/lib.c index 23958453032..03e55eaf701 100644 --- a/usr.bin/awk/lib.c +++ b/usr.bin/awk/lib.c @@ -1,4 +1,4 @@ -/* $OpenBSD: lib.c,v 1.28 2020/06/10 21:02:19 millert Exp $ */ +/* $OpenBSD: lib.c,v 1.29 2020/06/10 21:02:33 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -107,6 +107,22 @@ void initgetrec(void) infile = stdin; /* no filenames, so use stdin */ } +/* + * POSIX specifies that fields are supposed to be evaluated as if they were + * split using the value of FS at the time that the record's value ($0) was + * read. + * + * Since field-splitting is done lazily, we save the current value of FS + * whenever a new record is read in (implicitly or via getline), or when + * a new value is assigned to $0. + */ +void savefs(void) +{ + if (strlen(getsval(fsloc)) >= sizeof (inputFS)) + FATAL("field separator %.10s... 
is too long", *FS); + strlcpy(inputFS, *FS, sizeof(inputFS)); +} + static int firsttime = 1; int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */ @@ -125,6 +141,7 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */ if (isrecord) { donefld = 0; donerec = 1; + savefs(); } saveb0 = buf[0]; buf[0] = 0; @@ -194,10 +211,6 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf * int bufsize = *pbufsize; char *rs = getsval(rsloc); - if (strlen(getsval(fsloc)) >= sizeof (inputFS)) - FATAL("field separator %.10s... is too long", *FS); - /*fflush(stdout); avoids some buffering problem but makes it 25% slower*/ - strlcpy(inputFS, *FS, sizeof inputFS); /* for subsequent field splitting */ if ((sep = *rs) == 0) { sep = '\n'; while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ @@ -287,9 +300,6 @@ void fldbld(void) /* create fields from current record */ } fr = fields; i = 0; /* number of fields accumulated here */ - if (strlen(getsval(fsloc)) >= sizeof (inputFS)) - FATAL("field separator %.10s... 
is too long", *FS); - strlcpy(inputFS, *FS, sizeof(inputFS)); if (strlen(inputFS) > 1) { /* it's a regular expression */ i = refldbld(r, inputFS); } else if ((sep = *inputFS) == ' ') { /* default whitespace */ @@ -480,7 +490,7 @@ int refldbld(const char *rec, const char *fs) /* build fields from reg expr in F break; } } - return i; + return i; } void recbld(void) /* create $0 from $1..$NF if necessary */ @@ -661,8 +671,7 @@ void eprint(void) /* try to print context around error */ static int been_here = 0; extern char ebuf[], *ep; - if (compile_time == 2 || compile_time == 0 || been_here++ > 0 || - ebuf == ep) + if (compile_time == 2 || compile_time == 0 || been_here++ > 0 || ebuf == ep) return; p = ep - 1; if (p > ebuf && *p == '\n') diff --git a/usr.bin/awk/main.c b/usr.bin/awk/main.c index 8ce03dec0e9..bd5cd7ca627 100644 --- a/usr.bin/awk/main.c +++ b/usr.bin/awk/main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: main.c,v 1.29 2020/06/10 21:02:19 millert Exp $ */ +/* $OpenBSD: main.c,v 1.30 2020/06/10 21:02:33 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -23,7 +23,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
****************************************************************/ -const char *version = "version 20190617"; +const char *version = "version 20190910"; #define DEBUG #include <stdio.h> @@ -95,14 +95,14 @@ int main(int argc, char *argv[]) case 'f': /* next argument is program filename */ if (argv[1][2] != 0) { /* arg is -fsomething */ if (npfile >= MAX_PFILE - 1) - FATAL("too many -f options"); + FATAL("too many -f options"); pfile[npfile++] = &argv[1][2]; } else { /* arg is -f something */ argc--; argv++; if (argc <= 1) FATAL("no program filename"); if (npfile >= MAX_PFILE - 1) - FATAL("too many -f options"); + FATAL("too many -f options"); pfile[npfile++] = argv[1]; } break; diff --git a/usr.bin/awk/maketab.c b/usr.bin/awk/maketab.c index 1d6b3abb2ae..06708dc99a7 100644 --- a/usr.bin/awk/maketab.c +++ b/usr.bin/awk/maketab.c @@ -1,4 +1,4 @@ -/* $OpenBSD: maketab.c,v 1.14 2020/06/10 21:01:50 millert Exp $ */ +/* $OpenBSD: maketab.c,v 1.15 2020/06/10 21:02:33 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -160,7 +160,7 @@ int main(int argc, char *argv[]) table[p->token-FIRSTTOKEN] = p->name; printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE); for (i=0; i<SIZE; i++) - if (table[i]==0) + if (table[i]==NULL) printf("\tnullproc,\t/* %s */\n", names[i]); else printf("\t%s,\t/* %s */\n", table[i], names[i]); diff --git a/usr.bin/awk/parse.c b/usr.bin/awk/parse.c index 8720d13d031..9060384027c 100644 --- a/usr.bin/awk/parse.c +++ b/usr.bin/awk/parse.c @@ -1,4 +1,4 @@ -/* $OpenBSD: parse.c,v 1.7 2017/10/09 14:51:31 deraadt Exp $ */ +/* $OpenBSD: parse.c,v 1.8 2020/06/10 21:02:33 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -260,7 +260,7 @@ int isarg(const char *s) /* is s in argument list for current function? 
*/ Node *p = arglist; int n; - for (n = 0; p != 0; p = p->nnext, n++) + for (n = 0; p != NULL; p = p->nnext, n++) if (strcmp(((Cell *)(p->narg[0]))->nval, s) == 0) return n; return -1; diff --git a/usr.bin/awk/proto.h b/usr.bin/awk/proto.h index 326fddf0c87..3ec687028d8 100644 --- a/usr.bin/awk/proto.h +++ b/usr.bin/awk/proto.h @@ -1,4 +1,4 @@ -/* $OpenBSD: proto.h,v 1.12 2020/06/10 21:00:01 millert Exp $ */ +/* $OpenBSD: proto.h,v 1.13 2020/06/10 21:02:33 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -112,11 +112,13 @@ extern char *getsval(Cell *); extern char *getpssval(Cell *); /* for print */ extern char *tostring(const char *); extern char *qstring(const char *, int); +extern Cell *catstr(Cell *, Cell *); extern void recinit(unsigned int); extern void initgetrec(void); extern void makefields(int, int); extern void growfldtab(int n); +extern void savefs(void); extern int getrec(char **, int *, int); extern void nextfile(void); extern int readrec(char **buf, int *bufsize, FILE *inf); diff --git a/usr.bin/awk/run.c b/usr.bin/awk/run.c index 770019f962d..051170341b0 100644 --- a/usr.bin/awk/run.c +++ b/usr.bin/awk/run.c @@ -1,4 +1,4 @@ -/* $OpenBSD: run.c,v 1.50 2020/06/10 21:02:19 millert Exp $ */ +/* $OpenBSD: run.c,v 1.51 2020/06/10 21:02:33 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -520,7 +520,7 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts * } if (!isarr(x)) return True; - if (a[1] == 0) { /* delete the elements, not the table */ + if (a[1] == NULL) { /* delete the elements, not the table */ freesymtab(x); x->tval &= ~STR; x->tval |= ARR; @@ -606,7 +606,7 @@ Cell *matchop(Node **a, int n) /* ~ and match() */ } x = execute(a[1]); /* a[1] = target text */ s = getsval(x); - if (a[0] == 0) /* a[1] == 0: already-compiled reg expr 
*/ + if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ i = (*mf)((fa *) a[2], s); else { y = execute(a[2]); /* a[2] = regular expr */ @@ -722,7 +722,7 @@ Cell *gettemp(void) /* get a tempcell */ FATAL("out of space for temporaries"); for(i = 1; i < 100; i++) tmps[i-1].cnext = &tmps[i]; - tmps[i-1].cnext = 0; + tmps[i-1].cnext = NULL; } x = tmps; tmps = x->cnext; @@ -757,18 +757,18 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ int k, m, n; char *s; int temp; - Cell *x, *y, *z = 0; + Cell *x, *y, *z = NULL; x = execute(a[0]); y = execute(a[1]); - if (a[2] != 0) + if (a[2] != NULL) z = execute(a[2]); s = getsval(x); k = strlen(s) + 1; if (k <= 1) { tempfree(x); tempfree(y); - if (a[2] != 0) { + if (a[2] != NULL) { tempfree(z); } x = gettemp(); @@ -781,7 +781,7 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ else if (m > k) m = k; tempfree(y); - if (a[2] != 0) { + if (a[2] != NULL) { n = (int) getfval(z); tempfree(z); } else @@ -1227,7 +1227,7 @@ Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ { Cell *x; - if (a[0] == 0) + if (a[0] == NULL) x = execute(a[1]); else { x = execute(a[0]); @@ -1264,9 +1264,9 @@ Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ { - Cell *x = 0, *y, *ap; + Cell *x = NULL, *y, *ap; char *s, *origs; - char *fs, *origfs = NULL; + char *fs = NULL, *origfs = NULL; int sep; char *t, temp, num[50]; int n, tempstat, arg3type; @@ -1276,7 +1276,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ if (s == NULL) FATAL("out of space in split"); arg3type = ptoi(a[3]); - if (a[2] == 0) /* fs string */ + if (a[2] == NULL) /* fs string */ fs = getsval(fsloc); else if (arg3type == STRING) { /* split(str,arr,"string") */ x = execute(a[2]); @@ -1428,7 +1428,7 @@ Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ if (istrue(x)) { tempfree(x); x = execute(a[1]); - } else if (a[2] != 0) { + } else 
if (a[2] != NULL) { tempfree(x); x = execute(a[2]); } @@ -1480,7 +1480,7 @@ Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ x = execute(a[0]); tempfree(x); for (;;) { - if (a[1]!=0) { + if (a[1]!=NULL) { x = execute(a[1]); if (!istrue(x)) return(x); else tempfree(x); @@ -1565,7 +1565,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis case FCOS: u = cos(getfval(x)); break; case FATAN: - if (nextarg == 0) { + if (nextarg == NULL) { WARNING("atan2 requires two arguments; returning 1.0"); u = 1.0; } else { @@ -1651,7 +1651,10 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis } break; case FRAND: - u = (Awkfloat) (random() & RAND_MAX) / ((u_int)RAND_MAX + 1); + /* random() returns numbers in [0..2^31-1] + * in order to get a number in [0, 1), divide it by 2^31 + */ + u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); break; case FSRAND: if (isrec(x)) { /* no argument provided */ @@ -1699,7 +1702,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. 
a[0] is type, a[1] is arg lis tempfree(x); x = gettemp(); setfval(x, u); - if (nextarg != 0) { + if (nextarg != NULL) { WARNING("warning: function has too many arguments"); for ( ; nextarg; nextarg = nextarg->nnext) execute(nextarg); @@ -1713,7 +1716,7 @@ Cell *printstat(Node **a, int n) /* print a[0] */ Cell *y; FILE *fp; - if (a[1] == 0) /* a[1] is redirection operator, a[2] is file */ + if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ fp = stdout; else fp = redirect(ptoi(a[1]), a[2]); @@ -1726,7 +1729,7 @@ Cell *printstat(Node **a, int n) /* print a[0] */ else fputs(getsval(ofsloc), fp); } - if (a[1] != 0) + if (a[1] != NULL) fflush(fp); if (ferror(fp)) FATAL("write error on %s", filename(fp)); @@ -1783,7 +1786,7 @@ FILE *openfile(int a, const char *us) { const char *s = us; int i, m; - FILE *fp = 0; + FILE *fp = NULL; if (*s == '\0') FATAL("null file name in print or getline"); @@ -1798,7 +1801,7 @@ FILE *openfile(int a, const char *us) return NULL; for (i=0; i < nfiles; i++) - if (files[i].fp == 0) + if (files[i].fp == NULL) break; if (i >= nfiles) { struct files *nf; @@ -1914,7 +1917,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */ FATAL("out of memory in sub"); x = execute(a[3]); /* target string */ t = getsval(x); - if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ + if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ pfa = (fa *) a[1]; /* regular expression */ else { y = execute(a[1]); @@ -1977,7 +1980,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ num = 0; x = execute(a[3]); /* target string */ t = getsval(x); - if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ + if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ pfa = (fa *) a[1]; /* regular expression */ else { y = execute(a[1]); diff --git a/usr.bin/awk/tran.c b/usr.bin/awk/tran.c index 8b8b607f55d..89cb7e172b8 100644 --- a/usr.bin/awk/tran.c +++ b/usr.bin/awk/tran.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tran.c,v 1.21 2020/06/10 
21:01:32 millert Exp $ */ +/* $OpenBSD: tran.c,v 1.22 2020/06/10 21:02:33 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -192,10 +192,10 @@ void freesymtab(Cell *ap) /* free a symbol table */ if (freeable(cp)) xfree(cp->sval); temp = cp->cnext; /* avoids freeing then using */ - free(cp); + free(cp); tp->nelem--; } - tp->tab[i] = 0; + tp->tab[i] = NULL; } if (tp->nelem != 0) WARNING("can't happen: inconsistent element count freeing %s", ap->nval); @@ -208,7 +208,7 @@ void freeelem(Cell *ap, const char *s) /* free elem s from ap (i.e., ap["s"] */ Array *tp; Cell *p, *prev = NULL; int h; - + tp = (Array *) ap->sval; h = hash(s, tp->size); for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext) @@ -304,7 +304,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ int fldno; f += 0.0; /* normalise negative zero to positive zero */ - if ((vp->tval & (NUM | STR)) == 0) + if ((vp->tval & (NUM | STR)) == 0) funnyvar(vp, "assign to"); if (isfld(vp)) { donerec = 0; /* mark $0 invalid */ @@ -319,6 +319,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ } else if (isrec(vp)) { donefld = 0; /* mark $1... invalid */ donerec = 1; + savefs(); } else if (vp == ofsloc) { if (donerec == 0) recbld(); @@ -350,7 +351,7 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ int fldno; Awkfloat f; - DPRINTF( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n", + DPRINTF( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n", (void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) ); if ((vp->tval & (NUM | STR)) == 0) funnyvar(vp, "assign to"); @@ -363,6 +364,7 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ } else if (isrec(vp)) { donefld = 0; /* mark $1... 
invalid */ donerec = 1; + savefs(); } else if (vp == ofsloc) { if (donerec == 0) recbld(); @@ -374,7 +376,7 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ vp->tval |= STR; vp->fmt = NULL; setfree(vp); - DPRINTF( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n", + DPRINTF( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n", (void*)vp, NN(vp->nval), t, (void *) t, vp->tval, donerec, donefld) ); vp->sval = t; if (&vp->fval == NF) { @@ -515,6 +517,22 @@ char *tostring(const char *s) /* make a copy of string s */ return p; } +Cell *catstr(Cell *a, Cell *b) /* concatenate a and b */ +{ + Cell *c; + char *p; + char *sa = getsval(a); + char *sb = getsval(b); + size_t l = strlen(sa) + strlen(sb) + 1; + p = malloc(l); + if (p == NULL) + FATAL("out of space concatenating %s and %s", sa, sb); + snprintf(p, l, "%s%s", sa, sb); + c = setsymtab(p, p, 0.0, CON|STR|DONTFREE, symtab); + free(p); + return c; +} + char *qstring(const char *is, int delim) /* collect string up to next delim */ { const char *os = is; @@ -534,16 +552,16 @@ char *qstring(const char *is, int delim) /* collect string up to next delim */ if (c == 0) { /* \ at end */ *bp++ = '\\'; break; /* for loop */ - } + } switch (c) { case '\\': *bp++ = '\\'; break; case 'n': *bp++ = '\n'; break; case 't': *bp++ = '\t'; break; - case 'v': *bp++ = '\v'; break; case 'b': *bp++ = '\b'; break; case 'f': *bp++ = '\f'; break; case 'r': *bp++ = '\r'; break; - case 'a': *bp++ = '\007'; break; + case 'v': *bp++ = '\v'; break; + case 'a': *bp++ = '\a'; break; default: if (!isdigit(c)) { *bp++ = c; |