diff options
-rw-r--r-- | usr.bin/awk/FIXES | 22 | ||||
-rw-r--r-- | usr.bin/awk/README | 91 | ||||
-rw-r--r-- | usr.bin/awk/README.md | 98 | ||||
-rw-r--r-- | usr.bin/awk/b.c | 43 | ||||
-rw-r--r-- | usr.bin/awk/lex.c | 9 | ||||
-rw-r--r-- | usr.bin/awk/main.c | 4 | ||||
-rw-r--r-- | usr.bin/awk/run.c | 26 | ||||
-rw-r--r-- | usr.bin/awk/tran.c | 14 |
8 files changed, 175 insertions, 132 deletions
diff --git a/usr.bin/awk/FIXES b/usr.bin/awk/FIXES index 700e5519f47..d1c3217d28d 100644 --- a/usr.bin/awk/FIXES +++ b/usr.bin/awk/FIXES @@ -1,4 +1,4 @@ -/* $OpenBSD: FIXES,v 1.29 2020/06/10 21:03:56 millert Exp $ */ +/* $OpenBSD: FIXES,v 1.30 2020/06/10 21:04:40 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -26,6 +26,26 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. +January 5, 2020: + Fix a bug in the concatenation of two string constants into + one done in the grammar. Fixes GitHub issue #61. Thanks + to GitHub user awkfan77 for pointing out the direction for + the fix. New test T.concat added to the test suite. + Fix a few memory leaks reported by valgrind, as well. + +December 27, 2019: + Fix a bug whereby a{0,3} could match four a's. Thanks to + "Anonymous AWK fan" for the report. + +December 11, 2019: + Further printf-related fixes for 32 bit systems. + Thanks again to Christos Zoulas. + +December 8, 2019: + Fix the return value of sprintf("%d") on 32 bit systems. + Thanks to Jim Lowe for the report and to Christos Zoulas + for the fix. + November 10, 2019: Convert a number of Boolean integer variables into actual bools. 
Convert compile_time variable into an diff --git a/usr.bin/awk/README b/usr.bin/awk/README deleted file mode 100644 index 2df93cb5722..00000000000 --- a/usr.bin/awk/README +++ /dev/null @@ -1,91 +0,0 @@ -/* $OpenBSD: README,v 1.8 2020/06/10 21:02:33 millert Exp $ */ -/**************************************************************** -Copyright (C) Lucent Technologies 1997 -All Rights Reserved - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appear in all -copies and that both that the copyright notice and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the name Lucent Technologies or any of -its entities not be used in advertising or publicity pertaining -to distribution of the software without specific, written prior -permission. - -LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. -IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. -****************************************************************/ - -This is the version of awk described in "The AWK Programming Language", -by Al Aho, Brian Kernighan, and Peter Weinberger -(Addison-Wesley, 1988, ISBN 0-201-07981-X). - -Changes, mostly bug fixes and occasional enhancements, are listed -in FIXES. If you distribute this code further, please please please -distribute FIXES with it. If you find errors, please report them -to bwk@cs.princeton.edu. Thanks. 
- -The program itself is created by - make -which should produce a sequence of messages roughly like this: - - yacc -d awkgram.y - -conflicts: 43 shift/reduce, 85 reduce/reduce - mv y.tab.c ytab.c - mv y.tab.h ytab.h - cc -c ytab.c - cc -c b.c - cc -c main.c - cc -c parse.c - cc maketab.c -o maketab - ./maketab >proctab.c - cc -c proctab.c - cc -c tran.c - cc -c lib.c - cc -c run.c - cc -c lex.c - cc ytab.o b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o -lm - -This produces an executable a.out; you will eventually want to -move this to some place like /usr/bin/awk. - -If your system does not have yacc or bison (the GNU -equivalent), you must compile the pieces manually. We have -included yacc output in ytab.c and ytab.h, and backup copies in -case you overwrite them. We have also included a copy of -proctab.c so you do not need to run maketab. - -NOTE: This version uses ANSI C, as you should also. We have -compiled this without any changes using gcc -Wall and/or local C -compilers on a variety of systems, but new systems or compilers -may raise some new complaint; reports of difficulties are -welcome. - -This also compiles with Visual C++ on all flavors of Windows, -*if* you provide versions of popen and pclose. The file -missing95.c contains versions that can be used to get started -with, though the underlying support has mysterious properties, -the symptom of which can be truncated pipe output. Beware. The -file makefile.win gives hints on how to proceed; if you run -vcvars32.bat, it will set up necessary paths and parameters so -you can subsequently run nmake -f makefile.win. Beware also that -when running on Windows under command.com, various quoting -conventions are different from Unix systems: single quotes won't -work around arguments, and various characters like % are -interpreted within double quotes. - -This compiles without change on Macintosh OS X using gcc and -the standard developer tools. 
- -The version of malloc that comes with some systems is sometimes -astonishly slow. If awk seems slow, you might try fixing that. -More generally, turning on optimization can significantly improve -awk's speed, perhaps by 1/3 for highest levels. diff --git a/usr.bin/awk/README.md b/usr.bin/awk/README.md new file mode 100644 index 00000000000..a712ccf7a04 --- /dev/null +++ b/usr.bin/awk/README.md @@ -0,0 +1,98 @@ +$OpenBSD: README.md,v 1.1 2020/06/10 21:04:40 millert Exp $ + +# The One True Awk + +This is the version of `awk` described in _The AWK Programming Language_, +by Al Aho, Brian Kernighan, and Peter Weinberger +(Addison-Wesley, 1988, ISBN 0-201-07981-X). + +## Copyright + +Copyright (C) Lucent Technologies 1997<br/> +All Rights Reserved + +Permission to use, copy, modify, and distribute this software and +its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the name Lucent Technologies or any of +its entities not be used in advertising or publicity pertaining +to distribution of the software without specific, written prior +permission. + +LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. +IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. + +## Distribution and Reporting Problems + +Changes, mostly bug fixes and occasional enhancements, are listed +in `FIXES`. If you distribute this code further, please please please +distribute `FIXES` with it. 
+ +If you find errors, please report them +to bwk@cs.princeton.edu. +Please _also_ open an issue in the GitHub issue tracker, to make +it easy to track issues. +Thanks. + +## Submitting Pull Requests + +Pull requests are welcome. However, please create them with a request +to merge into the `staging` branch instead of into the `master` branch. +This allows us to do testing, and to make any additional edits or changes +after the merge but before merging to `master`. + +## Building + +The program itself is created by + + make + +which should produce a sequence of messages roughly like this: + + yacc -d awkgram.y + conflicts: 43 shift/reduce, 85 reduce/reduce + mv y.tab.c ytab.c + mv y.tab.h ytab.h + cc -c ytab.c + cc -c b.c + cc -c main.c + cc -c parse.c + cc maketab.c -o maketab + ./maketab >proctab.c + cc -c proctab.c + cc -c tran.c + cc -c lib.c + cc -c run.c + cc -c lex.c + cc ytab.o b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o -lm + +This produces an executable `a.out`; you will eventually want to +move this to some place like `/usr/bin/awk`. + +If your system does not have `yacc` or `bison` (the GNU +equivalent), you need to install one of them first. + +NOTE: This version uses ANSI C (C 99), as you should also. We have +compiled this without any changes using `gcc -Wall` and/or local C +compilers on a variety of systems, but new systems or compilers +may raise some new complaint; reports of difficulties are +welcome. + +This compiles without change on Macintosh OS X using `gcc` and +the standard developer tools. + +The version of `malloc` that comes with some systems is sometimes +astonishingly slow. If `awk` seems slow, you might try fixing that. +More generally, turning on optimization can significantly improve +`awk`'s speed, perhaps by 1/3 for highest levels. 
+ +#### Last Updated +Wed Jan 1 22:44:38 IST 2020 diff --git a/usr.bin/awk/b.c b/usr.bin/awk/b.c index 85ed2d28300..97719cf1132 100644 --- a/usr.bin/awk/b.c +++ b/usr.bin/awk/b.c @@ -1,4 +1,4 @@ -/* $OpenBSD: b.c,v 1.29 2020/06/10 21:03:56 millert Exp $ */ +/* $OpenBSD: b.c,v 1.30 2020/06/10 21:04:40 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -188,7 +188,7 @@ fa *makedfa(const char *s, bool anchor) /* returns dfa for reg expr s */ } fa *mkdfa(const char *s, bool anchor) /* does the real work of making a dfa */ - /* anchor = 1 for anchored matches, else 0 */ + /* anchor = true for anchored matches, else false */ { Node *p, *p1; fa *f; @@ -231,17 +231,17 @@ int makeinit(fa *f, bool anchor) k = *(f->re[0].lfollow); xfree(f->posns[2]); f->posns[2] = intalloc(k + 1, __func__); - for (i=0; i <= k; i++) { + for (i = 0; i <= k; i++) { (f->posns[2])[i] = (f->re[0].lfollow)[i]; } if ((f->posns[2])[1] == f->accept) f->out[2] = 1; - for (i=0; i < NCHARS; i++) + for (i = 0; i < NCHARS; i++) f->gototab[2][i] = 0; f->curstat = cgoto(f, 2, HAT); if (anchor) { *f->posns[2] = k-1; /* leave out position 0 */ - for (i=0; i < k; i++) { + for (i = 0; i < k; i++) { (f->posns[0])[i] = (f->posns[2])[i]; } @@ -471,9 +471,10 @@ int first(Node *p) /* collects initially active leaves of p into setvec */ } if (type(p) == CCL && (*(char *) right(p)) == '\0') return(0); /* empty CCL */ - else return(1); + return(1); case PLUS: - if (first(left(p)) == 0) return(0); + if (first(left(p)) == 0) + return(0); return(1); case STAR: case QUEST: @@ -722,7 +723,7 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum) if (buf[--k] && ungetc(buf[k], f) == EOF) FATAL("unable to ungetc '%c'", buf[k]); while (k > i + patlen); - buf[k] = 0; + buf[k] = '\0'; return true; } else @@ -915,7 +916,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom, int i, j; uschar *buf = 
NULL; int ret = 1; - int init_q = (firstnum == 0); /* first added char will be ? */ + bool init_q = (firstnum == 0); /* first added char will be ? */ int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */ int prefix_length = reptok - basestr; /* prefix includes first rep */ int suffix_length = strlen((const char *) reptok) - reptoklen; /* string after rep specifier */ @@ -942,7 +943,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom, buf[j++] = '('; buf[j++] = ')'; } - for (i=1; i < firstnum; i++) { /* copy x reps */ + for (i = 1; i < firstnum; i++) { /* copy x reps */ memcpy(&buf[j], atom, atomlen); j += atomlen; } @@ -951,7 +952,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom, } else if (special_case == REPEAT_WITH_Q) { if (init_q) buf[j++] = '?'; - for (i = 0; i < n_q_reps; i++) { /* copy x? reps */ + for (i = init_q; i < n_q_reps; i++) { /* copy x? reps */ memcpy(&buf[j], atom, atomlen); j += atomlen; buf[j++] = '?'; @@ -1173,15 +1174,17 @@ rescan: if (commafound) { if (digitfound) { /* {n,m} */ m = num; - if (m<n) + if (m < n) FATAL("illegal repetition expression: class %.20s", lastre); - if ((n==0) && (m==1)) { + if (n == 0 && m == 1) { return QUEST; } } else { /* {n,} */ - if (n==0) return STAR; - if (n==1) return PLUS; + if (n == 0) + return STAR; + else if (n == 1) + return PLUS; } } else { if (digitfound) { /* {n} same as {n,n} */ @@ -1194,7 +1197,7 @@ rescan: } if (repeat(starttok, prestr-starttok, lastatom, startreptok - lastatom, n, m) > 0) { - if ((n==0) && (m==0)) { + if (n == 0 && m == 0) { return EMPTYRE; } /* must rescan input for next token */ @@ -1280,7 +1283,8 @@ int cgoto(fa *f, int s, int c) if (tmpset[j] != p[j]) goto different; /* setvec is state i */ - f->gototab[s][c] = i; + if (c != HAT) + f->gototab[s][c] = i; return i; different:; } @@ -1294,7 +1298,8 @@ int cgoto(fa *f, int s, int c) p = intalloc(setcnt + 1, __func__); f->posns[f->curstat] = p; - f->gototab[s][c] 
= f->curstat; + if (c != HAT) + f->gototab[s][c] = f->curstat; for (i = 0; i <= setcnt; i++) p[i] = tmpset[i]; if (setvec[f->accept]) @@ -1318,7 +1323,7 @@ void freefa(fa *f) /* free a finite automaton */ for (i = 0; i <= f->accept; i++) { xfree(f->re[i].lfollow); if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) - xfree((f->re[i].lval.np)); + xfree(f->re[i].lval.np); } xfree(f->restr); xfree(f->out); diff --git a/usr.bin/awk/lex.c b/usr.bin/awk/lex.c index 6a1400a7ca2..9861c7d25b4 100644 --- a/usr.bin/awk/lex.c +++ b/usr.bin/awk/lex.c @@ -1,4 +1,4 @@ -/* $OpenBSD: lex.c,v 1.16 2020/06/10 21:03:56 millert Exp $ */ +/* $OpenBSD: lex.c,v 1.17 2020/06/10 21:04:40 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -201,7 +201,9 @@ int yylex(void) if (isalpha(c) || c == '_') return word(buf); if (isdigit(c)) { - yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab); + char *cp = tostring(buf); + yylval.cp = setsymtab(buf, cp, atof(buf), CON|NUM, symtab); + free(cp); /* should this also have STR set? */ RET(NUMBER); } @@ -442,8 +444,9 @@ int string(void) } *bp = 0; s = tostring(buf); - *bp++ = ' '; *bp++ = 0; + *bp++ = ' '; *bp++ = '\0'; yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab); + free(s); RET(STRING); } diff --git a/usr.bin/awk/main.c b/usr.bin/awk/main.c index 2b83de5a2e2..cb23a884667 100644 --- a/usr.bin/awk/main.c +++ b/usr.bin/awk/main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: main.c,v 1.34 2020/06/10 21:03:56 millert Exp $ */ +/* $OpenBSD: main.c,v 1.35 2020/06/10 21:04:40 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -23,7 +23,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
****************************************************************/ -const char *version = "version 20191110"; +const char *version = "version 20200105"; #define DEBUG #include <stdio.h> diff --git a/usr.bin/awk/run.c b/usr.bin/awk/run.c index 2b2a775c703..605b71e47f6 100644 --- a/usr.bin/awk/run.c +++ b/usr.bin/awk/run.c @@ -1,4 +1,4 @@ -/* $OpenBSD: run.c,v 1.54 2020/06/10 21:03:56 millert Exp $ */ +/* $OpenBSD: run.c,v 1.55 2020/06/10 21:04:40 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -859,8 +859,13 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co for (t = fmt; (*t++ = *s) != '\0'; s++) { if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) FATAL("format item %.30s... ran format() out of memory", os); - if (isalpha((uschar)*s) && *s != 'l' && *s != 'h' && *s != 'L') - break; /* the ansi panoply */ + /* Ignore size specifiers */ + if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ + t--; + continue; + } + if (isalpha((uschar)*s)) + break; if (*s == '$') { FATAL("'$' not permitted in awk formats"); } @@ -893,16 +898,9 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co case 'f': case 'e': case 'g': case 'E': case 'G': flag = 'f'; break; - case 'd': case 'i': - flag = 'd'; - if(*(s-1) == 'l') break; + case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': + flag = (*s == 'd' || *s == 'i') ? 'd' : 'u'; *(t-1) = 'j'; - *t = 'd'; - *++t = '\0'; - break; - case 'o': case 'x': case 'X': case 'u': - flag = *(s-1) == 'l' ? 
'd' : 'u'; - *(t-1) = 'l'; *t = *s; *++t = '\0'; break; @@ -939,8 +937,8 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co case 'a': case 'A': case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; - case 'd': snprintf(p, BUFSZ(p), fmt, (long) getfval(x)); break; - case 'u': snprintf(p, BUFSZ(p), fmt, (int) getfval(x)); break; + case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; + case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; case 's': t = getsval(x); n = strlen(t); diff --git a/usr.bin/awk/tran.c b/usr.bin/awk/tran.c index 6bd93b34e69..0318be0c57d 100644 --- a/usr.bin/awk/tran.c +++ b/usr.bin/awk/tran.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tran.c,v 1.24 2020/06/10 21:03:56 millert Exp $ */ +/* $OpenBSD: tran.c,v 1.25 2020/06/10 21:04:40 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -115,6 +115,7 @@ void syminit(void) /* initialize symbol table with builtin vars */ rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab); RLENGTH = &rlengthloc->fval; symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab); + free(symtabloc->sval); symtabloc->sval = (char *) symtab; } @@ -127,6 +128,7 @@ void arginit(int ac, char **av) /* set up ARGV and ARGC */ ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval; cp = setsymtab("ARGV", "", 0.0, ARR, symtab); ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */ + free(cp->sval); cp->sval = (char *) ARGVtab; for (i = 0; i < ac; i++) { snprintf(temp, sizeof temp, "%d", i); @@ -145,6 +147,7 @@ void envinit(char **envp) /* set up ENVIRON variable */ cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab); ENVtab = makesymtab(NSYMTAB); + free(cp->sval); cp->sval = (char *) ENVtab; for ( ; *envp; envp++) { if ((p = strchr(*envp, '=')) == NULL) @@ -526,8 +529,15 @@ Cell *catstr(Cell *a, Cell *b) /* concatenate a and b */ if (p == NULL) FATAL("out of space 
concatenating %s and %s", sa, sb); snprintf(p, l, "%s%s", sa, sb); - c = setsymtab(p, p, 0.0, CON|STR|DONTFREE, symtab); + char *newbuf = malloc(strlen(p) + 2); + if (newbuf == NULL) + FATAL("out of space concatenating %s and %s", sa, sb); + // See string() in lex.c; a string "xx" is stored in the symbol + // table as "xx ". + sprintf(newbuf, "%s ", p); + c = setsymtab(newbuf, p, 0.0, CON|STR|DONTFREE, symtab); free(p); + free(newbuf); return c; } |