summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- usr.bin/awk/FIXES 22
-rw-r--r-- usr.bin/awk/README 91
-rw-r--r-- usr.bin/awk/README.md 98
-rw-r--r-- usr.bin/awk/b.c 43
-rw-r--r-- usr.bin/awk/lex.c 9
-rw-r--r-- usr.bin/awk/main.c 4
-rw-r--r-- usr.bin/awk/run.c 26
-rw-r--r-- usr.bin/awk/tran.c 14
8 files changed, 175 insertions, 132 deletions
diff --git a/usr.bin/awk/FIXES b/usr.bin/awk/FIXES
index 700e5519f47..d1c3217d28d 100644
--- a/usr.bin/awk/FIXES
+++ b/usr.bin/awk/FIXES
@@ -1,4 +1,4 @@
-/* $OpenBSD: FIXES,v 1.29 2020/06/10 21:03:56 millert Exp $ */
+/* $OpenBSD: FIXES,v 1.30 2020/06/10 21:04:40 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -26,6 +26,26 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the AWK book
was sent to the printers in August, 1987.
+January 5, 2020:
+ Fix a bug in the concatenation of two string constants into
+ one done in the grammar. Fixes GitHub issue #61. Thanks
+ to GitHub user awkfan77 for pointing out the direction for
+ the fix. New test T.concat added to the test suite.
+ Fix a few memory leaks reported by valgrind, as well.
+
+December 27, 2019:
+ Fix a bug whereby a{0,3} could match four a's. Thanks to
+ "Anonymous AWK fan" for the report.
+
+December 11, 2019:
+ Further printf-related fixes for 32 bit systems.
+ Thanks again to Christos Zoulas.
+
+December 8, 2019:
+ Fix the return value of sprintf("%d") on 32 bit systems.
+ Thanks to Jim Lowe for the report and to Christos Zoulas
+ for the fix.
+
November 10, 2019:
Convert a number of Boolean integer variables into
actual bools. Convert compile_time variable into an
diff --git a/usr.bin/awk/README b/usr.bin/awk/README
deleted file mode 100644
index 2df93cb5722..00000000000
--- a/usr.bin/awk/README
+++ /dev/null
@@ -1,91 +0,0 @@
-/* $OpenBSD: README,v 1.8 2020/06/10 21:02:33 millert Exp $ */
-/****************************************************************
-Copyright (C) Lucent Technologies 1997
-All Rights Reserved
-
-Permission to use, copy, modify, and distribute this software and
-its documentation for any purpose and without fee is hereby
-granted, provided that the above copyright notice appear in all
-copies and that both that the copyright notice and this
-permission notice and warranty disclaimer appear in supporting
-documentation, and that the name Lucent Technologies or any of
-its entities not be used in advertising or publicity pertaining
-to distribution of the software without specific, written prior
-permission.
-
-LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
-INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
-IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
-SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
-IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
-ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
-THIS SOFTWARE.
-****************************************************************/
-
-This is the version of awk described in "The AWK Programming Language",
-by Al Aho, Brian Kernighan, and Peter Weinberger
-(Addison-Wesley, 1988, ISBN 0-201-07981-X).
-
-Changes, mostly bug fixes and occasional enhancements, are listed
-in FIXES. If you distribute this code further, please please please
-distribute FIXES with it. If you find errors, please report them
-to bwk@cs.princeton.edu. Thanks.
-
-The program itself is created by
- make
-which should produce a sequence of messages roughly like this:
-
- yacc -d awkgram.y
-
-conflicts: 43 shift/reduce, 85 reduce/reduce
- mv y.tab.c ytab.c
- mv y.tab.h ytab.h
- cc -c ytab.c
- cc -c b.c
- cc -c main.c
- cc -c parse.c
- cc maketab.c -o maketab
- ./maketab >proctab.c
- cc -c proctab.c
- cc -c tran.c
- cc -c lib.c
- cc -c run.c
- cc -c lex.c
- cc ytab.o b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o -lm
-
-This produces an executable a.out; you will eventually want to
-move this to some place like /usr/bin/awk.
-
-If your system does not have yacc or bison (the GNU
-equivalent), you must compile the pieces manually. We have
-included yacc output in ytab.c and ytab.h, and backup copies in
-case you overwrite them. We have also included a copy of
-proctab.c so you do not need to run maketab.
-
-NOTE: This version uses ANSI C, as you should also. We have
-compiled this without any changes using gcc -Wall and/or local C
-compilers on a variety of systems, but new systems or compilers
-may raise some new complaint; reports of difficulties are
-welcome.
-
-This also compiles with Visual C++ on all flavors of Windows,
-*if* you provide versions of popen and pclose. The file
-missing95.c contains versions that can be used to get started
-with, though the underlying support has mysterious properties,
-the symptom of which can be truncated pipe output. Beware. The
-file makefile.win gives hints on how to proceed; if you run
-vcvars32.bat, it will set up necessary paths and parameters so
-you can subsequently run nmake -f makefile.win. Beware also that
-when running on Windows under command.com, various quoting
-conventions are different from Unix systems: single quotes won't
-work around arguments, and various characters like % are
-interpreted within double quotes.
-
-This compiles without change on Macintosh OS X using gcc and
-the standard developer tools.
-
-The version of malloc that comes with some systems is sometimes
-astonishly slow. If awk seems slow, you might try fixing that.
-More generally, turning on optimization can significantly improve
-awk's speed, perhaps by 1/3 for highest levels.
diff --git a/usr.bin/awk/README.md b/usr.bin/awk/README.md
new file mode 100644
index 00000000000..a712ccf7a04
--- /dev/null
+++ b/usr.bin/awk/README.md
@@ -0,0 +1,98 @@
+$OpenBSD: README.md,v 1.1 2020/06/10 21:04:40 millert Exp $
+
+# The One True Awk
+
+This is the version of `awk` described in _The AWK Programming Language_,
+by Al Aho, Brian Kernighan, and Peter Weinberger
+(Addison-Wesley, 1988, ISBN 0-201-07981-X).
+
+## Copyright
+
+Copyright (C) Lucent Technologies 1997<br/>
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name Lucent Technologies or any of
+its entities not be used in advertising or publicity pertaining
+to distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+## Distribution and Reporting Problems
+
+Changes, mostly bug fixes and occasional enhancements, are listed
+in `FIXES`. If you distribute this code further, please please please
+distribute `FIXES` with it.
+
+If you find errors, please report them
+to bwk@cs.princeton.edu.
+Please _also_ open an issue in the GitHub issue tracker, to make
+it easy to track issues.
+Thanks.
+
+## Submitting Pull Requests
+
+Pull requests are welcome. However, please create them with a request
+to merge into the `staging` branch instead of into the `master` branch.
+This allows us to do testing, and to make any additional edits or changes
+after the merge but before merging to `master`.
+
+## Building
+
+The program itself is created by
+
+ make
+
+which should produce a sequence of messages roughly like this:
+
+ yacc -d awkgram.y
+ conflicts: 43 shift/reduce, 85 reduce/reduce
+ mv y.tab.c ytab.c
+ mv y.tab.h ytab.h
+ cc -c ytab.c
+ cc -c b.c
+ cc -c main.c
+ cc -c parse.c
+ cc maketab.c -o maketab
+ ./maketab >proctab.c
+ cc -c proctab.c
+ cc -c tran.c
+ cc -c lib.c
+ cc -c run.c
+ cc -c lex.c
+ cc ytab.o b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o -lm
+
+This produces an executable `a.out`; you will eventually want to
+move this to some place like `/usr/bin/awk`.
+
+If your system does not have `yacc` or `bison` (the GNU
+equivalent), you need to install one of them first.
+
+NOTE: This version uses ANSI C (C 99), as you should also. We have
+compiled this without any changes using `gcc -Wall` and/or local C
+compilers on a variety of systems, but new systems or compilers
+may raise some new complaint; reports of difficulties are
+welcome.
+
+This compiles without change on Macintosh OS X using `gcc` and
+the standard developer tools.
+
+The version of `malloc` that comes with some systems is sometimes
+astonishingly slow. If `awk` seems slow, you might try fixing that.
+More generally, turning on optimization can significantly improve
+`awk`'s speed, perhaps by 1/3 for highest levels.
+
+#### Last Updated
+Wed Jan 1 22:44:38 IST 2020
diff --git a/usr.bin/awk/b.c b/usr.bin/awk/b.c
index 85ed2d28300..97719cf1132 100644
--- a/usr.bin/awk/b.c
+++ b/usr.bin/awk/b.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: b.c,v 1.29 2020/06/10 21:03:56 millert Exp $ */
+/* $OpenBSD: b.c,v 1.30 2020/06/10 21:04:40 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -188,7 +188,7 @@ fa *makedfa(const char *s, bool anchor) /* returns dfa for reg expr s */
}
fa *mkdfa(const char *s, bool anchor) /* does the real work of making a dfa */
- /* anchor = 1 for anchored matches, else 0 */
+ /* anchor = true for anchored matches, else false */
{
Node *p, *p1;
fa *f;
@@ -231,17 +231,17 @@ int makeinit(fa *f, bool anchor)
k = *(f->re[0].lfollow);
xfree(f->posns[2]);
f->posns[2] = intalloc(k + 1, __func__);
- for (i=0; i <= k; i++) {
+ for (i = 0; i <= k; i++) {
(f->posns[2])[i] = (f->re[0].lfollow)[i];
}
if ((f->posns[2])[1] == f->accept)
f->out[2] = 1;
- for (i=0; i < NCHARS; i++)
+ for (i = 0; i < NCHARS; i++)
f->gototab[2][i] = 0;
f->curstat = cgoto(f, 2, HAT);
if (anchor) {
*f->posns[2] = k-1; /* leave out position 0 */
- for (i=0; i < k; i++) {
+ for (i = 0; i < k; i++) {
(f->posns[0])[i] = (f->posns[2])[i];
}
@@ -471,9 +471,10 @@ int first(Node *p) /* collects initially active leaves of p into setvec */
}
if (type(p) == CCL && (*(char *) right(p)) == '\0')
return(0); /* empty CCL */
- else return(1);
+ return(1);
case PLUS:
- if (first(left(p)) == 0) return(0);
+ if (first(left(p)) == 0)
+ return(0);
return(1);
case STAR:
case QUEST:
@@ -722,7 +723,7 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
if (buf[--k] && ungetc(buf[k], f) == EOF)
FATAL("unable to ungetc '%c'", buf[k]);
while (k > i + patlen);
- buf[k] = 0;
+ buf[k] = '\0';
return true;
}
else
@@ -915,7 +916,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
int i, j;
uschar *buf = NULL;
int ret = 1;
- int init_q = (firstnum == 0); /* first added char will be ? */
+ bool init_q = (firstnum == 0); /* first added char will be ? */
int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */
int prefix_length = reptok - basestr; /* prefix includes first rep */
int suffix_length = strlen((const char *) reptok) - reptoklen; /* string after rep specifier */
@@ -942,7 +943,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
buf[j++] = '(';
buf[j++] = ')';
}
- for (i=1; i < firstnum; i++) { /* copy x reps */
+ for (i = 1; i < firstnum; i++) { /* copy x reps */
memcpy(&buf[j], atom, atomlen);
j += atomlen;
}
@@ -951,7 +952,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
} else if (special_case == REPEAT_WITH_Q) {
if (init_q)
buf[j++] = '?';
- for (i = 0; i < n_q_reps; i++) { /* copy x? reps */
+ for (i = init_q; i < n_q_reps; i++) { /* copy x? reps */
memcpy(&buf[j], atom, atomlen);
j += atomlen;
buf[j++] = '?';
@@ -1173,15 +1174,17 @@ rescan:
if (commafound) {
if (digitfound) { /* {n,m} */
m = num;
- if (m<n)
+ if (m < n)
FATAL("illegal repetition expression: class %.20s",
lastre);
- if ((n==0) && (m==1)) {
+ if (n == 0 && m == 1) {
return QUEST;
}
} else { /* {n,} */
- if (n==0) return STAR;
- if (n==1) return PLUS;
+ if (n == 0)
+ return STAR;
+ else if (n == 1)
+ return PLUS;
}
} else {
if (digitfound) { /* {n} same as {n,n} */
@@ -1194,7 +1197,7 @@ rescan:
}
if (repeat(starttok, prestr-starttok, lastatom,
startreptok - lastatom, n, m) > 0) {
- if ((n==0) && (m==0)) {
+ if (n == 0 && m == 0) {
return EMPTYRE;
}
/* must rescan input for next token */
@@ -1280,7 +1283,8 @@ int cgoto(fa *f, int s, int c)
if (tmpset[j] != p[j])
goto different;
/* setvec is state i */
- f->gototab[s][c] = i;
+ if (c != HAT)
+ f->gototab[s][c] = i;
return i;
different:;
}
@@ -1294,7 +1298,8 @@ int cgoto(fa *f, int s, int c)
p = intalloc(setcnt + 1, __func__);
f->posns[f->curstat] = p;
- f->gototab[s][c] = f->curstat;
+ if (c != HAT)
+ f->gototab[s][c] = f->curstat;
for (i = 0; i <= setcnt; i++)
p[i] = tmpset[i];
if (setvec[f->accept])
@@ -1318,7 +1323,7 @@ void freefa(fa *f) /* free a finite automaton */
for (i = 0; i <= f->accept; i++) {
xfree(f->re[i].lfollow);
if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
- xfree((f->re[i].lval.np));
+ xfree(f->re[i].lval.np);
}
xfree(f->restr);
xfree(f->out);
diff --git a/usr.bin/awk/lex.c b/usr.bin/awk/lex.c
index 6a1400a7ca2..9861c7d25b4 100644
--- a/usr.bin/awk/lex.c
+++ b/usr.bin/awk/lex.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: lex.c,v 1.16 2020/06/10 21:03:56 millert Exp $ */
+/* $OpenBSD: lex.c,v 1.17 2020/06/10 21:04:40 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -201,7 +201,9 @@ int yylex(void)
if (isalpha(c) || c == '_')
return word(buf);
if (isdigit(c)) {
- yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
+ char *cp = tostring(buf);
+ yylval.cp = setsymtab(buf, cp, atof(buf), CON|NUM, symtab);
+ free(cp);
/* should this also have STR set? */
RET(NUMBER);
}
@@ -442,8 +444,9 @@ int string(void)
}
*bp = 0;
s = tostring(buf);
- *bp++ = ' '; *bp++ = 0;
+ *bp++ = ' '; *bp++ = '\0';
yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
+ free(s);
RET(STRING);
}
diff --git a/usr.bin/awk/main.c b/usr.bin/awk/main.c
index 2b83de5a2e2..cb23a884667 100644
--- a/usr.bin/awk/main.c
+++ b/usr.bin/awk/main.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: main.c,v 1.34 2020/06/10 21:03:56 millert Exp $ */
+/* $OpenBSD: main.c,v 1.35 2020/06/10 21:04:40 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -23,7 +23,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
-const char *version = "version 20191110";
+const char *version = "version 20200105";
#define DEBUG
#include <stdio.h>
diff --git a/usr.bin/awk/run.c b/usr.bin/awk/run.c
index 2b2a775c703..605b71e47f6 100644
--- a/usr.bin/awk/run.c
+++ b/usr.bin/awk/run.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: run.c,v 1.54 2020/06/10 21:03:56 millert Exp $ */
+/* $OpenBSD: run.c,v 1.55 2020/06/10 21:04:40 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -859,8 +859,13 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
for (t = fmt; (*t++ = *s) != '\0'; s++) {
if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
FATAL("format item %.30s... ran format() out of memory", os);
- if (isalpha((uschar)*s) && *s != 'l' && *s != 'h' && *s != 'L')
- break; /* the ansi panoply */
+ /* Ignore size specifiers */
+ if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */
+ t--;
+ continue;
+ }
+ if (isalpha((uschar)*s))
+ break;
if (*s == '$') {
FATAL("'$' not permitted in awk formats");
}
@@ -893,16 +898,9 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
case 'f': case 'e': case 'g': case 'E': case 'G':
flag = 'f';
break;
- case 'd': case 'i':
- flag = 'd';
- if(*(s-1) == 'l') break;
+ case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
+ flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
*(t-1) = 'j';
- *t = 'd';
- *++t = '\0';
- break;
- case 'o': case 'x': case 'X': case 'u':
- flag = *(s-1) == 'l' ? 'd' : 'u';
- *(t-1) = 'l';
*t = *s;
*++t = '\0';
break;
@@ -939,8 +937,8 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
case 'a':
case 'A':
case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
- case 'd': snprintf(p, BUFSZ(p), fmt, (long) getfval(x)); break;
- case 'u': snprintf(p, BUFSZ(p), fmt, (int) getfval(x)); break;
+ case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
+ case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
case 's':
t = getsval(x);
n = strlen(t);
diff --git a/usr.bin/awk/tran.c b/usr.bin/awk/tran.c
index 6bd93b34e69..0318be0c57d 100644
--- a/usr.bin/awk/tran.c
+++ b/usr.bin/awk/tran.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: tran.c,v 1.24 2020/06/10 21:03:56 millert Exp $ */
+/* $OpenBSD: tran.c,v 1.25 2020/06/10 21:04:40 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -115,6 +115,7 @@ void syminit(void) /* initialize symbol table with builtin vars */
rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
RLENGTH = &rlengthloc->fval;
symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
+ free(symtabloc->sval);
symtabloc->sval = (char *) symtab;
}
@@ -127,6 +128,7 @@ void arginit(int ac, char **av) /* set up ARGV and ARGC */
ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
+ free(cp->sval);
cp->sval = (char *) ARGVtab;
for (i = 0; i < ac; i++) {
snprintf(temp, sizeof temp, "%d", i);
@@ -145,6 +147,7 @@ void envinit(char **envp) /* set up ENVIRON variable */
cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
ENVtab = makesymtab(NSYMTAB);
+ free(cp->sval);
cp->sval = (char *) ENVtab;
for ( ; *envp; envp++) {
if ((p = strchr(*envp, '=')) == NULL)
@@ -526,8 +529,15 @@ Cell *catstr(Cell *a, Cell *b) /* concatenate a and b */
if (p == NULL)
FATAL("out of space concatenating %s and %s", sa, sb);
snprintf(p, l, "%s%s", sa, sb);
- c = setsymtab(p, p, 0.0, CON|STR|DONTFREE, symtab);
+ char *newbuf = malloc(strlen(p) + 2);
+ if (newbuf == NULL)
+ FATAL("out of space concatenating %s and %s", sa, sb);
+ // See string() in lex.c; a string "xx" is stored in the symbol
+ // table as "xx ".
+ sprintf(newbuf, "%s ", p);
+ c = setsymtab(newbuf, p, 0.0, CON|STR|DONTFREE, symtab);
free(p);
+ free(newbuf);
return c;
}