summary | refs | log | tree | commit | diff
path: root/usr.bin/awk
diff options
context:
space:
mode:
author: Todd C. Miller <millert@cvs.openbsd.org> 2020-06-10 21:03:13 +0000
committer: Todd C. Miller <millert@cvs.openbsd.org> 2020-06-10 21:03:13 +0000
commit: 39376d8bdeae54592ccfd497c13993baee0afda5 (patch)
tree: 316d96e3d89f99174ffefc54532f7481f36aded4 /usr.bin/awk
parent: f5914bfab18167f4e862a667b6a68739e16ae0df (diff)
Update awk to Oct 17, 2019 version.
Diffstat (limited to 'usr.bin/awk')
-rw-r--r-- usr.bin/awk/FIXES 6
-rw-r--r-- usr.bin/awk/awk.h 10
-rw-r--r-- usr.bin/awk/b.c 211
-rw-r--r-- usr.bin/awk/main.c 4
-rw-r--r-- usr.bin/awk/run.c 4
5 files changed, 125 insertions, 110 deletions
diff --git a/usr.bin/awk/FIXES b/usr.bin/awk/FIXES
index c2bc5785fce..6682ea68b4c 100644
--- a/usr.bin/awk/FIXES
+++ b/usr.bin/awk/FIXES
@@ -1,4 +1,4 @@
-/* $OpenBSD: FIXES,v 1.26 2020/06/10 21:02:53 millert Exp $ */
+/* $OpenBSD: FIXES,v 1.27 2020/06/10 21:03:12 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -26,6 +26,10 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the AWK book
was sent to the printers in August, 1987.
+October 17, 2019:
+ Import code cleanups from NetBSD. Much thanks to Christos
+ Zoulas (Github user zoulasc). Merges PR 51.
+
October 6, 2019:
Import code from NetBSD awk that implements RS as a regular
expression.
diff --git a/usr.bin/awk/awk.h b/usr.bin/awk/awk.h
index 6a84b743907..719f414882f 100644
--- a/usr.bin/awk/awk.h
+++ b/usr.bin/awk/awk.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: awk.h,v 1.18 2020/06/10 21:02:33 millert Exp $ */
+/* $OpenBSD: awk.h,v 1.19 2020/06/10 21:03:12 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -233,16 +233,16 @@ typedef struct rrow {
} rrow;
typedef struct fa {
- uschar gototab[NSTATES][HAT + 1];
- uschar out[NSTATES];
+ unsigned int **gototab;
+ uschar *out;
uschar *restr;
- int *posns[NSTATES];
+ int **posns;
+ int state_count;
int anchor;
int use;
int initstat;
int curstat;
int accept;
- int reset;
struct rrow re[1]; /* variable: actual size set by calling malloc */
} fa;
diff --git a/usr.bin/awk/b.c b/usr.bin/awk/b.c
index 3b6b824ef60..1cc54e55a92 100644
--- a/usr.bin/awk/b.c
+++ b/usr.bin/awk/b.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: b.c,v 1.26 2020/06/10 21:02:53 millert Exp $ */
+/* $OpenBSD: b.c,v 1.27 2020/06/10 21:03:12 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -77,10 +77,78 @@ static int poscnt;
char *patbeg;
int patlen;
-#define NFA 20 /* cache this many dynamic fa's */
+#define NFA 128 /* cache this many dynamic fa's */
fa *fatab[NFA];
int nfatab = 0; /* entries in fatab */
+static int *
+intalloc(size_t n, const char *f) /* allocate n zero-filled ints; f is the caller's name, used only in the error report */
+{
+ void *p = calloc(n, sizeof(int));
+ if (p == NULL)
+ overflo(f); /* overflo() hands f to FATAL; NOTE(review): presumably never returns -- confirm */
+ return p;
+}
+
+static void
+allocsetvec(const char *f) /* first-time sizing of the setvec/tmpset work arrays; f is passed to overflo() on failure */
+{
+ maxsetvec = MAXLIN; /* starting capacity, in elements, shared by both arrays */
+ setvec = reallocarray(setvec, maxsetvec, sizeof(*setvec)); /* reallocarray does not zero the memory it returns */
+ tmpset = reallocarray(tmpset, maxsetvec, sizeof(*tmpset));
+ if (setvec == NULL || tmpset == NULL)
+ overflo(f);
+}
+
+static void
+resizesetvec(const char *f) /* grow setvec and tmpset to four times maxsetvec elements; f is passed to overflo() on failure */
+{
+ setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(*setvec)); /* the factor 4 rides in the size argument: maxsetvec elements of 4x width */
+ tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(*tmpset));
+ if (setvec == NULL || tmpset == NULL)
+ overflo(f);
+ maxsetvec *= 4; /* record the new capacity only after both arrays were resized */
+}
+
+static void
+resize_state(fa *f, int state) /* make sure f's per-state tables (gototab, out, posns) can be indexed by state */
+{
+ void *p;
+ int i, new_count;
+
+ if (++state < f->state_count) /* state is now one past the requested index; nothing to do if that still fits */
+ return;
+
+ new_count = state + 10; /* needs to be tuned */
+
+ p = reallocarray(f->gototab, new_count, sizeof(f->gototab[0])); /* array of row pointers; rows are allocated in the loop below */
+ if (p == NULL)
+ goto out;
+ f->gototab = p;
+
+ p = reallocarray(f->out, new_count, sizeof(f->out[0]));
+ if (p == NULL)
+ goto out;
+ f->out = p;
+
+ p = reallocarray(f->posns, new_count, sizeof(f->posns[0]));
+ if (p == NULL)
+ goto out;
+ f->posns = p;
+
+ for (i = f->state_count; i < new_count; ++i) { /* initialise only the newly added slots */
+ f->gototab[i] = calloc(NCHARS, sizeof(**f->gototab)); /* zeroed row of NCHARS transition entries for state i */
+ if (f->gototab[i] == NULL)
+ goto out;
+ f->out[i] = 0;
+ f->posns[i] = NULL;
+ }
+ f->state_count = new_count; /* updated only once every new row has been allocated */
+ return;
+out: /* every allocation failure above lands here */
+ overflo(__func__);
+}
+
fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
{
int i, use, nuse;
@@ -88,11 +156,7 @@ fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
static int now = 1;
if (setvec == NULL) { /* first time through any RE */
- maxsetvec = MAXLIN;
- setvec = (int *) calloc(maxsetvec, sizeof(int));
- tmpset = (int *) calloc(maxsetvec, sizeof(int));
- if (setvec == NULL || tmpset == NULL)
- overflo("out of space initializing makedfa");
+ allocsetvec(__func__);
}
if (compile_time) /* a constant for sure */
@@ -140,14 +204,13 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
poscnt = 0;
penter(p1); /* enter parent pointers and leaf indices */
if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
- overflo("out of space for fa");
+ overflo(__func__);
f->accept = poscnt-1; /* penter has computed number of positions in re */
cfoll(f, p1); /* set up follow sets */
freetr(p1);
- if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
- overflo("out of space in makedfa");
- if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
- overflo("out of space in makedfa");
+ resize_state(f, 1);
+ f->posns[0] = intalloc(*(f->re[0].lfollow), __func__);
+ f->posns[1] = intalloc(1, __func__);
*f->posns[1] = 0;
f->initstat = makeinit(f, anchor);
f->anchor = anchor;
@@ -165,11 +228,9 @@ int makeinit(fa *f, int anchor)
f->curstat = 2;
f->out[2] = 0;
- f->reset = 0;
k = *(f->re[0].lfollow);
xfree(f->posns[2]);
- if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
- overflo("out of space in makeinit");
+ f->posns[2] = intalloc(k + 1, __func__);
for (i=0; i <= k; i++) {
(f->posns[2])[i] = (f->re[0].lfollow)[i];
}
@@ -309,7 +370,7 @@ char *cclenter(const char *argp) /* add a character class */
static int bufsz = 100;
op = p;
- if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
+ if (buf == NULL && (buf = malloc(bufsz)) == NULL)
FATAL("out of space for character class [%.10s...] 1", p);
bp = buf;
for (i = 0; (c = *p++) != 0; ) {
@@ -348,7 +409,7 @@ char *cclenter(const char *argp) /* add a character class */
void overflo(const char *s)
{
- FATAL("regular expression too big: %.30s...", s);
+ FATAL("regular expression too big: out of space in %.30s...", s);
}
void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
@@ -362,20 +423,13 @@ void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfo
f->re[info(v)].ltype = type(v);
f->re[info(v)].lval.np = right(v);
while (f->accept >= maxsetvec) { /* guessing here! */
- setvec = reallocarray(setvec, maxsetvec,
- 4 * sizeof(int));
- tmpset = reallocarray(tmpset, maxsetvec,
- 4 * sizeof(int));
- if (setvec == NULL || tmpset == NULL)
- overflo("out of space in cfoll()");
- maxsetvec *= 4;
+ resizesetvec(__func__);
}
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
follow(v); /* computes setvec and setcnt */
- if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
- overflo("out of space building follow set");
+ p = intalloc(setcnt + 1, __func__);
f->re[info(v)].lfollow = p;
*p = setcnt;
for (i = f->accept; i >= 0; i--)
@@ -405,13 +459,7 @@ int first(Node *p) /* collects initially active leaves of p into setvec */
LEAF
lp = info(p); /* look for high-water mark of subscripts */
while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
- setvec = reallocarray(setvec, maxsetvec,
- 4 * sizeof(int));
- tmpset = reallocarray(tmpset, maxsetvec,
- 4 * sizeof(int));
- if (setvec == NULL || tmpset == NULL)
- overflo("out of space in first()");
- maxsetvec *= 4;
+ resizesetvec(__func__);
}
if (type(p) == EMPTYRE) {
setvec[lp] = 0;
@@ -489,7 +537,9 @@ int match(fa *f, const char *p0) /* shortest match ? */
int s, ns;
uschar *p = (uschar *) p0;
- s = f->reset ? makeinit(f,0) : f->initstat;
+ s = f->initstat;
+ assert (s < f->state_count);
+
if (f->out[s])
return(1);
do {
@@ -509,15 +559,11 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */
int s, ns;
uschar *p = (uschar *) p0;
uschar *q;
- int i, k;
- /* s = f->reset ? makeinit(f,1) : f->initstat; */
- if (f->reset) {
- f->initstat = s = makeinit(f,1);
- } else {
- s = f->initstat;
- }
- patbeg = (char *) p;
+ s = f->initstat;
+ assert(s < f->state_count);
+
+ patbeg = (char *)p;
patlen = -1;
do {
q = p;
@@ -529,6 +575,9 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */
s = ns;
else
s = cgoto(f, s, *q);
+
+ assert(s < f->state_count);
+
if (s == 1) { /* no transition */
if (patlen >= 0) {
patbeg = (char *) p;
@@ -546,20 +595,7 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */
}
nextin:
s = 2;
- if (f->reset) {
- for (i = 2; i <= f->curstat; i++)
- xfree(f->posns[i]);
- k = *f->posns[0];
- if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
- overflo("out of space in pmatch");
- for (i = 0; i <= k; i++)
- (f->posns[2])[i] = (f->posns[0])[i];
- f->initstat = f->curstat = 2;
- f->out[2] = f->out[0];
- for (i = 0; i < NCHARS; i++)
- f->gototab[2][i] = 0;
- }
- } while (*p++ != 0);
+ } while (*p++);
return (0);
}
@@ -568,14 +604,11 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
int s, ns;
uschar *p = (uschar *) p0;
uschar *q;
- int i, k;
- /* s = f->reset ? makeinit(f,1) : f->initstat; */
- if (f->reset) {
- f->initstat = s = makeinit(f,1);
- } else {
- s = f->initstat;
- }
+ s = f->initstat;
+ assert(s < f->state_count);
+
+ patbeg = (char *)p;
patlen = -1;
while (*p) {
q = p;
@@ -603,19 +636,6 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
}
nnextin:
s = 2;
- if (f->reset) {
- for (i = 2; i <= f->curstat; i++)
- xfree(f->posns[i]);
- k = *f->posns[0];
- if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
- overflo("out of state space");
- for (i = 0; i <= k; i++)
- (f->posns[2])[i] = (f->posns[0])[i];
- f->initstat = f->curstat = 2;
- f->out[2] = f->out[0];
- for (i = 0; i < NCHARS; i++)
- f->gototab[2][i] = 0;
- }
p++;
}
return (0);
@@ -913,7 +933,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
} else if (special_case == REPEAT_ZERO) {
size += 2; /* just a null ERE: () */
}
- if ((buf = (uschar *) malloc(size+1)) == NULL)
+ if ((buf = malloc(size + 1)) == NULL)
FATAL("out of space in reg expr %.10s..", lastre);
memcpy(buf, basestr, prefix_length); /* copy prefix */
j = prefix_length;
@@ -1039,7 +1059,7 @@ rescan:
rlxval = c;
return CHAR;
case '[':
- if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
+ if (buf == NULL && (buf = malloc(bufsz)) == NULL)
FATAL("out of space in reg expr %.10s..", lastre);
bp = buf;
if (*prestr == '^') {
@@ -1207,20 +1227,17 @@ rescan:
int cgoto(fa *f, int s, int c)
{
- int i, j, k;
int *p, *q;
+ int i, j, k;
assert(c == HAT || c < NCHARS);
while (f->accept >= maxsetvec) { /* guessing here! */
- setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(int));
- tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(int));
- if (setvec == NULL || tmpset == NULL)
- overflo("out of space in cgoto()");
- maxsetvec *= 4;
+ resizesetvec(__func__);
}
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
+ resize_state(f, s);
/* compute positions of gototab[s,c] into setvec */
p = f->posns[s];
for (i = 1; i <= *p; i++) {
@@ -1234,13 +1251,7 @@ int cgoto(fa *f, int s, int c)
q = f->re[p[i]].lfollow;
for (j = 1; j <= *q; j++) {
if (q[j] >= maxsetvec) {
- setvec = reallocarray(setvec,
- maxsetvec, 4 * sizeof(int));
- tmpset = reallocarray(tmpset,
- maxsetvec, 4 * sizeof(int));
- if (setvec == 0 || tmpset == 0)
- overflo("cgoto overflow");
- maxsetvec *= 4;
+ resizesetvec(__func__);
}
if (setvec[q[j]] == 0) {
setcnt++;
@@ -1257,6 +1268,7 @@ int cgoto(fa *f, int s, int c)
if (setvec[i]) {
tmpset[j++] = i;
}
+ resize_state(f, f->curstat > s ? f->curstat : s);
/* tmpset == previous state? */
for (i = 1; i <= f->curstat; i++) {
p = f->posns[i];
@@ -1272,18 +1284,12 @@ int cgoto(fa *f, int s, int c)
}
/* add tmpset to current set of states */
- if (f->curstat >= NSTATES-1) {
- f->curstat = 2;
- f->reset = 1;
- for (i = 2; i < NSTATES; i++)
- xfree(f->posns[i]);
- } else
- ++(f->curstat);
+ ++(f->curstat);
+ resize_state(f, f->curstat);
for (i = 0; i < NCHARS; i++)
f->gototab[f->curstat][i] = 0;
xfree(f->posns[f->curstat]);
- if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
- overflo("out of space in cgoto");
+ p = intalloc(setcnt + 1, __func__);
f->posns[f->curstat] = p;
f->gototab[s][c] = f->curstat;
@@ -1303,6 +1309,8 @@ void freefa(fa *f) /* free a finite automaton */
if (f == NULL)
return;
+ for (i = 0; i < f->state_count; i++)
+ xfree(f->gototab[i])
for (i = 0; i <= f->curstat; i++)
xfree(f->posns[i]);
for (i = 0; i <= f->accept; i++) {
@@ -1311,5 +1319,8 @@ void freefa(fa *f) /* free a finite automaton */
xfree((f->re[i].lval.np));
}
xfree(f->restr);
+ xfree(f->out);
+ xfree(f->posns);
+ xfree(f->gototab);
xfree(f);
}
diff --git a/usr.bin/awk/main.c b/usr.bin/awk/main.c
index a0e4e1415dc..3d4e2d7b245 100644
--- a/usr.bin/awk/main.c
+++ b/usr.bin/awk/main.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: main.c,v 1.31 2020/06/10 21:02:53 millert Exp $ */
+/* $OpenBSD: main.c,v 1.32 2020/06/10 21:03:12 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -23,7 +23,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
-const char *version = "version 20191006";
+const char *version = "version 20191017";
#define DEBUG
#include <stdio.h>
diff --git a/usr.bin/awk/run.c b/usr.bin/awk/run.c
index 051170341b0..f0110beecf8 100644
--- a/usr.bin/awk/run.c
+++ b/usr.bin/awk/run.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: run.c,v 1.51 2020/06/10 21:02:33 millert Exp $ */
+/* $OpenBSD: run.c,v 1.52 2020/06/10 21:03:12 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -720,7 +720,7 @@ Cell *gettemp(void) /* get a tempcell */
tmps = (Cell *) calloc(100, sizeof(Cell));
if (!tmps)
FATAL("out of space for temporaries");
- for(i = 1; i < 100; i++)
+ for (i = 1; i < 100; i++)
tmps[i-1].cnext = &tmps[i];
tmps[i-1].cnext = NULL;
}