diff options
author | Todd C. Miller <millert@cvs.openbsd.org> | 2020-06-10 21:03:13 +0000 |
---|---|---|
committer | Todd C. Miller <millert@cvs.openbsd.org> | 2020-06-10 21:03:13 +0000 |
commit | 39376d8bdeae54592ccfd497c13993baee0afda5 (patch) | |
tree | 316d96e3d89f99174ffefc54532f7481f36aded4 /usr.bin/awk | |
parent | f5914bfab18167f4e862a667b6a68739e16ae0df (diff) |
Update awk to Oct 17, 2019 version.
Diffstat (limited to 'usr.bin/awk')
-rw-r--r-- | usr.bin/awk/FIXES | 6 |
-rw-r--r-- | usr.bin/awk/awk.h | 10 |
-rw-r--r-- | usr.bin/awk/b.c | 211 |
-rw-r--r-- | usr.bin/awk/main.c | 4 |
-rw-r--r-- | usr.bin/awk/run.c | 4 |
5 files changed, 125 insertions, 110 deletions
diff --git a/usr.bin/awk/FIXES b/usr.bin/awk/FIXES index c2bc5785fce..6682ea68b4c 100644 --- a/usr.bin/awk/FIXES +++ b/usr.bin/awk/FIXES @@ -1,4 +1,4 @@ -/* $OpenBSD: FIXES,v 1.26 2020/06/10 21:02:53 millert Exp $ */ +/* $OpenBSD: FIXES,v 1.27 2020/06/10 21:03:12 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -26,6 +26,10 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. +October 17, 2019: + Import code cleanups from NetBSD. Much thanks to Christos + Zoulas (Github user zoulasc). Merges PR 51. + October 6, 2019: Import code from NetBSD awk that implements RS as a regular expression. diff --git a/usr.bin/awk/awk.h b/usr.bin/awk/awk.h index 6a84b743907..719f414882f 100644 --- a/usr.bin/awk/awk.h +++ b/usr.bin/awk/awk.h @@ -1,4 +1,4 @@ -/* $OpenBSD: awk.h,v 1.18 2020/06/10 21:02:33 millert Exp $ */ +/* $OpenBSD: awk.h,v 1.19 2020/06/10 21:03:12 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -233,16 +233,16 @@ typedef struct rrow { } rrow; typedef struct fa { - uschar gototab[NSTATES][HAT + 1]; - uschar out[NSTATES]; + unsigned int **gototab; + uschar *out; uschar *restr; - int *posns[NSTATES]; + int **posns; + int state_count; int anchor; int use; int initstat; int curstat; int accept; - int reset; struct rrow re[1]; /* variable: actual size set by calling malloc */ } fa; diff --git a/usr.bin/awk/b.c b/usr.bin/awk/b.c index 3b6b824ef60..1cc54e55a92 100644 --- a/usr.bin/awk/b.c +++ b/usr.bin/awk/b.c @@ -1,4 +1,4 @@ -/* $OpenBSD: b.c,v 1.26 2020/06/10 21:02:53 millert Exp $ */ +/* $OpenBSD: b.c,v 1.27 2020/06/10 21:03:12 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -77,10 +77,78 @@ static int poscnt; 
char *patbeg; int patlen; -#define NFA 20 /* cache this many dynamic fa's */ +#define NFA 128 /* cache this many dynamic fa's */ fa *fatab[NFA]; int nfatab = 0; /* entries in fatab */ +static int * +intalloc(size_t n, const char *f) +{ + void *p = calloc(n, sizeof(int)); + if (p == NULL) + overflo(f); + return p; +} + +static void +allocsetvec(const char *f) +{ + maxsetvec = MAXLIN; + setvec = reallocarray(setvec, maxsetvec, sizeof(*setvec)); + tmpset = reallocarray(tmpset, maxsetvec, sizeof(*tmpset)); + if (setvec == NULL || tmpset == NULL) + overflo(f); +} + +static void +resizesetvec(const char *f) +{ + setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(*setvec)); + tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(*tmpset)); + if (setvec == NULL || tmpset == NULL) + overflo(f); + maxsetvec *= 4; +} + +static void +resize_state(fa *f, int state) +{ + void *p; + int i, new_count; + + if (++state < f->state_count) + return; + + new_count = state + 10; /* needs to be tuned */ + + p = reallocarray(f->gototab, new_count, sizeof(f->gototab[0])); + if (p == NULL) + goto out; + f->gototab = p; + + p = reallocarray(f->out, new_count, sizeof(f->out[0])); + if (p == NULL) + goto out; + f->out = p; + + p = reallocarray(f->posns, new_count, sizeof(f->posns[0])); + if (p == NULL) + goto out; + f->posns = p; + + for (i = f->state_count; i < new_count; ++i) { + f->gototab[i] = calloc(NCHARS, sizeof(**f->gototab)); + if (f->gototab[i] == NULL) + goto out; + f->out[i] = 0; + f->posns[i] = NULL; + } + f->state_count = new_count; + return; +out: + overflo(__func__); +} + fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ { int i, use, nuse; @@ -88,11 +156,7 @@ fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ static int now = 1; if (setvec == NULL) { /* first time through any RE */ - maxsetvec = MAXLIN; - setvec = (int *) calloc(maxsetvec, sizeof(int)); - tmpset = (int *) calloc(maxsetvec, sizeof(int)); - if (setvec == NULL || tmpset == 
NULL) - overflo("out of space initializing makedfa"); + allocsetvec(__func__); } if (compile_time) /* a constant for sure */ @@ -140,14 +204,13 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ poscnt = 0; penter(p1); /* enter parent pointers and leaf indices */ if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) - overflo("out of space for fa"); + overflo(__func__); f->accept = poscnt-1; /* penter has computed number of positions in re */ cfoll(f, p1); /* set up follow sets */ freetr(p1); - if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL) - overflo("out of space in makedfa"); - if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) - overflo("out of space in makedfa"); + resize_state(f, 1); + f->posns[0] = intalloc(*(f->re[0].lfollow), __func__); + f->posns[1] = intalloc(1, __func__); *f->posns[1] = 0; f->initstat = makeinit(f, anchor); f->anchor = anchor; @@ -165,11 +228,9 @@ int makeinit(fa *f, int anchor) f->curstat = 2; f->out[2] = 0; - f->reset = 0; k = *(f->re[0].lfollow); xfree(f->posns[2]); - if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) - overflo("out of space in makeinit"); + f->posns[2] = intalloc(k + 1, __func__); for (i=0; i <= k; i++) { (f->posns[2])[i] = (f->re[0].lfollow)[i]; } @@ -309,7 +370,7 @@ char *cclenter(const char *argp) /* add a character class */ static int bufsz = 100; op = p; - if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL) + if (buf == NULL && (buf = malloc(bufsz)) == NULL) FATAL("out of space for character class [%.10s...] 
1", p); bp = buf; for (i = 0; (c = *p++) != 0; ) { @@ -348,7 +409,7 @@ char *cclenter(const char *argp) /* add a character class */ void overflo(const char *s) { - FATAL("regular expression too big: %.30s...", s); + FATAL("regular expression too big: out of space in %.30s...", s); } void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ @@ -362,20 +423,13 @@ void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfo f->re[info(v)].ltype = type(v); f->re[info(v)].lval.np = right(v); while (f->accept >= maxsetvec) { /* guessing here! */ - setvec = reallocarray(setvec, maxsetvec, - 4 * sizeof(int)); - tmpset = reallocarray(tmpset, maxsetvec, - 4 * sizeof(int)); - if (setvec == NULL || tmpset == NULL) - overflo("out of space in cfoll()"); - maxsetvec *= 4; + resizesetvec(__func__); } for (i = 0; i <= f->accept; i++) setvec[i] = 0; setcnt = 0; follow(v); /* computes setvec and setcnt */ - if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) - overflo("out of space building follow set"); + p = intalloc(setcnt + 1, __func__); f->re[info(v)].lfollow = p; *p = setcnt; for (i = f->accept; i >= 0; i--) @@ -405,13 +459,7 @@ int first(Node *p) /* collects initially active leaves of p into setvec */ LEAF lp = info(p); /* look for high-water mark of subscripts */ while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ - setvec = reallocarray(setvec, maxsetvec, - 4 * sizeof(int)); - tmpset = reallocarray(tmpset, maxsetvec, - 4 * sizeof(int)); - if (setvec == NULL || tmpset == NULL) - overflo("out of space in first()"); - maxsetvec *= 4; + resizesetvec(__func__); } if (type(p) == EMPTYRE) { setvec[lp] = 0; @@ -489,7 +537,9 @@ int match(fa *f, const char *p0) /* shortest match ? */ int s, ns; uschar *p = (uschar *) p0; - s = f->reset ? 
makeinit(f,0) : f->initstat; + s = f->initstat; + assert (s < f->state_count); + if (f->out[s]) return(1); do { @@ -509,15 +559,11 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */ int s, ns; uschar *p = (uschar *) p0; uschar *q; - int i, k; - /* s = f->reset ? makeinit(f,1) : f->initstat; */ - if (f->reset) { - f->initstat = s = makeinit(f,1); - } else { - s = f->initstat; - } - patbeg = (char *) p; + s = f->initstat; + assert(s < f->state_count); + + patbeg = (char *)p; patlen = -1; do { q = p; @@ -529,6 +575,9 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */ s = ns; else s = cgoto(f, s, *q); + + assert(s < f->state_count); + if (s == 1) { /* no transition */ if (patlen >= 0) { patbeg = (char *) p; @@ -546,20 +595,7 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */ } nextin: s = 2; - if (f->reset) { - for (i = 2; i <= f->curstat; i++) - xfree(f->posns[i]); - k = *f->posns[0]; - if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) - overflo("out of space in pmatch"); - for (i = 0; i <= k; i++) - (f->posns[2])[i] = (f->posns[0])[i]; - f->initstat = f->curstat = 2; - f->out[2] = f->out[0]; - for (i = 0; i < NCHARS; i++) - f->gototab[2][i] = 0; - } - } while (*p++ != 0); + } while (*p++); return (0); } @@ -568,14 +604,11 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */ int s, ns; uschar *p = (uschar *) p0; uschar *q; - int i, k; - /* s = f->reset ? 
makeinit(f,1) : f->initstat; */ - if (f->reset) { - f->initstat = s = makeinit(f,1); - } else { - s = f->initstat; - } + s = f->initstat; + assert(s < f->state_count); + + patbeg = (char *)p; patlen = -1; while (*p) { q = p; @@ -603,19 +636,6 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */ } nnextin: s = 2; - if (f->reset) { - for (i = 2; i <= f->curstat; i++) - xfree(f->posns[i]); - k = *f->posns[0]; - if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) - overflo("out of state space"); - for (i = 0; i <= k; i++) - (f->posns[2])[i] = (f->posns[0])[i]; - f->initstat = f->curstat = 2; - f->out[2] = f->out[0]; - for (i = 0; i < NCHARS; i++) - f->gototab[2][i] = 0; - } p++; } return (0); @@ -913,7 +933,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom, } else if (special_case == REPEAT_ZERO) { size += 2; /* just a null ERE: () */ } - if ((buf = (uschar *) malloc(size+1)) == NULL) + if ((buf = malloc(size + 1)) == NULL) FATAL("out of space in reg expr %.10s..", lastre); memcpy(buf, basestr, prefix_length); /* copy prefix */ j = prefix_length; @@ -1039,7 +1059,7 @@ rescan: rlxval = c; return CHAR; case '[': - if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL) + if (buf == NULL && (buf = malloc(bufsz)) == NULL) FATAL("out of space in reg expr %.10s..", lastre); bp = buf; if (*prestr == '^') { @@ -1207,20 +1227,17 @@ rescan: int cgoto(fa *f, int s, int c) { - int i, j, k; int *p, *q; + int i, j, k; assert(c == HAT || c < NCHARS); while (f->accept >= maxsetvec) { /* guessing here! 
*/ - setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(int)); - tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(int)); - if (setvec == NULL || tmpset == NULL) - overflo("out of space in cgoto()"); - maxsetvec *= 4; + resizesetvec(__func__); } for (i = 0; i <= f->accept; i++) setvec[i] = 0; setcnt = 0; + resize_state(f, s); /* compute positions of gototab[s,c] into setvec */ p = f->posns[s]; for (i = 1; i <= *p; i++) { @@ -1234,13 +1251,7 @@ int cgoto(fa *f, int s, int c) q = f->re[p[i]].lfollow; for (j = 1; j <= *q; j++) { if (q[j] >= maxsetvec) { - setvec = reallocarray(setvec, - maxsetvec, 4 * sizeof(int)); - tmpset = reallocarray(tmpset, - maxsetvec, 4 * sizeof(int)); - if (setvec == 0 || tmpset == 0) - overflo("cgoto overflow"); - maxsetvec *= 4; + resizesetvec(__func__); } if (setvec[q[j]] == 0) { setcnt++; @@ -1257,6 +1268,7 @@ int cgoto(fa *f, int s, int c) if (setvec[i]) { tmpset[j++] = i; } + resize_state(f, f->curstat > s ? f->curstat : s); /* tmpset == previous state? */ for (i = 1; i <= f->curstat; i++) { p = f->posns[i]; @@ -1272,18 +1284,12 @@ int cgoto(fa *f, int s, int c) } /* add tmpset to current set of states */ - if (f->curstat >= NSTATES-1) { - f->curstat = 2; - f->reset = 1; - for (i = 2; i < NSTATES; i++) - xfree(f->posns[i]); - } else - ++(f->curstat); + ++(f->curstat); + resize_state(f, f->curstat); for (i = 0; i < NCHARS; i++) f->gototab[f->curstat][i] = 0; xfree(f->posns[f->curstat]); - if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) - overflo("out of space in cgoto"); + p = intalloc(setcnt + 1, __func__); f->posns[f->curstat] = p; f->gototab[s][c] = f->curstat; @@ -1303,6 +1309,8 @@ void freefa(fa *f) /* free a finite automaton */ if (f == NULL) return; + for (i = 0; i < f->state_count; i++) + xfree(f->gototab[i]) for (i = 0; i <= f->curstat; i++) xfree(f->posns[i]); for (i = 0; i <= f->accept; i++) { @@ -1311,5 +1319,8 @@ void freefa(fa *f) /* free a finite automaton */ xfree((f->re[i].lval.np)); } xfree(f->restr); + 
xfree(f->out); + xfree(f->posns); + xfree(f->gototab); xfree(f); } diff --git a/usr.bin/awk/main.c b/usr.bin/awk/main.c index a0e4e1415dc..3d4e2d7b245 100644 --- a/usr.bin/awk/main.c +++ b/usr.bin/awk/main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: main.c,v 1.31 2020/06/10 21:02:53 millert Exp $ */ +/* $OpenBSD: main.c,v 1.32 2020/06/10 21:03:12 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -23,7 +23,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -const char *version = "version 20191006"; +const char *version = "version 20191017"; #define DEBUG #include <stdio.h> diff --git a/usr.bin/awk/run.c b/usr.bin/awk/run.c index 051170341b0..f0110beecf8 100644 --- a/usr.bin/awk/run.c +++ b/usr.bin/awk/run.c @@ -1,4 +1,4 @@ -/* $OpenBSD: run.c,v 1.51 2020/06/10 21:02:33 millert Exp $ */ +/* $OpenBSD: run.c,v 1.52 2020/06/10 21:03:12 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -720,7 +720,7 @@ Cell *gettemp(void) /* get a tempcell */ tmps = (Cell *) calloc(100, sizeof(Cell)); if (!tmps) FATAL("out of space for temporaries"); - for(i = 1; i < 100; i++) + for (i = 1; i < 100; i++) tmps[i-1].cnext = &tmps[i]; tmps[i-1].cnext = NULL; } |