From f5914bfab18167f4e862a667b6a68739e16ae0df Mon Sep 17 00:00:00 2001 From: "Todd C. Miller" Date: Wed, 10 Jun 2020 21:02:54 +0000 Subject: Update awk to Oct 6, 2019 version. --- usr.bin/awk/FIXES | 6 +++- usr.bin/awk/awk.1 | 7 +++- usr.bin/awk/b.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++-- usr.bin/awk/lib.c | 64 ++++++++++++++++++++++--------------- usr.bin/awk/main.c | 4 +-- usr.bin/awk/proto.h | 3 +- 6 files changed, 143 insertions(+), 33 deletions(-) (limited to 'usr.bin/awk') diff --git a/usr.bin/awk/FIXES b/usr.bin/awk/FIXES index 109eda9c71b..c2bc5785fce 100644 --- a/usr.bin/awk/FIXES +++ b/usr.bin/awk/FIXES @@ -1,4 +1,4 @@ -/* $OpenBSD: FIXES,v 1.25 2020/06/10 21:02:33 millert Exp $ */ +/* $OpenBSD: FIXES,v 1.26 2020/06/10 21:02:53 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -26,6 +26,10 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. +October 6, 2019: + Import code from NetBSD awk that implements RS as a regular + expression. + September 10, 2019: Fixes for various array / memory overruns found via gcc's -fsanitize=unknown. Thanks to Alexander Richardson (Github diff --git a/usr.bin/awk/awk.1 b/usr.bin/awk/awk.1 index fb257d029ec..770f558d1b5 100644 --- a/usr.bin/awk/awk.1 +++ b/usr.bin/awk/awk.1 @@ -1,4 +1,4 @@ -.\" $OpenBSD: awk.1,v 1.48 2020/06/10 21:01:50 millert Exp $ +.\" $OpenBSD: awk.1,v 1.49 2020/06/10 21:02:53 millert Exp $ .\" .\" Copyright (C) Lucent Technologies 1997 .\" All Rights Reserved @@ -419,6 +419,11 @@ The length of the string matched by the function. .It Va RS Input record separator (default newline). +If empty, blank lines separate records. +If more than one character long, +.Va RS +is treated as a regular expression, and records are +separated by text matching the expression. 
.It Va RSTART The starting position of the string matched by the .Fn match diff --git a/usr.bin/awk/b.c b/usr.bin/awk/b.c index 73ff3da84ca..3b6b824ef60 100644 --- a/usr.bin/awk/b.c +++ b/usr.bin/awk/b.c @@ -1,4 +1,4 @@ -/* $OpenBSD: b.c,v 1.25 2020/06/10 21:02:33 millert Exp $ */ +/* $OpenBSD: b.c,v 1.26 2020/06/10 21:02:53 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -621,6 +621,94 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */ return (0); } + +/* + * NAME + * fnematch + * + * DESCRIPTION + * A stream-fed version of nematch which transfers characters to a + * null-terminated buffer. All characters up to and including the last + * character of the matching text or EOF are placed in the buffer. If + * a match is found, patbeg and patlen are set appropriately. + * + * RETURN VALUES + * 0 No match found. + * 1 Match found. + */ + +int fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum) +{ + char *buf = *pbuf; + int bufsize = *pbufsize; + int c, i, j, k, ns, s; + + s = pfa->initstat; + patlen = 0; + + /* + * All indices relative to buf. + * i <= j <= k <= bufsize + * + * i: origin of active substring + * j: current character + * k: destination of next getc() + */ + i = -1, k = 0; + do { + j = i++; + do { + if (++j == k) { + if (k == bufsize) + if (!adjbuf(&buf, &bufsize, bufsize+1, quantum, 0, "fnematch")) + FATAL("stream '%.30s...' too long", buf); + buf[k++] = (c = getc(f)) != EOF ? c : 0; + } + c = buf[j]; + /* assert(c < NCHARS); */ + + if ((ns = pfa->gototab[s][c]) != 0) + s = ns; + else + s = cgoto(pfa, s, c); + + if (pfa->out[s]) { /* final state */ + patlen = j - i + 1; + if (c == 0) /* don't count $ */ + patlen--; + } + } while (buf[j] && s != 1); + s = 2; + } while (buf[i] && !patlen); + + /* adjbuf() may have relocated a resized buffer. Inform the world. 
*/ + *pbuf = buf; + *pbufsize = bufsize; + + if (patlen) { + patbeg = buf + i; + /* + * Under no circumstances is the last character fed to + * the automaton part of the match. It is EOF's nullbyte, + * or it sent the automaton into a state with no further + * transitions available (s==1), or both. Room for a + * terminating nullbyte is guaranteed. + * + * ungetc any chars after the end of matching text + * (except for EOF's nullbyte, if present) and null + * terminate the buffer. + */ + do + if (buf[--k] && ungetc(buf[k], f) == EOF) + FATAL("unable to ungetc '%c'", buf[k]); + while (k > i + patlen); + buf[k] = 0; + return 1; + } + else + return 0; +} + Node *reparse(const char *p) /* parses regular expression pointed to by p */ { /* uses relex() to scan regular expression */ Node *np; @@ -805,7 +893,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom, int atomlen, int firstnum, int secondnum, int special_case) { int i, j; - uschar *buf = 0; + uschar *buf = NULL; int ret = 1; int init_q = (firstnum==0); /* first added char will be ? 
*/ int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */ diff --git a/usr.bin/awk/lib.c b/usr.bin/awk/lib.c index 03e55eaf701..fe828cb38cd 100644 --- a/usr.bin/awk/lib.c +++ b/usr.bin/awk/lib.c @@ -1,4 +1,4 @@ -/* $OpenBSD: lib.c,v 1.29 2020/06/10 21:02:33 millert Exp $ */ +/* $OpenBSD: lib.c,v 1.30 2020/06/10 21:02:53 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -206,41 +206,53 @@ void nextfile(void) int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */ { - int sep, c; + int sep, c, isrec; char *rr, *buf = *pbuf; int bufsize = *pbufsize; char *rs = getsval(rsloc); - if ((sep = *rs) == 0) { - sep = '\n'; - while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ - ; - if (c != EOF) - ungetc(c, inf); - } - for (rr = buf; ; ) { - for (; (c=getc(inf)) != sep && c != EOF; ) { - if (rr-buf+1 > bufsize) - if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1")) - FATAL("input record `%.30s...' too long", buf); + if (*rs && rs[1]) { + int found; + + fa *pfa = makedfa(rs, 1); + found = fnematch(pfa, inf, &buf, &bufsize, recsize); + if (found) + *patbeg = 0; + } else { + if ((sep = *rs) == 0) { + sep = '\n'; + while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ + ; + if (c != EOF) + ungetc(c, inf); + } + for (rr = buf; ; ) { + for (; (c=getc(inf)) != sep && c != EOF; ) { + if (rr-buf+1 > bufsize) + if (!adjbuf(&buf, &bufsize, 1+rr-buf, + recsize, &rr, "readrec 1")) + FATAL("input record `%.30s...' too long", buf); + *rr++ = c; + } + if (*rs == sep || c == EOF) + break; + if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ + break; + if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, + "readrec 2")) + FATAL("input record `%.30s...' 
too long", buf); + *rr++ = '\n'; *rr++ = c; } - if (*rs == sep || c == EOF) - break; - if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ - break; - if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2")) + if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3")) FATAL("input record `%.30s...' too long", buf); - *rr++ = '\n'; - *rr++ = c; + *rr = 0; } - if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3")) - FATAL("input record `%.30s...' too long", buf); - *rr = 0; - DPRINTF( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) ); *pbuf = buf; *pbufsize = bufsize; - return c == EOF && rr == buf ? 0 : 1; + isrec = *buf || !feof(inf); + DPRINTF( ("readrec saw <%s>, returns %d\n", buf, isrec) ); + return isrec; } char *getargv(int n) /* get ARGV[n] */ diff --git a/usr.bin/awk/main.c b/usr.bin/awk/main.c index bd5cd7ca627..a0e4e1415dc 100644 --- a/usr.bin/awk/main.c +++ b/usr.bin/awk/main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: main.c,v 1.30 2020/06/10 21:02:33 millert Exp $ */ +/* $OpenBSD: main.c,v 1.31 2020/06/10 21:02:53 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -23,7 +23,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
****************************************************************/ -const char *version = "version 20190910"; +const char *version = "version 20191006"; #define DEBUG #include <stdio.h> diff --git a/usr.bin/awk/proto.h b/usr.bin/awk/proto.h index 3ec687028d8..0abcf78437b 100644 --- a/usr.bin/awk/proto.h +++ b/usr.bin/awk/proto.h @@ -1,4 +1,4 @@ -/* $OpenBSD: proto.h,v 1.13 2020/06/10 21:02:33 millert Exp $ */ +/* $OpenBSD: proto.h,v 1.14 2020/06/10 21:02:53 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -55,6 +55,7 @@ extern int member(int, const char *); extern int match(fa *, const char *); extern int pmatch(fa *, const char *); extern int nematch(fa *, const char *); +extern int fnematch(fa *, FILE *, char **, int *, int); extern Node *reparse(const char *); extern Node *regexp(void); extern Node *primary(void); -- cgit v1.2.3