summaryrefslogtreecommitdiff
path: root/usr.bin/awk
diff options
context:
space:
mode:
author Todd C. Miller <millert@cvs.openbsd.org> 2020-06-10 21:02:54 +0000
committer Todd C. Miller <millert@cvs.openbsd.org> 2020-06-10 21:02:54 +0000
commit f5914bfab18167f4e862a667b6a68739e16ae0df (patch)
tree fcb74288902330b86a61e12f645d04e0081b5508 /usr.bin/awk
parent 8e2cc9d648485b85db60e59bccee4c99723830fe (diff)
Update awk to Oct 6, 2019 version.
Diffstat (limited to 'usr.bin/awk')
-rw-r--r-- usr.bin/awk/FIXES 6
-rw-r--r-- usr.bin/awk/awk.1 7
-rw-r--r-- usr.bin/awk/b.c 92
-rw-r--r-- usr.bin/awk/lib.c 64
-rw-r--r-- usr.bin/awk/main.c 4
-rw-r--r-- usr.bin/awk/proto.h 3
6 files changed, 143 insertions, 33 deletions
diff --git a/usr.bin/awk/FIXES b/usr.bin/awk/FIXES
index 109eda9c71b..c2bc5785fce 100644
--- a/usr.bin/awk/FIXES
+++ b/usr.bin/awk/FIXES
@@ -1,4 +1,4 @@
-/* $OpenBSD: FIXES,v 1.25 2020/06/10 21:02:33 millert Exp $ */
+/* $OpenBSD: FIXES,v 1.26 2020/06/10 21:02:53 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -26,6 +26,10 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the AWK book
was sent to the printers in August, 1987.
+October 6, 2019:
+ Import code from NetBSD awk that implements RS as a regular
+ expression.
+
September 10, 2019:
Fixes for various array / memory overruns found via gcc's
-fsanitize=unknown. Thanks to Alexander Richardson (Github
diff --git a/usr.bin/awk/awk.1 b/usr.bin/awk/awk.1
index fb257d029ec..770f558d1b5 100644
--- a/usr.bin/awk/awk.1
+++ b/usr.bin/awk/awk.1
@@ -1,4 +1,4 @@
-.\" $OpenBSD: awk.1,v 1.48 2020/06/10 21:01:50 millert Exp $
+.\" $OpenBSD: awk.1,v 1.49 2020/06/10 21:02:53 millert Exp $
.\"
.\" Copyright (C) Lucent Technologies 1997
.\" All Rights Reserved
@@ -419,6 +419,11 @@ The length of the string matched by the
function.
.It Va RS
Input record separator (default newline).
+If empty, blank lines separate records.
+If more than one character long,
+.Va RS
+is treated as a regular expression, and records are
+separated by text matching the expression.
.It Va RSTART
The starting position of the string matched by the
.Fn match
diff --git a/usr.bin/awk/b.c b/usr.bin/awk/b.c
index 73ff3da84ca..3b6b824ef60 100644
--- a/usr.bin/awk/b.c
+++ b/usr.bin/awk/b.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: b.c,v 1.25 2020/06/10 21:02:33 millert Exp $ */
+/* $OpenBSD: b.c,v 1.26 2020/06/10 21:02:53 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -621,6 +621,94 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
return (0);
}
+
+/*
+ * NAME
+ * fnematch
+ *
+ * DESCRIPTION
+ * A stream-fed version of nematch which transfers characters to a
+ * null-terminated buffer. All characters up to and including the last
+ * character of the matching text or EOF are placed in the buffer. If
+ * a match is found, patbeg and patlen are set appropriately.
+ *
+ * RETURN VALUES
+ * 0 No match found.
+ * 1 Match found.
+ */
+
+int fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
+{
+ char *buf = *pbuf;
+ int bufsize = *pbufsize;
+ int c, i, j, k, ns, s;
+
+ s = pfa->initstat;
+ patlen = 0;
+
+ /*
+ * All indices relative to buf.
+ * i <= j <= k <= bufsize
+ *
+ * i: origin of active substring
+ * j: current character
+ * k: destination of next getc()
+ */
+ i = -1, k = 0;
+ do {
+ j = i++;
+ do {
+ if (++j == k) {
+ if (k == bufsize)
+ if (!adjbuf(&buf, &bufsize, bufsize+1, quantum, 0, "fnematch"))
+ FATAL("stream '%.30s...' too long", buf);
+ buf[k++] = (c = getc(f)) != EOF ? c : 0;
+ }
+ c = (uschar)buf[j];
+ /* c indexes gototab[] below, so it must never be negative */
+
+ if ((ns = pfa->gototab[s][c]) != 0)
+ s = ns;
+ else
+ s = cgoto(pfa, s, c);
+
+ if (pfa->out[s]) { /* final state */
+ patlen = j - i + 1;
+ if (c == 0) /* don't count $ */
+ patlen--;
+ }
+ } while (buf[j] && s != 1);
+ s = 2;
+ } while (buf[i] && !patlen);
+
+ /* adjbuf() may have relocated a resized buffer. Inform the world. */
+ *pbuf = buf;
+ *pbufsize = bufsize;
+
+ if (patlen) {
+ patbeg = buf + i;
+ /*
+ * Under no circumstances is the last character fed to
+ * the automaton part of the match. It is EOF's nullbyte,
+ * or it sent the automaton into a state with no further
+ * transitions available (s==1), or both. Room for a
+ * terminating nullbyte is guaranteed.
+ *
+ * ungetc any chars after the end of matching text
+ * (except for EOF's nullbyte, if present) and null
+ * terminate the buffer. The uschar cast keeps 0xff != EOF.
+ */
+ do
+ if (buf[--k] && ungetc((uschar)buf[k], f) == EOF)
+ FATAL("unable to ungetc '%c'", buf[k]);
+ while (k > i + patlen);
+ buf[k] = 0;
+ return 1;
+ }
+ else
+ return 0;
+}
+
Node *reparse(const char *p) /* parses regular expression pointed to by p */
{ /* uses relex() to scan regular expression */
Node *np;
@@ -805,7 +893,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
int atomlen, int firstnum, int secondnum, int special_case)
{
int i, j;
- uschar *buf = 0;
+ uschar *buf = NULL;
int ret = 1;
int init_q = (firstnum==0); /* first added char will be ? */
int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */
diff --git a/usr.bin/awk/lib.c b/usr.bin/awk/lib.c
index 03e55eaf701..fe828cb38cd 100644
--- a/usr.bin/awk/lib.c
+++ b/usr.bin/awk/lib.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: lib.c,v 1.29 2020/06/10 21:02:33 millert Exp $ */
+/* $OpenBSD: lib.c,v 1.30 2020/06/10 21:02:53 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -206,41 +206,53 @@ void nextfile(void)
int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */
{
- int sep, c;
+ int sep, c, isrec;
char *rr, *buf = *pbuf;
int bufsize = *pbufsize;
char *rs = getsval(rsloc);
- if ((sep = *rs) == 0) {
- sep = '\n';
- while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
- ;
- if (c != EOF)
- ungetc(c, inf);
- }
- for (rr = buf; ; ) {
- for (; (c=getc(inf)) != sep && c != EOF; ) {
- if (rr-buf+1 > bufsize)
- if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1"))
- FATAL("input record `%.30s...' too long", buf);
+ if (*rs && rs[1]) { /* RS has 2+ chars: match it as a regex */
+ int found;
+
+ fa *pfa = makedfa(rs, 1);
+ found = fnematch(pfa, inf, &buf, &bufsize, recsize); /* fills buf up to end of match or EOF */
+ if (found)
+ *patbeg = 0; /* cut the record where the separator match begins */
+ } else { /* empty or single-char RS */
+ if ((sep = *rs) == 0) {
+ sep = '\n';
+ while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
+ ;
+ if (c != EOF)
+ ungetc(c, inf);
+ }
+ for (rr = buf; ; ) {
+ for (; (c=getc(inf)) != sep && c != EOF; ) {
+ if (rr-buf+1 > bufsize)
+ if (!adjbuf(&buf, &bufsize, 1+rr-buf,
+ recsize, &rr, "readrec 1"))
+ FATAL("input record `%.30s...' too long", buf);
+ *rr++ = c;
+ }
+ if (*rs == sep || c == EOF)
+ break;
+ if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
+ break;
+ if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr,
+ "readrec 2"))
+ FATAL("input record `%.30s...' too long", buf);
+ *rr++ = '\n';
*rr++ = c;
}
- if (*rs == sep || c == EOF)
- break;
- if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
- break;
- if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2"))
+ if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
FATAL("input record `%.30s...' too long", buf);
- *rr++ = '\n';
- *rr++ = c;
+ *rr = 0;
}
- if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
- FATAL("input record `%.30s...' too long", buf);
- *rr = 0;
- DPRINTF( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
*pbuf = buf;
*pbufsize = bufsize;
- return c == EOF && rr == buf ? 0 : 1;
+ isrec = *buf || !feof(inf); /* no record only when buf is empty and inf hit EOF */
+ DPRINTF( ("readrec saw <%s>, returns %d\n", buf, isrec) );
+ return isrec;
}
char *getargv(int n) /* get ARGV[n] */
diff --git a/usr.bin/awk/main.c b/usr.bin/awk/main.c
index bd5cd7ca627..a0e4e1415dc 100644
--- a/usr.bin/awk/main.c
+++ b/usr.bin/awk/main.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: main.c,v 1.30 2020/06/10 21:02:33 millert Exp $ */
+/* $OpenBSD: main.c,v 1.31 2020/06/10 21:02:53 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -23,7 +23,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
-const char *version = "version 20190910";
+const char *version = "version 20191006";
#define DEBUG
#include <stdio.h>
diff --git a/usr.bin/awk/proto.h b/usr.bin/awk/proto.h
index 3ec687028d8..0abcf78437b 100644
--- a/usr.bin/awk/proto.h
+++ b/usr.bin/awk/proto.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: proto.h,v 1.13 2020/06/10 21:02:33 millert Exp $ */
+/* $OpenBSD: proto.h,v 1.14 2020/06/10 21:02:53 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -55,6 +55,7 @@ extern int member(int, const char *);
extern int match(fa *, const char *);
extern int pmatch(fa *, const char *);
extern int nematch(fa *, const char *);
+extern int fnematch(fa *, FILE *, char **, int *, int);
extern Node *reparse(const char *);
extern Node *regexp(void);
extern Node *primary(void);