summary refs log tree commit diff
diff options
context:
space:
mode:
author Todd C. Miller <millert@cvs.openbsd.org> 2020-06-13 01:21:02 +0000
committer Todd C. Miller <millert@cvs.openbsd.org> 2020-06-13 01:21:02 +0000
commit 8175fe60828b7ce7020cd2daaf704528047e3ffa (patch)
tree 3fa1f10e62dec70935d5ce3c26ef5002bf5b4583
parent 317faf26d7c732fe1661e940b697b94c509d22f8 (diff)
Integrate the bsd-features branch from awk github.
Implements the gensub(), systime() and strftime() functions for greater gawk compatibility.
-rw-r--r-- usr.bin/awk/awk.1 106
-rw-r--r-- usr.bin/awk/awk.h 4
-rw-r--r-- usr.bin/awk/awkgram.y 20
-rw-r--r-- usr.bin/awk/lex.c 5
-rw-r--r-- usr.bin/awk/maketab.c 3
-rw-r--r-- usr.bin/awk/parse.c 25
-rw-r--r-- usr.bin/awk/proto.h 5
-rw-r--r-- usr.bin/awk/run.c 185
8 files changed, 339 insertions, 14 deletions
diff --git a/usr.bin/awk/awk.1 b/usr.bin/awk/awk.1
index 4d320b705f2..50e6690b521 100644
--- a/usr.bin/awk/awk.1
+++ b/usr.bin/awk/awk.1
@@ -1,4 +1,4 @@
-.\" $OpenBSD: awk.1,v 1.51 2020/06/13 01:19:55 millert Exp $
+.\" $OpenBSD: awk.1,v 1.52 2020/06/13 01:21:01 millert Exp $
.\"
.\" Copyright (C) Lucent Technologies 1997
.\" All Rights Reserved
@@ -495,6 +495,62 @@ is omitted, the time of day is used instead.
.El
.Ss String Functions
.Bl -tag -width "split(s, a, fs)"
+.It Fn gensub r s h [t]
+Search the target string
+.Ar t
+for matches of the regular expression
+.Ar r .
+If
+.Ar h
+is a string beginning with
+.Ic g
+or
+.Ic G ,
+then replace all matches of
+.Ar r
+with
+.Ar s .
+Otherwise,
+.Ar h
+is a number indicating which match of
+.Ar r
+to replace.
+If no
+.Ar t
+is supplied,
+.Va $0
+is used instead.
+.\"Within the replacement text
+.\".Ar s ,
+.\"the sequence
+.\".Ar \en ,
+.\"where
+.\".Ar n
+.\"is a digit from 1 to 9, may be used to indicate just the text that
+.\"matched the
+.\".Ar n Ap th
+.\"parenthesized subexpression.
+.\"The sequence
+.\".Ic \e0
+.\"represents the entire text, as does the character
+.\".Ic & .
+Unlike
+.Fn sub
+and
+.Fn gsub ,
+the modified string is returned as the result of the function,
+and the original target is
+.Em not
+changed.
+Note that
+.Ar \en
+sequences within the replacement string
+.Ar s ,
+as supported by GNU
+.Nm ,
+are
+.Em not
+supported at this time.
.It Fn gsub r t s
The same as
.Fn sub
@@ -605,6 +661,37 @@ Returns a copy of
with all lower-case characters translated to their
corresponding upper-case equivalents.
.El
+.Ss Time Functions
+This version of
+.Nm
+provides the following functions for obtaining and formatting time
+stamps.
+.Bl -tag -width indent
+.It Fn strftime "[format [, timestamp]]"
+Formats
+.Ar timestamp
+according to the string
+.Ar format .
+The format string may contain any of the conversion specifications described
+in the
+.Xr strftime 3
+manual page, as well as any arbitrary text.
+The
+.Ar timestamp
+must be in the same form as a value returned by
+.Fn systime .
+If
+.Ar timestamp
+is not specified, the current time is used.
+If
+.Ar format
+is not specified, a default format equivalent to the output of
+.Xr date 1
+is used.
+.It Fn systime
+Returns the value of time in seconds since 0 hours, 0 minutes,
+0 seconds, January 1, 1970, Coordinated Universal Time (UTC).
+.El
.Ss Input/Output and General Functions
.Bl -tag -width "getline [var] < file"
.It Fn close expr
@@ -780,10 +867,12 @@ Print an error message to standard error:
.Ed
.Sh SEE ALSO
.Xr cut 1 ,
+.Xr date 1 ,
.Xr grep 1 ,
.Xr lex 1 ,
.Xr printf 1 ,
.Xr sed 1 ,
+.Xr strftime 3 ,
.Xr re_format 7 ,
.Xr script 7
.Rs
@@ -813,9 +902,18 @@ The flags
.Op Fl \&dV
and
.Op Fl safe ,
-as well as the commands
-.Cm fflush , compl , and , or ,
-.Cm xor , lshift , rshift ,
+as well as the functions
+.Fn fflush ,
+.Fn gensub ,
+.Fn compl ,
+.Fn and ,
+.Fn or ,
+.Fn xor ,
+.Fn lshift ,
+.Fn rshift ,
+.Fn strftime
+and
+.Fn systime
are extensions to that specification.
.Sh HISTORY
An
diff --git a/usr.bin/awk/awk.h b/usr.bin/awk/awk.h
index 080bf94cc22..d9981a4d228 100644
--- a/usr.bin/awk/awk.h
+++ b/usr.bin/awk/awk.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: awk.h,v 1.24 2020/06/13 01:19:55 millert Exp $ */
+/* $OpenBSD: awk.h,v 1.25 2020/06/13 01:21:01 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -159,6 +159,8 @@ extern Cell *symtabloc; /* SYMTAB */
#define FCOMPL 18
#define FLSHIFT 19
#define FRSHIFT 20
+#define FSYSTIME 21
+#define FSTRFTIME 22
/* Node: parse tree is made of nodes, with Cell's at bottom */
diff --git a/usr.bin/awk/awkgram.y b/usr.bin/awk/awkgram.y
index 85034770f07..4d76c9d5e22 100644
--- a/usr.bin/awk/awkgram.y
+++ b/usr.bin/awk/awkgram.y
@@ -1,4 +1,4 @@
-/* $OpenBSD: awkgram.y,v 1.13 2020/06/10 21:05:02 millert Exp $ */
+/* $OpenBSD: awkgram.y,v 1.14 2020/06/13 01:21:01 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -54,7 +54,7 @@ Node *arglist = 0; /* list of args for current function */
%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
%token <i> AND BOR APPEND EQ GE GT LE LT NE IN
%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
-%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
+%token <i> GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token <i> ADD MINUS MULT DIVIDE MOD
%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token <i> PRINT PRINTF SPRINTF
@@ -376,6 +376,22 @@ term:
| INCR var { $$ = op1(PREINCR, $2); }
| var DECR { $$ = op1(POSTDECR, $1); }
| var INCR { $$ = op1(POSTINCR, $1); }
+ | GENSUB '(' reg_expr comma pattern comma pattern ')'
+ { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); }
+ | GENSUB '(' pattern comma pattern comma pattern ')'
+ { if (constnode($3))
+ $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
+ else
+ $$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
+ }
+ | GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
+ { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); }
+ | GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
+ { if (constnode($3))
+ $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
+ else
+ $$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
+ }
| GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); }
| GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); }
| GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
diff --git a/usr.bin/awk/lex.c b/usr.bin/awk/lex.c
index 18a5ecac785..cfe7faac44c 100644
--- a/usr.bin/awk/lex.c
+++ b/usr.bin/awk/lex.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: lex.c,v 1.20 2020/06/13 01:19:55 millert Exp $ */
+/* $OpenBSD: lex.c,v 1.21 2020/06/13 01:21:01 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -64,6 +64,7 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
{ "for", FOR, FOR },
{ "func", FUNC, FUNC },
{ "function", FUNC, FUNC },
+ { "gensub", GENSUB, GENSUB },
{ "getline", GETLINE, GETLINE },
{ "gsub", GSUB, GSUB },
{ "if", IF, IF },
@@ -87,9 +88,11 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
{ "sprintf", SPRINTF, SPRINTF },
{ "sqrt", FSQRT, BLTIN },
{ "srand", FSRAND, BLTIN },
+ { "strftime", FSTRFTIME, BLTIN },
{ "sub", SUB, SUB },
{ "substr", SUBSTR, SUBSTR },
{ "system", FSYSTEM, BLTIN },
+ { "systime", FSYSTIME, BLTIN },
{ "tolower", FTOLOWER, BLTIN },
{ "toupper", FTOUPPER, BLTIN },
{ "while", WHILE, WHILE },
diff --git a/usr.bin/awk/maketab.c b/usr.bin/awk/maketab.c
index d39cc676406..c3d4bc202f6 100644
--- a/usr.bin/awk/maketab.c
+++ b/usr.bin/awk/maketab.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: maketab.c,v 1.18 2020/06/10 21:05:50 millert Exp $ */
+/* $OpenBSD: maketab.c,v 1.19 2020/06/13 01:21:01 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -105,6 +105,7 @@ struct xx
{ ARG, "arg", "arg" },
{ VARNF, "getnf", "NF" },
{ GETLINE, "awkgetline", "getline" },
+ { GENSUB, "gensub", "gensub" },
{ 0, "", "" },
};
diff --git a/usr.bin/awk/parse.c b/usr.bin/awk/parse.c
index 0394da3a7e8..01b5b7e1304 100644
--- a/usr.bin/awk/parse.c
+++ b/usr.bin/awk/parse.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: parse.c,v 1.9 2020/06/10 21:03:36 millert Exp $ */
+/* $OpenBSD: parse.c,v 1.10 2020/06/13 01:21:01 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -94,6 +94,20 @@ Node *node4(int a, Node *b, Node *c, Node *d, Node *e)
return(x);
}
+Node *node5(int a, Node *b, Node *c, Node *d, Node *e, Node *f) /* build a node with five children */
+{
+ Node *x;
+
+ x = nodealloc(5); /* room for five narg[] slots */
+ x->nobj = a; /* a is stored as the node's nobj */
+ x->narg[0] = b;
+ x->narg[1] = c;
+ x->narg[2] = d;
+ x->narg[3] = e;
+ x->narg[4] = f;
+ return(x);
+}
+
Node *stat1(int a, Node *b)
{
Node *x;
@@ -166,6 +180,15 @@ Node *op4(int a, Node *b, Node *c, Node *d, Node *e)
return(x);
}
+Node *op5(int a, Node *b, Node *c, Node *d, Node *e, Node *f) /* five-child node tagged as an expression */
+{
+ Node *x;
+
+ x = node5(a,b,c,d,e,f); /* same argument order as node5() */
+ x->ntype = NEXPR; /* the only difference from a plain node5() result */
+ return(x);
+}
+
Node *celltonode(Cell *a, int b)
{
Node *x;
diff --git a/usr.bin/awk/proto.h b/usr.bin/awk/proto.h
index 3a876c56ce7..de70e5896d8 100644
--- a/usr.bin/awk/proto.h
+++ b/usr.bin/awk/proto.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: proto.h,v 1.18 2020/06/10 21:06:09 millert Exp $ */
+/* $OpenBSD: proto.h,v 1.19 2020/06/13 01:21:01 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -75,12 +75,14 @@ extern Node *node1(int, Node *);
extern Node *node2(int, Node *, Node *);
extern Node *node3(int, Node *, Node *, Node *);
extern Node *node4(int, Node *, Node *, Node *, Node *);
+extern Node *node5(int, Node *, Node *, Node *, Node *, Node *);
extern Node *stat3(int, Node *, Node *, Node *);
extern Node *op2(int, Node *, Node *);
extern Node *op1(int, Node *);
extern Node *stat1(int, Node *);
extern Node *op3(int, Node *, Node *, Node *);
extern Node *op4(int, Node *, Node *, Node *, Node *);
+extern Node *op5(int, Node *, Node *, Node *, Node *, Node *);
extern Node *stat2(int, Node *, Node *);
extern Node *stat4(int, Node *, Node *, Node *, Node *);
extern Node *celltonode(Cell *, int);
@@ -198,6 +200,7 @@ extern Cell *closefile(Node **, int);
extern void closeall(void);
extern Cell *sub(Node **, int);
extern Cell *gsub(Node **, int);
+extern Cell *gensub(Node **, int);
extern FILE *popen(const char *, const char *);
extern int pclose(FILE *);
diff --git a/usr.bin/awk/run.c b/usr.bin/awk/run.c
index b150d90a55a..10674135ffc 100644
--- a/usr.bin/awk/run.c
+++ b/usr.bin/awk/run.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: run.c,v 1.58 2020/06/13 01:19:55 millert Exp $ */
+/* $OpenBSD: run.c,v 1.59 2020/06/13 01:21:01 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -1581,12 +1581,14 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
{
Cell *x, *y;
Awkfloat u;
- int t;
+ int t, sz;
Awkfloat tmp;
- char *buf;
+ char *buf, *fmt;
Node *nextarg;
FILE *fp;
int status = 0;
+ time_t tv;
+ struct tm *tm;
t = ptoi(a[0]);
x = execute(a[1]);
@@ -1738,6 +1740,42 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
else
u = fflush(fp);
break;
+ case FSYSTIME:
+ u = time((time_t *) 0);
+ break;
+ case FSTRFTIME:
+ /* strftime([format [,timestamp]]) */
+ if (nextarg) {
+ y = execute(nextarg);
+ nextarg = nextarg->nnext;
+ tv = (time_t) getfval(y);
+ tempfree(y);
+ } else
+ tv = time((time_t *) 0);
+ tm = localtime(&tv);
+ if (tm == NULL)
+ FATAL("bad time %ld", (long)tv);
+
+ if (isrec(x)) {
+ /* format argument not provided, use default */
+ fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
+ } else
+ fmt = tostring(getsval(x));
+
+ sz = 32;
+ buf = NULL;
+ do {
+ if ((buf = reallocarray(buf, 2, sz)) == NULL)
+ FATAL("out of memory in strftime");
+ sz *= 2;
+ } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
+
+ y = gettemp();
+ setsval(y, buf);
+ free(fmt);
+ free(buf);
+
+ return y;
default: /* can't happen */
FATAL("illegal function type %d", t);
break;
@@ -2118,6 +2156,147 @@ Cell *gsub(Node **a, int nnn) /* global substitute */
return(x);
}
+Cell *gensub(Node **a, int nnn) /* global selective substitute; returns res, a copycell() of the target a[4] marked CTEMP */
+ /* XXX incomplete - doesn't support backreferences \0 ... \9 */
+{
+ Cell *x, *y, *res, *h;
+ char *rptr;
+ const char *sptr;
+ char *buf, *pb;
+ const char *t, *q;
+ fa *pfa;
+ int mflag, tempstat, num, whichm; /* num: matches counted so far; whichm: 0-based match to replace, -1 for all */
+ int bufsz = recsize;
+
+ if ((buf = malloc(bufsz)) == NULL)
+ FATAL("out of memory in gensub");
+ mflag = 0; /* if mflag == 0, can replace empty string */
+ num = 0;
+ x = execute(a[4]); /* source string */
+ t = getsval(x);
+ res = copycell(x); /* target string - initially copy of source */
+ res->csub = CTEMP; /* result values are temporary */
+ if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
+ pfa = (fa *) a[1]; /* regular expression */
+ else { /* otherwise a[1] is an expression whose string value is compiled here */
+ y = execute(a[1]);
+ pfa = makedfa(getsval(y), 1);
+ tempfree(y);
+ }
+ y = execute(a[2]); /* replacement string */
+ h = execute(a[3]); /* which matches should be replaced */
+ sptr = getsval(h);
+ if (sptr[0] == 'g' || sptr[0] == 'G')
+ whichm = -1; /* negative: the skip test in the loop below never fires, so every match is replaced */
+ else {
+ /*
+ * The specified number is index of replacement, starting
+ * from 1. GNU awk treats an index lower than 1 the same as
+ * 1, we do the same for compatibility.
+ */
+ whichm = (int) getfval(h) - 1;
+ if (whichm < 0)
+ whichm = 0;
+ }
+ tempfree(h);
+
+ if (pmatch(pfa, t)) { /* on no match at all, res is returned as the untouched copy */
+ char *sl;
+
+ tempstat = pfa->initstat; /* saved here, restored after the done: label */
+ pfa->initstat = 2;
+ pb = buf;
+ rptr = getsval(y);
+ /*
+ * XXX if there are any backreferences in subst string,
+ * complain now.
+ */
+ for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
+ if (strchr("0123456789", sl[1])) {
+ FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
+ }
+ }
+
+ do {
+ if (whichm >= 0 && whichm != num) { /* not the selected match: pass it through unchanged */
+ num++;
+ adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
+
+ /* copy the part of string up to and including
+ * match to output buffer */
+ while (t < patbeg + patlen)
+ *pb++ = *t++;
+ continue; /* jumps to the pmatch() test at the bottom of the do-while */
+ }
+
+ if (patlen == 0 && *patbeg != 0) { /* matched empty string */
+ if (mflag == 0) { /* can replace empty */
+ num++;
+ sptr = rptr;
+ while (*sptr != 0) { /* emit the replacement text */
+ adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
+ if (*sptr == '\\') {
+ backsub(&pb, &sptr);
+ } else if (*sptr == '&') { /* & stands for the matched text */
+ sptr++;
+ adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
+ for (q = patbeg; q < patbeg+patlen; )
+ *pb++ = *q++;
+ } else
+ *pb++ = *sptr++;
+ }
+ }
+ if (*t == 0) /* at end */
+ goto done;
+ adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
+ *pb++ = *t++; /* copy one source character so the scan moves forward */
+ if (pb > buf + bufsz) /* BUG: not sure of this test */
+ FATAL("gensub result0 %.30s too big; can't happen", buf);
+ mflag = 0;
+ }
+ else { /* matched nonempty string */
+ num++;
+ sptr = t;
+ adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
+ while (sptr < patbeg) /* copy the text preceding the match */
+ *pb++ = *sptr++;
+ sptr = rptr;
+ while (*sptr != 0) { /* emit the replacement text */
+ adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
+ if (*sptr == '\\') {
+ backsub(&pb, &sptr);
+ } else if (*sptr == '&') { /* & stands for the matched text */
+ sptr++;
+ adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
+ for (q = patbeg; q < patbeg+patlen; )
+ *pb++ = *q++;
+ } else
+ *pb++ = *sptr++;
+ }
+ t = patbeg + patlen; /* resume scanning just past the match */
+ if (patlen == 0 || *t == 0 || *(t-1) == 0)
+ goto done;
+ if (pb > buf + bufsz)
+ FATAL("gensub result1 %.30s too big; can't happen", buf);
+ mflag = 1;
+ }
+ } while (pmatch(pfa,t));
+ sptr = t; /* no further match: copy the remainder, terminating NUL included */
+ adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
+ while ((*pb++ = *sptr++) != 0)
+ ;
+ done: if (pb > buf + bufsz)
+ FATAL("gensub result2 %.30s too big; can't happen", buf);
+ *pb = '\0';
+ setsval(res, buf); /* the result goes into res; x itself is never assigned to */
+ pfa->initstat = tempstat;
+ }
+ tempfree(x);
+ tempfree(y);
+ free(buf);
+ return(res);
+}
+
void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
{ /* sptr[0] == '\\' */
char *pb = *pb_ptr;