diff options
author | Todd C. Miller <millert@cvs.openbsd.org> | 2020-06-13 01:21:02 +0000 |
---|---|---|
committer | Todd C. Miller <millert@cvs.openbsd.org> | 2020-06-13 01:21:02 +0000 |
commit | 8175fe60828b7ce7020cd2daaf704528047e3ffa (patch) | |
tree | 3fa1f10e62dec70935d5ce3c26ef5002bf5b4583 | |
parent | 317faf26d7c732fe1661e940b697b94c509d22f8 (diff) |
Integrate the bsd-features branch from awk github.
Implements the gensub(), systime() and strftime() functions for
greater gawk compatibility.
-rw-r--r-- | usr.bin/awk/awk.1 | 106 | ||||
-rw-r--r-- | usr.bin/awk/awk.h | 4 | ||||
-rw-r--r-- | usr.bin/awk/awkgram.y | 20 | ||||
-rw-r--r-- | usr.bin/awk/lex.c | 5 | ||||
-rw-r--r-- | usr.bin/awk/maketab.c | 3 | ||||
-rw-r--r-- | usr.bin/awk/parse.c | 25 | ||||
-rw-r--r-- | usr.bin/awk/proto.h | 5 | ||||
-rw-r--r-- | usr.bin/awk/run.c | 185 |
8 files changed, 339 insertions, 14 deletions
diff --git a/usr.bin/awk/awk.1 b/usr.bin/awk/awk.1 index 4d320b705f2..50e6690b521 100644 --- a/usr.bin/awk/awk.1 +++ b/usr.bin/awk/awk.1 @@ -1,4 +1,4 @@ -.\" $OpenBSD: awk.1,v 1.51 2020/06/13 01:19:55 millert Exp $ +.\" $OpenBSD: awk.1,v 1.52 2020/06/13 01:21:01 millert Exp $ .\" .\" Copyright (C) Lucent Technologies 1997 .\" All Rights Reserved @@ -495,6 +495,62 @@ is omitted, the time of day is used instead. .El .Ss String Functions .Bl -tag -width "split(s, a, fs)" +.It Fn gensub r s h [t] +Search the target string +.Ar t +for matches of the regular expression +.Ar r . +If +.Ar h +is a string beginning with +.Ic g +or +.Ic G , +then replace all matches of +.Ar r +with +.Ar s . +Otherwise, +.Ar h +is a number indicating which match of +.Ar r +to replace. +If no +.Ar t +is supplied, +.Va $0 +is used instead. +.\"Within the replacement text +.\".Ar s , +.\"the sequence +.\".Ar \en , +.\"where +.\".Ar n +.\"is a digit from 1 to 9, may be used to indicate just the text that +.\"matched the +.\".Ar n Ap th +.\"parenthesized subexpression. +.\"The sequence +.\".Ic \e0 +.\"represents the entire text, as does the character +.\".Ic & . +Unlike +.Fn sub +and +.Fn gsub , +the modified string is returned as the result of the function, +and the original target is +.Em not +changed. +Note that +.Ar \en +sequences within the replacement string +.Ar s , +as supported by GNU +.Nm , +are +.Em not +supported at this time. .It Fn gsub r t s The same as .Fn sub @@ -605,6 +661,37 @@ Returns a copy of with all lower-case characters translated to their corresponding upper-case equivalents. .El +.Ss Time Functions +This version of +.Nm +provides the following functions for obtaining and formatting time +stamps. +.Bl -tag -width indent +.It Fn strftime "[format [, timestamp]]" +Formats +.Ar timestamp +according to the string +.Ar format . +The format string may contain any of the conversion specifications described +in the +.Xr strftime 3 +manual page, as well as any arbitrary text. 
+The +.Ar timestamp +must be in the same form as a value returned by +.Fn systime . +If +.Ar timestamp +is not specified, the current time is used. +If +.Ar format +is not specified, a default format equivalent to the output of +.Xr date 1 +is used. +.It Fn systime +Returns the value of time in seconds since 0 hours, 0 minutes, +0 seconds, January 1, 1970, Coordinated Universal Time (UTC). +.El .Ss Input/Output and General Functions .Bl -tag -width "getline [var] < file" .It Fn close expr @@ -780,10 +867,12 @@ Print an error message to standard error: .Ed .Sh SEE ALSO .Xr cut 1 , +.Xr date 1 , .Xr grep 1 , .Xr lex 1 , .Xr printf 1 , .Xr sed 1 , +.Xr strftime 3 , .Xr re_format 7 , .Xr script 7 .Rs @@ -813,9 +902,18 @@ The flags .Op Fl \&dV and .Op Fl safe , -as well as the commands -.Cm fflush , compl , and , or , -.Cm xor , lshift , rshift , +as well as the functions +.Fn fflush , +.Fn gensub , +.Fn compl , +.Fn and , +.Fn or , +.Fn xor , +.Fn lshift , +.Fn rshift , +.Fn strftime +and +.Fn systime are extensions to that specification. 
.Sh HISTORY An diff --git a/usr.bin/awk/awk.h b/usr.bin/awk/awk.h index 080bf94cc22..d9981a4d228 100644 --- a/usr.bin/awk/awk.h +++ b/usr.bin/awk/awk.h @@ -1,4 +1,4 @@ -/* $OpenBSD: awk.h,v 1.24 2020/06/13 01:19:55 millert Exp $ */ +/* $OpenBSD: awk.h,v 1.25 2020/06/13 01:21:01 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -159,6 +159,8 @@ extern Cell *symtabloc; /* SYMTAB */ #define FCOMPL 18 #define FLSHIFT 19 #define FRSHIFT 20 +#define FSYSTIME 21 +#define FSTRFTIME 22 /* Node: parse tree is made of nodes, with Cell's at bottom */ diff --git a/usr.bin/awk/awkgram.y b/usr.bin/awk/awkgram.y index 85034770f07..4d76c9d5e22 100644 --- a/usr.bin/awk/awkgram.y +++ b/usr.bin/awk/awkgram.y @@ -1,4 +1,4 @@ -/* $OpenBSD: awkgram.y,v 1.13 2020/06/10 21:05:02 millert Exp $ */ +/* $OpenBSD: awkgram.y,v 1.14 2020/06/13 01:21:01 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -54,7 +54,7 @@ Node *arglist = 0; /* list of args for current function */ %token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO %token <i> AND BOR APPEND EQ GE GT LE LT NE IN %token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC -%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE +%token <i> GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE %token <i> ADD MINUS MULT DIVIDE MOD %token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ %token <i> PRINT PRINTF SPRINTF @@ -376,6 +376,22 @@ term: | INCR var { $$ = op1(PREINCR, $2); } | var DECR { $$ = op1(POSTDECR, $1); } | var INCR { $$ = op1(POSTINCR, $1); } + | GENSUB '(' reg_expr comma pattern comma pattern ')' + { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); } + | GENSUB '(' pattern comma pattern comma pattern ')' + { if (constnode($3)) + $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), 
$5, $7, rectonode()); + else + $$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode()); + } + | GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')' + { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); } + | GENSUB '(' pattern comma pattern comma pattern comma pattern ')' + { if (constnode($3)) + $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9); + else + $$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9); + } | GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); } | GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); } | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); } diff --git a/usr.bin/awk/lex.c b/usr.bin/awk/lex.c index 18a5ecac785..cfe7faac44c 100644 --- a/usr.bin/awk/lex.c +++ b/usr.bin/awk/lex.c @@ -1,4 +1,4 @@ -/* $OpenBSD: lex.c,v 1.20 2020/06/13 01:19:55 millert Exp $ */ +/* $OpenBSD: lex.c,v 1.21 2020/06/13 01:21:01 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -64,6 +64,7 @@ const Keyword keywords[] = { /* keep sorted: binary searched */ { "for", FOR, FOR }, { "func", FUNC, FUNC }, { "function", FUNC, FUNC }, + { "gensub", GENSUB, GENSUB }, { "getline", GETLINE, GETLINE }, { "gsub", GSUB, GSUB }, { "if", IF, IF }, @@ -87,9 +88,11 @@ const Keyword keywords[] = { /* keep sorted: binary searched */ { "sprintf", SPRINTF, SPRINTF }, { "sqrt", FSQRT, BLTIN }, { "srand", FSRAND, BLTIN }, + { "strftime", FSTRFTIME, BLTIN }, { "sub", SUB, SUB }, { "substr", SUBSTR, SUBSTR }, { "system", FSYSTEM, BLTIN }, + { "systime", FSYSTIME, BLTIN }, { "tolower", FTOLOWER, BLTIN }, { "toupper", FTOUPPER, BLTIN }, { "while", WHILE, WHILE }, diff --git a/usr.bin/awk/maketab.c b/usr.bin/awk/maketab.c index d39cc676406..c3d4bc202f6 100644 --- a/usr.bin/awk/maketab.c +++ b/usr.bin/awk/maketab.c @@ -1,4 +1,4 @@ -/* $OpenBSD: maketab.c,v 1.18 2020/06/10 21:05:50 millert Exp $ */ +/* $OpenBSD: maketab.c,v 1.19 2020/06/13 01:21:01 millert Exp $ 
*/ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -105,6 +105,7 @@ struct xx { ARG, "arg", "arg" }, { VARNF, "getnf", "NF" }, { GETLINE, "awkgetline", "getline" }, + { GENSUB, "gensub", "gensub" }, { 0, "", "" }, }; diff --git a/usr.bin/awk/parse.c b/usr.bin/awk/parse.c index 0394da3a7e8..01b5b7e1304 100644 --- a/usr.bin/awk/parse.c +++ b/usr.bin/awk/parse.c @@ -1,4 +1,4 @@ -/* $OpenBSD: parse.c,v 1.9 2020/06/10 21:03:36 millert Exp $ */ +/* $OpenBSD: parse.c,v 1.10 2020/06/13 01:21:01 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -94,6 +94,20 @@ Node *node4(int a, Node *b, Node *c, Node *d, Node *e) return(x); } +Node *node5(int a, Node *b, Node *c, Node *d, Node *e, Node *f) +{ + Node *x; + + x = nodealloc(5); + x->nobj = a; + x->narg[0] = b; + x->narg[1] = c; + x->narg[2] = d; + x->narg[3] = e; + x->narg[4] = f; + return(x); +} + Node *stat1(int a, Node *b) { Node *x; @@ -166,6 +180,15 @@ Node *op4(int a, Node *b, Node *c, Node *d, Node *e) return(x); } +Node *op5(int a, Node *b, Node *c, Node *d, Node *e, Node *f) +{ + Node *x; + + x = node5(a,b,c,d,e,f); + x->ntype = NEXPR; + return(x); +} + Node *celltonode(Cell *a, int b) { Node *x; diff --git a/usr.bin/awk/proto.h b/usr.bin/awk/proto.h index 3a876c56ce7..de70e5896d8 100644 --- a/usr.bin/awk/proto.h +++ b/usr.bin/awk/proto.h @@ -1,4 +1,4 @@ -/* $OpenBSD: proto.h,v 1.18 2020/06/10 21:06:09 millert Exp $ */ +/* $OpenBSD: proto.h,v 1.19 2020/06/13 01:21:01 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -75,12 +75,14 @@ extern Node *node1(int, Node *); extern Node *node2(int, Node *, Node *); extern Node *node3(int, Node *, Node *, Node *); extern Node *node4(int, Node *, Node *, Node *, Node *); +extern Node *node5(int, Node *, Node *, 
Node *, Node *, Node *); extern Node *stat3(int, Node *, Node *, Node *); extern Node *op2(int, Node *, Node *); extern Node *op1(int, Node *); extern Node *stat1(int, Node *); extern Node *op3(int, Node *, Node *, Node *); extern Node *op4(int, Node *, Node *, Node *, Node *); +extern Node *op5(int, Node *, Node *, Node *, Node *, Node *); extern Node *stat2(int, Node *, Node *); extern Node *stat4(int, Node *, Node *, Node *, Node *); extern Node *celltonode(Cell *, int); @@ -198,6 +200,7 @@ extern Cell *closefile(Node **, int); extern void closeall(void); extern Cell *sub(Node **, int); extern Cell *gsub(Node **, int); +extern Cell *gensub(Node **, int); extern FILE *popen(const char *, const char *); extern int pclose(FILE *); diff --git a/usr.bin/awk/run.c b/usr.bin/awk/run.c index b150d90a55a..10674135ffc 100644 --- a/usr.bin/awk/run.c +++ b/usr.bin/awk/run.c @@ -1,4 +1,4 @@ -/* $OpenBSD: run.c,v 1.58 2020/06/13 01:19:55 millert Exp $ */ +/* $OpenBSD: run.c,v 1.59 2020/06/13 01:21:01 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -1581,12 +1581,14 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis { Cell *x, *y; Awkfloat u; - int t; + int t, sz; Awkfloat tmp; - char *buf; + char *buf, *fmt; Node *nextarg; FILE *fp; int status = 0; + time_t tv; + struct tm *tm; t = ptoi(a[0]); x = execute(a[1]); @@ -1738,6 +1740,42 @@ Cell *bltin(Node **a, int n) /* builtin functions. 
a[0] is type, a[1] is arg lis else u = fflush(fp); break; + case FSYSTIME: + u = time((time_t *) 0); + break; + case FSTRFTIME: + /* strftime([format [,timestamp]]) */ + if (nextarg) { + y = execute(nextarg); + nextarg = nextarg->nnext; + tv = (time_t) getfval(y); + tempfree(y); + } else + tv = time((time_t *) 0); + tm = localtime(&tv); + if (tm == NULL) + FATAL("bad time %ld", (long)tv); + + if (isrec(x)) { + /* format argument not provided, use default */ + fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); + } else + fmt = tostring(getsval(x)); + + sz = 32; + buf = NULL; + do { + if ((buf = reallocarray(buf, 2, sz)) == NULL) + FATAL("out of memory in strftime"); + sz *= 2; + } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); + + y = gettemp(); + setsval(y, buf); + free(fmt); + free(buf); + + return y; default: /* can't happen */ FATAL("illegal function type %d", t); break; @@ -2118,6 +2156,147 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ return(x); } +Cell *gensub(Node **a, int nnn) /* global selective substitute */ + /* XXX incomplete - doesn't support backreferences \0 ... 
\9 */ +{ + Cell *x, *y, *res, *h; + char *rptr; + const char *sptr; + char *buf, *pb; + const char *t, *q; + fa *pfa; + int mflag, tempstat, num, whichm; + int bufsz = recsize; + + if ((buf = malloc(bufsz)) == NULL) + FATAL("out of memory in gensub"); + mflag = 0; /* if mflag == 0, can replace empty string */ + num = 0; + x = execute(a[4]); /* source string */ + t = getsval(x); + res = copycell(x); /* target string - initially copy of source */ + res->csub = CTEMP; /* result values are temporary */ + if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ + pfa = (fa *) a[1]; /* regular expression */ + else { + y = execute(a[1]); + pfa = makedfa(getsval(y), 1); + tempfree(y); + } + y = execute(a[2]); /* replacement string */ + h = execute(a[3]); /* which matches should be replaced */ + sptr = getsval(h); + if (sptr[0] == 'g' || sptr[0] == 'G') + whichm = -1; + else { + /* + * The specified number is the index of the match to replace, + * starting from 1. GNU awk treats an index lower than 1 the same + * as 1; we do the same for compatibility. + */ + whichm = (int) getfval(h) - 1; + if (whichm < 0) + whichm = 0; + } + tempfree(h); + + if (pmatch(pfa, t)) { + char *sl; + + tempstat = pfa->initstat; + pfa->initstat = 2; + pb = buf; + rptr = getsval(y); + /* + * XXX if there are any backreferences in subst string, + * complain now. 
+ */ + for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { + if (strchr("0123456789", sl[1])) { + FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); + } + } + + do { + if (whichm >= 0 && whichm != num) { + num++; + adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); + + /* copy the part of string up to and including + * match to output buffer */ + while (t < patbeg + patlen) + *pb++ = *t++; + continue; + } + + if (patlen == 0 && *patbeg != 0) { /* matched empty string */ + if (mflag == 0) { /* can replace empty */ + num++; + sptr = rptr; + while (*sptr != 0) { + adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); + if (*sptr == '\\') { + backsub(&pb, &sptr); + } else if (*sptr == '&') { + sptr++; + adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); + for (q = patbeg; q < patbeg+patlen; ) + *pb++ = *q++; + } else + *pb++ = *sptr++; + } + } + if (*t == 0) /* at end */ + goto done; + adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); + *pb++ = *t++; + if (pb > buf + bufsz) /* BUG: not sure of this test */ + FATAL("gensub result0 %.30s too big; can't happen", buf); + mflag = 0; + } + else { /* matched nonempty string */ + num++; + sptr = t; + adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); + while (sptr < patbeg) + *pb++ = *sptr++; + sptr = rptr; + while (*sptr != 0) { + adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); + if (*sptr == '\\') { + backsub(&pb, &sptr); + } else if (*sptr == '&') { + sptr++; + adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); + for (q = patbeg; q < patbeg+patlen; ) + *pb++ = *q++; + } else + *pb++ = *sptr++; + } + t = patbeg + patlen; + if (patlen == 0 || *t == 0 || *(t-1) == 0) + goto done; + if (pb > buf + bufsz) + FATAL("gensub result1 %.30s too big; can't happen", buf); + mflag = 1; + } + } while (pmatch(pfa,t)); + sptr = t; + adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); + while ((*pb++ = *sptr++) != 0) + 
; + done: if (pb > buf + bufsz) + FATAL("gensub result2 %.30s too big; can't happen", buf); + *pb = '\0'; + setsval(res, buf); + pfa->initstat = tempstat; + } + tempfree(x); + tempfree(y); + free(buf); + return(res); +} + void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ { /* sptr[0] == '\\' */ char *pb = *pb_ptr; |