summaryrefslogtreecommitdiff
path: root/usr.bin/awk/lib.c
diff options
context:
space:
mode:
author Thorsten Lockert <tholo@cvs.openbsd.org> 1996-07-04 20:34:50 +0000
committer Thorsten Lockert <tholo@cvs.openbsd.org> 1996-07-04 20:34:50 +0000
commit 7f1c98407cf2b223c1da59c14405846d569923fb (patch)
tree e85dea1828c1d369d50fb41511b40a1866898c83 /usr.bin/awk/lib.c
parent 941ca824f558fd952049b2434ab3b77d949bd7a6 (diff)
AT&T awk, by B. W. Kernighan, with fixes from 4.4BSD and by me
Diffstat (limited to 'usr.bin/awk/lib.c')
-rw-r--r--usr.bin/awk/lib.c636
1 files changed, 636 insertions, 0 deletions
diff --git a/usr.bin/awk/lib.c b/usr.bin/awk/lib.c
new file mode 100644
index 00000000000..6f72be9133a
--- /dev/null
+++ b/usr.bin/awk/lib.c
@@ -0,0 +1,636 @@
+/****************************************************************
+Copyright (C) AT&T and Lucent Technologies 1996
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the names of AT&T or Lucent Technologies
+or any of their entities not be used in advertising or publicity
+pertaining to distribution of the software without specific,
+written prior permission.
+
+AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
+ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+****************************************************************/
+
+#define DEBUG
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdlib.h>
+#include "awk.h"
+#include "awkgram.h"
+
+FILE *infile = NULL; /* current input stream; NULL makes getrec open the next file */
+char *file = ""; /* name of the current input file, as returned by getargv */
+int recsize = RECSIZE; /* buffer size handed to readrec and used by recbld */
+char *recdata; /* record buffer allocated in recinit */
+char *record; /* text of the current $0; starts out equal to recdata */
+char *fields; /* storage that fldbld/refldbld copy field text into */
+Cell *fldtab; /* field cells: fldtab[0] is $0, fldtab[i] is $i */
+
+#define MAXFLD 200 /* default number of cells in fldtab */
+int nfields = MAXFLD; /* can be set from commandline in main */
+
+int donefld; /* 1 = implies rec broken into fields */
+int donerec; /* 1 = record is valid (no flds have changed) */
+
+int maxfld = 0; /* last used field */
+int argno = 1; /* current input argument number */
+extern Awkfloat *ARGC;
+
+void recinit(unsigned int n)	/* allocate the $0 buffer, the field text buffer and the field table */
+{
+	/* prototype cells copied into the table: one for $0, one for every $i */
+	static Cell rec_proto = {
+		OCELL, CFLD, "$0", /*recdata*/0, 0.0, REC|STR|DONTFREE };
+	static Cell fld_proto = {
+		OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE };
+	int k;
+
+	recdata = (char *) malloc(n);
+	record = recdata;	/* $0 initially lives in recdata */
+	fields = (char *) malloc(n);
+	fldtab = (Cell *) malloc(nfields * sizeof(Cell));
+	if (!recdata || !fields || !fldtab)
+		ERROR "out of space for $0 and fields" FATAL;
+	fldtab[0] = rec_proto;
+	fldtab[0].sval = recdata;
+	for (k = nfields; --k >= 1; )	/* every other cell starts as an empty field */
+		fldtab[k] = fld_proto;
+}
+
+/*
+ * initgetrec: walk ARGV[1] .. ARGV[ARGC-1] before the first read.
+ * Leading var=value arguments are applied with setclvar and counted in
+ * argno; the first argument that is not an assignment becomes FILENAME.
+ * When no such argument exists, input comes from stdin.
+ */
+void initgetrec(void)
+{
+	int k = 1;
+	char *arg;
+
+	while (k < *ARGC) {
+		arg = getargv(k);
+		if (!isclvar(arg)) {	/* find 1st real filename */
+			setsval(lookup("FILENAME", symtab), getargv(k));
+			return;
+		}
+		setclvar(arg);	/* a commandline assignment before filename */
+		argno++;
+		k++;
+	}
+	infile = stdin;	/* no filenames, so use stdin */
+}
+
+int getrec(char *buf) /* get next input record from whatever source: returns 1 with
+   the record stored in buf, or 0 once every input argument is used up */
+{ /* note: tests whether buf == record */
+ int c;
+ static int firsttime = 1;
+
+ if (firsttime) { /* first call only: let initgetrec process the leading arguments */
+ firsttime = 0;
+ initgetrec();
+ }
+ dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
+ *RS, *FS, *ARGC, *FILENAME) );
+ donefld = 0; /* fields of the new record have not been split yet */
+ donerec = 1; /* $0 is valid as read */
+ buf[0] = 0;
+ while (argno < *ARGC || infile == stdin) {
+ dprintf( ("argno=%d, file=|%s|\n", argno, file) );
+ if (infile == NULL) { /* have to open a new file */
+ file = getargv(argno);
+ if (*file == '\0') { /* it's been zapped */
+ argno++;
+ continue;
+ }
+ if (isclvar(file)) { /* a var=value arg */
+ setclvar(file);
+ argno++;
+ continue;
+ }
+ *FILENAME = file;
+ dprintf( ("opening file %s\n", file) );
+ if (*file == '-' && *(file+1) == '\0') /* the argument "-" means standard input */
+ infile = stdin;
+ else if ((infile = fopen((char *)file, "r")) == NULL)
+ ERROR "can't open file %s", file FATAL;
+ setfval(fnrloc, 0.0); /* per-file record count restarts for each file */
+ }
+ c = readrec(buf, recsize, infile);
+ if (c != 0 || buf[0] != '\0') { /* normal record */
+ if (buf == record) { /* read straight into $0: repoint the $0 cell at it */
+ if (!(recloc->tval & DONTFREE))
+ xfree(recloc->sval);
+ recloc->sval = record;
+ recloc->tval = REC | STR | DONTFREE;
+ if (isnumber(recloc->sval)) { /* numeric-looking text also gets a numeric value */
+ recloc->fval = atof(recloc->sval);
+ recloc->tval |= NUM;
+ }
+ }
+ setfval(nrloc, nrloc->fval+1);
+ setfval(fnrloc, fnrloc->fval+1);
+ return 1;
+ }
+ /* EOF arrived on this file; set up next */
+ if (infile != stdin)
+ fclose(infile);
+ infile = NULL; /* forces the open-a-new-file branch on the next pass */
+ argno++;
+ }
+ return 0; /* true end of file */
+}
+
+/*
+ * nextfile: abandon the current input file and advance to the next
+ * command-line argument.
+ */
+void nextfile(void)
+{
+	/*
+	 * infile is NULL whenever no file is open (its initial value, and
+	 * what getrec stores after EOF).  fclose(NULL) is undefined, so
+	 * only close a real stream, and never stdin.
+	 */
+	if (infile != NULL && infile != stdin)
+		fclose(infile);
+	infile = NULL;	/* getrec opens ARGV[argno] when it sees NULL */
+	argno++;
+}
+
+/*
+ * readrec: read one record from inf into buf, which holds bufsize bytes.
+ *
+ * The separator is the first character of RS.  When RS is empty the
+ * record ends at two newlines in a row: leading newlines are skipped and
+ * single newlines inside the record are kept.
+ *
+ * Returns 0 when EOF is hit before anything was stored, otherwise 1.
+ * A record that does not fit (including its terminating NUL) is reported
+ * through ERROR ... FATAL.
+ */
+int readrec(char *buf, int bufsize, FILE *inf)	/* read one record into buf */
+{
+	int sep, c;
+	char *rr = buf;
+	int room = bufsize - 1;	/* data bytes still free; one slot is kept for the NUL */
+
+	if ((sep = **RS) == 0) {
+		sep = '\n';
+		while ((c = getc(inf)) == '\n')	/* skip leading \n's */
+			;
+		if (c != EOF)
+			ungetc(c, inf);
+	}
+	for (;;) {
+		while ((c = getc(inf)) != sep && c != EOF) {
+			if (room < 1)
+				goto toolong;
+			*rr++ = c;
+			room--;
+		}
+		if (**RS == sep || c == EOF)
+			break;
+		if ((c = getc(inf)) == '\n' || c == EOF)	/* 2 in a row */
+			break;
+		/* a lone newline inside a paragraph: keep it plus the char just read */
+		if (room < 2)
+			goto toolong;
+		*rr++ = '\n';
+		*rr++ = c;
+		room -= 2;
+	}
+	*rr = 0;
+	dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
+	return c == EOF && rr == buf ? 0 : 1;
+
+toolong:
+	if (bufsize > 0)
+		*rr = 0;	/* rr is still inside buf here; terminate for the %.30s below */
+	ERROR "input record `%.30s...' too long; try -mr n", buf FATAL;
+	return 0;
+}
+
+/*
+ * getargv: return the string value of ARGV[n].
+ * The index is formatted as a decimal string and used as the key for
+ * setsymtab on ARGVtab (with "" as the value argument); the string value
+ * of the resulting cell is returned.
+ */
+char *getargv(int n)	/* get ARGV[n] */
+{
+	Cell *x;
+	char *s;
+	/* big enough for a sign, every decimal digit of an int, and the NUL */
+	char temp[3 * sizeof(int) + 2];
+	extern Array *ARGVtab;
+
+	sprintf(temp, "%d", n);
+	x = setsymtab(temp, "", 0.0, STR, ARGVtab);
+	s = getsval(x);
+	dprintf( ("getargv(%d) returns |%s|\n", n, s) );
+	return s;
+}
+
+/*
+ * setclvar: apply a command-line assignment of the form var=value.
+ * s is split in place at the first '=' (the '=' is overwritten with a
+ * NUL); the value goes through qstring before being stored, and a value
+ * that isnumber accepts is also given a numeric value.
+ * The caller must have checked that s contains an '='.
+ */
+void setclvar(char *s)	/* set var=value from s */
+{
+	char *val;
+	Cell *var;
+
+	val = strchr(s, '=');
+	*val++ = 0;
+	val = qstring(val, '\0');
+	var = setsymtab(s, val, 0.0, STR, symtab);
+	setsval(var, val);
+	if (isnumber(var->sval)) {
+		var->fval = atof(var->sval);
+		var->tval |= NUM;
+	}
+	dprintf( ("command line set %s to |%s|\n", s, val) );
+}
+
+
+/*
+ * fldbld: split the current record ($0) into fields, unless that has
+ * already been done (donefld).
+ *
+ * The splitting rule depends on FS:
+ *   - longer than one character: regular expression, handled by refldbld;
+ *   - a single blank: runs of blank, tab and newline separate fields;
+ *   - empty: every character is its own field;
+ *   - any other single character: that character or a newline separates.
+ *
+ * Field text is copied into the shared `fields' buffer, except in the
+ * one-character-per-field case where each field gets its own copy from
+ * tostring.  Afterwards stale fields up to the previous maxfld are
+ * cleared, numeric-looking fields get a numeric value, and NF is set.
+ * Running out of field cells is a fatal error.
+ */
+void fldbld(void)	/* create fields from current record */
+{
+	char *r, *fr, sep;
+	Cell *p;
+	int i;
+
+	if (donefld)
+		return;
+	if (!(recloc->tval & STR))
+		getsval(recloc);
+	r = recloc->sval;
+	fr = fields;
+	i = 0;	/* number of fields accumulated here */
+	if (strlen(*FS) > 1) {	/* it's a regular expression */
+		i = refldbld(r, *FS);
+	} else if ((sep = **FS) == ' ') {	/* default whitespace */
+		for (i = 0; ; ) {
+			while (*r == ' ' || *r == '\t' || *r == '\n')
+				r++;
+			if (*r == 0)
+				break;
+			i++;
+			if (i >= nfields)
+				break;
+			if (!(fldtab[i].tval & DONTFREE))
+				xfree(fldtab[i].sval);
+			fldtab[i].sval = fr;
+			fldtab[i].tval = FLD | STR | DONTFREE;
+			do
+				*fr++ = *r++;
+			while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
+			*fr++ = 0;
+		}
+		*fr = 0;
+	} else if ((sep = **FS) == 0) {	/* new: FS="" => 1 char/field */
+		for (i = 0; *r != 0; r++) {
+			char buf[2];
+			i++;
+			if (i >= nfields)
+				break;
+			if (!(fldtab[i].tval & DONTFREE))
+				xfree(fldtab[i].sval);
+			buf[0] = *r;
+			buf[1] = 0;
+			fldtab[i].sval = tostring(buf);
+			fldtab[i].tval = FLD | STR;	/* no DONTFREE: this copy belongs to the cell */
+		}
+		*fr = 0;
+	} else if (*r != 0) {	/* if 0, it's a null field */
+		for (;;) {
+			i++;
+			if (i >= nfields)
+				break;
+			if (!(fldtab[i].tval & DONTFREE))
+				xfree(fldtab[i].sval);
+			fldtab[i].sval = fr;
+			fldtab[i].tval = FLD | STR | DONTFREE;
+			while (*r != sep && *r != '\n' && *r != '\0')	/* \n is always a separator */
+				*fr++ = *r++;
+			*fr++ = 0;
+			if (*r++ == 0)
+				break;
+		}
+		*fr = 0;
+	}
+	if (i >= nfields)
+		ERROR "record `%.30s...' has too many fields; try -mf n", record FATAL;
+	/* clean out junk from previous record */
+	cleanfld(i, maxfld);
+	maxfld = i;
+	donefld = 1;
+	for (p = fldtab+1; p <= fldtab+maxfld; p++) {
+		if(isnumber(p->sval)) {
+			p->fval = atof(p->sval);
+			p->tval |= NUM;
+		}
+	}
+	setfval(nfloc, (Awkfloat) maxfld);
+	if (dbg)
+		for (p = fldtab; p <= fldtab+maxfld; p++)
+			/* p-fldtab is a ptrdiff_t; convert it to match %d */
+			printf("field %d: |%s|\n", (int) (p-fldtab), p->sval);
+}
+
+/*
+ * cleanfld: reset fields n1+1 .. n2 to the empty string, working from
+ * n2 downwards.  Field n1 itself is left alone.  Text that a cell owns
+ * (DONTFREE not set) is released first.
+ */
+void cleanfld(int n1, int n2)
+{
+	static char *nullstat = "";
+	int k;
+
+	for (k = n2; k > n1; k--) {
+		Cell *fp = &fldtab[k];
+
+		if (!(fp->tval & DONTFREE))
+			xfree(fp->sval);
+		fp->tval = FLD | STR | DONTFREE;
+		fp->sval = nullstat;
+	}
+}
+
+/*
+ * newfld: extend the record so that field n is its last field.
+ * Fields after the old maxfld up to n are cleared, and maxfld and NF
+ * are set to n.  Asking for a field beyond the table is a fatal error.
+ */
+void newfld(int n)	/* add field n (after end) */
+{
+	if (!(n < nfields))
+		ERROR "creating too many fields (%d); try -mf n", n FATAL;
+	cleanfld(maxfld, n);
+	maxfld = n;
+	setfval(nfloc, (Awkfloat) maxfld);
+}
+
+int refldbld(char *rec, char *fs) /* build fields from reg expr in FS: copies the text
+   between successive matches of fs in rec into the fields buffer and returns
+   the number of the last field filled in (0 for an empty record, nfields
+   if the field table ran out) */
+{
+ char *fr;
+ int i, tempstat;
+ fa *pfa;
+
+ fr = fields;
+ *fr = '\0';
+ if (*rec == '\0')
+ return 0;
+ pfa = makedfa(fs, 1);
+ dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
+ tempstat = pfa->initstat; /* saved so it can be put back after the last field */
+ for (i = 1; i < nfields; i++) {
+ if (!(fldtab[i].tval & DONTFREE))
+ xfree(fldtab[i].sval);
+ fldtab[i].tval = FLD | STR | DONTFREE;
+ fldtab[i].sval = fr;
+ dprintf( ("refldbld: i=%d\n", i) );
+ if (nematch(pfa, rec)) { /* NOTE(review): patbeg/patlen are presumably set by nematch - confirm */
+ pfa->initstat = 2; /* horrible coupling */
+ dprintf( ("match %s (%d chars)\n", patbeg, patlen) );
+ strncpy(fr, rec, patbeg-rec); /* the field is the text before the match */
+ fr += patbeg - rec + 1; /* step past the copied text and its terminator slot */
+ *(fr-1) = '\0';
+ rec = patbeg + patlen; /* continue scanning after the matched separator */
+ } else {
+ dprintf( ("no match %s\n", rec) );
+ strcpy(fr, rec); /* no more separators: the rest of the record is the last field */
+ pfa->initstat = tempstat;
+ break;
+ }
+ }
+ return i;
+}
+
+/*
+ * recbld: rebuild $0 from $1..$NF, joined by OFS, when a field change
+ * has invalidated it (donerec != 1).
+ *
+ * The text is assembled in a private buffer of recsize bytes that is
+ * allocated on first use; afterwards record and the $0 cell point at it.
+ * A result that does not fit, including its terminating NUL, is a fatal
+ * error.  It used to be truncated silently, because the old size test
+ * could never be true.
+ */
+void recbld(void)	/* create $0 from $1..$NF if necessary */
+{
+	int i;
+	char *r, *p, *lim;
+	static char *rec = 0;
+
+	if (donerec == 1)
+		return;
+	if (rec == 0) {
+		rec = (char *) malloc(recsize);
+		if (rec == 0)
+			ERROR "out of space building $0, record size %d", recsize FATAL;
+	}
+	r = rec;
+	lim = rec + recsize - 1;	/* last byte, reserved for the NUL */
+	for (i = 1; i <= *NF; i++) {
+		for (p = getsval(&fldtab[i]); *p != '\0'; ) {
+			if (r >= lim)
+				goto toobig;
+			*r++ = *p++;
+		}
+		if (i < *NF)
+			for (p = *OFS; *p != '\0'; ) {
+				if (r >= lim)
+					goto toobig;
+				*r++ = *p++;
+			}
+	}
+	*r = '\0';
+	dprintf( ("in recbld FS=%o, recloc=%p\n", **FS, recloc) );
+	recloc->tval = REC | STR | DONTFREE;
+	recloc->sval = record = rec;
+	dprintf( ("in recbld FS=%o, recloc=%p\n", **FS, recloc) );
+	dprintf( ("recbld = |%s|\n", record) );
+	donerec = 1;
+	return;
+
+toobig:
+	ERROR "built giant record `%.30s...'; try -mr n", record FATAL;
+}
+
+/*
+ * fieldadr: return the address of the cell for field n ($n).
+ * An index outside 0 .. nfields-1 is a fatal error.
+ */
+Cell *fieldadr(int n)
+{
+	if (!(n >= 0 && n < nfields))
+		ERROR "trying to access field %d; try -mf n", n FATAL;
+	return fldtab + n;
+}
+
+int errorflag = 0; /* set to 2 by yyerror */
+char errbuf[200]; /* NOTE(review): presumably the buffer the ERROR macro formats into - confirm in awk.h */
+
+/*
+ * yyerror: report message s on stderr with the source line number and,
+ * when known, the enclosing function name, then set errorflag and call
+ * eprint.  Only the first three calls print anything.
+ */
+void yyerror(char *s)
+{
+	extern char *cmdname, *curfname;
+	static int been_here = 0;
+
+	if (been_here++ <= 2) {
+		fprintf(stderr, "%s: %s", cmdname, s);
+		fprintf(stderr, " at source line %d", lineno);
+		if (curfname)
+			fprintf(stderr, " in function %s", curfname);
+		fprintf(stderr, "\n");
+		errorflag = 2;
+		eprint();
+	}
+}
+
+void fpecatch(int n) /* report floating point exception n as a fatal error;
+   NOTE(review): presumably installed as a signal handler elsewhere - confirm */
+{
+ ERROR "floating point exception %d", n FATAL;
+}
+
+extern int bracecnt, brackcnt, parencnt;
+
+/*
+ * bracecheck: on its first call only, feed the rest of the program text
+ * to bclass and then report unbalanced braces, brackets and parentheses.
+ */
+void bracecheck(void)
+{
+	static int beenhere = 0;
+	int ch;
+
+	if (beenhere++ != 0)
+		return;
+	for (;;) {
+		ch = lex_input();
+		if (ch == EOF || ch == '\0')
+			break;
+		bclass(ch);
+	}
+	bcheck2(bracecnt, '{', '}');
+	bcheck2(brackcnt, '[', ']');
+	bcheck2(parencnt, '(', ')');
+}
+
+/*
+ * bcheck2: report an imbalance of n for one bracket kind on stderr.
+ * A positive n means closers (c2) are missing, a negative n means there
+ * are extra closers, and zero prints nothing.  c1 is not used.
+ */
+void bcheck2(int n, int c1, int c2)
+{
+	if (n > 0) {
+		if (n == 1)
+			fprintf(stderr, "\tmissing %c\n", c2);
+		else
+			fprintf(stderr, "\t%d missing %c's\n", n, c2);
+	} else if (n < 0) {
+		if (n == -1)
+			fprintf(stderr, "\textra %c\n", c2);
+		else
+			fprintf(stderr, "\t%d extra %c's\n", -n, c2);
+	}
+}
+
+/*
+ * error: print message s on stderr, prefixed with the command name.
+ * Unless compile_time is 2, the input record number (and the file name
+ * when it is not "-") and the source line number are added where known.
+ * A nonzero f ends the program: abort() when dbg > 1, otherwise exit(2).
+ */
+void error(int f, char *s)
+{
+	extern Node *curnode;
+	extern char *cmdname;
+
+	fflush(stdout);
+	fprintf(stderr, "%s: ", cmdname);
+	fprintf(stderr, "%s", s);
+	fprintf(stderr, "\n");
+	if (compile_time != 2) {
+		if (NR && *NR > 0) {
+			fprintf(stderr, " input record number %d", (int) (*FNR));
+			if (strcmp(*FILENAME, "-") != 0)
+				fprintf(stderr, ", file %s", *FILENAME);
+			fprintf(stderr, "\n");
+		}
+		if (curnode)
+			fprintf(stderr, " source line number %d\n", curnode->lineno);
+		else if (lineno)
+			fprintf(stderr, " source line number %d\n", lineno);
+	}
+	eprint();
+	if (!f)
+		return;
+	if (dbg > 1)	/* core dump if serious debugging on */
+		abort();
+	exit(2);
+}
+
+void eprint(void) /* try to print context around error */
+{
+#if 0 /* the whole body is compiled out, so eprint currently does nothing */
+ char *p, *q;
+ int c;
+ static int been_here = 0;
+ extern char ebuf[], *ep;
+
+ if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
+ return;
+ p = ep - 1;
+ if (p > ebuf && *p == '\n')
+ p--;
+ for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--) /* back up to the start of the line */
+ ;
+ while (*p == '\n')
+ p++;
+ fprintf(stderr, " context is\n\t");
+ for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--) /* find the start of the last token */
+ ;
+ for ( ; p < q; p++)
+ if (*p)
+ putc(*p, stderr);
+ fprintf(stderr, " >>> ");
+ for ( ; p < ep; p++)
+ if (*p)
+ putc(*p, stderr);
+ fprintf(stderr, " <<< ");
+ if (*ep)
+ while ((c = input()) != '\n' && c != '\0' && c != EOF) { /* echo the rest of the line */
+ putc(c, stderr);
+ bclass(c);
+ }
+ putc('\n', stderr);
+ ep = ebuf;
+#endif
+}
+
+/*
+ * bclass: update the running nesting counts for character c.
+ * An opening brace, bracket or parenthesis increments its counter, the
+ * matching closer decrements it, and any other character is ignored.
+ */
+void bclass(int c)
+{
+	if (c == '{')
+		bracecnt++;
+	else if (c == '}')
+		bracecnt--;
+	else if (c == '[')
+		brackcnt++;
+	else if (c == ']')
+		brackcnt--;
+	else if (c == '(')
+		parencnt++;
+	else if (c == ')')
+		parencnt--;
+}
+
+/*
+ * errcheck: check errno after a math library call.
+ *   x  result of the call
+ *   s  name used in the warning message
+ * On EDOM or ERANGE errno is cleared, a warning is issued and 1 is
+ * returned in place of x; otherwise x is returned unchanged.
+ *
+ * errno comes from <errno.h>, where it is a macro, so it must not be
+ * redeclared here as a plain extern int.
+ */
+double errcheck(double x, char *s)
+{
+	if (errno == EDOM) {
+		errno = 0;
+		ERROR "%s argument out of domain", s WARNING;
+		x = 1;
+	} else if (errno == ERANGE) {
+		errno = 0;
+		ERROR "%s result out of range", s WARNING;
+		x = 1;
+	}
+	return x;
+}
+
+/*
+ * isclvar: return 1 if s has the form var=something, else 0.
+ * var must start with a letter or '_' and continue with letters, digits
+ * or '_'.  A second '=' directly after the first (as in "a==b") is
+ * rejected.
+ */
+int isclvar(char *s)	/* is s of form var=something ? */
+{
+	char *os = s;
+
+	/* <ctype.h> functions need a value representable as unsigned char */
+	if (!isalpha((unsigned char) *s) && *s != '_')
+		return 0;
+	for ( ; *s; s++)
+		if (!(isalnum((unsigned char) *s) || *s == '_'))
+			break;
+	return *s == '=' && s > os && *(s+1) != '=';
+}
+
+#define MAXEXPON 38	/* maximum exponent for fp number. should be IEEE */
+
+/*
+ * isnumber: return 1 if s looks like a decimal floating-point number,
+ * else 0.
+ *
+ * Accepted form: optional blanks/tabs/newlines, optional sign, digits
+ * with an optional '.' (at least one digit on either side), an optional
+ * exponent, then optional trailing blanks/tabs/newlines.  The exponent is
+ * 'e' or 'E', an optional sign, and one or two digits whose value is
+ * below MAXEXPON.  The empty string is not a number.
+ */
+int isnumber(char *s)	/* should be done by a library function */
+{
+	int d1, d2;
+	int point;
+	char *es;
+
+	d1 = d2 = point = 0;
+	while (*s == ' ' || *s == '\t' || *s == '\n')
+		s++;
+	if (*s == '\0')
+		return(0);	/* empty stuff isn't a number */
+	if (*s == '+' || *s == '-')
+		s++;
+	/* <ctype.h> functions need a value representable as unsigned char */
+	if (!isdigit((unsigned char) *s) && *s != '.')
+		return(0);
+	if (isdigit((unsigned char) *s)) {
+		do {
+			d1++;
+			s++;
+		} while (isdigit((unsigned char) *s));
+	}
+	if (*s == '.') {
+		point++;
+		s++;
+	}
+	if (isdigit((unsigned char) *s)) {
+		d2++;
+		do {
+			s++;
+		} while (isdigit((unsigned char) *s));
+	}
+	if (!(d1 || (point && d2)))	/* a lone "." has no digits at all */
+		return(0);
+	if (*s == 'e' || *s == 'E') {
+		s++;
+		if (*s == '+' || *s == '-')
+			s++;
+		if (!isdigit((unsigned char) *s))
+			return(0);
+		es = s;
+		do {
+			s++;
+		} while (isdigit((unsigned char) *s));
+		if (s - es > 2)	/* more than two exponent digits */
+			return(0);
+		else if (s - es == 2 && (int)(10 * (*es-'0') + *(es+1)-'0') >= MAXEXPON)
+			return(0);
+	}
+	while (*s == ' ' || *s == '\t' || *s == '\n')
+		s++;
+	if (*s == '\0')
+		return(1);
+	else
+		return(0);
+}
+
+#if 0
+ /* THIS IS AN EXPERIMENT THAT'S NOT DONE. */
+ /* strtod ought to provide a better test of what's */
+ /* a valid number, but it doesn't work according to */
+ /* the standard on any machine near me! */
+
+ #include <math.h>
+ isnumber(char *s) /* strtod-based variant; never compiled because of the enclosing #if 0 */
+ {
+ double r;
+ char *ep;
+ errno = 0;
+ r = strtod(s, &ep);
+ if (r == HUGE_VAL || errno == ERANGE) /* out-of-range values count as non-numeric */
+ return 0;
+ while (*ep == ' ' || *ep == '\t' || *ep == '\n') /* allow trailing white space */
+ ep++;
+ if (*ep == '\0')
+ return 1;
+ else
+ return 0;
+ }
+#endif