diff options
author | Thorsten Lockert <tholo@cvs.openbsd.org> | 1996-07-04 20:34:50 +0000 |
---|---|---|
committer | Thorsten Lockert <tholo@cvs.openbsd.org> | 1996-07-04 20:34:50 +0000 |
commit | 7f1c98407cf2b223c1da59c14405846d569923fb (patch) | |
tree | e85dea1828c1d369d50fb41511b40a1866898c83 /usr.bin/awk/awklex.l | |
parent | 941ca824f558fd952049b2434ab3b77d949bd7a6 (diff) |
AT&T awk, by B. W. Kernighan, with fixes from 4.4BSD and by me
Diffstat (limited to 'usr.bin/awk/awklex.l')
-rw-r--r-- | usr.bin/awk/awklex.l | 405 |
1 files changed, 405 insertions, 0 deletions
%Start A strng sc reg comment

%{
/****************************************************************
Copyright (C) AT&T and Lucent Technologies 1996
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the names of AT&T or Lucent Technologies
or any of their entities not be used in advertising or publicity
pertaining to distribution of the software without specific,
written prior permission.

AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS.  IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
USE OR PERFORMANCE OF THIS SOFTWARE.
****************************************************************/

/*
 * Lexical analyzer for awk.
 *
 * Start conditions: A (ordinary program text), strng (inside a "..."
 * string), reg (inside a /.../ regular expression, entered through
 * startreg()), sc (a '}' has just produced a ';' and the '}' itself is
 * still owed to the parser).
 *
 * Some of this depends on behavior of lex that
 * may not be preserved in other implementations of lex.
 */

#ifndef FLEX_SCANNER
#undef	input	/* defeat lex */
#undef	unput
#endif /* !FLEX_SCANNER */

#include <stdlib.h>
#include <string.h>
#include "awk.h"
#include "awkgram.h"

extern YYSTYPE	yylval;
extern int	infunc;

int	lineno	= 1;	/* incremented on every newline seen by a rule */
int	bracecnt = 0;	/* currently open '{' */
int	brackcnt = 0;	/* currently open '[' */
int	parencnt = 0;	/* currently open '(' */

#define	DEBUG
#ifdef	DEBUG
#	define	RET(x)	{if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); }
#else
#	define	RET(x)	return(x)
#endif

/* Append the current character to gs; on failure report it and
   fall back to ordinary program text. */
#define	CADD	if (cadd(gs, yytext[0]) == 0) { \
			ERROR "string/reg expr %.30s... too long", gs->cbuf SYNTAX; \
			BEGIN A; \
		}

char	*s;
Gstring	*gs = 0;	/* initialized in main() */
int	cflag;

static char	*lexdup(const char *text);	/* checked strdup(); defined below */

#ifdef FLEX_SCANNER
static int	my_input( YY_CHAR *buf, int max_size );

#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) result = my_input(buf, max_size);

#undef YY_USER_INIT
#define YY_USER_INIT init_input_source();

#define	FIRST	((yy_start - 1) / 2)
#else /* FLEX_SCANNER */
#define	FIRST	(yybgin - yysvec - 1)
#endif /* FLEX_SCANNER */
%}

A	[a-zA-Z_]
B	[a-zA-Z0-9_]
D	[0-9]
O	[0-7]
H	[0-9a-fA-F]
WS	[ \t]

%%
	switch (FIRST) {	/* witchcraft */
	case 0:			/* very first call: enter condition A */
		BEGIN A;
		break;
	case sc:		/* previous call returned ';' for a '}': now deliver the '}' */
		BEGIN A;
		RET('}');
	}

<A>\n		{ lineno++; RET(NL); }
<A>#.*		{ ; }	/* strip comments */
<A>{WS}+	{ ; }
<A>;		{ RET(';'); }

<A>"\\"\n	{ lineno++; }
<A>BEGIN	{ RET(XBEGIN); }
<A>END		{ RET(XEND); }
<A>func(tion)?	{ if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }
<A>return	{ if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
<A>"&&"		{ RET(AND); }
<A>"||"		{ RET(BOR); }
<A>"!"		{ RET(NOT); }
<A>"!="		{ yylval.i = NE; RET(NE); }
<A>"~"		{ yylval.i = MATCH; RET(MATCHOP); }
<A>"!~"		{ yylval.i = NOTMATCH; RET(MATCHOP); }
<A>"<"		{ yylval.i = LT; RET(LT); }
<A>"<="		{ yylval.i = LE; RET(LE); }
<A>"=="		{ yylval.i = EQ; RET(EQ); }
<A>">="		{ yylval.i = GE; RET(GE); }
<A>">"		{ yylval.i = GT; RET(GT); }
<A>">>"		{ yylval.i = APPEND; RET(APPEND); }
<A>"++"		{ yylval.i = INCR; RET(INCR); }
<A>"--"		{ yylval.i = DECR; RET(DECR); }
<A>"+="		{ yylval.i = ADDEQ; RET(ASGNOP); }
<A>"-="		{ yylval.i = SUBEQ; RET(ASGNOP); }
<A>"*="		{ yylval.i = MULTEQ; RET(ASGNOP); }
<A>"/="		{ yylval.i = DIVEQ; RET(ASGNOP); }
<A>"%="		{ yylval.i = MODEQ; RET(ASGNOP); }
<A>"^="		{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"**="	{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"="		{ yylval.i = ASSIGN; RET(ASGNOP); }
<A>"**"		{ RET(POWER); }
<A>"^"		{ RET(POWER); }

<A>"$"{D}+	{ yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }
<A>"$NF"	{ unputstr("(NF)"); return(INDIRECT); }
<A>"$"{A}{B}*	{
		  /* yytext is copied before input()/unput() are called;
		     the copy is what gets used afterwards. */
		  int c;
		  char *yytext_copy = lexdup(yytext);
		  c = input(); unput(c);	/* look for '(' or '[' */
		  if (c == '(' || c == '[' ||
		      (infunc && isarg(yytext_copy+1) >= 0)) {
			/* push the name back and hand the parser a bare '$' */
			unputstr(yytext_copy+1);
			free(yytext_copy);
			return(INDIRECT);
		  } else {
			yylval.cp =
				setsymtab(yytext_copy+1,"",0.0,STR|NUM,symtab);
			free(yytext_copy);
			RET(IVAR);
		  }
		}
<A>"$"		{ RET(INDIRECT); }
<A>NF		{ yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }

<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		  yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab);
		/* should this also have STR set? */
		  RET(NUMBER); }

<A>while	{ RET(WHILE); }
<A>for		{ RET(FOR); }
<A>do		{ RET(DO); }
<A>if		{ RET(IF); }
<A>else		{ RET(ELSE); }
<A>next		{ RET(NEXT); }
<A>nextfile	{ RET(NEXTFILE); }
<A>exit		{ RET(EXIT); }
<A>break	{ RET(BREAK); }
<A>continue	{ RET(CONTINUE); }
<A>print	{ yylval.i = PRINT; RET(PRINT); }
<A>printf	{ yylval.i = PRINTF; RET(PRINTF); }
<A>sprintf	{ yylval.i = SPRINTF; RET(SPRINTF); }
<A>split	{ yylval.i = SPLIT; RET(SPLIT); }
<A>substr	{ RET(SUBSTR); }
<A>sub		{ yylval.i = SUB; RET(SUB); }
<A>gsub		{ yylval.i = GSUB; RET(GSUB); }
<A>index	{ RET(INDEX); }
<A>match	{ RET(MATCHFCN); }
<A>in		{ RET(IN); }
<A>getline	{ RET(GETLINE); }
<A>close	{ RET(CLOSE); }
<A>delete	{ RET(DELETE); }
<A>length	{ yylval.i = FLENGTH; RET(BLTIN); }
<A>log		{ yylval.i = FLOG; RET(BLTIN); }
<A>int		{ yylval.i = FINT; RET(BLTIN); }
<A>exp		{ yylval.i = FEXP; RET(BLTIN); }
<A>sqrt		{ yylval.i = FSQRT; RET(BLTIN); }
<A>sin		{ yylval.i = FSIN; RET(BLTIN); }
<A>cos		{ yylval.i = FCOS; RET(BLTIN); }
<A>atan2	{ yylval.i = FATAN; RET(BLTIN); }
<A>system	{ yylval.i = FSYSTEM; RET(BLTIN); }
<A>rand		{ yylval.i = FRAND; RET(BLTIN); }
<A>srand	{ yylval.i = FSRAND; RET(BLTIN); }
<A>toupper	{ yylval.i = FTOUPPER; RET(BLTIN); }
<A>tolower	{ yylval.i = FTOLOWER; RET(BLTIN); }
<A>fflush	{ yylval.i = FFLUSH; RET(BLTIN); }

<A>{A}{B}*	{ /* identifier: function argument, call, or variable */
		  int n, c;
		  char *yytext_copy = lexdup(yytext);
		  c = input(); unput(c);	/* look for '(' */
		  if (c != '(' && infunc && (n=isarg(yytext_copy)) >= 0) {
			yylval.i = n;
			free(yytext_copy);
			RET(ARG);
		  } else {
			yylval.cp = setsymtab(yytext_copy, "", 0.0, STR|NUM, symtab);
			free(yytext_copy);
			if (c == '(') {
				RET(CALL);
			} else {
				RET(VAR);
			}
		  }
		}
<A>\"		{ BEGIN strng; caddreset(gs); }

<A>"}"		{ if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }
<A>"]"		{ if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
<A>")"		{ if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }

<A>.		{ if (yytext[0] == '{') bracecnt++;
		  else if (yytext[0] == '[') brackcnt++;
		  else if (yytext[0] == '(') parencnt++;
		  RET(yylval.i = yytext[0]); /* everything else */ }

<reg>\\.	{ cadd(gs, '\\'); cadd(gs, yytext[1]); }
<reg>\n		{ ERROR "newline in regular expression %.10s...", gs->cbuf SYNTAX; lineno++; BEGIN A; }
<reg>"/"	{ BEGIN A;
		  cadd(gs, 0);
		  yylval.s = tostring(gs->cbuf);
		  unput('/');
		  RET(REGEXPR); }
<reg>.		{ CADD; }

<strng>\"	{ BEGIN A;
		  cadd(gs, 0); s = tostring(gs->cbuf);
		  /* the symbol-table name is the string text with the
		     terminator replaced by a trailing blank */
		  cunadd(gs);
		  cadd(gs, ' '); cadd(gs, 0);
		  yylval.cp = setsymtab(gs->cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<strng>\n	{ ERROR "newline in string %.10s...", gs->cbuf SYNTAX; lineno++; BEGIN A; }
<strng>"\\\""	{ cadd(gs, '"'); }
<strng>"\\"n	{ cadd(gs, '\n'); }
<strng>"\\"t	{ cadd(gs, '\t'); }
<strng>"\\"f	{ cadd(gs, '\f'); }
<strng>"\\"r	{ cadd(gs, '\r'); }
<strng>"\\"b	{ cadd(gs, '\b'); }
<strng>"\\"v	{ cadd(gs, '\v'); }	/* these ANSIisms may not be known by */
<strng>"\\"a	{ cadd(gs, '\007'); }	/* your compiler. hence 007 for bell */
<strng>"\\\\"	{ cadd(gs, '\\'); }
<strng>"\\"({O}{O}{O}|{O}{O}|{O})	{
		  /* strtoul() has defined results for every digit string
		     the pattern can match */
		  cadd(gs, (int) strtoul(yytext+1, NULL, 8)); }
<strng>"\\"x({H}+)	{	/* ANSI permits any number! */
		  cadd(gs, (int) strtoul(yytext+2, NULL, 16)); }
<strng>"\\".	{ cadd(gs, yytext[1]); }
<strng>.	{ CADD; }

%%

/* Duplicate text with strdup(), reporting a fatal error when memory is
   exhausted.  FATAL is expected not to return, as newGstring() below
   already assumes.  The caller owns the copy and must free() it. */
static char *lexdup(const char *text)
{
	char *copy = strdup(text);

	if (copy == NULL)
		ERROR "out of space in lexer near %.30s", text FATAL;
	return copy;
}

void startreg(void)	/* start parsing a regular expression */
{
	BEGIN reg;
	caddreset(gs);
}

#ifdef FLEX_SCANNER
/*
 * YY_INPUT back end: copy up to max_size bytes of program text into buf
 * and return the number of bytes stored (0 at end of input).
 *
 * When lexprog is set (awk '...') the text comes from that string,
 * otherwise (awk -f ...) one character per call comes from pgetc().
 * Exactly the returned number of bytes is written; no NUL terminator is
 * stored, so buf is never written past max_size bytes.
 * max_size is assumed to be positive.
 */
static int my_input( YY_CHAR *buf, int max_size )
{
	extern uschar *lexprog;

	if ( lexprog ) {		/* awk '...' */
		size_t avail = strlen( (char *) lexprog );
		int num_chars = (avail > (size_t) max_size) ? max_size : (int) avail;

		memcpy( buf, lexprog, (size_t) num_chars );
		lexprog += num_chars;
		return num_chars;

	} else {			/* awk -f ... */
		int c = pgetc();
		if (c == EOF)
			return 0;
		buf[0] = c;
		return 1;
	}
}
#else /* FLEX_SCANNER */
/* input() and unput() are transcriptions of the standard lex
   macros for input and output with additions for error message
   printing.  God help us all if someone changes how lex works.
*/

char	ebuf[300];	/* circular record of the most recent input characters */
char	*ep = ebuf;	/* next free slot in ebuf */

int input(void)	/* get next lexical input character */
{
	int c;
	extern char *lexprog;

	if (yysptr > yysbuf)		/* pushed-back characters come first */
		c = U(*--yysptr);
	else if (lexprog != NULL) {	/* awk '...' */
		if ((c = *lexprog) != 0)
			lexprog++;
	} else				/* awk -f ... */
		c = pgetc();
	if (c == '\n')
		yylineno++;
	else if (c == EOF)
		c = 0;
	if (ep >= ebuf + sizeof ebuf)
		ep = ebuf;
	return *ep++ = c;
}

void unput(int c)	/* put lexical character back on input */
{
	yytchar = c;
	if (yytchar == '\n')
		yylineno--;
	*yysptr++ = yytchar;
	/* step ep back one slot, wrapping without ever forming a pointer
	   in front of ebuf */
	if (ep == ebuf)
		ep = ebuf + sizeof(ebuf) - 1;
	else
		ep--;
}
#endif /* FLEX_SCANNER */

void unputstr(char *s)	/* put a string back on input */
{
	int i;

	/* push in reverse so the characters are re-read in order */
	for (i = strlen(s)-1; i >= 0; i--)
		unput(s[i]);
}

int lex_input(void)	/* input() for callers outside this file */
{
	return input();
}

/* growing-string code */

const int CBUFLEN = 400;	/* initial capacity of a Gstring buffer */

Gstring *newGstring(void)	/* allocate an empty growing string */
{
	Gstring *gs = (Gstring *) malloc(sizeof(Gstring));
	char *cp = (char *) malloc(CBUFLEN);

	if (gs == 0 || cp == 0)
		ERROR "Out of space for strings" FATAL;
	gs->cbuf = cp;
	gs->cmax = CBUFLEN;
	gs->clen = 0;
	return gs;
}

/*
 * Add one char to gs->cbuf, growing the buffer fourfold when full.
 * Returns gs->cbuf on success and 0 when the buffer could not be grown.
 * On failure the existing buffer is kept (not leaked, not nulled) and
 * its last byte is set to NUL, so callers that go on to print or copy
 * gs->cbuf still see a terminated string.
 */
char *cadd(Gstring *gs, int c)
{
	if (gs->cbuf == 0)
		return 0;
	if (gs->clen >= gs->cmax) {	/* need to grow */
		int newmax = gs->cmax * 4;
		char *bigger = (char *) realloc((void *) gs->cbuf, newmax);

		if (bigger == 0) {
			if (gs->cmax > 0)
				gs->cbuf[gs->cmax - 1] = '\0';
			return 0;
		}
		gs->cbuf = bigger;
		gs->cmax = newmax;
	}
	gs->cbuf[gs->clen++] = c;
	return gs->cbuf;
}

void caddreset(Gstring *gs)	/* empty gs, keeping its buffer */
{
	gs->clen = 0;
}

void cunadd(Gstring *gs)	/* drop the last char added, if any */
{
	if (gs->clen > 0)
		gs->clen--;
}

void delGstring(Gstring *gs)	/* release gs and its buffer */
{
	free((void *) gs->cbuf);
	free((void *) gs);
}

#ifdef FLEX_SCANNER
/*
 * YY_USER_INIT hook: if yyin is still unset, point it at the current
 * program file pfile[curpfile] ("-" means stdin).  Does nothing when
 * there is no program file; failure to open the file is fatal.
 */
void init_input_source(void)
{
	extern int curpfile;
	extern char *pfile[];

	if (yyin == NULL) {
		if (pfile[curpfile] == 0)
			return;
		if (strcmp((char *) pfile[curpfile], "-") == 0)
			yyin = stdin;
		else if ((yyin = fopen((char *) pfile[curpfile], "r")) == NULL)
			ERROR "can't open file %s", pfile[curpfile] FATAL;
	}
}
#endif