summaryrefslogtreecommitdiff
path: root/usr.bin/awk/awklex.l
diff options
context:
space:
mode:
author Thorsten Lockert <tholo@cvs.openbsd.org> 1996-07-04 20:34:50 +0000
committer Thorsten Lockert <tholo@cvs.openbsd.org> 1996-07-04 20:34:50 +0000
commit 7f1c98407cf2b223c1da59c14405846d569923fb (patch)
tree e85dea1828c1d369d50fb41511b40a1866898c83 /usr.bin/awk/awklex.l
parent 941ca824f558fd952049b2434ab3b77d949bd7a6 (diff)
AT&T awk, by B. W. Kernighan, with fixes from 4.4BSD and by me
Diffstat (limited to 'usr.bin/awk/awklex.l')
-rw-r--r-- usr.bin/awk/awklex.l 405
1 files changed, 405 insertions, 0 deletions
diff --git a/usr.bin/awk/awklex.l b/usr.bin/awk/awklex.l
new file mode 100644
index 00000000000..d7ea2357e08
--- /dev/null
+++ b/usr.bin/awk/awklex.l
@@ -0,0 +1,405 @@
+%Start A strng sc reg comment
+
+%{
+/****************************************************************
+Copyright (C) AT&T and Lucent Technologies 1996
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the names of AT&T or Lucent Technologies
+or any of their entities not be used in advertising or publicity
+pertaining to distribution of the software without specific,
+written prior permission.
+
+AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
+ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+****************************************************************/
+
+/* some of this depends on behavior of lex that
+ may not be preserved in other implementations of lex.
+*/
+
+#ifndef FLEX_SCANNER
+#undef input /* defeat lex */
+#undef unput
+#endif /* !FLEX_SCANNER */
+
+#include <stdlib.h>
+#include <string.h>
+#include "awk.h"
+#include "awkgram.h"
+
+extern YYSTYPE yylval; /* token value filled in by the rules below (.i, .cp or .s) */
+extern int infunc; /* tested by the func and return rules to diagnose misplaced keywords */
+
+int lineno = 1; /* incremented by every rule that consumes a newline */
+int bracecnt = 0; /* '{' seen minus '}' seen; going negative is reported as "extra }" */
+int brackcnt = 0; /* same bookkeeping for '[' and ']' */
+int parencnt = 0; /* same bookkeeping for '(' and ')' */
+
+#define DEBUG /* defined unconditionally, so the tracing RET below is the one compiled */
+#ifdef DEBUG
+# define RET(x) {if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); } /* return token x, printing it first when dbg is set */
+#else
+# define RET(x) return(x)
+#endif
+
+#define CADD if (cadd(gs, yytext[0]) == 0) { /* append the current char; a 0 result from cadd is reported as "too long" */ \
+ ERROR "string/reg expr %.30s... too long", gs->cbuf SYNTAX; \
+ BEGIN A; \
+ }
+
+char *s; /* scratch: holds the tostring() copy made by the closing-quote rule */
+Gstring *gs = 0; /* buffer for the string or regular expression being scanned; initialized in main() */
+int cflag; /* not referenced in the visible source -- NOTE(review): confirm it is still needed */
+
+#ifdef FLEX_SCANNER
+static int my_input( YY_CHAR *buf, int max_size ); /* defined after the rules section */
+
+#undef YY_INPUT
+#define YY_INPUT(buf,result,max_size) result = my_input(buf, max_size); /* make flex refill its buffer through my_input() */
+
+#undef YY_USER_INIT
+#define YY_USER_INIT init_input_source(); /* flex runs YY_USER_INIT before the first scan; here it calls init_input_source() */
+
+#define FIRST ((yy_start - 1) / 2) /* current start condition, recovered from flex's yy_start; tested in the rules prologue */
+#else /* FLEX_SCANNER */
+#define FIRST (yybgin - yysvec - 1) /* same value for AT&T lex -- NOTE(review): depends on lex's internal tables */
+#endif /* FLEX_SCANNER */
+%}
+
+A [a-zA-Z_]
+B [a-zA-Z0-9_]
+D [0-9]
+O [0-7]
+H [0-9a-fA-F]
+WS [ \t]
+
+%%
+ switch (FIRST) { /* witchcraft: this code precedes the first rule and dispatches on the current start condition */
+ case 0: /* no start condition selected: enter A, the only state with ordinary token rules */
+ BEGIN A;
+ break;
+ case sc: /* the "}" rule returned ';' and switched to sc; now go back to A and deliver the '}' itself */
+ BEGIN A;
+ RET('}');
+ }
+
+<A>\n { lineno++; RET(NL); } /* a newline is returned to the parser as token NL */
+<A>#.* { ; } /* strip comments */
+<A>{WS}+ { ; } /* blanks and tabs produce no token */
+<A>; { RET(';'); }
+
+<A>"\\"\n { lineno++; } /* backslash-newline: count the line but return no token */
+<A>BEGIN { RET(XBEGIN); }
+<A>END { RET(XEND); }
+<A>func(tion)? { if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); } /* matches "func" or "function" */
+<A>return { if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); } /* the token is still returned after the error */
+<A>"&&" { RET(AND); }
+<A>"||" { RET(BOR); }
+<A>"!" { RET(NOT); }
+<A>"!=" { yylval.i = NE; RET(NE); } /* comparison operators: yylval.i repeats the token code */
+<A>"~" { yylval.i = MATCH; RET(MATCHOP); } /* both match operators share MATCHOP; yylval.i says which */
+<A>"!~" { yylval.i = NOTMATCH; RET(MATCHOP); }
+<A>"<" { yylval.i = LT; RET(LT); }
+<A>"<=" { yylval.i = LE; RET(LE); }
+<A>"==" { yylval.i = EQ; RET(EQ); }
+<A>">=" { yylval.i = GE; RET(GE); }
+<A>">" { yylval.i = GT; RET(GT); }
+<A>">>" { yylval.i = APPEND; RET(APPEND); }
+<A>"++" { yylval.i = INCR; RET(INCR); }
+<A>"--" { yylval.i = DECR; RET(DECR); }
+<A>"+=" { yylval.i = ADDEQ; RET(ASGNOP); } /* every assignment form returns ASGNOP; yylval.i selects the operation */
+<A>"-=" { yylval.i = SUBEQ; RET(ASGNOP); }
+<A>"*=" { yylval.i = MULTEQ; RET(ASGNOP); }
+<A>"/=" { yylval.i = DIVEQ; RET(ASGNOP); }
+<A>"%=" { yylval.i = MODEQ; RET(ASGNOP); }
+<A>"^=" { yylval.i = POWEQ; RET(ASGNOP); }
+<A>"**=" { yylval.i = POWEQ; RET(ASGNOP); } /* "**=" is handled exactly like "^=" */
+<A>"=" { yylval.i = ASSIGN; RET(ASGNOP); }
+<A>"**" { RET(POWER); } /* "**" and "^" are the same token */
+<A>"^" { RET(POWER); }
+
+<A>"$"{D}+ { yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); } /* $<digits>: look the field up by number right away */
+<A>"$NF" { unputstr("(NF)"); return(INDIRECT); } /* push back "(NF)" so $NF is rescanned as $(NF); plain return, so no dbg trace */
+<A>"$"{A}{B}* {
+	/*
+	 * "$name".  Peek at the next character: if a '(' or '[' follows, or
+	 * name is an argument of the function being defined, push the name
+	 * back and return INDIRECT so that it is rescanned as an expression
+	 * after '$'.  Otherwise enter name in symtab and return it as IVAR.
+	 * The name is copied first because input()/unput() may overwrite
+	 * yytext (flex documents that unput() trashes it).
+	 */
+	int c;
+	char *yytext_copy = strdup(yytext);
+	if (yytext_copy == NULL) {	/* strdup failed: do not index a null pointer below */
+		ERROR "out of space for %s", yytext FATAL;
+	}
+	c = input(); unput(c);	/* look for '(' or '[' */
+	if (c == '(' || c == '[' ||
+	    (infunc && isarg(yytext_copy+1) >= 0)) {
+		unputstr(yytext_copy+1);
+		free(yytext_copy);
+		return(INDIRECT);
+	} else {
+		yylval.cp =
+			setsymtab(yytext_copy+1,"",0.0,STR|NUM,symtab);
+		free(yytext_copy);
+		RET(IVAR);
+	}
+	}
+<A>"$" { RET(INDIRECT); } /* a bare '$' applies to whatever expression follows */
+<A>NF { yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }
+
+<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)? { /* numeric literal: digits with optional fraction, or .digits, then an optional exponent */
+ yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab); /* entered under its own text, with a tostring() copy and the atof() value */
+ /* should this also have STR set? */
+ RET(NUMBER); }
+
+<A>while { RET(WHILE); } /* reserved words come before the identifier rule, so an exact-length tie goes to the keyword */
+<A>for { RET(FOR); }
+<A>do { RET(DO); }
+<A>if { RET(IF); }
+<A>else { RET(ELSE); }
+<A>next { RET(NEXT); }
+<A>nextfile { RET(NEXTFILE); }
+<A>exit { RET(EXIT); }
+<A>break { RET(BREAK); }
+<A>continue { RET(CONTINUE); }
+<A>print { yylval.i = PRINT; RET(PRINT); } /* these also record their own token code in yylval.i */
+<A>printf { yylval.i = PRINTF; RET(PRINTF); }
+<A>sprintf { yylval.i = SPRINTF; RET(SPRINTF); }
+<A>split { yylval.i = SPLIT; RET(SPLIT); }
+<A>substr { RET(SUBSTR); }
+<A>sub { yylval.i = SUB; RET(SUB); }
+<A>gsub { yylval.i = GSUB; RET(GSUB); }
+<A>index { RET(INDEX); }
+<A>match { RET(MATCHFCN); }
+<A>in { RET(IN); }
+<A>getline { RET(GETLINE); }
+<A>close { RET(CLOSE); }
+<A>delete { RET(DELETE); }
+<A>length { yylval.i = FLENGTH; RET(BLTIN); } /* the remaining builtins share token BLTIN; yylval.i selects the function */
+<A>log { yylval.i = FLOG; RET(BLTIN); }
+<A>int { yylval.i = FINT; RET(BLTIN); }
+<A>exp { yylval.i = FEXP; RET(BLTIN); }
+<A>sqrt { yylval.i = FSQRT; RET(BLTIN); }
+<A>sin { yylval.i = FSIN; RET(BLTIN); }
+<A>cos { yylval.i = FCOS; RET(BLTIN); }
+<A>atan2 { yylval.i = FATAN; RET(BLTIN); }
+<A>system { yylval.i = FSYSTEM; RET(BLTIN); }
+<A>rand { yylval.i = FRAND; RET(BLTIN); }
+<A>srand { yylval.i = FSRAND; RET(BLTIN); }
+<A>toupper { yylval.i = FTOUPPER; RET(BLTIN); }
+<A>tolower { yylval.i = FTOLOWER; RET(BLTIN); }
+<A>fflush { yylval.i = FFLUSH; RET(BLTIN); }
+
+<A>{A}{B}* { int n, c;
+	/*
+	 * Any other identifier.  Peek at the next character to classify it:
+	 *   - not followed by '(' and naming an argument of the function
+	 *     being defined: ARG, with the value isarg() returned in yylval.i;
+	 *   - followed by '(': CALL, with the symtab entry in yylval.cp;
+	 *   - anything else: VAR, with the symtab entry in yylval.cp.
+	 * The name is copied first because input()/unput() may overwrite
+	 * yytext (flex documents that unput() trashes it).
+	 */
+	char *yytext_copy = strdup(yytext);
+	if (yytext_copy == NULL) {	/* strdup failed: do not hand a null name to isarg/setsymtab */
+		ERROR "out of space for %s", yytext FATAL;
+	}
+	c = input(); unput(c);	/* look for '(' */
+	if (c != '(' && infunc && (n=isarg(yytext_copy)) >= 0) {
+		yylval.i = n;
+		free(yytext_copy);
+		RET(ARG);
+	} else {
+		yylval.cp = setsymtab(yytext_copy, "", 0.0, STR|NUM, symtab);
+		free(yytext_copy);
+		if (c == '(') {
+			RET(CALL);
+		} else {
+			RET(VAR);
+		}
+	}
+	}
+<A>\" { BEGIN strng; caddreset(gs); } /* opening quote: empty the shared buffer and switch to the string rules */
+
+<A>"}" { if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); } /* returns ';' now; state sc makes the next yylex() call return the '}' */
+<A>"]" { if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
+<A>")" { if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }
+
+<A>. { if (yytext[0] == '{') bracecnt++; /* openers are counted here; closers are checked by the three rules above */
+ else if (yytext[0] == '[') brackcnt++;
+ else if (yytext[0] == '(') parencnt++;
+ RET(yylval.i = yytext[0]); /* everything else */ }
+
+<reg>\\. { cadd(gs, '\\'); cadd(gs, yytext[1]); } /* keep the backslash and the escaped char, so "\/" does not end the regex */
+<reg>\n { cadd(gs, 0);	/* terminate the text so %.10s cannot read past what was collected */
+	ERROR "newline in regular expression %.10s...", gs->cbuf SYNTAX; lineno++; BEGIN A; }
+<reg>"/" { BEGIN A;	/* closing slash: return to the ordinary token rules */
+	cadd(gs, 0);	/* NUL-terminate the collected text */
+	yylval.s = tostring(gs->cbuf);	/* the token value is a copy made by tostring() */
+	unput('/');	/* push the '/' back; rule <A>. will return it as its own token */
+	RET(REGEXPR); }
+<reg>. { CADD; } /* any other character is appended as-is */
+
+<strng>\" { BEGIN A;	/* closing quote: return to the ordinary token rules */
+	cadd(gs, 0); s = tostring(gs->cbuf);	/* s = copy of the decoded string */
+	cunadd(gs);	/* drop the NUL again ... */
+	cadd(gs, ' '); cadd(gs, 0);	/* ... and build "<string> " as the first setsymtab argument */
+	yylval.cp = setsymtab(gs->cbuf, s, 0.0, CON|STR, symtab);
+	RET(STRING); }
+<strng>\n { cadd(gs, 0);	/* terminate the text so %.10s cannot read past what was collected */
+	ERROR "newline in string %.10s...", gs->cbuf SYNTAX; lineno++; BEGIN A; }
+<strng>"\\\"" { cadd(gs, '"'); }
+<strng>"\\"n { cadd(gs, '\n'); }
+<strng>"\\"t { cadd(gs, '\t'); }
+<strng>"\\"f { cadd(gs, '\f'); }
+<strng>"\\"r { cadd(gs, '\r'); }
+<strng>"\\"b { cadd(gs, '\b'); }
+<strng>"\\"v { cadd(gs, '\v'); } /* these ANSIisms may not be known by */
+<strng>"\\"a { cadd(gs, '\007'); } /* your compiler. hence 007 for bell */
+<strng>"\\\\" { cadd(gs, '\\'); }
+<strng>"\\"({O}{O}{O}|{O}{O}|{O}) { unsigned int n = 0;	/* %o stores through an unsigned int pointer */
+	sscanf(yytext+1, "%o", &n); cadd(gs, (int) n); }
+<strng>"\\"x({H}+) { unsigned int n = 0; /* ANSI permits any number! %x also needs unsigned int */
+	sscanf(yytext+2, "%x", &n); cadd(gs, (int) n); }
+<strng>"\\". { cadd(gs, yytext[1]); } /* unknown escape: keep the character, drop the backslash */
+<strng>. { CADD; } /* ordinary character */
+
+%%
+
+/*
+ * startreg: prepare to scan a regular expression.  Empties the shared
+ * Gstring and selects the <reg> rules.
+ */
+void startreg(void)
+{
+	caddreset(gs);
+	BEGIN reg;
+}
+
+#ifdef FLEX_SCANNER
+/*
+ * my_input: refill routine installed through YY_INPUT.
+ *
+ * Stores up to max_size characters of program text in buf and returns
+ * how many were stored; 0 means end of input.  When lexprog is set
+ * (awk '...') the text is taken from that string and lexprog is advanced
+ * past what was delivered; otherwise (awk -f ...) one character is
+ * fetched with pgetc().
+ *
+ * Exactly the returned number of bytes is written and no terminating NUL
+ * is appended, so a program whose remaining length equals max_size no
+ * longer writes one byte beyond the space the caller offered.
+ */
+static int my_input( YY_CHAR *buf, int max_size )
+{
+	extern uschar *lexprog;
+
+	if ( max_size <= 0 )
+		return 0;
+
+	if ( lexprog ) {	/* awk '...' */
+		size_t avail = strlen( (char *) lexprog );
+		int num_chars;
+
+		if ( avail > (size_t) max_size )
+			num_chars = max_size;
+		else
+			num_chars = (int) avail;
+		memcpy( buf, lexprog, (size_t) num_chars );
+		lexprog += num_chars;
+		return num_chars;
+
+	} else {	/* awk -f ... */
+		int c = pgetc();
+		if (c == EOF)
+			return 0;
+		buf[0] = c;
+		return 1;
+	}
+}
+#else /* FLEX_SCANNER */
+/* input() and unput() are transcriptions of the standard lex
+ macros for input and output with additions for error message
+ printing. God help us all if someone changes how lex works.
+*/
+
+char ebuf[300]; /* ring of the most recently read characters: input() appends, unput() steps back */
+char *ep = ebuf; /* next slot of ebuf that input() will write */
+
+int input(void) /* get next lexical input character */
+{
+ int c;
+ extern char *lexprog;
+
+ if (yysptr > yysbuf) /* characters pushed back by unput() are consumed first */
+ c = U(*--yysptr);
+ else if (lexprog != NULL) { /* awk '...' */
+ if ((c = *lexprog) != 0) /* do not advance past the terminating NUL, so 0 keeps being returned */
+ lexprog++;
+ } else /* awk -f ... */
+ c = pgetc();
+ if (c == '\n')
+ yylineno++;
+ else if (c == EOF) /* end of input is reported to lex as 0 */
+ c = 0;
+ if (ep >= ebuf + sizeof ebuf) /* wrap the ring pointer */
+ ep = ebuf;
+ return *ep++ = c; /* record in ebuf and return; NOTE(review): the returned value has passed through a char */
+}
+
+void unput(int c) /* put lexical character back on input */
+{
+ yytchar = c;
+ if (yytchar == '\n') /* undo the line count that input() added for this newline */
+ yylineno--;
+ *yysptr++ = yytchar; /* push onto the stack that input() pops before reading anything new */
+ if (--ep < ebuf) /* step the ebuf ring back one slot, wrapping to its last element */
+ ep = ebuf + sizeof(ebuf) - 1;
+}
+#endif /* FLEX_SCANNER */
+
+/*
+ * unputstr: push the whole string s back onto the scanner input.
+ * Characters are handed to unput() last-first, so that they are read
+ * again in their original order.  An empty string pushes nothing.
+ */
+void unputstr(char *s)
+{
+	char *p = s + strlen(s);
+
+	while (p > s) {
+		p--;
+		unput(*p);
+	}
+}
+
+/*
+ * lex_input: function wrapper around the scanner's input(); returns
+ * whatever input() returns.
+ */
+int lex_input()
+{
+	int c = input();
+
+	return c;
+}
+
+/* growing-string code */
+
+const int CBUFLEN = 400;	/* capacity given to every new Gstring buffer */
+
+/*
+ * newGstring: allocate an empty Gstring together with a CBUFLEN-byte
+ * buffer.  If either allocation fails, "Out of space for strings" is
+ * raised through ERROR ... FATAL.  Returns the new Gstring.
+ */
+Gstring *newGstring()
+{
+	Gstring *g = (Gstring *) malloc(sizeof *g);
+	char *buf = (char *) malloc(CBUFLEN);
+
+	if (buf == 0 || g == 0)
+		ERROR "Out of space for strings" FATAL;
+	g->clen = 0;
+	g->cmax = CBUFLEN;
+	g->cbuf = buf;
+	return g;
+}
+
+/*
+ * cadd: append character c to gs, growing the buffer fourfold when it
+ * is full.
+ *
+ * Returns gs->cbuf on success.  Returns 0 when the buffer cannot be
+ * grown (or gs has no buffer at all); in that case c is not stored and
+ * the existing buffer, length and capacity are left exactly as they
+ * were, so the text collected so far stays readable and nothing leaks.
+ */
+char *cadd(Gstring *gs, int c)
+{
+	if (gs->clen >= gs->cmax) {	/* need to grow */
+		int newmax = gs->cmax * 4;
+		char *grown = (char *) realloc((void *) gs->cbuf, newmax);
+
+		/* realloc leaves the old block allocated when it fails, so
+		   only adopt the new pointer and size once it has succeeded */
+		if (grown == 0)
+			return 0;
+		gs->cbuf = grown;
+		gs->cmax = newmax;
+	}
+	if (gs->cbuf == 0)
+		return 0;
+	gs->cbuf[gs->clen++] = c;
+	return gs->cbuf;
+}
+
+/*
+ * caddreset: make gs empty again.  Only the length is reset; the buffer
+ * and its capacity are kept for reuse.
+ */
+void caddreset(Gstring *gs)
+{
+	gs->clen = 0;	/* next cadd() stores at index 0 */
+}
+
+/*
+ * cunadd: drop the most recently added character from gs.
+ * Does nothing when gs is already empty.
+ */
+void cunadd(Gstring *gs)
+{
+	if (gs->clen <= 0)
+		return;
+	gs->clen -= 1;
+}
+
+/*
+ * delGstring: release a Gstring: first its character buffer, then the
+ * Gstring structure itself.
+ */
+void delGstring(Gstring *gs)
+{
+	char *buf = gs->cbuf;
+
+	free((void *) buf);
+	free((void *) gs);
+}
+
+#ifdef FLEX_SCANNER
+/*
+ * init_input_source: make sure yyin is set.
+ *
+ * Leaves yyin alone if it is already open, or if pfile[curpfile] is
+ * null.  Otherwise the name "-" selects stdin and any other name is
+ * opened for reading; a file that cannot be opened is reported through
+ * ERROR ... FATAL.
+ */
+void init_input_source(void)
+{
+	extern int curpfile;
+	extern char *pfile[];
+	char *name;
+
+	if (yyin != NULL)
+		return;
+
+	name = (char *) pfile[curpfile];
+	if (name == 0)
+		return;
+
+	if (strcmp(name, "-") == 0) {
+		yyin = stdin;
+		return;
+	}
+
+	yyin = fopen(name, "r");
+	if (yyin == NULL)
+		ERROR "can't open file %s", pfile[curpfile] FATAL;
+}
+#endif