/*	$OpenBSD: scan.l,v 1.9 2006/12/06 05:03:29 ray Exp $	*/

/* scan.l - scanner for flex input */

%{
/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Vern Paxson.
 *
 * The United States Government has rights in this work pursuant
 * to contract no. DE-AC03-76SF00098 between the United States
 * Department of Energy and the University of California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/* $Header: /cvs/OpenBSD/src/usr.bin/lex/scan.l,v 1.9 2006/12/06 05:03:29 ray Exp $ */

#include "flexdef.h"
#include "parse.h"

/* Hand the text of the current token to add_action(). */
#define ACTION_ECHO add_action( yytext )

/* Call action_define( def, 1 ) only when should_define is nonzero. */
#define ACTION_IFDEF(def, should_define) \
	{ \
	if ( should_define ) \
		action_define( def, 1 ); \
	}

#define MARK_END_OF_PROLOG mark_prolog();

/* The generated scanning routine is named flexscan(). */
#define YY_DECL \
	int flexscan()

/* Return the first byte of the token, as an unsigned value, as a CHAR. */
#define RETURNCHAR \
	yylval = (unsigned char) yytext[0]; \
	return CHAR;

/* Copy the token into nmstr (truncating it to fit) and return NAME. */
#define RETURNNAME \
	strlcpy( nmstr, yytext, sizeof nmstr); \
	return NAME;

/* unput() the characters of str from the last one down to index start,
 * so that they are scanned again in their original order.  Relies on
 * the int variable i declared at the top of the rules section.
 */
#define PUT_BACK_STRING(str, start) \
	for ( i = strlen( str ) - 1; i >= start; --i ) \
		unput((str)[i])

/* Set the reject flag when str is entirely upper case. */
#define CHECK_REJECT(str) \
	if ( all_upper( str ) ) \
		reject = true;

/* Set yymore_used when str is entirely lower case. */
#define CHECK_YYMORE(str) \
	if ( all_lower( str ) ) \
		yymore_used = true;
%}

%option caseless nodefault outfile="scan.c" stack noyy_top_state
%option nostdinit

%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
%x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
%x OPTION LINEDIR

WS		[[:blank:]]+
OPTWS		[[:blank:]]*
NOT_WS		[^[:blank:]\n]

NL		\r?\n

NAME		([[:alpha:]_][[:alnum:]_-]*)
NOT_NAME	[^[:alpha:]_*\n]+

SCNAME		{NAME}

ESCSEQ		(\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))

FIRST_CCL_CHAR	([^\\\n]|{ESCSEQ})
CCL_CHAR	([^\\\n\]]|{ESCSEQ})
CCL_EXPR	("[:"[[:alpha:]]+":]")

LEXOPT		[aceknopr]

%%
	static int bracelevel, didadef, indented_code;
	static int doing_rule_action = false;
	static int option_sense;

	int doing_codeblock = false;
	int i;
	Char nmdef[MAXLINE], myesc();


<INITIAL>{
	^{WS}		indented_code = true; BEGIN(CODEBLOCK);
	^"/*"		ACTION_ECHO; yy_push_state( COMMENT );
	^#{OPTWS}line{WS}	yy_push_state( LINEDIR );
	^"%s"{NAME}?	return SCDECL;
	^"%x"{NAME}?	return XSCDECL;
	^"%{".*{NL}	{
			++linenum;
			line_directive_out( (FILE *) 0, 1 );
			indented_code = false;
			BEGIN(CODEBLOCK);
			}

	{WS}		/* discard */

	^"%%".*		{
			sectnum = 2;
			bracelevel = 0;
			mark_defs1();
			line_directive_out( (FILE *) 0, 1 );
			BEGIN(SECT2PROLOG);
			return SECTEND;
			}

	^"%pointer".*{NL}	yytext_is_array = false; ++linenum;
	^"%array".*{NL}		yytext_is_array = true; ++linenum;

	^"%option"	BEGIN(OPTION); return OPTION_OP;

	^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL}	++linenum; /* ignore */
	^"%"{LEXOPT}{WS}.*{NL}	++linenum;	/* ignore */

	^"%"[^sxaceknopr{}].*	synerr( _( "unrecognized '%' directive" ) );

	^{NAME}		{
			strlcpy( nmstr, yytext, sizeof nmstr );
			didadef = false;
			BEGIN(PICKUPDEF);
			}

	{SCNAME}	RETURNNAME;
	^{OPTWS}{NL}	++linenum; /* allows blank lines in section 1 */
	{OPTWS}{NL}	ACTION_ECHO; ++linenum; /* maybe end of comment line */
}


	/* Inside a C comment: pass the text through, counting newlines. */
<COMMENT>{
	"*/"		ACTION_ECHO; yy_pop_state();
	"*"		ACTION_ECHO;
	[^*\n]+		ACTION_ECHO;
	[^*\n]*{NL}	++linenum; ACTION_ECHO;
}

	/* Remainder of a #line directive: line number and quoted file name. */
<LINEDIR>{
	\n		yy_pop_state();
	[[:digit:]]+	linenum = myctoi( yytext );

	\"[^"\n]*\"	{
			flex_free( (void *) infilename );
			infilename = copy_string( yytext + 1 );
			infilename[strlen( infilename ) - 1] = '\0';
			}
	.		/* ignore spurious characters */
}

	/* Literal code in section 1; an indented block ends at its newline. */
<CODEBLOCK>{
	^"%}".*{NL}	++linenum; BEGIN(INITIAL);

	{NAME}|{NOT_NAME}|.	ACTION_ECHO;

	{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( indented_code )
				BEGIN(INITIAL);
			}
}


	/* Right-hand side of a name definition; the name is already in nmstr. */
<PICKUPDEF>{
	{WS}		/* separates name and definition */

	{NOT_WS}.*	{
			strlcpy( (char *) nmdef, yytext, sizeof nmdef);

			/* Skip trailing whitespace. */
			for ( i = strlen( (char *) nmdef ) - 1;
			      i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t');
			      --i )
				;

			nmdef[i + 1] = '\0';

			ndinstal( nmstr, nmdef );
			didadef = true;
			}

	{NL}		{
			if ( ! didadef )
				synerr( _( "incomplete name definition" ) );
			BEGIN(INITIAL);
			++linenum;
			}
}


	/* Arguments of a %option directive.  Whitespace resets option_sense */
	/* to true and each leading "no" inverts it. */
<OPTION>{
	{NL}		++linenum; BEGIN(INITIAL);
	{WS}		option_sense = true;

	"="		return '=';

	no		option_sense = ! option_sense;

	7bit		csize = option_sense ? 128 : 256;
	8bit		csize = option_sense ? 256 : 128;

	align		long_align = option_sense;
	always-interactive	{
			action_define( "YY_ALWAYS_INTERACTIVE", option_sense );
			}
	array		yytext_is_array = option_sense;
	backup		backing_up_report = option_sense;
	batch		interactive = ! option_sense;
	"c++"		C_plus_plus = option_sense;
	caseful|case-sensitive		caseins = ! option_sense;
	caseless|case-insensitive	caseins = option_sense;
	debug		ddebug = option_sense;
	default		spprdflt = ! option_sense;
	ecs		useecs = option_sense;
	fast		{
			useecs = usemecs = false;
			use_read = fullspd = true;
			}
	full		{
			useecs = usemecs = false;
			use_read = fulltbl = true;
			}
	input		ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
	interactive	interactive = option_sense;
	lex-compat	lex_compat = option_sense;
	main		{
			action_define( "YY_MAIN", option_sense );
			do_yywrap = ! option_sense;
			}
	meta-ecs	usemecs = option_sense;
	never-interactive	{
			action_define( "YY_NEVER_INTERACTIVE", option_sense );
			}
	perf-report	performance_report += option_sense ? 1 : -1;
	pointer		yytext_is_array = ! option_sense;
	read		use_read = option_sense;
	reject		reject_really_used = option_sense;
	stack		action_define( "YY_STACK_USED", option_sense );
	stdinit		do_stdinit = option_sense;
	stdout		use_stdout = option_sense;
	unput		ACTION_IFDEF("YY_NO_UNPUT", ! option_sense);
	verbose		printstats = option_sense;
	warn		nowarn = ! option_sense;
	yylineno	do_yylineno = option_sense;
	yymore		yymore_really_used = option_sense;
	yywrap		do_yywrap = option_sense;

	yy_push_state	ACTION_IFDEF("YY_NO_PUSH_STATE", ! option_sense);
	yy_pop_state	ACTION_IFDEF("YY_NO_POP_STATE", ! option_sense);
	yy_top_state	ACTION_IFDEF("YY_NO_TOP_STATE", ! option_sense);

	yy_scan_buffer	ACTION_IFDEF("YY_NO_SCAN_BUFFER", ! option_sense);
	yy_scan_bytes	ACTION_IFDEF("YY_NO_SCAN_BYTES", ! option_sense);
	yy_scan_string	ACTION_IFDEF("YY_NO_SCAN_STRING", ! option_sense);

	outfile		return OPT_OUTFILE;
	prefix		return OPT_PREFIX;
	yyclass		return OPT_YYCLASS;

	\"[^"\n]*\"	{
			strlcpy( nmstr, yytext + 1, sizeof nmstr);
			if (nmstr[strlen(nmstr) - 1] == '"')
				nmstr[strlen(nmstr) - 1] = '\0';
			return NAME;
			}

	(([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|.	{
			format_synerr( _( "unrecognized %%option: %s" ),
				yytext );
			BEGIN(RECOVER);
			}
}

	/* After a bad option: discard the rest of the line. */
<RECOVER>.*{NL}		++linenum; BEGIN(INITIAL);


	/* Code preceding the first rule of section 2. */
<SECT2PROLOG>{
	^"%{".*	++bracelevel; yyless( 2 );	/* eat only %{ */
	^"%}".*	--bracelevel; yyless( 2 );	/* eat only %} */

	^{WS}.*	ACTION_ECHO;	/* indented code in prolog */

	^{NOT_WS}.*	{	/* non-indented code */
			if ( bracelevel <= 0 )
				{ /* not in %{ ... %} */
				yyless( 0 );	/* put it all back */
				yy_set_bol( 1 );
				mark_prolog();
				BEGIN(SECT2);
				}
			else
				ACTION_ECHO;
			}

	.*		ACTION_ECHO;
	{NL}	++linenum; ACTION_ECHO;

	<<EOF>>		{
			mark_prolog();
			sectnum = 0;
			yyterminate(); /* to stop the parser */
			}
}

	/* Rule patterns of section 2. */
<SECT2>{
	^{OPTWS}{NL}	++linenum; /* allow blank lines in section 2 */

	^{OPTWS}"%{"	{
			indented_code = false;
			doing_codeblock = true;
			bracelevel = 1;
			BEGIN(PERCENT_BRACE_ACTION);
			}

	^{OPTWS}"<"	BEGIN(SC); return '<';
	^{OPTWS}"^"	return '^';
	\"		BEGIN(QUOTE); return '"';
	"{"/[[:digit:]]	BEGIN(NUM); return '{';
	"$"/([[:blank:]]|{NL})	return '$';

	{WS}"%{"		{
			bracelevel = 1;
			BEGIN(PERCENT_BRACE_ACTION);

			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}
	{WS}"|".*{NL}	continued_action = true; ++linenum; return '\n';

	^{WS}"/*"	{
			yyless( yyleng - 2 );	/* put back '/', '*' */
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);
			}

	^{WS}		/* allow indented rules */

	{WS}		{
			/* This rule is separate from the one below because
			 * otherwise we get variable trailing context, so
			 * we can't build the scanner using -{f,F}.
			 */
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);

			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}

	{OPTWS}{NL}	{
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);
			unput( '\n' );	/* so <ACTION> sees it */

			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}

	^{OPTWS}"<<EOF>>"	|
	"<<EOF>>"	return EOF_OP;

	^"%%".*		{
			sectnum = 3;
			BEGIN(SECT3);
			yyterminate(); /* to stop the parser */
			}

	"["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})*	{
			int cclval;

			strlcpy( nmstr, yytext, sizeof nmstr);

			/* Check to see if we've already encountered this
			 * ccl.
			 */
			if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
				{
				if ( input() != ']' )
					synerr( _( "bad character class" ) );

				yylval = cclval;
				++cclreuse;
				return PREVCCL;
				}
			else
				{
				/* We fudge a bit.  We know that this ccl will
				 * soon be numbered as lastccl + 1 by cclinit.
				 */
				cclinstal( (Char *) nmstr, lastccl + 1 );

				/* Push back everything but the leading bracket
				 * so the ccl can be rescanned.
				 */
				yyless( 1 );

				BEGIN(FIRSTCCL);
				return '[';
				}
			}

	"{"{NAME}"}"	{
			register Char *nmdefptr;
			Char *ndlookup();

			strlcpy( nmstr, yytext + 1, sizeof nmstr );

			/* Chop the trailing brace.  strlcpy() truncates an
			 * over-long name to fit nmstr and terminates it, so
			 * only store through the token-length index when it
			 * lies inside the buffer; otherwise this would write
			 * past the end of nmstr.
			 */
			if ( (size_t) (yyleng - 2) < sizeof nmstr )
				nmstr[yyleng - 2] = '\0';

			if ( (nmdefptr = ndlookup( nmstr )) == 0 )
				format_synerr(
					_( "undefined definition {%s}" ),
						nmstr );

			else
				{ /* push back name surrounded by ()'s */
				int len = strlen( (char *) nmdefptr );

				if ( lex_compat || nmdefptr[0] == '^' ||
				     (len > 0 && nmdefptr[len - 1] == '$') )
					{ /* don't use ()'s after all */
					PUT_BACK_STRING((char *) nmdefptr, 0);

					if ( nmdefptr[0] == '^' )
						BEGIN(CARETISBOL);
					}

				else
					{
					unput(')');
					PUT_BACK_STRING((char *) nmdefptr, 0);
					unput('(');
					}
				}
			}

	[/|*+?.(){}]	return (unsigned char) yytext[0];
	.		RETURNCHAR;
}


	/* Inside a <start condition> list. */
<SC>{
	[,*]		return (unsigned char) yytext[0];
	">"		BEGIN(SECT2); return '>';
	">"/^		BEGIN(CARETISBOL); return '>';
	{SCNAME}	RETURNNAME;
	.		{
			format_synerr( _( "bad <start condition>: %s" ),
				yytext );
			}
}

<CARETISBOL>"^"		BEGIN(SECT2); return '^';


	/* Inside a double-quoted pattern string. */
<QUOTE>{
	[^"\n]		RETURNCHAR;
	\"		BEGIN(SECT2); return '"';

	{NL}		{
			synerr( _( "missing quote" ) );
			BEGIN(SECT2);
			++linenum;
			return '"';
			}
}


	/* First character of a character class, then the rest of it. */
<FIRSTCCL>{
	"^"/[^-\]\n]	BEGIN(CCL); return '^';
	"^"/("-"|"]")	return '^';
	.		BEGIN(CCL); RETURNCHAR;
}

<CCL>{
	-/[^\]\n]	return '-';
	[^\]\n]		RETURNCHAR;
	"]"		BEGIN(SECT2); return ']';
	.|{NL}		{
			synerr( _( "bad character class" ) );
			BEGIN(SECT2);
			return ']';
			}
}

<FIRSTCCL,CCL>{
	"[:alnum:]"	BEGIN(CCL); return CCE_ALNUM;
	"[:alpha:]"	BEGIN(CCL); return CCE_ALPHA;
	"[:blank:]"	BEGIN(CCL); return CCE_BLANK;
	"[:cntrl:]"	BEGIN(CCL); return CCE_CNTRL;
	"[:digit:]"	BEGIN(CCL); return CCE_DIGIT;
	"[:graph:]"	BEGIN(CCL); return CCE_GRAPH;
	"[:lower:]"	BEGIN(CCL); return CCE_LOWER;
	"[:print:]"	BEGIN(CCL); return CCE_PRINT;
	"[:punct:]"	BEGIN(CCL); return CCE_PUNCT;
	"[:space:]"	BEGIN(CCL); return CCE_SPACE;
	"[:upper:]"	BEGIN(CCL); return CCE_UPPER;
	"[:xdigit:]"	BEGIN(CCL); return CCE_XDIGIT;
	{CCL_EXPR}	{
			format_synerr(
				_( "bad character class expression: %s" ),
					yytext );
			BEGIN(CCL); return CCE_ALNUM;
			}
}

	/* Inside a {m,n} repeat count. */
<NUM>{
	[[:digit:]]+	{
			yylval = myctoi( yytext );
			return NUMBER;
			}

	","		return ',';
	"}"		BEGIN(SECT2); return '}';

	.		{
			synerr( _( "bad character inside {}'s" ) );
			BEGIN(SECT2);
			return '}';
			}

	{NL}		{
			synerr( _( "missing }" ) );
			BEGIN(SECT2);
			++linenum;
			return '}';
			}
}


<PERCENT_BRACE_ACTION>{
	{OPTWS}"%}".*		bracelevel = 0;

	<ACTION>"/*"		ACTION_ECHO; yy_push_state( COMMENT );

	<CODEBLOCK,ACTION>{
		"reject"	{
			ACTION_ECHO;
			CHECK_REJECT(yytext);
			}
		"yymore"	{
			ACTION_ECHO;
			CHECK_YYMORE(yytext);
			}
	}

	{NAME}|{NOT_NAME}|.	ACTION_ECHO;
	{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( bracelevel == 0 ||
			     (doing_codeblock && indented_code) )
				{
				if ( doing_rule_action )
					add_action( "\tYY_BREAK\n" );

				doing_rule_action = doing_codeblock = false;
				BEGIN(SECT2);
				}
			}
}


	/* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
<ACTION>{
	"{"		ACTION_ECHO; ++bracelevel;
	"}"		ACTION_ECHO; --bracelevel;
	[^[:alpha:]_{}"'/\n]+	ACTION_ECHO;
	{NAME}		ACTION_ECHO;
	"'"([^'\\\n]|\\.)*"'"	ACTION_ECHO; /* character constant */
	\"		ACTION_ECHO; BEGIN(ACTION_STRING);
	{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( bracelevel == 0 )
				{
				if ( doing_rule_action )
					add_action( "\tYY_BREAK\n" );

				doing_rule_action = false;
				BEGIN(SECT2);
				}
			}
	.		ACTION_ECHO;
}

<ACTION_STRING>{
	[^"\\\n]+	ACTION_ECHO;
	\\.		ACTION_ECHO;
	{NL}		++linenum; ACTION_ECHO;
	\"		ACTION_ECHO; BEGIN(ACTION);
	.		ACTION_ECHO;
}

<COMMENT,ACTION,ACTION_STRING><<EOF>>	{
			synerr( _( "EOF encountered inside an action" ) );
			yyterminate();
			}


<SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ}	{
			yylval = myesc( (Char *) yytext );

			if ( YY_START == FIRSTCCL )
				BEGIN(CCL);

			return CHAR;
			}


<SECT3>{
	.*(\n?)		ECHO;
	<<EOF>>		sectnum = 0; yyterminate();
}

<*>.|\n			format_synerr( _( "bad character: %s" ), yytext );

%%


/* yywrap - called when the scanner reaches end of input
 *
 * If another input file remains, switches to it with set_input_file()
 * and returns 0; otherwise returns 1.
 */

int yywrap()
	{
	if ( --num_input_files > 0 )
		{
		set_input_file( *++input_files );
		return 0;
		}

	else
		return 1;
	}


/* set_input_file - open the given file (if NULL, stdin) for scanning
 *
 * A file name of "-" also selects stdin.  Sets yyin and infilename
 * (a fresh copy_string() copy, "<stdin>" for standard input) and resets
 * linenum to 1.  A file that cannot be opened is reported via lerrsf().
 */

void set_input_file( file )
char *file;
	{
	if ( file && strcmp( file, "-" ) )
		{
		infilename = copy_string( file );
		yyin = fopen( infilename, "r" );

		if ( yyin == NULL )
			lerrsf( _( "can't open %s" ), file );
		}

	else
		{
		yyin = stdin;
		infilename = copy_string( "<stdin>" );
		}

	linenum = 1;
	}


/* Wrapper routines for accessing the scanner's malloc routines. */

/* flex_alloc - return malloc( size ) */

void *flex_alloc( size )
size_t size;
	{
	return (void *) malloc( size );
	}

/* flex_realloc - return realloc( ptr, size ) */

void *flex_realloc( ptr, size )
void *ptr;
size_t size;
	{
	return (void *) realloc( ptr, size );
	}

/* flex_free - release ptr; a null ptr is accepted and ignored */

void flex_free( ptr )
void *ptr;
	{
	/* free() is a no-op on a null pointer, so no guard is needed. */
	free( ptr );
	}