diff options
author | Ted Unangst <tedu@cvs.openbsd.org> | 2015-11-19 22:52:41 +0000 |
---|---|---|
committer | Ted Unangst <tedu@cvs.openbsd.org> | 2015-11-19 22:52:41 +0000 |
commit | 9b738dfeed804665d9b66252352eff329895b05c (patch) | |
tree | 9255f12ec95f0ab616f40d4e5f0b66218e80983d /usr.bin/lex/nfa.c | |
parent | 60a53bd3d75f62db2ac7869f8182ab1488a64473 (diff) |
orbital strike from moonbase knf
Diffstat (limited to 'usr.bin/lex/nfa.c')
-rw-r--r-- | usr.bin/lex/nfa.c | 429 |
1 files changed, 218 insertions, 211 deletions
diff --git a/usr.bin/lex/nfa.c b/usr.bin/lex/nfa.c index f92f7fa9b86..272febfc69e 100644 --- a/usr.bin/lex/nfa.c +++ b/usr.bin/lex/nfa.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfa.c,v 1.10 2015/11/19 19:43:40 tedu Exp $ */ +/* $OpenBSD: nfa.c,v 1.11 2015/11/19 22:52:40 tedu Exp $ */ /* nfa - NFA construction routines */ @@ -38,8 +38,8 @@ /* declare functions that have forward references */ -int dupmachine PROTO ((int)); -void mkxtion PROTO ((int, int)); +int dupmachine PROTO((int)); +void mkxtion PROTO((int, int)); /* add_accept - add an accepting state to a machine @@ -47,23 +47,25 @@ void mkxtion PROTO ((int, int)); * accepting_number becomes mach's accepting number. */ -void add_accept (mach, accepting_number) - int mach, accepting_number; +void +add_accept(mach, accepting_number) + int mach, accepting_number; { - /* Hang the accepting number off an epsilon state. if it is associated - * with a state that has a non-epsilon out-transition, then the state - * will accept BEFORE it makes that transition, i.e., one character - * too soon. + /* + * Hang the accepting number off an epsilon state. if it is + * associated with a state that has a non-epsilon out-transition, + * then the state will accept BEFORE it makes that transition, i.e., + * one character too soon. */ if (transchar[finalst[mach]] == SYM_EPSILON) accptnum[finalst[mach]] = accepting_number; else { - int astate = mkstate (SYM_EPSILON); + int astate = mkstate(SYM_EPSILON); accptnum[astate] = accepting_number; - (void) link_machines (mach, astate); + (void) link_machines(mach, astate); } } @@ -79,15 +81,16 @@ void add_accept (mach, accepting_number) * num - the number of copies of singl to be present in newsng */ -int copysingl (singl, num) - int singl, num; +int +copysingl(singl, num) + int singl, num; { - int copy, i; + int copy, i; - copy = mkstate (SYM_EPSILON); + copy = mkstate(SYM_EPSILON); for (i = 1; i <= num; ++i) - copy = link_machines (copy, dupmachine (singl)); + copy = link_machines(copy, dupmachine(singl)); return copy; } @@ -95,41 +98,43 @@ int copysingl (singl, num) /* dumpnfa - debugging routine to write out an nfa */ -void dumpnfa (state1) - int state1; +void +dumpnfa(state1) + int state1; { - int sym, tsp1, tsp2, anum, ns; + int sym, tsp1, tsp2, anum, ns; - fprintf (stderr, - _ - ("\n\n********** beginning dump of nfa with start state %d\n"), - state1); + fprintf(stderr, + _ + ("\n\n********** beginning dump of nfa with start state %d\n"), + state1); - /* We probably should loop starting at firstst[state1] and going to + /* + * We probably should loop starting at firstst[state1] and going to * lastst[state1], but they're not maintained properly when we "or" - * all of the rules together. So we use our knowledge that the machine - * starts at state 1 and ends at lastnfa. + * all of the rules together. So we use our knowledge that the + * machine starts at state 1 and ends at lastnfa. */ /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */ for (ns = 1; ns <= lastnfa; ++ns) { - fprintf (stderr, _("state # %4d\t"), ns); + fprintf(stderr, _("state # %4d\t"), ns); sym = transchar[ns]; tsp1 = trans1[ns]; tsp2 = trans2[ns]; anum = accptnum[ns]; - fprintf (stderr, "%3d: %4d, %4d", sym, tsp1, tsp2); + fprintf(stderr, "%3d: %4d, %4d", sym, tsp1, tsp2); if (anum != NIL) - fprintf (stderr, " [%d]", anum); + fprintf(stderr, " [%d]", anum); - fprintf (stderr, "\n"); + fprintf(stderr, "\n"); } - fprintf (stderr, _("********** end of dump\n")); + fprintf(stderr, _("********** end of dump\n")); } @@ -150,30 +155,30 @@ void dumpnfa (state1) * states accessible by the arrays firstst and lastst */ -int dupmachine (mach) - int mach; +int +dupmachine(mach) + int mach; { - int i, init, state_offset; - int state = 0; - int last = lastst[mach]; + int i, init, state_offset; + int state = 0; + int last = lastst[mach]; for (i = firstst[mach]; i <= last; ++i) { - state = mkstate (transchar[i]); + state = mkstate(transchar[i]); if (trans1[i] != NO_TRANSITION) { - mkxtion (finalst[state], trans1[i] + state - i); + mkxtion(finalst[state], trans1[i] + state - i); if (transchar[i] == SYM_EPSILON && trans2[i] != NO_TRANSITION) - mkxtion (finalst[state], - trans2[i] + state - i); + mkxtion(finalst[state], + trans2[i] + state - i); } - accptnum[state] = accptnum[i]; } if (state == 0) - flexfatal (_("empty machine in dupmachine()")); + flexfatal(_("empty machine in dupmachine()")); state_offset = state - i + 1; @@ -198,103 +203,102 @@ int dupmachine (mach) * context has variable length. */ -void finish_rule (mach, variable_trail_rule, headcnt, trailcnt, - pcont_act) - int mach, variable_trail_rule, headcnt, trailcnt, pcont_act; +void +finish_rule(mach, variable_trail_rule, headcnt, trailcnt, + pcont_act) + int mach, variable_trail_rule, headcnt, trailcnt, pcont_act; { - char action_text[MAXLINE]; + char action_text[MAXLINE]; - add_accept (mach, num_rules); + add_accept(mach, num_rules); - /* We did this in new_rule(), but it often gets the wrong - * number because we do it before we start parsing the current rule. + /* + * We did this in new_rule(), but it often gets the wrong number + * because we do it before we start parsing the current rule. */ rule_linenum[num_rules] = linenum; - /* If this is a continued action, then the line-number has already + /* + * If this is a continued action, then the line-number has already * been updated, giving us the wrong number. */ if (continued_action) --rule_linenum[num_rules]; - /* If the previous rule was continued action, then we inherit the + /* + * If the previous rule was continued action, then we inherit the * previous newline flag, possibly overriding the current one. */ if (pcont_act && rule_has_nl[num_rules - 1]) rule_has_nl[num_rules] = true; - snprintf (action_text, sizeof(action_text), "case %d:\n", num_rules); - add_action (action_text); + snprintf(action_text, sizeof(action_text), "case %d:\n", num_rules); + add_action(action_text); if (rule_has_nl[num_rules]) { - snprintf (action_text, sizeof(action_text), "/* rule %d can match eol */\n", - num_rules); - add_action (action_text); + snprintf(action_text, sizeof(action_text), "/* rule %d can match eol */\n", + num_rules); + add_action(action_text); } - - if (variable_trail_rule) { rule_type[num_rules] = RULE_VARIABLE; if (performance_report > 0) - fprintf (stderr, - _ - ("Variable trailing context rule at line %d\n"), - rule_linenum[num_rules]); + fprintf(stderr, + _ + ("Variable trailing context rule at line %d\n"), + rule_linenum[num_rules]); variable_trailing_context_rules = true; - } - - else { + } else { rule_type[num_rules] = RULE_NORMAL; if (headcnt > 0 || trailcnt > 0) { - /* Do trailing context magic to not match the trailing - * characters. + /* + * Do trailing context magic to not match the + * trailing characters. */ - char *scanner_cp = "YY_G(yy_c_buf_p) = yy_cp"; - char *scanner_bp = "yy_bp"; + char *scanner_cp = "YY_G(yy_c_buf_p) = yy_cp"; + char *scanner_bp = "yy_bp"; add_action - ("*yy_cp = YY_G(yy_hold_char); /* undo effects of setting up yytext */\n"); + ("*yy_cp = YY_G(yy_hold_char); /* undo effects of setting up yytext */\n"); if (headcnt > 0) { if (rule_has_nl[num_rules]) { - snprintf (action_text, sizeof(action_text), - "YY_LINENO_REWIND_TO(%s + %d);\n", scanner_bp, headcnt); - add_action (action_text); + snprintf(action_text, sizeof(action_text), + "YY_LINENO_REWIND_TO(%s + %d);\n", scanner_bp, headcnt); + add_action(action_text); } - snprintf (action_text, sizeof(action_text), "%s = %s + %d;\n", - scanner_cp, scanner_bp, headcnt); - add_action (action_text); - } - - else { + snprintf(action_text, sizeof(action_text), "%s = %s + %d;\n", + scanner_cp, scanner_bp, headcnt); + add_action(action_text); + } else { if (rule_has_nl[num_rules]) { - snprintf (action_text, sizeof(action_text), - "YY_LINENO_REWIND_TO(yy_cp - %d);\n", trailcnt); - add_action (action_text); + snprintf(action_text, sizeof(action_text), + "YY_LINENO_REWIND_TO(yy_cp - %d);\n", trailcnt); + add_action(action_text); } - - snprintf (action_text, sizeof(action_text), "%s -= %d;\n", - scanner_cp, trailcnt); - add_action (action_text); + snprintf(action_text, sizeof(action_text), "%s -= %d;\n", + scanner_cp, trailcnt); + add_action(action_text); } add_action - ("YY_DO_BEFORE_ACTION; /* set up yytext again */\n"); + ("YY_DO_BEFORE_ACTION; /* set up yytext again */\n"); } } - /* Okay, in the action code at this point yytext and yyleng have - * their proper final values for this rule, so here's the point - * to do any user action. But don't do it for continued actions, - * as that'll result in multiple YY_RULE_SETUP's. + /* + * Okay, in the action code at this point yytext and yyleng have + * their proper final values for this rule, so here's the point to do + * any user action. But don't do it for continued actions, as + * that'll result in multiple YY_RULE_SETUP's. */ if (!continued_action) - add_action ("YY_RULE_SETUP\n"); + add_action("YY_RULE_SETUP\n"); - line_directive_out ((FILE *) 0, 1); + line_directive_out((FILE *) 0, 1); } @@ -314,8 +318,9 @@ void finish_rule (mach, variable_trail_rule, headcnt, trailcnt, * FIRST is set to new by the operation. last is unmolested. */ -int link_machines (first, last) - int first, last; +int +link_machines(first, last) + int first, last; { if (first == NIL) return last; @@ -324,10 +329,10 @@ int link_machines (first, last) return first; else { - mkxtion (finalst[first], last); + mkxtion(finalst[first], last); finalst[first] = finalst[last]; - lastst[first] = MAX (lastst[first], lastst[last]); - firstst[first] = MIN (firstst[first], firstst[last]); + lastst[first] = MAX(lastst[first], lastst[last]); + firstst[first] = MIN(firstst[first], firstst[last]); return first; } @@ -341,8 +346,9 @@ int link_machines (first, last) * The "beginning" states are the epsilon closure of the first state */ -void mark_beginning_as_normal (mach) - int mach; +void +mark_beginning_as_normal(mach) + int mach; { switch (state_type[mach]) { case STATE_NORMAL: @@ -354,16 +360,16 @@ void mark_beginning_as_normal (mach) if (transchar[mach] == SYM_EPSILON) { if (trans1[mach] != NO_TRANSITION) - mark_beginning_as_normal (trans1[mach]); + mark_beginning_as_normal(trans1[mach]); if (trans2[mach] != NO_TRANSITION) - mark_beginning_as_normal (trans2[mach]); + mark_beginning_as_normal(trans2[mach]); } break; default: - flexerror (_ - ("bad state type in mark_beginning_as_normal()")); + flexerror(_ + ("bad state type in mark_beginning_as_normal()")); break; } } @@ -383,10 +389,11 @@ void mark_beginning_as_normal (mach) * more mkbranch's. Compare with mkor() */ -int mkbranch (first, second) - int first, second; +int +mkbranch(first, second) + int first, second; { - int eps; + int eps; if (first == NO_TRANSITION) return second; @@ -394,10 +401,10 @@ int mkbranch (first, second) else if (second == NO_TRANSITION) return first; - eps = mkstate (SYM_EPSILON); + eps = mkstate(SYM_EPSILON); - mkxtion (eps, first); - mkxtion (eps, second); + mkxtion(eps, first); + mkxtion(eps, second); return eps; } @@ -411,10 +418,11 @@ int mkbranch (first, second) * new - a new state which matches the closure of "state" */ -int mkclos (state) - int state; +int +mkclos(state) + int state; { - return mkopt (mkposcl (state)); + return mkopt(mkposcl(state)); } @@ -432,24 +440,25 @@ int mkclos (state) * 2. mach is destroyed by the call */ -int mkopt (mach) - int mach; +int +mkopt(mach) + int mach; { - int eps; + int eps; - if (!SUPER_FREE_EPSILON (finalst[mach])) { - eps = mkstate (SYM_EPSILON); - mach = link_machines (mach, eps); + if (!SUPER_FREE_EPSILON(finalst[mach])) { + eps = mkstate(SYM_EPSILON); + mach = link_machines(mach, eps); } - - /* Can't skimp on the following if FREE_EPSILON(mach) is true because + /* + * Can't skimp on the following if FREE_EPSILON(mach) is true because * some state interior to "mach" might point back to the beginning * for a closure. */ - eps = mkstate (SYM_EPSILON); - mach = link_machines (eps, mach); + eps = mkstate(SYM_EPSILON); + mach = link_machines(eps, mach); - mkxtion (mach, finalst[mach]); + mkxtion(mach, finalst[mach]); return mach; } @@ -469,10 +478,11 @@ int mkopt (mach) * the number of epsilon states needed */ -int mkor (first, second) - int first, second; +int +mkor(first, second) + int first, second; { - int eps, orend; + int eps, orend; if (first == NIL) return second; @@ -481,34 +491,32 @@ int mkor (first, second) return first; else { - /* See comment in mkopt() about why we can't use the first - * state of "first" or "second" if they satisfy "FREE_EPSILON". + /* + * See comment in mkopt() about why we can't use the first + * state of "first" or "second" if they satisfy + * "FREE_EPSILON". */ - eps = mkstate (SYM_EPSILON); + eps = mkstate(SYM_EPSILON); - first = link_machines (eps, first); + first = link_machines(eps, first); - mkxtion (first, second); + mkxtion(first, second); - if (SUPER_FREE_EPSILON (finalst[first]) && + if (SUPER_FREE_EPSILON(finalst[first]) && accptnum[finalst[first]] == NIL) { orend = finalst[first]; - mkxtion (finalst[second], orend); - } - - else if (SUPER_FREE_EPSILON (finalst[second]) && - accptnum[finalst[second]] == NIL) { + mkxtion(finalst[second], orend); + } else if (SUPER_FREE_EPSILON(finalst[second]) && + accptnum[finalst[second]] == NIL) { orend = finalst[second]; - mkxtion (finalst[first], orend); - } - - else { - eps = mkstate (SYM_EPSILON); + mkxtion(finalst[first], orend); + } else { + eps = mkstate(SYM_EPSILON); - first = link_machines (first, eps); + first = link_machines(first, eps); orend = finalst[first]; - mkxtion (finalst[second], orend); + mkxtion(finalst[second], orend); } } @@ -525,20 +533,19 @@ int mkor (first, second) * new - a machine matching the positive closure of "state" */ -int mkposcl (state) - int state; +int +mkposcl(state) + int state; { - int eps; + int eps; - if (SUPER_FREE_EPSILON (finalst[state])) { - mkxtion (finalst[state], state); + if (SUPER_FREE_EPSILON(finalst[state])) { + mkxtion(finalst[state], state); return state; - } - - else { - eps = mkstate (SYM_EPSILON); - mkxtion (eps, state); - return link_machines (state, eps); + } else { + eps = mkstate(SYM_EPSILON); + mkxtion(eps, state); + return link_machines(state, eps); } } @@ -555,31 +562,30 @@ int mkposcl (state) * if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach" */ -int mkrep (mach, lb, ub) - int mach, lb, ub; +int +mkrep(mach, lb, ub) + int mach, lb, ub; { - int base_mach, tail, copy, i; + int base_mach, tail, copy, i; - base_mach = copysingl (mach, lb - 1); + base_mach = copysingl(mach, lb - 1); if (ub == INFINITE_REPEAT) { - copy = dupmachine (mach); - mach = link_machines (mach, - link_machines (base_mach, - mkclos (copy))); - } - - else { - tail = mkstate (SYM_EPSILON); + copy = dupmachine(mach); + mach = link_machines(mach, + link_machines(base_mach, + mkclos(copy))); + } else { + tail = mkstate(SYM_EPSILON); for (i = lb; i < ub; ++i) { - copy = dupmachine (mach); - tail = mkopt (link_machines (copy, tail)); + copy = dupmachine(mach); + tail = mkopt(link_machines(copy, tail)); } mach = - link_machines (mach, - link_machines (base_mach, tail)); + link_machines(mach, + link_machines(base_mach, tail)); } return mach; @@ -602,32 +608,32 @@ int mkrep (mach, lb, ub) * that it admittedly is) */ -int mkstate (sym) - int sym; +int +mkstate(sym) + int sym; { if (++lastnfa >= current_mns) { if ((current_mns += MNS_INCREMENT) >= maximum_mns) - lerrif (_ - ("input rules are too complicated (>= %d NFA states)"), -current_mns); + lerrif(_ + ("input rules are too complicated (>= %d NFA states)"), + current_mns); ++num_reallocs; - firstst = reallocate_integer_array (firstst, current_mns); - lastst = reallocate_integer_array (lastst, current_mns); - finalst = reallocate_integer_array (finalst, current_mns); + firstst = reallocate_integer_array(firstst, current_mns); + lastst = reallocate_integer_array(lastst, current_mns); + finalst = reallocate_integer_array(finalst, current_mns); transchar = - reallocate_integer_array (transchar, current_mns); - trans1 = reallocate_integer_array (trans1, current_mns); - trans2 = reallocate_integer_array (trans2, current_mns); + reallocate_integer_array(transchar, current_mns); + trans1 = reallocate_integer_array(trans1, current_mns); + trans2 = reallocate_integer_array(trans2, current_mns); accptnum = - reallocate_integer_array (accptnum, current_mns); + reallocate_integer_array(accptnum, current_mns); assoc_rule = - reallocate_integer_array (assoc_rule, current_mns); + reallocate_integer_array(assoc_rule, current_mns); state_type = - reallocate_integer_array (state_type, current_mns); + reallocate_integer_array(state_type, current_mns); } - firstst[lastnfa] = lastnfa; finalst[lastnfa] = lastnfa; lastst[lastnfa] = lastnfa; @@ -638,7 +644,8 @@ current_mns); assoc_rule[lastnfa] = num_rules; state_type[lastnfa] = current_state_type; - /* Fix up equivalence classes base on this transition. Note that any + /* + * Fix up equivalence classes base on this transition. Note that any * character which has its own transition gets its own equivalence * class. Thus only characters which are only in character classes * have a chance at being in the same equivalence class. E.g. "a|b" @@ -648,21 +655,20 @@ current_mns); */ if (sym < 0) { - /* We don't have to update the equivalence classes since - * that was already done when the ccl was created for the - * first time. + /* + * We don't have to update the equivalence classes since that + * was already done when the ccl was created for the first + * time. */ - } - - else if (sym == SYM_EPSILON) + } else if (sym == SYM_EPSILON) ++numeps; else { - check_char (sym); + check_char(sym); if (useecs) /* Map NUL's to csize. */ - mkechar (sym ? sym : csize, nextecm, ecgroup); + mkechar(sym ? sym : csize, nextecm, ecgroup); } return lastnfa; @@ -679,15 +685,16 @@ current_mns); * stateto - the state to which the transition is to be made */ -void mkxtion (statefrom, stateto) - int statefrom, stateto; +void +mkxtion(statefrom, stateto) + int statefrom, stateto; { if (trans1[statefrom] == NO_TRANSITION) trans1[statefrom] = stateto; else if ((transchar[statefrom] != SYM_EPSILON) || - (trans2[statefrom] != NO_TRANSITION)) - flexfatal (_("found too many transitions in mkxtion()")); + (trans2[statefrom] != NO_TRANSITION)) + flexfatal(_("found too many transitions in mkxtion()")); else { /* second out-transition for an epsilon state */ ++eps2; @@ -697,23 +704,23 @@ void mkxtion (statefrom, stateto) /* new_rule - initialize for a new rule */ -void new_rule () +void +new_rule() { if (++num_rules >= current_max_rules) { ++num_reallocs; current_max_rules += MAX_RULES_INCREMENT; - rule_type = reallocate_integer_array (rule_type, - current_max_rules); - rule_linenum = reallocate_integer_array (rule_linenum, - current_max_rules); - rule_useful = reallocate_integer_array (rule_useful, - current_max_rules); - rule_has_nl = reallocate_bool_array (rule_has_nl, - current_max_rules); + rule_type = reallocate_integer_array(rule_type, + current_max_rules); + rule_linenum = reallocate_integer_array(rule_linenum, + current_max_rules); + rule_useful = reallocate_integer_array(rule_useful, + current_max_rules); + rule_has_nl = reallocate_bool_array(rule_has_nl, + current_max_rules); } - if (num_rules > MAX_RULE) - lerrif (_("too many rules (> %d)!"), MAX_RULE); + lerrif(_("too many rules (> %d)!"), MAX_RULE); rule_linenum[num_rules] = linenum; rule_useful[num_rules] = false; |