/* $OpenBSD: reader.c,v 1.35 2024/05/21 05:00:48 jsg Exp $ */ /* $NetBSD: reader.c,v 1.5 1996/03/19 03:21:43 jtc Exp $ */ /* * Copyright (c) 1989 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Robert Paul Corbett. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "defs.h" /* The line size must be a positive integer. One hundred was chosen */ /* because few lines in Yacc input grammars exceed 100 characters. */ /* Note that if a line exceeds LINESIZE characters, the line buffer */ /* will be expanded to accommodate it. */ #define LINESIZE 100 char *cache; int cinc, cache_size; int ntags, tagmax; char **tag_table; char saw_eof, unionized; char *cptr, *line; int linesize; bucket *goal; int prec; int gensym; char last_was_action; int maxitems; bucket **pitem; int maxrules; bucket **plhs; int name_pool_size; char *name_pool; void cachec(int); void get_line(void); char *dup_line(void); void skip_comment(void); int nextc(void); int keyword(void); void copy_ident(void); void copy_text(void); void copy_union(void); bucket *get_literal(void); int is_reserved(char *); bucket *get_name(void); int get_number(void); char *get_tag(void); void declare_tokens(int); void declare_types(void); void declare_start(void); void read_declarations(void); void initialize_grammar(void); void expand_items(void); void expand_rules(void); void advance_to_start(void); void start_rule(bucket *, int); void end_rule(void); void insert_empty_rule(void); void add_symbol(void); void copy_action(void); int mark_symbol(void); void read_grammar(void); void free_tags(void); void pack_names(void); void check_symbols(void); void pack_symbols(void); void pack_grammar(void); void print_grammar(void); char line_format[] = "#line %d \"%s\"\n"; void cachec(int c) { assert(cinc >= 0); if (cinc >= cache_size) { cache_size += 256; cache = realloc(cache, cache_size); if (cache == NULL) no_space(); } cache[cinc] = c; ++cinc; } void get_line(void) { FILE *f = input_file; int c, i; if (saw_eof || (c = getc(f)) == EOF) { if (line) { free(line); line = 0; } cptr = 0; saw_eof = 1; return; } if (line == NULL || linesize != (LINESIZE + 1)) { free(line); linesize = LINESIZE + 1; line = malloc(linesize); if (line == NULL) no_space(); } i = 0; ++lineno; for (;;) { line[i] = c; if (c == '\n') { cptr = line; return; } if (++i >= linesize) { linesize += LINESIZE; line = realloc(line, linesize); if (line == NULL) no_space(); } c = getc(f); if (c == EOF) { line[i] = '\n'; saw_eof = 1; cptr = line; return; } } } char * dup_line(void) { char *p, *s, *t; if (line == NULL) return (0); s = line; while (*s != '\n') ++s; p = malloc(s - line + 1); if (p == NULL) no_space(); s = line; t = p; while ((*t++ = *s++) != '\n') continue; return (p); } void skip_comment(void) { char *s; int st_lineno = lineno; char *st_line = dup_line(); char *st_cptr = st_line + (cptr - line); s = cptr + 2; for (;;) { if (*s == '*' && s[1] == '/') { cptr = s + 2; free(st_line); return; } if (*s == '\n') { get_line(); if (line == NULL) unterminated_comment(st_lineno, st_line, st_cptr); s = cptr; } else ++s; } } int nextc(void) { char *s; if (line == NULL) { get_line(); if (line == NULL) return (EOF); } s = cptr; for (;;) { switch (*s) { case '\n': get_line(); if (line == NULL) return (EOF); s = cptr; break; case ' ': case '\t': case '\f': case '\r': case '\v': case ',': case ';': ++s; break; case '\\': cptr = s; return ('%'); case '/': if (s[1] == '*') { cptr = s; skip_comment(); s = cptr; break; } else if (s[1] == '/') { get_line(); if (line == NULL) return (EOF); s = cptr; break; } /* fall through */ default: cptr = s; return ((unsigned char) *s); } } } int keyword(void) { int c; char *t_cptr = cptr; c = (unsigned char) *++cptr; if (isalpha(c)) { cinc = 0; for (;;) { if (isalpha(c)) { if (isupper(c)) c = tolower(c); cachec(c); } else if (isdigit(c) || c == '_' || c == '.' || c == '$') cachec(c); else break; c = (unsigned char) *++cptr; } cachec(NUL); if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0) return (TOKEN); if (strcmp(cache, "type") == 0) return (TYPE); if (strcmp(cache, "left") == 0) return (LEFT); if (strcmp(cache, "right") == 0) return (RIGHT); if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0) return (NONASSOC); if (strcmp(cache, "start") == 0) return (START); if (strcmp(cache, "union") == 0) return (UNION); if (strcmp(cache, "ident") == 0) return (IDENT); if (strcmp(cache, "expect") == 0) return (EXPECT); } else { ++cptr; if (c == '{') return (TEXT); if (c == '%' || c == '\\') return (MARK); if (c == '<') return (LEFT); if (c == '>') return (RIGHT); if (c == '0') return (TOKEN); if (c == '2') return (NONASSOC); } syntax_error(lineno, line, t_cptr); /* NOTREACHED */ return (0); } void copy_ident(void) { int c; FILE *f = output_file; c = nextc(); if (c == EOF) unexpected_EOF(); if (c != '"') syntax_error(lineno, line, cptr); ++outline; fprintf(f, "#ident \""); for (;;) { c = (unsigned char) *++cptr; if (c == '\n') { fprintf(f, "\"\n"); return; } putc(c, f); if (c == '"') { putc('\n', f); ++cptr; return; } } } void copy_text(void) { int c; int quote; FILE *f = text_file; int need_newline = 0; int t_lineno = lineno; char *t_line = dup_line(); char *t_cptr = t_line + (cptr - line - 2); if (*cptr == '\n') { get_line(); if (line == NULL) unterminated_text(t_lineno, t_line, t_cptr); } if (!lflag) fprintf(f, line_format, lineno, input_file_name); loop: c = (unsigned char) *cptr++; switch (c) { case '\n': next_line: putc('\n', f); need_newline = 0; get_line(); if (line) goto loop; unterminated_text(t_lineno, t_line, t_cptr); case '\'': case '"': { int s_lineno = lineno; char *s_line = dup_line(); char *s_cptr = s_line + (cptr - line - 1); quote = c; putc(c, f); for (;;) { c = (unsigned char) *cptr++; putc(c, f); if (c == quote) { need_newline = 1; free(s_line); goto loop; } if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr); if (c == '\\') { c = (unsigned char) *cptr++; putc(c, f); if (c == '\n') { get_line(); if (line == NULL) unterminated_string(s_lineno, s_line, s_cptr); } } } } case '/': putc(c, f); need_newline = 1; c = (unsigned char) *cptr; if (c == '/') { putc('*', f); while ((c = (unsigned char) *++cptr) != '\n') { if (c == '*' && cptr[1] == '/') fprintf(f, "* "); else putc(c, f); } fprintf(f, "*/"); goto next_line; } if (c == '*') { int c_lineno = lineno; char *c_line = dup_line(); char *c_cptr = c_line + (cptr - line - 1); putc('*', f); ++cptr; for (;;) { c = (unsigned char) *cptr++; putc(c, f); if (c == '*' && *cptr == '/') { putc('/', f); ++cptr; free(c_line); goto loop; } if (c == '\n') { get_line(); if (line == NULL) unterminated_comment(c_lineno, c_line, c_cptr); } } } need_newline = 1; goto loop; case '%': case '\\': if (*cptr == '}') { if (need_newline) putc('\n', f); ++cptr; free(t_line); return; } /* fall through */ default: putc(c, f); need_newline = 1; goto loop; } } void copy_union(void) { int c, quote, depth; int u_lineno = lineno; char *u_line = dup_line(); char *u_cptr = u_line + (cptr - line - 6); if (unionized) over_unionized(cptr - 6); unionized = 1; if (!lflag) fprintf(text_file, line_format, lineno, input_file_name); fprintf(text_file, "#ifndef YYSTYPE_DEFINED\n"); fprintf(text_file, "#define YYSTYPE_DEFINED\n"); fprintf(text_file, "typedef union"); if (dflag) { fprintf(union_file, "#ifndef YYSTYPE_DEFINED\n"); fprintf(union_file, "#define YYSTYPE_DEFINED\n"); fprintf(union_file, "typedef union"); } depth = 0; loop: c = (unsigned char) *cptr++; putc(c, text_file); if (dflag) putc(c, union_file); switch (c) { case '\n': next_line: get_line(); if (line == NULL) unterminated_union(u_lineno, u_line, u_cptr); goto loop; case '{': ++depth; goto loop; case '}': if (--depth == 0) { fprintf(text_file, " YYSTYPE;\n"); fprintf(text_file, "#endif /* YYSTYPE_DEFINED */\n"); free(u_line); return; } goto loop; case '\'': case '"': { int s_lineno = lineno; char *s_line = dup_line(); char *s_cptr = s_line + (cptr - line - 1); quote = c; for (;;) { c = (unsigned char) *cptr++; putc(c, text_file); if (dflag) putc(c, union_file); if (c == quote) { free(s_line); goto loop; } if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr); if (c == '\\') { c = (unsigned char) *cptr++; putc(c, text_file); if (dflag) putc(c, union_file); if (c == '\n') { get_line(); if (line == NULL) unterminated_string(s_lineno, s_line, s_cptr); } } } } case '/': c = (unsigned char) *cptr; if (c == '/') { putc('*', text_file); if (dflag) putc('*', union_file); while ((c = (unsigned char) *++cptr) != '\n') { if (c == '*' && cptr[1] == '/') { fprintf(text_file, "* "); if (dflag) fprintf(union_file, "* "); } else { putc(c, text_file); if (dflag) putc(c, union_file); } } fprintf(text_file, "*/\n"); if (dflag) fprintf(union_file, "*/\n"); goto next_line; } if (c == '*') { int c_lineno = lineno; char *c_line = dup_line(); char *c_cptr = c_line + (cptr - line - 1); putc('*', text_file); if (dflag) putc('*', union_file); ++cptr; for (;;) { c = (unsigned char) *cptr++; putc(c, text_file); if (dflag) putc(c, union_file); if (c == '*' && *cptr == '/') { putc('/', text_file); if (dflag) putc('/', union_file); ++cptr; free(c_line); goto loop; } if (c == '\n') { get_line(); if (line == NULL) unterminated_comment(c_lineno, c_line, c_cptr); } } } goto loop; default: goto loop; } } bucket * get_literal(void) { int c, quote, i, n; char *s; bucket *bp; int s_lineno = lineno; char *s_line = dup_line(); char *s_cptr = s_line + (cptr - line); quote = (unsigned char) *cptr++; cinc = 0; for (;;) { c = (unsigned char) *cptr++; if (c == quote) break; if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr); if (c == '\\') { char *c_cptr = cptr - 1; unsigned long ulval; c = (unsigned char) *cptr++; switch (c) { case '\n': get_line(); if (line == NULL) unterminated_string(s_lineno, s_line, s_cptr); continue; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': ulval = strtoul(cptr - 1, &s, 8); if (s == cptr - 1 || ulval > MAXCHAR) illegal_character(c_cptr); c = (int) ulval; cptr = s; break; case 'x': ulval = strtoul(cptr, &s, 16); if (s == cptr || ulval > MAXCHAR) illegal_character(c_cptr); c = (int) ulval; cptr = s; break; case 'a': c = 7; break; case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; } } cachec(c); } free(s_line); n = cinc; s = malloc(n); if (s == NULL) no_space(); memcpy(s, cache, n); cinc = 0; if (n == 1) cachec('\''); else cachec('"'); for (i = 0; i < n; ++i) { c = ((unsigned char *) s)[i]; if (c == '\\' || c == cache[0]) { cachec('\\'); cachec(c); } else if (isprint(c)) cachec(c); else { cachec('\\'); switch (c) { case 7: cachec('a'); break; case '\b': cachec('b'); break; case '\f': cachec('f'); break; case '\n': cachec('n'); break; case '\r': cachec('r'); break; case '\t': cachec('t'); break; case '\v': cachec('v'); break; default: cachec(((c >> 6) & 7) + '0'); cachec(((c >> 3) & 7) + '0'); cachec((c & 7) + '0'); break; } } } if (n == 1) cachec('\''); else cachec('"'); cachec(NUL); bp = lookup(cache); bp->class = TERM; if (n == 1 && bp->value == UNDEFINED) bp->value = *(unsigned char *) s; free(s); return (bp); } int is_reserved(char *name) { char *s; if (strcmp(name, ".") == 0 || strcmp(name, "$accept") == 0 || strcmp(name, "$end") == 0) return (1); if (name[0] == '$' && name[1] == '$' && isdigit((unsigned char) name[2])) { s = name + 3; while (isdigit((unsigned char) *s)) ++s; if (*s == NUL) return (1); } return (0); } bucket * get_name(void) { int c; cinc = 0; for (c = (unsigned char) *cptr; IS_IDENT(c); c = (unsigned char) *++cptr) cachec(c); cachec(NUL); if (is_reserved(cache)) used_reserved(cache); return (lookup(cache)); } int get_number(void) { unsigned long ul; char *p; ul = strtoul(cptr, &p, 10); if (ul > INT_MAX) syntax_error(lineno, line, cptr); cptr = p; return (ul); } char * get_tag(void) { int c, i; char *s; int t_lineno = lineno; char *t_line = dup_line(); char *t_cptr = t_line + (cptr - line); ++cptr; c = nextc(); if (c == EOF) unexpected_EOF(); if (!isalpha(c) && c != '_' && c != '$') illegal_tag(t_lineno, t_line, t_cptr); cinc = 0; do { cachec(c); c = (unsigned char) *++cptr; } while (IS_IDENT(c)); cachec(NUL); c = nextc(); if (c == EOF) unexpected_EOF(); if (c != '>') illegal_tag(t_lineno, t_line, t_cptr); free(t_line); ++cptr; for (i = 0; i < ntags; ++i) { if (strcmp(cache, tag_table[i]) == 0) return (tag_table[i]); } if (ntags >= tagmax) { tagmax += 16; tag_table = reallocarray(tag_table, tagmax, sizeof(char *)); if (tag_table == NULL) no_space(); } s = malloc(cinc); if (s == NULL) no_space(); strlcpy(s, cache, cinc); tag_table[ntags] = s; ++ntags; return (s); } void declare_tokens(int assoc) { int c; bucket *bp; int value; char *tag = 0; if (assoc != TOKEN) ++prec; c = nextc(); if (c == EOF) unexpected_EOF(); if (c == '<') { tag = get_tag(); c = nextc(); if (c == EOF) unexpected_EOF(); } for (;;) { if (isalpha(c) || c == '_' || c == '.' || c == '$') bp = get_name(); else if (c == '\'' || c == '"') bp = get_literal(); else return; if (bp == goal) tokenized_start(bp->name); bp->class = TERM; if (tag) { if (bp->tag && tag != bp->tag) retyped_warning(bp->name); bp->tag = tag; } if (assoc != TOKEN) { if (bp->prec && prec != bp->prec) reprec_warning(bp->name); bp->assoc = assoc; bp->prec = prec; } c = nextc(); if (c == EOF) unexpected_EOF(); if (isdigit(c)) { value = get_number(); if (bp->value != UNDEFINED && value != bp->value) revalued_warning(bp->name); bp->value = value; c = nextc(); if (c == EOF) unexpected_EOF(); } } } /* * %expect requires special handling as it really isn't part of the yacc * grammar only a flag for yacc proper. */ static void declare_expect(int assoc) { int c; if (assoc != EXPECT) ++prec; /* * Stay away from nextc - doesn't detect EOL and will read to EOF. */ c = (unsigned char) *++cptr; if (c == EOF) unexpected_EOF(); for (;;) { if (isdigit(c)) { SRexpect = get_number(); break; } /* * Looking for number before EOL. * Spaces, tabs, and numbers are ok. * Words, punc., etc. are syntax errors. */ else if (c == '\n' || isalpha(c) || !isspace(c)) { syntax_error(lineno, line, cptr); } else { c = (unsigned char) *++cptr; if (c == EOF) unexpected_EOF(); } } } void declare_types(void) { int c; bucket *bp; char *tag; c = nextc(); if (c == EOF) unexpected_EOF(); if (c != '<') syntax_error(lineno, line, cptr); tag = get_tag(); for (;;) { c = nextc(); if (isalpha(c) || c == '_' || c == '.' || c == '$') bp = get_name(); else if (c == '\'' || c == '"') bp = get_literal(); else return; if (bp->tag && tag != bp->tag) retyped_warning(bp->name); bp->tag = tag; } } void declare_start(void) { int c; bucket *bp; c = nextc(); if (c == EOF) unexpected_EOF(); if (!isalpha(c) && c != '_' && c != '.' && c != '$') syntax_error(lineno, line, cptr); bp = get_name(); if (bp->class == TERM) terminal_start(bp->name); if (goal && goal != bp) restarted_warning(); goal = bp; } void read_declarations(void) { int c, k; cache_size = 256; cache = malloc(cache_size); if (cache == NULL) no_space(); for (;;) { c = nextc(); if (c == EOF) unexpected_EOF(); if (c != '%') syntax_error(lineno, line, cptr); switch (k = keyword()) { case MARK: return; case IDENT: copy_ident(); break; case TEXT: copy_text(); break; case UNION: copy_union(); break; case TOKEN: case LEFT: case RIGHT: case NONASSOC: declare_tokens(k); break; case EXPECT: declare_expect(k); break; case TYPE: declare_types(); break; case START: declare_start(); break; } } } void initialize_grammar(void) { nitems = 4; maxitems = 300; pitem = calloc(maxitems, sizeof(bucket *)); if (pitem == NULL) no_space(); nrules = 3; maxrules = 100; plhs = reallocarray(NULL, maxrules, sizeof(bucket *)); if (plhs == NULL) no_space(); plhs[0] = 0; plhs[1] = 0; plhs[2] = 0; rprec = reallocarray(NULL, maxrules, sizeof(short)); if (rprec == NULL) no_space(); rprec[0] = 0; rprec[1] = 0; rprec[2] = 0; rassoc = reallocarray(NULL, maxrules, sizeof(char)); if (rassoc == NULL) no_space(); rassoc[0] = TOKEN; rassoc[1] = TOKEN; rassoc[2] = TOKEN; } void expand_items(void) { int olditems = maxitems; maxitems += 300; pitem = reallocarray(pitem, maxitems, sizeof(bucket *)); if (pitem == NULL) no_space(); memset(pitem + olditems, 0, (maxitems - olditems) * sizeof(bucket *)); } void expand_rules(void) { maxrules += 100; plhs = reallocarray(plhs, maxrules, sizeof(bucket *)); if (plhs == NULL) no_space(); rprec = reallocarray(rprec, maxrules, sizeof(short)); if (rprec == NULL) no_space(); rassoc = reallocarray(rassoc, maxrules, sizeof(char)); if (rassoc == NULL) no_space(); } void advance_to_start(void) { int c; bucket *bp; char *s_cptr; int s_lineno; for (;;) { c = nextc(); if (c != '%') break; s_cptr = cptr; switch (keyword()) { case MARK: no_grammar(); case TEXT: copy_text(); break; case START: declare_start(); break; default: syntax_error(lineno, line, s_cptr); } } c = nextc(); if (!isalpha(c) && c != '_' && c != '.' && c != '_') syntax_error(lineno, line, cptr); bp = get_name(); if (goal == NULL) { if (bp->class == TERM) terminal_start(bp->name); goal = bp; } s_lineno = lineno; c = nextc(); if (c == EOF) unexpected_EOF(); if (c != ':') syntax_error(lineno, line, cptr); start_rule(bp, s_lineno); ++cptr; } void start_rule(bucket * bp, int s_lineno) { if (bp->class == TERM) terminal_lhs(s_lineno); bp->class = NONTERM; if (nrules >= maxrules) expand_rules(); plhs[nrules] = bp; rprec[nrules] = UNDEFINED; rassoc[nrules] = TOKEN; } void end_rule(void) { int i; if (!last_was_action && plhs[nrules]->tag) { for (i = nitems - 1; pitem[i]; --i) continue; if (i == maxitems - 1 || pitem[i + 1] == 0 || pitem[i + 1]->tag != plhs[nrules]->tag) default_action_warning(); } last_was_action = 0; if (nitems >= maxitems) expand_items(); pitem[nitems] = 0; ++nitems; ++nrules; } void insert_empty_rule(void) { bucket *bp, **bpp; assert(cache); snprintf(cache, cache_size, "$$%d", ++gensym); bp = make_bucket(cache); last_symbol->next = bp; last_symbol = bp; bp->tag = plhs[nrules]->tag; bp->class = NONTERM; if ((nitems += 2) > maxitems) expand_items(); bpp = pitem + nitems - 1; *bpp-- = bp; while ((bpp[0] = bpp[-1])) --bpp; if (++nrules >= maxrules) expand_rules(); plhs[nrules] = plhs[nrules - 1]; plhs[nrules - 1] = bp; rprec[nrules] = rprec[nrules - 1]; rprec[nrules - 1] = 0; rassoc[nrules] = rassoc[nrules - 1]; rassoc[nrules - 1] = TOKEN; } void add_symbol(void) { int c; bucket *bp; int s_lineno = lineno; c = (unsigned char) *cptr; if (c == '\'' || c == '"') bp = get_literal(); else bp = get_name(); c = nextc(); if (c == ':') { end_rule(); start_rule(bp, s_lineno); ++cptr; return; } if (last_was_action) insert_empty_rule(); last_was_action = 0; if (++nitems > maxitems) expand_items(); pitem[nitems - 1] = bp; } void copy_action(void) { int c, i, n, depth, quote; char *tag; FILE *f = action_file; int a_lineno = lineno; char *a_line = dup_line(); char *a_cptr = a_line + (cptr - line); if (last_was_action) insert_empty_rule(); last_was_action = 1; fprintf(f, "case %d:\n", nrules - 2); if (!lflag) fprintf(f, line_format, lineno, input_file_name); if (*cptr == '=') ++cptr; n = 0; for (i = nitems - 1; pitem[i]; --i) ++n; depth = 0; loop: c = (unsigned char) *cptr; if (c == '$') { if (cptr[1] == '<') { int d_lineno = lineno; char *d_line = dup_line(); char *d_cptr = d_line + (cptr - line); ++cptr; tag = get_tag(); c = (unsigned char) *cptr; if (c == '$') { fprintf(f, "yyval.%s", tag); ++cptr; free(d_line); goto loop; } else if (isdigit(c)) { i = get_number(); if (i > n) dollar_warning(d_lineno, i); fprintf(f, "yyvsp[%d].%s", i - n, tag); free(d_line); goto loop; } else if (c == '-' && isdigit((unsigned char) cptr[1])) { ++cptr; i = -get_number() - n; fprintf(f, "yyvsp[%d].%s", i, tag); free(d_line); goto loop; } else dollar_error(d_lineno, d_line, d_cptr); } else if (cptr[1] == '$') { if (ntags) { tag = plhs[nrules]->tag; if (tag == NULL) untyped_lhs(); fprintf(f, "yyval.%s", tag); } else fprintf(f, "yyval"); cptr += 2; goto loop; } else if (isdigit((unsigned char) cptr[1])) { ++cptr; i = get_number(); if (ntags) { if (i <= 0 || i > n) unknown_rhs(i); tag = pitem[nitems + i - n - 1]->tag; if (tag == NULL) untyped_rhs(i, pitem[nitems + i - n - 1]->name); fprintf(f, "yyvsp[%d].%s", i - n, tag); } else { if (i > n) dollar_warning(lineno, i); fprintf(f, "yyvsp[%d]", i - n); } goto loop; } else if (cptr[1] == '-') { cptr += 2; i = get_number(); if (ntags) unknown_rhs(-i); fprintf(f, "yyvsp[%d]", -i - n); goto loop; } } if (isalpha(c) || c == '_' || c == '$') { do { putc(c, f); c = (unsigned char) *++cptr; } while (isalnum(c) || c == '_' || c == '$'); goto loop; } putc(c, f); ++cptr; switch (c) { case '\n': next_line: get_line(); if (line) goto loop; unterminated_action(a_lineno, a_line, a_cptr); case ';': if (depth > 0) goto loop; fprintf(f, "\nbreak;\n"); free(a_line); return; case '{': ++depth; goto loop; case '}': if (--depth > 0) goto loop; fprintf(f, "\nbreak;\n"); free(a_line); return; case '\'': case '"': { int s_lineno = lineno; char *s_line = dup_line(); char *s_cptr = s_line + (cptr - line - 1); quote = c; for (;;) { c = (unsigned char) *cptr++; putc(c, f); if (c == quote) { free(s_line); goto loop; } if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr); if (c == '\\') { c = (unsigned char) *cptr++; putc(c, f); if (c == '\n') { get_line(); if (line == NULL) unterminated_string(s_lineno, s_line, s_cptr); } } } } case '/': c = (unsigned char) *cptr; if (c == '/') { putc('*', f); while ((c = (unsigned char) *++cptr) != '\n') { if (c == '*' && cptr[1] == '/') fprintf(f, "* "); else putc(c, f); } fprintf(f, "*/\n"); goto next_line; } if (c == '*') { int c_lineno = lineno; char *c_line = dup_line(); char *c_cptr = c_line + (cptr - line - 1); putc('*', f); ++cptr; for (;;) { c = (unsigned char) *cptr++; putc(c, f); if (c == '*' && *cptr == '/') { putc('/', f); ++cptr; free(c_line); goto loop; } if (c == '\n') { get_line(); if (line == NULL) unterminated_comment(c_lineno, c_line, c_cptr); } } } goto loop; default: goto loop; } } int mark_symbol(void) { int c; bucket *bp = NULL; c = (unsigned char) cptr[1]; if (c == '%' || c == '\\') { cptr += 2; return (1); } if (c == '=') cptr += 2; else if ((c == 'p' || c == 'P') && ((c = cptr[2]) == 'r' || c == 'R') && ((c = cptr[3]) == 'e' || c == 'E') && ((c = cptr[4]) == 'c' || c == 'C') && ((c = (unsigned char) cptr[5], !IS_IDENT(c)))) cptr += 5; else syntax_error(lineno, line, cptr); c = nextc(); if (isalpha(c) || c == '_' || c == '.' || c == '$') bp = get_name(); else if (c == '\'' || c == '"') bp = get_literal(); else { syntax_error(lineno, line, cptr); /* NOTREACHED */ } if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules]) prec_redeclared(); rprec[nrules] = bp->prec; rassoc[nrules] = bp->assoc; return (0); } void read_grammar(void) { int c; initialize_grammar(); advance_to_start(); for (;;) { c = nextc(); if (c == EOF) break; if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' || c == '"') add_symbol(); else if (c == '{' || c == '=') copy_action(); else if (c == '|') { end_rule(); start_rule(plhs[nrules - 1], 0); ++cptr; } else if (c == '%') { if (mark_symbol()) break; } else syntax_error(lineno, line, cptr); } end_rule(); } void free_tags(void) { int i; if (tag_table == NULL) return; for (i = 0; i < ntags; ++i) { assert(tag_table[i]); free(tag_table[i]); } free(tag_table); } void pack_names(void) { bucket *bp; char *p, *s, *t; name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */ for (bp = first_symbol; bp; bp = bp->next) name_pool_size += strlen(bp->name) + 1; name_pool = malloc(name_pool_size); if (name_pool == NULL) no_space(); strlcpy(name_pool, "$accept", name_pool_size); strlcpy(name_pool + 8, "$end", name_pool_size - 8); t = name_pool + 13; for (bp = first_symbol; bp; bp = bp->next) { p = t; s = bp->name; while ((*t++ = *s++)) continue; free(bp->name); bp->name = p; } } void check_symbols(void) { bucket *bp; if (goal->class == UNKNOWN) undefined_goal(goal->name); for (bp = first_symbol; bp; bp = bp->next) { if (bp->class == UNKNOWN) { undefined_symbol_warning(bp->name); bp->class = TERM; } } } void pack_symbols(void) { bucket *bp; bucket **v; int i, j, k, n; nsyms = 2; ntokens = 1; for (bp = first_symbol; bp; bp = bp->next) { ++nsyms; if (bp->class == TERM) ++ntokens; } start_symbol = ntokens; nvars = nsyms - ntokens; symbol_name = reallocarray(NULL, nsyms, sizeof(char *)); if (symbol_name == NULL) no_space(); symbol_value = reallocarray(NULL, nsyms, sizeof(short)); if (symbol_value == NULL) no_space(); symbol_prec = reallocarray(NULL, nsyms, sizeof(short)); if (symbol_prec == NULL) no_space(); symbol_assoc = malloc(nsyms); if (symbol_assoc == NULL) no_space(); v = reallocarray(NULL, nsyms, sizeof(bucket *)); if (v == NULL) no_space(); v[0] = 0; v[start_symbol] = 0; i = 1; j = start_symbol + 1; for (bp = first_symbol; bp; bp = bp->next) { if (bp->class == TERM) v[i++] = bp; else v[j++] = bp; } assert(i == ntokens && j == nsyms); for (i = 1; i < ntokens; ++i) v[i]->index = i; goal->index = start_symbol + 1; k = start_symbol + 2; while (++i < nsyms) if (v[i] != goal) { v[i]->index = k; ++k; } goal->value = 0; k = 1; for (i = start_symbol + 1; i < nsyms; ++i) { if (v[i] != goal) { v[i]->value = k; ++k; } } k = 0; for (i = 1; i < ntokens; ++i) { n = v[i]->value; if (n > 256) { for (j = k++; j > 0 && symbol_value[j - 1] > n; --j) symbol_value[j] = symbol_value[j - 1]; symbol_value[j] = n; } } if (v[1]->value == UNDEFINED) v[1]->value = 256; j = 0; n = 257; for (i = 2; i < ntokens; ++i) { if (v[i]->value == UNDEFINED) { while (j < k && n == symbol_value[j]) { while (++j < k && n == symbol_value[j]) continue; ++n; } v[i]->value = n; ++n; } } symbol_name[0] = name_pool + 8; symbol_value[0] = 0; symbol_prec[0] = 0; symbol_assoc[0] = TOKEN; for (i = 1; i < ntokens; ++i) { symbol_name[i] = v[i]->name; symbol_value[i] = v[i]->value; symbol_prec[i] = v[i]->prec; symbol_assoc[i] = v[i]->assoc; } symbol_name[start_symbol] = name_pool; symbol_value[start_symbol] = -1; symbol_prec[start_symbol] = 0; symbol_assoc[start_symbol] = TOKEN; for (++i; i < nsyms; ++i) { k = v[i]->index; symbol_name[k] = v[i]->name; symbol_value[k] = v[i]->value; symbol_prec[k] = v[i]->prec; symbol_assoc[k] = v[i]->assoc; } free(v); } void pack_grammar(void) { int i, j; int assoc, pprec; ritem = reallocarray(NULL, nitems, sizeof(short)); if (ritem == NULL) no_space(); rlhs = reallocarray(NULL, nrules, sizeof(short)); if (rlhs == NULL) no_space(); rrhs = reallocarray(NULL, nrules + 1, sizeof(short)); if (rrhs == NULL) no_space(); rprec = reallocarray(rprec, nrules, sizeof(short)); if (rprec == NULL) no_space(); rassoc = realloc(rassoc, nrules); if (rassoc == NULL) no_space(); ritem[0] = -1; ritem[1] = goal->index; ritem[2] = 0; ritem[3] = -2; rlhs[0] = 0; rlhs[1] = 0; rlhs[2] = start_symbol; rrhs[0] = 0; rrhs[1] = 0; rrhs[2] = 1; j = 4; for (i = 3; i < nrules; ++i) { rlhs[i] = plhs[i]->index; rrhs[i] = j; assoc = TOKEN; pprec = 0; while (pitem[j]) { ritem[j] = pitem[j]->index; if (pitem[j]->class == TERM) { pprec = pitem[j]->prec; assoc = pitem[j]->assoc; } ++j; } ritem[j] = -i; ++j; if (rprec[i] == UNDEFINED) { rprec[i] = pprec; rassoc[i] = assoc; } } rrhs[i] = j; free(plhs); free(pitem); } void print_grammar(void) { int i, j, k; int spacing = 0; FILE *f = verbose_file; if (!vflag) return; k = 1; for (i = 2; i < nrules; ++i) { if (rlhs[i] != rlhs[i - 1]) { if (i != 2) fprintf(f, "\n"); fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]); spacing = strlen(symbol_name[rlhs[i]]) + 1; } else { fprintf(f, "%4d ", i - 2); j = spacing; while (--j >= 0) putc(' ', f); putc('|', f); } while (ritem[k] >= 0) { fprintf(f, " %s", symbol_name[ritem[k]]); ++k; } ++k; putc('\n', f); } } void reader(void) { write_section(banner); create_symbol_table(); read_declarations(); read_grammar(); free_symbol_table(); free_tags(); pack_names(); check_symbols(); pack_symbols(); pack_grammar(); free_symbols(); print_grammar(); }