/* $OpenBSD: comp_scan.c,v 1.1 1998/07/23 21:17:26 millert Exp $ */ /**************************************************************************** * Copyright (c) 1998 Free Software Foundation, Inc. * * * * Permission is hereby granted, free of charge, to any person obtaining a * * copy of this software and associated documentation files (the * * "Software"), to deal in the Software without restriction, including * * without limitation the rights to use, copy, modify, merge, publish, * * distribute, distribute with modifications, sublicense, and/or sell * * copies of the Software, and to permit persons to whom the Software is * * furnished to do so, subject to the following conditions: * * * * The above copyright notice and this permission notice shall be included * * in all copies or substantial portions of the Software. * * * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * * IN NO EVENT SHALL THE ABOVE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * * THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * * Except as contained in this notice, the name(s) of the above copyright * * holders shall not be used in advertising or otherwise to promote the * * sale, use or other dealings in this Software without prior written * * authorization. * ****************************************************************************/ /**************************************************************************** * Author: Zeyd M. Ben-Halim 1992,1995 * * and: Eric S. Raymond * ****************************************************************************/ /* * comp_scan.c --- Lexical scanner for terminfo compiler. * * _nc_reset_input() * _nc_get_token() * _nc_panic_mode() * int _nc_syntax; * int _nc_curr_line; * long _nc_curr_file_pos; * long _nc_comment_start; * long _nc_comment_end; */ #include #include #include MODULE_ID("$From: comp_scan.c,v 1.33 1998/05/16 22:48:23 tom Exp $") /* * Maximum length of string capability we'll accept before raising an error. * Yes, there is a real capability in /etc/termcap this long, an "is". */ #define MAXCAPLEN 600 #define iswhite(ch) (ch == ' ' || ch == '\t') int _nc_syntax; /* termcap or terminfo? */ long _nc_curr_file_pos; /* file offset of current line */ long _nc_comment_start; /* start of comment range before name */ long _nc_comment_end; /* end of comment range before name */ long _nc_start_line; /* start line of current entry */ /***************************************************************************** * * Token-grabbing machinery * *****************************************************************************/ static bool first_column; /* See 'next_char()' below */ static char separator; /* capability separator */ static int pushtype; /* type of pushback token */ static char pushname[MAX_NAME_SIZE+1]; static int last_char(void); static int next_char(void); static long stream_pos(void); static bool end_of_stream(void); static void push_back(char c); /* Assume we may be looking at a termcap-style continuation */ static inline int eat_escaped_newline(int ch) { if (ch == '\\') while ((ch = next_char()) == '\n' || iswhite(ch)) continue; return ch; } /* * int * get_token() * * Scans the input for the next token, storing the specifics in the * global structure 'curr_token' and returning one of the following: * * NAMES A line beginning in column 1. 'name' * will be set to point to everything up to but * not including the first separator on the line. * BOOLEAN An entry consisting of a name followed by * a separator. 'name' will be set to point to * the name of the capability. * NUMBER An entry of the form * name#digits, * 'name' will be set to point to the capability * name and 'valnumber' to the number given. * STRING An entry of the form * name=characters, * 'name' is set to the capability name and * 'valstring' to the string of characters, with * input translations done. * CANCEL An entry of the form * name@, * 'name' is set to the capability name and * 'valnumber' to -1. * EOF The end of the file has been reached. * * A `separator' is either a comma or a semicolon, depending on whether * we are in termcap or terminfo mode. * */ int _nc_get_token(void) { static const char terminfo_punct[] = "@%&*!#"; long number; int type; int ch; bool found; static char buffer[MAX_ENTRY_SIZE]; char *ptr; int dot_flag = FALSE; long token_start; if (pushtype != NO_PUSHBACK) { int retval = pushtype; _nc_set_type(pushname); DEBUG(3, ("pushed-back token: `%s', class %d", _nc_curr_token.tk_name, pushtype)); pushtype = NO_PUSHBACK; pushname[0] = '\0'; /* currtok wasn't altered by _nc_push_token() */ return(retval); } if (end_of_stream()) return(EOF); start_token: token_start = stream_pos(); while ((ch = next_char()) == '\n' || iswhite(ch)) continue; ch = eat_escaped_newline(ch); if (ch == EOF) type = EOF; else { /* if this is a termcap entry, skip a leading separator */ if (separator == ':' && ch == ':') ch = next_char(); if (ch == '.') { dot_flag = TRUE; DEBUG(8, ("dot-flag set")); while ((ch = next_char())=='.' || iswhite(ch)) continue; } if (ch == EOF) { type = EOF; goto end_of_token; } /* have to make some punctuation chars legal for terminfo */ if (!isalnum(ch) && !strchr(terminfo_punct, (char)ch)) { _nc_warning("Illegal character (expected alphanumeric or %s) - %s", terminfo_punct, _tracechar((chtype)ch)); _nc_panic_mode(separator); goto start_token; } ptr = buffer; *(ptr++) = ch; if (first_column) { char *desc; _nc_comment_start = token_start; _nc_comment_end = _nc_curr_file_pos; _nc_start_line = _nc_curr_line; _nc_syntax = ERR; while ((ch = next_char()) != '\n') { if (ch == EOF) _nc_err_abort("premature EOF"); else if (ch == ':' && last_char() != ',') { _nc_syntax = SYN_TERMCAP; separator = ':'; break; } else if (ch == ',') { _nc_syntax = SYN_TERMINFO; separator = ','; /* * Fall-through here is not an accident. * The idea is that if we see a comma, we * figure this is terminfo unless we * subsequently run into a colon -- but * we don't stop looking for that colon until * hitting a newline. This allows commas to * be embedded in description fields of * either syntax. */ /* FALLTHRU */ } else ch = eat_escaped_newline(ch); *ptr++ = ch; } ptr[0] = '\0'; if (_nc_syntax == ERR) { /* * Grrr...what we ought to do here is barf, * complaining that the entry is malformed. * But because a couple of name fields in the * 8.2 termcap file end with |\, we just have * to assume it's termcap syntax. */ _nc_syntax = SYN_TERMCAP; separator = ':'; } else if (_nc_syntax == SYN_TERMINFO) { /* throw away trailing /, *$/ */ for (--ptr; iswhite(*ptr) || *ptr == ','; ptr--) continue; ptr[1] = '\0'; } /* * This is the soonest we have the terminal name * fetched. Set up for following warning messages. */ ptr = strchr(buffer, '|'); if (ptr == (char *)NULL) ptr = buffer + strlen(buffer); ch = *ptr; *ptr = '\0'; _nc_set_type(buffer); *ptr = ch; /* * Compute the boundary between the aliases and the * description field for syntax-checking purposes. */ desc = strrchr(buffer, '|'); if (desc) { if (*desc == '\0') _nc_warning("empty longname field"); else if (strchr(desc, ' ') == (char *)NULL) _nc_warning("older tic versions may treat the description field as an alias"); } if (!desc) desc = buffer + strlen(buffer); /* * Whitespace in a name field other than the long name * can confuse rdist and some termcap tools. Slashes * are a no-no. Other special characters can be * dangerous due to shell expansion. */ for (ptr = buffer; ptr < desc; ptr++) { if (isspace(*ptr)) { _nc_warning("whitespace in name or alias field"); break; } else if (*ptr == '/') { _nc_warning("slashes aren't allowed in names or aliases"); break; } else if (strchr("$[]!*?", *ptr)) { _nc_warning("dubious character `%c' in name or alias field", *ptr); break; } } ptr = buffer; _nc_curr_token.tk_name = buffer; type = NAMES; } else { while ((ch = next_char()) != EOF) { if (!isalnum(ch)) { if (_nc_syntax == SYN_TERMINFO) { if (ch != '_') break; } else { /* allow ';' for "k;" */ if (ch != ';') break; } } *(ptr++) = ch; } *ptr++ = '\0'; switch (ch) { case ',': case ':': if (ch != separator) _nc_err_abort("Separator inconsistent with syntax"); _nc_curr_token.tk_name = buffer; type = BOOLEAN; break; case '@': if ((ch = next_char()) != separator) _nc_warning("Missing separator after `%s', have %s", buffer, _tracechar((chtype)ch)); _nc_curr_token.tk_name = buffer; type = CANCEL; break; case '#': number = 0; found = FALSE; while (isdigit(ch = next_char())) { number = number * 10 + ch - '0'; found = TRUE; } if (found == FALSE) _nc_warning("no value given for `%s'", buffer); if (ch != separator) _nc_warning("Missing separator"); _nc_curr_token.tk_name = buffer; _nc_curr_token.tk_valnumber = number; type = NUMBER; break; case '=': ch = _nc_trans_string(ptr); if (ch != separator) _nc_warning("Missing separator"); _nc_curr_token.tk_name = buffer; _nc_curr_token.tk_valstring = ptr; type = STRING; break; case EOF: type = EOF; break; default: /* just to get rid of the compiler warning */ type = UNDEF; _nc_warning("Illegal character - %s", _tracechar((chtype)ch)); } } /* end else (first_column == FALSE) */ } /* end else (ch != EOF) */ end_of_token: #ifdef TRACE if (dot_flag == TRUE) DEBUG(8, ("Commented out ")); if (_nc_tracing & TRACE_IEVENT) { fprintf(stderr, "Token: "); switch (type) { case BOOLEAN: fprintf(stderr, "Boolean; name='%s'\n", _nc_curr_token.tk_name); break; case NUMBER: fprintf(stderr, "Number; name='%s', value=%d\n", _nc_curr_token.tk_name, _nc_curr_token.tk_valnumber); break; case STRING: fprintf(stderr, "String; name='%s', value=%s\n", _nc_curr_token.tk_name, _nc_visbuf(_nc_curr_token.tk_valstring)); break; case CANCEL: fprintf(stderr, "Cancel; name='%s'\n", _nc_curr_token.tk_name); break; case NAMES: fprintf(stderr, "Names; value='%s'\n", _nc_curr_token.tk_name); break; case EOF: fprintf(stderr, "End of file\n"); break; default: _nc_warning("Bad token type"); } } #endif if (dot_flag == TRUE) /* if commented out, use the next one */ type = _nc_get_token(); DEBUG(3, ("token: `%s', class %d", _nc_curr_token.tk_name, type)); return(type); } /* * char * trans_string(ptr) * * Reads characters using next_char() until encountering a separator, nl, * or end-of-file. The returned value is the character which caused * reading to stop. The following translations are done on the input: * * ^X goes to ctrl-X (i.e. X & 037) * {\E,\n,\r,\b,\t,\f} go to * {ESCAPE,newline,carriage-return,backspace,tab,formfeed} * {\^,\\} go to {carat,backslash} * \ddd (for ddd = up to three octal digits) goes to the character ddd * * \e == \E * \0 == \200 * */ char _nc_trans_string(char *ptr) { int count = 0; int number; int i, c; chtype ch, last_ch = '\0'; bool ignored = FALSE; while ((ch = c = next_char()) != (chtype)separator && c != EOF) { if ((_nc_syntax == SYN_TERMCAP) && c == '\n') break; if (ch == '^' && last_ch != '%') { ch = c = next_char(); if (c == EOF) _nc_err_abort("Premature EOF"); if (! (is7bits(ch) && isprint(ch))) { _nc_warning("Illegal ^ character - %s", _tracechar((unsigned char)ch)); } if (ch == '?') { *(ptr++) = '\177'; } else { if ((ch &= 037) == 0) ch = 128; *(ptr++) = (char)(ch); } } else if (ch == '\\') { ch = c = next_char(); if (c == EOF) _nc_err_abort("Premature EOF"); if (ch >= '0' && ch <= '7') { number = ch - '0'; for (i=0; i < 2; i++) { ch = c = next_char(); if (c == EOF) _nc_err_abort("Premature EOF"); if (c < '0' || c > '7') { if (isdigit(c)) { _nc_warning("Non-octal digit `%c' in \\ sequence", c); /* allow the digit; it'll do less harm */ } else { push_back((char)c); break; } } number = number * 8 + c - '0'; } if (number == 0) number = 0200; *(ptr++) = (char) number; } else { switch (c) { case 'E': case 'e': *(ptr++) = '\033'; break; case 'a': *(ptr++) = '\007'; break; case 'l': case 'n': *(ptr++) = '\n'; break; case 'r': *(ptr++) = '\r'; break; case 'b': *(ptr++) = '\010'; break; case 's': *(ptr++) = ' '; break; case 'f': *(ptr++) = '\014'; break; case 't': *(ptr++) = '\t'; break; case '\\': *(ptr++) = '\\'; break; case '^': *(ptr++) = '^'; break; case ',': *(ptr++) = ','; break; case ':': *(ptr++) = ':'; break; case '\n': continue; default: _nc_warning("Illegal character %s in \\ sequence", _tracechar((unsigned char)ch)); *(ptr++) = (char)ch; } /* endswitch (ch) */ } /* endelse (ch < '0' || ch > '7') */ } /* end else if (ch == '\\') */ else if (ch == '\n' && (_nc_syntax == SYN_TERMINFO)) { /* newlines embedded in a terminfo string are ignored */ ignored = TRUE; } else { *(ptr++) = (char)ch; } if (!ignored) { last_ch = ch; count ++; } ignored = FALSE; if (count > MAXCAPLEN) _nc_warning("Very long string found. Missing separator?"); } /* end while */ *ptr = '\0'; return(ch); } /* * _nc_push_token() * * Push a token of given type so that it will be reread by the next * get_token() call. */ void _nc_push_token(int tokclass) { /* * This implementation is kind of bogus, it will fail if we ever do * more than one pushback at a time between get_token() calls. It * relies on the fact that curr_tok is static storage that nothing * but get_token() touches. */ pushtype = tokclass; _nc_get_type(pushname); DEBUG(3, ("pushing token: `%s', class %d", _nc_curr_token.tk_name, pushtype)); } /* * Panic mode error recovery - skip everything until a "ch" is found. */ void _nc_panic_mode(char ch) { int c; for (;;) { c = next_char(); if (c == ch) return; if (c == EOF) return; } } /***************************************************************************** * * Character-stream handling * *****************************************************************************/ #define LEXBUFSIZ 1024 static char *bufptr; /* otherwise, the input buffer pointer */ static char *bufstart; /* start of buffer so we can compute offsets */ static FILE *yyin; /* scanner's input file descriptor */ /* * _nc_reset_input() * * Resets the input-reading routines. Used on initialization, * or after a seek has been done. Exactly one argument must be * non-null. */ void _nc_reset_input(FILE *fp, char *buf) { pushtype = NO_PUSHBACK; pushname[0] = '\0'; yyin = fp; bufstart = bufptr = buf; _nc_curr_file_pos = 0L; if (fp != 0) _nc_curr_line = 0; _nc_curr_col = 0; } /* * int last_char() * * Returns the final nonblank character on the current input buffer */ static int last_char(void) { size_t len = strlen(bufptr); while (len--) { if (!isspace(bufptr[len])) return bufptr[len]; } return 0; } /* * int next_char() * * Returns the next character in the input stream. Comments and leading * white space are stripped. * * The global state variable 'firstcolumn' is set TRUE if the character * returned is from the first column of the input line. * * The global variable _nc_curr_line is incremented for each new line. * The global variable _nc_curr_file_pos is set to the file offset of the * beginning of each line. */ static int next_char(void) { if (!yyin) { if (*bufptr == '\0') return(EOF); if (*bufptr == '\n') { _nc_curr_line++; _nc_curr_col = 0; } } else if (!bufptr || !*bufptr) { /* * In theory this could be recoded to do its I/O one * character at a time, saving the buffer space. In * practice, this turns out to be quite hard to get * completely right. Try it and see. If you succeed, * don't forget to hack push_back() correspondingly. */ static char line[LEXBUFSIZ]; size_t len; do { _nc_curr_file_pos = ftell(yyin); if ((bufstart = fgets(line, LEXBUFSIZ, yyin)) != NULL) { _nc_curr_line++; _nc_curr_col = 0; } bufptr = bufstart; } while (bufstart != NULL && line[0] == '#'); if (bufstart == NULL) return (EOF); while (iswhite(*bufptr)) bufptr++; /* * Treat a trailing the same as a so we can read * files on OS/2, etc. */ if ((len = strlen(bufptr)) > 1) { if (bufptr[len-1] == '\n' && bufptr[len-2] == '\r') { bufptr[len-2] = '\n'; bufptr[len-1] = '\0'; } } } first_column = (bufptr == bufstart); _nc_curr_col++; return(*bufptr++); } static void push_back(char c) /* push a character back onto the input stream */ { if (bufptr == bufstart) _nc_syserr_abort("Can't backspace off beginning of line"); *--bufptr = c; } static long stream_pos(void) /* return our current character position in the input stream */ { return (yyin ? ftell(yyin) : (bufptr ? bufptr - bufstart : 0)); } static bool end_of_stream(void) /* are we at end of input? */ { return ((yyin ? feof(yyin) : (bufptr && *bufptr == '\0')) ? TRUE : FALSE); } /* comp_scan.c ends here */