From ec3c5af51bfcb51f976b184af54d6b0c12b077f3 Mon Sep 17 00:00:00 2001 From: Jason Downs Date: Sun, 13 Oct 1996 21:32:24 +0000 Subject: Update to version 5.2.11. --- bin/pdksh/lex.c | 186 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 121 insertions(+), 65 deletions(-) (limited to 'bin/pdksh/lex.c') diff --git a/bin/pdksh/lex.c b/bin/pdksh/lex.c index 356d164e935..6c11aa00c11 100644 --- a/bin/pdksh/lex.c +++ b/bin/pdksh/lex.c @@ -1,4 +1,4 @@ -/* $OpenBSD: lex.c,v 1.3 1996/10/01 02:05:42 downsj Exp $ */ +/* $OpenBSD: lex.c,v 1.4 1996/10/13 21:32:19 downsj Exp $ */ /* * lexical analysis and source input @@ -8,20 +8,22 @@ #include static void readhere ARGS((struct ioword *iop)); -static int getsc_ ARGS((void)); +static int getsc__ ARGS((void)); static void getsc_line ARGS((Source *s)); static char *get_brace_var ARGS((XString *wsp, char *wp)); static int arraysub ARGS((char **strp)); -static const char *ungetsc_ ARGS((int c)); -static int getsc_bn_ ARGS((void)); +static const char *ungetsc ARGS((int c)); +static int getsc_bn ARGS((void)); +static void gethere ARGS((void)); -static void gethere ARGS((void)); +static int backslash_skip; +static int ignore_backslash_newline; -/* optimized getsc_() */ -#define getsc() ((*source->str != '\0') ? *source->str++ : getsc_()) -#define getsc_bn() (*source->str != '\0' && *source->str != '\\' \ - ? *source->str++ : getsc_bn_()) -#define ungetsc(c) (source->str > source->start ? source->str-- : ungetsc_(c)) +/* optimized getsc_bn() */ +#define getsc() (*source->str != '\0' && *source->str != '\\' \ + && !backslash_skip ? *source->str++ : getsc_bn()) +/* optimized getsc__() */ +#define getsc_() ((*source->str != '\0') ? *source->str++ : getsc__()) /* @@ -52,6 +54,9 @@ yylex(cf) Again: Xinit(ws, wp, 64, ATEMP); + backslash_skip = 0; + ignore_backslash_newline = 0; + if (cf&ONEWORD) istate = SWORD; #ifdef KSH @@ -65,9 +70,12 @@ yylex(cf) istate = (cf & HEREDELIM) ? SHEREDELIM : SBASE; while ((c = getsc()) == ' ' || c == '\t') ; - if (c == '#') + if (c == '#') { + ignore_backslash_newline++; while ((c = getsc()) != '\0' && c != '\n') ; + ignore_backslash_newline--; + } ungetsc(c); } if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */ @@ -139,23 +147,19 @@ yylex(cf) switch (c) { case '\\': c = getsc(); - if (c != '\n') { #ifdef OS2 - if (isalnum(c)) { - *wp++ = CHAR, *wp++ = '\\'; - *wp++ = CHAR, *wp++ = c; - } else + if (isalnum(c)) { + *wp++ = CHAR, *wp++ = '\\'; + *wp++ = CHAR, *wp++ = c; + } else #endif - *wp++ = QCHAR, *wp++ = c; - } else - if (wp == Xstring(ws, wp)) { - Xfree(ws, wp); /* free word */ - goto Again; - } + if (c) /* trailing \ is lost */ + *wp++ = QCHAR, *wp++ = c; break; case '\'': *++statep = state = SSQUOTE; *wp++ = OQUOTE; + ignore_backslash_newline++; break; case '"': *++statep = state = SDQUOTE; @@ -171,16 +175,16 @@ yylex(cf) case '\\': c = getsc(); switch (c) { - case '\n': - break; case '"': case '\\': case '$': case '`': *wp++ = QCHAR, *wp++ = c; break; default: Xcheck(ws, wp); - *wp++ = CHAR, *wp++ = '\\'; - *wp++ = CHAR, *wp++ = c; + if (c) { /* trailing \ is lost */ + *wp++ = CHAR, *wp++ = '\\'; + *wp++ = CHAR, *wp++ = c; + } break; } break; @@ -207,10 +211,10 @@ yylex(cf) * wrap @(...) around the pattern * (allows easy handling of ${a#b|c}) */ - c = getsc_bn(); + c = getsc(); if (c == '#' || c == '%') { *wp++ = CHAR, *wp++ = c; - if ((c2 = getsc_bn()) == c) + if ((c2 = getsc()) == c) *wp++ = CHAR, *wp++ = c; else ungetsc(c2); @@ -263,6 +267,7 @@ yylex(cf) if (c == '\'') { state = *--statep; *wp++ = CQUOTE; + ignore_backslash_newline--; } else *wp++ = QCHAR, *wp++ = c; break; @@ -298,6 +303,7 @@ yylex(cf) break; case '\'': csstate = 4; + ignore_backslash_newline++; break; } break; @@ -315,8 +321,10 @@ yylex(cf) break; case 4: /* single quotes */ - if (c == '\'') + if (c == '\'') { csstate = 0; + ignore_backslash_newline--; + } break; } if (nparen == 0) { @@ -390,8 +398,6 @@ yylex(cf) state = *--statep; } else if (c == '\\') { switch (c = getsc()) { - case '\n': - break; case '\\': case '$': case '`': *wp++ = c; @@ -403,8 +409,10 @@ yylex(cf) } /* fall through.. */ default: - *wp++ = '\\'; - *wp++ = c; + if (c) { /* trailing \ is lost */ + *wp++ = '\\'; + *wp++ = c; + } break; } } else @@ -447,13 +455,14 @@ yylex(cf) */ if (c == '\\') { c = getsc(); - if (c != '\n') { + if (c) { /* trailing \ is lost */ *wp++ = QCHAR; *wp++ = c; } } else if (c == '\'') { *++statep = state = SSQUOTE; *wp++ = OQUOTE; + ignore_backslash_newline++; } else if (c == '"') { state = SHEREDQUOTE; *wp++ = OQUOTE; @@ -468,8 +477,19 @@ yylex(cf) *wp++ = CQUOTE; state = SHEREDELIM; } else { - if (c == '\\' && (c = getsc()) == '\n') - break; + if (c == '\\') { + switch (c = getsc()) { + case '\\': case '"': + case '$': case '`': + break; + default: + if (c) { /* trailing \ lost */ + *wp++ = CHAR; + *wp++ = '\\'; + } + break; + } + } *wp++ = CHAR; *wp++ = c; } @@ -540,7 +560,7 @@ Done: iop->flag = c == c2 ? (c == '>' ? IOCAT : IOHERE) : IORDWR; if (iop->flag == IOHERE) - if (getsc() == '-') + if ((c2 = getsc()) == '-') iop->flag |= IOSKIP; else ungetsc(c2); @@ -653,7 +673,7 @@ readhere(iop) register int c; char *volatile eof; char *eofp; - int skiptabs, bn; + int skiptabs; int i; eof = evalstr(iop->delim, 0); @@ -680,11 +700,13 @@ readhere(iop) unwind(i); } - bn = iop->flag & IOEVAL; + if (!(iop->flag & IOEVAL)) + ignore_backslash_newline++; + for (;;) { eofp = eof; skiptabs = iop->flag & IOSKIP; - while ((c = (bn ? getsc_bn() : getsc())) != 0) { + while ((c = getsc()) != 0) { if (skiptabs) { if (c == '\t') continue; @@ -701,7 +723,7 @@ readhere(iop) break; ungetsc(c); shf_write(eof, eofp - eof, shf); - while ((c = (bn ? getsc_bn() : getsc())) != '\n') { + while ((c = getsc()) != '\n') { if (c == 0) yyerror("here document `%s' unclosed\n", eof); shf_putc(c, shf); @@ -715,6 +737,8 @@ readhere(iop) /*XXX add similar checks for write errors everywhere */ quitenv(); shf_close(shf); + if (!(iop->flag & IOEVAL)) + ignore_backslash_newline--; } void @@ -771,7 +795,7 @@ pushs(type, areap) } static int -getsc_() +getsc__() { register Source *s = source; register int c; @@ -819,21 +843,40 @@ getsc_() && isspace(strchr(s->u.tblp->val.s, 0)[-1])) { source = s = s->next; /* pop source stack */ + /* Note that this alias ended with a space, + * enabling alias expansion on the following + * word. + */ s->flags |= SF_ALIAS; } else { - /* put a fake space at the end of the alias. - * This keeps the current alias in the source - * list so recursive aliases can be detected. - * The addition of a space after an alias - * never affects anything (I think). + /* At this point, we need to keep the current + * alias in the source list so recursive + * aliases can be detected and we also need + * to return the next character. Do this + * by temporarily popping the alias to get + * the next character and then put it back + * in the source list with the SF_ALIASEND + * flag set. */ - s->flags |= SF_ALIASEND; - s->start = s->str = space; + source = s->next; /* pop source stack */ + source->flags |= s->flags & SF_ALIAS; + c = getsc__(); + if (c) { + s->flags |= SF_ALIASEND; + s->ugbuf[0] = c; s->ugbuf[1] = '\0'; + s->start = s->str = s->ugbuf; + s->next = source; + source = s; + } else { + s = source; + /* avoid reading eof twice */ + s->str = NULL; + } } continue; case SREREAD: - if (s->start != s->u.ugbuf) /* yuck */ + if (s->start != s->ugbuf) /* yuck */ afree(s->u.freeme, ATEMP); source = s = s->next; continue; @@ -1088,7 +1131,7 @@ get_brace_var(wsp, wp) state = PS_INITIAL; while (1) { - c = getsc_bn(); + c = getsc(); /* State machine to figure out where the variable part ends. */ switch (state) { case PS_INITIAL: @@ -1121,7 +1164,7 @@ get_brace_var(wsp, wp) *wp++ = *p++; } afree(tmp, ATEMP); - c = getsc(); + c = getsc(); /* the ] */ } } break; @@ -1163,7 +1206,7 @@ arraysub(strp) Xinit(ws, wp, 32, ATEMP); do { - c = getsc_bn(); + c = getsc(); Xcheck(ws, wp); *wp++ = c; if (c == '[') @@ -1180,9 +1223,11 @@ arraysub(strp) /* Unget a char: handles case when we are already at the start of the buffer */ static const char * -ungetsc_(c) +ungetsc(c) int c; { + if (backslash_skip) + backslash_skip--; /* Don't unget eof... */ if (source->str == null && c == '\0') return source->str; @@ -1192,29 +1237,40 @@ ungetsc_(c) Source *s; s = pushs(SREREAD, source->areap); - s->u.ugbuf[0] = c; s->u.ugbuf[1] = '\0'; - s->start = s->str = s->u.ugbuf; + s->ugbuf[0] = c; s->ugbuf[1] = '\0'; + s->start = s->str = s->ugbuf; s->next = source; source = s; } return source->str; } + /* Called to get a char that isn't a \newline sequence. */ static int -getsc_bn_ ARGS((void)) +getsc_bn ARGS((void)) { - int c; + int c, c2; + + if (ignore_backslash_newline) + return getsc_(); + + if (backslash_skip == 1) { + backslash_skip = 2; + return getsc_(); + } + + backslash_skip = 0; while (1) { c = getsc_(); - if (c != '\\') - return c; - c = getsc(); - if (c != '\n') { - ungetsc(c); - return '\\'; + if (c == '\\') { + if ((c2 = getsc_()) == '\n') + /* ignore the \newline; get the next char... */ + continue; + ungetsc(c2); + backslash_skip = 1; } - /* ignore the \newline; get the next char... */ + return c; } } -- cgit v1.2.3