summaryrefslogtreecommitdiff
path: root/bin/pdksh/lex.c
diff options
context:
space:
mode:
authorJason Downs <downsj@cvs.openbsd.org>1996-10-13 21:32:24 +0000
committerJason Downs <downsj@cvs.openbsd.org>1996-10-13 21:32:24 +0000
commitec3c5af51bfcb51f976b184af54d6b0c12b077f3 (patch)
tree62bbeddd3e75559a71fa45465439dc59493d27b1 /bin/pdksh/lex.c
parent647a0f92a2544a00265b0a8eaff14f417a2dbe0f (diff)
Update to version 5.2.11.
Diffstat (limited to 'bin/pdksh/lex.c')
-rw-r--r--bin/pdksh/lex.c186
1 files changed, 121 insertions, 65 deletions
diff --git a/bin/pdksh/lex.c b/bin/pdksh/lex.c
index 356d164e935..6c11aa00c11 100644
--- a/bin/pdksh/lex.c
+++ b/bin/pdksh/lex.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: lex.c,v 1.3 1996/10/01 02:05:42 downsj Exp $ */
+/* $OpenBSD: lex.c,v 1.4 1996/10/13 21:32:19 downsj Exp $ */
/*
* lexical analysis and source input
@@ -8,20 +8,22 @@
#include <ctype.h>
static void readhere ARGS((struct ioword *iop));
-static int getsc_ ARGS((void));
+static int getsc__ ARGS((void));
static void getsc_line ARGS((Source *s));
static char *get_brace_var ARGS((XString *wsp, char *wp));
static int arraysub ARGS((char **strp));
-static const char *ungetsc_ ARGS((int c));
-static int getsc_bn_ ARGS((void));
+static const char *ungetsc ARGS((int c));
+static int getsc_bn ARGS((void));
+static void gethere ARGS((void));
-static void gethere ARGS((void));
+static int backslash_skip;
+static int ignore_backslash_newline;
-/* optimized getsc_() */
-#define getsc() ((*source->str != '\0') ? *source->str++ : getsc_())
-#define getsc_bn() (*source->str != '\0' && *source->str != '\\' \
- ? *source->str++ : getsc_bn_())
-#define ungetsc(c) (source->str > source->start ? source->str-- : ungetsc_(c))
+/* optimized getsc_bn() */
+#define getsc() (*source->str != '\0' && *source->str != '\\' \
+ && !backslash_skip ? *source->str++ : getsc_bn())
+/* optimized getsc__() */
+#define getsc_() ((*source->str != '\0') ? *source->str++ : getsc__())
/*
@@ -52,6 +54,9 @@ yylex(cf)
Again:
Xinit(ws, wp, 64, ATEMP);
+ backslash_skip = 0;
+ ignore_backslash_newline = 0;
+
if (cf&ONEWORD)
istate = SWORD;
#ifdef KSH
@@ -65,9 +70,12 @@ yylex(cf)
istate = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
while ((c = getsc()) == ' ' || c == '\t')
;
- if (c == '#')
+ if (c == '#') {
+ ignore_backslash_newline++;
while ((c = getsc()) != '\0' && c != '\n')
;
+ ignore_backslash_newline--;
+ }
ungetsc(c);
}
if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */
@@ -139,23 +147,19 @@ yylex(cf)
switch (c) {
case '\\':
c = getsc();
- if (c != '\n') {
#ifdef OS2
- if (isalnum(c)) {
- *wp++ = CHAR, *wp++ = '\\';
- *wp++ = CHAR, *wp++ = c;
- } else
+ if (isalnum(c)) {
+ *wp++ = CHAR, *wp++ = '\\';
+ *wp++ = CHAR, *wp++ = c;
+ } else
#endif
- *wp++ = QCHAR, *wp++ = c;
- } else
- if (wp == Xstring(ws, wp)) {
- Xfree(ws, wp); /* free word */
- goto Again;
- }
+ if (c) /* trailing \ is lost */
+ *wp++ = QCHAR, *wp++ = c;
break;
case '\'':
*++statep = state = SSQUOTE;
*wp++ = OQUOTE;
+ ignore_backslash_newline++;
break;
case '"':
*++statep = state = SDQUOTE;
@@ -171,16 +175,16 @@ yylex(cf)
case '\\':
c = getsc();
switch (c) {
- case '\n':
- break;
case '"': case '\\':
case '$': case '`':
*wp++ = QCHAR, *wp++ = c;
break;
default:
Xcheck(ws, wp);
- *wp++ = CHAR, *wp++ = '\\';
- *wp++ = CHAR, *wp++ = c;
+ if (c) { /* trailing \ is lost */
+ *wp++ = CHAR, *wp++ = '\\';
+ *wp++ = CHAR, *wp++ = c;
+ }
break;
}
break;
@@ -207,10 +211,10 @@ yylex(cf)
* wrap @(...) around the pattern
* (allows easy handling of ${a#b|c})
*/
- c = getsc_bn();
+ c = getsc();
if (c == '#' || c == '%') {
*wp++ = CHAR, *wp++ = c;
- if ((c2 = getsc_bn()) == c)
+ if ((c2 = getsc()) == c)
*wp++ = CHAR, *wp++ = c;
else
ungetsc(c2);
@@ -263,6 +267,7 @@ yylex(cf)
if (c == '\'') {
state = *--statep;
*wp++ = CQUOTE;
+ ignore_backslash_newline--;
} else
*wp++ = QCHAR, *wp++ = c;
break;
@@ -298,6 +303,7 @@ yylex(cf)
break;
case '\'':
csstate = 4;
+ ignore_backslash_newline++;
break;
}
break;
@@ -315,8 +321,10 @@ yylex(cf)
break;
case 4: /* single quotes */
- if (c == '\'')
+ if (c == '\'') {
csstate = 0;
+ ignore_backslash_newline--;
+ }
break;
}
if (nparen == 0) {
@@ -390,8 +398,6 @@ yylex(cf)
state = *--statep;
} else if (c == '\\') {
switch (c = getsc()) {
- case '\n':
- break;
case '\\':
case '$': case '`':
*wp++ = c;
@@ -403,8 +409,10 @@ yylex(cf)
}
/* fall through.. */
default:
- *wp++ = '\\';
- *wp++ = c;
+ if (c) { /* trailing \ is lost */
+ *wp++ = '\\';
+ *wp++ = c;
+ }
break;
}
} else
@@ -447,13 +455,14 @@ yylex(cf)
*/
if (c == '\\') {
c = getsc();
- if (c != '\n') {
+ if (c) { /* trailing \ is lost */
*wp++ = QCHAR;
*wp++ = c;
}
} else if (c == '\'') {
*++statep = state = SSQUOTE;
*wp++ = OQUOTE;
+ ignore_backslash_newline++;
} else if (c == '"') {
state = SHEREDQUOTE;
*wp++ = OQUOTE;
@@ -468,8 +477,19 @@ yylex(cf)
*wp++ = CQUOTE;
state = SHEREDELIM;
} else {
- if (c == '\\' && (c = getsc()) == '\n')
- break;
+ if (c == '\\') {
+ switch (c = getsc()) {
+ case '\\': case '"':
+ case '$': case '`':
+ break;
+ default:
+ if (c) { /* trailing \ lost */
+ *wp++ = CHAR;
+ *wp++ = '\\';
+ }
+ break;
+ }
+ }
*wp++ = CHAR;
*wp++ = c;
}
@@ -540,7 +560,7 @@ Done:
iop->flag = c == c2 ?
(c == '>' ? IOCAT : IOHERE) : IORDWR;
if (iop->flag == IOHERE)
- if (getsc() == '-')
+ if ((c2 = getsc()) == '-')
iop->flag |= IOSKIP;
else
ungetsc(c2);
@@ -653,7 +673,7 @@ readhere(iop)
register int c;
char *volatile eof;
char *eofp;
- int skiptabs, bn;
+ int skiptabs;
int i;
eof = evalstr(iop->delim, 0);
@@ -680,11 +700,13 @@ readhere(iop)
unwind(i);
}
- bn = iop->flag & IOEVAL;
+ if (!(iop->flag & IOEVAL))
+ ignore_backslash_newline++;
+
for (;;) {
eofp = eof;
skiptabs = iop->flag & IOSKIP;
- while ((c = (bn ? getsc_bn() : getsc())) != 0) {
+ while ((c = getsc()) != 0) {
if (skiptabs) {
if (c == '\t')
continue;
@@ -701,7 +723,7 @@ readhere(iop)
break;
ungetsc(c);
shf_write(eof, eofp - eof, shf);
- while ((c = (bn ? getsc_bn() : getsc())) != '\n') {
+ while ((c = getsc()) != '\n') {
if (c == 0)
yyerror("here document `%s' unclosed\n", eof);
shf_putc(c, shf);
@@ -715,6 +737,8 @@ readhere(iop)
/*XXX add similar checks for write errors everywhere */
quitenv();
shf_close(shf);
+ if (!(iop->flag & IOEVAL))
+ ignore_backslash_newline--;
}
void
@@ -771,7 +795,7 @@ pushs(type, areap)
}
static int
-getsc_()
+getsc__()
{
register Source *s = source;
register int c;
@@ -819,21 +843,40 @@ getsc_()
&& isspace(strchr(s->u.tblp->val.s, 0)[-1]))
{
source = s = s->next; /* pop source stack */
+ /* Note that this alias ended with a space,
+ * enabling alias expansion on the following
+ * word.
+ */
s->flags |= SF_ALIAS;
} else {
- /* put a fake space at the end of the alias.
- * This keeps the current alias in the source
- * list so recursive aliases can be detected.
- * The addition of a space after an alias
- * never affects anything (I think).
+ /* At this point, we need to keep the current
+ * alias in the source list so recursive
+ * aliases can be detected and we also need
+ * to return the next character. Do this
+ * by temporarily popping the alias to get
+ * the next character and then put it back
+ * in the source list with the SF_ALIASEND
+ * flag set.
*/
- s->flags |= SF_ALIASEND;
- s->start = s->str = space;
+ source = s->next; /* pop source stack */
+ source->flags |= s->flags & SF_ALIAS;
+ c = getsc__();
+ if (c) {
+ s->flags |= SF_ALIASEND;
+ s->ugbuf[0] = c; s->ugbuf[1] = '\0';
+ s->start = s->str = s->ugbuf;
+ s->next = source;
+ source = s;
+ } else {
+ s = source;
+ /* avoid reading eof twice */
+ s->str = NULL;
+ }
}
continue;
case SREREAD:
- if (s->start != s->u.ugbuf) /* yuck */
+ if (s->start != s->ugbuf) /* yuck */
afree(s->u.freeme, ATEMP);
source = s = s->next;
continue;
@@ -1088,7 +1131,7 @@ get_brace_var(wsp, wp)
state = PS_INITIAL;
while (1) {
- c = getsc_bn();
+ c = getsc();
/* State machine to figure out where the variable part ends. */
switch (state) {
case PS_INITIAL:
@@ -1121,7 +1164,7 @@ get_brace_var(wsp, wp)
*wp++ = *p++;
}
afree(tmp, ATEMP);
- c = getsc();
+ c = getsc(); /* the ] */
}
}
break;
@@ -1163,7 +1206,7 @@ arraysub(strp)
Xinit(ws, wp, 32, ATEMP);
do {
- c = getsc_bn();
+ c = getsc();
Xcheck(ws, wp);
*wp++ = c;
if (c == '[')
@@ -1180,9 +1223,11 @@ arraysub(strp)
/* Unget a char: handles case when we are already at the start of the buffer */
static const char *
-ungetsc_(c)
+ungetsc(c)
int c;
{
+ if (backslash_skip)
+ backslash_skip--;
/* Don't unget eof... */
if (source->str == null && c == '\0')
return source->str;
@@ -1192,29 +1237,40 @@ ungetsc_(c)
Source *s;
s = pushs(SREREAD, source->areap);
- s->u.ugbuf[0] = c; s->u.ugbuf[1] = '\0';
- s->start = s->str = s->u.ugbuf;
+ s->ugbuf[0] = c; s->ugbuf[1] = '\0';
+ s->start = s->str = s->ugbuf;
s->next = source;
source = s;
}
return source->str;
}
+
/* Called to get a char that isn't a \newline sequence. */
static int
-getsc_bn_ ARGS((void))
+getsc_bn ARGS((void))
{
- int c;
+ int c, c2;
+
+ if (ignore_backslash_newline)
+ return getsc_();
+
+ if (backslash_skip == 1) {
+ backslash_skip = 2;
+ return getsc_();
+ }
+
+ backslash_skip = 0;
while (1) {
c = getsc_();
- if (c != '\\')
- return c;
- c = getsc();
- if (c != '\n') {
- ungetsc(c);
- return '\\';
+ if (c == '\\') {
+ if ((c2 = getsc_()) == '\n')
+ /* ignore the \newline; get the next char... */
+ continue;
+ ungetsc(c2);
+ backslash_skip = 1;
}
- /* ignore the \newline; get the next char... */
+ return c;
}
}