summary refs log tree commit diff
path: root/usr.bin/m4
diff options
context:
space:
mode:
author Marc Espie <espie@cvs.openbsd.org> 2005-09-06 15:33:22 +0000
committer Marc Espie <espie@cvs.openbsd.org> 2005-09-06 15:33:22 +0000
commit d81b4678132dfbd4a881b6621e80cc598edfa62c (patch)
tree b59bc096daed41023067e54aba0b0ed9a191399e /usr.bin/m4
parent 71442aed77bb01f56d28f6cbc9fdeb82acfe9212 (diff)
finally make our m4 SusV3-compliant.
- changecom and changequote have a simple definition (that matches gnu-m4, coincidentally, so we no longer need two distinct modes for these)
- off-by-one bug in -s, this finally works.
- reorder main parser loop, so that we can use alphabetic constructs in quotes/comments.
- rename putback to pushback, this matches comments, and makes more sense.
- more uniform (and updated) description of changequote/changecom.
- new, systematic regression tests of comments/quotes.
- framework to test -s: one perl script to reconstitute `full' files with all line numbers, so that we can verify the output without needing a complete match.
okay otto@, fries@
Diffstat (limited to 'usr.bin/m4')
-rw-r--r-- usr.bin/m4/eval.c 114
-rw-r--r-- usr.bin/m4/extern.h 8
-rw-r--r-- usr.bin/m4/m4.1 42
-rw-r--r-- usr.bin/m4/main.c 120
-rw-r--r-- usr.bin/m4/misc.c 21
5 files changed, 128 insertions, 177 deletions
diff --git a/usr.bin/m4/eval.c b/usr.bin/m4/eval.c
index 926b86c6229..caf82d45256 100644
--- a/usr.bin/m4/eval.c
+++ b/usr.bin/m4/eval.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: eval.c,v 1.57 2005/08/06 16:22:26 espie Exp $ */
+/* $OpenBSD: eval.c,v 1.58 2005/09/06 15:33:21 espie Exp $ */
/* $NetBSD: eval.c,v 1.7 1996/11/10 21:21:29 pk Exp $ */
/*
@@ -61,9 +61,7 @@ static void dotrace(const char *[], int, int);
static void doifelse(const char *[], int);
static int doincl(const char *);
static int dopaste(const char *);
-static void gnu_dochq(const char *[], int);
static void dochq(const char *[], int);
-static void gnu_dochc(const char *[], int);
static void dochc(const char *[], int);
static void dom4wrap(const char *);
static void dodiv(int);
@@ -137,6 +135,9 @@ expand_builtin(const char *argv[], int argc, int td)
* have macro-or-builtin() type call. We adjust
* argc to avoid further checking..
*/
+ /* we keep the initial value for those built-ins that differentiate
+ * between builtin() and builtin.
+ */
ac = argc;
if (argc == 3 && !*(argv[2]))
@@ -286,17 +287,11 @@ expand_builtin(const char *argv[], int argc, int td)
break;
#endif
case CHNQTYPE:
- if (mimic_gnu)
- gnu_dochq(argv, ac);
- else
- dochq(argv, argc);
+ dochq(argv, ac);
break;
case CHNCTYPE:
- if (mimic_gnu)
- gnu_dochc(argv, ac);
- else
- dochc(argv, argc);
+ dochc(argv, argc);
break;
case SUBSTYPE:
@@ -319,7 +314,7 @@ expand_builtin(const char *argv[], int argc, int td)
pbstr(rquote);
pbstr(argv[n]);
pbstr(lquote);
- putback(COMMA);
+ pushback(COMMA);
}
pbstr(rquote);
pbstr(argv[3]);
@@ -518,7 +513,7 @@ expand_macro(const char *argv[], int argc)
p--; /* last character of defn */
while (p > t) {
if (*(p - 1) != ARGFLAG)
- PUTBACK(*p);
+ PUSHBACK(*p);
else {
switch (*p) {
@@ -542,7 +537,7 @@ expand_macro(const char *argv[], int argc)
if (argc > 2) {
for (n = argc - 1; n > 2; n--) {
pbstr(argv[n]);
- putback(COMMA);
+ pushback(COMMA);
}
pbstr(argv[2]);
}
@@ -553,7 +548,7 @@ expand_macro(const char *argv[], int argc)
pbstr(rquote);
pbstr(argv[n]);
pbstr(lquote);
- putback(COMMA);
+ pushback(COMMA);
}
pbstr(rquote);
pbstr(argv[2]);
@@ -561,8 +556,8 @@ expand_macro(const char *argv[], int argc)
}
break;
default:
- PUTBACK(*p);
- PUTBACK('$');
+ PUSHBACK(*p);
+ PUSHBACK('$');
break;
}
p--;
@@ -570,7 +565,7 @@ expand_macro(const char *argv[], int argc)
p--;
}
if (p == t) /* do last character */
- PUTBACK(*p);
+ PUSHBACK(*p);
}
@@ -736,85 +731,44 @@ dopaste(const char *pfile)
}
#endif
+/*
+ * dochq - change quote characters
+ */
static void
-gnu_dochq(const char *argv[], int ac)
+dochq(const char *argv[], int ac)
{
- /* In gnu-m4 mode, the only way to restore quotes is to have no
- * arguments at all. */
if (ac == 2) {
- lquote[0] = LQUOTE, lquote[1] = EOS;
- rquote[0] = RQUOTE, rquote[1] = EOS;
+ lquote[0] = LQUOTE; lquote[1] = EOS;
+ rquote[0] = RQUOTE; rquote[1] = EOS;
} else {
strlcpy(lquote, argv[2], sizeof(lquote));
- if(ac > 3)
+ if (ac > 3) {
strlcpy(rquote, argv[3], sizeof(rquote));
- else
- rquote[0] = EOS;
+ } else {
+ rquote[0] = ECOMMT; rquote[1] = EOS;
+ }
}
}
/*
- * dochq - change quote characters
+ * dochc - change comment characters
*/
static void
-dochq(const char *argv[], int argc)
-{
- if (argc > 2) {
- if (*argv[2])
- strlcpy(lquote, argv[2], sizeof(lquote));
- else {
- lquote[0] = LQUOTE;
- lquote[1] = EOS;
- }
- if (argc > 3) {
- if (*argv[3])
- strlcpy(rquote, argv[3], sizeof(rquote));
- } else
- strlcpy(rquote, lquote, sizeof(rquote));
- } else {
- lquote[0] = LQUOTE, lquote[1] = EOS;
- rquote[0] = RQUOTE, rquote[1] = EOS;
- }
-}
-
-static void
-gnu_dochc(const char *argv[], int ac)
+dochc(const char *argv[], int argc)
{
- /* In gnu-m4 mode, no arguments mean no comment
- * arguments at all. */
- if (ac == 2) {
+/* XXX Note that there is no difference between no argument and a single
+ * empty argument.
+ */
+ if (argc == 2) {
scommt[0] = EOS;
ecommt[0] = EOS;
} else {
- if (*argv[2])
- strlcpy(scommt, argv[2], sizeof(scommt));
- else
- scommt[0] = SCOMMT, scommt[1] = EOS;
- if(ac > 3 && *argv[3])
+ strlcpy(scommt, argv[2], sizeof(scommt));
+ if (argc == 3) {
+ ecommt[0] = ECOMMT; ecommt[1] = EOS;
+ } else {
strlcpy(ecommt, argv[3], sizeof(ecommt));
- else
- ecommt[0] = ECOMMT, ecommt[1] = EOS;
- }
-}
-/*
- * dochc - change comment characters
- */
-static void
-dochc(const char *argv[], int argc)
-{
- if (argc > 2) {
- if (*argv[2])
- strlcpy(scommt, argv[2], sizeof(scommt));
- if (argc > 3) {
- if (*argv[3])
- strlcpy(ecommt, argv[3], sizeof(ecommt));
}
- else
- ecommt[0] = ECOMMT, ecommt[1] = EOS;
- }
- else {
- scommt[0] = SCOMMT, scommt[1] = EOS;
- ecommt[0] = ECOMMT, ecommt[1] = EOS;
}
}
@@ -918,7 +872,7 @@ dosub(const char *argv[], int argc)
#endif
if (fc >= ap && fc < ap + strlen(ap))
for (k = fc + nc - 1; k >= fc; k--)
- putback(*k);
+ pushback(*k);
}
/*
diff --git a/usr.bin/m4/extern.h b/usr.bin/m4/extern.h
index 2ee7924863d..f1de0d75715 100644
--- a/usr.bin/m4/extern.h
+++ b/usr.bin/m4/extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: extern.h,v 1.41 2005/05/29 18:44:36 espie Exp $ */
+/* $OpenBSD: extern.h,v 1.42 2005/09/06 15:33:21 espie Exp $ */
/* $NetBSD: extern.h,v 1.3 1996/01/13 23:25:24 pk Exp $ */
/*-
@@ -100,7 +100,7 @@ extern void pbnum(int);
extern void pbnumbase(int, int, int);
extern void pbunsigned(unsigned long);
extern void pbstr(const char *);
-extern void putback(int);
+extern void pushback(int);
extern void *xalloc(size_t, const char *fmt, ...);
extern void *xrealloc(void *, size_t, const char *fmt, ...);
extern char *xstrdup(const char *);
@@ -113,8 +113,8 @@ extern int obtain_char(struct input_file *);
extern void set_input(struct input_file *, FILE *, const char *);
extern void release_input(struct input_file *);
-/* speeded-up versions of chrsave/putback */
-#define PUTBACK(c) \
+/* speeded-up versions of chrsave/pushback */
+#define PUSHBACK(c) \
do { \
if (bp >= endpbb) \
enlarge_bufspace(); \
diff --git a/usr.bin/m4/m4.1 b/usr.bin/m4/m4.1
index c5ade9f0a7a..1a7383b5051 100644
--- a/usr.bin/m4/m4.1
+++ b/usr.bin/m4/m4.1
@@ -1,4 +1,4 @@
-.\" @(#) $OpenBSD: m4.1,v 1.38 2005/03/02 10:12:15 espie Exp $
+.\" @(#) $OpenBSD: m4.1,v 1.39 2005/09/06 15:33:21 espie Exp $
.\"
.\" Copyright (c) 1989, 1993
.\" The Regents of the University of California. All rights reserved.
@@ -155,17 +155,29 @@ Calls a built-in by its
.Fa name ,
overriding possible redefinitions.
.It Fn changecom startcomment endcomment
-Change the start and end comment sequences.
-The default is the pound sign
-.Pq Sq #
+Changes the start comment and end comment sequences.
+Comment sequences may be up to five characters long.
+The default values are the pound sign
and the newline character.
-With no arguments comments are turned off.
-The maximum length for a comment marker is five characters.
+.Bd -literal -offset indent
+# This is a comment
+.Ed
+.Pp
+With no arguments, comments are turned off.
+With one single argument, the end comment sequence is set
+to the newline character.
.It Fn changequote beginquote endquote
-Defines the quote symbols to be the first and second arguments.
-The symbols may be up to five characters long.
-If no arguments are
-given it restores the default open and close single quotes.
+Defines the open quote and close quote sequences.
+Quote sequences may be up to five characters long.
+The default values are the backquote character and the quote
+character.
+.Bd -literal -offset indent
+`Here is a quoted string'
+.Ed
+.Pp
+With no arguments, the default quotes are restored.
+With one single argument, the close quote sequence is set
+to the newline character.
.It Fn decr arg
Decrements the argument
.Fa arg
@@ -386,18 +398,10 @@ Returns the current file's name.
.El
.Sh STANDARDS
.Nm
-follows the Single Unix 2 specification, along with a few extensions taken
+follows the Single Unix 3 specification, along with a few extensions taken
from
.Nm gnu-m4 .
.Pp
-The
-.Fl s
-option
-.Po
-.Xr cpp 1 's
-#line directives
-.Pc
-is currently not supported.
Flags
.Fl I ,
.Fl d ,
diff --git a/usr.bin/m4/main.c b/usr.bin/m4/main.c
index 3a34f113e0c..d53dd8a0da8 100644
--- a/usr.bin/m4/main.c
+++ b/usr.bin/m4/main.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: main.c,v 1.67 2005/08/06 16:22:26 espie Exp $ */
+/* $OpenBSD: main.c,v 1.68 2005/09/06 15:33:21 espie Exp $ */
/* $NetBSD: main.c,v 1.12 1997/02/08 23:54:49 cgd Exp $ */
/*-
@@ -296,9 +296,9 @@ do_look_ahead(int t, const char *token)
for (i = 1; *++token; i++) {
t = gpbc();
if (t == EOF || (unsigned char)t != (unsigned char)*token) {
- putback(t);
+ pushback(t);
while (--i)
- putback(*--token);
+ pushback(*--token);
return 0;
}
}
@@ -322,10 +322,57 @@ macro(void)
cycle {
t = gpbc();
- if (t == '_' || isalpha(t)) {
+
+ if (LOOK_AHEAD(t,lquote)) { /* strip quotes */
+ nlpar = 0;
+ record(quotes, nlpar++);
+ /*
+ * Opening quote: scan forward until matching
+ * closing quote has been found.
+ */
+ do {
+
+ l = gpbc();
+ if (LOOK_AHEAD(l,rquote)) {
+ if (--nlpar > 0)
+ outputstr(rquote);
+ } else if (LOOK_AHEAD(l,lquote)) {
+ record(quotes, nlpar++);
+ outputstr(lquote);
+ } else if (l == EOF) {
+ if (nlpar == 1)
+ warnx("unclosed quote:");
+ else
+ warnx("%d unclosed quotes:", nlpar);
+ dump_stack(quotes, nlpar);
+ exit(1);
+ } else {
+ if (nlpar > 0) {
+ if (sp < 0)
+ reallyputchar(l);
+ else
+ CHRSAVE(l);
+ }
+ }
+ }
+ while (nlpar != 0);
+ } else if (sp < 0 && LOOK_AHEAD(t, scommt)) {
+ reallyoutputstr(scommt);
+
+ for(;;) {
+ t = gpbc();
+ if (LOOK_AHEAD(t, ecommt)) {
+ reallyoutputstr(ecommt);
+ break;
+ }
+ if (t == EOF)
+ break;
+ reallyputchar(t);
+ }
+ } else if (t == '_' || isalpha(t)) {
p = inspect(t, token);
if (p != NULL)
- putback(l = gpbc());
+ pushback(l = gpbc());
if (p == NULL || (l != LPAREN &&
(macro_getdef(p)->type & NEEDARGS) != 0))
outputstr(token);
@@ -371,62 +418,7 @@ macro(void)
emit_synchline();
bufbase = bbase[ilevel];
continue;
- }
- /*
- * non-alpha token possibly seen..
- * [the order of else if .. stmts is important.]
- */
- else if (LOOK_AHEAD(t,lquote)) { /* strip quotes */
- nlpar = 0;
- record(quotes, nlpar++);
- /*
- * Opening quote: scan forward until matching
- * closing quote has been found.
- */
- do {
-
- l = gpbc();
- if (LOOK_AHEAD(l,rquote)) {
- if (--nlpar > 0)
- outputstr(rquote);
- } else if (LOOK_AHEAD(l,lquote)) {
- record(quotes, nlpar++);
- outputstr(lquote);
- } else if (l == EOF) {
- if (nlpar == 1)
- warnx("unclosed quote:");
- else
- warnx("%d unclosed quotes:", nlpar);
- dump_stack(quotes, nlpar);
- exit(1);
- } else {
- if (nlpar > 0) {
- if (sp < 0)
- reallyputchar(l);
- else
- CHRSAVE(l);
- }
- }
- }
- while (nlpar != 0);
- }
-
- else if (sp < 0 && LOOK_AHEAD(t, scommt)) {
- reallyoutputstr(scommt);
-
- for(;;) {
- t = gpbc();
- if (LOOK_AHEAD(t, ecommt)) {
- reallyoutputstr(ecommt);
- break;
- }
- if (t == EOF)
- break;
- reallyputchar(t);
- }
- }
-
- else if (sp < 0) { /* not in a macro at all */
+ } else if (sp < 0) { /* not in a macro at all */
reallyputchar(t); /* output directly.. */
}
@@ -437,7 +429,7 @@ macro(void)
chrsave(t);
while (isspace(l = gpbc()))
; /* skip blank, tab, nl.. */
- putback(l);
+ pushback(l);
record(paren, PARLEV++);
break;
@@ -464,7 +456,7 @@ macro(void)
chrsave(EOS); /* new argument */
while (isspace(l = gpbc()))
;
- putback(l);
+ pushback(l);
pushs(ep);
} else
chrsave(t);
@@ -550,7 +542,7 @@ inspect(int c, char *tp)
while ((isalnum(c = gpbc()) || c == '_') && tp < etp)
*tp++ = c;
if (c != EOF)
- PUTBACK(c);
+ PUSHBACK(c);
*tp = EOS;
/* token is too long, it won't match anything, but it can still
* be output. */
diff --git a/usr.bin/m4/misc.c b/usr.bin/m4/misc.c
index e71716978d3..dee43f34b5c 100644
--- a/usr.bin/m4/misc.c
+++ b/usr.bin/m4/misc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: misc.c,v 1.32 2005/08/06 16:22:26 espie Exp $ */
+/* $OpenBSD: misc.c,v 1.33 2005/09/06 15:33:21 espie Exp $ */
/* $NetBSD: misc.c,v 1.6 1995/09/28 05:37:41 tls Exp $ */
/*
@@ -76,10 +76,10 @@ indx(const char *s1, const char *s2)
return (t - s1);
}
/*
- * putback - push character back onto input
+ * pushback - push character back onto input
*/
void
-putback(int c)
+pushback(int c)
{
if (c == EOF)
return;
@@ -90,7 +90,7 @@ putback(int c)
/*
* pbstr - push string back onto input
- * putback is replicated to improve
+ * pushback is replicated to improve
* performance.
*/
void
@@ -129,7 +129,7 @@ pbnumbase(int n, int base, int d)
num = (n < 0) ? -n : n;
do {
- putback(digits[num % base]);
+ pushback(digits[num % base]);
printed++;
}
while ((num /= base) > 0);
@@ -137,10 +137,10 @@ pbnumbase(int n, int base, int d)
if (n < 0)
printed++;
while (printed++ < d)
- putback('0');
+ pushback('0');
if (n < 0)
- putback('-');
+ pushback('-');
}
/*
@@ -150,7 +150,7 @@ void
pbunsigned(unsigned long n)
{
do {
- putback(n % 10 + '0');
+ pushback(n % 10 + '0');
}
while ((n /= 10) > 0);
}
@@ -332,10 +332,11 @@ obtain_char(struct input_file *f)
{
if (f->c == EOF)
return EOF;
- else if (f->c == '\n')
- f->lineno++;
f->c = fgetc(f->file);
+ if (f->c == '\n')
+ f->lineno++;
+
return f->c;
}