summary refs log tree commit diff
path: root/usr.bin/awk/b.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr.bin/awk/b.c')
-rw-r--r-- usr.bin/awk/b.c 73
1 files changed, 60 insertions, 13 deletions
diff --git a/usr.bin/awk/b.c b/usr.bin/awk/b.c
index 288d022cffe..c6039e6740b 100644
--- a/usr.bin/awk/b.c
+++ b/usr.bin/awk/b.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: b.c,v 1.10 2001/09/08 00:12:40 millert Exp $ */
+/* $OpenBSD: b.c,v 1.11 2002/12/19 21:24:28 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -76,7 +76,7 @@ int patlen;
fa *fatab[NFA];
int nfatab = 0; /* entries in fatab */
-fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */
+fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
{
int i, use, nuse;
fa *pfa;
@@ -94,7 +94,7 @@ fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */
return mkdfa(s, anchor);
for (i = 0; i < nfatab; i++) /* is it there already? */
if (fatab[i]->anchor == anchor
- && strcmp(fatab[i]->restr, s) == 0) {
+ && strcmp((const char *) fatab[i]->restr, s) == 0) {
fatab[i]->use = now++;
return fatab[i];
}
@@ -118,7 +118,7 @@ fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */
return pfa;
}
-fa *mkdfa(char *s, int anchor) /* does the real work of making a dfa */
+fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
/* anchor = 1 for anchored matches, else 0 */
{
Node *p, *p1;
@@ -283,7 +283,7 @@ int quoted(char **pp) /* pick up next thing after a \\ */
return c;
}
-char *cclenter(char *argp) /* add a character class */
+char *cclenter(const char *argp) /* add a character class */
{
int i, c, c2;
uschar *p = (uschar *) argp;
@@ -329,7 +329,7 @@ char *cclenter(char *argp) /* add a character class */
return (char *) tostring((char *) buf);
}
-void overflo(char *s)
+void overflo(const char *s)
{
FATAL("regular expression too big: %.30s...", s);
}
@@ -447,7 +447,7 @@ void follow(Node *v) /* collects leaves that can follow v into setvec */
}
}
-int member(int c, char *sarg) /* is c in s? */
+int member(int c, const char *sarg) /* is c in s? */
{
uschar *s = (uschar *) sarg;
@@ -457,7 +457,7 @@ int member(int c, char *sarg) /* is c in s? */
return(0);
}
-int match(fa *f, char *p0) /* shortest match ? */
+int match(fa *f, const char *p0) /* shortest match ? */
{
int s, ns;
uschar *p = (uschar *) p0;
@@ -476,7 +476,7 @@ int match(fa *f, char *p0) /* shortest match ? */
return(0);
}
-int pmatch(fa *f, char *p0) /* longest match, for sub */
+int pmatch(fa *f, const char *p0) /* longest match, for sub */
{
int s, ns;
uschar *p = (uschar *) p0;
@@ -529,7 +529,7 @@ int pmatch(fa *f, char *p0) /* longest match, for sub */
return (0);
}
-int nematch(fa *f, char *p0) /* non-empty match, for sub */
+int nematch(fa *f, const char *p0) /* non-empty match, for sub */
{
int s, ns;
uschar *p = (uschar *) p0;
@@ -581,15 +581,17 @@ int nematch(fa *f, char *p0) /* non-empty match, for sub */
return (0);
}
-Node *reparse(char *p) /* parses regular expression pointed to by p */
+Node *reparse(const char *p) /* parses regular expression pointed to by p */
{ /* uses relex() to scan regular expression */
Node *np;
dprintf( ("reparse <%s>\n", p) );
lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
rtok = relex();
+ /* GNU compatibility: an empty regexp matches anything */
if (rtok == '\0')
- FATAL("empty regular expression");
+ /* FATAL("empty regular expression"); previous */
+ return(op2(ALL, NIL, NIL));
np = regexp();
if (rtok != '\0')
FATAL("syntax error in regular expression %s at %s", lastre, prestr);
@@ -684,6 +686,37 @@ Node *unary(Node *np)
}
}
+/*
+ * Character class definitions conformant to the POSIX locale as
+ * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
+ * and operating character sets are both ASCII (ISO646) or supersets
+ * thereof.
+ *
+ * Note that to avoid overflowing the temporary buffer used in
+ * relex(), each expanded character class (prior to range expansion)
+ * must be less than twice the size of its full name.
+ *
+ * relex() copies cc_expand up to its terminating NUL, so an expansion
+ * must not contain (or start with) a \000 byte; a leading NUL would
+ * make the whole class expand to nothing.
+ */
+struct charclass {
+	const char *cc_name;	/* class name as written between [: and :] */
+	int cc_namelen;		/* strlen(cc_name) */
+	const char *cc_expand;	/* bracket-expression text substituted for the name */
+} charclasses[] = {
+	{ "alnum",	5,	"0-9A-Za-z" },
+	{ "alpha",	5,	"A-Za-z" },
+	{ "blank",	5,	" \t" },
+	{ "cntrl",	5,	"\001-\037\177" },	/* NUL omitted, see above */
+	{ "digit",	5,	"0-9" },
+	{ "graph",	5,	"\041-\176" },
+	{ "lower",	5,	"a-z" },
+	{ "print",	5,	" \041-\176" },
+	{ "punct",	5,	"\041-\057\072-\100\133-\140\173-\176" },
+	{ "space",	5,	" \f\n\r\t\v" },
+	{ "upper",	5,	"A-Z" },
+	{ "xdigit",	6,	"0-9A-Fa-f" },
+	{ NULL,		0,	NULL },		/* sentinel: ends the lookup loop */
+};
+
+
int relex(void) /* lexical analyzer for reparse */
{
int c, n;
@@ -691,6 +724,8 @@ int relex(void) /* lexical analyzer for reparse */
static uschar *buf = 0;
static int bufsz = 100;
uschar *bp;
+ struct charclass *cc;
+ const uschar *p;
switch (c = *prestr++) {
case '|': return OR;
@@ -720,7 +755,7 @@ int relex(void) /* lexical analyzer for reparse */
}
else
cflag = 0;
- n = 2 * strlen(prestr)+1;
+ n = 2 * strlen((const char *) prestr)+1;
if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
FATAL("out of space for reg expr %.10s...", lastre);
for (; ; ) {
@@ -731,6 +766,18 @@ int relex(void) /* lexical analyzer for reparse */
*bp++ = c;
/* } else if (c == '\n') { */
/* FATAL("newline in character class %.20s...", lastre); */
+ } else if (c == '[' && *prestr == ':') {
+ /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
+ for (cc = charclasses; cc->cc_name; cc++)
+ if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
+ break;
+ if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
+ prestr[2 + cc->cc_namelen] == ']') {
+ prestr += cc->cc_namelen + 3;
+ for (p = (const uschar *) cc->cc_expand; *p; p++)
+ *bp++ = *p;
+ } else
+ *bp++ = c;
} else if (c == '\0') {
FATAL("nonterminated character class %.20s", lastre);
} else if (bp == buf) { /* 1st char is special */