diff options
Diffstat (limited to 'usr.bin/awk/b.c')
-rw-r--r-- | usr.bin/awk/b.c | 73 |
1 files changed, 60 insertions, 13 deletions
diff --git a/usr.bin/awk/b.c b/usr.bin/awk/b.c index 288d022cffe..c6039e6740b 100644 --- a/usr.bin/awk/b.c +++ b/usr.bin/awk/b.c @@ -1,4 +1,4 @@ -/* $OpenBSD: b.c,v 1.10 2001/09/08 00:12:40 millert Exp $ */ +/* $OpenBSD: b.c,v 1.11 2002/12/19 21:24:28 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -76,7 +76,7 @@ int patlen; fa *fatab[NFA]; int nfatab = 0; /* entries in fatab */ -fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */ +fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ { int i, use, nuse; fa *pfa; @@ -94,7 +94,7 @@ fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */ return mkdfa(s, anchor); for (i = 0; i < nfatab; i++) /* is it there already? */ if (fatab[i]->anchor == anchor - && strcmp(fatab[i]->restr, s) == 0) { + && strcmp((const char *) fatab[i]->restr, s) == 0) { fatab[i]->use = now++; return fatab[i]; } @@ -118,7 +118,7 @@ fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */ return pfa; } -fa *mkdfa(char *s, int anchor) /* does the real work of making a dfa */ +fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ /* anchor = 1 for anchored matches, else 0 */ { Node *p, *p1; @@ -283,7 +283,7 @@ int quoted(char **pp) /* pick up next thing after a \\ */ return c; } -char *cclenter(char *argp) /* add a character class */ +char *cclenter(const char *argp) /* add a character class */ { int i, c, c2; uschar *p = (uschar *) argp; @@ -329,7 +329,7 @@ char *cclenter(char *argp) /* add a character class */ return (char *) tostring((char *) buf); } -void overflo(char *s) +void overflo(const char *s) { FATAL("regular expression too big: %.30s...", s); } @@ -447,7 +447,7 @@ void follow(Node *v) /* collects leaves that can follow v into setvec */ } } -int member(int c, char *sarg) /* is c in s? */ +int member(int c, const char *sarg) /* is c in s? */ { uschar *s = (uschar *) sarg; @@ -457,7 +457,7 @@ int member(int c, char *sarg) /* is c in s? */ return(0); } -int match(fa *f, char *p0) /* shortest match ? */ +int match(fa *f, const char *p0) /* shortest match ? */ { int s, ns; uschar *p = (uschar *) p0; @@ -476,7 +476,7 @@ int match(fa *f, char *p0) /* shortest match ? */ return(0); } -int pmatch(fa *f, char *p0) /* longest match, for sub */ +int pmatch(fa *f, const char *p0) /* longest match, for sub */ { int s, ns; uschar *p = (uschar *) p0; @@ -529,7 +529,7 @@ int pmatch(fa *f, char *p0) /* longest match, for sub */ return (0); } -int nematch(fa *f, char *p0) /* non-empty match, for sub */ +int nematch(fa *f, const char *p0) /* non-empty match, for sub */ { int s, ns; uschar *p = (uschar *) p0; @@ -581,15 +581,17 @@ int nematch(fa *f, char *p0) /* non-empty match, for sub */ return (0); } -Node *reparse(char *p) /* parses regular expression pointed to by p */ +Node *reparse(const char *p) /* parses regular expression pointed to by p */ { /* uses relex() to scan regular expression */ Node *np; dprintf( ("reparse <%s>\n", p) ); lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ rtok = relex(); + /* GNU compatibility: an empty regexp matches anything */ if (rtok == '\0') - FATAL("empty regular expression"); + /* FATAL("empty regular expression"); previous */ + return(op2(ALL, NIL, NIL)); np = regexp(); if (rtok != '\0') FATAL("syntax error in regular expression %s at %s", lastre, prestr); @@ -684,6 +686,37 @@ Node *unary(Node *np) } } +/* + * Character class definitions conformant to the POSIX locale as + * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source + * and operating character sets are both ASCII (ISO646) or supersets + * thereof. + * + * Note that to avoid overflowing the temporary buffer used in + * relex(), the expanded character class (prior to range expansion) + * must be less than twice the size of their full name. + */ +struct charclass { + const char *cc_name; + int cc_namelen; + const char *cc_expand; +} charclasses[] = { + { "alnum", 5, "0-9A-Za-z" }, + { "alpha", 5, "A-Za-z" }, + { "blank", 5, " \t" }, + { "cntrl", 5, "\000-\037\177" }, + { "digit", 5, "0-9" }, + { "graph", 5, "\041-\176" }, + { "lower", 5, "a-z" }, + { "print", 5, " \041-\176" }, + { "punct", 5, "\041-\057\072-\100\133-\140\173-\176" }, + { "space", 5, " \f\n\r\t\v" }, + { "upper", 5, "A-Z" }, + { "xdigit", 6, "0-9A-Fa-f" }, + { NULL, 0, NULL }, +}; + + int relex(void) /* lexical analyzer for reparse */ { int c, n; @@ -691,6 +724,8 @@ int relex(void) /* lexical analyzer for reparse */ static uschar *buf = 0; static int bufsz = 100; uschar *bp; + struct charclass *cc; + const uschar *p; switch (c = *prestr++) { case '|': return OR; @@ -720,7 +755,7 @@ int relex(void) /* lexical analyzer for reparse */ } else cflag = 0; - n = 2 * strlen(prestr)+1; + n = 2 * strlen((const char *) prestr)+1; if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0)) FATAL("out of space for reg expr %.10s...", lastre); for (; ; ) { @@ -731,6 +766,18 @@ int relex(void) /* lexical analyzer for reparse */ *bp++ = c; /* } else if (c == '\n') { */ /* FATAL("newline in character class %.20s...", lastre); */ + } else if (c == '[' && *prestr == ':') { + /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */ + for (cc = charclasses; cc->cc_name; cc++) + if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0) + break; + if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && + prestr[2 + cc->cc_namelen] == ']') { + prestr += cc->cc_namelen + 3; + for (p = (const uschar *) cc->cc_expand; *p; p++) + *bp++ = *p; + } else + *bp++ = c; } else if (c == '\0') { FATAL("nonterminated character class %.20s", lastre); } else if (bp == buf) { /* 1st char is special */ |