diff options
Diffstat (limited to 'usr.bin/vgrind/regexp.c')
-rw-r--r-- | usr.bin/vgrind/regexp.c | 576 |
1 files changed, 0 insertions, 576 deletions
diff --git a/usr.bin/vgrind/regexp.c b/usr.bin/vgrind/regexp.c deleted file mode 100644 index 4c0fd96e1be..00000000000 --- a/usr.bin/vgrind/regexp.c +++ /dev/null @@ -1,576 +0,0 @@ -/* $OpenBSD: regexp.c,v 1.8 2009/10/27 23:59:46 deraadt Exp $ */ -/* $NetBSD: regexp.c,v 1.3 1994/11/17 08:28:02 jtc Exp $ */ - -/* - * Copyright (c) 1980, 1993 - * The Regents of the University of California. All rights reserved. - * - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <ctype.h> -#include <stdlib.h> -#include <string.h> -#include "extern.h" - -#define FALSE 0 -#define TRUE !(FALSE) -#define NIL 0 - -static void expconv(void); - -boolean x_escaped; /* true if we are currently x_escaped */ -char *x_start; /* start of string */ -boolean l_onecase; /* true if upper and lower equivalent */ - -#define makelower(c) (isupper((c)) ? tolower((c)) : (c)) - -/* STRNCMP - like strncmp except that we convert the - * first string to lower case before comparing - * if l_onecase is set. - */ - -int -STRNCMP(char *s1, char *s2, int len) -{ - if (l_onecase) { - do - if (*s2 - makelower(*s1)) - return (*s2 - makelower(*s1)); - else { - s2++; - s1++; - } - while (--len); - } else { - do - if (*s2 - *s1) - return (*s2 - *s1); - else { - s2++; - s1++; - } - while (--len); - } - return(0); -} - -/* The following routine converts an irregular expression to - * internal format. - * - * Either meta symbols (\a \d or \p) or character strings or - * operations ( alternation or parenthesizing ) can be - * specified. Each starts with a descriptor byte. The descriptor - * byte has STR set for strings, META set for meta symbols - * and OPER set for operations. - * The descriptor byte can also have the OPT bit set if the object - * defined is optional. Also ALT can be set to indicate an alternation. - * - * For metasymbols the byte following the descriptor byte identifies - * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For - * strings the byte after the descriptor is a character count for - * the string: - * - * meta symbols := descriptor - * symbol - * - * strings := descriptor - * character count - * the string - * - * operatins := descriptor - * symbol - * character count - */ - -/* - * handy macros for accessing parts of match blocks - */ -#define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */ -#define MNEXT(A) (A+2) /* character following a metasymbol block */ - -#define OSYM(A) (*(A+1)) /* symbol in an operation block */ -#define OCNT(A) (*(A+2)) /* character count */ -#define ONEXT(A) (A+3) /* next character after the operation */ -#define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */ - -#define SCNT(A) (*(A+1)) /* byte count of a string */ -#define SSTR(A) (A+2) /* address of the string */ -#define SNEXT(A) (A+2+*(A+1)) /* character following the string */ - -/* - * bit flags in the descriptor - */ -#define OPT 1 -#define STR 2 -#define META 4 -#define ALT 8 -#define OPER 16 - -static char *ccre; /* pointer to current position in converted exp*/ -static char *ure; /* pointer current position in unconverted exp */ - -char * -convexp(char *re) -{ - char *cre; /* pointer to converted regular expression */ - - /* allocate room for the converted expression */ - if (re == NIL) - return (NIL); - if (*re == '\0') - return (NIL); - cre = malloc (4 * strlen(re) + 3); - ccre = cre; - ure = re; - - /* start the conversion with a \a */ - *cre = META | OPT; - MSYM(cre) = 'a'; - ccre = MNEXT(cre); - - /* start the conversion (its recursive) */ - expconv (); - *ccre = 0; - return (cre); -} - -static void -expconv(void) -{ - char *cs; /* pointer to current symbol in converted exp */ - char c; /* character being processed */ - char *acs; /* pointer to last alternate */ - int temp; - - /* let the conversion begin */ - acs = NIL; - cs = NIL; - while (*ure != NIL) { - switch (c = *ure++) { - - case '\\': - switch (c = *ure++) { - - /* escaped characters are just characters */ - default: - if (cs == NIL || (*cs & STR) == 0) { - cs = ccre; - *cs = STR; - SCNT(cs) = 1; - ccre += 2; - } else - SCNT(cs)++; - *ccre++ = c; - break; - - /* normal(?) metacharacters */ - case 'a': - case 'd': - case 'e': - case 'p': - if (acs != NIL && acs != cs) { - do { - temp = OCNT(acs); - OCNT(acs) = ccre - acs; - acs -= temp; - } while (temp != 0); - acs = NIL; - } - cs = ccre; - *cs = META; - MSYM(cs) = c; - ccre = MNEXT(cs); - break; - } - break; - - /* just put the symbol in */ - case '^': - case '$': - if (acs != NIL && acs != cs) { - do { - temp = OCNT(acs); - OCNT(acs) = ccre - acs; - acs -= temp; - } while (temp != 0); - acs = NIL; - } - cs = ccre; - *cs = META; - MSYM(cs) = c; - ccre = MNEXT(cs); - break; - - /* mark the last match sequence as optional */ - case '?': - if (cs) - *cs = *cs | OPT; - break; - - /* recurse and define a subexpression */ - case '(': - if (acs != NIL && acs != cs) { - do { - temp = OCNT(acs); - OCNT(acs) = ccre - acs; - acs -= temp; - } while (temp != 0); - acs = NIL; - } - cs = ccre; - *cs = OPER; - OSYM(cs) = '('; - ccre = ONEXT(cs); - expconv (); - OCNT(cs) = ccre - cs; /* offset to next symbol */ - break; - - /* return from a recursion */ - case ')': - if (acs != NIL) { - do { - temp = OCNT(acs); - OCNT(acs) = ccre - acs; - acs -= temp; - } while (temp != 0); - acs = NIL; - } - cs = ccre; - *cs = META; - MSYM(cs) = c; - ccre = MNEXT(cs); - return; - - /* mark the last match sequence as having an alternate */ - /* the third byte will contain an offset to jump over the */ - /* alternate match in case the first did not fail */ - case '|': - if (acs != NIL && acs != cs) - OCNT(ccre) = ccre - acs; /* make a back pointer */ - else - OCNT(ccre) = 0; - *cs |= ALT; - cs = ccre; - *cs = OPER; - OSYM(cs) = '|'; - ccre = ONEXT(cs); - acs = cs; /* remember that the pointer is to be filles */ - break; - - /* if its not a metasymbol just build a character string */ - default: - if (cs == NIL || (*cs & STR) == 0) { - cs = ccre; - *cs = STR; - SCNT(cs) = 1; - ccre = SSTR(cs); - } else - SCNT(cs)++; - *ccre++ = c; - break; - } - } - if (acs != NIL) { - do { - temp = OCNT(acs); - OCNT(acs) = ccre - acs; - acs -= temp; - } while (temp != 0); - acs = NIL; - } - return; -} -/* end of converter */ - - -/* - * The following routine recognises an irregular expression - * with the following special characters: - * - * \? - means last match was optional - * \a - matches any number of characters - * \d - matches any number of spaces and tabs - * \p - matches any number of alphanumeric - * characters. The - * characters matched will be copied into - * the area pointed to by 'name'. - * \| - alternation - * \( \) - grouping used mostly for alternation and - * optionality - * - * The irregular expression must be translated to internal form - * prior to calling this routine - * - * The value returned is the pointer to the first non \a - * character matched. - */ - -char * -expmatch(char *s, char *re, char *mstring) -{ - char *cs; /* the current symbol */ - char *ptr,*s1; /* temporary pointer */ - boolean matched; /* a temporary boolean */ - - /* initial conditions */ - if (re == NIL) - return (NIL); - cs = re; - matched = FALSE; - - /* loop till expression string is exhausted (or at least pretty tired) */ - while (*cs) { - switch (*cs & (OPER | STR | META)) { - - /* try to match a string */ - case STR: - matched = !STRNCMP (s, SSTR(cs), SCNT(cs)); - if (matched) { - - /* hoorah it matches */ - s += SCNT(cs); - cs = SNEXT(cs); - } else if (*cs & ALT) { - - /* alternation, skip to next expression */ - cs = SNEXT(cs); - } else if (*cs & OPT) { - - /* the match is optional */ - cs = SNEXT(cs); - matched = 1; /* indicate a successful match */ - } else { - - /* no match, error return */ - return (NIL); - } - break; - - /* an operator, do something fancy */ - case OPER: - switch (OSYM(cs)) { - - /* this is an alternation */ - case '|': - if (matched) - - /* last thing in the alternation was a match, skip ahead */ - cs = OPTR(cs); - else - - /* no match, keep trying */ - cs = ONEXT(cs); - break; - - /* this is a grouping, recurse */ - case '(': - ptr = expmatch (s, ONEXT(cs), mstring); - if (ptr != NIL) { - - /* the subexpression matched */ - matched = 1; - s = ptr; - } else if (*cs & ALT) { - - /* alternation, skip to next expression */ - matched = 0; - } else if (*cs & OPT) { - - /* the match is optional */ - matched = 1; /* indicate a successful match */ - } else { - - /* no match, error return */ - return (NIL); - } - cs = OPTR(cs); - break; - } - break; - - /* try to match a metasymbol */ - case META: - switch (MSYM(cs)) { - - /* try to match anything and remember what was matched */ - case 'p': - /* - * This is really the same as trying the match the - * remaining parts of the expression to any subset - * of the string. - */ - s1 = s; - do { - ptr = expmatch (s1, MNEXT(cs), mstring); - if (ptr != NIL && s1 != s) { - - /* we have a match, remember the match */ - strncpy (mstring, s, s1 - s); - mstring[s1 - s] = '\0'; - return (ptr); - } else if (ptr != NIL && (*cs & OPT)) { - - /* it was optional so no match is ok */ - return (ptr); - } else if (ptr != NIL) { - - /* not optional and we still matched */ - return (NIL); - } - if (!isalnum(*s1) && *s1 != '_') - return (NIL); - if (*s1 == '\\') - x_escaped = x_escaped ? FALSE : TRUE; - else - x_escaped = FALSE; - } while (*s1++); - return (NIL); - - /* try to match anything */ - case 'a': - /* - * This is really the same as trying the match the - * remaining parts of the expression to any subset - * of the string. - */ - s1 = s; - do { - ptr = expmatch (s1, MNEXT(cs), mstring); - if (ptr != NIL && s1 != s) { - - /* we have a match */ - return (ptr); - } else if (ptr != NIL && (*cs & OPT)) { - - /* it was optional so no match is ok */ - return (ptr); - } else if (ptr != NIL) { - - /* not optional and we still matched */ - return (NIL); - } - if (*s1 == '\\') - x_escaped = x_escaped ? FALSE : TRUE; - else - x_escaped = FALSE; - } while (*s1++); - return (NIL); - - /* fail if we are currently x_escaped */ - case 'e': - if (x_escaped) - return(NIL); - cs = MNEXT(cs); - break; - - /* match any number of tabs and spaces */ - case 'd': - ptr = s; - while (*s == ' ' || *s == '\t') - s++; - if (s != ptr || s == x_start) { - - /* match, be happy */ - matched = 1; - cs = MNEXT(cs); - } else if (*s == '\n' || *s == '\0') { - - /* match, be happy */ - matched = 1; - cs = MNEXT(cs); - } else if (*cs & ALT) { - - /* try the next part */ - matched = 0; - cs = MNEXT(cs); - } else if (*cs & OPT) { - - /* doesn't matter */ - matched = 1; - cs = MNEXT(cs); - } else - - /* no match, error return */ - return (NIL); - break; - - /* check for end of line */ - case '$': - if (*s == '\0' || *s == '\n') { - - /* match, be happy */ - s++; - matched = 1; - cs = MNEXT(cs); - } else if (*cs & ALT) { - - /* try the next part */ - matched = 0; - cs = MNEXT(cs); - } else if (*cs & OPT) { - - /* doesn't matter */ - matched = 1; - cs = MNEXT(cs); - } else - - /* no match, error return */ - return (NIL); - break; - - /* check for start of line */ - case '^': - if (s == x_start) { - - /* match, be happy */ - matched = 1; - cs = MNEXT(cs); - } else if (*cs & ALT) { - - /* try the next part */ - matched = 0; - cs = MNEXT(cs); - } else if (*cs & OPT) { - - /* doesn't matter */ - matched = 1; - cs = MNEXT(cs); - } else - - /* no match, error return */ - return (NIL); - break; - - /* end of a subexpression, return success */ - case ')': - return (s); - } - break; - } - } - return (s); -} |