/*	$OpenBSD: rxp.c,v 1.5 2002/02/16 21:27:11 millert Exp $	*/
/*	$NetBSD: rxp.c,v 1.5 1995/04/22 10:17:00 cgd Exp $	*/

/*-
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
 * Commodore Business Machines.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
#if 0
static char sccsid[] = "@(#)rxp.c	8.1 (Berkeley) 5/31/93";
#else
static char rcsid[] = "$OpenBSD: rxp.c,v 1.5 2002/02/16 21:27:11 millert Exp $";
#endif
#endif /* not lint */

/*
 * regular expression parser
 *
 * external functions and return values are:
 * rxp_compile(s)
 *	TRUE	success
 *	FALSE	parse failure; error message will be in char rxperr[]
 * metas are:
 *	{...}	optional pattern, equialent to [...|]
 *	|	alternate pattern
 *	[...]	pattern delimiters
 *
 * rxp_match(s)
 *	TRUE	string s matches compiled pattern
 *	FALSE	match failure or regexp error
 *
 * rxp_expand()
 *	char *	reverse-engineered regular expression string
 *	NULL	regexp error
 */

#include <stdio.h>
#include <ctype.h>
#include "quiz.h"
					/* regexp tokens,	arg */
#define	LIT	(-1)			/* literal character,	char */
#define	SOT	(-2)			/* start text anchor,	- */
#define	EOT	(-3)			/* end text anchor,	- */
#define	GRP_S	(-4)			/* start alternate grp,	ptr_to_end */
#define	GRP_E	(-5)			/* end group,		- */
#define	ALT_S	(-6)			/* alternate starts,	ptr_to_next */
#define	ALT_E	(-7)			/* alternate ends,	- */
#define	END	(-8)			/* end of regexp,	- */

typedef short Rxp_t;			/* type for regexp tokens */

static Rxp_t rxpbuf[RXP_LINE_SZ];	/* compiled regular expression buffer */
char rxperr[128];			/* parser error message */

static int	 rxp__compile(const char *, int);
static char	*rxp__expand(int);
static int	 rxp__match(const char *, int, Rxp_t *, Rxp_t *, const char *);

int
rxp_compile(s)
	const char *	s;
{
	return (rxp__compile(s, TRUE));
}

static int
rxp__compile(s, first)
	const char *s;
	int first;
{
	static Rxp_t *rp;
	static const char *sp;
	Rxp_t *grp_ptr;
	Rxp_t *alt_ptr;
	int esc, err;

	if (s == NULL) {
		(void)snprintf(rxperr, sizeof(rxperr),
		    "null string sent to rxp_compile");
		return(FALSE);
	}
	esc = 0;
	if (first) {
		rp = rxpbuf;
		sp = s;
		*rp++ = SOT;	/* auto-anchor: pat is really ^pat$ */
		*rp++ = GRP_S;	/* auto-group: ^pat$ is really ^[pat]$ */
		*rp++ = 0;
	}
	*rp++ = ALT_S;
	alt_ptr = rp;
	*rp++ = 0;
	for (; *sp; ++sp) {
		if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
			(void)snprintf(rxperr, sizeof(rxperr),
			    "regular expression too long %s", s);
			return (FALSE);
		}
		if (*sp == ':' && !esc)
			break;
		if (esc) {
			*rp++ = LIT;
			*rp++ = *sp;
			esc = 0;
		}
		else switch (*sp) {
		case '\\':
			esc = 1;
			break;
		case '{':
		case '[':
			*rp++ = GRP_S;
			grp_ptr = rp;
			*rp++ = 0;
			sp++;
			if ((err = rxp__compile(s, FALSE)) != TRUE)
				return (err);
			*rp++ = GRP_E;
			*grp_ptr = rp - rxpbuf;
			break;
		case '}':
		case ']':
		case '|':
			*rp++ = ALT_E;
			*alt_ptr = rp - rxpbuf;
			if (*sp != ']') {
				*rp++ = ALT_S;
				alt_ptr = rp;
				*rp++ = 0;
			}
			if (*sp != '|') {
				if (*sp != ']') {
					*rp++ = ALT_E;
					*alt_ptr = rp - rxpbuf;
				}
				if (first) {
					(void)snprintf(rxperr, sizeof(rxperr),
					    "unmatched alternator in regexp %s",
					     s);
					return (FALSE);
				}
				return (TRUE);
			}
			break;
		default:
			*rp++ = LIT;
			*rp++ = *sp;
			esc = 0;
			break;
		}
	}
	if (!first) {
		(void)snprintf(rxperr, sizeof(rxperr),
		    "unmatched alternator in regexp %s", s);
		return (FALSE);
	}
	*rp++ = ALT_E;
	*alt_ptr = rp - rxpbuf;
	*rp++ = GRP_E;
	*(rxpbuf + 2) = rp - rxpbuf;
	*rp++ = EOT;
	*rp = END;
	return (TRUE);
}

/*
 * match string against compiled regular expression
 */
int
rxp_match(s)
	const char *s;
{
	return (rxp__match(s, TRUE, NULL, NULL, NULL));
}

static int
rxp__match(s, first, j_succ, j_fail, sp_fail)
	const char *s;
	int first;
	Rxp_t *j_succ;		/* jump here on successful alt match */
	Rxp_t *j_fail;		/* jump here on failed match */
	const char *sp_fail;		/* reset sp to here on failed match */
{
	static Rxp_t *rp;
	static const char *sp;
	int ch;
	Rxp_t *grp_end = NULL;
	int err;

	if (first) {
		rp = rxpbuf;
		sp = s;
	}
	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
		switch(*rp) {
		case LIT:
			rp++;
			ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
			if (ch != *sp++) {
				rp = j_fail;
				sp = sp_fail;
				return (TRUE);
			}
			rp++;
			break;
		case SOT:
			if (sp != s)
				return (FALSE);
			rp++;
			break;
		case EOT:
			if (*sp != 0)
				return (FALSE);
			rp++;
			break;
		case GRP_S:
			rp++;
			grp_end = rxpbuf + *rp++;
			break;
		case ALT_S:
			rp++;
			if ((err = rxp__match(sp,
			    FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
				return (err);
			break;
		case ALT_E:
			rp = j_succ;
			return (TRUE);
		case GRP_E:
		default:
			return (FALSE);
		}
	return (*rp != END ? FALSE : TRUE);
}

/*
 * Reverse engineer the regular expression, by picking first of all alternates.
 */
char *
rxp_expand()
{
	return (rxp__expand(TRUE));
}

static char *
rxp__expand(first)
	int first;
{
	static char buf[RXP_LINE_SZ/2];
	static Rxp_t *rp;
	static char *bp;
	Rxp_t *grp_ptr;
	char *err;

	if (first) {
		rp = rxpbuf;
		bp = buf;
	}
	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
		switch(*rp) {
		case LIT:
			rp++;
			*bp++ = *rp++;
			break;
		case GRP_S:
			rp++;
			grp_ptr = rxpbuf + *rp;
			rp++;
			if ((err = rxp__expand(FALSE)) == NULL)
				return (err);
			rp = grp_ptr;
			break;
		case ALT_E:
			return (buf);
		case ALT_S:
			rp++;
			/* FALLTHROUGH */
		case SOT:
		case EOT:
		case GRP_E:
			rp++;
			break;
		default:
			return (NULL);
		}
	if (first) {
		if (*rp != END)
			return (NULL);
		*bp = '\0';
	}
	return (buf);
}