1 files changed, 366 insertions, 315 deletions
diff --git a/gnu/usr.bin/grep/dfa.c b/gnu/usr.bin/grep/dfa.c
index 4a454a9664a..048e901c5e8 100644
--- a/gnu/usr.bin/grep/dfa.c
+++ b/gnu/usr.bin/grep/dfa.c
@@ -1,5 +1,5 @@
 /* dfa.c - deterministic extended regexp routines for GNU
-   Copyright (C) 1988 Free Software Foundation, Inc.
+   Copyright 1988, 1998, 2000 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -13,23 +13,23 @@
 
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
-
-#ifndef lint
-static char rcsid[] = "$Id: dfa.c,v 1.1 1995/10/18 08:40:17 deraadt Exp $";
-#endif /* not lint */
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA */
 
 /* Written June, 1988 by Mike Haertel
    Modified July, 1988 by Arthur David Olson to assist BMG speedups  */
 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
 #include <assert.h>
 #include <ctype.h>
 #include <stdio.h>
 
+#include <sys/types.h>
 #ifdef STDC_HEADERS
 #include <stdlib.h>
 #else
-#include <sys/types.h>
 extern char *calloc(), *malloc(), *realloc();
 extern void free();
 #endif
@@ -42,23 +42,16 @@ extern void free();
 #include <strings.h>
 #endif
 
+#ifndef DEBUG	/* use the same approach as regex.c */
+#undef assert
+#define assert(e)
+#endif /* DEBUG */
+
 #ifndef isgraph
 #define isgraph(C) (isprint(C) && !isspace(C))
 #endif
 
-#ifdef isascii
-#define ISALPHA(C) (isascii(C) && isalpha(C))
-#define ISUPPER(C) (isascii(C) && isupper(C))
-#define ISLOWER(C) (isascii(C) && islower(C))
-#define ISDIGIT(C) (isascii(C) && isdigit(C))
-#define ISXDIGIT(C) (isascii(C) && isxdigit(C))
-#define ISSPACE(C) (isascii(C) && isspace(C))
-#define ISPUNCT(C) (isascii(C) && ispunct(C))
-#define ISALNUM(C) (isascii(C) && isalnum(C))
-#define ISPRINT(C) (isascii(C) && isprint(C))
-#define ISGRAPH(C) (isascii(C) && isgraph(C))
-#define ISCNTRL(C) (isascii(C) && iscntrl(C))
-#else
+#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
 #define ISALPHA(C) isalpha(C)
 #define ISUPPER(C) isupper(C)
 #define ISLOWER(C) islower(C)
@@ -70,57 +63,130 @@ extern void free();
 #define ISPRINT(C) isprint(C)
 #define ISGRAPH(C) isgraph(C)
 #define ISCNTRL(C) iscntrl(C)
+#else
+#define ISALPHA(C) (isascii(C) && isalpha(C))
+#define ISUPPER(C) (isascii(C) && isupper(C))
+#define ISLOWER(C) (isascii(C) && islower(C))
+#define ISDIGIT(C) (isascii(C) && isdigit(C))
+#define ISXDIGIT(C) (isascii(C) && isxdigit(C))
+#define ISSPACE(C) (isascii(C) && isspace(C))
+#define ISPUNCT(C) (isascii(C) && ispunct(C))
+#define ISALNUM(C) (isascii(C) && isalnum(C))
+#define ISPRINT(C) (isascii(C) && isprint(C))
+#define ISGRAPH(C) (isascii(C) && isgraph(C))
+#define ISCNTRL(C) (isascii(C) && iscntrl(C))
+#endif
+
+/* ISASCIIDIGIT differs from ISDIGIT, as follows:
+   - Its arg may be any int or unsigned int; it need not be an unsigned char.
+   - It's guaranteed to evaluate its argument exactly once.
+   - It's typically faster.
+   Posix 1003.2-1992 section 2.5.2.1 page 50 lines 1556-1558 says that
+   only '0' through '9' are digits.  Prefer ISASCIIDIGIT to ISDIGIT unless
+   it's important to use the locale's definition of `digit' even when the
+   host does not conform to Posix.  */
+#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9)
+
+/* If we (don't) have I18N.  */
+/* glibc defines _ */
+#ifndef _
+# ifdef HAVE_LIBINTL_H
+#  include <libintl.h>
+#  ifndef _
+#   define _(Str) gettext (Str)
+#  endif
+# else
+#  define _(Str) (Str)
+# endif
 #endif
 
-#include "dfa.h"
 #include "regex.h"
+#include "dfa.h"
 
-#if __STDC__
-typedef void *ptr_t;
-#else
-typedef char *ptr_t;
+/* HPUX, define those as macros in sys/param.h */
+#ifdef setbit
+# undef setbit
+#endif
+#ifdef clrbit
+# undef clrbit
 #endif
 
-static void	dfamust();
+static void dfamust PARAMS ((struct dfa *dfa));
+
+static ptr_t xcalloc PARAMS ((size_t n, size_t s));
+static ptr_t xmalloc PARAMS ((size_t n));
+static ptr_t xrealloc PARAMS ((ptr_t p, size_t n));
+#ifdef DEBUG
+static void prtok PARAMS ((token t));
+#endif
+static int tstbit PARAMS ((int b, charclass c));
+static void setbit PARAMS ((int b, charclass c));
+static void clrbit PARAMS ((int b, charclass c));
+static void copyset PARAMS ((charclass src, charclass dst));
+static void zeroset PARAMS ((charclass s));
+static void notset PARAMS ((charclass s));
+static int equal PARAMS ((charclass s1, charclass s2));
+static int charclass_index PARAMS ((charclass s));
+static int looking_at PARAMS ((const char *s));
+static token lex PARAMS ((void));
+static void addtok PARAMS ((token t));
+static void atom PARAMS ((void));
+static int nsubtoks PARAMS ((int tindex));
+static void copytoks PARAMS ((int tindex, int ntokens));
+static void closure PARAMS ((void));
+static void branch PARAMS ((void));
+static void regexp PARAMS ((int toplevel));
+static void copy PARAMS ((position_set *src, position_set *dst));
+static void insert PARAMS ((position p, position_set *s));
+static void merge PARAMS ((position_set *s1, position_set *s2, position_set *m));
+static void delete PARAMS ((position p, position_set *s));
+static int state_index PARAMS ((struct dfa *d, position_set *s,
+			  int newline, int letter));
+static void build_state PARAMS ((int s, struct dfa *d));
+static void build_state_zero PARAMS ((struct dfa *d));
+static char *icatalloc PARAMS ((char *old, char *new));
+static char *icpyalloc PARAMS ((char *string));
+static char *istrstr PARAMS ((char *lookin, char *lookfor));
+static void ifree PARAMS ((char *cp));
+static void freelist PARAMS ((char **cpp));
+static char **enlist PARAMS ((char **cpp, char *new, size_t len));
+static char **comsubs PARAMS ((char *left, char *right));
+static char **addlists PARAMS ((char **old, char **new));
+static char **inboth PARAMS ((char **left, char **right));
 
 static ptr_t
-xcalloc(n, s)
-     int n;
-     size_t s;
+xcalloc (size_t n, size_t s)
 {
   ptr_t r = calloc(n, s);
 
   if (!r)
-    dfaerror("Memory exhausted");
+    dfaerror(_("Memory exhausted"));
   return r;
 }
 
 static ptr_t
-xmalloc(n)
-     size_t n;
+xmalloc (size_t n)
 {
   ptr_t r = malloc(n);
 
   assert(n != 0);
   if (!r)
-    dfaerror("Memory exhausted");
+    dfaerror(_("Memory exhausted"));
   return r;
 }
 
 static ptr_t
-xrealloc(p, n)
-     ptr_t p;
-     size_t n;
+xrealloc (ptr_t p, size_t n)
 {
   ptr_t r = realloc(p, n);
 
   assert(n != 0);
   if (!r)
-    dfaerror("Memory exhausted");
+    dfaerror(_("Memory exhausted"));
   return r;
 }
 
-#define CALLOC(p, t, n) ((p) = (t *) xcalloc((n), sizeof (t)))
+#define CALLOC(p, t, n) ((p) = (t *) xcalloc((size_t)(n), sizeof (t)))
 #define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t)))
 #define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t)))
 
@@ -136,8 +202,7 @@ xrealloc(p, n)
 #ifdef DEBUG
 
 static void
-prtok(t)
-     token t;
+prtok (token t)
 {
   char *s;
 
@@ -175,33 +240,25 @@ prtok(t)
 /* Stuff pertaining to charclasses. */
 
 static int
-tstbit(b, c)
-     int b;
-     charclass c;
+tstbit (int b, charclass c)
 {
   return c[b / INTBITS] & 1 << b % INTBITS;
 }
 
 static void
-setbit(b, c)
-     int b;
-     charclass c;
+setbit (int b, charclass c)
 {
   c[b / INTBITS] |= 1 << b % INTBITS;
 }
 
 static void
-clrbit(b, c)
-     int b;
-     charclass c;
+clrbit (int b, charclass c)
 {
   c[b / INTBITS] &= ~(1 << b % INTBITS);
 }
 
 static void
-copyset(src, dst)
-     charclass src;
-     charclass dst;
+copyset (charclass src, charclass dst)
 {
   int i;
 
@@ -210,8 +267,7 @@ copyset(src, dst)
 }
 
 static void
-zeroset(s)
-     charclass s;
+zeroset (charclass s)
 {
   int i;
 
@@ -220,8 +276,7 @@ zeroset(s)
 }
 
 static void
-notset(s)
-     charclass s;
+notset (charclass s)
 {
   int i;
 
@@ -230,9 +285,7 @@ notset(s)
 }
 
 static int
-equal(s1, s2)
-     charclass s1;
-     charclass s2;
+equal (charclass s1, charclass s2)
 {
   int i;
 
@@ -247,8 +300,7 @@ static struct dfa *dfa;
 
 /* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */
 static int
-charclass_index(s)
-     charclass s;
+charclass_index (charclass s)
 {
   int i;
 
@@ -262,20 +314,22 @@ charclass_index(s)
 }
 
 /* Syntax bits controlling the behavior of the lexical analyzer. */
-static int syntax_bits, syntax_bits_set;
+static reg_syntax_t syntax_bits, syntax_bits_set;
 
 /* Flag for case-folding letters into sets. */
 static int case_fold;
 
+/* End-of-line byte in data.  */
+static unsigned char eolbyte;
+
 /* Entry point to set syntax options. */
 void
-dfasyntax(bits, fold)
-     int bits;
-     int fold;
+dfasyntax (reg_syntax_t bits, int fold, int eol)
 {
   syntax_bits_set = 1;
   syntax_bits = bits;
   case_fold = fold;
+  eolbyte = eol;
 }
 
 /* Lexical analyzer.  All the dross that deals with the obnoxious
@@ -285,7 +339,7 @@ dfasyntax(bits, fold)
 
 static char *lexstart;		/* Pointer to beginning of input string. */
 static char *lexptr;		/* Pointer to next input character. */
-static lexleft;			/* Number of characters remaining. */
+static int lexleft;		/* Number of characters remaining. */
 static token lasttok;		/* Previous token returned; initially END. */
 static int laststart;		/* True if we're separated from beginning or (, |
 				   only by zero-width characters. */
@@ -296,15 +350,21 @@ static int minrep, maxrep;	/* Repeat counts for {m,n}. */
 #define FETCH(c, eoferr)   	      \
   {			   	      \
     if (! lexleft)	   	      \
-      if (eoferr != 0)	   	      \
-	dfaerror(eoferr);  	      \
-      else		   	      \
-	return END;	   	      \
+      {				      \
+	if (eoferr != 0)	      \
+	  dfaerror (eoferr);	      \
+	else		   	      \
+	  return lasttok = END;	      \
+      }				      \
     (c) = (unsigned char) *lexptr++;  \
     --lexleft;		   	      \
   }
 
+#ifdef __STDC__
+#define FUNC(F, P) static int F(int c) { return P(c); }
+#else
 #define FUNC(F, P) static int F(c) int c; { return P(c); }
+#endif
 
 FUNC(is_alpha, ISALPHA)
 FUNC(is_upper, ISUPPER)
@@ -318,32 +378,41 @@ FUNC(is_print, ISPRINT)
 FUNC(is_graph, ISGRAPH)
 FUNC(is_cntrl, ISCNTRL)
 
+static int
+is_blank (int c)
+{
+   return (c == ' ' || c == '\t');
+}
+
 /* The following list maps the names of the Posix named character classes
    to predicate functions that determine whether a given character is in
    the class.  The leading [ has already been eaten by the lexical analyzer. */
 static struct {
-  char *name;
-  int (*pred)();
+  const char *name;
+  int (*pred) PARAMS ((int));
 } prednames[] = {
-  ":alpha:]", is_alpha,
-  ":upper:]", is_upper,
-  ":lower:]", is_lower,
-  ":digit:]", is_digit,
-  ":xdigit:]", is_xdigit,
-  ":space:]", is_space,
-  ":punct:]", is_punct,
-  ":alnum:]", is_alnum,
-  ":print:]", is_print,
-  ":graph:]", is_graph,
-  ":cntrl:]", is_cntrl,
-  0
+  { ":alpha:]", is_alpha },
+  { ":upper:]", is_upper },
+  { ":lower:]", is_lower },
+  { ":digit:]", is_digit },
+  { ":xdigit:]", is_xdigit },
+  { ":space:]", is_space },
+  { ":punct:]", is_punct },
+  { ":alnum:]", is_alnum },
+  { ":print:]", is_print },
+  { ":graph:]", is_graph },
+  { ":cntrl:]", is_cntrl },
+  { ":blank:]", is_blank },
+  { 0 }
 };
 
+/* Return non-zero if C is a `word-constituent' byte; zero otherwise.  */
+#define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_')
+
 static int
-looking_at(s)
-     char *s;
+looking_at (char const *s)
 {
-  int len;
+  size_t len;
 
   len = strlen(s);
   if (lexleft < len)
@@ -352,12 +421,14 @@ looking_at(s)
 }
 
 static token
-lex()
+lex (void)
 {
   token c, c1, c2;
   int backslash = 0, invert;
   charclass ccl;
   int i;
+  char lo[2];
+  char hi[2];
 
   /* Basic plan: We fetch a character.  If it's a backslash,
      we set the backslash flag and go through the loop again.
@@ -374,7 +445,7 @@ lex()
 	  if (backslash)
 	    goto normal_char;
 	  if (lexleft == 0)
-	    dfaerror("Unfinished \\ escape");
+	    dfaerror(_("Unfinished \\ escape"));
 	  backslash = 1;
 	  break;
 
@@ -420,23 +491,33 @@ lex()
 	    }
 	  goto normal_char;
 
+	case '`':
+	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+	    return lasttok = BEGLINE;	/* FIXME: should be beginning of string */
+	  goto normal_char;
+
+	case '\'':
+	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+	    return lasttok = ENDLINE;	/* FIXME: should be end of string */
+	  goto normal_char;
+
 	case '<':
-	  if (backslash)
+	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
 	    return lasttok = BEGWORD;
 	  goto normal_char;
 
 	case '>':
-	  if (backslash)
+	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
 	    return lasttok = ENDWORD;
 	  goto normal_char;
 
 	case 'b':
-	  if (backslash)
+	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
 	    return lasttok = LIMWORD;
 	  goto normal_char;
 
 	case 'B':
-	  if (backslash)
+	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
 	    return lasttok = NOTLIMWORD;
 	  goto normal_char;
 
@@ -470,44 +551,76 @@ lex()
 	    goto normal_char;
 	  if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0))
 	    goto normal_char;
-	  minrep = maxrep = 0;
+	  if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+	    goto normal_char;
+
+	  if (syntax_bits & RE_NO_BK_BRACES)
+	    {
+	      /* Scan ahead for a valid interval; if it's not valid,
+		 treat it as a literal '{'.  */
+	      int lo = -1, hi = -1;
+	      char const *p = lexptr;
+	      char const *lim = p + lexleft;
+	      for (;  p != lim && ISASCIIDIGIT (*p);  p++)
+		lo = (lo < 0 ? 0 : lo * 10) + *p - '0';
+	      if (p != lim && *p == ',')
+		while (++p != lim && ISASCIIDIGIT (*p))
+		  hi = (hi < 0 ? 0 : hi * 10) + *p - '0';
+	      else
+		hi = lo;
+	      if (p == lim || *p != '}'
+		  || lo < 0 || RE_DUP_MAX < hi || (0 <= hi && hi < lo))
+		goto normal_char;
+	    }
+
+	  minrep = 0;
 	  /* Cases:
 	     {M} - exact count
 	     {M,} - minimum count, maximum is infinity
-	     {,M} - 0 through M
 	     {M,N} - M through N */
-	  FETCH(c, "unfinished repeat count");
-	  if (ISDIGIT(c))
+	  FETCH(c, _("unfinished repeat count"));
+	  if (ISASCIIDIGIT (c))
 	    {
 	      minrep = c - '0';
 	      for (;;)
 		{
-		  FETCH(c, "unfinished repeat count");
-		  if (!ISDIGIT(c))
+		  FETCH(c, _("unfinished repeat count"));
+		  if (! ISASCIIDIGIT (c))
 		    break;
 		  minrep = 10 * minrep + c - '0';
 		}
 	    }
-	  else if (c != ',')
-	    dfaerror("malformed repeat count");
+	  else
+	    dfaerror(_("malformed repeat count"));
 	  if (c == ',')
-	    for (;;)
-	      {
-		FETCH(c, "unfinished repeat count");
-		if (!ISDIGIT(c))
-		  break;
-		maxrep = 10 * maxrep + c - '0';
-	      }
+	    {
+	      FETCH (c, _("unfinished repeat count"));
+	      if (! ISASCIIDIGIT (c))
+		maxrep = -1;
+	      else
+		{
+		  maxrep = c - '0';
+		  for (;;)
+		    {
+		      FETCH (c, _("unfinished repeat count"));
+		      if (! ISASCIIDIGIT (c))
+			break;
+		      maxrep = 10 * maxrep + c - '0';
+		    }
+		  if (0 <= maxrep && maxrep < minrep)
+		    dfaerror (_("malformed repeat count"));
+		}
+	    }
 	  else
 	    maxrep = minrep;
 	  if (!(syntax_bits & RE_NO_BK_BRACES))
 	    {
 	      if (c != '\\')
-		dfaerror("malformed repeat count");
-	      FETCH(c, "unfinished repeat count");
+		dfaerror(_("malformed repeat count"));
+	      FETCH(c, _("unfinished repeat count"));
 	    }
 	  if (c != '}')
-	    dfaerror("malformed repeat count");
+	    dfaerror(_("malformed repeat count"));
 	  laststart = 0;
 	  return lasttok = REPMN;
 
@@ -549,7 +662,7 @@ lex()
 	  zeroset(ccl);
 	  notset(ccl);
 	  if (!(syntax_bits & RE_DOT_NEWLINE))
-	    clrbit('\n', ccl);
+	    clrbit(eolbyte, ccl);
 	  if (syntax_bits & RE_DOT_NOT_NULL)
 	    clrbit('\0', ccl);
 	  laststart = 0;
@@ -557,25 +670,25 @@ lex()
 
 	case 'w':
 	case 'W':
-	  if (!backslash)
+	  if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
 	    goto normal_char;
 	  zeroset(ccl);
 	  for (c2 = 0; c2 < NOTCHAR; ++c2)
-	    if (ISALNUM(c2))
+	    if (IS_WORD_CONSTITUENT(c2))
 	      setbit(c2, ccl);
 	  if (c == 'W')
 	    notset(ccl);
 	  laststart = 0;
 	  return lasttok = CSET + charclass_index(ccl);
-	
+
 	case '[':
 	  if (backslash)
 	    goto normal_char;
 	  zeroset(ccl);
-	  FETCH(c, "Unbalanced [");
+	  FETCH(c, _("Unbalanced ["));
 	  if (c == '^')
 	    {
-	      FETCH(c, "Unbalanced [");
+	      FETCH(c, _("Unbalanced ["));
 	      invert = 1;
 	    }
 	  else
@@ -592,20 +705,25 @@ lex()
 		for (c1 = 0; prednames[c1].name; ++c1)
 		  if (looking_at(prednames[c1].name))
 		    {
+			int (*pred)() = prednames[c1].pred;
+			if (case_fold
+			    && (pred == is_upper || pred == is_lower))
+				pred = is_alpha;
+
 		      for (c2 = 0; c2 < NOTCHAR; ++c2)
-			if ((*prednames[c1].pred)(c2))
+			if ((*pred)(c2))
 			  setbit(c2, ccl);
 		      lexptr += strlen(prednames[c1].name);
 		      lexleft -= strlen(prednames[c1].name);
-		      FETCH(c1, "Unbalanced [");
+		      FETCH(c1, _("Unbalanced ["));
 		      goto skip;
 		    }
 	      if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
-		FETCH(c, "Unbalanced [");
-	      FETCH(c1, "Unbalanced [");
+		FETCH(c, _("Unbalanced ["));
+	      FETCH(c1, _("Unbalanced ["));
 	      if (c1 == '-')
 		{
-		  FETCH(c2, "Unbalanced [");
+		  FETCH(c2, _("Unbalanced ["));
 		  if (c2 == ']')
 		    {
 		      /* In the case [x-], the - is an ordinary hyphen,
@@ -618,22 +736,32 @@ lex()
 		    {
 		      if (c2 == '\\'
 			  && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
-			FETCH(c2, "Unbalanced [");
-		      FETCH(c1, "Unbalanced [");
+			FETCH(c2, _("Unbalanced ["));
+		      FETCH(c1, _("Unbalanced ["));
 		    }
 		}
 	      else
 		c2 = c;
-	      while (c <= c2)
+
+	      lo[0] = c;  lo[1] = '\0';
+	      hi[0] = c2; hi[1] = '\0';
+	      for (c = 0; c < NOTCHAR; c++)
 		{
-		  setbit(c, ccl);
-		  if (case_fold)
-		    if (ISUPPER(c))
-		      setbit(tolower(c), ccl);
-		    else if (ISLOWER(c))
-		      setbit(toupper(c), ccl);
-		  ++c;
+		  char ch[2];
+		  ch[0] = c;  ch[1] = '\0';
+		  if (strcoll (lo, ch) <= 0 && strcoll (ch, hi) <= 0)
+		    {
+		      setbit (c, ccl);
+		      if (case_fold)
+			{
+			  if (ISUPPER (c))
+			    setbit (tolower (c), ccl);
+			  else if (ISLOWER (c))
+			    setbit (toupper (c), ccl);
+			}
+		    }
 		}
+
 	    skip:
 	      ;
 	    }
@@ -642,7 +770,7 @@ lex()
 	    {
 	      notset(ccl);
 	      if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
-		clrbit('\n', ccl);
+		clrbit(eolbyte, ccl);
 	    }
 	  laststart = 0;
 	  return lasttok = CSET + charclass_index(ccl);
@@ -667,12 +795,13 @@ lex()
   /* The above loop should consume at most a backslash
      and some other character. */
   abort();
+  return END;	/* keeps pedantic compilers happy. */
 }
 
 /* Recursive descent parser for regular expressions. */
 
 static token tok;		/* Lookahead token. */
-static depth;			/* Current depth of a hypothetical stack
+static int depth;		/* Current depth of a hypothetical stack
 				   holding deferred productions.  This is
 				   used to determine the depth that will be
 				   required of the real stack later on in
@@ -681,8 +810,7 @@ static depth;			/* Current depth of a hypothetical stack
 /* Add the given token to the parse tree, maintaining the depth count and
    updating the maximum depth if necessary. */
 static void
-addtok(t)
-     token t;
+addtok (token t)
 {
   REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex);
   dfa->tokens[dfa->tindex++] = t;
@@ -740,14 +868,8 @@ addtok(t)
 
    The parser builds a parse tree in postfix form in an array of tokens. */
 
-#if __STDC__
-static void regexp(int);
-#else
-static void regexp();
-#endif
-
 static void
-atom()
+atom (void)
 {
   if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
       || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
@@ -761,7 +883,7 @@ atom()
       tok = lex();
       regexp(0);
       if (tok != RPAREN)
-	dfaerror("Unbalanced (");
+	dfaerror(_("Unbalanced ("));
       tok = lex();
     }
   else
@@ -770,7 +892,7 @@ atom()
 
 /* Return the number of tokens in the given subexpression. */
 static int
-nsubtoks(tindex)
+nsubtoks (int tindex)
 {
   int ntoks1;
 
@@ -792,8 +914,7 @@ nsubtoks(tindex)
 
 /* Copy the given subexpression to the top of the tree. */
 static void
-copytoks(tindex, ntokens)
-     int tindex, ntokens;
+copytoks (int tindex, int ntokens)
 {
   int i;
 
@@ -802,7 +923,7 @@ copytoks(tindex, ntokens)
 }
 
 static void
-closure()
+closure (void)
 {
   int tindex, ntokens, i;
 
@@ -812,7 +933,7 @@ closure()
       {
 	ntokens = nsubtoks(dfa->tindex);
 	tindex = dfa->tindex - ntokens;
-	if (maxrep == 0)
+	if (maxrep < 0)
 	  addtok(PLUS);
 	if (minrep == 0)
 	  addtok(QMARK);
@@ -837,7 +958,7 @@ closure()
 }
 
 static void
-branch()
+branch (void)
 {
   closure();
   while (tok != RPAREN && tok != OR && tok >= 0)
@@ -848,8 +969,7 @@ branch()
 }
 
 static void
-regexp(toplevel)
-     int toplevel;
+regexp (int toplevel)
 {
   branch();
   while (tok == OR)
@@ -867,11 +987,7 @@ regexp(toplevel)
    length of the string, so s can include NUL characters.  D is a pointer to
    the struct dfa to parse into. */
 void
-dfaparse(s, len, d)
-     char *s;
-     size_t len;
-     struct dfa *d;
-
+dfaparse (char *s, size_t len, struct dfa *d)
 {
   dfa = d;
   lexstart = lexptr = s;
@@ -881,7 +997,7 @@ dfaparse(s, len, d)
   parens = 0;
 
   if (! syntax_bits_set)
-    dfaerror("No syntax specified");
+    dfaerror(_("No syntax specified"));
 
   tok = lex();
   depth = d->depth;
@@ -889,7 +1005,7 @@ dfaparse(s, len, d)
   regexp(1);
 
   if (tok != END)
-    dfaerror("Unbalanced )");
+    dfaerror(_("Unbalanced )"));
 
   addtok(END - d->nregexps);
   addtok(CAT);
@@ -904,9 +1020,7 @@ dfaparse(s, len, d)
 
 /* Copy one set to another; the destination must be large enough. */
 static void
-copy(src, dst)
-     position_set *src;
-     position_set *dst;
+copy (position_set *src, position_set *dst)
 {
   int i;
 
@@ -920,15 +1034,13 @@ copy(src, dst)
    the same index then their constraints are logically or'd together.
    S->elems must point to an array large enough to hold the resulting set. */
 static void
-insert(p, s)
-     position p;
-     position_set *s;
+insert (position p, position_set *s)
 {
   int i;
   position t1, t2;
 
   for (i = 0; i < s->nelem && p.index < s->elems[i].index; ++i)
-    ;
+    continue;
   if (i < s->nelem && p.index == s->elems[i].index)
     s->elems[i].constraint |= p.constraint;
   else
@@ -947,10 +1059,7 @@ insert(p, s)
 /* Merge two sets of positions into a third.  The result is exactly as if
    the positions of both sets were inserted into an initially empty set. */
 static void
-merge(s1, s2, m)
-     position_set *s1;
-     position_set *s2;
-     position_set *m;
+merge (position_set *s1, position_set *s2, position_set *m)
 {
   int i = 0, j = 0;
 
@@ -973,9 +1082,7 @@ merge(s1, s2, m)
 
 /* Delete a position from a set. */
 static void
-delete(p, s)
-     position p;
-     position_set *s;
+delete (position p, position_set *s)
 {
   int i;
 
@@ -992,11 +1099,7 @@ delete(p, s)
    state.  Newline and letter tell whether we got here on a newline or
    letter, respectively. */
 static int
-state_index(d, s, newline, letter)
-     struct dfa *d;
-     position_set *s;
-     int newline;
-     int letter;
+state_index (struct dfa *d, position_set *s, int newline, int letter)
 {
   int hash = 0;
   int constraint;
@@ -1061,10 +1164,8 @@ state_index(d, s, newline, letter)
    that position with the elements of its follow labeled with an appropriate
    constraint.  Repeat exhaustively until no funny positions are left.
    S->elems must be large enough to hold the result. */
-void
-epsclosure(s, d)
-     position_set *s;
-     struct dfa *d;
+static void
+epsclosure (position_set *s, struct dfa *d)
 {
   int i, j;
   int *visited;
@@ -1176,9 +1277,7 @@ epsclosure(s, d)
    scheme; the number of elements in each set deeper in the stack can be
    used to determine the address of a particular set's array. */
 void
-dfaanalyze(d, searchflag)
-     struct dfa *d;
-     int searchflag;
+dfaanalyze (struct dfa *d, int searchflag)
 {
   int *nullable;		/* Nullable stack. */
   int *nfirstpos;		/* Element count stack for firstpos sets. */
@@ -1439,10 +1538,7 @@ dfaanalyze(d, searchflag)
    create a new group labeled with the characters of C and insert this
    position in that group. */
 void
-dfastate(s, d, trans)
-     int s;
-     struct dfa *d;
-     int trans[];
+dfastate (int s, struct dfa *d, int trans[])
 {
   position_set grps[NOTCHAR];	/* As many as will ever be needed. */
   charclass labels[NOTCHAR];	/* Labels corresponding to the groups. */
@@ -1463,7 +1559,7 @@ dfastate(s, d, trans)
   int state_newline;		/* New state on a newline transition. */
   int wants_letter;		/* New state wants to know letter context. */
   int state_letter;		/* New state on a letter transition. */
-  static initialized;		/* Flag for static initialization. */
+  static int initialized;	/* Flag for static initialization. */
   int i, j, k;
 
   /* Initialize the set of letters, if necessary. */
@@ -1471,9 +1567,9 @@ dfastate(s, d, trans)
     {
       initialized = 1;
       for (i = 0; i < NOTCHAR; ++i)
-	if (ISALNUM(i))
+	if (IS_WORD_CONSTITUENT(i))
 	  setbit(i, letters);
-      setbit('\n', newline);
+      setbit(eolbyte, newline);
     }
 
   zeroset(matches);
@@ -1494,7 +1590,7 @@ dfastate(s, d, trans)
 	{
 	  if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
 					 d->states[s].newline, 1))
-	    clrbit('\n', matches);
+	    clrbit(eolbyte, matches);
 	  if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
 					 d->states[s].newline, 0))
 	    for (j = 0; j < CHARCLASS_INTS; ++j)
@@ -1510,7 +1606,7 @@ dfastate(s, d, trans)
 
 	  /* If there are no characters left, there's no point in going on. */
 	  for (j = 0; j < CHARCLASS_INTS && !matches[j]; ++j)
-	    ;
+	    continue;
 	  if (j == CHARCLASS_INTS)
 	    continue;
 	}
@@ -1528,7 +1624,7 @@ dfastate(s, d, trans)
 	     matches. */
 	  intersectf = 0;
 	  for (k = 0; k < CHARCLASS_INTS; ++k)
-	    (intersect[k] = matches[k] & labels[j][k]) ? intersectf = 1 : 0;
+	    (intersect[k] = matches[k] & labels[j][k]) ? (intersectf = 1) : 0;
 	  if (! intersectf)
 	    continue;
 
@@ -1539,8 +1635,8 @@ dfastate(s, d, trans)
 	      /* Even an optimizing compiler can't know this for sure. */
 	      int match = matches[k], label = labels[j][k];
 
-	      (leftovers[k] = ~match & label) ? leftoversf = 1 : 0;
-	      (matches[k] = match & ~label) ? matchesf = 1 : 0;
+	      (leftovers[k] = ~match & label) ? (leftoversf = 1) : 0;
+	      (matches[k] = match & ~label) ? (matchesf = 1) : 0;
 	    }
 
 	  /* If there were leftovers, create a new group labeled with them. */
@@ -1604,12 +1700,8 @@ dfastate(s, d, trans)
       else
 	state_letter = state;
       for (i = 0; i < NOTCHAR; ++i)
-	if (i == '\n')
-	  trans[i] = state_newline;
-	else if (ISALNUM(i))
-	  trans[i] = state_letter;
-	else
-	  trans[i] = state;
+	trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state;
+      trans[eolbyte] = state_newline;
     }
   else
     for (i = 0; i < NOTCHAR; ++i)
@@ -1633,7 +1725,7 @@ dfastate(s, d, trans)
 
       /* Find out if the new state will want any context information. */
       wants_newline = 0;
-      if (tstbit('\n', labels[i]))
+      if (tstbit(eolbyte, labels[i]))
 	for (j = 0; j < follows.nelem; ++j)
 	  if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))
 	    wants_newline = 1;
@@ -1665,9 +1757,9 @@ dfastate(s, d, trans)
 	    {
 	      int c = j * INTBITS + k;
 
-	      if (c == '\n')
+	      if (c == eolbyte)
 		trans[c] = state_newline;
-	      else if (ISALNUM(c))
+	      else if (IS_WORD_CONSTITUENT(c))
 		trans[c] = state_letter;
 	      else if (c < NOTCHAR)
 		trans[c] = state;
@@ -1688,9 +1780,7 @@ dfastate(s, d, trans)
    TODO: Improve this comment, get rid of the unnecessary redundancy. */
 
 static void
-build_state(s, d)
-     int s;
-     struct dfa *d;
+build_state (int s, struct dfa *d)
 {
   int *trans;			/* The new transition table. */
   int i;
@@ -1756,8 +1846,8 @@ build_state(s, d)
 
   /* Keep the newline transition in a special place so we can use it as
      a sentinel. */
-  d->newlines[s] = trans['\n'];
-  trans['\n'] = -1;
+  d->newlines[s] = trans[eolbyte];
+  trans[eolbyte] = -1;
 
   if (ACCEPTING(s, *d))
     d->fails[s] = trans;
@@ -1766,8 +1856,7 @@ build_state(s, d)
 }
 
 static void
-build_state_zero(d)
-     struct dfa *d;
+build_state_zero (struct dfa *d)
 {
   d->tralloc = 1;
   d->trcount = 0;
@@ -1793,20 +1882,16 @@ build_state_zero(d)
    match needs to be verified by a backtracking matcher.  Otherwise
    we store a 0 in *backref. */
 char *
-dfaexec(d, begin, end, newline, count, backref)
-     struct dfa *d;
-     char *begin;
-     char *end;
-     int newline;
-     int *count;
-     int *backref;
+dfaexec (struct dfa *d, char *begin, char *end,
+	 int newline, int *count, int *backref)
 {
-  register s, s1, tmp;		/* Current state. */
+  register int s, s1, tmp;	/* Current state. */
   register unsigned char *p;	/* Current input character. */
-  register **trans, *t;		/* Copy of d->trans so it can be optimized
+  register int **trans, *t;	/* Copy of d->trans so it can be optimized
 				   into a register. */
-  static sbit[NOTCHAR];	/* Table for anding with d->success. */
-  static sbit_init;
+  register unsigned char eol = eolbyte;	/* Likewise for eolbyte.  */
+  static int sbit[NOTCHAR];	/* Table for anding with d->success. */
+  static int sbit_init;
 
   if (! sbit_init)
     {
@@ -1814,12 +1899,8 @@ dfaexec(d, begin, end, newline, count, backref)
 
       sbit_init = 1;
       for (i = 0; i < NOTCHAR; ++i)
-	if (i == '\n')
-	  sbit[i] = 4;
-	else if (ISALNUM(i))
-	  sbit[i] = 2;
-	else
-	  sbit[i] = 1;
+	sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1;
+      sbit[eol] = 4;
     }
 
   if (! d->tralloc)
@@ -1828,34 +1909,25 @@ dfaexec(d, begin, end, newline, count, backref)
   s = s1 = 0;
   p = (unsigned char *) begin;
   trans = d->trans;
-  *end = '\n';
+  *end = eol;
 
   for (;;)
     {
-      /* The dreaded inner loop. */
-      if ((t = trans[s]) != 0)
-	do
-	  {
-	    s1 = t[*p++];
-	    if (! (t = trans[s1]))
-	      goto last_was_s;
-	    s = t[*p++];
-	  }
-        while ((t = trans[s]) != 0);
-      goto last_was_s1;
-    last_was_s:
-      tmp = s, s = s1, s1 = tmp;
-    last_was_s1:
+      while ((t = trans[s]) != 0) { /* hand-optimized loop */
+	s1 = t[*p++];
+        if ((t = trans[s1]) == 0) {
+           tmp = s ; s = s1 ; s1 = tmp ; /* swap */
+           break;
+        }
+	s = t[*p++];
+      }
 
       if (s >= 0 && p <= (unsigned char *) end && d->fails[s])
 	{
 	  if (d->success[s] & sbit[*p])
 	    {
 	      if (backref)
-		if (d->states[s].backref)
-		  *backref = 1;
-		else
-		  *backref = 0;
+		*backref = (d->states[s].backref != 0);
 	      return (char *) p;
 	    }
 
@@ -1865,7 +1937,7 @@ dfaexec(d, begin, end, newline, count, backref)
 	}
 
       /* If the previous character was a newline, count it. */
-      if (count && (char *) p <= end && p[-1] == '\n')
+      if (count && (char *) p <= end && p[-1] == eol)
 	++*count;
 
       /* Check if we've run off the end of the buffer. */
@@ -1879,7 +1951,7 @@ dfaexec(d, begin, end, newline, count, backref)
 	  continue;
 	}
 
-      if (p[-1] == '\n' && newline)
+      if (p[-1] == eol && newline)
 	{
 	  s = d->newlines[s1];
 	  continue;
@@ -1892,8 +1964,7 @@ dfaexec(d, begin, end, newline, count, backref)
 /* Initialize the components of a dfa that the other routines don't
    initialize for themselves. */
 void
-dfainit(d)
-     struct dfa *d;
+dfainit (struct dfa *d)
 {
   d->calloc = 1;
   MALLOC(d->charclasses, charclass, d->calloc);
@@ -1911,32 +1982,28 @@ dfainit(d)
 
 /* Parse and analyze a single string of the given length. */
 void
-dfacomp(s, len, d, searchflag)
-     char *s;
-     size_t len;
-     struct dfa *d;
-     int searchflag;
+dfacomp (char *s, size_t len, struct dfa *d, int searchflag)
 {
   if (case_fold)	/* dummy folding in service of dfamust() */
     {
-      char *copy;
+      char *lcopy;
       int i;
 
-      copy = malloc(len);
-      if (!copy)
-	dfaerror("out of memory");
-      
+      lcopy = malloc(len);
+      if (!lcopy)
+	dfaerror(_("out of memory"));
+
       /* This is a kludge. */
       case_fold = 0;
       for (i = 0; i < len; ++i)
-	if (ISUPPER(s[i]))
-	  copy[i] = tolower(s[i]);
+	if (ISUPPER ((unsigned char) s[i]))
+	  lcopy[i] = tolower ((unsigned char) s[i]);
 	else
-	  copy[i] = s[i];
+	  lcopy[i] = s[i];
 
       dfainit(d);
-      dfaparse(copy, len, d);
-      free(copy);
+      dfaparse(lcopy, len, d);
+      free(lcopy);
       dfamust(d);
       d->cindex = d->tindex = d->depth = d->nleaves = d->nregexps = 0;
       case_fold = 1;
@@ -1954,8 +2021,7 @@ dfacomp(s, len, d, searchflag)
 
 /* Free the storage held by the components of a dfa. */
 void
-dfafree(d)
-     struct dfa *d;
+dfafree (struct dfa *d)
 {
   int i;
   struct dfamust *dm, *ndm;
@@ -1974,9 +2040,10 @@ dfafree(d)
       free((ptr_t) d->trans[i]);
     else if (d->fails[i])
       free((ptr_t) d->fails[i]);
-  free((ptr_t) d->realtrans);
-  free((ptr_t) d->fails);
-  free((ptr_t) d->newlines);
+  if (d->realtrans) free((ptr_t) d->realtrans);
+  if (d->fails) free((ptr_t) d->fails);
+  if (d->newlines) free((ptr_t) d->newlines);
+  if (d->success) free((ptr_t) d->success);
   for (dm = d->musts; dm; dm = ndm)
     {
       ndm = dm->next;
@@ -2015,9 +2082,9 @@ dfafree(d)
 	Type	left		right		is		in
 	----	----		-----		--		--
 	char c	# c		# c		# c		# c
-	
+
 	CSET	ZERO		ZERO		ZERO		ZERO
-	
+
 	STAR	ZERO		ZERO		ZERO		ZERO
 
 	QMARK	ZERO		ZERO		ZERO		ZERO
@@ -2028,12 +2095,12 @@ dfafree(d)
 		p->left :	q->right :	q->is!=ZERO) ?	q->in plus
 		p->is##q->left	p->right##q->is	p->is##q->is :	p->right##q->left
 						ZERO
-					
+
 	OR	longest common	longest common	(do p->is and	substrings common to
 		leading		trailing	q->is have same	p->in and q->in
-		(sub)sequence	(sub)sequence	length and	
-		of p->left	of p->right	content) ?	
-		and q->left	and q->right	p->is : NULL	
+		(sub)sequence	(sub)sequence	length and
+		of p->left	of p->right	content) ?
+		and q->left	and q->right	p->is : NULL
 
    If there's anything else we recognize in the tree, all four sequences get set
    to zero-length sequences.  If there's something we don't recognize in the tree,
@@ -2060,18 +2127,16 @@ dfafree(d)
    Does optimization actually accomplish anything,
    or is the automaton you get from "psi|epsilon" (for example)
    the same as the one you get from "psi" (for example)?
-  
+
    Are optimizable r.e.'s likely to be used in real-life situations
    (something like 'ab*' is probably unlikely; something like is
    'psi|epsilon' is likelier)? */
 
 static char *
-icatalloc(old, new)
-     char *old;
-     char *new;
+icatalloc (char *old, char *new)
 {
   char *result;
-  int oldsize, newsize;
+  size_t oldsize, newsize;
 
   newsize = (new == NULL) ? 0 : strlen(new);
   if (old == NULL)
@@ -2089,19 +2154,16 @@ icatalloc(old, new)
 }
 
 static char *
-icpyalloc(string)
-     char *string;
+icpyalloc (char *string)
 {
   return icatalloc((char *) NULL, string);
 }
 
 static char *
-istrstr(lookin, lookfor)
-     char *lookin;
-     char *lookfor;
+istrstr (char *lookin, char *lookfor)
 {
   char *cp;
-  int len;
+  size_t len;
 
   len = strlen(lookfor);
   for (cp = lookin; *cp != '\0'; ++cp)
@@ -2111,16 +2173,14 @@ istrstr(lookin, lookfor)
 }
 
 static void
-ifree(cp)
-     char *cp;
+ifree (char *cp)
 {
   if (cp != NULL)
     free(cp);
 }
 
 static void
-freelist(cpp)
-     char **cpp;
+freelist (char **cpp)
 {
   int i;
 
@@ -2134,10 +2194,7 @@ freelist(cpp)
 }
 
 static char **
-enlist(cpp, new, len)
-     char **cpp;
-     char *new;
-     int len;
+enlist (char **cpp, char *new, size_t len)
 {
   int i, j;
 
@@ -2182,14 +2239,12 @@ enlist(cpp, new, len)
    list of their distinct common substrings. Return NULL if something
    seems wild. */
 static char **
-comsubs(left, right)
-     char *left;
-     char *right;
+comsubs (char *left, char *right)
 {
   char **cpp;
   char *lcp;
   char *rcp;
-  int i, len;
+  size_t i, len;
 
   if (left == NULL || right == NULL)
     return NULL;
@@ -2204,7 +2259,7 @@ comsubs(left, right)
       while (rcp != NULL)
 	{
 	  for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i)
-	    ;
+	    continue;
 	  if (i > len)
 	    len = i;
 	  rcp = index(rcp + 1, *lcp);
@@ -2218,9 +2273,7 @@ comsubs(left, right)
 }
 
 static char **
-addlists(old, new)
-char **old;
-char **new;
+addlists (char **old, char **new)
 {
   int i;
 
@@ -2238,9 +2291,7 @@ char **new;
 /* Given two lists of substrings, return a new list giving substrings
    common to both. */
 static char **
-inboth(left, right)
-     char **left;
-     char **right;
+inboth (char **left, char **right)
 {
   char **both;
   char **temp;
@@ -2264,6 +2315,7 @@ inboth(left, right)
 	    }
 	  both = addlists(both, temp);
 	  freelist(temp);
+	  free(temp);
 	  if (both == NULL)
 	    return NULL;
 	}
@@ -2280,16 +2332,14 @@ typedef struct
 } must;
 
 static void
-resetmust(mp)
-must *mp;
+resetmust (must *mp)
 {
   mp->left[0] = mp->right[0] = mp->is[0] = '\0';
   freelist(mp->in);
 }
 
 static void
-dfamust(dfa)
-struct dfa *dfa;
+dfamust (struct dfa *dfa)
 {
   must *musts;
   must *mp;
@@ -2300,8 +2350,9 @@ struct dfa *dfa;
   token t;
   static must must0;
   struct dfamust *dm;
+  static char empty_string[] = "";
 
-  result = "";
+  result = empty_string;
   exact = 0;
   musts = (must *) malloc((dfa->tindex + 1) * sizeof *musts);
   if (musts == NULL)
@@ -2488,7 +2539,7 @@ struct dfa *dfa;
 	      resetmust(mp);
 	      mp->is[0] = mp->left[0] = mp->right[0] = t;
 	      mp->is[1] = mp->left[1] = mp->right[1] = '\0';
-	      mp->in = enlist(mp->in, mp->is, 1);
+	      mp->in = enlist(mp->in, mp->is, (size_t)1);
 	      if (mp->in == NULL)
 		goto done;
 	    }