src - OpenBSD base system

diff options


context:
space:
mode:

author	Todd C. Miller <millert@cvs.openbsd.org>	2002-03-01 22:01:12 +0000
committer	Todd C. Miller <millert@cvs.openbsd.org>	2002-03-01 22:01:12 +0000
commit	a994239d65289488e751e7ee61bcdb678ee26164 (patch)
tree	ebbcfba3a236a6a08d7fb989449a1b5f9684f814 /usr.bin/spell/look.c
parent	d0281a3ced86b9f7541ca7166fb7e3371be66794 (diff)

spell command from 4.4BSD, now freed by Caldera.

I have made some significant changes: o spell no longer uses hashed files. The algorithm for generating the hashed files breaks down for large files. Instead, I use the same approach as look(1) and do binary searches of mmap(2)ed files which has acceptable performance. A side effect of this is that spell can now search multiple word lists. o The spell binary has been renamed to "spellprog" so as to not conflict with the spell shell script. This is consistent with other operating systems. o The putsuf() and putword() functions have been renamed to trypref() and tryword() respectively for consistency with modern versions and for general sanity. o the spell front-end shell script has been completely rewritten o The code has been ANSIfied and re-style(9)d spell currently must be compiled with -fwriteable-strings to make the -b option work. This should be fixed.

Diffstat (limited to 'usr.bin/spell/look.c')

-rw-r--r--

usr.bin/spell/look.c

185

1 files changed, 185 insertions, 0 deletions

diff --git a/usr.bin/spell/look.c b/usr.bin/spell/look.c
new file mode 100644
index 00000000000..1aba6552e9e
--- /dev/null
+++ b/usr.bin/spell/look.c

@@ -0,0 +1,185 @@

+/* $OpenBSD: look.c,v 1.1 2002/03/01 22:01:11 millert Exp $ */

+/*-

+ *

+ * This code is derived from software contributed to Berkeley by

+ * David Hitz of Auspex Systems, Inc.

+ *

+ * Redistribution and use in source and binary forms, with or without

+ * modification, are permitted provided that the following conditions

+ * are met:

+ * 1. Redistributions of source code must retain the above copyright

+ * notice, this list of conditions and the following disclaimer.

+ * 2. Redistributions in binary form must reproduce the above copyright

+ * notice, this list of conditions and the following disclaimer in the

+ * documentation and/or other materials provided with the distribution.

+ * 3. All advertising materials mentioning features or use of this software

+ * must display the following acknowledgement:

+ * This product includes software developed by the University of

+ * California, Berkeley and its contributors.

+ * 4. Neither the name of the University nor the names of its contributors

+ * may be used to endorse or promote products derived from this software

+ * without specific prior written permission.

+ *

+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND

+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE

+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS

+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

+ * SUCH DAMAGE.

+ */

+#ifndef lint

+#if 0

+static const char sccsid[] = "@(#)look.c 8.2 (Berkeley) 5/4/95";

+#endif

+static const char rcsid[] = "$OpenBSD: look.c,v 1.1 2002/03/01 22:01:11 millert Exp $";

+#endif /* not lint */

+#include <sys/types.h>

+#include <ctype.h>

+#include <stdio.h>

+#include <stdlib.h>

+#include <string.h>

+#include <err.h>

+u_char *binary_search(u_char *, u_char *, u_char *);

+u_char *linear_search(u_char *, u_char *, u_char *);

+int compare(u_char *, u_char *, u_char *);

+int look(u_char *, u_char *, u_char *);

+int

+look(u_char *string, u_char *front, u_char *back)

+ u_char *s;

+ /* Convert string to lower case before searching. */

+ for (s = string; *s; s++) {

+ if (isupper(*s))

+ *s = _tolower(*s);

+ }

+ front = binary_search(string, front, back);

+ front = linear_search(string, front, back);

+ return (front != NULL);

+/*

+ * Binary search for "string" in memory between "front" and "back".

+ *

+ * This routine is expected to return a pointer to the start of a line at

+ * *or before* the first word matching "string". Relaxing the constraint

+ * this way simplifies the algorithm.

+ *

+ * Invariants:

+ * front points to the beginning of a line at or before the first

+ * matching string.

+ *

+ * back points to the beginning of a line at or after the first

+ * matching line.

+ *

+ * Base of the Invariants.

+ * front = NULL;

+ * back = EOF;

+ *

+ * Advancing the Invariants:

+ *

+ * p = first newline after halfway point from front to back.

+ *

+ * If the string at "p" is not greater than the string to match,

+ * p is the new front. Otherwise it is the new back.

+ *

+ * Termination:

+ *

+ * The definition of the routine allows it return at any point,

+ * since front is always at or before the line to print.

+ *

+ * In fact, it returns when the chosen "p" equals "back". This

+ * implies that there exists a string is least half as long as

+ * (back - front), which in turn implies that a linear search will

+ * be no more expensive than the cost of simply printing a string or two.

+ *

+ * Trying to continue with binary search at this point would be

+ * more trouble than it's worth.

+ */

+#define SKIP_PAST_NEWLINE(p, back) \

+ while (p < back && *p++ != '\n');

+u_char *

+binary_search(u_char *string, u_char *front, u_char *back)

+ u_char *p;

+ p = front + (back - front) / 2;

+ SKIP_PAST_NEWLINE(p, back);

+ /*

+ * If the file changes underneath us, make sure we don't

+ * infinitely loop.

+ */

+ while (p < back && back > front) {

+ if (compare(string, p, back) > 0)

+ front = p;

+ else

+ back = p;

+ p = front + (back - front) / 2;

+ SKIP_PAST_NEWLINE(p, back);

+ }

+ return (front);

+/*

+ * Find the first line that matches string, linearly searching from front

+ * to back.

+ *

+ * Return NULL for no such line.

+ *

+ * This routine assumes:

+ *

+ * o front points at the first character in a line.

+ * o front is before or at the first line to be printed.

+ */

+u_char *

+linear_search(u_char *string, u_char *front, u_char *back)

+ int result;

+ while (front < back) {

+ result = compare(string, front, back);

+ if (result == 0)

+ return(front); /* found it */

+ if (result < 0)

+ return(NULL); /* not there */

+ SKIP_PAST_NEWLINE(front, back);

+ }

+ return (NULL);

+int

+compare(u_char *s1, u_char *s2, u_char *back)

+ int ch;

+ /* Note that s1 is already upper case. */

+ for (;; ++s1, ++s2) {

+ if (*s2 == '\n' || s2 == back)

+ ch = '\0';

+ else if (isupper(*s2))

+ ch = _tolower(*s2);

+ else

+ ch = *s2;

+ if (*s1 != ch)

+ return(*s1 - ch);

+ if (ch == '\0')

+ return(0);

+ }