src - OpenBSD base system

diff options


context:
space:
mode:

author	etheisen <etheisen@cvs.openbsd.org>	1996-09-14 19:02:02 +0000
committer	etheisen <etheisen@cvs.openbsd.org>	1996-09-14 19:02:02 +0000
commit	6a4f3d0fd940c1e7052ddf23907f0e297dc42f4b (patch)
tree	712fd665d6b150ef04906975a7493d87d38a1a8a /gnu/usr.bin/groff/libbib
parent	9cf27152dae9dbf80b167732c78c47baee90ddc5 (diff)

Third time because import sucks.

Diffstat (limited to 'gnu/usr.bin/groff/libbib')

-rw-r--r--

gnu/usr.bin/groff/libbib/Makefile.dep

-rw-r--r--

gnu/usr.bin/groff/libbib/Makefile.sub

-rw-r--r--

gnu/usr.bin/groff/libbib/common.cc

-rw-r--r--

gnu/usr.bin/groff/libbib/index.cc

623

-rw-r--r--

gnu/usr.bin/groff/libbib/linear.cc

489

-rw-r--r--

gnu/usr.bin/groff/libbib/map.c

-rw-r--r--

gnu/usr.bin/groff/libbib/search.cc

131

7 files changed, 1372 insertions, 0 deletions

diff --git a/gnu/usr.bin/groff/libbib/Makefile.dep b/gnu/usr.bin/groff/libbib/Makefile.dep
new file mode 100644
index 00000000000..8c4974853d6
--- /dev/null
+++ b/gnu/usr.bin/groff/libbib/Makefile.dep

@@ -0,0 +1,12 @@

+common.o: common.cc

+index.o: index.cc ../include/posix.h ../include/lib.h \

+ ../include/cset.h ../include/cmap.h ../include/errarg.h \

+ ../include/error.h ../include/refid.h ../include/search.h \

+ ../include/index.h ../include/defs.h

+linear.o: linear.cc ../include/posix.h ../include/lib.h \

+ ../include/errarg.h ../include/error.h ../include/cset.h \

+ ../include/cmap.h ../include/refid.h ../include/search.h

+search.o: search.cc ../include/posix.h ../include/lib.h \

+ ../include/errarg.h ../include/error.h ../include/refid.h \

+ ../include/search.h

+map.o: map.c

diff --git a/gnu/usr.bin/groff/libbib/Makefile.sub b/gnu/usr.bin/groff/libbib/Makefile.sub
new file mode 100644
index 00000000000..e9418288a49
--- /dev/null
+++ b/gnu/usr.bin/groff/libbib/Makefile.sub

@@ -0,0 +1,4 @@

+LIB=bib

+OBJS=common.o index.o linear.o search.o map.o

+CCSRCS=common.cc index.cc linear.cc search.cc

+CSRCS=map.c

diff --git a/gnu/usr.bin/groff/libbib/common.cc b/gnu/usr.bin/groff/libbib/common.cc
new file mode 100644
index 00000000000..4b2bcca23e2
--- /dev/null
+++ b/gnu/usr.bin/groff/libbib/common.cc

@@ -0,0 +1,38 @@

+ Written by James Clark (jjc@jclark.com)

+This file is part of groff.

+groff is free software; you can redistribute it and/or modify it under

+the terms of the GNU General Public License as published by the Free

+Software Foundation; either version 2, or (at your option) any later

+version.

+groff is distributed in the hope that it will be useful, but WITHOUT ANY

+WARRANTY; without even the implied warranty of MERCHANTABILITY or

+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License

+for more details.

+You should have received a copy of the GNU General Public License along

+with groff; see the file COPYING. If not, write to the Free Software

+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */

+unsigned hash(const char *s, int len)

+#if 0

+ unsigned h = 0, g;

+ while (*s != '\0') {

+ h <<= 4;

+ h += *s++;

+ if ((g = h & 0xf0000000) != 0) {

+ h ^= g >> 24;

+ h ^= g;

+ }

+#endif

+ unsigned h = 0;

+ while (--len >= 0)

+ h = *s++ + 65587*h;

+ return h;

diff --git a/gnu/usr.bin/groff/libbib/index.cc b/gnu/usr.bin/groff/libbib/index.cc
new file mode 100644
index 00000000000..2c11f06cdff
--- /dev/null
+++ b/gnu/usr.bin/groff/libbib/index.cc

@@ -0,0 +1,623 @@

+// -*- C++ -*-

+ Written by James Clark (jjc@jclark.com)

+This file is part of groff.

+groff is free software; you can redistribute it and/or modify it under

+the terms of the GNU General Public License as published by the Free

+Software Foundation; either version 2, or (at your option) any later

+version.

+groff is distributed in the hope that it will be useful, but WITHOUT ANY

+WARRANTY; without even the implied warranty of MERCHANTABILITY or

+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License

+for more details.

+You should have received a copy of the GNU General Public License along

+with groff; see the file COPYING. If not, write to the Free Software

+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */

+#include <stdio.h>

+#include <string.h>

+#include <stdlib.h>

+#include <errno.h>

+#include "posix.h"

+#include "lib.h"

+#include "cset.h"

+#include "cmap.h"

+#include "errarg.h"

+#include "error.h"

+#include "refid.h"

+#include "search.h"

+#include "index.h"

+#include "defs.h"

+// Interface to mmap.

+extern "C" {

+ void *mapread(int fd, int len);

+ int unmap(void *, int len);

+#if 0

+const

+#endif

+int minus_one = -1;

+int verify_flag = 0;

+struct word_list;

+class index_search_item : public search_item {

+ search_item *out_of_date_files;

+ index_header header;

+ char *buffer;

+ void *map_addr;

+ int map_len;

+ tag *tags;

+ int *table;

+ int *lists;

+ char *pool;

+ char *key_buffer;

+ char *filename_buffer;

+ int filename_buflen;

+ char **common_words_table;

+ int common_words_table_size;

+ const char *ignore_fields;

+ time_t mtime;

+ const char *do_verify();

+ const int *search1(const char **pp, const char *end);

+ const int *search(const char *ptr, int length, int **temp_listp);

+ const char *munge_filename(const char *);

+ void read_common_words_file();

+ void add_out_of_date_file(int fd, const char *filename, int fid);

+public:

+ index_search_item(const char *, int);

+ ~index_search_item();

+ int load(int fd);

+ search_item_iterator *make_search_item_iterator(const char *);

+ int verify();

+ void check_files();

+ int next_filename_id() const;

+ friend class index_search_item_iterator;

+};

+class index_search_item_iterator : public search_item_iterator {

+ index_search_item *indx;

+ search_item_iterator *out_of_date_files_iter;

+ search_item *next_out_of_date_file;

+ const int *found_list;

+ int *temp_list;

+ char *buf;

+ int buflen;

+ linear_searcher searcher;

+ char *query;

+ int get_tag(int tagno, const linear_searcher &, const char **, int *,

+ reference_id *);

+public:

+ index_search_item_iterator(index_search_item *, const char *);

+ ~index_search_item_iterator();

+ int next(const linear_searcher &, const char **, int *, reference_id *);

+};

+index_search_item::index_search_item(const char *filename, int fid)

+: search_item(filename, fid), out_of_date_files(0), key_buffer(0),

+ filename_buffer(0), filename_buflen(0), common_words_table(0),

+ map_addr(0), map_len(0), buffer(0)

+index_search_item::~index_search_item()

+ if (buffer)

+ free(buffer);

+ if (map_addr) {

+ if (unmap(map_addr, map_len) < 0)

+ error("unmap: %1", strerror(errno));

+ }

+ while (out_of_date_files) {

+ search_item *tem = out_of_date_files;

+ out_of_date_files = out_of_date_files->next;

+ delete tem;

+ }

+ a_delete filename_buffer;

+ a_delete key_buffer;

+ if (common_words_table) {

+ for (int i = 0; i < common_words_table_size; i++)

+ a_delete common_words_table[i];

+ a_delete common_words_table;

+ }

+class file_closer {

+ int *fdp;

+public:

+ file_closer(int &fd) : fdp(&fd) { }

+ ~file_closer() { close(*fdp); }

+};

+// Tell the compiler that a variable is intentionally unused.

+inline void unused(void *) { }

+int index_search_item::load(int fd)

+ file_closer fd_closer(fd); // close fd on return

+ unused(&fd_closer);

+ struct stat sb;

+ if (fstat(fd, &sb) < 0) {

+ error("can't fstat `%1': %2", name, strerror(errno));

+ return 0;

+ }

+ if (!S_ISREG(sb.st_mode)) {

+ error("`%1' is not a regular file", name);

+ return 0;

+ }

+ mtime = sb.st_mtime;

+ int size = int(sb.st_size);

+ char *addr;

+ map_addr = mapread(fd, size);

+ if (map_addr) {

+ addr = (char *)map_addr;

+ map_len = size;

+ }

+ else {

+ addr = buffer = (char *)malloc(size);

+ if (buffer == 0) {

+ error("can't allocate buffer for `%1'", name);

+ return 0;

+ }

+ char *ptr = buffer;

+ int bytes_to_read = size;

+ while (bytes_to_read > 0) {

+ int nread = read(fd, ptr, bytes_to_read);

+ if (nread == 0) {

+ error("unexpected EOF on `%1'", name);

+ return 0;

+ }

+ if (nread < 0) {

+ error("read error on `%1': %2", name, strerror(errno));

+ return 0;

+ }

+ bytes_to_read -= nread;

+ ptr += nread;

+ }

+ header = *(index_header *)addr;

+ if (header.magic != INDEX_MAGIC) {

+ error("`%1' is not an index file: wrong magic number", name);

+ return 0;

+ }

+ if (header.version != INDEX_VERSION) {

+ error("version number in `%1' is wrong: was %2, should be %3",

+ name, header.version, INDEX_VERSION);

+ return 0;

+ }

+ int sz = (header.tags_size * sizeof(tag)

+ + header.lists_size * sizeof(int)

+ + header.table_size * sizeof(int)

+ + header.strings_size

+ + sizeof(header));

+ if (sz != size) {

+ error("size of `%1' is wrong: was %2, should be %3",

+ name, size, sz);

+ return 0;

+ }

+ tags = (tag *)(addr + sizeof(header));

+ lists = (int *)(tags + header.tags_size);

+ table = (int *)(lists + header.lists_size);

+ pool = (char *)(table + header.table_size);

+ ignore_fields = strchr(strchr(pool, '\0') + 1, '\0') + 1;

+ key_buffer = new char[header.truncate];

+ read_common_words_file();

+ return 1;

+const char *index_search_item::do_verify()

+ if (tags == 0)

+ return "not loaded";

+ if (lists[header.lists_size - 1] >= 0)

+ return "last list element not negative";

+ int i;

+ for (i = 0; i < header.table_size; i++) {

+ int li = table[i];

+ if (li >= header.lists_size)

+ return "bad list index";

+ if (li >= 0) {

+ for (int *ptr = lists + li; *ptr >= 0; ptr++) {

+ if (*ptr >= header.tags_size)

+ return "bad tag index";

+ if (*ptr >= ptr[1] && ptr[1] >= 0)

+ return "list not ordered";

+ }

+ for (i = 0; i < header.tags_size; i++) {

+ if (tags[i].filename_index >= header.strings_size)

+ return "bad index in tags";

+ if (tags[i].length < 0)

+ return "bad length in tags";

+ if (tags[i].start < 0)

+ return "bad start in tags";

+ }

+ if (pool[header.strings_size - 1] != '\0')

+ return "last character in pool not nul";

+ return 0;

+int index_search_item::verify()

+ const char *reason = do_verify();

+ if (!reason)

+ return 1;

+ error("`%1' is bad: %2", name, reason);

+ return 0;

+int index_search_item::next_filename_id() const

+ return filename_id + header.strings_size + 1;

+search_item_iterator *index_search_item::make_search_item_iterator(

+ const char *query)

+ return new index_search_item_iterator(this, query);

+search_item *make_index_search_item(const char *filename, int fid)

+ char *index_filename = new char[strlen(filename) + sizeof(INDEX_SUFFIX)];

+ strcpy(index_filename, filename);

+ strcat(index_filename, INDEX_SUFFIX);

+ int fd = open(index_filename, O_RDONLY);

+ if (fd < 0)

+ return 0;

+ index_search_item *item = new index_search_item(index_filename, fid);

+ a_delete index_filename;

+ if (!item->load(fd)) {

+ close(fd);

+ delete item;

+ return 0;

+ }

+ else if (verify_flag && !item->verify()) {

+ delete item;

+ return 0;

+ }

+ else {

+ item->check_files();

+ return item;

+ }

+index_search_item_iterator::index_search_item_iterator(index_search_item *ind,

+ const char *q)

+: indx(ind), buf(0), buflen(0), temp_list(0), query(strsave(q)),

+ searcher(q, strlen(q), ind->ignore_fields, ind->header.truncate),

+ out_of_date_files_iter(0), next_out_of_date_file(0)

+ found_list = indx->search(q, strlen(q), &temp_list);

+ if (!found_list) {

+ found_list = &minus_one;

+ warning("all keys would have been discarded in constructing index `%1'",

+ indx->name);

+ }

+index_search_item_iterator::~index_search_item_iterator()

+ a_delete temp_list;

+ a_delete buf;

+ a_delete query;

+ delete out_of_date_files_iter;

+int index_search_item_iterator::next(const linear_searcher &,

+ const char **pp, int *lenp,

+ reference_id *ridp)

+ if (found_list) {

+ for (;;) {

+ int tagno = *found_list;

+ if (tagno == -1)

+ break;

+ found_list++;

+ if (get_tag(tagno, searcher, pp, lenp, ridp))

+ return 1;

+ }

+ found_list = 0;

+ next_out_of_date_file = indx->out_of_date_files;

+ }

+ while (next_out_of_date_file) {

+ if (out_of_date_files_iter == 0)

+ out_of_date_files_iter

+ = next_out_of_date_file->make_search_item_iterator(query);

+ if (out_of_date_files_iter->next(searcher, pp, lenp, ridp))

+ return 1;

+ delete out_of_date_files_iter;

+ out_of_date_files_iter = 0;

+ next_out_of_date_file = next_out_of_date_file->next;

+ }

+ return 0;

+int index_search_item_iterator::get_tag(int tagno,

+ const linear_searcher &searcher,

+ const char **pp, int *lenp,

+ reference_id *ridp)

+ if (tagno < 0 || tagno >= indx->header.tags_size) {

+ error("bad tag number");

+ return 0;

+ }

+ tag *tp = indx->tags + tagno;

+ const char *filename = indx->munge_filename(indx->pool + tp->filename_index);

+ int fd = open(filename, O_RDONLY);

+ if (fd < 0) {

+ error("can't open `%1': %2", filename, strerror(errno));

+ return 0;

+ }

+ struct stat sb;

+ if (fstat(fd, &sb) < 0) {

+ error("can't fstat: %1", strerror(errno));

+ close(fd);

+ return 0;

+ }

+ time_t mtime = sb.st_mtime;

+ if (mtime > indx->mtime) {

+ indx->add_out_of_date_file(fd, filename,

+ indx->filename_id + tp->filename_index);

+ return 0;

+ }

+ int res = 0;

+ FILE *fp = fdopen(fd, "r");

+ if (!fp) {

+ error("fdopen failed");

+ close(fd);

+ return 0;

+ }

+ if (tp->start != 0 && fseek(fp, long(tp->start), 0) < 0)

+ error("can't seek on `%1': %2", filename, strerror(errno));

+ else {

+ int length = tp->length;

+ int err = 0;

+ if (length == 0) {

+ struct stat sb;

+ if (fstat(fileno(fp), &sb) < 0) {

+ error("can't stat `%1': %2", filename, strerror(errno));

+ err = 1;

+ }

+ else if (!S_ISREG(sb.st_mode)) {

+ error("`%1' is not a regular file", filename);

+ err = 1;

+ }

+ else

+ length = int(sb.st_size);

+ }

+ if (!err) {

+ if (length + 2 > buflen) {

+ a_delete buf;

+ buflen = length + 2;

+ buf = new char[buflen];

+ }

+ if (fread(buf + 1, 1, length, fp) != length)

+ error("fread on `%1' failed: %2", filename, strerror(errno));

+ else {

+ buf[0] = '\n';

+ buf[length + 1] = '\n';

+ res = searcher.search(buf + 1, buf + 2 + length, pp, lenp);

+ if (res && ridp)

+ *ridp = reference_id(indx->filename_id + tp->filename_index,

+ tp->start);

+ }

+ fclose(fp);

+ return res;

+const char *index_search_item::munge_filename(const char *filename)

+ if (filename[0] == '/')

+ return filename;

+ const char *cwd = pool;

+ int need_slash = (cwd[0] != 0 && strchr(cwd, '\0')[-1] != '/');

+ int len = strlen(cwd) + strlen(filename) + need_slash + 1;

+ if (len > filename_buflen) {

+ a_delete filename_buffer;

+ filename_buflen = len;

+ filename_buffer = new char[len];

+ }

+ strcpy(filename_buffer, cwd);

+ if (need_slash)

+ strcat(filename_buffer, "/");

+ strcat(filename_buffer, filename);

+ return filename_buffer;

+const int *index_search_item::search1(const char **pp, const char *end)

+ while (*pp < end && !csalnum(**pp))

+ *pp += 1;

+ if (*pp >= end)

+ return 0;

+ const char *start = *pp;

+ while (*pp < end && csalnum(**pp))

+ *pp += 1;

+ int len = *pp - start;

+ if (len < header.shortest)

+ return 0;

+ if (len > header.truncate)

+ len = header.truncate;

+ int is_number = 1;

+ for (int i = 0; i < len; i++)

+ if (csdigit(start[i]))

+ key_buffer[i] = start[i];

+ else {

+ key_buffer[i] = cmlower(start[i]);

+ is_number = 0;

+ }

+ if (is_number && !(len == 4 && start[0] == '1' && start[1] == '9'))

+ return 0;

+ unsigned hc = hash(key_buffer, len);

+ if (common_words_table) {

+ for (int h = hc % common_words_table_size;

+ common_words_table[h];

+ --h) {

+ if (strlen(common_words_table[h]) == len

+ && memcmp(common_words_table[h], key_buffer, len) == 0)

+ return 0;

+ if (h == 0)

+ h = common_words_table_size;

+ }

+ int li = table[int(hc % header.table_size)];

+ return li < 0 ? &minus_one : lists + li;

+static void merge(int *result, const int *s1, const int *s2)

+ for (; *s1 >= 0; s1++) {

+ while (*s2 >= 0 && *s2 < *s1)

+ s2++;

+ if (*s2 == *s1)

+ *result++ = *s2;

+ }

+ *result++ = -1;

+const int *index_search_item::search(const char *ptr, int length,

+ int **temp_listp)

+ const char *end = ptr + length;

+ if (*temp_listp) {

+ a_delete *temp_listp;

+ *temp_listp = 0;

+ }

+ const int *first_list = 0;

+ while (ptr < end && (first_list = search1(&ptr, end)) == 0)

+ ;

+ if (!first_list)

+ return 0;

+ if (*first_list < 0)

+ return first_list;

+ const int *second_list = 0;

+ while (ptr < end && (second_list = search1(&ptr, end)) == 0)

+ ;

+ if (!second_list)

+ return first_list;

+ if (*second_list < 0)

+ return second_list;

+ const int *p;

+ for (p = first_list; *p >= 0; p++)

+ ;

+ int len = p - first_list;

+ for (p = second_list; *p >= 0; p++)

+ ;

+ if (p - second_list < len)

+ len = p - second_list;

+ int *matches = new int[len + 1];

+ merge(matches, first_list, second_list);

+ while (ptr < end) {

+ const int *list = search1(&ptr, end);

+ if (list != 0) {

+ if (*list < 0) {

+ a_delete matches;

+ return list;

+ }

+ merge(matches, matches, list);

+ if (*matches < 0) {

+ a_delete matches;

+ return &minus_one;

+ }

+ *temp_listp = matches;

+ return matches;

+void index_search_item::read_common_words_file()

+ if (header.common <= 0)

+ return;

+ const char *common_words_file = munge_filename(strchr(pool, '\0') + 1);

+ errno = 0;

+ FILE *fp = fopen(common_words_file, "r");

+ if (!fp) {

+ error("can't open `%1': %2", common_words_file, strerror(errno));

+ return;

+ }

+ common_words_table_size = 2*header.common + 1;

+ while (!is_prime(common_words_table_size))

+ common_words_table_size++;

+ common_words_table = new char *[common_words_table_size];

+ for (int i = 0; i < common_words_table_size; i++)

+ common_words_table[i] = 0;

+ int count = 0;

+ int key_len = 0;

+ for (;;) {

+ int c = getc(fp);

+ while (c != EOF && !csalnum(c))

+ c = getc(fp);

+ if (c == EOF)

+ break;

+ do {

+ if (key_len < header.truncate)

+ key_buffer[key_len++] = cmlower(c);

+ c = getc(fp);

+ } while (c != EOF && csalnum(c));

+ if (key_len >= header.shortest) {

+ int h = hash(key_buffer, key_len) % common_words_table_size;

+ while (common_words_table[h]) {

+ if (h == 0)

+ h = common_words_table_size;

+ --h;

+ }

+ common_words_table[h] = new char[key_len + 1];

+ memcpy(common_words_table[h], key_buffer, key_len);

+ common_words_table[h][key_len] = '\0';

+ }

+ if (++count >= header.common)

+ break;

+ key_len = 0;

+ if (c == EOF)

+ break;

+ }

+ fclose(fp);

+void index_search_item::add_out_of_date_file(int fd, const char *filename,

+ int fid)

+ search_item **pp;

+ for (pp = &out_of_date_files; *pp; pp = &(*pp)->next)

+ if ((*pp)->is_named(filename))

+ return;

+ *pp = make_linear_search_item(fd, filename, fid);

+ warning("`%1' modified since `%2' created", filename, name);

+void index_search_item::check_files()

+ const char *pool_end = pool + header.strings_size;

+ for (const char *ptr = strchr(ignore_fields, '\0') + 1;

+ ptr < pool_end;

+ ptr = strchr(ptr, '\0') + 1) {

+ const char *path = munge_filename(ptr);

+ struct stat sb;

+ if (stat(path, &sb) < 0)

+ error("can't stat `%1': %2", path, strerror(errno));

+ else if (sb.st_mtime > mtime) {

+ int fd = open(path, O_RDONLY);

+ if (fd < 0)

+ error("can't open `%1': %2", path, strerror(errno));

+ else

+ add_out_of_date_file(fd, path, filename_id + (ptr - pool));

+ }

diff --git a/gnu/usr.bin/groff/libbib/linear.cc b/gnu/usr.bin/groff/libbib/linear.cc
new file mode 100644
index 00000000000..3d2729af152
--- /dev/null
+++ b/gnu/usr.bin/groff/libbib/linear.cc

@@ -0,0 +1,489 @@

+// -*- C++ -*-

+ Written by James Clark (jjc@jclark.com)

+This file is part of groff.

+groff is free software; you can redistribute it and/or modify it under

+the terms of the GNU General Public License as published by the Free

+Software Foundation; either version 2, or (at your option) any later

+version.

+groff is distributed in the hope that it will be useful, but WITHOUT ANY

+WARRANTY; without even the implied warranty of MERCHANTABILITY or

+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License

+for more details.

+You should have received a copy of the GNU General Public License along

+with groff; see the file COPYING. If not, write to the Free Software

+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */

+#include <string.h>

+#include <stdlib.h>

+#include <assert.h>

+#include <errno.h>

+#include "posix.h"

+#include "lib.h"

+#include "errarg.h"

+#include "error.h"

+#include "cset.h"

+#include "cmap.h"

+#include "refid.h"

+#include "search.h"

+class file_buffer {

+ char *buffer;

+ char *bufend;

+public:

+ file_buffer();

+ ~file_buffer();

+ int load(int fd, const char *filename);

+ const char *get_start() const;

+ const char *get_end() const;

+};

+typedef unsigned char uchar;

+static uchar map[256];

+static uchar inv_map[256][3];

+struct map_init {

+ map_init();

+};

+static map_init the_map_init;

+map_init::map_init()

+ int i;

+ for (i = 0; i < 256; i++)

+ map[i] = csalnum(i) ? cmlower(i) : '\0';

+ for (i = 0; i < 256; i++) {

+ if (cslower(i)) {

+ inv_map[i][0] = i;

+ inv_map[i][1] = cmupper(i);

+ inv_map[i][2] = '\0';

+ }

+ else if (csdigit(i)) {

+ inv_map[i][0] = i;

+ inv_map[i][1] = 0;

+ }

+ else

+ inv_map[i][0] = '\0';

+ }

+class bmpattern {

+ char *pat;

+ int len;

+ int delta[256];

+public:

+ bmpattern(const char *pattern, int pattern_length);

+ ~bmpattern();

+ const char *search(const char *p, const char *end) const;

+ int length() const;

+};

+bmpattern::bmpattern(const char *pattern, int pattern_length)

+: len(pattern_length)

+ pat = new char[len];

+ int i;

+ for (i = 0; i < len; i++)

+ pat[i] = map[uchar(pattern[i])];

+ for (i = 0; i < 256; i++)

+ delta[i] = len;

+ for (i = 0; i < len; i++)

+ for (const unsigned char *inv = inv_map[uchar(pat[i])]; *inv; inv++)

+ delta[*inv] = len - i - 1;

+const char *bmpattern::search(const char *buf, const char *end) const

+ int buflen = end - buf;

+ if (len > buflen)

+ return 0;

+ const char *strend;

+ if (buflen > len*4)

+ strend = end - len*4;

+ else

+ strend = buf;

+ const char *k = buf + len - 1;

+ const int *del = delta;

+ const char *pattern = pat;

+ for (;;) {

+ while (k < strend) {

+ int t = del[uchar(*k)];

+ if (!t)

+ break;

+ k += t;

+ k += del[uchar(*k)];

+ }

+ while (k < end && del[uchar(*k)] != 0)

+ k++;

+ if (k == end)

+ break;

+ int j = len - 1;

+ const char *s = k;

+ for (;;) {

+ if (j == 0)

+ return s;

+ if (map[uchar(*--s)] != uchar(pattern[--j]))

+ break;

+ }

+ k++;

+ }

+ return 0;

+bmpattern::~bmpattern()

+ a_delete pat;

+inline int bmpattern::length() const

+ return len;

+static const char *find_end(const char *bufend, const char *p);

+const char *linear_searcher::search_and_check(const bmpattern *key,

+ const char *buf, const char *bufend, const char **start) const

+ assert(buf[-1] == '\n');

+ assert(bufend[-1] == '\n');

+ const char *ptr = buf;

+ for (;;) {

+ const char *found = key->search(ptr, bufend);

+ if (!found)

+ break;

+ if (check_match(buf, bufend, found, key->length(), &ptr, start))

+ return found;

+ }

+ return 0;

+static const char *skip_field(const char *end, const char *p)

+ for (;;)

+ if (*p++ == '\n') {

+ if (p == end || *p == '%')

+ break;

+ const char *q;

+ for (q = p; *q == ' ' || *q == '\t'; q++)

+ ;

+ if (*q == '\n')

+ break;

+ p = q + 1;

+ }

+ return p;

+static const char *find_end(const char *bufend, const char *p)

+ for (;;)

+ if (*p++ == '\n') {

+ if (p == bufend)

+ break;

+ const char *q;

+ for (q = p; *q == ' ' || *q == '\t'; q++)

+ ;

+ if (*q == '\n')

+ break;

+ p = q + 1;

+ }

+ return p;

+int linear_searcher::check_match(const char *buf, const char *bufend,

+ const char *match, int matchlen,

+ const char **cont, const char **start) const

+ *cont = match + 1;

+ // The user is required to supply only the first truncate_len characters

+ // of the key. If truncate_len <= 0, he must supply all the key.

+ if ((truncate_len <= 0 || matchlen < truncate_len)

+ && map[uchar(match[matchlen])] != '\0')

+ return 0;

+ // The character before the match must not be an alphanumeric

+ // character (unless the alphanumeric character follows one or two

+ // percent characters at the beginning of the line), nor must it be

+ // a percent character at the beginning of a line, nor a percent

+ // character following a percent character at the beginning of a

+ // line.

+ switch (match - buf) {

+ case 0:

+ break;

+ case 1:

+ if (match[-1] == '%' || map[uchar(match[-1])] != '\0')

+ return 0;

+ break;

+ case 2:

+ if (map[uchar(match[-1])] != '\0' && match[-2] != '%')

+ return 0;

+ if (match[-1] == '%'

+ && (match[-2] == '\n' || match[-2] == '%'))

+ return 0;

+ break;

+ default:

+ if (map[uchar(match[-1])] != '\0'

+ && !(match[-2] == '%'

+ && (match[-3] == '\n'

+ || (match[-3] == '%' && match[-4] == '\n'))))

+ return 0;

+ if (match[-1] == '%'

+ && (match[-2] == '\n'

+ || (match[-2] == '%' && match[-3] == '\n')))

+ return 0;

+ }

+ const char *p = match;

+ int had_percent = 0;

+ for (;;) {

+ if (*p == '\n') {

+ if (!had_percent && p[1] == '%') {

+ if (p[2] != '\0' && strchr(ignore_fields, p[2]) != 0) {

+ *cont = skip_field(bufend, match + matchlen);

+ return 0;

+ }

+ if (!start)

+ break;

+ had_percent = 1;

+ }

+ if (p <= buf) {

+ if (start)

+ *start = p + 1;

+ return 1;

+ }

+ const char *q;

+ for (q = p - 1; *q == ' ' || *q == '\t'; q--)

+ ;

+ if (*q == '\n') {

+ if (start)

+ *start = p + 1;

+ break;

+ }

+ p = q;

+ }

+ p--;

+ }

+ return 1;

+file_buffer::file_buffer()

+: buffer(0), bufend(0)

+file_buffer::~file_buffer()

+ a_delete buffer;

+const char *file_buffer::get_start() const

+ return buffer ? buffer + 4 : 0;

+const char *file_buffer::get_end() const

+ return bufend;

+int file_buffer::load(int fd, const char *filename)

+ struct stat sb;

+ if (fstat(fd, &sb) < 0)

+ error("can't fstat `%1': %2", filename, strerror(errno));

+ else if (!S_ISREG(sb.st_mode))

+ error("`%1' is not a regular file", filename);

+ else {

+ // We need one character extra at the beginning for an additional newline

+ // used as a sentinel. We get 4 instead so that the read buffer will be

+ // word-aligned. This seems to make the read slightly faster. We also

+ // need one character at the end also for an additional newline used as a

+ // sentinel.

+ int size = int(sb.st_size);

+ buffer = new char[size + 4 + 1];

+ int nread = read(fd, buffer + 4, size);

+ if (nread < 0)

+ error("error reading `%1': %2", filename, strerror(errno));

+ else if (nread != size)

+ error("size of `%1' decreased", filename);

+ else {

+ char c;

+ nread = read(fd, &c, 1);

+ if (nread != 0)

+ error("size of `%1' increased", filename);

+ else if (memchr(buffer + 4, '\0', size < 1024 ? size : 1024) != 0)

+ error("database `%1' is a binary file", filename);

+ else {

+ close(fd);

+ buffer[3] = '\n';

+ bufend = buffer + 4 + size;

+ if (bufend[-1] != '\n')

+ *bufend++ = '\n';

+ return 1;

+ }

+ a_delete buffer;

+ buffer = 0;

+ }

+ close(fd);

+ return 0;

+linear_searcher::linear_searcher(const char *query, int query_len,

+ const char *ign, int trunc)

+: keys(0), nkeys(0), truncate_len(trunc), ignore_fields(ign)

+ const char *query_end = query + query_len;

+ int nk = 0;

+ const char *p;

+ for (p = query; p < query_end; p++)

+ if (map[uchar(*p)] != '\0'

+ && (p[1] == '\0' || map[uchar(p[1])] == '\0'))

+ nk++;

+ if (nk == 0)

+ return;

+ keys = new bmpattern*[nk];

+ p = query;

+ for (;;) {

+ while (p < query_end && map[uchar(*p)] == '\0')

+ p++;

+ if (p == query_end)

+ break;

+ const char *start = p;

+ while (p < query_end && map[uchar(*p)] != '\0')

+ p++;

+ keys[nkeys++] = new bmpattern(start, p - start);

+ }

+ assert(nkeys <= nk);

+ if (nkeys == 0) {

+ a_delete keys;

+ keys = 0;

+ }

+linear_searcher::~linear_searcher()

+ for (int i = 0; i < nkeys; i++)

+ delete keys[i];

+ a_delete keys;

+int linear_searcher::search(const char *buffer, const char *bufend,

+ const char **startp, int *lengthp) const

+ assert(bufend - buffer > 0);

+ assert(buffer[-1] == '\n');

+ assert(bufend[-1] == '\n');

+ if (nkeys == 0)

+ return 0;

+ for (;;) {

+ const char *refstart;

+ const char *found = search_and_check(keys[0], buffer, bufend, &refstart);

+ if (!found)

+ break;

+ const char *refend = find_end(bufend, found + keys[0]->length());

+ int i;

+ for (i = 1; i < nkeys; i++)

+ if (!search_and_check(keys[i], refstart, refend))

+ break;

+ if (i >= nkeys) {

+ *startp = refstart;

+ *lengthp = refend - refstart;

+ return 1;

+ }

+ buffer = refend;

+ }

+ return 0;

+class linear_search_item : public search_item {

+ file_buffer fbuf;

+public:

+ linear_search_item(const char *filename, int fid);

+ ~linear_search_item();

+ int load(int fd);

+ search_item_iterator *make_search_item_iterator(const char *);

+ friend class linear_search_item_iterator;

+};

+class linear_search_item_iterator : public search_item_iterator {

+ linear_search_item *lsi;

+ int pos;

+public:

+ linear_search_item_iterator(linear_search_item *, const char *query);

+ ~linear_search_item_iterator();

+ int next(const linear_searcher &, const char **ptr, int *lenp,

+ reference_id *ridp);

+};

+search_item *make_linear_search_item(int fd, const char *filename, int fid)

+ linear_search_item *item = new linear_search_item(filename, fid);

+ if (!item->load(fd)) {

+ delete item;

+ return 0;

+ }

+ else

+ return item;

+linear_search_item::linear_search_item(const char *filename, int fid)

+: search_item(filename, fid)

+linear_search_item::~linear_search_item()

+int linear_search_item::load(int fd)

+ return fbuf.load(fd, name);

+search_item_iterator *linear_search_item::make_search_item_iterator(

+ const char *query)

+ return new linear_search_item_iterator(this, query);

+linear_search_item_iterator::linear_search_item_iterator(

+ linear_search_item *p, const char *)

+: lsi(p), pos(0)

+linear_search_item_iterator::~linear_search_item_iterator()

+int linear_search_item_iterator::next(const linear_searcher &searcher,

+ const char **startp, int *lengthp,

+ reference_id *ridp)

+ const char *bufstart = lsi->fbuf.get_start();

+ const char *bufend = lsi->fbuf.get_end();

+ const char *ptr = bufstart + pos;

+ if (ptr < bufend && searcher.search(ptr, bufend, startp, lengthp)) {

+ pos = *startp + *lengthp - bufstart;

+ if (ridp)

+ *ridp = reference_id(lsi->filename_id, *startp - bufstart);

+ return 1;

+ }

+ else

+ return 0;

diff --git a/gnu/usr.bin/groff/libbib/map.c b/gnu/usr.bin/groff/libbib/map.c
new file mode 100644
index 00000000000..3632a11ef48
--- /dev/null
+++ b/gnu/usr.bin/groff/libbib/map.c

@@ -0,0 +1,75 @@

+ Written by James Clark (jjc@jclark.com)

+This file is part of groff.

+groff is free software; you can redistribute it and/or modify it under

+the terms of the GNU General Public License as published by the Free

+Software Foundation; either version 2, or (at your option) any later

+version.

+groff is distributed in the hope that it will be useful, but WITHOUT ANY

+WARRANTY; without even the implied warranty of MERCHANTABILITY or

+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License

+for more details.

+You should have received a copy of the GNU General Public License along

+with groff; see the file COPYING. If not, write to the Free Software

+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */

+#ifdef HAVE_MMAP

+#include <sys/types.h>

+#include <sys/mman.h>

+/* The Net-2 man pages says that a MAP_FILE flag is required. */

+#ifndef MAP_FILE

+#define MAP_FILE 0

+#endif

+char *mapread(fd, nbytes)

+ int fd;

+ int nbytes;

+ char *p = (char *)mmap((caddr_t)0, (size_t)nbytes, PROT_READ,

+ MAP_FILE|MAP_PRIVATE, fd, (off_t)0);

+ if (p == (char *)-1)

+ return 0;

+ /* mmap() shouldn't return 0 since MAP_FIXED wasn't specified. */

+ if (p == 0)

+ abort();

+ return p;

+int unmap(p, len)

+ char *p;

+ int len;

+ return munmap((caddr_t)p, len);

+#else /* not HAVE_MMAP */

+#include <errno.h>

+#ifndef errno

+extern int errno;

+#endif

+char *mapread(fd, nbytes)

+ int fd;

+ int nbytes;

+ errno = ENODEV;

+ return 0;

+int unmap(p, len)

+ char *p;

+ int len;

+ errno = EINVAL;

+ return -1;

+#endif /* not HAVE_MMAP */

diff --git a/gnu/usr.bin/groff/libbib/search.cc b/gnu/usr.bin/groff/libbib/search.cc
new file mode 100644
index 00000000000..b5363b02d10
--- /dev/null
+++ b/gnu/usr.bin/groff/libbib/search.cc

@@ -0,0 +1,131 @@

+// -*- C++ -*-

+ Written by James Clark (jjc@jclark.com)

+This file is part of groff.

+groff is free software; you can redistribute it and/or modify it under

+the terms of the GNU General Public License as published by the Free

+Software Foundation; either version 2, or (at your option) any later

+version.

+groff is distributed in the hope that it will be useful, but WITHOUT ANY

+WARRANTY; without even the implied warranty of MERCHANTABILITY or

+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License

+for more details.

+You should have received a copy of the GNU General Public License along

+with groff; see the file COPYING. If not, write to the Free Software

+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */

+#include <stdlib.h>

+#include <string.h>

+#include <assert.h>

+#include <errno.h>

+#include "posix.h"

+#include "lib.h"

+#include "errarg.h"

+#include "error.h"

+#include "refid.h"

+#include "search.h"

+int linear_truncate_len = 6;

+const char *linear_ignore_fields = "XYZ";

+search_list::search_list()

+: list(0), niterators(0), next_fid(1)

+search_list::~search_list()

+ assert(niterators == 0);

+ while (list) {

+ search_item *tem = list->next;

+ delete list;

+ list = tem;

+ }

+void search_list::add_file(const char *filename, int silent)

+ search_item *p = make_index_search_item(filename, next_fid);

+ if (!p) {

+ int fd = open(filename, O_RDONLY);

+ if (fd < 0) {

+ if (!silent)

+ error("can't open `%1': %2", filename, strerror(errno));

+ }

+ else

+ p = make_linear_search_item(fd, filename, next_fid);

+ }

+ if (p) {

+ search_item **pp;

+ for (pp = &list; *pp; pp = &(*pp)->next)

+ ;

+ *pp = p;

+ next_fid = p->next_filename_id();

+ }

+int search_list::nfiles() const

+ int n = 0;

+ for (search_item *ptr = list; ptr; ptr = ptr->next)

+ n++;

+ return n;

+search_list_iterator::search_list_iterator(search_list *p, const char *q)

+: list(p), ptr(p->list), iter(0), query(strsave(q)),

+ searcher(q, strlen(q), linear_ignore_fields, linear_truncate_len)

+ list->niterators += 1;

+search_list_iterator::~search_list_iterator()

+ list->niterators -= 1;

+ a_delete query;

+ delete iter;

+int search_list_iterator::next(const char **pp, int *lenp, reference_id *ridp)

+ while (ptr) {

+ if (iter == 0)

+ iter = ptr->make_search_item_iterator(query);

+ if (iter->next(searcher, pp, lenp, ridp))

+ return 1;

+ delete iter;

+ iter = 0;

+ ptr = ptr->next;

+ }

+ return 0;

+search_item::search_item(const char *nm, int fid)

+: next(0), name(strsave(nm)), filename_id(fid)

+search_item::~search_item()

+ a_delete name;

+int search_item::is_named(const char *nm) const

+ return strcmp(name, nm) == 0;

+int search_item::next_filename_id() const

+ return filename_id + 1;

+search_item_iterator::~search_item_iterator()