diff options
author | etheisen <etheisen@cvs.openbsd.org> | 1996-09-14 19:02:02 +0000 |
---|---|---|
committer | etheisen <etheisen@cvs.openbsd.org> | 1996-09-14 19:02:02 +0000 |
commit | 6a4f3d0fd940c1e7052ddf23907f0e297dc42f4b (patch) | |
tree | 712fd665d6b150ef04906975a7493d87d38a1a8a /gnu/usr.bin/groff/libbib | |
parent | 9cf27152dae9dbf80b167732c78c47baee90ddc5 (diff) |
Third time because import sucks.
Diffstat (limited to 'gnu/usr.bin/groff/libbib')
-rw-r--r-- | gnu/usr.bin/groff/libbib/Makefile.dep | 12 | ||||
-rw-r--r-- | gnu/usr.bin/groff/libbib/Makefile.sub | 4 | ||||
-rw-r--r-- | gnu/usr.bin/groff/libbib/common.cc | 38 | ||||
-rw-r--r-- | gnu/usr.bin/groff/libbib/index.cc | 623 | ||||
-rw-r--r-- | gnu/usr.bin/groff/libbib/linear.cc | 489 | ||||
-rw-r--r-- | gnu/usr.bin/groff/libbib/map.c | 75 | ||||
-rw-r--r-- | gnu/usr.bin/groff/libbib/search.cc | 131 |
7 files changed, 1372 insertions, 0 deletions
diff --git a/gnu/usr.bin/groff/libbib/Makefile.dep b/gnu/usr.bin/groff/libbib/Makefile.dep new file mode 100644 index 00000000000..8c4974853d6 --- /dev/null +++ b/gnu/usr.bin/groff/libbib/Makefile.dep @@ -0,0 +1,12 @@ +common.o: common.cc +index.o: index.cc ../include/posix.h ../include/lib.h \ + ../include/cset.h ../include/cmap.h ../include/errarg.h \ + ../include/error.h ../include/refid.h ../include/search.h \ + ../include/index.h ../include/defs.h +linear.o: linear.cc ../include/posix.h ../include/lib.h \ + ../include/errarg.h ../include/error.h ../include/cset.h \ + ../include/cmap.h ../include/refid.h ../include/search.h +search.o: search.cc ../include/posix.h ../include/lib.h \ + ../include/errarg.h ../include/error.h ../include/refid.h \ + ../include/search.h +map.o: map.c diff --git a/gnu/usr.bin/groff/libbib/Makefile.sub b/gnu/usr.bin/groff/libbib/Makefile.sub new file mode 100644 index 00000000000..e9418288a49 --- /dev/null +++ b/gnu/usr.bin/groff/libbib/Makefile.sub @@ -0,0 +1,4 @@ +LIB=bib +OBJS=common.o index.o linear.o search.o map.o +CCSRCS=common.cc index.cc linear.cc search.cc +CSRCS=map.c diff --git a/gnu/usr.bin/groff/libbib/common.cc b/gnu/usr.bin/groff/libbib/common.cc new file mode 100644 index 00000000000..4b2bcca23e2 --- /dev/null +++ b/gnu/usr.bin/groff/libbib/common.cc @@ -0,0 +1,38 @@ +/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with groff; see the file COPYING. If not, write to the Free Software +Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +unsigned hash(const char *s, int len) +{ +#if 0 + unsigned h = 0, g; + while (*s != '\0') { + h <<= 4; + h += *s++; + if ((g = h & 0xf0000000) != 0) { + h ^= g >> 24; + h ^= g; + } + } +#endif + unsigned h = 0; + while (--len >= 0) + h = *s++ + 65587*h; + return h; +} + diff --git a/gnu/usr.bin/groff/libbib/index.cc b/gnu/usr.bin/groff/libbib/index.cc new file mode 100644 index 00000000000..2c11f06cdff --- /dev/null +++ b/gnu/usr.bin/groff/libbib/index.cc @@ -0,0 +1,623 @@ +// -*- C++ -*- +/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with groff; see the file COPYING. If not, write to the Free Software +Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> + +#include "posix.h" +#include "lib.h" +#include "cset.h" +#include "cmap.h" +#include "errarg.h" +#include "error.h" + +#include "refid.h" +#include "search.h" +#include "index.h" +#include "defs.h" + +// Interface to mmap. +extern "C" { + void *mapread(int fd, int len); + int unmap(void *, int len); +} + +#if 0 +const +#endif +int minus_one = -1; + +int verify_flag = 0; + +struct word_list; + +class index_search_item : public search_item { + search_item *out_of_date_files; + index_header header; + char *buffer; + void *map_addr; + int map_len; + tag *tags; + int *table; + int *lists; + char *pool; + char *key_buffer; + char *filename_buffer; + int filename_buflen; + char **common_words_table; + int common_words_table_size; + const char *ignore_fields; + time_t mtime; + + const char *do_verify(); + const int *search1(const char **pp, const char *end); + const int *search(const char *ptr, int length, int **temp_listp); + const char *munge_filename(const char *); + void read_common_words_file(); + void add_out_of_date_file(int fd, const char *filename, int fid); +public: + index_search_item(const char *, int); + ~index_search_item(); + int load(int fd); + search_item_iterator *make_search_item_iterator(const char *); + int verify(); + void check_files(); + int next_filename_id() const; + friend class index_search_item_iterator; +}; + +class index_search_item_iterator : public search_item_iterator { + index_search_item *indx; + search_item_iterator *out_of_date_files_iter; + search_item *next_out_of_date_file; + const int *found_list; + int *temp_list; + char *buf; + int buflen; + linear_searcher searcher; + char *query; + int get_tag(int tagno, const linear_searcher &, const char **, int *, + reference_id *); +public: + index_search_item_iterator(index_search_item *, const char *); + ~index_search_item_iterator(); + int next(const linear_searcher &, const char **, int *, reference_id *); +}; + + +index_search_item::index_search_item(const char *filename, int fid) +: search_item(filename, fid), out_of_date_files(0), key_buffer(0), + filename_buffer(0), filename_buflen(0), common_words_table(0), + map_addr(0), map_len(0), buffer(0) +{ +} + +index_search_item::~index_search_item() +{ + if (buffer) + free(buffer); + if (map_addr) { + if (unmap(map_addr, map_len) < 0) + error("unmap: %1", strerror(errno)); + } + while (out_of_date_files) { + search_item *tem = out_of_date_files; + out_of_date_files = out_of_date_files->next; + delete tem; + } + a_delete filename_buffer; + a_delete key_buffer; + if (common_words_table) { + for (int i = 0; i < common_words_table_size; i++) + a_delete common_words_table[i]; + a_delete common_words_table; + } +} + +class file_closer { + int *fdp; +public: + file_closer(int &fd) : fdp(&fd) { } + ~file_closer() { close(*fdp); } +}; + +// Tell the compiler that a variable is intentionally unused. +inline void unused(void *) { } + +int index_search_item::load(int fd) +{ + file_closer fd_closer(fd); // close fd on return + unused(&fd_closer); + struct stat sb; + if (fstat(fd, &sb) < 0) { + error("can't fstat `%1': %2", name, strerror(errno)); + return 0; + } + if (!S_ISREG(sb.st_mode)) { + error("`%1' is not a regular file", name); + return 0; + } + mtime = sb.st_mtime; + int size = int(sb.st_size); + char *addr; + map_addr = mapread(fd, size); + if (map_addr) { + addr = (char *)map_addr; + map_len = size; + } + else { + addr = buffer = (char *)malloc(size); + if (buffer == 0) { + error("can't allocate buffer for `%1'", name); + return 0; + } + char *ptr = buffer; + int bytes_to_read = size; + while (bytes_to_read > 0) { + int nread = read(fd, ptr, bytes_to_read); + if (nread == 0) { + error("unexpected EOF on `%1'", name); + return 0; + } + if (nread < 0) { + error("read error on `%1': %2", name, strerror(errno)); + return 0; + } + bytes_to_read -= nread; + ptr += nread; + } + } + header = *(index_header *)addr; + if (header.magic != INDEX_MAGIC) { + error("`%1' is not an index file: wrong magic number", name); + return 0; + } + if (header.version != INDEX_VERSION) { + error("version number in `%1' is wrong: was %2, should be %3", + name, header.version, INDEX_VERSION); + return 0; + } + int sz = (header.tags_size * sizeof(tag) + + header.lists_size * sizeof(int) + + header.table_size * sizeof(int) + + header.strings_size + + sizeof(header)); + if (sz != size) { + error("size of `%1' is wrong: was %2, should be %3", + name, size, sz); + return 0; + } + tags = (tag *)(addr + sizeof(header)); + lists = (int *)(tags + header.tags_size); + table = (int *)(lists + header.lists_size); + pool = (char *)(table + header.table_size); + ignore_fields = strchr(strchr(pool, '\0') + 1, '\0') + 1; + key_buffer = new char[header.truncate]; + read_common_words_file(); + return 1; +} + +const char *index_search_item::do_verify() +{ + if (tags == 0) + return "not loaded"; + if (lists[header.lists_size - 1] >= 0) + return "last list element not negative"; + int i; + for (i = 0; i < header.table_size; i++) { + int li = table[i]; + if (li >= header.lists_size) + return "bad list index"; + if (li >= 0) { + for (int *ptr = lists + li; *ptr >= 0; ptr++) { + if (*ptr >= header.tags_size) + return "bad tag index"; + if (*ptr >= ptr[1] && ptr[1] >= 0) + return "list not ordered"; + } + } + } + for (i = 0; i < header.tags_size; i++) { + if (tags[i].filename_index >= header.strings_size) + return "bad index in tags"; + if (tags[i].length < 0) + return "bad length in tags"; + if (tags[i].start < 0) + return "bad start in tags"; + } + if (pool[header.strings_size - 1] != '\0') + return "last character in pool not nul"; + return 0; +} + +int index_search_item::verify() +{ + const char *reason = do_verify(); + if (!reason) + return 1; + error("`%1' is bad: %2", name, reason); + return 0; +} + +int index_search_item::next_filename_id() const +{ + return filename_id + header.strings_size + 1; +} + +search_item_iterator *index_search_item::make_search_item_iterator( + const char *query) +{ + return new index_search_item_iterator(this, query); +} + +search_item *make_index_search_item(const char *filename, int fid) +{ + char *index_filename = new char[strlen(filename) + sizeof(INDEX_SUFFIX)]; + strcpy(index_filename, filename); + strcat(index_filename, INDEX_SUFFIX); + int fd = open(index_filename, O_RDONLY); + if (fd < 0) + return 0; + index_search_item *item = new index_search_item(index_filename, fid); + a_delete index_filename; + if (!item->load(fd)) { + close(fd); + delete item; + return 0; + } + else if (verify_flag && !item->verify()) { + delete item; + return 0; + } + else { + item->check_files(); + return item; + } +} + + +index_search_item_iterator::index_search_item_iterator(index_search_item *ind, + const char *q) +: indx(ind), buf(0), buflen(0), temp_list(0), query(strsave(q)), + searcher(q, strlen(q), ind->ignore_fields, ind->header.truncate), + out_of_date_files_iter(0), next_out_of_date_file(0) +{ + found_list = indx->search(q, strlen(q), &temp_list); + if (!found_list) { + found_list = &minus_one; + warning("all keys would have been discarded in constructing index `%1'", + indx->name); + } +} + +index_search_item_iterator::~index_search_item_iterator() +{ + a_delete temp_list; + a_delete buf; + a_delete query; + delete out_of_date_files_iter; +} + +int index_search_item_iterator::next(const linear_searcher &, + const char **pp, int *lenp, + reference_id *ridp) +{ + if (found_list) { + for (;;) { + int tagno = *found_list; + if (tagno == -1) + break; + found_list++; + if (get_tag(tagno, searcher, pp, lenp, ridp)) + return 1; + } + found_list = 0; + next_out_of_date_file = indx->out_of_date_files; + } + while (next_out_of_date_file) { + if (out_of_date_files_iter == 0) + out_of_date_files_iter + = next_out_of_date_file->make_search_item_iterator(query); + if (out_of_date_files_iter->next(searcher, pp, lenp, ridp)) + return 1; + delete out_of_date_files_iter; + out_of_date_files_iter = 0; + next_out_of_date_file = next_out_of_date_file->next; + } + return 0; +} + +int index_search_item_iterator::get_tag(int tagno, + const linear_searcher &searcher, + const char **pp, int *lenp, + reference_id *ridp) +{ + if (tagno < 0 || tagno >= indx->header.tags_size) { + error("bad tag number"); + return 0; + } + tag *tp = indx->tags + tagno; + const char *filename = indx->munge_filename(indx->pool + tp->filename_index); + int fd = open(filename, O_RDONLY); + if (fd < 0) { + error("can't open `%1': %2", filename, strerror(errno)); + return 0; + } + struct stat sb; + if (fstat(fd, &sb) < 0) { + error("can't fstat: %1", strerror(errno)); + close(fd); + return 0; + } + time_t mtime = sb.st_mtime; + if (mtime > indx->mtime) { + indx->add_out_of_date_file(fd, filename, + indx->filename_id + tp->filename_index); + return 0; + } + int res = 0; + FILE *fp = fdopen(fd, "r"); + if (!fp) { + error("fdopen failed"); + close(fd); + return 0; + } + if (tp->start != 0 && fseek(fp, long(tp->start), 0) < 0) + error("can't seek on `%1': %2", filename, strerror(errno)); + else { + int length = tp->length; + int err = 0; + if (length == 0) { + struct stat sb; + if (fstat(fileno(fp), &sb) < 0) { + error("can't stat `%1': %2", filename, strerror(errno)); + err = 1; + } + else if (!S_ISREG(sb.st_mode)) { + error("`%1' is not a regular file", filename); + err = 1; + } + else + length = int(sb.st_size); + } + if (!err) { + if (length + 2 > buflen) { + a_delete buf; + buflen = length + 2; + buf = new char[buflen]; + } + if (fread(buf + 1, 1, length, fp) != length) + error("fread on `%1' failed: %2", filename, strerror(errno)); + else { + buf[0] = '\n'; + buf[length + 1] = '\n'; + res = searcher.search(buf + 1, buf + 2 + length, pp, lenp); + if (res && ridp) + *ridp = reference_id(indx->filename_id + tp->filename_index, + tp->start); + } + } + } + fclose(fp); + return res; +} + +const char *index_search_item::munge_filename(const char *filename) +{ + if (filename[0] == '/') + return filename; + const char *cwd = pool; + int need_slash = (cwd[0] != 0 && strchr(cwd, '\0')[-1] != '/'); + int len = strlen(cwd) + strlen(filename) + need_slash + 1; + if (len > filename_buflen) { + a_delete filename_buffer; + filename_buflen = len; + filename_buffer = new char[len]; + } + strcpy(filename_buffer, cwd); + if (need_slash) + strcat(filename_buffer, "/"); + strcat(filename_buffer, filename); + return filename_buffer; +} + +const int *index_search_item::search1(const char **pp, const char *end) +{ + while (*pp < end && !csalnum(**pp)) + *pp += 1; + if (*pp >= end) + return 0; + const char *start = *pp; + while (*pp < end && csalnum(**pp)) + *pp += 1; + int len = *pp - start; + if (len < header.shortest) + return 0; + if (len > header.truncate) + len = header.truncate; + int is_number = 1; + for (int i = 0; i < len; i++) + if (csdigit(start[i])) + key_buffer[i] = start[i]; + else { + key_buffer[i] = cmlower(start[i]); + is_number = 0; + } + if (is_number && !(len == 4 && start[0] == '1' && start[1] == '9')) + return 0; + unsigned hc = hash(key_buffer, len); + if (common_words_table) { + for (int h = hc % common_words_table_size; + common_words_table[h]; + --h) { + if (strlen(common_words_table[h]) == len + && memcmp(common_words_table[h], key_buffer, len) == 0) + return 0; + if (h == 0) + h = common_words_table_size; + } + } + int li = table[int(hc % header.table_size)]; + return li < 0 ? &minus_one : lists + li; +} + +static void merge(int *result, const int *s1, const int *s2) +{ + for (; *s1 >= 0; s1++) { + while (*s2 >= 0 && *s2 < *s1) + s2++; + if (*s2 == *s1) + *result++ = *s2; + } + *result++ = -1; +} + +const int *index_search_item::search(const char *ptr, int length, + int **temp_listp) +{ + const char *end = ptr + length; + if (*temp_listp) { + a_delete *temp_listp; + *temp_listp = 0; + } + const int *first_list = 0; + while (ptr < end && (first_list = search1(&ptr, end)) == 0) + ; + if (!first_list) + return 0; + if (*first_list < 0) + return first_list; + const int *second_list = 0; + while (ptr < end && (second_list = search1(&ptr, end)) == 0) + ; + if (!second_list) + return first_list; + if (*second_list < 0) + return second_list; + const int *p; + for (p = first_list; *p >= 0; p++) + ; + int len = p - first_list; + for (p = second_list; *p >= 0; p++) + ; + if (p - second_list < len) + len = p - second_list; + int *matches = new int[len + 1]; + merge(matches, first_list, second_list); + while (ptr < end) { + const int *list = search1(&ptr, end); + if (list != 0) { + if (*list < 0) { + a_delete matches; + return list; + } + merge(matches, matches, list); + if (*matches < 0) { + a_delete matches; + return &minus_one; + } + } + } + *temp_listp = matches; + return matches; +} + +void index_search_item::read_common_words_file() +{ + if (header.common <= 0) + return; + const char *common_words_file = munge_filename(strchr(pool, '\0') + 1); + errno = 0; + FILE *fp = fopen(common_words_file, "r"); + if (!fp) { + error("can't open `%1': %2", common_words_file, strerror(errno)); + return; + } + common_words_table_size = 2*header.common + 1; + while (!is_prime(common_words_table_size)) + common_words_table_size++; + common_words_table = new char *[common_words_table_size]; + for (int i = 0; i < common_words_table_size; i++) + common_words_table[i] = 0; + int count = 0; + int key_len = 0; + for (;;) { + int c = getc(fp); + while (c != EOF && !csalnum(c)) + c = getc(fp); + if (c == EOF) + break; + do { + if (key_len < header.truncate) + key_buffer[key_len++] = cmlower(c); + c = getc(fp); + } while (c != EOF && csalnum(c)); + if (key_len >= header.shortest) { + int h = hash(key_buffer, key_len) % common_words_table_size; + while (common_words_table[h]) { + if (h == 0) + h = common_words_table_size; + --h; + } + common_words_table[h] = new char[key_len + 1]; + memcpy(common_words_table[h], key_buffer, key_len); + common_words_table[h][key_len] = '\0'; + } + if (++count >= header.common) + break; + key_len = 0; + if (c == EOF) + break; + } + fclose(fp); +} + +void index_search_item::add_out_of_date_file(int fd, const char *filename, + int fid) +{ + search_item **pp; + for (pp = &out_of_date_files; *pp; pp = &(*pp)->next) + if ((*pp)->is_named(filename)) + return; + *pp = make_linear_search_item(fd, filename, fid); + warning("`%1' modified since `%2' created", filename, name); +} + +void index_search_item::check_files() +{ + const char *pool_end = pool + header.strings_size; + for (const char *ptr = strchr(ignore_fields, '\0') + 1; + ptr < pool_end; + ptr = strchr(ptr, '\0') + 1) { + const char *path = munge_filename(ptr); + struct stat sb; + if (stat(path, &sb) < 0) + error("can't stat `%1': %2", path, strerror(errno)); + else if (sb.st_mtime > mtime) { + int fd = open(path, O_RDONLY); + if (fd < 0) + error("can't open `%1': %2", path, strerror(errno)); + else + add_out_of_date_file(fd, path, filename_id + (ptr - pool)); + } + } +} diff --git a/gnu/usr.bin/groff/libbib/linear.cc b/gnu/usr.bin/groff/libbib/linear.cc new file mode 100644 index 00000000000..3d2729af152 --- /dev/null +++ b/gnu/usr.bin/groff/libbib/linear.cc @@ -0,0 +1,489 @@ +// -*- C++ -*- +/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with groff; see the file COPYING. If not, write to the Free Software +Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + + +#include <string.h> +#include <stdlib.h> +#include <assert.h> +#include <errno.h> + +#include "posix.h" +#include "lib.h" +#include "errarg.h" +#include "error.h" +#include "cset.h" +#include "cmap.h" + +#include "refid.h" +#include "search.h" + +class file_buffer { + char *buffer; + char *bufend; +public: + file_buffer(); + ~file_buffer(); + int load(int fd, const char *filename); + const char *get_start() const; + const char *get_end() const; +}; + +typedef unsigned char uchar; + +static uchar map[256]; +static uchar inv_map[256][3]; + +struct map_init { + map_init(); +}; + +static map_init the_map_init; + +map_init::map_init() +{ + int i; + for (i = 0; i < 256; i++) + map[i] = csalnum(i) ? cmlower(i) : '\0'; + for (i = 0; i < 256; i++) { + if (cslower(i)) { + inv_map[i][0] = i; + inv_map[i][1] = cmupper(i); + inv_map[i][2] = '\0'; + } + else if (csdigit(i)) { + inv_map[i][0] = i; + inv_map[i][1] = 0; + } + else + inv_map[i][0] = '\0'; + } +} + + +class bmpattern { + char *pat; + int len; + int delta[256]; +public: + bmpattern(const char *pattern, int pattern_length); + ~bmpattern(); + const char *search(const char *p, const char *end) const; + int length() const; +}; + +bmpattern::bmpattern(const char *pattern, int pattern_length) +: len(pattern_length) +{ + pat = new char[len]; + int i; + for (i = 0; i < len; i++) + pat[i] = map[uchar(pattern[i])]; + for (i = 0; i < 256; i++) + delta[i] = len; + for (i = 0; i < len; i++) + for (const unsigned char *inv = inv_map[uchar(pat[i])]; *inv; inv++) + delta[*inv] = len - i - 1; +} + +const char *bmpattern::search(const char *buf, const char *end) const +{ + int buflen = end - buf; + if (len > buflen) + return 0; + const char *strend; + if (buflen > len*4) + strend = end - len*4; + else + strend = buf; + const char *k = buf + len - 1; + const int *del = delta; + const char *pattern = pat; + for (;;) { + while (k < strend) { + int t = del[uchar(*k)]; + if (!t) + break; + k += t; + k += del[uchar(*k)]; + k += del[uchar(*k)]; + } + while (k < end && del[uchar(*k)] != 0) + k++; + if (k == end) + break; + int j = len - 1; + const char *s = k; + for (;;) { + if (j == 0) + return s; + if (map[uchar(*--s)] != uchar(pattern[--j])) + break; + } + k++; + } + return 0; +} + +bmpattern::~bmpattern() +{ + a_delete pat; +} + +inline int bmpattern::length() const +{ + return len; +} + + +static const char *find_end(const char *bufend, const char *p); + +const char *linear_searcher::search_and_check(const bmpattern *key, + const char *buf, const char *bufend, const char **start) const +{ + assert(buf[-1] == '\n'); + assert(bufend[-1] == '\n'); + const char *ptr = buf; + for (;;) { + const char *found = key->search(ptr, bufend); + if (!found) + break; + if (check_match(buf, bufend, found, key->length(), &ptr, start)) + return found; + } + return 0; +} + +static const char *skip_field(const char *end, const char *p) +{ + for (;;) + if (*p++ == '\n') { + if (p == end || *p == '%') + break; + const char *q; + for (q = p; *q == ' ' || *q == '\t'; q++) + ; + if (*q == '\n') + break; + p = q + 1; + } + return p; +} + +static const char *find_end(const char *bufend, const char *p) +{ + for (;;) + if (*p++ == '\n') { + if (p == bufend) + break; + const char *q; + for (q = p; *q == ' ' || *q == '\t'; q++) + ; + if (*q == '\n') + break; + p = q + 1; + } + return p; +} + + +int linear_searcher::check_match(const char *buf, const char *bufend, + const char *match, int matchlen, + const char **cont, const char **start) const +{ + *cont = match + 1; + // The user is required to supply only the first truncate_len characters + // of the key. If truncate_len <= 0, he must supply all the key. + if ((truncate_len <= 0 || matchlen < truncate_len) + && map[uchar(match[matchlen])] != '\0') + return 0; + + // The character before the match must not be an alphanumeric + // character (unless the alphanumeric character follows one or two + // percent characters at the beginning of the line), nor must it be + // a percent character at the beginning of a line, nor a percent + // character following a percent character at the beginning of a + // line. + + switch (match - buf) { + case 0: + break; + case 1: + if (match[-1] == '%' || map[uchar(match[-1])] != '\0') + return 0; + break; + case 2: + if (map[uchar(match[-1])] != '\0' && match[-2] != '%') + return 0; + if (match[-1] == '%' + && (match[-2] == '\n' || match[-2] == '%')) + return 0; + break; + default: + if (map[uchar(match[-1])] != '\0' + && !(match[-2] == '%' + && (match[-3] == '\n' + || (match[-3] == '%' && match[-4] == '\n')))) + return 0; + if (match[-1] == '%' + && (match[-2] == '\n' + || (match[-2] == '%' && match[-3] == '\n'))) + return 0; + } + + const char *p = match; + int had_percent = 0; + for (;;) { + if (*p == '\n') { + if (!had_percent && p[1] == '%') { + if (p[2] != '\0' && strchr(ignore_fields, p[2]) != 0) { + *cont = skip_field(bufend, match + matchlen); + return 0; + } + if (!start) + break; + had_percent = 1; + } + if (p <= buf) { + if (start) + *start = p + 1; + return 1; + } + const char *q; + for (q = p - 1; *q == ' ' || *q == '\t'; q--) + ; + if (*q == '\n') { + if (start) + *start = p + 1; + break; + } + p = q; + } + p--; + } + return 1; +} + +file_buffer::file_buffer() +: buffer(0), bufend(0) +{ +} + +file_buffer::~file_buffer() +{ + a_delete buffer; +} + +const char *file_buffer::get_start() const +{ + return buffer ? buffer + 4 : 0; +} + +const char *file_buffer::get_end() const +{ + return bufend; +} + +int file_buffer::load(int fd, const char *filename) +{ + struct stat sb; + if (fstat(fd, &sb) < 0) + error("can't fstat `%1': %2", filename, strerror(errno)); + else if (!S_ISREG(sb.st_mode)) + error("`%1' is not a regular file", filename); + else { + // We need one character extra at the beginning for an additional newline + // used as a sentinel. We get 4 instead so that the read buffer will be + // word-aligned. This seems to make the read slightly faster. We also + // need one character at the end also for an additional newline used as a + // sentinel. + int size = int(sb.st_size); + buffer = new char[size + 4 + 1]; + int nread = read(fd, buffer + 4, size); + if (nread < 0) + error("error reading `%1': %2", filename, strerror(errno)); + else if (nread != size) + error("size of `%1' decreased", filename); + else { + char c; + nread = read(fd, &c, 1); + if (nread != 0) + error("size of `%1' increased", filename); + else if (memchr(buffer + 4, '\0', size < 1024 ? size : 1024) != 0) + error("database `%1' is a binary file", filename); + else { + close(fd); + buffer[3] = '\n'; + bufend = buffer + 4 + size; + if (bufend[-1] != '\n') + *bufend++ = '\n'; + return 1; + } + } + a_delete buffer; + buffer = 0; + } + close(fd); + return 0; +} + +linear_searcher::linear_searcher(const char *query, int query_len, + const char *ign, int trunc) +: keys(0), nkeys(0), truncate_len(trunc), ignore_fields(ign) +{ + const char *query_end = query + query_len; + int nk = 0; + const char *p; + for (p = query; p < query_end; p++) + if (map[uchar(*p)] != '\0' + && (p[1] == '\0' || map[uchar(p[1])] == '\0')) + nk++; + if (nk == 0) + return; + keys = new bmpattern*[nk]; + p = query; + for (;;) { + while (p < query_end && map[uchar(*p)] == '\0') + p++; + if (p == query_end) + break; + const char *start = p; + while (p < query_end && map[uchar(*p)] != '\0') + p++; + keys[nkeys++] = new bmpattern(start, p - start); + } + assert(nkeys <= nk); + if (nkeys == 0) { + a_delete keys; + keys = 0; + } +} + +linear_searcher::~linear_searcher() +{ + for (int i = 0; i < nkeys; i++) + delete keys[i]; + a_delete keys; +} + +int linear_searcher::search(const char *buffer, const char *bufend, + const char **startp, int *lengthp) const +{ + assert(bufend - buffer > 0); + assert(buffer[-1] == '\n'); + assert(bufend[-1] == '\n'); + if (nkeys == 0) + return 0; + for (;;) { + const char *refstart; + const char *found = search_and_check(keys[0], buffer, bufend, &refstart); + if (!found) + break; + const char *refend = find_end(bufend, found + keys[0]->length()); + int i; + for (i = 1; i < nkeys; i++) + if (!search_and_check(keys[i], refstart, refend)) + break; + if (i >= nkeys) { + *startp = refstart; + *lengthp = refend - refstart; + return 1; + } + buffer = refend; + } + return 0; +} + +class linear_search_item : public search_item { + file_buffer fbuf; +public: + linear_search_item(const char *filename, int fid); + ~linear_search_item(); + int load(int fd); + search_item_iterator *make_search_item_iterator(const char *); + friend class linear_search_item_iterator; +}; + +class linear_search_item_iterator : public search_item_iterator { + linear_search_item *lsi; + int pos; +public: + linear_search_item_iterator(linear_search_item *, const char *query); + ~linear_search_item_iterator(); + int next(const linear_searcher &, const char **ptr, int *lenp, + reference_id *ridp); +}; + +search_item *make_linear_search_item(int fd, const char *filename, int fid) +{ + linear_search_item *item = new linear_search_item(filename, fid); + if (!item->load(fd)) { + delete item; + return 0; + } + else + return item; +} + +linear_search_item::linear_search_item(const char *filename, int fid) +: search_item(filename, fid) +{ +} + +linear_search_item::~linear_search_item() +{ +} + +int linear_search_item::load(int fd) +{ + return fbuf.load(fd, name); +} + +search_item_iterator *linear_search_item::make_search_item_iterator( + const char *query) +{ + return new linear_search_item_iterator(this, query); +} + +linear_search_item_iterator::linear_search_item_iterator( + linear_search_item *p, const char *) +: lsi(p), pos(0) +{ +} + +linear_search_item_iterator::~linear_search_item_iterator() +{ +} + +int linear_search_item_iterator::next(const linear_searcher &searcher, + const char **startp, int *lengthp, + reference_id *ridp) +{ + const char *bufstart = lsi->fbuf.get_start(); + const char *bufend = lsi->fbuf.get_end(); + const char *ptr = bufstart + pos; + if (ptr < bufend && searcher.search(ptr, bufend, startp, lengthp)) { + pos = *startp + *lengthp - bufstart; + if (ridp) + *ridp = reference_id(lsi->filename_id, *startp - bufstart); + return 1; + } + else + return 0; +} diff --git a/gnu/usr.bin/groff/libbib/map.c b/gnu/usr.bin/groff/libbib/map.c new file mode 100644 index 00000000000..3632a11ef48 --- /dev/null +++ b/gnu/usr.bin/groff/libbib/map.c @@ -0,0 +1,75 @@ +/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with groff; see the file COPYING. If not, write to the Free Software +Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_MMAP + +#include <sys/types.h> +#include <sys/mman.h> + +/* The Net-2 man pages says that a MAP_FILE flag is required. */ +#ifndef MAP_FILE +#define MAP_FILE 0 +#endif + +char *mapread(fd, nbytes) + int fd; + int nbytes; +{ + char *p = (char *)mmap((caddr_t)0, (size_t)nbytes, PROT_READ, + MAP_FILE|MAP_PRIVATE, fd, (off_t)0); + if (p == (char *)-1) + return 0; + /* mmap() shouldn't return 0 since MAP_FIXED wasn't specified. */ + if (p == 0) + abort(); + return p; +} + +int unmap(p, len) + char *p; + int len; +{ + return munmap((caddr_t)p, len); +} + +#else /* not HAVE_MMAP */ + +#include <errno.h> + +#ifndef errno +extern int errno; +#endif + +char *mapread(fd, nbytes) + int fd; + int nbytes; +{ + errno = ENODEV; + return 0; +} + +int unmap(p, len) + char *p; + int len; +{ + errno = EINVAL; + return -1; +} + +#endif /* not HAVE_MMAP */ diff --git a/gnu/usr.bin/groff/libbib/search.cc b/gnu/usr.bin/groff/libbib/search.cc new file mode 100644 index 00000000000..b5363b02d10 --- /dev/null +++ b/gnu/usr.bin/groff/libbib/search.cc @@ -0,0 +1,131 @@ +// -*- C++ -*- +/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with groff; see the file COPYING. If not, write to the Free Software +Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <errno.h> + +#include "posix.h" +#include "lib.h" +#include "errarg.h" +#include "error.h" + +#include "refid.h" +#include "search.h" + +int linear_truncate_len = 6; +const char *linear_ignore_fields = "XYZ"; + +search_list::search_list() +: list(0), niterators(0), next_fid(1) +{ +} + +search_list::~search_list() +{ + assert(niterators == 0); + while (list) { + search_item *tem = list->next; + delete list; + list = tem; + } +} + +void search_list::add_file(const char *filename, int silent) +{ + search_item *p = make_index_search_item(filename, next_fid); + if (!p) { + int fd = open(filename, O_RDONLY); + if (fd < 0) { + if (!silent) + error("can't open `%1': %2", filename, strerror(errno)); + } + else + p = make_linear_search_item(fd, filename, next_fid); + } + if (p) { + search_item **pp; + for (pp = &list; *pp; pp = &(*pp)->next) + ; + *pp = p; + next_fid = p->next_filename_id(); + } +} + +int search_list::nfiles() const +{ + int n = 0; + for (search_item *ptr = list; ptr; ptr = ptr->next) + n++; + return n; +} + +search_list_iterator::search_list_iterator(search_list *p, const char *q) +: list(p), ptr(p->list), iter(0), query(strsave(q)), + searcher(q, strlen(q), linear_ignore_fields, linear_truncate_len) +{ + list->niterators += 1; +} + +search_list_iterator::~search_list_iterator() +{ + list->niterators -= 1; + a_delete query; + delete iter; +} + +int search_list_iterator::next(const char **pp, int *lenp, reference_id *ridp) +{ + while (ptr) { + if (iter == 0) + iter = ptr->make_search_item_iterator(query); + if (iter->next(searcher, pp, lenp, ridp)) + return 1; + delete iter; + iter = 0; + ptr = ptr->next; + } + return 0; +} + +search_item::search_item(const char *nm, int fid) +: next(0), name(strsave(nm)), filename_id(fid) +{ +} + +search_item::~search_item() +{ + a_delete name; +} + +int search_item::is_named(const char *nm) const +{ + return strcmp(name, nm) == 0; +} + +int search_item::next_filename_id() const +{ + return filename_id + 1; +} + +search_item_iterator::~search_item_iterator() +{ +} |