summaryrefslogtreecommitdiff
path: root/lib/libc/db
diff options
context:
space:
mode:
authorTheo de Raadt <deraadt@cvs.openbsd.org>1995-10-18 08:53:40 +0000
committerTheo de Raadt <deraadt@cvs.openbsd.org>1995-10-18 08:53:40 +0000
commitd6583bb2a13f329cf0332ef2570eb8bb8fc0e39c (patch)
treeece253b876159b39c620e62b6c9b1174642e070e /lib/libc/db
initial import of NetBSD tree
Diffstat (limited to 'lib/libc/db')
-rw-r--r--lib/libc/db/Makefile.inc11
-rw-r--r--lib/libc/db/README2
-rw-r--r--lib/libc/db/VERSION113
-rw-r--r--lib/libc/db/btree/Makefile.inc8
-rw-r--r--lib/libc/db/btree/bt_close.c210
-rw-r--r--lib/libc/db/btree/bt_conv.c227
-rw-r--r--lib/libc/db/btree/bt_debug.c337
-rw-r--r--lib/libc/db/btree/bt_delete.c330
-rw-r--r--lib/libc/db/btree/bt_get.c244
-rw-r--r--lib/libc/db/btree/bt_open.c446
-rw-r--r--lib/libc/db/btree/bt_overflow.c234
-rw-r--r--lib/libc/db/btree/bt_page.c99
-rw-r--r--lib/libc/db/btree/bt_put.c324
-rw-r--r--lib/libc/db/btree/bt_search.c241
-rw-r--r--lib/libc/db/btree/bt_seq.c386
-rw-r--r--lib/libc/db/btree/bt_split.c833
-rw-r--r--lib/libc/db/btree/bt_stack.c102
-rw-r--r--lib/libc/db/btree/bt_utils.c256
-rw-r--r--lib/libc/db/btree/btree.h355
-rw-r--r--lib/libc/db/btree/extern.h72
-rw-r--r--lib/libc/db/db/Makefile.inc6
-rw-r--r--lib/libc/db/db/db.c105
-rw-r--r--lib/libc/db/hash/Makefile.inc7
-rw-r--r--lib/libc/db/hash/README73
-rw-r--r--lib/libc/db/hash/extern.h67
-rw-r--r--lib/libc/db/hash/hash.c1000
-rw-r--r--lib/libc/db/hash/hash.h295
-rw-r--r--lib/libc/db/hash/hash_bigkey.c673
-rw-r--r--lib/libc/db/hash/hash_buf.c355
-rw-r--r--lib/libc/db/hash/hash_func.c218
-rw-r--r--lib/libc/db/hash/hash_log2.c60
-rw-r--r--lib/libc/db/hash/hash_page.c950
-rw-r--r--lib/libc/db/hash/hsearch.c113
-rw-r--r--lib/libc/db/hash/ndbm.c208
-rw-r--r--lib/libc/db/hash/page.h94
-rw-r--r--lib/libc/db/hash/search.h53
-rw-r--r--lib/libc/db/man/Makefile.inc8
-rw-r--r--lib/libc/db/man/btree.3228
-rw-r--r--lib/libc/db/man/dbopen.3478
-rw-r--r--lib/libc/db/man/hash.3154
-rw-r--r--lib/libc/db/man/mpool.3221
-rw-r--r--lib/libc/db/man/recno.3198
-rw-r--r--lib/libc/db/mpool/Makefile.inc6
-rw-r--r--lib/libc/db/mpool/README8
-rw-r--r--lib/libc/db/mpool/mpool.c540
-rw-r--r--lib/libc/db/recno/Makefile.inc7
-rw-r--r--lib/libc/db/recno/extern.h56
-rw-r--r--lib/libc/db/recno/rec_close.c169
-rw-r--r--lib/libc/db/recno/rec_delete.c203
-rw-r--r--lib/libc/db/recno/rec_get.c308
-rw-r--r--lib/libc/db/recno/rec_open.c236
-rw-r--r--lib/libc/db/recno/rec_put.c259
-rw-r--r--lib/libc/db/recno/rec_search.c133
-rw-r--r--lib/libc/db/recno/rec_seq.c137
-rw-r--r--lib/libc/db/recno/rec_utils.c125
-rw-r--r--lib/libc/db/recno/recno.h41
56 files changed, 12622 insertions, 0 deletions
diff --git a/lib/libc/db/Makefile.inc b/lib/libc/db/Makefile.inc
new file mode 100644
index 00000000000..6cdb4cf3013
--- /dev/null
+++ b/lib/libc/db/Makefile.inc
@@ -0,0 +1,11 @@
+# $NetBSD: Makefile.inc,v 1.5 1995/02/27 13:19:23 cgd Exp $
+# @(#)Makefile.inc 8.2 (Berkeley) 2/21/94
+#
+CFLAGS+=-D__DBINTERFACE_PRIVATE
+
+.include "${.CURDIR}/db/btree/Makefile.inc"
+.include "${.CURDIR}/db/db/Makefile.inc"
+.include "${.CURDIR}/db/hash/Makefile.inc"
+.include "${.CURDIR}/db/man/Makefile.inc"
+.include "${.CURDIR}/db/mpool/Makefile.inc"
+.include "${.CURDIR}/db/recno/Makefile.inc"
diff --git a/lib/libc/db/README b/lib/libc/db/README
new file mode 100644
index 00000000000..df1be282063
--- /dev/null
+++ b/lib/libc/db/README
@@ -0,0 +1,2 @@
+$NetBSD: README,v 1.2 1995/02/27 13:19:30 cgd Exp $
+(was symlink to VERSION. -- cgd)
diff --git a/lib/libc/db/VERSION b/lib/libc/db/VERSION
new file mode 100644
index 00000000000..440b71bad09
--- /dev/null
+++ b/lib/libc/db/VERSION
@@ -0,0 +1,113 @@
+# $NetBSD: VERSION,v 1.7 1995/02/27 13:19:33 cgd Exp $
+# @(#)VERSION 8.17 (Berkeley) 6/20/94
+
+This is version 1.79 of the Berkeley DB code.
+
+For information on compiling and installing this software, see the file
+PORT/README.
+
+If your version of the DB code doesn't have a copy of this version file,
+it's really old, please update it!
+
+Newer versions of this software will periodically be made available by
+anonymous ftp from ftp.cs.berkeley.edu. An archive in compressed format
+is in ucb/4bsd/db.tar.Z, or in gzip format in ucb/4bsd/db.tar.gz. If
+you'd like to receive announcements of future releases of this software,
+send email to the contact address below.
+
+Email questions may be addressed to Keith Bostic at bostic@cs.berkeley.edu.
+
+============================================
+Distribution contents:
+
+Makefile.inc Ignore this, it's the 4.4BSD subsystem Makefile.
+PORT The per OS/architecture directories to use to build
+ libdb.a, if you're not running 4.4BSD. See the file
+ PORT/README for more information.
+README This file.
+VERSION This file.
+btree B+tree routines.
+db Dbopen(3) interface routine.
+doc USENIX papers, formatted manual pages.
+hash Extended linear hashing routines.
+man Unformatted manual pages.
+mpool Memory pool routines.
+recno Fixed/variable length record routines.
+test Test package.
+
+============================================
+1.78 -> 1.79 Mon Jun 20 17:36:47 EDT 1994
+ all: Minor cleanups of 1.78 for porting reasons; only
+ major change was inlining check of NULL pointer
+ so that __fix_realloc goes away.
+
+1.77 -> 1.78 Thu Jun 16 19:06:43 EDT 1994
+ all: Move "standard" size typedef's into db.h.
+
+1.76 -> 1.77 Thu Jun 16 16:48:38 EDT 1994
+ hash: Delete __init_ routine, has special meaning to OSF 2.0.
+
+1.74 -> 1.76
+ all: Finish up the port to the Alpha.
+
+1.73 -> 1.74
+ recno: Don't put the record if rec_search fails, in rec_rdelete.
+ Create fixed-length intermediate records past "end" of DB
+ correctly.
+ Realloc bug when reading in fixed records.
+ all: First cut at port to Alpha (64-bit architecture) using
+ 4.4BSD basic integral types typedef's.
+ Cast allocation pointers to shut up old compilers.
+ Rework PORT directory into OS/machine directories.
+
+1.72 -> 1.73
+ btree: If enough duplicate records were inserted and then deleted
+ that internal pages had references to empty pages of the
+ duplicate keys, the search function ended up on the wrong
+ page.
+
+1.7 -> 1.72 12 Oct 1993
+ hash: Support NET/2 hash formats.
+
+1.7 -> 1.71 16 Sep 1993
+ btree/recno:
+ Fix bug in internal search routines that caused
+ return of invalid pointers.
+
+1.6 -> 1.7 07 Sep 1993
+ hash: Fixed big key overflow bugs.
+ test: Portability hacks, rewrite test script, Makefile.
+ btree/recno:
+ Stop copying non-overflow key/data pairs.
+ PORT: Break PORT directory up into per architecture/OS
+ subdirectories.
+
+1.5 -> 1.6 06 Jun 1993
+ hash: In PAIRFITS, the first comparison should look at (P)[2].
+ The hash_realloc function was walking off the end of memory.
+ The overflow page number was wrong when bumping splitpoint.
+
+1.4 -> 1.5 23 May 1993
+ hash: Set hash default fill factor dynamically.
+ recno: Fixed bug in sorted page splits.
+ Add page size parameter support.
+ Allow recno to specify the name of the underlying btree;
+ used for vi recovery.
+ btree/recno:
+ Support 64K pages.
+ btree/hash/recno:
+ Provide access to an underlying file descriptor.
+ Change sync routines to take a flag argument, recno
+ uses this to sync out the underlying btree.
+
+1.3 -> 1.4 10 May 1993
+ recno: Delete the R_CURSORLOG flag from the recno interface.
+ Zero-length record fix for non-mmap reads.
+ Try and make SIZE_T_MAX test in open portable.
+
+1.2 -> 1.3 01 May 1993
+ btree: Ignore user byte-order setting when reading already
+ existing database. Fixes to byte-order conversions.
+
+1.1 -> 1.2 15 Apr 1993
+ No bug fixes, only compatibility hacks.
diff --git a/lib/libc/db/btree/Makefile.inc b/lib/libc/db/btree/Makefile.inc
new file mode 100644
index 00000000000..d187103e2d3
--- /dev/null
+++ b/lib/libc/db/btree/Makefile.inc
@@ -0,0 +1,8 @@
+# $NetBSD: Makefile.inc,v 1.5 1995/02/27 13:19:53 cgd Exp $
+# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93
+
+.PATH: ${.CURDIR}/db/btree
+
+SRCS+= bt_close.c bt_conv.c bt_debug.c bt_delete.c bt_get.c bt_open.c \
+ bt_overflow.c bt_page.c bt_put.c bt_search.c bt_seq.c bt_split.c \
+ bt_stack.c bt_utils.c
diff --git a/lib/libc/db/btree/bt_close.c b/lib/libc/db/btree/bt_close.c
new file mode 100644
index 00000000000..2b27327f7ed
--- /dev/null
+++ b/lib/libc/db/btree/bt_close.c
@@ -0,0 +1,210 @@
+/* $NetBSD: bt_close.c,v 1.6 1995/02/27 13:19:59 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)bt_close.c 8.3 (Berkeley) 2/21/94";
+#else
+static char rcsid[] = "$NetBSD: bt_close.c,v 1.6 1995/02/27 13:19:59 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/param.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <db.h>
+#include "btree.h"
+
+static int bt_meta __P((BTREE *));
+
+/*
+ * BT_CLOSE -- Close a btree.
+ *
+ * Parameters:
+ * dbp: pointer to access method
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+__bt_close(dbp)
+ DB *dbp;
+{
+ BTREE *t;
+ int fd;
+
+ t = dbp->internal;
+
+ /* Toss any page pinned across calls. */
+ if (t->bt_pinned != NULL) {
+ mpool_put(t->bt_mp, t->bt_pinned, 0);
+ t->bt_pinned = NULL;
+ }
+
+ /*
+ * Delete any already deleted record that we've been saving
+ * because the cursor pointed to it.
+ */
+ if (ISSET(t, B_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor))
+ return (RET_ERROR);
+
+ if (__bt_sync(dbp, 0) == RET_ERROR)
+ return (RET_ERROR);
+
+ if (mpool_close(t->bt_mp) == RET_ERROR)
+ return (RET_ERROR);
+
+ if (t->bt_stack)
+ free(t->bt_stack);
+ if (t->bt_kbuf)
+ free(t->bt_kbuf);
+ if (t->bt_dbuf)
+ free(t->bt_dbuf);
+
+ fd = t->bt_fd;
+ free(t);
+ free(dbp);
+ return (close(fd) ? RET_ERROR : RET_SUCCESS);
+}
+
+/*
+ * BT_SYNC -- sync the btree to disk.
+ *
+ * Parameters:
+ * dbp: pointer to access method
+ *
+ * Returns:
+ * RET_SUCCESS, RET_ERROR.
+ */
+int
+__bt_sync(dbp, flags)
+ const DB *dbp;
+ u_int flags;
+{
+ BTREE *t;
+ int status;
+ PAGE *h;
+ void *p;
+
+ t = dbp->internal;
+
+ /* Toss any page pinned across calls. */
+ if (t->bt_pinned != NULL) {
+ mpool_put(t->bt_mp, t->bt_pinned, 0);
+ t->bt_pinned = NULL;
+ }
+
+ /* Sync doesn't currently take any flags. */
+ if (flags != 0) {
+ errno = EINVAL;
+ return (RET_ERROR);
+ }
+
+ if (ISSET(t, B_INMEM | B_RDONLY) || !ISSET(t, B_MODIFIED))
+ return (RET_SUCCESS);
+
+ if (ISSET(t, B_METADIRTY) && bt_meta(t) == RET_ERROR)
+ return (RET_ERROR);
+
+ /*
+ * Nastiness. If the cursor has been marked for deletion, but not
+ * actually deleted, we have to make a copy of the page, delete the
+ * key/data item, sync the file, and then restore the original page
+ * contents.
+ */
+ if (ISSET(t, B_DELCRSR)) {
+ if ((p = (void *)malloc(t->bt_psize)) == NULL)
+ return (RET_ERROR);
+ if ((h = mpool_get(t->bt_mp, t->bt_bcursor.pgno, 0)) == NULL)
+ return (RET_ERROR);
+ memmove(p, h, t->bt_psize);
+ if ((status =
+ __bt_dleaf(t, h, t->bt_bcursor.index)) == RET_ERROR)
+ goto ecrsr;
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+ }
+
+ if ((status = mpool_sync(t->bt_mp)) == RET_SUCCESS)
+ CLR(t, B_MODIFIED);
+
+ecrsr: if (ISSET(t, B_DELCRSR)) {
+ if ((h = mpool_get(t->bt_mp, t->bt_bcursor.pgno, 0)) == NULL)
+ return (RET_ERROR);
+ memmove(h, p, t->bt_psize);
+ free(p);
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+ }
+ return (status);
+}
+
+/*
+ * BT_META -- write the tree meta data to disk.
+ *
+ * Parameters:
+ * t: tree
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+static int
+bt_meta(t)
+ BTREE *t;
+{
+ BTMETA m;
+ void *p;
+
+ if ((p = mpool_get(t->bt_mp, P_META, 0)) == NULL)
+ return (RET_ERROR);
+
+ /* Fill in metadata. */
+ m.m_magic = BTREEMAGIC;
+ m.m_version = BTREEVERSION;
+ m.m_psize = t->bt_psize;
+ m.m_free = t->bt_free;
+ m.m_nrecs = t->bt_nrecs;
+ m.m_flags = t->bt_flags & SAVEMETA;
+
+ memmove(p, &m, sizeof(BTMETA));
+ mpool_put(t->bt_mp, p, MPOOL_DIRTY);
+ return (RET_SUCCESS);
+}
diff --git a/lib/libc/db/btree/bt_conv.c b/lib/libc/db/btree/bt_conv.c
new file mode 100644
index 00000000000..1d513865455
--- /dev/null
+++ b/lib/libc/db/btree/bt_conv.c
@@ -0,0 +1,227 @@
+/* $NetBSD: bt_conv.c,v 1.5 1995/02/27 13:20:04 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)bt_conv.c 8.3 (Berkeley) 5/31/94";
+#else
+static char rcsid[] = "$NetBSD: bt_conv.c,v 1.5 1995/02/27 13:20:04 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/param.h>
+
+#include <stdio.h>
+
+#include <db.h>
+#include "btree.h"
+
+static void mswap __P((PAGE *));
+
+/*
+ * __BT_BPGIN, __BT_BPGOUT --
+ * Convert host-specific number layout to/from the host-independent
+ * format stored on disk.
+ *
+ * Parameters:
+ * t: tree
+ * pg: page number
+ * h: page to convert
+ */
+void
+__bt_pgin(t, pg, pp)
+ void *t;
+ pgno_t pg;
+ void *pp;
+{
+ PAGE *h;
+ indx_t i, top;
+ u_char flags;
+ char *p;
+
+ if (!ISSET(((BTREE *)t), B_NEEDSWAP))
+ return;
+ if (pg == P_META) {
+ mswap(pp);
+ return;
+ }
+
+ h = pp;
+ M_32_SWAP(h->pgno);
+ M_32_SWAP(h->prevpg);
+ M_32_SWAP(h->nextpg);
+ M_32_SWAP(h->flags);
+ M_16_SWAP(h->lower);
+ M_16_SWAP(h->upper);
+
+ top = NEXTINDEX(h);
+ if ((h->flags & P_TYPE) == P_BINTERNAL)
+ for (i = 0; i < top; i++) {
+ M_16_SWAP(h->linp[i]);
+ p = (char *)GETBINTERNAL(h, i);
+ P_32_SWAP(p);
+ p += sizeof(u_int32_t);
+ P_32_SWAP(p);
+ p += sizeof(pgno_t);
+ if (*(u_char *)p & P_BIGKEY) {
+ p += sizeof(u_char);
+ P_32_SWAP(p);
+ p += sizeof(pgno_t);
+ P_32_SWAP(p);
+ }
+ }
+ else if ((h->flags & P_TYPE) == P_BLEAF)
+ for (i = 0; i < top; i++) {
+ M_16_SWAP(h->linp[i]);
+ p = (char *)GETBLEAF(h, i);
+ P_32_SWAP(p);
+ p += sizeof(u_int32_t);
+ P_32_SWAP(p);
+ p += sizeof(u_int32_t);
+ flags = *(u_char *)p;
+ if (flags & (P_BIGKEY | P_BIGDATA)) {
+ p += sizeof(u_char);
+ if (flags & P_BIGKEY) {
+ P_32_SWAP(p);
+ p += sizeof(pgno_t);
+ P_32_SWAP(p);
+ }
+ if (flags & P_BIGDATA) {
+ p += sizeof(u_int32_t);
+ P_32_SWAP(p);
+ p += sizeof(pgno_t);
+ P_32_SWAP(p);
+ }
+ }
+ }
+}
+
+void
+__bt_pgout(t, pg, pp)
+ void *t;
+ pgno_t pg;
+ void *pp;
+{
+ PAGE *h;
+ indx_t i, top;
+ u_char flags;
+ char *p;
+
+ if (!ISSET(((BTREE *)t), B_NEEDSWAP))
+ return;
+ if (pg == P_META) {
+ mswap(pp);
+ return;
+ }
+
+ h = pp;
+ top = NEXTINDEX(h);
+ if ((h->flags & P_TYPE) == P_BINTERNAL)
+ for (i = 0; i < top; i++) {
+ p = (char *)GETBINTERNAL(h, i);
+ P_32_SWAP(p);
+ p += sizeof(u_int32_t);
+ P_32_SWAP(p);
+ p += sizeof(pgno_t);
+ if (*(u_char *)p & P_BIGKEY) {
+ p += sizeof(u_char);
+ P_32_SWAP(p);
+ p += sizeof(pgno_t);
+ P_32_SWAP(p);
+ }
+ M_16_SWAP(h->linp[i]);
+ }
+ else if ((h->flags & P_TYPE) == P_BLEAF)
+ for (i = 0; i < top; i++) {
+ p = (char *)GETBLEAF(h, i);
+ P_32_SWAP(p);
+ p += sizeof(u_int32_t);
+ P_32_SWAP(p);
+ p += sizeof(u_int32_t);
+ flags = *(u_char *)p;
+ if (flags & (P_BIGKEY | P_BIGDATA)) {
+ p += sizeof(u_char);
+ if (flags & P_BIGKEY) {
+ P_32_SWAP(p);
+ p += sizeof(pgno_t);
+ P_32_SWAP(p);
+ }
+ if (flags & P_BIGDATA) {
+ p += sizeof(u_int32_t);
+ P_32_SWAP(p);
+ p += sizeof(pgno_t);
+ P_32_SWAP(p);
+ }
+ }
+ M_16_SWAP(h->linp[i]);
+ }
+
+ M_32_SWAP(h->pgno);
+ M_32_SWAP(h->prevpg);
+ M_32_SWAP(h->nextpg);
+ M_32_SWAP(h->flags);
+ M_16_SWAP(h->lower);
+ M_16_SWAP(h->upper);
+}
+
+/*
+ * MSWAP -- Actually swap the bytes on the meta page.
+ *
+ * Parameters:
+ * p: page to convert
+ */
+static void
+mswap(pg)
+ PAGE *pg;
+{
+ char *p;
+
+ p = (char *)pg;
+ P_32_SWAP(p); /* m_magic */
+ p += sizeof(u_int32_t);
+ P_32_SWAP(p); /* m_version */
+ p += sizeof(u_int32_t);
+ P_32_SWAP(p); /* m_psize */
+ p += sizeof(u_int32_t);
+ P_32_SWAP(p); /* m_free */
+ p += sizeof(u_int32_t);
+ P_32_SWAP(p); /* m_nrecs */
+ p += sizeof(u_int32_t);
+ P_32_SWAP(p); /* m_flags */
+ p += sizeof(u_int32_t);
+}
diff --git a/lib/libc/db/btree/bt_debug.c b/lib/libc/db/btree/bt_debug.c
new file mode 100644
index 00000000000..fb967122aaa
--- /dev/null
+++ b/lib/libc/db/btree/bt_debug.c
@@ -0,0 +1,337 @@
+/* $NetBSD: bt_debug.c,v 1.5 1995/02/27 13:20:12 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)bt_debug.c 8.3 (Berkeley) 5/31/94";
+#else
+static char rcsid[] = "$NetBSD: bt_debug.c,v 1.5 1995/02/27 13:20:12 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/param.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <db.h>
+#include "btree.h"
+
+#ifdef DEBUG
+/*
+ * BT_DUMP -- Dump the tree
+ *
+ * Parameters:
+ * dbp: pointer to the DB
+ */
+void
+__bt_dump(dbp)
+ DB *dbp;
+{
+ BTREE *t;
+ PAGE *h;
+ pgno_t i;
+ char *sep;
+
+ t = dbp->internal;
+ (void)fprintf(stderr, "%s: pgsz %d",
+ ISSET(t, B_INMEM) ? "memory" : "disk", t->bt_psize);
+ if (ISSET(t, R_RECNO))
+ (void)fprintf(stderr, " keys %lu", t->bt_nrecs);
+#undef X
+#define X(flag, name) \
+ if (ISSET(t, flag)) { \
+ (void)fprintf(stderr, "%s%s", sep, name); \
+ sep = ", "; \
+ }
+ if (t->bt_flags) {
+ sep = " flags (";
+ X(B_DELCRSR, "DELCRSR");
+ X(R_FIXLEN, "FIXLEN");
+ X(B_INMEM, "INMEM");
+ X(B_NODUPS, "NODUPS");
+ X(B_RDONLY, "RDONLY");
+ X(R_RECNO, "RECNO");
+ X(B_SEQINIT, "SEQINIT");
+ X(B_METADIRTY,"METADIRTY");
+ (void)fprintf(stderr, ")\n");
+ }
+#undef X
+
+ for (i = P_ROOT; (h = mpool_get(t->bt_mp, i, 0)) != NULL; ++i) {
+ __bt_dpage(h);
+ (void)mpool_put(t->bt_mp, h, 0);
+ }
+}
+
+/*
+ * BT_DMPAGE -- Dump the meta page
+ *
+ * Parameters:
+ * h: pointer to the PAGE
+ */
+void
+__bt_dmpage(h)
+ PAGE *h;
+{
+ BTMETA *m;
+ char *sep;
+
+ m = (BTMETA *)h;
+ (void)fprintf(stderr, "magic %lx\n", m->m_magic);
+ (void)fprintf(stderr, "version %lu\n", m->m_version);
+ (void)fprintf(stderr, "psize %lu\n", m->m_psize);
+ (void)fprintf(stderr, "free %lu\n", m->m_free);
+ (void)fprintf(stderr, "nrecs %lu\n", m->m_nrecs);
+ (void)fprintf(stderr, "flags %lu", m->m_flags);
+#undef X
+#define X(flag, name) \
+ if (m->m_flags & flag) { \
+ (void)fprintf(stderr, "%s%s", sep, name); \
+ sep = ", "; \
+ }
+ if (m->m_flags) {
+ sep = " (";
+ X(B_NODUPS, "NODUPS");
+ X(R_RECNO, "RECNO");
+ (void)fprintf(stderr, ")");
+ }
+}
+
+/*
+ * BT_DNPAGE -- Dump the page
+ *
+ * Parameters:
+ * n: page number to dump.
+ */
+void
+__bt_dnpage(dbp, pgno)
+ DB *dbp;
+ pgno_t pgno;
+{
+ BTREE *t;
+ PAGE *h;
+
+ t = dbp->internal;
+ if ((h = mpool_get(t->bt_mp, pgno, 0)) != NULL) {
+ __bt_dpage(h);
+ (void)mpool_put(t->bt_mp, h, 0);
+ }
+}
+
+/*
+ * BT_DPAGE -- Dump the page
+ *
+ * Parameters:
+ * h: pointer to the PAGE
+ */
+void
+__bt_dpage(h)
+ PAGE *h;
+{
+ BINTERNAL *bi;
+ BLEAF *bl;
+ RINTERNAL *ri;
+ RLEAF *rl;
+ indx_t cur, top;
+ char *sep;
+
+ (void)fprintf(stderr, " page %d: (", h->pgno);
+#undef X
+#define X(flag, name) \
+ if (h->flags & flag) { \
+ (void)fprintf(stderr, "%s%s", sep, name); \
+ sep = ", "; \
+ }
+ sep = "";
+ X(P_BINTERNAL, "BINTERNAL") /* types */
+ X(P_BLEAF, "BLEAF")
+ X(P_RINTERNAL, "RINTERNAL") /* types */
+ X(P_RLEAF, "RLEAF")
+ X(P_OVERFLOW, "OVERFLOW")
+ X(P_PRESERVE, "PRESERVE");
+ (void)fprintf(stderr, ")\n");
+#undef X
+
+ (void)fprintf(stderr, "\tprev %2d next %2d", h->prevpg, h->nextpg);
+ if (h->flags & P_OVERFLOW)
+ return;
+
+ top = NEXTINDEX(h);
+ (void)fprintf(stderr, " lower %3d upper %3d nextind %d\n",
+ h->lower, h->upper, top);
+ for (cur = 0; cur < top; cur++) {
+ (void)fprintf(stderr, "\t[%03d] %4d ", cur, h->linp[cur]);
+ switch(h->flags & P_TYPE) {
+ case P_BINTERNAL:
+ bi = GETBINTERNAL(h, cur);
+ (void)fprintf(stderr,
+ "size %03d pgno %03d", bi->ksize, bi->pgno);
+ if (bi->flags & P_BIGKEY)
+ (void)fprintf(stderr, " (indirect)");
+ else if (bi->ksize)
+ (void)fprintf(stderr,
+ " {%.*s}", (int)bi->ksize, bi->bytes);
+ break;
+ case P_RINTERNAL:
+ ri = GETRINTERNAL(h, cur);
+ (void)fprintf(stderr, "entries %03d pgno %03d",
+ ri->nrecs, ri->pgno);
+ break;
+ case P_BLEAF:
+ bl = GETBLEAF(h, cur);
+ if (bl->flags & P_BIGKEY)
+ (void)fprintf(stderr,
+ "big key page %lu size %u/",
+ *(pgno_t *)bl->bytes,
+ *(u_int32_t *)(bl->bytes + sizeof(pgno_t)));
+ else if (bl->ksize)
+ (void)fprintf(stderr, "%s/", bl->bytes);
+ if (bl->flags & P_BIGDATA)
+ (void)fprintf(stderr,
+ "big data page %lu size %u",
+ *(pgno_t *)(bl->bytes + bl->ksize),
+ *(u_int32_t *)(bl->bytes + bl->ksize +
+ sizeof(pgno_t)));
+ else if (bl->dsize)
+ (void)fprintf(stderr, "%.*s",
+ (int)bl->dsize, bl->bytes + bl->ksize);
+ break;
+ case P_RLEAF:
+ rl = GETRLEAF(h, cur);
+ if (rl->flags & P_BIGDATA)
+ (void)fprintf(stderr,
+ "big data page %lu size %u",
+ *(pgno_t *)rl->bytes,
+ *(u_int32_t *)(rl->bytes + sizeof(pgno_t)));
+ else if (rl->dsize)
+ (void)fprintf(stderr,
+ "%.*s", (int)rl->dsize, rl->bytes);
+ break;
+ }
+ (void)fprintf(stderr, "\n");
+ }
+}
+#endif
+
+#ifdef STATISTICS
+/*
+ * BT_STAT -- Gather/print the tree statistics
+ *
+ * Parameters:
+ * dbp: pointer to the DB
+ */
+void
+__bt_stat(dbp)
+ DB *dbp;
+{
+ extern u_long bt_cache_hit, bt_cache_miss, bt_pfxsaved, bt_rootsplit;
+ extern u_long bt_sortsplit, bt_split;
+ BTREE *t;
+ PAGE *h;
+ pgno_t i, pcont, pinternal, pleaf;
+ u_long ifree, lfree, nkeys;
+ int levels;
+
+ t = dbp->internal;
+ pcont = pinternal = pleaf = 0;
+ nkeys = ifree = lfree = 0;
+ for (i = P_ROOT; (h = mpool_get(t->bt_mp, i, 0)) != NULL; ++i) {
+ switch(h->flags & P_TYPE) {
+ case P_BINTERNAL:
+ case P_RINTERNAL:
+ ++pinternal;
+ ifree += h->upper - h->lower;
+ break;
+ case P_BLEAF:
+ case P_RLEAF:
+ ++pleaf;
+ lfree += h->upper - h->lower;
+ nkeys += NEXTINDEX(h);
+ break;
+ case P_OVERFLOW:
+ ++pcont;
+ break;
+ }
+ (void)mpool_put(t->bt_mp, h, 0);
+ }
+
+ /* Count the levels of the tree. */
+ for (i = P_ROOT, levels = 0 ;; ++levels) {
+ h = mpool_get(t->bt_mp, i, 0);
+ if (h->flags & (P_BLEAF|P_RLEAF)) {
+ if (levels == 0)
+ levels = 1;
+ (void)mpool_put(t->bt_mp, h, 0);
+ break;
+ }
+ i = ISSET(t, R_RECNO) ?
+ GETRINTERNAL(h, 0)->pgno :
+ GETBINTERNAL(h, 0)->pgno;
+ (void)mpool_put(t->bt_mp, h, 0);
+ }
+
+ (void)fprintf(stderr, "%d level%s with %ld keys",
+ levels, levels == 1 ? "" : "s", nkeys);
+ if (ISSET(t, R_RECNO))
+ (void)fprintf(stderr, " (%ld header count)", t->bt_nrecs);
+ (void)fprintf(stderr,
+ "\n%lu pages (leaf %ld, internal %ld, overflow %ld)\n",
+ pinternal + pleaf + pcont, pleaf, pinternal, pcont);
+ (void)fprintf(stderr, "%ld cache hits, %ld cache misses\n",
+ bt_cache_hit, bt_cache_miss);
+ (void)fprintf(stderr, "%ld splits (%ld root splits, %ld sort splits)\n",
+ bt_split, bt_rootsplit, bt_sortsplit);
+ pleaf *= t->bt_psize - BTDATAOFF;
+ if (pleaf)
+ (void)fprintf(stderr,
+ "%.0f%% leaf fill (%ld bytes used, %ld bytes free)\n",
+ ((double)(pleaf - lfree) / pleaf) * 100,
+ pleaf - lfree, lfree);
+ pinternal *= t->bt_psize - BTDATAOFF;
+ if (pinternal)
+ (void)fprintf(stderr,
+ "%.0f%% internal fill (%ld bytes used, %ld bytes free\n",
+ ((double)(pinternal - ifree) / pinternal) * 100,
+ pinternal - ifree, ifree);
+ if (bt_pfxsaved)
+ (void)fprintf(stderr, "prefix checking removed %lu bytes.\n",
+ bt_pfxsaved);
+}
+#endif
diff --git a/lib/libc/db/btree/bt_delete.c b/lib/libc/db/btree/bt_delete.c
new file mode 100644
index 00000000000..e5ced2bc5c9
--- /dev/null
+++ b/lib/libc/db/btree/bt_delete.c
@@ -0,0 +1,330 @@
+/* $NetBSD: bt_delete.c,v 1.6 1995/02/27 13:20:16 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)bt_delete.c 8.4 (Berkeley) 5/31/94";
+#else
+static char rcsid[] = "$NetBSD: bt_delete.c,v 1.6 1995/02/27 13:20:16 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <db.h>
+#include "btree.h"
+
+static int bt_bdelete __P((BTREE *, const DBT *));
+
+/*
+ * __BT_DELETE -- Delete the item(s) referenced by a key.
+ *
+ * Parameters:
+ * dbp: pointer to access method
+ * key: key to delete
+ * flags: R_CURSOR if deleting what the cursor references
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
+ */
+int
+__bt_delete(dbp, key, flags)
+ const DB *dbp;
+ const DBT *key;
+ u_int flags;
+{
+ BTREE *t;
+ int status;
+
+ t = dbp->internal;
+
+ /* Toss any page pinned across calls. */
+ if (t->bt_pinned != NULL) {
+ mpool_put(t->bt_mp, t->bt_pinned, 0);
+ t->bt_pinned = NULL;
+ }
+
+ if (ISSET(t, B_RDONLY)) {
+ errno = EPERM;
+ return (RET_ERROR);
+ }
+
+ switch(flags) {
+ case 0:
+ status = bt_bdelete(t, key);
+ break;
+ case R_CURSOR:
+ /*
+ * If flags is R_CURSOR, delete the cursor; must already have
+ * started a scan and not have already deleted the record. For
+ * the delete cursor bit to have been set requires that the
+ * scan be initialized, so no reason to check.
+ */
+ if (!ISSET(t, B_SEQINIT))
+ goto einval;
+ status = ISSET(t, B_DELCRSR) ?
+ RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor);
+ break;
+ default:
+einval: errno = EINVAL;
+ return (RET_ERROR);
+ }
+ if (status == RET_SUCCESS)
+ SET(t, B_MODIFIED);
+ return (status);
+}
+
+/*
+ * BT_BDELETE -- Delete all key/data pairs matching the specified key.
+ *
+ * Parameters:
+ * tree: tree
+ * key: key to delete
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
+ */
+static int
+bt_bdelete(t, key)
+ BTREE *t;
+ const DBT *key;
+{
+ EPG *e, save;
+ PAGE *h;
+ pgno_t cpgno, pg;
+ indx_t cindex;
+ int deleted, dirty1, dirty2, exact;
+
+ /* Find any matching record; __bt_search pins the page. */
+ if ((e = __bt_search(t, key, &exact)) == NULL)
+ return (RET_ERROR);
+ if (!exact) {
+ mpool_put(t->bt_mp, e->page, 0);
+ return (RET_SPECIAL);
+ }
+
+ /*
+ * Delete forward, then delete backward, from the found key. The
+ * ordering is so that the deletions don't mess up the page refs.
+ * The first loop deletes the key from the original page, the second
+ * unpins the original page. In the first loop, dirty1 is set if
+ * the original page is modified, and dirty2 is set if any subsequent
+ * pages are modified. In the second loop, dirty1 starts off set if
+ * the original page has been modified, and is set if any subsequent
+ * pages are modified.
+ *
+ * If find the key referenced by the cursor, don't delete it, just
+ * flag it for future deletion. The cursor page number is P_INVALID
+ * unless the sequential scan is initialized, so no reason to check.
+ * A special case is when the already deleted cursor record was the
+ * only record found. If so, then the delete opertion fails as no
+ * records were deleted.
+ *
+ * Cycle in place in the current page until the current record doesn't
+ * match the key or the page is empty. If the latter, walk forward,
+ * skipping empty pages and repeating until a record doesn't match
+ * the key or the end of the tree is reached.
+ */
+ cpgno = t->bt_bcursor.pgno;
+ cindex = t->bt_bcursor.index;
+ save = *e;
+ dirty1 = 0;
+ for (h = e->page, deleted = 0;;) {
+ dirty2 = 0;
+ do {
+ if (h->pgno == cpgno && e->index == cindex) {
+ if (!ISSET(t, B_DELCRSR)) {
+ SET(t, B_DELCRSR);
+ deleted = 1;
+ }
+ ++e->index;
+ } else {
+ if (__bt_dleaf(t, h, e->index)) {
+ if (h->pgno != save.page->pgno)
+ mpool_put(t->bt_mp, h, dirty2);
+ mpool_put(t->bt_mp, save.page, dirty1);
+ return (RET_ERROR);
+ }
+ if (h->pgno == save.page->pgno)
+ dirty1 = MPOOL_DIRTY;
+ else
+ dirty2 = MPOOL_DIRTY;
+ deleted = 1;
+ }
+ } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
+
+ /*
+ * Quit if didn't find a match, no next page, or first key on
+ * the next page doesn't match. Don't unpin the original page
+ * unless an error occurs.
+ */
+ if (e->index < NEXTINDEX(h))
+ break;
+ for (;;) {
+ if ((pg = h->nextpg) == P_INVALID)
+ goto done1;
+ if (h->pgno != save.page->pgno)
+ mpool_put(t->bt_mp, h, dirty2);
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) {
+ mpool_put(t->bt_mp, save.page, dirty1);
+ return (RET_ERROR);
+ }
+ if (NEXTINDEX(h) != 0) {
+ e->page = h;
+ e->index = 0;
+ break;
+ }
+ }
+
+ if (__bt_cmp(t, key, e) != 0)
+ break;
+ }
+
+ /*
+ * Reach here with the original page and the last page referenced
+ * pinned (they may be the same). Release it if not the original.
+ */
+done1: if (h->pgno != save.page->pgno)
+ mpool_put(t->bt_mp, h, dirty2);
+
+ /*
+ * Walk backwards from the record previous to the record returned by
+ * __bt_search, skipping empty pages, until a record doesn't match
+ * the key or reach the beginning of the tree.
+ */
+ *e = save;
+ for (;;) {
+ if (e->index)
+ --e->index;
+ for (h = e->page; e->index; --e->index) {
+ if (__bt_cmp(t, key, e) != 0)
+ goto done2;
+ if (h->pgno == cpgno && e->index == cindex) {
+ if (!ISSET(t, B_DELCRSR)) {
+ SET(t, B_DELCRSR);
+ deleted = 1;
+ }
+ } else {
+ if (__bt_dleaf(t, h, e->index) == RET_ERROR) {
+ mpool_put(t->bt_mp, h, dirty1);
+ return (RET_ERROR);
+ }
+ if (h->pgno == save.page->pgno)
+ dirty1 = MPOOL_DIRTY;
+ deleted = 1;
+ }
+ }
+
+ if ((pg = h->prevpg) == P_INVALID)
+ goto done2;
+ mpool_put(t->bt_mp, h, dirty1);
+ dirty1 = 0;
+ if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ return (RET_ERROR);
+ e->index = NEXTINDEX(e->page);
+ }
+
+ /*
+ * Reach here with the last page that was looked at pinned. Release
+ * it.
+ */
+done2: mpool_put(t->bt_mp, h, dirty1);
+ return (deleted ? RET_SUCCESS : RET_SPECIAL);
+}
+
+/*
+ * __BT_DLEAF -- Delete a single record from a leaf page.
+ *
+ * Parameters:
+ * t: tree
+ * index: index on current page to delete
+ *
+ * Returns:
+ * RET_SUCCESS, RET_ERROR.
+ */
+int
+__bt_dleaf(t, h, index)
+ BTREE *t;
+ PAGE *h;
+ indx_t index;
+{
+ register BLEAF *bl;
+ register indx_t cnt, *ip, offset;
+ register u_int32_t nbytes;
+ char *from;
+ void *to;
+
+ /*
+ * Delete a record from a btree leaf page. Internal records are never
+ * deleted from internal pages, regardless of the records that caused
+ * them to be added being deleted. Pages made empty by deletion are
+ * not reclaimed. They are, however, made available for reuse.
+ *
+ * Pack the remaining entries at the end of the page, shift the indices
+ * down, overwriting the deleted record and its index. If the record
+ * uses overflow pages, make them available for reuse.
+ */
+ to = bl = GETBLEAF(h, index);
+ if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
+ return (RET_ERROR);
+ if (bl->flags & P_BIGDATA &&
+ __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
+ return (RET_ERROR);
+ nbytes = NBLEAF(bl);
+
+ /*
+ * Compress the key/data pairs. Compress and adjust the [BR]LEAF
+ * offsets. Reset the headers.
+ */
+ from = (char *)h + h->upper;
+ memmove(from + nbytes, from, (char *)to - from);
+ h->upper += nbytes;
+
+ offset = h->linp[index];
+ for (cnt = index, ip = &h->linp[0]; cnt--; ++ip)
+ if (ip[0] < offset)
+ ip[0] += nbytes;
+ for (cnt = NEXTINDEX(h) - index; --cnt; ++ip)
+ ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
+ h->lower -= sizeof(indx_t);
+ return (RET_SUCCESS);
+}
diff --git a/lib/libc/db/btree/bt_get.c b/lib/libc/db/btree/bt_get.c
new file mode 100644
index 00000000000..2adc73feb7a
--- /dev/null
+++ b/lib/libc/db/btree/bt_get.c
@@ -0,0 +1,244 @@
+/* $NetBSD: bt_get.c,v 1.6 1995/02/27 13:20:22 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)bt_get.c 8.2 (Berkeley) 9/7/93";
+#else
+static char rcsid[] = "$NetBSD: bt_get.c,v 1.6 1995/02/27 13:20:22 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include <db.h>
+#include "btree.h"
+
+/*
+ * __BT_GET -- Get a record from the btree.
+ *
+ * Parameters:
+ * dbp: pointer to access method
+ * key: key to find
+ * data: data to return
+ * flag: currently unused
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
+ */
+int
+__bt_get(dbp, key, data, flags)
+ const DB *dbp;
+ const DBT *key;
+ DBT *data;
+ u_int flags;
+{
+ BTREE *t;
+ EPG *e;
+ int exact, status;
+
+ t = dbp->internal;
+
+ /* Toss any page pinned across calls. */
+ if (t->bt_pinned != NULL) {
+ mpool_put(t->bt_mp, t->bt_pinned, 0);
+ t->bt_pinned = NULL;
+ }
+
+ /* Get currently doesn't take any flags. */
+ if (flags) {
+ errno = EINVAL;
+ return (RET_ERROR);
+ }
+
+ if ((e = __bt_search(t, key, &exact)) == NULL)
+ return (RET_ERROR);
+ if (!exact) {
+ mpool_put(t->bt_mp, e->page, 0);
+ return (RET_SPECIAL);
+ }
+
+ /*
+ * A special case is if we found the record but it's flagged for
+ * deletion. In this case, we want to find another record with the
+ * same key, if it exists. Rather than look around the tree we call
+ * __bt_first and have it redo the search, as __bt_first will not
+ * return keys marked for deletion. Slow, but should never happen.
+ */
+ if (ISSET(t, B_DELCRSR) && e->page->pgno == t->bt_bcursor.pgno &&
+ e->index == t->bt_bcursor.index) {
+ mpool_put(t->bt_mp, e->page, 0);
+ if ((e = __bt_first(t, key, &exact)) == NULL)
+ return (RET_ERROR);
+ if (!exact)
+ return (RET_SPECIAL);
+ }
+
+ status = __bt_ret(t, e, NULL, data);
+ /*
+ * If the user is doing concurrent access, we copied the
+ * key/data, toss the page.
+ */
+ if (ISSET(t, B_DB_LOCK))
+ mpool_put(t->bt_mp, e->page, 0);
+ else
+ t->bt_pinned = e->page;
+ return (status);
+}
+
+/*
+ * __BT_FIRST -- Find the first entry.
+ *
+ * Parameters:
+ * t: the tree
+ * key: the key
+ *
+ * Returns:
+ * The first entry in the tree greater than or equal to key.
+ */
+EPG *
+__bt_first(t, key, exactp)
+ BTREE *t;
+ const DBT *key;
+ int *exactp;
+{
+ register PAGE *h;
+ register EPG *e;
+ EPG save;
+ pgno_t cpgno, pg;
+ indx_t cindex;
+ int found;
+
+ /*
+ * Find any matching record; __bt_search pins the page. Only exact
+ * matches are tricky, otherwise just return the location of the key
+ * if it were to be inserted into the tree.
+ */
+ if ((e = __bt_search(t, key, exactp)) == NULL)
+ return (NULL);
+ if (!*exactp)
+ return (e);
+
+ if (ISSET(t, B_DELCRSR)) {
+ cpgno = t->bt_bcursor.pgno;
+ cindex = t->bt_bcursor.index;
+ } else {
+ cpgno = P_INVALID;
+ cindex = 0; /* GCC thinks it's uninitialized. */
+ }
+
+ /*
+ * Walk backwards, skipping empty pages, as long as the entry matches
+ * and there are keys left in the tree. Save a copy of each match in
+ * case we go too far. A special case is that we don't return a match
+ * on records that the cursor references that have already been flagged
+ * for deletion.
+ */
+ save = *e;
+ h = e->page;
+ found = 0;
+ do {
+ if (cpgno != h->pgno || cindex != e->index) {
+ if (save.page->pgno != e->page->pgno) {
+ mpool_put(t->bt_mp, save.page, 0);
+ save = *e;
+ } else
+ save.index = e->index;
+ found = 1;
+ }
+ /*
+ * Make a special effort not to unpin the page the last (or
+ * original) match was on, but also make sure it's unpinned
+ * if an error occurs.
+ */
+ while (e->index == 0) {
+ if (h->prevpg == P_INVALID)
+ goto done1;
+ if (h->pgno != save.page->pgno)
+ mpool_put(t->bt_mp, h, 0);
+ if ((h = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) {
+ if (h->pgno == save.page->pgno)
+ mpool_put(t->bt_mp, save.page, 0);
+ return (NULL);
+ }
+ e->page = h;
+ e->index = NEXTINDEX(h);
+ }
+ --e->index;
+ } while (__bt_cmp(t, key, e) == 0);
+
+ /*
+ * Reach here with the last page that was looked at pinned, which may
+ * or may not be the same as the last (or original) match page. If
+ * it's not useful, release it.
+ */
+done1: if (h->pgno != save.page->pgno)
+ mpool_put(t->bt_mp, h, 0);
+
+ /*
+ * If still haven't found a record, the only possibility left is the
+ * next one. Move forward one slot, skipping empty pages and check.
+ */
+ if (!found) {
+ h = save.page;
+ if (++save.index == NEXTINDEX(h)) {
+ do {
+ pg = h->nextpg;
+ mpool_put(t->bt_mp, h, 0);
+ if (pg == P_INVALID) {
+ *exactp = 0;
+ return (e);
+ }
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ return (NULL);
+ } while ((save.index = NEXTINDEX(h)) == 0);
+ save.page = h;
+ }
+ if (__bt_cmp(t, key, &save) != 0) {
+ *exactp = 0;
+ return (e);
+ }
+ }
+ *e = save;
+ *exactp = 1;
+ return (e);
+}
diff --git a/lib/libc/db/btree/bt_open.c b/lib/libc/db/btree/bt_open.c
new file mode 100644
index 00000000000..63162702051
--- /dev/null
+++ b/lib/libc/db/btree/bt_open.c
@@ -0,0 +1,446 @@
+/* $NetBSD: bt_open.c,v 1.7 1995/02/27 13:20:27 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)bt_open.c 8.7 (Berkeley) 6/16/94";
+#else
+static char rcsid[] = "$NetBSD: bt_open.c,v 1.7 1995/02/27 13:20:27 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * Implementation of btree access method for 4.4BSD.
+ *
+ * The design here was originally based on that of the btree access method
+ * used in the Postgres database system at UC Berkeley. This implementation
+ * is wholly independent of the Postgres code.
+ */
+
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <db.h>
+#include "btree.h"
+
+static int byteorder __P((void));
+static int nroot __P((BTREE *));
+static int tmp __P((void));
+
+/*
+ * __BT_OPEN -- Open a btree.
+ *
+ * Creates and fills a DB struct, and calls the routine that actually
+ * opens the btree.
+ *
+ * Parameters:
+ * fname: filename (NULL for in-memory trees)
+ * flags: open flag bits
+ * mode: open permission bits
+ * b: BTREEINFO pointer
+ *
+ * Returns:
+ * NULL on failure, pointer to DB on success.
+ *
+ */
+DB *
+__bt_open(fname, flags, mode, openinfo, dflags)
+ const char *fname;
+ int flags, mode, dflags;
+ const BTREEINFO *openinfo;
+{
+ struct stat sb;
+ BTMETA m;
+ BTREE *t;
+ BTREEINFO b;
+ DB *dbp;
+ pgno_t ncache;
+ ssize_t nr;
+ int machine_lorder;
+
+ t = NULL;
+
+ /*
+ * Intention is to make sure all of the user's selections are okay
+ * here and then use them without checking. Can't be complete, since
+ * we don't know the right page size, lorder or flags until the backing
+ * file is opened. Also, the file's page size can cause the cachesize
+ * to change.
+ */
+ machine_lorder = byteorder();
+ if (openinfo) {
+ b = *openinfo;
+
+ /* Flags: R_DUP. */
+ if (b.flags & ~(R_DUP))
+ goto einval;
+
+ /*
+ * Page size must be indx_t aligned and >= MINPSIZE. Default
+ * page size is set farther on, based on the underlying file
+ * transfer size.
+ */
+ if (b.psize &&
+ (b.psize < MINPSIZE || b.psize > MAX_PAGE_OFFSET + 1 ||
+ b.psize & sizeof(indx_t) - 1))
+ goto einval;
+
+ /* Minimum number of keys per page; absolute minimum is 2. */
+ if (b.minkeypage) {
+ if (b.minkeypage < 2)
+ goto einval;
+ } else
+ b.minkeypage = DEFMINKEYPAGE;
+
+ /* If no comparison, use default comparison and prefix. */
+ if (b.compare == NULL) {
+ b.compare = __bt_defcmp;
+ if (b.prefix == NULL)
+ b.prefix = __bt_defpfx;
+ }
+
+ if (b.lorder == 0)
+ b.lorder = machine_lorder;
+ } else {
+ b.compare = __bt_defcmp;
+ b.cachesize = 0;
+ b.flags = 0;
+ b.lorder = machine_lorder;
+ b.minkeypage = DEFMINKEYPAGE;
+ b.prefix = __bt_defpfx;
+ b.psize = 0;
+ }
+
+ /* Check for the ubiquitous PDP-11. */
+ if (b.lorder != BIG_ENDIAN && b.lorder != LITTLE_ENDIAN)
+ goto einval;
+
+ /* Allocate and initialize DB and BTREE structures. */
+ if ((t = (BTREE *)malloc(sizeof(BTREE))) == NULL)
+ goto err;
+ memset(t, 0, sizeof(BTREE));
+ t->bt_bcursor.pgno = P_INVALID;
+ t->bt_fd = -1; /* Don't close unopened fd on error. */
+ t->bt_lorder = b.lorder;
+ t->bt_order = NOT;
+ t->bt_cmp = b.compare;
+ t->bt_pfx = b.prefix;
+ t->bt_rfd = -1;
+
+ if ((t->bt_dbp = dbp = (DB *)malloc(sizeof(DB))) == NULL)
+ goto err;
+ t->bt_flags = 0;
+ if (t->bt_lorder != machine_lorder)
+ SET(t, B_NEEDSWAP);
+
+ dbp->type = DB_BTREE;
+ dbp->internal = t;
+ dbp->close = __bt_close;
+ dbp->del = __bt_delete;
+ dbp->fd = __bt_fd;
+ dbp->get = __bt_get;
+ dbp->put = __bt_put;
+ dbp->seq = __bt_seq;
+ dbp->sync = __bt_sync;
+
+ /*
+ * If no file name was supplied, this is an in-memory btree and we
+ * open a backing temporary file. Otherwise, it's a disk-based tree.
+ */
+ if (fname) {
+ switch(flags & O_ACCMODE) {
+ case O_RDONLY:
+ SET(t, B_RDONLY);
+ break;
+ case O_RDWR:
+ break;
+ case O_WRONLY:
+ default:
+ goto einval;
+ }
+
+ if ((t->bt_fd = open(fname, flags, mode)) < 0)
+ goto err;
+
+ } else {
+ if ((flags & O_ACCMODE) != O_RDWR)
+ goto einval;
+ if ((t->bt_fd = tmp()) == -1)
+ goto err;
+ SET(t, B_INMEM);
+ }
+
+ if (fcntl(t->bt_fd, F_SETFD, 1) == -1)
+ goto err;
+
+ if (fstat(t->bt_fd, &sb))
+ goto err;
+ if (sb.st_size) {
+ if ((nr = read(t->bt_fd, &m, sizeof(BTMETA))) < 0)
+ goto err;
+ if (nr != sizeof(BTMETA))
+ goto eftype;
+
+ /*
+ * Read in the meta-data. This can change the notion of what
+ * the lorder, page size and flags are, and, when the page size
+ * changes, the cachesize value can change too. If the user
+ * specified the wrong byte order for an existing database, we
+ * don't bother to return an error, we just clear the NEEDSWAP
+ * bit.
+ */
+ if (m.m_magic == BTREEMAGIC)
+ CLR(t, B_NEEDSWAP);
+ else {
+ SET(t, B_NEEDSWAP);
+ M_32_SWAP(m.m_magic);
+ M_32_SWAP(m.m_version);
+ M_32_SWAP(m.m_psize);
+ M_32_SWAP(m.m_free);
+ M_32_SWAP(m.m_nrecs);
+ M_32_SWAP(m.m_flags);
+ }
+ if (m.m_magic != BTREEMAGIC || m.m_version != BTREEVERSION)
+ goto eftype;
+ if (m.m_psize < MINPSIZE || m.m_psize > MAX_PAGE_OFFSET + 1 ||
+ m.m_psize & sizeof(indx_t) - 1)
+ goto eftype;
+ if (m.m_flags & ~SAVEMETA)
+ goto eftype;
+ b.psize = m.m_psize;
+ t->bt_flags |= m.m_flags;
+ t->bt_free = m.m_free;
+ t->bt_nrecs = m.m_nrecs;
+ } else {
+ /*
+ * Set the page size to the best value for I/O to this file.
+ * Don't overflow the page offset type.
+ */
+ if (b.psize == 0) {
+ b.psize = sb.st_blksize;
+ if (b.psize < MINPSIZE)
+ b.psize = MINPSIZE;
+ if (b.psize > MAX_PAGE_OFFSET + 1)
+ b.psize = MAX_PAGE_OFFSET + 1;
+ }
+
+ /* Set flag if duplicates permitted. */
+ if (!(b.flags & R_DUP))
+ SET(t, B_NODUPS);
+
+ t->bt_free = P_INVALID;
+ t->bt_nrecs = 0;
+ SET(t, B_METADIRTY);
+ }
+
+ t->bt_psize = b.psize;
+
+ /* Set the cache size; must be a multiple of the page size. */
+ if (b.cachesize && b.cachesize & b.psize - 1)
+ b.cachesize += (~b.cachesize & b.psize - 1) + 1;
+ if (b.cachesize < b.psize * MINCACHE)
+ b.cachesize = b.psize * MINCACHE;
+
+ /* Calculate number of pages to cache. */
+ ncache = (b.cachesize + t->bt_psize - 1) / t->bt_psize;
+
+ /*
+ * The btree data structure requires that at least two keys can fit on
+ * a page, but other than that there's no fixed requirement. The user
+ * specified a minimum number per page, and we translated that into the
+ * number of bytes a key/data pair can use before being placed on an
+ * overflow page. This calculation includes the page header, the size
+ * of the index referencing the leaf item and the size of the leaf item
+ * structure. Also, don't let the user specify a minkeypage such that
+ * a key/data pair won't fit even if both key and data are on overflow
+ * pages.
+ */
+ t->bt_ovflsize = (t->bt_psize - BTDATAOFF) / b.minkeypage -
+ (sizeof(indx_t) + NBLEAFDBT(0, 0));
+ if (t->bt_ovflsize < NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t))
+ t->bt_ovflsize =
+ NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t);
+
+ /* Initialize the buffer pool. */
+ if ((t->bt_mp =
+ mpool_open(NULL, t->bt_fd, t->bt_psize, ncache)) == NULL)
+ goto err;
+ if (!ISSET(t, B_INMEM))
+ mpool_filter(t->bt_mp, __bt_pgin, __bt_pgout, t);
+
+ /* Create a root page if new tree. */
+ if (nroot(t) == RET_ERROR)
+ goto err;
+
+ /* Global flags. */
+ if (dflags & DB_LOCK)
+ SET(t, B_DB_LOCK);
+ if (dflags & DB_SHMEM)
+ SET(t, B_DB_SHMEM);
+ if (dflags & DB_TXN)
+ SET(t, B_DB_TXN);
+
+ return (dbp);
+
+einval: errno = EINVAL;
+ goto err;
+
+eftype: errno = EFTYPE;
+ goto err;
+
+err: if (t) {
+ if (t->bt_dbp)
+ free(t->bt_dbp);
+ if (t->bt_fd != -1)
+ (void)close(t->bt_fd);
+ free(t);
+ }
+ return (NULL);
+}
+
+/*
+ * NROOT -- Create the root of a new tree.
+ *
+ * Parameters:
+ * t: tree
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+static int
+nroot(t)
+ BTREE *t;
+{
+ PAGE *meta, *root;
+ pgno_t npg;
+
+ if ((meta = mpool_get(t->bt_mp, 0, 0)) != NULL) {
+ mpool_put(t->bt_mp, meta, 0);
+ return (RET_SUCCESS);
+ }
+ if (errno != EINVAL) /* It's OK to not exist. */
+ return (RET_ERROR);
+ errno = 0;
+
+ if ((meta = mpool_new(t->bt_mp, &npg)) == NULL)
+ return (RET_ERROR);
+
+ if ((root = mpool_new(t->bt_mp, &npg)) == NULL)
+ return (RET_ERROR);
+
+ if (npg != P_ROOT)
+ return (RET_ERROR);
+ root->pgno = npg;
+ root->prevpg = root->nextpg = P_INVALID;
+ root->lower = BTDATAOFF;
+ root->upper = t->bt_psize;
+ root->flags = P_BLEAF;
+ memset(meta, 0, t->bt_psize);
+ mpool_put(t->bt_mp, meta, MPOOL_DIRTY);
+ mpool_put(t->bt_mp, root, MPOOL_DIRTY);
+ return (RET_SUCCESS);
+}
+
+static int
+tmp()
+{
+ sigset_t set, oset;
+ int fd;
+ char *envtmp;
+ char path[MAXPATHLEN];
+
+ envtmp = getenv("TMPDIR");
+ (void)snprintf(path,
+ sizeof(path), "%s/bt.XXXXXX", envtmp ? envtmp : "/tmp");
+
+ (void)sigfillset(&set);
+ (void)sigprocmask(SIG_BLOCK, &set, &oset);
+ if ((fd = mkstemp(path)) != -1)
+ (void)unlink(path);
+ (void)sigprocmask(SIG_SETMASK, &oset, NULL);
+ return(fd);
+}
+
+static int
+byteorder()
+{
+ u_int32_t x;
+ u_char *p;
+
+ x = 0x01020304;
+ p = (u_char *)&x;
+ switch (*p) {
+ case 1:
+ return (BIG_ENDIAN);
+ case 4:
+ return (LITTLE_ENDIAN);
+ default:
+ return (0);
+ }
+}
+
+int
+__bt_fd(dbp)
+ const DB *dbp;
+{
+ BTREE *t;
+
+ t = dbp->internal;
+
+ /* Toss any page pinned across calls. */
+ if (t->bt_pinned != NULL) {
+ mpool_put(t->bt_mp, t->bt_pinned, 0);
+ t->bt_pinned = NULL;
+ }
+
+ /* In-memory database can't have a file descriptor. */
+ if (ISSET(t, B_INMEM)) {
+ errno = ENOENT;
+ return (-1);
+ }
+ return (t->bt_fd);
+}
diff --git a/lib/libc/db/btree/bt_overflow.c b/lib/libc/db/btree/bt_overflow.c
new file mode 100644
index 00000000000..b5c43c60435
--- /dev/null
+++ b/lib/libc/db/btree/bt_overflow.c
@@ -0,0 +1,234 @@
+/* $NetBSD: bt_overflow.c,v 1.5 1995/02/27 13:20:33 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)bt_overflow.c 8.4 (Berkeley) 6/20/94";
+#else
+static char rcsid[] = "$NetBSD: bt_overflow.c,v 1.5 1995/02/27 13:20:33 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/param.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <db.h>
+#include "btree.h"
+
+/*
+ * Big key/data code.
+ *
+ * Big key and data entries are stored on linked lists of pages. The initial
+ * reference is byte string stored with the key or data and is the page number
+ * and size. The actual record is stored in a chain of pages linked by the
+ * nextpg field of the PAGE header.
+ *
+ * The first page of the chain has a special property. If the record is used
+ * by an internal page, it cannot be deleted and the P_PRESERVE bit will be set
+ * in the header.
+ *
+ * XXX
+ * A single DBT is written to each chain, so a lot of space on the last page
+ * is wasted. This is a fairly major bug for some data sets.
+ */
+
+/*
+ * __OVFL_GET -- Get an overflow key/data item.
+ *
+ * Parameters:
+ * t: tree
+ * p: pointer to { pgno_t, u_int32_t }
+ * buf: storage address
+ * bufsz: storage size
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+__ovfl_get(t, p, ssz, buf, bufsz)
+ BTREE *t;
+ void *p;
+ size_t *ssz;
+ char **buf;
+ size_t *bufsz;
+{
+ PAGE *h;
+ pgno_t pg;
+ size_t nb, plen;
+ u_int32_t sz;
+
+ memmove(&pg, p, sizeof(pgno_t));
+ memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t));
+ *ssz = sz;
+
+#ifdef DEBUG
+ if (pg == P_INVALID || sz == 0)
+ abort();
+#endif
+ /* Make the buffer bigger as necessary. */
+ if (*bufsz < sz) {
+ *buf = (char *)(*buf == NULL ? malloc(sz) : realloc(*buf, sz));
+ if (*buf == NULL)
+ return (RET_ERROR);
+ *bufsz = sz;
+ }
+
+ /*
+ * Step through the linked list of pages, copying the data on each one
+ * into the buffer. Never copy more than the data's length.
+ */
+ plen = t->bt_psize - BTDATAOFF;
+ for (p = *buf;; p = (char *)p + nb, pg = h->nextpg) {
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ return (RET_ERROR);
+
+ nb = MIN(sz, plen);
+ memmove(p, (char *)h + BTDATAOFF, nb);
+ mpool_put(t->bt_mp, h, 0);
+
+ if ((sz -= nb) == 0)
+ break;
+ }
+ return (RET_SUCCESS);
+}
+
+/*
+ * __OVFL_PUT -- Store an overflow key/data item.
+ *
+ * Parameters:
+ * t: tree
+ * data: DBT to store
+ * pgno: storage page number
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+__ovfl_put(t, dbt, pg)
+ BTREE *t;
+ const DBT *dbt;
+ pgno_t *pg;
+{
+ PAGE *h, *last;
+ void *p;
+ pgno_t npg;
+ size_t nb, plen;
+ u_int32_t sz;
+
+ /*
+ * Allocate pages and copy the key/data record into them. Store the
+ * number of the first page in the chain.
+ */
+ plen = t->bt_psize - BTDATAOFF;
+ for (last = NULL, p = dbt->data, sz = dbt->size;;
+ p = (char *)p + plen, last = h) {
+ if ((h = __bt_new(t, &npg)) == NULL)
+ return (RET_ERROR);
+
+ h->pgno = npg;
+ h->nextpg = h->prevpg = P_INVALID;
+ h->flags = P_OVERFLOW;
+ h->lower = h->upper = 0;
+
+ nb = MIN(sz, plen);
+ memmove((char *)h + BTDATAOFF, p, nb);
+
+ if (last) {
+ last->nextpg = h->pgno;
+ mpool_put(t->bt_mp, last, MPOOL_DIRTY);
+ } else
+ *pg = h->pgno;
+
+ if ((sz -= nb) == 0) {
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+ break;
+ }
+ }
+ return (RET_SUCCESS);
+}
+
+/*
+ * __OVFL_DELETE -- Delete an overflow chain.
+ *
+ * Parameters:
+ * t: tree
+ * p: pointer to { pgno_t, u_int32_t }
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+__ovfl_delete(t, p)
+ BTREE *t;
+ void *p;
+{
+ PAGE *h;
+ pgno_t pg;
+ size_t plen;
+ u_int32_t sz;
+
+ memmove(&pg, p, sizeof(pgno_t));
+ memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t));
+
+#ifdef DEBUG
+ if (pg == P_INVALID || sz == 0)
+ abort();
+#endif
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ return (RET_ERROR);
+
+ /* Don't delete chains used by internal pages. */
+ if (h->flags & P_PRESERVE) {
+ mpool_put(t->bt_mp, h, 0);
+ return (RET_SUCCESS);
+ }
+
+ /* Step through the chain, calling the free routine for each page. */
+ for (plen = t->bt_psize - BTDATAOFF;; sz -= plen) {
+ pg = h->nextpg;
+ __bt_free(t, h);
+ if (sz <= plen)
+ break;
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ return (RET_ERROR);
+ }
+ return (RET_SUCCESS);
+}
diff --git a/lib/libc/db/btree/bt_page.c b/lib/libc/db/btree/bt_page.c
new file mode 100644
index 00000000000..d6a44941854
--- /dev/null
+++ b/lib/libc/db/btree/bt_page.c
@@ -0,0 +1,99 @@
+/* $NetBSD: bt_page.c,v 1.5 1995/02/27 13:20:36 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)bt_page.c 8.2 (Berkeley) 2/21/94";
+#else
+static char rcsid[] = "$NetBSD: bt_page.c,v 1.5 1995/02/27 13:20:36 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <stdio.h>
+
+#include <db.h>
+#include "btree.h"
+
+/*
+ * __BT_FREE -- Put a page on the freelist.
+ *
+ * Parameters:
+ * t: tree
+ * h: page to free
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+__bt_free(t, h)
+ BTREE *t;
+ PAGE *h;
+{
+ /* Insert the page at the start of the free list. */
+ h->prevpg = P_INVALID;
+ h->nextpg = t->bt_free;
+ t->bt_free = h->pgno;
+
+ /* Make sure the page gets written back. */
+ return (mpool_put(t->bt_mp, h, MPOOL_DIRTY));
+}
+
+/*
+ * __BT_NEW -- Get a new page, preferably from the freelist.
+ *
+ * Parameters:
+ * t: tree
+ * npg: storage for page number.
+ *
+ * Returns:
+ * Pointer to a page, NULL on error.
+ */
+PAGE *
+__bt_new(t, npg)
+ BTREE *t;
+ pgno_t *npg;
+{
+ PAGE *h;
+
+ if (t->bt_free != P_INVALID &&
+ (h = mpool_get(t->bt_mp, t->bt_free, 0)) != NULL) {
+ *npg = t->bt_free;
+ t->bt_free = h->nextpg;
+ return (h);
+ }
+ return (mpool_new(t->bt_mp, npg));
+}
diff --git a/lib/libc/db/btree/bt_put.c b/lib/libc/db/btree/bt_put.c
new file mode 100644
index 00000000000..9ddc98d8ec1
--- /dev/null
+++ b/lib/libc/db/btree/bt_put.c
@@ -0,0 +1,324 @@
+/* $NetBSD: bt_put.c,v 1.7 1995/02/27 13:20:40 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)bt_put.c 8.4 (Berkeley) 5/31/94";
+#else
+static char rcsid[] = "$NetBSD: bt_put.c,v 1.7 1995/02/27 13:20:40 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <db.h>
+#include "btree.h"
+
+static EPG *bt_fast __P((BTREE *, const DBT *, const DBT *, int *));
+
+/*
+ * __BT_PUT -- Add a btree item to the tree.
+ *
+ * Parameters:
+ * dbp: pointer to access method
+ * key: key
+ * data: data
+ * flag: R_NOOVERWRITE
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is already in the
+ * tree and R_NOOVERWRITE specified.
+ */
+int
+__bt_put(dbp, key, data, flags)
+ const DB *dbp;
+ DBT *key;
+ const DBT *data;
+ u_int flags;
+{
+ BTREE *t;
+ DBT tkey, tdata;
+ EPG *e;
+ PAGE *h;
+ indx_t index, nxtindex;
+ pgno_t pg;
+ u_int32_t nbytes;
+ int dflags, exact, status;
+ char *dest, db[NOVFLSIZE], kb[NOVFLSIZE];
+
+ t = dbp->internal;
+
+ /* Toss any page pinned across calls. */
+ if (t->bt_pinned != NULL) {
+ mpool_put(t->bt_mp, t->bt_pinned, 0);
+ t->bt_pinned = NULL;
+ }
+
+ switch (flags) {
+ case R_CURSOR:
+ if (!ISSET(t, B_SEQINIT))
+ goto einval;
+ if (ISSET(t, B_DELCRSR))
+ goto einval;
+ break;
+ case 0:
+ case R_NOOVERWRITE:
+ break;
+ default:
+einval: errno = EINVAL;
+ return (RET_ERROR);
+ }
+
+ if (ISSET(t, B_RDONLY)) {
+ errno = EPERM;
+ return (RET_ERROR);
+ }
+
+ /*
+ * If the key/data won't fit on a page, store it on indirect pages.
+ * Only store the key on the overflow page if it's too big after the
+ * data is on an overflow page.
+ *
+ * XXX
+ * If the insert fails later on, these pages aren't recovered.
+ */
+ dflags = 0;
+ if (key->size + data->size > t->bt_ovflsize) {
+ if (key->size > t->bt_ovflsize) {
+storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR)
+ return (RET_ERROR);
+ tkey.data = kb;
+ tkey.size = NOVFLSIZE;
+ memmove(kb, &pg, sizeof(pgno_t));
+ memmove(kb + sizeof(pgno_t),
+ &key->size, sizeof(u_int32_t));
+ dflags |= P_BIGKEY;
+ key = &tkey;
+ }
+ if (key->size + data->size > t->bt_ovflsize) {
+ if (__ovfl_put(t, data, &pg) == RET_ERROR)
+ return (RET_ERROR);
+ tdata.data = db;
+ tdata.size = NOVFLSIZE;
+ memmove(db, &pg, sizeof(pgno_t));
+ memmove(db + sizeof(pgno_t),
+ &data->size, sizeof(u_int32_t));
+ dflags |= P_BIGDATA;
+ data = &tdata;
+ }
+ if (key->size + data->size > t->bt_ovflsize)
+ goto storekey;
+ }
+
+ /* Replace the cursor. */
+ if (flags == R_CURSOR) {
+ if ((h = mpool_get(t->bt_mp, t->bt_bcursor.pgno, 0)) == NULL)
+ return (RET_ERROR);
+ index = t->bt_bcursor.index;
+ goto delete;
+ }
+
+ /*
+ * Find the key to delete, or, the location at which to insert. Bt_fast
+ * and __bt_search pin the returned page.
+ */
+ if (t->bt_order == NOT || (e = bt_fast(t, key, data, &exact)) == NULL)
+ if ((e = __bt_search(t, key, &exact)) == NULL)
+ return (RET_ERROR);
+ h = e->page;
+ index = e->index;
+
+ /*
+ * Add the specified key/data pair to the tree. If an identical key
+ * is already in the tree, and R_NOOVERWRITE is set, an error is
+ * returned. If R_NOOVERWRITE is not set, the key is either added (if
+ * duplicates are permitted) or an error is returned.
+ *
+ * Pages are split as required.
+ */
+ switch (flags) {
+ case R_NOOVERWRITE:
+ if (!exact)
+ break;
+ /*
+ * One special case is if the cursor references the record and
+ * it's been flagged for deletion. Then, we delete the record,
+ * leaving the cursor there -- this means that the inserted
+ * record will not be seen in a cursor scan.
+ */
+ if (ISSET(t, B_DELCRSR) && t->bt_bcursor.pgno == h->pgno &&
+ t->bt_bcursor.index == index) {
+ CLR(t, B_DELCRSR);
+ goto delete;
+ }
+ mpool_put(t->bt_mp, h, 0);
+ return (RET_SPECIAL);
+ default:
+ if (!exact || !ISSET(t, B_NODUPS))
+ break;
+delete: if (__bt_dleaf(t, h, index) == RET_ERROR) {
+ mpool_put(t->bt_mp, h, 0);
+ return (RET_ERROR);
+ }
+ break;
+ }
+
+ /*
+ * If not enough room, or the user has put a ceiling on the number of
+ * keys permitted in the page, split the page. The split code will
+ * insert the key and data and unpin the current page. If inserting
+ * into the offset array, shift the pointers up.
+ */
+ nbytes = NBLEAFDBT(key->size, data->size);
+ if (h->upper - h->lower < nbytes + sizeof(indx_t)) {
+ if ((status = __bt_split(t, h, key,
+ data, dflags, nbytes, index)) != RET_SUCCESS)
+ return (status);
+ goto success;
+ }
+
+ if (index < (nxtindex = NEXTINDEX(h)))
+ memmove(h->linp + index + 1, h->linp + index,
+ (nxtindex - index) * sizeof(indx_t));
+ h->lower += sizeof(indx_t);
+
+ h->linp[index] = h->upper -= nbytes;
+ dest = (char *)h + h->upper;
+ WR_BLEAF(dest, key, data, dflags);
+
+ if (t->bt_order == NOT)
+ if (h->nextpg == P_INVALID) {
+ if (index == NEXTINDEX(h) - 1) {
+ t->bt_order = FORWARD;
+ t->bt_last.index = index;
+ t->bt_last.pgno = h->pgno;
+ }
+ } else if (h->prevpg == P_INVALID) {
+ if (index == 0) {
+ t->bt_order = BACK;
+ t->bt_last.index = 0;
+ t->bt_last.pgno = h->pgno;
+ }
+ }
+
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+
+success:
+ if (flags == R_SETCURSOR) {
+ t->bt_bcursor.pgno = e->page->pgno;
+ t->bt_bcursor.index = e->index;
+ }
+ SET(t, B_MODIFIED);
+ return (RET_SUCCESS);
+}
+
+#ifdef STATISTICS
+u_long bt_cache_hit, bt_cache_miss;
+#endif
+
+/*
+ * BT_FAST -- Do a quick check for sorted data.
+ *
+ * Parameters:
+ * t: tree
+ * key: key to insert
+ *
+ * Returns:
+ * EPG for new record or NULL if not found.
+ */
+static EPG *
+bt_fast(t, key, data, exactp)
+ BTREE *t;
+ const DBT *key, *data;
+ int *exactp;
+{
+ PAGE *h;
+ u_int32_t nbytes;
+ int cmp;
+
+ if ((h = mpool_get(t->bt_mp, t->bt_last.pgno, 0)) == NULL) {
+ t->bt_order = NOT;
+ return (NULL);
+ }
+ t->bt_cur.page = h;
+ t->bt_cur.index = t->bt_last.index;
+
+ /*
+ * If won't fit in this page or have too many keys in this page, have
+ * to search to get split stack.
+ */
+ nbytes = NBLEAFDBT(key->size, data->size);
+ if (h->upper - h->lower < nbytes + sizeof(indx_t))
+ goto miss;
+
+ if (t->bt_order == FORWARD) {
+ if (t->bt_cur.page->nextpg != P_INVALID)
+ goto miss;
+ if (t->bt_cur.index != NEXTINDEX(h) - 1)
+ goto miss;
+ if ((cmp = __bt_cmp(t, key, &t->bt_cur)) < 0)
+ goto miss;
+ t->bt_last.index = cmp ? ++t->bt_cur.index : t->bt_cur.index;
+ } else {
+ if (t->bt_cur.page->prevpg != P_INVALID)
+ goto miss;
+ if (t->bt_cur.index != 0)
+ goto miss;
+ if ((cmp = __bt_cmp(t, key, &t->bt_cur)) > 0)
+ goto miss;
+ t->bt_last.index = 0;
+ }
+ *exactp = cmp == 0;
+#ifdef STATISTICS
+ ++bt_cache_hit;
+#endif
+ return (&t->bt_cur);
+
+miss:
+#ifdef STATISTICS
+ ++bt_cache_miss;
+#endif
+ t->bt_order = NOT;
+ mpool_put(t->bt_mp, h, 0);
+ return (NULL);
+}
diff --git a/lib/libc/db/btree/bt_search.c b/lib/libc/db/btree/bt_search.c
new file mode 100644
index 00000000000..c98fe7346ea
--- /dev/null
+++ b/lib/libc/db/btree/bt_search.c
@@ -0,0 +1,241 @@
+/* $NetBSD: bt_search.c,v 1.7 1995/02/27 13:20:44 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)bt_search.c 8.6 (Berkeley) 3/15/94";
+#else
+static char rcsid[] = "$NetBSD: bt_search.c,v 1.7 1995/02/27 13:20:44 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <stdio.h>
+
+#include <db.h>
+#include "btree.h"
+
+static int bt_snext __P((BTREE *, PAGE *, const DBT *, int *));
+static int bt_sprev __P((BTREE *, PAGE *, const DBT *, int *));
+
+/*
+ * __BT_SEARCH -- Search a btree for a key.
+ *
+ * Parameters:
+ * t: tree to search
+ * key: key to find
+ * exactp: pointer to exact match flag
+ *
+ * Returns:
+ * The EPG for matching record, if any, or the EPG for the location
+ * of the key, if it were inserted into the tree, is entered into
+ * the bt_cur field of the tree. A pointer to the field is returned.
+ */
+EPG *
+__bt_search(t, key, exactp)
+ BTREE *t;
+ const DBT *key;
+ int *exactp;
+{
+ PAGE *h;
+ indx_t base, index, lim;
+ pgno_t pg;
+ int cmp;
+
+ BT_CLR(t);
+ for (pg = P_ROOT;;) {
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ return (NULL);
+
+ /* Do a binary search on the current page. */
+ t->bt_cur.page = h;
+ for (base = 0, lim = NEXTINDEX(h); lim; lim >>= 1) {
+ t->bt_cur.index = index = base + (lim >> 1);
+ if ((cmp = __bt_cmp(t, key, &t->bt_cur)) == 0) {
+ if (h->flags & P_BLEAF) {
+ *exactp = 1;
+ return (&t->bt_cur);
+ }
+ goto next;
+ }
+ if (cmp > 0) {
+ base = index + 1;
+ --lim;
+ }
+ }
+
+ /*
+ * If it's a leaf page, and duplicates aren't allowed, we're
+ * done. If duplicates are allowed, it's possible that there
+ * were duplicate keys on duplicate pages, and they were later
+ * deleted, so we could be on a page with no matches while
+ * there are matches on other pages. If we're at the start or
+ * end of a page, check on both sides.
+ */
+ if (h->flags & P_BLEAF) {
+ t->bt_cur.index = base;
+ *exactp = 0;
+ if (!ISSET(t, B_NODUPS)) {
+ if (base == 0 &&
+ bt_sprev(t, h, key, exactp))
+ return (&t->bt_cur);
+ if (base == NEXTINDEX(h) &&
+ bt_snext(t, h, key, exactp))
+ return (&t->bt_cur);
+ }
+ return (&t->bt_cur);
+ }
+
+ /*
+ * No match found. Base is the smallest index greater than
+ * key and may be zero or a last + 1 index. If it's non-zero,
+ * decrement by one, and record the internal page which should
+ * be a parent page for the key. If a split later occurs, the
+ * inserted page will be to the right of the saved page.
+ */
+ index = base ? base - 1 : base;
+
+next: if (__bt_push(t, h->pgno, index) == RET_ERROR)
+ return (NULL);
+ pg = GETBINTERNAL(h, index)->pgno;
+ mpool_put(t->bt_mp, h, 0);
+ }
+}
+
+/*
+ * BT_SNEXT -- Check for an exact match after the key.
+ *
+ * Parameters:
+ * t: tree to search
+ * h: current page.
+ * key: key to find
+ * exactp: pointer to exact match flag
+ *
+ * Returns:
+ * If an exact match found.
+ */
+static int
+bt_snext(t, h, key, exactp)
+ BTREE *t;
+ PAGE *h;
+ const DBT *key;
+ int *exactp;
+{
+ EPG e;
+ PAGE *tp;
+ pgno_t pg;
+
+ /* Skip until reach the end of the tree or a key. */
+ for (pg = h->nextpg; pg != P_INVALID;) {
+ if ((tp = mpool_get(t->bt_mp, pg, 0)) == NULL) {
+ mpool_put(t->bt_mp, h, 0);
+ return (NULL);
+ }
+ if (NEXTINDEX(tp) != 0)
+ break;
+ pg = tp->prevpg;
+ mpool_put(t->bt_mp, tp, 0);
+ }
+ /*
+ * The key is either an exact match, or not as good as
+ * the one we already have.
+ */
+ if (pg != P_INVALID) {
+ e.page = tp;
+ e.index = NEXTINDEX(tp) - 1;
+ if (__bt_cmp(t, key, &e) == 0) {
+ mpool_put(t->bt_mp, h, 0);
+ t->bt_cur = e;
+ *exactp = 1;
+ return (1);
+ }
+ }
+ return (0);
+}
+
+/*
+ * BT_SPREV -- Check for an exact match before the key.
+ *
+ * Parameters:
+ * t: tree to search
+ * h: current page.
+ * key: key to find
+ * exactp: pointer to exact match flag
+ *
+ * Returns:
+ * If an exact match found.
+ */
+static int
+bt_sprev(t, h, key, exactp)
+ BTREE *t;
+ PAGE *h;
+ const DBT *key;
+ int *exactp;
+{
+ EPG e;
+ PAGE *tp;
+ pgno_t pg;
+
+ /* Skip until reach the beginning of the tree or a key. */
+ for (pg = h->prevpg; pg != P_INVALID;) {
+ if ((tp = mpool_get(t->bt_mp, pg, 0)) == NULL) {
+ mpool_put(t->bt_mp, h, 0);
+ return (NULL);
+ }
+ if (NEXTINDEX(tp) != 0)
+ break;
+ pg = tp->prevpg;
+ mpool_put(t->bt_mp, tp, 0);
+ }
+ /*
+ * The key is either an exact match, or not as good as
+ * the one we already have.
+ */
+ if (pg != P_INVALID) {
+ e.page = tp;
+ e.index = NEXTINDEX(tp) - 1;
+ if (__bt_cmp(t, key, &e) == 0) {
+ mpool_put(t->bt_mp, h, 0);
+ t->bt_cur = e;
+ *exactp = 1;
+ return (1);
+ }
+ }
+ return (0);
+}
diff --git a/lib/libc/db/btree/bt_seq.c b/lib/libc/db/btree/bt_seq.c
new file mode 100644
index 00000000000..f029cc3e174
--- /dev/null
+++ b/lib/libc/db/btree/bt_seq.c
@@ -0,0 +1,386 @@
+/* $NetBSD: bt_seq.c,v 1.6 1995/02/27 13:20:51 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)bt_seq.c 8.2 (Berkeley) 9/7/93";
+#else
+static char rcsid[] = "$NetBSD: bt_seq.c,v 1.6 1995/02/27 13:20:51 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <db.h>
+#include "btree.h"
+
+static int bt_seqadv __P((BTREE *, EPG *, int));
+static int bt_seqset __P((BTREE *, EPG *, DBT *, int));
+
+/*
+ * Sequential scan support.
+ *
+ * The tree can be scanned sequentially, starting from either end of the tree
+ * or from any specific key. A scan request before any scanning is done is
+ * initialized as starting from the least node.
+ *
+ * Each tree has an EPGNO which has the current position of the cursor. The
+ * cursor has to survive deletions/insertions in the tree without losing its
+ * position. This is done by noting deletions without doing them, and then
+ * doing them when the cursor moves (or the tree is closed).
+ */
+
+/*
+ * __BT_SEQ -- Btree sequential scan interface.
+ *
+ * Parameters:
+ * dbp: pointer to access method
+ * key: key for positioning and return value
+ * data: data return value
+ * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
+ */
+int
+__bt_seq(dbp, key, data, flags)
+ const DB *dbp;
+ DBT *key, *data;
+ u_int flags;
+{
+ BTREE *t;
+ EPG e;
+ int status;
+
+ t = dbp->internal;
+
+ /* Toss any page pinned across calls. */
+ if (t->bt_pinned != NULL) {
+ mpool_put(t->bt_mp, t->bt_pinned, 0);
+ t->bt_pinned = NULL;
+ }
+
+ /*
+ * If scan unitialized as yet, or starting at a specific record, set
+ * the scan to a specific key. Both bt_seqset and bt_seqadv pin the
+ * page the cursor references if they're successful.
+ */
+ switch(flags) {
+ case R_NEXT:
+ case R_PREV:
+ if (ISSET(t, B_SEQINIT)) {
+ status = bt_seqadv(t, &e, flags);
+ break;
+ }
+ /* FALLTHROUGH */
+ case R_CURSOR:
+ case R_FIRST:
+ case R_LAST:
+ status = bt_seqset(t, &e, key, flags);
+ break;
+ default:
+ errno = EINVAL;
+ return (RET_ERROR);
+ }
+
+ if (status == RET_SUCCESS) {
+ status = __bt_ret(t, &e, key, data);
+
+ /* Update the actual cursor. */
+ t->bt_bcursor.pgno = e.page->pgno;
+ t->bt_bcursor.index = e.index;
+
+ /*
+ * If the user is doing concurrent access, we copied the
+ * key/data, toss the page.
+ */
+ if (ISSET(t, B_DB_LOCK))
+ mpool_put(t->bt_mp, e.page, 0);
+ else
+ t->bt_pinned = e.page;
+ SET(t, B_SEQINIT);
+ }
+ return (status);
+}
+
+/*
+ * BT_SEQSET -- Set the sequential scan to a specific key.
+ *
+ * Parameters:
+ * t: tree
+ * ep: storage for returned key
+ * key: key for initial scan position
+ * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
+ *
+ * Side effects:
+ * Pins the page the cursor references.
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
+ */
+static int
+bt_seqset(t, ep, key, flags)
+ BTREE *t;
+ EPG *ep;
+ DBT *key;
+ int flags;
+{
+ EPG *e;
+ PAGE *h;
+ pgno_t pg;
+ int exact;
+
+ /*
+ * Delete any already deleted record that we've been saving because
+ * the cursor pointed to it. Since going to a specific key, should
+ * delete any logically deleted records so they aren't found.
+ */
+ if (ISSET(t, B_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor))
+ return (RET_ERROR);
+
+ /*
+ * Find the first, last or specific key in the tree and point the cursor
+ * at it. The cursor may not be moved until a new key has been found.
+ */
+ switch(flags) {
+ case R_CURSOR: /* Keyed scan. */
+ /*
+ * Find the first instance of the key or the smallest key which
+ * is greater than or equal to the specified key. If run out
+ * of keys, return RET_SPECIAL.
+ */
+ if (key->data == NULL || key->size == 0) {
+ errno = EINVAL;
+ return (RET_ERROR);
+ }
+ e = __bt_first(t, key, &exact); /* Returns pinned page. */
+ if (e == NULL)
+ return (RET_ERROR);
+ /*
+ * If at the end of a page, skip any empty pages and find the
+ * next entry.
+ */
+ if (e->index == NEXTINDEX(e->page)) {
+ h = e->page;
+ do {
+ pg = h->nextpg;
+ mpool_put(t->bt_mp, h, 0);
+ if (pg == P_INVALID)
+ return (RET_SPECIAL);
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ return (RET_ERROR);
+ } while (NEXTINDEX(h) == 0);
+ e->index = 0;
+ e->page = h;
+ }
+ *ep = *e;
+ break;
+ case R_FIRST: /* First record. */
+ case R_NEXT:
+ /* Walk down the left-hand side of the tree. */
+ for (pg = P_ROOT;;) {
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ return (RET_ERROR);
+ if (h->flags & (P_BLEAF | P_RLEAF))
+ break;
+ pg = GETBINTERNAL(h, 0)->pgno;
+ mpool_put(t->bt_mp, h, 0);
+ }
+
+ /* Skip any empty pages. */
+ while (NEXTINDEX(h) == 0 && h->nextpg != P_INVALID) {
+ pg = h->nextpg;
+ mpool_put(t->bt_mp, h, 0);
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ return (RET_ERROR);
+ }
+
+ if (NEXTINDEX(h) == 0) {
+ mpool_put(t->bt_mp, h, 0);
+ return (RET_SPECIAL);
+ }
+
+ ep->page = h;
+ ep->index = 0;
+ break;
+ case R_LAST: /* Last record. */
+ case R_PREV:
+ /* Walk down the right-hand side of the tree. */
+ for (pg = P_ROOT;;) {
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ return (RET_ERROR);
+ if (h->flags & (P_BLEAF | P_RLEAF))
+ break;
+ pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
+ mpool_put(t->bt_mp, h, 0);
+ }
+
+ /* Skip any empty pages. */
+ while (NEXTINDEX(h) == 0 && h->prevpg != P_INVALID) {
+ pg = h->prevpg;
+ mpool_put(t->bt_mp, h, 0);
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ return (RET_ERROR);
+ }
+
+ if (NEXTINDEX(h) == 0) {
+ mpool_put(t->bt_mp, h, 0);
+ return (RET_SPECIAL);
+ }
+
+ ep->page = h;
+ ep->index = NEXTINDEX(h) - 1;
+ break;
+ }
+ return (RET_SUCCESS);
+}
+
+/*
+ * BT_SEQADVANCE -- Advance the sequential scan.
+ *
+ * Parameters:
+ * t: tree
+ * flags: R_NEXT, R_PREV
+ *
+ * Side effects:
+ * Pins the page the new key/data record is on.
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
+ */
+static int
+bt_seqadv(t, e, flags)
+ BTREE *t;
+ EPG *e;
+ int flags;
+{
+ EPGNO *c, delc;
+ PAGE *h;
+ indx_t index;
+ pgno_t pg;
+
+ /* Save the current cursor if going to delete it. */
+ c = &t->bt_bcursor;
+ if (ISSET(t, B_DELCRSR))
+ delc = *c;
+
+ if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL)
+ return (RET_ERROR);
+
+ /*
+ * Find the next/previous record in the tree and point the cursor at it.
+ * The cursor may not be moved until a new key has been found.
+ */
+ index = c->index;
+ switch(flags) {
+ case R_NEXT: /* Next record. */
+ if (++index == NEXTINDEX(h)) {
+ do {
+ pg = h->nextpg;
+ mpool_put(t->bt_mp, h, 0);
+ if (pg == P_INVALID)
+ return (RET_SPECIAL);
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ return (RET_ERROR);
+ } while (NEXTINDEX(h) == 0);
+ index = 0;
+ }
+ break;
+ case R_PREV: /* Previous record. */
+ if (index-- == 0) {
+ do {
+ pg = h->prevpg;
+ mpool_put(t->bt_mp, h, 0);
+ if (pg == P_INVALID)
+ return (RET_SPECIAL);
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ return (RET_ERROR);
+ } while (NEXTINDEX(h) == 0);
+ index = NEXTINDEX(h) - 1;
+ }
+ break;
+ }
+
+ e->page = h;
+ e->index = index;
+
+ /*
+ * Delete any already deleted record that we've been saving because the
+ * cursor pointed to it. This could cause the new index to be shifted
+ * down by one if the record we're deleting is on the same page and has
+ * a larger index.
+ */
+ if (ISSET(t, B_DELCRSR)) {
+ CLR(t, B_DELCRSR); /* Don't try twice. */
+ if (c->pgno == delc.pgno && c->index > delc.index)
+ --c->index;
+ if (__bt_crsrdel(t, &delc))
+ return (RET_ERROR);
+ }
+ return (RET_SUCCESS);
+}
+
+/*
+ * __BT_CRSRDEL -- Delete the record referenced by the cursor.
+ *
+ * Parameters:
+ * t: tree
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+__bt_crsrdel(t, c)
+ BTREE *t;
+ EPGNO *c;
+{
+ PAGE *h;
+ int status;
+
+ CLR(t, B_DELCRSR); /* Don't try twice. */
+ if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL)
+ return (RET_ERROR);
+ status = __bt_dleaf(t, h, c->index);
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+ return (status);
+}
diff --git a/lib/libc/db/btree/bt_split.c b/lib/libc/db/btree/bt_split.c
new file mode 100644
index 00000000000..22334c1e430
--- /dev/null
+++ b/lib/libc/db/btree/bt_split.c
@@ -0,0 +1,833 @@
+/* $NetBSD: bt_split.c,v 1.5 1995/02/27 13:20:55 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)bt_split.c 8.6 (Berkeley) 6/16/94";
+#else
+static char rcsid[] = "$NetBSD: bt_split.c,v 1.5 1995/02/27 13:20:55 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <db.h>
+#include "btree.h"
+
+static int bt_broot __P((BTREE *, PAGE *, PAGE *, PAGE *));
+static PAGE *bt_page
+ __P((BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t));
+static int bt_preserve __P((BTREE *, pgno_t));
+static PAGE *bt_psplit
+ __P((BTREE *, PAGE *, PAGE *, PAGE *, indx_t *, size_t));
+static PAGE *bt_root
+ __P((BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t));
+static int bt_rroot __P((BTREE *, PAGE *, PAGE *, PAGE *));
+static recno_t rec_total __P((PAGE *));
+
+#ifdef STATISTICS
+u_long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved;
+#endif
+
+/*
+ * __BT_SPLIT -- Split the tree.
+ *
+ * Parameters:
+ * t: tree
+ * sp: page to split
+ * key: key to insert
+ * data: data to insert
+ * flags: BIGKEY/BIGDATA flags
+ * ilen: insert length
+ * skip: index to leave open
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+__bt_split(t, sp, key, data, flags, ilen, argskip)
+ BTREE *t;
+ PAGE *sp;
+ const DBT *key, *data;
+ int flags;
+ size_t ilen;
+ u_int32_t argskip;
+{
+ BINTERNAL *bi;
+ BLEAF *bl, *tbl;
+ DBT a, b;
+ EPGNO *parent;
+ PAGE *h, *l, *r, *lchild, *rchild;
+ indx_t nxtindex;
+ u_int16_t skip;
+ u_int32_t n, nbytes, nksize;
+ int parentsplit;
+ char *dest;
+
+ /*
+ * Split the page into two pages, l and r. The split routines return
+ * a pointer to the page into which the key should be inserted and with
+ * skip set to the offset which should be used. Additionally, l and r
+ * are pinned.
+ */
+ skip = argskip;
+ h = sp->pgno == P_ROOT ?
+ bt_root(t, sp, &l, &r, &skip, ilen) :
+ bt_page(t, sp, &l, &r, &skip, ilen);
+ if (h == NULL)
+ return (RET_ERROR);
+
+ /*
+ * Insert the new key/data pair into the leaf page. (Key inserts
+ * always cause a leaf page to split first.)
+ */
+ h->linp[skip] = h->upper -= ilen;
+ dest = (char *)h + h->upper;
+ if (ISSET(t, R_RECNO))
+ WR_RLEAF(dest, data, flags)
+ else
+ WR_BLEAF(dest, key, data, flags)
+
+ /* If the root page was split, make it look right. */
+ if (sp->pgno == P_ROOT &&
+ (ISSET(t, R_RECNO) ?
+ bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR)
+ goto err2;
+
+ /*
+ * Now we walk the parent page stack -- a LIFO stack of the pages that
+ * were traversed when we searched for the page that split. Each stack
+ * entry is a page number and a page index offset. The offset is for
+ * the page traversed on the search. We've just split a page, so we
+ * have to insert a new key into the parent page.
+ *
+ * If the insert into the parent page causes it to split, may have to
+ * continue splitting all the way up the tree. We stop if the root
+ * splits or the page inserted into didn't have to split to hold the
+ * new key. Some algorithms replace the key for the old page as well
+ * as the new page. We don't, as there's no reason to believe that the
+ * first key on the old page is any better than the key we have, and,
+ * in the case of a key being placed at index 0 causing the split, the
+ * key is unavailable.
+ *
+ * There are a maximum of 5 pages pinned at any time. We keep the left
+ * and right pages pinned while working on the parent. The 5 are the
+ * two children, left parent and right parent (when the parent splits)
+ * and the root page or the overflow key page when calling bt_preserve.
+ * This code must make sure that all pins are released other than the
+ * root page or overflow page which is unlocked elsewhere.
+ */
+ while ((parent = BT_POP(t)) != NULL) {
+ lchild = l;
+ rchild = r;
+
+ /* Get the parent page. */
+ if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
+ goto err2;
+
+ /*
+ * The new key goes ONE AFTER the index, because the split
+ * was to the right.
+ */
+ skip = parent->index + 1;
+
+ /*
+ * Calculate the space needed on the parent page.
+ *
+ * Prefix trees: space hack when inserting into BINTERNAL
+ * pages. Retain only what's needed to distinguish between
+ * the new entry and the LAST entry on the page to its left.
+ * If the keys compare equal, retain the entire key. Note,
+ * we don't touch overflow keys, and the entire key must be
+ * retained for the next-to-left most key on the leftmost
+ * page of each level, or the search will fail. Applicable
+ * ONLY to internal pages that have leaf pages as children.
+ * Further reduction of the key between pairs of internal
+ * pages loses too much information.
+ */
+ switch (rchild->flags & P_TYPE) {
+ case P_BINTERNAL:
+ bi = GETBINTERNAL(rchild, 0);
+ nbytes = NBINTERNAL(bi->ksize);
+ break;
+ case P_BLEAF:
+ bl = GETBLEAF(rchild, 0);
+ nbytes = NBINTERNAL(bl->ksize);
+ if (t->bt_pfx && !(bl->flags & P_BIGKEY) &&
+ (h->prevpg != P_INVALID || skip > 1)) {
+ tbl = GETBLEAF(lchild, NEXTINDEX(lchild) - 1);
+ a.size = tbl->ksize;
+ a.data = tbl->bytes;
+ b.size = bl->ksize;
+ b.data = bl->bytes;
+ nksize = t->bt_pfx(&a, &b);
+ n = NBINTERNAL(nksize);
+ if (n < nbytes) {
+#ifdef STATISTICS
+ bt_pfxsaved += nbytes - n;
+#endif
+ nbytes = n;
+ } else
+ nksize = 0;
+ } else
+ nksize = 0;
+ break;
+ case P_RINTERNAL:
+ case P_RLEAF:
+ nbytes = NRINTERNAL;
+ break;
+ default:
+ abort();
+ }
+
+ /* Split the parent page if necessary or shift the indices. */
+ if (h->upper - h->lower < nbytes + sizeof(indx_t)) {
+ sp = h;
+ h = h->pgno == P_ROOT ?
+ bt_root(t, h, &l, &r, &skip, nbytes) :
+ bt_page(t, h, &l, &r, &skip, nbytes);
+ if (h == NULL)
+ goto err1;
+ parentsplit = 1;
+ } else {
+ if (skip < (nxtindex = NEXTINDEX(h)))
+ memmove(h->linp + skip + 1, h->linp + skip,
+ (nxtindex - skip) * sizeof(indx_t));
+ h->lower += sizeof(indx_t);
+ parentsplit = 0;
+ }
+
+ /* Insert the key into the parent page. */
+ switch(rchild->flags & P_TYPE) {
+ case P_BINTERNAL:
+ h->linp[skip] = h->upper -= nbytes;
+ dest = (char *)h + h->linp[skip];
+ memmove(dest, bi, nbytes);
+ ((BINTERNAL *)dest)->pgno = rchild->pgno;
+ break;
+ case P_BLEAF:
+ h->linp[skip] = h->upper -= nbytes;
+ dest = (char *)h + h->linp[skip];
+ WR_BINTERNAL(dest, nksize ? nksize : bl->ksize,
+ rchild->pgno, bl->flags & P_BIGKEY);
+ memmove(dest, bl->bytes, nksize ? nksize : bl->ksize);
+ if (bl->flags & P_BIGKEY &&
+ bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR)
+ goto err1;
+ break;
+ case P_RINTERNAL:
+ /*
+ * Update the left page count. If split
+ * added at index 0, fix the correct page.
+ */
+ if (skip > 0)
+ dest = (char *)h + h->linp[skip - 1];
+ else
+ dest = (char *)l + l->linp[NEXTINDEX(l) - 1];
+ ((RINTERNAL *)dest)->nrecs = rec_total(lchild);
+ ((RINTERNAL *)dest)->pgno = lchild->pgno;
+
+ /* Update the right page count. */
+ h->linp[skip] = h->upper -= nbytes;
+ dest = (char *)h + h->linp[skip];
+ ((RINTERNAL *)dest)->nrecs = rec_total(rchild);
+ ((RINTERNAL *)dest)->pgno = rchild->pgno;
+ break;
+ case P_RLEAF:
+ /*
+ * Update the left page count. If split
+ * added at index 0, fix the correct page.
+ */
+ if (skip > 0)
+ dest = (char *)h + h->linp[skip - 1];
+ else
+ dest = (char *)l + l->linp[NEXTINDEX(l) - 1];
+ ((RINTERNAL *)dest)->nrecs = NEXTINDEX(lchild);
+ ((RINTERNAL *)dest)->pgno = lchild->pgno;
+
+ /* Update the right page count. */
+ h->linp[skip] = h->upper -= nbytes;
+ dest = (char *)h + h->linp[skip];
+ ((RINTERNAL *)dest)->nrecs = NEXTINDEX(rchild);
+ ((RINTERNAL *)dest)->pgno = rchild->pgno;
+ break;
+ default:
+ abort();
+ }
+
+ /* Unpin the held pages. */
+ if (!parentsplit) {
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+ break;
+ }
+
+ /* If the root page was split, make it look right. */
+ if (sp->pgno == P_ROOT &&
+ (ISSET(t, R_RECNO) ?
+ bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR)
+ goto err1;
+
+ mpool_put(t->bt_mp, lchild, MPOOL_DIRTY);
+ mpool_put(t->bt_mp, rchild, MPOOL_DIRTY);
+ }
+
+ /* Unpin the held pages. */
+ mpool_put(t->bt_mp, l, MPOOL_DIRTY);
+ mpool_put(t->bt_mp, r, MPOOL_DIRTY);
+
+ /* Clear any pages left on the stack. */
+ return (RET_SUCCESS);
+
+ /*
+ * If something fails in the above loop we were already walking back
+ * up the tree and the tree is now inconsistent. Nothing much we can
+ * do about it but release any memory we're holding.
+ */
+err1: mpool_put(t->bt_mp, lchild, MPOOL_DIRTY);
+ mpool_put(t->bt_mp, rchild, MPOOL_DIRTY);
+
+err2: mpool_put(t->bt_mp, l, 0);
+ mpool_put(t->bt_mp, r, 0);
+ __dbpanic(t->bt_dbp);
+ return (RET_ERROR);
+}
+
+/*
+ * BT_PAGE -- Split a non-root page of a btree.
+ *
+ * Parameters:
+ * t: tree
+ * h: root page
+ * lp: pointer to left page pointer
+ * rp: pointer to right page pointer
+ * skip: pointer to index to leave open
+ * ilen: insert length
+ *
+ * Returns:
+ * Pointer to page in which to insert or NULL on error.
+ */
+static PAGE *
+bt_page(t, h, lp, rp, skip, ilen)
+ BTREE *t;
+ PAGE *h, **lp, **rp;
+ indx_t *skip;
+ size_t ilen;
+{
+ PAGE *l, *r, *tp;
+ pgno_t npg;
+
+#ifdef STATISTICS
+ ++bt_split;
+#endif
+ /* Put the new right page for the split into place. */
+ if ((r = __bt_new(t, &npg)) == NULL)
+ return (NULL);
+ r->pgno = npg;
+ r->lower = BTDATAOFF;
+ r->upper = t->bt_psize;
+ r->nextpg = h->nextpg;
+ r->prevpg = h->pgno;
+ r->flags = h->flags & P_TYPE;
+
+ /*
+ * If we're splitting the last page on a level because we're appending
+ * a key to it (skip is NEXTINDEX()), it's likely that the data is
+ * sorted. Adding an empty page on the side of the level is less work
+ * and can push the fill factor much higher than normal. If we're
+ * wrong it's no big deal, we'll just do the split the right way next
+ * time. It may look like it's equally easy to do a similar hack for
+ * reverse sorted data, that is, split the tree left, but it's not.
+ * Don't even try.
+ */
+ if (h->nextpg == P_INVALID && *skip == NEXTINDEX(h)) {
+#ifdef STATISTICS
+ ++bt_sortsplit;
+#endif
+ h->nextpg = r->pgno;
+ r->lower = BTDATAOFF + sizeof(indx_t);
+ *skip = 0;
+ *lp = h;
+ *rp = r;
+ return (r);
+ }
+
+ /* Put the new left page for the split into place. */
+ if ((l = (PAGE *)malloc(t->bt_psize)) == NULL) {
+ mpool_put(t->bt_mp, r, 0);
+ return (NULL);
+ }
+ l->pgno = h->pgno;
+ l->nextpg = r->pgno;
+ l->prevpg = h->prevpg;
+ l->lower = BTDATAOFF;
+ l->upper = t->bt_psize;
+ l->flags = h->flags & P_TYPE;
+
+ /* Fix up the previous pointer of the page after the split page. */
+ if (h->nextpg != P_INVALID) {
+ if ((tp = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) {
+ free(l);
+ /* XXX mpool_free(t->bt_mp, r->pgno); */
+ return (NULL);
+ }
+ tp->prevpg = r->pgno;
+ mpool_put(t->bt_mp, tp, 0);
+ }
+
+ /*
+ * Split right. The key/data pairs aren't sorted in the btree page so
+ * it's simpler to copy the data from the split page onto two new pages
+ * instead of copying half the data to the right page and compacting
+ * the left page in place. Since the left page can't change, we have
+ * to swap the original and the allocated left page after the split.
+ */
+ tp = bt_psplit(t, h, l, r, skip, ilen);
+
+ /* Move the new left page onto the old left page. */
+ memmove(h, l, t->bt_psize);
+ if (tp == l)
+ tp = h;
+ free(l);
+
+ *lp = h;
+ *rp = r;
+ return (tp);
+}
+
+/*
+ * BT_ROOT -- Split the root page of a btree.
+ *
+ * Parameters:
+ * t: tree
+ * h: root page
+ * lp: pointer to left page pointer
+ * rp: pointer to right page pointer
+ * skip: pointer to index to leave open
+ * ilen: insert length
+ *
+ * Returns:
+ * Pointer to page in which to insert or NULL on error.
+ */
+static PAGE *
+bt_root(t, h, lp, rp, skip, ilen)
+ BTREE *t;
+ PAGE *h, **lp, **rp;
+ indx_t *skip;
+ size_t ilen;
+{
+ PAGE *l, *r, *tp;
+ pgno_t lnpg, rnpg;
+
+#ifdef STATISTICS
+ ++bt_split;
+ ++bt_rootsplit;
+#endif
+ /* Put the new left and right pages for the split into place. */
+ if ((l = __bt_new(t, &lnpg)) == NULL ||
+ (r = __bt_new(t, &rnpg)) == NULL)
+ return (NULL);
+ l->pgno = lnpg;
+ r->pgno = rnpg;
+ l->nextpg = r->pgno;
+ r->prevpg = l->pgno;
+ l->prevpg = r->nextpg = P_INVALID;
+ l->lower = r->lower = BTDATAOFF;
+ l->upper = r->upper = t->bt_psize;
+ l->flags = r->flags = h->flags & P_TYPE;
+
+ /* Split the root page. */
+ tp = bt_psplit(t, h, l, r, skip, ilen);
+
+ *lp = l;
+ *rp = r;
+ return (tp);
+}
+
+/*
+ * BT_RROOT -- Fix up the recno root page after it has been split.
+ *
+ * Parameters:
+ * t: tree
+ * h: root page
+ * l: left page
+ * r: right page
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+static int
+bt_rroot(t, h, l, r)
+ BTREE *t;
+ PAGE *h, *l, *r;
+{
+ char *dest;
+
+ /* Insert the left and right keys, set the header information. */
+ h->linp[0] = h->upper = t->bt_psize - NRINTERNAL;
+ dest = (char *)h + h->upper;
+ WR_RINTERNAL(dest,
+ l->flags & P_RLEAF ? NEXTINDEX(l) : rec_total(l), l->pgno);
+
+ h->linp[1] = h->upper -= NRINTERNAL;
+ dest = (char *)h + h->upper;
+ WR_RINTERNAL(dest,
+ r->flags & P_RLEAF ? NEXTINDEX(r) : rec_total(r), r->pgno);
+
+ h->lower = BTDATAOFF + 2 * sizeof(indx_t);
+
+ /* Unpin the root page, set to recno internal page. */
+ h->flags &= ~P_TYPE;
+ h->flags |= P_RINTERNAL;
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+
+ return (RET_SUCCESS);
+}
+
+/*
+ * BT_BROOT -- Fix up the btree root page after it has been split.
+ *
+ * Parameters:
+ * t: tree
+ * h: root page
+ * l: left page
+ * r: right page
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+static int
+bt_broot(t, h, l, r)
+ BTREE *t;
+ PAGE *h, *l, *r;
+{
+ BINTERNAL *bi;
+ BLEAF *bl;
+ u_int32_t nbytes;
+ char *dest;
+
+ /*
+ * If the root page was a leaf page, change it into an internal page.
+ * We copy the key we split on (but not the key's data, in the case of
+ * a leaf page) to the new root page.
+ *
+ * The btree comparison code guarantees that the left-most key on any
+ * level of the tree is never used, so it doesn't need to be filled in.
+ */
+ nbytes = NBINTERNAL(0);
+ h->linp[0] = h->upper = t->bt_psize - nbytes;
+ dest = (char *)h + h->upper;
+ WR_BINTERNAL(dest, 0, l->pgno, 0);
+
+ switch(h->flags & P_TYPE) {
+ case P_BLEAF:
+ bl = GETBLEAF(r, 0);
+ nbytes = NBINTERNAL(bl->ksize);
+ h->linp[1] = h->upper -= nbytes;
+ dest = (char *)h + h->upper;
+ WR_BINTERNAL(dest, bl->ksize, r->pgno, 0);
+ memmove(dest, bl->bytes, bl->ksize);
+
+ /*
+ * If the key is on an overflow page, mark the overflow chain
+ * so it isn't deleted when the leaf copy of the key is deleted.
+ */
+ if (bl->flags & P_BIGKEY &&
+ bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR)
+ return (RET_ERROR);
+ break;
+ case P_BINTERNAL:
+ bi = GETBINTERNAL(r, 0);
+ nbytes = NBINTERNAL(bi->ksize);
+ h->linp[1] = h->upper -= nbytes;
+ dest = (char *)h + h->upper;
+ memmove(dest, bi, nbytes);
+ ((BINTERNAL *)dest)->pgno = r->pgno;
+ break;
+ default:
+ abort();
+ }
+
+ /* There are two keys on the page. */
+ h->lower = BTDATAOFF + 2 * sizeof(indx_t);
+
+ /* Unpin the root page, set to btree internal page. */
+ h->flags &= ~P_TYPE;
+ h->flags |= P_BINTERNAL;
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+
+ return (RET_SUCCESS);
+}
+
+/*
+ * BT_PSPLIT -- Do the real work of splitting the page.
+ *
+ * Parameters:
+ * t: tree
+ * h: page to be split
+ * l: page to put lower half of data
+ * r: page to put upper half of data
+ * pskip: pointer to index to leave open
+ * ilen: insert length
+ *
+ * Returns:
+ * Pointer to page in which to insert.
+ */
+static PAGE *
+bt_psplit(t, h, l, r, pskip, ilen)
+ BTREE *t;
+ PAGE *h, *l, *r;
+ indx_t *pskip;
+ size_t ilen;
+{
+ BINTERNAL *bi;
+ BLEAF *bl;
+ RLEAF *rl;
+ EPGNO *c;
+ PAGE *rval;
+ void *src;
+ indx_t full, half, nxt, off, skip, top, used;
+ u_int32_t nbytes;
+ int bigkeycnt, isbigkey;
+
+ /*
+ * Split the data to the left and right pages. Leave the skip index
+ * open. Additionally, make some effort not to split on an overflow
+ * key. This makes internal page processing faster and can save
+ * space as overflow keys used by internal pages are never deleted.
+ */
+ bigkeycnt = 0;
+ skip = *pskip;
+ full = t->bt_psize - BTDATAOFF;
+ half = full / 2;
+ used = 0;
+ for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) {
+ if (skip == off) {
+ nbytes = ilen;
+ isbigkey = 0; /* XXX: not really known. */
+ } else
+ switch (h->flags & P_TYPE) {
+ case P_BINTERNAL:
+ src = bi = GETBINTERNAL(h, nxt);
+ nbytes = NBINTERNAL(bi->ksize);
+ isbigkey = bi->flags & P_BIGKEY;
+ break;
+ case P_BLEAF:
+ src = bl = GETBLEAF(h, nxt);
+ nbytes = NBLEAF(bl);
+ isbigkey = bl->flags & P_BIGKEY;
+ break;
+ case P_RINTERNAL:
+ src = GETRINTERNAL(h, nxt);
+ nbytes = NRINTERNAL;
+ isbigkey = 0;
+ break;
+ case P_RLEAF:
+ src = rl = GETRLEAF(h, nxt);
+ nbytes = NRLEAF(rl);
+ isbigkey = 0;
+ break;
+ default:
+ abort();
+ }
+
+ /*
+ * If the key/data pairs are substantial fractions of the max
+ * possible size for the page, it's possible to get situations
+ * where we decide to try and copy too much onto the left page.
+ * Make sure that doesn't happen.
+ */
+ if (skip <= off && used + nbytes >= full) {
+ --off;
+ break;
+ }
+
+ /* Copy the key/data pair, if not the skipped index. */
+ if (skip != off) {
+ ++nxt;
+
+ l->linp[off] = l->upper -= nbytes;
+ memmove((char *)l + l->upper, src, nbytes);
+ }
+
+ used += nbytes;
+ if (used >= half) {
+ if (!isbigkey || bigkeycnt == 3)
+ break;
+ else
+ ++bigkeycnt;
+ }
+ }
+
+ /*
+ * Off is the last offset that's valid for the left page.
+ * Nxt is the first offset to be placed on the right page.
+ */
+ l->lower += (off + 1) * sizeof(indx_t);
+
+ /*
+ * If splitting the page that the cursor was on, the cursor has to be
+ * adjusted to point to the same record as before the split. If the
+ * cursor is at or past the skipped slot, the cursor is incremented by
+ * one. If the cursor is on the right page, it is decremented by the
+ * number of records split to the left page.
+ *
+ * Don't bother checking for the B_SEQINIT flag, the page number will
+ * be P_INVALID.
+ */
+ c = &t->bt_bcursor;
+ if (c->pgno == h->pgno) {
+ if (c->index >= skip)
+ ++c->index;
+ if (c->index < nxt) /* Left page. */
+ c->pgno = l->pgno;
+ else { /* Right page. */
+ c->pgno = r->pgno;
+ c->index -= nxt;
+ }
+ }
+
+ /*
+ * If the skipped index was on the left page, just return that page.
+ * Otherwise, adjust the skip index to reflect the new position on
+ * the right page.
+ */
+ if (skip <= off) {
+ skip = 0;
+ rval = l;
+ } else {
+ rval = r;
+ *pskip -= nxt;
+ }
+
+ for (off = 0; nxt < top; ++off) {
+ if (skip == nxt) {
+ ++off;
+ skip = 0;
+ }
+ switch (h->flags & P_TYPE) {
+ case P_BINTERNAL:
+ src = bi = GETBINTERNAL(h, nxt);
+ nbytes = NBINTERNAL(bi->ksize);
+ break;
+ case P_BLEAF:
+ src = bl = GETBLEAF(h, nxt);
+ nbytes = NBLEAF(bl);
+ break;
+ case P_RINTERNAL:
+ src = GETRINTERNAL(h, nxt);
+ nbytes = NRINTERNAL;
+ break;
+ case P_RLEAF:
+ src = rl = GETRLEAF(h, nxt);
+ nbytes = NRLEAF(rl);
+ break;
+ default:
+ abort();
+ }
+ ++nxt;
+ r->linp[off] = r->upper -= nbytes;
+ memmove((char *)r + r->upper, src, nbytes);
+ }
+ r->lower += off * sizeof(indx_t);
+
+ /* If the key is being appended to the page, adjust the index. */
+ if (skip == top)
+ r->lower += sizeof(indx_t);
+
+ return (rval);
+}
+
+/*
+ * BT_PRESERVE -- Mark a chain of pages as used by an internal node.
+ *
+ * Chains of indirect blocks pointed to by leaf nodes get reclaimed when the
+ * record that references them gets deleted. Chains pointed to by internal
+ * pages never get deleted. This routine marks a chain as pointed to by an
+ * internal page.
+ *
+ * Parameters:
+ * t: tree
+ * pg: page number of first page in the chain.
+ *
+ * Returns:
+ * RET_SUCCESS, RET_ERROR.
+ */
+static int
+bt_preserve(t, pg)
+ BTREE *t;
+ pgno_t pg;
+{
+ PAGE *h;
+
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ return (RET_ERROR);
+ h->flags |= P_PRESERVE;
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+ return (RET_SUCCESS);
+}
+
+/*
+ * REC_TOTAL -- Return the number of recno entries below a page.
+ *
+ * Parameters:
+ * h: page
+ *
+ * Returns:
+ * The number of recno entries below a page.
+ *
+ * XXX
+ * These values could be set by the bt_psplit routine. The problem is that the
+ * entry has to be popped off of the stack etc. or the values have to be passed
+ * all the way back to bt_split/bt_rroot and it's not very clean.
+ */
+static recno_t
+rec_total(h)
+ PAGE *h;
+{
+ recno_t recs;
+ indx_t nxt, top;
+
+ for (recs = 0, nxt = 0, top = NEXTINDEX(h); nxt < top; ++nxt)
+ recs += GETRINTERNAL(h, nxt)->nrecs;
+ return (recs);
+}
diff --git a/lib/libc/db/btree/bt_stack.c b/lib/libc/db/btree/bt_stack.c
new file mode 100644
index 00000000000..7199ae9b090
--- /dev/null
+++ b/lib/libc/db/btree/bt_stack.c
@@ -0,0 +1,102 @@
+/* $NetBSD: bt_stack.c,v 1.5 1995/02/27 13:21:01 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)bt_stack.c 8.4 (Berkeley) 6/20/94";
+#else
+static char rcsid[] = "$NetBSD: bt_stack.c,v 1.5 1995/02/27 13:21:01 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <db.h>
+#include "btree.h"
+
+/*
+ * When a page splits, a new record has to be inserted into its parent page.
+ * This page may have to split as well, all the way up to the root. Since
+ * parent pointers in each page would be expensive, we maintain a stack of
+ * parent pages as we descend the tree.
+ *
+ * XXX
+ * This is a concurrency problem -- if user a builds a stack, then user b
+ * splits the tree, then user a tries to split the tree, there's a new level
+ * in the tree that user a doesn't know about.
+ */
+
+/*
+ * __BT_PUSH -- Push parent page info onto the stack (LIFO).
+ *
+ * Parameters:
+ * t: tree
+ * pgno: page
+ * index: page index
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+__bt_push(t, pgno, index)
+ BTREE *t;
+ pgno_t pgno;
+ indx_t index;
+{
+ size_t sz;
+
+ if (t->bt_sp == t->bt_maxstack) {
+ t->bt_maxstack += 50;
+ sz = t->bt_maxstack * sizeof(EPGNO);
+ t->bt_stack = (EPGNO *)(t->bt_stack == NULL ?
+ malloc(sz) : realloc(t->bt_stack, sz));
+ if (t->bt_stack == NULL) {
+ t->bt_maxstack -= 50;
+ return (RET_ERROR);
+ }
+ }
+
+ t->bt_stack[t->bt_sp].pgno = pgno;
+ t->bt_stack[t->bt_sp].index = index;
+ ++t->bt_sp;
+ return (RET_SUCCESS);
+}
diff --git a/lib/libc/db/btree/bt_utils.c b/lib/libc/db/btree/bt_utils.c
new file mode 100644
index 00000000000..f9acf0ab8a1
--- /dev/null
+++ b/lib/libc/db/btree/bt_utils.c
@@ -0,0 +1,256 @@
+/* $NetBSD: bt_utils.c,v 1.6 1995/02/27 13:21:04 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)bt_utils.c 8.5 (Berkeley) 6/20/94";
+#else
+static char rcsid[] = "$NetBSD: bt_utils.c,v 1.6 1995/02/27 13:21:04 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/param.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <db.h>
+#include "btree.h"
+
+/*
+ * __BT_RET -- Build return key/data pair as a result of search or scan.
+ *
+ * Parameters:
+ * t: tree
+ * d: LEAF to be returned to the user.
+ * key: user's key structure (NULL if not to be filled in)
+ * data: user's data structure
+ *
+ * Returns:
+ * RET_SUCCESS, RET_ERROR.
+ */
+int
+__bt_ret(t, e, key, data)
+ BTREE *t;
+ EPG *e;
+ DBT *key, *data;
+{
+ register BLEAF *bl;
+ register void *p;
+
+ bl = GETBLEAF(e->page, e->index);
+
+ /*
+ * We always copy big keys/data to make them contigous. Otherwise,
+ * we leave the page pinned and don't copy unless the user specified
+ * concurrent access.
+ */
+ if (bl->flags & P_BIGDATA) {
+ if (__ovfl_get(t, bl->bytes + bl->ksize,
+ &data->size, &t->bt_dbuf, &t->bt_dbufsz))
+ return (RET_ERROR);
+ data->data = t->bt_dbuf;
+ } else if (ISSET(t, B_DB_LOCK)) {
+ /* Use +1 in case the first record retrieved is 0 length. */
+ if (bl->dsize + 1 > t->bt_dbufsz) {
+ p = (void *)(t->bt_dbuf == NULL ?
+ malloc(bl->dsize + 1) :
+ realloc(t->bt_dbuf, bl->dsize + 1));
+ if (p == NULL)
+ return (RET_ERROR);
+ t->bt_dbuf = p;
+ t->bt_dbufsz = bl->dsize + 1;
+ }
+ memmove(t->bt_dbuf, bl->bytes + bl->ksize, bl->dsize);
+ data->size = bl->dsize;
+ data->data = t->bt_dbuf;
+ } else {
+ data->size = bl->dsize;
+ data->data = bl->bytes + bl->ksize;
+ }
+
+ if (key == NULL)
+ return (RET_SUCCESS);
+
+ if (bl->flags & P_BIGKEY) {
+ if (__ovfl_get(t, bl->bytes,
+ &key->size, &t->bt_kbuf, &t->bt_kbufsz))
+ return (RET_ERROR);
+ key->data = t->bt_kbuf;
+ } else if (ISSET(t, B_DB_LOCK)) {
+ if (bl->ksize > t->bt_kbufsz) {
+ p = (void *)(t->bt_kbuf == NULL ?
+ malloc(bl->ksize) : realloc(t->bt_kbuf, bl->ksize));
+ if (p == NULL)
+ return (RET_ERROR);
+ t->bt_kbuf = p;
+ t->bt_kbufsz = bl->ksize;
+ }
+ memmove(t->bt_kbuf, bl->bytes, bl->ksize);
+ key->size = bl->ksize;
+ key->data = t->bt_kbuf;
+ } else {
+ key->size = bl->ksize;
+ key->data = bl->bytes;
+ }
+ return (RET_SUCCESS);
+}
+
+/*
+ * __BT_CMP -- Compare a key to a given record.
+ *
+ * Parameters:
+ * t: tree
+ * k1: DBT pointer of first arg to comparison
+ * e: pointer to EPG for comparison
+ *
+ * Returns:
+ * < 0 if k1 is < record
+ * = 0 if k1 is = record
+ * > 0 if k1 is > record
+ */
+int
+__bt_cmp(t, k1, e)
+ BTREE *t;
+ const DBT *k1;
+ EPG *e;
+{
+ BINTERNAL *bi;
+ BLEAF *bl;
+ DBT k2;
+ PAGE *h;
+ void *bigkey;
+
+ /*
+ * The left-most key on internal pages, at any level of the tree, is
+ * guaranteed by the following code to be less than any user key.
+ * This saves us from having to update the leftmost key on an internal
+ * page when the user inserts a new key in the tree smaller than
+ * anything we've yet seen.
+ */
+ h = e->page;
+ if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & P_BLEAF))
+ return (1);
+
+ bigkey = NULL;
+ if (h->flags & P_BLEAF) {
+ bl = GETBLEAF(h, e->index);
+ if (bl->flags & P_BIGKEY)
+ bigkey = bl->bytes;
+ else {
+ k2.data = bl->bytes;
+ k2.size = bl->ksize;
+ }
+ } else {
+ bi = GETBINTERNAL(h, e->index);
+ if (bi->flags & P_BIGKEY)
+ bigkey = bi->bytes;
+ else {
+ k2.data = bi->bytes;
+ k2.size = bi->ksize;
+ }
+ }
+
+ if (bigkey) {
+ if (__ovfl_get(t, bigkey,
+ &k2.size, &t->bt_dbuf, &t->bt_dbufsz))
+ return (RET_ERROR);
+ k2.data = t->bt_dbuf;
+ }
+ return ((*t->bt_cmp)(k1, &k2));
+}
+
+/*
+ * __BT_DEFCMP -- Default comparison routine.
+ *
+ * Parameters:
+ * a: DBT #1
+ * b: DBT #2
+ *
+ * Returns:
+ * < 0 if a is < b
+ * = 0 if a is = b
+ * > 0 if a is > b
+ */
+int
+__bt_defcmp(a, b)
+ const DBT *a, *b;
+{
+ register size_t len;
+ register u_char *p1, *p2;
+
+ /*
+ * XXX
+ * If a size_t doesn't fit in an int, this routine can lose.
+ * What we need is a integral type which is guaranteed to be
+ * larger than a size_t, and there is no such thing.
+ */
+ len = MIN(a->size, b->size);
+ for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2)
+ if (*p1 != *p2)
+ return ((int)*p1 - (int)*p2);
+ return ((int)a->size - (int)b->size);
+}
+
+/*
+ * __BT_DEFPFX -- Default prefix routine.
+ *
+ * Parameters:
+ * a: DBT #1
+ * b: DBT #2
+ *
+ * Returns:
+ * Number of bytes needed to distinguish b from a.
+ */
+size_t
+__bt_defpfx(a, b)
+ const DBT *a, *b;
+{
+ register u_char *p1, *p2;
+ register size_t cnt, len;
+
+ cnt = 1;
+ len = MIN(a->size, b->size);
+ for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2, ++cnt)
+ if (*p1 != *p2)
+ return (cnt);
+
+ /* a->size must be <= b->size, or they wouldn't be in this order. */
+ return (a->size < b->size ? a->size + 1 : a->size);
+}
diff --git a/lib/libc/db/btree/btree.h b/lib/libc/db/btree/btree.h
new file mode 100644
index 00000000000..747ed93638e
--- /dev/null
+++ b/lib/libc/db/btree/btree.h
@@ -0,0 +1,355 @@
+/* $NetBSD: btree.h,v 1.8 1995/02/27 13:21:08 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1991, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)btree.h 8.6 (Berkeley) 5/31/94
+ */
+
+#include <mpool.h>
+
+#define DEFMINKEYPAGE (2) /* Minimum keys per page */
+#define MINCACHE (5) /* Minimum cached pages */
+#define MINPSIZE (512) /* Minimum page size */
+
+/*
+ * Page 0 of a btree file contains a copy of the meta-data. This page is also
+ * used as an out-of-band page, i.e. page pointers that point to nowhere point
+ * to page 0. Page 1 is the root of the btree.
+ */
+#define P_INVALID 0 /* Invalid tree page number. */
+#define P_META 0 /* Tree metadata page number. */
+#define P_ROOT 1 /* Tree root page number. */
+
+/*
+ * There are five page layouts in the btree: btree internal pages (BINTERNAL),
+ * btree leaf pages (BLEAF), recno internal pages (RINTERNAL), recno leaf pages
+ * (RLEAF) and overflow pages. All five page types have a page header (PAGE).
+ * This implementation requires that values within structures NOT be padded.
+ * (ANSI C permits random padding.) If your compiler pads randomly you'll have
+ * to do some work to get this package to run.
+ */
+typedef struct _page {
+ pgno_t pgno; /* this page's page number */
+ pgno_t prevpg; /* left sibling */
+ pgno_t nextpg; /* right sibling */
+
+#define P_BINTERNAL 0x01 /* btree internal page */
+#define P_BLEAF 0x02 /* leaf page */
+#define P_OVERFLOW 0x04 /* overflow page */
+#define P_RINTERNAL 0x08 /* recno internal page */
+#define P_RLEAF 0x10 /* leaf page */
+#define P_TYPE 0x1f /* type mask */
+#define P_PRESERVE 0x20 /* never delete this chain of pages */
+ u_int32_t flags;
+
+ indx_t lower; /* lower bound of free space on page */
+ indx_t upper; /* upper bound of free space on page */
+ indx_t linp[1]; /* indx_t-aligned VAR. LENGTH DATA */
+} PAGE;
+
+/* First and next index. */
+#define BTDATAOFF (sizeof(pgno_t) + sizeof(pgno_t) + sizeof(pgno_t) + \
+ sizeof(u_int32_t) + sizeof(indx_t) + sizeof(indx_t))
+#define NEXTINDEX(p) (((p)->lower - BTDATAOFF) / sizeof(indx_t))
+
+/*
+ * For pages other than overflow pages, there is an array of offsets into the
+ * rest of the page immediately following the page header. Each offset is to
+ * an item which is unique to the type of page. The h_lower offset is just
+ * past the last filled-in index. The h_upper offset is the first item on the
+ * page. Offsets are from the beginning of the page.
+ *
+ * If an item is too big to store on a single page, a flag is set and the item
+ * is a { page, size } pair such that the page is the first page of an overflow
+ * chain with size bytes of item. Overflow pages are simply bytes without any
+ * external structure.
+ *
+ * The page number and size fields in the items are pgno_t-aligned so they can
+ * be manipulated without copying. (This presumes that 32 bit items can be
+ * manipulated on this system.)
+ */
+#define LALIGN(n) \
+ (((n) + sizeof(pgno_t) - 1) & ~(sizeof(pgno_t) - 1))
+#define NOVFLSIZE (sizeof(pgno_t) + sizeof(u_int32_t))
+
+/*
+ * For the btree internal pages, the item is a key. BINTERNALs are {key, pgno}
+ * pairs, such that the key compares less than or equal to all of the records
+ * on that page. For a tree without duplicate keys, an internal page with two
+ * consecutive keys, a and b, will have all records greater than or equal to a
+ * and less than b stored on the page associated with a. Duplicate keys are
+ * somewhat special and can cause duplicate internal and leaf page records and
+ * some minor modifications of the above rule.
+ */
+typedef struct _binternal {
+ u_int32_t ksize; /* key size */
+ pgno_t pgno; /* page number stored on */
+#define P_BIGDATA 0x01 /* overflow data */
+#define P_BIGKEY 0x02 /* overflow key */
+ u_char flags;
+ char bytes[1]; /* data */
+} BINTERNAL;
+
+/* Get the page's BINTERNAL structure at index indx. */
+#define GETBINTERNAL(pg, indx) \
+ ((BINTERNAL *)((char *)(pg) + (pg)->linp[indx]))
+
+/* Get the number of bytes in the entry. */
+#define NBINTERNAL(len) \
+ LALIGN(sizeof(u_int32_t) + sizeof(pgno_t) + sizeof(u_char) + (len))
+
+/* Copy a BINTERNAL entry to the page. */
+#define WR_BINTERNAL(p, size, pgno, flags) { \
+ *(u_int32_t *)p = size; \
+ p += sizeof(u_int32_t); \
+ *(pgno_t *)p = pgno; \
+ p += sizeof(pgno_t); \
+ *(u_char *)p = flags; \
+ p += sizeof(u_char); \
+}
+
+/*
+ * For the recno internal pages, the item is a page number with the number of
+ * keys found on that page and below.
+ */
+typedef struct _rinternal {
+ recno_t nrecs; /* number of records */
+ pgno_t pgno; /* page number stored below */
+} RINTERNAL;
+
+/* Get the page's RINTERNAL structure at index indx. */
+#define GETRINTERNAL(pg, indx) \
+ ((RINTERNAL *)((char *)(pg) + (pg)->linp[indx]))
+
+/* Get the number of bytes in the entry. */
+#define NRINTERNAL \
+ LALIGN(sizeof(recno_t) + sizeof(pgno_t))
+
+/* Copy a RINTERAL entry to the page. */
+#define WR_RINTERNAL(p, nrecs, pgno) { \
+ *(recno_t *)p = nrecs; \
+ p += sizeof(recno_t); \
+ *(pgno_t *)p = pgno; \
+}
+
+/* For the btree leaf pages, the item is a key and data pair. */
+typedef struct _bleaf {
+ u_int32_t ksize; /* size of key */
+ u_int32_t dsize; /* size of data */
+ u_char flags; /* P_BIGDATA, P_BIGKEY */
+ char bytes[1]; /* data */
+} BLEAF;
+
+/* Get the page's BLEAF structure at index indx. */
+#define GETBLEAF(pg, indx) \
+ ((BLEAF *)((char *)(pg) + (pg)->linp[indx]))
+
+/* Get the number of bytes in the entry. */
+#define NBLEAF(p) NBLEAFDBT((p)->ksize, (p)->dsize)
+
+/* Get the number of bytes in the user's key/data pair. */
+#define NBLEAFDBT(ksize, dsize) \
+ LALIGN(sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_char) + \
+ (ksize) + (dsize))
+
+/* Copy a BLEAF entry to the page. */
+#define WR_BLEAF(p, key, data, flags) { \
+ *(u_int32_t *)p = key->size; \
+ p += sizeof(u_int32_t); \
+ *(u_int32_t *)p = data->size; \
+ p += sizeof(u_int32_t); \
+ *(u_char *)p = flags; \
+ p += sizeof(u_char); \
+ memmove(p, key->data, key->size); \
+ p += key->size; \
+ memmove(p, data->data, data->size); \
+}
+
+/* For the recno leaf pages, the item is a data entry. */
+typedef struct _rleaf {
+ u_int32_t dsize; /* size of data */
+ u_char flags; /* P_BIGDATA */
+ char bytes[1];
+} RLEAF;
+
+/* Get the page's RLEAF structure at index indx. */
+#define GETRLEAF(pg, indx) \
+ ((RLEAF *)((char *)(pg) + (pg)->linp[indx]))
+
+/* Get the number of bytes in the entry. */
+#define NRLEAF(p) NRLEAFDBT((p)->dsize)
+
+/* Get the number of bytes from the user's data. */
+#define NRLEAFDBT(dsize) \
+ LALIGN(sizeof(u_int32_t) + sizeof(u_char) + (dsize))
+
+/* Copy a RLEAF entry to the page. */
+#define WR_RLEAF(p, data, flags) { \
+ *(u_int32_t *)p = data->size; \
+ p += sizeof(u_int32_t); \
+ *(u_char *)p = flags; \
+ p += sizeof(u_char); \
+ memmove(p, data->data, data->size); \
+}
+
+/*
+ * A record in the tree is either a pointer to a page and an index in the page
+ * or a page number and an index. These structures are used as a cursor, stack
+ * entry and search returns as well as to pass records to other routines.
+ *
+ * One comment about searches. Internal page searches must find the largest
+ * record less than key in the tree so that descents work. Leaf page searches
+ * must find the smallest record greater than key so that the returned index
+ * is the record's correct position for insertion.
+ *
+ * One comment about cursors. The cursor key is never removed from the tree,
+ * even if deleted. This is because it is quite difficult to decide where the
+ * cursor should be when other keys have been inserted/deleted in the tree;
+ * duplicate keys make it impossible. This scheme does require extra work
+ * though, to make sure that we don't perform an operation on a deleted key.
+ */
+typedef struct _epgno {
+ pgno_t pgno; /* the page number */
+ indx_t index; /* the index on the page */
+} EPGNO;
+
+typedef struct _epg {
+ PAGE *page; /* the (pinned) page */
+ indx_t index; /* the index on the page */
+} EPG;
+
+/*
+ * The metadata of the tree. The m_nrecs field is used only by the RECNO code.
+ * This is because the btree doesn't really need it and it requires that every
+ * put or delete call modify the metadata.
+ */
+typedef struct _btmeta {
+ u_int32_t m_magic; /* magic number */
+ u_int32_t m_version; /* version */
+ u_int32_t m_psize; /* page size */
+ u_int32_t m_free; /* page number of first free page */
+ u_int32_t m_nrecs; /* R: number of records */
+#define SAVEMETA (B_NODUPS | R_RECNO)
+ u_int32_t m_flags; /* bt_flags & SAVEMETA */
+ u_int32_t m_unused; /* unused */
+} BTMETA;
+
+/* The in-memory btree/recno data structure. */
+typedef struct _btree {
+ MPOOL *bt_mp; /* memory pool cookie */
+
+ DB *bt_dbp; /* pointer to enclosing DB */
+
+ EPG bt_cur; /* current (pinned) page */
+ PAGE *bt_pinned; /* page pinned across calls */
+
+ EPGNO bt_bcursor; /* B: btree cursor */
+ recno_t bt_rcursor; /* R: recno cursor (1-based) */
+
+#define BT_POP(t) (t->bt_sp ? t->bt_stack + --t->bt_sp : NULL)
+#define BT_CLR(t) (t->bt_sp = 0)
+ EPGNO *bt_stack; /* stack of parent pages */
+ u_int bt_sp; /* current stack pointer */
+ u_int bt_maxstack; /* largest stack */
+
+ char *bt_kbuf; /* key buffer */
+ size_t bt_kbufsz; /* key buffer size */
+ char *bt_dbuf; /* data buffer */
+ size_t bt_dbufsz; /* data buffer size */
+
+ int bt_fd; /* tree file descriptor */
+
+ pgno_t bt_free; /* next free page */
+ u_int32_t bt_psize; /* page size */
+ indx_t bt_ovflsize; /* cut-off for key/data overflow */
+ int bt_lorder; /* byte order */
+ /* sorted order */
+ enum { NOT, BACK, FORWARD } bt_order;
+ EPGNO bt_last; /* last insert */
+
+ /* B: key comparison function */
+ int (*bt_cmp) __P((const DBT *, const DBT *));
+ /* B: prefix comparison function */
+ size_t (*bt_pfx) __P((const DBT *, const DBT *));
+ /* R: recno input function */
+ int (*bt_irec) __P((struct _btree *, recno_t));
+
+ FILE *bt_rfp; /* R: record FILE pointer */
+ int bt_rfd; /* R: record file descriptor */
+
+ caddr_t bt_cmap; /* R: current point in mapped space */
+ caddr_t bt_smap; /* R: start of mapped space */
+ caddr_t bt_emap; /* R: end of mapped space */
+ size_t bt_msize; /* R: size of mapped region. */
+
+ recno_t bt_nrecs; /* R: number of records */
+ size_t bt_reclen; /* R: fixed record length */
+ u_char bt_bval; /* R: delimiting byte/pad character */
+
+/*
+ * NB:
+ * B_NODUPS and R_RECNO are stored on disk, and may not be changed.
+ */
+#define B_DELCRSR 0x00001 /* cursor has been deleted */
+#define B_INMEM 0x00002 /* in-memory tree */
+#define B_METADIRTY 0x00004 /* need to write metadata */
+#define B_MODIFIED 0x00008 /* tree modified */
+#define B_NEEDSWAP 0x00010 /* if byte order requires swapping */
+#define B_NODUPS 0x00020 /* no duplicate keys permitted */
+#define B_RDONLY 0x00040 /* read-only tree */
+#define R_RECNO 0x00080 /* record oriented tree */
+#define B_SEQINIT 0x00100 /* sequential scan initialized */
+
+#define R_CLOSEFP 0x00200 /* opened a file pointer */
+#define R_EOF 0x00400 /* end of input file reached. */
+#define R_FIXLEN 0x00800 /* fixed length records */
+#define R_MEMMAPPED 0x01000 /* memory mapped file. */
+#define R_INMEM 0x02000 /* in-memory file */
+#define R_MODIFIED 0x04000 /* modified file */
+#define R_RDONLY 0x08000 /* read-only file */
+
+#define B_DB_LOCK 0x10000 /* DB_LOCK specified. */
+#define B_DB_SHMEM 0x20000 /* DB_SHMEM specified. */
+#define B_DB_TXN 0x40000 /* DB_TXN specified. */
+
+ u_int32_t bt_flags; /* btree state */
+} BTREE;
+
+#define SET(t, f) ((t)->bt_flags |= (f))
+#define CLR(t, f) ((t)->bt_flags &= ~(f))
+#define ISSET(t, f) ((t)->bt_flags & (f))
+
+#include "extern.h"
diff --git a/lib/libc/db/btree/extern.h b/lib/libc/db/btree/extern.h
new file mode 100644
index 00000000000..6d09c2eb204
--- /dev/null
+++ b/lib/libc/db/btree/extern.h
@@ -0,0 +1,72 @@
+/* $NetBSD: extern.h,v 1.5 1995/02/27 13:21:12 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)extern.h 8.4 (Berkeley) 6/4/94
+ */
+
+int __bt_close __P((DB *));
+int __bt_cmp __P((BTREE *, const DBT *, EPG *));
+int __bt_crsrdel __P((BTREE *, EPGNO *));
+int __bt_defcmp __P((const DBT *, const DBT *));
+size_t __bt_defpfx __P((const DBT *, const DBT *));
+int __bt_delete __P((const DB *, const DBT *, u_int));
+int __bt_dleaf __P((BTREE *, PAGE *, int));
+int __bt_fd __P((const DB *));
+EPG *__bt_first __P((BTREE *, const DBT *, int *));
+int __bt_free __P((BTREE *, PAGE *));
+int __bt_get __P((const DB *, const DBT *, DBT *, u_int));
+PAGE *__bt_new __P((BTREE *, pgno_t *));
+void __bt_pgin __P((void *, pgno_t, void *));
+void __bt_pgout __P((void *, pgno_t, void *));
+int __bt_push __P((BTREE *, pgno_t, int));
+int __bt_put __P((const DB *dbp, DBT *, const DBT *, u_int));
+int __bt_ret __P((BTREE *, EPG *, DBT *, DBT *));
+EPG *__bt_search __P((BTREE *, const DBT *, int *));
+int __bt_seq __P((const DB *, DBT *, DBT *, u_int));
+int __bt_split __P((BTREE *, PAGE *,
+ const DBT *, const DBT *, int, size_t, u_int32_t));
+int __bt_sync __P((const DB *, u_int));
+
+int __ovfl_delete __P((BTREE *, void *));
+int __ovfl_get __P((BTREE *, void *, size_t *, char **, size_t *));
+int __ovfl_put __P((BTREE *, const DBT *, pgno_t *));
+
+#ifdef DEBUG
+void __bt_dnpage __P((DB *, pgno_t));
+void __bt_dpage __P((PAGE *));
+void __bt_dump __P((DB *));
+#endif
+#ifdef STATISTICS
+void __bt_stat __P((DB *));
+#endif
diff --git a/lib/libc/db/db/Makefile.inc b/lib/libc/db/db/Makefile.inc
new file mode 100644
index 00000000000..9755e8f2019
--- /dev/null
+++ b/lib/libc/db/db/Makefile.inc
@@ -0,0 +1,6 @@
+# $NetBSD: Makefile.inc,v 1.4 1995/02/27 13:21:22 cgd Exp $
+# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93
+
+.PATH: ${.CURDIR}/db/db
+
+SRCS+= db.c
diff --git a/lib/libc/db/db/db.c b/lib/libc/db/db/db.c
new file mode 100644
index 00000000000..791c2552234
--- /dev/null
+++ b/lib/libc/db/db/db.c
@@ -0,0 +1,105 @@
+/* $NetBSD: db.c,v 1.7 1995/02/27 13:21:27 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)db.c 8.4 (Berkeley) 2/21/94";
+#else
+static char rcsid[] = "$NetBSD: db.c,v 1.7 1995/02/27 13:21:27 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include <db.h>
+
+DB *
+dbopen(fname, flags, mode, type, openinfo)
+ const char *fname;
+ int flags, mode;
+ DBTYPE type;
+ const void *openinfo;
+{
+
+#define DB_FLAGS (DB_LOCK | DB_SHMEM | DB_TXN)
+#define USE_OPEN_FLAGS \
+ (O_CREAT | O_EXCL | O_EXLOCK | O_NONBLOCK | O_RDONLY | \
+ O_RDWR | O_SHLOCK | O_TRUNC)
+
+ if ((flags & ~(USE_OPEN_FLAGS | DB_FLAGS)) == 0)
+ switch (type) {
+ case DB_BTREE:
+ return (__bt_open(fname, flags & USE_OPEN_FLAGS,
+ mode, openinfo, flags & DB_FLAGS));
+ case DB_HASH:
+ return (__hash_open(fname, flags & USE_OPEN_FLAGS,
+ mode, openinfo, flags & DB_FLAGS));
+ case DB_RECNO:
+ return (__rec_open(fname, flags & USE_OPEN_FLAGS,
+ mode, openinfo, flags & DB_FLAGS));
+ }
+ errno = EINVAL;
+ return (NULL);
+}
+
+static int
+__dberr()
+{
+ return (RET_ERROR);
+}
+
+/*
+ * __DBPANIC -- Stop.
+ *
+ * Parameters:
+ * dbp: pointer to the DB structure.
+ */
+void
+__dbpanic(dbp)
+ DB *dbp;
+{
+ /* The only thing that can succeed is a close. */
+ dbp->del = (int (*)())__dberr;
+ dbp->fd = (int (*)())__dberr;
+ dbp->get = (int (*)())__dberr;
+ dbp->put = (int (*)())__dberr;
+ dbp->seq = (int (*)())__dberr;
+ dbp->sync = (int (*)())__dberr;
+}
diff --git a/lib/libc/db/hash/Makefile.inc b/lib/libc/db/hash/Makefile.inc
new file mode 100644
index 00000000000..00f339ac2ed
--- /dev/null
+++ b/lib/libc/db/hash/Makefile.inc
@@ -0,0 +1,7 @@
+# $NetBSD: Makefile.inc,v 1.5 1995/02/27 13:21:44 cgd Exp $
+# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93
+
+.PATH: ${.CURDIR}/db/hash
+
+SRCS+= hash.c hash_bigkey.c hash_buf.c hash_func.c hash_log2.c \
+ hash_page.c hsearch.c ndbm.c
diff --git a/lib/libc/db/hash/README b/lib/libc/db/hash/README
new file mode 100644
index 00000000000..fd1fc653a92
--- /dev/null
+++ b/lib/libc/db/hash/README
@@ -0,0 +1,73 @@
+# $NetBSD: README,v 1.3 1995/02/27 13:21:52 cgd Exp $
+# @(#)README 8.1 (Berkeley) 6/4/93
+
+This package implements a superset of the hsearch and dbm/ndbm libraries.
+
+Test Programs:
+ All test programs which need key/data pairs expect them entered
+ with key and data on separate lines
+
+ tcreat3.c
+ Takes
+ bucketsize (bsize),
+ fill factor (ffactor), and
+ initial number of elements (nelem).
+ Creates a hash table named hashtest containing the
+ keys/data pairs entered from standard in.
+ thash4.c
+ Takes
+ bucketsize (bsize),
+ fill factor (ffactor),
+ initial number of elements (nelem)
+ bytes of cache (ncached), and
+ file from which to read data (fname)
+ Creates a table from the key/data pairs on standard in and
+ then does a read of each key/data in fname
+ tdel.c
+ Takes
+ bucketsize (bsize), and
+ fill factor (ffactor).
+ file from which to read data (fname)
+ Reads each key/data pair from fname and deletes the
+ key from the hash table hashtest
+ tseq.c
+ Reads the key/data pairs in the file hashtest and writes them
+ to standard out.
+ tread2.c
+ Takes
+ butes of cache (ncached).
+ Reads key/data pairs from standard in and looks them up
+ in the file hashtest.
+ tverify.c
+ Reads key/data pairs from standard in, looks them up
+ in the file hashtest, and verifies that the data is
+ correct.
+
+NOTES:
+
+The file search.h is provided for using the hsearch compatible interface
+on BSD systems. On System V derived systems, search.h should appear in
+/usr/include.
+
+The man page ../man/db.3 explains the interface to the hashing system.
+The file hash.ps is a postscript copy of a paper explaining
+the history, implementation, and performance of the hash package.
+
+"bugs" or idiosyncracies
+
+If you have a lot of overflows, it is possible to run out of overflow
+pages. Currently, this will cause a message to be printed on stderr.
+Eventually, this will be indicated by a return error code.
+
+If you are using the ndbm interface and exit without flushing or closing the
+file, you may lose updates since the package buffers all writes. Also,
+the db interface only creates a single database file. To avoid overwriting
+the user's original file, the suffix ".db" is appended to the file name
+passed to dbm_open. Additionally, if your code "knows" about the historic
+.dir and .pag files, it will break.
+
+There is a fundamental difference between this package and the old hsearch.
+Hsearch requires the user to maintain the keys and data in the application's
+allocated memory while hash takes care of all storage management. The down
+side is that the byte strings passed in the ENTRY structure must be null
+terminated (both the keys and the data).
diff --git a/lib/libc/db/hash/extern.h b/lib/libc/db/hash/extern.h
new file mode 100644
index 00000000000..af98921ef19
--- /dev/null
+++ b/lib/libc/db/hash/extern.h
@@ -0,0 +1,67 @@
+/* $NetBSD: extern.h,v 1.4 1995/02/27 13:21:55 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1991, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)extern.h 8.4 (Berkeley) 6/16/94
+ */
+
+BUFHEAD *__add_ovflpage __P((HTAB *, BUFHEAD *));
+int __addel __P((HTAB *, BUFHEAD *, const DBT *, const DBT *));
+int __big_delete __P((HTAB *, BUFHEAD *));
+int __big_insert __P((HTAB *, BUFHEAD *, const DBT *, const DBT *));
+int __big_keydata __P((HTAB *, BUFHEAD *, DBT *, DBT *, int));
+int __big_return __P((HTAB *, BUFHEAD *, int, DBT *, int));
+int __big_split __P((HTAB *, BUFHEAD *, BUFHEAD *, BUFHEAD *,
+ int, u_int32_t, SPLIT_RETURN *));
+int __buf_free __P((HTAB *, int, int));
+void __buf_init __P((HTAB *, int));
+u_int32_t __call_hash __P((HTAB *, char *, int));
+int __delpair __P((HTAB *, BUFHEAD *, int));
+int __expand_table __P((HTAB *));
+int __find_bigpair __P((HTAB *, BUFHEAD *, int, char *, int));
+u_int16_t __find_last_page __P((HTAB *, BUFHEAD **));
+void __free_ovflpage __P((HTAB *, BUFHEAD *));
+BUFHEAD *__get_buf __P((HTAB *, u_int32_t, BUFHEAD *, int));
+int __get_page __P((HTAB *, char *, u_int32_t, int, int, int));
+int __ibitmap __P((HTAB *, int, int, int));
+u_int32_t __log2 __P((u_int32_t));
+int __put_page __P((HTAB *, char *, u_int32_t, int, int));
+void __reclaim_buf __P((HTAB *, BUFHEAD *));
+int __split_page __P((HTAB *, u_int32_t, u_int32_t));
+
+/* Default hash routine. */
+extern u_int32_t (*__default_hash) __P((const void *, size_t));
+
+#ifdef HASH_STATISTICS
+extern int hash_accesses, hash_collisions, hash_expansions, hash_overflows;
+#endif
diff --git a/lib/libc/db/hash/hash.c b/lib/libc/db/hash/hash.c
new file mode 100644
index 00000000000..2fc3281cb9f
--- /dev/null
+++ b/lib/libc/db/hash/hash.c
@@ -0,0 +1,1000 @@
+/* $NetBSD: hash.c,v 1.8 1995/02/27 13:21:59 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)hash.c 8.9 (Berkeley) 6/16/94";
+#else
+static char rcsid[] = "$NetBSD: hash.c,v 1.8 1995/02/27 13:21:59 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#ifdef DEBUG
+#include <assert.h>
+#endif
+
+#include <db.h>
+#include "hash.h"
+#include "page.h"
+#include "extern.h"
+
+static int alloc_segs __P((HTAB *, int));
+static int flush_meta __P((HTAB *));
+static int hash_access __P((HTAB *, ACTION, DBT *, DBT *));
+static int hash_close __P((DB *));
+static int hash_delete __P((const DB *, const DBT *, u_int32_t));
+static int hash_fd __P((const DB *));
+static int hash_get __P((const DB *, const DBT *, DBT *, u_int32_t));
+static int hash_put __P((const DB *, DBT *, const DBT *, u_int32_t));
+static void *hash_realloc __P((SEGMENT **, int, int));
+static int hash_seq __P((const DB *, DBT *, DBT *, u_int32_t));
+static int hash_sync __P((const DB *, u_int32_t));
+static int hdestroy __P((HTAB *));
+static HTAB *init_hash __P((HTAB *, const char *, HASHINFO *));
+static int init_htab __P((HTAB *, int));
+#if BYTE_ORDER == LITTLE_ENDIAN
+static void swap_header __P((HTAB *));
+static void swap_header_copy __P((HASHHDR *, HASHHDR *));
+#endif
+
+/* Fast arithmetic, relying on powers of 2, */
+#define MOD(x, y) ((x) & ((y) - 1))
+
+#define RETURN_ERROR(ERR, LOC) { save_errno = ERR; goto LOC; }
+
+/* Return values */
+#define SUCCESS (0)
+#define ERROR (-1)
+#define ABNORMAL (1)
+
+#ifdef HASH_STATISTICS
+int hash_accesses, hash_collisions, hash_expansions, hash_overflows;
+#endif
+
+/************************** INTERFACE ROUTINES ***************************/
+/* OPEN/CLOSE */
+
+extern DB *
+__hash_open(file, flags, mode, info, dflags)
+ const char *file;
+ int flags, mode, dflags;
+ const HASHINFO *info; /* Special directives for create */
+{
+ HTAB *hashp;
+ struct stat statbuf;
+ DB *dbp;
+ int bpages, hdrsize, new_table, nsegs, save_errno;
+
+ if ((flags & O_ACCMODE) == O_WRONLY) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ if (!(hashp = (HTAB *)calloc(1, sizeof(HTAB))))
+ return (NULL);
+ hashp->fp = -1;
+
+ /*
+ * Even if user wants write only, we need to be able to read
+ * the actual file, so we need to open it read/write. But, the
+ * field in the hashp structure needs to be accurate so that
+ * we can check accesses.
+ */
+ hashp->flags = flags;
+
+ new_table = 0;
+ if (!file || (flags & O_TRUNC) ||
+ (stat(file, &statbuf) && (errno == ENOENT))) {
+ if (errno == ENOENT)
+ errno = 0; /* Just in case someone looks at errno */
+ new_table = 1;
+ }
+ if (file) {
+ if ((hashp->fp = open(file, flags, mode)) == -1)
+ RETURN_ERROR(errno, error0);
+ (void)fcntl(hashp->fp, F_SETFD, 1);
+ }
+ if (new_table) {
+ if (!(hashp = init_hash(hashp, file, (HASHINFO *)info)))
+ RETURN_ERROR(errno, error1);
+ } else {
+ /* Table already exists */
+ if (info && info->hash)
+ hashp->hash = info->hash;
+ else
+ hashp->hash = __default_hash;
+
+ hdrsize = read(hashp->fp, &hashp->hdr, sizeof(HASHHDR));
+#if BYTE_ORDER == LITTLE_ENDIAN
+ swap_header(hashp);
+#endif
+ if (hdrsize == -1)
+ RETURN_ERROR(errno, error1);
+ if (hdrsize != sizeof(HASHHDR))
+ RETURN_ERROR(EFTYPE, error1);
+ /* Verify file type, versions and hash function */
+ if (hashp->MAGIC != HASHMAGIC)
+ RETURN_ERROR(EFTYPE, error1);
+#define OLDHASHVERSION 1
+ if (hashp->VERSION != HASHVERSION &&
+ hashp->VERSION != OLDHASHVERSION)
+ RETURN_ERROR(EFTYPE, error1);
+ if (hashp->hash(CHARKEY, sizeof(CHARKEY)) != hashp->H_CHARKEY)
+ RETURN_ERROR(EFTYPE, error1);
+ /*
+ * Figure out how many segments we need. Max_Bucket is the
+ * maximum bucket number, so the number of buckets is
+ * max_bucket + 1.
+ */
+ nsegs = (hashp->MAX_BUCKET + 1 + hashp->SGSIZE - 1) /
+ hashp->SGSIZE;
+ hashp->nsegs = 0;
+ if (alloc_segs(hashp, nsegs))
+ /*
+ * If alloc_segs fails, table will have been destroyed
+ * and errno will have been set.
+ */
+ return (NULL);
+ /* Read in bitmaps */
+ bpages = (hashp->SPARES[hashp->OVFL_POINT] +
+ (hashp->BSIZE << BYTE_SHIFT) - 1) >>
+ (hashp->BSHIFT + BYTE_SHIFT);
+
+ hashp->nmaps = bpages;
+ (void)memset(&hashp->mapp[0], 0, bpages * sizeof(u_int32_t *));
+ }
+
+ /* Initialize Buffer Manager */
+ if (info && info->cachesize)
+ __buf_init(hashp, info->cachesize);
+ else
+ __buf_init(hashp, DEF_BUFSIZE);
+
+ hashp->new_file = new_table;
+ hashp->save_file = file && (hashp->flags & O_RDWR);
+ hashp->cbucket = -1;
+ if (!(dbp = (DB *)malloc(sizeof(DB)))) {
+ save_errno = errno;
+ hdestroy(hashp);
+ errno = save_errno;
+ return (NULL);
+ }
+ dbp->internal = hashp;
+ dbp->close = hash_close;
+ dbp->del = hash_delete;
+ dbp->fd = hash_fd;
+ dbp->get = hash_get;
+ dbp->put = hash_put;
+ dbp->seq = hash_seq;
+ dbp->sync = hash_sync;
+ dbp->type = DB_HASH;
+
+#ifdef DEBUG
+ (void)fprintf(stderr,
+"%s\n%s%x\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
+ "init_htab:",
+ "TABLE POINTER ", hashp,
+ "BUCKET SIZE ", hashp->BSIZE,
+ "BUCKET SHIFT ", hashp->BSHIFT,
+ "DIRECTORY SIZE ", hashp->DSIZE,
+ "SEGMENT SIZE ", hashp->SGSIZE,
+ "SEGMENT SHIFT ", hashp->SSHIFT,
+ "FILL FACTOR ", hashp->FFACTOR,
+ "MAX BUCKET ", hashp->MAX_BUCKET,
+ "OVFL POINT ", hashp->OVFL_POINT,
+ "LAST FREED ", hashp->LAST_FREED,
+ "HIGH MASK ", hashp->HIGH_MASK,
+ "LOW MASK ", hashp->LOW_MASK,
+ "NSEGS ", hashp->nsegs,
+ "NKEYS ", hashp->NKEYS);
+#endif
+#ifdef HASH_STATISTICS
+ hash_overflows = hash_accesses = hash_collisions = hash_expansions = 0;
+#endif
+ return (dbp);
+
+error1:
+ if (hashp != NULL)
+ (void)close(hashp->fp);
+
+error0:
+ free(hashp);
+ errno = save_errno;
+ return (NULL);
+}
+
+static int
+hash_close(dbp)
+ DB *dbp;
+{
+ HTAB *hashp;
+ int retval;
+
+ if (!dbp)
+ return (ERROR);
+
+ hashp = (HTAB *)dbp->internal;
+ retval = hdestroy(hashp);
+ free(dbp);
+ return (retval);
+}
+
+static int
+hash_fd(dbp)
+ const DB *dbp;
+{
+ HTAB *hashp;
+
+ if (!dbp)
+ return (ERROR);
+
+ hashp = (HTAB *)dbp->internal;
+ if (hashp->fp == -1) {
+ errno = ENOENT;
+ return (-1);
+ }
+ return (hashp->fp);
+}
+
+/************************** LOCAL CREATION ROUTINES **********************/
+static HTAB *
+init_hash(hashp, file, info)
+ HTAB *hashp;
+ const char *file;
+ HASHINFO *info;
+{
+ struct stat statbuf;
+ int nelem;
+
+ nelem = 1;
+ hashp->NKEYS = 0;
+ hashp->LORDER = BYTE_ORDER;
+ hashp->BSIZE = DEF_BUCKET_SIZE;
+ hashp->BSHIFT = DEF_BUCKET_SHIFT;
+ hashp->SGSIZE = DEF_SEGSIZE;
+ hashp->SSHIFT = DEF_SEGSIZE_SHIFT;
+ hashp->DSIZE = DEF_DIRSIZE;
+ hashp->FFACTOR = DEF_FFACTOR;
+ hashp->hash = __default_hash;
+ memset(hashp->SPARES, 0, sizeof(hashp->SPARES));
+ memset(hashp->BITMAPS, 0, sizeof (hashp->BITMAPS));
+
+ /* Fix bucket size to be optimal for file system */
+ if (file != NULL) {
+ if (stat(file, &statbuf))
+ return (NULL);
+ hashp->BSIZE = statbuf.st_blksize;
+ hashp->BSHIFT = __log2(hashp->BSIZE);
+ }
+
+ if (info) {
+ if (info->bsize) {
+ /* Round pagesize up to power of 2 */
+ hashp->BSHIFT = __log2(info->bsize);
+ hashp->BSIZE = 1 << hashp->BSHIFT;
+ if (hashp->BSIZE > MAX_BSIZE) {
+ errno = EINVAL;
+ return (NULL);
+ }
+ }
+ if (info->ffactor)
+ hashp->FFACTOR = info->ffactor;
+ if (info->hash)
+ hashp->hash = info->hash;
+ if (info->nelem)
+ nelem = info->nelem;
+ if (info->lorder) {
+ if (info->lorder != BIG_ENDIAN &&
+ info->lorder != LITTLE_ENDIAN) {
+ errno = EINVAL;
+ return (NULL);
+ }
+ hashp->LORDER = info->lorder;
+ }
+ }
+ /* init_htab should destroy the table and set errno if it fails */
+ if (init_htab(hashp, nelem))
+ return (NULL);
+ else
+ return (hashp);
+}
+/*
+ * This calls alloc_segs which may run out of memory. Alloc_segs will destroy
+ * the table and set errno, so we just pass the error information along.
+ *
+ * Returns 0 on No Error
+ */
+static int
+init_htab(hashp, nelem)
+ HTAB *hashp;
+ int nelem;
+{
+ register int nbuckets, nsegs;
+ int l2;
+
+ /*
+ * Divide number of elements by the fill factor and determine a
+ * desired number of buckets. Allocate space for the next greater
+ * power of two number of buckets.
+ */
+ nelem = (nelem - 1) / hashp->FFACTOR + 1;
+
+ l2 = __log2(MAX(nelem, 2));
+ nbuckets = 1 << l2;
+
+ hashp->SPARES[l2] = l2 + 1;
+ hashp->SPARES[l2 + 1] = l2 + 1;
+ hashp->OVFL_POINT = l2;
+ hashp->LAST_FREED = 2;
+
+ /* First bitmap page is at: splitpoint l2 page offset 1 */
+ if (__ibitmap(hashp, OADDR_OF(l2, 1), l2 + 1, 0))
+ return (-1);
+
+ hashp->MAX_BUCKET = hashp->LOW_MASK = nbuckets - 1;
+ hashp->HIGH_MASK = (nbuckets << 1) - 1;
+ hashp->HDRPAGES = ((MAX(sizeof(HASHHDR), MINHDRSIZE) - 1) >>
+ hashp->BSHIFT) + 1;
+
+ nsegs = (nbuckets - 1) / hashp->SGSIZE + 1;
+ nsegs = 1 << __log2(nsegs);
+
+ if (nsegs > hashp->DSIZE)
+ hashp->DSIZE = nsegs;
+ return (alloc_segs(hashp, nsegs));
+}
+
+/********************** DESTROY/CLOSE ROUTINES ************************/
+
+/*
+ * Flushes any changes to the file if necessary and destroys the hashp
+ * structure, freeing all allocated space.
+ */
+static int
+hdestroy(hashp)
+ HTAB *hashp;
+{
+ int i, save_errno;
+
+ save_errno = 0;
+
+#ifdef HASH_STATISTICS
+ (void)fprintf(stderr, "hdestroy: accesses %ld collisions %ld\n",
+ hash_accesses, hash_collisions);
+ (void)fprintf(stderr, "hdestroy: expansions %ld\n",
+ hash_expansions);
+ (void)fprintf(stderr, "hdestroy: overflows %ld\n",
+ hash_overflows);
+ (void)fprintf(stderr, "keys %ld maxp %d segmentcount %d\n",
+ hashp->NKEYS, hashp->MAX_BUCKET, hashp->nsegs);
+
+ for (i = 0; i < NCACHED; i++)
+ (void)fprintf(stderr,
+ "spares[%d] = %d\n", i, hashp->SPARES[i]);
+#endif
+ /*
+ * Call on buffer manager to free buffers, and if required,
+ * write them to disk.
+ */
+ if (__buf_free(hashp, 1, hashp->save_file))
+ save_errno = errno;
+ if (hashp->dir) {
+ free(*hashp->dir); /* Free initial segments */
+ /* Free extra segments */
+ while (hashp->exsegs--)
+ free(hashp->dir[--hashp->nsegs]);
+ free(hashp->dir);
+ }
+ if (flush_meta(hashp) && !save_errno)
+ save_errno = errno;
+ /* Free Bigmaps */
+ for (i = 0; i < hashp->nmaps; i++)
+ if (hashp->mapp[i])
+ free(hashp->mapp[i]);
+
+ if (hashp->fp != -1)
+ (void)close(hashp->fp);
+
+ free(hashp);
+
+ if (save_errno) {
+ errno = save_errno;
+ return (ERROR);
+ }
+ return (SUCCESS);
+}
+/*
+ * Write modified pages to disk
+ *
+ * Returns:
+ * 0 == OK
+ * -1 ERROR
+ */
+static int
+hash_sync(dbp, flags)
+ const DB *dbp;
+ u_int32_t flags;
+{
+ HTAB *hashp;
+
+ if (flags != 0) {
+ errno = EINVAL;
+ return (ERROR);
+ }
+
+ if (!dbp)
+ return (ERROR);
+
+ hashp = (HTAB *)dbp->internal;
+ if (!hashp->save_file)
+ return (0);
+ if (__buf_free(hashp, 0, 1) || flush_meta(hashp))
+ return (ERROR);
+ hashp->new_file = 0;
+ return (0);
+}
+
+/*
+ * Returns:
+ * 0 == OK
+ * -1 indicates that errno should be set
+ */
+static int
+flush_meta(hashp)
+ HTAB *hashp;
+{
+ HASHHDR *whdrp;
+#if BYTE_ORDER == LITTLE_ENDIAN
+ HASHHDR whdr;
+#endif
+ int fp, i, wsize;
+
+ if (!hashp->save_file)
+ return (0);
+ hashp->MAGIC = HASHMAGIC;
+ hashp->VERSION = HASHVERSION;
+ hashp->H_CHARKEY = hashp->hash(CHARKEY, sizeof(CHARKEY));
+
+ fp = hashp->fp;
+ whdrp = &hashp->hdr;
+#if BYTE_ORDER == LITTLE_ENDIAN
+ whdrp = &whdr;
+ swap_header_copy(&hashp->hdr, whdrp);
+#endif
+ if ((lseek(fp, (off_t)0, SEEK_SET) == -1) ||
+ ((wsize = write(fp, whdrp, sizeof(HASHHDR))) == -1))
+ return (-1);
+ else
+ if (wsize != sizeof(HASHHDR)) {
+ errno = EFTYPE;
+ hashp->errno = errno;
+ return (-1);
+ }
+ for (i = 0; i < NCACHED; i++)
+ if (hashp->mapp[i])
+ if (__put_page(hashp, (char *)hashp->mapp[i],
+ hashp->BITMAPS[i], 0, 1))
+ return (-1);
+ return (0);
+}
+
+/*******************************SEARCH ROUTINES *****************************/
+/*
+ * All the access routines return
+ *
+ * Returns:
+ * 0 on SUCCESS
+ * 1 to indicate an external ERROR (i.e. key not found, etc)
+ * -1 to indicate an internal ERROR (i.e. out of memory, etc)
+ */
+static int
+hash_get(dbp, key, data, flag)
+ const DB *dbp;
+ const DBT *key;
+ DBT *data;
+ u_int32_t flag;
+{
+ HTAB *hashp;
+
+ hashp = (HTAB *)dbp->internal;
+ if (flag) {
+ hashp->errno = errno = EINVAL;
+ return (ERROR);
+ }
+ return (hash_access(hashp, HASH_GET, (DBT *)key, data));
+}
+
+static int
+hash_put(dbp, key, data, flag)
+ const DB *dbp;
+ DBT *key;
+ const DBT *data;
+ u_int32_t flag;
+{
+ HTAB *hashp;
+
+ hashp = (HTAB *)dbp->internal;
+ if (flag && flag != R_NOOVERWRITE) {
+ hashp->errno = errno = EINVAL;
+ return (ERROR);
+ }
+ if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
+ hashp->errno = errno = EPERM;
+ return (ERROR);
+ }
+ return (hash_access(hashp, flag == R_NOOVERWRITE ?
+ HASH_PUTNEW : HASH_PUT, (DBT *)key, (DBT *)data));
+}
+
+static int
+hash_delete(dbp, key, flag)
+ const DB *dbp;
+ const DBT *key;
+ u_int32_t flag; /* Ignored */
+{
+ HTAB *hashp;
+
+ hashp = (HTAB *)dbp->internal;
+ if (flag && flag != R_CURSOR) {
+ hashp->errno = errno = EINVAL;
+ return (ERROR);
+ }
+ if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
+ hashp->errno = errno = EPERM;
+ return (ERROR);
+ }
+ return (hash_access(hashp, HASH_DELETE, (DBT *)key, NULL));
+}
+
+/*
+ * Assume that hashp has been set in wrapper routine.
+ */
+static int
+hash_access(hashp, action, key, val)
+ HTAB *hashp;
+ ACTION action;
+ DBT *key, *val;
+{
+ register BUFHEAD *rbufp;
+ BUFHEAD *bufp, *save_bufp;
+ register u_int16_t *bp;
+ register int n, ndx, off, size;
+ register char *kp;
+ u_int16_t pageno;
+
+#ifdef HASH_STATISTICS
+ hash_accesses++;
+#endif
+
+ off = hashp->BSIZE;
+ size = key->size;
+ kp = (char *)key->data;
+ rbufp = __get_buf(hashp, __call_hash(hashp, kp, size), NULL, 0);
+ if (!rbufp)
+ return (ERROR);
+ save_bufp = rbufp;
+
+ /* Pin the bucket chain */
+ rbufp->flags |= BUF_PIN;
+ for (bp = (u_int16_t *)rbufp->page, n = *bp++, ndx = 1; ndx < n;)
+ if (bp[1] >= REAL_KEY) {
+ /* Real key/data pair */
+ if (size == off - *bp &&
+ memcmp(kp, rbufp->page + *bp, size) == 0)
+ goto found;
+ off = bp[1];
+#ifdef HASH_STATISTICS
+ hash_collisions++;
+#endif
+ bp += 2;
+ ndx += 2;
+ } else if (bp[1] == OVFLPAGE) {
+ rbufp = __get_buf(hashp, *bp, rbufp, 0);
+ if (!rbufp) {
+ save_bufp->flags &= ~BUF_PIN;
+ return (ERROR);
+ }
+ /* FOR LOOP INIT */
+ bp = (u_int16_t *)rbufp->page;
+ n = *bp++;
+ ndx = 1;
+ off = hashp->BSIZE;
+ } else if (bp[1] < REAL_KEY) {
+ if ((ndx =
+ __find_bigpair(hashp, rbufp, ndx, kp, size)) > 0)
+ goto found;
+ if (ndx == -2) {
+ bufp = rbufp;
+ if (!(pageno =
+ __find_last_page(hashp, &bufp))) {
+ ndx = 0;
+ rbufp = bufp;
+ break; /* FOR */
+ }
+ rbufp = __get_buf(hashp, pageno, bufp, 0);
+ if (!rbufp) {
+ save_bufp->flags &= ~BUF_PIN;
+ return (ERROR);
+ }
+ /* FOR LOOP INIT */
+ bp = (u_int16_t *)rbufp->page;
+ n = *bp++;
+ ndx = 1;
+ off = hashp->BSIZE;
+ } else {
+ save_bufp->flags &= ~BUF_PIN;
+ return (ERROR);
+ }
+ }
+
+ /* Not found */
+ switch (action) {
+ case HASH_PUT:
+ case HASH_PUTNEW:
+ if (__addel(hashp, rbufp, key, val)) {
+ save_bufp->flags &= ~BUF_PIN;
+ return (ERROR);
+ } else {
+ save_bufp->flags &= ~BUF_PIN;
+ return (SUCCESS);
+ }
+ case HASH_GET:
+ case HASH_DELETE:
+ default:
+ save_bufp->flags &= ~BUF_PIN;
+ return (ABNORMAL);
+ }
+
+found:
+ switch (action) {
+ case HASH_PUTNEW:
+ save_bufp->flags &= ~BUF_PIN;
+ return (ABNORMAL);
+ case HASH_GET:
+ bp = (u_int16_t *)rbufp->page;
+ if (bp[ndx + 1] < REAL_KEY) {
+ if (__big_return(hashp, rbufp, ndx, val, 0))
+ return (ERROR);
+ } else {
+ val->data = (u_char *)rbufp->page + (int)bp[ndx + 1];
+ val->size = bp[ndx] - bp[ndx + 1];
+ }
+ break;
+ case HASH_PUT:
+ if ((__delpair(hashp, rbufp, ndx)) ||
+ (__addel(hashp, rbufp, key, val))) {
+ save_bufp->flags &= ~BUF_PIN;
+ return (ERROR);
+ }
+ break;
+ case HASH_DELETE:
+ if (__delpair(hashp, rbufp, ndx))
+ return (ERROR);
+ break;
+ default:
+ abort();
+ }
+ save_bufp->flags &= ~BUF_PIN;
+ return (SUCCESS);
+}
+
+static int
+hash_seq(dbp, key, data, flag)
+ const DB *dbp;
+ DBT *key, *data;
+ u_int32_t flag;
+{
+ register u_int32_t bucket;
+ register BUFHEAD *bufp;
+ HTAB *hashp;
+ u_int16_t *bp, ndx;
+
+ hashp = (HTAB *)dbp->internal;
+ if (flag && flag != R_FIRST && flag != R_NEXT) {
+ hashp->errno = errno = EINVAL;
+ return (ERROR);
+ }
+#ifdef HASH_STATISTICS
+ hash_accesses++;
+#endif
+ if ((hashp->cbucket < 0) || (flag == R_FIRST)) {
+ hashp->cbucket = 0;
+ hashp->cndx = 1;
+ hashp->cpage = NULL;
+ }
+
+ for (bp = NULL; !bp || !bp[0]; ) {
+ if (!(bufp = hashp->cpage)) {
+ for (bucket = hashp->cbucket;
+ bucket <= hashp->MAX_BUCKET;
+ bucket++, hashp->cndx = 1) {
+ bufp = __get_buf(hashp, bucket, NULL, 0);
+ if (!bufp)
+ return (ERROR);
+ hashp->cpage = bufp;
+ bp = (u_int16_t *)bufp->page;
+ if (bp[0])
+ break;
+ }
+ hashp->cbucket = bucket;
+ if (hashp->cbucket > hashp->MAX_BUCKET) {
+ hashp->cbucket = -1;
+ return (ABNORMAL);
+ }
+ } else
+ bp = (u_int16_t *)hashp->cpage->page;
+
+#ifdef DEBUG
+ assert(bp);
+ assert(bufp);
+#endif
+ while (bp[hashp->cndx + 1] == OVFLPAGE) {
+ bufp = hashp->cpage =
+ __get_buf(hashp, bp[hashp->cndx], bufp, 0);
+ if (!bufp)
+ return (ERROR);
+ bp = (u_int16_t *)(bufp->page);
+ hashp->cndx = 1;
+ }
+ if (!bp[0]) {
+ hashp->cpage = NULL;
+ ++hashp->cbucket;
+ }
+ }
+ ndx = hashp->cndx;
+ if (bp[ndx + 1] < REAL_KEY) {
+ if (__big_keydata(hashp, bufp, key, data, 1))
+ return (ERROR);
+ } else {
+ key->data = (u_char *)hashp->cpage->page + bp[ndx];
+ key->size = (ndx > 1 ? bp[ndx - 1] : hashp->BSIZE) - bp[ndx];
+ data->data = (u_char *)hashp->cpage->page + bp[ndx + 1];
+ data->size = bp[ndx] - bp[ndx + 1];
+ ndx += 2;
+ if (ndx > bp[0]) {
+ hashp->cpage = NULL;
+ hashp->cbucket++;
+ hashp->cndx = 1;
+ } else
+ hashp->cndx = ndx;
+ }
+ return (SUCCESS);
+}
+
+/********************************* UTILITIES ************************/
+
+/*
+ * Returns:
+ * 0 ==> OK
+ * -1 ==> Error
+ */
+extern int
+__expand_table(hashp)
+ HTAB *hashp;
+{
+ u_int32_t old_bucket, new_bucket;
+ int dirsize, new_segnum, spare_ndx;
+
+#ifdef HASH_STATISTICS
+ hash_expansions++;
+#endif
+ new_bucket = ++hashp->MAX_BUCKET;
+ old_bucket = (hashp->MAX_BUCKET & hashp->LOW_MASK);
+
+ new_segnum = new_bucket >> hashp->SSHIFT;
+
+ /* Check if we need a new segment */
+ if (new_segnum >= hashp->nsegs) {
+ /* Check if we need to expand directory */
+ if (new_segnum >= hashp->DSIZE) {
+ /* Reallocate directory */
+ dirsize = hashp->DSIZE * sizeof(SEGMENT *);
+ if (!hash_realloc(&hashp->dir, dirsize, dirsize << 1))
+ return (-1);
+ hashp->DSIZE = dirsize << 1;
+ }
+ if ((hashp->dir[new_segnum] =
+ (SEGMENT)calloc(hashp->SGSIZE, sizeof(SEGMENT))) == NULL)
+ return (-1);
+ hashp->exsegs++;
+ hashp->nsegs++;
+ }
+ /*
+ * If the split point is increasing (MAX_BUCKET's log base 2
+ * * increases), we need to copy the current contents of the spare
+ * split bucket to the next bucket.
+ */
+ spare_ndx = __log2(hashp->MAX_BUCKET + 1);
+ if (spare_ndx > hashp->OVFL_POINT) {
+ hashp->SPARES[spare_ndx] = hashp->SPARES[hashp->OVFL_POINT];
+ hashp->OVFL_POINT = spare_ndx;
+ }
+
+ if (new_bucket > hashp->HIGH_MASK) {
+ /* Starting a new doubling */
+ hashp->LOW_MASK = hashp->HIGH_MASK;
+ hashp->HIGH_MASK = new_bucket | hashp->LOW_MASK;
+ }
+ /* Relocate records to the new bucket */
+ return (__split_page(hashp, old_bucket, new_bucket));
+}
+
+/*
+ * If realloc guarantees that the pointer is not destroyed if the realloc
+ * fails, then this routine can go away.
+ */
+static void *
+hash_realloc(p_ptr, oldsize, newsize)
+ SEGMENT **p_ptr;
+ int oldsize, newsize;
+{
+ register void *p;
+
+ if (p = malloc(newsize)) {
+ memmove(p, *p_ptr, oldsize);
+ memset((char *)p + oldsize, 0, newsize - oldsize);
+ free(*p_ptr);
+ *p_ptr = p;
+ }
+ return (p);
+}
+
+extern u_int32_t
+__call_hash(hashp, k, len)
+ HTAB *hashp;
+ char *k;
+ int len;
+{
+ int n, bucket;
+
+ n = hashp->hash(k, len);
+ bucket = n & hashp->HIGH_MASK;
+ if (bucket > hashp->MAX_BUCKET)
+ bucket = bucket & hashp->LOW_MASK;
+ return (bucket);
+}
+
+/*
+ * Allocate segment table. On error, destroy the table and set errno.
+ *
+ * Returns 0 on success
+ */
+static int
+alloc_segs(hashp, nsegs)
+ HTAB *hashp;
+ int nsegs;
+{
+ register int i;
+ register SEGMENT store;
+
+ int save_errno;
+
+ if ((hashp->dir =
+ (SEGMENT *)calloc(hashp->DSIZE, sizeof(SEGMENT *))) == NULL) {
+ save_errno = errno;
+ (void)hdestroy(hashp);
+ errno = save_errno;
+ return (-1);
+ }
+ /* Allocate segments */
+ if ((store =
+ (SEGMENT)calloc(nsegs << hashp->SSHIFT, sizeof(SEGMENT))) == NULL) {
+ save_errno = errno;
+ (void)hdestroy(hashp);
+ errno = save_errno;
+ return (-1);
+ }
+ for (i = 0; i < nsegs; i++, hashp->nsegs++)
+ hashp->dir[i] = &store[i << hashp->SSHIFT];
+ return (0);
+}
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+/*
+ * Hashp->hdr needs to be byteswapped.
+ */
+static void
+swap_header_copy(srcp, destp)
+ HASHHDR *srcp, *destp;
+{
+ int i;
+
+ P_32_COPY(srcp->magic, destp->magic);
+ P_32_COPY(srcp->version, destp->version);
+ P_32_COPY(srcp->lorder, destp->lorder);
+ P_32_COPY(srcp->bsize, destp->bsize);
+ P_32_COPY(srcp->bshift, destp->bshift);
+ P_32_COPY(srcp->dsize, destp->dsize);
+ P_32_COPY(srcp->ssize, destp->ssize);
+ P_32_COPY(srcp->sshift, destp->sshift);
+ P_32_COPY(srcp->ovfl_point, destp->ovfl_point);
+ P_32_COPY(srcp->last_freed, destp->last_freed);
+ P_32_COPY(srcp->max_bucket, destp->max_bucket);
+ P_32_COPY(srcp->high_mask, destp->high_mask);
+ P_32_COPY(srcp->low_mask, destp->low_mask);
+ P_32_COPY(srcp->ffactor, destp->ffactor);
+ P_32_COPY(srcp->nkeys, destp->nkeys);
+ P_32_COPY(srcp->hdrpages, destp->hdrpages);
+ P_32_COPY(srcp->h_charkey, destp->h_charkey);
+ for (i = 0; i < NCACHED; i++) {
+ P_32_COPY(srcp->spares[i], destp->spares[i]);
+ P_16_COPY(srcp->bitmaps[i], destp->bitmaps[i]);
+ }
+}
+
+static void
+swap_header(hashp)
+ HTAB *hashp;
+{
+ HASHHDR *hdrp;
+ int i;
+
+ hdrp = &hashp->hdr;
+
+ M_32_SWAP(hdrp->magic);
+ M_32_SWAP(hdrp->version);
+ M_32_SWAP(hdrp->lorder);
+ M_32_SWAP(hdrp->bsize);
+ M_32_SWAP(hdrp->bshift);
+ M_32_SWAP(hdrp->dsize);
+ M_32_SWAP(hdrp->ssize);
+ M_32_SWAP(hdrp->sshift);
+ M_32_SWAP(hdrp->ovfl_point);
+ M_32_SWAP(hdrp->last_freed);
+ M_32_SWAP(hdrp->max_bucket);
+ M_32_SWAP(hdrp->high_mask);
+ M_32_SWAP(hdrp->low_mask);
+ M_32_SWAP(hdrp->ffactor);
+ M_32_SWAP(hdrp->nkeys);
+ M_32_SWAP(hdrp->hdrpages);
+ M_32_SWAP(hdrp->h_charkey);
+ for (i = 0; i < NCACHED; i++) {
+ M_32_SWAP(hdrp->spares[i]);
+ M_16_SWAP(hdrp->bitmaps[i]);
+ }
+}
+#endif
diff --git a/lib/libc/db/hash/hash.h b/lib/libc/db/hash/hash.h
new file mode 100644
index 00000000000..1ad93b74d9f
--- /dev/null
+++ b/lib/libc/db/hash/hash.h
@@ -0,0 +1,295 @@
+/* $NetBSD: hash.h,v 1.5 1995/02/27 13:22:08 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)hash.h 8.3 (Berkeley) 5/31/94
+ */
+
+/* Operations */
+typedef enum {
+ HASH_GET, HASH_PUT, HASH_PUTNEW, HASH_DELETE, HASH_FIRST, HASH_NEXT
+} ACTION;
+
+/* Buffer Management structures */
+typedef struct _bufhead BUFHEAD;
+
+struct _bufhead {
+ BUFHEAD *prev; /* LRU links */
+ BUFHEAD *next; /* LRU links */
+ BUFHEAD *ovfl; /* Overflow page buffer header */
+ u_int32_t addr; /* Address of this page */
+ char *page; /* Actual page data */
+ char flags;
+#define BUF_MOD 0x0001
+#define BUF_DISK 0x0002
+#define BUF_BUCKET 0x0004
+#define BUF_PIN 0x0008
+};
+
+#define IS_BUCKET(X) ((X) & BUF_BUCKET)
+
+typedef BUFHEAD **SEGMENT;
+
+/* Hash Table Information */
+typedef struct hashhdr { /* Disk resident portion */
+ int magic; /* Magic NO for hash tables */
+ int version; /* Version ID */
+ u_int32_t lorder; /* Byte Order */
+ int bsize; /* Bucket/Page Size */
+ int bshift; /* Bucket shift */
+ int dsize; /* Directory Size */
+ int ssize; /* Segment Size */
+ int sshift; /* Segment shift */
+ int ovfl_point; /* Where overflow pages are being
+ * allocated */
+ int last_freed; /* Last overflow page freed */
+ int max_bucket; /* ID of Maximum bucket in use */
+ int high_mask; /* Mask to modulo into entire table */
+ int low_mask; /* Mask to modulo into lower half of
+ * table */
+ int ffactor; /* Fill factor */
+ int nkeys; /* Number of keys in hash table */
+ int hdrpages; /* Size of table header */
+ int h_charkey; /* value of hash(CHARKEY) */
+#define NCACHED 32 /* number of bit maps and spare
+ * points */
+ int spares[NCACHED];/* spare pages for overflow */
+ u_int16_t bitmaps[NCACHED]; /* address of overflow page
+ * bitmaps */
+} HASHHDR;
+
+typedef struct htab { /* Memory resident data structure */
+ HASHHDR hdr; /* Header */
+ int nsegs; /* Number of allocated segments */
+ int exsegs; /* Number of extra allocated
+ * segments */
+ u_int32_t /* Hash function */
+ (*hash)__P((const void *, size_t));
+ int flags; /* Flag values */
+ int fp; /* File pointer */
+ char *tmp_buf; /* Temporary Buffer for BIG data */
+ char *tmp_key; /* Temporary Buffer for BIG keys */
+ BUFHEAD *cpage; /* Current page */
+ int cbucket; /* Current bucket */
+ int cndx; /* Index of next item on cpage */
+ int errno; /* Error Number -- for DBM
+ * compatability */
+ int new_file; /* Indicates if fd is backing store
+ * or no */
+ int save_file; /* Indicates whether we need to flush
+ * file at
+ * exit */
+ u_int32_t *mapp[NCACHED]; /* Pointers to page maps */
+ int nmaps; /* Initial number of bitmaps */
+ int nbufs; /* Number of buffers left to
+ * allocate */
+ BUFHEAD bufhead; /* Header of buffer lru list */
+ SEGMENT *dir; /* Hash Bucket directory */
+} HTAB;
+
+/*
+ * Constants
+ */
+#define MAX_BSIZE 65536 /* 2^16 */
+#define MIN_BUFFERS 6
+#define MINHDRSIZE 512
+#define DEF_BUFSIZE 65536 /* 64 K */
+#define DEF_BUCKET_SIZE 4096
+#define DEF_BUCKET_SHIFT 12 /* log2(BUCKET) */
+#define DEF_SEGSIZE 256
+#define DEF_SEGSIZE_SHIFT 8 /* log2(SEGSIZE) */
+#define DEF_DIRSIZE 256
+#define DEF_FFACTOR 65536
+#define MIN_FFACTOR 4
+#define SPLTMAX 8
+#define CHARKEY "%$sniglet^&"
+#define NUMKEY 1038583
+#define BYTE_SHIFT 3
+#define INT_TO_BYTE 2
+#define INT_BYTE_SHIFT 5
+#define ALL_SET ((u_int32_t)0xFFFFFFFF)
+#define ALL_CLEAR 0
+
+#define PTROF(X) ((BUFHEAD *)((ptrdiff_t)(X)&~0x3))
+#define ISMOD(X) ((u_int32_t)(ptrdiff_t)(X)&0x1)
+#define DOMOD(X) ((X) = (char *)((ptrdiff_t)(X)|0x1))
+#define ISDISK(X) ((u_int32_t)(ptrdiff_t)(X)&0x2)
+#define DODISK(X) ((X) = (char *)((ptrdiff_t)(X)|0x2))
+
+#define BITS_PER_MAP 32
+
+/* Given the address of the beginning of a big map, clear/set the nth bit */
+#define CLRBIT(A, N) ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP)))
+#define SETBIT(A, N) ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP)))
+#define ISSET(A, N) ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP)))
+
+/* Overflow management */
+/*
+ * Overflow page numbers are allocated per split point. At each doubling of
+ * the table, we can allocate extra pages. So, an overflow page number has
+ * the top 5 bits indicate which split point and the lower 11 bits indicate
+ * which page at that split point is indicated (pages within split points are
+ * numberered starting with 1).
+ */
+
+#define SPLITSHIFT 11
+#define SPLITMASK 0x7FF
+#define SPLITNUM(N) (((u_int32_t)(N)) >> SPLITSHIFT)
+#define OPAGENUM(N) ((N) & SPLITMASK)
+#define OADDR_OF(S,O) ((u_int32_t)((u_int32_t)(S) << SPLITSHIFT) + (O))
+
+#define BUCKET_TO_PAGE(B) \
+ (B) + hashp->HDRPAGES + ((B) ? hashp->SPARES[__log2((B)+1)-1] : 0)
+#define OADDR_TO_PAGE(B) \
+ BUCKET_TO_PAGE ( (1 << SPLITNUM((B))) -1 ) + OPAGENUM((B));
+
+/*
+ * page.h contains a detailed description of the page format.
+ *
+ * Normally, keys and data are accessed from offset tables in the top of
+ * each page which point to the beginning of the key and data. There are
+ * four flag values which may be stored in these offset tables which indicate
+ * the following:
+ *
+ *
+ * OVFLPAGE Rather than a key data pair, this pair contains
+ * the address of an overflow page. The format of
+ * the pair is:
+ * OVERFLOW_PAGE_NUMBER OVFLPAGE
+ *
+ * PARTIAL_KEY This must be the first key/data pair on a page
+ * and implies that page contains only a partial key.
+ * That is, the key is too big to fit on a single page
+ * so it starts on this page and continues on the next.
+ * The format of the page is:
+ * KEY_OFF PARTIAL_KEY OVFL_PAGENO OVFLPAGE
+ *
+ * KEY_OFF -- offset of the beginning of the key
+ * PARTIAL_KEY -- 1
+ * OVFL_PAGENO - page number of the next overflow page
+ * OVFLPAGE -- 0
+ *
+ * FULL_KEY This must be the first key/data pair on the page. It
+ * is used in two cases.
+ *
+ * Case 1:
+ * There is a complete key on the page but no data
+ * (because it wouldn't fit). The next page contains
+ * the data.
+ *
+ * Page format it:
+ * KEY_OFF FULL_KEY OVFL_PAGENO OVFL_PAGE
+ *
+ * KEY_OFF -- offset of the beginning of the key
+ * FULL_KEY -- 2
+ * OVFL_PAGENO - page number of the next overflow page
+ * OVFLPAGE -- 0
+ *
+ * Case 2:
+ * This page contains no key, but part of a large
+ * data field, which is continued on the next page.
+ *
+ * Page format it:
+ * DATA_OFF FULL_KEY OVFL_PAGENO OVFL_PAGE
+ *
+ * KEY_OFF -- offset of the beginning of the data on
+ * this page
+ * FULL_KEY -- 2
+ * OVFL_PAGENO - page number of the next overflow page
+ * OVFLPAGE -- 0
+ *
+ * FULL_KEY_DATA
+ * This must be the first key/data pair on the page.
+ * There are two cases:
+ *
+ * Case 1:
+ * This page contains a key and the beginning of the
+ * data field, but the data field is continued on the
+ * next page.
+ *
+ * Page format is:
+ * KEY_OFF FULL_KEY_DATA OVFL_PAGENO DATA_OFF
+ *
+ * KEY_OFF -- offset of the beginning of the key
+ * FULL_KEY_DATA -- 3
+ * OVFL_PAGENO - page number of the next overflow page
+ * DATA_OFF -- offset of the beginning of the data
+ *
+ * Case 2:
+ * This page contains the last page of a big data pair.
+ * There is no key, only the tail end of the data
+ * on this page.
+ *
+ * Page format is:
+ * DATA_OFF FULL_KEY_DATA <OVFL_PAGENO> <OVFLPAGE>
+ *
+ * DATA_OFF -- offset of the beginning of the data on
+ * this page
+ * FULL_KEY_DATA -- 3
+ * OVFL_PAGENO - page number of the next overflow page
+ * OVFLPAGE -- 0
+ *
+ * OVFL_PAGENO and OVFLPAGE are optional (they are
+ * not present if there is no next page).
+ */
+
+#define OVFLPAGE 0
+#define PARTIAL_KEY 1
+#define FULL_KEY 2
+#define FULL_KEY_DATA 3
+#define REAL_KEY 4
+
+/* Short hands for accessing structure */
+#define BSIZE hdr.bsize
+#define BSHIFT hdr.bshift
+#define DSIZE hdr.dsize
+#define SGSIZE hdr.ssize
+#define SSHIFT hdr.sshift
+#define LORDER hdr.lorder
+#define OVFL_POINT hdr.ovfl_point
+#define LAST_FREED hdr.last_freed
+#define MAX_BUCKET hdr.max_bucket
+#define FFACTOR hdr.ffactor
+#define HIGH_MASK hdr.high_mask
+#define LOW_MASK hdr.low_mask
+#define NKEYS hdr.nkeys
+#define HDRPAGES hdr.hdrpages
+#define SPARES hdr.spares
+#define BITMAPS hdr.bitmaps
+#define VERSION hdr.version
+#define MAGIC hdr.magic
+#define NEXT_FREE hdr.next_free
+#define H_CHARKEY hdr.h_charkey
diff --git a/lib/libc/db/hash/hash_bigkey.c b/lib/libc/db/hash/hash_bigkey.c
new file mode 100644
index 00000000000..993839d9f79
--- /dev/null
+++ b/lib/libc/db/hash/hash_bigkey.c
@@ -0,0 +1,673 @@
+/* $NetBSD: hash_bigkey.c,v 1.5 1995/02/27 13:22:16 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94";
+#else
+static char rcsid[] = "$NetBSD: hash_bigkey.c,v 1.5 1995/02/27 13:22:16 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * PACKAGE: hash
+ * DESCRIPTION:
+ * Big key/data handling for the hashing package.
+ *
+ * ROUTINES:
+ * External
+ * __big_keydata
+ * __big_split
+ * __big_insert
+ * __big_return
+ * __big_delete
+ * __find_last_page
+ * Internal
+ * collect_key
+ * collect_data
+ */
+
+#include <sys/param.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef DEBUG
+#include <assert.h>
+#endif
+
+#include <db.h>
+#include "hash.h"
+#include "page.h"
+#include "extern.h"
+
+static int collect_key __P((HTAB *, BUFHEAD *, int, DBT *, int));
+static int collect_data __P((HTAB *, BUFHEAD *, int, int));
+
+/*
+ * Big_insert
+ *
+ * You need to do an insert and the key/data pair is too big
+ *
+ * Returns:
+ * 0 ==> OK
+ *-1 ==> ERROR
+ */
+extern int
+__big_insert(hashp, bufp, key, val)
+ HTAB *hashp;
+ BUFHEAD *bufp;
+ const DBT *key, *val;
+{
+ register u_int16_t *p;
+ int key_size, n, val_size;
+ u_int16_t space, move_bytes, off;
+ char *cp, *key_data, *val_data;
+
+ cp = bufp->page; /* Character pointer of p. */
+ p = (u_int16_t *)cp;
+
+ key_data = (char *)key->data;
+ key_size = key->size;
+ val_data = (char *)val->data;
+ val_size = val->size;
+
+ /* First move the Key */
+ for (space = FREESPACE(p) - BIGOVERHEAD; key_size;
+ space = FREESPACE(p) - BIGOVERHEAD) {
+ move_bytes = MIN(space, key_size);
+ off = OFFSET(p) - move_bytes;
+ memmove(cp + off, key_data, move_bytes);
+ key_size -= move_bytes;
+ key_data += move_bytes;
+ n = p[0];
+ p[++n] = off;
+ p[0] = ++n;
+ FREESPACE(p) = off - PAGE_META(n);
+ OFFSET(p) = off;
+ p[n] = PARTIAL_KEY;
+ bufp = __add_ovflpage(hashp, bufp);
+ if (!bufp)
+ return (-1);
+ n = p[0];
+ if (!key_size)
+ if (FREESPACE(p)) {
+ move_bytes = MIN(FREESPACE(p), val_size);
+ off = OFFSET(p) - move_bytes;
+ p[n] = off;
+ memmove(cp + off, val_data, move_bytes);
+ val_data += move_bytes;
+ val_size -= move_bytes;
+ p[n - 2] = FULL_KEY_DATA;
+ FREESPACE(p) = FREESPACE(p) - move_bytes;
+ OFFSET(p) = off;
+ } else
+ p[n - 2] = FULL_KEY;
+ p = (u_int16_t *)bufp->page;
+ cp = bufp->page;
+ bufp->flags |= BUF_MOD;
+ }
+
+ /* Now move the data */
+ for (space = FREESPACE(p) - BIGOVERHEAD; val_size;
+ space = FREESPACE(p) - BIGOVERHEAD) {
+ move_bytes = MIN(space, val_size);
+ /*
+ * Here's the hack to make sure that if the data ends on the
+ * same page as the key ends, FREESPACE is at least one.
+ */
+ if (space == val_size && val_size == val->size)
+ move_bytes--;
+ off = OFFSET(p) - move_bytes;
+ memmove(cp + off, val_data, move_bytes);
+ val_size -= move_bytes;
+ val_data += move_bytes;
+ n = p[0];
+ p[++n] = off;
+ p[0] = ++n;
+ FREESPACE(p) = off - PAGE_META(n);
+ OFFSET(p) = off;
+ if (val_size) {
+ p[n] = FULL_KEY;
+ bufp = __add_ovflpage(hashp, bufp);
+ if (!bufp)
+ return (-1);
+ cp = bufp->page;
+ p = (u_int16_t *)cp;
+ } else
+ p[n] = FULL_KEY_DATA;
+ bufp->flags |= BUF_MOD;
+ }
+ return (0);
+}
+
+/*
+ * Called when bufp's page contains a partial key (index should be 1)
+ *
+ * All pages in the big key/data pair except bufp are freed. We cannot
+ * free bufp because the page pointing to it is lost and we can't get rid
+ * of its pointer.
+ *
+ * Returns:
+ * 0 => OK
+ *-1 => ERROR
+ */
+extern int
+__big_delete(hashp, bufp)
+ HTAB *hashp;
+ BUFHEAD *bufp;
+{
+ register BUFHEAD *last_bfp, *rbufp;
+ u_int16_t *bp, pageno;
+ int key_done, n;
+
+ rbufp = bufp;
+ last_bfp = NULL;
+ bp = (u_int16_t *)bufp->page;
+ pageno = 0;
+ key_done = 0;
+
+ while (!key_done || (bp[2] != FULL_KEY_DATA)) {
+ if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA)
+ key_done = 1;
+
+ /*
+ * If there is freespace left on a FULL_KEY_DATA page, then
+ * the data is short and fits entirely on this page, and this
+ * is the last page.
+ */
+ if (bp[2] == FULL_KEY_DATA && FREESPACE(bp))
+ break;
+ pageno = bp[bp[0] - 1];
+ rbufp->flags |= BUF_MOD;
+ rbufp = __get_buf(hashp, pageno, rbufp, 0);
+ if (last_bfp)
+ __free_ovflpage(hashp, last_bfp);
+ last_bfp = rbufp;
+ if (!rbufp)
+ return (-1); /* Error. */
+ bp = (u_int16_t *)rbufp->page;
+ }
+
+ /*
+ * If we get here then rbufp points to the last page of the big
+ * key/data pair. Bufp points to the first one -- it should now be
+ * empty pointing to the next page after this pair. Can't free it
+ * because we don't have the page pointing to it.
+ */
+
+ /* This is information from the last page of the pair. */
+ n = bp[0];
+ pageno = bp[n - 1];
+
+ /* Now, bp is the first page of the pair. */
+ bp = (u_int16_t *)bufp->page;
+ if (n > 2) {
+ /* There is an overflow page. */
+ bp[1] = pageno;
+ bp[2] = OVFLPAGE;
+ bufp->ovfl = rbufp->ovfl;
+ } else
+ /* This is the last page. */
+ bufp->ovfl = NULL;
+ n -= 2;
+ bp[0] = n;
+ FREESPACE(bp) = hashp->BSIZE - PAGE_META(n);
+ OFFSET(bp) = hashp->BSIZE - 1;
+
+ bufp->flags |= BUF_MOD;
+ if (rbufp)
+ __free_ovflpage(hashp, rbufp);
+ if (last_bfp != rbufp)
+ __free_ovflpage(hashp, last_bfp);
+
+ hashp->NKEYS--;
+ return (0);
+}
+/*
+ * Returns:
+ * 0 = key not found
+ * -1 = get next overflow page
+ * -2 means key not found and this is big key/data
+ * -3 error
+ */
+extern int
+__find_bigpair(hashp, bufp, ndx, key, size)
+ HTAB *hashp;
+ BUFHEAD *bufp;
+ int ndx;
+ char *key;
+ int size;
+{
+ register u_int16_t *bp;
+ register char *p;
+ int ksize;
+ u_int16_t bytes;
+ char *kkey;
+
+ bp = (u_int16_t *)bufp->page;
+ p = bufp->page;
+ ksize = size;
+ kkey = key;
+
+ for (bytes = hashp->BSIZE - bp[ndx];
+ bytes <= size && bp[ndx + 1] == PARTIAL_KEY;
+ bytes = hashp->BSIZE - bp[ndx]) {
+ if (memcmp(p + bp[ndx], kkey, bytes))
+ return (-2);
+ kkey += bytes;
+ ksize -= bytes;
+ bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0);
+ if (!bufp)
+ return (-3);
+ p = bufp->page;
+ bp = (u_int16_t *)p;
+ ndx = 1;
+ }
+
+ if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) {
+#ifdef HASH_STATISTICS
+ ++hash_collisions;
+#endif
+ return (-2);
+ } else
+ return (ndx);
+}
+
+/*
+ * Given the buffer pointer of the first overflow page of a big pair,
+ * find the end of the big pair
+ *
+ * This will set bpp to the buffer header of the last page of the big pair.
+ * It will return the pageno of the overflow page following the last page
+ * of the pair; 0 if there isn't any (i.e. big pair is the last key in the
+ * bucket)
+ */
+extern u_int16_t
+__find_last_page(hashp, bpp)
+ HTAB *hashp;
+ BUFHEAD **bpp;
+{
+ BUFHEAD *bufp;
+ u_int16_t *bp, pageno;
+ int n;
+
+ bufp = *bpp;
+ bp = (u_int16_t *)bufp->page;
+ for (;;) {
+ n = bp[0];
+
+ /*
+ * This is the last page if: the tag is FULL_KEY_DATA and
+ * either only 2 entries OVFLPAGE marker is explicit there
+ * is freespace on the page.
+ */
+ if (bp[2] == FULL_KEY_DATA &&
+ ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp))))
+ break;
+
+ pageno = bp[n - 1];
+ bufp = __get_buf(hashp, pageno, bufp, 0);
+ if (!bufp)
+ return (0); /* Need to indicate an error! */
+ bp = (u_int16_t *)bufp->page;
+ }
+
+ *bpp = bufp;
+ if (bp[0] > 2)
+ return (bp[3]);
+ else
+ return (0);
+}
+
+/*
+ * Return the data for the key/data pair that begins on this page at this
+ * index (index should always be 1).
+ */
+extern int
+__big_return(hashp, bufp, ndx, val, set_current)
+ HTAB *hashp;
+ BUFHEAD *bufp;
+ int ndx;
+ DBT *val;
+ int set_current;
+{
+ BUFHEAD *save_p;
+ u_int16_t *bp, len, off, save_addr;
+ char *tp;
+
+ bp = (u_int16_t *)bufp->page;
+ while (bp[ndx + 1] == PARTIAL_KEY) {
+ bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
+ if (!bufp)
+ return (-1);
+ bp = (u_int16_t *)bufp->page;
+ ndx = 1;
+ }
+
+ if (bp[ndx + 1] == FULL_KEY) {
+ bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
+ if (!bufp)
+ return (-1);
+ bp = (u_int16_t *)bufp->page;
+ save_p = bufp;
+ save_addr = save_p->addr;
+ off = bp[1];
+ len = 0;
+ } else
+ if (!FREESPACE(bp)) {
+ /*
+ * This is a hack. We can't distinguish between
+ * FULL_KEY_DATA that contains complete data or
+ * incomplete data, so we require that if the data
+ * is complete, there is at least 1 byte of free
+ * space left.
+ */
+ off = bp[bp[0]];
+ len = bp[1] - off;
+ save_p = bufp;
+ save_addr = bufp->addr;
+ bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
+ if (!bufp)
+ return (-1);
+ bp = (u_int16_t *)bufp->page;
+ } else {
+ /* The data is all on one page. */
+ tp = (char *)bp;
+ off = bp[bp[0]];
+ val->data = (u_char *)tp + off;
+ val->size = bp[1] - off;
+ if (set_current) {
+ if (bp[0] == 2) { /* No more buckets in
+ * chain */
+ hashp->cpage = NULL;
+ hashp->cbucket++;
+ hashp->cndx = 1;
+ } else {
+ hashp->cpage = __get_buf(hashp,
+ bp[bp[0] - 1], bufp, 0);
+ if (!hashp->cpage)
+ return (-1);
+ hashp->cndx = 1;
+ if (!((u_int16_t *)
+ hashp->cpage->page)[0]) {
+ hashp->cbucket++;
+ hashp->cpage = NULL;
+ }
+ }
+ }
+ return (0);
+ }
+
+ val->size = collect_data(hashp, bufp, (int)len, set_current);
+ if (val->size == -1)
+ return (-1);
+ if (save_p->addr != save_addr) {
+ /* We are pretty short on buffers. */
+ errno = EINVAL; /* OUT OF BUFFERS */
+ return (-1);
+ }
+ memmove(hashp->tmp_buf, (save_p->page) + off, len);
+ val->data = (u_char *)hashp->tmp_buf;
+ return (0);
+}
+/*
+ * Count how big the total datasize is by recursing through the pages. Then
+ * allocate a buffer and copy the data as you recurse up.
+ */
+static int
+collect_data(hashp, bufp, len, set)
+ HTAB *hashp;
+ BUFHEAD *bufp;
+ int len, set;
+{
+ register u_int16_t *bp;
+ register char *p;
+ BUFHEAD *xbp;
+ u_int16_t save_addr;
+ int mylen, totlen;
+
+ p = bufp->page;
+ bp = (u_int16_t *)p;
+ mylen = hashp->BSIZE - bp[1];
+ save_addr = bufp->addr;
+
+ if (bp[2] == FULL_KEY_DATA) { /* End of Data */
+ totlen = len + mylen;
+ if (hashp->tmp_buf)
+ free(hashp->tmp_buf);
+ if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL)
+ return (-1);
+ if (set) {
+ hashp->cndx = 1;
+ if (bp[0] == 2) { /* No more buckets in chain */
+ hashp->cpage = NULL;
+ hashp->cbucket++;
+ } else {
+ hashp->cpage =
+ __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
+ if (!hashp->cpage)
+ return (-1);
+ else if (!((u_int16_t *)hashp->cpage->page)[0]) {
+ hashp->cbucket++;
+ hashp->cpage = NULL;
+ }
+ }
+ }
+ } else {
+ xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
+ if (!xbp || ((totlen =
+ collect_data(hashp, xbp, len + mylen, set)) < 1))
+ return (-1);
+ }
+ if (bufp->addr != save_addr) {
+ errno = EINVAL; /* Out of buffers. */
+ return (-1);
+ }
+ memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen);
+ return (totlen);
+}
+
+/*
+ * Fill in the key and data for this big pair.
+ */
+extern int
+__big_keydata(hashp, bufp, key, val, set)
+ HTAB *hashp;
+ BUFHEAD *bufp;
+ DBT *key, *val;
+ int set;
+{
+ key->size = collect_key(hashp, bufp, 0, val, set);
+ if (key->size == -1)
+ return (-1);
+ key->data = (u_char *)hashp->tmp_key;
+ return (0);
+}
+
+/*
+ * Count how big the total key size is by recursing through the pages. Then
+ * collect the data, allocate a buffer and copy the key as you recurse up.
+ */
+static int
+collect_key(hashp, bufp, len, val, set)
+ HTAB *hashp;
+ BUFHEAD *bufp;
+ int len;
+ DBT *val;
+ int set;
+{
+ BUFHEAD *xbp;
+ char *p;
+ int mylen, totlen;
+ u_int16_t *bp, save_addr;
+
+ p = bufp->page;
+ bp = (u_int16_t *)p;
+ mylen = hashp->BSIZE - bp[1];
+
+ save_addr = bufp->addr;
+ totlen = len + mylen;
+ if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */
+ if (hashp->tmp_key != NULL)
+ free(hashp->tmp_key);
+ if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL)
+ return (-1);
+ if (__big_return(hashp, bufp, 1, val, set))
+ return (-1);
+ } else {
+ xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
+ if (!xbp || ((totlen =
+ collect_key(hashp, xbp, totlen, val, set)) < 1))
+ return (-1);
+ }
+ if (bufp->addr != save_addr) {
+ errno = EINVAL; /* MIS -- OUT OF BUFFERS */
+ return (-1);
+ }
+ memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen);
+ return (totlen);
+}
+
+/*
+ * Returns:
+ * 0 => OK
+ * -1 => error
+ */
+extern int
+__big_split(hashp, op, np, big_keyp, addr, obucket, ret)
+ HTAB *hashp;
+ BUFHEAD *op; /* Pointer to where to put keys that go in old bucket */
+ BUFHEAD *np; /* Pointer to new bucket page */
+ /* Pointer to first page containing the big key/data */
+ BUFHEAD *big_keyp;
+ int addr; /* Address of big_keyp */
+ u_int32_t obucket;/* Old Bucket */
+ SPLIT_RETURN *ret;
+{
+ register BUFHEAD *tmpp;
+ register u_int16_t *tp;
+ BUFHEAD *bp;
+ DBT key, val;
+ u_int32_t change;
+ u_int16_t free_space, n, off;
+
+ bp = big_keyp;
+
+ /* Now figure out where the big key/data goes */
+ if (__big_keydata(hashp, big_keyp, &key, &val, 0))
+ return (-1);
+ change = (__call_hash(hashp, key.data, key.size) != obucket);
+
+ if (ret->next_addr = __find_last_page(hashp, &big_keyp)) {
+ if (!(ret->nextp =
+ __get_buf(hashp, ret->next_addr, big_keyp, 0)))
+ return (-1);;
+ } else
+ ret->nextp = NULL;
+
+ /* Now make one of np/op point to the big key/data pair */
+#ifdef DEBUG
+ assert(np->ovfl == NULL);
+#endif
+ if (change)
+ tmpp = np;
+ else
+ tmpp = op;
+
+ tmpp->flags |= BUF_MOD;
+#ifdef DEBUG1
+ (void)fprintf(stderr,
+ "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr,
+ (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0));
+#endif
+ tmpp->ovfl = bp; /* one of op/np point to big_keyp */
+ tp = (u_int16_t *)tmpp->page;
+#ifdef DEBUG
+ assert(FREESPACE(tp) >= OVFLSIZE);
+#endif
+ n = tp[0];
+ off = OFFSET(tp);
+ free_space = FREESPACE(tp);
+ tp[++n] = (u_int16_t)addr;
+ tp[++n] = OVFLPAGE;
+ tp[0] = n;
+ OFFSET(tp) = off;
+ FREESPACE(tp) = free_space - OVFLSIZE;
+
+ /*
+ * Finally, set the new and old return values. BIG_KEYP contains a
+ * pointer to the last page of the big key_data pair. Make sure that
+ * big_keyp has no following page (2 elements) or create an empty
+ * following page.
+ */
+
+ ret->newp = np;
+ ret->oldp = op;
+
+ tp = (u_int16_t *)big_keyp->page;
+ big_keyp->flags |= BUF_MOD;
+ if (tp[0] > 2) {
+ /*
+ * There may be either one or two offsets on this page. If
+ * there is one, then the overflow page is linked on normally
+ * and tp[4] is OVFLPAGE. If there are two, tp[4] contains
+ * the second offset and needs to get stuffed in after the
+ * next overflow page is added.
+ */
+ n = tp[4];
+ free_space = FREESPACE(tp);
+ off = OFFSET(tp);
+ tp[0] -= 2;
+ FREESPACE(tp) = free_space + OVFLSIZE;
+ OFFSET(tp) = off;
+ tmpp = __add_ovflpage(hashp, big_keyp);
+ if (!tmpp)
+ return (-1);
+ tp[4] = n;
+ } else
+ tmpp = big_keyp;
+
+ if (change)
+ ret->newp = tmpp;
+ else
+ ret->oldp = tmpp;
+ return (0);
+}
diff --git a/lib/libc/db/hash/hash_buf.c b/lib/libc/db/hash/hash_buf.c
new file mode 100644
index 00000000000..7747331688a
--- /dev/null
+++ b/lib/libc/db/hash/hash_buf.c
@@ -0,0 +1,355 @@
+/* $NetBSD: hash_buf.c,v 1.5 1995/02/27 13:22:23 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)hash_buf.c 8.4 (Berkeley) 6/4/94";
+#else
+static char rcsid[] = "$NetBSD: hash_buf.c,v 1.5 1995/02/27 13:22:23 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * PACKAGE: hash
+ *
+ * DESCRIPTION:
+ * Contains buffer management
+ *
+ * ROUTINES:
+ * External
+ * __buf_init
+ * __get_buf
+ * __buf_free
+ * __reclaim_buf
+ * Internal
+ * newbuf
+ */
+
+#include <sys/param.h>
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef DEBUG
+#include <assert.h>
+#endif
+
+#include <db.h>
+#include "hash.h"
+#include "page.h"
+#include "extern.h"
+
+static BUFHEAD *newbuf __P((HTAB *, u_int32_t, BUFHEAD *));
+
+/* Unlink B from its place in the lru */
+#define BUF_REMOVE(B) { \
+ (B)->prev->next = (B)->next; \
+ (B)->next->prev = (B)->prev; \
+}
+
+/* Insert B after P */
+#define BUF_INSERT(B, P) { \
+ (B)->next = (P)->next; \
+ (B)->prev = (P); \
+ (P)->next = (B); \
+ (B)->next->prev = (B); \
+}
+
+#define MRU hashp->bufhead.next
+#define LRU hashp->bufhead.prev
+
+#define MRU_INSERT(B) BUF_INSERT((B), &hashp->bufhead)
+#define LRU_INSERT(B) BUF_INSERT((B), LRU)
+
+/*
+ * We are looking for a buffer with address "addr". If prev_bp is NULL, then
+ * address is a bucket index. If prev_bp is not NULL, then it points to the
+ * page previous to an overflow page that we are trying to find.
+ *
+ * CAVEAT: The buffer header accessed via prev_bp's ovfl field may no longer
+ * be valid. Therefore, you must always verify that its address matches the
+ * address you are seeking.
+ */
+extern BUFHEAD *
+__get_buf(hashp, addr, prev_bp, newpage)
+ HTAB *hashp;
+ u_int32_t addr;
+ BUFHEAD *prev_bp;
+ int newpage; /* If prev_bp set, indicates a new overflow page. */
+{
+ register BUFHEAD *bp;
+ register u_int32_t is_disk_mask;
+ register int is_disk, segment_ndx;
+ SEGMENT segp;
+
+ is_disk = 0;
+ is_disk_mask = 0;
+ if (prev_bp) {
+ bp = prev_bp->ovfl;
+ if (!bp || (bp->addr != addr))
+ bp = NULL;
+ if (!newpage)
+ is_disk = BUF_DISK;
+ } else {
+ /* Grab buffer out of directory */
+ segment_ndx = addr & (hashp->SGSIZE - 1);
+
+ /* valid segment ensured by __call_hash() */
+ segp = hashp->dir[addr >> hashp->SSHIFT];
+#ifdef DEBUG
+ assert(segp != NULL);
+#endif
+ bp = PTROF(segp[segment_ndx]);
+ is_disk_mask = ISDISK(segp[segment_ndx]);
+ is_disk = is_disk_mask || !hashp->new_file;
+ }
+
+ if (!bp) {
+ bp = newbuf(hashp, addr, prev_bp);
+ if (!bp ||
+ __get_page(hashp, bp->page, addr, !prev_bp, is_disk, 0))
+ return (NULL);
+ if (!prev_bp)
+ segp[segment_ndx] =
+ (BUFHEAD *)((ptrdiff_t)bp | is_disk_mask);
+ } else {
+ BUF_REMOVE(bp);
+ MRU_INSERT(bp);
+ }
+ return (bp);
+}
+
+/*
+ * We need a buffer for this page. Either allocate one, or evict a resident
+ * one (if we have as many buffers as we're allowed) and put this one in.
+ *
+ * If newbuf finds an error (returning NULL), it also sets errno.
+ */
+static BUFHEAD *
+newbuf(hashp, addr, prev_bp)
+ HTAB *hashp;
+ u_int32_t addr;
+ BUFHEAD *prev_bp;
+{
+ register BUFHEAD *bp; /* The buffer we're going to use */
+ register BUFHEAD *xbp; /* Temp pointer */
+ register BUFHEAD *next_xbp;
+ SEGMENT segp;
+ int segment_ndx;
+ u_int16_t oaddr, *shortp;
+
+ oaddr = 0;
+ bp = LRU;
+ /*
+ * If LRU buffer is pinned, the buffer pool is too small. We need to
+ * allocate more buffers.
+ */
+ if (hashp->nbufs || (bp->flags & BUF_PIN)) {
+ /* Allocate a new one */
+ if ((bp = (BUFHEAD *)malloc(sizeof(BUFHEAD))) == NULL)
+ return (NULL);
+ if ((bp->page = (char *)malloc(hashp->BSIZE)) == NULL) {
+ free(bp);
+ return (NULL);
+ }
+ if (hashp->nbufs)
+ hashp->nbufs--;
+ } else {
+ /* Kick someone out */
+ BUF_REMOVE(bp);
+ /*
+ * If this is an overflow page with addr 0, it's already been
+ * flushed back in an overflow chain and initialized.
+ */
+ if ((bp->addr != 0) || (bp->flags & BUF_BUCKET)) {
+ /*
+ * Set oaddr before __put_page so that you get it
+ * before bytes are swapped.
+ */
+ shortp = (u_int16_t *)bp->page;
+ if (shortp[0])
+ oaddr = shortp[shortp[0] - 1];
+ if ((bp->flags & BUF_MOD) && __put_page(hashp, bp->page,
+ bp->addr, (int)IS_BUCKET(bp->flags), 0))
+ return (NULL);
+ /*
+ * Update the pointer to this page (i.e. invalidate it).
+ *
+ * If this is a new file (i.e. we created it at open
+ * time), make sure that we mark pages which have been
+ * written to disk so we retrieve them from disk later,
+ * rather than allocating new pages.
+ */
+ if (IS_BUCKET(bp->flags)) {
+ segment_ndx = bp->addr & (hashp->SGSIZE - 1);
+ segp = hashp->dir[bp->addr >> hashp->SSHIFT];
+#ifdef DEBUG
+ assert(segp != NULL);
+#endif
+
+ if (hashp->new_file &&
+ ((bp->flags & BUF_MOD) ||
+ ISDISK(segp[segment_ndx])))
+ segp[segment_ndx] = (BUFHEAD *)BUF_DISK;
+ else
+ segp[segment_ndx] = NULL;
+ }
+ /*
+ * Since overflow pages can only be access by means of
+ * their bucket, free overflow pages associated with
+ * this bucket.
+ */
+ for (xbp = bp; xbp->ovfl;) {
+ next_xbp = xbp->ovfl;
+ xbp->ovfl = 0;
+ xbp = next_xbp;
+
+ /* Check that ovfl pointer is up date. */
+ if (IS_BUCKET(xbp->flags) ||
+ (oaddr != xbp->addr))
+ break;
+
+ shortp = (u_int16_t *)xbp->page;
+ if (shortp[0])
+ /* set before __put_page */
+ oaddr = shortp[shortp[0] - 1];
+ if ((xbp->flags & BUF_MOD) && __put_page(hashp,
+ xbp->page, xbp->addr, 0, 0))
+ return (NULL);
+ xbp->addr = 0;
+ xbp->flags = 0;
+ BUF_REMOVE(xbp);
+ LRU_INSERT(xbp);
+ }
+ }
+ }
+
+ /* Now assign this buffer */
+ bp->addr = addr;
+#ifdef DEBUG1
+ (void)fprintf(stderr, "NEWBUF1: %d->ovfl was %d is now %d\n",
+ bp->addr, (bp->ovfl ? bp->ovfl->addr : 0), 0);
+#endif
+ bp->ovfl = NULL;
+ if (prev_bp) {
+ /*
+ * If prev_bp is set, this is an overflow page, hook it in to
+ * the buffer overflow links.
+ */
+#ifdef DEBUG1
+ (void)fprintf(stderr, "NEWBUF2: %d->ovfl was %d is now %d\n",
+ prev_bp->addr, (prev_bp->ovfl ? bp->ovfl->addr : 0),
+ (bp ? bp->addr : 0));
+#endif
+ prev_bp->ovfl = bp;
+ bp->flags = 0;
+ } else
+ bp->flags = BUF_BUCKET;
+ MRU_INSERT(bp);
+ return (bp);
+}
+
+extern void
+__buf_init(hashp, nbytes)
+ HTAB *hashp;
+ int nbytes;
+{
+ BUFHEAD *bfp;
+ int npages;
+
+ bfp = &(hashp->bufhead);
+ npages = (nbytes + hashp->BSIZE - 1) >> hashp->BSHIFT;
+ npages = MAX(npages, MIN_BUFFERS);
+
+ hashp->nbufs = npages;
+ bfp->next = bfp;
+ bfp->prev = bfp;
+ /*
+ * This space is calloc'd so these are already null.
+ *
+ * bfp->ovfl = NULL;
+ * bfp->flags = 0;
+ * bfp->page = NULL;
+ * bfp->addr = 0;
+ */
+}
+
+extern int
+__buf_free(hashp, do_free, to_disk)
+ HTAB *hashp;
+ int do_free, to_disk;
+{
+ BUFHEAD *bp;
+
+ /* Need to make sure that buffer manager has been initialized */
+ if (!LRU)
+ return (0);
+ for (bp = LRU; bp != &hashp->bufhead;) {
+ /* Check that the buffer is valid */
+ if (bp->addr || IS_BUCKET(bp->flags)) {
+ if (to_disk && (bp->flags & BUF_MOD) &&
+ __put_page(hashp, bp->page,
+ bp->addr, IS_BUCKET(bp->flags), 0))
+ return (-1);
+ }
+ /* Check if we are freeing stuff */
+ if (do_free) {
+ if (bp->page)
+ free(bp->page);
+ BUF_REMOVE(bp);
+ free(bp);
+ bp = LRU;
+ } else
+ bp = bp->prev;
+ }
+ return (0);
+}
+
+extern void
+__reclaim_buf(hashp, bp)
+ HTAB *hashp;
+ BUFHEAD *bp;
+{
+ bp->ovfl = 0;
+ bp->addr = 0;
+ bp->flags = 0;
+ BUF_REMOVE(bp);
+ LRU_INSERT(bp);
+}
diff --git a/lib/libc/db/hash/hash_func.c b/lib/libc/db/hash/hash_func.c
new file mode 100644
index 00000000000..a978b99d2fa
--- /dev/null
+++ b/lib/libc/db/hash/hash_func.c
@@ -0,0 +1,218 @@
+/* $NetBSD: hash_func.c,v 1.5 1995/02/27 13:22:27 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)hash_func.c 8.2 (Berkeley) 2/21/94";
+#else
+static char rcsid[] = "$NetBSD: hash_func.c,v 1.5 1995/02/27 13:22:27 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <db.h>
+#include "hash.h"
+#include "page.h"
+#include "extern.h"
+
+static u_int32_t hash1 __P((const void *, size_t));
+static u_int32_t hash2 __P((const void *, size_t));
+static u_int32_t hash3 __P((const void *, size_t));
+static u_int32_t hash4 __P((const void *, size_t));
+
+/* Global default hash function */
+u_int32_t (*__default_hash) __P((const void *, size_t)) = hash4;
+
+/*
+ * HASH FUNCTIONS
+ *
+ * Assume that we've already split the bucket to which this key hashes,
+ * calculate that bucket, and check that in fact we did already split it.
+ *
+ * This came from ejb's hsearch.
+ */
+
+#define PRIME1 37
+#define PRIME2 1048583
+
+static u_int32_t
+hash1(keyarg, len)
+ const void *keyarg;
+ register size_t len;
+{
+ register const u_char *key;
+ register u_int32_t h;
+
+ /* Convert string to integer */
+ for (key = keyarg, h = 0; len--;)
+ h = h * PRIME1 ^ (*key++ - ' ');
+ h %= PRIME2;
+ return (h);
+}
+
+/*
+ * Phong's linear congruential hash
+ */
+#define dcharhash(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c))
+
+static u_int32_t
+hash2(keyarg, len)
+ const void *keyarg;
+ size_t len;
+{
+ register const u_char *e, *key;
+ register u_int32_t h;
+ register u_char c;
+
+ key = keyarg;
+ e = key + len;
+ for (h = 0; key != e;) {
+ c = *key++;
+ if (!c && key > e)
+ break;
+ dcharhash(h, c);
+ }
+ return (h);
+}
+
+/*
+ * This is INCREDIBLY ugly, but fast. We break the string up into 8 byte
+ * units. On the first time through the loop we get the "leftover bytes"
+ * (strlen % 8). On every other iteration, we perform 8 HASHC's so we handle
+ * all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If
+ * this routine is heavily used enough, it's worth the ugly coding.
+ *
+ * OZ's original sdbm hash
+ */
+static u_int32_t
+hash3(keyarg, len)
+ const void *keyarg;
+ register size_t len;
+{
+ register const u_char *key;
+ register size_t loop;
+ register u_int32_t h;
+
+#define HASHC h = *key++ + 65599 * h
+
+ h = 0;
+ key = keyarg;
+ if (len > 0) {
+ loop = (len + 8 - 1) >> 3;
+
+ switch (len & (8 - 1)) {
+ case 0:
+ do {
+ HASHC;
+ /* FALLTHROUGH */
+ case 7:
+ HASHC;
+ /* FALLTHROUGH */
+ case 6:
+ HASHC;
+ /* FALLTHROUGH */
+ case 5:
+ HASHC;
+ /* FALLTHROUGH */
+ case 4:
+ HASHC;
+ /* FALLTHROUGH */
+ case 3:
+ HASHC;
+ /* FALLTHROUGH */
+ case 2:
+ HASHC;
+ /* FALLTHROUGH */
+ case 1:
+ HASHC;
+ } while (--loop);
+ }
+ }
+ return (h);
+}
+
+/* Hash function from Chris Torek. */
+static u_int32_t
+hash4(keyarg, len)
+ const void *keyarg;
+ register size_t len;
+{
+ register const u_char *key;
+ register size_t loop;
+ register u_int32_t h;
+
+#define HASH4a h = (h << 5) - h + *key++;
+#define HASH4b h = (h << 5) + h + *key++;
+#define HASH4 HASH4b
+
+ h = 0;
+ key = keyarg;
+ if (len > 0) {
+ loop = (len + 8 - 1) >> 3;
+
+ switch (len & (8 - 1)) {
+ case 0:
+ do {
+ HASH4;
+ /* FALLTHROUGH */
+ case 7:
+ HASH4;
+ /* FALLTHROUGH */
+ case 6:
+ HASH4;
+ /* FALLTHROUGH */
+ case 5:
+ HASH4;
+ /* FALLTHROUGH */
+ case 4:
+ HASH4;
+ /* FALLTHROUGH */
+ case 3:
+ HASH4;
+ /* FALLTHROUGH */
+ case 2:
+ HASH4;
+ /* FALLTHROUGH */
+ case 1:
+ HASH4;
+ } while (--loop);
+ }
+ }
+ return (h);
+}
diff --git a/lib/libc/db/hash/hash_log2.c b/lib/libc/db/hash/hash_log2.c
new file mode 100644
index 00000000000..9332cbac887
--- /dev/null
+++ b/lib/libc/db/hash/hash_log2.c
@@ -0,0 +1,60 @@
+/* $NetBSD: hash_log2.c,v 1.5 1995/02/27 13:22:30 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)hash_log2.c 8.2 (Berkeley) 5/31/94";
+#else
+static char rcsid[] = "$NetBSD: hash_log2.c,v 1.5 1995/02/27 13:22:30 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <db.h>
+
+u_int32_t
+__log2(num)
+ u_int32_t num;
+{
+ register u_int32_t i, limit;
+
+ limit = 1;
+ for (i = 0; limit < num; limit = limit << 1, i++);
+ return (i);
+}
diff --git a/lib/libc/db/hash/hash_page.c b/lib/libc/db/hash/hash_page.c
new file mode 100644
index 00000000000..49ee8904a14
--- /dev/null
+++ b/lib/libc/db/hash/hash_page.c
@@ -0,0 +1,950 @@
+/* $NetBSD: hash_page.c,v 1.7 1995/02/27 13:22:34 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)hash_page.c 8.6 (Berkeley) 6/16/94";
+#else
+static char rcsid[] = "$NetBSD: hash_page.c,v 1.7 1995/02/27 13:22:34 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * PACKAGE: hashing
+ *
+ * DESCRIPTION:
+ * Page manipulation for hashing package.
+ *
+ * ROUTINES:
+ *
+ * External
+ * __get_page
+ * __add_ovflpage
+ * Internal
+ * overflow_page
+ * open_temp
+ */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#ifdef DEBUG
+#include <assert.h>
+#endif
+
+#include <db.h>
+#include "hash.h"
+#include "page.h"
+#include "extern.h"
+
+static u_int32_t *fetch_bitmap __P((HTAB *, int));
+static u_int32_t first_free __P((u_int32_t));
+static int open_temp __P((HTAB *));
+static u_int16_t overflow_page __P((HTAB *));
+static void putpair __P((char *, const DBT *, const DBT *));
+static void squeeze_key __P((u_int16_t *, const DBT *, const DBT *));
+static int ugly_split
+ __P((HTAB *, u_int32_t, BUFHEAD *, BUFHEAD *, int, int));
+
+#define PAGE_INIT(P) { \
+ ((u_int16_t *)(P))[0] = 0; \
+ ((u_int16_t *)(P))[1] = hashp->BSIZE - 3 * sizeof(u_int16_t); \
+ ((u_int16_t *)(P))[2] = hashp->BSIZE; \
+}
+
+/*
+ * This is called AFTER we have verified that there is room on the page for
+ * the pair (PAIRFITS has returned true) so we go right ahead and start moving
+ * stuff on.
+ */
+static void
+putpair(p, key, val)
+ char *p;
+ const DBT *key, *val;
+{
+ register u_int16_t *bp, n, off;
+
+ bp = (u_int16_t *)p;
+
+ /* Enter the key first. */
+ n = bp[0];
+
+ off = OFFSET(bp) - key->size;
+ memmove(p + off, key->data, key->size);
+ bp[++n] = off;
+
+ /* Now the data. */
+ off -= val->size;
+ memmove(p + off, val->data, val->size);
+ bp[++n] = off;
+
+ /* Adjust page info. */
+ bp[0] = n;
+ bp[n + 1] = off - ((n + 3) * sizeof(u_int16_t));
+ bp[n + 2] = off;
+}
+
+/*
+ * Returns:
+ * 0 OK
+ * -1 error
+ */
+extern int
+__delpair(hashp, bufp, ndx)
+ HTAB *hashp;
+ BUFHEAD *bufp;
+ register int ndx;
+{
+ register u_int16_t *bp, newoff;
+ register int n;
+ u_int16_t pairlen;
+
+ bp = (u_int16_t *)bufp->page;
+ n = bp[0];
+
+ if (bp[ndx + 1] < REAL_KEY)
+ return (__big_delete(hashp, bufp));
+ if (ndx != 1)
+ newoff = bp[ndx - 1];
+ else
+ newoff = hashp->BSIZE;
+ pairlen = newoff - bp[ndx + 1];
+
+ if (ndx != (n - 1)) {
+ /* Hard Case -- need to shuffle keys */
+ register int i;
+ register char *src = bufp->page + (int)OFFSET(bp);
+ register char *dst = src + (int)pairlen;
+ memmove(dst, src, bp[ndx + 1] - OFFSET(bp));
+
+ /* Now adjust the pointers */
+ for (i = ndx + 2; i <= n; i += 2) {
+ if (bp[i + 1] == OVFLPAGE) {
+ bp[i - 2] = bp[i];
+ bp[i - 1] = bp[i + 1];
+ } else {
+ bp[i - 2] = bp[i] + pairlen;
+ bp[i - 1] = bp[i + 1] + pairlen;
+ }
+ }
+ }
+ /* Finally adjust the page data */
+ bp[n] = OFFSET(bp) + pairlen;
+ bp[n - 1] = bp[n + 1] + pairlen + 2 * sizeof(u_int16_t);
+ bp[0] = n - 2;
+ hashp->NKEYS--;
+
+ bufp->flags |= BUF_MOD;
+ return (0);
+}
+/*
+ * Returns:
+ * 0 ==> OK
+ * -1 ==> Error
+ */
+extern int
+__split_page(hashp, obucket, nbucket)
+ HTAB *hashp;
+ u_int32_t obucket, nbucket;
+{
+ register BUFHEAD *new_bufp, *old_bufp;
+ register u_int16_t *ino;
+ register char *np;
+ DBT key, val;
+ int n, ndx, retval;
+ u_int16_t copyto, diff, off, moved;
+ char *op;
+
+ copyto = (u_int16_t)hashp->BSIZE;
+ off = (u_int16_t)hashp->BSIZE;
+ old_bufp = __get_buf(hashp, obucket, NULL, 0);
+ if (old_bufp == NULL)
+ return (-1);
+ new_bufp = __get_buf(hashp, nbucket, NULL, 0);
+ if (new_bufp == NULL)
+ return (-1);
+
+ old_bufp->flags |= (BUF_MOD | BUF_PIN);
+ new_bufp->flags |= (BUF_MOD | BUF_PIN);
+
+ ino = (u_int16_t *)(op = old_bufp->page);
+ np = new_bufp->page;
+
+ moved = 0;
+
+ for (n = 1, ndx = 1; n < ino[0]; n += 2) {
+ if (ino[n + 1] < REAL_KEY) {
+ retval = ugly_split(hashp, obucket, old_bufp, new_bufp,
+ (int)copyto, (int)moved);
+ old_bufp->flags &= ~BUF_PIN;
+ new_bufp->flags &= ~BUF_PIN;
+ return (retval);
+
+ }
+ key.data = (u_char *)op + ino[n];
+ key.size = off - ino[n];
+
+ if (__call_hash(hashp, key.data, key.size) == obucket) {
+ /* Don't switch page */
+ diff = copyto - off;
+ if (diff) {
+ copyto = ino[n + 1] + diff;
+ memmove(op + copyto, op + ino[n + 1],
+ off - ino[n + 1]);
+ ino[ndx] = copyto + ino[n] - ino[n + 1];
+ ino[ndx + 1] = copyto;
+ } else
+ copyto = ino[n + 1];
+ ndx += 2;
+ } else {
+ /* Switch page */
+ val.data = (u_char *)op + ino[n + 1];
+ val.size = ino[n] - ino[n + 1];
+ putpair(np, &key, &val);
+ moved += 2;
+ }
+
+ off = ino[n + 1];
+ }
+
+ /* Now clean up the page */
+ ino[0] -= moved;
+ FREESPACE(ino) = copyto - sizeof(u_int16_t) * (ino[0] + 3);
+ OFFSET(ino) = copyto;
+
+#ifdef DEBUG3
+ (void)fprintf(stderr, "split %d/%d\n",
+ ((u_int16_t *)np)[0] / 2,
+ ((u_int16_t *)op)[0] / 2);
+#endif
+ /* unpin both pages */
+ old_bufp->flags &= ~BUF_PIN;
+ new_bufp->flags &= ~BUF_PIN;
+ return (0);
+}
+
+/*
+ * Called when we encounter an overflow or big key/data page during split
+ * handling. This is special cased since we have to begin checking whether
+ * the key/data pairs fit on their respective pages and because we may need
+ * overflow pages for both the old and new pages.
+ *
+ * The first page might be a page with regular key/data pairs in which case
+ * we have a regular overflow condition and just need to go on to the next
+ * page or it might be a big key/data pair in which case we need to fix the
+ * big key/data pair.
+ *
+ * Returns:
+ * 0 ==> success
+ * -1 ==> failure
+ */
+static int
+ugly_split(hashp, obucket, old_bufp, new_bufp, copyto, moved)
+ HTAB *hashp;
+ u_int32_t obucket; /* Same as __split_page. */
+ BUFHEAD *old_bufp, *new_bufp;
+ int copyto; /* First byte on page which contains key/data values. */
+ int moved; /* Number of pairs moved to new page. */
+{
+ register BUFHEAD *bufp; /* Buffer header for ino */
+ register u_int16_t *ino; /* Page keys come off of */
+ register u_int16_t *np; /* New page */
+ register u_int16_t *op; /* Page keys go on to if they aren't moving */
+
+ BUFHEAD *last_bfp; /* Last buf header OVFL needing to be freed */
+ DBT key, val;
+ SPLIT_RETURN ret;
+ u_int16_t n, off, ov_addr, scopyto;
+ char *cino; /* Character value of ino */
+
+ bufp = old_bufp;
+ ino = (u_int16_t *)old_bufp->page;
+ np = (u_int16_t *)new_bufp->page;
+ op = (u_int16_t *)old_bufp->page;
+ last_bfp = NULL;
+ scopyto = (u_int16_t)copyto; /* ANSI */
+
+ n = ino[0] - 1;
+ while (n < ino[0]) {
+ if (ino[2] < REAL_KEY && ino[2] != OVFLPAGE) {
+ if (__big_split(hashp, old_bufp,
+ new_bufp, bufp, bufp->addr, obucket, &ret))
+ return (-1);
+ old_bufp = ret.oldp;
+ if (!old_bufp)
+ return (-1);
+ op = (u_int16_t *)old_bufp->page;
+ new_bufp = ret.newp;
+ if (!new_bufp)
+ return (-1);
+ np = (u_int16_t *)new_bufp->page;
+ bufp = ret.nextp;
+ if (!bufp)
+ return (0);
+ cino = (char *)bufp->page;
+ ino = (u_int16_t *)cino;
+ last_bfp = ret.nextp;
+ } else if (ino[n + 1] == OVFLPAGE) {
+ ov_addr = ino[n];
+ /*
+ * Fix up the old page -- the extra 2 are the fields
+ * which contained the overflow information.
+ */
+ ino[0] -= (moved + 2);
+ FREESPACE(ino) =
+ scopyto - sizeof(u_int16_t) * (ino[0] + 3);
+ OFFSET(ino) = scopyto;
+
+ bufp = __get_buf(hashp, ov_addr, bufp, 0);
+ if (!bufp)
+ return (-1);
+
+ ino = (u_int16_t *)bufp->page;
+ n = 1;
+ scopyto = hashp->BSIZE;
+ moved = 0;
+
+ if (last_bfp)
+ __free_ovflpage(hashp, last_bfp);
+ last_bfp = bufp;
+ }
+ /* Move regular sized pairs of there are any */
+ off = hashp->BSIZE;
+ for (n = 1; (n < ino[0]) && (ino[n + 1] >= REAL_KEY); n += 2) {
+ cino = (char *)ino;
+ key.data = (u_char *)cino + ino[n];
+ key.size = off - ino[n];
+ val.data = (u_char *)cino + ino[n + 1];
+ val.size = ino[n] - ino[n + 1];
+ off = ino[n + 1];
+
+ if (__call_hash(hashp, key.data, key.size) == obucket) {
+ /* Keep on old page */
+ if (PAIRFITS(op, (&key), (&val)))
+ putpair((char *)op, &key, &val);
+ else {
+ old_bufp =
+ __add_ovflpage(hashp, old_bufp);
+ if (!old_bufp)
+ return (-1);
+ op = (u_int16_t *)old_bufp->page;
+ putpair((char *)op, &key, &val);
+ }
+ old_bufp->flags |= BUF_MOD;
+ } else {
+ /* Move to new page */
+ if (PAIRFITS(np, (&key), (&val)))
+ putpair((char *)np, &key, &val);
+ else {
+ new_bufp =
+ __add_ovflpage(hashp, new_bufp);
+ if (!new_bufp)
+ return (-1);
+ np = (u_int16_t *)new_bufp->page;
+ putpair((char *)np, &key, &val);
+ }
+ new_bufp->flags |= BUF_MOD;
+ }
+ }
+ }
+ if (last_bfp)
+ __free_ovflpage(hashp, last_bfp);
+ return (0);
+}
+
+/*
+ * Add the given pair to the page
+ *
+ * Returns:
+ * 0 ==> OK
+ * 1 ==> failure
+ */
+extern int
+__addel(hashp, bufp, key, val)
+ HTAB *hashp;
+ BUFHEAD *bufp;
+ const DBT *key, *val;
+{
+ register u_int16_t *bp, *sop;
+ int do_expand;
+
+ bp = (u_int16_t *)bufp->page;
+ do_expand = 0;
+ while (bp[0] && (bp[2] < REAL_KEY || bp[bp[0]] < REAL_KEY))
+ /* Exception case */
+ if (bp[2] == FULL_KEY_DATA && bp[0] == 2)
+ /* This is the last page of a big key/data pair
+ and we need to add another page */
+ break;
+ else if (bp[2] < REAL_KEY && bp[bp[0]] != OVFLPAGE) {
+ bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
+ if (!bufp)
+ return (-1);
+ bp = (u_int16_t *)bufp->page;
+ } else
+ /* Try to squeeze key on this page */
+ if (FREESPACE(bp) > PAIRSIZE(key, val)) {
+ squeeze_key(bp, key, val);
+ return (0);
+ } else {
+ bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
+ if (!bufp)
+ return (-1);
+ bp = (u_int16_t *)bufp->page;
+ }
+
+ if (PAIRFITS(bp, key, val))
+ putpair(bufp->page, key, val);
+ else {
+ do_expand = 1;
+ bufp = __add_ovflpage(hashp, bufp);
+ if (!bufp)
+ return (-1);
+ sop = (u_int16_t *)bufp->page;
+
+ if (PAIRFITS(sop, key, val))
+ putpair((char *)sop, key, val);
+ else
+ if (__big_insert(hashp, bufp, key, val))
+ return (-1);
+ }
+ bufp->flags |= BUF_MOD;
+ /*
+ * If the average number of keys per bucket exceeds the fill factor,
+ * expand the table.
+ */
+ hashp->NKEYS++;
+ if (do_expand ||
+ (hashp->NKEYS / (hashp->MAX_BUCKET + 1) > hashp->FFACTOR))
+ return (__expand_table(hashp));
+ return (0);
+}
+
+/*
+ *
+ * Returns:
+ * pointer on success
+ * NULL on error
+ */
+extern BUFHEAD *
+__add_ovflpage(hashp, bufp)
+ HTAB *hashp;
+ BUFHEAD *bufp;
+{
+ register u_int16_t *sp;
+ u_int16_t ndx, ovfl_num;
+#ifdef DEBUG1
+ int tmp1, tmp2;
+#endif
+ sp = (u_int16_t *)bufp->page;
+
+ /* Check if we are dynamically determining the fill factor */
+ if (hashp->FFACTOR == DEF_FFACTOR) {
+ hashp->FFACTOR = sp[0] >> 1;
+ if (hashp->FFACTOR < MIN_FFACTOR)
+ hashp->FFACTOR = MIN_FFACTOR;
+ }
+ bufp->flags |= BUF_MOD;
+ ovfl_num = overflow_page(hashp);
+#ifdef DEBUG1
+ tmp1 = bufp->addr;
+ tmp2 = bufp->ovfl ? bufp->ovfl->addr : 0;
+#endif
+ if (!ovfl_num || !(bufp->ovfl = __get_buf(hashp, ovfl_num, bufp, 1)))
+ return (NULL);
+ bufp->ovfl->flags |= BUF_MOD;
+#ifdef DEBUG1
+ (void)fprintf(stderr, "ADDOVFLPAGE: %d->ovfl was %d is now %d\n",
+ tmp1, tmp2, bufp->ovfl->addr);
+#endif
+ ndx = sp[0];
+ /*
+ * Since a pair is allocated on a page only if there's room to add
+ * an overflow page, we know that the OVFL information will fit on
+ * the page.
+ */
+ sp[ndx + 4] = OFFSET(sp);
+ sp[ndx + 3] = FREESPACE(sp) - OVFLSIZE;
+ sp[ndx + 1] = ovfl_num;
+ sp[ndx + 2] = OVFLPAGE;
+ sp[0] = ndx + 2;
+#ifdef HASH_STATISTICS
+ hash_overflows++;
+#endif
+ return (bufp->ovfl);
+}
+
+/*
+ * Returns:
+ * 0 indicates SUCCESS
+ * -1 indicates FAILURE
+ */
+extern int
+__get_page(hashp, p, bucket, is_bucket, is_disk, is_bitmap)
+ HTAB *hashp;
+ char *p;
+ u_int32_t bucket;
+ int is_bucket, is_disk, is_bitmap;
+{
+ register int fd, page, size;
+ int rsize;
+ u_int16_t *bp;
+
+ fd = hashp->fp;
+ size = hashp->BSIZE;
+
+ if ((fd == -1) || !is_disk) {
+ PAGE_INIT(p);
+ return (0);
+ }
+ if (is_bucket)
+ page = BUCKET_TO_PAGE(bucket);
+ else
+ page = OADDR_TO_PAGE(bucket);
+ if ((lseek(fd, (off_t)page << hashp->BSHIFT, SEEK_SET) == -1) ||
+ ((rsize = read(fd, p, size)) == -1))
+ return (-1);
+ bp = (u_int16_t *)p;
+ if (!rsize)
+ bp[0] = 0; /* We hit the EOF, so initialize a new page */
+ else
+ if (rsize != size) {
+ errno = EFTYPE;
+ return (-1);
+ }
+ if (!is_bitmap && !bp[0]) {
+ PAGE_INIT(p);
+ } else
+ if (hashp->LORDER != BYTE_ORDER) {
+ register int i, max;
+
+ if (is_bitmap) {
+ max = hashp->BSIZE >> 2; /* divide by 4 */
+ for (i = 0; i < max; i++)
+ M_32_SWAP(((int *)p)[i]);
+ } else {
+ M_16_SWAP(bp[0]);
+ max = bp[0] + 2;
+ for (i = 1; i <= max; i++)
+ M_16_SWAP(bp[i]);
+ }
+ }
+ return (0);
+}
+
+/*
+ * Write page p to disk
+ *
+ * Returns:
+ * 0 ==> OK
+ * -1 ==>failure
+ */
+extern int
+__put_page(hashp, p, bucket, is_bucket, is_bitmap)
+ HTAB *hashp;
+ char *p;
+ u_int32_t bucket;
+ int is_bucket, is_bitmap;
+{
+ register int fd, page, size;
+ int wsize;
+
+ size = hashp->BSIZE;
+ if ((hashp->fp == -1) && open_temp(hashp))
+ return (-1);
+ fd = hashp->fp;
+
+ if (hashp->LORDER != BYTE_ORDER) {
+ register int i;
+ register int max;
+
+ if (is_bitmap) {
+ max = hashp->BSIZE >> 2; /* divide by 4 */
+ for (i = 0; i < max; i++)
+ M_32_SWAP(((int *)p)[i]);
+ } else {
+ max = ((u_int16_t *)p)[0] + 2;
+ for (i = 0; i <= max; i++)
+ M_16_SWAP(((u_int16_t *)p)[i]);
+ }
+ }
+ if (is_bucket)
+ page = BUCKET_TO_PAGE(bucket);
+ else
+ page = OADDR_TO_PAGE(bucket);
+ if ((lseek(fd, (off_t)page << hashp->BSHIFT, SEEK_SET) == -1) ||
+ ((wsize = write(fd, p, size)) == -1))
+ /* Errno is set */
+ return (-1);
+ if (wsize != size) {
+ errno = EFTYPE;
+ return (-1);
+ }
+ return (0);
+}
+
+#define BYTE_MASK ((1 << INT_BYTE_SHIFT) -1)
+/*
+ * Initialize a new bitmap page. Bitmap pages are left in memory
+ * once they are read in.
+ */
+extern int
+__ibitmap(hashp, pnum, nbits, ndx)
+ HTAB *hashp;
+ int pnum, nbits, ndx;
+{
+ u_int32_t *ip;
+ int clearbytes, clearints;
+
+ if ((ip = (u_int32_t *)malloc(hashp->BSIZE)) == NULL)
+ return (1);
+ hashp->nmaps++;
+ clearints = ((nbits - 1) >> INT_BYTE_SHIFT) + 1;
+ clearbytes = clearints << INT_TO_BYTE;
+ (void)memset((char *)ip, 0, clearbytes);
+ (void)memset(((char *)ip) + clearbytes, 0xFF,
+ hashp->BSIZE - clearbytes);
+ ip[clearints - 1] = ALL_SET << (nbits & BYTE_MASK);
+ SETBIT(ip, 0);
+ hashp->BITMAPS[ndx] = (u_int16_t)pnum;
+ hashp->mapp[ndx] = ip;
+ return (0);
+}
+
+static u_int32_t
+first_free(map)
+ u_int32_t map;
+{
+ register u_int32_t i, mask;
+
+ mask = 0x1;
+ for (i = 0; i < BITS_PER_MAP; i++) {
+ if (!(mask & map))
+ return (i);
+ mask = mask << 1;
+ }
+ return (i);
+}
+
+static u_int16_t
+overflow_page(hashp)
+ HTAB *hashp;
+{
+ register u_int32_t *freep;
+ register int max_free, offset, splitnum;
+ u_int16_t addr;
+ int bit, first_page, free_bit, free_page, i, in_use_bits, j;
+#ifdef DEBUG2
+ int tmp1, tmp2;
+#endif
+ splitnum = hashp->OVFL_POINT;
+ max_free = hashp->SPARES[splitnum];
+
+ free_page = (max_free - 1) >> (hashp->BSHIFT + BYTE_SHIFT);
+ free_bit = (max_free - 1) & ((hashp->BSIZE << BYTE_SHIFT) - 1);
+
+ /* Look through all the free maps to find the first free block */
+ first_page = hashp->LAST_FREED >>(hashp->BSHIFT + BYTE_SHIFT);
+ for ( i = first_page; i <= free_page; i++ ) {
+ if (!(freep = (u_int32_t *)hashp->mapp[i]) &&
+ !(freep = fetch_bitmap(hashp, i)))
+ return (NULL);
+ if (i == free_page)
+ in_use_bits = free_bit;
+ else
+ in_use_bits = (hashp->BSIZE << BYTE_SHIFT) - 1;
+
+ if (i == first_page) {
+ bit = hashp->LAST_FREED &
+ ((hashp->BSIZE << BYTE_SHIFT) - 1);
+ j = bit / BITS_PER_MAP;
+ bit = bit & ~(BITS_PER_MAP - 1);
+ } else {
+ bit = 0;
+ j = 0;
+ }
+ for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP)
+ if (freep[j] != ALL_SET)
+ goto found;
+ }
+
+ /* No Free Page Found */
+ hashp->LAST_FREED = hashp->SPARES[splitnum];
+ hashp->SPARES[splitnum]++;
+ offset = hashp->SPARES[splitnum] -
+ (splitnum ? hashp->SPARES[splitnum - 1] : 0);
+
+#define OVMSG "HASH: Out of overflow pages. Increase page size\n"
+ if (offset > SPLITMASK) {
+ if (++splitnum >= NCACHED) {
+ (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
+ return (NULL);
+ }
+ hashp->OVFL_POINT = splitnum;
+ hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1];
+ hashp->SPARES[splitnum-1]--;
+ offset = 1;
+ }
+
+ /* Check if we need to allocate a new bitmap page */
+ if (free_bit == (hashp->BSIZE << BYTE_SHIFT) - 1) {
+ free_page++;
+ if (free_page >= NCACHED) {
+ (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
+ return (NULL);
+ }
+ /*
+ * This is tricky. The 1 indicates that you want the new page
+ * allocated with 1 clear bit. Actually, you are going to
+ * allocate 2 pages from this map. The first is going to be
+ * the map page, the second is the overflow page we were
+ * looking for. The init_bitmap routine automatically, sets
+ * the first bit of itself to indicate that the bitmap itself
+ * is in use. We would explicitly set the second bit, but
+ * don't have to if we tell init_bitmap not to leave it clear
+ * in the first place.
+ */
+ if (__ibitmap(hashp, (int)OADDR_OF(splitnum, offset),
+ 1, free_page))
+ return (NULL);
+ hashp->SPARES[splitnum]++;
+#ifdef DEBUG2
+ free_bit = 2;
+#endif
+ offset++;
+ if (offset > SPLITMASK) {
+ if (++splitnum >= NCACHED) {
+ (void)write(STDERR_FILENO, OVMSG,
+ sizeof(OVMSG) - 1);
+ return (NULL);
+ }
+ hashp->OVFL_POINT = splitnum;
+ hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1];
+ hashp->SPARES[splitnum-1]--;
+ offset = 0;
+ }
+ } else {
+ /*
+ * Free_bit addresses the last used bit. Bump it to address
+ * the first available bit.
+ */
+ free_bit++;
+ SETBIT(freep, free_bit);
+ }
+
+ /* Calculate address of the new overflow page */
+ addr = OADDR_OF(splitnum, offset);
+#ifdef DEBUG2
+ (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
+ addr, free_bit, free_page);
+#endif
+ return (addr);
+
+found:
+ bit = bit + first_free(freep[j]);
+ SETBIT(freep, bit);
+#ifdef DEBUG2
+ tmp1 = bit;
+ tmp2 = i;
+#endif
+ /*
+ * Bits are addressed starting with 0, but overflow pages are addressed
+ * beginning at 1. Bit is a bit addressnumber, so we need to increment
+ * it to convert it to a page number.
+ */
+ bit = 1 + bit + (i * (hashp->BSIZE << BYTE_SHIFT));
+ if (bit >= hashp->LAST_FREED)
+ hashp->LAST_FREED = bit - 1;
+
+ /* Calculate the split number for this page */
+ for (i = 0; (i < splitnum) && (bit > hashp->SPARES[i]); i++);
+ offset = (i ? bit - hashp->SPARES[i - 1] : bit);
+ if (offset >= SPLITMASK)
+ return (NULL); /* Out of overflow pages */
+ addr = OADDR_OF(i, offset);
+#ifdef DEBUG2
+ (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
+ addr, tmp1, tmp2);
+#endif
+
+ /* Allocate and return the overflow page */
+ return (addr);
+}
+
+/*
+ * Mark this overflow page as free.
+ */
+extern void
+__free_ovflpage(hashp, obufp)
+ HTAB *hashp;
+ BUFHEAD *obufp;
+{
+ register u_int16_t addr;
+ u_int32_t *freep;
+ int bit_address, free_page, free_bit;
+ u_int16_t ndx;
+
+ addr = obufp->addr;
+#ifdef DEBUG1
+ (void)fprintf(stderr, "Freeing %d\n", addr);
+#endif
+ ndx = (((u_int16_t)addr) >> SPLITSHIFT);
+ bit_address =
+ (ndx ? hashp->SPARES[ndx - 1] : 0) + (addr & SPLITMASK) - 1;
+ if (bit_address < hashp->LAST_FREED)
+ hashp->LAST_FREED = bit_address;
+ free_page = (bit_address >> (hashp->BSHIFT + BYTE_SHIFT));
+ free_bit = bit_address & ((hashp->BSIZE << BYTE_SHIFT) - 1);
+
+ if (!(freep = hashp->mapp[free_page]))
+ freep = fetch_bitmap(hashp, free_page);
+#ifdef DEBUG
+ /*
+ * This had better never happen. It means we tried to read a bitmap
+ * that has already had overflow pages allocated off it, and we
+ * failed to read it from the file.
+ */
+ if (!freep)
+ assert(0);
+#endif
+ CLRBIT(freep, free_bit);
+#ifdef DEBUG2
+ (void)fprintf(stderr, "FREE_OVFLPAGE: ADDR: %d BIT: %d PAGE %d\n",
+ obufp->addr, free_bit, free_page);
+#endif
+ __reclaim_buf(hashp, obufp);
+}
+
+/*
+ * Returns:
+ * 0 success
+ * -1 failure
+ */
+static int
+open_temp(hashp)
+ HTAB *hashp;
+{
+ sigset_t set, oset;
+ static char namestr[] = "_hashXXXXXX";
+
+ /* Block signals; make sure file goes away at process exit. */
+ (void)sigfillset(&set);
+ (void)sigprocmask(SIG_BLOCK, &set, &oset);
+ if ((hashp->fp = mkstemp(namestr)) != -1) {
+ (void)unlink(namestr);
+ (void)fcntl(hashp->fp, F_SETFD, 1);
+ }
+ (void)sigprocmask(SIG_SETMASK, &oset, (sigset_t *)NULL);
+ return (hashp->fp != -1 ? 0 : -1);
+}
+
+/*
+ * We have to know that the key will fit, but the last entry on the page is
+ * an overflow pair, so we need to shift things.
+ */
+static void
+squeeze_key(sp, key, val)
+ u_int16_t *sp;
+ const DBT *key, *val;
+{
+ register char *p;
+ u_int16_t free_space, n, off, pageno;
+
+ p = (char *)sp;
+ n = sp[0];
+ free_space = FREESPACE(sp);
+ off = OFFSET(sp);
+
+ pageno = sp[n - 1];
+ off -= key->size;
+ sp[n - 1] = off;
+ memmove(p + off, key->data, key->size);
+ off -= val->size;
+ sp[n] = off;
+ memmove(p + off, val->data, val->size);
+ sp[0] = n + 2;
+ sp[n + 1] = pageno;
+ sp[n + 2] = OVFLPAGE;
+ FREESPACE(sp) = free_space - PAIRSIZE(key, val);
+ OFFSET(sp) = off;
+}
+
+static u_int32_t *
+fetch_bitmap(hashp, ndx)
+ HTAB *hashp;
+ int ndx;
+{
+ if (ndx >= hashp->nmaps)
+ return (NULL);
+ if ((hashp->mapp[ndx] = (u_int32_t *)malloc(hashp->BSIZE)) == NULL)
+ return (NULL);
+ if (__get_page(hashp,
+ (char *)hashp->mapp[ndx], hashp->BITMAPS[ndx], 0, 1, 1)) {
+ free(hashp->mapp[ndx]);
+ return (NULL);
+ }
+ return (hashp->mapp[ndx]);
+}
+
+#ifdef DEBUG4
+int
+print_chain(addr)
+ int addr;
+{
+ BUFHEAD *bufp;
+ short *bp, oaddr;
+
+ (void)fprintf(stderr, "%d ", addr);
+ bufp = __get_buf(hashp, addr, NULL, 0);
+ bp = (short *)bufp->page;
+ while (bp[0] && ((bp[bp[0]] == OVFLPAGE) ||
+ ((bp[0] > 2) && bp[2] < REAL_KEY))) {
+ oaddr = bp[bp[0] - 1];
+ (void)fprintf(stderr, "%d ", (int)oaddr);
+ bufp = __get_buf(hashp, (int)oaddr, bufp, 0);
+ bp = (short *)bufp->page;
+ }
+ (void)fprintf(stderr, "\n");
+}
+#endif
diff --git a/lib/libc/db/hash/hsearch.c b/lib/libc/db/hash/hsearch.c
new file mode 100644
index 00000000000..ef825ab7182
--- /dev/null
+++ b/lib/libc/db/hash/hsearch.c
@@ -0,0 +1,113 @@
+/* $NetBSD: hsearch.c,v 1.8 1995/02/27 13:22:38 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)hsearch.c 8.3 (Berkeley) 2/21/94";
+#else
+static char rcsid[] = "$NetBSD: hsearch.c,v 1.8 1995/02/27 13:22:38 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <fcntl.h>
+#include <string.h>
+
+#include <db.h>
+#include "search.h"
+
+static DB *dbp = NULL;
+static ENTRY retval;
+
+extern int
+hcreate(nel)
+ u_int nel;
+{
+ HASHINFO info;
+
+ info.nelem = nel;
+ info.bsize = 256;
+ info.ffactor = 8;
+ info.cachesize = NULL;
+ info.hash = NULL;
+ info.lorder = 0;
+ dbp = (DB *)__hash_open(NULL, O_CREAT | O_RDWR, 0600, &info, 0);
+ return (dbp != NULL);
+}
+
+extern ENTRY *
+hsearch(item, action)
+ ENTRY item;
+ ACTION action;
+{
+ DBT key, val;
+ int status;
+
+ if (!dbp)
+ return (NULL);
+ key.data = (u_char *)item.key;
+ key.size = strlen(item.key) + 1;
+
+ if (action == ENTER) {
+ val.data = (u_char *)item.data;
+ val.size = strlen(item.data) + 1;
+ status = (dbp->put)(dbp, &key, &val, R_NOOVERWRITE);
+ if (status)
+ return (NULL);
+ } else {
+ /* FIND */
+ status = (dbp->get)(dbp, &key, &val, 0);
+ if (status)
+ return (NULL);
+ else
+ item.data = (char *)val.data;
+ }
+ retval.key = item.key;
+ retval.data = item.data;
+ return (&retval);
+}
+
+extern void
+hdestroy()
+{
+ if (dbp) {
+ (void)(dbp->close)(dbp);
+ dbp = NULL;
+ }
+}
diff --git a/lib/libc/db/hash/ndbm.c b/lib/libc/db/hash/ndbm.c
new file mode 100644
index 00000000000..875b0494e59
--- /dev/null
+++ b/lib/libc/db/hash/ndbm.c
@@ -0,0 +1,208 @@
+/* $NetBSD: ndbm.c,v 1.7 1995/02/27 13:22:44 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)ndbm.c 8.3 (Berkeley) 5/30/94";
+#else
+static char rcsid[] = "$NetBSD: ndbm.c,v 1.7 1995/02/27 13:22:44 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * This package provides a dbm compatible interface to the new hashing
+ * package described in db(3).
+ */
+
+#include <sys/param.h>
+
+#include <stdio.h>
+#include <string.h>
+
+#include <ndbm.h>
+#include "hash.h"
+
+/*
+ * Returns:
+ * *DBM on success
+ * NULL on failure
+ */
+extern DBM *
+dbm_open(file, flags, mode)
+ const char *file;
+ int flags, mode;
+{
+ HASHINFO info;
+ char path[MAXPATHLEN];
+
+ info.bsize = 4096;
+ info.ffactor = 40;
+ info.nelem = 1;
+ info.cachesize = NULL;
+ info.hash = NULL;
+ info.lorder = 0;
+ (void)strcpy(path, file);
+ (void)strcat(path, DBM_SUFFIX);
+ return ((DBM *)__hash_open(path, flags, mode, &info, 0));
+}
+
+extern void
+dbm_close(db)
+ DBM *db;
+{
+ (void)(db->close)(db);
+}
+
+/*
+ * Returns:
+ * DATUM on success
+ * NULL on failure
+ */
+extern datum
+dbm_fetch(db, key)
+ DBM *db;
+ datum key;
+{
+ datum retval;
+ int status;
+
+ status = (db->get)(db, (DBT *)&key, (DBT *)&retval, 0);
+ if (status) {
+ retval.dptr = NULL;
+ retval.dsize = 0;
+ }
+ return (retval);
+}
+
+/*
+ * Returns:
+ * DATUM on success
+ * NULL on failure
+ */
+extern datum
+dbm_firstkey(db)
+ DBM *db;
+{
+ int status;
+ datum retdata, retkey;
+
+ status = (db->seq)(db, (DBT *)&retkey, (DBT *)&retdata, R_FIRST);
+ if (status)
+ retkey.dptr = NULL;
+ return (retkey);
+}
+
+/*
+ * Returns:
+ * DATUM on success
+ * NULL on failure
+ */
+extern datum
+dbm_nextkey(db)
+ DBM *db;
+{
+ int status;
+ datum retdata, retkey;
+
+ status = (db->seq)(db, (DBT *)&retkey, (DBT *)&retdata, R_NEXT);
+ if (status)
+ retkey.dptr = NULL;
+ return (retkey);
+}
+/*
+ * Returns:
+ * 0 on success
+ * <0 failure
+ */
+extern int
+dbm_delete(db, key)
+ DBM *db;
+ datum key;
+{
+ int status;
+
+ status = (db->del)(db, (DBT *)&key, 0);
+ if (status)
+ return (-1);
+ else
+ return (0);
+}
+
+/*
+ * Returns:
+ * 0 on success
+ * <0 failure
+ * 1 if DBM_INSERT and entry exists
+ */
+extern int
+dbm_store(db, key, content, flags)
+ DBM *db;
+ datum key, content;
+ int flags;
+{
+ return ((db->put)(db, (DBT *)&key, (DBT *)&content,
+ (flags == DBM_INSERT) ? R_NOOVERWRITE : 0));
+}
+
+extern int
+dbm_error(db)
+ DBM *db;
+{
+ HTAB *hp;
+
+ hp = (HTAB *)db->internal;
+ return (hp->errno);
+}
+
+extern int
+dbm_clearerr(db)
+ DBM *db;
+{
+ HTAB *hp;
+
+ hp = (HTAB *)db->internal;
+ hp->errno = 0;
+ return (0);
+}
+
+extern int
+dbm_dirfno(db)
+ DBM *db;
+{
+ return(((HTAB *)db->internal)->fp);
+}
diff --git a/lib/libc/db/hash/page.h b/lib/libc/db/hash/page.h
new file mode 100644
index 00000000000..ed62d595dc1
--- /dev/null
+++ b/lib/libc/db/hash/page.h
@@ -0,0 +1,94 @@
+/* $NetBSD: page.h,v 1.5 1995/02/27 13:22:52 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)page.h 8.2 (Berkeley) 5/31/94
+ */
+
+/*
+ * Definitions for hashing page file format.
+ */
+
+/*
+ * routines dealing with a data page
+ *
+ * page format:
+ * +------------------------------+
+ * p | n | keyoff | datoff | keyoff |
+ * +------------+--------+--------+
+ * | datoff | free | ptr | --> |
+ * +--------+---------------------+
+ * | F R E E A R E A |
+ * +--------------+---------------+
+ * | <---- - - - | data |
+ * +--------+-----+----+----------+
+ * | key | data | key |
+ * +--------+----------+----------+
+ *
+ * Pointer to the free space is always: p[p[0] + 2]
+ * Amount of free space on the page is: p[p[0] + 1]
+ */
+
+/*
+ * How many bytes required for this pair?
+ * 2 shorts in the table at the top of the page + room for the
+ * key and room for the data
+ *
+ * We prohibit entering a pair on a page unless there is also room to append
+ * an overflow page. The reason for this it that you can get in a situation
+ * where a single key/data pair fits on a page, but you can't append an
+ * overflow page and later you'd have to split the key/data and handle like
+ * a big pair.
+ * You might as well do this up front.
+ */
+
+#define PAIRSIZE(K,D) (2*sizeof(u_int16_t) + (K)->size + (D)->size)
+#define BIGOVERHEAD (4*sizeof(u_int16_t))
+#define KEYSIZE(K) (4*sizeof(u_int16_t) + (K)->size);
+#define OVFLSIZE (2*sizeof(u_int16_t))
+#define FREESPACE(P) ((P)[(P)[0]+1])
+#define OFFSET(P) ((P)[(P)[0]+2])
+#define PAIRFITS(P,K,D) \
+ (((P)[2] >= REAL_KEY) && \
+ (PAIRSIZE((K),(D)) + OVFLSIZE) <= FREESPACE((P)))
+#define PAGE_META(N) (((N)+3) * sizeof(u_int16_t))
+
+typedef struct {
+ BUFHEAD *newp;
+ BUFHEAD *oldp;
+ BUFHEAD *nextp;
+ u_int16_t next_addr;
+} SPLIT_RETURN;
diff --git a/lib/libc/db/hash/search.h b/lib/libc/db/hash/search.h
new file mode 100644
index 00000000000..d11ac14eefb
--- /dev/null
+++ b/lib/libc/db/hash/search.h
@@ -0,0 +1,53 @@
+/* $NetBSD: search.h,v 1.5 1995/02/27 13:22:58 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)search.h 8.1 (Berkeley) 6/4/93
+ */
+
+/* Backward compatibility to hsearch interface. */
+typedef struct entry {
+ char *key;
+ char *data;
+} ENTRY;
+
+typedef enum {
+ FIND, ENTER
+} ACTION;
+
+int hcreate __P((unsigned int));
+void hdestroy __P((void));
+ENTRY *hsearch __P((ENTRY, ACTION));
diff --git a/lib/libc/db/man/Makefile.inc b/lib/libc/db/man/Makefile.inc
new file mode 100644
index 00000000000..6ccbb21d9d1
--- /dev/null
+++ b/lib/libc/db/man/Makefile.inc
@@ -0,0 +1,8 @@
+# $NetBSD: Makefile.inc,v 1.5 1995/02/27 13:23:12 cgd Exp $
+# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93
+
+.PATH: ${.CURDIR}/db/man
+
+# mpool.3
+MAN+= btree.3 dbopen.3 hash.3 recno.3
+MLINKS+= dbopen.3 db.3
diff --git a/lib/libc/db/man/btree.3 b/lib/libc/db/man/btree.3
new file mode 100644
index 00000000000..f0cd4eba8d6
--- /dev/null
+++ b/lib/libc/db/man/btree.3
@@ -0,0 +1,228 @@
+.\" $NetBSD: btree.3,v 1.5 1995/02/27 13:23:18 cgd Exp $
+.\"
+.\" Copyright (c) 1990, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)btree.3 8.3 (Berkeley) 2/21/94
+.\"
+.TH BTREE 3 "February 21, 1994"
+.\".UC 7
+.SH NAME
+btree \- btree database access method
+.SH SYNOPSIS
+.nf
+.ft B
+#include <sys/types.h>
+#include <db.h>
+.ft R
+.fi
+.SH DESCRIPTION
+The routine
+.IR dbopen
+is the library interface to database files.
+One of the supported file formats is btree files.
+The general description of the database access methods is in
+.IR dbopen (3),
+this manual page describes only the btree specific information.
+.PP
+The btree data structure is a sorted, balanced tree structure storing
+associated key/data pairs.
+.PP
+The btree access method specific data structure provided to
+.I dbopen
+is defined in the <db.h> include file as follows:
+.PP
+typedef struct {
+.RS
+u_long flags;
+.br
+u_int cachesize;
+.br
+int maxkeypage;
+.br
+int minkeypage;
+.br
+u_int psize;
+.br
+int (*compare)(const DBT *key1, const DBT *key2);
+.br
+size_t (*prefix)(const DBT *key1, const DBT *key2);
+.br
+int lorder;
+.RE
+} BTREEINFO;
+.PP
+The elements of this structure are as follows:
+.TP
+flags
+The flag value is specified by
+.IR or 'ing
+any of the following values:
+.RS
+.TP
+R_DUP
+Permit duplicate keys in the tree, i.e. permit insertion if the key to be
+inserted already exists in the tree.
+The default behavior, as described in
+.IR dbopen (3),
+is to overwrite a matching key when inserting a new key or to fail if
+the R_NOOVERWRITE flag is specified.
+The R_DUP flag is overridden by the R_NOOVERWRITE flag, and if the
+R_NOOVERWRITE flag is specified, attempts to insert duplicate keys into
+the tree will fail.
+.IP
+If the database contains duplicate keys, the order of retrieval of
+key/data pairs is undefined if the
+.I get
+routine is used, however,
+.I seq
+routine calls with the R_CURSOR flag set will always return the logical
+``first'' of any group of duplicate keys.
+.RE
+.TP
+cachesize
+A suggested maximum size (in bytes) of the memory cache.
+This value is
+.B only
+advisory, and the access method will allocate more memory rather than fail.
+Since every search examines the root page of the tree, caching the most
+recently used pages substantially improves access time.
+In addition, physical writes are delayed as long as possible, so a moderate
+cache can reduce the number of I/O operations significantly.
+Obviously, using a cache increases (but only increases) the likelihood of
+corruption or lost data if the system crashes while a tree is being modified.
+If
+.I cachesize
+is 0 (no size is specified) a default cache is used.
+.TP
+maxkeypage
+The maximum number of keys which will be stored on any single page.
+Not currently implemented.
+.\" The maximum number of keys which will be stored on any single page.
+.\" Because of the way the btree data structure works,
+.\" .I maxkeypage
+.\" must always be greater than or equal to 2.
+.\" If
+.\" .I maxkeypage
+.\" is 0 (no maximum number of keys is specified) the page fill factor is
+.\" made as large as possible (which is almost invariably what is wanted).
+.TP
+minkeypage
+The minimum number of keys which will be stored on any single page.
+This value is used to determine which keys will be stored on overflow
+pages, i.e. if a key or data item is longer than the pagesize divided
+by the minkeypage value, it will be stored on overflow pages instead
+of in the page itself.
+If
+.I minkeypage
+is 0 (no minimum number of keys is specified) a value of 2 is used.
+.TP
+psize
+Page size is the size (in bytes) of the pages used for nodes in the tree.
+The minimum page size is 512 bytes and the maximum page size is 64K.
+If
+.I psize
+is 0 (no page size is specified) a page size is chosen based on the
+underlying file system I/O block size.
+.TP
+compare
+Compare is the key comparison function.
+It must return an integer less than, equal to, or greater than zero if the
+first key argument is considered to be respectively less than, equal to,
+or greater than the second key argument.
+The same comparison function must be used on a given tree every time it
+is opened.
+If
+.I compare
+is NULL (no comparison function is specified), the keys are compared
+lexically, with shorter keys considered less than longer keys.
+.TP
+prefix
+Prefix is the prefix comparison function.
+If specified, this routine must return the number of bytes of the second key
+argument which are necessary to determine that it is greater than the first
+key argument.
+If the keys are equal, the key length should be returned.
+Note, the usefulness of this routine is very data dependent, but, in some
+data sets can produce significantly reduced tree sizes and search times.
+If
+.I prefix
+is NULL (no prefix function is specified),
+.B and
+no comparison function is specified, a default lexical comparison routine
+is used.
+If
+.I prefix
+is NULL and a comparison routine is specified, no prefix comparison is
+done.
+.TP
+lorder
+The byte order for integers in the stored database metadata.
+The number should represent the order as an integer; for example,
+big endian order would be the number 4,321.
+If
+.I lorder
+is 0 (no order is specified) the current host order is used.
+.PP
+If the file already exists (and the O_TRUNC flag is not specified), the
+values specified for the parameters flags, lorder and psize are ignored
+in favor of the values used when the tree was created.
+.PP
+Forward sequential scans of a tree are from the least key to the greatest.
+.PP
+Space freed up by deleting key/data pairs from the tree is never reclaimed,
+although it is normally made available for reuse.
+This means that the btree storage structure is grow-only.
+The only solutions are to avoid excessive deletions, or to create a fresh
+tree periodically from a scan of an existing one.
+.PP
+Searches, insertions, and deletions in a btree will all complete in
+O lg base N where base is the average fill factor.
+Often, inserting ordered data into btrees results in a low fill factor.
+This implementation has been modified to make ordered insertion the best
+case, resulting in a much better than normal page fill factor.
+.SH "SEE ALSO"
+.IR dbopen (3),
+.IR hash (3),
+.IR mpool (3),
+.IR recno (3)
+.sp
+.IR "The Ubiquitous B-tree" ,
+Douglas Comer, ACM Comput. Surv. 11, 2 (June 1979), 121-138.
+.sp
+.IR "Prefix B-trees" ,
+Bayer and Unterauer, ACM Transactions on Database Systems, Vol. 2, 1
+(March 1977), 11-26.
+.sp
+.IR "The Art of Computer Programming Vol. 3: Sorting and Searching" ,
+D.E. Knuth, 1968, pp 471-480.
+.SH BUGS
+Only big and little endian byte order is supported.
diff --git a/lib/libc/db/man/dbopen.3 b/lib/libc/db/man/dbopen.3
new file mode 100644
index 00000000000..b842160b19f
--- /dev/null
+++ b/lib/libc/db/man/dbopen.3
@@ -0,0 +1,478 @@
+.\" $NetBSD: dbopen.3,v 1.6 1995/02/27 13:23:25 cgd Exp $
+.\"
+.\" Copyright (c) 1990, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)dbopen.3 8.5 (Berkeley) 1/2/94
+.\"
+.TH DBOPEN 3 "January 2, 1994"
+.UC 7
+.SH NAME
+dbopen \- database access methods
+.SH SYNOPSIS
+.nf
+.ft B
+#include <sys/types.h>
+#include <limits.h>
+#include <db.h>
+
+DB *
+dbopen(const char *file, int flags, int mode, DBTYPE type,
+.ti +5
+const void *openinfo);
+.ft R
+.fi
+.SH DESCRIPTION
+.IR Dbopen
+is the library interface to database files.
+The supported file formats are btree, hashed and UNIX file oriented.
+The btree format is a representation of a sorted, balanced tree structure.
+The hashed format is an extensible, dynamic hashing scheme.
+The flat-file format is a byte stream file with fixed or variable length
+records.
+The formats and file format specific information are described in detail
+in their respective manual pages
+.IR btree (3),
+.IR hash (3)
+and
+.IR recno (3).
+.PP
+Dbopen opens
+.I file
+for reading and/or writing.
+Files never intended to be preserved on disk may be created by setting
+the file parameter to NULL.
+.PP
+The
+.I flags
+and
+.I mode arguments
+are as specified to the
+.IR open (2)
+routine, however, only the O_CREAT, O_EXCL, O_EXLOCK, O_NONBLOCK,
+O_RDONLY, O_RDWR, O_SHLOCK and O_TRUNC flags are meaningful.
+(Note, opening a database file O_WRONLY is not possible.)
+.\"Three additional options may be specified by
+.\".IR or 'ing
+.\"them into the
+.\".I flags
+.\"argument.
+.\".TP
+.\"DB_LOCK
+.\"Do the necessary locking in the database to support concurrent access.
+.\"If concurrent access isn't needed or the database is read-only this
+.\"flag should not be set, as it tends to have an associated performance
+.\"penalty.
+.\".TP
+.\"DB_SHMEM
+.\"Place the underlying memory pool used by the database in shared
+.\"memory.
+.\"Necessary for concurrent access.
+.\".TP
+.\"DB_TXN
+.\"Support transactions in the database.
+.\"The DB_LOCK and DB_SHMEM flags must be set as well.
+.PP
+The
+.I type
+argument is of type DBTYPE (as defined in the <db.h> include file) and
+may be set to DB_BTREE, DB_HASH or DB_RECNO.
+.PP
+The
+.I openinfo
+argument is a pointer to an access method specific structure described
+in the access method's manual page.
+If
+.I openinfo
+is NULL, each access method will use defaults appropriate for the system
+and the access method.
+.PP
+.I Dbopen
+returns a pointer to a DB structure on success and NULL on error.
+The DB structure is defined in the <db.h> include file, and contains at
+least the following fields:
+.sp
+.nf
+typedef struct {
+.RS
+DBTYPE type;
+int (*close)(const DB *db);
+int (*del)(const DB *db, const DBT *key, u_int flags);
+int (*fd)(const DB *db);
+int (*get)(const DB *db, DBT *key, DBT *data, u_int flags);
+int (*put)(const DB *db, DBT *key, const DBT *data,
+.ti +5
+u_int flags);
+int (*sync)(const DB *db, u_int flags);
+int (*seq)(const DB *db, DBT *key, DBT *data, u_int flags);
+.RE
+} DB;
+.fi
+.PP
+These elements describe a database type and a set of functions performing
+various actions.
+These functions take a pointer to a structure as returned by
+.IR dbopen ,
+and sometimes one or more pointers to key/data structures and a flag value.
+.TP
+type
+The type of the underlying access method (and file format).
+.TP
+close
+A pointer to a routine to flush any cached information to disk, free any
+allocated resources, and close the underlying file(s).
+Since key/data pairs may be cached in memory, failing to sync the file
+with a
+.I close
+or
+.I sync
+function may result in inconsistent or lost information.
+.I Close
+routines return -1 on error (setting
+.IR errno )
+and 0 on success.
+.TP
+del
+A pointer to a routine to remove key/data pairs from the database.
+.IP
+The parameter
+.I flag
+may be set to the following value:
+.RS
+.TP
+R_CURSOR
+Delete the record referenced by the cursor.
+The cursor must have previously been initialized.
+.RE
+.IP
+.I Delete
+routines return -1 on error (setting
+.IR errno ),
+0 on success, and 1 if the specified
+.I key
+was not in the file.
+.TP
+fd
+A pointer to a routine which returns a file descriptor representative
+of the underlying database.
+A file descriptor referencing the same file will be returned to all
+processes which call
+.I dbopen
+with the same
+.I file
+name.
+This file descriptor may be safely used as an argument to the
+.IR fcntl (2)
+and
+.IR flock (2)
+locking functions.
+The file descriptor is not necessarily associated with any of the
+underlying files used by the access method.
+No file descriptor is available for in memory databases.
+.I Fd
+routines return -1 on error (setting
+.IR errno ),
+and the file descriptor on success.
+.TP
+get
+A pointer to a routine which is the interface for keyed retrieval from
+the database.
+The address and length of the data associated with the specified
+.I key
+are returned in the structure referenced by
+.IR data .
+.I Get
+routines return -1 on error (setting
+.IR errno ),
+0 on success, and 1 if the
+.I key
+was not in the file.
+.TP
+put
+A pointer to a routine to store key/data pairs in the database.
+.IP
+The parameter
+.I flag
+may be set to one of the following values:
+.RS
+.TP
+R_CURSOR
+Replace the key/data pair referenced by the cursor.
+The cursor must have previously been initialized.
+.TP
+R_IAFTER
+Append the data immediately after the data referenced by
+.IR key ,
+creating a new key/data pair.
+The record number of the appended key/data pair is returned in the
+.I key
+structure.
+(Applicable only to the DB_RECNO access method.)
+.TP
+R_IBEFORE
+Insert the data immediately before the data referenced by
+.IR key ,
+creating a new key/data pair.
+The record number of the inserted key/data pair is returned in the
+.I key
+structure.
+(Applicable only to the DB_RECNO access method.)
+.TP
+R_NOOVERWRITE
+Enter the new key/data pair only if the key does not previously exist.
+.TP
+R_SETCURSOR
+Store the key/data pair, setting or initializing the position of the
+cursor to reference it.
+(Applicable only to the DB_BTREE and DB_RECNO access methods.)
+.RE
+.IP
+R_SETCURSOR is available only for the DB_BTREE and DB_RECNO access
+methods because it implies that the keys have an inherent order
+which does not change.
+.IP
+R_IAFTER and R_IBEFORE are available only for the DB_RECNO
+access method because they each imply that the access method is able to
+create new keys.
+This is only true if the keys are ordered and independent, record numbers
+for example.
+.IP
+The default behavior of the
+.I put
+routines is to enter the new key/data pair, replacing any previously
+existing key.
+.IP
+.I Put
+routines return -1 on error (setting
+.IR errno ),
+0 on success, and 1 if the R_NOOVERWRITE
+.I flag
+was set and the key already exists in the file.
+.TP
+seq
+A pointer to a routine which is the interface for sequential
+retrieval from the database.
+The address and length of the key are returned in the structure
+referenced by
+.IR key ,
+and the address and length of the data are returned in the
+structure referenced
+by
+.IR data .
+.IP
+Sequential key/data pair retrieval may begin at any time, and the
+position of the ``cursor'' is not affected by calls to the
+.IR del ,
+.IR get ,
+.IR put ,
+or
+.I sync
+routines.
+Modifications to the database during a sequential scan will be reflected
+in the scan, i.e. records inserted behind the cursor will not be returned
+while records inserted in front of the cursor will be returned.
+.IP
+The flag value
+.B must
+be set to one of the following values:
+.RS
+.TP
+R_CURSOR
+The data associated with the specified key is returned.
+This differs from the
+.I get
+routines in that it sets or initializes the cursor to the location of
+the key as well.
+(Note, for the DB_BTREE access method, the returned key is not necessarily an
+exact match for the specified key.
+The returned key is the smallest key greater than or equal to the specified
+key, permitting partial key matches and range searches.)
+.TP
+R_FIRST
+The first key/data pair of the database is returned, and the cursor
+is set or initialized to reference it.
+.TP
+R_LAST
+The last key/data pair of the database is returned, and the cursor
+is set or initialized to reference it.
+(Applicable only to the DB_BTREE and DB_RECNO access methods.)
+.TP
+R_NEXT
+Retrieve the key/data pair immediately after the cursor.
+If the cursor is not yet set, this is the same as the R_FIRST flag.
+.TP
+R_PREV
+Retrieve the key/data pair immediately before the cursor.
+If the cursor is not yet set, this is the same as the R_LAST flag.
+(Applicable only to the DB_BTREE and DB_RECNO access methods.)
+.RE
+.IP
+R_LAST and R_PREV are available only for the DB_BTREE and DB_RECNO
+access methods because they each imply that the keys have an inherent
+order which does not change.
+.IP
+.I Seq
+routines return -1 on error (setting
+.IR errno ),
+0 on success and 1 if there are no key/data pairs less than or greater
+than the specified or current key.
+If the DB_RECNO access method is being used, and if the database file
+is a character special file and no complete key/data pairs are currently
+available, the
+.I seq
+routines return 2.
+.TP
+sync
+A pointer to a routine to flush any cached information to disk.
+If the database is in memory only, the
+.I sync
+routine has no effect and will always succeed.
+.IP
+The flag value may be set to the following value:
+.RS
+.TP
+R_RECNOSYNC
+If the DB_RECNO access method is being used, this flag causes
+the sync routine to apply to the btree file which underlies the
+recno file, not the recno file itself.
+(See the
+.I bfname
+field of the
+.IR recno (3)
+manual page for more information.)
+.RE
+.IP
+.I Sync
+routines return -1 on error (setting
+.IR errno )
+and 0 on success.
+.SH "KEY/DATA PAIRS"
+Access to all file types is based on key/data pairs.
+Both keys and data are represented by the following data structure:
+.PP
+typedef struct {
+.RS
+void *data;
+.br
+size_t size;
+.RE
+} DBT;
+.PP
+The elements of the DBT structure are defined as follows:
+.TP
+data
+A pointer to a byte string.
+.TP
+size
+The length of the byte string.
+.PP
+Key and data byte strings may reference strings of essentially unlimited
+length although any two of them must fit into available memory at the same
+time.
+It should be noted that the access methods provide no guarantees about
+byte string alignment.
+.SH ERRORS
+The
+.I dbopen
+routine may fail and set
+.I errno
+for any of the errors specified for the library routines
+.IR open (2)
+and
+.IR malloc (3)
+or the following:
+.TP
+[EFTYPE]
+A file is incorrectly formatted.
+.TP
+[EINVAL]
+A parameter has been specified (hash function, pad byte etc.) that is
+incompatible with the current file specification or which is not
+meaningful for the function (for example, use of the cursor without
+prior initialization) or there is a mismatch between the version
+number of file and the software.
+.PP
+The
+.I close
+routines may fail and set
+.I errno
+for any of the errors specified for the library routines
+.IR close (2),
+.IR read (2),
+.IR write (2),
+.IR free (3),
+or
+.IR fsync (2).
+.PP
+The
+.IR del ,
+.IR get ,
+.I put
+and
+.I seq
+routines may fail and set
+.I errno
+for any of the errors specified for the library routines
+.IR read (2),
+.IR write (2),
+.IR free (3)
+or
+.IR malloc (3).
+.PP
+The
+.I fd
+routines will fail and set
+.I errno
+to ENOENT for in memory databases.
+.PP
+The
+.I sync
+routines may fail and set
+.I errno
+for any of the errors specified for the library routine
+.IR fsync (2).
+.SH "SEE ALSO"
+.IR btree (3),
+.IR hash (3),
+.IR mpool (3),
+.IR recno (3)
+.sp
+.IR "LIBTP: Portable, Modular Transactions for UNIX" ,
+Margo Seltzer, Michael Olson, USENIX proceedings, Winter 1992.
+.SH BUGS
+The typedef DBT is a mnemonic for ``data base thang'', and was used
+because noone could think of a reasonable name that wasn't already used.
+.PP
+The file descriptor interface is a kluge and will be deleted in a
+future version of the interface.
+.PP
+None of the access methods provide any form of concurrent access,
+locking, or transactions.
diff --git a/lib/libc/db/man/hash.3 b/lib/libc/db/man/hash.3
new file mode 100644
index 00000000000..6d296a8f6d1
--- /dev/null
+++ b/lib/libc/db/man/hash.3
@@ -0,0 +1,154 @@
+.\" $NetBSD: hash.3,v 1.5 1995/02/27 13:23:31 cgd Exp $
+.\"
+.\" Copyright (c) 1990, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)hash.3 8.5 (Berkeley) 2/21/94
+.\"
+.TH HASH 3 "February 21, 1994"
+.UC 7
+.SH NAME
+hash \- hash database access method
+.SH SYNOPSIS
+.nf
+.ft B
+#include <sys/types.h>
+#include <db.h>
+.ft R
+.fi
+.SH DESCRIPTION
+The routine
+.IR dbopen
+is the library interface to database files.
+One of the supported file formats is hash files.
+The general description of the database access methods is in
+.IR dbopen (3),
+this manual page describes only the hash specific information.
+.PP
+The hash data structure is an extensible, dynamic hashing scheme.
+.PP
+The access method specific data structure provided to
+.I dbopen
+is defined in the <db.h> include file as follows:
+.sp
+typedef struct {
+.RS
+u_int bsize;
+.br
+u_int ffactor;
+.br
+u_int nelem;
+.br
+u_int cachesize;
+.br
+u_int32_t (*hash)(const void *, size_t);
+.br
+int lorder;
+.RE
+} HASHINFO;
+.PP
+The elements of this structure are as follows:
+.TP
+bsize
+.I Bsize
+defines the hash table bucket size, and is, by default, 256 bytes.
+It may be preferable to increase the page size for disk-resident tables
+and tables with large data items.
+.TP
+ffactor
+.I Ffactor
+indicates a desired density within the hash table.
+It is an approximation of the number of keys allowed to accumulate in any
+one bucket, determining when the hash table grows or shrinks.
+The default value is 8.
+.TP
+nelem
+.I Nelem
+is an estimate of the final size of the hash table.
+If not set or set too low, hash tables will expand gracefully as keys
+are entered, although a slight performance degradation may be noticed.
+The default value is 1.
+.TP
+cachesize
+A suggested maximum size, in bytes, of the memory cache.
+This value is
+.B only
+advisory, and the access method will allocate more memory rather
+than fail.
+.TP
+hash
+.I Hash
+is a user defined hash function.
+Since no hash function performs equally well on all possible data, the
+user may find that the built-in hash function does poorly on a particular
+data set.
+User specified hash functions must take two arguments (a pointer to a byte
+string and a length) and return a 32-bit quantity to be used as the hash
+value.
+.TP
+lorder
+The byte order for integers in the stored database metadata.
+The number should represent the order as an integer; for example,
+big endian order would be the number 4,321.
+If
+.I lorder
+is 0 (no order is specified) the current host order is used.
+If the file already exists, the specified value is ignored and the
+value specified when the tree was created is used.
+.PP
+If the file already exists (and the O_TRUNC flag is not specified), the
+values specified for the parameters bsize, ffactor, lorder and nelem are
+ignored and the values specified when the tree was created are used.
+.PP
+If a hash function is specified,
+.I hash_open
+will attempt to determine if the hash function specified is the same as
+the one with which the database was created, and will fail if it is not.
+.PP
+Backward compatible interfaces to the routines described in
+.IR dbm (3),
+and
+.IR ndbm (3)
+are provided, however these interfaces are not compatible with
+previous file formats.
+.SH "SEE ALSO"
+.IR btree (3),
+.IR dbopen (3),
+.IR mpool (3),
+.IR recno (3)
+.sp
+.IR "Dynamic Hash Tables" ,
+Per-Ake Larson, Communications of the ACM, April 1988.
+.sp
+.IR "A New Hash Package for UNIX" ,
+Margo Seltzer, USENIX Proceedings, Winter 1991.
+.SH BUGS
+Only big and little endian byte order is supported.
diff --git a/lib/libc/db/man/mpool.3 b/lib/libc/db/man/mpool.3
new file mode 100644
index 00000000000..0ad0b6864b5
--- /dev/null
+++ b/lib/libc/db/man/mpool.3
@@ -0,0 +1,221 @@
+.\" $NetBSD: mpool.3,v 1.4 1995/02/27 13:23:35 cgd Exp $
+.\"
+.\" Copyright (c) 1990, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)mpool.3 8.1 (Berkeley) 6/4/93
+.\"
+.TH MPOOL 3 "June 4, 1993"
+.UC 7
+.SH NAME
+mpool \- shared memory buffer pool
+.SH SYNOPSIS
+.nf
+.ft B
+#include <db.h>
+#include <mpool.h>
+
+MPOOL *
+mpool_open (DBT *key, int fd, pgno_t pagesize, pgno_t maxcache);
+
+void
+mpool_filter (MPOOL *mp, void (*pgin)(void *, pgno_t, void *),
+.ti +5
+void (*pgout)(void *, pgno_t, void *), void *pgcookie);
+
+void *
+mpool_new (MPOOL *mp, pgno_t *pgnoaddr);
+
+void *
+mpool_get (MPOOL *mp, pgno_t pgno, u_int flags);
+
+int
+mpool_put (MPOOL *mp, void *pgaddr, u_int flags);
+
+int
+mpool_sync (MPOOL *mp);
+
+int
+mpool_close (MPOOL *mp);
+.ft R
+.fi
+.SH DESCRIPTION
+.IR Mpool
+is the library interface intended to provide page oriented buffer management
+of files.
+The buffers may be shared between processes.
+.PP
+The function
+.I mpool_open
+initializes a memory pool.
+The
+.I key
+argument is the byte string used to negotiate between multiple
+processes wishing to share buffers.
+If the file buffers are mapped in shared memory, all processes using
+the same key will share the buffers.
+If
+.I key
+is NULL, the buffers are mapped into private memory.
+The
+.I fd
+argument is a file descriptor for the underlying file, which must be seekable.
+If
+.I key
+is non-NULL and matches a file already being mapped, the
+.I fd
+argument is ignored.
+.PP
+The
+.I pagesize
+argument is the size, in bytes, of the pages into which the file is broken up.
+The
+.I maxcache
+argument is the maximum number of pages from the underlying file to cache
+at any one time.
+This value is not relative to the number of processes which share a file's
+buffers, but will be the largest value specified by any of the processes
+sharing the file.
+.PP
+The
+.I mpool_filter
+function is intended to make transparent input and output processing of the
+pages possible.
+If the
+.I pgin
+function is specified, it is called each time a buffer is read into the memory
+pool from the backing file.
+If the
+.I pgout
+function is specified, it is called each time a buffer is written into the
+backing file.
+Both functions are are called with the
+.I pgcookie
+pointer, the page number and a pointer to the page to being read or written.
+.PP
+The function
+.I mpool_new
+takes an MPOOL pointer and an address as arguments.
+If a new page can be allocated, a pointer to the page is returned and
+the page number is stored into the
+.I pgnoaddr
+address.
+Otherwise, NULL is returned and errno is set.
+.PP
+The function
+.I mpool_get
+takes a MPOOL pointer and a page number as arguments.
+If the page exists, a pointer to the page is returned.
+Otherwise, NULL is returned and errno is set.
+The flags parameter is not currently used.
+.PP
+The function
+.I mpool_put
+unpins the page referenced by
+.IR pgaddr .
+.I Pgaddr
+must be an address previously returned by
+.I mpool_get
+or
+.IR mpool_new .
+The flag value is specified by
+.IR or 'ing
+any of the following values:
+.TP
+MPOOL_DIRTY
+The page has been modified and needs to be written to the backing file.
+.PP
+.I Mpool_put
+returns 0 on success and -1 if an error occurs.
+.PP
+The function
+.I mpool_sync
+writes all modified pages associated with the MPOOL pointer to the
+backing file.
+.I Mpool_sync
+returns 0 on success and -1 if an error occurs.
+.PP
+The
+.I mpool_close
+function free's up any allocated memory associated with the memory pool
+cookie.
+Modified pages are
+.B not
+written to the backing file.
+.I Mpool_close
+returns 0 on success and -1 if an error occurs.
+.SH ERRORS
+The
+.I mpool_open
+function may fail and set
+.I errno
+for any of the errors specified for the library routine
+.IR malloc (3).
+.PP
+The
+.I mpool_get
+function may fail and set
+.I errno
+for the following:
+.TP 15
+[EINVAL]
+The requested record doesn't exist.
+.PP
+The
+.I mpool_new
+and
+.I mpool_get
+functions may fail and set
+.I errno
+for any of the errors specified for the library routines
+.IR read (2) ,
+.IR write (2) ,
+and
+.IR malloc (3).
+.PP
+The
+.I mpool_sync
+function may fail and set
+.I errno
+for any of the errors specified for the library routine
+.IR write (2).
+.PP
+The
+.I mpool_close
+function may fail and set
+.I errno
+for any of the errors specified for the library routine
+.IR free (3).
+.SH "SEE ALSO"
+.IR dbopen (3),
+.IR btree (3),
+.IR hash (3),
+.IR recno (3)
diff --git a/lib/libc/db/man/recno.3 b/lib/libc/db/man/recno.3
new file mode 100644
index 00000000000..63a366b645f
--- /dev/null
+++ b/lib/libc/db/man/recno.3
@@ -0,0 +1,198 @@
+.\" $NetBSD: recno.3,v 1.5 1995/06/13 00:53:40 jtc Exp $
+.\"
+.\" Copyright (c) 1990, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)recno.3 8.3 (Berkeley) 2/21/94
+.\"
+.TH RECNO 3 "February 21, 1994"
+.UC 7
+.SH NAME
+recno \- record number database access method
+.SH SYNOPSIS
+.nf
+.ft B
+#include <sys/types.h>
+#include <db.h>
+.ft R
+.fi
+.SH DESCRIPTION
+The routine
+.IR dbopen
+is the library interface to database files.
+One of the supported file formats is record number files.
+The general description of the database access methods is in
+.IR dbopen (3),
+this manual page describes only the recno specific information.
+.PP
+The record number data structure is either variable or fixed-length
+records stored in a flat-file format, accessed by the logical record
+number.
+The existence of record number five implies the existence of records
+one through four, and the deletion of record number one causes
+record number five to be renumbered to record number four, as well
+as the cursor, if positioned after record number one, to shift down
+one record.
+.PP
+The recno access method specific data structure provided to
+.I dbopen
+is defined in the <db.h> include file as follows:
+.PP
+typedef struct {
+.RS
+u_long flags;
+.br
+u_int cachesize;
+.br
+u_int psize;
+.br
+int lorder;
+.br
+size_t reclen;
+.br
+u_char bval;
+.br
+char *bfname;
+.RE
+} RECNOINFO;
+.PP
+The elements of this structure are defined as follows:
+.TP
+flags
+The flag value is specified by
+.IR or 'ing
+any of the following values:
+.RS
+.TP
+R_FIXEDLEN
+The records are fixed-length, not byte delimited.
+The structure element
+.I reclen
+specifies the length of the record, and the structure element
+.I bval
+is used as the pad character.
+.TP
+R_NOKEY
+In the interface specified by
+.IR dbopen ,
+the sequential record retrieval fills in both the caller's key and
+data structures.
+If the R_NOKEY flag is specified, the
+.I cursor
+routines are not required to fill in the key structure.
+This permits applications to retrieve records at the end of files without
+reading all of the intervening records.
+.TP
+R_SNAPSHOT
+This flag requires that a snapshot of the file be taken when
+.I dbopen
+is called, instead of permitting any unmodified records to be read from
+the original file.
+.RE
+.TP
+cachesize
+A suggested maximum size, in bytes, of the memory cache.
+This value is
+.B only
+advisory, and the access method will allocate more memory rather than fail.
+If
+.I cachesize
+is 0 (no size is specified) a default cache is used.
+.TP
+psize
+The recno access method stores the in-memory copies of its records
+in a btree.
+This value is the size (in bytes) of the pages used for nodes in that tree.
+If
+.I psize
+is 0 (no page size is specified) a page size is chosen based on the
+underlying file system I/O block size.
+See
+.IR btree (3)
+for more information.
+.TP
+lorder
+The byte order for integers in the stored database metadata.
+The number should represent the order as an integer; for example,
+big endian order would be the number 4,321.
+If
+.I lorder
+is 0 (no order is specified) the current host order is used.
+.TP
+reclen
+The length of a fixed-length record.
+.TP
+bval
+The delimiting byte to be used to mark the end of a record for
+variable-length records, and the pad character for fixed-length
+records.
+If no value is specified, newlines (``\en'') are used to mark the end
+of variable-length records and fixed-length records are padded with
+spaces.
+.TP
+bfname
+The recno access method stores the in-memory copies of its records
+in a btree.
+If bfname is non-NULL, it specifies the name of the btree file,
+as if specified as the file name for a dbopen of a btree file.
+.PP
+The data part of the key/data pair used by the recno access method
+is the same as other access methods.
+The key is different.
+The
+.I data
+field of the key should be a pointer to a memory location of type
+.IR recno_t ,
+as defined in the <db.h> include file.
+This type is normally the largest unsigned integral type available to
+the implementation.
+The
+.I size
+field of the key should be the size of that type.
+.PP
+In the interface specified by
+.IR dbopen ,
+using the
+.I put
+interface to create a new record will cause the creation of multiple,
+empty records if the record number is more than one greater than the
+largest record currently in the database.
+.SH "SEE ALSO"
+.IR btree (3),
+.IR dbopen (3),
+.IR hash (3),
+.IR mpool (3)
+.sp
+.IR "Document Processing in a Relational Database System" ,
+Michael Stonebraker, Heidi Stettner, Joseph Kalash, Antonin Guttman,
+Nadene Lynn, Memorandum No. UCB/ERL M82/32, May 1982.
+.SH BUGS
+Only big and little endian byte order is supported.
diff --git a/lib/libc/db/mpool/Makefile.inc b/lib/libc/db/mpool/Makefile.inc
new file mode 100644
index 00000000000..c9320fb0a1e
--- /dev/null
+++ b/lib/libc/db/mpool/Makefile.inc
@@ -0,0 +1,6 @@
+# $NetBSD: Makefile.inc,v 1.4 1995/02/27 13:23:53 cgd Exp $
+# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93
+
+.PATH: ${.CURDIR}/db/mpool
+
+SRCS+= mpool.c
diff --git a/lib/libc/db/mpool/README b/lib/libc/db/mpool/README
new file mode 100644
index 00000000000..13002b30fd2
--- /dev/null
+++ b/lib/libc/db/mpool/README
@@ -0,0 +1,8 @@
+# $NetBSD: README,v 1.2 1995/02/27 13:24:00 cgd Exp $
+# @(#)README 8.1 (Berkeley) 6/4/93
+
+These are the current memory pool routines.
+They aren't ready for prime time, yet, and
+the interface is expected to change.
+
+--keith
diff --git a/lib/libc/db/mpool/mpool.c b/lib/libc/db/mpool/mpool.c
new file mode 100644
index 00000000000..6417c29d453
--- /dev/null
+++ b/lib/libc/db/mpool/mpool.c
@@ -0,0 +1,540 @@
+/* $NetBSD: mpool.c,v 1.5 1995/02/27 13:24:05 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)mpool.c 8.2 (Berkeley) 2/21/94";
+#else
+static char rcsid[] = "$NetBSD: mpool.c,v 1.5 1995/02/27 13:24:05 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <db.h>
+#define __MPOOLINTERFACE_PRIVATE
+#include "mpool.h"
+
+static BKT *mpool_bkt __P((MPOOL *));
+static BKT *mpool_look __P((MPOOL *, pgno_t));
+static int mpool_write __P((MPOOL *, BKT *));
+#ifdef DEBUG
+static void __mpoolerr __P((const char *fmt, ...));
+#endif
+
+/*
+ * MPOOL_OPEN -- initialize a memory pool.
+ *
+ * Parameters:
+ * key: Shared buffer key.
+ * fd: File descriptor.
+ * pagesize: File page size.
+ * maxcache: Max number of cached pages.
+ *
+ * Returns:
+ * MPOOL pointer, NULL on error.
+ */
+MPOOL *
+mpool_open(key, fd, pagesize, maxcache)
+ DBT *key;
+ int fd;
+ pgno_t pagesize, maxcache;
+{
+ struct stat sb;
+ MPOOL *mp;
+ int entry;
+
+ if (fstat(fd, &sb))
+ return (NULL);
+ /* XXX
+ * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so
+ * that stat(2) returns true for ISSOCK on pipes. Until then, this is
+ * fairly close.
+ */
+ if (!S_ISREG(sb.st_mode)) {
+ errno = ESPIPE;
+ return (NULL);
+ }
+
+ if ((mp = (MPOOL *)malloc(sizeof(MPOOL))) == NULL)
+ return (NULL);
+ mp->free.cnext = mp->free.cprev = (BKT *)&mp->free;
+ mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru;
+ for (entry = 0; entry < HASHSIZE; ++entry)
+ mp->hashtable[entry].hnext = mp->hashtable[entry].hprev =
+ mp->hashtable[entry].cnext = mp->hashtable[entry].cprev =
+ (BKT *)&mp->hashtable[entry];
+ mp->curcache = 0;
+ mp->maxcache = maxcache;
+ mp->pagesize = pagesize;
+ mp->npages = sb.st_size / pagesize;
+ mp->fd = fd;
+ mp->pgcookie = NULL;
+ mp->pgin = mp->pgout = NULL;
+
+#ifdef STATISTICS
+ mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush =
+ mp->pageget = mp->pagenew = mp->pageput = mp->pageread =
+ mp->pagewrite = 0;
+#endif
+ return (mp);
+}
+
+/*
+ * MPOOL_FILTER -- initialize input/output filters.
+ *
+ * Parameters:
+ * pgin: Page in conversion routine.
+ * pgout: Page out conversion routine.
+ * pgcookie: Cookie for page in/out routines.
+ */
+void
+mpool_filter(mp, pgin, pgout, pgcookie)
+ MPOOL *mp;
+ void (*pgin) __P((void *, pgno_t, void *));
+ void (*pgout) __P((void *, pgno_t, void *));
+ void *pgcookie;
+{
+ mp->pgin = pgin;
+ mp->pgout = pgout;
+ mp->pgcookie = pgcookie;
+}
+
+/*
+ * MPOOL_NEW -- get a new page
+ *
+ * Parameters:
+ * mp: mpool cookie
+ * pgnoadddr: place to store new page number
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+void *
+mpool_new(mp, pgnoaddr)
+ MPOOL *mp;
+ pgno_t *pgnoaddr;
+{
+ BKT *b;
+ BKTHDR *hp;
+
+#ifdef STATISTICS
+ ++mp->pagenew;
+#endif
+ /*
+ * Get a BKT from the cache. Assign a new page number, attach it to
+ * the hash and lru chains and return.
+ */
+ if ((b = mpool_bkt(mp)) == NULL)
+ return (NULL);
+ *pgnoaddr = b->pgno = mp->npages++;
+ b->flags = MPOOL_PINNED;
+ inshash(b, b->pgno);
+ inschain(b, &mp->lru);
+ return (b->page);
+}
+
+/*
+ * MPOOL_GET -- get a page from the pool
+ *
+ * Parameters:
+ * mp: mpool cookie
+ * pgno: page number
+ * flags: not used
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+void *
+mpool_get(mp, pgno, flags)
+ MPOOL *mp;
+ pgno_t pgno;
+ u_int flags; /* XXX not used? */
+{
+ BKT *b;
+ BKTHDR *hp;
+ off_t off;
+ int nr;
+
+ /*
+ * If asking for a specific page that is already in the cache, find
+ * it and return it.
+ */
+ if (b = mpool_look(mp, pgno)) {
+#ifdef STATISTICS
+ ++mp->pageget;
+#endif
+#ifdef DEBUG
+ if (b->flags & MPOOL_PINNED)
+ __mpoolerr("mpool_get: page %d already pinned",
+ b->pgno);
+#endif
+ rmchain(b);
+ inschain(b, &mp->lru);
+ b->flags |= MPOOL_PINNED;
+ return (b->page);
+ }
+
+ /* Not allowed to retrieve a non-existent page. */
+ if (pgno >= mp->npages) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ /* Get a page from the cache. */
+ if ((b = mpool_bkt(mp)) == NULL)
+ return (NULL);
+ b->pgno = pgno;
+ b->flags = MPOOL_PINNED;
+
+#ifdef STATISTICS
+ ++mp->pageread;
+#endif
+ /* Read in the contents. */
+ off = mp->pagesize * pgno;
+ if (lseek(mp->fd, off, SEEK_SET) != off)
+ return (NULL);
+ if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) {
+ if (nr >= 0)
+ errno = EFTYPE;
+ return (NULL);
+ }
+ if (mp->pgin)
+ (mp->pgin)(mp->pgcookie, b->pgno, b->page);
+
+ inshash(b, b->pgno);
+ inschain(b, &mp->lru);
+#ifdef STATISTICS
+ ++mp->pageget;
+#endif
+ return (b->page);
+}
+
+/*
+ * MPOOL_PUT -- return a page to the pool
+ *
+ * Parameters:
+ * mp: mpool cookie
+ * page: page pointer
+ * pgno: page number
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+mpool_put(mp, page, flags)
+ MPOOL *mp;
+ void *page;
+ u_int flags;
+{
+ BKT *baddr;
+#ifdef DEBUG
+ BKT *b;
+#endif
+
+#ifdef STATISTICS
+ ++mp->pageput;
+#endif
+ baddr = (BKT *)((char *)page - sizeof(BKT));
+#ifdef DEBUG
+ if (!(baddr->flags & MPOOL_PINNED))
+ __mpoolerr("mpool_put: page %d not pinned", b->pgno);
+ for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) {
+ if (b == (BKT *)&mp->lru)
+ __mpoolerr("mpool_put: %0x: bad address", baddr);
+ if (b == baddr)
+ break;
+ }
+#endif
+ baddr->flags &= ~MPOOL_PINNED;
+ baddr->flags |= flags & MPOOL_DIRTY;
+ return (RET_SUCCESS);
+}
+
+/*
+ * MPOOL_CLOSE -- close the buffer pool
+ *
+ * Parameters:
+ * mp: mpool cookie
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+mpool_close(mp)
+ MPOOL *mp;
+{
+ BKT *b, *next;
+
+ /* Free up any space allocated to the lru pages. */
+ for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) {
+ next = b->cprev;
+ free(b);
+ }
+ free(mp);
+ return (RET_SUCCESS);
+}
+
+/*
+ * MPOOL_SYNC -- sync the file to disk.
+ *
+ * Parameters:
+ * mp: mpool cookie
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+mpool_sync(mp)
+ MPOOL *mp;
+{
+ BKT *b;
+
+ for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev)
+ if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR)
+ return (RET_ERROR);
+ return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
+}
+
+/*
+ * MPOOL_BKT -- get/create a BKT from the cache
+ *
+ * Parameters:
+ * mp: mpool cookie
+ *
+ * Returns:
+ * NULL on failure and a pointer to the BKT on success
+ */
+static BKT *
+mpool_bkt(mp)
+ MPOOL *mp;
+{
+ BKT *b;
+
+ if (mp->curcache < mp->maxcache)
+ goto new;
+
+ /*
+ * If the cache is maxxed out, search the lru list for a buffer we
+ * can flush. If we find one, write it if necessary and take it off
+ * any lists. If we don't find anything we grow the cache anyway.
+ * The cache never shrinks.
+ */
+ for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev)
+ if (!(b->flags & MPOOL_PINNED)) {
+ if (b->flags & MPOOL_DIRTY &&
+ mpool_write(mp, b) == RET_ERROR)
+ return (NULL);
+ rmhash(b);
+ rmchain(b);
+#ifdef STATISTICS
+ ++mp->pageflush;
+#endif
+#ifdef DEBUG
+ {
+ void *spage;
+ spage = b->page;
+ memset(b, 0xff, sizeof(BKT) + mp->pagesize);
+ b->page = spage;
+ }
+#endif
+ return (b);
+ }
+
+new: if ((b = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
+ return (NULL);
+#ifdef STATISTICS
+ ++mp->pagealloc;
+#endif
+#ifdef DEBUG
+ memset(b, 0xff, sizeof(BKT) + mp->pagesize);
+#endif
+ b->page = (char *)b + sizeof(BKT);
+ ++mp->curcache;
+ return (b);
+}
+
+/*
+ * MPOOL_WRITE -- sync a page to disk
+ *
+ * Parameters:
+ * mp: mpool cookie
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+static int
+mpool_write(mp, b)
+ MPOOL *mp;
+ BKT *b;
+{
+ off_t off;
+
+ if (mp->pgout)
+ (mp->pgout)(mp->pgcookie, b->pgno, b->page);
+
+#ifdef STATISTICS
+ ++mp->pagewrite;
+#endif
+ off = mp->pagesize * b->pgno;
+ if (lseek(mp->fd, off, SEEK_SET) != off)
+ return (RET_ERROR);
+ if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize)
+ return (RET_ERROR);
+ b->flags &= ~MPOOL_DIRTY;
+ return (RET_SUCCESS);
+}
+
+/*
+ * MPOOL_LOOK -- lookup a page
+ *
+ * Parameters:
+ * mp: mpool cookie
+ * pgno: page number
+ *
+ * Returns:
+ * NULL on failure and a pointer to the BKT on success
+ */
+static BKT *
+mpool_look(mp, pgno)
+ MPOOL *mp;
+ pgno_t pgno;
+{
+ register BKT *b;
+ register BKTHDR *tb;
+
+ /* XXX
+ * If find the buffer, put it first on the hash chain so can
+ * find it again quickly.
+ */
+ tb = &mp->hashtable[HASHKEY(pgno)];
+ for (b = tb->hnext; b != (BKT *)tb; b = b->hnext)
+ if (b->pgno == pgno) {
+#ifdef STATISTICS
+ ++mp->cachehit;
+#endif
+ return (b);
+ }
+#ifdef STATISTICS
+ ++mp->cachemiss;
+#endif
+ return (NULL);
+}
+
+#ifdef STATISTICS
+/*
+ * MPOOL_STAT -- cache statistics
+ *
+ * Parameters:
+ * mp: mpool cookie
+ */
+void
+mpool_stat(mp)
+ MPOOL *mp;
+{
+ BKT *b;
+ int cnt;
+ char *sep;
+
+ (void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
+ (void)fprintf(stderr,
+ "page size %lu, cacheing %lu pages of %lu page max cache\n",
+ mp->pagesize, mp->curcache, mp->maxcache);
+ (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
+ mp->pageput, mp->pageget, mp->pagenew);
+ (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
+ mp->pagealloc, mp->pageflush);
+ if (mp->cachehit + mp->cachemiss)
+ (void)fprintf(stderr,
+ "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
+ ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
+ * 100, mp->cachehit, mp->cachemiss);
+ (void)fprintf(stderr, "%lu page reads, %lu page writes\n",
+ mp->pageread, mp->pagewrite);
+
+ sep = "";
+ cnt = 0;
+ for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) {
+ (void)fprintf(stderr, "%s%d", sep, b->pgno);
+ if (b->flags & MPOOL_DIRTY)
+ (void)fprintf(stderr, "d");
+ if (b->flags & MPOOL_PINNED)
+ (void)fprintf(stderr, "P");
+ if (++cnt == 10) {
+ sep = "\n";
+ cnt = 0;
+ } else
+ sep = ", ";
+
+ }
+ (void)fprintf(stderr, "\n");
+}
+#endif
+
+#ifdef DEBUG
+#if __STDC__
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+static void
+#if __STDC__
+__mpoolerr(const char *fmt, ...)
+#else
+__mpoolerr(fmt, va_alist)
+ char *fmt;
+ va_dcl
+#endif
+{
+ va_list ap;
+#if __STDC__
+ va_start(ap, fmt);
+#else
+ va_start(ap);
+#endif
+ (void)vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ (void)fprintf(stderr, "\n");
+ abort();
+ /* NOTREACHED */
+}
+#endif
diff --git a/lib/libc/db/recno/Makefile.inc b/lib/libc/db/recno/Makefile.inc
new file mode 100644
index 00000000000..c0fd31125a1
--- /dev/null
+++ b/lib/libc/db/recno/Makefile.inc
@@ -0,0 +1,7 @@
+# $NetBSD: Makefile.inc,v 1.4 1995/02/27 13:24:22 cgd Exp $
+# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93
+
+.PATH: ${.CURDIR}/db/recno
+
+SRCS+= rec_close.c rec_delete.c rec_get.c rec_open.c rec_put.c rec_search.c \
+ rec_seq.c rec_utils.c
diff --git a/lib/libc/db/recno/extern.h b/lib/libc/db/recno/extern.h
new file mode 100644
index 00000000000..5ec5d2f220c
--- /dev/null
+++ b/lib/libc/db/recno/extern.h
@@ -0,0 +1,56 @@
+/* $NetBSD: extern.h,v 1.4 1995/02/27 13:24:30 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)extern.h 8.3 (Berkeley) 6/4/94
+ */
+
+#include "../btree/extern.h"
+
+int __rec_close __P((DB *));
+int __rec_delete __P((const DB *, const DBT *, u_int));
+int __rec_dleaf __P((BTREE *, PAGE *, u_int32_t));
+int __rec_fd __P((const DB *));
+int __rec_fmap __P((BTREE *, recno_t));
+int __rec_fout __P((BTREE *));
+int __rec_fpipe __P((BTREE *, recno_t));
+int __rec_get __P((const DB *, const DBT *, DBT *, u_int));
+int __rec_iput __P((BTREE *, recno_t, const DBT *, u_int));
+int __rec_put __P((const DB *dbp, DBT *, const DBT *, u_int));
+int __rec_ret __P((BTREE *, EPG *, recno_t, DBT *, DBT *));
+EPG *__rec_search __P((BTREE *, recno_t, enum SRCHOP));
+int __rec_seq __P((const DB *, DBT *, DBT *, u_int));
+int __rec_sync __P((const DB *, u_int));
+int __rec_vmap __P((BTREE *, recno_t));
+int __rec_vout __P((BTREE *));
+int __rec_vpipe __P((BTREE *, recno_t));
diff --git a/lib/libc/db/recno/rec_close.c b/lib/libc/db/recno/rec_close.c
new file mode 100644
index 00000000000..7c4dc283525
--- /dev/null
+++ b/lib/libc/db/recno/rec_close.c
@@ -0,0 +1,169 @@
+/* $NetBSD: rec_close.c,v 1.6 1995/02/27 13:24:37 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)rec_close.c 8.3 (Berkeley) 2/21/94";
+#else
+static char rcsid[] = "$NetBSD: rec_close.c,v 1.6 1995/02/27 13:24:37 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/mman.h>
+
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <db.h>
+#include "recno.h"
+
+/*
+ * __REC_CLOSE -- Close a recno tree.
+ *
+ * Parameters:
+ * dbp: pointer to access method
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+__rec_close(dbp)
+ DB *dbp;
+{
+ BTREE *t;
+ int status;
+
+ t = dbp->internal;
+
+ /* Toss any page pinned across calls. */
+ if (t->bt_pinned != NULL) {
+ mpool_put(t->bt_mp, t->bt_pinned, 0);
+ t->bt_pinned = NULL;
+ }
+
+ if (__rec_sync(dbp, 0) == RET_ERROR)
+ return (RET_ERROR);
+
+ /* Committed to closing. */
+ status = RET_SUCCESS;
+ if (ISSET(t, R_MEMMAPPED) && munmap(t->bt_smap, t->bt_msize))
+ status = RET_ERROR;
+
+ if (!ISSET(t, R_INMEM))
+ if (ISSET(t, R_CLOSEFP)) {
+ if (fclose(t->bt_rfp))
+ status = RET_ERROR;
+ } else
+ if (close(t->bt_rfd))
+ status = RET_ERROR;
+
+ if (__bt_close(dbp) == RET_ERROR)
+ status = RET_ERROR;
+
+ return (status);
+}
+
+/*
+ * __REC_SYNC -- sync the recno tree to disk.
+ *
+ * Parameters:
+ * dbp: pointer to access method
+ *
+ * Returns:
+ * RET_SUCCESS, RET_ERROR.
+ */
+int
+__rec_sync(dbp, flags)
+ const DB *dbp;
+ u_int flags;
+{
+ struct iovec iov[2];
+ BTREE *t;
+ DBT data, key;
+ off_t off;
+ recno_t scursor, trec;
+ int status;
+
+ t = dbp->internal;
+
+ /* Toss any page pinned across calls. */
+ if (t->bt_pinned != NULL) {
+ mpool_put(t->bt_mp, t->bt_pinned, 0);
+ t->bt_pinned = NULL;
+ }
+
+ if (flags == R_RECNOSYNC)
+ return (__bt_sync(dbp, 0));
+
+ if (ISSET(t, R_RDONLY | R_INMEM) || !ISSET(t, R_MODIFIED))
+ return (RET_SUCCESS);
+
+ /* Read any remaining records into the tree. */
+ if (!ISSET(t, R_EOF) && t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR)
+ return (RET_ERROR);
+
+ /* Rewind the file descriptor. */
+ if (lseek(t->bt_rfd, (off_t)0, SEEK_SET) != 0)
+ return (RET_ERROR);
+
+ iov[1].iov_base = "\n";
+ iov[1].iov_len = 1;
+ scursor = t->bt_rcursor;
+
+ key.size = sizeof(recno_t);
+ key.data = &trec;
+
+ status = (dbp->seq)(dbp, &key, &data, R_FIRST);
+ while (status == RET_SUCCESS) {
+ iov[0].iov_base = data.data;
+ iov[0].iov_len = data.size;
+ if (writev(t->bt_rfd, iov, 2) != data.size + 1)
+ return (RET_ERROR);
+ status = (dbp->seq)(dbp, &key, &data, R_NEXT);
+ }
+ t->bt_rcursor = scursor;
+ if (status == RET_ERROR)
+ return (RET_ERROR);
+ if ((off = lseek(t->bt_rfd, (off_t)0, SEEK_CUR)) == -1)
+ return (RET_ERROR);
+ if (ftruncate(t->bt_rfd, off))
+ return (RET_ERROR);
+ CLR(t, R_MODIFIED);
+ return (RET_SUCCESS);
+}
diff --git a/lib/libc/db/recno/rec_delete.c b/lib/libc/db/recno/rec_delete.c
new file mode 100644
index 00000000000..6f438a35cd9
--- /dev/null
+++ b/lib/libc/db/recno/rec_delete.c
@@ -0,0 +1,203 @@
+/* $NetBSD: rec_delete.c,v 1.7 1995/02/27 13:24:48 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)rec_delete.c 8.6 (Berkeley) 6/4/94";
+#else
+static char rcsid[] = "$NetBSD: rec_delete.c,v 1.7 1995/02/27 13:24:48 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <db.h>
+#include "recno.h"
+
+static int rec_rdelete __P((BTREE *, recno_t));
+
+/*
+ * __REC_DELETE -- Delete the item(s) referenced by a key.
+ *
+ * Parameters:
+ * dbp: pointer to access method
+ * key: key to delete
+ * flags: R_CURSOR if deleting what the cursor references
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
+ */
+int
+__rec_delete(dbp, key, flags)
+ const DB *dbp;
+ const DBT *key;
+ u_int flags;
+{
+ BTREE *t;
+ recno_t nrec;
+ int status;
+
+ t = dbp->internal;
+
+ /* Toss any page pinned across calls. */
+ if (t->bt_pinned != NULL) {
+ mpool_put(t->bt_mp, t->bt_pinned, 0);
+ t->bt_pinned = NULL;
+ }
+
+ switch(flags) {
+ case 0:
+ if ((nrec = *(recno_t *)key->data) == 0)
+ goto einval;
+ if (nrec > t->bt_nrecs)
+ return (RET_SPECIAL);
+ --nrec;
+ status = rec_rdelete(t, nrec);
+ break;
+ case R_CURSOR:
+ if (!ISSET(t, B_SEQINIT))
+ goto einval;
+ if (t->bt_nrecs == 0)
+ return (RET_SPECIAL);
+ status = rec_rdelete(t, t->bt_rcursor - 1);
+ if (status == RET_SUCCESS)
+ --t->bt_rcursor;
+ break;
+ default:
+einval: errno = EINVAL;
+ return (RET_ERROR);
+ }
+
+ if (status == RET_SUCCESS)
+ SET(t, B_MODIFIED | R_MODIFIED);
+ return (status);
+}
+
+/*
+ * REC_RDELETE -- Delete the data matching the specified key.
+ *
+ * Parameters:
+ * tree: tree
+ * nrec: record to delete
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
+ */
+static int
+rec_rdelete(t, nrec)
+ BTREE *t;
+ recno_t nrec;
+{
+ EPG *e;
+ PAGE *h;
+ int status;
+
+ /* Find the record; __rec_search pins the page. */
+ if ((e = __rec_search(t, nrec, SDELETE)) == NULL)
+ return (RET_ERROR);
+
+ /* Delete the record. */
+ h = e->page;
+ status = __rec_dleaf(t, h, e->index);
+ if (status != RET_SUCCESS) {
+ mpool_put(t->bt_mp, h, 0);
+ return (status);
+ }
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+ return (RET_SUCCESS);
+}
+
+/*
+ * __REC_DLEAF -- Delete a single record from a recno leaf page.
+ *
+ * Parameters:
+ * t: tree
+ * index: index on current page to delete
+ *
+ * Returns:
+ * RET_SUCCESS, RET_ERROR.
+ */
+int
+__rec_dleaf(t, h, index)
+ BTREE *t;
+ PAGE *h;
+ u_int32_t index;
+{
+ RLEAF *rl;
+ indx_t *ip, cnt, offset;
+ u_int32_t nbytes;
+ char *from;
+ void *to;
+
+ /*
+ * Delete a record from a recno leaf page. Internal records are never
+ * deleted from internal pages, regardless of the records that caused
+ * them to be added being deleted. Pages made empty by deletion are
+ * not reclaimed. They are, however, made available for reuse.
+ *
+ * Pack the remaining entries at the end of the page, shift the indices
+ * down, overwriting the deleted record and its index. If the record
+ * uses overflow pages, make them available for reuse.
+ */
+ to = rl = GETRLEAF(h, index);
+ if (rl->flags & P_BIGDATA && __ovfl_delete(t, rl->bytes) == RET_ERROR)
+ return (RET_ERROR);
+ nbytes = NRLEAF(rl);
+
+ /*
+ * Compress the key/data pairs. Compress and adjust the [BR]LEAF
+ * offsets. Reset the headers.
+ */
+ from = (char *)h + h->upper;
+ memmove(from + nbytes, from, (char *)to - from);
+ h->upper += nbytes;
+
+ offset = h->linp[index];
+ for (cnt = &h->linp[index] - (ip = &h->linp[0]); cnt--; ++ip)
+ if (ip[0] < offset)
+ ip[0] += nbytes;
+ for (cnt = &h->linp[NEXTINDEX(h)] - ip; --cnt; ++ip)
+ ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
+ h->lower -= sizeof(indx_t);
+ --t->bt_nrecs;
+ return (RET_SUCCESS);
+}
diff --git a/lib/libc/db/recno/rec_get.c b/lib/libc/db/recno/rec_get.c
new file mode 100644
index 00000000000..bc25104e552
--- /dev/null
+++ b/lib/libc/db/recno/rec_get.c
@@ -0,0 +1,308 @@
+/* $NetBSD: rec_get.c,v 1.7 1995/02/27 13:24:57 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)rec_get.c 8.5 (Berkeley) 6/20/94";
+#else
+static char rcsid[] = "$NetBSD: rec_get.c,v 1.7 1995/02/27 13:24:57 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <db.h>
+#include "recno.h"
+
+/*
+ * __REC_GET -- Get a record from the btree.
+ *
+ * Parameters:
+ * dbp: pointer to access method
+ * key: key to find
+ * data: data to return
+ * flag: currently unused
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
+ */
+int
+__rec_get(dbp, key, data, flags)
+ const DB *dbp;
+ const DBT *key;
+ DBT *data;
+ u_int flags;
+{
+ BTREE *t;
+ EPG *e;
+ recno_t nrec;
+ int status;
+
+ t = dbp->internal;
+
+ /* Toss any page pinned across calls. */
+ if (t->bt_pinned != NULL) {
+ mpool_put(t->bt_mp, t->bt_pinned, 0);
+ t->bt_pinned = NULL;
+ }
+
+ /* Get currently doesn't take any flags, and keys of 0 are illegal. */
+ if (flags || (nrec = *(recno_t *)key->data) == 0) {
+ errno = EINVAL;
+ return (RET_ERROR);
+ }
+
+ /*
+ * If we haven't seen this record yet, try to find it in the
+ * original file.
+ */
+ if (nrec > t->bt_nrecs) {
+ if (ISSET(t, R_EOF | R_INMEM))
+ return (RET_SPECIAL);
+ if ((status = t->bt_irec(t, nrec)) != RET_SUCCESS)
+ return (status);
+ }
+
+ --nrec;
+ if ((e = __rec_search(t, nrec, SEARCH)) == NULL)
+ return (RET_ERROR);
+
+ status = __rec_ret(t, e, 0, NULL, data);
+ if (ISSET(t, B_DB_LOCK))
+ mpool_put(t->bt_mp, e->page, 0);
+ else
+ t->bt_pinned = e->page;
+ return (status);
+}
+
+/*
+ * __REC_FPIPE -- Get fixed length records from a pipe.
+ *
+ * Parameters:
+ * t: tree
+ * cnt: records to read
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+__rec_fpipe(t, top)
+ BTREE *t;
+ recno_t top;
+{
+ DBT data;
+ recno_t nrec;
+ size_t len;
+ int ch;
+ char *p;
+
+ if (t->bt_dbufsz < t->bt_reclen) {
+ t->bt_dbuf = (char *)(t->bt_dbuf == NULL ?
+ malloc(t->bt_reclen) : realloc(t->bt_dbuf, t->bt_reclen));
+ if (t->bt_dbuf == NULL)
+ return (RET_ERROR);
+ t->bt_dbufsz = t->bt_reclen;
+ }
+ data.data = t->bt_dbuf;
+ data.size = t->bt_reclen;
+
+ for (nrec = t->bt_nrecs; nrec < top; ++nrec) {
+ len = t->bt_reclen;
+ for (p = t->bt_dbuf;; *p++ = ch)
+ if ((ch = getc(t->bt_rfp)) == EOF || !len--) {
+ if (__rec_iput(t, nrec, &data, 0)
+ != RET_SUCCESS)
+ return (RET_ERROR);
+ break;
+ }
+ if (ch == EOF)
+ break;
+ }
+ if (nrec < top) {
+ SET(t, R_EOF);
+ return (RET_SPECIAL);
+ }
+ return (RET_SUCCESS);
+}
+
+/*
+ * __REC_VPIPE -- Get variable length records from a pipe.
+ *
+ * Parameters:
+ * t: tree
+ * cnt: records to read
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+__rec_vpipe(t, top)
+ BTREE *t;
+ recno_t top;
+{
+ DBT data;
+ recno_t nrec;
+ indx_t len;
+ size_t sz;
+ int bval, ch;
+ char *p;
+
+ bval = t->bt_bval;
+ for (nrec = t->bt_nrecs; nrec < top; ++nrec) {
+ for (p = t->bt_dbuf, sz = t->bt_dbufsz;; *p++ = ch, --sz) {
+ if ((ch = getc(t->bt_rfp)) == EOF || ch == bval) {
+ data.data = t->bt_dbuf;
+ data.size = p - t->bt_dbuf;
+ if (ch == EOF && data.size == 0)
+ break;
+ if (__rec_iput(t, nrec, &data, 0)
+ != RET_SUCCESS)
+ return (RET_ERROR);
+ break;
+ }
+ if (sz == 0) {
+ len = p - t->bt_dbuf;
+ t->bt_dbufsz += (sz = 256);
+ t->bt_dbuf = (char *)(t->bt_dbuf == NULL ?
+ malloc(t->bt_dbufsz) :
+ realloc(t->bt_dbuf, t->bt_dbufsz));
+ if (t->bt_dbuf == NULL)
+ return (RET_ERROR);
+ p = t->bt_dbuf + len;
+ }
+ }
+ if (ch == EOF)
+ break;
+ }
+ if (nrec < top) {
+ SET(t, R_EOF);
+ return (RET_SPECIAL);
+ }
+ return (RET_SUCCESS);
+}
+
+/*
+ * __REC_FMAP -- Get fixed length records from a file.
+ *
+ * Parameters:
+ * t: tree
+ * cnt: records to read
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+__rec_fmap(t, top)
+ BTREE *t;
+ recno_t top;
+{
+ DBT data;
+ recno_t nrec;
+ caddr_t sp, ep;
+ size_t len;
+ char *p;
+
+ if (t->bt_dbufsz < t->bt_reclen) {
+ t->bt_dbuf = (char *)(t->bt_dbuf == NULL ?
+ malloc(t->bt_reclen) : realloc(t->bt_dbuf, t->bt_reclen));
+ if (t->bt_dbuf == NULL)
+ return (RET_ERROR);
+ t->bt_dbufsz = t->bt_reclen;
+ }
+ data.data = t->bt_dbuf;
+ data.size = t->bt_reclen;
+
+ sp = t->bt_cmap;
+ ep = t->bt_emap;
+ for (nrec = t->bt_nrecs; nrec < top; ++nrec) {
+ if (sp >= ep) {
+ SET(t, R_EOF);
+ return (RET_SPECIAL);
+ }
+ len = t->bt_reclen;
+ for (p = t->bt_dbuf; sp < ep && len--; *p++ = *sp++);
+ memset(p, t->bt_bval, len);
+ if (__rec_iput(t, nrec, &data, 0) != RET_SUCCESS)
+ return (RET_ERROR);
+ }
+ t->bt_cmap = sp;
+ return (RET_SUCCESS);
+}
+
+/*
+ * __REC_VMAP -- Get variable length records from a file.
+ *
+ * Parameters:
+ * t: tree
+ * cnt: records to read
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+__rec_vmap(t, top)
+ BTREE *t;
+ recno_t top;
+{
+ DBT data;
+ caddr_t sp, ep;
+ recno_t nrec;
+ int bval;
+
+ sp = t->bt_cmap;
+ ep = t->bt_emap;
+ bval = t->bt_bval;
+
+ for (nrec = t->bt_nrecs; nrec < top; ++nrec) {
+ if (sp >= ep) {
+ SET(t, R_EOF);
+ return (RET_SPECIAL);
+ }
+ for (data.data = sp; sp < ep && *sp != bval; ++sp);
+ data.size = sp - (caddr_t)data.data;
+ if (__rec_iput(t, nrec, &data, 0) != RET_SUCCESS)
+ return (RET_ERROR);
+ ++sp;
+ }
+ t->bt_cmap = sp;
+ return (RET_SUCCESS);
+}
diff --git a/lib/libc/db/recno/rec_open.c b/lib/libc/db/recno/rec_open.c
new file mode 100644
index 00000000000..e67a7b6f000
--- /dev/null
+++ b/lib/libc/db/recno/rec_open.c
@@ -0,0 +1,236 @@
+/* $NetBSD: rec_open.c,v 1.6 1995/02/27 13:25:05 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)rec_open.c 8.6 (Berkeley) 2/22/94";
+#else
+static char rcsid[] = "$NetBSD: rec_open.c,v 1.6 1995/02/27 13:25:05 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <db.h>
+#include "recno.h"
+
+DB *
+__rec_open(fname, flags, mode, openinfo, dflags)
+ const char *fname;
+ int flags, mode, dflags;
+ const RECNOINFO *openinfo;
+{
+ BTREE *t;
+ BTREEINFO btopeninfo;
+ DB *dbp;
+ PAGE *h;
+ struct stat sb;
+ int rfd, sverrno;
+
+ /* Open the user's file -- if this fails, we're done. */
+ if (fname != NULL && (rfd = open(fname, flags, mode)) < 0)
+ return (NULL);
+
+ /* Create a btree in memory (backed by disk). */
+ dbp = NULL;
+ if (openinfo) {
+ if (openinfo->flags & ~(R_FIXEDLEN | R_NOKEY | R_SNAPSHOT))
+ goto einval;
+ btopeninfo.flags = 0;
+ btopeninfo.cachesize = openinfo->cachesize;
+ btopeninfo.maxkeypage = 0;
+ btopeninfo.minkeypage = 0;
+ btopeninfo.psize = openinfo->psize;
+ btopeninfo.compare = NULL;
+ btopeninfo.prefix = NULL;
+ btopeninfo.lorder = openinfo->lorder;
+ dbp = __bt_open(openinfo->bfname,
+ O_RDWR, S_IRUSR | S_IWUSR, &btopeninfo, dflags);
+ } else
+ dbp = __bt_open(NULL, O_RDWR, S_IRUSR | S_IWUSR, NULL, dflags);
+ if (dbp == NULL)
+ goto err;
+
+ /*
+ * Some fields in the tree structure are recno specific. Fill them
+ * in and make the btree structure look like a recno structure. We
+ * don't change the bt_ovflsize value, it's close enough and slightly
+ * bigger.
+ */
+ t = dbp->internal;
+ if (openinfo) {
+ if (openinfo->flags & R_FIXEDLEN) {
+ SET(t, R_FIXLEN);
+ t->bt_reclen = openinfo->reclen;
+ if (t->bt_reclen == 0)
+ goto einval;
+ }
+ t->bt_bval = openinfo->bval;
+ } else
+ t->bt_bval = '\n';
+
+ SET(t, R_RECNO);
+ if (fname == NULL)
+ SET(t, R_EOF | R_INMEM);
+ else
+ t->bt_rfd = rfd;
+ t->bt_rcursor = 0;
+
+ if (fname != NULL) {
+ /*
+ * In 4.4BSD, stat(2) returns true for ISSOCK on pipes.
+ * Unfortunately, that's not portable, so we use lseek
+ * and check the errno values.
+ */
+ errno = 0;
+ if (lseek(rfd, (off_t)0, SEEK_CUR) == -1 && errno == ESPIPE) {
+ switch (flags & O_ACCMODE) {
+ case O_RDONLY:
+ SET(t, R_RDONLY);
+ break;
+ default:
+ goto einval;
+ }
+slow: if ((t->bt_rfp = fdopen(rfd, "r")) == NULL)
+ goto err;
+ SET(t, R_CLOSEFP);
+ t->bt_irec =
+ ISSET(t, R_FIXLEN) ? __rec_fpipe : __rec_vpipe;
+ } else {
+ switch (flags & O_ACCMODE) {
+ case O_RDONLY:
+ SET(t, R_RDONLY);
+ break;
+ case O_RDWR:
+ break;
+ default:
+ goto einval;
+ }
+
+ if (fstat(rfd, &sb))
+ goto err;
+ /*
+ * Kluge -- we'd like to test to see if the file is too
+ * big to mmap. Since, we don't know what size or type
+ * off_t's or size_t's are, what the largest unsigned
+ * integral type is, or what random insanity the local
+ * C compiler will perpetrate, doing the comparison in
+ * a portable way is flatly impossible. Hope that mmap
+ * fails if the file is too large.
+ */
+ if (sb.st_size == 0)
+ SET(t, R_EOF);
+ else {
+ t->bt_msize = sb.st_size;
+ if ((t->bt_smap = mmap(NULL, t->bt_msize,
+ PROT_READ, MAP_PRIVATE, rfd,
+ (off_t)0)) == (caddr_t)-1)
+ goto slow;
+ t->bt_cmap = t->bt_smap;
+ t->bt_emap = t->bt_smap + sb.st_size;
+ t->bt_irec = ISSET(t, R_FIXLEN) ?
+ __rec_fmap : __rec_vmap;
+ SET(t, R_MEMMAPPED);
+ }
+ }
+ }
+
+ /* Use the recno routines. */
+ dbp->close = __rec_close;
+ dbp->del = __rec_delete;
+ dbp->fd = __rec_fd;
+ dbp->get = __rec_get;
+ dbp->put = __rec_put;
+ dbp->seq = __rec_seq;
+ dbp->sync = __rec_sync;
+
+ /* If the root page was created, reset the flags. */
+ if ((h = mpool_get(t->bt_mp, P_ROOT, 0)) == NULL)
+ goto err;
+ if ((h->flags & P_TYPE) == P_BLEAF) {
+ h->flags = h->flags & ~P_TYPE | P_RLEAF;
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+ } else
+ mpool_put(t->bt_mp, h, 0);
+
+ if (openinfo && openinfo->flags & R_SNAPSHOT &&
+ !ISSET(t, R_EOF | R_INMEM) &&
+ t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR)
+ goto err;
+ return (dbp);
+
+einval: errno = EINVAL;
+err: sverrno = errno;
+ if (dbp != NULL)
+ (void)__bt_close(dbp);
+ if (fname != NULL)
+ (void)close(rfd);
+ errno = sverrno;
+ return (NULL);
+}
+
+int
+__rec_fd(dbp)
+ const DB *dbp;
+{
+ BTREE *t;
+
+ t = dbp->internal;
+
+ /* Toss any page pinned across calls. */
+ if (t->bt_pinned != NULL) {
+ mpool_put(t->bt_mp, t->bt_pinned, 0);
+ t->bt_pinned = NULL;
+ }
+
+ /* In-memory database can't have a file descriptor. */
+ if (ISSET(t, R_INMEM)) {
+ errno = ENOENT;
+ return (-1);
+ }
+ return (t->bt_rfd);
+}
diff --git a/lib/libc/db/recno/rec_put.c b/lib/libc/db/recno/rec_put.c
new file mode 100644
index 00000000000..77d7079fb73
--- /dev/null
+++ b/lib/libc/db/recno/rec_put.c
@@ -0,0 +1,259 @@
+/* $NetBSD: rec_put.c,v 1.7 1995/02/27 13:25:13 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)rec_put.c 8.4 (Berkeley) 5/31/94";
+#else
+static char rcsid[] = "$NetBSD: rec_put.c,v 1.7 1995/02/27 13:25:13 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <db.h>
+#include "recno.h"
+
+/*
+ * __REC_PUT -- Add a recno item to the tree.
+ *
+ * Parameters:
+ * dbp: pointer to access method
+ * key: key
+ * data: data
+ * flag: R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is
+ * already in the tree and R_NOOVERWRITE specified.
+ */
+int
+__rec_put(dbp, key, data, flags)
+ const DB *dbp;
+ DBT *key;
+ const DBT *data;
+ u_int flags;
+{
+ BTREE *t;
+ DBT tdata;
+ recno_t nrec;
+ int status;
+
+ t = dbp->internal;
+
+ /* Toss any page pinned across calls. */
+ if (t->bt_pinned != NULL) {
+ mpool_put(t->bt_mp, t->bt_pinned, 0);
+ t->bt_pinned = NULL;
+ }
+
+ switch (flags) {
+ case R_CURSOR:
+ if (!ISSET(t, B_SEQINIT))
+ goto einval;
+ nrec = t->bt_rcursor;
+ break;
+ case R_SETCURSOR:
+ if ((nrec = *(recno_t *)key->data) == 0)
+ goto einval;
+ break;
+ case R_IAFTER:
+ if ((nrec = *(recno_t *)key->data) == 0) {
+ nrec = 1;
+ flags = R_IBEFORE;
+ }
+ break;
+ case 0:
+ case R_IBEFORE:
+ if ((nrec = *(recno_t *)key->data) == 0)
+ goto einval;
+ break;
+ case R_NOOVERWRITE:
+ if ((nrec = *(recno_t *)key->data) == 0)
+ goto einval;
+ if (nrec <= t->bt_nrecs)
+ return (RET_SPECIAL);
+ break;
+ default:
+einval: errno = EINVAL;
+ return (RET_ERROR);
+ }
+
+ /*
+ * Make sure that records up to and including the put record are
+ * already in the database. If skipping records, create empty ones.
+ */
+ if (nrec > t->bt_nrecs) {
+ if (!ISSET(t, R_EOF | R_INMEM) &&
+ t->bt_irec(t, nrec) == RET_ERROR)
+ return (RET_ERROR);
+ if (nrec > t->bt_nrecs + 1) {
+ if (ISSET(t, R_FIXLEN)) {
+ if ((tdata.data =
+ (void *)malloc(t->bt_reclen)) == NULL)
+ return (RET_ERROR);
+ tdata.size = t->bt_reclen;
+ memset(tdata.data, t->bt_bval, tdata.size);
+ } else {
+ tdata.data = NULL;
+ tdata.size = 0;
+ }
+ while (nrec > t->bt_nrecs + 1)
+ if (__rec_iput(t,
+ t->bt_nrecs, &tdata, 0) != RET_SUCCESS)
+ return (RET_ERROR);
+ if (ISSET(t, R_FIXLEN))
+ free(tdata.data);
+ }
+ }
+
+ if ((status = __rec_iput(t, nrec - 1, data, flags)) != RET_SUCCESS)
+ return (status);
+
+ if (flags == R_SETCURSOR)
+ t->bt_rcursor = nrec;
+
+ SET(t, R_MODIFIED);
+ return (__rec_ret(t, NULL, nrec, key, NULL));
+}
+
+/*
+ * __REC_IPUT -- Add a recno item to the tree.
+ *
+ * Parameters:
+ * t: tree
+ * nrec: record number
+ * data: data
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS
+ */
+int
+__rec_iput(t, nrec, data, flags)
+ BTREE *t;
+ recno_t nrec;
+ const DBT *data;
+ u_int flags;
+{
+ DBT tdata;
+ EPG *e;
+ PAGE *h;
+ indx_t index, nxtindex;
+ pgno_t pg;
+ u_int32_t nbytes;
+ int dflags, status;
+ char *dest, db[NOVFLSIZE];
+
+ /*
+ * If the data won't fit on a page, store it on indirect pages.
+ *
+ * XXX
+ * If the insert fails later on, these pages aren't recovered.
+ */
+ if (data->size > t->bt_ovflsize) {
+ if (__ovfl_put(t, data, &pg) == RET_ERROR)
+ return (RET_ERROR);
+ tdata.data = db;
+ tdata.size = NOVFLSIZE;
+ *(pgno_t *)db = pg;
+ *(u_int32_t *)(db + sizeof(pgno_t)) = data->size;
+ dflags = P_BIGDATA;
+ data = &tdata;
+ } else
+ dflags = 0;
+
+ /* __rec_search pins the returned page. */
+ if ((e = __rec_search(t, nrec,
+ nrec > t->bt_nrecs || flags == R_IAFTER || flags == R_IBEFORE ?
+ SINSERT : SEARCH)) == NULL)
+ return (RET_ERROR);
+
+ h = e->page;
+ index = e->index;
+
+ /*
+ * Add the specified key/data pair to the tree. The R_IAFTER and
+ * R_IBEFORE flags insert the key after/before the specified key.
+ *
+ * Pages are split as required.
+ */
+ switch (flags) {
+ case R_IAFTER:
+ ++index;
+ break;
+ case R_IBEFORE:
+ break;
+ default:
+ if (nrec < t->bt_nrecs &&
+ __rec_dleaf(t, h, index) == RET_ERROR) {
+ mpool_put(t->bt_mp, h, 0);
+ return (RET_ERROR);
+ }
+ break;
+ }
+
+ /*
+ * If not enough room, split the page. The split code will insert
+ * the key and data and unpin the current page. If inserting into
+ * the offset array, shift the pointers up.
+ */
+ nbytes = NRLEAFDBT(data->size);
+ if (h->upper - h->lower < nbytes + sizeof(indx_t)) {
+ status = __bt_split(t, h, NULL, data, dflags, nbytes, index);
+ if (status == RET_SUCCESS)
+ ++t->bt_nrecs;
+ return (status);
+ }
+
+ if (index < (nxtindex = NEXTINDEX(h)))
+ memmove(h->linp + index + 1, h->linp + index,
+ (nxtindex - index) * sizeof(indx_t));
+ h->lower += sizeof(indx_t);
+
+ h->linp[index] = h->upper -= nbytes;
+ dest = (char *)h + h->upper;
+ WR_RLEAF(dest, data, dflags);
+
+ ++t->bt_nrecs;
+ SET(t, B_MODIFIED);
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+
+ return (RET_SUCCESS);
+}
diff --git a/lib/libc/db/recno/rec_search.c b/lib/libc/db/recno/rec_search.c
new file mode 100644
index 00000000000..208328d9ddd
--- /dev/null
+++ b/lib/libc/db/recno/rec_search.c
@@ -0,0 +1,133 @@
+/* $NetBSD: rec_search.c,v 1.6 1995/02/27 13:25:17 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)rec_search.c 8.3 (Berkeley) 2/21/94";
+#else
+static char rcsid[] = "$NetBSD: rec_search.c,v 1.6 1995/02/27 13:25:17 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+
+#include <db.h>
+#include "recno.h"
+
+/*
+ * __REC_SEARCH -- Search a btree for a key.
+ *
+ * Parameters:
+ * t: tree to search
+ * recno: key to find
+ * op: search operation
+ *
+ * Returns:
+ * EPG for matching record, if any, or the EPG for the location of the
+ * key, if it were inserted into the tree.
+ *
+ * Returns:
+ * The EPG for matching record, if any, or the EPG for the location
+ * of the key, if it were inserted into the tree, is entered into
+ * the bt_cur field of the tree. A pointer to the field is returned.
+ */
+EPG *
+__rec_search(t, recno, op)
+ BTREE *t;
+ recno_t recno;
+ enum SRCHOP op;
+{
+ register indx_t index;
+ register PAGE *h;
+ EPGNO *parent;
+ RINTERNAL *r;
+ pgno_t pg;
+ indx_t top;
+ recno_t total;
+ int sverrno;
+
+ BT_CLR(t);
+ for (pg = P_ROOT, total = 0;;) {
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ goto err;
+ if (h->flags & P_RLEAF) {
+ t->bt_cur.page = h;
+ t->bt_cur.index = recno - total;
+ return (&t->bt_cur);
+ }
+ for (index = 0, top = NEXTINDEX(h);;) {
+ r = GETRINTERNAL(h, index);
+ if (++index == top || total + r->nrecs > recno)
+ break;
+ total += r->nrecs;
+ }
+
+ if (__bt_push(t, pg, index - 1) == RET_ERROR)
+ return (NULL);
+
+ pg = r->pgno;
+ switch (op) {
+ case SDELETE:
+ --GETRINTERNAL(h, (index - 1))->nrecs;
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+ break;
+ case SINSERT:
+ ++GETRINTERNAL(h, (index - 1))->nrecs;
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+ break;
+ case SEARCH:
+ mpool_put(t->bt_mp, h, 0);
+ break;
+ }
+
+ }
+ /* Try and recover the tree. */
+err: sverrno = errno;
+ if (op != SEARCH)
+ while ((parent = BT_POP(t)) != NULL) {
+ if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
+ break;
+ if (op == SINSERT)
+ --GETRINTERNAL(h, parent->index)->nrecs;
+ else
+ ++GETRINTERNAL(h, parent->index)->nrecs;
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+ }
+ errno = sverrno;
+ return (NULL);
+}
diff --git a/lib/libc/db/recno/rec_seq.c b/lib/libc/db/recno/rec_seq.c
new file mode 100644
index 00000000000..b91f3a27bf6
--- /dev/null
+++ b/lib/libc/db/recno/rec_seq.c
@@ -0,0 +1,137 @@
+/* $NetBSD: rec_seq.c,v 1.6 1995/02/27 13:25:24 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+#if 0
+static char sccsid[] = "@(#)rec_seq.c 8.2 (Berkeley) 9/7/93";
+#else
+static char rcsid[] = "$NetBSD: rec_seq.c,v 1.6 1995/02/27 13:25:24 cgd Exp $";
+#endif
+#endif /* not lint */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <db.h>
+#include "recno.h"
+
+/*
+ * __REC_SEQ -- Recno sequential scan interface.
+ *
+ * Parameters:
+ * dbp: pointer to access method
+ * key: key for positioning and return value
+ * data: data return value
+ * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
+ *
+ * Returns:
+ * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
+ */
+int
+__rec_seq(dbp, key, data, flags)
+ const DB *dbp;
+ DBT *key, *data;
+ u_int flags;
+{
+ BTREE *t;
+ EPG *e;
+ recno_t nrec;
+ int status;
+
+ t = dbp->internal;
+
+ /* Toss any page pinned across calls. */
+ if (t->bt_pinned != NULL) {
+ mpool_put(t->bt_mp, t->bt_pinned, 0);
+ t->bt_pinned = NULL;
+ }
+
+ switch(flags) {
+ case R_CURSOR:
+ if ((nrec = *(recno_t *)key->data) == 0)
+ goto einval;
+ break;
+ case R_NEXT:
+ if (ISSET(t, B_SEQINIT)) {
+ nrec = t->bt_rcursor + 1;
+ break;
+ }
+ /* FALLTHROUGH */
+ case R_FIRST:
+ nrec = 1;
+ break;
+ case R_PREV:
+ if (ISSET(t, B_SEQINIT)) {
+ if ((nrec = t->bt_rcursor - 1) == 0)
+ return (RET_SPECIAL);
+ break;
+ }
+ /* FALLTHROUGH */
+ case R_LAST:
+ if (!ISSET(t, R_EOF | R_INMEM) &&
+ t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR)
+ return (RET_ERROR);
+ nrec = t->bt_nrecs;
+ break;
+ default:
+einval: errno = EINVAL;
+ return (RET_ERROR);
+ }
+
+ if (t->bt_nrecs == 0 || nrec > t->bt_nrecs) {
+ if (!ISSET(t, R_EOF | R_INMEM) &&
+ (status = t->bt_irec(t, nrec)) != RET_SUCCESS)
+ return (status);
+ if (t->bt_nrecs == 0 || nrec > t->bt_nrecs)
+ return (RET_SPECIAL);
+ }
+
+ if ((e = __rec_search(t, nrec - 1, SEARCH)) == NULL)
+ return (RET_ERROR);
+
+ SET(t, B_SEQINIT);
+ t->bt_rcursor = nrec;
+
+ status = __rec_ret(t, e, nrec, key, data);
+ if (ISSET(t, B_DB_LOCK))
+ mpool_put(t->bt_mp, e->page, 0);
+ else
+ t->bt_pinned = e->page;
+ return (status);
+}
diff --git a/lib/libc/db/recno/rec_utils.c b/lib/libc/db/recno/rec_utils.c
new file mode 100644
index 00000000000..7aab4ac3a91
--- /dev/null
+++ b/lib/libc/db/recno/rec_utils.c
@@ -0,0 +1,125 @@
+/* $NetBSD: rec_utils.c,v 1.6 1995/02/27 13:25:29 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)rec_utils.c 8.4 (Berkeley) 6/20/94";
+#else
+static char rcsid[] = "$NetBSD: rec_utils.c,v 1.6 1995/02/27 13:25:29 cgd Exp $";
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/param.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <db.h>
+#include "recno.h"
+
+/*
+ * __REC_RET -- Build return data as a result of search or scan.
+ *
+ * Parameters:
+ * t: tree
+ * d: LEAF to be returned to the user.
+ * data: user's data structure
+ *
+ * Returns:
+ * RET_SUCCESS, RET_ERROR.
+ */
+int
+__rec_ret(t, e, nrec, key, data)
+ BTREE *t;
+ EPG *e;
+ recno_t nrec;
+ DBT *key, *data;
+{
+ register RLEAF *rl;
+ register void *p;
+
+ if (data == NULL)
+ goto retkey;
+
+ rl = GETRLEAF(e->page, e->index);
+
+ /*
+ * We always copy big data to make it contigous. Otherwise, we
+ * leave the page pinned and don't copy unless the user specified
+ * concurrent access.
+ */
+ if (rl->flags & P_BIGDATA) {
+ if (__ovfl_get(t, rl->bytes,
+ &data->size, &t->bt_dbuf, &t->bt_dbufsz))
+ return (RET_ERROR);
+ data->data = t->bt_dbuf;
+ } else if (ISSET(t, B_DB_LOCK)) {
+ /* Use +1 in case the first record retrieved is 0 length. */
+ if (rl->dsize + 1 > t->bt_dbufsz) {
+ p = (void *)(t->bt_dbuf == NULL ?
+ malloc(rl->dsize + 1) :
+ realloc(t->bt_dbuf, rl->dsize + 1));
+ if (p == NULL)
+ return (RET_ERROR);
+ t->bt_dbuf = p;
+ t->bt_dbufsz = rl->dsize + 1;
+ }
+ memmove(t->bt_dbuf, rl->bytes, rl->dsize);
+ data->size = rl->dsize;
+ data->data = t->bt_dbuf;
+ } else {
+ data->size = rl->dsize;
+ data->data = rl->bytes;
+ }
+
+retkey: if (key == NULL)
+ return (RET_SUCCESS);
+
+ /* We have to copy the key, it's not on the page. */
+ if (sizeof(recno_t) > t->bt_kbufsz) {
+ p = (void *)(t->bt_kbuf == NULL ?
+ malloc(sizeof(recno_t)) :
+ realloc(t->bt_kbuf, sizeof(recno_t)));
+ if (p == NULL)
+ return (RET_ERROR);
+ t->bt_kbuf = p;
+ t->bt_kbufsz = sizeof(recno_t);
+ }
+ memmove(t->bt_kbuf, &nrec, sizeof(recno_t));
+ key->size = sizeof(recno_t);
+ key->data = t->bt_kbuf;
+ return (RET_SUCCESS);
+}
diff --git a/lib/libc/db/recno/recno.h b/lib/libc/db/recno/recno.h
new file mode 100644
index 00000000000..5c1c716ace3
--- /dev/null
+++ b/lib/libc/db/recno/recno.h
@@ -0,0 +1,41 @@
+/* $NetBSD: recno.h,v 1.4 1995/02/27 13:25:35 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)recno.h 8.1 (Berkeley) 6/4/93
+ */
+
+enum SRCHOP { SDELETE, SINSERT, SEARCH}; /* Rec_search operation. */
+
+#include "../btree/btree.h"
+#include "extern.h"