summaryrefslogtreecommitdiff
path: root/lib/libc/db/mpool
diff options
context:
space:
mode:
authorTheo de Raadt <deraadt@cvs.openbsd.org>1996-05-07 09:02:24 +0000
committerTheo de Raadt <deraadt@cvs.openbsd.org>1996-05-07 09:02:24 +0000
commit30981883e827aa69632bca87e2764ef1ab152fa8 (patch)
treee1095f47308154adb4c7d01f0b14a4f737d2da8e /lib/libc/db/mpool
parent6a0d2fe1f44c1b101f0a8a97b06b5e847918cf81 (diff)
db release 1.85
Diffstat (limited to 'lib/libc/db/mpool')
-rw-r--r--lib/libc/db/mpool/mpool.c431
-rw-r--r--lib/libc/db/mpool/mpool.libtp746
2 files changed, 926 insertions, 251 deletions
diff --git a/lib/libc/db/mpool/mpool.c b/lib/libc/db/mpool/mpool.c
index 6417c29d453..be04f45e56c 100644
--- a/lib/libc/db/mpool/mpool.c
+++ b/lib/libc/db/mpool/mpool.c
@@ -1,7 +1,7 @@
-/* $NetBSD: mpool.c,v 1.5 1995/02/27 13:24:05 cgd Exp $ */
+/* $NetBSD: mpool.c,v 1.6 1996/05/03 21:29:48 cgd Exp $ */
/*-
- * Copyright (c) 1990, 1993
+ * Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,13 +35,14 @@
#if defined(LIBC_SCCS) && !defined(lint)
#if 0
-static char sccsid[] = "@(#)mpool.c 8.2 (Berkeley) 2/21/94";
+static char sccsid[] = "@(#)mpool.c 8.5 (Berkeley) 7/26/94";
#else
-static char rcsid[] = "$NetBSD: mpool.c,v 1.5 1995/02/27 13:24:05 cgd Exp $";
+static char rcsid[] = "$NetBSD: mpool.c,v 1.6 1996/05/03 21:29:48 cgd Exp $";
#endif
#endif /* LIBC_SCCS and not lint */
#include <sys/param.h>
+#include <sys/queue.h>
#include <sys/stat.h>
#include <errno.h>
@@ -51,31 +52,21 @@ static char rcsid[] = "$NetBSD: mpool.c,v 1.5 1995/02/27 13:24:05 cgd Exp $";
#include <unistd.h>
#include <db.h>
+
#define __MPOOLINTERFACE_PRIVATE
-#include "mpool.h"
+#include <mpool.h>
static BKT *mpool_bkt __P((MPOOL *));
static BKT *mpool_look __P((MPOOL *, pgno_t));
static int mpool_write __P((MPOOL *, BKT *));
-#ifdef DEBUG
-static void __mpoolerr __P((const char *fmt, ...));
-#endif
/*
- * MPOOL_OPEN -- initialize a memory pool.
- *
- * Parameters:
- * key: Shared buffer key.
- * fd: File descriptor.
- * pagesize: File page size.
- * maxcache: Max number of cached pages.
- *
- * Returns:
- * MPOOL pointer, NULL on error.
+ * mpool_open --
+ * Initialize a memory pool.
*/
MPOOL *
mpool_open(key, fd, pagesize, maxcache)
- DBT *key;
+ void *key;
int fd;
pgno_t pagesize, maxcache;
{
@@ -83,49 +74,35 @@ mpool_open(key, fd, pagesize, maxcache)
MPOOL *mp;
int entry;
+ /*
+ * Get information about the file.
+ *
+ * XXX
+ * We don't currently handle pipes, although we should.
+ */
if (fstat(fd, &sb))
return (NULL);
- /* XXX
- * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so
- * that stat(2) returns true for ISSOCK on pipes. Until then, this is
- * fairly close.
- */
if (!S_ISREG(sb.st_mode)) {
errno = ESPIPE;
return (NULL);
}
- if ((mp = (MPOOL *)malloc(sizeof(MPOOL))) == NULL)
+ /* Allocate and initialize the MPOOL cookie. */
+ if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
return (NULL);
- mp->free.cnext = mp->free.cprev = (BKT *)&mp->free;
- mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru;
+ CIRCLEQ_INIT(&mp->lqh);
for (entry = 0; entry < HASHSIZE; ++entry)
- mp->hashtable[entry].hnext = mp->hashtable[entry].hprev =
- mp->hashtable[entry].cnext = mp->hashtable[entry].cprev =
- (BKT *)&mp->hashtable[entry];
- mp->curcache = 0;
+ CIRCLEQ_INIT(&mp->hqh[entry]);
mp->maxcache = maxcache;
- mp->pagesize = pagesize;
mp->npages = sb.st_size / pagesize;
+ mp->pagesize = pagesize;
mp->fd = fd;
- mp->pgcookie = NULL;
- mp->pgin = mp->pgout = NULL;
-
-#ifdef STATISTICS
- mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush =
- mp->pageget = mp->pagenew = mp->pageput = mp->pageread =
- mp->pagewrite = 0;
-#endif
return (mp);
}
/*
- * MPOOL_FILTER -- initialize input/output filters.
- *
- * Parameters:
- * pgin: Page in conversion routine.
- * pgout: Page out conversion routine.
- * pgcookie: Cookie for page in/out routines.
+ * mpool_filter --
+ * Initialize input/output filters.
*/
void
mpool_filter(mp, pgin, pgout, pgcookie)
@@ -140,124 +117,128 @@ mpool_filter(mp, pgin, pgout, pgcookie)
}
/*
- * MPOOL_NEW -- get a new page
- *
- * Parameters:
- * mp: mpool cookie
- * pgnoadddr: place to store new page number
- * Returns:
- * RET_ERROR, RET_SUCCESS
+ * mpool_new --
+ * Get a new page of memory.
*/
void *
mpool_new(mp, pgnoaddr)
MPOOL *mp;
pgno_t *pgnoaddr;
{
- BKT *b;
- BKTHDR *hp;
+ struct _hqh *head;
+ BKT *bp;
+ if (mp->npages == MAX_PAGE_NUMBER) {
+ (void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
+ abort();
+ }
#ifdef STATISTICS
++mp->pagenew;
#endif
/*
- * Get a BKT from the cache. Assign a new page number, attach it to
- * the hash and lru chains and return.
+ * Get a BKT from the cache. Assign a new page number, attach
+ * it to the head of the hash chain, the tail of the lru chain,
+ * and return.
*/
- if ((b = mpool_bkt(mp)) == NULL)
+ if ((bp = mpool_bkt(mp)) == NULL)
return (NULL);
- *pgnoaddr = b->pgno = mp->npages++;
- b->flags = MPOOL_PINNED;
- inshash(b, b->pgno);
- inschain(b, &mp->lru);
- return (b->page);
+ *pgnoaddr = bp->pgno = mp->npages++;
+ bp->flags = MPOOL_PINNED;
+
+ head = &mp->hqh[HASHKEY(bp->pgno)];
+ CIRCLEQ_INSERT_HEAD(head, bp, hq);
+ CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
+ return (bp->page);
}
/*
- * MPOOL_GET -- get a page from the pool
- *
- * Parameters:
- * mp: mpool cookie
- * pgno: page number
- * flags: not used
- *
- * Returns:
- * RET_ERROR, RET_SUCCESS
+ * mpool_get
+ * Get a page.
*/
void *
mpool_get(mp, pgno, flags)
MPOOL *mp;
pgno_t pgno;
- u_int flags; /* XXX not used? */
+ u_int flags; /* XXX not used? */
{
- BKT *b;
- BKTHDR *hp;
+ struct _hqh *head;
+ BKT *bp;
off_t off;
int nr;
- /*
- * If asking for a specific page that is already in the cache, find
- * it and return it.
- */
- if (b = mpool_look(mp, pgno)) {
+ /* Check for attempt to retrieve a non-existent page. */
+ if (pgno >= mp->npages) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
#ifdef STATISTICS
- ++mp->pageget;
+ ++mp->pageget;
#endif
+
+ /* Check for a page that is cached. */
+ if ((bp = mpool_look(mp, pgno)) != NULL) {
#ifdef DEBUG
- if (b->flags & MPOOL_PINNED)
- __mpoolerr("mpool_get: page %d already pinned",
- b->pgno);
+ if (bp->flags & MPOOL_PINNED) {
+ (void)fprintf(stderr,
+ "mpool_get: page %d already pinned\n", bp->pgno);
+ abort();
+ }
#endif
- rmchain(b);
- inschain(b, &mp->lru);
- b->flags |= MPOOL_PINNED;
- return (b->page);
- }
-
- /* Not allowed to retrieve a non-existent page. */
- if (pgno >= mp->npages) {
- errno = EINVAL;
- return (NULL);
+ /*
+ * Move the page to the head of the hash chain and the tail
+ * of the lru chain.
+ */
+ head = &mp->hqh[HASHKEY(bp->pgno)];
+ CIRCLEQ_REMOVE(head, bp, hq);
+ CIRCLEQ_INSERT_HEAD(head, bp, hq);
+ CIRCLEQ_REMOVE(&mp->lqh, bp, q);
+ CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
+
+ /* Return a pinned page. */
+ bp->flags |= MPOOL_PINNED;
+ return (bp->page);
}
/* Get a page from the cache. */
- if ((b = mpool_bkt(mp)) == NULL)
+ if ((bp = mpool_bkt(mp)) == NULL)
return (NULL);
- b->pgno = pgno;
- b->flags = MPOOL_PINNED;
+ /* Read in the contents. */
#ifdef STATISTICS
++mp->pageread;
#endif
- /* Read in the contents. */
off = mp->pagesize * pgno;
if (lseek(mp->fd, off, SEEK_SET) != off)
return (NULL);
- if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) {
+ if ((nr = read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) {
if (nr >= 0)
errno = EFTYPE;
return (NULL);
}
- if (mp->pgin)
- (mp->pgin)(mp->pgcookie, b->pgno, b->page);
- inshash(b, b->pgno);
- inschain(b, &mp->lru);
-#ifdef STATISTICS
- ++mp->pageget;
-#endif
- return (b->page);
+ /* Set the page number, pin the page. */
+ bp->pgno = pgno;
+ bp->flags = MPOOL_PINNED;
+
+ /*
+ * Add the page to the head of the hash chain and the tail
+ * of the lru chain.
+ */
+ head = &mp->hqh[HASHKEY(bp->pgno)];
+ CIRCLEQ_INSERT_HEAD(head, bp, hq);
+ CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
+
+ /* Run through the user's filter. */
+ if (mp->pgin != NULL)
+ (mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
+
+ return (bp->page);
}
/*
- * MPOOL_PUT -- return a page to the pool
- *
- * Parameters:
- * mp: mpool cookie
- * page: page pointer
- * pgno: page number
- *
- * Returns:
- * RET_ERROR, RET_SUCCESS
+ * mpool_put
+ * Return a page.
*/
int
mpool_put(mp, page, flags)
@@ -265,193 +246,172 @@ mpool_put(mp, page, flags)
void *page;
u_int flags;
{
- BKT *baddr;
-#ifdef DEBUG
- BKT *b;
-#endif
+ BKT *bp;
#ifdef STATISTICS
++mp->pageput;
#endif
- baddr = (BKT *)((char *)page - sizeof(BKT));
+ bp = (BKT *)((char *)page - sizeof(BKT));
#ifdef DEBUG
- if (!(baddr->flags & MPOOL_PINNED))
- __mpoolerr("mpool_put: page %d not pinned", b->pgno);
- for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) {
- if (b == (BKT *)&mp->lru)
- __mpoolerr("mpool_put: %0x: bad address", baddr);
- if (b == baddr)
- break;
+ if (!(bp->flags & MPOOL_PINNED)) {
+ (void)fprintf(stderr,
+ "mpool_put: page %d not pinned\n", bp->pgno);
+ abort();
}
#endif
- baddr->flags &= ~MPOOL_PINNED;
- baddr->flags |= flags & MPOOL_DIRTY;
+ bp->flags &= ~MPOOL_PINNED;
+ bp->flags |= flags & MPOOL_DIRTY;
return (RET_SUCCESS);
}
/*
- * MPOOL_CLOSE -- close the buffer pool
- *
- * Parameters:
- * mp: mpool cookie
- *
- * Returns:
- * RET_ERROR, RET_SUCCESS
+ * mpool_close
+ * Close the buffer pool.
*/
int
mpool_close(mp)
MPOOL *mp;
{
- BKT *b, *next;
+ BKT *bp;
/* Free up any space allocated to the lru pages. */
- for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) {
- next = b->cprev;
- free(b);
+ while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
+ CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
+ free(bp);
}
+
+ /* Free the MPOOL cookie. */
free(mp);
return (RET_SUCCESS);
}
/*
- * MPOOL_SYNC -- sync the file to disk.
- *
- * Parameters:
- * mp: mpool cookie
- *
- * Returns:
- * RET_ERROR, RET_SUCCESS
+ * mpool_sync
+ * Sync the pool to disk.
*/
int
mpool_sync(mp)
MPOOL *mp;
{
- BKT *b;
+ BKT *bp;
- for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev)
- if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR)
+ /* Walk the lru chain, flushing any dirty pages to disk. */
+ for (bp = mp->lqh.cqh_first;
+ bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
+ if (bp->flags & MPOOL_DIRTY &&
+ mpool_write(mp, bp) == RET_ERROR)
return (RET_ERROR);
+
+ /* Sync the file descriptor. */
return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
}
/*
- * MPOOL_BKT -- get/create a BKT from the cache
- *
- * Parameters:
- * mp: mpool cookie
- *
- * Returns:
- * NULL on failure and a pointer to the BKT on success
+ * mpool_bkt
+ * Get a page from the cache (or create one).
*/
static BKT *
mpool_bkt(mp)
MPOOL *mp;
{
- BKT *b;
+ struct _hqh *head;
+ BKT *bp;
+ /* If under the max cached, always create a new page. */
if (mp->curcache < mp->maxcache)
goto new;
/*
- * If the cache is maxxed out, search the lru list for a buffer we
- * can flush. If we find one, write it if necessary and take it off
- * any lists. If we don't find anything we grow the cache anyway.
+ * If the cache is max'd out, walk the lru list for a buffer we
+ * can flush. If we find one, write it (if necessary) and take it
+ * off any lists. If we don't find anything we grow the cache anyway.
* The cache never shrinks.
*/
- for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev)
- if (!(b->flags & MPOOL_PINNED)) {
- if (b->flags & MPOOL_DIRTY &&
- mpool_write(mp, b) == RET_ERROR)
+ for (bp = mp->lqh.cqh_first;
+ bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
+ if (!(bp->flags & MPOOL_PINNED)) {
+ /* Flush if dirty. */
+ if (bp->flags & MPOOL_DIRTY &&
+ mpool_write(mp, bp) == RET_ERROR)
return (NULL);
- rmhash(b);
- rmchain(b);
#ifdef STATISTICS
++mp->pageflush;
#endif
+ /* Remove from the hash and lru queues. */
+ head = &mp->hqh[HASHKEY(bp->pgno)];
+ CIRCLEQ_REMOVE(head, bp, hq);
+ CIRCLEQ_REMOVE(&mp->lqh, bp, q);
#ifdef DEBUG
- {
- void *spage;
- spage = b->page;
- memset(b, 0xff, sizeof(BKT) + mp->pagesize);
- b->page = spage;
+ { void *spage;
+ spage = bp->page;
+ memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
+ bp->page = spage;
}
#endif
- return (b);
+ return (bp);
}
-new: if ((b = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
+new: if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
return (NULL);
#ifdef STATISTICS
++mp->pagealloc;
#endif
-#ifdef DEBUG
- memset(b, 0xff, sizeof(BKT) + mp->pagesize);
+#if defined(DEBUG) || defined(PURIFY)
+ memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
#endif
- b->page = (char *)b + sizeof(BKT);
+ bp->page = (char *)bp + sizeof(BKT);
++mp->curcache;
- return (b);
+ return (bp);
}
/*
- * MPOOL_WRITE -- sync a page to disk
- *
- * Parameters:
- * mp: mpool cookie
- *
- * Returns:
- * RET_ERROR, RET_SUCCESS
+ * mpool_write
+ * Write a page to disk.
*/
static int
-mpool_write(mp, b)
+mpool_write(mp, bp)
MPOOL *mp;
- BKT *b;
+ BKT *bp;
{
off_t off;
- if (mp->pgout)
- (mp->pgout)(mp->pgcookie, b->pgno, b->page);
-
#ifdef STATISTICS
++mp->pagewrite;
#endif
- off = mp->pagesize * b->pgno;
+
+ /* Run through the user's filter. */
+ if (mp->pgout)
+ (mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
+
+ off = mp->pagesize * bp->pgno;
if (lseek(mp->fd, off, SEEK_SET) != off)
return (RET_ERROR);
- if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize)
+ if (write(mp->fd, bp->page, mp->pagesize) != mp->pagesize)
return (RET_ERROR);
- b->flags &= ~MPOOL_DIRTY;
+
+ bp->flags &= ~MPOOL_DIRTY;
return (RET_SUCCESS);
}
/*
- * MPOOL_LOOK -- lookup a page
- *
- * Parameters:
- * mp: mpool cookie
- * pgno: page number
- *
- * Returns:
- * NULL on failure and a pointer to the BKT on success
+ * mpool_look
+ * Lookup a page in the cache.
*/
static BKT *
mpool_look(mp, pgno)
MPOOL *mp;
pgno_t pgno;
{
- register BKT *b;
- register BKTHDR *tb;
+ struct _hqh *head;
+ BKT *bp;
- /* XXX
- * If find the buffer, put it first on the hash chain so can
- * find it again quickly.
- */
- tb = &mp->hashtable[HASHKEY(pgno)];
- for (b = tb->hnext; b != (BKT *)tb; b = b->hnext)
- if (b->pgno == pgno) {
+ head = &mp->hqh[HASHKEY(pgno)];
+ for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
+ if (bp->pgno == pgno) {
#ifdef STATISTICS
++mp->cachehit;
#endif
- return (b);
+ return (bp);
}
#ifdef STATISTICS
++mp->cachemiss;
@@ -461,16 +421,14 @@ mpool_look(mp, pgno)
#ifdef STATISTICS
/*
- * MPOOL_STAT -- cache statistics
- *
- * Parameters:
- * mp: mpool cookie
+ * mpool_stat
+ * Print out cache statistics.
*/
void
mpool_stat(mp)
MPOOL *mp;
{
- BKT *b;
+ BKT *bp;
int cnt;
char *sep;
@@ -492,11 +450,12 @@ mpool_stat(mp)
sep = "";
cnt = 0;
- for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) {
- (void)fprintf(stderr, "%s%d", sep, b->pgno);
- if (b->flags & MPOOL_DIRTY)
+ for (bp = mp->lqh.cqh_first;
+ bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
+ (void)fprintf(stderr, "%s%d", sep, bp->pgno);
+ if (bp->flags & MPOOL_DIRTY)
(void)fprintf(stderr, "d");
- if (b->flags & MPOOL_PINNED)
+ if (bp->flags & MPOOL_PINNED)
(void)fprintf(stderr, "P");
if (++cnt == 10) {
sep = "\n";
@@ -508,33 +467,3 @@ mpool_stat(mp)
(void)fprintf(stderr, "\n");
}
#endif
-
-#ifdef DEBUG
-#if __STDC__
-#include <stdarg.h>
-#else
-#include <varargs.h>
-#endif
-
-static void
-#if __STDC__
-__mpoolerr(const char *fmt, ...)
-#else
-__mpoolerr(fmt, va_alist)
- char *fmt;
- va_dcl
-#endif
-{
- va_list ap;
-#if __STDC__
- va_start(ap, fmt);
-#else
- va_start(ap);
-#endif
- (void)vfprintf(stderr, fmt, ap);
- va_end(ap);
- (void)fprintf(stderr, "\n");
- abort();
- /* NOTREACHED */
-}
-#endif
diff --git a/lib/libc/db/mpool/mpool.libtp b/lib/libc/db/mpool/mpool.libtp
new file mode 100644
index 00000000000..3ab0c8f835c
--- /dev/null
+++ b/lib/libc/db/mpool/mpool.libtp
@@ -0,0 +1,746 @@
+/******************************************************************************
+
+VERSION $Id: mpool.libtp,v 1.1 1996/05/07 09:02:01 deraadt Exp $
+PACKAGE: User Level Shared Memory Manager
+
+DESCRIPTION:
+ This package provides a buffer pool interface implemented as
+ a collection of file pages mapped into shared memory.
+
+ Based on Mark's buffer manager
+
+ROUTINES:
+ External
+ buf_alloc
+ buf_flags
+ buf_get
+ buf_init
+ buf_last
+ buf_open
+ buf_pin
+ buf_sync
+ buf_unpin
+ Internal
+ bf_assign_buf
+ bf_fid_to_fd
+ bf_newbuf
+ bf_put_page
+
+
+******************************************************************************/
+#include <sys/types.h>
+#include <assert.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <errno.h>
+#include "list.h"
+#include "user.h"
+#include "txn_sys.h"
+#include "buf.h"
+#include "semkeys.h"
+#include "error.h"
+
+/*
+	we need to translate between some type of file id that the user
+	process passes and a file descriptor.  For now, it's a nop.
+*/
+
+/*
+	Take and drop the buffer-pool spinlock.  Routines in this file treat
+	a non-zero result from either macro as failure.
+*/
+#define	GET_MASTER	get_sem ( buf_spinlock )
+#define	RELEASE_MASTER	release_sem ( buf_spinlock )
+
+/*
+	LRU shorthands.  LRUID and MRU are parenthesized so that they stay a
+	single operand (and a valid lvalue) wherever they are expanded.
+*/
+#define	LRUID	(*buf_lru)
+#define	LRUP	(bufhdr_table+*buf_lru)
+#define	MRU	(bufhdr_table[*buf_lru].lru.prev)
+
+/* Global indicator that you have started reusing buffers */
+int do_statistics = 0;
+/*
+	Process Statics (pointers into shared memory)
+*/
+static	BUF_T	*buf_table = 0;		/* page buffers; 0 until buf_init attaches */
+static	BUFHDR_T	*bufhdr_table;	/* one header per page buffer */
+static	int	*buf_hash_table;	/* bucket heads; NUM_BUFS marks an empty bucket */
+static	int	*buf_lru;		/* LRU is the free list */
+static	int	buf_spinlock;		/* semaphore id used by GET/RELEASE_MASTER */
+static	FINFO_T	*buf_fids;		/* per-file table indexed by fid */
+static	int	*buf_sp;		/* Pointer to string free space */
+static	char	*buf_strings;		/* file names, addressed by buf_fids[].offset */
+
+/* Process Local FID->FD table */
+static	int	fds[NUM_FILE_ENTRIES];
+
+/* Static routines */
+static	BUFHDR_T	*bf_assign_buf();
+static	int		bf_fid_to_fd();
+static	BUFHDR_T	*bf_newbuf();
+static	int		bf_put_page();
+
+/*
+	buf_init
+
+	Attach to the shared buffer region, point the process-static table
+	pointers at their slices of it, and create the buffer spinlock.
+	When attach_region reports a reference count of one or less, this
+	process also initializes the shared free list, buffer headers, hash
+	table, file-id table and string space, and then releases the
+	spinlock (which, per the original note, is created already held).
+
+	Return 0 on success
+	       1 on failure
+*/
+extern int
+buf_init ( )
+{
+	ADDR_T region;
+	BUFHDR_T *hdr;
+	int n;
+	int ref_count;
+	int creator;
+	int *lockp;
+
+	/* The process-local FID->FD table starts out empty. */
+	for ( n = 0; n < NUM_FILE_ENTRIES; n++ ) {
+		fds[n] = -1;
+	}
+
+	region = attach_region ( BUF_REGION_NAME, BUF_REGION_NUM,
+	    BUF_REGION_SIZE, &ref_count );
+	if ( !region ) {
+		return (1);
+	}
+	error_log3 ( "Buf Region: ADDR: %d ID: %d SIZE: %d\n", region,
+	    BUF_REGION_NUM, BUF_REGION_SIZE );
+
+	/* The tables sit back to back inside the region, in this order. */
+	buf_table = (BUF_T *)region;
+	bufhdr_table = (BUFHDR_T *)(buf_table + NUM_BUFS);
+	buf_hash_table = (int *)(bufhdr_table + NUM_BUFS);
+	buf_lru = buf_hash_table + NUMTABLE_ENTRIES;
+	lockp = buf_lru + 1;
+	buf_fids = (FINFO_T *)(lockp+1);
+	buf_sp = (int *)(buf_fids + NUM_FILE_ENTRIES);
+	buf_strings = (char *)(buf_sp + 1);
+
+	/* Create locking spinlock (gets creating holding the lock) */
+	creator = ( ref_count <= 1 );
+	buf_spinlock = create_sem ( BUF_SPIN_NAME, BUF_SPIN_NUM, creator );
+	if ( buf_spinlock < 0 ) {
+		return(1);
+	}
+	if ( !creator ) {
+		/* Someone else already set the shared structures up. */
+		return (0);
+	}
+	*lockp = buf_spinlock;
+
+	/* Now initialize the buffer manager */
+
+	/* 1. Free list */
+	*buf_lru = 0;
+
+	/* 2. Buffer headers, linked into one ring in index order */
+	for ( n = 0; n < NUM_BUFS; n++ ) {
+		hdr = &bufhdr_table[n];
+		hdr->lru.next = n+1;
+		hdr->lru.prev = n-1;
+		hdr->flags = 0;			/* All Flags off */
+		hdr->refcount = 0;
+		hdr->wait_proc = -1;		/* No sleepers */
+		LISTPE_INIT ( hash, hdr, n );	/* Hash chains */
+	}
+	/* Close the ring at both ends. */
+	bufhdr_table[0].lru.prev = NUM_BUFS-1;
+	bufhdr_table[NUM_BUFS-1].lru.next = 0;
+
+	/* 3. Hash Table: NUM_BUFS is the "empty bucket" value */
+	for ( n = 0; n < NUMTABLE_ENTRIES; n++ ) {
+		buf_hash_table[n] = NUM_BUFS;
+	}
+
+	/* 4. File ID Table */
+	for ( n = 0; n < NUM_FILE_ENTRIES; n++ ) {
+		buf_fids[n].offset = -1;
+		buf_fids[n].npages = -1;
+		buf_fids[n].refcount = 0;
+	}
+
+	/* 5. Free String Pointer */
+	*buf_sp = (FILE_NAME_LEN*NUM_FILE_ENTRIES);
+	if (RELEASE_MASTER) {
+		return(1);
+	}
+	error_log0 ( "Initialized buffer region\n" );
+	return (0);
+}
+
+/*
+	buf_exit
+
+	Release this process's buffer-pool resources: close every file
+	descriptor cached in the local FID->FD table and detach from the
+	shared region.  Nothing is written back here; dirty buffers stay in
+	the shared pool.  Calling it more than once is harmless.
+*/
+extern void
+buf_exit()
+{
+	int ref;
+	int i;
+
+	for ( i = 0; i < NUM_FILE_ENTRIES; i++ ) {
+		if ( fds[i] != -1 ) {
+			(void) close ( fds[i] );
+			/* Forget the descriptor so it is never used after close. */
+			fds[i] = -1;
+		}
+	}
+	if ( buf_table ) {
+		detach_region ( buf_table, BUF_REGION_NUM, BUF_REGION_SIZE, &ref );
+		/* Mark the region as detached so a repeat call does nothing. */
+		buf_table = 0;
+	}
+	return;
+}
+
+/*
+	bf_newbuf
+
+	Pick a buffer header to reuse.  Starting at the LRU entry, skip
+	headers that are pinned or have I/O in progress, and give up when
+	the scan would come back around to the starting entry.  A dirty
+	victim is written out first, so the page chosen need not be clean.
+	The victim is then taken off its hash chain (LISTP_REMOVE), the
+	bucket head is repointed if it named the victim, and the header is
+	reset with INIT_BUF.
+
+	Returns the header, or NULL if every buffer is busy, the victim's
+	file cannot be opened, or the write-back fails.
+*/
+static BUFHDR_T *
+bf_newbuf()
+{
+	BUFHDR_T *victim;
+	int start;
+	int bucket;
+	int fd;
+
+	start = LRUID;
+	victim = LRUP;
+	while ( victim->flags & (BUF_PINNED|BUF_IO_IN_PROGRESS) ) {
+		if ( victim->lru.next == start ) {
+			/* OUT OF BUFFERS */
+			error_log1 ( "All buffers are pinned. %s\n",
+			    "Unable to grant buffer request" );
+			return(NULL);
+		}
+		victim = LISTP_NEXTP (bufhdr_table, lru, victim );
+	}
+
+	/* The victim can be used, but its contents may need saving first. */
+	if ( victim->flags & BUF_DIRTY ) {
+		do_statistics = 1;
+		/*
+			MIS Check for log flushed appropriately
+		*/
+		fd = bf_fid_to_fd(victim->id.file_id);
+		if ( fd == -1 ) {
+			error_log1 ("Invalid fid %d\n", victim->id.file_id);
+			return(NULL);
+		}
+		if ( bf_put_page(fd, victim) < 0 ) {
+			return(NULL);
+		}
+	}
+
+	/* Update Hash Pointers */
+	bucket = BUF_HASH ( victim->id.file_id, victim->id.obj_id );
+	LISTP_REMOVE(bufhdr_table, hash, victim);
+	if ( buf_hash_table[bucket] == (victim-bufhdr_table) ) {
+		/* The bucket named the victim: advance it, or mark it empty. */
+		buf_hash_table[bucket] =
+		    ( victim->hash.next != (victim-bufhdr_table) ) ?
+		    victim->hash.next : NUM_BUFS;
+	}
+	INIT_BUF(victim);
+
+	return(victim);
+}
+/*
+	buf_alloc
+
+	Add a page to a file and return a buffer for it.
+
+	fid		index into the shared file table; must be in range
+	new_pageno	out: page number given to the new page
+
+	The new page number is the file's current page count, which is then
+	incremented.  The buffer is requested pinned, dirty and empty (no
+	read from disk).  Returns the buffer address, or NULL if the lock
+	cannot be taken or released, the page count cannot be determined,
+	or no buffer is available.
+*/
+ADDR_T
+buf_alloc ( fid, new_pageno )
+int fid;
+int *new_pageno;
+{
+	BUFHDR_T *bhp;
+	int len;
+	int ndx;
+	OBJ_T fobj;
+
+	/*
+		Check the index before it subscripts buf_fids, and before the
+		lock is taken so an abort cannot leave the spinlock held.
+	*/
+	assert ( fid >= 0 && fid < NUM_FILE_ENTRIES );
+
+	if (GET_MASTER) {
+		return(NULL);
+	}
+	if ( buf_fids[fid].npages == -1 ) {
+		/* Opening the file fills in npages; the fd itself is not needed. */
+		(void) bf_fid_to_fd ( fid );
+	}
+
+	*new_pageno = buf_fids[fid].npages;
+	if ( *new_pageno == -1 ) {
+		/* Page count still unknown: the open or fstat failed. */
+		(void) RELEASE_MASTER;
+		return ( NULL );
+	}
+	buf_fids[fid].npages++;
+	ndx = BUF_HASH ( fid, *new_pageno );
+	fobj.file_id = fid;
+	fobj.obj_id = *new_pageno;
+	bhp = bf_assign_buf ( ndx, &fobj, BF_PIN|BF_DIRTY|BF_EMPTY, &len );
+	if ( RELEASE_MASTER ) {
+		/* Memory leak */
+		return(NULL);
+	}
+	if ( bhp ) {
+		return ((ADDR_T)(buf_table+(bhp-bufhdr_table)));
+	} else {
+		return ( NULL );
+	}
+}
+
+
+/*
+ buf_get
+
+ Look up page page_id of file file_id in the hash table under the
+ buffer spinlock. If a valid header with that id is on the chain,
+ apply the requested pin/dirty flags, wait for any I/O in progress on
+ it, make it most-recently-used and report BUFSIZE bytes through len.
+ Otherwise hand the request to bf_assign_buf. Returns the buffer
+ address, or NULL on failure.
+
+ Buffer Flags
+ BF_DIRTY Mark page as dirty
+ BF_EMPTY Don't initialize page, just get buffer
+ BF_PIN Retrieve with pin
+
+MIS
+Might want to add a flag that sets an LSN for this buffer if the
+DIRTY flag is set
+
+Eventually, you may want a flag that indicates the I/O and lock
+request should be shipped off together, but not for now.
+*/
+extern ADDR_T
+buf_get ( file_id, page_id, flags, len )
+int file_id;
+int page_id;
+u_long flags;
+int *len; /* Number of bytes read into buffer */
+{
+ BUFHDR_T *bhp;
+ int bufid;
+ int fd; /* not referenced in this function */
+ int ndx;
+ int next_bufid; /* not referenced in this function */
+ int stat;
+ OBJ_T fobj;
+
+ ndx = BUF_HASH ( file_id, page_id );
+ fobj.file_id = (long) file_id;
+ fobj.obj_id = (long) page_id;
+ if ( GET_MASTER ) {
+ return(NULL);
+ }
+ /*
+ This could be a for loop, but we lose speed
+ by making all the cases general purpose so we
+ optimize for the no-collision case.
+ */
+ bufid = buf_hash_table[ndx]; /* bucket head; NUM_BUFS means the bucket is empty */
+ if ( bufid < NUM_BUFS ) {
+ for ( bhp = bufhdr_table+bufid;
+ !OBJ_EQ (bhp->id, fobj) || !(bhp->flags & BUF_VALID);
+ bhp = LISTP_NEXTP ( bufhdr_table, hash, bhp ) ) {
+
+ if ( bhp->hash.next == bufid ) { /* next entry is the bucket head again: no match */
+ goto not_found;
+ }
+ }
+/* found
+   NOTE(review): the pin and dirty bit below are applied before the
+   wait loop; if the header turns out to have been given to another
+   page, buf_get is re-entered without undoing them -- confirm that
+   this is intended. */
+ if ( flags & BF_PIN ) {
+ bhp->flags |= BUF_PINNED;
+ bhp->refcount++;
+#ifdef PIN_DEBUG
+ fprintf(stderr, "buf_get: %X PINNED (%d)\n",
+ buf_table + (bhp-bufhdr_table), bhp->refcount);
+#endif
+ }
+ if ( flags & BF_DIRTY ) {
+ bhp->flags |= BUF_DIRTY;
+ }
+
+ while ( bhp->flags & BUF_IO_IN_PROGRESS ) {
+ /* MIS -- eventually err check here */
+#ifdef DEBUG
+ printf("About to sleep on %d (me: %d\n)\n", bhp->wait_proc,
+ my_txnp - txn_table);
+#endif
+#ifdef WAIT_STATS
+ buf_waits++;
+#endif
+ stat = proc_sleep_on ( &(bhp->wait_proc), buf_spinlock );
+ if ( stat ) {
+ /* Memory leak */
+ return(NULL);
+ }
+ if (!( bhp->flags & BUF_IO_IN_PROGRESS) &&
+ (!OBJ_EQ (bhp->id, fobj) || !(bhp->flags & BUF_VALID))) {
+ if (RELEASE_MASTER)
+ return(NULL);
+ return(buf_get ( file_id, page_id, flags, len )); /* header no longer holds this page: start the lookup over */
+ }
+ }
+ MAKE_MRU( bhp );
+ *len = BUFSIZE;
+ } else {
+not_found:
+ /* If you get here, the page isn't in the hash table */
+ bhp = bf_assign_buf ( ndx, &fobj, flags, len );
+ }
+ /* Common code between found and not found */
+
+ if ( bhp && bhp->flags & BUF_NEWPAGE ) {
+ *len = 0; /* a page flagged BUF_NEWPAGE is reported as holding no bytes */
+ }
+ if (RELEASE_MASTER){
+ /* Memory leak */
+ return(NULL);
+ }
+ if ( bhp ) {
+ return ((ADDR_T)(buf_table+(bhp-bufhdr_table)));
+ } else {
+ return ( NULL );
+ }
+}
+
+/*
+	buf_sync
+
+	MIS - do I want to add file links to buffer pool?
+
+	fid	index into the shared file table
+	close	non-zero: drop one reference to the file; when that was the
+		last reference, first write back every dirty buffer that
+		belongs to the file and disown those buffers
+
+	With close == 0 the call only makes sure the file can be opened and
+	that the lock can be taken and released.  Returns 0 on success and
+	1 on failure.
+*/
+extern int
+buf_sync ( fid, close )
+int fid;
+int close;		/* should we dec refcount and possibly
+			   invalidate all the buffers */
+{
+	int i;
+	int fd;
+	int invalidate;
+	int rc;
+	BUFHDR_T *bhp;
+
+	if ( (fd = bf_fid_to_fd ( fid )) < 0 ) {
+		return(1);
+	}
+	if (GET_MASTER) {
+		return(1);
+	}
+	invalidate = (buf_fids[fid].refcount == 1 && close);
+	if ( invalidate ) {
+		for ( bhp = bufhdr_table, i = 0; i < NUM_BUFS; bhp++, i++ ) {
+			if ( bhp->id.file_id != fid ) {
+				continue;
+			}
+			/* Header state is kept in BUF_* bits; BF_* are request flags. */
+			if ( bhp->flags & BUF_DIRTY ) {
+				rc = bf_put_page ( fd, bhp );
+				if ( rc < 0 ) {
+					/*
+						-2 means bf_put_page could not
+						re-take the lock, so there is
+						nothing to release; otherwise
+						do not return holding it.
+					*/
+					if ( rc != -2 ) {
+						(void) RELEASE_MASTER;
+					}
+					return(1);
+				}
+			}
+			bhp->id.file_id = -1;
+		}
+	}
+	/* invalidate implies close, so close alone decides the decrement. */
+	if ( close ) {
+		buf_fids[fid].refcount--;
+	}
+
+	if (RELEASE_MASTER) {
+		return(1);
+	}
+	return(0);
+}
+
+/*
+	buf_flags
+
+	Set and clear header flags on the buffer at addr, under the buffer
+	spinlock, and make the buffer most-recently-used.
+
+	addr		buffer address; must satisfy ADDR_OK
+	set_flags	BUF_* bits to turn on; BUF_PINNED also takes a
+			reference, BUF_DIRTY also clears BUF_NEWPAGE
+	unset_flags	BUF_* bits to turn off; BUF_PINNED drops a reference
+			and the pin bit is cleared only when none remain
+
+	Returns 0 on success, 1 on a bad address or a locking failure.
+*/
+extern int
+buf_flags ( addr, set_flags, unset_flags )
+ADDR_T addr;
+u_long set_flags;
+u_long unset_flags;
+{
+	int bufid;
+	BUFHDR_T *bhp;
+
+#ifdef PIN_DEBUG
+	/* Six flag names per group, so six %s conversions per group. */
+	fprintf(stderr,
+	    "buf_flags: %X setting %s%s%s%s%s%s releasing %s%s%s%s%s%s\n",
+	    addr,
+	    set_flags&BUF_DIRTY ? "DIRTY " : "",
+	    set_flags&BUF_VALID ? "VALID " : "",
+	    set_flags&BUF_PINNED ? "PINNED " : "",
+	    set_flags&BUF_IO_ERROR ? "IO_ERROR " : "",
+	    set_flags&BUF_IO_IN_PROGRESS ? "IO_IN_PROG " : "",
+	    set_flags&BUF_NEWPAGE ? "NEWPAGE " : "",
+	    unset_flags&BUF_DIRTY ? "DIRTY " : "",
+	    unset_flags&BUF_VALID ? "VALID " : "",
+	    unset_flags&BUF_PINNED ? "PINNED " : "",
+	    unset_flags&BUF_IO_ERROR ? "IO_ERROR " : "",
+	    unset_flags&BUF_IO_IN_PROGRESS ? "IO_IN_PROG " : "",
+	    unset_flags&BUF_NEWPAGE ? "NEWPAGE " : "" );
+#endif
+	if (!ADDR_OK(addr)) {
+		error_log1 ( "buf_flags: Invalid Buffer Address %x\n", addr );
+		return(1);
+	}
+	bufid = ((BUF_T *)addr) - buf_table;
+	assert ( bufid < NUM_BUFS);
+	bhp = &bufhdr_table[bufid];
+	if (GET_MASTER) {
+		return(1);
+	}
+	bhp->flags |= set_flags;
+	if ( set_flags & BUF_PINNED ) {
+		bhp->refcount++;
+	}
+	if ( set_flags & BUF_DIRTY ) {
+		unset_flags |= BUF_NEWPAGE;
+	}
+
+	if ( unset_flags & BUF_PINNED ) {
+		/* An unbalanced unpin must not drive the count negative. */
+		if ( bhp->refcount > 0 ) {
+			bhp->refcount--;
+		}
+		if ( bhp->refcount ) {
+			/* Turn off pin bit so it doesn't get unset */
+			unset_flags &= ~BUF_PINNED;
+		}
+	}
+	bhp->flags &= ~unset_flags;
+	MAKE_MRU(bhp);
+	if (RELEASE_MASTER) {
+		return(1);
+	}
+	return(0);
+}
+
+/*
+	Take a string name and produce an fid.
+
+	If the name is already in the shared file table its reference count
+	is bumped and its fid returned.  Otherwise a free slot is claimed,
+	the name is copied into the shared string space (which is handed out
+	from the top down) and the slot starts with one reference.
+
+	returns -1 on error: locking failure, no free slot, or no string
+	space left
+
+	MIS -- this is a potential problem -- you keep actual names
+	here -- what if people run from different directories?
+*/
+extern int
+buf_name_lookup ( fname )
+char *fname;
+{
+	int i;
+	int fid;
+	int ndx;
+	size_t need;
+
+	fid = -1;
+	if (GET_MASTER) {
+		return(-1);
+	}
+	for ( i = 0; i < NUM_FILE_ENTRIES; i++ ) {
+		if ( buf_fids[i].offset == -1 ) {
+			/* Remember a free slot in case the name is new. */
+			fid = i;
+		} else if (!strcmp (fname, buf_strings+buf_fids[i].offset)) {
+			/* Take the reference while the table is still locked. */
+			buf_fids[i].refcount++;
+			if (RELEASE_MASTER) {
+				/* Failure reports no reference taken. */
+				buf_fids[i].refcount--;
+				return(-1);
+			}
+			return(i);
+		}
+	}
+	if ( fid == -1 ) {
+		error_log0 ( "No more file ID's\n" );
+	} else {
+		/* Name plus its terminating NUL, compared without wraparound. */
+		need = strlen(fname) + 1;
+		if ( *buf_sp < 0 || need > (size_t)*buf_sp ) {
+			error_log0 ( "Out of string space\n" );
+			fid = -1;
+		} else {
+			ndx = *buf_sp - (int)need;
+			*buf_sp = ndx;
+			strcpy ( buf_strings+ndx, fname );
+			buf_fids[fid].offset = ndx;
+			/* Only a slot that was really claimed gets a reference. */
+			buf_fids[fid].refcount = 1;
+		}
+	}
+	if (RELEASE_MASTER) {
+		return(-1);
+	}
+	return(fid);
+}
+
+/*
+	bf_fid_to_fd
+
+	Return this process's file descriptor for fid, opening the file
+	(read/write, created if missing) on first use and caching the result
+	in fds[].  When the shared page count for the file is still unknown
+	(-1) it is computed from the file size in units of BUFSIZE.
+
+	Returns the descriptor, or -1 if the open fails.  An fstat failure is
+	only logged: the descriptor is still returned and npages stays -1.
+
+	NOTE(review): the open-failure message passes four values through
+	error_log3 -- confirm that error_log3 accepts them.
+*/
+static int
+bf_fid_to_fd ( fid )
+int fid;
+{
+	struct stat st;
+	char *path;
+	int fd;
+
+	assert ( (fid < NUM_FILE_ENTRIES) && (buf_fids[fid].offset != -1) );
+
+	/* Already open in this process. */
+	fd = fds[fid];
+	if ( fd != -1 ) {
+		return(fd);
+	}
+
+	path = buf_strings+buf_fids[fid].offset;
+	fd = open ( path, O_RDWR|O_CREAT, 0666 );
+	fds[fid] = fd;
+	if ( fd < 0 ) {
+		error_log3 ( "Error Opening File %s FID: %d FD: %d.  Errno = %d\n",
+		    path, fid, fd,
+		    errno );
+		return(-1);
+	}
+	error_log3 ( "Opening File %s FID: %d FD: %d\n",
+	    path, fid, fd );
+
+	if ( buf_fids[fid].npages != -1 ) {
+		return ( fd );
+	}
+
+	/* Initialize the npages field */
+	if ( fstat ( fd, &st ) ) {
+		error_log3 ( "Error Fstating %s FID: %d.  Errno = %d\n",
+		    path, fid, errno );
+	} else {
+		buf_fids[fid].npages = ( st.st_size / BUFSIZE );
+	}
+	return ( fd );
+}
+
+/*
+	bf_put_page
+
+	Write the page held by bhp to fd at byte offset obj_id << BUFSHIFT.
+	The caller holds the buffer spinlock; it is dropped around the
+	write(2) and taken again afterwards, with BUF_IO_IN_PROGRESS set on
+	the header for the duration.
+
+	Returns	 0	page written in full; BUF_DIRTY cleared
+		-1	seek, unlock or write failure, or a short write;
+			the page stays dirty
+		-2	the spinlock could not be re-taken after the write
+			(the caller does NOT hold it on return)
+
+	NOTE(review): unlike the read path in bf_assign_buf, nothing here
+	wakes a process sleeping on bhp->wait_proc -- confirm whether
+	waiters are woken elsewhere.
+*/
+static int
+bf_put_page ( fd, bhp )
+int fd;
+BUFHDR_T *bhp;
+{
+	int nbytes;
+	int saved_errno;
+
+	assert ( (bhp-bufhdr_table) < NUM_BUFS );
+	/* Widen before shifting so large page numbers do not overflow. */
+	if ( lseek ( fd, (off_t)bhp->id.obj_id << BUFSHIFT, L_SET ) < 0 ) {
+		return(-1);
+	}
+	bhp->flags |= BUF_IO_IN_PROGRESS;
+	if (RELEASE_MASTER) {
+		return(-1);
+	}
+	nbytes = write(fd, buf_table[bhp-bufhdr_table], BUFSIZE);
+	/* The locking call below may change errno; keep write's value. */
+	saved_errno = errno;
+	if (GET_MASTER) {
+		return(-2);
+	}
+	/*
+		The I/O is over whether or not it worked; leaving the bit set
+		would make the buffer permanently unusable.
+	*/
+	bhp->flags &= ~BUF_IO_IN_PROGRESS;
+	if ( nbytes < 0 ) {
+		error_log1 ("Write failed with error code %d\n", saved_errno);
+		return(-1);
+	}
+	if ( nbytes != BUFSIZE ) {
+		/* Part of the page is not on disk: it must stay dirty. */
+		error_log2 ("Short write: %d bytes of %d\n", nbytes, BUFSIZE );
+		return(-1);
+	}
+	bhp->flags &= ~BUF_DIRTY;
+	return (0);
+}
+
+/*
+	bf_assign_fail
+
+	Undo a half-finished assignment after an I/O or open failure.  Must
+	be called with the buffer spinlock held.  The header is flagged with
+	BUF_IO_ERROR and loses BUF_VALID so that lookups skip it, the pin
+	taken on behalf of the caller (who gets NULL and so can never unpin)
+	is dropped, and any sleeper is woken so that it can retry.
+*/
+static void
+bf_assign_fail ( bhp, flags )
+BUFHDR_T *bhp;
+u_long flags;
+{
+	bhp->flags &= ~(BUF_IO_IN_PROGRESS|BUF_VALID);
+	bhp->flags |= BUF_IO_ERROR;
+	if ( flags & BF_PIN ) {
+		if ( bhp->refcount > 0 ) {
+			bhp->refcount--;
+		}
+		if ( bhp->refcount == 0 ) {
+			bhp->flags &= ~BUF_PINNED;
+		}
+	}
+	if ( bhp->wait_proc != -1 ) {
+		proc_wake_id ( bhp->wait_proc );
+		bhp->wait_proc = -1;
+	}
+}
+
+/*
+	bf_assign_buf
+
+	Get a free header from bf_newbuf, give it the identity *obj, link it
+	into hash bucket ndx and, unless the page lies past the end of the
+	file or BF_EMPTY was requested, read the page in from disk.
+
+	ndx	hash bucket for obj
+	obj	file id / page number to assign
+	flags	BF_PIN, BF_DIRTY, BF_EMPTY request bits
+	len	out: number of bytes read (0 when nothing was read)
+
+	Called with the buffer spinlock held.  The lock is dropped around
+	the seek and read and is held again on return, on the I/O error
+	paths as well.  The two exceptions are the returns taken when the
+	lock operations themselves fail: there the lock state is unknown and
+	BUF_IO_IN_PROGRESS is left set, as in the original code.
+
+	Returns the header, or NULL on failure.
+*/
+static BUFHDR_T *
+bf_assign_buf ( ndx, obj, flags, len )
+int ndx;
+OBJ_T *obj;
+u_long flags;
+int *len;		/* Number of bytes read */
+{
+	BUFHDR_T *bhp;
+	int fd;
+	int failed;	/* 0 = ok, 1 = seek failed, 2 = read failed */
+
+	assert ( obj->file_id < NUM_FILE_ENTRIES );
+	bhp = bf_newbuf();
+	if ( !bhp ) {
+		return(NULL);
+	}
+	OBJ_ASSIGN ( (*obj), bhp->id );
+	if ( buf_hash_table[ndx] >= NUM_BUFS ) {
+		/* Empty bucket: this header becomes its head. */
+		buf_hash_table[ndx] = bhp-bufhdr_table;
+	} else {
+		LISTPE_INSERT ( bufhdr_table, hash, bhp, buf_hash_table[ndx] );
+	}
+
+	bhp->flags |= BUF_VALID;
+	if ( flags & BF_PIN ) {
+		bhp->flags |= BUF_PINNED;
+		bhp->refcount++;
+#ifdef PIN_DEBUG
+		fprintf(stderr, "bf_assign_buf: %X PINNED (%d)\n",
+		    buf_table + (bhp-bufhdr_table), bhp->refcount);
+#endif
+	}
+	fd = bf_fid_to_fd(obj->file_id);
+	if ( fd == -1 ) {
+		error_log1 ("Invalid fid %d\n", obj->file_id);
+		bf_assign_fail ( bhp, flags );
+		return(NULL);
+	}
+	if ( obj->obj_id >= buf_fids[obj->file_id].npages) {
+		/* Page past the current end of file: extend, nothing to read. */
+		buf_fids[obj->file_id].npages = obj->obj_id+1;
+		*len = 0;
+	} else if ( flags & BF_EMPTY ) {
+		*len = 0;
+	} else {
+		bhp->flags |= BUF_IO_IN_PROGRESS;
+		if (RELEASE_MASTER) {
+			return(NULL);
+		}
+
+		/* The seek and read run without the lock. */
+		failed = 0;
+		if ( lseek ( fd, (off_t)obj->obj_id << BUFSHIFT, L_SET ) < 0 ) {
+			failed = 1;
+		} else {
+			*len = read(fd, buf_table[bhp-bufhdr_table], BUFSIZE);
+			if ( *len < 0 ) {
+				failed = 2;
+			}
+		}
+
+		/* Shared state is touched again only once the lock is back. */
+		if (GET_MASTER) {
+			return(NULL);
+		}
+		if ( failed == 1 ) {
+			error_log2 ("Unable to perform seek on file: %d to page %d",
+			    obj->file_id, obj->obj_id );
+			bf_assign_fail ( bhp, flags );
+			return(NULL);
+		}
+		if ( failed == 2 ) {
+			error_log2 ("Unable to perform read on file: %d to page %d",
+			    obj->file_id, obj->obj_id );
+			bf_assign_fail ( bhp, flags );
+			return(NULL);
+		}
+
+		bhp->flags &= ~BUF_IO_IN_PROGRESS;
+		if ( bhp->wait_proc != -1 ) {
+			/* wake up waiter and anyone waiting on it */
+#ifdef DEBUG
+			printf("Waking transaction %d due to completed I/O\n",
+			    bhp->wait_proc);
+#endif
+			proc_wake_id ( bhp->wait_proc );
+			bhp->wait_proc = -1;
+		}
+		MAKE_MRU(bhp);
+	}
+
+	if ( flags & BF_DIRTY ) {
+		bhp->flags |= BUF_DIRTY;
+	} else if ( *len < BUFSIZE ) {
+		/* Less than a full page came from disk. */
+		bhp->flags |= BUF_NEWPAGE;
+	}
+	return ( bhp );
+}
+
+/*
+	buf_last
+
+	Return the number of the last page of file fid: the shared page
+	count minus one.  A count of zero is returned as 0, so an empty file
+	and a one-page file give the same answer.
+
+	Returns -1 if the lock cannot be taken or released, or if the page
+	count is still unknown after trying to open the file.
+*/
+int
+buf_last ( fid )
+int fid;
+{
+	int val;
+
+	/* Check the index before taking the lock or touching the table. */
+	assert ( fid >= 0 && fid < NUM_FILE_ENTRIES );
+	if (GET_MASTER) {
+		return(-1);
+	}
+	if ( buf_fids[fid].npages == -1 ) {
+		/* initialize npages field */
+		(void) bf_fid_to_fd ( fid );
+	}
+	val = buf_fids[fid].npages;
+	if ( val > 0 ) {
+		val--;		/* Convert to page number */
+	}
+	/* An unknown count (-1) is passed through as the error value. */
+	if (RELEASE_MASTER) {
+		return(-1);
+	}
+	return(val);
+}
+
+#ifdef DEBUG
+/*
+	buf_dump
+
+	Debugging aid: print the LRU index and then either every buffer
+	header (all != 0) or only header number id, one line per header.
+	The table is read without taking the buffer spinlock.
+*/
+extern void
+buf_dump ( id, all )
+int id;
+int all;
+{
+	int i;
+	BUFHDR_T *bhp;
+
+	printf ( "LRU + %d\n", *buf_lru );
+	if ( all ) {
+		printf("ID\tFID\tPID\tLNEXT\tLPREV\tHNEXT\tHPREV\tSLEEP\tFLAG\tREFS\n");
+		for ( bhp = bufhdr_table, i = 0; i < NUM_BUFS; bhp++, i++ ) {
+			printf ( "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%x\t%d\n", i,
+			    bhp->id.file_id, bhp->id.obj_id,
+			    bhp->lru.next, bhp->lru.prev,
+			    bhp->hash.next, bhp->hash.prev,
+			    bhp->wait_proc, bhp->flags, bhp->refcount );
+		}
+	} else {
+		if ( id < 0 ) {
+			printf ( "Buffer ID (%d) too low\n", id );
+			return;
+		}
+		if ( id >= NUM_BUFS ) {
+			printf ( "Buffer ID (%d) too high\n", id );
+			return;
+		}
+		bhp = bufhdr_table+id;
+		/* Label the row with the requested id; i is not set on this path. */
+		printf ( "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%x\t%d\n", id,
+		    bhp->id.file_id, bhp->id.obj_id,
+		    bhp->lru.next, bhp->lru.prev,
+		    bhp->hash.next, bhp->hash.prev,
+		    bhp->wait_proc, bhp->flags, bhp->refcount );
+	}
+	return;
+}
+#endif
+