Diffstat (limited to 'sys/kern/vfs_bio.c')
-rw-r--r--  sys/kern/vfs_bio.c | 207
1 file changed, 40 insertions, 167 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 42435d9e05f..0e7c21786ec 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1,7 +1,7 @@
-/* $OpenBSD: vfs_bio.c,v 1.116 2009/06/06 18:06:22 art Exp $ */
+/* $OpenBSD: vfs_bio.c,v 1.117 2009/06/15 17:01:26 beck Exp $ */
/* $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $ */
-/*
+/*-
* Copyright (c) 1994 Christopher G. Demetriou
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -62,6 +62,20 @@
#include <miscfs/specfs/specdev.h>
/*
+ * Definitions for the buffer hash lists.
+ */
+#define BUFHASH(dvp, lbn) \
+ (&bufhashtbl[((long)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
+LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash;
+u_long bufhash;
+
+/*
+ * Insq/Remq for the buffer hash lists.
+ */
+#define binshash(bp, dp) LIST_INSERT_HEAD(dp, bp, b_hash)
+#define bremhash(bp) LIST_REMOVE(bp, b_hash)
+
+/*
* Definitions for the buffer free lists.
*/
#define BQUEUES 2 /* number of free buffer queues */
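The BUFHASH() macro added above folds a vnode pointer and a logical block number into an index into a power-of-two table of LIST chains; the mask kept in `bufhash` is filled in by hashinit() further down. A minimal userland sketch of the same index arithmetic, using a simplified stand-in type rather than the kernel's struct vnode:

#include <stdio.h>
#include <stdint.h>

struct fakevnode { char pad[256]; };            /* stand-in for struct vnode */

/* Same arithmetic as BUFHASH(): pointer scaled by object size, plus lbn. */
static unsigned long
bufhash_index(struct fakevnode *dvp, long long lbn, unsigned long mask)
{
        return (((long)(uintptr_t)dvp / sizeof(*dvp) + (int)lbn) & mask);
}

int
main(void)
{
        struct fakevnode v;
        unsigned long mask = 1024 - 1;          /* 1024 chains, as hashinit() would set up */
        long long lbn;

        for (lbn = 0; lbn < 4; lbn++)
                printf("lbn %lld -> chain %lu\n", lbn, bufhash_index(&v, lbn, mask));
        return (0);
}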
@@ -109,9 +123,6 @@ long hidirtypages;
long locleanpages;
long hicleanpages;
long maxcleanpages;
-long backoffpages; /* backoff counter for page allocations */
-long buflowpages; /* bufpages low water mark */
-long bufhighpages; /* bufpages high water mark */
/* XXX - should be defined here. */
extern int bufcachepercent;
@@ -171,13 +182,9 @@ buf_put(struct buf *bp)
panic("buf_put: b_dep is not empty");
#endif
+ bremhash(bp);
LIST_REMOVE(bp, b_list);
bcstats.numbufs--;
- if (backoffpages) {
- backoffpages -= atop(bp->b_bufsize);
- if (backoffpages < 0)
- backoffpages = 0;
- }
if (buf_dealloc_mem(bp) != 0)
return;
@@ -193,7 +200,7 @@ bufinit(void)
struct bqueues *dp;
/* XXX - for now */
- bufhighpages = buflowpages = bufpages = bufcachepercent = bufkvm = 0;
+ bufpages = bufcachepercent = bufkvm = 0;
/*
* If MD code doesn't say otherwise, use 10% of kvm for mappings and
@@ -204,16 +211,6 @@ bufinit(void)
if (bufpages == 0)
bufpages = physmem * bufcachepercent / 100;
- bufhighpages = bufpages;
-
- /*
- * set the base backoff level for the buffer cache to bufpages.
- * we will not allow uvm to steal back more than this number of
- * pages
- */
- buflowpages = physmem * 10 / 100;
-
-
if (bufkvm == 0)
bufkvm = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 10;
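For scale, a worked example of the sizing above, assuming 4 KB pages, 2 GB of physical memory and a bufcachepercent of 10 (illustrative values, not taken from this commit):

#include <stdio.h>

int
main(void)
{
        long physmem = (2L * 1024 * 1024 * 1024) / 4096;    /* 524288 pages */
        int bufcachepercent = 10;
        long bufpages = physmem * bufcachepercent / 100;    /* 52428 pages */

        printf("bufpages = %ld pages (~%ld MB)\n",
            bufpages, bufpages * 4096 / (1024 * 1024));
        return (0);
}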
@@ -240,6 +237,7 @@ bufinit(void)
*/
buf_mem_init(bufkvm);
+ bufhashtbl = hashinit(bufpages / 4, M_CACHE, M_WAITOK, &bufhash);
hidirtypages = (bufpages / 4) * 3;
lodirtypages = bufpages / 2;
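hashinit() above allocates a power-of-two array of LIST heads, one chain per four buffer-cache pages, and hands back the size-minus-one mask that BUFHASH() uses. A userland sketch of that general pattern (the kernel routine's exact rounding and allocator differ; this is an assumption-laden stand-in):

#include <stdlib.h>

struct listhead { void *lh_first; };            /* stand-in for LIST_HEAD(bufhashhdr, buf) */

static struct listhead *
sketch_hashinit(int elements, unsigned long *maskp)
{
        unsigned long hashsize;
        struct listhead *tbl;

        /* Round up to a power of two so "& mask" can replace "% size". */
        for (hashsize = 1; hashsize < (unsigned long)elements; hashsize <<= 1)
                continue;
        tbl = calloc(hashsize, sizeof(*tbl));   /* kernel uses malloc(9) + LIST_INIT */
        if (tbl != NULL)
                *maskp = hashsize - 1;          /* what ends up in `bufhash` */
        return (tbl);
}

int
main(void)
{
        unsigned long mask;
        struct listhead *tbl = sketch_hashinit(1000, &mask);

        return (tbl != NULL && mask == 1023 ? 0 : 1);
}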
@@ -253,104 +251,6 @@ bufinit(void)
maxcleanpages = locleanpages;
}
-/*
- * Change cachepct
- */
-void
-bufadjust(int newbufpages)
-{
- /*
- * XXX - note, bufkvm was allocated once, based on 10% of physmem
- * see above.
- */
- struct buf *bp;
- int s;
-
- s = splbio();
- bufpages = newbufpages;
-
- hidirtypages = (bufpages / 4) * 3;
- lodirtypages = bufpages / 2;
-
- /*
- * When we hit 95% of pages being clean, we bring them down to
- * 90% to have some slack.
- */
- hicleanpages = bufpages - (bufpages / 20);
- locleanpages = bufpages - (bufpages / 10);
-
- maxcleanpages = locleanpages;
-
- /*
- * If we we have more buffers allocated than bufpages,
- * free them up to get back down. this may possibly consume
- * all our clean pages...
- */
- while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) &&
- (bcstats.numbufpages > bufpages)) {
- bremfree(bp);
- if (bp->b_vp) {
- RB_REMOVE(buf_rb_bufs,
- &bp->b_vp->v_bufs_tree, bp);
- brelvp(bp);
- }
- buf_put(bp);
- }
-
- /*
- * Wake up cleaner if we're getting low on pages. We might
- * now have too much dirty, or have fallen below our low
- * water mark on clean pages so we need to free more stuff
- * up.
- */
- if (bcstats.numdirtypages >= hidirtypages ||
- bcstats.numcleanpages <= locleanpages)
- wakeup(&bd_req);
-
- /*
- * if immediate action has not freed up enough goo for us
- * to proceed - we tsleep and wait for the cleaner above
- * to do it's work and get us reduced down to sanity.
- */
- while (bcstats.numbufpages > bufpages) {
- tsleep(&needbuffer, PRIBIO, "needbuffer", 0);
- }
- splx(s);
-}
-
-/*
- * Make the buffer cache back off from cachepct.
- */
-int
-bufbackoff()
-{
- /*
- * Back off the amount of buffer cache pages. Called by the page
- * daemon to consume buffer cache pages rather than swapping.
- *
- * On success, it frees N pages from the buffer cache, and sets
- * a flag so that the next N allocations from buf_get will recycle
- * a buffer rather than allocate a new one. It then returns 0 to the
- * caller.
- *
- * on failure, it could free no pages from the buffer cache, does
- * nothing and returns -1 to the caller.
- */
- long d;
-
- if (bufpages <= buflowpages)
- return(-1);
-
- if (bufpages - BACKPAGES >= buflowpages)
- d = BACKPAGES;
- else
- d = bufpages - buflowpages;
- backoffpages = BACKPAGES;
- bufadjust(bufpages - d);
- backoffpages = BACKPAGES;
- return(0);
-}
-
struct buf *
bio_doread(struct vnode *vp, daddr64_t blkno, int size, int async)
{
@@ -776,12 +676,10 @@ brelse(struct buf *bp)
CLR(bp->b_flags, B_DELWRI);
}
- if (bp->b_vp) {
- RB_REMOVE(buf_rb_bufs, &bp->b_vp->v_bufs_tree,
- bp);
+ if (bp->b_vp)
brelvp(bp);
- }
- bp->b_vp = NULL;
+ bremhash(bp);
+ binshash(bp, &invalhash);
/*
* If the buffer has no associated data, place it back in the
@@ -799,9 +697,6 @@ brelse(struct buf *bp)
CLR(bp->b_flags, B_WANTED);
wakeup(bp);
}
- if (bp->b_vp != NULL)
- RB_REMOVE(buf_rb_bufs,
- &bp->b_vp->v_bufs_tree, bp);
buf_put(bp);
splx(s);
return;
@@ -863,14 +758,15 @@ struct buf *
incore(struct vnode *vp, daddr64_t blkno)
{
struct buf *bp;
- struct buf b;
-
- /* Search buf lookup tree */
- b.b_lblkno = blkno;
- bp = RB_FIND(buf_rb_bufs, &vp->v_bufs_tree, &b);
- if (bp && !ISSET(bp->b_flags, B_INVAL))
- return(bp);
- return(NULL);
+
+ /* Search hash chain */
+ LIST_FOREACH(bp, BUFHASH(vp, blkno), b_hash) {
+ if (bp->b_lblkno == blkno && bp->b_vp == vp &&
+ !ISSET(bp->b_flags, B_INVAL))
+ return (bp);
+ }
+
+ return (NULL);
}
/*
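incore() now walks the single chain picked by BUFHASH(vp, blkno) and matches on both the logical block number and the vnode, skipping anything marked B_INVAL. A standalone sketch of that chain walk, using the userland <sys/queue.h> LIST macros (assumed available, as on the BSDs) and simplified stand-ins for struct buf and struct vnode:

#include <sys/queue.h>
#include <stddef.h>

struct sbuf {
        LIST_ENTRY(sbuf) b_hash;                /* chain linkage, as b_hash above */
        void            *b_vp;
        long long        b_lblkno;
        int              b_invalid;             /* stands in for ISSET(b_flags, B_INVAL) */
};
LIST_HEAD(schain, sbuf);

static struct sbuf *
sketch_incore(struct schain *chain, void *vp, long long blkno)
{
        struct sbuf *bp;

        LIST_FOREACH(bp, chain, b_hash) {
                if (bp->b_lblkno == blkno && bp->b_vp == vp && !bp->b_invalid)
                        return (bp);
        }
        return (NULL);
}

int
main(void)
{
        struct schain chain = LIST_HEAD_INITIALIZER(chain);
        struct sbuf b = { .b_vp = &chain, .b_lblkno = 7 };

        LIST_INSERT_HEAD(&chain, &b, b_hash);   /* what binshash() does */
        return (sketch_incore(&chain, &chain, 7) == &b ? 0 : 1);
}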
@@ -885,7 +781,6 @@ struct buf *
getblk(struct vnode *vp, daddr64_t blkno, int size, int slpflag, int slptimeo)
{
struct buf *bp;
- struct buf b;
int s, error;
/*
@@ -899,9 +794,9 @@ getblk(struct vnode *vp, daddr64_t blkno, int size, int slpflag, int slptimeo)
* the block until the write is finished.
*/
start:
- b.b_lblkno = blkno;
- bp = RB_FIND(buf_rb_bufs, &vp->v_bufs_tree, &b);
- if (bp != NULL) {
+ LIST_FOREACH(bp, BUFHASH(vp, blkno), b_hash) {
+ if (bp->b_lblkno != blkno || bp->b_vp != vp)
+ continue;
s = splbio();
if (ISSET(bp->b_flags, B_BUSY)) {
@@ -950,24 +845,11 @@ geteblk(int size)
struct buf *
buf_get(struct vnode *vp, daddr64_t blkno, size_t size)
{
- static int gcount = 0;
struct buf *bp;
int poolwait = size == 0 ? PR_NOWAIT : PR_WAITOK;
int npages;
int s;
- /*
- * if we were previously backed off, slowly climb back up
- * to the high water mark again.
- */
- if ((backoffpages == 0) && (bufpages < bufhighpages)) {
- if ( gcount == 0 ) {
- bufadjust(bufpages + BACKPAGES);
- gcount += BACKPAGES;
- } else
- gcount--;
- }
-
s = splbio();
if (size) {
/*
@@ -985,11 +867,8 @@ buf_get(struct vnode *vp, daddr64_t blkno, size_t size)
while (bcstats.numcleanpages > locleanpages) {
bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]);
bremfree(bp);
- if (bp->b_vp) {
- RB_REMOVE(buf_rb_bufs,
- &bp->b_vp->v_bufs_tree, bp);
+ if (bp->b_vp)
brelvp(bp);
- }
buf_put(bp);
}
}
@@ -999,21 +878,16 @@ buf_get(struct vnode *vp, daddr64_t blkno, size_t size)
/*
* Free some buffers until we have enough space.
*/
- while ((bcstats.numbufpages + npages > bufpages)
- || backoffpages) {
+ while (bcstats.numbufpages + npages > bufpages) {
int freemax = 5;
int i = freemax;
while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) && i--) {
bremfree(bp);
- if (bp->b_vp) {
- RB_REMOVE(buf_rb_bufs,
- &bp->b_vp->v_bufs_tree, bp);
+ if (bp->b_vp)
brelvp(bp);
- }
buf_put(bp);
}
- if (freemax == i &&
- (bcstats.numbufpages + npages > bufpages)) {
+ if (freemax == i) {
needbuffer++;
tsleep(&needbuffer, PRIBIO, "needbuffer", 0);
splx(s);
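With the backoff machinery gone, buf_get() only reclaims when the cache would exceed bufpages: it frees at most freemax (5) clean buffers per pass and, if it could free none at all, bumps needbuffer and sleeps until a release wakes it. A userland analogue of that control flow, with pthread primitives standing in for splbio()/tsleep()/wakeup() (a sketch of the pattern under those assumptions, not the kernel code):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  space = PTHREAD_COND_INITIALIZER;  /* plays the role of &needbuffer */
static long cachepages, cleanpages, budget = 100;

/* Mock of the "pull one buffer off BQ_CLEAN and buf_put() it" step. */
static bool
free_one_clean(void)
{
        if (cleanpages == 0)
                return (false);
        cleanpages--;
        cachepages--;
        return (true);
}

/* Analogue of the reservation loop in buf_get(). */
static void
reserve_pages(long npages)
{
        pthread_mutex_lock(&lock);
        while (cachepages + npages > budget) {
                int freemax = 5, freed = 0;

                while (freed < freemax && free_one_clean())
                        freed++;
                if (freed == 0)                 /* nothing reclaimable: wait */
                        pthread_cond_wait(&space, &lock);
        }
        cachepages += npages;
        pthread_mutex_unlock(&lock);
}

/* A releaser would drop cachepages/cleanpages and pthread_cond_signal(&space). */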
@@ -1054,12 +928,11 @@ buf_get(struct vnode *vp, daddr64_t blkno, size_t size)
bp->b_blkno = bp->b_lblkno = blkno;
bgetvp(vp, bp);
- if (RB_INSERT(buf_rb_bufs, &vp->v_bufs_tree, bp))
- panic("buf_get: dup lblk vp %p bp %p", vp, bp);
+ binshash(bp, BUFHASH(vp, blkno));
} else {
bp->b_vnbufs.le_next = NOLIST;
SET(bp->b_flags, B_INVAL);
- bp->b_vp = NULL;
+ binshash(bp, &invalhash);
}
LIST_INSERT_HEAD(&bufhead, bp, b_list);