From 4bb22f0a2ededa74481dfa34ba5a9a5a0613a39b Mon Sep 17 00:00:00 2001 From: Bob Beck Date: Wed, 3 Jun 2009 04:30:58 +0000 Subject: Change bufhash from the old grotty hash table to red-black trees hanging off the vnode. ok art@, oga@, miod@ --- sys/kern/vfs_bio.c | 68 +++++++++++++++++++++++++---------------------------- sys/kern/vfs_subr.c | 17 +++++++++++++- sys/sys/buf.h | 8 +++++-- sys/sys/vnode.h | 7 +++++- 4 files changed, 60 insertions(+), 40 deletions(-) (limited to 'sys') diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 5be42faeac0..6706e7262a9 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_bio.c,v 1.112 2009/04/22 13:12:26 art Exp $ */ +/* $OpenBSD: vfs_bio.c,v 1.113 2009/06/03 04:30:57 beck Exp $ */ /* $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $ */ /*- @@ -61,20 +61,6 @@ #include -/* - * Definitions for the buffer hash lists. - */ -#define BUFHASH(dvp, lbn) \ - (&bufhashtbl[((long)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash]) -LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash; -u_long bufhash; - -/* - * Insq/Remq for the buffer hash lists. - */ -#define binshash(bp, dp) LIST_INSERT_HEAD(dp, bp, b_hash) -#define bremhash(bp) LIST_REMOVE(bp, b_hash) - /* * Definitions for the buffer free lists. */ @@ -182,7 +168,6 @@ buf_put(struct buf *bp) panic("buf_put: b_dep is not empty"); #endif - bremhash(bp); LIST_REMOVE(bp, b_list); bcstats.numbufs--; @@ -237,7 +222,6 @@ bufinit(void) */ buf_mem_init(bufkvm); - bufhashtbl = hashinit(bufpages / 4, M_CACHE, M_WAITOK, &bufhash); hidirtypages = (bufpages / 4) * 3; lodirtypages = bufpages / 2; @@ -676,10 +660,12 @@ brelse(struct buf *bp) CLR(bp->b_flags, B_DELWRI); } - if (bp->b_vp) + if (bp->b_vp) { + RB_REMOVE(buf_rb_bufs, &bp->b_vp->v_bufs_tree, + bp); brelvp(bp); - bremhash(bp); - binshash(bp, &invalhash); + } + bp->b_vp = NULL; /* * If the buffer has no associated data, place it back in the @@ -697,6 +683,9 @@ brelse(struct buf *bp) CLR(bp->b_flags, B_WANTED); wakeup(bp); } + if (bp->b_vp != NULL) + RB_REMOVE(buf_rb_bufs, + &bp->b_vp->v_bufs_tree, bp); buf_put(bp); splx(s); return; @@ -758,15 +747,14 @@ struct buf * incore(struct vnode *vp, daddr64_t blkno) { struct buf *bp; - - /* Search hash chain */ - LIST_FOREACH(bp, BUFHASH(vp, blkno), b_hash) { - if (bp->b_lblkno == blkno && bp->b_vp == vp && - !ISSET(bp->b_flags, B_INVAL)) - return (bp); - } - - return (NULL); + struct buf b; + + /* Search buf lookup tree */ + b.b_lblkno = blkno; + bp = RB_FIND(buf_rb_bufs, &vp->v_bufs_tree, &b); + if (bp && !ISSET(bp->b_flags, B_INVAL)) + return(bp); + return(NULL); } /* @@ -781,6 +769,7 @@ struct buf * getblk(struct vnode *vp, daddr64_t blkno, int size, int slpflag, int slptimeo) { struct buf *bp; + struct buf b; int s, error; /* @@ -794,9 +783,9 @@ getblk(struct vnode *vp, daddr64_t blkno, int size, int slpflag, int slptimeo) * the block until the write is finished. */ start: - LIST_FOREACH(bp, BUFHASH(vp, blkno), b_hash) { - if (bp->b_lblkno != blkno || bp->b_vp != vp) - continue; + b.b_lblkno = blkno; + bp = RB_FIND(buf_rb_bufs, &vp->v_bufs_tree, &b); + if (bp != NULL) { s = splbio(); if (ISSET(bp->b_flags, B_BUSY)) { @@ -868,8 +857,11 @@ buf_get(struct vnode *vp, daddr64_t blkno, size_t size) while (bcstats.numcleanpages > locleanpages) { bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]); bremfree(bp); - if (bp->b_vp) + if (bp->b_vp) { + RB_REMOVE(buf_rb_bufs, + &bp->b_vp->v_bufs_tree, bp); brelvp(bp); + } buf_put(bp); } } @@ -884,8 +876,11 @@ buf_get(struct vnode *vp, daddr64_t blkno, size_t size) int i = freemax; while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) && i--) { bremfree(bp); - if (bp->b_vp) + if (bp->b_vp) { + RB_REMOVE(buf_rb_bufs, + &bp->b_vp->v_bufs_tree, bp); brelvp(bp); + } buf_put(bp); } if (freemax == i) { @@ -929,11 +924,12 @@ buf_get(struct vnode *vp, daddr64_t blkno, size_t size) bp->b_blkno = bp->b_lblkno = blkno; bgetvp(vp, bp); - binshash(bp, BUFHASH(vp, blkno)); + if (RB_INSERT(buf_rb_bufs, &vp->v_bufs_tree, bp)) + panic("buf_get: dup lblk vp %p bp %p", vp, bp); } else { bp->b_vnbufs.le_next = NOLIST; SET(bp->b_flags, B_INVAL); - binshash(bp, &invalhash); + bp->b_vp = NULL; } LIST_INSERT_HEAD(&bufhead, bp, b_list); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index ce7eeedae86..069cd7ce121 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_subr.c,v 1.175 2008/11/10 11:53:16 pedro Exp $ */ +/* $OpenBSD: vfs_subr.c,v 1.176 2009/06/03 04:30:57 beck Exp $ */ /* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */ /* @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -115,6 +116,19 @@ void printlockedvnodes(void); struct pool vnode_pool; +static int rb_buf_compare(struct buf *b1, struct buf *b2); +RB_GENERATE(buf_rb_bufs, buf, b_rbbufs, rb_buf_compare); + +static int +rb_buf_compare(struct buf *b1, struct buf *b2) +{ + if (b1->b_lblkno < b2->b_lblkno) + return(-1); + if (b1->b_lblkno > b2->b_lblkno) + return(1); + return(0); +} + /* * Initialize the vnode management data structures. */ @@ -345,6 +359,7 @@ getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *), ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) { splx(s); vp = pool_get(&vnode_pool, PR_WAITOK | PR_ZERO); + RB_INIT(&vp->v_bufs_tree); numvnodes++; } else { for (vp = TAILQ_FIRST(listhd); vp != NULLVP; diff --git a/sys/sys/buf.h b/sys/sys/buf.h index dc173eb6233..a537c6b95aa 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -1,4 +1,4 @@ -/* $OpenBSD: buf.h,v 1.62 2009/06/03 03:14:28 thib Exp $ */ +/* $OpenBSD: buf.h,v 1.63 2009/06/03 04:30:57 beck Exp $ */ /* $NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $ */ /* @@ -40,12 +40,16 @@ #ifndef _SYS_BUF_H_ #define _SYS_BUF_H_ #include +#include #define NOLIST ((struct buf *)0x87654321) struct buf; struct vnode; +struct buf_rb_bufs; +RB_PROTOTYPE(buf_rb_bufs, buf, b_rbbufs, rb_buf_compare); + LIST_HEAD(bufhead, buf); /* @@ -72,8 +76,8 @@ extern struct bio_ops { * The buffer header describes an I/O operation in the kernel. */ struct buf { + RB_ENTRY(buf) b_rbbufs; /* vnode "hash" tree */ LIST_ENTRY(buf) b_list; /* All allocated buffers. */ - LIST_ENTRY(buf) b_hash; /* Hash chain. */ LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */ TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */ time_t b_synctime; /* Time this buffer should be flushed */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index d0bf58af37c..82c3312355f 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vnode.h,v 1.97 2008/12/23 21:43:15 thib Exp $ */ +/* $OpenBSD: vnode.h,v 1.98 2009/06/03 04:30:57 beck Exp $ */ /* $NetBSD: vnode.h,v 1.38 1996/02/29 20:59:05 cgd Exp $ */ /* @@ -32,10 +32,12 @@ * @(#)vnode.h 8.11 (Berkeley) 11/21/94 */ +#include #include #include #include #include +#include #include #include @@ -79,6 +81,8 @@ enum vtagtype { */ LIST_HEAD(buflists, buf); +RB_HEAD(buf_rb_bufs, buf); + struct vnode { struct uvm_vnode v_uvm; /* uvm data */ int (**v_op)(void *); /* vnode operations vector */ @@ -94,6 +98,7 @@ struct vnode { struct mount *v_mount; /* ptr to vfs we are in */ TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */ LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */ + struct buf_rb_bufs v_bufs_tree; /* lookup of all bufs */ struct buflists v_cleanblkhd; /* clean blocklist head */ struct buflists v_dirtyblkhd; /* dirty blocklist head */ u_int v_numoutput; /* num of writes in progress */ -- cgit v1.2.3