diff options
-rw-r--r-- | sys/kern/vfs_bio.c | 366 | ||||
-rw-r--r-- | sys/kern/vfs_biomem.c | 57 | ||||
-rw-r--r-- | sys/sys/buf.h | 4 |
3 files changed, 184 insertions, 243 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index a2ed79f97f4..5be42faeac0 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_bio.c,v 1.111 2009/03/23 15:10:44 beck Exp $ */ +/* $OpenBSD: vfs_bio.c,v 1.112 2009/04/22 13:12:26 art Exp $ */ /* $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $ */ /*- @@ -92,8 +92,6 @@ struct bio_ops bioops; */ struct pool bufpool; struct bufhead bufhead = LIST_HEAD_INITIALIZER(bufhead); -struct buf *buf_get(size_t); -struct buf *buf_stub(struct vnode *, daddr64_t); void buf_put(struct buf *); /* @@ -103,8 +101,7 @@ void buf_put(struct buf *); #define binstailfree(bp, dp) TAILQ_INSERT_TAIL(dp, bp, b_freelist) struct buf *bio_doread(struct vnode *, daddr64_t, int, int); -struct buf *getnewbuf(size_t, int, int, int *); -void buf_init(struct buf *); +struct buf *buf_get(struct vnode *, daddr64_t, size_t); void bread_cluster_callback(struct buf *); /* @@ -166,98 +163,6 @@ bremfree(struct buf *bp) } void -buf_init(struct buf *bp) -{ - splassert(IPL_BIO); - - bzero((char *)bp, sizeof *bp); - bp->b_vnbufs.le_next = NOLIST; - bp->b_freelist.tqe_next = NOLIST; - bp->b_synctime = time_uptime + 300; - bp->b_dev = NODEV; - LIST_INIT(&bp->b_dep); -} - -/* - * This is a non-sleeping expanded equivalent of getblk() that allocates only - * the buffer structure, and not its contents. 
- */ -struct buf * -buf_stub(struct vnode *vp, daddr64_t lblkno) -{ - struct buf *bp; - int s; - - s = splbio(); - bp = pool_get(&bufpool, PR_NOWAIT); - splx(s); - - if (bp == NULL) - return (NULL); - - bzero((char *)bp, sizeof *bp); - bp->b_vnbufs.le_next = NOLIST; - bp->b_freelist.tqe_next = NOLIST; - bp->b_synctime = time_uptime + 300; - bp->b_dev = NODEV; - bp->b_bufsize = 0; - bp->b_data = NULL; - bp->b_flags = 0; - bp->b_dev = NODEV; - bp->b_blkno = bp->b_lblkno = lblkno; - bp->b_iodone = NULL; - bp->b_error = 0; - bp->b_resid = 0; - bp->b_bcount = 0; - bp->b_dirtyoff = bp->b_dirtyend = 0; - bp->b_validoff = bp->b_validend = 0; - - LIST_INIT(&bp->b_dep); - - buf_acquire_unmapped(bp); - - s = splbio(); - LIST_INSERT_HEAD(&bufhead, bp, b_list); - bcstats.numbufs++; - bgetvp(vp, bp); - splx(s); - - return (bp); -} - -struct buf * -buf_get(size_t size) -{ - struct buf *bp; - int npages; - - splassert(IPL_BIO); - - KASSERT(size > 0); - - size = round_page(size); - npages = atop(size); - - if (bcstats.numbufpages + npages > bufpages) - return (NULL); - - bp = pool_get(&bufpool, PR_WAITOK); - - buf_init(bp); - bp->b_flags = B_INVAL; - buf_alloc_pages(bp, size); - bp->b_data = NULL; - binsheadfree(bp, &bufqueues[BQ_CLEAN]); - binshash(bp, &invalhash); - LIST_INSERT_HEAD(&bufhead, bp, b_list); - bcstats.numbufs++; - bcstats.freebufs++; - bcstats.numcleanpages += atop(bp->b_bufsize); - - return (bp); -} - -void buf_put(struct buf *bp) { splassert(IPL_BIO); @@ -277,6 +182,7 @@ buf_put(struct buf *bp) panic("buf_put: b_dep is not empty"); #endif + bremhash(bp); LIST_REMOVE(bp, b_list); bcstats.numbufs--; @@ -438,20 +344,33 @@ breadn(struct vnode *vp, daddr64_t blkno, int size, daddr64_t rablks[], void bread_cluster_callback(struct buf *bp) { + struct buf **xbpp = bp->b_saveaddr; int i; - struct buf **xbpp; - xbpp = (struct buf **)bp->b_saveaddr; + if (xbpp[1] != NULL) { + size_t newsize = xbpp[1]->b_bufsize; - for (i = 0; xbpp[i] != 0; i++) { + /* + * Shrink this buffer 
to only cover its part of the total I/O. + */ + buf_shrink_mem(bp, newsize); + bp->b_bcount = newsize; + } + + for (i = 1; xbpp[i] != 0; i++) { if (ISSET(bp->b_flags, B_ERROR)) SET(xbpp[i]->b_flags, B_INVAL | B_ERROR); biodone(xbpp[i]); } free(xbpp, M_TEMP); - bp->b_pobj = NULL; - buf_put(bp); + + if (ISSET(bp->b_flags, B_ASYNC)) { + brelse(bp); + } else { + CLR(bp->b_flags, B_WANTED); + wakeup(bp); + } } int @@ -464,14 +383,14 @@ bread_cluster(struct vnode *vp, daddr64_t blkno, int size, struct buf **rbpp) *rbpp = bio_doread(vp, blkno, size, 0); if (size != round_page(size)) - return (biowait(*rbpp)); + goto out; if (VOP_BMAP(vp, blkno + 1, NULL, &sblkno, &maxra)) - return (biowait(*rbpp)); + goto out; maxra++; if (sblkno == -1 || maxra < 2) - return (biowait(*rbpp)); + goto out; howmany = MAXPHYS / size; if (howmany > maxra) @@ -479,66 +398,60 @@ bread_cluster(struct vnode *vp, daddr64_t blkno, int size, struct buf **rbpp) xbpp = malloc((howmany + 1) * sizeof(struct buf *), M_TEMP, M_NOWAIT); if (xbpp == NULL) - return (biowait(*rbpp)); + goto out; - for (i = 0; i < howmany; i++) { - if (incore(vp, blkno + i + 1)) { - for (--i; i >= 0; i--) { - SET(xbpp[i]->b_flags, B_INVAL); - brelse(xbpp[i]); - } - free(xbpp, M_TEMP); - return (biowait(*rbpp)); - } - xbpp[i] = buf_stub(vp, blkno + i + 1); + for (i = howmany - 1; i >= 0; i--) { + size_t sz; + + /* + * First buffer allocates big enough size to cover what + * all the other buffers need. + */ + sz = i == 0 ? 
howmany * size : 0; + + xbpp[i] = buf_get(vp, blkno + i + 1, sz); if (xbpp[i] == NULL) { - for (--i; i >= 0; i--) { + for (++i; i < howmany; i++) { SET(xbpp[i]->b_flags, B_INVAL); brelse(xbpp[i]); } free(xbpp, M_TEMP); - return (biowait(*rbpp)); + goto out; } } - xbpp[howmany] = 0; + bp = xbpp[0]; - bp = getnewbuf(howmany * size, 0, 0, NULL); - if (bp == NULL) { - for (i = 0; i < howmany; i++) { - SET(xbpp[i]->b_flags, B_INVAL); - brelse(xbpp[i]); - } - free(xbpp, M_TEMP); - return (biowait(*rbpp)); - } + xbpp[howmany] = 0; inc = btodb(size); - for (i = 0; i < howmany; i++) { + for (i = 1; i < howmany; i++) { bcstats.pendingreads++; bcstats.numreads++; SET(xbpp[i]->b_flags, B_READ | B_ASYNC); - binshash(xbpp[i], BUFHASH(vp, xbpp[i]->b_lblkno)); xbpp[i]->b_blkno = sblkno + (i * inc); xbpp[i]->b_bufsize = xbpp[i]->b_bcount = size; xbpp[i]->b_data = NULL; xbpp[i]->b_pobj = bp->b_pobj; xbpp[i]->b_poffs = bp->b_poffs + (i * size); - buf_acquire_unmapped(xbpp[i]); } + KASSERT(bp->b_lblkno == blkno + 1); + KASSERT(bp->b_vp == vp); + bp->b_blkno = sblkno; - bp->b_lblkno = blkno + 1; SET(bp->b_flags, B_READ | B_ASYNC | B_CALL); + bp->b_saveaddr = (void *)xbpp; bp->b_iodone = bread_cluster_callback; - bp->b_vp = vp; + bcstats.pendingreads++; bcstats.numreads++; VOP_STRATEGY(bp); curproc->p_stats->p_ru.ru_inblock++; +out: return (biowait(*rbpp)); } @@ -738,7 +651,6 @@ brelse(struct buf *bp) struct bqueues *bufq; int s; - /* Block disk interrupts. */ s = splbio(); if (bp->b_data != NULL) @@ -766,6 +678,8 @@ brelse(struct buf *bp) if (bp->b_vp) brelvp(bp); + bremhash(bp); + binshash(bp, &invalhash); /* * If the buffer has no associated data, place it back in the @@ -824,7 +738,7 @@ brelse(struct buf *bp) /* Wake up any processes waiting for any buffer to become free. */ if (needbuffer) { needbuffer--; - wakeup_one(&needbuffer); + wakeup(&needbuffer); } /* Wake up any processes waiting for _this_ buffer to become free. 
*/ @@ -866,8 +780,7 @@ incore(struct vnode *vp, daddr64_t blkno) struct buf * getblk(struct vnode *vp, daddr64_t blkno, int size, int slpflag, int slptimeo) { - struct bufhashhdr *bh; - struct buf *bp, *nb = NULL; + struct buf *bp; int s, error; /* @@ -880,7 +793,6 @@ getblk(struct vnode *vp, daddr64_t blkno, int size, int slpflag, int slptimeo) * case, we can't allow the system to allocate a new buffer for * the block until the write is finished. */ - bh = BUFHASH(vp, blkno); start: LIST_FOREACH(bp, BUFHASH(vp, blkno), b_hash) { if (bp->b_lblkno != blkno || bp->b_vp != vp) @@ -888,12 +800,6 @@ start: s = splbio(); if (ISSET(bp->b_flags, B_BUSY)) { - if (nb != NULL) { - SET(nb->b_flags, B_INVAL); - binshash(nb, &invalhash); - brelse(nb); - nb = NULL; - } SET(bp->b_flags, B_WANTED); error = tsleep(bp, slpflag | (PRIBIO + 1), "getblk", slptimeo); @@ -909,36 +815,14 @@ start: SET(bp->b_flags, B_CACHE); buf_acquire(bp); splx(s); - break; + return (bp); } splx(s); } - if (nb && bp) { - SET(nb->b_flags, B_INVAL); - binshash(nb, &invalhash); - brelse(nb); - nb = NULL; - } - if (bp == NULL && nb == NULL) { - nb = getnewbuf(size, slpflag, slptimeo, &error); - if (nb == NULL) { - if (error == ERESTART || error == EINTR) - return (NULL); - } + + if ((bp = buf_get(vp, blkno, size)) == NULL) goto start; - } - if (nb) { - bp = nb; - binshash(bp, bh); - bp->b_blkno = bp->b_lblkno = blkno; - s = splbio(); - bgetvp(vp, bp); - splx(s); - } -#ifdef DIAGNOSTIC - if (!ISSET(bp->b_flags, B_BUSY)) - panic("getblk buffer not B_BUSY"); -#endif + return (bp); } @@ -950,86 +834,118 @@ geteblk(int size) { struct buf *bp; - while ((bp = getnewbuf(size, 0, 0, NULL)) == NULL) + while ((bp = buf_get(NULL, 0, size)) == NULL) ; - SET(bp->b_flags, B_INVAL); - binshash(bp, &invalhash); return (bp); } /* - * Find a buffer which is available for use. + * Allocate a buffer. 
*/ struct buf * -getnewbuf(size_t size, int slpflag, int slptimeo, int *ep) +buf_get(struct vnode *vp, daddr64_t blkno, size_t size) { struct buf *bp; + int poolwait = size == 0 ? PR_NOWAIT : PR_WAITOK; + int npages; int s; -#if 0 /* we would really like this but sblock update kills it */ - KASSERT(curproc != syncerproc && curproc != cleanerproc); -#endif - s = splbio(); - /* - * Wake up cleaner if we're getting low on pages. - */ - if (bcstats.numdirtypages >= hidirtypages || bcstats.numcleanpages <= locleanpages) - wakeup(&bd_req); + if (size) { + /* + * Wake up cleaner if we're getting low on pages. + */ + if (bcstats.numdirtypages >= hidirtypages || + bcstats.numcleanpages <= locleanpages) + wakeup(&bd_req); - /* - * If we're above the high water mark for clean pages, - * free down to the low water mark. - */ - if (bcstats.numcleanpages > hicleanpages) { - while (bcstats.numcleanpages > locleanpages) { - bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]); - bremfree(bp); - if (bp->b_vp) - brelvp(bp); - bremhash(bp); - buf_put(bp); + /* + * If we're above the high water mark for clean pages, + * free down to the low water mark. + */ + if (bcstats.numcleanpages > hicleanpages) { + while (bcstats.numcleanpages > locleanpages) { + bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]); + bremfree(bp); + if (bp->b_vp) + brelvp(bp); + buf_put(bp); + } + } + + npages = atop(round_page(size)); + + /* + * Free some buffers until we have enough space. + */ + while (bcstats.numbufpages + npages > bufpages) { + int freemax = 5; + int i = freemax; + while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) && i--) { + bremfree(bp); + if (bp->b_vp) + brelvp(bp); + buf_put(bp); + } + if (freemax == i) { + needbuffer++; + tsleep(&needbuffer, PRIBIO, "needbuffer", 0); + splx(s); + return (NULL); + } } } - /* we just ask. it can say no.. 
*/ -getsome: - bp = buf_get(size); + bp = pool_get(&bufpool, poolwait|PR_ZERO); + if (bp == NULL) { - int freemax = 5; - int i = freemax; - while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) && i--) { - bremfree(bp); - if (bp->b_vp) - brelvp(bp); - bremhash(bp); - buf_put(bp); - } - if (freemax != i) - goto getsome; splx(s); return (NULL); } - bremfree(bp); - /* Buffer is no longer on free lists. */ - bp->b_flags = 0; - buf_acquire(bp); - - splx(s); - - /* clear out various other fields */ + bp->b_freelist.tqe_next = NOLIST; + bp->b_synctime = time_uptime + 300; bp->b_dev = NODEV; - bp->b_blkno = bp->b_lblkno = 0; - bp->b_iodone = NULL; - bp->b_error = 0; - bp->b_resid = 0; + LIST_INIT(&bp->b_dep); bp->b_bcount = size; - bp->b_dirtyoff = bp->b_dirtyend = 0; - bp->b_validoff = bp->b_validend = 0; - bremhash(bp); + buf_acquire_unmapped(bp); + + if (vp != NULL) { + /* + * We insert the buffer into the hash with B_BUSY set + * while we allocate pages for it. This way any getblk + * that happens while we allocate pages will wait for + * this buffer instead of starting its own buf_get. + * + * But first, we check if someone beat us to it. 
+ */ + if (incore(vp, blkno)) { + pool_put(&bufpool, bp); + splx(s); + return (NULL); + } + + bp->b_blkno = bp->b_lblkno = blkno; + bgetvp(vp, bp); + binshash(bp, BUFHASH(vp, blkno)); + } else { + bp->b_vnbufs.le_next = NOLIST; + SET(bp->b_flags, B_INVAL); + binshash(bp, &invalhash); + } + + LIST_INSERT_HEAD(&bufhead, bp, b_list); + bcstats.numbufs++; + + if (size) { + buf_alloc_pages(bp, round_page(size)); + buf_map(bp); + } + + splx(s); + return (bp); } diff --git a/sys/kern/vfs_biomem.c b/sys/kern/vfs_biomem.c index 671921dbc4c..708c06f4d61 100644 --- a/sys/kern/vfs_biomem.c +++ b/sys/kern/vfs_biomem.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_biomem.c,v 1.4 2008/11/08 23:20:50 pedro Exp $ */ +/* $OpenBSD: vfs_biomem.c,v 1.5 2009/04/22 13:12:26 art Exp $ */ /* * Copyright (c) 2007 Artur Grabowski <art@openbsd.org> * @@ -75,7 +75,6 @@ buf_mem_init(vsize_t size) void buf_acquire(struct buf *bp) { - vaddr_t va; int s; KASSERT((bp->b_flags & B_BUSY) == 0); @@ -85,6 +84,32 @@ buf_acquire(struct buf *bp) * Busy before waiting for kvm. */ SET(bp->b_flags, B_BUSY); + buf_map(bp); + + splx(s); +} + +/* + * Busy a buffer, but don't map it. + * If it has a mapping, we keep it, but we also keep the mapping on + * the list since we assume that it won't be used anymore. + */ +void +buf_acquire_unmapped(struct buf *bp) +{ + int s; + + s = splbio(); + SET(bp->b_flags, B_BUSY|B_NOTMAPPED); + splx(s); +} + +void +buf_map(struct buf *bp) +{ + vaddr_t va; + + splassert(IPL_BIO); if (bp->b_data == NULL) { unsigned long i; @@ -123,22 +148,8 @@ buf_acquire(struct buf *bp) } else { TAILQ_REMOVE(&buf_valist, bp, b_valist); } - splx(s); -} -/* - * Busy a buffer, but don't map it. - * If it has a mapping, we keep it, but we also keep the mapping on - * the list since we assume that it won't be used anymore. 
- */ -void -buf_acquire_unmapped(struct buf *bp) -{ - int s; - - s = splbio(); - SET(bp->b_flags, B_BUSY|B_NOTMAPPED); - splx(s); + CLR(bp->b_flags, B_NOTMAPPED); } void @@ -209,6 +220,18 @@ buf_dealloc_mem(struct buf *bp) return (1); } +void +buf_shrink_mem(struct buf *bp, vsize_t newsize) +{ + vaddr_t va = (vaddr_t)bp->b_data; + + if (newsize < bp->b_bufsize) { + pmap_kremove(va + newsize, bp->b_bufsize - newsize); + pmap_update(pmap_kernel()); + bp->b_bufsize = newsize; + } +} + vaddr_t buf_unmap(struct buf *bp) { diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 330cc90d614..c3898a15616 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -1,4 +1,4 @@ -/* $OpenBSD: buf.h,v 1.60 2008/06/12 06:58:40 deraadt Exp $ */ +/* $OpenBSD: buf.h,v 1.61 2009/04/22 13:12:26 art Exp $ */ /* $NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $ */ /* @@ -247,8 +247,10 @@ struct buf *incore(struct vnode *, daddr64_t); void buf_mem_init(vsize_t); void buf_acquire(struct buf *); void buf_acquire_unmapped(struct buf *); +void buf_map(struct buf *); void buf_release(struct buf *); int buf_dealloc_mem(struct buf *); +void buf_shrink_mem(struct buf *, vsize_t); void buf_alloc_pages(struct buf *, vsize_t); void buf_free_pages(struct buf *); |