-rw-r--r--   sys/kern/vfs_bio.c      366
-rw-r--r--   sys/kern/vfs_biomem.c    57
-rw-r--r--   sys/sys/buf.h             4
3 files changed, 184 insertions, 243 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index a2ed79f97f4..5be42faeac0 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_bio.c,v 1.111 2009/03/23 15:10:44 beck Exp $ */
+/* $OpenBSD: vfs_bio.c,v 1.112 2009/04/22 13:12:26 art Exp $ */
/* $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $ */
/*-
@@ -92,8 +92,6 @@ struct bio_ops bioops;
*/
struct pool bufpool;
struct bufhead bufhead = LIST_HEAD_INITIALIZER(bufhead);
-struct buf *buf_get(size_t);
-struct buf *buf_stub(struct vnode *, daddr64_t);
void buf_put(struct buf *);
/*
@@ -103,8 +101,7 @@ void buf_put(struct buf *);
#define binstailfree(bp, dp) TAILQ_INSERT_TAIL(dp, bp, b_freelist)
struct buf *bio_doread(struct vnode *, daddr64_t, int, int);
-struct buf *getnewbuf(size_t, int, int, int *);
-void buf_init(struct buf *);
+struct buf *buf_get(struct vnode *, daddr64_t, size_t);
void bread_cluster_callback(struct buf *);
/*
@@ -166,98 +163,6 @@ bremfree(struct buf *bp)
}
void
-buf_init(struct buf *bp)
-{
- splassert(IPL_BIO);
-
- bzero((char *)bp, sizeof *bp);
- bp->b_vnbufs.le_next = NOLIST;
- bp->b_freelist.tqe_next = NOLIST;
- bp->b_synctime = time_uptime + 300;
- bp->b_dev = NODEV;
- LIST_INIT(&bp->b_dep);
-}
-
-/*
- * This is a non-sleeping expanded equivalent of getblk() that allocates only
- * the buffer structure, and not its contents.
- */
-struct buf *
-buf_stub(struct vnode *vp, daddr64_t lblkno)
-{
- struct buf *bp;
- int s;
-
- s = splbio();
- bp = pool_get(&bufpool, PR_NOWAIT);
- splx(s);
-
- if (bp == NULL)
- return (NULL);
-
- bzero((char *)bp, sizeof *bp);
- bp->b_vnbufs.le_next = NOLIST;
- bp->b_freelist.tqe_next = NOLIST;
- bp->b_synctime = time_uptime + 300;
- bp->b_dev = NODEV;
- bp->b_bufsize = 0;
- bp->b_data = NULL;
- bp->b_flags = 0;
- bp->b_dev = NODEV;
- bp->b_blkno = bp->b_lblkno = lblkno;
- bp->b_iodone = NULL;
- bp->b_error = 0;
- bp->b_resid = 0;
- bp->b_bcount = 0;
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- bp->b_validoff = bp->b_validend = 0;
-
- LIST_INIT(&bp->b_dep);
-
- buf_acquire_unmapped(bp);
-
- s = splbio();
- LIST_INSERT_HEAD(&bufhead, bp, b_list);
- bcstats.numbufs++;
- bgetvp(vp, bp);
- splx(s);
-
- return (bp);
-}
-
-struct buf *
-buf_get(size_t size)
-{
- struct buf *bp;
- int npages;
-
- splassert(IPL_BIO);
-
- KASSERT(size > 0);
-
- size = round_page(size);
- npages = atop(size);
-
- if (bcstats.numbufpages + npages > bufpages)
- return (NULL);
-
- bp = pool_get(&bufpool, PR_WAITOK);
-
- buf_init(bp);
- bp->b_flags = B_INVAL;
- buf_alloc_pages(bp, size);
- bp->b_data = NULL;
- binsheadfree(bp, &bufqueues[BQ_CLEAN]);
- binshash(bp, &invalhash);
- LIST_INSERT_HEAD(&bufhead, bp, b_list);
- bcstats.numbufs++;
- bcstats.freebufs++;
- bcstats.numcleanpages += atop(bp->b_bufsize);
-
- return (bp);
-}
-
-void
buf_put(struct buf *bp)
{
splassert(IPL_BIO);
@@ -277,6 +182,7 @@ buf_put(struct buf *bp)
panic("buf_put: b_dep is not empty");
#endif
+ bremhash(bp);
LIST_REMOVE(bp, b_list);
bcstats.numbufs--;
@@ -438,20 +344,33 @@ breadn(struct vnode *vp, daddr64_t blkno, int size, daddr64_t rablks[],
void
bread_cluster_callback(struct buf *bp)
{
+ struct buf **xbpp = bp->b_saveaddr;
int i;
- struct buf **xbpp;
- xbpp = (struct buf **)bp->b_saveaddr;
+ if (xbpp[1] != NULL) {
+ size_t newsize = xbpp[1]->b_bufsize;
- for (i = 0; xbpp[i] != 0; i++) {
+ /*
+ * Shrink this buffer to only cover its part of the total I/O.
+ */
+ buf_shrink_mem(bp, newsize);
+ bp->b_bcount = newsize;
+ }
+
+ for (i = 1; xbpp[i] != 0; i++) {
if (ISSET(bp->b_flags, B_ERROR))
SET(xbpp[i]->b_flags, B_INVAL | B_ERROR);
biodone(xbpp[i]);
}
free(xbpp, M_TEMP);
- bp->b_pobj = NULL;
- buf_put(bp);
+
+ if (ISSET(bp->b_flags, B_ASYNC)) {
+ brelse(bp);
+ } else {
+ CLR(bp->b_flags, B_WANTED);
+ wakeup(bp);
+ }
}
int
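The rewritten callback leans on a convention that is easy to miss when reading the hunk: xbpp[0] is the buffer that carried the whole cluster I/O, xbpp[1] onward are the sibling buffers, and the array is NULL-terminated. That is why the completion loop now starts at index 1, and why the master can take its final size from xbpp[1]. A minimal userland model of that walk (the struct and names here are illustrative, not the kernel's):

#include <stddef.h>
#include <stdio.h>

struct buf_model {
	size_t	bufsize;	/* bytes this buffer covers */
	int	error;		/* stands in for B_ERROR */
};

/* Walk a NULL-terminated sibling array the way the callback does. */
static void
cluster_done_model(struct buf_model **xbpp)
{
	struct buf_model *master = xbpp[0];
	int i;

	/* With siblings present, the master keeps only its own share. */
	if (xbpp[1] != NULL)
		master->bufsize = xbpp[1]->bufsize;

	for (i = 1; xbpp[i] != NULL; i++) {
		if (master->error)
			xbpp[i]->error = 1;	/* propagate the error */
		printf("sibling %d done, %zu bytes\n", i, xbpp[i]->bufsize);
	}
}
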
@@ -464,14 +383,14 @@ bread_cluster(struct vnode *vp, daddr64_t blkno, int size, struct buf **rbpp)
*rbpp = bio_doread(vp, blkno, size, 0);
if (size != round_page(size))
- return (biowait(*rbpp));
+ goto out;
if (VOP_BMAP(vp, blkno + 1, NULL, &sblkno, &maxra))
- return (biowait(*rbpp));
+ goto out;
maxra++;
if (sblkno == -1 || maxra < 2)
- return (biowait(*rbpp));
+ goto out;
howmany = MAXPHYS / size;
if (howmany > maxra)
@@ -479,66 +398,60 @@ bread_cluster(struct vnode *vp, daddr64_t blkno, int size, struct buf **rbpp)
xbpp = malloc((howmany + 1) * sizeof(struct buf *), M_TEMP, M_NOWAIT);
if (xbpp == NULL)
- return (biowait(*rbpp));
+ goto out;
- for (i = 0; i < howmany; i++) {
- if (incore(vp, blkno + i + 1)) {
- for (--i; i >= 0; i--) {
- SET(xbpp[i]->b_flags, B_INVAL);
- brelse(xbpp[i]);
- }
- free(xbpp, M_TEMP);
- return (biowait(*rbpp));
- }
- xbpp[i] = buf_stub(vp, blkno + i + 1);
+ for (i = howmany - 1; i >= 0; i--) {
+ size_t sz;
+
+ /*
+ * The first buffer is allocated big enough to cover what
+ * all the other buffers need.
+ */
+ sz = i == 0 ? howmany * size : 0;
+
+ xbpp[i] = buf_get(vp, blkno + i + 1, sz);
if (xbpp[i] == NULL) {
- for (--i; i >= 0; i--) {
+ for (++i; i < howmany; i++) {
SET(xbpp[i]->b_flags, B_INVAL);
brelse(xbpp[i]);
}
free(xbpp, M_TEMP);
- return (biowait(*rbpp));
+ goto out;
}
}
- xbpp[howmany] = 0;
+ bp = xbpp[0];
- bp = getnewbuf(howmany * size, 0, 0, NULL);
- if (bp == NULL) {
- for (i = 0; i < howmany; i++) {
- SET(xbpp[i]->b_flags, B_INVAL);
- brelse(xbpp[i]);
- }
- free(xbpp, M_TEMP);
- return (biowait(*rbpp));
- }
+ xbpp[howmany] = 0;
inc = btodb(size);
- for (i = 0; i < howmany; i++) {
+ for (i = 1; i < howmany; i++) {
bcstats.pendingreads++;
bcstats.numreads++;
SET(xbpp[i]->b_flags, B_READ | B_ASYNC);
- binshash(xbpp[i], BUFHASH(vp, xbpp[i]->b_lblkno));
xbpp[i]->b_blkno = sblkno + (i * inc);
xbpp[i]->b_bufsize = xbpp[i]->b_bcount = size;
xbpp[i]->b_data = NULL;
xbpp[i]->b_pobj = bp->b_pobj;
xbpp[i]->b_poffs = bp->b_poffs + (i * size);
- buf_acquire_unmapped(xbpp[i]);
}
+ KASSERT(bp->b_lblkno == blkno + 1);
+ KASSERT(bp->b_vp == vp);
+
bp->b_blkno = sblkno;
- bp->b_lblkno = blkno + 1;
SET(bp->b_flags, B_READ | B_ASYNC | B_CALL);
+
bp->b_saveaddr = (void *)xbpp;
bp->b_iodone = bread_cluster_callback;
- bp->b_vp = vp;
+
bcstats.pendingreads++;
bcstats.numreads++;
VOP_STRATEGY(bp);
curproc->p_stats->p_ru.ru_inblock++;
+out:
return (biowait(*rbpp));
}
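Two details of the allocation loop above are worth spelling out. It runs backwards so that xbpp[0], the only slot that asks buf_get() for memory (sized for the entire cluster), is allocated last, after all the cheap sibling headers exist; and because allocation proceeds from high indices down, a failure at index i means only slots i+1 through howmany-1 exist, so the unwind steps forward with ++i. A compilable sketch of the pattern, with hypothetical alloc_one()/discard_one() helpers standing in for buf_get() and brelse():

#include <stdlib.h>

#define HOWMANY	8
#define SIZE	8192

static void *alloc_one(size_t sz) { return malloc(sz ? sz : 1); }
static void discard_one(void *p) { free(p); }

static void *
alloc_backwards(void)
{
	void *slot[HOWMANY];
	int i;

	for (i = HOWMANY - 1; i >= 0; i--) {
		/* Slot 0, allocated last, covers the whole cluster. */
		slot[i] = alloc_one(i == 0 ? (size_t)HOWMANY * SIZE : 0);
		if (slot[i] == NULL) {
			/* Only slots i+1..HOWMANY-1 exist; free those. */
			for (++i; i < HOWMANY; i++)
				discard_one(slot[i]);
			return (NULL);
		}
	}
	return (slot[0]);
}
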
@@ -738,7 +651,6 @@ brelse(struct buf *bp)
struct bqueues *bufq;
int s;
- /* Block disk interrupts. */
s = splbio();
if (bp->b_data != NULL)
@@ -766,6 +678,8 @@ brelse(struct buf *bp)
if (bp->b_vp)
brelvp(bp);
+ bremhash(bp);
+ binshash(bp, &invalhash);
/*
* If the buffer has no associated data, place it back in the
@@ -824,7 +738,7 @@ brelse(struct buf *bp)
/* Wake up any processes waiting for any buffer to become free. */
if (needbuffer) {
needbuffer--;
- wakeup_one(&needbuffer);
+ wakeup(&needbuffer);
}
/* Wake up any processes waiting for _this_ buffer to become free. */
@@ -866,8 +780,7 @@ incore(struct vnode *vp, daddr64_t blkno)
struct buf *
getblk(struct vnode *vp, daddr64_t blkno, int size, int slpflag, int slptimeo)
{
- struct bufhashhdr *bh;
- struct buf *bp, *nb = NULL;
+ struct buf *bp;
int s, error;
/*
@@ -880,7 +793,6 @@ getblk(struct vnode *vp, daddr64_t blkno, int size, int slpflag, int slptimeo)
* case, we can't allow the system to allocate a new buffer for
* the block until the write is finished.
*/
- bh = BUFHASH(vp, blkno);
start:
LIST_FOREACH(bp, BUFHASH(vp, blkno), b_hash) {
if (bp->b_lblkno != blkno || bp->b_vp != vp)
@@ -888,12 +800,6 @@ start:
s = splbio();
if (ISSET(bp->b_flags, B_BUSY)) {
- if (nb != NULL) {
- SET(nb->b_flags, B_INVAL);
- binshash(nb, &invalhash);
- brelse(nb);
- nb = NULL;
- }
SET(bp->b_flags, B_WANTED);
error = tsleep(bp, slpflag | (PRIBIO + 1), "getblk",
slptimeo);
@@ -909,36 +815,14 @@ start:
SET(bp->b_flags, B_CACHE);
buf_acquire(bp);
splx(s);
- break;
+ return (bp);
}
splx(s);
}
- if (nb && bp) {
- SET(nb->b_flags, B_INVAL);
- binshash(nb, &invalhash);
- brelse(nb);
- nb = NULL;
- }
- if (bp == NULL && nb == NULL) {
- nb = getnewbuf(size, slpflag, slptimeo, &error);
- if (nb == NULL) {
- if (error == ERESTART || error == EINTR)
- return (NULL);
- }
+
+ if ((bp = buf_get(vp, blkno, size)) == NULL)
goto start;
- }
- if (nb) {
- bp = nb;
- binshash(bp, bh);
- bp->b_blkno = bp->b_lblkno = blkno;
- s = splbio();
- bgetvp(vp, bp);
- splx(s);
- }
-#ifdef DIAGNOSTIC
- if (!ISSET(bp->b_flags, B_BUSY))
- panic("getblk buffer not B_BUSY");
-#endif
+
return (bp);
}
@@ -950,86 +834,118 @@ geteblk(int size)
{
struct buf *bp;
- while ((bp = getnewbuf(size, 0, 0, NULL)) == NULL)
+ while ((bp = buf_get(NULL, 0, size)) == NULL)
;
- SET(bp->b_flags, B_INVAL);
- binshash(bp, &invalhash);
return (bp);
}
/*
- * Find a buffer which is available for use.
+ * Allocate a buffer.
*/
struct buf *
-getnewbuf(size_t size, int slpflag, int slptimeo, int *ep)
+buf_get(struct vnode *vp, daddr64_t blkno, size_t size)
{
struct buf *bp;
+ int poolwait = size == 0 ? PR_NOWAIT : PR_WAITOK;
+ int npages;
int s;
-#if 0 /* we would really like this but sblock update kills it */
- KASSERT(curproc != syncerproc && curproc != cleanerproc);
-#endif
-
s = splbio();
- /*
- * Wake up cleaner if we're getting low on pages.
- */
- if (bcstats.numdirtypages >= hidirtypages || bcstats.numcleanpages <= locleanpages)
- wakeup(&bd_req);
+ if (size) {
+ /*
+ * Wake up cleaner if we're getting low on pages.
+ */
+ if (bcstats.numdirtypages >= hidirtypages ||
+ bcstats.numcleanpages <= locleanpages)
+ wakeup(&bd_req);
- /*
- * If we're above the high water mark for clean pages,
- * free down to the low water mark.
- */
- if (bcstats.numcleanpages > hicleanpages) {
- while (bcstats.numcleanpages > locleanpages) {
- bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]);
- bremfree(bp);
- if (bp->b_vp)
- brelvp(bp);
- bremhash(bp);
- buf_put(bp);
+ /*
+ * If we're above the high water mark for clean pages,
+ * free down to the low water mark.
+ */
+ if (bcstats.numcleanpages > hicleanpages) {
+ while (bcstats.numcleanpages > locleanpages) {
+ bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]);
+ bremfree(bp);
+ if (bp->b_vp)
+ brelvp(bp);
+ buf_put(bp);
+ }
+ }
+
+ npages = atop(round_page(size));
+
+ /*
+ * Free some buffers until we have enough space.
+ */
+ while (bcstats.numbufpages + npages > bufpages) {
+ int freemax = 5;
+ int i = freemax;
+ while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) && i--) {
+ bremfree(bp);
+ if (bp->b_vp)
+ brelvp(bp);
+ buf_put(bp);
+ }
+ if (freemax == i) {
+ needbuffer++;
+ tsleep(&needbuffer, PRIBIO, "needbuffer", 0);
+ splx(s);
+ return (NULL);
+ }
}
}
- /* we just ask. it can say no.. */
-getsome:
- bp = buf_get(size);
+ bp = pool_get(&bufpool, poolwait|PR_ZERO);
+
if (bp == NULL) {
- int freemax = 5;
- int i = freemax;
- while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) && i--) {
- bremfree(bp);
- if (bp->b_vp)
- brelvp(bp);
- bremhash(bp);
- buf_put(bp);
- }
- if (freemax != i)
- goto getsome;
splx(s);
return (NULL);
}
- bremfree(bp);
- /* Buffer is no longer on free lists. */
- bp->b_flags = 0;
- buf_acquire(bp);
-
- splx(s);
-
- /* clear out various other fields */
+ bp->b_freelist.tqe_next = NOLIST;
+ bp->b_synctime = time_uptime + 300;
bp->b_dev = NODEV;
- bp->b_blkno = bp->b_lblkno = 0;
- bp->b_iodone = NULL;
- bp->b_error = 0;
- bp->b_resid = 0;
+ LIST_INIT(&bp->b_dep);
bp->b_bcount = size;
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- bp->b_validoff = bp->b_validend = 0;
- bremhash(bp);
+ buf_acquire_unmapped(bp);
+
+ if (vp != NULL) {
+ /*
+ * We insert the buffer into the hash with B_BUSY set
+ * while we allocate pages for it. This way any getblk
+ * that happens while we allocate pages will wait for
+ * this buffer instead of starting its own buf_get.
+ *
+ * But first, we check if someone beat us to it.
+ */
+ if (incore(vp, blkno)) {
+ pool_put(&bufpool, bp);
+ splx(s);
+ return (NULL);
+ }
+
+ bp->b_blkno = bp->b_lblkno = blkno;
+ bgetvp(vp, bp);
+ binshash(bp, BUFHASH(vp, blkno));
+ } else {
+ bp->b_vnbufs.le_next = NOLIST;
+ SET(bp->b_flags, B_INVAL);
+ binshash(bp, &invalhash);
+ }
+
+ LIST_INSERT_HEAD(&bufhead, bp, b_list);
+ bcstats.numbufs++;
+
+ if (size) {
+ buf_alloc_pages(bp, round_page(size));
+ buf_map(bp);
+ }
+
+ splx(s);
+
return (bp);
}
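With getnewbuf() and buf_stub() gone, getblk() and buf_get() settle on a simple contract: buf_get() returns NULL either because it had to sleep on needbuffer for pages or because the incore() check found that another thread inserted the same block first, and in both cases the caller's correct move is to rescan the hash. Condensed into a skeleton (lookup_in_hash() is a hypothetical stand-in for the LIST_FOREACH over BUFHASH(vp, blkno); declarations only, as a sketch):

#include <stddef.h>

typedef long long daddr64_t;
struct vnode;
struct buf;

struct buf *lookup_in_hash(struct vnode *, daddr64_t); /* models the hash scan */
struct buf *buf_get(struct vnode *, daddr64_t, size_t);

struct buf *
getblk_skeleton(struct vnode *vp, daddr64_t blkno, size_t size)
{
	struct buf *bp;

	for (;;) {
		if ((bp = lookup_in_hash(vp, blkno)) != NULL)
			return (bp);	/* found: wait for B_BUSY, acquire */
		if ((bp = buf_get(vp, blkno, size)) != NULL)
			return (bp);	/* created and inserted, B_BUSY set */
		/* NULL: lost the insert race or slept for pages; rescan. */
	}
}
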
diff --git a/sys/kern/vfs_biomem.c b/sys/kern/vfs_biomem.c
index 671921dbc4c..708c06f4d61 100644
--- a/sys/kern/vfs_biomem.c
+++ b/sys/kern/vfs_biomem.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_biomem.c,v 1.4 2008/11/08 23:20:50 pedro Exp $ */
+/* $OpenBSD: vfs_biomem.c,v 1.5 2009/04/22 13:12:26 art Exp $ */
/*
* Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
*
@@ -75,7 +75,6 @@ buf_mem_init(vsize_t size)
void
buf_acquire(struct buf *bp)
{
- vaddr_t va;
int s;
KASSERT((bp->b_flags & B_BUSY) == 0);
@@ -85,6 +84,32 @@ buf_acquire(struct buf *bp)
* Busy before waiting for kvm.
*/
SET(bp->b_flags, B_BUSY);
+ buf_map(bp);
+
+ splx(s);
+}
+
+/*
+ * Busy a buffer, but don't map it.
+ * If it has a mapping, we keep it, but we also keep the mapping on
+ * the list since we assume that it won't be used anymore.
+ */
+void
+buf_acquire_unmapped(struct buf *bp)
+{
+ int s;
+
+ s = splbio();
+ SET(bp->b_flags, B_BUSY|B_NOTMAPPED);
+ splx(s);
+}
+
+void
+buf_map(struct buf *bp)
+{
+ vaddr_t va;
+
+ splassert(IPL_BIO);
if (bp->b_data == NULL) {
unsigned long i;
@@ -123,22 +148,8 @@ buf_acquire(struct buf *bp)
} else {
TAILQ_REMOVE(&buf_valist, bp, b_valist);
}
- splx(s);
-}
-/*
- * Busy a buffer, but don't map it.
- * If it has a mapping, we keep it, but we also keep the mapping on
- * the list since we assume that it won't be used anymore.
- */
-void
-buf_acquire_unmapped(struct buf *bp)
-{
- int s;
-
- s = splbio();
- SET(bp->b_flags, B_BUSY|B_NOTMAPPED);
- splx(s);
+ CLR(bp->b_flags, B_NOTMAPPED);
}
void
@@ -209,6 +220,18 @@ buf_dealloc_mem(struct buf *bp)
return (1);
}
+void
+buf_shrink_mem(struct buf *bp, vsize_t newsize)
+{
+ vaddr_t va = (vaddr_t)bp->b_data;
+
+ if (newsize < bp->b_bufsize) {
+ pmap_kremove(va + newsize, bp->b_bufsize - newsize);
+ pmap_update(pmap_kernel());
+ bp->b_bufsize = newsize;
+ }
+}
+
vaddr_t
buf_unmap(struct buf *bp)
{
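The reshuffle above splits what used to be one operation into three: buf_acquire() is now "busy, then map", buf_acquire_unmapped() only sets B_BUSY|B_NOTMAPPED without consuming any kva, and buf_map() can be called later, at IPL_BIO, to attach a mapping and clear B_NOTMAPPED. This is exactly the sequence the new buf_get() relies on; as a fragment (bp is a struct buf * fresh from the pool):

	int s;

	s = splbio();
	buf_acquire_unmapped(bp);	/* B_BUSY | B_NOTMAPPED, no kva yet */
	/* ...hash/vnode bookkeeping that never touches bp->b_data... */
	buf_map(bp);			/* allocate kva, clear B_NOTMAPPED */
	splx(s);
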
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 330cc90d614..c3898a15616 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: buf.h,v 1.60 2008/06/12 06:58:40 deraadt Exp $ */
+/* $OpenBSD: buf.h,v 1.61 2009/04/22 13:12:26 art Exp $ */
/* $NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $ */
/*
@@ -247,8 +247,10 @@ struct buf *incore(struct vnode *, daddr64_t);
void buf_mem_init(vsize_t);
void buf_acquire(struct buf *);
void buf_acquire_unmapped(struct buf *);
+void buf_map(struct buf *);
void buf_release(struct buf *);
int buf_dealloc_mem(struct buf *);
+void buf_shrink_mem(struct buf *, vsize_t);
void buf_alloc_pages(struct buf *, vsize_t);
void buf_free_pages(struct buf *);
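
A worked example (numbers assumed for illustration) ties the three files together. Take a cluster read with size = 8192 and howmany = 4: buf_get() hands xbpp[0] pages for the full 4 * 8192 = 32768 bytes, the siblings borrow those pages through b_pobj at offsets b_poffs + 8192, + 16384 and + 24576, and on completion bread_cluster_callback() calls buf_shrink_mem(bp, 8192), which pmap_kremove()s the mapping beyond the first 8192 bytes and drops b_bufsize back to the master's own share. The pages themselves stay put, still referenced by the sibling buffers.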