summaryrefslogtreecommitdiff
path: root/sys/ufs/ffs/ffs_softdep.c
diff options
context:
space:
mode:
authorArtur Grabowski <art@cvs.openbsd.org>2001-11-27 05:27:13 +0000
committerArtur Grabowski <art@cvs.openbsd.org>2001-11-27 05:27:13 +0000
commit8a1845e49f56720cbfccd4c7f5f80ba5b980fdf4 (patch)
treed4a522dc41cdc79ba48fe761e94663b795da8cc0 /sys/ufs/ffs/ffs_softdep.c
parent0d68e9b5af14f4bfa04d22dbebab5972ac647b26 (diff)
Merge in the unified buffer cache code as found in NetBSD 2001/03/10. The
code is written mostly by Chuck Silvers <chuq@chuq.com>/<chs@netbsd.org>. Tested for the past few weeks by many developers, should be in a pretty stable state, but will require optimizations and additional cleanups.
Diffstat (limited to 'sys/ufs/ffs/ffs_softdep.c')
-rw-r--r--sys/ufs/ffs/ffs_softdep.c274
1 files changed, 261 insertions, 13 deletions
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 1d66094cc06..7a66eed4d8b 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_softdep.c,v 1.25 2001/11/13 14:19:24 art Exp $ */
+/* $OpenBSD: ffs_softdep.c,v 1.26 2001/11/27 05:27:12 art Exp $ */
/*
* Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
*
@@ -56,6 +56,7 @@
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
+#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/vnode.h>
@@ -69,6 +70,10 @@
#include <ufs/ffs/ffs_extern.h>
#include <ufs/ufs/ufs_extern.h>
+#include <uvm/uvm.h>
+struct pool sdpcpool;
+int softdep_lockedbufs;
+
#define STATIC
/*
@@ -109,6 +114,13 @@ extern char *memname[];
*/
/*
+ * Definitions for page cache info hashtable.
+ */
+#define PCBPHASHSIZE 1024
+LIST_HEAD(, buf) pcbphashhead[PCBPHASHSIZE];
+#define PCBPHASH(vp, lbn) ((((vaddr_t)(vp) >> 8) ^ (lbn)) & (PCBPHASHSIZE - 1))
+
+/*
* Internal function prototypes.
*/
STATIC void softdep_error __P((char *, int));
@@ -160,6 +172,13 @@ STATIC void pause_timer __P((void *));
STATIC int request_cleanup __P((int, int));
STATIC int process_worklist_item __P((struct mount *, int));
STATIC void add_to_worklist __P((struct worklist *));
+STATIC struct buf *softdep_setup_pagecache __P((struct inode *, ufs_lbn_t,
+ long));
+STATIC void softdep_collect_pagecache __P((struct inode *));
+STATIC void softdep_free_pagecache __P((struct inode *));
+STATIC struct vnode *softdep_lookupvp(struct fs *, ino_t);
+STATIC struct buf *softdep_lookup_pcbp __P((struct vnode *, ufs_lbn_t));
+void softdep_pageiodone __P((struct buf *));
/*
* Exported softdep operations.
@@ -176,6 +195,7 @@ struct bio_ops bioops = {
softdep_deallocate_dependencies, /* io_deallocate */
softdep_move_dependencies, /* io_movedeps */
softdep_count_dependencies, /* io_countdeps */
+ softdep_pageiodone, /* io_pagedone */
};
/*
@@ -1055,6 +1075,7 @@ top:
void
softdep_initialize()
{
+ int i;
LIST_INIT(&mkdirlisthd);
LIST_INIT(&softdep_workitem_pending);
@@ -1073,6 +1094,11 @@ softdep_initialize()
newblk_hashtbl = hashinit(64, M_NEWBLK, M_WAITOK, &newblk_hash);
sema_init(&newblk_in_progress, "newblk", PRIBIO, 0);
timeout_set(&proc_waiting_timeout, pause_timer, 0);
+ pool_init(&sdpcpool, sizeof(struct buf), 0, 0, 0, "sdpcpool",
+ 0, pool_page_alloc_nointr, pool_page_free_nointr, M_TEMP);
+ for (i = 0; i < PCBPHASHSIZE; i++) {
+ LIST_INIT(&pcbphashhead[i]);
+ }
}
/*
@@ -1325,11 +1351,16 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
LIST_REMOVE(newblk, nb_hash);
FREE(newblk, M_NEWBLK);
+ /*
+ * If we were not passed a bp to attach the dep to,
+ * then this must be for a regular file.
+ * Allocate a buffer to represent the page cache pages
+ * that are the real dependency. The pages themselves
+ * cannot refer to the dependency since we don't want to
+ * add a field to struct vm_page for this.
+ */
if (bp == NULL) {
- /*
- * XXXUBC - Yes, I know how to fix this, but not right now.
- */
- panic("softdep_setup_allocdirect: Bonk art in the head\n");
+ bp = softdep_setup_pagecache(ip, lbn, newsize);
}
WORKLIST_INSERT(&bp->b_dep, &adp->ad_list);
if (lbn >= NDADDR) {
@@ -1563,10 +1594,7 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp)
pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list);
if (nbp == NULL) {
- /*
- * XXXUBC - Yes, I know how to fix this, but not right now.
- */
- panic("softdep_setup_allocindir_page: Bonk art in the head\n");
+ nbp = softdep_setup_pagecache(ip, lbn, ip->i_fs->fs_bsize);
}
WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
FREE_LOCK(&lk);
@@ -1745,6 +1773,7 @@ softdep_setup_freeblocks(ip, length)
int i, delay, error;
fs = ip->i_fs;
+ vp = ITOV(ip);
if (length != 0)
panic("softdep_setup_freeblocks: non-zero length");
MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks),
@@ -1804,9 +1833,15 @@ softdep_setup_freeblocks(ip, length)
* with this inode are obsolete and can simply be de-allocated.
* We must first merge the two dependency lists to get rid of
* any duplicate freefrag structures, then purge the merged list.
+ * We must remove any pagecache markers from the pagecache
+ * hashtable first because any I/Os in flight will want to see
+ * dependencies attached to their pagecache markers. We cannot
+ * free the pagecache markers until after we've freed all the
+ * dependencies that reference them later.
* If we still have a bitmap dependency, then the inode has never
* been written to disk, so we can free any fragments without delay.
*/
+ softdep_collect_pagecache(ip);
merge_inode_lists(inodedep);
while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
free_allocdirect(&inodedep->id_inoupdt, adp, delay);
@@ -1818,7 +1853,6 @@ softdep_setup_freeblocks(ip, length)
* Once they are all there, walk the list and get rid of
* any dependencies.
*/
- vp = ITOV(ip);
ACQUIRE_LOCK(&lk);
drain_output(vp, 1);
while (getdirtybuf(&LIST_FIRST(&vp->v_dirtyblkhd), MNT_WAIT)) {
@@ -1830,6 +1864,7 @@ softdep_setup_freeblocks(ip, length)
brelse(bp);
ACQUIRE_LOCK(&lk);
}
+ softdep_free_pagecache(ip);
if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0)
(void) free_inodedep(inodedep);
FREE_LOCK(&lk);
@@ -2898,7 +2933,6 @@ handle_workitem_freefile(freefile)
struct freefile *freefile;
{
struct fs *fs;
- struct vnode vp;
struct inode tip;
struct inodedep *idp;
int error;
@@ -2914,8 +2948,7 @@ handle_workitem_freefile(freefile)
tip.i_devvp = freefile->fx_devvp;
tip.i_dev = freefile->fx_devvp->v_rdev;
tip.i_fs = fs;
- tip.i_vnode = &vp;
- vp.v_data = &tip;
+ tip.i_vnode = NULL;
if ((error = ffs_freefile(&tip, freefile->fx_oldinum,
freefile->fx_mode)) != 0) {
@@ -4313,6 +4346,7 @@ flush_inodedep_deps(fs, ino)
struct allocdirect *adp;
int error, waitfor;
struct buf *bp;
+ struct vnode *vp;
/*
* This work is done in two passes. The first pass grabs most
@@ -4332,6 +4366,27 @@ flush_inodedep_deps(fs, ino)
ACQUIRE_LOCK(&lk);
if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
return (0);
+
+ /*
+ * When file data was in the buffer cache,
+ * softdep_sync_metadata() would start i/o on
+ * file data buffers itself. But now that
+ * we're using the page cache to hold file data,
+ * we need something else to trigger those flushes.
+ * let's just do it here.
+ */
+
+ vp = softdep_lookupvp(fs, ino);
+ if (vp) {
+ struct uvm_object *uobj = &vp->v_uvm.u_obj;
+
+ simple_lock(&uobj->vmobjlock);
+ (uobj->pgops->pgo_flush)(uobj, 0, 0,
+ PGO_ALLPAGES|PGO_CLEANIT|
+ (waitfor == MNT_NOWAIT ? 0: PGO_SYNCIO));
+ simple_unlock(&uobj->vmobjlock);
+ }
+
TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) {
if (adp->ad_state & DEPCOMPLETE)
continue;
@@ -4944,3 +4999,196 @@ softdep_error(func, error)
/* XXX should do something better! */
printf("%s: got error %d while accessing filesystem\n", func, error);
}
+
+/*
+ * Allocate a buffer on which to attach a dependency.
+ */
+STATIC struct buf *
+softdep_setup_pagecache(ip, lbn, size)
+ struct inode *ip;
+ ufs_lbn_t lbn;
+ long size;
+{
+ struct vnode *vp = ITOV(ip);
+ struct buf *bp;
+ int s;
+
+ /*
+ * Enter pagecache dependency buf in hash.
+ */
+
+ bp = softdep_lookup_pcbp(vp, lbn);
+ if (bp == NULL) {
+ s = splbio();
+ bp = pool_get(&sdpcpool, PR_WAITOK);
+ splx(s);
+
+ bp->b_vp = vp;
+ bp->b_lblkno = lbn;
+ bp->b_bcount = bp->b_resid = size;
+ LIST_INIT(&bp->b_dep);
+ LIST_INSERT_HEAD(&pcbphashhead[PCBPHASH(vp, lbn)], bp, b_hash);
+ LIST_INSERT_HEAD(&ip->i_pcbufhd, bp, b_vnbufs);
+ } else {
+ KASSERT(size >= bp->b_bcount);
+ bp->b_resid += size - bp->b_bcount;
+ bp->b_bcount = size;
+ }
+ return bp;
+}
+
+/*
+ * softdep_collect_pagecache() and softdep_free_pagecache()
+ * are used to remove page cache dependency buffers when
+ * a file is being truncated to 0.
+ */
+
+STATIC void
+softdep_collect_pagecache(ip)
+ struct inode *ip;
+{
+ struct buf *bp;
+
+ LIST_FOREACH(bp, &ip->i_pcbufhd, b_vnbufs) {
+ LIST_REMOVE(bp, b_hash);
+ }
+}
+
+STATIC void
+softdep_free_pagecache(ip)
+ struct inode *ip;
+{
+ struct buf *bp, *nextbp;
+
+ for (bp = LIST_FIRST(&ip->i_pcbufhd); bp != NULL; bp = nextbp) {
+ nextbp = LIST_NEXT(bp, b_vnbufs);
+ LIST_REMOVE(bp, b_vnbufs);
+ KASSERT(LIST_FIRST(&bp->b_dep) == NULL);
+ pool_put(&sdpcpool, bp);
+ }
+}
+
+STATIC struct vnode *
+softdep_lookupvp(fs, ino)
+ struct fs *fs;
+ ino_t ino;
+{
+ struct mount *mp;
+ extern struct vfsops ffs_vfsops;
+
+ CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
+ if (mp->mnt_op == &ffs_vfsops &&
+ VFSTOUFS(mp)->um_fs == fs) {
+ break;
+ }
+ }
+ if (mp == NULL) {
+ return NULL;
+ }
+ return ufs_ihashlookup(VFSTOUFS(mp)->um_dev, ino);
+}
+
+STATIC struct buf *
+softdep_lookup_pcbp(vp, lbn)
+ struct vnode *vp;
+ ufs_lbn_t lbn;
+{
+ struct buf *bp;
+
+ LIST_FOREACH(bp, &pcbphashhead[PCBPHASH(vp, lbn)], b_hash) {
+ if (bp->b_vp == vp && bp->b_lblkno == lbn) {
+ break;
+ }
+ }
+ return bp;
+}
+
+/*
+ * Do softdep i/o completion processing for page cache writes.
+ */
+
+void
+softdep_pageiodone(bp)
+ struct buf *bp;
+{
+ int npages = bp->b_bufsize >> PAGE_SHIFT;
+ struct vnode *vp = bp->b_vp;
+ struct vm_page *pg;
+ struct buf *pcbp = NULL;
+ struct allocdirect *adp;
+ struct allocindir *aip;
+ struct worklist *wk;
+ ufs_lbn_t lbn;
+ voff_t off;
+ long iosize = bp->b_bcount;
+ int size, asize, bshift, bsize;
+ int i;
+
+ KASSERT(!(bp->b_flags & B_READ));
+ bshift = vp->v_mount->mnt_fs_bshift;
+ bsize = 1 << bshift;
+ asize = min(PAGE_SIZE, bsize);
+ ACQUIRE_LOCK(&lk);
+ for (i = 0; i < npages; i++) {
+ pg = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
+ if (pg == NULL) {
+ continue;
+ }
+
+ for (off = pg->offset;
+ off < pg->offset + PAGE_SIZE;
+ off += bsize) {
+ size = min(asize, iosize);
+ iosize -= size;
+ lbn = off >> bshift;
+ if (pcbp == NULL || pcbp->b_lblkno != lbn) {
+ pcbp = softdep_lookup_pcbp(vp, lbn);
+ }
+ if (pcbp == NULL) {
+ continue;
+ }
+ pcbp->b_resid -= size;
+ if (pcbp->b_resid < 0) {
+ panic("softdep_pageiodone: "
+ "resid < 0, vp %p lbn 0x%lx pcbp %p",
+ vp, lbn, pcbp);
+ }
+ if (pcbp->b_resid > 0) {
+ continue;
+ }
+
+ /*
+ * We've completed all the i/o for this block.
+ * mark the dep complete.
+ */
+
+ KASSERT(LIST_FIRST(&pcbp->b_dep) != NULL);
+ while ((wk = LIST_FIRST(&pcbp->b_dep))) {
+ WORKLIST_REMOVE(wk);
+ switch (wk->wk_type) {
+ case D_ALLOCDIRECT:
+ adp = WK_ALLOCDIRECT(wk);
+ adp->ad_state |= COMPLETE;
+ handle_allocdirect_partdone(adp);
+ break;
+
+ case D_ALLOCINDIR:
+ aip = WK_ALLOCINDIR(wk);
+ aip->ai_state |= COMPLETE;
+ handle_allocindir_partdone(aip);
+ break;
+
+ default:
+ panic("softdep_pageiodone: "
+ "bad type %d, pcbp %p wk %p",
+ wk->wk_type, pcbp, wk);
+ }
+ }
+ LIST_REMOVE(pcbp, b_hash);
+ LIST_REMOVE(pcbp, b_vnbufs);
+ pool_put(&sdpcpool, pcbp);
+ pcbp = NULL;
+ }
+ }
+ FREE_LOCK(&lk);
+}