diff options
author | Artur Grabowski <art@cvs.openbsd.org> | 2001-11-27 05:27:13 +0000 |
---|---|---|
committer | Artur Grabowski <art@cvs.openbsd.org> | 2001-11-27 05:27:13 +0000 |
commit | 8a1845e49f56720cbfccd4c7f5f80ba5b980fdf4 (patch) | |
tree | d4a522dc41cdc79ba48fe761e94663b795da8cc0 /sys/ufs/ffs/ffs_softdep.c | |
parent | 0d68e9b5af14f4bfa04d22dbebab5972ac647b26 (diff) |
Merge in the unified buffer cache code as found in NetBSD 2001/03/10. The
code is written mostly by Chuck Silvers <chuq@chuq.com>/<chs@netbsd.org>.
Tested for the past few weeks by many developers, should be in a pretty stable
state, but will require optimizations and additional cleanups.
Diffstat (limited to 'sys/ufs/ffs/ffs_softdep.c')
-rw-r--r-- | sys/ufs/ffs/ffs_softdep.c | 274 |
1 files changed, 261 insertions, 13 deletions
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index 1d66094cc06..7a66eed4d8b 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_softdep.c,v 1.25 2001/11/13 14:19:24 art Exp $ */ +/* $OpenBSD: ffs_softdep.c,v 1.26 2001/11/27 05:27:12 art Exp $ */ /* * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved. * @@ -56,6 +56,7 @@ #include <sys/malloc.h> #include <sys/mount.h> #include <sys/proc.h> +#include <sys/pool.h> #include <sys/syslog.h> #include <sys/systm.h> #include <sys/vnode.h> @@ -69,6 +70,10 @@ #include <ufs/ffs/ffs_extern.h> #include <ufs/ufs/ufs_extern.h> +#include <uvm/uvm.h> +struct pool sdpcpool; +int softdep_lockedbufs; + #define STATIC /* @@ -109,6 +114,13 @@ extern char *memname[]; */ /* + * Definitions for page cache info hashtable. + */ +#define PCBPHASHSIZE 1024 +LIST_HEAD(, buf) pcbphashhead[PCBPHASHSIZE]; +#define PCBPHASH(vp, lbn) ((((vaddr_t)(vp) >> 8) ^ (lbn)) & (PCBPHASHSIZE - 1)) + +/* * Internal function prototypes. */ STATIC void softdep_error __P((char *, int)); @@ -160,6 +172,13 @@ STATIC void pause_timer __P((void *)); STATIC int request_cleanup __P((int, int)); STATIC int process_worklist_item __P((struct mount *, int)); STATIC void add_to_worklist __P((struct worklist *)); +STATIC struct buf *softdep_setup_pagecache __P((struct inode *, ufs_lbn_t, + long)); +STATIC void softdep_collect_pagecache __P((struct inode *)); +STATIC void softdep_free_pagecache __P((struct inode *)); +STATIC struct vnode *softdep_lookupvp(struct fs *, ino_t); +STATIC struct buf *softdep_lookup_pcbp __P((struct vnode *, ufs_lbn_t)); +void softdep_pageiodone __P((struct buf *)); /* * Exported softdep operations. 
@@ -176,6 +195,7 @@ struct bio_ops bioops = { softdep_deallocate_dependencies, /* io_deallocate */ softdep_move_dependencies, /* io_movedeps */ softdep_count_dependencies, /* io_countdeps */ + softdep_pageiodone, /* io_pagedone */ }; /* @@ -1055,6 +1075,7 @@ top: void softdep_initialize() { + int i; LIST_INIT(&mkdirlisthd); LIST_INIT(&softdep_workitem_pending); @@ -1073,6 +1094,11 @@ softdep_initialize() newblk_hashtbl = hashinit(64, M_NEWBLK, M_WAITOK, &newblk_hash); sema_init(&newblk_in_progress, "newblk", PRIBIO, 0); timeout_set(&proc_waiting_timeout, pause_timer, 0); + pool_init(&sdpcpool, sizeof(struct buf), 0, 0, 0, "sdpcpool", + 0, pool_page_alloc_nointr, pool_page_free_nointr, M_TEMP); + for (i = 0; i < PCBPHASHSIZE; i++) { + LIST_INIT(&pcbphashhead[i]); + } } /* @@ -1325,11 +1351,16 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) LIST_REMOVE(newblk, nb_hash); FREE(newblk, M_NEWBLK); + /* + * If we were not passed a bp to attach the dep to, + * then this must be for a regular file. + * Allocate a buffer to represent the page cache pages + * that are the real dependency. The pages themselves + * cannot refer to the dependency since we don't want to + * add a field to struct vm_page for this. + */ if (bp == NULL) { - /* - * XXXUBC - Yes, I know how to fix this, but not right now. - */ - panic("softdep_setup_allocdirect: Bonk art in the head\n"); + bp = softdep_setup_pagecache(ip, lbn, newsize); } WORKLIST_INSERT(&bp->b_dep, &adp->ad_list); if (lbn >= NDADDR) { @@ -1563,10 +1594,7 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list); if (nbp == NULL) { - /* - * XXXUBC - Yes, I know how to fix this, but not right now. 
- */ - panic("softdep_setup_allocindir_page: Bonk art in the head\n"); + nbp = softdep_setup_pagecache(ip, lbn, ip->i_fs->fs_bsize); } WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); FREE_LOCK(&lk); @@ -1745,6 +1773,7 @@ softdep_setup_freeblocks(ip, length) int i, delay, error; fs = ip->i_fs; + vp = ITOV(ip); if (length != 0) panic("softdep_setup_freeblocks: non-zero length"); MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks), @@ -1804,9 +1833,15 @@ softdep_setup_freeblocks(ip, length) * with this inode are obsolete and can simply be de-allocated. * We must first merge the two dependency lists to get rid of * any duplicate freefrag structures, then purge the merged list. + * We must remove any pagecache markers from the pagecache + * hashtable first because any I/Os in flight will want to see + * dependencies attached to their pagecache markers. We cannot + * free the pagecache markers until after we've freed all the + * dependencies that reference them later. * If we still have a bitmap dependency, then the inode has never * been written to disk, so we can free any fragments without delay. */ + softdep_collect_pagecache(ip); merge_inode_lists(inodedep); while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0) free_allocdirect(&inodedep->id_inoupdt, adp, delay); @@ -1818,7 +1853,6 @@ softdep_setup_freeblocks(ip, length) * Once they are all there, walk the list and get rid of * any dependencies. 
*/ - vp = ITOV(ip); ACQUIRE_LOCK(&lk); drain_output(vp, 1); while (getdirtybuf(&LIST_FIRST(&vp->v_dirtyblkhd), MNT_WAIT)) { @@ -1830,6 +1864,7 @@ softdep_setup_freeblocks(ip, length) brelse(bp); ACQUIRE_LOCK(&lk); } + softdep_free_pagecache(ip); if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0) (void) free_inodedep(inodedep); FREE_LOCK(&lk); @@ -2898,7 +2933,6 @@ handle_workitem_freefile(freefile) struct freefile *freefile; { struct fs *fs; - struct vnode vp; struct inode tip; struct inodedep *idp; int error; @@ -2914,8 +2948,7 @@ handle_workitem_freefile(freefile) tip.i_devvp = freefile->fx_devvp; tip.i_dev = freefile->fx_devvp->v_rdev; tip.i_fs = fs; - tip.i_vnode = &vp; - vp.v_data = &tip; + tip.i_vnode = NULL; if ((error = ffs_freefile(&tip, freefile->fx_oldinum, freefile->fx_mode)) != 0) { @@ -4313,6 +4346,7 @@ flush_inodedep_deps(fs, ino) struct allocdirect *adp; int error, waitfor; struct buf *bp; + struct vnode *vp; /* * This work is done in two passes. The first pass grabs most @@ -4332,6 +4366,27 @@ flush_inodedep_deps(fs, ino) ACQUIRE_LOCK(&lk); if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) return (0); + + /* + * When file data was in the buffer cache, + * softdep_sync_metadata() would start i/o on + * file data buffers itself. But now that + * we're using the page cache to hold file data, + * we need something else to trigger those flushes. + * let's just do it here. + */ + + vp = softdep_lookupvp(fs, ino); + if (vp) { + struct uvm_object *uobj = &vp->v_uvm.u_obj; + + simple_lock(&uobj->vmobjlock); + (uobj->pgops->pgo_flush)(uobj, 0, 0, + PGO_ALLPAGES|PGO_CLEANIT| + (waitfor == MNT_NOWAIT ? 0: PGO_SYNCIO)); + simple_unlock(&uobj->vmobjlock); + } + TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) { if (adp->ad_state & DEPCOMPLETE) continue; @@ -4944,3 +4999,196 @@ softdep_error(func, error) /* XXX should do something better! 
*/ printf("%s: got error %d while accessing filesystem\n", func, error); }
+
+/*
+ * Allocate a buffer on which to attach a dependency.
+ *
+ * Looks up the page cache marker buf for (ITOV(ip), lbn).  If none
+ * exists, one is taken from sdpcpool, given b_bcount == b_resid == size,
+ * and linked onto both its pcbphashhead chain and the inode's i_pcbufhd
+ * list.  If one already exists it is grown to `size': b_resid is raised
+ * by the same amount that b_bcount grows.  Returns the marker buf.
+ */
+STATIC struct buf *
+softdep_setup_pagecache(ip, lbn, size)
+	struct inode *ip;
+	ufs_lbn_t lbn;
+	long size;
+{
+	struct vnode *vp = ITOV(ip);
+	struct buf *bp;
+	int s;
+
+	/*
+	 * Enter pagecache dependency buf in hash.
+	 */
+
+	bp = softdep_lookup_pcbp(vp, lbn);
+	if (bp == NULL) {
+		s = splbio();
+		bp = pool_get(&sdpcpool, PR_WAITOK);
+		splx(s);
+
+		/* Only the fields used by the pagecache code are set up. */
+		bp->b_vp = vp;
+		bp->b_lblkno = lbn;
+		bp->b_bcount = bp->b_resid = size;
+		LIST_INIT(&bp->b_dep);
+		LIST_INSERT_HEAD(&pcbphashhead[PCBPHASH(vp, lbn)], bp, b_hash);
+		LIST_INSERT_HEAD(&ip->i_pcbufhd, bp, b_vnbufs);
+	} else {
+		KASSERT(size >= bp->b_bcount);
+		bp->b_resid += size - bp->b_bcount;
+		bp->b_bcount = size;
+	}
+	return bp;
+}
+
+/*
+ * softdep_collect_pagecache() and softdep_free_pagecache()
+ * are used to remove page cache dependency buffers when
+ * a file is being truncated to 0.
+ */
+
+/*
+ * Unhash every marker buf on the inode's i_pcbufhd list.  The bufs stay
+ * on i_pcbufhd; only their b_hash linkage is removed.
+ */
+STATIC void
+softdep_collect_pagecache(ip)
+	struct inode *ip;
+{
+	struct buf *bp;
+
+	LIST_FOREACH(bp, &ip->i_pcbufhd, b_vnbufs) {
+		LIST_REMOVE(bp, b_hash);
+	}
+}
+
+/*
+ * Remove every marker buf from the inode's i_pcbufhd list and return it
+ * to sdpcpool.  Each buf must no longer have dependencies attached.
+ */
+STATIC void
+softdep_free_pagecache(ip)
+	struct inode *ip;
+{
+	struct buf *bp, *nextbp;
+
+	/* Fetch the successor first: bp is unlinked and freed below. */
+	for (bp = LIST_FIRST(&ip->i_pcbufhd); bp != NULL; bp = nextbp) {
+		nextbp = LIST_NEXT(bp, b_vnbufs);
+		LIST_REMOVE(bp, b_vnbufs);
+		KASSERT(LIST_FIRST(&bp->b_dep) == NULL);
+		pool_put(&sdpcpool, bp);
+	}
+}
+
+/*
+ * Find the vnode for inode number `ino' on the FFS mount whose
+ * superblock is `fs'.  Returns the result of ufs_ihashlookup() for
+ * that mount's device, or NULL if no mounted FFS uses `fs'.
+ */
+STATIC struct vnode *
+softdep_lookupvp(fs, ino)
+	struct fs *fs;
+	ino_t ino;
+{
+	struct mount *mp;
+	extern struct vfsops ffs_vfsops;
+
+	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
+		if (mp->mnt_op == &ffs_vfsops &&
+		    VFSTOUFS(mp)->um_fs == fs) {
+			break;
+		}
+	}
+	/*
+	 * A circular queue traversal that runs to completion leaves the
+	 * iterator pointing at the list head, never at NULL, so "not
+	 * found" must be detected by comparing against the head.
+	 */
+	if (mp == (void *)&mountlist) {
+		return NULL;
+	}
+	return ufs_ihashlookup(VFSTOUFS(mp)->um_dev, ino);
+}
+
+/*
+ * Search the pagecache marker hash for the buf describing logical
+ * block `lbn' of vnode `vp'.  Returns the buf, or NULL if there is none
+ * (LIST_FOREACH leaves bp NULL when the chain is exhausted).
+ */
+STATIC struct buf *
+softdep_lookup_pcbp(vp, lbn)
+	struct vnode *vp;
+	ufs_lbn_t lbn;
+{
+	struct buf *bp;
+
+	LIST_FOREACH(bp, &pcbphashhead[PCBPHASH(vp, lbn)], b_hash) {
+		if (bp->b_vp == vp && bp->b_lblkno == lbn) {
+			break;
+		}
+	}
+	return bp;
+}
+
+/*
+ * Do softdep i/o completion processing for page cache writes.
+ *
+ * For every page behind bp's data, and for every filesystem block
+ * within that page, the amount written is subtracted from b_resid of
+ * the matching pagecache marker buf.  Once a marker's b_resid reaches
+ * zero, each allocdirect/allocindir attached to it is marked COMPLETE
+ * and handed to its partdone handler, and the marker is unhashed,
+ * unlinked and returned to sdpcpool.
+ */
+
+void
+softdep_pageiodone(bp)
+	struct buf *bp;
+{
+	int npages = bp->b_bufsize >> PAGE_SHIFT;
+	struct vnode *vp = bp->b_vp;
+	struct vm_page *pg;
+	struct buf *pcbp = NULL;
+	struct allocdirect *adp;
+	struct allocindir *aip;
+	struct worklist *wk;
+	ufs_lbn_t lbn;
+	voff_t off;
+	long iosize = bp->b_bcount;
+	int size, asize, bshift, bsize;
+	int i;
+
+	KASSERT(!(bp->b_flags & B_READ));
+	bshift = vp->v_mount->mnt_fs_bshift;
+	bsize = 1 << bshift;
+	/* Accounting unit: one fs block, or one page if blocks are larger. */
+	asize = min(PAGE_SIZE, bsize);
+	ACQUIRE_LOCK(&lk);
+	for (i = 0; i < npages; i++) {
+		pg = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
+		if (pg == NULL) {
+			continue;
+		}
+
+		for (off = pg->offset;
+		     off < pg->offset + PAGE_SIZE;
+		     off += bsize) {
+			/* Never account for more than the i/o actually did. */
+			size = min(asize, iosize);
+			iosize -= size;
+			lbn = off >> bshift;
+			/* Reuse the previous marker while still in its block. */
+			if (pcbp == NULL || pcbp->b_lblkno != lbn) {
+				pcbp = softdep_lookup_pcbp(vp, lbn);
+			}
+			if (pcbp == NULL) {
+				continue;
+			}
+			pcbp->b_resid -= size;
+			if (pcbp->b_resid < 0) {
+				panic("softdep_pageiodone: "
+				      "resid < 0, vp %p lbn 0x%lx pcbp %p",
+				      vp, lbn, pcbp);
+			}
+			if (pcbp->b_resid > 0) {
+				continue;
+			}
+
+			/*
+			 * We've completed all the i/o for this block.
+			 * mark the dep complete.
+			 */
+
+			KASSERT(LIST_FIRST(&pcbp->b_dep) != NULL);
+			while ((wk = LIST_FIRST(&pcbp->b_dep))) {
+				WORKLIST_REMOVE(wk);
+				switch (wk->wk_type) {
+				case D_ALLOCDIRECT:
+					adp = WK_ALLOCDIRECT(wk);
+					adp->ad_state |= COMPLETE;
+					handle_allocdirect_partdone(adp);
+					break;
+
+				case D_ALLOCINDIR:
+					aip = WK_ALLOCINDIR(wk);
+					aip->ai_state |= COMPLETE;
+					handle_allocindir_partdone(aip);
+					break;
+
+				default:
+					panic("softdep_pageiodone: "
+					      "bad type %d, pcbp %p wk %p",
+					      wk->wk_type, pcbp, wk);
+				}
+			}
+			LIST_REMOVE(pcbp, b_hash);
+			LIST_REMOVE(pcbp, b_vnbufs);
+			pool_put(&sdpcpool, pcbp);
+			pcbp = NULL;
+		}
+	}
+	FREE_LOCK(&lk);
+} |