summary refs log tree commit diff
diff options
context:
space:
mode:
author Artur Grabowski <art@cvs.openbsd.org> 1999-12-05 08:30:39 +0000
committer Artur Grabowski <art@cvs.openbsd.org> 1999-12-05 08:30:39 +0000
commit b0a011cf3296b87d11115a78cad39b1efec4bf79 (patch)
tree 77bb0ee4ac71f3df3112cdc2cd64fef8fa2a2d7a
parent 0c333d0b6b7319a29626c8b56baa1ad5eabd4e54 (diff)
Integrate the latest soft updates code.
From mckusick@mckusick.com.
-rw-r--r-- sys/ufs/ffs/ffs_extern.h 4
-rw-r--r-- sys/ufs/ffs/ffs_softdep.c 616
-rw-r--r-- sys/ufs/ffs/ffs_softdep_stub.c 10
-rw-r--r-- sys/ufs/ffs/ffs_vnops.c 7
-rw-r--r-- sys/ufs/ffs/softdep.h 3
5 files changed, 458 insertions, 182 deletions
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index d955a10e74a..a9fdfd5846d 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_extern.h,v 1.7 1998/02/08 22:41:49 tholo Exp $ */
+/* $OpenBSD: ffs_extern.h,v 1.8 1999/12/05 08:30:38 art Exp $ */
/* $NetBSD: ffs_extern.h,v 1.4 1996/02/09 22:22:22 christos Exp $ */
/*-
@@ -159,7 +159,7 @@ void softdep_setup_allocindir_page __P((struct inode *, ufs_lbn_t,
void softdep_disk_io_initiation __P((struct buf *));
void softdep_disk_write_complete __P((struct buf *));
int softdep_sync_metadata __P((struct vop_fsync_args *));
-
+void softdep_fsync_mountdev __P((struct vnode *));
__END_DECLS
extern int (**ffs_vnodeop_p) __P((void *));
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 8162bc3ddf9..b82a65a19bb 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -52,11 +52,9 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * @(#)ffs_softdep.c 9.30 (McKusick) 10/3/98
+ * @(#)ffs_softdep.c 9.40 (McKusick) 6/15/99
*/
-#ifdef FFS_SOFTUPDATES
-
/*
* For now we want the safety net that the DIAGNOSTIC and DEBUG flags provide.
*/
@@ -124,7 +122,10 @@ extern char *memname[];
* Internal function prototypes.
*/
static void softdep_error __P((char *, int));
+static void drain_output __P((struct vnode *, int));
static int getdirtybuf __P((struct buf **, int));
+static void clear_remove __P((struct proc *));
+static void clear_inodedeps __P((struct proc *));
static int flush_pagedep_deps __P((struct vnode *, struct mount *,
struct diraddhd *));
static int flush_inodedep_deps __P((struct fs *, ino_t));
@@ -165,7 +166,7 @@ static int inodedep_lookup __P((struct fs *, ino_t, int, struct inodedep **));
static int pagedep_lookup __P((struct inode *, ufs_lbn_t, int,
struct pagedep **));
static void pause_timer __P((void *));
-static int checklimit __P((long *, int));
+static int request_cleanup __P((int, int));
static void add_to_worklist __P((struct worklist *));
/*
@@ -175,7 +176,7 @@ struct bio_ops bioops = {
softdep_disk_io_initiation, /* io_start */
softdep_disk_write_complete, /* io_complete */
softdep_deallocate_dependencies, /* io_deallocate */
- softdep_fsync, /* io_fsync */
+ softdep_fsync, /* io_fsync */
softdep_process_worklist, /* io_sync */
};
@@ -410,17 +411,36 @@ static struct workhead softdep_workitem_pending;
static int softdep_worklist_busy;
static int max_softdeps; /* maximum number of structs before slowdown */
static int tickdelay = 2; /* number of ticks to pause during slowdown */
-static int max_limit_hit; /* number of times slowdown imposed */
-static int rush_requests; /* number of times I/O speeded up */
static int proc_waiting; /* tracks whether we have a timeout posted */
-static pid_t filesys_syncer_pid;/* records pid of filesystem syncer process */
+static struct proc *filesys_syncer; /* proc of filesystem syncer process */
+static int req_clear_inodedeps; /* syncer process flush some inodedeps */
+#define FLUSH_INODES 1
+static int req_clear_remove; /* syncer process flush some freeblks */
+#define FLUSH_REMOVE 2
+/*
+ * runtime statistics
+ */
+static int stat_blk_limit_push; /* number of times block limit neared */
+static int stat_ino_limit_push; /* number of times inode limit neared */
+static int stat_blk_limit_hit; /* number of times block slowdown imposed */
+static int stat_ino_limit_hit; /* number of times inode slowdown imposed */
+static int stat_indir_blk_ptrs; /* bufs redirtied as indir ptrs not written */
+static int stat_inode_bitmap; /* bufs redirtied as inode bitmap not written */
+static int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */
+static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */
#ifdef DEBUG
#include <vm/vm.h>
#include <sys/sysctl.h>
-struct ctldebug debug8 = { "max_softdeps", &max_softdeps };
-struct ctldebug debug9 = { "tickdelay", &tickdelay };
-struct ctldebug debug10 = { "max_limit_hit", &max_limit_hit };
-struct ctldebug debug11 = { "rush_requests", &rush_requests };
+struct ctldebug debug20 = { "max_softdeps", &max_softdeps };
+struct ctldebug debug21 = { "tickdelay", &tickdelay };
+struct ctldebug debug23 = { "blk_limit_push", &stat_blk_limit_push };
+struct ctldebug debug24 = { "ino_limit_push", &stat_ino_limit_push };
+struct ctldebug debug25 = { "blk_limit_hit", &stat_blk_limit_hit };
+struct ctldebug debug26 = { "ino_limit_hit", &stat_ino_limit_hit };
+struct ctldebug debug27 = { "indir_blk_ptrs", &stat_indir_blk_ptrs };
+struct ctldebug debug28 = { "inode_bitmap", &stat_inode_bitmap };
+struct ctldebug debug29 = { "direct_blk_ptrs", &stat_direct_blk_ptrs };
+struct ctldebug debug30 = { "dir_entry", &stat_dir_entry };
#endif /* DEBUG */
/*
@@ -465,10 +485,10 @@ softdep_process_worklist(matchmnt)
int matchcnt;
/*
- * Record the process identifier of our caller so that we can
- * give this process preferential treatment in checklimit below.
+ * Record the process identifier of our caller so that we can give
+ * this process preferential treatment in request_cleanup below.
*/
- filesys_syncer_pid = p->p_pid;
+ filesys_syncer = p;
matchcnt = 0;
matchfs = NULL;
if (matchmnt != NULL)
@@ -481,6 +501,19 @@ softdep_process_worklist(matchmnt)
*/
if (softdep_worklist_busy && matchmnt == NULL)
return (-1);
+ /*
+ * If requested, try removing inode or removal dependencies.
+ */
+ if (req_clear_inodedeps) {
+ clear_inodedeps(p);
+ req_clear_inodedeps = 0;
+ wakeup(&proc_waiting);
+ }
+ if (req_clear_remove) {
+ clear_remove(p);
+ req_clear_remove = 0;
+ wakeup(&proc_waiting);
+ }
ACQUIRE_LOCK(&lk);
while ((wk = LIST_FIRST(&softdep_workitem_pending)) != 0) {
WORKLIST_REMOVE(wk);
@@ -522,6 +555,19 @@ softdep_process_worklist(matchmnt)
}
if (softdep_worklist_busy && matchmnt == NULL)
return (-1);
+ /*
+ * If requested, try removing inode or removal dependencies.
+ */
+ if (req_clear_inodedeps) {
+ clear_inodedeps(p);
+ req_clear_inodedeps = 0;
+ wakeup(&proc_waiting);
+ }
+ if (req_clear_remove) {
+ clear_remove(p);
+ req_clear_remove = 0;
+ wakeup(&proc_waiting);
+ }
ACQUIRE_LOCK(&lk);
}
FREE_LOCK(&lk);
@@ -592,71 +638,6 @@ softdep_flushfiles(oldmnt, flags, p)
}
/*
- * A large burst of file addition or deletion activity can drive the
- * memory load excessively high. Therefore we deliberately slow things
- * down and speed up the I/O processing if we find ourselves with too
- * many dependencies in progress.
- */
-static int
-checklimit(resource, islocked)
- long *resource;
- int islocked;
-{
- struct proc *p = CURPROC;
-
- /*
- * If we are under our limit, just proceed.
- */
- if (*resource < max_softdeps)
- return (0);
- /*
- * We never hold up the filesystem syncer process.
- */
- if (p->p_pid == filesys_syncer_pid)
- return (0);
- /*
- * Our first approach is to speed up the syncer process.
- * We never push it to speed up more than half of its
- * normal turn time, otherwise it could take over the cpu.
- */
- if (rushjob < syncdelay / 2) {
- rushjob += 1;
- rush_requests += 1;
- return (0);
- }
- /*
- * Every trick has failed, so we pause momentarily to let
- * the filesystem syncer process catch up.
- */
- if (islocked == 0)
- ACQUIRE_LOCK(&lk);
- if (proc_waiting == 0) {
- proc_waiting = 1;
- timeout(pause_timer, NULL, tickdelay > 2 ? tickdelay : 2);
- }
- FREE_LOCK_INTERLOCKED(&lk);
- (void) tsleep((caddr_t)&proc_waiting, PPAUSE | PCATCH, "softupdate", 0);
- ACQUIRE_LOCK_INTERLOCKED(&lk);
- if (islocked == 0)
- FREE_LOCK(&lk);
- max_limit_hit += 1;
- return (1);
-}
-
-/*
- * Awaken processes pausing in checklimit and clear proc_waiting
- * to indicate that there is no longer a timer running.
- */
-void
-pause_timer(arg)
- void *arg;
-{
-
- proc_waiting = 0;
- wakeup(&proc_waiting);
-}
-
-/*
* Structure hashing.
*
* There are three types of structures that can be looked up:
@@ -797,7 +778,11 @@ top:
*inodedeppp = NULL;
return (0);
}
- if (firsttry && checklimit(&num_inodedep, 1) == 1) {
+ /*
+ * If we are over our limit, try to improve the situation.
+ */
+ if (num_inodedep > max_softdeps && firsttry && speedup_syncer() == 0 &&
+ request_cleanup(FLUSH_INODES, 1)) {
firsttry = 0;
goto top;
}
@@ -940,20 +925,8 @@ softdep_mount(devvp, mp, fs, cred)
brelse(bp);
}
#ifdef DEBUG
- if (bcmp(&cstotal, &fs->fs_cstotal, sizeof cstotal)) {
- printf("ffs_mountfs: superblock updated\n");
- printf ("%d %d %d %d\n",
- cstotal.cs_nffree,
- cstotal.cs_nbfree,
- cstotal.cs_nifree,
- cstotal.cs_ndir);
-
- printf ("%d %d %d %d\n",
- fs->fs_cstotal.cs_nffree,
- fs->fs_cstotal.cs_nbfree,
- fs->fs_cstotal.cs_nifree,
- fs->fs_cstotal.cs_ndir);
- }
+ if (bcmp(&cstotal, &fs->fs_cstotal, sizeof cstotal))
+ printf("ffs_mountfs: superblock updated for soft updates\n");
#endif
bcopy(&cstotal, &fs->fs_cstotal, sizeof cstotal);
return (0);
@@ -1232,7 +1205,7 @@ allocdirect_merge(adphead, newadp, oldadp)
if (newadp->ad_oldblkno != oldadp->ad_newblkno ||
newadp->ad_oldsize != oldadp->ad_newsize ||
newadp->ad_lbn >= NDADDR)
- panic("allocdirect_check: old %d != new %d || lbn %d >= %d",
+ panic("allocdirect_check: old %d != new %d || lbn %ld >= %d",
newadp->ad_oldblkno, oldadp->ad_newblkno, newadp->ad_lbn,
NDADDR);
newadp->ad_oldblkno = oldadp->ad_oldblkno;
@@ -1556,7 +1529,11 @@ softdep_setup_freeblocks(ip, length)
fs = ip->i_fs;
if (length != 0)
panic("softde_setup_freeblocks: non-zero length");
- (void) checklimit(&num_freeblks, 0);
+ /*
+ * If we are over our limit, try to improve the situation.
+ */
+ if (num_freeblks > max_softdeps / 2 && speedup_syncer() == 0)
+ (void) request_cleanup(FLUSH_REMOVE, 0);
num_freeblks += 1;
MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks),
M_FREEBLKS, M_WAITOK);
@@ -1612,6 +1589,7 @@ softdep_setup_freeblocks(ip, length)
merge_inode_lists(inodedep);
while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
free_allocdirect(&inodedep->id_inoupdt, adp, 1);
+ FREE_LOCK(&lk);
bdwrite(bp);
/*
* We must wait for any I/O in progress to finish so that
@@ -1620,18 +1598,16 @@ softdep_setup_freeblocks(ip, length)
* any dependencies.
*/
vp = ITOV(ip);
- while (vp->v_numoutput) {
- vp->v_flag |= VBWAIT;
- FREE_LOCK_INTERLOCKED(&lk);
- sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
- ACQUIRE_LOCK_INTERLOCKED(&lk);
- }
+ ACQUIRE_LOCK(&lk);
+ drain_output(vp, 1);
while (getdirtybuf(&LIST_FIRST(&vp->v_dirtyblkhd), MNT_WAIT)) {
bp = LIST_FIRST(&vp->v_dirtyblkhd);
(void) inodedep_lookup(fs, ip->i_number, 0, &inodedep);
deallocate_dependencies(bp, inodedep);
bp->b_flags |= B_INVAL;
+ FREE_LOCK(&lk);
brelse(bp);
+ ACQUIRE_LOCK(&lk);
}
/*
* Try freeing the inodedep in case that was the last dependency.
@@ -1701,7 +1677,8 @@ deallocate_dependencies(bp, inodedep)
* visible, so they can simply be tossed.
*/
for (i = 0; i < DAHASHSZ; i++)
- while ((dap=LIST_FIRST(&pagedep->pd_diraddhd[i])))
+ while ((dap =
+ LIST_FIRST(&pagedep->pd_diraddhd[i])))
free_diradd(dap);
while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != 0)
free_diradd(dap);
@@ -1792,9 +1769,13 @@ softdep_freefile(ap)
struct freefile *freefile;
/*
+ * If we are over our limit, try to improve the situation.
+ */
+ if (num_freefile > max_softdeps / 2 && speedup_syncer() == 0)
+ (void) request_cleanup(FLUSH_REMOVE, 0);
+ /*
* This sets up the inode de-allocation dependency.
*/
- (void) checklimit(&num_freefile, 0);
num_freefile += 1;
MALLOC(freefile, struct freefile *, sizeof(struct freefile),
M_FREEFILE, M_WAITOK);
@@ -2114,16 +2095,19 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp)
mkdir2->md_list.wk_type = D_MKDIR;
mkdir2->md_state = MKDIR_PARENT;
mkdir2->md_diradd = dap;
- ACQUIRE_LOCK(&lk);
/*
* Dependency on "." and ".." being written to disk.
*/
+ mkdir1->md_buf = newdirbp;
+ ACQUIRE_LOCK(&lk);
LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs);
WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list);
+ FREE_LOCK(&lk);
bdwrite(newdirbp);
/*
* Dependency on link count increase for parent directory
*/
+ ACQUIRE_LOCK(&lk);
if (inodedep_lookup(dp->i_fs, dp->i_number, 0, &inodedep) == 0
|| (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
dap->da_state &= ~MKDIR_PARENT;
@@ -2339,7 +2323,6 @@ newdirrem(bp, dp, ip, isrmdir)
dap; dap = LIST_NEXT(dap, da_pdlist))
if (dap->da_offset == offset)
break;
-
if (dap == NULL) {
for (dap = LIST_FIRST(&pagedep->pd_pendinghd);
dap; dap = LIST_NEXT(dap, da_pdlist))
@@ -2348,7 +2331,6 @@ newdirrem(bp, dp, ip, isrmdir)
if (dap == NULL)
return (dirrem);
}
-
/*
* Must be ATTACHED at this point, so just delete it.
*/
@@ -2359,7 +2341,6 @@ newdirrem(bp, dp, ip, isrmdir)
ip->i_number, dap->da_newinum);
free_diradd(dap);
dirrem->dm_state |= COMPLETE;
-
return (dirrem);
}
@@ -2389,7 +2370,7 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
int isrmdir; /* indicates if doing RMDIR */
{
int offset;
- struct diradd *dap;
+ struct diradd *dap = NULL;
struct dirrem *dirrem;
struct pagedep *pagedep;
struct inodedep *inodedep;
@@ -2416,8 +2397,8 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
pagedep = dirrem->dm_pagedep;
/*
* The possible values for isrmdir:
- * 0 - non-directory file rename
- * 1 - directory rename within same directory
+ * 0 - non-directory file rename
+ * 1 - directory rename within same directory
* inum - directory rename to new directory of given inode number
* When renaming to a new directory, we are both deleting and
* creating a new directory entry, so the link count on the new
@@ -2660,6 +2641,7 @@ softdep_disk_io_initiation(bp)
* dependency can be freed.
*/
if (LIST_FIRST(&indirdep->ir_deplisthd) == NULL) {
+ indirdep->ir_savebp->b_flags |= B_INVAL;
brelse(indirdep->ir_savebp);
/* inline expand WORKLIST_REMOVE(wk); */
wk->wk_state &= ~ONWORKLIST;
@@ -2791,12 +2773,12 @@ initiate_write_inodeblock(inodedep, bp)
prevlbn = adp->ad_lbn;
if (adp->ad_lbn < NDADDR &&
dp->di_db[adp->ad_lbn] != adp->ad_newblkno)
- panic("%s: direct pointer #%d mismatch %d != %d",
+ panic("%s: direct pointer #%ld mismatch %d != %d",
"softdep_write_inodeblock", adp->ad_lbn,
dp->di_db[adp->ad_lbn], adp->ad_newblkno);
if (adp->ad_lbn >= NDADDR &&
dp->di_ib[adp->ad_lbn - NDADDR] != adp->ad_newblkno)
- panic("%s: indirect pointer #%d mismatch %d != %d",
+ panic("%s: indirect pointer #%ld mismatch %d != %d",
"softdep_write_inodeblock", adp->ad_lbn - NDADDR,
dp->di_ib[adp->ad_lbn - NDADDR], adp->ad_newblkno);
deplist |= 1 << adp->ad_lbn;
@@ -2917,20 +2899,22 @@ softdep_disk_write_complete(bp)
newblk->nb_bmsafemap = NULL;
LIST_REMOVE(newblk, nb_deps);
}
- while ((adp = LIST_FIRST(&bmsafemap->sm_allocdirecthd))) {
+ while ((adp =
+ LIST_FIRST(&bmsafemap->sm_allocdirecthd))) {
adp->ad_state |= DEPCOMPLETE;
adp->ad_buf = NULL;
LIST_REMOVE(adp, ad_deps);
handle_allocdirect_partdone(adp);
}
- while ((aip = LIST_FIRST(&bmsafemap->sm_allocindirhd))) {
+ while ((aip =
+ LIST_FIRST(&bmsafemap->sm_allocindirhd))) {
aip->ai_state |= DEPCOMPLETE;
aip->ai_buf = NULL;
LIST_REMOVE(aip, ai_deps);
handle_allocindir_partdone(aip);
}
while ((inodedep =
- LIST_FIRST(&bmsafemap->sm_inodedephd)) != NULL) {
+ LIST_FIRST(&bmsafemap->sm_inodedephd)) != NULL) {
inodedep->id_state |= DEPCOMPLETE;
LIST_REMOVE(inodedep, id_deps);
inodedep->id_buf = NULL;
@@ -2962,10 +2946,13 @@ softdep_disk_write_complete(bp)
indirdep->ir_state &= ~UNDONE;
indirdep->ir_state |= ATTACHED;
while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) {
- LIST_REMOVE(aip, ai_next);
handle_allocindir_partdone(aip);
+ if (aip == LIST_FIRST(&indirdep->ir_donehd))
+ panic("disk_write_complete: not gone");
}
WORKLIST_INSERT(&reattach, wk);
+ if ((bp->b_flags & B_DELWRI) == 0)
+ stat_indir_blk_ptrs++;
bdirty(bp);
continue;
@@ -3121,6 +3108,8 @@ handle_written_inodeblock(inodedep, bp)
*dp = *inodedep->id_savedino;
FREE(inodedep->id_savedino, M_INODEDEP);
inodedep->id_savedino = NULL;
+ if ((bp->b_flags & B_DELWRI) == 0)
+ stat_inode_bitmap++;
bdirty(bp);
return (1);
}
@@ -3135,14 +3124,14 @@ handle_written_inodeblock(inodedep, bp)
panic("handle_written_inodeblock: new entry");
if (adp->ad_lbn < NDADDR) {
if (dp->di_db[adp->ad_lbn] != adp->ad_oldblkno)
- panic("%s: %s #%d mismatch %d != %d",
+ panic("%s: %s #%ld mismatch %d != %d",
"handle_written_inodeblock",
"direct pointer", adp->ad_lbn,
dp->di_db[adp->ad_lbn], adp->ad_oldblkno);
dp->di_db[adp->ad_lbn] = adp->ad_newblkno;
} else {
if (dp->di_ib[adp->ad_lbn - NDADDR] != 0)
- panic("%s: %s #%d allocated as %d",
+ panic("%s: %s #%ld allocated as %d",
"handle_written_inodeblock",
"indirect pointer", adp->ad_lbn - NDADDR,
dp->di_ib[adp->ad_lbn - NDADDR]);
@@ -3152,6 +3141,8 @@ handle_written_inodeblock(inodedep, bp)
adp->ad_state |= ATTACHED;
hadchanges = 1;
}
+ if (hadchanges && (bp->b_flags & B_DELWRI) == 0)
+ stat_direct_blk_ptrs++;
/*
* Reset the file size to its most up-to-date value.
*/
@@ -3350,8 +3341,11 @@ handle_written_filepage(pagedep, bp)
* marked dirty so that its will eventually get written back in
* its correct form.
*/
- if (chgs)
+ if (chgs) {
+ if ((bp->b_flags & B_DELWRI) == 0)
+ stat_dir_entry++;
bdirty(bp);
+ }
/*
* If no dependencies remain, the pagedep will be freed.
* Otherwise it will remain to update the page before it
@@ -3425,7 +3419,7 @@ void
softdep_update_inodeblock(ip, bp, waitfor)
struct inode *ip; /* the "in_core" copy of the inode */
struct buf *bp; /* the buffer containing the inode block */
- int waitfor; /* 1 => update must be allowed */
+ int waitfor; /* nonzero => update must be allowed */
{
struct inodedep *inodedep;
struct worklist *wk;
@@ -3635,6 +3629,52 @@ softdep_fsync(vp)
}
/*
+ * Flush all the dirty bitmaps associated with the block device
+ * before flushing the rest of the dirty blocks so as to reduce
+ * the number of dependencies that will have to be rolled back.
+ */
+void
+softdep_fsync_mountdev(vp)
+ struct vnode *vp;
+{
+ struct buf *bp, *nbp;
+ struct worklist *wk;
+
+ if (vp->v_type != VBLK)
+ panic("softdep_fsync_mountdev: vnode not VBLK");
+ ACQUIRE_LOCK(&lk);
+ for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
+ nbp = LIST_NEXT(bp, b_vnbufs);
+ /*
+ * If it is already scheduled, skip to the next buffer.
+ */
+ if (bp->b_flags & B_BUSY)
+ continue;
+ if ((bp->b_flags & B_DELWRI) == 0)
+ panic("softdep_fsync_mountdev: not dirty");
+ /*
+ * We are only interested in bitmaps with outstanding
+ * dependencies.
+ */
+ if ((wk = LIST_FIRST(&bp->b_dep)) == NULL ||
+ wk->wk_type != D_BMSAFEMAP)
+ continue;
+ bremfree(bp);
+ bp->b_flags |= B_BUSY;
+ FREE_LOCK(&lk);
+ (void) bawrite(bp);
+ ACQUIRE_LOCK(&lk);
+ /*
+ * Since we may have slept during the I/O, we need
+ * to start from a known point.
+ */
+ nbp = LIST_FIRST(&vp->v_dirtyblkhd);
+ }
+ drain_output(vp, 1);
+ FREE_LOCK(&lk);
+}
+
+/*
* This routine is called when we are trying to synchronously flush a
* file. This routine must eliminate any filesystem metadata dependencies
* so that the syncing routine can succeed by pushing the dirty blocks
@@ -3783,8 +3823,9 @@ loop:
for (i = 0; i < DAHASHSZ; i++) {
if (LIST_FIRST(&pagedep->pd_diraddhd[i]) == 0)
continue;
- if ((error = flush_pagedep_deps(vp,
- pagedep->pd_mnt, &pagedep->pd_diraddhd[i]))) {
+ if ((error =
+ flush_pagedep_deps(vp, pagedep->pd_mnt,
+ &pagedep->pd_diraddhd[i]))) {
FREE_LOCK(&lk);
bawrite(bp);
return (error);
@@ -3792,6 +3833,48 @@ loop:
}
break;
+ case D_MKDIR:
+ /*
+ * This case should never happen if the vnode has
+ * been properly sync'ed. However, if this function
+ * is used at a place where the vnode has not yet
+ * been sync'ed, this dependency can show up. So,
+ * rather than panic, just flush it.
+ */
+ nbp = WK_MKDIR(wk)->md_buf;
+ if (getdirtybuf(&nbp, waitfor) == 0)
+ break;
+ FREE_LOCK(&lk);
+ if (waitfor == MNT_NOWAIT) {
+ bawrite(nbp);
+ } else if ((error = VOP_BWRITE(nbp)) != 0) {
+ bawrite(bp);
+ return (error);
+ }
+ ACQUIRE_LOCK(&lk);
+ break;
+
+ case D_BMSAFEMAP:
+ /*
+ * This case should never happen if the vnode has
+ * been properly sync'ed. However, if this function
+ * is used at a place where the vnode has not yet
+ * been sync'ed, this dependency can show up. So,
+ * rather than panic, just flush it.
+ */
+ nbp = WK_BMSAFEMAP(wk)->sm_buf;
+ if (getdirtybuf(&nbp, waitfor) == 0)
+ break;
+ FREE_LOCK(&lk);
+ if (waitfor == MNT_NOWAIT) {
+ bawrite(nbp);
+ } else if ((error = VOP_BWRITE(nbp)) != 0) {
+ bawrite(bp);
+ return (error);
+ }
+ ACQUIRE_LOCK(&lk);
+ break;
+
default:
panic("softdep_sync_metadata: Unknown type %s",
TYPENAME(wk->wk_type));
@@ -3813,12 +3896,7 @@ loop:
* Once they are all there, proceed with the second pass
* which will wait for the I/O as per above.
*/
- while (vp->v_numoutput) {
- vp->v_flag |= VBWAIT;
- FREE_LOCK_INTERLOCKED(&lk);
- sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
- ACQUIRE_LOCK_INTERLOCKED(&lk);
- }
+ drain_output(vp, 1);
/*
* The brief unlock is to allow any pent up dependency
* processing to be done.
@@ -4043,18 +4121,12 @@ flush_pagedep_deps(pvp, mp, diraddhdp)
* level in the filesystem. Instead, we push the blocks
* and wait for them to clear.
*/
- if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) {
+ if ((error =
+ VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) {
vput(vp);
break;
}
- ACQUIRE_LOCK(&lk);
- while (vp->v_numoutput) {
- vp->v_flag |= VBWAIT;
- FREE_LOCK_INTERLOCKED(&lk);
- sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
- ACQUIRE_LOCK_INTERLOCKED(&lk);
- }
- FREE_LOCK(&lk);
+ drain_output(vp, 0);
}
TIMEVAL_TO_TIMESPEC(&time, &ts);
error = VOP_UPDATE(vp, &ts, &ts, MNT_WAIT);
@@ -4075,6 +4147,209 @@ flush_pagedep_deps(pvp, mp, diraddhdp)
}
/*
+ * A large burst of file addition or deletion activity can drive the
+ * memory load excessively high. Therefore we deliberately slow things
+ * down and speed up the I/O processing if we find ourselves with too
+ * many dependencies in progress.
+ */
+static int
+request_cleanup(resource, islocked)
+ int resource;
+ int islocked;
+{
+ struct proc *p = CURPROC;
+
+ /*
+ * We never hold up the filesystem syncer process.
+ */
+ if (p == filesys_syncer)
+ return (0);
+ /*
+ * If we are resource constrained on inode dependencies, try
+ * flushing some dirty inodes. Otherwise, we are constrained
+ * by file deletions, so try accelerating flushes of directories
+ * with removal dependencies. We would like to do the cleanup
+ * here, but we probably hold an inode locked at this point and
+ * that might deadlock against one that we try to clean. So,
+ * the best that we can do is request the syncer daemon to do
+ * the cleanup for us.
+ */
+ switch (resource) {
+
+ case FLUSH_INODES:
+ stat_ino_limit_push += 1;
+ req_clear_inodedeps = 1;
+ break;
+
+ case FLUSH_REMOVE:
+ stat_blk_limit_push += 1;
+ req_clear_remove = 1;
+ break;
+
+ default:
+ panic("request_cleanup: unknown type");
+ }
+ /*
+ * Hopefully the syncer daemon will catch up and awaken us.
+ * We wait at most tickdelay before proceeding in any case.
+ */
+ if (islocked == 0)
+ ACQUIRE_LOCK(&lk);
+ if (proc_waiting == 0) {
+ proc_waiting = 1;
+ timeout(pause_timer, NULL, tickdelay > 2 ? tickdelay : 2);
+ }
+ FREE_LOCK_INTERLOCKED(&lk);
+ (void) tsleep((caddr_t)&proc_waiting, PPAUSE | PCATCH, "softupdate", 0);
+ ACQUIRE_LOCK_INTERLOCKED(&lk);
+ if (proc_waiting) {
+ untimeout(pause_timer, NULL);
+ proc_waiting = 0;
+ } else {
+ switch (resource) {
+
+ case FLUSH_INODES:
+ stat_ino_limit_hit += 1;
+ break;
+
+ case FLUSH_REMOVE:
+ stat_blk_limit_hit += 1;
+ break;
+ }
+ }
+ if (islocked == 0)
+ FREE_LOCK(&lk);
+ return (1);
+}
+
+/*
+ * Awaken processes pausing in request_cleanup and clear proc_waiting
+ * to indicate that there is no longer a timer running.
+ */
+void
+pause_timer(arg)
+ void *arg;
+{
+
+ proc_waiting = 0;
+ wakeup(&proc_waiting);
+}
+
+/*
+ * Flush out a directory with at least one removal dependency in an effort
+ * to reduce the number of freefile and freeblks dependency structures.
+ */
+static void
+clear_remove(p)
+ struct proc *p;
+{
+ struct pagedep_hashhead *pagedephd;
+ struct pagedep *pagedep;
+ static int next = 0;
+ struct mount *mp;
+ struct vnode *vp;
+ int error, cnt;
+ ino_t ino;
+
+ ACQUIRE_LOCK(&lk);
+ for (cnt = 0; cnt < pagedep_hash; cnt++) {
+ pagedephd = &pagedep_hashtbl[next++];
+ if (next >= pagedep_hash)
+ next = 0;
+ for (pagedep = LIST_FIRST(pagedephd); pagedep;
+ pagedep = LIST_NEXT(pagedep, pd_hash)) {
+ if (LIST_FIRST(&pagedep->pd_dirremhd) == NULL)
+ continue;
+ mp = pagedep->pd_mnt;
+ ino = pagedep->pd_ino;
+ FREE_LOCK(&lk);
+ if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
+ softdep_error("clear_remove: vget", error);
+ return;
+ }
+ if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)))
+ softdep_error("clear_remove: fsync", error);
+ drain_output(vp, 0);
+ vput(vp);
+ return;
+ }
+ }
+ FREE_LOCK(&lk);
+}
+
+/*
+ * Clear out a block of dirty inodes in an effort to reduce
+ * the number of inodedep dependency structures.
+ */
+static void
+clear_inodedeps(p)
+ struct proc *p;
+{
+ struct inodedep_hashhead *inodedephd;
+ struct inodedep *inodedep;
+ static int next = 0;
+ struct mount *mp;
+ struct vnode *vp;
+ struct fs *fs;
+ int error, cnt;
+ ino_t firstino, lastino, ino;
+
+ ACQUIRE_LOCK(&lk);
+ /*
+ * Pick a random inode dependency to be cleared.
+ * We will then gather up all the inodes in its block
+ * that have dependencies and flush them out.
+ */
+ for (cnt = 0; cnt < inodedep_hash; cnt++) {
+ inodedephd = &inodedep_hashtbl[next++];
+ if (next >= inodedep_hash)
+ next = 0;
+ if ((inodedep = LIST_FIRST(inodedephd)) != NULL)
+ break;
+ }
+ /*
+ * Ugly code to find mount point given pointer to superblock.
+ */
+ fs = inodedep->id_fs;
+ for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
+ mp = CIRCLEQ_NEXT(mp, mnt_list))
+ if ((mp->mnt_flag & MNT_SOFTDEP) && fs == VFSTOUFS(mp)->um_fs)
+ break;
+ /*
+ * Find the last inode in the block with dependencies.
+ */
+ firstino = inodedep->id_ino & ~(INOPB(fs) - 1);
+ for (lastino = firstino + INOPB(fs) - 1; lastino > firstino; lastino--)
+ if (inodedep_lookup(fs, lastino, 0, &inodedep) != 0)
+ break;
+ /*
+ * Asynchronously push all but the last inode with dependencies.
+ * Synchronously push the last inode with dependencies to ensure
+ * that the inode block gets written to free up the inodedeps.
+ */
+ for (ino = firstino; ino <= lastino; ino++) {
+ if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
+ continue;
+ FREE_LOCK(&lk);
+ if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
+ softdep_error("clear_inodedeps: vget", error);
+ return;
+ }
+ if (ino == lastino) {
+ if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p)))
+ softdep_error("clear_inodedeps: fsync1", error);
+ } else {
+ if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)))
+ softdep_error("clear_inodedeps: fsync2", error);
+ drain_output(vp, 0);
+ }
+ vput(vp);
+ ACQUIRE_LOCK(&lk);
+ }
+ FREE_LOCK(&lk);
+}
+
+/*
* Acquire exclusive access to a buffer.
* Must be called with splbio blocked.
* Return 1 if buffer was acquired.
@@ -4095,7 +4370,7 @@ getdirtybuf(bpp, waitfor)
return (0);
bp->b_flags |= B_WANTED;
FREE_LOCK_INTERLOCKED(&lk);
- sleep((caddr_t)bp, PRIBIO + 1);
+ tsleep((caddr_t)bp, PRIBIO + 1, "sdsdty", 0);
ACQUIRE_LOCK_INTERLOCKED(&lk);
}
if ((bp->b_flags & B_DELWRI) == 0)
@@ -4106,6 +4381,28 @@ getdirtybuf(bpp, waitfor)
}
/*
+ * Wait for pending output on a vnode to complete.
+ * Must be called with vnode locked.
+ */
+static void
+drain_output(vp, islocked)
+ struct vnode *vp;
+ int islocked;
+{
+
+ if (!islocked)
+ ACQUIRE_LOCK(&lk);
+ while (vp->v_numoutput) {
+ vp->v_flag |= VBWAIT;
+ FREE_LOCK_INTERLOCKED(&lk);
+ tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "drainvp", 0);
+ ACQUIRE_LOCK_INTERLOCKED(&lk);
+ }
+ if (!islocked)
+ FREE_LOCK(&lk);
+}
+
+/*
* Called whenever a buffer that is being invalidated or reallocated
* contains dependencies. This should only happen if an I/O error has
* occurred. The routine is called with the buffer locked.
@@ -4114,43 +4411,11 @@ void
softdep_deallocate_dependencies(bp)
struct buf *bp;
{
- struct worklist *wk;
if ((bp->b_flags & B_ERROR) == 0)
panic("softdep_deallocate_dependencies: dangling deps");
softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error);
- ACQUIRE_LOCK(&lk);
- while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
- WORKLIST_REMOVE(wk);
- FREE_LOCK(&lk);
- switch (wk->wk_type) {
- /*
- * XXX - should really clean up, but for now we will
- * just leak memory and not worry about it. Also should
- * mark the filesystem permanently dirty so that it will
- * force fsck to be run (though this would best be done
- * in the mainline code).
- */
- case D_PAGEDEP:
- case D_INODEDEP:
- case D_BMSAFEMAP:
- case D_ALLOCDIRECT:
- case D_INDIRDEP:
- case D_ALLOCINDIR:
- case D_MKDIR:
-#ifdef DEBUG
- printf("Lost type %s\n", TYPENAME(wk->wk_type));
-#endif
- break;
- default:
- panic("%s: Unexpected type %s",
- "softdep_deallocate_dependencies",
- TYPENAME(wk->wk_type));
- /* NOTREACHED */
- }
- ACQUIRE_LOCK(&lk);
- }
- FREE_LOCK(&lk);
+ panic("softdep_deallocate_dependencies: unrecovered I/O error");
}
/*
@@ -4163,8 +4428,5 @@ softdep_error(func, error)
{
/* XXX should do something better! */
- log(LOG_ERR, "%s: got error %d while accessing filesystem\n",
- func, error);
+ printf("%s: got error %d while accessing filesystem\n", func, error);
}
-
-#endif /* FFS_SOFTUPDATES */
diff --git a/sys/ufs/ffs/ffs_softdep_stub.c b/sys/ufs/ffs/ffs_softdep_stub.c
index 93c790d1d47..bd06b5fbdd2 100644
--- a/sys/ufs/ffs/ffs_softdep_stub.c
+++ b/sys/ufs/ffs/ffs_softdep_stub.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_softdep_stub.c,v 1.1 1998/03/15 03:53:51 millert Exp $ */
+/* $OpenBSD: ffs_softdep_stub.c,v 1.2 1999/12/05 08:30:38 art Exp $ */
/*
* Copyright 1997 Marshall Kirk McKusick. All Rights Reserved.
@@ -247,4 +247,12 @@ softdep_sync_metadata(ap)
return (0);
}
+void
+softdep_fsync_mountdev(vp)
+ struct vnode *vp;
+{
+ panic("softdep_fsync_mountdev called");
+}
+
+
#endif /* !FFS_SOFTUPDATES */
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index b987c318c7a..1e395e51620 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_vnops.c,v 1.9 1999/02/26 03:56:30 art Exp $ */
+/* $OpenBSD: ffs_vnops.c,v 1.10 1999/12/05 08:30:38 art Exp $ */
/* $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $ */
/*
@@ -255,6 +255,11 @@ ffs_fsync(v)
struct timespec ts;
int s, error, passes, skipmeta;
+ if (vp->v_type == VBLK &&
+ vp->v_specmountpoint != NULL &&
+ (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP))
+ softdep_fsync_mountdev(vp);
+
/*
* Flush all dirty buffers associated with a vnode
*/
diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h
index 3435f630417..b34bd579315 100644
--- a/sys/ufs/ffs/softdep.h
+++ b/sys/ufs/ffs/softdep.h
@@ -52,7 +52,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * @(#)softdep.h 9.5 (McKusick) 2/11/98
+ * @(#)softdep.h 9.6 (McKusick) 2/25/99
*/
#include <sys/queue.h>
@@ -516,6 +516,7 @@ struct mkdir {
struct worklist md_list; /* id_inowait or buffer holding dir */
# define md_state md_list.wk_state /* type: MKDIR_PARENT or MKDIR_BODY */
struct diradd *md_diradd; /* associated diradd */
+ struct buf *md_buf; /* MKDIR_BODY: buffer holding dir */
LIST_ENTRY(mkdir) md_mkdirs; /* list of all mkdirs */
};
LIST_HEAD(mkdirlist, mkdir) mkdirlisthd;