diff options
-rw-r--r-- | sys/ufs/ffs/ffs_extern.h | 4 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_softdep.c | 616 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_softdep_stub.c | 10 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_vnops.c | 7 | ||||
-rw-r--r-- | sys/ufs/ffs/softdep.h | 3 |
5 files changed, 458 insertions, 182 deletions
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index d955a10e74a..a9fdfd5846d 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_extern.h,v 1.7 1998/02/08 22:41:49 tholo Exp $ */ +/* $OpenBSD: ffs_extern.h,v 1.8 1999/12/05 08:30:38 art Exp $ */ /* $NetBSD: ffs_extern.h,v 1.4 1996/02/09 22:22:22 christos Exp $ */ /*- @@ -159,7 +159,7 @@ void softdep_setup_allocindir_page __P((struct inode *, ufs_lbn_t, void softdep_disk_io_initiation __P((struct buf *)); void softdep_disk_write_complete __P((struct buf *)); int softdep_sync_metadata __P((struct vop_fsync_args *)); - +void softdep_fsync_mountdev __P((struct vnode *)); __END_DECLS extern int (**ffs_vnodeop_p) __P((void *)); diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index 8162bc3ddf9..b82a65a19bb 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -52,11 +52,9 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)ffs_softdep.c 9.30 (McKusick) 10/3/98 + * @(#)ffs_softdep.c 9.40 (McKusick) 6/15/99 */ -#ifdef FFS_SOFTUPDATES - /* * For now we want the safety net that the DIAGNOSTIC and DEBUG flags provide. */ @@ -124,7 +122,10 @@ extern char *memname[]; * Internal function prototypes. */ static void softdep_error __P((char *, int)); +static void drain_output __P((struct vnode *, int)); static int getdirtybuf __P((struct buf **, int)); +static void clear_remove __P((struct proc *)); +static void clear_inodedeps __P((struct proc *)); static int flush_pagedep_deps __P((struct vnode *, struct mount *, struct diraddhd *)); static int flush_inodedep_deps __P((struct fs *, ino_t)); @@ -165,7 +166,7 @@ static int inodedep_lookup __P((struct fs *, ino_t, int, struct inodedep **)); static int pagedep_lookup __P((struct inode *, ufs_lbn_t, int, struct pagedep **)); static void pause_timer __P((void *)); -static int checklimit __P((long *, int)); +static int request_cleanup __P((int, int)); static void add_to_worklist __P((struct worklist *)); /* @@ -175,7 +176,7 @@ struct bio_ops bioops = { softdep_disk_io_initiation, /* io_start */ softdep_disk_write_complete, /* io_complete */ softdep_deallocate_dependencies, /* io_deallocate */ - softdep_fsync, /* io_fsync */ + softdep_fsync, /* io_fsync */ softdep_process_worklist, /* io_sync */ }; @@ -410,17 +411,36 @@ static struct workhead softdep_workitem_pending; static int softdep_worklist_busy; static int max_softdeps; /* maximum number of structs before slowdown */ static int tickdelay = 2; /* number of ticks to pause during slowdown */ -static int max_limit_hit; /* number of times slowdown imposed */ -static int rush_requests; /* number of times I/O speeded up */ static int proc_waiting; /* tracks whether we have a timeout posted */ -static pid_t filesys_syncer_pid;/* records pid of filesystem syncer process */ +static struct proc *filesys_syncer; /* proc of filesystem syncer process */ +static int req_clear_inodedeps; /* syncer process flush some inodedeps */ +#define FLUSH_INODES 1 +static int req_clear_remove; /* syncer process flush some freeblks */ +#define FLUSH_REMOVE 2 +/* + * runtime statistics + */ +static int stat_blk_limit_push; /* number of times block limit neared */ +static int stat_ino_limit_push; /* number of times inode limit neared */ +static int stat_blk_limit_hit; /* number of times block slowdown imposed */ +static int stat_ino_limit_hit; /* number of times inode slowdown imposed */ +static int stat_indir_blk_ptrs; /* bufs redirtied as indir ptrs not written */ +static int stat_inode_bitmap; /* bufs redirtied as inode bitmap not written */ +static int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */ +static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */ #ifdef DEBUG #include <vm/vm.h> #include <sys/sysctl.h> -struct ctldebug debug8 = { "max_softdeps", &max_softdeps }; -struct ctldebug debug9 = { "tickdelay", &tickdelay }; -struct ctldebug debug10 = { "max_limit_hit", &max_limit_hit }; -struct ctldebug debug11 = { "rush_requests", &rush_requests }; +struct ctldebug debug20 = { "max_softdeps", &max_softdeps }; +struct ctldebug debug21 = { "tickdelay", &tickdelay }; +struct ctldebug debug23 = { "blk_limit_push", &stat_blk_limit_push }; +struct ctldebug debug24 = { "ino_limit_push", &stat_ino_limit_push }; +struct ctldebug debug25 = { "blk_limit_hit", &stat_blk_limit_hit }; +struct ctldebug debug26 = { "ino_limit_hit", &stat_ino_limit_hit }; +struct ctldebug debug27 = { "indir_blk_ptrs", &stat_indir_blk_ptrs }; +struct ctldebug debug28 = { "inode_bitmap", &stat_inode_bitmap }; +struct ctldebug debug29 = { "direct_blk_ptrs", &stat_direct_blk_ptrs }; +struct ctldebug debug30 = { "dir_entry", &stat_dir_entry }; #endif /* DEBUG */ /* @@ -465,10 +485,10 @@ softdep_process_worklist(matchmnt) int matchcnt; /* - * Record the process identifier of our caller so that we can - * give this process preferential treatment in checklimit below. + * Record the process identifier of our caller so that we can give + * this process preferential treatment in request_cleanup below. */ - filesys_syncer_pid = p->p_pid; + filesys_syncer = p; matchcnt = 0; matchfs = NULL; if (matchmnt != NULL) @@ -481,6 +501,19 @@ softdep_process_worklist(matchmnt) */ if (softdep_worklist_busy && matchmnt == NULL) return (-1); + /* + * If requested, try removing inode or removal dependencies. + */ + if (req_clear_inodedeps) { + clear_inodedeps(p); + req_clear_inodedeps = 0; + wakeup(&proc_waiting); + } + if (req_clear_remove) { + clear_remove(p); + req_clear_remove = 0; + wakeup(&proc_waiting); + } ACQUIRE_LOCK(&lk); while ((wk = LIST_FIRST(&softdep_workitem_pending)) != 0) { WORKLIST_REMOVE(wk); @@ -522,6 +555,19 @@ softdep_process_worklist(matchmnt) } if (softdep_worklist_busy && matchmnt == NULL) return (-1); + /* + * If requested, try removing inode or removal dependencies. + */ + if (req_clear_inodedeps) { + clear_inodedeps(p); + req_clear_inodedeps = 0; + wakeup(&proc_waiting); + } + if (req_clear_remove) { + clear_remove(p); + req_clear_remove = 0; + wakeup(&proc_waiting); + } ACQUIRE_LOCK(&lk); } FREE_LOCK(&lk); @@ -592,71 +638,6 @@ softdep_flushfiles(oldmnt, flags, p) } /* - * A large burst of file addition or deletion activity can drive the - * memory load excessively high. Therefore we deliberately slow things - * down and speed up the I/O processing if we find ourselves with too - * many dependencies in progress. - */ -static int -checklimit(resource, islocked) - long *resource; - int islocked; -{ - struct proc *p = CURPROC; - - /* - * If we are under our limit, just proceed. - */ - if (*resource < max_softdeps) - return (0); - /* - * We never hold up the filesystem syncer process. - */ - if (p->p_pid == filesys_syncer_pid) - return (0); - /* - * Our first approach is to speed up the syncer process. - * We never push it to speed up more than half of its - * normal turn time, otherwise it could take over the cpu. - */ - if (rushjob < syncdelay / 2) { - rushjob += 1; - rush_requests += 1; - return (0); - } - /* - * Every trick has failed, so we pause momentarily to let - * the filesystem syncer process catch up. - */ - if (islocked == 0) - ACQUIRE_LOCK(&lk); - if (proc_waiting == 0) { - proc_waiting = 1; - timeout(pause_timer, NULL, tickdelay > 2 ? tickdelay : 2); - } - FREE_LOCK_INTERLOCKED(&lk); - (void) tsleep((caddr_t)&proc_waiting, PPAUSE | PCATCH, "softupdate", 0); - ACQUIRE_LOCK_INTERLOCKED(&lk); - if (islocked == 0) - FREE_LOCK(&lk); - max_limit_hit += 1; - return (1); -} - -/* - * Awaken processes pausing in checklimit and clear proc_waiting - * to indicate that there is no longer a timer running. - */ -void -pause_timer(arg) - void *arg; -{ - - proc_waiting = 0; - wakeup(&proc_waiting); -} - -/* * Structure hashing. * * There are three types of structures that can be looked up: @@ -797,7 +778,11 @@ top: *inodedeppp = NULL; return (0); } - if (firsttry && checklimit(&num_inodedep, 1) == 1) { + /* + * If we are over our limit, try to improve the situation. + */ + if (num_inodedep > max_softdeps && firsttry && speedup_syncer() == 0 && + request_cleanup(FLUSH_INODES, 1)) { firsttry = 0; goto top; } @@ -940,20 +925,8 @@ softdep_mount(devvp, mp, fs, cred) brelse(bp); } #ifdef DEBUG - if (bcmp(&cstotal, &fs->fs_cstotal, sizeof cstotal)) { - printf("ffs_mountfs: superblock updated\n"); - printf ("%d %d %d %d\n", - cstotal.cs_nffree, - cstotal.cs_nbfree, - cstotal.cs_nifree, - cstotal.cs_ndir); - - printf ("%d %d %d %d\n", - fs->fs_cstotal.cs_nffree, - fs->fs_cstotal.cs_nbfree, - fs->fs_cstotal.cs_nifree, - fs->fs_cstotal.cs_ndir); - } + if (bcmp(&cstotal, &fs->fs_cstotal, sizeof cstotal)) + printf("ffs_mountfs: superblock updated for soft updates\n"); #endif bcopy(&cstotal, &fs->fs_cstotal, sizeof cstotal); return (0); @@ -1232,7 +1205,7 @@ allocdirect_merge(adphead, newadp, oldadp) if (newadp->ad_oldblkno != oldadp->ad_newblkno || newadp->ad_oldsize != oldadp->ad_newsize || newadp->ad_lbn >= NDADDR) - panic("allocdirect_check: old %d != new %d || lbn %d >= %d", + panic("allocdirect_check: old %d != new %d || lbn %ld >= %d", newadp->ad_oldblkno, oldadp->ad_newblkno, newadp->ad_lbn, NDADDR); newadp->ad_oldblkno = oldadp->ad_oldblkno; @@ -1556,7 +1529,11 @@ softdep_setup_freeblocks(ip, length) fs = ip->i_fs; if (length != 0) panic("softde_setup_freeblocks: non-zero length"); - (void) checklimit(&num_freeblks, 0); + /* + * If we are over our limit, try to improve the situation. + */ + if (num_freeblks > max_softdeps / 2 && speedup_syncer() == 0) + (void) request_cleanup(FLUSH_REMOVE, 0); num_freeblks += 1; MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks), M_FREEBLKS, M_WAITOK); @@ -1612,6 +1589,7 @@ softdep_setup_freeblocks(ip, length) merge_inode_lists(inodedep); while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0) free_allocdirect(&inodedep->id_inoupdt, adp, 1); + FREE_LOCK(&lk); bdwrite(bp); /* * We must wait for any I/O in progress to finish so that @@ -1620,18 +1598,16 @@ softdep_setup_freeblocks(ip, length) * any dependencies. */ vp = ITOV(ip); - while (vp->v_numoutput) { - vp->v_flag |= VBWAIT; - FREE_LOCK_INTERLOCKED(&lk); - sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); - ACQUIRE_LOCK_INTERLOCKED(&lk); - } + ACQUIRE_LOCK(&lk); + drain_output(vp, 1); while (getdirtybuf(&LIST_FIRST(&vp->v_dirtyblkhd), MNT_WAIT)) { bp = LIST_FIRST(&vp->v_dirtyblkhd); (void) inodedep_lookup(fs, ip->i_number, 0, &inodedep); deallocate_dependencies(bp, inodedep); bp->b_flags |= B_INVAL; + FREE_LOCK(&lk); brelse(bp); + ACQUIRE_LOCK(&lk); } /* * Try freeing the inodedep in case that was the last dependency. @@ -1701,7 +1677,8 @@ deallocate_dependencies(bp, inodedep) * visible, so they can simply be tossed. */ for (i = 0; i < DAHASHSZ; i++) - while ((dap=LIST_FIRST(&pagedep->pd_diraddhd[i]))) + while ((dap = + LIST_FIRST(&pagedep->pd_diraddhd[i]))) free_diradd(dap); while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != 0) free_diradd(dap); @@ -1792,9 +1769,13 @@ softdep_freefile(ap) struct freefile *freefile; /* + * If we are over our limit, try to improve the situation. + */ + if (num_freefile > max_softdeps / 2 && speedup_syncer() == 0) + (void) request_cleanup(FLUSH_REMOVE, 0); + /* * This sets up the inode de-allocation dependency. */ - (void) checklimit(&num_freefile, 0); num_freefile += 1; MALLOC(freefile, struct freefile *, sizeof(struct freefile), M_FREEFILE, M_WAITOK); @@ -2114,16 +2095,19 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp) mkdir2->md_list.wk_type = D_MKDIR; mkdir2->md_state = MKDIR_PARENT; mkdir2->md_diradd = dap; - ACQUIRE_LOCK(&lk); /* * Dependency on "." and ".." being written to disk. */ + mkdir1->md_buf = newdirbp; + ACQUIRE_LOCK(&lk); LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs); WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list); + FREE_LOCK(&lk); bdwrite(newdirbp); /* * Dependency on link count increase for parent directory */ + ACQUIRE_LOCK(&lk); if (inodedep_lookup(dp->i_fs, dp->i_number, 0, &inodedep) == 0 || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { dap->da_state &= ~MKDIR_PARENT; @@ -2339,7 +2323,6 @@ newdirrem(bp, dp, ip, isrmdir) dap; dap = LIST_NEXT(dap, da_pdlist)) if (dap->da_offset == offset) break; - if (dap == NULL) { for (dap = LIST_FIRST(&pagedep->pd_pendinghd); dap; dap = LIST_NEXT(dap, da_pdlist)) @@ -2348,7 +2331,6 @@ newdirrem(bp, dp, ip, isrmdir) if (dap == NULL) return (dirrem); } - /* * Must be ATTACHED at this point, so just delete it. */ @@ -2359,7 +2341,6 @@ newdirrem(bp, dp, ip, isrmdir) ip->i_number, dap->da_newinum); free_diradd(dap); dirrem->dm_state |= COMPLETE; - return (dirrem); } @@ -2389,7 +2370,7 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) int isrmdir; /* indicates if doing RMDIR */ { int offset; - struct diradd *dap; + struct diradd *dap = NULL; struct dirrem *dirrem; struct pagedep *pagedep; struct inodedep *inodedep; @@ -2416,8 +2397,8 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) pagedep = dirrem->dm_pagedep; /* * The possible values for isrmdir: - * 0 - non-directory file rename - * 1 - directory rename within same directory + * 0 - non-directory file rename + * 1 - directory rename within same directory * inum - directory rename to new directory of given inode number * When renaming to a new directory, we are both deleting and * creating a new directory entry, so the link count on the new @@ -2660,6 +2641,7 @@ softdep_disk_io_initiation(bp) * dependency can be freed. */ if (LIST_FIRST(&indirdep->ir_deplisthd) == NULL) { + indirdep->ir_savebp->b_flags |= B_INVAL; brelse(indirdep->ir_savebp); /* inline expand WORKLIST_REMOVE(wk); */ wk->wk_state &= ~ONWORKLIST; @@ -2791,12 +2773,12 @@ initiate_write_inodeblock(inodedep, bp) prevlbn = adp->ad_lbn; if (adp->ad_lbn < NDADDR && dp->di_db[adp->ad_lbn] != adp->ad_newblkno) - panic("%s: direct pointer #%d mismatch %d != %d", + panic("%s: direct pointer #%ld mismatch %d != %d", "softdep_write_inodeblock", adp->ad_lbn, dp->di_db[adp->ad_lbn], adp->ad_newblkno); if (adp->ad_lbn >= NDADDR && dp->di_ib[adp->ad_lbn - NDADDR] != adp->ad_newblkno) - panic("%s: indirect pointer #%d mismatch %d != %d", + panic("%s: indirect pointer #%ld mismatch %d != %d", "softdep_write_inodeblock", adp->ad_lbn - NDADDR, dp->di_ib[adp->ad_lbn - NDADDR], adp->ad_newblkno); deplist |= 1 << adp->ad_lbn; @@ -2917,20 +2899,22 @@ softdep_disk_write_complete(bp) newblk->nb_bmsafemap = NULL; LIST_REMOVE(newblk, nb_deps); } - while ((adp = LIST_FIRST(&bmsafemap->sm_allocdirecthd))) { + while ((adp = + LIST_FIRST(&bmsafemap->sm_allocdirecthd))) { adp->ad_state |= DEPCOMPLETE; adp->ad_buf = NULL; LIST_REMOVE(adp, ad_deps); handle_allocdirect_partdone(adp); } - while ((aip = LIST_FIRST(&bmsafemap->sm_allocindirhd))) { + while ((aip = + LIST_FIRST(&bmsafemap->sm_allocindirhd))) { aip->ai_state |= DEPCOMPLETE; aip->ai_buf = NULL; LIST_REMOVE(aip, ai_deps); handle_allocindir_partdone(aip); } while ((inodedep = - LIST_FIRST(&bmsafemap->sm_inodedephd)) != NULL) { + LIST_FIRST(&bmsafemap->sm_inodedephd)) != NULL) { inodedep->id_state |= DEPCOMPLETE; LIST_REMOVE(inodedep, id_deps); inodedep->id_buf = NULL; @@ -2962,10 +2946,13 @@ softdep_disk_write_complete(bp) indirdep->ir_state &= ~UNDONE; indirdep->ir_state |= ATTACHED; while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) { - LIST_REMOVE(aip, ai_next); handle_allocindir_partdone(aip); + if (aip == LIST_FIRST(&indirdep->ir_donehd)) + panic("disk_write_complete: not gone"); } WORKLIST_INSERT(&reattach, wk); + if ((bp->b_flags & B_DELWRI) == 0) + stat_indir_blk_ptrs++; bdirty(bp); continue; @@ -3121,6 +3108,8 @@ handle_written_inodeblock(inodedep, bp) *dp = *inodedep->id_savedino; FREE(inodedep->id_savedino, M_INODEDEP); inodedep->id_savedino = NULL; + if ((bp->b_flags & B_DELWRI) == 0) + stat_inode_bitmap++; bdirty(bp); return (1); } @@ -3135,14 +3124,14 @@ handle_written_inodeblock(inodedep, bp) panic("handle_written_inodeblock: new entry"); if (adp->ad_lbn < NDADDR) { if (dp->di_db[adp->ad_lbn] != adp->ad_oldblkno) - panic("%s: %s #%d mismatch %d != %d", + panic("%s: %s #%ld mismatch %d != %d", "handle_written_inodeblock", "direct pointer", adp->ad_lbn, dp->di_db[adp->ad_lbn], adp->ad_oldblkno); dp->di_db[adp->ad_lbn] = adp->ad_newblkno; } else { if (dp->di_ib[adp->ad_lbn - NDADDR] != 0) - panic("%s: %s #%d allocated as %d", + panic("%s: %s #%ld allocated as %d", "handle_written_inodeblock", "indirect pointer", adp->ad_lbn - NDADDR, dp->di_ib[adp->ad_lbn - NDADDR]); @@ -3152,6 +3141,8 @@ handle_written_inodeblock(inodedep, bp) adp->ad_state |= ATTACHED; hadchanges = 1; } + if (hadchanges && (bp->b_flags & B_DELWRI) == 0) + stat_direct_blk_ptrs++; /* * Reset the file size to its most up-to-date value. */ @@ -3350,8 +3341,11 @@ handle_written_filepage(pagedep, bp) * marked dirty so that its will eventually get written back in * its correct form. */ - if (chgs) + if (chgs) { + if ((bp->b_flags & B_DELWRI) == 0) + stat_dir_entry++; bdirty(bp); + } /* * If no dependencies remain, the pagedep will be freed. * Otherwise it will remain to update the page before it @@ -3425,7 +3419,7 @@ void softdep_update_inodeblock(ip, bp, waitfor) struct inode *ip; /* the "in_core" copy of the inode */ struct buf *bp; /* the buffer containing the inode block */ - int waitfor; /* 1 => update must be allowed */ + int waitfor; /* nonzero => update must be allowed */ { struct inodedep *inodedep; struct worklist *wk; @@ -3635,6 +3629,52 @@ softdep_fsync(vp) } /* + * Flush all the dirty bitmaps associated with the block device + * before flushing the rest of the dirty blocks so as to reduce + * the number of dependencies that will have to be rolled back. + */ +void +softdep_fsync_mountdev(vp) + struct vnode *vp; +{ + struct buf *bp, *nbp; + struct worklist *wk; + + if (vp->v_type != VBLK) + panic("softdep_fsync_mountdev: vnode not VBLK"); + ACQUIRE_LOCK(&lk); + for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { + nbp = LIST_NEXT(bp, b_vnbufs); + /* + * If it is already scheduled, skip to the next buffer. + */ + if (bp->b_flags & B_BUSY) + continue; + if ((bp->b_flags & B_DELWRI) == 0) + panic("softdep_fsync_mountdev: not dirty"); + /* + * We are only interested in bitmaps with outstanding + * dependencies. + */ + if ((wk = LIST_FIRST(&bp->b_dep)) == NULL || + wk->wk_type != D_BMSAFEMAP) + continue; + bremfree(bp); + bp->b_flags |= B_BUSY; + FREE_LOCK(&lk); + (void) bawrite(bp); + ACQUIRE_LOCK(&lk); + /* + * Since we may have slept during the I/O, we need + * to start from a known point. + */ + nbp = LIST_FIRST(&vp->v_dirtyblkhd); + } + drain_output(vp, 1); + FREE_LOCK(&lk); +} + +/* * This routine is called when we are trying to synchronously flush a * file. This routine must eliminate any filesystem metadata dependencies * so that the syncing routine can succeed by pushing the dirty blocks @@ -3783,8 +3823,9 @@ loop: for (i = 0; i < DAHASHSZ; i++) { if (LIST_FIRST(&pagedep->pd_diraddhd[i]) == 0) continue; - if ((error = flush_pagedep_deps(vp, - pagedep->pd_mnt, &pagedep->pd_diraddhd[i]))) { + if ((error = + flush_pagedep_deps(vp, pagedep->pd_mnt, + &pagedep->pd_diraddhd[i]))) { FREE_LOCK(&lk); bawrite(bp); return (error); @@ -3792,6 +3833,48 @@ loop: } break; + case D_MKDIR: + /* + * This case should never happen if the vnode has + * been properly sync'ed. However, if this function + * is used at a place where the vnode has not yet + * been sync'ed, this dependency can show up. So, + * rather than panic, just flush it. + */ + nbp = WK_MKDIR(wk)->md_buf; + if (getdirtybuf(&nbp, waitfor) == 0) + break; + FREE_LOCK(&lk); + if (waitfor == MNT_NOWAIT) { + bawrite(nbp); + } else if ((error = VOP_BWRITE(nbp)) != 0) { + bawrite(bp); + return (error); + } + ACQUIRE_LOCK(&lk); + break; + + case D_BMSAFEMAP: + /* + * This case should never happen if the vnode has + * been properly sync'ed. However, if this function + * is used at a place where the vnode has not yet + * been sync'ed, this dependency can show up. So, + * rather than panic, just flush it. + */ + nbp = WK_BMSAFEMAP(wk)->sm_buf; + if (getdirtybuf(&nbp, waitfor) == 0) + break; + FREE_LOCK(&lk); + if (waitfor == MNT_NOWAIT) { + bawrite(nbp); + } else if ((error = VOP_BWRITE(nbp)) != 0) { + bawrite(bp); + return (error); + } + ACQUIRE_LOCK(&lk); + break; + default: panic("softdep_sync_metadata: Unknown type %s", TYPENAME(wk->wk_type)); @@ -3813,12 +3896,7 @@ loop: * Once they are all there, proceed with the second pass * which will wait for the I/O as per above. */ - while (vp->v_numoutput) { - vp->v_flag |= VBWAIT; - FREE_LOCK_INTERLOCKED(&lk); - sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); - ACQUIRE_LOCK_INTERLOCKED(&lk); - } + drain_output(vp, 1); /* * The brief unlock is to allow any pent up dependency * processing to be done. @@ -4043,18 +4121,12 @@ flush_pagedep_deps(pvp, mp, diraddhdp) * level in the filesystem. Instead, we push the blocks * and wait for them to clear. */ - if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) { + if ((error = + VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) { vput(vp); break; } - ACQUIRE_LOCK(&lk); - while (vp->v_numoutput) { - vp->v_flag |= VBWAIT; - FREE_LOCK_INTERLOCKED(&lk); - sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); - ACQUIRE_LOCK_INTERLOCKED(&lk); - } - FREE_LOCK(&lk); + drain_output(vp, 0); } TIMEVAL_TO_TIMESPEC(&time, &ts); error = VOP_UPDATE(vp, &ts, &ts, MNT_WAIT); @@ -4075,6 +4147,209 @@ flush_pagedep_deps(pvp, mp, diraddhdp) } /* + * A large burst of file addition or deletion activity can drive the + * memory load excessively high. Therefore we deliberately slow things + * down and speed up the I/O processing if we find ourselves with too + * many dependencies in progress. + */ +static int +request_cleanup(resource, islocked) + int resource; + int islocked; +{ + struct proc *p = CURPROC; + + /* + * We never hold up the filesystem syncer process. + */ + if (p == filesys_syncer) + return (0); + /* + * If we are resource constrained on inode dependencies, try + * flushing some dirty inodes. Otherwise, we are constrained + * by file deletions, so try accelerating flushes of directories + * with removal dependencies. We would like to do the cleanup + * here, but we probably hold an inode locked at this point and + * that might deadlock against one that we try to clean. So, + * the best that we can do is request the syncer daemon to do + * the cleanup for us. + */ + switch (resource) { + + case FLUSH_INODES: + stat_ino_limit_push += 1; + req_clear_inodedeps = 1; + break; + + case FLUSH_REMOVE: + stat_blk_limit_push += 1; + req_clear_remove = 1; + break; + + default: + panic("request_cleanup: unknown type"); + } + /* + * Hopefully the syncer daemon will catch up and awaken us. + * We wait at most tickdelay before proceeding in any case. + */ + if (islocked == 0) + ACQUIRE_LOCK(&lk); + if (proc_waiting == 0) { + proc_waiting = 1; + timeout(pause_timer, NULL, tickdelay > 2 ? tickdelay : 2); + } + FREE_LOCK_INTERLOCKED(&lk); + (void) tsleep((caddr_t)&proc_waiting, PPAUSE | PCATCH, "softupdate", 0); + ACQUIRE_LOCK_INTERLOCKED(&lk); + if (proc_waiting) { + untimeout(pause_timer, NULL); + proc_waiting = 0; + } else { + switch (resource) { + + case FLUSH_INODES: + stat_ino_limit_hit += 1; + break; + + case FLUSH_REMOVE: + stat_blk_limit_hit += 1; + break; + } + } + if (islocked == 0) + FREE_LOCK(&lk); + return (1); +} + +/* + * Awaken processes pausing in request_cleanup and clear proc_waiting + * to indicate that there is no longer a timer running. + */ +void +pause_timer(arg) + void *arg; +{ + + proc_waiting = 0; + wakeup(&proc_waiting); +} + +/* + * Flush out a directory with at least one removal dependency in an effort + * to reduce the number of freefile and freeblks dependency structures. + */ +static void +clear_remove(p) + struct proc *p; +{ + struct pagedep_hashhead *pagedephd; + struct pagedep *pagedep; + static int next = 0; + struct mount *mp; + struct vnode *vp; + int error, cnt; + ino_t ino; + + ACQUIRE_LOCK(&lk); + for (cnt = 0; cnt < pagedep_hash; cnt++) { + pagedephd = &pagedep_hashtbl[next++]; + if (next >= pagedep_hash) + next = 0; + for (pagedep = LIST_FIRST(pagedephd); pagedep; + pagedep = LIST_NEXT(pagedep, pd_hash)) { + if (LIST_FIRST(&pagedep->pd_dirremhd) == NULL) + continue; + mp = pagedep->pd_mnt; + ino = pagedep->pd_ino; + FREE_LOCK(&lk); + if ((error = VFS_VGET(mp, ino, &vp)) != 0) { + softdep_error("clear_remove: vget", error); + return; + } + if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) + softdep_error("clear_remove: fsync", error); + drain_output(vp, 0); + vput(vp); + return; + } + } + FREE_LOCK(&lk); +} + +/* + * Clear out a block of dirty inodes in an effort to reduce + * the number of inodedep dependency structures. + */ +static void +clear_inodedeps(p) + struct proc *p; +{ + struct inodedep_hashhead *inodedephd; + struct inodedep *inodedep; + static int next = 0; + struct mount *mp; + struct vnode *vp; + struct fs *fs; + int error, cnt; + ino_t firstino, lastino, ino; + + ACQUIRE_LOCK(&lk); + /* + * Pick a random inode dependency to be cleared. + * We will then gather up all the inodes in its block + * that have dependencies and flush them out. + */ + for (cnt = 0; cnt < inodedep_hash; cnt++) { + inodedephd = &inodedep_hashtbl[next++]; + if (next >= inodedep_hash) + next = 0; + if ((inodedep = LIST_FIRST(inodedephd)) != NULL) + break; + } + /* + * Ugly code to find mount point given pointer to superblock. + */ + fs = inodedep->id_fs; + for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; + mp = CIRCLEQ_NEXT(mp, mnt_list)) + if ((mp->mnt_flag & MNT_SOFTDEP) && fs == VFSTOUFS(mp)->um_fs) + break; + /* + * Find the last inode in the block with dependencies. + */ + firstino = inodedep->id_ino & ~(INOPB(fs) - 1); + for (lastino = firstino + INOPB(fs) - 1; lastino > firstino; lastino--) + if (inodedep_lookup(fs, lastino, 0, &inodedep) != 0) + break; + /* + * Asynchronously push all but the last inode with dependencies. + * Synchronously push the last inode with dependencies to ensure + * that the inode block gets written to free up the inodedeps. + */ + for (ino = firstino; ino <= lastino; ino++) { + if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) + continue; + FREE_LOCK(&lk); + if ((error = VFS_VGET(mp, ino, &vp)) != 0) { + softdep_error("clear_inodedeps: vget", error); + return; + } + if (ino == lastino) { + if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p))) + softdep_error("clear_inodedeps: fsync1", error); + } else { + if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) + softdep_error("clear_inodedeps: fsync2", error); + drain_output(vp, 0); + } + vput(vp); + ACQUIRE_LOCK(&lk); + } + FREE_LOCK(&lk); +} + +/* * Acquire exclusive access to a buffer. * Must be called with splbio blocked. * Return 1 if buffer was acquired. @@ -4095,7 +4370,7 @@ getdirtybuf(bpp, waitfor) return (0); bp->b_flags |= B_WANTED; FREE_LOCK_INTERLOCKED(&lk); - sleep((caddr_t)bp, PRIBIO + 1); + tsleep((caddr_t)bp, PRIBIO + 1, "sdsdty", 0); ACQUIRE_LOCK_INTERLOCKED(&lk); } if ((bp->b_flags & B_DELWRI) == 0) @@ -4106,6 +4381,28 @@ getdirtybuf(bpp, waitfor) } /* + * Wait for pending output on a vnode to complete. + * Must be called with vnode locked. + */ +static void +drain_output(vp, islocked) + struct vnode *vp; + int islocked; +{ + + if (!islocked) + ACQUIRE_LOCK(&lk); + while (vp->v_numoutput) { + vp->v_flag |= VBWAIT; + FREE_LOCK_INTERLOCKED(&lk); + tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "drainvp", 0); + ACQUIRE_LOCK_INTERLOCKED(&lk); + } + if (!islocked) + FREE_LOCK(&lk); +} + +/* * Called whenever a buffer that is being invalidated or reallocated * contains dependencies. This should only happen if an I/O error has * occurred. The routine is called with the buffer locked. @@ -4114,43 +4411,11 @@ void softdep_deallocate_dependencies(bp) struct buf *bp; { - struct worklist *wk; if ((bp->b_flags & B_ERROR) == 0) panic("softdep_deallocate_dependencies: dangling deps"); softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error); - ACQUIRE_LOCK(&lk); - while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { - WORKLIST_REMOVE(wk); - FREE_LOCK(&lk); - switch (wk->wk_type) { - /* - * XXX - should really clean up, but for now we will - * just leak memory and not worry about it. Also should - * mark the filesystem permanently dirty so that it will - * force fsck to be run (though this would best be done - * in the mainline code). - */ - case D_PAGEDEP: - case D_INODEDEP: - case D_BMSAFEMAP: - case D_ALLOCDIRECT: - case D_INDIRDEP: - case D_ALLOCINDIR: - case D_MKDIR: -#ifdef DEBUG - printf("Lost type %s\n", TYPENAME(wk->wk_type)); -#endif - break; - default: - panic("%s: Unexpected type %s", - "softdep_deallocate_dependencies", - TYPENAME(wk->wk_type)); - /* NOTREACHED */ - } - ACQUIRE_LOCK(&lk); - } - FREE_LOCK(&lk); + panic("softdep_deallocate_dependencies: unrecovered I/O error"); } /* @@ -4163,8 +4428,5 @@ softdep_error(func, error) { /* XXX should do something better! */ - log(LOG_ERR, "%s: got error %d while accessing filesystem\n", - func, error); + printf("%s: got error %d while accessing filesystem\n", func, error); } - -#endif /* FFS_SOFTUPDATES */ diff --git a/sys/ufs/ffs/ffs_softdep_stub.c b/sys/ufs/ffs/ffs_softdep_stub.c index 93c790d1d47..bd06b5fbdd2 100644 --- a/sys/ufs/ffs/ffs_softdep_stub.c +++ b/sys/ufs/ffs/ffs_softdep_stub.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_softdep_stub.c,v 1.1 1998/03/15 03:53:51 millert Exp $ */ +/* $OpenBSD: ffs_softdep_stub.c,v 1.2 1999/12/05 08:30:38 art Exp $ */ /* * Copyright 1997 Marshall Kirk McKusick. All Rights Reserved. @@ -247,4 +247,12 @@ softdep_sync_metadata(ap) return (0); } +void +softdep_fsync_mountdev(vp) + struct vnode *vp; +{ + panic("softdep_fsync_mountdev called"); +} + + #endif /* !FFS_SOFTUPDATES */ diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index b987c318c7a..1e395e51620 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_vnops.c,v 1.9 1999/02/26 03:56:30 art Exp $ */ +/* $OpenBSD: ffs_vnops.c,v 1.10 1999/12/05 08:30:38 art Exp $ */ /* $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $ */ /* @@ -255,6 +255,11 @@ ffs_fsync(v) struct timespec ts; int s, error, passes, skipmeta; + if (vp->v_type == VBLK && + vp->v_specmountpoint != NULL && + (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP)) + softdep_fsync_mountdev(vp); + /* * Flush all dirty buffers associated with a vnode */ diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h index 3435f630417..b34bd579315 100644 --- a/sys/ufs/ffs/softdep.h +++ b/sys/ufs/ffs/softdep.h @@ -52,7 +52,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)softdep.h 9.5 (McKusick) 2/11/98 + * @(#)softdep.h 9.6 (McKusick) 2/25/99 */ #include <sys/queue.h> @@ -516,6 +516,7 @@ struct mkdir { struct worklist md_list; /* id_inowait or buffer holding dir */ # define md_state md_list.wk_state /* type: MKDIR_PARENT or MKDIR_BODY */ struct diradd *md_diradd; /* associated diradd */ + struct buf *md_buf; /* MKDIR_BODY: buffer holding dir */ LIST_ENTRY(mkdir) md_mkdirs; /* list of all mkdirs */ }; LIST_HEAD(mkdirlist, mkdir) mkdirlisthd; |