path: root/sys/ufs/ffs/ffs_softdep.c
author	Ted Unangst <tedu@cvs.openbsd.org>	2004-12-07 04:37:29 +0000
committer	Ted Unangst <tedu@cvs.openbsd.org>	2004-12-07 04:37:29 +0000
commit	794ffce479f48a30cd683f84a622ec2692264248 (patch)
tree	7e980e2405528661a34a2712fcdd50a8245326c8	/sys/ufs/ffs/ffs_softdep.c
parent	3a457a1fd63cee639bebe505f35920cc472ff35f (diff)
prev softdep merge could lead to inode wait deadlock for unknown reasons.
revert.
Diffstat (limited to 'sys/ufs/ffs/ffs_softdep.c')
-rw-r--r--	sys/ufs/ffs/ffs_softdep.c	285
1 file changed, 51 insertions, 234 deletions
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 37f589f023d..3a320df7b22 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_softdep.c,v 1.52 2004/07/13 21:04:29 millert Exp $ */
+/* $OpenBSD: ffs_softdep.c,v 1.53 2004/12/07 04:37:28 tedu Exp $ */
/*
* Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
*
@@ -66,25 +66,25 @@
/*
* Mapping of dependency structure types to malloc types.
*/
-#define D_PAGEDEP 0
-#define D_INODEDEP 1
-#define D_NEWBLK 2
-#define D_BMSAFEMAP 3
-#define D_ALLOCDIRECT 4
-#define D_INDIRDEP 5
-#define D_ALLOCINDIR 6
-#define D_FREEFRAG 7
-#define D_FREEBLKS 8
-#define D_FREEFILE 9
-#define D_DIRADD 10
-#define D_MKDIR 11
-#define D_DIRREM 12
-#define D_NEWDIRBLK 13
+#define D_PAGEDEP 1
+#define D_INODEDEP 2
+#define D_NEWBLK 3
+#define D_BMSAFEMAP 4
+#define D_ALLOCDIRECT 5
+#define D_INDIRDEP 6
+#define D_ALLOCINDIR 7
+#define D_FREEFRAG 8
+#define D_FREEBLKS 9
+#define D_FREEFILE 10
+#define D_DIRADD 11
+#define D_MKDIR 12
+#define D_DIRREM 13
#define D_LAST 13
/*
* Names of softdep types.
*/
const char *softdep_typenames[] = {
+ "invalid",
"pagedep",
"inodedep",
"newblk",
@@ -98,7 +98,6 @@ const char *softdep_typenames[] = {
"diradd",
"mkdir",
"dirrem",
- "newdirblk",
};
#define TYPENAME(type) \
((unsigned)(type) <= D_LAST ? softdep_typenames[type] : "???")
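With the revert, the dependency type constants go back to 1-based numbering, and slot 0 of softdep_typenames again holds a sentinel: a zeroed wk_type prints as "invalid" and anything past D_LAST falls through to "???". A minimal stand-alone sketch of that bounds-checked name lookup (user-space illustration only, not part of the commit):

#include <stdio.h>

/*
 * Illustration of the table above: index 0 is the "invalid" sentinel,
 * valid types run 1..D_LAST, and out-of-range values map to "???".
 */
#define D_PAGEDEP	1
#define D_DIRREM	13
#define D_LAST		13

static const char *typenames[] = {
	"invalid", "pagedep", "inodedep", "newblk", "bmsafemap",
	"allocdirect", "indirdep", "allocindir", "freefrag", "freeblks",
	"freefile", "diradd", "mkdir", "dirrem",
};

#define TYPENAME(type) \
	((unsigned)(type) <= D_LAST ? typenames[type] : "???")

int
main(void)
{
	/* prints: invalid pagedep dirrem ??? */
	printf("%s %s %s %s\n",
	    TYPENAME(0), TYPENAME(D_PAGEDEP), TYPENAME(D_DIRREM),
	    TYPENAME(99));
	return 0;
}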
@@ -135,7 +134,6 @@ STATIC struct dirrem *newdirrem(struct buf *, struct inode *,
struct inode *, int, struct dirrem **);
STATIC void free_diradd(struct diradd *);
STATIC void free_allocindir(struct allocindir *, struct inodedep *);
-STATIC void free_newdirblk(struct newdirblk *);
STATIC int indir_trunc(struct inode *, daddr_t, int, ufs_lbn_t,
long *);
STATIC void deallocate_dependencies(struct buf *, struct inodedep *);
@@ -374,7 +372,6 @@ STATIC struct pool freefile_pool;
STATIC struct pool diradd_pool;
STATIC struct pool mkdir_pool;
STATIC struct pool dirrem_pool;
-STATIC struct pool newdirblk_pool;
static __inline void
softdep_free(struct worklist *item, int type)
@@ -429,10 +426,6 @@ softdep_free(struct worklist *item, int type)
pool_put(&dirrem_pool, item);
break;
- case D_NEWDIRBLK:
- pool_put(&newdirblk_pool, item);
- break;
-
default:
#ifdef DEBUG
if (lk.lkt_held != -1)
@@ -964,8 +957,7 @@ u_long pagedep_hash; /* size of hash table - 1 */
STATIC struct sema pagedep_in_progress;
/*
- * Look up a pagedep. Return 1 if found, 0 if not found or found
- * when asked to allocate but not associated with any buffer.
+ * Look up a pagedep. Return 1 if found, 0 if not found.
* If not found, allocate if DEPALLOC flag is passed.
* Found or allocated entry is returned in pagedeppp.
* This routine must be called with splbio interrupts blocked.
@@ -996,9 +988,6 @@ top:
break;
if (pagedep) {
*pagedeppp = pagedep;
- if ((flags & DEPALLOC) != 0 &&
- (pagedep->pd_state & ONWORKLIST) == 0)
- return (0);
return (1);
}
if ((flags & DEPALLOC) == 0) {
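The reverted comment and early return restore the simpler pagedep_lookup contract: return 1 when an existing entry is found, 0 otherwise, allocating a fresh entry when DEPALLOC is set and leaving the result in *pagedeppp. A rough user-space sketch of that lookup-or-allocate contract, with hypothetical names and a plain list standing in for the hash chain:

#include <stdlib.h>

#define DEPALLOC	0x0001		/* allocate if entry not found */

struct pagedep {
	struct pagedep	*pd_next;	/* chain link (stand-in for pd_hash) */
	unsigned long	 pd_inum;	/* key: inode number */
	unsigned long	 pd_lbn;	/* key: logical block number */
};

static struct pagedep *chain;		/* single bucket for illustration */

/*
 * Return 1 if found, 0 if not found.  If not found and DEPALLOC is
 * set, allocate a new entry; the result lands in *pagedeppp either way.
 */
static int
pagedep_lookup(unsigned long inum, unsigned long lbn, int flags,
    struct pagedep **pagedeppp)
{
	struct pagedep *pd;

	for (pd = chain; pd != NULL; pd = pd->pd_next)
		if (pd->pd_inum == inum && pd->pd_lbn == lbn) {
			*pagedeppp = pd;
			return (1);
		}
	if ((flags & DEPALLOC) == 0) {
		*pagedeppp = NULL;
		return (0);
	}
	if ((pd = calloc(1, sizeof(*pd))) == NULL)
		abort();
	pd->pd_inum = inum;
	pd->pd_lbn = lbn;
	pd->pd_next = chain;
	chain = pd;
	*pagedeppp = pd;
	return (0);			/* freshly allocated, not "found" */
}

int
main(void)
{
	struct pagedep *pd;

	pagedep_lookup(2, 0, DEPALLOC, &pd);	/* allocates, returns 0 */
	return (pagedep_lookup(2, 0, 0, &pd) == 1 ? 0 : 1);
}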
@@ -1211,8 +1200,6 @@ softdep_initialize()
"mkdirpl", &pool_allocator_nointr);
pool_init(&dirrem_pool, sizeof(struct dirrem), 0, 0, 0,
"dirrempl", &pool_allocator_nointr);
- pool_init(&newdirblk_pool, sizeof(struct newdirblk), 0, 0, 0,
- "newdirblkpl", &pool_allocator_nointr);
}
/*
@@ -1439,7 +1426,6 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
adp->ad_newsize = newsize;
adp->ad_oldsize = oldsize;
adp->ad_state = ATTACHED;
- LIST_INIT(&adp->ad_newdirblk);
if (newblkno == oldblkno)
adp->ad_freefrag = NULL;
else
@@ -1536,9 +1522,7 @@ allocdirect_merge(adphead, newadp, oldadp)
struct allocdirect *newadp; /* allocdirect being added */
struct allocdirect *oldadp; /* existing allocdirect being checked */
{
- struct worklist *wk;
struct freefrag *freefrag;
- struct newdirblk *newdirblk;
#ifdef DEBUG
if (lk.lkt_held == -1)
@@ -1548,7 +1532,7 @@ allocdirect_merge(adphead, newadp, oldadp)
newadp->ad_oldsize != oldadp->ad_newsize ||
newadp->ad_lbn >= NDADDR) {
FREE_LOCK(&lk);
- panic("allocdirect_merge: old %d != new %d || lbn %ld >= %d",
+ panic("allocdirect_check: old %d != new %d || lbn %ld >= %d",
newadp->ad_oldblkno, oldadp->ad_newblkno, newadp->ad_lbn,
NDADDR);
}
@@ -1575,17 +1559,6 @@ allocdirect_merge(adphead, newadp, oldadp)
newadp->ad_freefrag = oldadp->ad_freefrag;
oldadp->ad_freefrag = freefrag;
}
- /*
- * If we are tracking a new directory-block allocation,
- * move it from the old allocdirect to the new allocdirect.
- */
- if ((wk = LIST_FIRST(&oldadp->ad_newdirblk)) != NULL) {
- newdirblk = WK_NEWDIRBLK(wk);
- WORKLIST_REMOVE(&newdirblk->db_list);
- if (LIST_FIRST(&oldadp->ad_newdirblk) != NULL)
- panic("allocdirect_merge: extra newdirblk");
- WORKLIST_INSERT(&newadp->ad_newdirblk, &newdirblk->db_list);
- }
free_allocdirect(adphead, oldadp, 0);
}
@@ -2076,21 +2049,6 @@ deallocate_dependencies(bp, inodedep)
WORKLIST_INSERT(&inodedep->id_bufwait,
&dirrem->dm_list);
}
- if ((pagedep->pd_state & NEWBLOCK) != 0) {
- LIST_FOREACH(wk, &inodedep->id_bufwait, wk_list)
- if (wk->wk_type == D_NEWDIRBLK &&
- WK_NEWDIRBLK(wk)->db_pagedep ==
- pagedep)
- break;
- if (wk != NULL) {
- WORKLIST_REMOVE(wk);
- free_newdirblk(WK_NEWDIRBLK(wk));
- } else {
- FREE_LOCK(&lk);
- panic("deallocate_dependencies: "
- "lost pagedep");
- }
- }
WORKLIST_REMOVE(&pagedep->pd_list);
LIST_REMOVE(pagedep, pd_hash);
WORKITEM_FREE(pagedep, D_PAGEDEP);
@@ -2126,8 +2084,6 @@ free_allocdirect(adphead, adp, delay)
struct allocdirect *adp;
int delay;
{
- struct newdirblk *newdirblk;
- struct worklist *wk;
#ifdef DEBUG
if (lk.lkt_held == -1)
@@ -2145,65 +2101,10 @@ free_allocdirect(adphead, adp, delay)
else
add_to_worklist(&adp->ad_freefrag->ff_list);
}
- if ((wk = LIST_FIRST(&adp->ad_newdirblk)) != NULL) {
- newdirblk = WK_NEWDIRBLK(wk);
- WORKLIST_REMOVE(&newdirblk->db_list);
- if (LIST_FIRST(&adp->ad_newdirblk) != NULL)
- panic("free_allocdirect: extra newdirblk");
- if (delay)
- WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait,
- &newdirblk->db_list);
- else
- free_newdirblk(newdirblk);
- }
WORKITEM_FREE(adp, D_ALLOCDIRECT);
}
/*
- * Free a newdirblk. Clear the NEWBLOCK flag on its associated pagedep.
- * This routine must be called with splbio interrupts blocked.
- */
-void
-free_newdirblk(newdirblk)
- struct newdirblk *newdirblk;
-{
- struct pagedep *pagedep;
- struct diradd *dap;
- int i;
-
-#ifdef DEBUG
- if (lk.lkt_held == -1)
- panic("free_newdirblk: lock not held");
-#endif
- /*
- * If the pagedep is still linked onto the directory buffer
- * dependency chain, then some of the entries on the
- * pd_pendinghd list may not be committed to disk yet. In
- * this case, we will simply clear the NEWBLOCK flag and
- * let the pd_pendinghd list be processed when the pagedep
- * is next written. If the pagedep is no longer on the buffer
- * dependency chain, then all the entries on the pd_pending
- * list are committed to disk and we can free them here.
- */
- pagedep = newdirblk->db_pagedep;
- pagedep->pd_state &= ~NEWBLOCK;
- if ((pagedep->pd_state & ONWORKLIST) == 0)
- while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
- free_diradd(dap);
- /*
- * If no dependencies remain, the pagedep will be freed.
- */
- for (i = 0; i < DAHASHSZ; i++)
- if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != NULL)
- break;
- if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0) {
- LIST_REMOVE(pagedep, pd_hash);
- WORKITEM_FREE(pagedep, D_PAGEDEP);
- }
- WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
-}
-
-/*
* Prepare an inode to be freed. The actual free operation is not
* done until the zero'ed inode has been written to disk.
*/
@@ -2514,25 +2415,21 @@ free_allocindir(aip, inodedep)
* count has been incremented, but before the directory entry's
* pointer to the inode has been set.
*/
-int
-softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk)
+void
+softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp)
struct buf *bp; /* buffer containing directory block */
struct inode *dp; /* inode for directory */
off_t diroffset; /* offset of new entry in directory */
long newinum; /* inode referenced by new directory entry */
struct buf *newdirbp; /* non-NULL => contents of new mkdir */
- int isnewblk; /* entry is in a newly allocated block */
{
int offset; /* offset of new entry within directory block */
ufs_lbn_t lbn; /* block in directory containing new entry */
struct fs *fs;
struct diradd *dap;
- struct allocdirect *adp;
struct pagedep *pagedep;
struct inodedep *inodedep;
- struct newdirblk *newdirblk = NULL;
struct mkdir *mkdir1, *mkdir2;
-
/*
* Whiteouts have no dependencies.
@@ -2540,7 +2437,7 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk)
if (newinum == WINO) {
if (newdirbp != NULL)
bdwrite(newdirbp);
- return (0);
+ return;
}
fs = dp->i_fs;
@@ -2552,11 +2449,6 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk)
dap->da_offset = offset;
dap->da_newinum = newinum;
dap->da_state = ATTACHED;
- if (isnewblk && lbn < NDADDR && fragoff(fs, diroffset) == 0) {
- newdirblk = pool_get(&newdirblk_pool, PR_WAITOK);
- newdirblk->db_list.wk_type = D_NEWDIRBLK;
- newdirblk->db_state = 0;
- }
if (newdirbp == NULL) {
dap->da_state |= DEPCOMPLETE;
ACQUIRE_LOCK(&lk);
@@ -2583,7 +2475,7 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk)
* Dependency on link count increase for parent directory
*/
ACQUIRE_LOCK(&lk);
- if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0
+ if (inodedep_lookup(dp->i_fs, dp->i_number, 0, &inodedep) == 0
|| (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
dap->da_state &= ~MKDIR_PARENT;
WORKITEM_FREE(mkdir2, D_MKDIR);
@@ -2610,57 +2502,7 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk)
diradd_inode_written(dap, inodedep);
else
WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
- if (isnewblk) {
- /*
- * Directories growing into indirect blocks are rare
- * enough and the frequency of new block allocation
- * in those cases even more rare, that we choose not
- * to bother tracking them. Rather we simply force the
- * new directory entry to disk.
- */
- if (lbn >= NDADDR) {
- FREE_LOCK(&lk);
- /*
- * We only have a new allocation when at the
- * beginning of a new block, not when we are
- * expanding into an existing block.
- */
- if (blkoff(fs, diroffset) == 0)
- return (1);
- return (0);
- }
- /*
- * We only have a new allocation when at the beginning
- * of a new fragment, not when we are expanding into an
- * existing fragment. Also, there is nothing to do if we
- * are already tracking this block.
- */
- if (fragoff(fs, diroffset) != 0) {
- FREE_LOCK(&lk);
- return (0);
- }
-
- if ((pagedep->pd_state & NEWBLOCK) != 0) {
- WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
- FREE_LOCK(&lk);
- return (0);
- }
- /*
- * Find our associated allocdirect and have it track us.
- */
- if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0)
- panic("softdep_setup_directory_add: lost inodedep");
- adp = TAILQ_LAST(&inodedep->id_newinoupdt, allocdirectlst);
- if (adp == NULL || adp->ad_lbn != lbn) {
- FREE_LOCK(&lk);
- panic("softdep_setup_directory_add: lost entry");
- }
- pagedep->pd_state |= NEWBLOCK;
- newdirblk->db_pagedep = pagedep;
- WORKLIST_INSERT(&adp->ad_newdirblk, &newdirblk->db_list);
- }
FREE_LOCK(&lk);
- return (0);
}
/*
@@ -3270,7 +3112,7 @@ softdep_disk_io_initiation(bp)
/*
* Replace up-to-date version with safe version.
*/
- indirdep->ir_saveddata = malloc(bp->b_bcount,
+ MALLOC(indirdep->ir_saveddata, caddr_t, bp->b_bcount,
M_INDIRDEP, M_WAITOK);
ACQUIRE_LOCK(&lk);
indirdep->ir_state &= ~ATTACHED;
@@ -3580,7 +3422,7 @@ softdep_disk_write_complete(bp)
if (indirdep->ir_state & GOINGAWAY)
panic("disk_write_complete: indirdep gone");
bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount);
- free(indirdep->ir_saveddata, M_INDIRDEP);
+ FREE(indirdep->ir_saveddata, M_INDIRDEP);
indirdep->ir_saveddata = 0;
indirdep->ir_state &= ~UNDONE;
indirdep->ir_state |= ATTACHED;
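Taken together, these two hunks return to an explicit MALLOC/FREE pairing around the saved copy of the indirect block: the up-to-date contents are stashed when the write is initiated and copied back, then freed, when the write completes. A plain user-space sketch of that save-and-restore shape, with illustrative names only:

#include <stdlib.h>
#include <string.h>

struct fake_buf {
	char	*b_data;		/* buffer contents handed to the driver */
	size_t	 b_bcount;		/* size of b_data */
};

struct fake_indirdep {
	char	*ir_saveddata;		/* stashed up-to-date copy */
};

/* At write initiation: save the current data, present the safe version. */
static void
write_start(struct fake_indirdep *ir, struct fake_buf *bp,
    const char *safe, size_t safelen)
{
	if ((ir->ir_saveddata = malloc(bp->b_bcount)) == NULL)
		abort();
	memcpy(ir->ir_saveddata, bp->b_data, bp->b_bcount);
	memcpy(bp->b_data, safe,
	    safelen < bp->b_bcount ? safelen : bp->b_bcount);
}

/* At write completion: restore the up-to-date data and drop the copy. */
static void
write_done(struct fake_indirdep *ir, struct fake_buf *bp)
{
	memcpy(bp->b_data, ir->ir_saveddata, bp->b_bcount);
	free(ir->ir_saveddata);
	ir->ir_saveddata = NULL;
}

int
main(void)
{
	char data[16] = "up-to-date";
	char safe[16] = "safe-copy";
	struct fake_buf bp = { data, sizeof(data) };
	struct fake_indirdep ir = { NULL };

	write_start(&ir, &bp, safe, sizeof(safe));
	write_done(&ir, &bp);		/* data[] holds "up-to-date" again */
	return 0;
}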
@@ -3845,10 +3687,6 @@ handle_written_inodeblock(inodedep, bp)
add_to_worklist(wk);
continue;
- case D_NEWDIRBLK:
- free_newdirblk(WK_NEWDIRBLK(wk));
- continue;
-
default:
panic("handle_written_inodeblock: Unknown type %s",
TYPENAME(wk->wk_type));
@@ -3952,12 +3790,9 @@ handle_written_filepage(pagedep, bp)
}
/*
* Free any directory additions that have been committed.
- * If it is a newly allocated block, we have to wait until
- * the on-disk directory inode claims the new block.
*/
- if ((pagedep->pd_state & NEWBLOCK) == 0)
- while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
- free_diradd(dap);
+ while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
+ free_diradd(dap);
/*
* Uncommitted directory entries must be restored.
*/
@@ -3994,19 +3829,23 @@ handle_written_filepage(pagedep, bp)
if ((bp->b_flags & B_DELWRI) == 0)
stat_dir_entry++;
buf_dirty(bp);
- return (1);
}
/*
- * If we are not waiting for a new directory block to be
- * claimed by its inode, then the pagedep will be freed.
- * Otherwise it will remain to track any new entries on
- * the page in case they are fsync'ed.
+ * If no dependencies remain, the pagedep will be freed.
+ * Otherwise it will remain to update the page before it
+ * is written back to disk.
*/
- if ((pagedep->pd_state & NEWBLOCK) == 0) {
- LIST_REMOVE(pagedep, pd_hash);
- WORKITEM_FREE(pagedep, D_PAGEDEP);
+ if (LIST_FIRST(&pagedep->pd_pendinghd) == 0) {
+ for (i = 0; i < DAHASHSZ; i++)
+ if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != NULL)
+ break;
+ if (i == DAHASHSZ) {
+ LIST_REMOVE(pagedep, pd_hash);
+ WORKITEM_FREE(pagedep, D_PAGEDEP);
+ return (0);
+ }
}
- return (0);
+ return (1);
}
/*
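The reverted tail of handle_written_filepage frees committed diradds unconditionally and then tears the pagedep down only when the pending list and every pd_diraddhd bucket are empty, returning 1 (keep the buffer dirty) otherwise. A small stand-alone sketch of that emptiness test; the bucket count here is a stand-in, not the kernel's value:

#include <stddef.h>
#include <sys/queue.h>

#define DAHASHSZ	6		/* illustrative bucket count */

struct diradd { LIST_ENTRY(diradd) da_list; };
LIST_HEAD(diraddhd, diradd);

struct pagedep_sketch {
	struct diraddhd pd_pendinghd;
	struct diraddhd pd_diraddhd[DAHASHSZ];
};

/*
 * Mirror of the reverted test: the pagedep may be freed only when the
 * pending list and every diradd hash bucket are empty; the caller
 * redirties the buffer when this reports busy.
 */
static int
pagedep_busy(struct pagedep_sketch *pd)
{
	int i;

	if (LIST_FIRST(&pd->pd_pendinghd) != NULL)
		return (1);
	for (i = 0; i < DAHASHSZ; i++)
		if (LIST_FIRST(&pd->pd_diraddhd[i]) != NULL)
			return (1);
	return (0);
}

int
main(void)
{
	struct pagedep_sketch pd;
	int i;

	LIST_INIT(&pd.pd_pendinghd);
	for (i = 0; i < DAHASHSZ; i++)
		LIST_INIT(&pd.pd_diraddhd[i]);
	return (pagedep_busy(&pd));	/* all empty: safe to free, returns 0 */
}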
@@ -4176,7 +4015,6 @@ softdep_fsync(vp)
struct mount *mnt;
struct vnode *pvp;
struct inode *ip;
- struct inode *pip;
struct buf *bp;
struct fs *fs;
struct proc *p = CURPROC; /* XXX */
@@ -4208,8 +4046,8 @@ softdep_fsync(vp)
}
dap = WK_DIRADD(wk);
/*
- * Flush our parent if this directory entry has a MKDIR_PARENT
- * dependency or is contained in a newly allocated block.
+ * Flush our parent if this directory entry
+ * has a MKDIR_PARENT dependency.
*/
if (dap->da_state & DIRCHG)
pagedep = dap->da_previous->dm_pagedep;
@@ -4222,11 +4060,7 @@ softdep_fsync(vp)
FREE_LOCK(&lk);
panic("softdep_fsync: dirty");
}
- if ((dap->da_state & MKDIR_PARENT) ||
- (pagedep->pd_state & NEWBLOCK))
- flushparent = 1;
- else
- flushparent = 0;
+ flushparent = dap->da_state & MKDIR_PARENT;
/*
* If we are being fsync'ed as part of vgone'ing this vnode,
* then we will not be able to release and recover the
@@ -4250,22 +4084,8 @@ softdep_fsync(vp)
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (error != 0)
return (error);
- /*
- * All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps
- * that are contained in direct blocks will be resolved by
- * doing a UFS_UPDATE. Pagedeps contained in indirect blocks
- * may require a complete sync'ing of the directory. So, we
- * try the cheap and fast UFS_UPDATE first, and if that fails,
- * then we do the slower VOP_FSYNC of the directory.
- */
- pip = VTOI(pvp);
if (flushparent) {
- if ((error = UFS_UPDATE(pip, 1)) != 0) {
- vput(pvp);
- return (error);
- }
- if ((pagedep->pd_state & NEWBLOCK) &&
- (error = UFS_UPDATE(pip, MNT_WAIT))) {
+ if ((error = UFS_UPDATE(VTOI(pvp), MNT_WAIT))) {
vput(pvp);
return (error);
}
@@ -4273,7 +4093,7 @@ softdep_fsync(vp)
/*
* Flush directory page containing the inode's name.
*/
- error = bread(pvp, lbn, blksize(fs, pip, lbn), p->p_ucred,
+ error = bread(pvp, lbn, blksize(fs, VTOI(pvp), lbn), p->p_ucred,
&bp);
if (error == 0)
error = bwrite(bp);
@@ -4400,11 +4220,6 @@ softdep_sync_metadata(ap)
*/
waitfor = MNT_NOWAIT;
top:
- /*
- * We must wait for any I/O in progress to finish so that
- * all potential buffers on the dirty list will be visible.
- */
- drain_output(vp, 1);
if (getdirtybuf(&LIST_FIRST(&vp->v_dirtyblkhd), MNT_WAIT) == 0) {
FREE_LOCK(&lk);
return (0);
@@ -4561,8 +4376,15 @@ loop:
goto loop;
}
/*
+ * We must wait for any I/O in progress to finish so that
+ * all potential buffers on the dirty list will be visible.
+ * Once they are all there, proceed with the second pass
+ * which will wait for the I/O as per above.
+ */
+ drain_output(vp, 1);
+ /*
* The brief unlock is to allow any pent up dependency
- * processing to be done. Then proceed with the second pass.
+ * processing to be done.
*/
if (waitfor == MNT_NOWAIT) {
waitfor = MNT_WAIT;
@@ -4575,11 +4397,7 @@ loop:
* If we have managed to get rid of all the dirty buffers,
* then we are done. For certain directories and block
* devices, we may need to do further work.
- *
- * We must wait for any I/O in progress to finish so that
- * all potential buffers on the dirty list will be visible.
*/
- drain_output(vp, 1);
if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
FREE_LOCK(&lk);
return (0);
@@ -5075,7 +4893,6 @@ clear_inodedeps(p)
vn_finished_write(mp);
#endif
ACQUIRE_LOCK(&lk);
- drain_output(vp, 1);
}
FREE_LOCK(&lk);
}