summaryrefslogtreecommitdiff
path: root/sys/ufs
diff options
context:
space:
mode:
authorPedro Martelletto <pedro@cvs.openbsd.org>2005-07-20 16:30:36 +0000
committerPedro Martelletto <pedro@cvs.openbsd.org>2005-07-20 16:30:36 +0000
commit46977aad7743bfd836421c3ef3ce4c932d8d1598 (patch)
tree029db1f772d28cae3756093fcbff6fcc6614297a /sys/ufs
parente71ab29464650e64885f7cdd9698ff74ef4ff014 (diff)
Reintroduce the changes made by tedu in revision 1.50 of ffs_softdep.c,
this time with a small tweak: when flushing the dependencies, don't update the inode twice; instead, update it once and then, if needed, fsync it. Doing so fixes the inode hangs some people were seeing. Tested by various people for a while, especially krw@ and millert@; okay deraadt@
Diffstat (limited to 'sys/ufs')
-rw-r--r--sys/ufs/ffs/ffs_softdep.c286
-rw-r--r--sys/ufs/ffs/ffs_softdep_stub.c8
-rw-r--r--sys/ufs/ffs/softdep.h66
-rw-r--r--sys/ufs/ufs/ufs_extern.h6
-rw-r--r--sys/ufs/ufs/ufs_lookup.c38
5 files changed, 328 insertions, 76 deletions
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 164eff01655..9e94e6e2be9 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_softdep.c,v 1.59 2005/07/03 20:14:01 drahn Exp $ */
+/* $OpenBSD: ffs_softdep.c,v 1.60 2005/07/20 16:30:34 pedro Exp $ */
/*
* Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
*
@@ -65,25 +65,25 @@
/*
* Mapping of dependency structure types to malloc types.
*/
-#define D_PAGEDEP 1
-#define D_INODEDEP 2
-#define D_NEWBLK 3
-#define D_BMSAFEMAP 4
-#define D_ALLOCDIRECT 5
-#define D_INDIRDEP 6
-#define D_ALLOCINDIR 7
-#define D_FREEFRAG 8
-#define D_FREEBLKS 9
-#define D_FREEFILE 10
-#define D_DIRADD 11
-#define D_MKDIR 12
-#define D_DIRREM 13
+#define D_PAGEDEP 0
+#define D_INODEDEP 1
+#define D_NEWBLK 2
+#define D_BMSAFEMAP 3
+#define D_ALLOCDIRECT 4
+#define D_INDIRDEP 5
+#define D_ALLOCINDIR 6
+#define D_FREEFRAG 7
+#define D_FREEBLKS 8
+#define D_FREEFILE 9
+#define D_DIRADD 10
+#define D_MKDIR 11
+#define D_DIRREM 12
+#define D_NEWDIRBLK 13
#define D_LAST 13
/*
* Names of softdep types.
*/
const char *softdep_typenames[] = {
- "invalid",
"pagedep",
"inodedep",
"newblk",
@@ -97,6 +97,7 @@ const char *softdep_typenames[] = {
"diradd",
"mkdir",
"dirrem",
+ "newdirblk",
};
#define TYPENAME(type) \
((unsigned)(type) <= D_LAST ? softdep_typenames[type] : "???")
@@ -133,6 +134,7 @@ STATIC struct dirrem *newdirrem(struct buf *, struct inode *,
struct inode *, int, struct dirrem **);
STATIC void free_diradd(struct diradd *);
STATIC void free_allocindir(struct allocindir *, struct inodedep *);
+STATIC void free_newdirblk(struct newdirblk *);
STATIC int indir_trunc(struct inode *, daddr_t, int, ufs_lbn_t,
long *);
STATIC void deallocate_dependencies(struct buf *, struct inodedep *);
@@ -371,6 +373,7 @@ STATIC struct pool freefile_pool;
STATIC struct pool diradd_pool;
STATIC struct pool mkdir_pool;
STATIC struct pool dirrem_pool;
+STATIC struct pool newdirblk_pool;
static __inline void
softdep_free(struct worklist *item, int type)
@@ -425,6 +428,10 @@ softdep_free(struct worklist *item, int type)
pool_put(&dirrem_pool, item);
break;
+ case D_NEWDIRBLK:
+ pool_put(&newdirblk_pool, item);
+ break;
+
default:
#ifdef DEBUG
if (lk.lkt_held != -1)
@@ -969,7 +976,8 @@ u_long pagedep_hash; /* size of hash table - 1 */
STATIC struct sema pagedep_in_progress;
/*
- * Look up a pagedep. Return 1 if found, 0 if not found.
+ * Look up a pagedep. Return 1 if found; return 0 if not found, or if
+ * found when asked to allocate but not yet linked onto a worklist.
* If not found, allocate if DEPALLOC flag is passed.
* Found or allocated entry is returned in pagedeppp.
* This routine must be called with splbio interrupts blocked.
@@ -1000,6 +1008,9 @@ top:
break;
if (pagedep) {
*pagedeppp = pagedep;
+ if ((flags & DEPALLOC) != 0 &&
+ (pagedep->pd_state & ONWORKLIST) == 0)
+ return (0);
return (1);
}
if ((flags & DEPALLOC) == 0) {
@@ -1212,6 +1223,8 @@ softdep_initialize()
"mkdirpl", &pool_allocator_nointr);
pool_init(&dirrem_pool, sizeof(struct dirrem), 0, 0, 0,
"dirrempl", &pool_allocator_nointr);
+ pool_init(&newdirblk_pool, sizeof(struct newdirblk), 0, 0, 0,
+ "newdirblkpl", &pool_allocator_nointr);
}
/*
@@ -1438,6 +1451,7 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
adp->ad_newsize = newsize;
adp->ad_oldsize = oldsize;
adp->ad_state = ATTACHED;
+ LIST_INIT(&adp->ad_newdirblk);
if (newblkno == oldblkno)
adp->ad_freefrag = NULL;
else
@@ -1534,7 +1548,9 @@ allocdirect_merge(adphead, newadp, oldadp)
struct allocdirect *newadp; /* allocdirect being added */
struct allocdirect *oldadp; /* existing allocdirect being checked */
{
+ struct worklist *wk;
struct freefrag *freefrag;
+ struct newdirblk *newdirblk;
#ifdef DEBUG
if (lk.lkt_held == -1)
@@ -1544,7 +1560,7 @@ allocdirect_merge(adphead, newadp, oldadp)
newadp->ad_oldsize != oldadp->ad_newsize ||
newadp->ad_lbn >= NDADDR) {
FREE_LOCK(&lk);
- panic("allocdirect_check: old %d != new %d || lbn %ld >= %d",
+ panic("allocdirect_merge: old %d != new %d || lbn %ld >= %d",
newadp->ad_oldblkno, oldadp->ad_newblkno, newadp->ad_lbn,
NDADDR);
}
@@ -1571,6 +1587,17 @@ allocdirect_merge(adphead, newadp, oldadp)
newadp->ad_freefrag = oldadp->ad_freefrag;
oldadp->ad_freefrag = freefrag;
}
+ /*
+ * If we are tracking a new directory-block allocation,
+ * move it from the old allocdirect to the new allocdirect.
+ */
+ if ((wk = LIST_FIRST(&oldadp->ad_newdirblk)) != NULL) {
+ newdirblk = WK_NEWDIRBLK(wk);
+ WORKLIST_REMOVE(&newdirblk->db_list);
+ if (LIST_FIRST(&oldadp->ad_newdirblk) != NULL)
+ panic("allocdirect_merge: extra newdirblk");
+ WORKLIST_INSERT(&newadp->ad_newdirblk, &newdirblk->db_list);
+ }
free_allocdirect(adphead, oldadp, 0);
}
@@ -2061,6 +2088,21 @@ deallocate_dependencies(bp, inodedep)
WORKLIST_INSERT(&inodedep->id_bufwait,
&dirrem->dm_list);
}
+ if ((pagedep->pd_state & NEWBLOCK) != 0) {
+ LIST_FOREACH(wk, &inodedep->id_bufwait, wk_list)
+ if (wk->wk_type == D_NEWDIRBLK &&
+ WK_NEWDIRBLK(wk)->db_pagedep ==
+ pagedep)
+ break;
+ if (wk != NULL) {
+ WORKLIST_REMOVE(wk);
+ free_newdirblk(WK_NEWDIRBLK(wk));
+ } else {
+ FREE_LOCK(&lk);
+ panic("deallocate_dependencies: "
+ "lost pagedep");
+ }
+ }
WORKLIST_REMOVE(&pagedep->pd_list);
LIST_REMOVE(pagedep, pd_hash);
WORKITEM_FREE(pagedep, D_PAGEDEP);
@@ -2096,6 +2138,8 @@ free_allocdirect(adphead, adp, delay)
struct allocdirect *adp;
int delay;
{
+ struct newdirblk *newdirblk;
+ struct worklist *wk;
#ifdef DEBUG
if (lk.lkt_held == -1)
@@ -2113,10 +2157,65 @@ free_allocdirect(adphead, adp, delay)
else
add_to_worklist(&adp->ad_freefrag->ff_list);
}
+ if ((wk = LIST_FIRST(&adp->ad_newdirblk)) != NULL) {
+ newdirblk = WK_NEWDIRBLK(wk);
+ WORKLIST_REMOVE(&newdirblk->db_list);
+ if (LIST_FIRST(&adp->ad_newdirblk) != NULL)
+ panic("free_allocdirect: extra newdirblk");
+ if (delay)
+ WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait,
+ &newdirblk->db_list);
+ else
+ free_newdirblk(newdirblk);
+ }
WORKITEM_FREE(adp, D_ALLOCDIRECT);
}
/*
+ * Free a newdirblk. Clear the NEWBLOCK flag on its associated pagedep.
+ * This routine must be called with splbio interrupts blocked.
+ */
+void
+free_newdirblk(newdirblk)
+ struct newdirblk *newdirblk;
+{
+ struct pagedep *pagedep;
+ struct diradd *dap;
+ int i;
+
+#ifdef DEBUG
+ if (lk.lkt_held == -1)
+ panic("free_newdirblk: lock not held");
+#endif
+ /*
+ * If the pagedep is still linked onto the directory buffer
+ * dependency chain, then some of the entries on the
+ * pd_pendinghd list may not be committed to disk yet. In
+ * this case, we will simply clear the NEWBLOCK flag and
+ * let the pd_pendinghd list be processed when the pagedep
+ * is next written. If the pagedep is no longer on the buffer
+ * dependency chain, then all the entries on the pd_pendinghd
+ * list are committed to disk and we can free them here.
+ */
+ pagedep = newdirblk->db_pagedep;
+ pagedep->pd_state &= ~NEWBLOCK;
+ if ((pagedep->pd_state & ONWORKLIST) == 0)
+ while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
+ free_diradd(dap);
+ /*
+ * If no dependencies remain, the pagedep will be freed.
+ */
+ for (i = 0; i < DAHASHSZ; i++)
+ if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != NULL)
+ break;
+ if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0) {
+ LIST_REMOVE(pagedep, pd_hash);
+ WORKITEM_FREE(pagedep, D_PAGEDEP);
+ }
+ WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
+}
+
+/*
* Prepare an inode to be freed. The actual free operation is not
* done until the zero'ed inode has been written to disk.
*/
@@ -2427,21 +2526,25 @@ free_allocindir(aip, inodedep)
* count has been incremented, but before the directory entry's
* pointer to the inode has been set.
*/
-void
-softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp)
+int
+softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk)
struct buf *bp; /* buffer containing directory block */
struct inode *dp; /* inode for directory */
off_t diroffset; /* offset of new entry in directory */
long newinum; /* inode referenced by new directory entry */
struct buf *newdirbp; /* non-NULL => contents of new mkdir */
+ int isnewblk; /* entry is in a newly allocated block */
{
int offset; /* offset of new entry within directory block */
ufs_lbn_t lbn; /* block in directory containing new entry */
struct fs *fs;
struct diradd *dap;
+ struct allocdirect *adp;
struct pagedep *pagedep;
struct inodedep *inodedep;
+ struct newdirblk *newdirblk = NULL;
struct mkdir *mkdir1, *mkdir2;
+
fs = dp->i_fs;
lbn = lblkno(fs, diroffset);
@@ -2452,6 +2555,11 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp)
dap->da_offset = offset;
dap->da_newinum = newinum;
dap->da_state = ATTACHED;
+ if (isnewblk && lbn < NDADDR && fragoff(fs, diroffset) == 0) {
+ newdirblk = pool_get(&newdirblk_pool, PR_WAITOK);
+ newdirblk->db_list.wk_type = D_NEWDIRBLK;
+ newdirblk->db_state = 0;
+ }
if (newdirbp == NULL) {
dap->da_state |= DEPCOMPLETE;
ACQUIRE_LOCK(&lk);
@@ -2478,7 +2586,7 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp)
* Dependency on link count increase for parent directory
*/
ACQUIRE_LOCK(&lk);
- if (inodedep_lookup(dp->i_fs, dp->i_number, 0, &inodedep) == 0
+ if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0
|| (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
dap->da_state &= ~MKDIR_PARENT;
WORKITEM_FREE(mkdir2, D_MKDIR);
@@ -2505,7 +2613,57 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp)
diradd_inode_written(dap, inodedep);
else
WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
+ if (isnewblk) {
+ /*
+ * Directories growing into indirect blocks are rare
+ * enough and the frequency of new block allocation
+ * in those cases even more rare, that we choose not
+ * to bother tracking them. Rather we simply force the
+ * new directory entry to disk.
+ */
+ if (lbn >= NDADDR) {
+ FREE_LOCK(&lk);
+ /*
+ * We only have a new allocation when at the
+ * beginning of a new block, not when we are
+ * expanding into an existing block.
+ */
+ if (blkoff(fs, diroffset) == 0)
+ return (1);
+ return (0);
+ }
+ /*
+ * We only have a new allocation when at the beginning
+ * of a new fragment, not when we are expanding into an
+ * existing fragment. Also, there is nothing to do if we
+ * are already tracking this block.
+ */
+ if (fragoff(fs, diroffset) != 0) {
+ FREE_LOCK(&lk);
+ return (0);
+ }
+
+ if ((pagedep->pd_state & NEWBLOCK) != 0) {
+ WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
+ FREE_LOCK(&lk);
+ return (0);
+ }
+ /*
+ * Find our associated allocdirect and have it track us.
+ */
+ if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0)
+ panic("softdep_setup_directory_add: lost inodedep");
+ adp = TAILQ_LAST(&inodedep->id_newinoupdt, allocdirectlst);
+ if (adp == NULL || adp->ad_lbn != lbn) {
+ FREE_LOCK(&lk);
+ panic("softdep_setup_directory_add: lost entry");
+ }
+ pagedep->pd_state |= NEWBLOCK;
+ newdirblk->db_pagedep = pagedep;
+ WORKLIST_INSERT(&adp->ad_newdirblk, &newdirblk->db_list);
+ }
FREE_LOCK(&lk);
+ return (0);
}
/*
@@ -3105,7 +3263,7 @@ softdep_disk_io_initiation(bp)
/*
* Replace up-to-date version with safe version.
*/
- MALLOC(indirdep->ir_saveddata, caddr_t, bp->b_bcount,
+ indirdep->ir_saveddata = malloc(bp->b_bcount,
M_INDIRDEP, M_WAITOK);
ACQUIRE_LOCK(&lk);
indirdep->ir_state &= ~ATTACHED;
@@ -3415,7 +3573,7 @@ softdep_disk_write_complete(bp)
if (indirdep->ir_state & GOINGAWAY)
panic("disk_write_complete: indirdep gone");
bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount);
- FREE(indirdep->ir_saveddata, M_INDIRDEP);
+ free(indirdep->ir_saveddata, M_INDIRDEP);
indirdep->ir_saveddata = 0;
indirdep->ir_state &= ~UNDONE;
indirdep->ir_state |= ATTACHED;
@@ -3680,6 +3838,10 @@ handle_written_inodeblock(inodedep, bp)
add_to_worklist(wk);
continue;
+ case D_NEWDIRBLK:
+ free_newdirblk(WK_NEWDIRBLK(wk));
+ continue;
+
default:
panic("handle_written_inodeblock: Unknown type %s",
TYPENAME(wk->wk_type));
@@ -3783,9 +3945,12 @@ handle_written_filepage(pagedep, bp)
}
/*
* Free any directory additions that have been committed.
+ * If it is a newly allocated block, we have to wait until
+ * the on-disk directory inode claims the new block.
*/
- while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
- free_diradd(dap);
+ if ((pagedep->pd_state & NEWBLOCK) == 0)
+ while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
+ free_diradd(dap);
/*
* Uncommitted directory entries must be restored.
*/
@@ -3822,23 +3987,19 @@ handle_written_filepage(pagedep, bp)
if ((bp->b_flags & B_DELWRI) == 0)
stat_dir_entry++;
buf_dirty(bp);
+ return (1);
}
/*
- * If no dependencies remain, the pagedep will be freed.
- * Otherwise it will remain to update the page before it
- * is written back to disk.
+ * If we are not waiting for a new directory block to be
+ * claimed by its inode, then the pagedep will be freed.
+ * Otherwise it will remain to track any new entries on
+ * the page in case they are fsync'ed.
*/
- if (LIST_FIRST(&pagedep->pd_pendinghd) == 0) {
- for (i = 0; i < DAHASHSZ; i++)
- if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != NULL)
- break;
- if (i == DAHASHSZ) {
- LIST_REMOVE(pagedep, pd_hash);
- WORKITEM_FREE(pagedep, D_PAGEDEP);
- return (0);
- }
+ if ((pagedep->pd_state & NEWBLOCK) == 0) {
+ LIST_REMOVE(pagedep, pd_hash);
+ WORKITEM_FREE(pagedep, D_PAGEDEP);
}
- return (1);
+ return (0);
}
/*
@@ -4008,6 +4169,7 @@ softdep_fsync(vp)
struct mount *mnt;
struct vnode *pvp;
struct inode *ip;
+ struct inode *pip;
struct buf *bp;
struct fs *fs;
struct proc *p = CURPROC; /* XXX */
@@ -4039,8 +4201,8 @@ softdep_fsync(vp)
}
dap = WK_DIRADD(wk);
/*
- * Flush our parent if this directory entry
- * has a MKDIR_PARENT dependency.
+ * Flush our parent if this directory entry has a MKDIR_PARENT
+ * dependency or is contained in a newly allocated block.
*/
if (dap->da_state & DIRCHG)
pagedep = dap->da_previous->dm_pagedep;
@@ -4053,7 +4215,11 @@ softdep_fsync(vp)
FREE_LOCK(&lk);
panic("softdep_fsync: dirty");
}
- flushparent = dap->da_state & MKDIR_PARENT;
+ if ((dap->da_state & MKDIR_PARENT) ||
+ (pagedep->pd_state & NEWBLOCK))
+ flushparent = 1;
+ else
+ flushparent = 0;
/*
* If we are being fsync'ed as part of vgone'ing this vnode,
* then we will not be able to release and recover the
@@ -4077,16 +4243,33 @@ softdep_fsync(vp)
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (error != 0)
return (error);
+ /*
+ * All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps
+ * that are contained in direct blocks will be resolved by
+ * doing a UFS_UPDATE. Pagedeps contained in indirect blocks
+ * may require a complete sync'ing of the directory. So, we
+ * try the cheap and fast UFS_UPDATE first, and if that fails,
+ * then we do the slower VOP_FSYNC of the directory.
+ */
+ pip = VTOI(pvp);
if (flushparent) {
- if ((error = UFS_UPDATE(VTOI(pvp), MNT_WAIT))) {
+ error = UFS_UPDATE(pip, MNT_WAIT);
+ if (error) {
vput(pvp);
return (error);
}
+ if (pagedep->pd_state & NEWBLOCK) {
+ error = VOP_FSYNC(pvp, p->p_ucred, MNT_WAIT, p);
+ if (error) {
+ vput(pvp);
+ return (error);
+ }
+ }
}
/*
* Flush directory page containing the inode's name.
*/
- error = bread(pvp, lbn, blksize(fs, VTOI(pvp), lbn), p->p_ucred,
+ error = bread(pvp, lbn, blksize(fs, pip, lbn), p->p_ucred,
&bp);
if (error == 0)
error = bwrite(bp);
@@ -4213,6 +4396,11 @@ softdep_sync_metadata(ap)
*/
waitfor = MNT_NOWAIT;
top:
+ /*
+ * We must wait for any I/O in progress to finish so that
+ * all potential buffers on the dirty list will be visible.
+ */
+ drain_output(vp, 1);
if (getdirtybuf(&LIST_FIRST(&vp->v_dirtyblkhd), MNT_WAIT) == 0) {
FREE_LOCK(&lk);
return (0);
@@ -4369,15 +4557,8 @@ loop:
goto loop;
}
/*
- * We must wait for any I/O in progress to finish so that
- * all potential buffers on the dirty list will be visible.
- * Once they are all there, proceed with the second pass
- * which will wait for the I/O as per above.
- */
- drain_output(vp, 1);
- /*
* The brief unlock is to allow any pent up dependency
- * processing to be done.
+ * processing to be done. Then proceed with the second pass.
*/
if (waitfor == MNT_NOWAIT) {
waitfor = MNT_WAIT;
@@ -4390,7 +4571,11 @@ loop:
* If we have managed to get rid of all the dirty buffers,
* then we are done. For certain directories and block
* devices, we may need to do further work.
+ *
+ * We must wait for any I/O in progress to finish so that
+ * all potential buffers on the dirty list will be visible.
*/
+ drain_output(vp, 1);
if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
FREE_LOCK(&lk);
return (0);
@@ -4888,6 +5073,7 @@ clear_inodedeps(p)
vn_finished_write(mp);
#endif
ACQUIRE_LOCK(&lk);
+ drain_output(vp, 1);
}
FREE_LOCK(&lk);
}
diff --git a/sys/ufs/ffs/ffs_softdep_stub.c b/sys/ufs/ffs/ffs_softdep_stub.c
index 14dd963d854..bb52d82d15b 100644
--- a/sys/ufs/ffs/ffs_softdep_stub.c
+++ b/sys/ufs/ffs/ffs_softdep_stub.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_softdep_stub.c,v 1.10 2005/07/03 20:14:02 drahn Exp $ */
+/* $OpenBSD: ffs_softdep_stub.c,v 1.11 2005/07/20 16:30:35 pedro Exp $ */
/*
* Copyright 1998 Marshall Kirk McKusick. All Rights Reserved.
@@ -153,16 +153,18 @@ softdep_freefile(pvp, ino, mode)
panic("softdep_freefile called");
}
-void
-softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp)
+int
+softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk)
struct buf *bp;
struct inode *dp;
off_t diroffset;
long newinum;
struct buf *newdirbp;
+ int isnewblk;
{
panic("softdep_setup_directory_add called");
+ return (0);
}
void
diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h
index 75d925fbb3e..309dbb5e25d 100644
--- a/sys/ufs/ffs/softdep.h
+++ b/sys/ufs/ffs/softdep.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: softdep.h,v 1.8 2004/12/07 04:37:28 tedu Exp $ */
+/* $OpenBSD: softdep.h,v 1.9 2005/07/20 16:30:35 pedro Exp $ */
/*
* Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
*
@@ -83,20 +83,27 @@
* data structure is frozen from further change until its dependencies
* have been completed and its resources freed after which it will be
* discarded. The IOSTARTED flag prevents multiple calls to the I/O
- * start routine from doing multiple rollbacks. The ONWORKLIST flag
- * shows whether the structure is currently linked onto a worklist.
+ * start routine from doing multiple rollbacks. The SPACECOUNTED flag
+ * says that the file's space has been accounted to the pending free
+ * space count. The NEWBLOCK flag marks pagedeps whose directory block
+ * has just been allocated and so must be claimed by the inode before
+ * all dependencies are complete. The ONWORKLIST flag shows whether
+ * the structure is currently linked onto a worklist.
+ *
*/
#define ATTACHED 0x0001
#define UNDONE 0x0002
#define COMPLETE 0x0004
#define DEPCOMPLETE 0x0008
-#define MKDIR_PARENT 0x0010
-#define MKDIR_BODY 0x0020
-#define RMDIR 0x0040
-#define DIRCHG 0x0080
-#define GOINGAWAY 0x0100
-#define IOSTARTED 0x0200
-#define ONWORKLIST 0x8000
+#define MKDIR_PARENT 0x0010 /* diradd & mkdir only */
+#define MKDIR_BODY 0x0020 /* diradd & mkdir only */
+#define RMDIR 0x0040 /* dirrem only */
+#define DIRCHG 0x0080 /* diradd & dirrem only */
+#define GOINGAWAY 0x0100 /* indirdep only */
+#define IOSTARTED 0x0200 /* inodedep & pagedep only */
+#define SPACECOUNTED 0x0400 /* inodedep only */
+#define NEWBLOCK 0x0800 /* pagedep only */
+#define ONWORKLIST 0x8000
#define ALLCOMPLETE (ATTACHED | COMPLETE | DEPCOMPLETE)
@@ -140,6 +147,7 @@ struct worklist {
#define WK_DIRADD(wk) ((struct diradd *)(wk))
#define WK_MKDIR(wk) ((struct mkdir *)(wk))
#define WK_DIRREM(wk) ((struct dirrem *)(wk))
+#define WK_NEWDIRBLK(wk) ((struct newdirblk *)(wk))
/*
* Various types of lists
@@ -300,7 +308,17 @@ struct bmsafemap {
* be freed once the inode claiming the new block is written to disk.
* This ad_fragfree request is attached to the id_inowait list of the
* associated inodedep (pointed to by ad_inodedep) for processing after
- * the inode is written.
+ * the inode is written. When a block is allocated to a directory, an
+ * fsync of a file whose name is within that block must ensure not only
+ * that the block containing the file name has been written, but also
+ * that the on-disk inode references that block. When a new directory
+ * block is created, we allocate a newdirblk structure which is linked
+ * to the associated allocdirect (on its ad_newdirblk list). When the
+ * allocdirect has been satisfied, the newdirblk structure is moved to
+ * the inodedep id_bufwait list of its directory to await the inode
+ * being written. When the inode is written, the directory entries are
+ * fully committed and can be deleted from their pagedep->pd_pendinghd
+ * and inodedep->id_pendinghd lists.
*/
struct allocdirect {
struct worklist ad_list; /* buffer holding block */
@@ -315,6 +333,7 @@ struct allocdirect {
struct buf *ad_buf; /* cylgrp buffer (if pending) */
struct inodedep *ad_inodedep; /* associated inodedep */
struct freefrag *ad_freefrag; /* fragment to be freed (if any) */
+ struct workhead ad_newdirblk; /* dir block to notify when written */
};
/*
@@ -530,3 +549,28 @@ struct dirrem {
};
#define dm_pagedep dm_un.dmu_pagedep
#define dm_dirinum dm_un.dmu_dirinum
+
+
+/*
+ * A "newdirblk" structure tracks the progress of a newly allocated
+ * directory block from its creation until it is claimed by its on-disk
+ * inode. When a block is allocated to a directory, an fsync of a file
+ * whose name is within that block must ensure not only that the block
+ * containing the file name has been written, but also that the on-disk
+ * inode references that block. When a new directory block is created,
+ * we allocate a newdirblk structure which is linked to the associated
+ * allocdirect (on its ad_newdirblk list). When the allocdirect has been
+ * satisfied, the newdirblk structure is moved to the inodedep id_bufwait
+ * list of its directory to await the inode being written. When the inode
+ * is written, the directory entries are fully committed and can be
+ * deleted from their pagedep->pd_pendinghd and inodedep->id_pendinghd
+ * lists. Note that we could track directory blocks allocated to indirect
+ * blocks using a similar scheme with the allocindir structures. Rather
+ * than adding this level of complexity, we simply write those newly
+ * allocated indirect blocks synchronously as such allocations are rare.
+ */
+struct newdirblk {
+ struct worklist db_list;/* ad_newdirblk or id_bufwait */
+# define db_state db_list.wk_state /* unused */
+ struct pagedep *db_pagedep;/* associated pagedep */
+};
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
index d73eaf6ade1..1771effe74e 100644
--- a/sys/ufs/ufs/ufs_extern.h
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_extern.h,v 1.24 2005/06/10 17:37:41 pedro Exp $ */
+/* $OpenBSD: ufs_extern.h,v 1.25 2005/07/20 16:30:35 pedro Exp $ */
/* $NetBSD: ufs_extern.h,v 1.5 1996/02/09 22:36:03 christos Exp $ */
/*-
@@ -148,8 +148,8 @@ int ufs_makeinode(int, struct vnode *, struct vnode **,
/*
* Soft dependency function prototypes.
*/
-void softdep_setup_directory_add(struct buf *, struct inode *, off_t,
- long, struct buf *);
+int softdep_setup_directory_add(struct buf *, struct inode *, off_t,
+ long, struct buf *, int);
void softdep_change_directoryentry_offset(struct inode *, caddr_t,
caddr_t, caddr_t, int);
void softdep_setup_remove(struct buf *,struct inode *, struct inode *,
diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c
index 62eb6d40632..97238080f1f 100644
--- a/sys/ufs/ufs/ufs_lookup.c
+++ b/sys/ufs/ufs/ufs_lookup.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_lookup.c,v 1.32 2005/07/03 20:14:03 drahn Exp $ */
+/* $OpenBSD: ufs_lookup.c,v 1.33 2005/07/20 16:30:35 pedro Exp $ */
/* $NetBSD: ufs_lookup.c,v 1.7 1996/02/09 22:36:06 christos Exp $ */
/*
@@ -780,12 +780,31 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp)
(bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
blkoff += DIRBLKSIZ;
}
- softdep_setup_directory_add(bp, dp, dp->i_offset,
- dirp->d_ino, newdirbp);
- bdwrite(bp);
- } else {
- error = VOP_BWRITE(bp);
- }
+ if (softdep_setup_directory_add(bp, dp, dp->i_offset,
+ dirp->d_ino, newdirbp, 1) == 0) {
+ bdwrite(bp);
+ return (UFS_UPDATE(dp, 0));
+ }
+ /* We have just allocated a directory block in an
+ * indirect block. Rather than tracking when it gets
+ * claimed by the inode, we simply do a VOP_FSYNC
+ * now to ensure that it is there (in case the user
+ * does a future fsync). Note that we have to unlock
+ * the inode for the entry that we just entered, as
+ * the VOP_FSYNC may need to lock other inodes which
+ * can lead to deadlock if we also hold a lock on
+ * the newly entered node.
+ */
+ if ((error = VOP_BWRITE(bp)))
+ return (error);
+ if (tvp != NULL)
+ VOP_UNLOCK(tvp, 0, p);
+ error = VOP_FSYNC(dvp, p->p_ucred, MNT_WAIT, p);
+ if (tvp != NULL)
+ vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p);
+ return (error);
+ }
+ error = VOP_BWRITE(bp);
ret = UFS_UPDATE(dp, !DOINGSOFTDEP(dvp));
if (error == 0)
return (ret);
@@ -900,8 +919,9 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp)
#endif
if (DOINGSOFTDEP(dvp)) {
- softdep_setup_directory_add(bp, dp,
- dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp);
+ (void)softdep_setup_directory_add(bp, dp,
+ dp->i_offset + (caddr_t)ep - dirbuf,
+ dirp->d_ino, newdirbp, 0);
bdwrite(bp);
} else {
error = VOP_BWRITE(bp);