author     Constantine Sapuntzakis <csapuntz@cvs.openbsd.org>    2001-02-21 23:24:33 +0000
committer  Constantine Sapuntzakis <csapuntz@cvs.openbsd.org>    2001-02-21 23:24:33 +0000
commit     33e6fbe33f4ec84f10016e81c87c7be89171378b (patch)
tree       122cb8b58569496544bc77d618dec5e995a23b94 /sys
parent     d0a302227eeedfb62540337fcbc0741756591d7c (diff)
Latest soft updates from FreeBSD/Kirk McKusick
Snapshot-related code has been commented out.
Diffstat (limited to 'sys')
-rw-r--r--  sys/kern/kern_malloc.c          |    9
-rw-r--r--  sys/kern/vfs_bio.c              |   16
-rw-r--r--  sys/kern/vfs_cluster.c          |    6
-rw-r--r--  sys/kern/vfs_subr.c             |   16
-rw-r--r--  sys/kern/vfs_sync.c             |   35
-rw-r--r--  sys/kern/vfs_syscalls.c         |    9
-rw-r--r--  sys/sys/buf.h                   |   48
-rw-r--r--  sys/sys/malloc.h                |    4
-rw-r--r--  sys/sys/vnode.h                 |    7
-rw-r--r--  sys/ufs/ffs/ffs_alloc.c         |    6
-rw-r--r--  sys/ufs/ffs/ffs_extern.h        |   22
-rw-r--r--  sys/ufs/ffs/ffs_inode.c         |    8
-rw-r--r--  sys/ufs/ffs/ffs_softdep.c       | 1532
-rw-r--r--  sys/ufs/ffs/ffs_softdep_stub.c  |   57
-rw-r--r--  sys/ufs/ffs/ffs_vfsops.c        |   11
-rw-r--r--  sys/ufs/ffs/softdep.h           |   40
-rw-r--r--  sys/ufs/ufs/inode.h             |    3
-rw-r--r--  sys/ufs/ufs/ufs_extern.h        |    5
-rw-r--r--  sys/ufs/ufs/ufs_lookup.c        |   48
-rw-r--r--  sys/ufs/ufs/ufs_vnops.c         |  106
20 files changed, 1266 insertions, 722 deletions
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index d52ea29ef5e..6c2c7f3c046 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_malloc.c,v 1.23 2001/02/20 23:35:35 csapuntz Exp $ */
+/* $OpenBSD: kern_malloc.c,v 1.24 2001/02/21 23:24:29 csapuntz Exp $ */
/* $NetBSD: kern_malloc.c,v 1.15.4.2 1996/06/13 17:10:56 cgd Exp $ */
/*
@@ -135,11 +135,8 @@ malloc(size, type, flags)
#endif
#ifdef MALLOC_DEBUG
- if (debug_malloc(size, type, flags, (void **)&va)) {
- if ((flags & M_ZERO) && va != NULL)
- bzero(va, size);
+ if (debug_malloc(size, type, flags, (void **)&va))
return ((void *) va);
- }
#endif
indx = BUCKETINDX(size);
@@ -312,8 +309,6 @@ out:
out:
#endif
splx(s);
- if ((flags & M_ZERO) && va != NULL)
- bzero(va, size);
return ((void *) va);
}
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 3021d4bbd0d..eae71abafb5 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_bio.c,v 1.28 2001/02/13 19:51:49 art Exp $ */
+/* $OpenBSD: vfs_bio.c,v 1.29 2001/02/21 23:24:30 csapuntz Exp $ */
/* $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $ */
/*-
@@ -475,9 +475,9 @@ brelse(bp)
* If it's invalid or empty, dissociate it from its vnode
* and put on the head of the appropriate queue.
*/
- if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate) {
- (*bioops.io_deallocate)(bp);
- }
+ if (LIST_FIRST(&bp->b_dep) != NULL)
+ buf_deallocate(bp);
+
CLR(bp->b_flags, B_DELWRI);
if (bp->b_vp) {
reassignbuf(bp, bp->b_vp);
@@ -787,8 +787,8 @@ start:
splx(s);
- if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate)
- (*bioops.io_deallocate)(bp);
+ if (LIST_FIRST(&bp->b_dep) != NULL)
+ buf_deallocate(bp);
/* clear out various other fields */
bp->b_flags = B_BUSY;
@@ -866,8 +866,8 @@ biodone(bp)
panic("biodone already");
SET(bp->b_flags, B_DONE); /* note that it's done */
- if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete)
- (*bioops.io_complete)(bp);
+ if (LIST_FIRST(&bp->b_dep) != NULL)
+ buf_complete(bp);
if (!ISSET(bp->b_flags, B_READ)) /* wake up reader */
vwakeup(bp);
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index 0c433c72b83..1839e585f0f 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_cluster.c,v 1.17 2000/06/23 02:14:38 mickey Exp $ */
+/* $OpenBSD: vfs_cluster.c,v 1.18 2001/02/21 23:24:30 csapuntz Exp $ */
/* $NetBSD: vfs_cluster.c,v 1.12 1996/04/22 01:39:05 christos Exp $ */
/*-
@@ -703,8 +703,8 @@ redo:
tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
tbp->b_flags |= (B_ASYNC | B_AGE);
- if (LIST_FIRST(&tbp->b_dep) != NULL && bioops.io_start)
- (*bioops.io_start)(tbp);
+ if (LIST_FIRST(&tbp->b_dep) != NULL)
+ buf_start(tbp);
pagemove(tbp->b_data, cp, size);
bp->b_bcount += size;
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 0cb3e61cc4b..bee6b56c1ae 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_subr.c,v 1.48 2001/02/08 00:32:11 mickey Exp $ */
+/* $OpenBSD: vfs_subr.c,v 1.49 2001/02/21 23:24:30 csapuntz Exp $ */
/* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */
/*
@@ -2209,3 +2209,17 @@ vfs_unregister(vfs)
return 0;
}
+
+/*
+ * Check if vnode represents a disk device
+ */
+int
+vn_isdisk(vp, errp)
+ struct vnode *vp;
+ int *errp;
+{
+ if (vp->v_type != VBLK && vp->v_type != VCHR)
+ return (0);
+
+ return (1);
+}
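
[editor's note] The vn_isdisk() helper added above only classifies the vnode; as committed here it never writes through errp. A hypothetical caller sketch, not part of this commit (the errno choice is an assumption):

	/* Hypothetical caller: refuse anything that is not a
	 * block or character device vnode; errp is unused in
	 * this version, so the caller supplies its own errno. */
	if (!vn_isdisk(devvp, NULL))
		return (ENOTBLK);
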
diff --git a/sys/kern/vfs_sync.c b/sys/kern/vfs_sync.c
index 128866b8f69..d5c9fddf418 100644
--- a/sys/kern/vfs_sync.c
+++ b/sys/kern/vfs_sync.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_sync.c,v 1.12 2000/03/23 15:57:33 art Exp $ */
+/* $OpenBSD: vfs_sync.c,v 1.13 2001/02/21 23:24:30 csapuntz Exp $ */
/*
* Portions of this code are:
@@ -55,6 +55,10 @@
#include <sys/kernel.h>
+#ifdef FFS_SOFTUPDATES
+int softdep_process_worklist __P((struct mount *));
+#endif
+
/*
* The workitem queue.
*/
@@ -67,7 +71,7 @@ int rushjob = 0; /* number of slots to run ASAP */
int stat_rush_requests = 0; /* number of rush requests */
static int syncer_delayno = 0;
-static long syncer_last;
+static long syncer_mask;
LIST_HEAD(synclist, vnode);
static struct synclist *syncer_workitem_pending;
@@ -105,16 +109,9 @@ void
vn_initialize_syncerd()
{
- int i;
-
- syncer_last = SYNCER_MAXDELAY + 2;
-
- syncer_workitem_pending =
- malloc(syncer_last * sizeof(struct synclist),
- M_VNODE, M_WAITOK);
-
- for (i = 0; i < syncer_last; i++)
- LIST_INIT(&syncer_workitem_pending[i]);
+ syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, M_WAITOK,
+ &syncer_mask);
+ syncer_maxdelay = syncer_mask + 1;
}
/*
@@ -132,9 +129,10 @@ vn_syncer_add_to_worklist(vp, delay)
if (vp->v_flag & VONSYNCLIST)
LIST_REMOVE(vp, v_synclist);
- if (delay > syncer_maxdelay)
- delay = syncer_maxdelay;
- slot = (syncer_delayno + delay) % syncer_last;
+ if (delay > syncer_maxdelay - 2)
+ delay = syncer_maxdelay - 2;
+ slot = (syncer_delayno + delay) & syncer_mask;
+
LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
vp->v_flag |= VONSYNCLIST;
splx(s);
@@ -164,7 +162,7 @@ sched_sync(p)
s = splbio();
slp = &syncer_workitem_pending[syncer_delayno];
syncer_delayno += 1;
- if (syncer_delayno >= syncer_last)
+ if (syncer_delayno == syncer_maxdelay)
syncer_delayno = 0;
splx(s);
while ((vp = LIST_FIRST(slp)) != NULL) {
@@ -182,11 +180,12 @@ sched_sync(p)
}
}
+#ifdef FFS_SOFTUPDATES
/*
* Do soft update processing.
*/
- if (bioops.io_sync)
- (*bioops.io_sync)(NULL);
+ softdep_process_worklist(NULL);
+#endif
/*
* The variable rushjob allows the kernel to speed up the
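
[editor's note] The vfs_sync.c change above swaps the modulo over an arbitrary table size for a bitwise AND with syncer_mask, which works because hashinit() rounds the table size up to a power of two and returns size - 1 as the mask. A standalone userland sketch of that equivalence (made-up values, not kernel code):

	#include <assert.h>
	#include <stdio.h>

	int
	main(void)
	{
		long mask = 128 - 1;	/* power-of-two table size minus one */
		int delayno;

		/* For a power-of-two size, (x % size) == (x & mask). */
		for (delayno = 0; delayno < 1000; delayno++)
			assert((delayno & mask) == (delayno % (mask + 1)));
		printf("slot for delayno 300: %ld\n", 300 & mask);
		return (0);
	}
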
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index ec0f6bf9597..f74993737ae 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_syscalls.c,v 1.67 2001/02/20 01:50:09 assar Exp $ */
+/* $OpenBSD: vfs_syscalls.c,v 1.68 2001/02/21 23:24:30 csapuntz Exp $ */
/* $NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $ */
/*
@@ -2280,9 +2280,10 @@ sys_fsync(p, v, retval)
vp = (struct vnode *)fp->f_data;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
- if (error == 0 && bioops.io_fsync != NULL &&
- vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
- error = (*bioops.io_fsync)(vp);
+#ifdef FFS_SOFTUPDATES
+ if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
+ error = softdep_fsync(vp);
+#endif
VOP_UNLOCK(vp, 0, p);
return (error);
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 24c66801ab1..0ea3baee7b2 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: buf.h,v 1.15 1999/02/26 02:15:41 art Exp $ */
+/* $OpenBSD: buf.h,v 1.16 2001/02/21 23:24:30 csapuntz Exp $ */
/* $NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $ */
/*
@@ -62,13 +62,12 @@ LIST_HEAD(workhead, worklist);
* to use these hooks, a pointer to a set of bio_ops could be added
* to each buffer.
*/
-struct mount;
extern struct bio_ops {
void (*io_start) __P((struct buf *));
void (*io_complete) __P((struct buf *));
- void (*io_deallocate) __P((struct buf *));
- int (*io_fsync) __P((struct vnode *));
- int (*io_sync) __P((struct mount *));
+ void (*io_deallocate) __P((struct buf *));
+ void (*io_movedeps) __P((struct buf *, struct buf *));
+ int (*io_countdeps) __P((struct buf *, int));
} bioops;
@@ -174,6 +173,7 @@ struct cluster_save {
(bp)->b_resid = 0; \
}
+
/* Flags to low-level allocation routines. */
#define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */
#define B_SYNC 0x02 /* Do all allocations synchronously. */
@@ -221,6 +221,44 @@ int physio __P((void (*strategy)(struct buf *), struct buf *bp, dev_t dev,
void brelvp __P((struct buf *));
void reassignbuf __P((struct buf *, struct vnode *));
void bgetvp __P((struct vnode *, struct buf *));
+
+static __inline void
+buf_start(struct buf *bp)
+{
+ if (bioops.io_start)
+ (*bioops.io_start)(bp);
+}
+
+static __inline void
+buf_complete(struct buf *bp)
+{
+ if (bioops.io_complete)
+ (*bioops.io_complete)(bp);
+}
+
+static __inline void
+buf_deallocate(struct buf *bp)
+{
+ if (bioops.io_deallocate)
+ (*bioops.io_deallocate)(bp);
+}
+
+static __inline void
+buf_movedeps(struct buf *bp, struct buf *bp2)
+{
+ if (bioops.io_movedeps)
+ (*bioops.io_movedeps)(bp, bp2);
+}
+
+static __inline int
+buf_countdeps(struct buf *bp, int i)
+{
+ if (bioops.io_countdeps)
+ return ((*bioops.io_countdeps)(bp, i));
+ else
+ return (0);
+}
+
__END_DECLS
#endif
#endif /* !_SYS_BUF_H_ */
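
[editor's note] The inline buf_start()/buf_complete()/buf_deallocate()/buf_movedeps()/buf_countdeps() wrappers added above fold the "is the hook set?" test into one place, so the vfs_bio.c and vfs_cluster.c hunks earlier in this diff reduce to a single guarded call. A minimal caller fragment mirroring those hunks (sketch only, kernel context assumed):

	/* Run any soft-dependency teardown before recycling the
	 * buffer, without poking at bioops function pointers. */
	if (LIST_FIRST(&bp->b_dep) != NULL)
		buf_deallocate(bp);
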
diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h
index eb63e284112..74a23c2fb15 100644
--- a/sys/sys/malloc.h
+++ b/sys/sys/malloc.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: malloc.h,v 1.36 2001/02/21 08:03:52 csapuntz Exp $ */
+/* $OpenBSD: malloc.h,v 1.37 2001/02/21 23:24:30 csapuntz Exp $ */
/* $NetBSD: malloc.h,v 1.39 1998/07/12 19:52:01 augustss Exp $ */
/*
@@ -54,7 +54,6 @@
*/
#define M_WAITOK 0x0000
#define M_NOWAIT 0x0001
-#define M_ZERO 0x0008
/*
* Types of memory to be allocated
@@ -407,7 +406,6 @@ struct kmembuckets {
} else { \
(space) = (cast)kbp->kb_next; \
kbp->kb_next = *(caddr_t *)(space); \
- if (flags & M_ZERO) bzero((space),(size)); \
} \
splx(s); \
} while (0)
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 9d5685c67e4..8251fb22e7e 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vnode.h,v 1.26 2000/11/21 21:49:56 provos Exp $ */
+/* $OpenBSD: vnode.h,v 1.27 2001/02/21 23:24:30 csapuntz Exp $ */
/* $NetBSD: vnode.h,v 1.38 1996/02/29 20:59:05 cgd Exp $ */
/*
@@ -507,4 +507,9 @@ void vput __P((struct vnode *vp));
void vrele __P((struct vnode *vp));
int vaccess __P((mode_t file_mode, uid_t uid, gid_t gid,
mode_t acc_mode, struct ucred *cred));
+
+int vn_isdisk __P((struct vnode *vp, int *errp));
+
+int softdep_fsync __P((struct vnode *vp));
+
#endif /* _KERNEL */
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 7562ea203b9..9ddaf9f85f6 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_alloc.c,v 1.18 2000/01/14 19:23:34 art Exp $ */
+/* $OpenBSD: ffs_alloc.c,v 1.19 2001/02/21 23:24:30 csapuntz Exp $ */
/* $NetBSD: ffs_alloc.c,v 1.11 1996/05/11 18:27:09 mycroft Exp $ */
/*
@@ -317,7 +317,7 @@ nospace:
* logical blocks to be made contiguous is given. The allocator attempts
* to find a range of sequential blocks starting as close as possible to
* an fs_rotdelay offset from the end of the allocation for the logical
- * block immediately preceeding the current range. If successful, the
+ * block immediately preceding the current range. If successful, the
* physical block numbers in the buffer pointers and in the inode are
* changed to reflect the new allocation. If unsuccessful, the allocation
* is left unchanged. The success in doing the reallocation is returned.
@@ -1414,7 +1414,7 @@ ffs_vfree(v)
if (DOINGSOFTDEP(ap->a_pvp)) {
- softdep_freefile(ap);
+ softdep_freefile(ap->a_pvp, ap->a_ino, ap->a_mode);
return (0);
}
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index 44b77883d07..93dda25df9c 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_extern.h,v 1.10 2001/02/20 01:50:12 assar Exp $ */
+/* $OpenBSD: ffs_extern.h,v 1.11 2001/02/21 23:24:31 csapuntz Exp $ */
/* $NetBSD: ffs_extern.h,v 1.4 1996/02/09 22:22:22 christos Exp $ */
/*-
@@ -140,25 +140,23 @@ void softdep_initialize __P((void));
int softdep_process_worklist __P((struct mount *));
int softdep_mount __P((struct vnode *, struct mount *, struct fs *,
struct ucred *));
+int softdep_flushworklist __P((struct mount *, int *, struct proc *));
int softdep_flushfiles __P((struct mount *, int, struct proc *));
void softdep_update_inodeblock __P((struct inode *, struct buf *, int));
void softdep_load_inodeblock __P((struct inode *));
-int softdep_fsync __P((struct vnode *));
-void softdep_freefile __P((struct vop_vfree_args *));
+void softdep_freefile __P((struct vnode *, ino_t, int));
void softdep_setup_freeblocks __P((struct inode *, off_t));
-void softdep_deallocate_dependencies __P((struct buf *));
void softdep_setup_inomapdep __P((struct buf *, struct inode *, ino_t));
-void softdep_setup_blkmapdep __P((struct buf *, struct fs *, daddr_t));
-void softdep_setup_allocdirect __P((struct inode *, ufs_lbn_t, daddr_t,
- daddr_t, long, long, struct buf *));
+void softdep_setup_blkmapdep __P((struct buf *, struct fs *, ufs_daddr_t));
+void softdep_setup_allocdirect __P((struct inode *, ufs_lbn_t, ufs_daddr_t,
+ ufs_daddr_t, long, long, struct buf *));
void softdep_setup_allocindir_meta __P((struct buf *, struct inode *,
- struct buf *, int, daddr_t));
+ struct buf *, int, ufs_daddr_t));
void softdep_setup_allocindir_page __P((struct inode *, ufs_lbn_t,
- struct buf *, int, daddr_t, daddr_t, struct buf *));
-void softdep_disk_io_initiation __P((struct buf *));
-void softdep_disk_write_complete __P((struct buf *));
-int softdep_sync_metadata __P((struct vop_fsync_args *));
+ struct buf *, int, ufs_daddr_t, ufs_daddr_t, struct buf *));
void softdep_fsync_mountdev __P((struct vnode *));
+int softdep_sync_metadata __P((struct vop_fsync_args *));
+int softdep_fsync __P((struct vnode *vp));
__END_DECLS
extern int (**ffs_vnodeop_p) __P((void *));
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index b9f1ad90b8d..6607642eae3 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_inode.c,v 1.16 2000/06/23 02:14:39 mickey Exp $ */
+/* $OpenBSD: ffs_inode.c,v 1.17 2001/02/21 23:24:31 csapuntz Exp $ */
/* $NetBSD: ffs_inode.c,v 1.10 1996/05/11 18:27:19 mycroft Exp $ */
/*
@@ -139,7 +139,7 @@ ffs_update(v)
*((struct dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number)) = ip->i_din.ffs_din;
- if (ap->a_waitfor && (ap->a_vp->v_mount->mnt_flag & MNT_ASYNC) == 0) {
+ if (ap->a_waitfor && !DOINGASYNC(ap->a_vp)) {
return (bwrite(bp));
} else {
bdwrite(bp);
@@ -210,7 +210,7 @@ ffs_truncate(v)
#endif
ovp->v_lasta = ovp->v_clen = ovp->v_cstart = ovp->v_lastw = 0;
if (DOINGSOFTDEP(ovp)) {
- if (length > 0) {
+ if (length > 0 || softdep_slowdown(ovp)) {
/*
* If a file is only partially truncated, then
* we have to clean up the data structures
@@ -510,7 +510,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
bzero((caddr_t)&bap[last + 1],
(u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
- if ((vp->v_mount->mnt_flag & MNT_ASYNC) == 0) {
+ if (!DOINGASYNC(vp)) {
error = bwrite(bp);
if (error)
allerror = error;
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 771dd4562ca..12d9f631618 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -1,21 +1,17 @@
-/* $OpenBSD: ffs_softdep.c,v 1.11 2001/02/10 11:08:39 fgsch Exp $ */
+/* $OpenBSD: ffs_softdep.c,v 1.12 2001/02/21 23:24:31 csapuntz Exp $ */
/*
- * Copyright 1998 Marshall Kirk McKusick. All Rights Reserved.
+ * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
*
* The soft updates code is derived from the appendix of a University
* of Michigan technical report (Gregory R. Ganger and Yale N. Patt,
* "Soft Updates: A Solution to the Metadata Update Problem in File
* Systems", CSE-TR-254-95, August 1995).
*
- * The following are the copyrights and redistribution conditions that
- * apply to this copy of the soft update software. For a license
- * to use, redistribute or sell the soft update software under
- * conditions other than those described here, please contact the
- * author at one of the following addresses:
+ * Further information about soft updates can be obtained from:
*
- * Marshall Kirk McKusick mckusick@mckusick.com
- * 1614 Oxford Street +1-510-843-9542
- * Berkeley, CA 94709-1608
+ * Marshall Kirk McKusick http://www.mckusick.com/softdep/
+ * 1614 Oxford Street mckusick@mckusick.com
+ * Berkeley, CA 94709-1608 +1-510-843-9542
* USA
*
* Redistribution and use in source and binary forms, with or without
@@ -27,19 +23,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. None of the names of McKusick, Ganger, Patt, or the University of
- * Michigan may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 4. Redistributions in any form must be accompanied by information on
- * how to obtain complete source code for any accompanying software
- * that uses this software. This source code must either be included
- * in the distribution or be available for no more than the cost of
- * distribution plus a nominal fee, and must be freely redistributable
- * under reasonable conditions. For an executable file, complete
- * source code means the source code for all modules it contains.
- * It does not mean source code for modules or files that typically
- * accompany the operating system on which the executable file runs,
- * e.g., standard library modules or system header files.
*
* THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
@@ -53,7 +36,8 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * @(#)ffs_softdep.c 9.40 (McKusick) 6/15/99
+ * from: @(#)ffs_softdep.c 9.59 (McKusick) 6/21/00
+ * $FreeBSD: src/sys/ufs/ffs/ffs_softdep.c,v 1.84 2001/02/04 16:08:18 phk Exp $
*/
/*
@@ -64,6 +48,7 @@
#endif
#ifndef DEBUG
#define DEBUG
+#define STATIC
#endif
#include <sys/param.h>
@@ -85,10 +70,14 @@
#include <ufs/ffs/ffs_extern.h>
#include <ufs/ufs/ufs_extern.h>
+
/*
* These definitions need to be adapted to the system to which
* this file is being ported.
*/
+
+#define M_SOFTDEP_FLAGS (M_WAITOK)
+
/*
* Mapping of dependency structure types to malloc types.
*/
@@ -122,62 +111,71 @@ extern char *memname[];
/*
* Internal function prototypes.
*/
-static void softdep_error __P((char *, int));
-static void drain_output __P((struct vnode *, int));
-static int getdirtybuf __P((struct buf **, int));
-static void clear_remove __P((struct proc *));
-static void clear_inodedeps __P((struct proc *));
-static int flush_pagedep_deps __P((struct vnode *, struct mount *,
+STATIC void softdep_error __P((char *, int));
+STATIC void drain_output __P((struct vnode *, int));
+STATIC int getdirtybuf __P((struct buf **, int));
+STATIC void clear_remove __P((struct proc *));
+STATIC void clear_inodedeps __P((struct proc *));
+STATIC int flush_pagedep_deps __P((struct vnode *, struct mount *,
struct diraddhd *));
-static int flush_inodedep_deps __P((struct fs *, ino_t));
-static int handle_written_filepage __P((struct pagedep *, struct buf *));
-static void diradd_inode_written __P((struct diradd *, struct inodedep *));
-static int handle_written_inodeblock __P((struct inodedep *, struct buf *));
-static void handle_allocdirect_partdone __P((struct allocdirect *));
-static void handle_allocindir_partdone __P((struct allocindir *));
-static void initiate_write_filepage __P((struct pagedep *, struct buf *));
-static void handle_written_mkdir __P((struct mkdir *, int));
-static void initiate_write_inodeblock __P((struct inodedep *, struct buf *));
-static void handle_workitem_freefile __P((struct freefile *));
-static void handle_workitem_remove __P((struct dirrem *));
-static struct dirrem *newdirrem __P((struct buf *, struct inode *,
- struct inode *, int));
-static void free_diradd __P((struct diradd *));
-static void free_allocindir __P((struct allocindir *, struct inodedep *));
-static int indir_trunc __P((struct inode *, ufs_daddr_t, int, ufs_lbn_t,
+STATIC int flush_inodedep_deps __P((struct fs *, ino_t));
+STATIC int handle_written_filepage __P((struct pagedep *, struct buf *));
+STATIC void diradd_inode_written __P((struct diradd *, struct inodedep *));
+STATIC int handle_written_inodeblock __P((struct inodedep *, struct buf *));
+STATIC void handle_allocdirect_partdone __P((struct allocdirect *));
+STATIC void handle_allocindir_partdone __P((struct allocindir *));
+STATIC void initiate_write_filepage __P((struct pagedep *, struct buf *));
+STATIC void handle_written_mkdir __P((struct mkdir *, int));
+STATIC void initiate_write_inodeblock __P((struct inodedep *, struct buf *));
+STATIC void handle_workitem_freefile __P((struct freefile *));
+STATIC void handle_workitem_remove __P((struct dirrem *));
+STATIC struct dirrem *newdirrem __P((struct buf *, struct inode *,
+ struct inode *, int, struct dirrem **));
+STATIC void free_diradd __P((struct diradd *));
+STATIC void free_allocindir __P((struct allocindir *, struct inodedep *));
+STATIC int indir_trunc __P((struct inode *, ufs_daddr_t, int, ufs_lbn_t,
long *));
-static void deallocate_dependencies __P((struct buf *, struct inodedep *));
-static void free_allocdirect __P((struct allocdirectlst *,
+STATIC void deallocate_dependencies __P((struct buf *, struct inodedep *));
+STATIC void free_allocdirect __P((struct allocdirectlst *,
struct allocdirect *, int));
-static int free_inodedep __P((struct inodedep *));
-static void handle_workitem_freeblocks __P((struct freeblks *));
-static void merge_inode_lists __P((struct inodedep *));
-static void setup_allocindir_phase2 __P((struct buf *, struct inode *,
+STATIC int check_inode_unwritten __P((struct inodedep *));
+STATIC int free_inodedep __P((struct inodedep *));
+STATIC void handle_workitem_freeblocks __P((struct freeblks *));
+STATIC void merge_inode_lists __P((struct inodedep *));
+STATIC void setup_allocindir_phase2 __P((struct buf *, struct inode *,
struct allocindir *));
-static struct allocindir *newallocindir __P((struct inode *, int, ufs_daddr_t,
+STATIC struct allocindir *newallocindir __P((struct inode *, int, ufs_daddr_t,
ufs_daddr_t));
-static void handle_workitem_freefrag __P((struct freefrag *));
-static struct freefrag *newfreefrag __P((struct inode *, ufs_daddr_t, long));
-static void allocdirect_merge __P((struct allocdirectlst *,
+STATIC void handle_workitem_freefrag __P((struct freefrag *));
+STATIC struct freefrag *newfreefrag __P((struct inode *, ufs_daddr_t, long));
+STATIC void allocdirect_merge __P((struct allocdirectlst *,
struct allocdirect *, struct allocdirect *));
-static struct bmsafemap *bmsafemap_lookup __P((struct buf *));
-static int newblk_lookup __P((struct fs *, ufs_daddr_t, int,
+STATIC struct bmsafemap *bmsafemap_lookup __P((struct buf *));
+STATIC int newblk_lookup __P((struct fs *, ufs_daddr_t, int,
struct newblk **));
-static int inodedep_lookup __P((struct fs *, ino_t, int, struct inodedep **));
-static int pagedep_lookup __P((struct inode *, ufs_lbn_t, int,
+STATIC int inodedep_lookup __P((struct fs *, ino_t, int, struct inodedep **));
+STATIC int pagedep_lookup __P((struct inode *, ufs_lbn_t, int,
struct pagedep **));
-static int request_cleanup __P((int, int));
-static void add_to_worklist __P((struct worklist *));
+STATIC void pause_timer __P((void *));
+STATIC int request_cleanup __P((int, int));
+STATIC int process_worklist_item __P((struct mount *, int));
+STATIC void add_to_worklist __P((struct worklist *));
/*
* Exported softdep operations.
*/
+void softdep_disk_io_initiation __P((struct buf *));
+void softdep_disk_write_complete __P((struct buf *));
+void softdep_deallocate_dependencies __P((struct buf *));
+void softdep_move_dependencies __P((struct buf *, struct buf *));
+int softdep_count_dependencies __P((struct buf *bp, int));
+
struct bio_ops bioops = {
softdep_disk_io_initiation, /* io_start */
softdep_disk_write_complete, /* io_complete */
softdep_deallocate_dependencies, /* io_deallocate */
- softdep_fsync, /* io_fsync */
- softdep_process_worklist, /* io_sync */
+ softdep_move_dependencies, /* io_movedeps */
+ softdep_count_dependencies, /* io_countdeps */
};
/*
@@ -196,7 +194,7 @@ struct bio_ops bioops = {
* the spl, there is nothing that really needs to be done.
*/
#ifndef /* NOT */ DEBUG
-static struct lockit {
+STATIC struct lockit {
int lkt_spl;
} lk = { 0 };
#define ACQUIRE_LOCK(lk) (lk)->lkt_spl = splbio()
@@ -205,72 +203,78 @@ static struct lockit {
#define FREE_LOCK_INTERLOCKED(lk)
#else /* DEBUG */
-static struct lockit {
+STATIC struct lockit {
int lkt_spl;
pid_t lkt_held;
+ int lkt_line;
} lk = { 0, -1 };
-static int lockcnt;
+STATIC int lockcnt;
-static void acquire_lock __P((struct lockit *));
-static void free_lock __P((struct lockit *));
-static void acquire_lock_interlocked __P((struct lockit *));
-static void free_lock_interlocked __P((struct lockit *));
+STATIC void acquire_lock __P((struct lockit *, int));
+STATIC void free_lock __P((struct lockit *, int));
+STATIC void acquire_lock_interlocked __P((struct lockit *, int));
+STATIC void free_lock_interlocked __P((struct lockit *, int));
-#define ACQUIRE_LOCK(lk) acquire_lock(lk)
-#define FREE_LOCK(lk) free_lock(lk)
-#define ACQUIRE_LOCK_INTERLOCKED(lk) acquire_lock_interlocked(lk)
-#define FREE_LOCK_INTERLOCKED(lk) free_lock_interlocked(lk)
+#define ACQUIRE_LOCK(lk) acquire_lock(lk, __LINE__)
+#define FREE_LOCK(lk) free_lock(lk, __LINE__)
+#define ACQUIRE_LOCK_INTERLOCKED(lk) acquire_lock_interlocked(lk, __LINE__)
+#define FREE_LOCK_INTERLOCKED(lk) free_lock_interlocked(lk, __LINE__)
-static void
-acquire_lock(lk)
+STATIC void
+acquire_lock(lk, line)
struct lockit *lk;
+ int line;
{
if (lk->lkt_held != -1) {
if (lk->lkt_held == CURPROC->p_pid)
- panic("softdep_lock: locking against myself");
+ panic("softdep_lock: locking against myself, acquired at line %d", lk->lkt_line);
else
- panic("softdep_lock: lock held by %d", lk->lkt_held);
+ panic("softdep_lock: lock held by %d, acquired at line %d", lk->lkt_held, line);
}
lk->lkt_spl = splbio();
lk->lkt_held = CURPROC->p_pid;
+ lk->lkt_line = line;
lockcnt++;
}
-static void
-free_lock(lk)
+STATIC void
+free_lock(lk, line)
struct lockit *lk;
+ int line;
{
if (lk->lkt_held == -1)
- panic("softdep_unlock: lock not held");
+ panic("softdep_unlock: lock not held at line %d", line);
lk->lkt_held = -1;
splx(lk->lkt_spl);
}
-static void
-acquire_lock_interlocked(lk)
+STATIC void
+acquire_lock_interlocked(lk, line)
struct lockit *lk;
+ int line;
{
if (lk->lkt_held != -1) {
if (lk->lkt_held == CURPROC->p_pid)
- panic("softdep_lock_interlocked: locking against self");
+ panic("softdep_lock: locking against myself, acquired at line %d", lk->lkt_line);
else
- panic("softdep_lock_interlocked: lock held by %d",
- lk->lkt_held);
+ panic("softdep_lock: lock held by %d, acquired at line %d", lk->lkt_held, lk->lkt_line);
}
lk->lkt_held = CURPROC->p_pid;
+ lk->lkt_line = line;
lockcnt++;
}
-static void
-free_lock_interlocked(lk)
+STATIC void
+free_lock_interlocked(lk, line)
struct lockit *lk;
+ int line;
{
if (lk->lkt_held == -1)
- panic("softdep_unlock_interlocked: lock not held");
+ panic("softdep_unlock_interlocked: lock not held at line %d", line);
lk->lkt_held = -1;
}
#endif /* DEBUG */
@@ -285,11 +289,11 @@ struct sema {
int prio;
int timo;
};
-static void sema_init __P((struct sema *, char *, int, int));
-static int sema_get __P((struct sema *, struct lockit *));
-static void sema_release __P((struct sema *));
+STATIC void sema_init __P((struct sema *, char *, int, int));
+STATIC int sema_get __P((struct sema *, struct lockit *));
+STATIC void sema_release __P((struct sema *));
-static void
+STATIC void
sema_init(semap, name, prio, timo)
struct sema *semap;
char *name;
@@ -303,7 +307,7 @@ sema_init(semap, name, prio, timo)
semap->timo = timo;
}
-static int
+STATIC int
sema_get(semap, interlock)
struct sema *semap;
struct lockit *interlock;
@@ -325,7 +329,7 @@ sema_get(semap, interlock)
return (1);
}
-static void
+STATIC void
sema_release(semap)
struct sema *semap;
{
@@ -355,15 +359,15 @@ sema_release(semap)
#define WORKITEM_FREE(item, type) FREE(item, DtoM(type))
#else /* DEBUG */
-static void worklist_insert __P((struct workhead *, struct worklist *));
-static void worklist_remove __P((struct worklist *));
-static void workitem_free __P((struct worklist *, int));
+STATIC void worklist_insert __P((struct workhead *, struct worklist *));
+STATIC void worklist_remove __P((struct worklist *));
+STATIC void workitem_free __P((struct worklist *, int));
#define WORKLIST_INSERT(head, item) worklist_insert(head, item)
#define WORKLIST_REMOVE(item) worklist_remove(item)
#define WORKITEM_FREE(item, type) workitem_free((struct worklist *)item, type)
-static void
+STATIC void
worklist_insert(head, item)
struct workhead *head;
struct worklist *item;
@@ -377,7 +381,7 @@ worklist_insert(head, item)
LIST_INSERT_HEAD(head, item, wk_list);
}
-static void
+STATIC void
worklist_remove(item)
struct worklist *item;
{
@@ -390,7 +394,7 @@ worklist_remove(item)
LIST_REMOVE(item, wk_list);
}
-static void
+STATIC void
workitem_free(item, type)
struct worklist *item;
int type;
@@ -407,42 +411,59 @@ workitem_free(item, type)
/*
* Workitem queue management
*/
-static struct workhead softdep_workitem_pending;
-static int softdep_worklist_busy;
-static int max_softdeps; /* maximum number of structs before slowdown */
-static int tickdelay = 2; /* number of ticks to pause during slowdown */
-static int proc_waiting; /* tracks whether we have a timeout posted */
-static struct proc *filesys_syncer; /* proc of filesystem syncer process */
-static int req_clear_inodedeps; /* syncer process flush some inodedeps */
+STATIC struct workhead softdep_workitem_pending;
+STATIC int num_on_worklist; /* number of worklist items to be processed */
+STATIC int softdep_worklist_busy; /* 1 => trying to do unmount */
+STATIC int softdep_worklist_req; /* serialized waiters */
+STATIC int max_softdeps; /* maximum number of structs before slowdown */
+STATIC int tickdelay = 2; /* number of ticks to pause during slowdown */
+STATIC int proc_waiting; /* tracks whether we have a timeout posted */
+STATIC int *stat_countp; /* statistic to count in proc_waiting timeout */
+STATIC struct timeout proc_waiting_timeout;
+STATIC struct proc *filesys_syncer; /* proc of filesystem syncer process */
+STATIC int req_clear_inodedeps; /* syncer process flush some inodedeps */
#define FLUSH_INODES 1
-static int req_clear_remove; /* syncer process flush some freeblks */
+STATIC int req_clear_remove; /* syncer process flush some freeblks */
#define FLUSH_REMOVE 2
/*
* runtime statistics
*/
-static int stat_blk_limit_push; /* number of times block limit neared */
-static int stat_ino_limit_push; /* number of times inode limit neared */
-static int stat_blk_limit_hit; /* number of times block slowdown imposed */
-static int stat_ino_limit_hit; /* number of times inode slowdown imposed */
-static int stat_indir_blk_ptrs; /* bufs redirtied as indir ptrs not written */
-static int stat_inode_bitmap; /* bufs redirtied as inode bitmap not written */
-static int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */
-static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */
+STATIC int stat_worklist_push; /* number of worklist cleanups */
+STATIC int stat_blk_limit_push; /* number of times block limit neared */
+STATIC int stat_ino_limit_push; /* number of times inode limit neared */
+STATIC int stat_blk_limit_hit; /* number of times block slowdown imposed */
+STATIC int stat_ino_limit_hit; /* number of times inode slowdown imposed */
+STATIC int stat_sync_limit_hit; /* number of synchronous slowdowns imposed */
+STATIC int stat_indir_blk_ptrs; /* bufs redirtied as indir ptrs not written */
+STATIC int stat_inode_bitmap; /* bufs redirtied as inode bitmap not written */
+STATIC int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */
+STATIC int stat_dir_entry; /* bufs redirtied as dir entry cannot write */
#ifdef DEBUG
#include <vm/vm.h>
#include <sys/sysctl.h>
struct ctldebug debug20 = { "max_softdeps", &max_softdeps };
struct ctldebug debug21 = { "tickdelay", &tickdelay };
+struct ctldebug debug22 = { "worklist_push", &stat_worklist_push };
struct ctldebug debug23 = { "blk_limit_push", &stat_blk_limit_push };
struct ctldebug debug24 = { "ino_limit_push", &stat_ino_limit_push };
struct ctldebug debug25 = { "blk_limit_hit", &stat_blk_limit_hit };
struct ctldebug debug26 = { "ino_limit_hit", &stat_ino_limit_hit };
-struct ctldebug debug27 = { "indir_blk_ptrs", &stat_indir_blk_ptrs };
-struct ctldebug debug28 = { "inode_bitmap", &stat_inode_bitmap };
-struct ctldebug debug29 = { "direct_blk_ptrs", &stat_direct_blk_ptrs };
-struct ctldebug debug30 = { "dir_entry", &stat_dir_entry };
+struct ctldebug debug27 = { "sync_limit_hit", &stat_sync_limit_hit };
+struct ctldebug debug28 = { "indir_blk_ptrs", &stat_indir_blk_ptrs };
+struct ctldebug debug29 = { "inode_bitmap", &stat_inode_bitmap };
+struct ctldebug debug30 = { "direct_blk_ptrs", &stat_direct_blk_ptrs };
+struct ctldebug debug31 = { "dir_entry", &stat_dir_entry };
#endif /* DEBUG */
+void wakeup_one __P((void *));
+
+void
+wakeup_one(c)
+ void *c;
+{
+ wakeup(c);
+}
+
/*
* Add an item to the end of the work queue.
* This routine requires that the lock be held.
@@ -450,7 +471,7 @@ struct ctldebug debug30 = { "dir_entry", &stat_dir_entry };
* The following routine is the only one that removes items
* and does so in order from first to last.
*/
-static void
+STATIC void
add_to_worklist(wk)
struct worklist *wk;
{
@@ -464,6 +485,7 @@ add_to_worklist(wk)
else
LIST_INSERT_AFTER(worklist_tail, wk, wk_list);
worklist_tail = wk;
+ num_on_worklist += 1;
}
/*
@@ -480,9 +502,8 @@ softdep_process_worklist(matchmnt)
struct mount *matchmnt;
{
struct proc *p = CURPROC;
- struct worklist *wk;
- struct fs *matchfs;
- int matchcnt;
+ int matchcnt, loopcount;
+ struct timeval starttime;
/*
* Record the process identifier of our caller so that we can give
@@ -490,133 +511,243 @@ softdep_process_worklist(matchmnt)
*/
filesys_syncer = p;
matchcnt = 0;
- matchfs = NULL;
- if (matchmnt != NULL)
- matchfs = VFSTOUFS(matchmnt)->um_fs;
+
/*
* There is no danger of having multiple processes run this
- * code. It is single threaded solely so that softdep_flushfiles
- * (below) can get an accurate count of the number of items
+ * code, but we have to single-thread it when softdep_flushfiles()
+ * is in operation to get an accurate count of the number of items
* related to its mount point that are in the list.
*/
- if (softdep_worklist_busy && matchmnt == NULL)
- return (-1);
+ if (matchmnt == NULL) {
+ if (softdep_worklist_busy < 0)
+ return(-1);
+ softdep_worklist_busy += 1;
+ }
+
/*
* If requested, try removing inode or removal dependencies.
*/
if (req_clear_inodedeps) {
clear_inodedeps(p);
- req_clear_inodedeps = 0;
- wakeup(&proc_waiting);
+ req_clear_inodedeps -= 1;
+ wakeup_one(&proc_waiting);
}
if (req_clear_remove) {
clear_remove(p);
- req_clear_remove = 0;
- wakeup(&proc_waiting);
+ req_clear_remove -= 1;
+ wakeup_one(&proc_waiting);
}
- ACQUIRE_LOCK(&lk);
- while ((wk = LIST_FIRST(&softdep_workitem_pending)) != 0) {
- WORKLIST_REMOVE(wk);
- FREE_LOCK(&lk);
- switch (wk->wk_type) {
-
- case D_DIRREM:
- /* removal of a directory entry */
- if (WK_DIRREM(wk)->dm_mnt == matchmnt)
- matchcnt += 1;
- handle_workitem_remove(WK_DIRREM(wk));
- break;
-
- case D_FREEBLKS:
- /* releasing blocks and/or fragments from a file */
- if (WK_FREEBLKS(wk)->fb_fs == matchfs)
- matchcnt += 1;
- handle_workitem_freeblocks(WK_FREEBLKS(wk));
- break;
-
- case D_FREEFRAG:
- /* releasing a fragment when replaced as a file grows */
- if (WK_FREEFRAG(wk)->ff_fs == matchfs)
- matchcnt += 1;
- handle_workitem_freefrag(WK_FREEFRAG(wk));
- break;
+ loopcount = 1;
+ starttime = time;
+ while (num_on_worklist > 0) {
+ matchcnt += process_worklist_item(matchmnt, 0);
- case D_FREEFILE:
- /* releasing an inode when its link count drops to 0 */
- if (WK_FREEFILE(wk)->fx_fs == matchfs)
- matchcnt += 1;
- handle_workitem_freefile(WK_FREEFILE(wk));
+ /*
+ * If a umount operation wants to run the worklist
+ * accurately, abort.
+ */
+ if (softdep_worklist_req && matchmnt == NULL) {
+ matchcnt = -1;
break;
-
- default:
- panic("%s_process_worklist: Unknown type %s",
- "softdep", TYPENAME(wk->wk_type));
- /* NOTREACHED */
}
- if (softdep_worklist_busy && matchmnt == NULL)
- return (-1);
+
/*
* If requested, try removing inode or removal dependencies.
*/
if (req_clear_inodedeps) {
clear_inodedeps(p);
- req_clear_inodedeps = 0;
- wakeup(&proc_waiting);
+ req_clear_inodedeps -= 1;
+ wakeup_one(&proc_waiting);
}
if (req_clear_remove) {
clear_remove(p);
- req_clear_remove = 0;
- wakeup(&proc_waiting);
+ req_clear_remove -= 1;
+ wakeup_one(&proc_waiting);
}
- ACQUIRE_LOCK(&lk);
+ /*
+ * We do not generally want to stop for buffer space, but if
+ * we are really being a buffer hog, we will stop and wait.
+ */
+#if 0
+ if (loopcount++ % 128 == 0)
+ bwillwrite();
+#endif
+ /*
+ * Never allow processing to run for more than one
+ * second. Otherwise the other syncer tasks may get
+ * excessively backlogged.
+ */
+ {
+ struct timeval diff;
+
+ timersub(&time, &starttime, &diff);
+ if (diff.tv_sec > 0 && matchmnt == NULL) {
+ matchcnt = -1;
+ break;
+ }
+ }
+ }
+ if (matchmnt == NULL) {
+ softdep_worklist_busy -= 1;
+ if (softdep_worklist_req && softdep_worklist_busy == 0)
+ wakeup(&softdep_worklist_req);
+ }
+ return (matchcnt);
+}
+
+/*
+ * Process one item on the worklist.
+ */
+STATIC int
+process_worklist_item(matchmnt, flags)
+ struct mount *matchmnt;
+ int flags;
+{
+ struct worklist *wk;
+ struct dirrem *dirrem;
+ struct mount *mp;
+ struct vnode *vp;
+ int matchcnt = 0;
+
+ ACQUIRE_LOCK(&lk);
+ /*
+ * Normally we just process each item on the worklist in order.
+ * However, if we are in a situation where we cannot lock any
+ * inodes, we have to skip over any dirrem requests whose
+ * vnodes are resident and locked.
+ */
+ LIST_FOREACH(wk, &softdep_workitem_pending, wk_list) {
+ if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM)
+ break;
+ dirrem = WK_DIRREM(wk);
+ vp = ufs_ihashlookup(VFSTOUFS(dirrem->dm_mnt)->um_dev,
+ dirrem->dm_oldinum);
+ if (vp == NULL || !VOP_ISLOCKED(vp))
+ break;
}
+ if (wk == 0)
+ return (0);
+ WORKLIST_REMOVE(wk);
+ num_on_worklist -= 1;
FREE_LOCK(&lk);
+ switch (wk->wk_type) {
+
+ case D_DIRREM:
+ /* removal of a directory entry */
+ mp = WK_DIRREM(wk)->dm_mnt;
+#if 0
+ if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
+ panic("%s: dirrem on suspended filesystem",
+ "process_worklist_item");
+#endif
+ if (mp == matchmnt)
+ matchcnt += 1;
+ handle_workitem_remove(WK_DIRREM(wk));
+ break;
+
+ case D_FREEBLKS:
+ /* releasing blocks and/or fragments from a file */
+ mp = WK_FREEBLKS(wk)->fb_mnt;
+#if 0
+ if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
+ panic("%s: freeblks on suspended filesystem",
+ "process_worklist_item");
+#endif
+ if (mp == matchmnt)
+ matchcnt += 1;
+ handle_workitem_freeblocks(WK_FREEBLKS(wk));
+ break;
+
+ case D_FREEFRAG:
+ /* releasing a fragment when replaced as a file grows */
+ mp = WK_FREEFRAG(wk)->ff_mnt;
+#if 0
+ if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
+ panic("%s: freefrag on suspended filesystem",
+ "process_worklist_item");
+#endif
+ if (mp == matchmnt)
+ matchcnt += 1;
+ handle_workitem_freefrag(WK_FREEFRAG(wk));
+ break;
+
+ case D_FREEFILE:
+ /* releasing an inode when its link count drops to 0 */
+ mp = WK_FREEFILE(wk)->fx_mnt;
+#if 0
+ if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
+ panic("%s: freefile on suspended filesystem",
+ "process_worklist_item");
+#endif
+ if (mp == matchmnt)
+ matchcnt += 1;
+ handle_workitem_freefile(WK_FREEFILE(wk));
+ break;
+
+ default:
+ panic("%s_process_worklist: Unknown type %s",
+ "softdep", TYPENAME(wk->wk_type));
+ /* NOTREACHED */
+ }
return (matchcnt);
}
/*
+ * Move dependencies from one buffer to another.
+ */
+void
+softdep_move_dependencies(oldbp, newbp)
+ struct buf *oldbp;
+ struct buf *newbp;
+{
+ struct worklist *wk, *wktail;
+
+ if (LIST_FIRST(&newbp->b_dep) != NULL)
+ panic("softdep_move_dependencies: need merge code");
+ wktail = 0;
+ ACQUIRE_LOCK(&lk);
+ while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) {
+ LIST_REMOVE(wk, wk_list);
+ if (wktail == 0)
+ LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list);
+ else
+ LIST_INSERT_AFTER(wktail, wk, wk_list);
+ wktail = wk;
+ }
+ FREE_LOCK(&lk);
+}
+
+/*
* Purge the work list of all items associated with a particular mount point.
*/
int
-softdep_flushfiles(oldmnt, flags, p)
+softdep_flushworklist(oldmnt, countp, p)
struct mount *oldmnt;
- int flags;
+ int *countp;
struct proc *p;
{
struct vnode *devvp;
- int error, loopcnt;
+ int count, error = 0;
/*
- * Await our turn to clear out the queue.
+ * Await our turn to clear out the queue, then serialize access.
*/
- while (softdep_worklist_busy)
- tsleep(&lbolt, PRIBIO, "softflush", 0);
- softdep_worklist_busy = 1;
- if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0) {
- softdep_worklist_busy = 0;
- return (error);
+ while (softdep_worklist_busy) {
+ softdep_worklist_req += 1;
+ tsleep(&softdep_worklist_req, PRIBIO, "softflush", 0);
+ softdep_worklist_req -= 1;
}
+ softdep_worklist_busy = -1;
/*
* Alternately flush the block device associated with the mount
* point and process any dependencies that the flushing
- * creates. In theory, this loop can happen at most twice,
- * but we give it a few extra just to be sure.
+ * creates. We continue until no more worklist dependencies
+ * are found.
*/
+ *countp = 0;
devvp = VFSTOUFS(oldmnt)->um_devvp;
- for (loopcnt = 10; loopcnt > 0; loopcnt--) {
- if (softdep_process_worklist(oldmnt) == 0) {
- /*
- * Do another flush in case any vnodes were brought in
- * as part of the cleanup operations.
- */
- if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0)
- break;
- /*
- * If we still found nothing to do, we are really done.
- */
- if (softdep_process_worklist(oldmnt) == 0)
- break;
- }
+ while ((count = softdep_process_worklist(oldmnt)) > 0) {
+ *countp += count;
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
error = VOP_FSYNC(devvp, p->p_ucred, MNT_WAIT, p);
VOP_UNLOCK(devvp, 0, p);
@@ -624,6 +755,39 @@ softdep_flushfiles(oldmnt, flags, p)
break;
}
softdep_worklist_busy = 0;
+ if (softdep_worklist_req)
+ wakeup(&softdep_worklist_req);
+ return (error);
+}
+
+/*
+ * Flush all vnodes and worklist items associated with a specified mount point.
+ */
+int
+softdep_flushfiles(oldmnt, flags, p)
+ struct mount *oldmnt;
+ int flags;
+ struct proc *p;
+{
+ int error, count, loopcnt;
+
+ /*
+ * Alternately flush the vnodes associated with the mount
+ * point and process any dependencies that the flushing
+ * creates. In theory, this loop can happen at most twice,
+ * but we give it a few extra just to be sure.
+ */
+ for (loopcnt = 10; loopcnt > 0; loopcnt--) {
+ /*
+ * Do another flush in case any vnodes were brought in
+ * as part of the cleanup operations.
+ */
+ if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0)
+ break;
+ if ((error = softdep_flushworklist(oldmnt, &count, p)) != 0 ||
+ count == 0)
+ break;
+ }
/*
* If we are unmounting then it is an error to fail. If we
* are simply trying to downgrade to read-only, then filesystem
@@ -660,6 +824,7 @@ softdep_flushfiles(oldmnt, flags, p)
* an existing entry is not found.
*/
#define DEPALLOC 0x0001 /* allocate structure if lookup fails */
+#define NODELAY 0x0002 /* cannot do background work */
/*
* Structures and routines associated with pagedep caching.
@@ -669,7 +834,7 @@ u_long pagedep_hash; /* size of hash table - 1 */
#define PAGEDEP_HASH(mp, inum, lbn) \
(&pagedep_hashtbl[((((register_t)(mp)) >> 13) + (inum) + (lbn)) & \
pagedep_hash])
-static struct sema pagedep_in_progress;
+STATIC struct sema pagedep_in_progress;
/*
* Look up a pagedep. Return 1 if found, 0 if not found.
@@ -677,7 +842,7 @@ static struct sema pagedep_in_progress;
* Found or allocated entry is returned in pagedeppp.
* This routine must be called with splbio interrupts blocked.
*/
-static int
+STATIC int
pagedep_lookup(ip, lbn, flags, pagedeppp)
struct inode *ip;
ufs_lbn_t lbn;
@@ -696,8 +861,7 @@ pagedep_lookup(ip, lbn, flags, pagedeppp)
mp = ITOV(ip)->v_mount;
pagedephd = PAGEDEP_HASH(mp, ip->i_number, lbn);
top:
- for (pagedep = LIST_FIRST(pagedephd); pagedep;
- pagedep = LIST_NEXT(pagedep, pd_hash))
+ LIST_FOREACH(pagedep, pagedephd, pd_hash)
if (ip->i_number == pagedep->pd_ino &&
lbn == pagedep->pd_lbn &&
mp == pagedep->pd_mnt)
@@ -715,7 +879,7 @@ top:
goto top;
}
MALLOC(pagedep, struct pagedep *, sizeof(struct pagedep), M_PAGEDEP,
- M_WAITOK);
+ M_SOFTDEP_FLAGS);
bzero(pagedep, sizeof(struct pagedep));
pagedep->pd_list.wk_type = D_PAGEDEP;
pagedep->pd_mnt = mp;
@@ -736,11 +900,11 @@ top:
* Structures and routines associated with inodedep caching.
*/
LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl;
-static u_long inodedep_hash; /* size of hash table - 1 */
-static long num_inodedep; /* number of inodedep allocated */
+STATIC u_long inodedep_hash; /* size of hash table - 1 */
+STATIC long num_inodedep; /* number of inodedep allocated */
#define INODEDEP_HASH(fs, inum) \
(&inodedep_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & inodedep_hash])
-static struct sema inodedep_in_progress;
+STATIC struct sema inodedep_in_progress;
/*
* Look up a inodedep. Return 1 if found, 0 if not found.
@@ -748,7 +912,7 @@ static struct sema inodedep_in_progress;
* Found or allocated entry is returned in inodedeppp.
* This routine must be called with splbio interrupts blocked.
*/
-static int
+STATIC int
inodedep_lookup(fs, inum, flags, inodedeppp)
struct fs *fs;
ino_t inum;
@@ -766,8 +930,7 @@ inodedep_lookup(fs, inum, flags, inodedeppp)
firsttry = 1;
inodedephd = INODEDEP_HASH(fs, inum);
top:
- for (inodedep = LIST_FIRST(inodedephd); inodedep;
- inodedep = LIST_NEXT(inodedep, id_hash))
+ LIST_FOREACH(inodedep, inodedephd, id_hash)
if (inum == inodedep->id_ino && fs == inodedep->id_fs)
break;
if (inodedep) {
@@ -781,7 +944,7 @@ top:
/*
* If we are over our limit, try to improve the situation.
*/
- if (num_inodedep > max_softdeps && firsttry && speedup_syncer() == 0 &&
+ if (num_inodedep > max_softdeps && firsttry && (flags & NODELAY) == 0 &&
request_cleanup(FLUSH_INODES, 1)) {
firsttry = 0;
goto top;
@@ -792,7 +955,7 @@ top:
}
num_inodedep += 1;
MALLOC(inodedep, struct inodedep *, sizeof(struct inodedep),
- M_INODEDEP, M_WAITOK);
+ M_INODEDEP, M_SOFTDEP_FLAGS);
inodedep->id_list.wk_type = D_INODEDEP;
inodedep->id_fs = fs;
inodedep->id_ino = inum;
@@ -820,14 +983,14 @@ LIST_HEAD(newblk_hashhead, newblk) *newblk_hashtbl;
u_long newblk_hash; /* size of hash table - 1 */
#define NEWBLK_HASH(fs, inum) \
(&newblk_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & newblk_hash])
-static struct sema newblk_in_progress;
+STATIC struct sema newblk_in_progress;
/*
* Look up a newblk. Return 1 if found, 0 if not found.
* If not found, allocate if DEPALLOC flag is passed.
* Found or allocated entry is returned in newblkpp.
*/
-static int
+STATIC int
newblk_lookup(fs, newblkno, flags, newblkpp)
struct fs *fs;
ufs_daddr_t newblkno;
@@ -839,8 +1002,7 @@ newblk_lookup(fs, newblkno, flags, newblkpp)
newblkhd = NEWBLK_HASH(fs, newblkno);
top:
- for (newblk = LIST_FIRST(newblkhd); newblk;
- newblk = LIST_NEXT(newblk, nb_hash))
+ LIST_FOREACH(newblk, newblkhd, nb_hash)
if (newblkno == newblk->nb_newblkno && fs == newblk->nb_fs)
break;
if (newblk) {
@@ -854,7 +1016,7 @@ top:
if (sema_get(&newblk_in_progress, 0) == 0)
goto top;
MALLOC(newblk, struct newblk *, sizeof(struct newblk),
- M_NEWBLK, M_WAITOK);
+ M_NEWBLK, M_SOFTDEP_FLAGS);
newblk->nb_state = 0;
newblk->nb_fs = fs;
newblk->nb_newblkno = newblkno;
@@ -874,7 +1036,8 @@ softdep_initialize()
LIST_INIT(&mkdirlisthd);
LIST_INIT(&softdep_workitem_pending);
- max_softdeps = desiredvnodes * (16 / sizeof(register_t));
+ max_softdeps = min (desiredvnodes * 8,
+ kmemstats[M_INODEDEP].ks_limit / (2 * sizeof(struct inodedep)));
pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, M_WAITOK,
&pagedep_hash);
sema_init(&pagedep_in_progress, "pagedep", PRIBIO, 0);
@@ -901,6 +1064,7 @@ softdep_mount(devvp, mp, fs, cred)
struct buf *bp;
int error, cyl;
+ mp->mnt_flag &= ~MNT_ASYNC;
mp->mnt_flag |= MNT_SOFTDEP;
/*
* When doing soft updates, the counters in the
@@ -983,7 +1147,8 @@ softdep_setup_inomapdep(bp, ip, newinum)
* the cylinder group map from which it was allocated.
*/
ACQUIRE_LOCK(&lk);
- if (inodedep_lookup(ip->i_fs, newinum, DEPALLOC, &inodedep) != 0)
+ if (inodedep_lookup(ip->i_fs, newinum, DEPALLOC | NODELAY, &inodedep)
+ != 0)
panic("softdep_setup_inomapdep: found inode");
inodedep->id_buf = bp;
inodedep->id_state &= ~DEPCOMPLETE;
@@ -1024,7 +1189,7 @@ softdep_setup_blkmapdep(bp, fs, newblkno)
* this routine is called and this routine must be called with
* splbio interrupts blocked.
*/
-static struct bmsafemap *
+STATIC struct bmsafemap *
bmsafemap_lookup(bp)
struct buf *bp;
{
@@ -1035,12 +1200,12 @@ bmsafemap_lookup(bp)
if (lk.lkt_held == -1)
panic("bmsafemap_lookup: lock not held");
#endif
- for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list))
+ LIST_FOREACH(wk, &bp->b_dep, wk_list)
if (wk->wk_type == D_BMSAFEMAP)
return (WK_BMSAFEMAP(wk));
FREE_LOCK(&lk);
MALLOC(bmsafemap, struct bmsafemap *, sizeof(struct bmsafemap),
- M_BMSAFEMAP, M_WAITOK);
+ M_BMSAFEMAP, M_SOFTDEP_FLAGS);
bmsafemap->sm_list.wk_type = D_BMSAFEMAP;
bmsafemap->sm_list.wk_state = 0;
bmsafemap->sm_buf = bp;
@@ -1100,7 +1265,7 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
struct newblk *newblk;
MALLOC(adp, struct allocdirect *, sizeof(struct allocdirect),
- M_ALLOCDIRECT, M_WAITOK);
+ M_ALLOCDIRECT, M_SOFTDEP_FLAGS);
bzero(adp, sizeof(struct allocdirect));
adp->ad_list.wk_type = D_ALLOCDIRECT;
adp->ad_lbn = lbn;
@@ -1118,7 +1283,7 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
panic("softdep_setup_allocdirect: lost block");
ACQUIRE_LOCK(&lk);
- (void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep);
+ inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC | NODELAY, &inodedep);
adp->ad_inodedep = inodedep;
if (newblk->nb_state == DEPCOMPLETE) {
@@ -1172,8 +1337,7 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
FREE_LOCK(&lk);
return;
}
- for (oldadp = TAILQ_FIRST(adphead); oldadp;
- oldadp = TAILQ_NEXT(oldadp, ad_next)) {
+ TAILQ_FOREACH(oldadp, adphead, ad_next) {
if (oldadp->ad_lbn >= lbn)
break;
}
@@ -1190,7 +1354,7 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
* Replace an old allocdirect dependency with a newer one.
* This routine must be called with splbio interrupts blocked.
*/
-static void
+STATIC void
allocdirect_merge(adphead, newadp, oldadp)
struct allocdirectlst *adphead; /* head of list holding allocdirects */
struct allocdirect *newadp; /* allocdirect being added */
@@ -1237,7 +1401,7 @@ allocdirect_merge(adphead, newadp, oldadp)
/*
* Allocate a new freefrag structure if needed.
*/
-static struct freefrag *
+STATIC struct freefrag *
newfreefrag(ip, blkno, size)
struct inode *ip;
ufs_daddr_t blkno;
@@ -1252,11 +1416,11 @@ newfreefrag(ip, blkno, size)
if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag)
panic("newfreefrag: frag size");
MALLOC(freefrag, struct freefrag *, sizeof(struct freefrag),
- M_FREEFRAG, M_WAITOK);
+ M_FREEFRAG, M_SOFTDEP_FLAGS);
freefrag->ff_list.wk_type = D_FREEFRAG;
freefrag->ff_state = ip->i_ffs_uid & ~ONWORKLIST; /* XXX - used below */
freefrag->ff_inum = ip->i_number;
- freefrag->ff_fs = fs;
+ freefrag->ff_mnt = ITOV(ip)->v_mount;
freefrag->ff_devvp = ip->i_devvp;
freefrag->ff_blkno = blkno;
freefrag->ff_fragsize = size;
@@ -1267,13 +1431,14 @@ newfreefrag(ip, blkno, size)
* This workitem de-allocates fragments that were replaced during
* file block allocation.
*/
-static void
+STATIC void
handle_workitem_freefrag(freefrag)
struct freefrag *freefrag;
{
struct inode tip;
- tip.i_fs = freefrag->ff_fs;
+ tip.i_vnode = NULL;
+ tip.i_fs = VFSTOUFS(freefrag->ff_mnt)->um_fs;
tip.i_devvp = freefrag->ff_devvp;
tip.i_dev = freefrag->ff_devvp->v_rdev;
tip.i_number = freefrag->ff_inum;
@@ -1310,7 +1475,7 @@ handle_workitem_freefrag(freefrag)
/*
* Allocate a new allocindir structure.
*/
-static struct allocindir *
+STATIC struct allocindir *
newallocindir(ip, ptrno, newblkno, oldblkno)
struct inode *ip; /* inode for file being extended */
int ptrno; /* offset of pointer in indirect block */
@@ -1320,8 +1485,8 @@ newallocindir(ip, ptrno, newblkno, oldblkno)
struct allocindir *aip;
MALLOC(aip, struct allocindir *, sizeof(struct allocindir),
- M_ALLOCINDIR, M_WAITOK);
- bzero(aip, sizeof(struct allocindir));
+ M_ALLOCINDIR, M_SOFTDEP_FLAGS);
+ bzero(aip,sizeof(struct allocindir));
aip->ai_list.wk_type = D_ALLOCINDIR;
aip->ai_state = ATTACHED;
aip->ai_offset = ptrno;
@@ -1388,7 +1553,7 @@ softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno)
* Called to finish the allocation of the "aip" allocated
* by one of the two routines above.
*/
-static void
+STATIC void
setup_allocindir_phase2(bp, ip, aip)
struct buf *bp; /* in-memory copy of the indirect block */
struct inode *ip; /* inode for file being extended */
@@ -1405,8 +1570,7 @@ setup_allocindir_phase2(bp, ip, aip)
panic("setup_allocindir_phase2: not indir blk");
for (indirdep = NULL, newindirdep = NULL; ; ) {
ACQUIRE_LOCK(&lk);
- for (wk = LIST_FIRST(&bp->b_dep); wk;
- wk = LIST_NEXT(wk, wk_list)) {
+ LIST_FOREACH(wk, &bp->b_dep, wk_list) {
if (wk->wk_type != D_INDIRDEP)
continue;
indirdep = WK_INDIRDEP(wk);
@@ -1444,23 +1608,26 @@ setup_allocindir_phase2(bp, ip, aip)
if (aip->ai_oldblkno == 0)
oldaip = NULL;
else
- for (oldaip=LIST_FIRST(&indirdep->ir_deplisthd);
- oldaip; oldaip = LIST_NEXT(oldaip, ai_next))
+
+ LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next)
if (oldaip->ai_offset == aip->ai_offset)
break;
+ freefrag = NULL;
if (oldaip != NULL) {
if (oldaip->ai_newblkno != aip->ai_oldblkno)
panic("setup_allocindir_phase2: blkno");
aip->ai_oldblkno = oldaip->ai_oldblkno;
- freefrag = oldaip->ai_freefrag;
- oldaip->ai_freefrag = aip->ai_freefrag;
- aip->ai_freefrag = freefrag;
+ freefrag = aip->ai_freefrag;
+ aip->ai_freefrag = oldaip->ai_freefrag;
+ oldaip->ai_freefrag = NULL;
free_allocindir(oldaip, NULL);
}
LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next);
((ufs_daddr_t *)indirdep->ir_savebp->b_data)
[aip->ai_offset] = aip->ai_oldblkno;
FREE_LOCK(&lk);
+ if (freefrag != NULL)
+ handle_workitem_freefrag(freefrag);
}
if (newindirdep) {
if (indirdep->ir_savebp != NULL)
@@ -1470,16 +1637,21 @@ setup_allocindir_phase2(bp, ip, aip)
if (indirdep)
break;
MALLOC(newindirdep, struct indirdep *, sizeof(struct indirdep),
- M_INDIRDEP, M_WAITOK);
+ M_INDIRDEP, M_SOFTDEP_FLAGS);
newindirdep->ir_list.wk_type = D_INDIRDEP;
newindirdep->ir_state = ATTACHED;
LIST_INIT(&newindirdep->ir_deplisthd);
LIST_INIT(&newindirdep->ir_donehd);
- newindirdep->ir_saveddata = (ufs_daddr_t *)bp->b_data;
+ if (bp->b_blkno == bp->b_lblkno) {
+ VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno,
+ NULL);
+ }
newindirdep->ir_savebp =
getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, 0);
- bcopy((caddr_t)newindirdep->ir_saveddata,
- newindirdep->ir_savebp->b_data, bp->b_bcount);
+#if 0
+ BUF_KERNPROC(newindirdep->ir_savebp);
+#endif
+ bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount);
}
}
@@ -1512,7 +1684,6 @@ setup_allocindir_phase2(bp, ip, aip)
* later release and zero the inode so that the calling routine
* can release it.
*/
-static long num_freeblks; /* number of freeblks allocated */
void
softdep_setup_freeblocks(ip, length)
struct inode *ip; /* The inode whose length is to be reduced */
@@ -1524,25 +1695,19 @@ softdep_setup_freeblocks(ip, length)
struct vnode *vp;
struct buf *bp;
struct fs *fs;
- int i, error;
+ int i, delay, error;
fs = ip->i_fs;
if (length != 0)
- panic("softde_setup_freeblocks: non-zero length");
- /*
- * If we are over our limit, try to improve the situation.
- */
- if (num_freeblks > max_softdeps / 2 && speedup_syncer() == 0)
- (void) request_cleanup(FLUSH_REMOVE, 0);
- num_freeblks += 1;
+ panic("softdep_setup_freeblocks: non-zero length");
MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks),
- M_FREEBLKS, M_WAITOK);
+ M_FREEBLKS, M_SOFTDEP_FLAGS);
bzero(freeblks, sizeof(struct freeblks));
freeblks->fb_list.wk_type = D_FREEBLKS;
freeblks->fb_uid = ip->i_ffs_uid;
freeblks->fb_previousinum = ip->i_number;
freeblks->fb_devvp = ip->i_devvp;
- freeblks->fb_fs = fs;
+ freeblks->fb_mnt = ITOV(ip)->v_mount;
freeblks->fb_oldsize = ip->i_ffs_size;
freeblks->fb_newsize = length;
freeblks->fb_chkcnt = ip->i_ffs_blocks;
@@ -1576,19 +1741,26 @@ softdep_setup_freeblocks(ip, length)
panic("softdep_setup_freeblocks: inode busy");
/*
* Add the freeblks structure to the list of operations that
- * must await the zero'ed inode being written to disk.
+ * must await the zero'ed inode being written to disk. If we
+ * still have a bitmap dependency (delay == 0), then the inode
+ * has never been written to disk, so we can process the
+ * freeblks below once we have deleted the dependencies.
*/
- WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
+ delay = (inodedep->id_state & DEPCOMPLETE);
+ if (delay)
+ WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
/*
* Because the file length has been truncated to zero, any
* pending block allocation dependency structures associated
* with this inode are obsolete and can simply be de-allocated.
* We must first merge the two dependency lists to get rid of
* any duplicate freefrag structures, then purge the merged list.
+ * If we still have a bitmap dependency, then the inode has never
+ * been written to disk, so we can free any fragments without delay.
*/
merge_inode_lists(inodedep);
while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
- free_allocdirect(&inodedep->id_inoupdt, adp, 1);
+ free_allocdirect(&inodedep->id_inoupdt, adp, delay);
FREE_LOCK(&lk);
bdwrite(bp);
/*
@@ -1604,17 +1776,21 @@ softdep_setup_freeblocks(ip, length)
bp = LIST_FIRST(&vp->v_dirtyblkhd);
(void) inodedep_lookup(fs, ip->i_number, 0, &inodedep);
deallocate_dependencies(bp, inodedep);
- bp->b_flags |= B_INVAL;
+ bp->b_flags |= B_INVAL | B_NOCACHE;
FREE_LOCK(&lk);
brelse(bp);
ACQUIRE_LOCK(&lk);
}
- /*
- * Try freeing the inodedep in case that was the last dependency.
- */
- if ((inodedep_lookup(fs, ip->i_number, 0, &inodedep)) != 0)
+ if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0)
(void) free_inodedep(inodedep);
FREE_LOCK(&lk);
+ /*
+ * If the inode has never been written to disk (delay == 0),
+ * then we can process the freeblks now that we have deleted
+ * the dependencies.
+ */
+ if (!delay)
+ handle_workitem_freeblocks(freeblks);
}
/*
@@ -1624,7 +1800,7 @@ softdep_setup_freeblocks(ip, length)
* its associated dependencies. The mutex is held so that other I/O's
* associated with related dependencies do not occur.
*/
-static void
+STATIC void
deallocate_dependencies(bp, inodedep)
struct buf *bp;
struct inodedep *inodedep;
@@ -1688,11 +1864,12 @@ deallocate_dependencies(bp, inodedep)
* If the inode has already been written, then they
* can be dumped directly onto the work list.
*/
- for (dirrem = LIST_FIRST(&pagedep->pd_dirremhd); dirrem;
- dirrem = LIST_NEXT(dirrem, dm_next)) {
+ LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) {
LIST_REMOVE(dirrem, dm_next);
dirrem->dm_dirinum = pagedep->pd_ino;
- if (inodedep == NULL)
+ if (inodedep == NULL ||
+ (inodedep->id_state & ALLCOMPLETE) ==
+ ALLCOMPLETE)
add_to_worklist(&dirrem->dm_list);
else
WORKLIST_INSERT(&inodedep->id_bufwait,
@@ -1725,7 +1902,7 @@ deallocate_dependencies(bp, inodedep)
* Free an allocdirect. Generate a new freefrag work request if appropriate.
* This routine must be called with splbio interrupts blocked.
*/
-static void
+STATIC void
free_allocdirect(adphead, adp, delay)
struct allocdirectlst *adphead;
struct allocdirect *adp;
@@ -1755,79 +1932,91 @@ free_allocdirect(adphead, adp, delay)
* Prepare an inode to be freed. The actual free operation is not
* done until the zero'ed inode has been written to disk.
*/
-static long num_freefile; /* number of freefile allocated */
void
-softdep_freefile(ap)
- struct vop_vfree_args /* {
- struct vnode *a_pvp;
- ino_t a_ino;
- int a_mode;
- } */ *ap;
+softdep_freefile(pvp, ino, mode)
+ struct vnode *pvp;
+ ino_t ino;
+ int mode;
{
- struct inode *ip = VTOI(ap->a_pvp);
+ struct inode *ip = VTOI(pvp);
struct inodedep *inodedep;
struct freefile *freefile;
/*
- * If we are over our limit, try to improve the situation.
- */
- if (num_freefile > max_softdeps / 2 && speedup_syncer() == 0)
- (void) request_cleanup(FLUSH_REMOVE, 0);
- /*
* This sets up the inode de-allocation dependency.
*/
- num_freefile += 1;
MALLOC(freefile, struct freefile *, sizeof(struct freefile),
- M_FREEFILE, M_WAITOK);
+ M_FREEFILE, M_SOFTDEP_FLAGS);
freefile->fx_list.wk_type = D_FREEFILE;
freefile->fx_list.wk_state = 0;
- freefile->fx_mode = ap->a_mode;
- freefile->fx_oldinum = ap->a_ino;
+ freefile->fx_mode = mode;
+ freefile->fx_oldinum = ino;
freefile->fx_devvp = ip->i_devvp;
- freefile->fx_fs = ip->i_fs;
+ freefile->fx_mnt = ITOV(ip)->v_mount;
/*
* If the inodedep does not exist, then the zero'ed inode has
- * been written to disk and we can free the file immediately.
+ * been written to disk. If the allocated inode has never been
+ * written to disk, then the on-disk inode is zero'ed. In either
+ * case we can free the file immediately.
*/
ACQUIRE_LOCK(&lk);
- if (inodedep_lookup(ip->i_fs, ap->a_ino, 0, &inodedep) == 0) {
- add_to_worklist(&freefile->fx_list);
+ if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0 ||
+ check_inode_unwritten(inodedep)) {
FREE_LOCK(&lk);
+ handle_workitem_freefile(freefile);
return;
}
+ WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list);
+ FREE_LOCK(&lk);
+}
- /*
- * If we still have a bitmap dependency, then the inode has never
- * been written to disk. Drop the dependency as it is no longer
- * necessary since the inode is being deallocated. We could process
- * the freefile immediately, but then we would have to clear the
- * id_inowait dependencies here and it is easier just to let the
- * zero'ed inode be written and let them be cleaned up in the
- * normal followup actions that follow the inode write.
- */
- if ((inodedep->id_state & DEPCOMPLETE) == 0) {
- inodedep->id_state |= DEPCOMPLETE;
- LIST_REMOVE(inodedep, id_deps);
- inodedep->id_buf = NULL;
+/*
+ * Check to see if an inode has never been written to disk. If
+ * so free the inodedep and return success, otherwise return failure.
+ * This routine must be called with splbio interrupts blocked.
+ *
+ * If we still have a bitmap dependency, then the inode has never
+ * been written to disk. Drop the dependency as it is no longer
+ * necessary since the inode is being deallocated. We set the
+ * ALLCOMPLETE flags since the bitmap now properly shows that the
+ * inode is not allocated. Even if the inode is actively being
+ * written, it has been rolled back to its zero'ed state, so we
+ * are ensured that a zero inode is what is on the disk. For short
+ * lived files, this change will usually result in removing all the
+ * dependencies from the inode so that it can be freed immediately.
+ */
+STATIC int
+check_inode_unwritten(inodedep)
+ struct inodedep *inodedep;
+{
+
+ if ((inodedep->id_state & DEPCOMPLETE) != 0 ||
+ LIST_FIRST(&inodedep->id_pendinghd) != NULL ||
+ LIST_FIRST(&inodedep->id_bufwait) != NULL ||
+ LIST_FIRST(&inodedep->id_inowait) != NULL ||
+ TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
+ TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
+ inodedep->id_nlinkdelta != 0)
+ return (0);
+ inodedep->id_state |= ALLCOMPLETE;
+ LIST_REMOVE(inodedep, id_deps);
+ inodedep->id_buf = NULL;
+ if (inodedep->id_state & ONWORKLIST)
+ WORKLIST_REMOVE(&inodedep->id_list);
+ if (inodedep->id_savedino != NULL) {
+ FREE(inodedep->id_savedino, M_INODEDEP);
+ inodedep->id_savedino = NULL;
}
- /*
- * If the inodedep has no dependencies associated with it,
- * then we must free it here and free the file immediately.
- * This case arises when an early allocation fails (for
- * example, the user is over their file quota).
- */
if (free_inodedep(inodedep) == 0)
- WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list);
- else
- add_to_worklist(&freefile->fx_list);
- FREE_LOCK(&lk);
+ panic("check_inode_unwritten: busy inode");
+ return (1);
}
/*
* Try to free an inodedep structure. Return 1 if it could be freed.
*/
-static int
+STATIC int
free_inodedep(inodedep)
struct inodedep *inodedep;
{
@@ -1855,7 +2044,7 @@ free_inodedep(inodedep)
* to the number of blocks allocated for the file) are also
* performed in this function.
*/
-static void
+STATIC void
handle_workitem_freeblocks(freeblks)
struct freeblks *freeblks;
{
@@ -1867,13 +2056,13 @@ handle_workitem_freeblocks(freeblks)
int error, allerror = 0;
ufs_lbn_t baselbns[NIADDR], tmpval;
+ tip.i_fs = fs = VFSTOUFS(freeblks->fb_mnt)->um_fs;
tip.i_number = freeblks->fb_previousinum;
tip.i_devvp = freeblks->fb_devvp;
tip.i_dev = freeblks->fb_devvp->v_rdev;
- tip.i_fs = freeblks->fb_fs;
tip.i_ffs_size = freeblks->fb_oldsize;
tip.i_ffs_uid = freeblks->fb_uid;
- fs = freeblks->fb_fs;
+ tip.i_vnode = NULL;
tmpval = 1;
baselbns[0] = NDADDR;
for (i = 1; i < NIADDR; i++) {
@@ -1907,12 +2096,11 @@ handle_workitem_freeblocks(freeblks)
#ifdef DIAGNOSTIC
if (freeblks->fb_chkcnt != blocksreleased)
- panic("handle_workitem_freeblocks: block count");
+ printf("handle_workitem_freeblocks: block count\n");
if (allerror)
softdep_error("handle_workitem_freeblks", allerror);
#endif /* DIAGNOSTIC */
WORKITEM_FREE(freeblks, D_FREEBLKS);
- num_freeblks -= 1;
}
/*
@@ -1921,7 +2109,7 @@ handle_workitem_freeblocks(freeblks)
* and recursive calls to indirtrunc must be used to cleanse other indirect
* blocks.
*/
-static int
+STATIC int
indir_trunc(ip, dbn, level, lbn, countp)
struct inode *ip;
ufs_daddr_t dbn;
@@ -1988,7 +2176,7 @@ indir_trunc(ip, dbn, level, lbn, countp)
ffs_blkfree(ip, nb, fs->fs_bsize);
*countp += nblocks;
}
- bp->b_flags |= B_INVAL;
+ bp->b_flags |= B_INVAL | B_NOCACHE;
brelse(bp);
return (allerror);
}
@@ -1997,7 +2185,7 @@ indir_trunc(ip, dbn, level, lbn, countp)
* Free an allocindir.
* This routine must be called with splbio interrupts blocked.
*/
-static void
+STATIC void
free_allocindir(aip, inodedep)
struct allocindir *aip;
struct inodedep *inodedep;
@@ -2074,8 +2262,9 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp)
fs = dp->i_fs;
lbn = lblkno(fs, diroffset);
offset = blkoff(fs, diroffset);
- MALLOC(dap, struct diradd *, sizeof(struct diradd), M_DIRADD, M_WAITOK);
- bzero(dap, sizeof(struct diradd));
+ MALLOC(dap, struct diradd *, sizeof(struct diradd), M_DIRADD,
+ M_SOFTDEP_FLAGS);
+ bzero(dap,sizeof(struct diradd));
dap->da_list.wk_type = D_DIRADD;
dap->da_offset = offset;
dap->da_newinum = newinum;
@@ -2086,12 +2275,12 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp)
} else {
dap->da_state |= MKDIR_BODY | MKDIR_PARENT;
MALLOC(mkdir1, struct mkdir *, sizeof(struct mkdir), M_MKDIR,
- M_WAITOK);
+ M_SOFTDEP_FLAGS);
mkdir1->md_list.wk_type = D_MKDIR;
mkdir1->md_state = MKDIR_BODY;
mkdir1->md_diradd = dap;
MALLOC(mkdir2, struct mkdir *, sizeof(struct mkdir), M_MKDIR,
- M_WAITOK);
+ M_SOFTDEP_FLAGS);
mkdir2->md_list.wk_type = D_MKDIR;
mkdir2->md_state = MKDIR_PARENT;
mkdir2->md_diradd = dap;
@@ -2165,8 +2354,8 @@ softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize)
goto done;
oldoffset = offset + (oldloc - base);
newoffset = offset + (newloc - base);
- for (dap = LIST_FIRST(&pagedep->pd_diraddhd[DIRADDHASH(oldoffset)]);
- dap; dap = LIST_NEXT(dap, da_pdlist)) {
+
+ LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(oldoffset)], da_pdlist) {
if (dap->da_offset != oldoffset)
continue;
dap->da_offset = newoffset;
@@ -2178,8 +2367,8 @@ softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize)
break;
}
if (dap == NULL) {
- for (dap = LIST_FIRST(&pagedep->pd_pendinghd);
- dap; dap = LIST_NEXT(dap, da_pdlist)) {
+ LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) {
if (dap->da_offset == oldoffset) {
dap->da_offset = newoffset;
break;
@@ -2195,7 +2384,7 @@ done:
* Free a diradd dependency structure. This routine must be called
* with splbio interrupts blocked.
*/
-static void
+STATIC void
free_diradd(dap)
struct diradd *dap;
{
@@ -2261,32 +2450,50 @@ softdep_setup_remove(bp, dp, ip, isrmdir)
struct inode *ip; /* inode for directory entry being removed */
int isrmdir; /* indicates if doing RMDIR */
{
- struct dirrem *dirrem;
+ struct dirrem *dirrem, *prevdirrem;
/*
* Allocate a new dirrem if appropriate and ACQUIRE_LOCK.
*/
- dirrem = newdirrem(bp, dp, ip, isrmdir);
+ dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
+
+ /*
+ * If the COMPLETE flag is clear, then there were no active
+ * entries and we want to roll back to a zeroed entry until
+ * the new inode is committed to disk. If the COMPLETE flag is
+ * set then we have deleted an entry that never made it to
+ * disk. If the entry we deleted resulted from a name change,
+ * then the old name still resides on disk. We cannot delete
+ * its inode (returned to us in prevdirrem) until the zeroed
+ * directory entry gets to disk. The new inode has never been
+ * referenced on the disk, so can be deleted immediately.
+ */
if ((dirrem->dm_state & COMPLETE) == 0) {
LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem,
dm_next);
+ FREE_LOCK(&lk);
} else {
+ if (prevdirrem != NULL)
+ LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd,
+ prevdirrem, dm_next);
dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino;
- add_to_worklist(&dirrem->dm_list);
+ FREE_LOCK(&lk);
+ handle_workitem_remove(dirrem);
}
- FREE_LOCK(&lk);
}
/*
* Allocate a new dirrem if appropriate and return it along with
* its associated pagedep. Called without a lock, returns with lock.
*/
-static struct dirrem *
-newdirrem(bp, dp, ip, isrmdir)
+STATIC long num_dirrem; /* number of dirrem allocated */
+STATIC struct dirrem *
+newdirrem(bp, dp, ip, isrmdir, prevdirremp)
struct buf *bp; /* buffer containing directory block */
struct inode *dp; /* inode for the directory being modified */
struct inode *ip; /* inode for directory entry being removed */
int isrmdir; /* indicates if doing RMDIR */
+ struct dirrem **prevdirremp; /* previously referenced inode, if any */
{
int offset;
ufs_lbn_t lbn;
@@ -2299,13 +2506,22 @@ newdirrem(bp, dp, ip, isrmdir)
*/
if (ip == NULL)
panic("newdirrem: whiteout");
+ /*
+ * If we are over our limit, try to improve the situation.
+ * Limiting the number of dirrem structures will also limit
+ * the number of freefile and freeblks structures.
+ */
+ if (num_dirrem > max_softdeps / 2)
+ (void) request_cleanup(FLUSH_REMOVE, 0);
+ num_dirrem += 1;
MALLOC(dirrem, struct dirrem *, sizeof(struct dirrem),
- M_DIRREM, M_WAITOK);
- bzero(dirrem, sizeof(struct dirrem));
+ M_DIRREM, M_SOFTDEP_FLAGS);
+ bzero(dirrem,sizeof(struct dirrem));
dirrem->dm_list.wk_type = D_DIRREM;
dirrem->dm_state = isrmdir ? RMDIR : 0;
dirrem->dm_mnt = ITOV(ip)->v_mount;
dirrem->dm_oldinum = ip->i_number;
+ *prevdirremp = NULL;
ACQUIRE_LOCK(&lk);
lbn = lblkno(dp->i_fs, dp->i_offset);
@@ -2319,28 +2535,42 @@ newdirrem(bp, dp, ip, isrmdir)
* be de-allocated. Check for an entry on both the pd_dirraddhd
* list and the pd_pendinghd list.
*/
- for (dap = LIST_FIRST(&pagedep->pd_diraddhd[DIRADDHASH(offset)]);
- dap; dap = LIST_NEXT(dap, da_pdlist))
+
+ LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist)
if (dap->da_offset == offset)
break;
if (dap == NULL) {
- for (dap = LIST_FIRST(&pagedep->pd_pendinghd);
- dap; dap = LIST_NEXT(dap, da_pdlist))
+ LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist)
if (dap->da_offset == offset)
break;
if (dap == NULL)
return (dirrem);
}
/*
- * Must be ATTACHED at this point, so just delete it.
+ * Must be ATTACHED at this point.
*/
if ((dap->da_state & ATTACHED) == 0)
panic("newdirrem: not ATTACHED");
if (dap->da_newinum != ip->i_number)
panic("newdirrem: inum %d should be %d",
ip->i_number, dap->da_newinum);
- free_diradd(dap);
+ /*
+ * If we are deleting a changed name that never made it to disk,
+ * then return the dirrem describing the previous inode (which
+ * represents the inode currently referenced from this entry on disk).
+ */
+ if ((dap->da_state & DIRCHG) != 0) {
+ *prevdirremp = dap->da_previous;
+ dap->da_state &= ~DIRCHG;
+ dap->da_pagedep = pagedep;
+ }
+ /*
+ * We are deleting an entry that never made it to disk.
+ * Mark it COMPLETE so we can delete its inode immediately.
+ */
dirrem->dm_state |= COMPLETE;
+ free_diradd(dap);
return (dirrem);
}
@@ -2371,7 +2601,7 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
{
int offset;
struct diradd *dap = NULL;
- struct dirrem *dirrem;
+ struct dirrem *dirrem, *prevdirrem;
struct pagedep *pagedep;
struct inodedep *inodedep;
@@ -2382,8 +2612,8 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
*/
if (newinum != WINO) {
MALLOC(dap, struct diradd *, sizeof(struct diradd),
- M_DIRADD, M_WAITOK);
- bzero(dap, sizeof(struct diradd));
+ M_DIRADD, M_SOFTDEP_FLAGS);
+ bzero(dap,sizeof(struct diradd));
dap->da_list.wk_type = D_DIRADD;
dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE;
dap->da_offset = offset;
@@ -2393,7 +2623,7 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
/*
* Allocate a new dirrem and ACQUIRE_LOCK.
*/
- dirrem = newdirrem(bp, dp, ip, isrmdir);
+ dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
pagedep = dirrem->dm_pagedep;
/*
* The possible values for isrmdir:
@@ -2427,11 +2657,35 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
}
/*
+ * If the COMPLETE flag is clear, then there were no active
+ * entries and we want to roll back to the previous inode until
+ * the new inode is committed to disk. If the COMPLETE flag is
+ * set, then we have deleted an entry that never made it to disk.
+ * If the entry we deleted resulted from a name change, then the old
+ * inode reference still resides on disk. Any rollback that we do
+ * needs to be to that old inode (returned to us in prevdirrem). If
+ * the entry we deleted resulted from a create, then there is
+ * no entry on the disk, so we want to roll back to zero rather
+ * than the uncommitted inode. In either of the COMPLETE cases we
+ * want to immediately free the unwritten and unreferenced inode.
+ */
+ if ((dirrem->dm_state & COMPLETE) == 0) {
+ dap->da_previous = dirrem;
+ } else {
+ if (prevdirrem != NULL) {
+ dap->da_previous = prevdirrem;
+ } else {
+ dap->da_state &= ~DIRCHG;
+ dap->da_pagedep = pagedep;
+ }
+ dirrem->dm_dirinum = pagedep->pd_ino;
+ add_to_worklist(&dirrem->dm_list);
+ }
+ /*
* Link into its inodedep. Put it on the id_bufwait list if the inode
* is not yet written. If it is written, do the post-inode write
* processing to put it on the id_pendinghd list.
*/
- dap->da_previous = dirrem;
if (inodedep_lookup(dp->i_fs, newinum, DEPALLOC, &inodedep) == 0 ||
(inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
dap->da_state |= COMPLETE;
@@ -2442,35 +2696,26 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
dap, da_pdlist);
WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
}
- /*
- * If the previous inode was never written or its previous directory
- * entry was never written, then we do not want to roll back to this
- * previous value. Instead we want to roll back to zero and immediately
- * free the unwritten or unreferenced inode.
- */
- if (dirrem->dm_state & COMPLETE) {
- dap->da_state &= ~DIRCHG;
- dap->da_pagedep = pagedep;
- dirrem->dm_dirinum = pagedep->pd_ino;
- add_to_worklist(&dirrem->dm_list);
- }
FREE_LOCK(&lk);
}
/*
- * Called whenever the link count on an inode is increased.
+ * Called whenever the link count on an inode is changed.
* It creates an inode dependency so that the new reference(s)
* to the inode cannot be committed to disk until the updated
* inode has been written.
*/
void
-softdep_increase_linkcnt(ip)
+softdep_change_linkcnt(ip)
struct inode *ip; /* the inode with the increased link count */
{
struct inodedep *inodedep;
ACQUIRE_LOCK(&lk);
(void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep);
+ if (ip->i_ffs_nlink < ip->i_effnlink)
+ panic("softdep_change_linkcnt: bad delta");
+ inodedep->id_nlinkdelta = ip->i_ffs_nlink - ip->i_effnlink;
FREE_LOCK(&lk);
}
@@ -2478,7 +2723,7 @@ softdep_increase_linkcnt(ip)
* This workitem decrements the inode's link count.
* If the link count reaches zero, the file is removed.
*/
-static void
+STATIC void
handle_workitem_remove(dirrem)
struct dirrem *dirrem;
{
@@ -2486,6 +2731,7 @@ handle_workitem_remove(dirrem)
struct inodedep *inodedep;
struct vnode *vp;
struct inode *ip;
+ ino_t oldinum;
int error;
if ((error = VFS_VGET(dirrem->dm_mnt, dirrem->dm_oldinum, &vp)) != 0) {
@@ -2493,15 +2739,21 @@ handle_workitem_remove(dirrem)
return;
}
ip = VTOI(vp);
+ ACQUIRE_LOCK(&lk);
+ if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0)
+ panic("handle_workitem_remove: lost inodedep");
/*
* Normal file deletion.
*/
if ((dirrem->dm_state & RMDIR) == 0) {
ip->i_ffs_nlink--;
+ ip->i_flag |= IN_CHANGE;
if (ip->i_ffs_nlink < ip->i_effnlink)
panic("handle_workitem_remove: bad file delta");
- ip->i_flag |= IN_CHANGE;
+ inodedep->id_nlinkdelta = ip->i_ffs_nlink - ip->i_effnlink;
+ FREE_LOCK(&lk);
vput(vp);
+ num_dirrem -= 1;
WORKITEM_FREE(dirrem, D_DIRREM);
return;
}
@@ -2513,9 +2765,11 @@ handle_workitem_remove(dirrem)
* the parent decremented to account for the loss of "..".
*/
ip->i_ffs_nlink -= 2;
+ ip->i_flag |= IN_CHANGE;
if (ip->i_ffs_nlink < ip->i_effnlink)
panic("handle_workitem_remove: bad dir delta");
- ip->i_flag |= IN_CHANGE;
+ inodedep->id_nlinkdelta = ip->i_ffs_nlink - ip->i_effnlink;
+ FREE_LOCK(&lk);
if ((error = VOP_TRUNCATE(vp, (off_t)0, 0, p->p_ucred, p)) != 0)
softdep_error("handle_workitem_remove: truncate", error);
/*
@@ -2525,14 +2779,27 @@ handle_workitem_remove(dirrem)
*/
if (dirrem->dm_state & DIRCHG) {
vput(vp);
+ num_dirrem -= 1;
WORKITEM_FREE(dirrem, D_DIRREM);
return;
}
+ /*
+ * If the inodedep does not exist, then the zero'ed inode has
+ * been written to disk. If the allocated inode has never been
+ * written to disk, then the on-disk inode is zero'ed. In either
+ * case we can remove the file immediately.
+ */
ACQUIRE_LOCK(&lk);
- (void) inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, DEPALLOC,
- &inodedep);
dirrem->dm_state = 0;
+ oldinum = dirrem->dm_oldinum;
dirrem->dm_oldinum = dirrem->dm_dirinum;
+ if (inodedep_lookup(ip->i_fs, oldinum, 0, &inodedep) == 0 ||
+ check_inode_unwritten(inodedep)) {
+ FREE_LOCK(&lk);
+ vput(vp);
+ handle_workitem_remove(dirrem);
+ return;
+ }
WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
FREE_LOCK(&lk);
vput(vp);
@@ -2552,33 +2819,39 @@ handle_workitem_remove(dirrem)
* procedure above (softdep_setup_freeblocks) and completed by the
* following procedure.
*/
-static void
+STATIC void
handle_workitem_freefile(freefile)
struct freefile *freefile;
{
+ struct fs *fs;
struct vnode vp;
struct inode tip;
struct inodedep *idp;
- struct vop_vfree_args args;
int error;
+ fs = VFSTOUFS(freefile->fx_mnt)->um_fs;
#ifdef DEBUG
ACQUIRE_LOCK(&lk);
- if (inodedep_lookup(freefile->fx_fs, freefile->fx_oldinum, 0, &idp))
+ if (inodedep_lookup(fs, freefile->fx_oldinum, 0, &idp))
panic("handle_workitem_freefile: inodedep survived");
FREE_LOCK(&lk);
#endif
tip.i_devvp = freefile->fx_devvp;
tip.i_dev = freefile->fx_devvp->v_rdev;
- tip.i_fs = freefile->fx_fs;
+ tip.i_fs = fs;
+ tip.i_vnode = &vp;
vp.v_data = &tip;
- args.a_pvp = &vp;
- args.a_ino = freefile->fx_oldinum;
- args.a_mode = freefile->fx_mode;
- if ((error = ffs_freefile(&args)) != 0)
- softdep_error("handle_workitem_freefile", error);
+ {
+ struct vop_vfree_args vargs;
+
+ vargs.a_pvp = &vp;
+ vargs.a_ino = freefile->fx_oldinum;
+ vargs.a_mode = freefile->fx_mode;
+
+ if ((error = ffs_freefile(&vargs)) != 0)
+ softdep_error("handle_workitem_freefile", error);
+ }
WORKITEM_FREE(freefile, D_FREEFILE);
- num_freefile -= 1;
}
/*
@@ -2641,7 +2914,7 @@ softdep_disk_io_initiation(bp)
* dependency can be freed.
*/
if (LIST_FIRST(&indirdep->ir_deplisthd) == NULL) {
- indirdep->ir_savebp->b_flags |= B_INVAL;
+ indirdep->ir_savebp->b_flags |= B_INVAL | B_NOCACHE;
brelse(indirdep->ir_savebp);
/* inline expand WORKLIST_REMOVE(wk); */
wk->wk_state &= ~ONWORKLIST;
@@ -2652,10 +2925,14 @@ softdep_disk_io_initiation(bp)
/*
* Replace up-to-date version with safe version.
*/
+ MALLOC(indirdep->ir_saveddata, caddr_t, bp->b_bcount,
+ M_INDIRDEP, M_SOFTDEP_FLAGS);
ACQUIRE_LOCK(&lk);
indirdep->ir_state &= ~ATTACHED;
indirdep->ir_state |= UNDONE;
- bp->b_data = indirdep->ir_savebp->b_data;
+ bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount);
+ bcopy(indirdep->ir_savebp->b_data, bp->b_data,
+ bp->b_bcount);
FREE_LOCK(&lk);
continue;
@@ -2679,7 +2956,7 @@ softdep_disk_io_initiation(bp)
* thus, no I/O completion operations can occur while we are
* manipulating its associated dependencies.
*/
-static void
+STATIC void
initiate_write_filepage(pagedep, bp)
struct pagedep *pagedep;
struct buf *bp;
@@ -2700,8 +2977,7 @@ initiate_write_filepage(pagedep, bp)
pagedep->pd_state |= IOSTARTED;
ACQUIRE_LOCK(&lk);
for (i = 0; i < DAHASHSZ; i++) {
- for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); dap;
- dap = LIST_NEXT(dap, da_pdlist)) {
+ LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
ep = (struct direct *)
((char *)bp->b_data + dap->da_offset);
if (ep->d_ino != dap->da_newinum)
@@ -2725,7 +3001,7 @@ initiate_write_filepage(pagedep, bp)
* locked, thus, no I/O completion operations can occur while we
* are manipulating its associated dependencies.
*/
-static void
+STATIC void
initiate_write_inodeblock(inodedep, bp)
struct inodedep *inodedep;
struct buf *bp; /* The inode block */
@@ -2750,7 +3026,7 @@ initiate_write_inodeblock(inodedep, bp)
if (inodedep->id_savedino != NULL)
panic("initiate_write_inodeblock: already doing I/O");
MALLOC(inodedep->id_savedino, struct dinode *,
- sizeof(struct dinode), M_INODEDEP, M_WAITOK);
+ sizeof(struct dinode), M_INODEDEP, M_SOFTDEP_FLAGS);
*inodedep->id_savedino = *dp;
bzero((caddr_t)dp, sizeof(struct dinode));
return;
@@ -2942,7 +3218,9 @@ softdep_disk_write_complete(bp)
indirdep = WK_INDIRDEP(wk);
if (indirdep->ir_state & GOINGAWAY)
panic("disk_write_complete: indirdep gone");
- bp->b_data = (caddr_t)indirdep->ir_saveddata;
+ bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount);
+ FREE(indirdep->ir_saveddata, M_INDIRDEP);
+ indirdep->ir_saveddata = 0;
indirdep->ir_state &= ~UNDONE;
indirdep->ir_state |= ATTACHED;
while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) {
@@ -2981,13 +3259,13 @@ softdep_disk_write_complete(bp)
* this routine is always called from interrupt level with further
* splbio interrupts blocked.
*/
-static void
+STATIC void
handle_allocdirect_partdone(adp)
struct allocdirect *adp; /* the completed allocdirect */
{
struct allocdirect *listadp;
struct inodedep *inodedep;
- long bsize;
+ long bsize, delay;
if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
return;
@@ -3004,8 +3282,7 @@ handle_allocdirect_partdone(adp)
*/
inodedep = adp->ad_inodedep;
bsize = inodedep->id_fs->fs_bsize;
- for (listadp = TAILQ_FIRST(&inodedep->id_inoupdt); listadp;
- listadp = TAILQ_NEXT(listadp, ad_next)) {
+ TAILQ_FOREACH(listadp, &inodedep->id_inoupdt, ad_next) {
/* found our block */
if (listadp == adp)
break;
@@ -3024,8 +3301,7 @@ handle_allocdirect_partdone(adp)
*/
if (listadp == NULL) {
#ifdef DEBUG
- for (listadp = TAILQ_FIRST(&inodedep->id_newinoupdt); listadp;
- listadp = TAILQ_NEXT(listadp, ad_next))
+ TAILQ_FOREACH(listadp, &inodedep->id_newinoupdt, ad_next)
/* found our block */
if (listadp == adp)
break;
@@ -3037,12 +3313,16 @@ handle_allocdirect_partdone(adp)
/*
* If we have found the just finished dependency, then free
* it along with anything that follows it that is complete.
+ * If the inode still has a bitmap dependency, then it has
+ * never been written to disk, hence the on-disk inode cannot
+ * reference the old fragment so we can free it without delay.
*/
+ delay = (inodedep->id_state & DEPCOMPLETE);
for (; adp; adp = listadp) {
listadp = TAILQ_NEXT(adp, ad_next);
if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
return;
- free_allocdirect(&inodedep->id_inoupdt, adp, 1);
+ free_allocdirect(&inodedep->id_inoupdt, adp, delay);
}
}
@@ -3051,7 +3331,7 @@ handle_allocdirect_partdone(adp)
* this routine is always called from interrupt level with further
* splbio interrupts blocked.
*/
-static void
+STATIC void
handle_allocindir_partdone(aip)
struct allocindir *aip; /* the completed allocindir */
{
@@ -3081,7 +3361,7 @@ handle_allocindir_partdone(aip)
* that this routine is always called from interrupt level with further
* splbio interrupts blocked.
*/
-static int
+STATIC int
handle_written_inodeblock(inodedep, bp)
struct inodedep *inodedep;
struct buf *bp; /* buffer containing the inode block */
@@ -3228,7 +3508,7 @@ handle_written_inodeblock(inodedep, bp)
* Process a diradd entry after its dependent inode has been written.
* This routine must be called with splbio interrupts blocked.
*/
-static void
+STATIC void
diradd_inode_written(dap, inodedep)
struct diradd *dap;
struct inodedep *inodedep;
@@ -3250,7 +3530,7 @@ diradd_inode_written(dap, inodedep)
/*
* Handle the completion of a mkdir dependency.
*/
-static void
+STATIC void
handle_written_mkdir(mkdir, type)
struct mkdir *mkdir;
int type;
@@ -3283,7 +3563,7 @@ handle_written_mkdir(mkdir, type)
* Note that this routine is always called from interrupt level
* with further splbio interrupts blocked.
*/
-static int
+STATIC int
handle_written_filepage(pagedep, bp)
struct pagedep *pagedep;
struct buf *bp; /* buffer containing the written page */
@@ -3396,12 +3676,7 @@ softdep_load_inodeblock(ip)
FREE_LOCK(&lk);
return;
}
- if (inodedep->id_nlinkdelta != 0) {
- ip->i_effnlink -= inodedep->id_nlinkdelta;
- ip->i_flag |= IN_MODIFIED;
- inodedep->id_nlinkdelta = 0;
- (void) free_inodedep(inodedep);
- }
+ ip->i_effnlink -= inodedep->id_nlinkdelta;
FREE_LOCK(&lk);
}
@@ -3433,16 +3708,14 @@ softdep_update_inodeblock(ip, bp, waitfor)
* to track.
*/
ACQUIRE_LOCK(&lk);
- if (ip->i_effnlink != ip->i_ffs_nlink) {
- (void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC,
- &inodedep);
- } else if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) {
+ if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) {
+ if (ip->i_effnlink != ip->i_ffs_nlink)
+ panic("softdep_update_inodeblock: bad link count");
FREE_LOCK(&lk);
return;
}
- if (ip->i_ffs_nlink < ip->i_effnlink)
+ if (inodedep->id_nlinkdelta != ip->i_ffs_nlink - ip->i_effnlink)
panic("softdep_update_inodeblock: bad delta");
- inodedep->id_nlinkdelta = ip->i_ffs_nlink - ip->i_effnlink;
/*
* Changes have been initiated. Anything depending on these
* changes cannot occur until this inode has been written.
@@ -3482,7 +3755,8 @@ softdep_update_inodeblock(ip, bp, waitfor)
}
gotit = getdirtybuf(&inodedep->id_buf, MNT_WAIT);
FREE_LOCK(&lk);
- if (gotit && (error = VOP_BWRITE(inodedep->id_buf)) != 0)
+ if (gotit &&
+ (error = bwrite(inodedep->id_buf)) != 0)
softdep_error("softdep_update_inodeblock: bwrite", error);
if ((inodedep->id_state & DEPCOMPLETE) == 0)
panic("softdep_update_inodeblock: update failed");
@@ -3493,7 +3767,7 @@ softdep_update_inodeblock(ip, bp, waitfor)
* inode dependency list (id_inoupdt). This routine must be called
* with splbio interrupts blocked.
*/
-static void
+STATIC void
merge_inode_lists(inodedep)
struct inodedep *inodedep;
{
@@ -3528,32 +3802,34 @@ int
softdep_fsync(vp)
struct vnode *vp; /* the "in_core" copy of the inode */
{
- struct diradd *dap, *olddap;
struct inodedep *inodedep;
struct pagedep *pagedep;
struct worklist *wk;
+ struct diradd *dap;
struct mount *mnt;
struct vnode *pvp;
struct inode *ip;
struct buf *bp;
struct fs *fs;
struct proc *p = CURPROC; /* XXX */
- int error, ret, flushparent;
- struct timespec ts;
+ int error, flushparent;
ino_t parentino;
ufs_lbn_t lbn;
+ struct timespec ts;
ip = VTOI(vp);
fs = ip->i_fs;
- for (error = 0, flushparent = 0, olddap = NULL; ; ) {
- ACQUIRE_LOCK(&lk);
- if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0)
- break;
- if (LIST_FIRST(&inodedep->id_inowait) != NULL ||
- LIST_FIRST(&inodedep->id_bufwait) != NULL ||
- TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
- TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL)
- panic("softdep_fsync: pending ops");
+ ACQUIRE_LOCK(&lk);
+ if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0) {
+ FREE_LOCK(&lk);
+ return (0);
+ }
+ if (LIST_FIRST(&inodedep->id_inowait) != NULL ||
+ LIST_FIRST(&inodedep->id_bufwait) != NULL ||
+ TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
+ TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL)
+ panic("softdep_fsync: pending ops");
+ for (error = 0, flushparent = 0; ; ) {
if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL)
break;
if (wk->wk_type != D_DIRADD)
@@ -3561,13 +3837,6 @@ softdep_fsync(vp)
TYPENAME(wk->wk_type));
dap = WK_DIRADD(wk);
/*
- * If we have failed to get rid of all the dependencies
- * then something is seriously wrong.
- */
- if (dap == olddap)
- panic("softdep_fsync: flush failed");
- olddap = dap;
- /*
* Flush our parent if this directory entry
* has a MKDIR_PARENT dependency.
*/
@@ -3600,11 +3869,10 @@ softdep_fsync(vp)
*/
FREE_LOCK(&lk);
VOP_UNLOCK(vp, 0, p);
- if ((error = VFS_VGET(mnt, parentino, &pvp)) != 0) {
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- return (error);
- }
+ error = VFS_VGET(mnt, parentino, &pvp);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+ if (error != 0)
+ return (error);
if (flushparent) {
TIMEVAL_TO_TIMESPEC(&time, &ts);
if ((error = VOP_UPDATE(pvp, &ts, &ts, MNT_WAIT))) {
@@ -3617,12 +3885,14 @@ softdep_fsync(vp)
*/
error = bread(pvp, lbn, blksize(fs, VTOI(pvp), lbn), p->p_ucred,
&bp);
- ret = VOP_BWRITE(bp);
+ if (error == 0)
+ error = bwrite(bp);
vput(pvp);
if (error != 0)
return (error);
- if (ret != 0)
- return (ret);
+ ACQUIRE_LOCK(&lk);
+ if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0)
+ break;
}
FREE_LOCK(&lk);
return (0);
@@ -3640,8 +3910,8 @@ softdep_fsync_mountdev(vp)
struct buf *bp, *nbp;
struct worklist *wk;
- if (vp->v_type != VBLK)
- panic("softdep_fsync_mountdev: vnode not VBLK");
+ if (!vn_isdisk(vp, NULL))
+ panic("softdep_fsync_mountdev: vnode not a disk");
ACQUIRE_LOCK(&lk);
for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
nbp = LIST_NEXT(bp, b_vnbufs);
@@ -3650,6 +3920,8 @@ softdep_fsync_mountdev(vp)
*/
if (bp->b_flags & B_BUSY)
continue;
+ bp->b_flags |= B_BUSY;
+
if ((bp->b_flags & B_DELWRI) == 0)
panic("softdep_fsync_mountdev: not dirty");
/*
@@ -3657,10 +3929,11 @@ softdep_fsync_mountdev(vp)
* dependencies.
*/
if ((wk = LIST_FIRST(&bp->b_dep)) == NULL ||
- wk->wk_type != D_BMSAFEMAP)
+ wk->wk_type != D_BMSAFEMAP) {
+ bp->b_flags &= ~B_BUSY;
continue;
+ }
bremfree(bp);
- bp->b_flags |= B_BUSY;
FREE_LOCK(&lk);
(void) bawrite(bp);
ACQUIRE_LOCK(&lk);
@@ -3701,7 +3974,7 @@ softdep_sync_metadata(ap)
* Check whether this vnode is involved in a filesystem
* that is doing soft dependency processing.
*/
- if (vp->v_type != VBLK) {
+ if (!vn_isdisk(vp, NULL)) {
if (!DOINGSOFTDEP(vp))
return (0);
} else
@@ -3745,8 +4018,7 @@ loop:
* As we hold the buffer locked, none of its dependencies
* will disappear.
*/
- for (wk = LIST_FIRST(&bp->b_dep); wk;
- wk = LIST_NEXT(wk, wk_list)) {
+ LIST_FOREACH(wk, &bp->b_dep, wk_list) {
switch (wk->wk_type) {
case D_ALLOCDIRECT:
@@ -3785,8 +4057,8 @@ loop:
case D_INDIRDEP:
restart:
- for (aip = LIST_FIRST(&WK_INDIRDEP(wk)->ir_deplisthd);
- aip; aip = LIST_NEXT(aip, ai_next)) {
+ LIST_FOREACH(aip, &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) {
if (aip->ai_state & DEPCOMPLETE)
continue;
nbp = aip->ai_buf;
@@ -3926,7 +4198,8 @@ loop:
* way to accomplish this is to sync the entire filesystem (luckily
* this happens rarely).
*/
- if (vp->v_type == VBLK && vp->v_specmountpoint && !VOP_ISLOCKED(vp) &&
+ if (vn_isdisk(vp, NULL) &&
+ vp->v_specmountpoint && !VOP_ISLOCKED(vp) &&
(error = VFS_SYNC(vp->v_specmountpoint, MNT_WAIT, ap->a_cred,
ap->a_p)) != 0)
return (error);
@@ -3937,7 +4210,7 @@ loop:
* Flush the dependencies associated with an inodedep.
* Called with splbio blocked.
*/
-static int
+STATIC int
flush_inodedep_deps(fs, ino)
struct fs *fs;
ino_t ino;
@@ -3965,8 +4238,7 @@ flush_inodedep_deps(fs, ino)
ACQUIRE_LOCK(&lk);
if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
return (0);
- for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
- adp = TAILQ_NEXT(adp, ad_next)) {
+ TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) {
if (adp->ad_state & DEPCOMPLETE)
continue;
bp = adp->ad_buf;
@@ -3987,8 +4259,7 @@ flush_inodedep_deps(fs, ino)
}
if (adp != NULL)
continue;
- for (adp = TAILQ_FIRST(&inodedep->id_newinoupdt); adp;
- adp = TAILQ_NEXT(adp, ad_next)) {
+ TAILQ_FOREACH(adp, &inodedep->id_newinoupdt, ad_next) {
if (adp->ad_state & DEPCOMPLETE)
continue;
bp = adp->ad_buf;
@@ -4028,7 +4299,7 @@ flush_inodedep_deps(fs, ino)
* Eliminate a pagedep dependency by flushing out all its diradd dependencies.
* Called with splbio blocked.
*/
-static int
+STATIC int
flush_pagedep_deps(pvp, mp, diraddhdp)
struct vnode *pvp;
struct mount *mp;
@@ -4062,84 +4333,85 @@ flush_pagedep_deps(pvp, mp, diraddhdp)
if (dap != LIST_FIRST(diraddhdp))
continue;
if (dap->da_state & MKDIR_PARENT)
- panic("flush_pagedep_deps: MKDIR");
+ panic("flush_pagedep_deps: MKDIR_PARENT");
}
/*
- * Flush the file on which the directory entry depends.
- * If the inode has already been pushed out of the cache,
- * then all the block dependencies will have been flushed
- * leaving only inode dependencies (e.g., bitmaps). Thus,
- * we do a ufs_ihashget to check for the vnode in the cache.
- * If it is there, we do a full flush. If it is no longer
- * there we need only dispose of any remaining bitmap
- * dependencies and write the inode to disk.
+ * A newly allocated directory must have its "." and
+ * ".." entries written out before its name can be
+ * committed in its parent. We do not want or need
+ * the full semantics of a synchronous VOP_FSYNC as
+ * that may end up here again, once for each directory
+ * level in the filesystem. Instead, we push the blocks
+ * and wait for them to clear. We have to fsync twice
+ * because the first call may choose to defer blocks
+ * that still have dependencies, but deferral will
+ * happen at most once.
*/
inum = dap->da_newinum;
- FREE_LOCK(&lk);
- if ((vp = ufs_ihashget(ump->um_dev, inum)) == NULL) {
+ if (dap->da_state & MKDIR_BODY) {
+ FREE_LOCK(&lk);
+ if ((error = VFS_VGET(mp, inum, &vp)) != 0)
+ break;
+ if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)) ||
+ (error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) {
+ vput(vp);
+ break;
+ }
+ drain_output(vp, 0);
+ vput(vp);
ACQUIRE_LOCK(&lk);
- if (inodedep_lookup(ump->um_fs, inum, 0, &inodedep) == 0
- && dap == LIST_FIRST(diraddhdp))
- panic("flush_pagedep_deps: flush 1 failed");
/*
- * If the inode still has bitmap dependencies,
- * push them to disk.
+ * If that cleared dependencies, go on to next.
*/
- if ((inodedep->id_state & DEPCOMPLETE) == 0) {
- gotit = getdirtybuf(&inodedep->id_buf,MNT_WAIT);
- FREE_LOCK(&lk);
- if (gotit &&
- (error = VOP_BWRITE(inodedep->id_buf)) != 0)
- break;
- ACQUIRE_LOCK(&lk);
- }
if (dap != LIST_FIRST(diraddhdp))
continue;
- /*
- * If the inode is still sitting in a buffer waiting
- * to be written, push it to disk.
- */
+ if (dap->da_state & MKDIR_BODY)
+ panic("flush_pagedep_deps: MKDIR_BODY");
+ }
+ /*
+ * Flush the inode on which the directory entry depends.
+ * Having accounted for MKDIR_PARENT and MKDIR_BODY above,
+ * the only remaining dependency is that the updated inode
+ * count must get pushed to disk. The inode has already
+ * been pushed into its inode buffer (via VOP_UPDATE) at
+ * the time of the reference count change. So we need only
+ * locate that buffer, ensure that there will be no rollback
+ * caused by a bitmap dependency, then write the inode buffer.
+ */
+ if (inodedep_lookup(ump->um_fs, inum, 0, &inodedep) == 0)
+ panic("flush_pagedep_deps: lost inode");
+ /*
+ * If the inode still has bitmap dependencies,
+ * push them to disk.
+ */
+ if ((inodedep->id_state & DEPCOMPLETE) == 0) {
+ gotit = getdirtybuf(&inodedep->id_buf, MNT_WAIT);
FREE_LOCK(&lk);
- if ((error = bread(ump->um_devvp,
- fsbtodb(ump->um_fs, ino_to_fsba(ump->um_fs, inum)),
- (int)ump->um_fs->fs_bsize, NOCRED, &bp)) != 0)
- break;
- if ((error = VOP_BWRITE(bp)) != 0)
+ if (gotit &&
+ (error = bwrite(inodedep->id_buf)) != 0)
break;
ACQUIRE_LOCK(&lk);
- if (dap == LIST_FIRST(diraddhdp))
- panic("flush_pagedep_deps: flush 2 failed");
- continue;
- }
- if (vp->v_type == VDIR) {
- /*
- * A newly allocated directory must have its "." and
- * ".." entries written out before its name can be
- * committed in its parent. We do not want or need
- * the full semantics of a synchronous VOP_FSYNC as
- * that may end up here again, once for each directory
- * level in the filesystem. Instead, we push the blocks
- * and wait for them to clear.
- */
- if ((error =
- VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) {
- vput(vp);
- break;
- }
- drain_output(vp, 0);
+ if (dap != LIST_FIRST(diraddhdp))
+ continue;
}
- TIMEVAL_TO_TIMESPEC(&time, &ts);
- error = VOP_UPDATE(vp, &ts, &ts, MNT_WAIT);
- vput(vp);
- if (error)
+ /*
+ * If the inode is still sitting in a buffer waiting
+ * to be written, push it to disk.
+ */
+ FREE_LOCK(&lk);
+ if ((error = bread(ump->um_devvp,
+ fsbtodb(ump->um_fs, ino_to_fsba(ump->um_fs, inum)),
+ (int)ump->um_fs->fs_bsize, NOCRED, &bp)) != 0)
+ break;
+ if ((error = bwrite(bp)) != 0)
break;
+ ACQUIRE_LOCK(&lk);
/*
* If we have failed to get rid of all the dependencies
* then something is seriously wrong.
*/
if (dap == LIST_FIRST(diraddhdp))
- panic("flush_pagedep_deps: flush 3 failed");
- ACQUIRE_LOCK(&lk);
+ panic("flush_pagedep_deps: flush failed");
}
if (error)
ACQUIRE_LOCK(&lk);
@@ -4148,17 +4420,35 @@ flush_pagedep_deps(pvp, mp, diraddhdp)
/*
* A large burst of file addition or deletion activity can drive the
- * memory load excessively high. Therefore we deliberately slow things
- * down and speed up the I/O processing if we find ourselves with too
- * many dependencies in progress.
+ * memory load excessively high. First attempt to slow things down
+ * using the techniques below. If that fails, this routine requests
+ * the offending operations to fall back to running synchronously
+ * until the memory load returns to a reasonable level.
*/
-static int
+int
+softdep_slowdown(vp)
+ struct vnode *vp;
+{
+ int max_softdeps_hard;
+
+ max_softdeps_hard = max_softdeps * 11 / 10;
+ if (num_dirrem < max_softdeps_hard / 2 &&
+ num_inodedep < max_softdeps_hard)
+ return (0);
+ stat_sync_limit_hit += 1;
+ return (1);
+}
+
+/*
+ * If memory utilization has gotten too high, deliberately slow things
+ * down and speed up the I/O processing.
+ */
+STATIC int
request_cleanup(resource, islocked)
int resource;
int islocked;
{
struct proc *p = CURPROC;
- int error;
/*
* We never hold up the filesystem syncer process.
@@ -4166,6 +4456,29 @@ request_cleanup(resource, islocked)
if (p == filesys_syncer)
return (0);
/*
+ * First check to see if the work list has gotten backlogged.
+ * If it has, co-opt this process to help clean up two entries.
+ * Because this process may hold inodes locked, we cannot
+ * handle any remove requests that might block on a locked
+ * inode as that could lead to deadlock.
+ */
+ if (num_on_worklist > max_softdeps / 10) {
+ if (islocked)
+ FREE_LOCK(&lk);
+ process_worklist_item(NULL, LK_NOWAIT);
+ process_worklist_item(NULL, LK_NOWAIT);
+ stat_worklist_push += 2;
+ if (islocked)
+ ACQUIRE_LOCK(&lk);
+ return (1);
+ }
+ /*
+ * Next, we attempt to speed up the syncer process. If that
+ * is successful, then we allow the process to continue.
+ */
+ if (speedup_syncer())
+ return (0);
+ /*
* If we are resource constrained on inode dependencies, try
* flushing some dirty inodes. Otherwise, we are constrained
* by file deletions, so try accelerating flushes of directories
@@ -4179,12 +4492,14 @@ request_cleanup(resource, islocked)
case FLUSH_INODES:
stat_ino_limit_push += 1;
- req_clear_inodedeps = 1;
+ req_clear_inodedeps += 1;
+ stat_countp = &stat_ino_limit_hit;
break;
case FLUSH_REMOVE:
stat_blk_limit_push += 1;
- req_clear_remove = 1;
+ req_clear_remove += 1;
+ stat_countp = &stat_blk_limit_hit;
break;
default:
@@ -4196,33 +4511,43 @@ request_cleanup(resource, islocked)
*/
if (islocked == 0)
ACQUIRE_LOCK(&lk);
+ proc_waiting += 1;
+ if (!timeout_initialized(&proc_waiting_timeout)) {
+ timeout_set(&proc_waiting_timeout, pause_timer, 0);
+ timeout_add(&proc_waiting_timeout, tickdelay > 2 ? tickdelay : 2);
+ }
FREE_LOCK_INTERLOCKED(&lk);
- error = tsleep((caddr_t)&proc_waiting, PPAUSE | PCATCH, "softupdate",
- tickdelay > 2 ? tickdelay : 2);
+ (void) tsleep((caddr_t)&proc_waiting, PPAUSE, "softupdate", 0);
ACQUIRE_LOCK_INTERLOCKED(&lk);
- if (error == EWOULDBLOCK) {
- switch (resource) {
-
- case FLUSH_INODES:
- stat_ino_limit_hit += 1;
- break;
-
- case FLUSH_REMOVE:
- stat_blk_limit_hit += 1;
- break;
- }
- }
+ proc_waiting -= 1;
if (islocked == 0)
FREE_LOCK(&lk);
return (1);
}
/*
- * Flush out a directory with at least one removal dependency in an effort
- * to reduce the number of freefile and freeblks dependency structures.
+ * Awaken processes pausing in request_cleanup and clear proc_waiting
+ * to indicate that there is no longer a timer running.
*/
-static void
+void
+pause_timer(arg)
+ void *arg;
+{
+
+ *stat_countp += 1;
+ wakeup_one(&proc_waiting);
+ if (proc_waiting > 0)
+ timeout_add(&proc_waiting_timeout, tickdelay > 2 ? tickdelay : 2);
+ else
+ timeout_del(&proc_waiting_timeout);
+}
+
+/*
+ * Flush out a directory with at least one removal dependency in an effort to
+ * reduce the number of dirrem, freefile, and freeblks dependency structures.
+ */
+STATIC void
clear_remove(p)
struct proc *p;
{
@@ -4239,21 +4564,30 @@ clear_remove(p)
pagedephd = &pagedep_hashtbl[next++];
if (next >= pagedep_hash)
next = 0;
- for (pagedep = LIST_FIRST(pagedephd); pagedep;
- pagedep = LIST_NEXT(pagedep, pd_hash)) {
+ LIST_FOREACH(pagedep, pagedephd, pd_hash) {
if (LIST_FIRST(&pagedep->pd_dirremhd) == NULL)
continue;
mp = pagedep->pd_mnt;
ino = pagedep->pd_ino;
FREE_LOCK(&lk);
+#if 0
+ if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
+ continue;
+#endif
if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
softdep_error("clear_remove: vget", error);
+#if 0
+ vn_finished_write(mp);
+#endif
return;
}
if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)))
softdep_error("clear_remove: fsync", error);
drain_output(vp, 0);
vput(vp);
+#if 0
+ vn_finished_write(mp);
+#endif
return;
}
}
@@ -4264,7 +4598,7 @@ clear_remove(p)
* Clear out a block of dirty inodes in an effort to reduce
* the number of inodedep dependency structures.
*/
-static void
+STATIC void
clear_inodedeps(p)
struct proc *p;
{
@@ -4294,8 +4628,7 @@ clear_inodedeps(p)
* Ugly code to find mount point given pointer to superblock.
*/
fs = inodedep->id_fs;
- for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
- mp = CIRCLEQ_NEXT(mp, mnt_list))
+ CIRCLEQ_FOREACH(mp, &mountlist, mnt_list)
if ((mp->mnt_flag & MNT_SOFTDEP) && fs == VFSTOUFS(mp)->um_fs)
break;
/*
@@ -4314,8 +4647,15 @@ clear_inodedeps(p)
if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
continue;
FREE_LOCK(&lk);
+#if 0
+ if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
+ continue;
+#endif
if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
softdep_error("clear_inodedeps: vget", error);
+#if 0
+ vn_finished_write(mp);
+#endif
return;
}
if (ino == lastino) {
@@ -4327,17 +4667,101 @@ clear_inodedeps(p)
drain_output(vp, 0);
}
vput(vp);
+#if 0
+ vn_finished_write(mp);
+#endif
ACQUIRE_LOCK(&lk);
}
FREE_LOCK(&lk);
}
/*
+ * Function to determine if the buffer has outstanding dependencies
+ * that will cause a roll-back if the buffer is written. If wantcount
+ * is set, return number of dependencies, otherwise just yes or no.
+ */
+int
+softdep_count_dependencies(bp, wantcount)
+ struct buf *bp;
+ int wantcount;
+{
+ struct worklist *wk;
+ struct inodedep *inodedep;
+ struct indirdep *indirdep;
+ struct allocindir *aip;
+ struct pagedep *pagedep;
+ struct diradd *dap;
+ int i, retval;
+
+ retval = 0;
+ ACQUIRE_LOCK(&lk);
+ LIST_FOREACH(wk, &bp->b_dep, wk_list) {
+ switch (wk->wk_type) {
+
+ case D_INODEDEP:
+ inodedep = WK_INODEDEP(wk);
+ if ((inodedep->id_state & DEPCOMPLETE) == 0) {
+ /* bitmap allocation dependency */
+ retval += 1;
+ if (!wantcount)
+ goto out;
+ }
+ if (TAILQ_FIRST(&inodedep->id_inoupdt)) {
+ /* direct block pointer dependency */
+ retval += 1;
+ if (!wantcount)
+ goto out;
+ }
+ continue;
+
+ case D_INDIRDEP:
+ indirdep = WK_INDIRDEP(wk);
+
+ LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) {
+ /* indirect block pointer dependency */
+ retval += 1;
+ if (!wantcount)
+ goto out;
+ }
+ continue;
+
+ case D_PAGEDEP:
+ pagedep = WK_PAGEDEP(wk);
+ for (i = 0; i < DAHASHSZ; i++) {
+ LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
+ /* directory entry dependency */
+ retval += 1;
+ if (!wantcount)
+ goto out;
+ }
+ }
+ continue;
+
+ case D_BMSAFEMAP:
+ case D_ALLOCDIRECT:
+ case D_ALLOCINDIR:
+ case D_MKDIR:
+ /* never a dependency on these blocks */
+ continue;
+
+ default:
+ panic("softdep_count_dependencies: Unexpected type %s",
+ TYPENAME(wk->wk_type));
+ /* NOTREACHED */
+ }
+ }
+out:
+ FREE_LOCK(&lk);
+ return (retval);
+}
+
+/*
* Acquire exclusive access to a buffer.
* Must be called with splbio blocked.
* Return 1 if buffer was acquired.
*/
-static int
+STATIC int
getdirtybuf(bpp, waitfor)
struct buf **bpp;
int waitfor;
@@ -4367,7 +4791,7 @@ getdirtybuf(bpp, waitfor)
* Wait for pending output on a vnode to complete.
* Must be called with vnode locked.
*/
-static void
+STATIC void
drain_output(vp, islocked)
struct vnode *vp;
int islocked;
diff --git a/sys/ufs/ffs/ffs_softdep_stub.c b/sys/ufs/ffs/ffs_softdep_stub.c
index bd06b5fbdd2..2eabe90e9b3 100644
--- a/sys/ufs/ffs/ffs_softdep_stub.c
+++ b/sys/ufs/ffs/ffs_softdep_stub.c
@@ -1,10 +1,12 @@
-/* $OpenBSD: ffs_softdep_stub.c,v 1.2 1999/12/05 08:30:38 art Exp $ */
+/* $OpenBSD: ffs_softdep_stub.c,v 1.3 2001/02/21 23:24:31 csapuntz Exp $ */
/*
- * Copyright 1997 Marshall Kirk McKusick. All Rights Reserved.
+ * Copyright 1998 Marshall Kirk McKusick. All Rights Reserved.
*
- * This code is derived from work done by Greg Ganger and Yale Patt at the
- * University of Michigan.
+ * The soft updates code is derived from the appendix of a University
+ * of Michigan technical report (Gregory R. Ganger and Yale N. Patt,
+ * "Soft Updates: A Solution to the Metadata Update Problem in File
+ * Systems", CSE-TR-254-95, August 1995).
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -14,9 +16,9 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. None of the names of McKusick, Ganger, Patt, or the University of
- * Michigan may be used to endorse or promote products derived from
- * this software without specific prior written permission.
+ * 3. None of the names of McKusick, Ganger, or the University of Michigan
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@@ -30,7 +32,8 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * @(#)ffs_softdep.stub.c 9.1 (McKusick) 7/9/97
+ * from: @(#)ffs_softdep_stub.c 9.1 (McKusick) 7/10/97
+ * $FreeBSD: src/sys/ufs/ffs/ffs_softdep_stub.c,v 1.14 2000/08/09 00:41:54 tegge Exp $
*/
#ifndef FFS_SOFTUPDATES
@@ -141,12 +144,10 @@ softdep_setup_freeblocks(ip, length)
}
void
-softdep_freefile(ap)
- struct vop_vfree_args /* {
- struct vnode *a_pvp;
- ino_t a_ino;
- int a_mode;
- } */ *ap;
+softdep_freefile(pvp, ino, mode)
+ struct vnode *pvp;
+ ino_t ino;
+ int mode;
{
panic("softdep_freefile called");
@@ -200,11 +201,11 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
}
void
-softdep_increase_linkcnt(ip)
+softdep_change_linkcnt(ip)
struct inode *ip;
{
- panic("softdep_increase_linkcnt called");
+ panic("softdep_change_linkcnt called");
}
void
@@ -225,13 +226,23 @@ softdep_update_inodeblock(ip, bp, waitfor)
panic("softdep_update_inodeblock called");
}
-int
-softdep_fsync(vp)
+void
+softdep_fsync_mountdev(vp)
struct vnode *vp;
{
- panic("softdep_fsync called");
- return (EIO);
+ return;
+}
+
+int
+softdep_flushworklist(oldmnt, countp, p)
+ struct mount *oldmnt;
+ int *countp;
+ struct proc *p;
+{
+
+ *countp = 0;
+ return (0);
}
int
@@ -247,11 +258,11 @@ softdep_sync_metadata(ap)
return (0);
}
-void
-softdep_fsync_mountdev(vp)
+int
+softdep_slowdown(vp)
struct vnode *vp;
{
- panic("softdep_fsync_mountdev called");
+ panic("softdep_slowdown called");
}
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 2db4c26a878..e50422b6862 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_vfsops.c,v 1.29 2001/02/20 01:50:12 assar Exp $ */
+/* $OpenBSD: ffs_vfsops.c,v 1.30 2001/02/21 23:24:31 csapuntz Exp $ */
/* $NetBSD: ffs_vfsops.c,v 1.19 1996/02/09 22:22:26 christos Exp $ */
/*
@@ -868,7 +868,7 @@ ffs_sync(mp, waitfor, cred, p)
register struct inode *ip;
register struct ufsmount *ump = VFSTOUFS(mp);
register struct fs *fs;
- int error, allerror = 0;
+ int error, allerror = 0, count;
fs = ump->um_fs;
/*
@@ -923,6 +923,13 @@ loop:
/*
* Force stale file system control information to be flushed.
*/
+ if ((ump->um_mountp->mnt_flag & MNT_SOFTDEP) && waitfor == MNT_WAIT) {
+ if ((error = softdep_flushworklist(ump->um_mountp, &count, p)) != 0)
+ allerror = error;
+ if (count)
+ goto loop;
+ }
if (waitfor != MNT_LAZY) {
if (ump->um_mountp->mnt_flag & MNT_SOFTDEP)
waitfor = MNT_NOWAIT;
diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h
index 6aec5127608..9b568184916 100644
--- a/sys/ufs/ffs/softdep.h
+++ b/sys/ufs/ffs/softdep.h
@@ -1,21 +1,17 @@
-/* $OpenBSD: softdep.h,v 1.3 2001/02/10 11:08:40 fgsch Exp $ */
+/* $OpenBSD: softdep.h,v 1.4 2001/02/21 23:24:31 csapuntz Exp $ */
/*
- * Copyright 1998 Marshall Kirk McKusick. All Rights Reserved.
+ * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
*
* The soft updates code is derived from the appendix of a University
* of Michigan technical report (Gregory R. Ganger and Yale N. Patt,
* "Soft Updates: A Solution to the Metadata Update Problem in File
* Systems", CSE-TR-254-95, August 1995).
*
- * The following are the copyrights and redistribution conditions that
- * apply to this copy of the soft update software. For a license
- * to use, redistribute or sell the soft update software under
- * conditions other than those described here, please contact the
- * author at one of the following addresses:
+ * Further information about soft updates can be obtained from:
*
- * Marshall Kirk McKusick mckusick@mckusick.com
- * 1614 Oxford Street +1-510-843-9542
- * Berkeley, CA 94709-1608
+ * Marshall Kirk McKusick http://www.mckusick.com/softdep/
+ * 1614 Oxford Street mckusick@mckusick.com
+ * Berkeley, CA 94709-1608 +1-510-843-9542
* USA
*
* Redistribution and use in source and binary forms, with or without
@@ -27,19 +23,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. None of the names of McKusick, Ganger, Patt, or the University of
- * Michigan may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 4. Redistributions in any form must be accompanied by information on
- * how to obtain complete source code for any accompanying software
- * that uses this software. This source code must either be included
- * in the distribution or be available for no more than the cost of
- * distribution plus a nominal fee, and must be freely redistributable
- * under reasonable conditions. For an executable file, complete
- * source code means the source code for all modules it contains.
- * It does not mean source code for modules or files that typically
- * accompany the operating system on which the executable file runs,
- * e.g., standard library modules or system header files.
*
* THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
@@ -53,7 +36,8 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * @(#)softdep.h 9.6 (McKusick) 2/25/99
+ * @(#)softdep.h 9.7 (McKusick) 6/21/00
+ * $FreeBSD: src/sys/ufs/ffs/softdep.h,v 1.10 2000/06/22 00:29:53 mckusick Exp $
*/
#include <sys/queue.h>
@@ -353,7 +337,7 @@ struct allocdirect {
struct indirdep {
struct worklist ir_list; /* buffer holding indirect block */
# define ir_state ir_list.wk_state /* indirect block pointer state */
- ufs_daddr_t *ir_saveddata; /* buffer cache contents */
+ caddr_t ir_saveddata; /* buffer cache contents */
struct buf *ir_savebp; /* buffer holding safe copy */
struct allocindirhd ir_donehd; /* done waiting to update safecopy */
struct allocindirhd ir_deplisthd; /* allocindir deps for this block */
@@ -399,7 +383,7 @@ struct freefrag {
struct worklist ff_list; /* id_inowait or delayed worklist */
# define ff_state ff_list.wk_state /* owning user; should be uid_t */
struct vnode *ff_devvp; /* filesystem device vnode */
- struct fs *ff_fs; /* addr of superblock */
+ struct mount *ff_mnt; /* associated mount point */
ufs_daddr_t ff_blkno; /* fragment physical block number */
long ff_fragsize; /* size of fragment being deleted */
ino_t ff_inum; /* owning inode number */
@@ -415,7 +399,7 @@ struct freeblks {
struct worklist fb_list; /* id_inowait or delayed worklist */
ino_t fb_previousinum; /* inode of previous owner of blocks */
struct vnode *fb_devvp; /* filesystem device vnode */
- struct fs *fb_fs; /* addr of superblock */
+ struct mount *fb_mnt; /* associated mount point */
off_t fb_oldsize; /* previous file size */
off_t fb_newsize; /* new file size */
int fb_chkcnt; /* used to check cnt of blks released */
@@ -435,7 +419,7 @@ struct freefile {
mode_t fx_mode; /* mode of inode */
ino_t fx_oldinum; /* inum of the unlinked file */
struct vnode *fx_devvp; /* filesystem device vnode */
- struct fs *fx_fs; /* addr of superblock */
+ struct mount *fx_mnt; /* associated mount point */
};
/*
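
In the softdep.h changes above, freefrag, freeblks, and freefile now carry the mount point (ff_mnt, fb_mnt, fx_mnt) instead of a raw superblock pointer. The superblock is still reachable from each work item; a minimal sketch of the access pattern, assuming only the VFSTOUFS()/um_fs relationship already used in ffs_sync() above (the function name and body here are illustrative, not the real work-item handler):

    static void
    handle_freefrag_sketch(struct freefrag *freefrag)
    {
        /* Recover the superblock through the recorded mount point. */
        struct ufsmount *ump = VFSTOUFS(freefrag->ff_mnt);
        struct fs *fs = ump->um_fs;     /* formerly freefrag->ff_fs */

        /* ... free ff_blkno / ff_fragsize against fs, then free the item ... */
    }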
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
index 09f582c4e09..7d23b147966 100644
--- a/sys/ufs/ufs/inode.h
+++ b/sys/ufs/ufs/inode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: inode.h,v 1.11 1999/09/10 23:39:10 art Exp $ */
+/* $OpenBSD: inode.h,v 1.12 2001/02/21 23:24:31 csapuntz Exp $ */
/* $NetBSD: inode.h,v 1.8 1995/06/15 23:22:50 cgd Exp $ */
/*
@@ -247,6 +247,7 @@ struct indir {
#else
#define DOINGSOFTDEP(vp) (0)
#endif
+#define DOINGASYNC(vp) ((vp)->v_mount->mnt_flag & MNT_ASYNC)
/* This overlays the fid structure (see mount.h). */
struct ufid {
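
inode.h gains DOINGASYNC(), a test for MNT_ASYNC mounts that parallels DOINGSOFTDEP(). The ufs_lookup.c and ufs_vnops.c hunks below use it to decide whether a metadata buffer may be delay-written rather than forced out synchronously; the shape of that decision, sketched in isolation:

    if (DOINGASYNC(vp)) {
        bdwrite(bp);            /* async mount: a delayed write is acceptable */
        error = 0;
    } else
        error = bwrite(bp);     /* otherwise write the block synchronously */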
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
index 66d5cac00c2..bdf0c2b3abf 100644
--- a/sys/ufs/ufs/ufs_extern.h
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_extern.h,v 1.9 2000/02/07 04:57:19 assar Exp $ */
+/* $OpenBSD: ufs_extern.h,v 1.10 2001/02/21 23:24:31 csapuntz Exp $ */
/* $NetBSD: ufs_extern.h,v 1.5 1996/02/09 22:36:03 christos Exp $ */
/*-
@@ -180,6 +180,7 @@ void softdep_setup_remove __P((struct buf *,struct inode *, struct inode *,
int));
void softdep_setup_directory_change __P((struct buf *, struct inode *,
struct inode *, long, int));
-void softdep_increase_linkcnt __P((struct inode *));
+void softdep_change_linkcnt __P((struct inode *));
+int softdep_slowdown __P((struct vnode *));
__END_DECLS
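
The prototype change above reflects the new calling convention: softdep_increase_linkcnt() becomes softdep_change_linkcnt(), which is called after any adjustment of i_effnlink, up or down, so the soft updates code always records the current effective link count. The typical call site, sketched from the ufs_vnops.c hunks below:

    ip->i_effnlink++;           /* or --, for a remove */
    ip->i_ffs_nlink++;
    ip->i_flag |= IN_CHANGE;
    if (DOINGSOFTDEP(vp))
        softdep_change_linkcnt(ip);     /* record the new effective count */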
diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c
index 623128442d6..bc8967bd75f 100644
--- a/sys/ufs/ufs/ufs_lookup.c
+++ b/sys/ufs/ufs/ufs_lookup.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_lookup.c,v 1.12 1999/02/26 03:35:18 art Exp $ */
+/* $OpenBSD: ufs_lookup.c,v 1.13 2001/02/21 23:24:31 csapuntz Exp $ */
/* $NetBSD: ufs_lookup.c,v 1.7 1996/02/09 22:36:06 christos Exp $ */
/*
@@ -958,19 +958,31 @@ ufs_dirremove(dvp, ip, flags, isrmdir)
ep->d_reclen += dp->i_reclen;
}
out:
- if (ip) {
- ip->i_effnlink--;
- ip->i_flag |= IN_CHANGE;
- }
if (DOINGSOFTDEP(dvp)) {
- if (ip)
- softdep_setup_remove(bp, dp, ip, isrmdir);
- bdwrite(bp);
+ if (ip) {
+ ip->i_effnlink--;
+ softdep_change_linkcnt(ip);
+ softdep_setup_remove(bp, dp, ip, isrmdir);
+ }
+ if (softdep_slowdown(dvp)) {
+ error = bwrite(bp);
+ } else {
+ bdwrite(bp);
+ error = 0;
+ }
} else {
- if (ip)
- ip->i_ffs_nlink--; /* XXX */
-
- error = VOP_BWRITE(bp);
+ if (ip) {
+ ip->i_effnlink--;
+ ip->i_ffs_nlink--;
+ ip->i_flag |= IN_CHANGE;
+ }
+ if (flags & DOWHITEOUT)
+ error = bwrite(bp);
+ else if (DOINGASYNC(dvp) && dp->i_count != 0) {
+ bdwrite(bp);
+ error = 0;
+ } else
+ error = bwrite(bp);
}
dp->i_flag |= IN_CHANGE | IN_UPDATE;
return (error);
@@ -1000,13 +1012,19 @@ ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir)
if (vdp->v_mount->mnt_maxsymlinklen > 0)
ep->d_type = newtype;
oip->i_effnlink--;
- oip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(vdp)) {
+ softdep_change_linkcnt(oip);
softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir);
bdwrite(bp);
} else {
- oip->i_ffs_nlink--; /* XXX */
- error = VOP_BWRITE(bp);
+ oip->i_ffs_nlink--;
+ oip->i_flag |= IN_CHANGE;
+ if (DOINGASYNC(vdp)) {
+ bdwrite(bp);
+ error = 0;
+ } else {
+ error = VOP_BWRITE(bp);
+ }
}
dp->i_flag |= IN_CHANGE | IN_UPDATE;
return (error);
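
The ufs_dirremove() rewrite above also changes how the directory block is written back. With soft updates, the block is normally delay-written, but softdep_slowdown() can force a synchronous write as back pressure when too much dependency work is queued; without soft updates, whiteout removals and non-async mounts get bwrite(), while an async mount that merely compacted an entry in place (dp->i_count != 0) may delay the write. A sketch of that policy as a standalone helper; dirblock_write() is hypothetical and only restates the branches above, with `compacting' standing in for dp->i_count != 0:

    static int
    dirblock_write(struct vnode *dvp, struct buf *bp, int flags, int compacting)
    {
        if (DOINGSOFTDEP(dvp)) {
            if (softdep_slowdown(dvp))
                return (bwrite(bp));    /* throttle: too much queued work */
            bdwrite(bp);                /* normal case: delayed write */
            return (0);
        }
        if ((flags & DOWHITEOUT) == 0 && DOINGASYNC(dvp) && compacting) {
            bdwrite(bp);                /* entry was only compacted in place */
            return (0);
        }
        return (bwrite(bp));            /* whiteouts and sync mounts */
    }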
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 2a648f44883..e224d37e729 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_vnops.c,v 1.28 2000/11/21 21:49:57 provos Exp $ */
+/* $OpenBSD: ufs_vnops.c,v 1.29 2001/02/21 23:24:32 csapuntz Exp $ */
/* $NetBSD: ufs_vnops.c,v 1.18 1996/05/11 18:28:04 mycroft Exp $ */
/*
@@ -758,7 +758,7 @@ ufs_link(v)
ip->i_ffs_nlink++;
ip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(vp))
- softdep_increase_linkcnt(ip);
+ softdep_change_linkcnt(ip);
TIMEVAL_TO_TIMESPEC(&time, &ts);
if ((error = VOP_UPDATE(vp, &ts, &ts, !DOINGSOFTDEP(vp))) == 0) {
ufs_makedirentry(ip, cnp, &newdir);
@@ -768,6 +768,8 @@ ufs_link(v)
ip->i_effnlink--;
ip->i_ffs_nlink--;
ip->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(vp))
+ softdep_change_linkcnt(ip);
}
FREE(cnp->cn_pnbuf, M_NAMEI);
VN_KNOTE(vp, NOTE_LINK);
@@ -924,9 +926,22 @@ abortit:
error = EPERM;
goto abortit;
}
+
+ /*
+ * Check if just deleting a link name or if we've lost a race.
+ * If another process completes the same rename after we've looked
+ * up the source and have blocked looking up the target, then the
+ * source and target inodes may be identical now although the
+ * names were never linked.
+ */
if (fvp == tvp) {
if (fvp->v_type == VDIR) {
- error = EINVAL;
+ /*
+ * Linked directories are impossible, so we must
+ * have lost the race. Pretend that the rename
+ * completed before the lookup.
+ */
+ error = ENOENT;
goto abortit;
}
@@ -935,7 +950,12 @@ abortit:
vput(tdvp);
vput(tvp);
- /* Delete source. */
+ /*
+ * Delete source. There is another race now that everything
+ * is unlocked, but this doesn't cause any new complications.
+ * Relookup() may find a file that is unrelated to the
+ * original one, or it may fail. Too bad.
+ */
vrele(fdvp);
vrele(fvp);
fcnp->cn_flags &= ~MODMASK;
@@ -1012,7 +1032,7 @@ abortit:
ip->i_ffs_nlink++;
ip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(fvp))
- softdep_increase_linkcnt(ip);
+ softdep_change_linkcnt(ip);
TIMEVAL_TO_TIMESPEC(&time, &ts);
if ((error = VOP_UPDATE(fvp, &ts, &ts, !DOINGSOFTDEP(fvp))) != 0) {
VOP_UNLOCK(fvp, 0, p);
@@ -1077,12 +1097,14 @@ abortit:
dp->i_ffs_nlink++;
dp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tdvp))
- softdep_increase_linkcnt(dp);
+ softdep_change_linkcnt(dp);
if ((error = VOP_UPDATE(tdvp, &ts, &ts,
!DOINGSOFTDEP(tdvp))) != 0) {
dp->i_effnlink--;
dp->i_ffs_nlink--;
dp->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(tdvp))
+ softdep_change_linkcnt(dp);
goto bad;
}
}
@@ -1092,6 +1114,8 @@ abortit:
dp->i_effnlink--;
dp->i_ffs_nlink--;
dp->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(tdvp))
+ softdep_change_linkcnt(dp);
(void)VOP_UPDATE(tdvp, &ts, &ts, 1);
}
goto bad;
@@ -1105,7 +1129,7 @@ abortit:
* Short circuit rename(foo, foo).
*/
if (xp->i_number == ip->i_number)
- panic("rename: same file");
+ panic("ufs_rename: same file");
/*
* If the parent directory is "sticky", then the user must
* own the parent directory, or the destination of the rename,
@@ -1146,10 +1170,12 @@ abortit:
if (doingdirectory) {
if (!newparent) {
dp->i_effnlink--;
- dp->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(tdvp))
+ softdep_change_linkcnt(dp);
}
xp->i_effnlink--;
- xp->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(tvp))
+ softdep_change_linkcnt(xp);
}
if (doingdirectory && !DOINGSOFTDEP(tvp)) {
/*
@@ -1163,10 +1189,13 @@ abortit:
* disk, so when running with that code we avoid doing
* them now.
*/
- if (!newparent)
+ if (!newparent) {
dp->i_ffs_nlink--;
+ dp->i_flag |= IN_CHANGE;
+ }
xp->i_ffs_nlink--;
+ xp->i_flag |= IN_CHANGE;
if ((error = VOP_TRUNCATE(tvp, (off_t)0, IO_SYNC,
tcnp->cn_cred, tcnp->cn_proc)) != 0)
goto bad;
@@ -1194,7 +1223,7 @@ abortit:
* From name has disappeared.
*/
if (doingdirectory)
- panic("rename: lost dir entry");
+ panic("ufs_rename: lost dir entry");
vrele(ap->a_fvp);
return (0);
}
@@ -1209,7 +1238,7 @@ abortit:
*/
if (xp != ip) {
if (doingdirectory)
- panic("rename: lost dir entry");
+ panic("ufs_rename: lost dir entry");
} else {
/*
* If the source is a directory with a
@@ -1244,6 +1273,9 @@ out:
ip->i_effnlink--;
ip->i_ffs_nlink--;
ip->i_flag |= IN_CHANGE;
+ ip->i_flag &= ~IN_RENAME;
+ if (DOINGSOFTDEP(fvp))
+ softdep_change_linkcnt(ip);
vput(fvp);
} else
vrele(fvp);
@@ -1311,7 +1343,7 @@ ufs_mkdir(v)
ip->i_effnlink = 2;
ip->i_ffs_nlink = 2;
if (DOINGSOFTDEP(tvp))
- softdep_increase_linkcnt(ip);
+ softdep_change_linkcnt(ip);
if (cnp->cn_flags & ISWHITEOUT)
ip->i_ffs_flags |= UF_OPAQUE;
@@ -1325,7 +1357,7 @@ ufs_mkdir(v)
dp->i_ffs_nlink++;
dp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(dvp))
- softdep_increase_linkcnt(dp);
+ softdep_change_linkcnt(dp);
TIMEVAL_TO_TIMESPEC(&time, &ts);
if ((error = VOP_UPDATE(dvp, &ts, &ts, !DOINGSOFTDEP(dvp))) != 0)
goto bad;
@@ -1395,6 +1427,8 @@ bad:
dp->i_effnlink--;
dp->i_ffs_nlink--;
dp->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(dvp))
+ softdep_change_linkcnt(dp);
/*
* No need to do an explicit VOP_TRUNCATE here, vrele will
* do this for us because we set the link count to 0.
@@ -1402,7 +1436,8 @@ bad:
ip->i_effnlink = 0;
ip->i_ffs_nlink = 0;
ip->i_flag |= IN_CHANGE;
-
+ if (DOINGSOFTDEP(tvp))
+ softdep_change_linkcnt(ip);
vput(tvp);
}
out:
@@ -1469,28 +1504,41 @@ ufs_rmdir(v)
* inode. If we crash in between, the directory
* will be reattached to lost+found,
*/
- if ((error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1)) != 0)
+ dp->i_effnlink--;
+ ip->i_effnlink--;
+ if (DOINGSOFTDEP(vp)) {
+ softdep_change_linkcnt(dp);
+ softdep_change_linkcnt(ip);
+ }
+ if ((error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1)) != 0) {
+ dp->i_effnlink++;
+ ip->i_effnlink++;
+ if (DOINGSOFTDEP(vp)) {
+ softdep_change_linkcnt(dp);
+ softdep_change_linkcnt(ip);
+ }
goto out;
+ }
+
VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
cache_purge(dvp);
/*
* Truncate inode. The only stuff left in the directory is "." and
* "..". The "." reference is inconsequential since we are quashing
- * it. We have removed the "." reference and the reference in the
- * parent directory, but there may be other hard links. The soft
- * update code will arange to do these operations after the parent
- * directory has been deleted on disk, so when running with
- * that code we avoid doing them now.
+ * it. The soft dependency code will arrange to do these operations
+ * after the parent directory entry has been deleted on disk, so
+ * when running with that code we avoid doing them now.
*/
- dp->i_effnlink--;
- dp->i_flag |= IN_CHANGE;
- ip->i_effnlink--;
- ip->i_flag |= IN_CHANGE;
if (!DOINGSOFTDEP(vp)) {
+ int ioflag;
+
dp->i_ffs_nlink--;
+ dp->i_flag |= IN_CHANGE;
ip->i_ffs_nlink--;
- error = VOP_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred,
- cnp->cn_proc);
+ ip->i_flag |= IN_CHANGE;
+ ioflag = DOINGASYNC(vp) ? 0 : IO_SYNC;
+ error = VOP_TRUNCATE(vp, (off_t)0, ioflag, cnp->cn_cred,
+ cnp->cn_proc);
}
cache_purge(vp);
out:
@@ -2114,7 +2162,7 @@ ufs_makeinode(mode, dvp, vpp, cnp)
ip->i_effnlink = 1;
ip->i_ffs_nlink = 1;
if (DOINGSOFTDEP(tvp))
- softdep_increase_linkcnt(ip);
+ softdep_change_linkcnt(ip);
if ((ip->i_ffs_mode & ISGID) &&
!groupmember(ip->i_ffs_gid, cnp->cn_cred) &&
suser(cnp->cn_cred, NULL))
@@ -2150,6 +2198,8 @@ bad:
ip->i_effnlink = 0;
ip->i_ffs_nlink = 0;
ip->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(tvp))
+ softdep_change_linkcnt(ip);
vput(tvp);
return (error);
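
Taken together, the ufs_vnops.c hunks enforce one rule: every change to i_effnlink is mirrored to the soft updates code via softdep_change_linkcnt(), including the rollbacks on error paths (a failed VOP_UPDATE() in rename, the mkdir and makeinode bad: labels, a failed ufs_dirremove() in rmdir). A sketch of that pairing; inode_unlink_start() and inode_unlink_abort() are hypothetical helpers that only package the pattern used inline above:

    static void
    inode_unlink_start(struct vnode *vp, struct inode *ip)
    {
        ip->i_effnlink--;               /* speculative drop before the write */
        if (DOINGSOFTDEP(vp))
            softdep_change_linkcnt(ip);
    }

    static void
    inode_unlink_abort(struct vnode *vp, struct inode *ip)
    {
        ip->i_effnlink++;               /* undo on failure */
        if (DOINGSOFTDEP(vp))
            softdep_change_linkcnt(ip); /* keep softdep's view consistent */
    }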