32 files changed, 2194 insertions, 1037 deletions
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 34587d51cc8..4b2582677a0 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: buf.h,v 1.7 1997/07/28 09:13:14 deraadt Exp $	*/
+/*	$OpenBSD: buf.h,v 1.8 1997/10/06 15:25:32 csapuntz Exp $	*/
 /*	$NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $	*/
 
 /*
@@ -48,6 +48,27 @@
 #define NOLIST ((struct buf *)0x87654321)
 
 /*
+ * Declared here to avoid including <ufs/ffs/softdep.h>.
+ */
+
+LIST_HEAD(workhead, worklist);
+
+/*
+ * These are currently used only by the soft dependency code, hence
+ * are stored once in a global variable. If other subsystems wanted
+ * to use these hooks, a pointer to a set of bio_ops could be added
+ * to each buffer.
+ */
+struct mount;
+extern struct bio_ops {
+	void	(*io_start) __P((struct buf *));
+	void	(*io_complete) __P((struct buf *));
+ 	void	(*io_deallocate) __P((struct buf *));
+ 	int	(*io_sync) __P((struct mount *));
+} bioops;
+ 
+
+/*
  * The buffer header describes an I/O operation in the kernel.
  */
 struct buf {
@@ -79,6 +100,7 @@ struct buf {
 	struct	ucred *b_wcred;		/* Write credentials reference. */
 	int	b_validoff;		/* Offset in buffer of valid region. */
 	int	b_validend;		/* Offset of end of valid region. */
+ 	struct	workhead b_dep;		/* List of filesystem dependencies. */
 };
 
 /*
@@ -177,6 +199,7 @@ int	breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int,
 void	brelse __P((struct buf *));
 void	bremfree __P((struct buf *));
 void	bufinit __P((void));
+void	bdirty __P((struct buf *));
 int	bwrite __P((struct buf *));
 void	cluster_callback __P((struct buf *));
 int	cluster_read __P((struct vnode *, u_quad_t, daddr_t, long,
diff --git a/sys/sys/lock.h b/sys/sys/lock.h
new file mode 100644
index 00000000000..f4491b09520
--- /dev/null
+++ b/sys/sys/lock.h
@@ -0,0 +1,167 @@
+/* 
+ * Copyright (c) 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code contains ideas from software contributed to Berkeley by
+ * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating
+ * System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)lock.h	8.12 (Berkeley) 5/19/95
+ */
+
+#ifndef	_LOCK_H_
+#define	_LOCK_H_
+
+#include <sys/simplelock.h>
+
+/*
+ * The general lock structure.  Provides for multiple shared locks,
+ * upgrading from shared to exclusive, and sleeping until the lock
+ * can be gained. The simple locks are defined in <sys/simplelock.h>.
+ */
+struct lock {
+	struct	simplelock lk_interlock; /* lock on remaining fields */
+	u_int	lk_flags;		/* see below */
+	int	lk_sharecount;		/* # of accepted shared locks */
+	int	lk_waitcount;		/* # of processes sleeping for lock */
+	short	lk_exclusivecount;	/* # of recursive exclusive locks */
+	short	lk_prio;		/* priority at which to sleep */
+	char	*lk_wmesg;		/* resource sleeping (for tsleep) */
+	int	lk_timo;		/* maximum sleep time (for tsleep) */
+	pid_t	lk_lockholder;		/* pid of exclusive lock holder */
+};
+/*
+ * Lock request types:
+ *   LK_SHARED - get one of many possible shared locks. If a process
+ *	holding an exclusive lock requests a shared lock, the exclusive
+ *	lock(s) will be downgraded to shared locks.
+ *   LK_EXCLUSIVE - stop further shared locks, when they are cleared,
+ *	grant a pending upgrade if it exists, then grant an exclusive
+ *	lock. Only one exclusive lock may exist at a time, except that
+ *	a process holding an exclusive lock may get additional exclusive
+ *	locks if it explicitly sets the LK_CANRECURSE flag in the lock
+ *	request, or if the LK_CANRECURSE flag was set when the lock was
+ *	initialized.
+ *   LK_UPGRADE - the process must hold a shared lock that it wants to
+ *	have upgraded to an exclusive lock. Other processes may get
+ *	exclusive access to the resource between the time that the upgrade
+ *	is requested and the time that it is granted.
+ *   LK_EXCLUPGRADE - the process must hold a shared lock that it wants to
+ *	have upgraded to an exclusive lock. If the request succeeds, no
+ *	other processes will have gotten exclusive access to the resource
+ *	between the time that the upgrade is requested and the time that
+ *	it is granted. However, if another process has already requested
+ *	an upgrade, the request will fail (see error returns below).
+ *   LK_DOWNGRADE - the process must hold an exclusive lock that it wants
+ *	to have downgraded to a shared lock. If the process holds multiple
+ *	(recursive) exclusive locks, they will all be downgraded to shared
+ *	locks.
+ *   LK_RELEASE - release one instance of a lock.
+ *   LK_DRAIN - wait for all activity on the lock to end, then mark it
+ *	decommissioned. This feature is used before freeing a lock that
+ *	is part of a piece of memory that is about to be freed.
+ *
+ * These are flags that are passed to the lockmgr routine.
+ */
+#define LK_TYPE_MASK	0x0000000f	/* type of lock sought */
+#define LK_SHARED	0x00000001	/* shared lock */
+#define LK_EXCLUSIVE	0x00000002	/* exclusive lock */
+#define LK_UPGRADE	0x00000003	/* shared-to-exclusive upgrade */
+#define LK_EXCLUPGRADE	0x00000004	/* first shared-to-exclusive upgrade */
+#define LK_DOWNGRADE	0x00000005	/* exclusive-to-shared downgrade */
+#define LK_RELEASE	0x00000006	/* release any type of lock */
+#define LK_DRAIN	0x00000007	/* wait for all lock activity to end */
+/*
+ * External lock flags.
+ *
+ * The first three flags may be set in lockinit to set their mode permanently,
+ * or passed in as arguments to the lock manager. The LK_REENABLE flag may be
+ * set only at the release of a lock obtained by drain.
+ */
+#define LK_EXTFLG_MASK	0x00000070	/* mask of external flags (NOWAIT|SLEEPFAIL|CANRECURSE) */
+#define LK_NOWAIT	0x00000010	/* do not sleep to await lock */
+#define LK_SLEEPFAIL	0x00000020	/* sleep, then return failure */
+#define LK_CANRECURSE	0x00000040	/* allow recursive exclusive lock */
+#define LK_REENABLE	0x00000080	/* lock is to be reenabled after drain */
+/*
+ * Internal lock flags.
+ *
+ * These flags are used internally to the lock manager.
+ */
+#define LK_WANT_UPGRADE	0x00000100	/* waiting for share-to-excl upgrade */
+#define LK_WANT_EXCL	0x00000200	/* exclusive lock sought */
+#define LK_HAVE_EXCL	0x00000400	/* exclusive lock obtained */
+#define LK_WAITDRAIN	0x00000800	/* process waiting for lock to drain */
+#define LK_DRAINING	0x00004000	/* lock is being drained */
+#define LK_DRAINED	0x00008000	/* lock has been decommissioned */
+/*
+ * Control flags
+ *
+ * Non-persistent external flags.
+ */
+#define LK_INTERLOCK	0x00010000	/* unlock passed simple lock after
+					   getting lk_interlock */
+#define LK_RETRY	0x00020000	/* vn_lock: retry until locked */
+
+/*
+ * Lock return status.
+ *
+ * Successfully obtained locks return 0. Locks will always succeed
+ * unless one of the following is true:
+ *	LK_EXCLUPGRADE is requested and some other process has already
+ *	    requested a lock upgrade (returns EBUSY).
+ *	LK_NOWAIT is set and a sleep would be required (returns EBUSY).
+ *	LK_SLEEPFAIL is set and a sleep was done (returns ENOLCK).
+ *	PCATCH is set in lock priority and a signal arrives (returns
+ *	    either EINTR or ERESTART if the system call is to be restarted).
+ *	Non-zero lock timeout and timeout expires (returns EWOULDBLOCK).
+ * A failed lock attempt always returns a non-zero error value. No lock
+ * is held after an error return (in particular, a failed LK_UPGRADE
+ * or LK_EXCLUPGRADE will have released its shared access lock).
+ */
+
+/*
+ * Indicator that no process holds exclusive lock
+ */
+#define LK_KERNPROC ((pid_t) -2)
+#define LK_NOPROC ((pid_t) -1)
+
+struct proc;
+
+void	lockinit __P((struct lock *, int prio, char *wmesg, int timo,
+			int flags));
+int	lockmgr __P((__volatile struct lock *, u_int flags,
+			struct simplelock *, struct proc *p));
+void    lockmgr_printinfo __P((struct lock *));
+int	lockstatus __P((struct lock *));
+
+#endif /* !_LOCK_H_ */
+
diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h
index 4b87be6fa20..3e380f50dfd 100644
--- a/sys/sys/malloc.h
+++ b/sys/sys/malloc.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: malloc.h,v 1.10 1997/03/01 21:24:46 kstailey Exp $	*/
+/*	$OpenBSD: malloc.h,v 1.11 1997/10/06 15:25:33 csapuntz Exp $	*/
 /*	$NetBSD: malloc.h,v 1.23 1996/04/05 04:52:52 mhitch Exp $	*/
 
 /*
@@ -128,8 +128,25 @@
 #define	M_PFIL		73	/* packer filter */
 #define	M_TDB		75	/* Transforms database */
 #define	M_XDATA		76	/* IPsec data */
-#define	M_TEMP		84	/* misc temporary data buffers */
-#define	M_LAST		85	/* Must be last type + 1 */
+#define M_VFS           77      /* VFS file systems */
+
+#define	M_PAGEDEP	78	/* File page dependencies */
+#define	M_INODEDEP	79	/* Inode dependencies */
+#define	M_NEWBLK	80	/* New block allocation */
+#define	M_BMSAFEMAP	81	/* Block or frag allocated from cyl group map */
+#define	M_ALLOCDIRECT	82	/* Block or frag dependency for an inode */
+#define	M_INDIRDEP	83	/* Indirect block dependencies */
+#define	M_ALLOCINDIR	84	/* Block dependency for an indirect block */
+#define	M_FREEFRAG	85	/* Previously used frag for an inode */
+#define	M_FREEBLKS	86	/* Blocks freed from an inode */
+#define	M_FREEFILE	87	/* Inode deallocated */
+#define	M_DIRADD	88	/* New directory entry */
+#define	M_MKDIR		89	/* New directory */
+#define	M_DIRREM	90	/* Directory entry deleted */
+
+#define	M_TEMP		127	/* misc temporary data buffers */
+#define M_LAST          128     /* Must be last type + 1 */
+
 
 #define	INITKMEMNAMES { \
 	"free",		/* 0 M_FREE */ \
@@ -209,9 +226,29 @@
 	NULL, \
 	"tdb",		/* 75 M_TDB */ \
 	"xform_data",	/* 76 M_XDATA */ \
-	NULL, NULL, \
+	"vfs",          /* 77 M_VFS */ \
+ 	"pagedep",	/* 78 M_PAGEDEP */ \
+ 	"inodedep",	/* 79 M_INODEDEP */ \
+ 	"newblk",	/* 80 M_NEWBLK */ \
+ 	"bmsafemap",	/* 81 M_BMSAFEMAP */ \
+ 	"allocdirect",	/* 82 M_ALLOCDIRECT */ \
+ 	"indirdep",	/* 83 M_INDIRDEP */ \
+ 	"allocindir",	/* 84 M_ALLOCINDIR */ \
+ 	"freefrag",	/* 85 M_FREEFRAG */ \
+ 	"freeblks",	/* 86 M_FREEBLKS */ \
+ 	"freefile",	/* 87 M_FREEFILE */ \
+ 	"diradd",	/* 88 M_DIRADD */ \
+ 	"mkdir",	/* 89 M_MKDIR */ \
+ 	"dirrem",	/* 90 M_DIRREM */ \
+	NULL, NULL, NULL, NULL, NULL, \
+	NULL, NULL, NULL, NULL, NULL, \
+	NULL, NULL, NULL, NULL, NULL, \
+	NULL, NULL, NULL, NULL, NULL, \
+	NULL, NULL, NULL, NULL, NULL, \
+	NULL, NULL, NULL, NULL, NULL, \
 	NULL, NULL, NULL, NULL, NULL, \
-	"temp",		/* 84 M_TEMP */ \
+	NULL,                         \
+	"temp",		/* 127 M_TEMP */ \
 }
 
 struct kmemstats {
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 2ad19911a6a..776740078d0 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: mount.h,v 1.18 1997/04/16 09:49:00 downsj Exp $	*/
+/*	$OpenBSD: mount.h,v 1.19 1997/10/06 15:25:33 csapuntz Exp $	*/
 /*	$NetBSD: mount.h,v 1.48 1996/02/18 11:55:47 fvdl Exp $	*/
 
 /*
@@ -43,6 +43,7 @@
 #include <sys/ucred.h>
 #endif
 #include <sys/queue.h>
+#include <sys/lock.h>
 
 typedef struct { int32_t val[2]; } fsid_t;	/* file system id type */
 
@@ -55,7 +56,7 @@ typedef struct { int32_t val[2]; } fsid_t;	/* file system id type */
 struct fid {
 	u_short		fid_len;		/* length of data in bytes */
 	u_short		fid_reserved;		/* force longword alignment */
-	char		fid_data[MAXFIDSZ];	/* data (variable length) */
+ 	char		fid_data[MAXFIDSZ];	/* data (variable length) */
 };
 
 /*
@@ -77,7 +78,9 @@ struct statfs {
 	long	f_ffree;		/* free file nodes in fs */
 	fsid_t	f_fsid;			/* file system id */
 	uid_t	f_owner;		/* user that mounted the file system */
-	long	f_spare[4];		/* spare for later */
+	long    f_syncwrites;           /* count of sync writes since mount */
+	long    f_asyncwrites;          /* count of async writes since mount */
+	long	f_spare[2];		/* spare for later */
 	char	f_fstypename[MFSNAMELEN]; /* fs type name */
 	char	f_mntonname[MNAMELEN];	  /* directory on which mounted */
 	char	f_mntfromname[MNAMELEN];  /* mounted file system */
@@ -116,8 +119,11 @@ LIST_HEAD(vnodelst, vnode);
 struct mount {
 	CIRCLEQ_ENTRY(mount) mnt_list;		/* mount list */
 	struct vfsops	*mnt_op;		/* operations on fs */
+	struct vfsconf  *mnt_vfc;               /* configuration info */
 	struct vnode	*mnt_vnodecovered;	/* vnode we mounted on */
+	struct vnode    *mnt_syncer;            /* syncer vnode */
 	struct vnodelst	mnt_vnodelist;		/* list of vnodes this mount */
+	struct lock     mnt_lock;               /* mount structure lock */
 	int		mnt_flag;		/* flags */
 	int		mnt_maxsymlinklen;	/* max size of short symlink */
 	struct statfs	mnt_stat;		/* cache of filesystem stats */
@@ -161,7 +167,7 @@ struct mount {
 /*
  * Mask of flags that are visible to statfs()
  */
-#define	MNT_VISFLAGMASK	0x0000ffff
+#define	MNT_VISFLAGMASK	0x0400ffff
 
 /*
  * filesystem control flags.
@@ -180,6 +186,37 @@ struct mount {
 #define MNT_MPWANT	0x00800000	/* waiting for mount point */
 #define MNT_UNMOUNT	0x01000000	/* unmount in progress */
 #define MNT_WANTRDWR	0x02000000	/* want upgrade to read/write */
+#define MNT_SOFTDEP     0x04000000      /* soft dependencies being done */
+/*
+ * Sysctl CTL_VFS definitions.
+ *
+ * Second level identifier specifies which filesystem. Second level
+ * identifier VFS_GENERIC returns information about all filesystems.
+ */
+#define	VFS_GENERIC		0	/* generic filesystem information */
+/*
+ * Third level identifiers for VFS_GENERIC are given below; third
+ * level identifiers for specific filesystems are given in their
+ * mount specific header files.
+ */
+#define VFS_MAXTYPENUM	1	/* int: highest defined filesystem type */
+#define VFS_CONF	2	/* struct: vfsconf for filesystem given
+				   as next argument */
+
+/*
+ * Filesystem configuration information. One of these exists for each
+ * type of filesystem supported by the kernel. These are searched at
+ * mount time to identify the requested filesystem.
+ */
+struct vfsconf {
+	struct	vfsops *vfc_vfsops;	/* filesystem operations vector */
+	char	vfc_name[MFSNAMELEN];	/* filesystem type name */
+	int	vfc_typenum;		/* historic filesystem type number */
+	int	vfc_refcount;		/* number mounted of this type */
+	int	vfc_flags;		/* permanent flags */
+	int	(*vfc_mountroot) __P((void)); /* if != NULL, routine to mount root */
+	struct	vfsconf *vfc_next;	/* next in list */
+};
 
 /*
  * Operations supported on mounted file system.
@@ -190,8 +227,10 @@ struct nameidata;
 struct mbuf;
 #endif
 
+extern int maxvfsconf;		/* highest defined filesystem type */
+extern struct vfsconf *vfsconf;	/* head of list of filesystem types */
+
 struct vfsops {
-	char	*vfs_name;
 	int	(*vfs_mount)	__P((struct mount *mp, char *path, caddr_t data,
 				    struct nameidata *ndp, struct proc *p));
 	int	(*vfs_start)	__P((struct mount *mp, int flags,
@@ -211,8 +250,9 @@ struct vfsops {
 				    struct mbuf *nam, struct vnode **vpp,
 				    int *exflagsp, struct ucred **credanonp));
 	int	(*vfs_vptofh)	__P((struct vnode *vp, struct fid *fhp));
-	void	(*vfs_init)	__P((void));
-	int	vfs_refcount;
+	int	(*vfs_init)	__P((struct vfsconf *));
+	int     (*vfs_sysctl)   __P((int *, u_int, void *, size_t *, void *,
+				     size_t, struct proc *));
 };
 
 #define VFS_MOUNT(MP, PATH, DATA, NDP, P) \
@@ -234,8 +274,9 @@ struct vfsops {
  *
  * waitfor flags to vfs_sync() and getfsstat()
  */
-#define MNT_WAIT	1
-#define MNT_NOWAIT	2
+#define MNT_WAIT	1	/* synchronously wait for I/O to complete */
+#define MNT_NOWAIT	2	/* start all I/O, but do not wait for it */
+#define MNT_LAZY	3	/* push data not written by filesystem syncer */
 
 /*
  * Generic file handle
@@ -446,21 +487,25 @@ struct adosfs_args {
 /*
  * exported vnode operations
  */
+int	vfs_busy __P((struct mount *, int, struct simplelock *, struct proc *));
+void	vfs_getnewfsid __P((struct mount *));
+struct	mount *vfs_getvfs __P((fsid_t *));
+int	vfs_mountedon __P((struct vnode *));
+int	vfs_mountroot __P((void));
+int	vfs_rootmountalloc __P((char *, char *, struct mount **));
+void	vfs_unbusy __P((struct mount *, struct proc *));
+void	vfs_unmountall __P((void));
+extern	CIRCLEQ_HEAD(mntlist, mount) mountlist;
+extern	struct simplelock mountlist_slock;
+
 struct	mount *getvfs __P((fsid_t *));	    /* return vfs given fsid */
 int	vfs_export			    /* process mount export info */
 	  __P((struct mount *, struct netexport *, struct export_args *));
 struct	netcred *vfs_export_lookup	    /* lookup host in fs export list */
 	  __P((struct mount *, struct netexport *, struct mbuf *));
-int	vfs_lock __P((struct mount *));	    /* lock a vfs */
-int	vfs_mountedon __P((struct vnode *));/* is a vfs mounted on vp */
+int	vfs_allocate_syncvnode __P((struct mount *));
+
 void	vfs_shutdown __P((void));	    /* unmount and sync file systems */
-void	vfs_unlock __P((struct mount *));   /* unlock a vfs */
-void	vfs_unmountall __P((void));	    /* unmount file systems */
-int 	vfs_busy __P((struct mount *));
-void	vfs_unbusy __P((struct mount *));
-extern	CIRCLEQ_HEAD(mntlist, mount) mountlist;	/* mounted filesystem list */
-extern	struct vfsops *vfssw[];		    /* filesystem type table */
-extern	int nvfssw;
 long	makefstype __P((char *));
 int	dounmount __P((struct mount *, int, struct proc *));
 void	vfsinit __P((void));
@@ -479,6 +524,8 @@ int	getmntinfo __P((struct statfs **, int));
 int	mount __P((const char *, const char *, int, void *));
 int	statfs __P((const char *, struct statfs *));
 int	unmount __P((const char *, int));
+
+
 __END_DECLS
 
 #endif /* _KERNEL */
diff --git a/sys/sys/param.h b/sys/sys/param.h
index a7d227ee2da..d9b459abc71 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: param.h,v 1.15 1997/10/01 21:53:36 deraadt Exp $	*/
+/*	$OpenBSD: param.h,v 1.16 1997/10/06 15:25:34 csapuntz Exp $	*/
 /*	$NetBSD: param.h,v 1.23 1996/03/17 01:02:29 thorpej Exp $	*/
 
 /*-
@@ -54,6 +54,7 @@
 
 #ifndef _LOCORE
 #include <sys/types.h>
+#include <sys/simplelock.h>
 #endif
 
 /*
diff --git a/sys/sys/queue.h b/sys/sys/queue.h
index 962009c90d4..e617d3c4052 100644
--- a/sys/sys/queue.h
+++ b/sys/sys/queue.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: queue.h,v 1.4 1996/05/22 12:07:15 deraadt Exp $	*/
+/*	$OpenBSD: queue.h,v 1.5 1997/10/06 15:25:34 csapuntz Exp $	*/
 /*	$NetBSD: queue.h,v 1.11 1996/05/16 05:17:14 mycroft Exp $	*/
 
 /*
@@ -62,7 +62,7 @@
  * linked so that an arbitrary element can be removed without a need to
  * traverse the list. New elements can be added to the list before or
  * after an existing element, at the head of the list, or at the end of
- * the list. A tail queue may only be traversed in the forward direction.
+ * the list. A tail queue may be traversed in either direction.
  *
  * A circle queue is headed by a pair of pointers, one to the head of the
  * list and the other to the tail of the list. The elements are doubly
@@ -81,7 +81,7 @@
 #define LIST_HEAD(name, type)						\
 struct name {								\
 	struct type *lh_first;	/* first element */			\
-}
+} 
 
 #define LIST_ENTRY(type)						\
 struct {								\
@@ -89,41 +89,45 @@ struct {								\
 	struct type **le_prev;	/* address of previous next element */	\
 }
 
+#define	LIST_FIRST(head)		((head)->lh_first)
+#define	LIST_NEXT(elm, field)		((elm)->field.le_next)
+#define	LIST_END(head)			NULL
+
 /*
  * List functions.
  */
-#define	LIST_INIT(head) {						\
+#define	LIST_INIT(head) do {						\
 	(head)->lh_first = NULL;					\
-}
+} while (0)
 
-#define LIST_INSERT_AFTER(listelm, elm, field) {			\
+#define LIST_INSERT_AFTER(listelm, elm, field) do {			\
 	if (((elm)->field.le_next = (listelm)->field.le_next) != NULL)	\
 		(listelm)->field.le_next->field.le_prev =		\
 		    &(elm)->field.le_next;				\
 	(listelm)->field.le_next = (elm);				\
 	(elm)->field.le_prev = &(listelm)->field.le_next;		\
-}
+} while (0)
 
-#define	LIST_INSERT_BEFORE(listelm, elm, field) {			\
+#define	LIST_INSERT_BEFORE(listelm, elm, field) do {			\
 	(elm)->field.le_prev = (listelm)->field.le_prev;		\
 	(elm)->field.le_next = (listelm);				\
 	*(listelm)->field.le_prev = (elm);				\
 	(listelm)->field.le_prev = &(elm)->field.le_next;		\
-}
+} while (0)
 
-#define LIST_INSERT_HEAD(head, elm, field) {				\
+#define LIST_INSERT_HEAD(head, elm, field) do {				\
 	if (((elm)->field.le_next = (head)->lh_first) != NULL)		\
 		(head)->lh_first->field.le_prev = &(elm)->field.le_next;\
 	(head)->lh_first = (elm);					\
 	(elm)->field.le_prev = &(head)->lh_first;			\
-}
+} while (0)
 
-#define LIST_REMOVE(elm, field) {					\
+#define LIST_REMOVE(elm, field) do {					\
 	if ((elm)->field.le_next != NULL)				\
 		(elm)->field.le_next->field.le_prev = 			\
 		    (elm)->field.le_prev;				\
 	*(elm)->field.le_prev = (elm)->field.le_next;			\
-}
+} while (0)
 
 /*
  * Simple queue definitions.
@@ -142,33 +146,33 @@ struct {								\
 /*
  * Simple queue functions.
  */
-#define	SIMPLEQ_INIT(head) {						\
+#define	SIMPLEQ_INIT(head) do {						\
 	(head)->sqh_first = NULL;					\
 	(head)->sqh_last = &(head)->sqh_first;				\
-}
+} while (0)
 
-#define SIMPLEQ_INSERT_HEAD(head, elm, field) {				\
+#define SIMPLEQ_INSERT_HEAD(head, elm, field) do {				\
 	if (((elm)->field.sqe_next = (head)->sqh_first) == NULL)	\
 		(head)->sqh_last = &(elm)->field.sqe_next;		\
 	(head)->sqh_first = (elm);					\
-}
+} while (0)
 
-#define SIMPLEQ_INSERT_TAIL(head, elm, field) {				\
+#define SIMPLEQ_INSERT_TAIL(head, elm, field) do {				\
 	(elm)->field.sqe_next = NULL;					\
 	*(head)->sqh_last = (elm);					\
 	(head)->sqh_last = &(elm)->field.sqe_next;			\
-}
+} while (0)
 
-#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) {		\
+#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do {		\
 	if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL)\
 		(head)->sqh_last = &(elm)->field.sqe_next;		\
 	(listelm)->field.sqe_next = (elm);				\
-}
+} while (0)
 
-#define SIMPLEQ_REMOVE_HEAD(head, elm, field) {				\
+#define SIMPLEQ_REMOVE_HEAD(head, elm, field) do {				\
 	if (((head)->sqh_first = (elm)->field.sqe_next) == NULL)	\
 		(head)->sqh_last = &(head)->sqh_first;			\
-}
+} while (0)
 
 /*
  * Tail queue definitions.
@@ -185,15 +189,24 @@ struct {								\
 	struct type **tqe_prev;	/* address of previous next element */	\
 }
 
+
+#define	TAILQ_FIRST(head)		((head)->tqh_first)
+#define	TAILQ_NEXT(elm, field)		((elm)->field.tqe_next)
+#define	TAILQ_END(head)			NULL
+#define TAILQ_LAST(head, headname) \
+	(*(((struct headname *)((head)->tqh_last))->tqh_last))
+#define TAILQ_PREV(elm, headname, field) \
+	(*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
+
 /*
  * Tail queue functions.
  */
-#define	TAILQ_INIT(head) {						\
+#define	TAILQ_INIT(head) do {						\
 	(head)->tqh_first = NULL;					\
 	(head)->tqh_last = &(head)->tqh_first;				\
-}
+} while (0)
 
-#define TAILQ_INSERT_HEAD(head, elm, field) {				\
+#define TAILQ_INSERT_HEAD(head, elm, field) do {				\
 	if (((elm)->field.tqe_next = (head)->tqh_first) != NULL)	\
 		(head)->tqh_first->field.tqe_prev =			\
 		    &(elm)->field.tqe_next;				\
@@ -201,16 +214,16 @@ struct {								\
 		(head)->tqh_last = &(elm)->field.tqe_next;		\
 	(head)->tqh_first = (elm);					\
 	(elm)->field.tqe_prev = &(head)->tqh_first;			\
-}
+} while (0)
 
-#define TAILQ_INSERT_TAIL(head, elm, field) {				\
+#define TAILQ_INSERT_TAIL(head, elm, field) do {				\
 	(elm)->field.tqe_next = NULL;					\
 	(elm)->field.tqe_prev = (head)->tqh_last;			\
 	*(head)->tqh_last = (elm);					\
 	(head)->tqh_last = &(elm)->field.tqe_next;			\
-}
+} while (0)
 
-#define TAILQ_INSERT_AFTER(head, listelm, elm, field) {			\
+#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do {			\
 	if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\
 		(elm)->field.tqe_next->field.tqe_prev = 		\
 		    &(elm)->field.tqe_next;				\
@@ -218,23 +231,23 @@ struct {								\
 		(head)->tqh_last = &(elm)->field.tqe_next;		\
 	(listelm)->field.tqe_next = (elm);				\
 	(elm)->field.tqe_prev = &(listelm)->field.tqe_next;		\
-}
+} while (0)
 
-#define	TAILQ_INSERT_BEFORE(listelm, elm, field) {			\
+#define	TAILQ_INSERT_BEFORE(listelm, elm, field) do {			\
 	(elm)->field.tqe_prev = (listelm)->field.tqe_prev;		\
 	(elm)->field.tqe_next = (listelm);				\
 	*(listelm)->field.tqe_prev = (elm);				\
 	(listelm)->field.tqe_prev = &(elm)->field.tqe_next;		\
-}
+} while (0)
 
-#define TAILQ_REMOVE(head, elm, field) {				\
+#define TAILQ_REMOVE(head, elm, field) do {				\
 	if (((elm)->field.tqe_next) != NULL)				\
 		(elm)->field.tqe_next->field.tqe_prev = 		\
 		    (elm)->field.tqe_prev;				\
 	else								\
 		(head)->tqh_last = (elm)->field.tqe_prev;		\
 	*(elm)->field.tqe_prev = (elm)->field.tqe_next;			\
-}
+} while (0)
 
 /*
  * Circular queue definitions.
@@ -251,15 +264,21 @@ struct {								\
 	struct type *cqe_prev;		/* previous element */		\
 }
 
+#define	CIRCLEQ_FIRST(head)		((head)->cqh_first)
+#define	CIRCLEQ_LAST(head)		((head)->cqh_last)
+#define	CIRCLEQ_END(head)		((void *)(head))
+#define	CIRCLEQ_NEXT(elm, field)	((elm)->field.cqe_next)
+#define	CIRCLEQ_PREV(elm, field)	((elm)->field.cqe_prev)
+
 /*
  * Circular queue functions.
  */
-#define	CIRCLEQ_INIT(head) {						\
+#define	CIRCLEQ_INIT(head) do {						\
 	(head)->cqh_first = (void *)(head);				\
 	(head)->cqh_last = (void *)(head);				\
-}
+} while (0)
 
-#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) {		\
+#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do {		\
 	(elm)->field.cqe_next = (listelm)->field.cqe_next;		\
 	(elm)->field.cqe_prev = (listelm);				\
 	if ((listelm)->field.cqe_next == (void *)(head))		\
@@ -267,9 +286,9 @@ struct {								\
 	else								\
 		(listelm)->field.cqe_next->field.cqe_prev = (elm);	\
 	(listelm)->field.cqe_next = (elm);				\
-}
+} while (0)
 
-#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) {		\
+#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do {		\
 	(elm)->field.cqe_next = (listelm);				\
 	(elm)->field.cqe_prev = (listelm)->field.cqe_prev;		\
 	if ((listelm)->field.cqe_prev == (void *)(head))		\
@@ -277,9 +296,9 @@ struct {								\
 	else								\
 		(listelm)->field.cqe_prev->field.cqe_next = (elm);	\
 	(listelm)->field.cqe_prev = (elm);				\
-}
+} while (0)
 
-#define CIRCLEQ_INSERT_HEAD(head, elm, field) {				\
+#define CIRCLEQ_INSERT_HEAD(head, elm, field) do {				\
 	(elm)->field.cqe_next = (head)->cqh_first;			\
 	(elm)->field.cqe_prev = (void *)(head);				\
 	if ((head)->cqh_last == (void *)(head))				\
@@ -287,9 +306,9 @@ struct {								\
 	else								\
 		(head)->cqh_first->field.cqe_prev = (elm);		\
 	(head)->cqh_first = (elm);					\
-}
+} while (0)
 
-#define CIRCLEQ_INSERT_TAIL(head, elm, field) {				\
+#define CIRCLEQ_INSERT_TAIL(head, elm, field) do {				\
 	(elm)->field.cqe_next = (void *)(head);				\
 	(elm)->field.cqe_prev = (head)->cqh_last;			\
 	if ((head)->cqh_first == (void *)(head))			\
@@ -297,9 +316,9 @@ struct {								\
 	else								\
 		(head)->cqh_last->field.cqe_next = (elm);		\
 	(head)->cqh_last = (elm);					\
-}
+} while (0)
 
-#define	CIRCLEQ_REMOVE(head, elm, field) {				\
+#define	CIRCLEQ_REMOVE(head, elm, field) do {				\
 	if ((elm)->field.cqe_next == (void *)(head))			\
 		(head)->cqh_last = (elm)->field.cqe_prev;		\
 	else								\
@@ -310,5 +329,5 @@ struct {								\
 	else								\
 		(elm)->field.cqe_prev->field.cqe_next =			\
 		    (elm)->field.cqe_next;				\
-}
+} while (0)
 #endif	/* !_SYS_QUEUE_H_ */
diff --git a/sys/sys/simplelock.h b/sys/sys/simplelock.h
new file mode 100644
index 00000000000..c979f157b08
--- /dev/null
+++ b/sys/sys/simplelock.h
@@ -0,0 +1,86 @@
+#ifndef _SIMPLELOCK_H_
+#define _SIMPLELOCK_H_
+/*
+ * A simple spin lock.
+ *
+ * This structure only sets one bit of data, but is sized based on the
+ * minimum word size that can be operated on by the hardware test-and-set
+ * instruction. It is only needed for multiprocessors, as uniprocessors
+ * will always run to completion or a sleep. It is an error to hold one
+ * of these locks while a process is sleeping.
+ */
+struct simplelock {
+	int	lock_data;	/* 0 when unlocked; set non-zero by test_and_set */
+};
+
+#ifndef NCPUS
+#define NCPUS 1
+#endif
+
+#if NCPUS == 1
+
+#if !defined(DEBUG)
+#define	simple_lock(alp)
+#define	simple_lock_try(alp)	(1)	/* always succeeds */
+#define	simple_unlock(alp)
+
+static __inline void simple_lock_init __P((struct simplelock *));
+
+static __inline void
+simple_lock_init(lkp)
+	struct simplelock *lkp;
+{
+	/* Zero is the unlocked value (the NCPUS > 1 simple_unlock stores it). */
+	lkp->lock_data = 0;
+}
+
+#else
+
+void _simple_unlock __P((__volatile struct simplelock *alp, const char *, int));
+#define simple_unlock(alp) _simple_unlock(alp, __FILE__, __LINE__)
+int _simple_lock_try __P((__volatile struct simplelock *alp, const char *, int));
+#define simple_lock_try(alp) _simple_lock_try(alp, __FILE__, __LINE__)
+void _simple_lock __P((__volatile struct simplelock *alp, const char *, int));
+#define simple_lock(alp) _simple_lock(alp, __FILE__, __LINE__)
+void simple_lock_init __P((struct simplelock *alp));
+
+#endif /* !defined(DEBUG) */
+
+#else  /* NCPUS >  1 */
+
+/*
+ * The simple-lock routines are the primitives out of which the lock
+ * package is built. The machine-dependent code must implement an
+ * atomic test_and_set operation that indivisibly sets the simple lock
+ * to non-zero and returns its old value. It also assumes that the
+ * setting of the lock to zero below is indivisible. Simple locks may
+ * only be used for exclusive locks.
+ */
+
+static __inline void
+simple_lock(lkp)
+	__volatile struct simplelock *lkp;
+{
+	/* Spin while test_and_set reports the lock word was already set. */
+	while (test_and_set(&lkp->lock_data))
+		continue;
+}
+
+static __inline int
+simple_lock_try(lkp)
+	__volatile struct simplelock *lkp;
+{
+	/* test_and_set returns the old value: zero means we took the lock. */
+	return (!test_and_set(&lkp->lock_data));
+}
+
+static __inline void
+simple_unlock(lkp)
+	__volatile struct simplelock *lkp;
+{
+	/* Release: the store of zero is assumed to be indivisible. */
+	lkp->lock_data = 0;
+}
+#endif /* NCPUS > 1 */
+
+#endif /* !_SIMPLELOCK_H_ */
diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h
index 9eb21269f2c..3cc255b0b1c 100644
--- a/sys/sys/sysctl.h
+++ b/sys/sys/sysctl.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: sysctl.h,v 1.18 1997/09/08 17:28:18 kstailey Exp $	*/
+/*	$OpenBSD: sysctl.h,v 1.19 1997/10/06 15:25:35 csapuntz Exp $	*/
 /*	$NetBSD: sysctl.h,v 1.16 1996/04/09 20:55:36 cgd Exp $	*/
 
 /*
@@ -49,9 +49,10 @@
 #include <sys/time.h>
 #include <sys/ucred.h>
 #include <sys/proc.h>
-#include <vm/vm.h>
 #endif
 
+#include <vm/vm.h>
+
 /*
  * Definitions for sysctl call.  The sysctl call uses a hierarchical name
  * for objects that can be examined or modified.  The name is expressed as
@@ -93,7 +94,8 @@ struct ctlname {
 #define	CTL_MACHDEP	7		/* machine dependent */
 #define	CTL_USER	8		/* user-level */
 #define	CTL_DDB		9		/* DDB user interface, see ddb_var.h */
-#define	CTL_MAXID	10		/* number of valid top-level ids */
+#define CTL_VFS         10              /* VFS sysctl's */
+#define	CTL_MAXID	11		/* number of valid top-level ids */
 
 #define CTL_NAMES { \
 	{ 0, 0 }, \
@@ -106,6 +108,7 @@ struct ctlname {
 	{ "machdep", CTLTYPE_NODE }, \
 	{ "user", CTLTYPE_NODE }, \
 	{ "ddb", CTLTYPE_NODE }, \
+	{ "vfs", CTLTYPE_NODE }, \
 }
 
 /*
@@ -383,7 +386,7 @@ int sysctl_rtable __P((int *, u_int, void *, size_t *, void *, size_t));
 int sysctl_clockrate __P((char *, size_t *));
 int sysctl_rdstring __P((void *, size_t *, void *, char *));
 int sysctl_rdstruct __P((void *, size_t *, void *, void *, int));
-int sysctl_vnode __P((char *, size_t *));
+int sysctl_vnode __P((char *, size_t *, struct proc *));
 int sysctl_ntptime __P((char *, size_t *));
 #ifdef GPROF
 int sysctl_doprof __P((int *, u_int, void *, size_t *, void *, size_t));
@@ -409,6 +412,8 @@ int net_sysctl __P((int *, u_int, void *, size_t *, void *, size_t,
 		    struct proc *));
 int cpu_sysctl __P((int *, u_int, void *, size_t *, void *, size_t,
 		    struct proc *));
+int vfs_sysctl __P((int *, u_int, void *, size_t *, void *, size_t,
+		    struct proc *));
 #else	/* !_KERNEL */
 #include <sys/cdefs.h>
 
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 757aa464ec2..1dbd1ed3c57 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: systm.h,v 1.20 1997/03/06 07:05:54 tholo Exp $	*/
+/*	$OpenBSD: systm.h,v 1.21 1997/10/06 15:25:35 csapuntz Exp $	*/
 /*	$NetBSD: systm.h,v 1.50 1996/06/09 04:55:09 briggs Exp $	*/
 
 /*-
@@ -128,7 +128,7 @@ int	enodev __P((void));
 int	enosys __P((void));
 int	enoioctl __P((void));
 int	enxio __P((void));
-int	eopnotsupp __P((void));
+int	eopnotsupp __P((void *));
 
 int	lkmenodev __P((void));
 
@@ -240,7 +240,7 @@ void	kmstartup __P((void));
 
 int nfs_mountroot __P((void));
 int dk_mountroot __P((void));
-int (*mountroot) __P((void));
+int (*mountroot)__P((void));
 
 #include <lib/libkern/libkern.h>
 
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index ed2fbcebca7..ebb93d38447 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: vnode.h,v 1.8 1996/07/14 08:54:05 downsj Exp $	*/
+/*	$OpenBSD: vnode.h,v 1.9 1997/10/06 15:25:36 csapuntz Exp $	*/
 /*	$NetBSD: vnode.h,v 1.38 1996/02/29 20:59:05 cgd Exp $	*/
 
 /*
@@ -37,6 +37,7 @@
  */
 
 #include <sys/queue.h>
+#include <sys/lock.h>
 
 /*
  * The vnode is the focus of all file activity in UNIX.  There is a
@@ -60,7 +61,7 @@ enum vtype	{ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD };
 enum vtagtype	{
 	VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_MSDOSFS, VT_LFS, VT_LOFS, VT_FDESC,
 	VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS,
-	VT_UNION, VT_ADOSFS, VT_EXT2FS, VT_NCPFS
+	VT_UNION, VT_ADOSFS, VT_EXT2FS, VT_NCPFS, VT_VFS
 };
 
 /*
@@ -69,6 +70,14 @@ enum vtagtype	{
  */
 LIST_HEAD(buflists, buf);
 
+/*
+ * Reading or writing any of these items requires holding the appropriate lock.
+ * v_freelist is locked by the global vnode_free_list simple lock.
+ * v_mntvnodes is locked by the global mntvnodes simple lock.
+ * v_flag, v_usecount, v_holdcount and v_writecount are
+ *    locked by the v_interlock simple lock.
+ */
+
 struct vnode {
 	u_long	v_flag;				/* vnode flags (see below) */
 	short	v_usecount;			/* reference count of users */
@@ -83,6 +92,7 @@ struct vnode {
 	struct	buflists v_cleanblkhd;		/* clean blocklist head */
 	struct	buflists v_dirtyblkhd;		/* dirty blocklist head */
 	long	v_numoutput;			/* num of writes in progress */
+	LIST_ENTRY(vnode) v_synclist;           /* vnode with dirty buffers */
 	enum	vtype v_type;			/* vnode type */
 	union {
 		struct mount	*vu_mountedhere;/* ptr to mounted vfs (VDIR) */
@@ -98,7 +108,9 @@ struct vnode {
 	int	v_clen;				/* length of current cluster */
 	int	v_ralen;			/* Read-ahead length */
 	daddr_t	v_maxra;			/* last readahead block */
-	long	v_spare[7];			/* round to 128 bytes */
+	struct  simplelock v_interlock;        /* lock on usecount and flag */
+	struct  lock *v_vnlock;                 /* used for non-locking fs's */
+	long	v_spare[3];			/* round to 128 bytes */
 	enum	vtagtype v_tag;			/* type of underlying data */
 	void 	*v_data;			/* private data for fs */
 };
@@ -120,6 +132,7 @@ struct vnode {
 #define	VBWAIT		0x0400	/* waiting for output to complete */
 #define	VALIASED	0x0800	/* vnode has an alias */
 #define	VDIROP		0x1000	/* LFS: vnode is involved in a directory op */
+#define VGONEHACK       0x2000  /* vgone: don't put me on the head of the free list */
 
 /*
  * Vnode attributes.  A field value of VNOVAL represents a field whose value
@@ -151,7 +164,7 @@ struct vattr {
  * Flags for va_cflags.
  */
 #define	VA_UTIMES_NULL	0x01		/* utimes argument was NULL */
-
+#define VA_EXCLUSIVE    0x02            /* exclusive create request */
 /*
  * Flags for ioflag.
  */
@@ -197,6 +210,14 @@ extern int		vttoif_tab[];
 #define	V_SAVE		0x0001		/* vinvalbuf: sync file first */
 #define	V_SAVEMETA	0x0002		/* vinvalbuf: leave indirect blocks */
 
+#define REVOKEALL       0x0001          /* vop_revoke: revoke all aliases */
+
+
+TAILQ_HEAD(freelst, vnode);
+extern struct freelst vnode_hold_list;	/* free vnodes referencing buffers */
+extern struct freelst vnode_free_list;	/* vnode free list */
+extern struct simplelock vnode_free_list_slock;
+
 #ifdef DIAGNOSTIC
 #define	HOLDRELE(vp)	holdrele(vp)
 #define	VATTR_NULL(vap)	vattr_null(vap)
@@ -208,11 +229,47 @@ void	vattr_null __P((struct vattr *));
 void	vhold __P((struct vnode *));
 void	vref __P((struct vnode *));
 #else
-#define	HOLDRELE(vp)	(vp)->v_holdcnt--	/* decrease buf or page ref */
+#define	HOLDRELE(vp)	holdrele(vp)	/* decrease buf or page ref */
 #define	VATTR_NULL(vap)	(*(vap) = va_null)	/* initialize a vattr */
-#define	VHOLD(vp)	(vp)->v_holdcnt++	/* increase buf or page ref */
-#define	VREF(vp)	(vp)->v_usecount++	/* increase reference */
-#endif
+
+static __inline void holdrele(vp)
+	struct vnode *vp;
+{
+	simple_lock(&vp->v_interlock);
+	vp->v_holdcnt--;
+	if (!(vp->v_flag & VGONEHACK) &&
+	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
+		simple_lock(&vnode_free_list_slock);	/* hold -> free list */
+		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
+		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+		simple_unlock(&vnode_free_list_slock);
+	}
+	simple_unlock(&vp->v_interlock);
+}
+#define	VHOLD(vp)	vhold(vp)		/* increase buf or page ref */
+static __inline void vhold(vp)
+	struct vnode *vp;
+{
+	simple_lock(&vp->v_interlock);
+	if (!(vp->v_flag & VGONEHACK) &&
+	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
+		simple_lock(&vnode_free_list_slock);	/* free -> hold list */
+		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
+		simple_unlock(&vnode_free_list_slock);
+	}
+	vp->v_holdcnt++;
+	simple_unlock(&vp->v_interlock);
+}
+#define	VREF(vp)	vref(vp)		/* increase reference */
+static __inline void vref(vp)
+	struct vnode *vp;
+{
+	simple_lock(&vp->v_interlock);
+	vp->v_usecount++;	/* v_usecount is guarded by v_interlock */
+	simple_unlock(&vp->v_interlock);
+}
+#endif /* DIAGNOSTIC */
 
 #define	NULLVP	((struct vnode *)NULL)
 
@@ -220,6 +277,7 @@ void	vref __P((struct vnode *));
  * Global vnode data.
  */
 extern	struct vnode *rootvnode;	/* root (i.e. "/") vnode */
+extern	time_t syncdelay;		/* time to delay syncing vnodes */
 extern	int desiredvnodes;		/* number of vnodes desired */
 extern	struct vattr va_null;		/* predefined null vattr structure */
 
@@ -289,6 +347,11 @@ extern struct vnodeop_desc *vnodeop_descs[];
 
 
 /*
+ * Interlock for scanning list of vnodes attached to a mountpoint
+ */
+extern struct simplelock mntvnode_slock; /* XXX needs a definition in one .c */
+
+/*
  * This macro is very helpful in defining those offsets in the vdesc struct.
  *
  * This is stolen from X11R4.  I ingored all the fancy stuff for
@@ -371,14 +434,15 @@ int	getvnode __P((struct filedesc *fdp, int fd, struct file **fpp));
 void	getnewfsid __P((struct mount *, int));
 void 	vattr_null __P((struct vattr *vap));
 int 	vcount __P((struct vnode *vp));
-void	vclean __P((struct vnode *, int));
+void	vclean __P((struct vnode *, int, struct proc *));
 int	vfinddev __P((dev_t, enum vtype, struct vnode **));
 void	vflushbuf __P((struct vnode *vp, int sync));
 int	vflush __P((struct mount *mp, struct vnode *vp, int flags));
 void	vntblinit __P((void));
 void	vwakeup __P((struct buf *));
-int 	vget __P((struct vnode *vp, int lockflag));
+int 	vget __P((struct vnode *vp, int lockflag, struct proc *p));
 void 	vgone __P((struct vnode *vp));
+void    vgonel __P((struct vnode *, struct proc *));
 void 	vgoneall __P((struct vnode *vp));
 int	vinvalbuf __P((struct vnode *vp, int save, struct ucred *cred,
 	    struct proc *p, int slpflag, int slptimeo));
@@ -391,14 +455,25 @@ int 	vn_closefile __P((struct file *fp, struct proc *p));
 int	vn_ioctl __P((struct file *fp, u_long com, caddr_t data,
 	    struct proc *p));
 int 	vn_open __P((struct nameidata *ndp, int fmode, int cmode));
+int	vrecycle __P((struct vnode *vp, struct simplelock *inter_lkp,
+	    struct proc *p));
 int 	vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base,
 	    int len, off_t offset, enum uio_seg segflg, int ioflg,
 	    struct ucred *cred, int *aresid, struct proc *p));
+int	vn_lock __P((struct vnode *vp, int flags, struct proc *p));
+int	vop_noislocked __P((void *));
+int	vop_nolock __P((void *));
+int	vop_nounlock __P((void *));
+int	vop_revoke __P((void *));
+
 int	vn_read __P((struct file *fp, struct uio *uio, struct ucred *cred));
 int	vn_select __P((struct file *fp, int which, struct proc *p));
 int	vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p));
 int	vn_write __P((struct file *fp, struct uio *uio, struct ucred *cred));
 int	vn_writechk __P((struct vnode *vp));
+void	vn_syncer_add_to_worklist __P((struct vnode *vp, int delay));
+void    sched_sync __P((struct proc *));
+
 struct vnode *
 	checkalias __P((struct vnode *vp, dev_t nvp_rdev, struct mount *mp));
 void 	vput __P((struct vnode *vp));
diff --git a/sys/sys/vnode_if.h b/sys/sys/vnode_if.h
index abf129f1126..43b56b5dc76 100644
--- a/sys/sys/vnode_if.h
+++ b/sys/sys/vnode_if.h
@@ -291,6 +291,31 @@ static __inline int VOP_WRITE(vp, uio, ioflag, cred)
 	return (VCALL(vp, VOFFSET(vop_write), &a));
 }
 
+struct vop_lease_args {
+	struct vnodeop_desc *a_desc;
+	struct vnode *a_vp;
+	struct proc *a_p;
+	struct ucred *a_cred;
+	int a_flag;
+};
+extern struct vnodeop_desc vop_lease_desc;
+static __inline int VOP_LEASE __P((struct vnode *, struct proc *, 
+    struct ucred *, int));
+static __inline int VOP_LEASE(vp, p, cred, flag)
+	struct vnode *vp;
+	struct proc *p;
+	struct ucred *cred;
+	int flag;
+{
+	struct vop_lease_args a;
+	a.a_desc = VDESC(vop_lease);
+	a.a_vp = vp;
+	a.a_p = p;
+	a.a_cred = cred;
+	a.a_flag = flag;
+	return (VCALL(vp, VOFFSET(vop_lease), &a));
+}
+
 struct vop_ioctl_args {
 	struct vnodeop_desc *a_desc;
 	struct vnode *a_vp;
@@ -350,6 +375,24 @@ static __inline int VOP_SELECT(vp, which, fflags, cred, p)
 	return (VCALL(vp, VOFFSET(vop_select), &a));
 }
 
+struct vop_revoke_args {
+	struct vnodeop_desc *a_desc;
+	struct vnode *a_vp;
+	int a_flags;
+};
+extern struct vnodeop_desc vop_revoke_desc;
+static __inline int VOP_REVOKE __P((struct vnode *, int));
+static __inline int VOP_REVOKE(vp, flags)
+	struct vnode *vp;
+	int flags;
+{
+	struct vop_revoke_args a;
+	a.a_desc = VDESC(vop_revoke);
+	a.a_vp = vp;
+	a.a_flags = flags;
+	return (VCALL(vp, VOFFSET(vop_revoke), &a));
+}
+
 struct vop_mmap_args {
 	struct vnodeop_desc *a_desc;
 	struct vnode *a_vp;
@@ -582,19 +625,19 @@ struct vop_readdir_args {
 	struct uio *a_uio;
 	struct ucred *a_cred;
 	int *a_eofflag;
-	u_long *a_cookies;
-	int a_ncookies;
+	int *a_ncookies;
+	u_long **a_cookies;
 };
 extern struct vnodeop_desc vop_readdir_desc;
 static __inline int VOP_READDIR __P((struct vnode *, struct uio *, 
-    struct ucred *, int *, u_long *, int));
-static __inline int VOP_READDIR(vp, uio, cred, eofflag, cookies, ncookies)
+    struct ucred *, int *, int *, u_long **));
+static __inline int VOP_READDIR(vp, uio, cred, eofflag, ncookies, cookies)
 	struct vnode *vp;
 	struct uio *uio;
 	struct ucred *cred;
 	int *eofflag;
-	u_long *cookies;
-	int ncookies;
+	int *ncookies;
+	u_long **cookies;
 {
 	struct vop_readdir_args a;
 	a.a_desc = VDESC(vop_readdir);
@@ -602,8 +645,8 @@ static __inline int VOP_READDIR(vp, uio, cred, eofflag, cookies, ncookies)
 	a.a_uio = uio;
 	a.a_cred = cred;
 	a.a_eofflag = eofflag;
-	a.a_cookies = cookies;
 	a.a_ncookies = ncookies;
+	a.a_cookies = cookies;
 	return (VCALL(vp, VOFFSET(vop_readdir), &a));
 }
 
@@ -650,60 +693,78 @@ static __inline int VOP_ABORTOP(dvp, cnp)
 struct vop_inactive_args {
 	struct vnodeop_desc *a_desc;
 	struct vnode *a_vp;
+	struct proc *a_p;
 };
 extern struct vnodeop_desc vop_inactive_desc;
-static __inline int VOP_INACTIVE __P((struct vnode *));
-static __inline int VOP_INACTIVE(vp)
+static __inline int VOP_INACTIVE __P((struct vnode *, struct proc *));
+static __inline int VOP_INACTIVE(vp, p)
 	struct vnode *vp;
+	struct proc *p;
 {
 	struct vop_inactive_args a;
 	a.a_desc = VDESC(vop_inactive);
 	a.a_vp = vp;
+	a.a_p = p;
 	return (VCALL(vp, VOFFSET(vop_inactive), &a));
 }
 
 struct vop_reclaim_args {
 	struct vnodeop_desc *a_desc;
 	struct vnode *a_vp;
+	struct proc *a_p;
 };
 extern struct vnodeop_desc vop_reclaim_desc;
-static __inline int VOP_RECLAIM __P((struct vnode *));
-static __inline int VOP_RECLAIM(vp)
+static __inline int VOP_RECLAIM __P((struct vnode *, struct proc *));
+static __inline int VOP_RECLAIM(vp, p)
 	struct vnode *vp;
+	struct proc *p;
 {
 	struct vop_reclaim_args a;
 	a.a_desc = VDESC(vop_reclaim);
 	a.a_vp = vp;
+	a.a_p = p;
 	return (VCALL(vp, VOFFSET(vop_reclaim), &a));
 }
 
 struct vop_lock_args {
 	struct vnodeop_desc *a_desc;
 	struct vnode *a_vp;
+	int a_flags;
+	struct proc *a_p;
 };
 extern struct vnodeop_desc vop_lock_desc;
-static __inline int VOP_LOCK __P((struct vnode *));
-static __inline int VOP_LOCK(vp)
+static __inline int VOP_LOCK __P((struct vnode *, int, struct proc *));
+static __inline int VOP_LOCK(vp, flags, p)
 	struct vnode *vp;
+	int flags;
+	struct proc *p;
 {
 	struct vop_lock_args a;
 	a.a_desc = VDESC(vop_lock);
 	a.a_vp = vp;
+	a.a_flags = flags;
+	a.a_p = p;
 	return (VCALL(vp, VOFFSET(vop_lock), &a));
 }
 
 struct vop_unlock_args {
 	struct vnodeop_desc *a_desc;
 	struct vnode *a_vp;
+	int a_flags;
+	struct proc *a_p;
 };
 extern struct vnodeop_desc vop_unlock_desc;
-static __inline int VOP_UNLOCK __P((struct vnode *));
-static __inline int VOP_UNLOCK(vp)
+static __inline int VOP_UNLOCK __P((struct vnode *, int, struct proc *));
+static __inline int VOP_UNLOCK(vp, flags, p)
 	struct vnode *vp;
+	int flags;
+	struct proc *p;
 {
 	struct vop_unlock_args a;
 	a.a_desc = VDESC(vop_unlock);
 	a.a_vp = vp;
+	a.a_flags = flags;
+	a.a_p = p;
 	return (VCALL(vp, VOFFSET(vop_unlock), &a));
 }
 
@@ -864,6 +925,37 @@ static __inline int VOP_VALLOC(pvp, mode, cred, vpp)
 	return (VCALL(pvp, VOFFSET(vop_valloc), &a));
 }
 
+struct vop_balloc_args {
+	struct vnodeop_desc *a_desc;
+	struct vnode *a_vp;
+	off_t a_startoffset;
+	int a_size;
+	struct ucred *a_cred;
+	int a_flags;
+	struct buf **a_bpp;
+};
+extern struct vnodeop_desc vop_balloc_desc;
+static __inline int VOP_BALLOC __P((struct vnode *, off_t, int, 
+    struct ucred *, int, struct buf **));
+static __inline int VOP_BALLOC(vp, startoffset, size, cred, flags, bpp)
+	struct vnode *vp;
+	off_t startoffset;
+	int size;
+	struct ucred *cred;
+	int flags;
+	struct buf **bpp;
+{
+	struct vop_balloc_args a;
+	a.a_desc = VDESC(vop_balloc);
+	a.a_vp = vp;
+	a.a_startoffset = startoffset;
+	a.a_size = size;
+	a.a_cred = cred;
+	a.a_flags = flags;
+	a.a_bpp = bpp;
+	return (VCALL(vp, VOFFSET(vop_balloc), &a));
+}
+
 struct vop_reallocblks_args {
 	struct vnodeop_desc *a_desc;
 	struct vnode *a_vp;
@@ -957,31 +1049,6 @@ static __inline int VOP_UPDATE(vp, access, modify, waitfor)
 	return (VCALL(vp, VOFFSET(vop_update), &a));
 }
 
-struct vop_lease_args {
-	struct vnodeop_desc *a_desc;
-	struct vnode *a_vp;
-	struct proc *a_p;
-	struct ucred *a_cred;
-	int a_flag;
-};
-extern struct vnodeop_desc vop_lease_desc;
-static __inline int VOP_LEASE __P((struct vnode *, struct proc *, 
-    struct ucred *, int));
-static __inline int VOP_LEASE(vp, p, cred, flag)
-	struct vnode *vp;
-	struct proc *p;
-	struct ucred *cred;
-	int flag;
-{
-	struct vop_lease_args a;
-	a.a_desc = VDESC(vop_lease);
-	a.a_vp = vp;
-	a.a_p = p;
-	a.a_cred = cred;
-	a.a_flag = flag;
-	return (VCALL(vp, VOFFSET(vop_lease), &a));
-}
-
 struct vop_whiteout_args {
 	struct vnodeop_desc *a_desc;
 	struct vnode *a_dvp;
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index e25073d6715..e4bf9e3d285 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ffs_alloc.c,v 1.7 1997/07/22 10:31:50 deraadt Exp $	*/
+/*	$OpenBSD: ffs_alloc.c,v 1.8 1997/10/06 15:26:28 csapuntz Exp $	*/
 /*	$NetBSD: ffs_alloc.c,v 1.11 1996/05/11 18:27:09 mycroft Exp $	*/
 
 /*
@@ -59,7 +59,7 @@
 extern u_long nextgennumber;
 
 static daddr_t	ffs_alloccg __P((struct inode *, int, daddr_t, int));
-static daddr_t	ffs_alloccgblk __P((struct fs *, struct cg *, daddr_t));
+static daddr_t	ffs_alloccgblk __P((struct inode *, struct buf *, daddr_t));
 static daddr_t	ffs_clusteralloc __P((struct inode *, int, daddr_t, int));
 static ino_t	ffs_dirpref __P((struct fs *));
 static daddr_t	ffs_fragextend __P((struct inode *, int, long, int, int));
@@ -70,6 +70,11 @@ static u_long	ffs_hashalloc __P((struct inode *, int, long, int,
 static daddr_t	ffs_nodealloccg __P((struct inode *, int, daddr_t, int));
 static daddr_t	ffs_mapsearch __P((struct fs *, struct cg *, daddr_t, int));
 
+#ifdef DIAGNOSTIC
+static int      ffs_checkblk __P((struct inode *, daddr_t, long));
+#endif
+int ffs_freefile __P((struct vop_vfree_args *));
+
 /*
  * Allocate a block in the file system.
  * 
@@ -272,7 +277,8 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
 	if (bno > 0) {
 		bp->b_blkno = fsbtodb(fs, bno);
 		(void) vnode_pager_uncache(ITOV(ip));
-		ffs_blkfree(ip, bprev, (long)osize);
+		if (!DOINGSOFTDEP(ITOV(ip)))
+			ffs_blkfree(ip, bprev, (long)osize);
 		if (nsize < request)
 			ffs_blkfree(ip, bno + numfrags(fs, nsize),
 			    (long)(request - nsize));
@@ -314,15 +320,10 @@ nospace:
  * Note that the error return is not reflected back to the user. Rather
  * the previous block allocation will be used.
  */
-#ifdef DEBUG
-#include <sys/sysctl.h>
+
 int doasyncfree = 1;
-struct ctldebug debug14 = { "doasyncfree", &doasyncfree };
+int doreallocblks = 1;
 int prtrealloc = 0;
-struct ctldebug debug15 = { "prtrealloc", &prtrealloc };
-#else
-#define doasyncfree 1
-#endif
 
 int
 ffs_reallocblks(v)
@@ -343,6 +344,9 @@ ffs_reallocblks(v)
 	int i, len, start_lvl, end_lvl, pref, ssize;
 	struct timespec ts;
 
+	if (doreallocblks == 0)
+		return (ENOSPC);
+
 	vp = ap->a_vp;
 	ip = VTOI(vp);
 	fs = ip->i_fs;
@@ -352,10 +356,22 @@ ffs_reallocblks(v)
 	len = buflist->bs_nchildren;
 	start_lbn = buflist->bs_children[0]->b_lblkno;
 	end_lbn = start_lbn + len - 1;
+
 #ifdef DIAGNOSTIC
+	for (i = 0; i < len; i++)
+		if (!ffs_checkblk(ip,
+		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
+			panic("ffs_reallocblks: unallocated block 1");
+		
 	for (i = 1; i < len; i++)
 		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
-			panic("ffs_reallocblks: non-cluster");
+			panic("ffs_reallocblks: non-logical cluster");
+
+	blkno = buflist->bs_children[0]->b_blkno;
+	ssize = fsbtodb(fs, fs->fs_frag);
+	for (i = 1; i < len - 1; i++)
+		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
+			panic("ffs_reallocblks: non-physical cluster %d", i);
 #endif
 	/*
 	 * If the latest allocation is in a new cylinder group, assume that
@@ -422,9 +438,14 @@ ffs_reallocblks(v)
 #endif
 	blkno = newblk;
 	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
-		if (i == ssize)
+		if (i == ssize) {
 			bap = ebap;
+			soff = -i;
+		}
 #ifdef DIAGNOSTIC
+		if (!ffs_checkblk(ip,
+		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
+			panic("ffs_reallocblks: unallocated block 2");
 		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
 			panic("ffs_reallocblks: alloc mismatch");
 #endif
@@ -432,6 +453,17 @@ ffs_reallocblks(v)
 		if (prtrealloc)
 			printf(" %d,", *bap);
 #endif
+		if (DOINGSOFTDEP(vp)) {
+			if (sbap == &ip->i_ffs_db[0] && i < ssize)
+				softdep_setup_allocdirect(ip, start_lbn + i,
+				   blkno, *bap, fs->fs_bsize, fs->fs_bsize,
+                                   buflist->bs_children[i]);
+			else
+				softdep_setup_allocindir_page(ip, start_lbn + i,
+                                   i < ssize ? sbp : ebp, soff + i, blkno,
+                                   *bap, buflist->bs_children[i]);
+		}
+
 		*bap++ = blkno;
 	}
 	/*
@@ -473,10 +505,17 @@ ffs_reallocblks(v)
 		printf("\n\tnew:");
 #endif
 	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
-		ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
-		    fs->fs_bsize);
+		if (!DOINGSOFTDEP(vp))
+			ffs_blkfree(ip,
+			    dbtofsb(fs, buflist->bs_children[i]->b_blkno),
+			    fs->fs_bsize);
 		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
+#ifdef DIAGNOSTIC
+		if (!ffs_checkblk(ip,
+		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
+			panic("ffs_reallocblks: unallocated block 3");
+#endif
 #ifdef DEBUG
 		if (prtrealloc)
 			printf(" %d,", blkno);
 #endif
@@ -815,6 +852,9 @@ ffs_fragextend(ip, cg, bprev, osize, nsize)
 		fs->fs_cs(fs, cg).cs_nffree--;
 	}
 	fs->fs_fmod = 1;
+	if (DOINGSOFTDEP(ITOV(ip)))
+		softdep_setup_blkmapdep(bp, fs, bprev);
+
 	bdwrite(bp);
 	return (bprev);
 }
@@ -835,8 +875,8 @@ ffs_alloccg(ip, cg, bpref, size)
 	register struct fs *fs;
 	register struct cg *cgp;
 	struct buf *bp;
-	register int i;
-	int error, bno, frags, allocsiz;
+	daddr_t bno, blkno;
+	int error, i, frags, allocsiz;
 
 	fs = ip->i_fs;
 	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
@@ -855,7 +895,7 @@ ffs_alloccg(ip, cg, bpref, size)
 	}
 	cgp->cg_time = time.tv_sec;
 	if (size == fs->fs_bsize) {
-		bno = ffs_alloccgblk(fs, cgp, bpref);
+		bno = ffs_alloccgblk(ip, bp, bpref);
 		bdwrite(bp);
 		return (bno);
 	}
@@ -877,7 +917,7 @@ ffs_alloccg(ip, cg, bpref, size)
 			brelse(bp);
 			return (NULL);
 		}
-		bno = ffs_alloccgblk(fs, cgp, bpref);
+		bno = ffs_alloccgblk(ip, bp, bpref);
 		bpref = dtogd(fs, bno);
 		for (i = frags; i < fs->fs_frag; i++)
 			setbit(cg_blksfree(cgp), bpref + i);
@@ -904,8 +944,12 @@ ffs_alloccg(ip, cg, bpref, size)
 	cgp->cg_frsum[allocsiz]--;
 	if (frags != allocsiz)
 		cgp->cg_frsum[allocsiz - frags]++;
-	bdwrite(bp);
-	return (cg * fs->fs_fpg + bno);
+
+	blkno = cg * fs->fs_fpg + bno;
+	if (DOINGSOFTDEP(ITOV(ip)))
+		softdep_setup_blkmapdep(bp, fs, blkno);
+        bdwrite(bp);
+	return ((u_long)blkno);
 }
 
 /*
@@ -920,16 +964,20 @@ ffs_alloccg(ip, cg, bpref, size)
  * blocks may be fragmented by the routine that allocates them.
  */
 static daddr_t
-ffs_alloccgblk(fs, cgp, bpref)
-	register struct fs *fs;
-	register struct cg *cgp;
+ffs_alloccgblk(ip, bp, bpref)
+	struct inode *ip;
+	struct buf *bp;
 	daddr_t bpref;
 {
+	struct fs *fs;
+	struct cg *cgp;
 	daddr_t bno, blkno;
 	int cylno, pos, delta;
 	short *cylbp;
 	register int i;
 
+	fs = ip->i_fs;
+	cgp = (struct cg *)bp->b_data;
 	if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
 		bpref = cgp->cg_rotor;
 		goto norot;
@@ -1020,7 +1068,10 @@ gotit:
 	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
 	cg_blktot(cgp)[cylno]--;
 	fs->fs_fmod = 1;
-	return (cgp->cg_cgx * fs->fs_fpg + bno);
+        blkno = cgp->cg_cgx * fs->fs_fpg + bno;
+        if (DOINGSOFTDEP(ITOV(ip)))
+                softdep_setup_blkmapdep(bp, fs, blkno);
+        return (blkno);
 }
 
 /*
@@ -1040,7 +1091,7 @@ ffs_clusteralloc(ip, cg, bpref, len)
 	register struct fs *fs;
 	register struct cg *cgp;
 	struct buf *bp;
-	int i, run, bno, bit, map;
+	int i, got, run, bno, bit, map;
 	u_char *mapp;
 	int32_t *lp;
 
@@ -1094,7 +1145,7 @@ ffs_clusteralloc(ip, cg, bpref, len)
 	mapp = &cg_clustersfree(cgp)[bpref / NBBY];
 	map = *mapp++;
 	bit = 1 << (bpref % NBBY);
-	for (run = 0, i = bpref; i < cgp->cg_nclusterblks; i++) {
+	for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
 		if ((map & bit) == 0) {
 			run = 0;
 		} else {
@@ -1102,22 +1153,32 @@ ffs_clusteralloc(ip, cg, bpref, len)
 			if (run == len)
 				break;
 		}
-		if ((i & (NBBY - 1)) != (NBBY - 1)) {
+		if ((got & (NBBY - 1)) != (NBBY - 1)) {
 			bit <<= 1;
 		} else {
 			map = *mapp++;
 			bit = 1;
 		}
 	}
-	if (i >= cgp->cg_nclusterblks)
+	if (got >= cgp->cg_nclusterblks)
 		goto fail;
 	/*
 	 * Allocate the cluster that we have found.
 	 */
-	bno = cg * fs->fs_fpg + blkstofrags(fs, i - run + 1);
+#ifdef DIAGNOSTIC
+	for (i = 1; i <= len; i++)
+		if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i))
+			panic("ffs_clusteralloc: map mismatch");
+#endif
+	bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
+#ifdef DIAGNOSTIC
+	if (dtog(fs, bno) != cg)
+		panic("ffs_clusteralloc: allocated out of group");
+#endif
+
 	len = blkstofrags(fs, len);
 	for (i = 0; i < len; i += fs->fs_frag)
-		if (ffs_alloccgblk(fs, cgp, bno + i) != bno + i)
+		if (ffs_alloccgblk(ip, bp, bno + i) != bno + i)
 			panic("ffs_clusteralloc: lost block");
 	bdwrite(bp);
 	return (bno);
@@ -1195,6 +1256,9 @@ ffs_nodealloccg(ip, cg, ipref, mode)
 	panic("ffs_nodealloccg: block not in map");
 	/* NOTREACHED */
 gotit:
+        if (DOINGSOFTDEP(ITOV(ip)))
+		softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
+
 	setbit(cg_inosused(cgp), ipref);
 	cgp->cg_cs.cs_nifree--;
 	fs->fs_cstotal.cs_nifree--;
@@ -1229,7 +1293,8 @@ ffs_blkfree(ip, bno, size)
 	int i, error, cg, blk, frags, bbase;
 
 	fs = ip->i_fs;
-	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
+	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
+	    fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
 		printf("dev = 0x%x, bsize = %d, size = %ld, fs = %s\n",
 		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
 		panic("blkfree: bad size");
@@ -1255,7 +1320,7 @@ ffs_blkfree(ip, bno, size)
 	bno = dtogd(fs, bno);
 	if (size == fs->fs_bsize) {
 		blkno = fragstoblks(fs, bno);
-		if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
+		if (!ffs_isfreeblock(fs, cg_blksfree(cgp), blkno)) {
 			printf("dev = 0x%x, block = %d, fs = %s\n",
 			    ip->i_dev, bno, fs->fs_fsmnt);
 			panic("blkfree: freeing free block");
@@ -1318,8 +1383,6 @@ ffs_blkfree(ip, bno, size)
 
 /*
  * Free an inode.
- *
- * The specified inode is placed back in the free map.
  */
 int
 ffs_vfree(v)
@@ -1330,6 +1393,28 @@ ffs_vfree(v)
 		ino_t a_ino;
 		int a_mode;
 	} */ *ap = v;
+
+
+	if (DOINGSOFTDEP(ap->a_pvp)) {
+		softdep_freefile(ap);
+		return (0);
+	}
+
+	return (ffs_freefile(ap));
+}
+
+/*
+ * Do the actual free operation.
+ * The specified inode is placed back in the free map.
+ */
+int
+ffs_freefile(ap)
+      struct vop_vfree_args /* {
+              struct vnode *a_pvp;
+              ino_t a_ino;
+              int a_mode;
+      } */ *ap;
+{
 	register struct fs *fs;
 	register struct cg *cgp;
 	register struct inode *pip;
@@ -1347,7 +1432,7 @@ ffs_vfree(v)
 		(int)fs->fs_cgsize, NOCRED, &bp);
 	if (error) {
 		brelse(bp);
-		return (0);
+		return (error);
 	}
 	cgp = (struct cg *)bp->b_data;
 	if (!cg_chkmagic(cgp)) {
@@ -1378,6 +1463,60 @@ ffs_vfree(v)
 	return (0);
 }
 
+#ifdef DIAGNOSTIC
+/*
+ * Verify allocation of a block or fragment. Returns zero if it is free and
+ * non-zero if allocated; -1 means the cylinder group could not be checked.
+ */
+static int
+ffs_checkblk(ip, bno, size)
+	struct inode *ip;
+	daddr_t bno;
+	long size;
+{
+	struct fs *fs;
+	struct cg *cgp;
+	struct buf *bp;
+	int i, error, frags, free;
+
+	fs = ip->i_fs;
+	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
+		printf("bsize = %d, size = %ld, fs = %s\n",
+		    fs->fs_bsize, size, fs->fs_fsmnt);
+		panic("checkblk: bad size");
+	}
+	if ((u_int)bno >= fs->fs_size)
+		panic("checkblk: bad block %d", bno);
+	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
+		(int)fs->fs_cgsize, NOCRED, &bp);
+	if (error) {
+		/* XXX - probably should panic here */
+		brelse(bp);
+		return (-1);
+	}
+	cgp = (struct cg *)bp->b_data;
+	if (!cg_chkmagic(cgp)) {
+		/* XXX - probably should panic here */
+		brelse(bp);
+		return (-1);
+	}
+	bno = dtogd(fs, bno);
+	if (size == fs->fs_bsize) {
+		free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
+	} else {
+		frags = numfrags(fs, size);
+		for (free = 0, i = 0; i < frags; i++)
+			if (isset(cg_blksfree(cgp), bno + i))
+				free++;
+		if (free != 0 && free != frags)
+			panic("checkblk: partially free fragment");
+	}
+	brelse(bp);
+	return (!free);
+}
+#endif /* DIAGNOSTIC */
+
+
 /*
  * Find a block of the specified size in the specified cylinder group.
  *
@@ -1550,3 +1689,4 @@ ffs_fserr(fs, uid, cp)
 
 	log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
 }
+
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index 5a7dc3afcc2..285ca5f2ca7 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ffs_balloc.c,v 1.3 1997/05/30 08:34:19 downsj Exp $	*/
+/*	$OpenBSD: ffs_balloc.c,v 1.4 1997/10/06 15:26:29 csapuntz Exp $	*/
 /*	$NetBSD: ffs_balloc.c,v 1.3 1996/02/09 22:22:21 christos Exp $	*/
 
 /*
@@ -41,6 +41,7 @@
 #include <sys/buf.h>
 #include <sys/proc.h>
 #include <sys/file.h>
+#include <sys/mount.h>
 #include <sys/vnode.h>
 
 #include <vm/vm.h>
@@ -58,27 +59,44 @@
  * the inode and the logical block number in a file.
  */
 int
-ffs_balloc(ip, bn, size, cred, bpp, flags)
-	register struct inode *ip;
-	register daddr_t bn;
+ffs_balloc(v)
+	void *v;
+{
+	struct vop_balloc_args /* {
+		struct vnode *a_vp;
+		off_t a_startoffset;
+		int a_size;
+		struct ucred *a_cred;
+		int a_flags;
+		struct buf **a_bpp;
+        } */ *ap = v;
+
+	struct inode *ip;
+	daddr_t lbn;
 	int size;
 	struct ucred *cred;
-	struct buf **bpp;
 	int flags;
-{
-	register struct fs *fs;
-	register daddr_t nb;
+	struct fs *fs;
+	daddr_t nb;
 	struct buf *bp, *nbp;
-	struct vnode *vp = ITOV(ip);
+	struct vnode *vp;
 	struct indir indirs[NIADDR + 2];
-	daddr_t newb, lbn, *bap, pref;
-	int osize, nsize, num, i, error;
+	daddr_t newb, *bap, pref;
+	int deallocated, osize, nsize, num, i, error;
+	daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR+1];
 
-	*bpp = NULL;
-	if (bn < 0)
-		return (EFBIG);
+	vp = ap->a_vp;
+	ip = VTOI(vp);
 	fs = ip->i_fs;
-	lbn = bn;
+	lbn = lblkno(fs, ap->a_startoffset);
+	size = blkoff(fs, ap->a_startoffset) + ap->a_size;
+	if (size > fs->fs_bsize)
+		panic("ffs_balloc; blk too big");
+	*ap->a_bpp = NULL;
+	if (lbn < 0)
+		return (EFBIG);
+	cred = ap->a_cred;
+	flags = ap->a_flags;
 
 	/*
 	 * If the next write will extend the file into a new block,
@@ -86,7 +104,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
 	 * this fragment has to be extended to be a full block.
 	 */
 	nb = lblkno(fs, ip->i_ffs_size);
-	if (nb < NDADDR && nb < bn) {
+	if (nb < NDADDR && nb < lbn) {
 		osize = blksize(fs, ip, nb);
 		if (osize < fs->fs_bsize && osize > 0) {
 			error = ffs_realloccg(ip, nb,
@@ -94,6 +112,11 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
 				osize, (int)fs->fs_bsize, cred, &bp);
 			if (error)
 				return (error);
+			if (DOINGSOFTDEP(vp))
+				softdep_setup_allocdirect(ip, nb,
+				    dbtofsb(fs, bp->b_blkno), ip->i_ffs_db[nb],
+				    fs->fs_bsize, osize, bp);
+
 			ip->i_ffs_size = (nb + 1) * fs->fs_bsize;
 			vnode_pager_setsize(vp, (u_long)ip->i_ffs_size);
 			ip->i_ffs_db[nb] = dbtofsb(fs, bp->b_blkno);
@@ -107,15 +130,15 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
 	/*
 	 * The first NDADDR blocks are direct blocks
 	 */
-	if (bn < NDADDR) {
-		nb = ip->i_ffs_db[bn];
-		if (nb != 0 && ip->i_ffs_size >= (bn + 1) * fs->fs_bsize) {
-			error = bread(vp, bn, fs->fs_bsize, NOCRED, &bp);
+	if (lbn < NDADDR) {
+		nb = ip->i_ffs_db[lbn];
+		if (nb != 0 && ip->i_ffs_size >= (lbn + 1) * fs->fs_bsize) {
+			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
 			if (error) {
 				brelse(bp);
 				return (error);
 			}
-			*bpp = bp;
+			*ap->a_bpp = bp;
 			return (0);
 		}
 		if (nb != 0) {
@@ -125,43 +148,52 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
 			osize = fragroundup(fs, blkoff(fs, ip->i_ffs_size));
 			nsize = fragroundup(fs, size);
 			if (nsize <= osize) {
-				error = bread(vp, bn, osize, NOCRED, &bp);
+				error = bread(vp, lbn, osize, NOCRED, &bp);
 				if (error) {
 					brelse(bp);
 					return (error);
 				}
 			} else {
-				error = ffs_realloccg(ip, bn,
-				    ffs_blkpref(ip, bn, (int)bn, &ip->i_ffs_db[0]),
+				error = ffs_realloccg(ip, lbn,
+				    ffs_blkpref(ip, lbn, (int)lbn, 
+					&ip->i_ffs_db[0]),
 				    osize, nsize, cred, &bp);
 				if (error)
 					return (error);
+				if (DOINGSOFTDEP(vp))
+					softdep_setup_allocdirect(ip, lbn,
+					    dbtofsb(fs, bp->b_blkno), nb,
+                                            nsize, osize, bp);
 			}
 		} else {
-			if (ip->i_ffs_size < (bn + 1) * fs->fs_bsize)
+			if (ip->i_ffs_size < (lbn + 1) * fs->fs_bsize)
 				nsize = fragroundup(fs, size);
 			else
 				nsize = fs->fs_bsize;
-			error = ffs_alloc(ip, bn,
-			    ffs_blkpref(ip, bn, (int)bn, &ip->i_ffs_db[0]),
+			error = ffs_alloc(ip, lbn,
+			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_ffs_db[0]),
 			    nsize, cred, &newb);
 			if (error)
 				return (error);
-			bp = getblk(vp, bn, nsize, 0, 0);
+			bp = getblk(vp, lbn, nsize, 0, 0);
 			bp->b_blkno = fsbtodb(fs, newb);
 			if (flags & B_CLRBUF)
 				clrbuf(bp);
+			if (DOINGSOFTDEP(vp))
+				softdep_setup_allocdirect(ip, lbn, newb, 0,
+				    nsize, 0, bp);
+
 		}
-		ip->i_ffs_db[bn] = dbtofsb(fs, bp->b_blkno);
+		ip->i_ffs_db[lbn] = dbtofsb(fs, bp->b_blkno);
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
-		*bpp = bp;
+		*ap->a_bpp = bp;
 		return (0);
 	}
 	/*
 	 * Determine the number of levels of indirection.
 	 */
 	pref = 0;
-	if ((error = ufs_getlbns(vp, bn, indirs, &num)) != 0)
+	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
 		return(error);
 #ifdef DIAGNOSTIC
 	if (num < 1)
@@ -172,6 +204,9 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
 	 */
 	--num;
 	nb = ip->i_ffs_ib[indirs[0].in_off];
+
+	allocib = NULL;
+	allocblk = allociblk;
 	if (nb == 0) {
 		pref = ffs_blkpref(ip, lbn, 0, (daddr_t *)0);
 	        error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
@@ -179,18 +214,26 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
 		if (error)
 			return (error);
 		nb = newb;
+
+		*allocblk++ = nb;
 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
-		bp->b_blkno = fsbtodb(fs, newb);
+		bp->b_blkno = fsbtodb(fs, nb);
 		clrbuf(bp);
-		/*
-		 * Write synchronously so that indirect blocks
-		 * never point at garbage.
-		 */
-		if ((error = bwrite(bp)) != 0) {
-			ffs_blkfree(ip, nb, fs->fs_bsize);
-			return (error);
-		}
-		ip->i_ffs_ib[indirs[0].in_off] = newb;
+
+                if (DOINGSOFTDEP(vp)) {
+                        softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
+                            newb, 0, fs->fs_bsize, 0, bp);
+                        bdwrite(bp);
+                } else {
+                        /*
+                         * Write synchronously so that indirect blocks
+                         * never point at garbage.
+                         */
+                        if ((error = bwrite(bp)) != 0)
+                                goto fail;
+                }
+		allocib = &ip->i_ffs_ib[indirs[0].in_off];
+		*allocib = nb;
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 	}
 	/*
@@ -201,7 +244,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
 		if (error) {
 			brelse(bp);
-			return (error);
+			goto fail;
 		}
 		bap = (daddr_t *)bp->b_data;
 		nb = bap[indirs[i].in_off];
@@ -218,20 +261,27 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
 				  &newb);
 		if (error) {
 			brelse(bp);
-			return (error);
+			goto fail;
 		}
 		nb = newb;
+		*allocblk++ = nb;
 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
 		nbp->b_blkno = fsbtodb(fs, nb);
 		clrbuf(nbp);
-		/*
-		 * Write synchronously so that indirect blocks
-		 * never point at garbage.
-		 */
-		if ((error = bwrite(nbp)) != 0) {
-			ffs_blkfree(ip, nb, fs->fs_bsize);
-			brelse(bp);
-			return (error);
+
+                if (DOINGSOFTDEP(vp)) {
+                        softdep_setup_allocindir_meta(nbp, ip, bp,
+                            indirs[i - 1].in_off, nb);
+                        bdwrite(nbp);
+                } else {
+                        /*
+                         * Write synchronously so that indirect blocks
+                         * never point at garbage.
+                         */
+                        if ((error = bwrite(nbp)) != 0) {
+                                brelse(bp);
+                                goto fail;
+                        }
 		}
 		bap[indirs[i - 1].in_off] = nb;
 		/*
@@ -253,13 +303,17 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
 				  &newb);
 		if (error) {
 			brelse(bp);
-			return (error);
+			goto fail;
 		}
 		nb = newb;
+		*allocblk++ = nb;
 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
 		nbp->b_blkno = fsbtodb(fs, nb);
 		if (flags & B_CLRBUF)
 			clrbuf(nbp);
+		if (DOINGSOFTDEP(vp))
+			softdep_setup_allocindir_page(ip, lbn, bp,
+			    indirs[i].in_off, nb, 0, nbp);
 		bap[indirs[i].in_off] = nb;
 		/*
 		 * If required, write synchronously, otherwise use
@@ -270,7 +324,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
 		} else {
 			bdwrite(bp);
 		}
-		*bpp = nbp;
+		*ap->a_bpp = nbp;
 		return (0);
 	}
 	brelse(bp);
@@ -278,12 +332,36 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
 		error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
 		if (error) {
 			brelse(nbp);
-			return (error);
+			goto fail;
 		}
 	} else {
 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
 		nbp->b_blkno = fsbtodb(fs, nb);
 	}
-	*bpp = nbp;
+	*ap->a_bpp = nbp;
 	return (0);
+
+fail:
+	/*
+	 * If we have failed part way through block allocation, we
+	 * have to deallocate any indirect blocks that we have allocated.
+	 */
+	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
+		ffs_blkfree(ip, *blkp, fs->fs_bsize);
+		deallocated += fs->fs_bsize;
+	}
+	if (allocib != NULL)
+		*allocib = 0;
+	if (deallocated) {
+#ifdef QUOTA
+		/*
+		 * Restore user's disk quota because allocation failed.
+		 */
+		(void) chkdq(ip, (long)-btodb(deallocated), cred, FORCE);
+#endif
+		ip->i_ffs_blocks -= btodb(deallocated);
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	}
+	return (error);
+
 }
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index 94ca01ad634..3fe5a46bfa8 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ffs_extern.h,v 1.2 1996/02/27 07:27:36 niklas Exp $	*/
+/*	$OpenBSD: ffs_extern.h,v 1.3 1997/10/06 15:26:29 csapuntz Exp $	*/
 /*	$NetBSD: ffs_extern.h,v 1.4 1996/02/09 22:22:22 christos Exp $	*/
 
 /*-
@@ -36,6 +36,21 @@
  *	@(#)ffs_extern.h	8.3 (Berkeley) 4/16/94
  */
 
+#define FFS_CLUSTERREAD		1	/* cluster reading enabled */
+#define FFS_CLUSTERWRITE	2	/* cluster writing enabled */
+#define FFS_REALLOCBLKS		3	/* block reallocation enabled */
+#define FFS_ASYNCFREE		4	/* asynchronous block freeing enabled */
+#define	FFS_MAXID		5	/* number of valid ffs ids */
+
+#define FFS_NAMES { \
+	{ 0, 0 }, \
+	{ "doclusterread", CTLTYPE_INT }, \
+	{ "doclusterwrite", CTLTYPE_INT }, \
+	{ "doreallocblks", CTLTYPE_INT }, \
+	{ "doasyncfree", CTLTYPE_INT }, \
+}
+
+
 struct buf;
 struct fid;
 struct fs;
@@ -47,6 +62,7 @@ struct statfs;
 struct timeval;
 struct ucred;
 struct ufsmount;
+struct vfsconf;
 struct uio;
 struct vnode;
 struct mbuf;
@@ -67,11 +83,10 @@ int ffs_vfree __P((void *));
 void ffs_clusteracct __P((struct fs *, struct cg *, daddr_t, int));
 
 /* ffs_balloc.c */
-int ffs_balloc __P((struct inode *, daddr_t, int, struct ucred *,
-		    struct buf **, int));
+int ffs_balloc __P((void *));
 
 /* ffs_inode.c */
-void ffs_init __P((void));
+int ffs_init __P((struct vfsconf *));
 int ffs_update __P((void *));
 int ffs_truncate __P((void *));
 
@@ -81,6 +96,8 @@ void ffs_fragacct __P((struct fs *, int, int32_t[], int));
 #ifdef DIAGNOSTIC
 void	ffs_checkoverlap __P((struct buf *, struct inode *));
 #endif
+int   ffs_freefile __P((struct vop_vfree_args *));
+int   ffs_isfreeblock __P((struct fs *, unsigned char *, daddr_t));
 int ffs_isblock __P((struct fs *, unsigned char *, daddr_t));
 void ffs_clrblock __P((struct fs *, u_char *, daddr_t));
 void ffs_setblock __P((struct fs *, unsigned char *, daddr_t));
@@ -100,6 +117,8 @@ int ffs_vget __P((struct mount *, ino_t, struct vnode **));
 int ffs_fhtovp __P((struct mount *, struct fid *, struct mbuf *,
 		    struct vnode **, int *, struct ucred **));
 int ffs_vptofh __P((struct vnode *, struct fid *));
+int ffs_sysctl __P((int *, u_int, void *, size_t *, void *, size_t,
+		    struct proc *));
 int ffs_sbupdate __P((struct ufsmount *, int));
 int ffs_cgupdate __P((struct ufsmount *, int));
 
@@ -108,6 +127,38 @@ int ffs_read __P((void *));
 int ffs_write __P((void *));
 int ffs_fsync __P((void *));
 int ffs_reclaim __P((void *));
+
+
+/*
+ * Soft dependency function prototypes.
+ */
+
+struct vop_vfree_args;
+struct vop_fsync_args;
+
+void  softdep_initialize __P((void));
+int   softdep_process_worklist __P((struct mount *));
+int   softdep_mount __P((struct vnode *, struct mount *, struct fs *,
+          struct ucred *));
+int   softdep_flushfiles __P((struct mount *, int, struct proc *));
+void  softdep_update_inodeblock __P((struct inode *, struct buf *, int));
+void  softdep_load_inodeblock __P((struct inode *));
+int   softdep_fsync __P((struct vnode *));
+void  softdep_freefile __P((struct vop_vfree_args *));
+void  softdep_setup_freeblocks __P((struct inode *, off_t));
+void  softdep_deallocate_dependencies __P((struct buf *));
+void  softdep_setup_inomapdep __P((struct buf *, struct inode *, ino_t));
+void  softdep_setup_blkmapdep __P((struct buf *, struct fs *, daddr_t));
+void  softdep_setup_allocdirect __P((struct inode *, ufs_lbn_t, daddr_t,
+          daddr_t, long, long, struct buf *));
+void  softdep_setup_allocindir_meta __P((struct buf *, struct inode *,
+          struct buf *, int, daddr_t));
+void  softdep_setup_allocindir_page __P((struct inode *, ufs_lbn_t,
+          struct buf *, int, daddr_t, daddr_t, struct buf *));
+void  softdep_disk_io_initiation __P((struct buf *));
+void  softdep_disk_write_complete __P((struct buf *));
+int   softdep_sync_metadata __P((struct vop_fsync_args *));
+
 __END_DECLS
 
 extern int (**ffs_vnodeop_p) __P((void *));
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index 488841b5e7f..ba1eb996cb9 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ffs_inode.c,v 1.6 1997/05/30 08:34:21 downsj Exp $	*/
+/*	$OpenBSD: ffs_inode.c,v 1.7 1997/10/06 15:26:30 csapuntz Exp $	*/
 /*	$NetBSD: ffs_inode.c,v 1.10 1996/05/11 18:27:19 mycroft Exp $	*/
 
 /*
@@ -61,10 +61,12 @@
 static int ffs_indirtrunc __P((struct inode *, daddr_t, daddr_t, daddr_t, int,
 			       long *));
 
-void
-ffs_init()
+int
+ffs_init(vfsp)
+	struct vfsconf *vfsp;
 {
-	ufs_init();
+	softdep_initialize();
+	return (ufs_init(vfsp));
 }
 
 /*
@@ -101,7 +103,8 @@ ffs_update(v)
 		ip->i_flag &= ~IN_ACCESS;
 	}
 	if ((ip->i_flag &
-	    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0)
+	    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
+	    ap->a_waitfor != MNT_WAIT)
 		return (0);
 	if (ip->i_flag & IN_ACCESS) {
 		ip->i_ffs_atime = ap->a_access->tv_sec;
@@ -133,11 +136,17 @@ ffs_update(v)
 		brelse(bp);
 		return (error);
 	}
+
+	if (DOINGSOFTDEP(ap->a_vp))
+		softdep_update_inodeblock(ip, bp, ap->a_waitfor);
+	else if (ip->i_effnlink != ip->i_ffs_nlink) 
+		panic("ffs_update: bad link cnt");
+
 	*((struct dinode *)bp->b_data +
 	    ino_to_fsbo(fs, ip->i_number)) = ip->i_din.ffs_din;
-	if (ap->a_waitfor)
+	if (ap->a_waitfor && (ap->a_vp->v_mount->mnt_flag & MNT_ASYNC) == 0) {
 		return (bwrite(bp));
-	else {
+	} else {
 		bdwrite(bp);
 		return (0);
 	}
@@ -179,6 +188,8 @@ ffs_truncate(v)
 	if (length < 0)
 		return (EINVAL);
 	oip = VTOI(ovp);
+	if (oip->i_ffs_size == length)
+		return (0);
 	TIMEVAL_TO_TIMESPEC(&time, &ts);
 	if (ovp->v_type == VLNK &&
 	    (oip->i_ffs_size < ovp->v_mount->mnt_maxsymlinklen ||
@@ -202,8 +213,34 @@ ffs_truncate(v)
 		return (error);
 #endif
 	vnode_pager_setsize(ovp, (u_long)length);
+	ovp->v_lasta = ovp->v_clen = ovp->v_cstart = ovp->v_lastw = 0;
+	if (DOINGSOFTDEP(ovp)) {
+		if (length > 0) {
+			/*
+			 * If a file is only partially truncated, then
+			 * we have to clean up the data structures
+			 * describing the allocation past the truncation
+			 * point. Finding and deallocating those structures
+			 * is a lot of work. Since partial truncation occurs
+			 * rarely, we solve the problem by syncing the file
+			 * so that it will have no data structures left.
+			 */
+			if ((error = VOP_FSYNC(ovp, ap->a_cred, MNT_WAIT,
+					       ap->a_p)) != 0)
+				return (error);
+		} else {
+#ifdef QUOTA
+			(void) chkdq(oip, -oip->i_ffs_blocks, NOCRED, 0);
+#endif
+			softdep_setup_freeblocks(oip, length);
+			(void) vinvalbuf(ovp, 0, ap->a_cred, ap->a_p, 0, 0);
+			oip->i_flag |= IN_CHANGE | IN_UPDATE;
+			return (VOP_UPDATE(ovp, &ts, &ts, 0));
+		}
+	}
+
 	fs = oip->i_fs;
-	osize = oip->i_ffs_size;
+	osize = oip->i_ffs_size; 
 	/*
 	 * Lengthen the size of the file. We must ensure that the
 	 * last byte of the file is allocated. Since the smallest
@@ -217,11 +254,12 @@ ffs_truncate(v)
 		aflags = B_CLRBUF;
 		if (ap->a_flags & IO_SYNC)
 			aflags |= B_SYNC;
-		error = ffs_balloc(oip, lbn, offset + 1, ap->a_cred, &bp,
-				   aflags);
+		error = VOP_BALLOC(ovp, length -1, 1, 
+				   ap->a_cred, aflags, &bp);
 		if (error)
 			return (error);
 		oip->i_ffs_size = length;
+		vnode_pager_setsize(ovp, (u_long)length);
 		(void) vnode_pager_uncache(ovp);
 		if (aflags & B_SYNC)
 			bwrite(bp);
@@ -230,6 +268,8 @@ ffs_truncate(v)
 		oip->i_flag |= IN_CHANGE | IN_UPDATE;
 		return (VOP_UPDATE(ovp, &ts, &ts, 1));
 	}
+	vnode_pager_setsize(ovp, (u_long)length);
+
 	/*
 	 * Shorten the size of the file. If the file is not being
 	 * truncated to a block boundry, the contents of the
@@ -245,7 +285,8 @@ ffs_truncate(v)
 		aflags = B_CLRBUF;
 		if (ap->a_flags & IO_SYNC)
 			aflags |= B_SYNC;
-		error = ffs_balloc(oip, lbn, offset, ap->a_cred, &bp, aflags);
+		error = VOP_BALLOC(ovp, length - 1, 1,
+				   ap->a_cred, aflags, &bp);
 		if (error)
 			return (error);
 		oip->i_ffs_size = length;
diff --git a/sys/ufs/ffs/ffs_subr.c b/sys/ufs/ffs/ffs_subr.c
index e5d0c350387..7e5e417cf53 100644
--- a/sys/ufs/ffs/ffs_subr.c
+++ b/sys/ufs/ffs/ffs_subr.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ffs_subr.c,v 1.3 1996/04/21 22:32:33 deraadt Exp $	*/
+/*	$OpenBSD: ffs_subr.c,v 1.4 1997/10/06 15:26:31 csapuntz Exp $	*/
 /*	$NetBSD: ffs_subr.c,v 1.6 1996/03/17 02:16:23 christos Exp $	*/
 
 /*
@@ -42,10 +42,10 @@
 #ifdef _KERNEL
 #include <sys/systm.h>
 #include <sys/vnode.h>
-#include <ufs/ffs/ffs_extern.h>
 #include <sys/buf.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
+#include <ufs/ffs/ffs_extern.h>
 
 /*
  * Return buffer with the contents of block "offset" from the beginning of
@@ -240,3 +240,30 @@ ffs_setblock(fs, cp, h)
 		panic("ffs_setblock");
 	}
 }
+
+
+/*
+ * Check if a block is free (all fragment bits of block h in cp are clear).
+ */
+int
+ffs_isfreeblock(fs, cp, h)
+      struct fs *fs;
+      unsigned char *cp;
+      daddr_t h;
+{
+
+      switch ((int)fs->fs_frag) {
+      case 8:
+              return (cp[h] == 0);
+      case 4:
+              return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0);
+      case 2:
+              return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0);
+      case 1:
+              return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0);
+      default:
+              panic("ffs_isfreeblock");
+      }
+}
+
+
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index b70f7b0db8d..7b5f8b2463a 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ffs_vfsops.c,v 1.9 1997/06/20 14:04:32 kstailey Exp $	*/
+/*	$OpenBSD: ffs_vfsops.c,v 1.10 1997/10/06 15:26:31 csapuntz Exp $	*/
 /*	$NetBSD: ffs_vfsops.c,v 1.19 1996/02/09 22:22:26 christos Exp $	*/
 
 /*
@@ -51,6 +51,7 @@
 #include <sys/ioctl.h>
 #include <sys/errno.h>
 #include <sys/malloc.h>
+#include <sys/sysctl.h>
 
 #include <dev/rndvar.h>
 
@@ -68,7 +69,6 @@
 int ffs_sbupdate __P((struct ufsmount *, int));
 
 struct vfsops ffs_vfsops = {
-	MOUNT_FFS,
 	ffs_mount,
 	ufs_start,
 	ffs_unmount,
@@ -80,61 +80,53 @@ struct vfsops ffs_vfsops = {
 	ffs_fhtovp,
 	ffs_vptofh,
 	ffs_init,
+	ffs_sysctl
 };
 
 extern u_long nextgennumber;
 
 /*
  * Called by main() when ufs is going to be mounted as root.
- *
- * Name is updated by mount(8) after booting.
  */
-#define ROOTNAME	"root_device"
 
 int
 ffs_mountroot()
 {
 	extern struct vnode *rootvp;
-	register struct fs *fs;
-	register struct mount *mp;
+	struct fs *fs;
+	struct mount *mp;
 	struct proc *p = curproc;	/* XXX */
 	struct ufsmount *ump;
-	size_t size;
 	int error;
 	
 	/*
 	 * Get vnodes for swapdev and rootdev.
 	 */
-	if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
-		panic("ffs_mountroot: can't setup bdevvp's");
-
-	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
-	bzero((char *)mp, (u_long)sizeof(struct mount));
-	mp->mnt_op = &ffs_vfsops;
-	mp->mnt_flag = MNT_RDONLY;
-	if ((error = ffs_mountfs(rootvp, mp, p)) != 0) {
-		free(mp, M_MOUNT);
+	if ((error = bdevvp(swapdev, &swapdev_vp)) || 
+	    (error = bdevvp(rootdev, &rootvp))) {
+		printf("ffs_mountroot: can't setup bdevvp's");
 		return (error);
 	}
-	if ((error = vfs_lock(mp)) != 0) {
-		(void)ffs_unmount(mp, 0, p);
-		free(mp, M_MOUNT);
+
+	if ((error = vfs_rootmountalloc("ffs", "root_device", &mp)) != 0)
 		return (error);
-	}
+	if ((error = ffs_mountfs(rootvp, mp, p)) != 0) {
+		mp->mnt_vfc->vfc_refcount--;
+		vfs_unbusy(mp, p);
+ 		free(mp, M_MOUNT);
+ 		return (error);
+ 	}
+	simple_lock(&mountlist_slock);
 	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
-	mp->mnt_vnodecovered = NULLVP;
-	ump = VFSTOUFS(mp);
-	fs = ump->um_fs;
-	bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt));
-	fs->fs_fsmnt[0] = '/';
-	bcopy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MNAMELEN);
-	(void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
-	    &size);
-	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
-	(void)ffs_statfs(mp, &mp->mnt_stat, p);
-	vfs_unlock(mp);
-	inittodr(fs->fs_time);
-	return (0);
+	simple_unlock(&mountlist_slock);
+ 	ump = VFSTOUFS(mp);
+ 	fs = ump->um_fs;
+	(void) copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
+ 	(void)ffs_statfs(mp, &mp->mnt_stat, p);
+
+	vfs_unbusy(mp, p);
+ 	inittodr(fs->fs_time);
+ 	return (0);
 }
 
 /*
@@ -172,8 +164,6 @@ ffs_mount(mp, path, data, ndp, p)
 			flags = WRITECLOSE;
 			if (mp->mnt_flag & MNT_FORCE)
 				flags |= FORCECLOSE;
-			if (vfs_busy(mp))
-				return (EBUSY);
 			error = ffs_flushfiles(mp, flags, p);
 			if (error == 0 &&
 			    ffs_cgupdate(ump, MNT_WAIT) == 0 &&
@@ -181,7 +171,6 @@ ffs_mount(mp, path, data, ndp, p)
 				fs->fs_clean = FS_ISCLEAN;
 				(void) ffs_sbupdate(ump, MNT_WAIT);
 			}
-			vfs_unbusy(mp);
 			if (error)
 				return (error);
 			fs->fs_ronly = 1;
@@ -198,18 +187,19 @@ ffs_mount(mp, path, data, ndp, p)
 			 */
 			if (p->p_ucred->cr_uid != 0) {
 				devvp = ump->um_devvp;
-				VOP_LOCK(devvp);
+				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
 				error = VOP_ACCESS(devvp, VREAD | VWRITE,
 						   p->p_ucred, p);
 				if (error) {
-					VOP_UNLOCK(devvp);
+					VOP_UNLOCK(devvp, 0, p);
 					return (error);
 				}
-				VOP_UNLOCK(devvp);
+				VOP_UNLOCK(devvp, 0, p);
 			}
 			fs->fs_ronly = 0;
 			fs->fs_clean <<= 1;
 			fs->fs_fmod = 1;
+			(void) ffs_sbupdate(ump, MNT_WAIT);
 		}
 		if (args.fspec == 0) {
 			/*
@@ -243,13 +233,13 @@ ffs_mount(mp, path, data, ndp, p)
 		accessmode = VREAD;
 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
 			accessmode |= VWRITE;
-		VOP_LOCK(devvp);
+		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
 		error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
 		if (error) {
 			vput(devvp);
 			return (error);
 		}
-		VOP_UNLOCK(devvp);
+		VOP_UNLOCK(devvp, 0, p);
 	}
 	if ((mp->mnt_flag & MNT_UPDATE) == 0)
 		error = ffs_mountfs(devvp, mp, p);
@@ -317,8 +307,12 @@ ffs_reload(mountp, cred, p)
 	 * Step 1: invalidate all cached meta-data.
 	 */
 	devvp = VFSTOUFS(mountp)->um_devvp;
-	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
+	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
+	error = vinvalbuf(devvp, 0, cred, p, 0, 0);
+	VOP_UNLOCK(devvp, 0, p);
+	if (error)
 		panic("ffs_reload: dirty1");
+
 	/*
 	 * Step 2: re-read superblock from disk.
 	 */
@@ -375,19 +369,26 @@ ffs_reload(mountp, cred, p)
 	}
 
 loop:
+	simple_lock(&mntvnode_slock);
 	for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
+		if (vp->v_mount != mountp) {
+			simple_unlock(&mntvnode_slock);
+			goto loop;
+		}
+
 		nvp = vp->v_mntvnodes.le_next;
 		/*
 		 * Step 4: invalidate all inactive vnodes.
 		 */
-		if (vp->v_usecount == 0) {
-			vgone(vp);
-			continue;
-		}
+		if (vrecycle(vp, &mntvnode_slock, p))
+			goto loop;
+
 		/*
 		 * Step 5: invalidate all cached file data.
 		 */
-		if (vget(vp, 1))
+		simple_lock(&vp->v_interlock);
+		simple_unlock(&mntvnode_slock);
+		if (vget(vp, LK_EXCLUSIVE  | LK_INTERLOCK, p))
 			goto loop;
 		if (vinvalbuf(vp, 0, cred, p, 0, 0))
 			panic("ffs_reload: dirty2");
@@ -403,11 +404,12 @@ loop:
 		}
 		ip->i_din.ffs_din = *((struct dinode *)bp->b_data +
 		    ino_to_fsbo(fs, ip->i_number));
+		ip->i_effnlink = ip->i_ffs_nlink;
 		brelse(bp);
 		vput(vp);
-		if (vp->v_mount != mountp)
-			goto loop;
+		simple_lock(&mntvnode_slock);
 	}
+	simple_unlock(&mntvnode_slock);
 	return (0);
 }
 
@@ -426,8 +428,7 @@ ffs_mountfs(devvp, mp, p)
 	dev_t dev;
 	struct partinfo dpart;
 	caddr_t base, space;
-	int blks;
-	int error, i, size, ronly;
+	int error, i, blks, size, ronly;
 	int32_t *lp;
 	struct ucred *cred;
 	extern struct vnode *rootvp;
@@ -445,7 +446,10 @@ ffs_mountfs(devvp, mp, p)
 		return (error);
 	if (vcount(devvp) > 1 && devvp != rootvp)
 		return (EBUSY);
-	if ((error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) != 0)
+	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
+	error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0);
+	VOP_UNLOCK(devvp, 0, p);
+	if (error)
 		return (error);
 
 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
@@ -484,10 +488,6 @@ ffs_mountfs(devvp, mp, p)
 	bp = NULL;
 	fs = ump->um_fs;
 	fs->fs_ronly = ronly;
-	if (ronly == 0) {
-		fs->fs_clean <<= 1;
-		fs->fs_fmod = 1;
-	}
 	size = fs->fs_cssize;
 	blks = howmany(size, fs->fs_fsize);
 	if (fs->fs_contigsumsize > 0)
@@ -520,9 +520,8 @@ ffs_mountfs(devvp, mp, p)
 	if (fs->fs_id[0] != 0 && fs->fs_id[1] != 0)
 		mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
 	else
-		mp->mnt_stat.f_fsid.val[1] = makefstype(MOUNT_FFS);
+		mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
-	mp->mnt_flag |= MNT_LOCAL;
 	ump->um_mountp = mp;
 	ump->um_dev = dev;
 	ump->um_devvp = devvp;
@@ -531,14 +530,24 @@ ffs_mountfs(devvp, mp, p)
 	ump->um_seqinc = fs->fs_frag;
 	for (i = 0; i < MAXQUOTAS; i++)
 		ump->um_quotas[i] = NULLVP;
-	devvp->v_specflags |= SI_MOUNTEDON;
+	devvp->v_specmountpoint = mp;
 	ffs_oldfscompat(fs);
 	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
 	maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1;	/* XXX */
 	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
 		fs->fs_maxfilesize = maxfilesize;		/* XXX */
+	if (ronly == 0) {
+		if ((fs->fs_flags & FS_DOSOFTDEP) &&
+		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
+			free(base, M_UFSMNT);
+			goto out;
+		}
+		fs->fs_clean = 0;
+		(void) ffs_sbupdate(ump, MNT_WAIT);
+	}
 	return (0);
 out:
+	devvp->v_specmountpoint = NULL;
 	if (bp)
 		brelse(bp);
 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
@@ -595,8 +604,14 @@ ffs_unmount(mp, mntflags, p)
 	flags = 0;
 	if (mntflags & MNT_FORCE)
 		flags |= FORCECLOSE;
-	if ((error = ffs_flushfiles(mp, flags, p)) != 0)
-		return (error);
+	if (mp->mnt_flag & MNT_SOFTDEP) {
+		if ((error = softdep_flushfiles(mp, flags, p)) != 0)
+			return (error);
+	} else {
+		if ((error = ffs_flushfiles(mp, flags, p)) != 0)
+			return (error);
+	}
+
 	ump = VFSTOUFS(mp);
 	fs = ump->um_fs;
 	if (fs->fs_ronly == 0 &&
@@ -605,7 +620,7 @@ ffs_unmount(mp, mntflags, p)
 		fs->fs_clean = FS_ISCLEAN;
 		(void) ffs_sbupdate(ump, MNT_WAIT);
 	}
-	ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
+	ump->um_devvp->v_specmountpoint = NULL;
 	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
 		NOCRED, p);
 	vrele(ump->um_devvp);
@@ -613,7 +628,6 @@ ffs_unmount(mp, mntflags, p)
 	free(fs, M_UFSMNT);
 	free(ump, M_UFSMNT);
 	mp->mnt_data = (qaddr_t)0;
-	mp->mnt_flag &= ~MNT_LOCAL;
 	return (error);
 }
 
@@ -626,12 +640,9 @@ ffs_flushfiles(mp, flags, p)
 	int flags;
 	struct proc *p;
 {
-	extern int doforce;
 	register struct ufsmount *ump;
 	int error;
 
-	if (!doforce)
-		flags &= ~FORCECLOSE;
 	ump = VFSTOUFS(mp);
 #ifdef QUOTA
 	if (mp->mnt_flag & MNT_QUOTA) {
@@ -649,7 +660,17 @@ ffs_flushfiles(mp, flags, p)
 		 */
 	}
 #endif
-	error = vflush(mp, NULLVP, flags);
+	/*
+	 * Flush all the files.
+	 */
+	if ((error = vflush(mp, NULL, flags)) != 0)
+		return (error);
+	/*
+	 * Flush filesystem metadata.
+	 */
+	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
+	error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p);
+	VOP_UNLOCK(ump->um_devvp, 0, p);
 	return (error);
 }
 
@@ -684,10 +705,11 @@ ffs_statfs(mp, sbp, p)
 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
 	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
 	if (sbp != &mp->mnt_stat) {
+		sbp->f_type = mp->mnt_vfc->vfc_typenum;
 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
 	}
-	strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN);
+	strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN);
 	return (0);
 }
 
@@ -705,7 +727,7 @@ ffs_sync(mp, waitfor, cred, p)
 	struct ucred *cred;
 	struct proc *p;
 {
-	register struct vnode *vp;
+	register struct vnode *vp, *nvp;
 	register struct inode *ip;
 	register struct ufsmount *ump = VFSTOUFS(mp);
 	register struct fs *fs;
@@ -717,49 +739,71 @@ ffs_sync(mp, waitfor, cred, p)
 	 * Consistency check that the superblock
 	 * is still in the buffer cache.
 	 */
-	if (fs->fs_fmod != 0) {
-		if (fs->fs_ronly != 0) {		/* XXX */
-			printf("fs = %s\n", fs->fs_fsmnt);
-			panic("update: rofs mod");
-		}
-		fs->fs_fmod = 0;
-		fs->fs_time = time.tv_sec;
-		allerror = ffs_cgupdate(ump, waitfor);
+	if (fs->fs_fmod != 0  && fs->fs_ronly != 0) {
+		printf("fs = %s\n", fs->fs_fsmnt);
+		panic("update: rofs mod");
 	}
 	/*
 	 * Write back each (modified) inode.
 	 */
+	simple_lock(&mntvnode_slock);
 loop:
 	for (vp = mp->mnt_vnodelist.lh_first;
 	     vp != NULL;
-	     vp = vp->v_mntvnodes.le_next) {
+	     vp = nvp) {
 		/*
 		 * If the vnode that we are about to sync is no longer
 		 * associated with this mount point, start over.
 		 */
 		if (vp->v_mount != mp)
 			goto loop;
-		if (VOP_ISLOCKED(vp))
-			continue;
+
+		simple_lock(&vp->v_interlock);
+		nvp = vp->v_mntvnodes.le_next;
 		ip = VTOI(vp);
-		if ((ip->i_flag &
-		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
-		    vp->v_dirtyblkhd.lh_first == NULL)
+		if (vp->v_type == VNON || ((ip->i_flag &
+		     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
+		     vp->v_dirtyblkhd.lh_first == NULL) || 
+		    waitfor == MNT_LAZY) {
+			simple_unlock(&vp->v_interlock);
 			continue;
-		if (vget(vp, 1))
-			goto loop;
+		}
+		simple_unlock(&mntvnode_slock);
+       		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
+		if (error) {
+			simple_lock(&mntvnode_slock);
+			if (error == ENOENT)
+				goto loop;
+			continue;
+		}
 		if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0)
 			allerror = error;
-		vput(vp);
+		VOP_UNLOCK(vp, 0, p);
+		vrele(vp);
+		simple_lock(&mntvnode_slock);
 	}
+	simple_unlock(&mntvnode_slock);
 	/*
 	 * Force stale file system control information to be flushed.
 	 */
-	if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
-		allerror = error;
+        if (waitfor != MNT_LAZY) {
+                if (ump->um_mountp->mnt_flag & MNT_SOFTDEP)
+                        waitfor = MNT_NOWAIT;
+                vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
+                if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
+                        allerror = error;
+                VOP_UNLOCK(ump->um_devvp, 0, p);
+        }
 #ifdef QUOTA
 	qsync(mp);
 #endif
+	/*
+         * Write back modified superblock.
+	 */
+
+	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
+		allerror = error;
+
 	return (allerror);
 }
 
@@ -796,6 +840,7 @@ ffs_vget(mp, ino, vpp)
 	type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */
 	MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);
 	bzero((caddr_t)ip, sizeof(struct inode));
+	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
 	vp->v_data = ip;
 	ip->i_vnode = vp;
 	ip->i_fs = fs = ump->um_fs;
@@ -833,6 +878,10 @@ ffs_vget(mp, ino, vpp)
 		return (error);
 	}
 	ip->i_din.ffs_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
+	if (DOINGSOFTDEP(vp))
+		softdep_load_inodeblock(ip);
+	else
+		ip->i_effnlink = ip->i_ffs_nlink;
 	brelse(bp);
 
 	/*
@@ -965,7 +1014,7 @@ ffs_cgupdate(mp, waitfor)
 	struct ufsmount *mp;
 	int waitfor;
 {
-	register struct fs *fs = mp->um_fs;
+	register struct fs *fs = mp->um_fs, *dfs;
 	register struct buf *bp;
 	int blks;
 	caddr_t space;
@@ -987,7 +1036,74 @@ ffs_cgupdate(mp, waitfor)
 		else
 			bawrite(bp);
 	}
-	if (!allerror && error)
+
+	/*
+	 * Now write back the superblock itself. If any errors occurred
+	 * up to this point, then fail so that the superblock avoids
+	 * being written out as clean.
+	 */
+	if (allerror)
+		return (allerror);
+	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
+	fs->fs_fmod = 0;
+	fs->fs_time = time.tv_sec;
+	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
+	/* Restore compatibility to old file systems.		   XXX */
+	dfs = (struct fs *)bp->b_data;				/* XXX */
+	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
+		dfs->fs_nrpos = -1;				/* XXX */
+	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
+		int32_t *lp, tmp;				/* XXX */
+								/* XXX */
+		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
+		tmp = lp[4];					/* XXX */
+		for (i = 4; i > 0; i--)				/* XXX */
+			lp[i] = lp[i-1];			/* XXX */
+		lp[0] = tmp;					/* XXX */
+	}							/* XXX */
+	dfs->fs_maxfilesize = mp->um_savedmaxfilesize;		/* XXX */
+	if (waitfor != MNT_WAIT)
+		bawrite(bp);
+	else if ((error = bwrite(bp)) != 0)
 		allerror = error;
+
 	return (allerror);
 }
+
+/*
+ * Sysctl handler for the fast filesystem (FFS) related variables.
+ */
+int
+ffs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+	int *name;
+	u_int namelen;
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+	struct proc *p;
+{
+	extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree;
+
+	/* all sysctl names at this level are terminal */
+	if (namelen != 1)
+		return (ENOTDIR);		/* overloaded */
+
+	switch (name[0]) {
+	case FFS_CLUSTERREAD:
+		return (sysctl_int(oldp, oldlenp, newp, newlen,
+		    &doclusterread));
+	case FFS_CLUSTERWRITE:
+		return (sysctl_int(oldp, oldlenp, newp, newlen,
+		    &doclusterwrite));
+	case FFS_REALLOCBLKS:
+		return (sysctl_int(oldp, oldlenp, newp, newlen,
+		    &doreallocblks));
+	case FFS_ASYNCFREE:
+		return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree));
+	default:
+		return (EOPNOTSUPP);
+	}
+	/* NOTREACHED */
+}
+
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index e9462ff50be..088ba291a3a 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ffs_vnops.c,v 1.3 1996/05/22 11:47:18 deraadt Exp $	*/
+/*	$OpenBSD: ffs_vnops.c,v 1.4 1997/10/06 15:26:32 csapuntz Exp $	*/
 /*	$NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $	*/
 
 /*
@@ -82,6 +82,7 @@ struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
 	{ &vop_lease_desc, ufs_lease_check },		/* lease */
 	{ &vop_ioctl_desc, ufs_ioctl },			/* ioctl */
 	{ &vop_select_desc, ufs_select },		/* select */
+	{ &vop_revoke_desc, ufs_revoke },               /* revoke */
 	{ &vop_mmap_desc, ufs_mmap },			/* mmap */
 	{ &vop_fsync_desc, ffs_fsync },			/* fsync */
 	{ &vop_seek_desc, ufs_seek },			/* seek */
@@ -106,6 +107,7 @@ struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
 	{ &vop_advlock_desc, ufs_advlock },		/* advlock */
 	{ &vop_blkatoff_desc, ffs_blkatoff },		/* blkatoff */
 	{ &vop_valloc_desc, ffs_valloc },		/* valloc */
+	{ &vop_balloc_desc, ffs_balloc },               /* balloc */
 	{ &vop_reallocblks_desc, ffs_reallocblks },	/* reallocblks */
 	{ &vop_vfree_desc, ffs_vfree },			/* vfree */
 	{ &vop_truncate_desc, ffs_truncate },		/* truncate */
@@ -132,6 +134,7 @@ struct vnodeopv_entry_desc ffs_specop_entries[] = {
 	{ &vop_lease_desc, spec_lease_check },		/* lease */
 	{ &vop_ioctl_desc, spec_ioctl },		/* ioctl */
 	{ &vop_select_desc, spec_select },		/* select */
+	{ &vop_revoke_desc, spec_revoke },              /* revoke */
 	{ &vop_mmap_desc, spec_mmap },			/* mmap */
 	{ &vop_fsync_desc, ffs_fsync },			/* fsync */
 	{ &vop_seek_desc, spec_seek },			/* seek */
@@ -183,6 +186,7 @@ struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
 	{ &vop_lease_desc, fifo_lease_check },		/* lease */
 	{ &vop_ioctl_desc, fifo_ioctl },		/* ioctl */
 	{ &vop_select_desc, fifo_select },		/* select */
+	{ &vop_revoke_desc, fifo_revoke },              /* revoke */
 	{ &vop_mmap_desc, fifo_mmap },			/* mmap */
 	{ &vop_fsync_desc, ffs_fsync },			/* fsync */
 	{ &vop_seek_desc, fifo_seek },			/* seek */
@@ -218,20 +222,11 @@ struct vnodeopv_desc ffs_fifoop_opv_desc =
 	{ &ffs_fifoop_p, ffs_fifoop_entries };
 #endif /* FIFO */
 
-#ifdef DEBUG
 /*
  * Enabling cluster read/write operations.
  */
-#include <sys/sysctl.h>
 int doclusterread = 1;
-struct ctldebug debug11 = { "doclusterread", &doclusterread };
 int doclusterwrite = 1;
-struct ctldebug debug12 = { "doclusterwrite", &doclusterwrite };
-#else
-/* XXX for ufs_readwrite */
-#define doclusterread 1
-#define doclusterwrite 1
-#endif
 
 #include <ufs/ufs/ufs_readwrite.c>
 
@@ -249,12 +244,84 @@ ffs_fsync(v)
 		int a_waitfor;
 		struct proc *a_p;
 	} */ *ap = v;
-	register struct vnode *vp = ap->a_vp;
+	struct vnode *vp = ap->a_vp;
+	struct buf *bp, *nbp;
 	struct timespec ts;
+	int s, error, passes, skipmeta;
 
-	vflushbuf(vp, ap->a_waitfor == MNT_WAIT);
+	/* 
+	 * Flush all dirty buffers associated with a vnode
+	 */
+	passes = NIADDR;
+	skipmeta = 0;
+	if (ap->a_waitfor == MNT_WAIT)
+		skipmeta = 1;
+loop:
+	s = splbio();
+loop2:
+	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
+		nbp = bp->b_vnbufs.le_next;
+		if ((bp->b_flags & B_BUSY))
+			continue;
+		if ((bp->b_flags & B_DELWRI) == 0)
+			panic("ffs_fsync: not dirty");
+		if (skipmeta && bp->b_lblkno < 0)
+			continue;
+		bremfree(bp);
+		bp->b_flags |= B_BUSY;
+		splx(s);
+		/*
+		 * Wait for I/O associated with indirect blocks to complete,
+		 * since there is no way to quickly wait for them below.
+		 */
+		if (bp->b_vp == vp || ap->a_waitfor != MNT_WAIT)
+			(void) bawrite(bp);
+		else if ((error = bwrite(bp)) != 0)
+			return (error);
+		goto loop;
+	}
+	if (skipmeta) {
+		skipmeta = 0;
+		goto loop2;
+	}
+	if (ap->a_waitfor == MNT_WAIT) {
+                while (vp->v_numoutput) {
+                        vp->v_flag |= VBWAIT;
+                        sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
+                }
+		/*
+		 * Ensure that any filesystem metadata associated
+		 * with the vnode has been written.
+		 */
+		splx(s);
+		if ((error = softdep_sync_metadata(ap)) != 0)
+			return (error);
+		s = splbio();
+                if (vp->v_dirtyblkhd.lh_first) {
+                       /*
+                        * Block devices associated with filesystems may
+                        * have new I/O requests posted for them even if
+                        * the vnode is locked, so no amount of trying will
+                        * get them clean. Thus we give block devices a
+                        * good effort, then just give up. For all other file
+                        * types, go around and try again until it is clean.
+                        */
+                       if (passes > 0) {
+                               passes -= 1;
+                               goto loop2;
+                       }
+#ifdef DIAGNOSTIC
+		       if (vp->v_type != VBLK)
+			       vprint("ffs_fsync: dirty", vp);
+#endif
+                }
+        }
+        splx(s);
 	TIMEVAL_TO_TIMESPEC(&time, &ts);
-	return (VOP_UPDATE(ap->a_vp, &ts, &ts, ap->a_waitfor == MNT_WAIT));
+	if ((error = VOP_UPDATE(vp, &ts, &ts, ap->a_waitfor == MNT_WAIT)) != 0)               return (error);
+	if (DOINGSOFTDEP(vp) && ap->a_waitfor == MNT_WAIT)
+		error = softdep_fsync(vp);
+	return (error);
 }
 
 /*
@@ -266,11 +333,12 @@ ffs_reclaim(v)
 {
 	struct vop_reclaim_args /* {
 		struct vnode *a_vp;
+		struct proc *a_p;
 	} */ *ap = v;
 	register struct vnode *vp = ap->a_vp;
 	int error;
 
-	if ((error = ufs_reclaim(vp)) != 0)
+	if ((error = ufs_reclaim(vp, ap->a_p)) != 0)
 		return (error);
 	FREE(vp->v_data, VFSTOUFS(vp->v_mount)->um_devvp->v_tag == VT_MFS ?
 	    M_MFSNODE : M_FFSNODE);
diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h
index e5a17da3a22..2979a3c4fe9 100644
--- a/sys/ufs/ffs/fs.h
+++ b/sys/ufs/ffs/fs.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: fs.h,v 1.4 1997/05/30 08:34:28 downsj Exp $	*/
+/*	$OpenBSD: fs.h,v 1.5 1997/10/06 15:26:32 csapuntz Exp $	*/
 /*	$NetBSD: fs.h,v 1.6 1995/04/12 21:21:02 mycroft Exp $	*/
 
 /*
@@ -221,7 +221,7 @@ struct fs {
 	int8_t	 fs_fmod;		/* super block modified flag */
 	int8_t	 fs_clean;		/* file system is clean flag */
 	int8_t	 fs_ronly;		/* mounted read-only flag */
-	int8_t	 fs_flags;		/* currently unused flag */
+	int8_t	 fs_flags;		/* see FS_ below */
 	u_char	 fs_fsmnt[MAXMNTLEN];	/* name mounted on */
 /* these fields retain the current block allocation info */
 	int32_t	 fs_cgrotor;		/* last cg searched */
@@ -267,6 +267,12 @@ struct fs {
 #define FS_OPTTIME	0	/* minimize allocation time */
 #define FS_OPTSPACE	1	/* minimize disk fragmentation */
 
+/* 
+ * Filesystem flags.
+ */
+#define FS_UNCLEAN    0x01   /* filesystem not clean at mount */
+#define FS_DOSOFTDEP  0x02   /* filesystem using soft dependencies */
+
 /*
  * Rotational layout table format types
  */
@@ -490,6 +496,12 @@ struct ocg {
 	    ? (fs)->fs_bsize \
 	    : (fragroundup(fs, blkoff(fs, (dip)->di_size))))
 
+#define sblksize(fs, size, lbn) \
+        (((lbn) >= NDADDR || (size) >= ((lbn) + 1) << (fs)->fs_bshift) \
+            ? (fs)->fs_bsize \
+            : (fragroundup(fs, blkoff(fs, (size)))))
+
+
 /*
  * Number of disk sectors per block/fragment; assumes DEV_BSIZE byte
  * sector size.
diff --git a/sys/ufs/mfs/mfs_extern.h b/sys/ufs/mfs/mfs_extern.h
index bd14c23226d..3616acedf76 100644
--- a/sys/ufs/mfs/mfs_extern.h
+++ b/sys/ufs/mfs/mfs_extern.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: mfs_extern.h,v 1.2 1996/02/27 07:15:46 niklas Exp $	*/
+/*	$OpenBSD: mfs_extern.h,v 1.3 1997/10/06 15:27:12 csapuntz Exp $	*/
 /*	$NetBSD: mfs_extern.h,v 1.4 1996/02/09 22:31:27 christos Exp $	*/
 
 /*-
@@ -43,6 +43,7 @@ struct proc;
 struct statfs;
 struct ucred;
 struct vnode;
+struct vfsconf;
 
 __BEGIN_DECLS
 /* mfs_vfsops.c */
@@ -53,7 +54,7 @@ int	mfs_mount	__P((struct mount *, char *, caddr_t,
 int	mfs_start	__P((struct mount *, int, struct proc *));
 int	mfs_statfs	__P((struct mount *, struct statfs *, struct proc *));
 
-void	mfs_init	__P((void));
+int	mfs_init	__P((struct vfsconf *));
 
 /* mfs_vnops.c */
 int	mfs_open	__P((void *));
@@ -65,6 +66,7 @@ int	mfs_close	__P((void *));
 int	mfs_inactive	__P((void *));
 int	mfs_reclaim	__P((void *));
 int	mfs_print	__P((void *));
+#define	mfs_revoke vop_revoke
 int	mfs_badop	__P((void *));
 
 __END_DECLS
diff --git a/sys/ufs/mfs/mfs_vfsops.c b/sys/ufs/mfs/mfs_vfsops.c
index 577325fe95b..dbd32e6ea2f 100644
--- a/sys/ufs/mfs/mfs_vfsops.c
+++ b/sys/ufs/mfs/mfs_vfsops.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: mfs_vfsops.c,v 1.2 1996/02/27 07:15:47 niklas Exp $	*/
+/*	$OpenBSD: mfs_vfsops.c,v 1.3 1997/10/06 15:27:12 csapuntz Exp $	*/
 /*	$NetBSD: mfs_vfsops.c,v 1.10 1996/02/09 22:31:28 christos Exp $	*/
 
 /*
@@ -69,7 +69,6 @@ extern int (**mfs_vnodeop_p) __P((void *));
  * mfs vfs operations.
  */
 struct vfsops mfs_vfsops = {
-	MOUNT_MFS,
 	mfs_mount,
 	mfs_start,
 	ffs_unmount,
@@ -81,37 +80,31 @@ struct vfsops mfs_vfsops = {
 	ffs_fhtovp,
 	ffs_vptofh,
 	mfs_init,
+	ffs_sysctl
 };
 
 /*
  * Called by main() when mfs is going to be mounted as root.
- *
- * Name is updated by mount(8) after booting.
  */
-#define ROOTNAME	"mfs_root"
 
 int
 mfs_mountroot()
 {
 	extern struct vnode *rootvp;
 	register struct fs *fs;
-	register struct mount *mp;
+	struct mount *mp;
 	struct proc *p = curproc;	/* XXX */
 	struct ufsmount *ump;
 	struct mfsnode *mfsp;
-	size_t size;
 	int error;
 
-	/*
-	 * Get vnodes for swapdev and rootdev.
-	 */
-	if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
-		panic("mfs_mountroot: can't setup bdevvp's");
-
-	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
-	bzero((char *)mp, (u_long)sizeof(struct mount));
-	mp->mnt_op = &mfs_vfsops;
-	mp->mnt_flag = MNT_RDONLY;
+	if ((error = bdevvp(swapdev, &swapdev_vp)) ||
+	    (error = bdevvp(rootdev, &rootvp))) {
+		printf("mfs_mountroot: can't setup bdevvp's");
+		return (error);
+	}
+	if ((error = vfs_rootmountalloc("mfs", "mfs_root", &mp)) != 0)
+		return (error);
 	mfsp = malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK);
 	rootvp->v_data = mfsp;
 	rootvp->v_op = mfs_vnodeop_p;
@@ -122,28 +115,20 @@ mfs_mountroot()
 	mfsp->mfs_pid = p->p_pid;
 	mfsp->mfs_buflist = (struct buf *)0;
 	if ((error = ffs_mountfs(rootvp, mp, p)) != 0) {
+		mp->mnt_vfc->vfc_refcount--;
+		vfs_unbusy(mp, p);
 		free(mp, M_MOUNT);
 		free(mfsp, M_MFSNODE);
 		return (error);
 	}
-	if ((error = vfs_lock(mp)) != 0) {
-		(void)ffs_unmount(mp, 0, p);
-		free(mp, M_MOUNT);
-		free(mfsp, M_MFSNODE);
-		return (error);
-	}
+	simple_lock(&mountlist_slock);
 	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
-	mp->mnt_vnodecovered = NULLVP;
+	simple_unlock(&mountlist_slock);
 	ump = VFSTOUFS(mp);
 	fs = ump->um_fs;
-	bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt));
-	fs->fs_fsmnt[0] = '/';
-	bcopy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MNAMELEN);
-	(void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
-	    &size);
-	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	(void) copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
 	(void)ffs_statfs(mp, &mp->mnt_stat, p);
-	vfs_unlock(mp);
+	vfs_unbusy(mp, p);
 	inittodr((time_t)0);
 	return (0);
 }
@@ -207,10 +192,7 @@ mfs_mount(mp, path, data, ndp, p)
 			flags = WRITECLOSE;
 			if (mp->mnt_flag & MNT_FORCE)
 				flags |= FORCECLOSE;
-			if (vfs_busy(mp))
-				return (EBUSY);
 			error = ffs_flushfiles(mp, flags, p);
-			vfs_unbusy(mp);
 			if (error)
 				return (error);
 		}
@@ -272,7 +254,6 @@ mfs_start(mp, flags, p)
 	register struct mfsnode *mfsp = VTOMFS(vp);
 	register struct buf *bp;
 	register caddr_t base;
-	int error = 0;
 
 	base = mfsp->mfs_baseoff;
 	while (mfsp->mfs_buflist != (struct buf *)-1) {
@@ -289,13 +270,11 @@ mfs_start(mp, flags, p)
 		 * otherwise we will loop here, as tsleep will always return
 		 * EINTR/ERESTART.
 		 */
-		if ((error = tsleep((caddr_t)vp, mfs_pri, "mfsidl", 0)) != 0) {
-			DOIO();
-			if (dounmount(mp, 0, p) != 0)
-				CLRSIG(p, CURSIG(p));
-		}
+		if (tsleep((caddr_t)vp, mfs_pri, "mfsidl", 0) &&
+		    dounmount(mp, 0, p) != 0)
+			CLRSIG(p, CURSIG(p));
 	}
-	return (error);
+	return (0);
 }
 
 /*
@@ -311,10 +290,10 @@ mfs_statfs(mp, sbp, p)
 
 	error = ffs_statfs(mp, sbp, p);
 #ifdef COMPAT_09
-	sbp->f_type = 3;
+	sbp->f_type = mp->mnt_vfc->vfc_typenum;
 #else
 	sbp->f_type = 0;
 #endif
-	strncpy(&sbp->f_fstypename[0], mp->mnt_op->vfs_name, MFSNAMELEN);
+	strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
 	return (error);
 }
diff --git a/sys/ufs/mfs/mfs_vnops.c b/sys/ufs/mfs/mfs_vnops.c
index 63b20a029bf..84a5ed3d368 100644
--- a/sys/ufs/mfs/mfs_vnops.c
+++ b/sys/ufs/mfs/mfs_vnops.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: mfs_vnops.c,v 1.4 1996/04/21 22:32:49 deraadt Exp $	*/
+/*	$OpenBSD: mfs_vnops.c,v 1.5 1997/10/06 15:27:13 csapuntz Exp $	*/
 /*	$NetBSD: mfs_vnops.c,v 1.8 1996/03/17 02:16:32 christos Exp $	*/
 
 /*
@@ -72,6 +72,7 @@ struct vnodeopv_entry_desc mfs_vnodeop_entries[] = {
 	{ &vop_write_desc, mfs_write },			/* write */
 	{ &vop_ioctl_desc, mfs_ioctl },			/* ioctl */
 	{ &vop_select_desc, mfs_select },		/* select */
+	{ &vop_revoke_desc, mfs_revoke },               /* revoke */
 	{ &vop_mmap_desc, mfs_mmap },			/* mmap */
 	{ &vop_fsync_desc, spec_fsync },		/* fsync */
 	{ &vop_seek_desc, mfs_seek },			/* seek */
@@ -231,6 +232,9 @@ mfs_bmap(v)
 		*ap->a_vpp = ap->a_vp;
 	if (ap->a_bnp != NULL)
 		*ap->a_bnp = ap->a_bn;
+	if (ap->a_runp != NULL)
+		*ap->a_runp = 0;
+
 	return (0);
 }
 
@@ -294,12 +298,14 @@ mfs_inactive(v)
 {
 	struct vop_inactive_args /* {
 		struct vnode *a_vp;
+		struct proc *a_p;
 	} */ *ap = v;
 	register struct mfsnode *mfsp = VTOMFS(ap->a_vp);
 
 	if (mfsp->mfs_buflist && mfsp->mfs_buflist != (struct buf *)(-1))
 		panic("mfs_inactive: not inactive (mfs_buflist %p)",
 			mfsp->mfs_buflist);
+	VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
 	return (0);
 }
 
@@ -352,8 +358,9 @@ mfs_badop(v)
 /*
  * Memory based filesystem initialization.
  */
-void
-mfs_init()
+int
+mfs_init(vfsp)
+	struct vfsconf *vfsp;
 {
-
+	return  (0);
 }
diff --git a/sys/ufs/mfs/mfsnode.h b/sys/ufs/mfs/mfsnode.h
index d37f7ba4e68..29c290c4e09 100644
--- a/sys/ufs/mfs/mfsnode.h
+++ b/sys/ufs/mfs/mfsnode.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: mfsnode.h,v 1.3 1996/06/11 03:25:15 tholo Exp $	*/
+/*	$OpenBSD: mfsnode.h,v 1.4 1997/10/06 15:27:13 csapuntz Exp $	*/
 /*	$NetBSD: mfsnode.h,v 1.3 1996/02/09 22:31:31 christos Exp $	*/
 
 /*
@@ -76,9 +76,9 @@ struct mfsnode {
 #define	mfs_readdir	mfs_badop
 #define	mfs_readlink	mfs_badop
 #define	mfs_abortop	mfs_badop
-#define	mfs_lock	nullop
-#define	mfs_unlock	nullop
-#define	mfs_islocked	nullop
+#define	mfs_lock	vop_nolock
+#define	mfs_unlock	vop_nounlock
+#define	mfs_islocked	vop_noislocked
 #define	mfs_pathconf	mfs_badop
 #define	mfs_advlock	mfs_badop
 #define	mfs_blkatoff	mfs_badop
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
index 9dcc48697f1..0a9a7a24151 100644
--- a/sys/ufs/ufs/inode.h
+++ b/sys/ufs/ufs/inode.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: inode.h,v 1.6 1997/05/30 15:18:49 downsj Exp $	*/
+/*	$OpenBSD: inode.h,v 1.7 1997/10/06 15:27:36 csapuntz Exp $	*/
 /*	$NetBSD: inode.h,v 1.8 1995/06/15 23:22:50 cgd Exp $	*/
 
 /*
@@ -45,6 +45,8 @@
 #include <ufs/ufs/dir.h>
 #include <ufs/ext2fs/ext2fs_dinode.h>
 
+typedef long ufs_lbn_t;
+
 /*
  * Per-filesystem inode extensions.
  */
@@ -63,13 +65,13 @@ struct ext2fs_inode_ext {
  * active, and is put back when the file is no longer being used.
  */
 struct inode {
-	struct	inode  *i_next;	/* Hash chain forward. */
-	struct	inode **i_prev;	/* Hash chain back. */
+	LIST_ENTRY(inode) i_hash; /* Hash chain */
 	struct	vnode  *i_vnode;/* Vnode associated with this inode. */
 	struct	vnode  *i_devvp;/* Vnode for block I/O. */
 	u_int32_t i_flag;	/* flags, see below */
 	dev_t	  i_dev;	/* Device associated with the inode. */
 	ino_t	  i_number;	/* The identity of the inode. */
+	int       i_effnlink;   /* i_nlink when I/O completes */
 
 	union {			/* Associated filesystem. */
 		struct	fs *fs;			/* FFS */
@@ -83,8 +85,8 @@ struct inode {
 	struct	 dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
 	u_quad_t i_modrev;	/* Revision level for NFS lease. */
 	struct	 lockf *i_lockf;/* Head of byte-level lock list. */
-	pid_t	 i_lockholder;	/* DEBUG: holder of inode lock. */
-	pid_t	 i_lockwaiter;	/* DEBUG: latest blocked for inode lock. */
+	struct   lock i_lock;   /* Inode lock */
+
 	/*
 	 * Side effects; used during directory lookup.
 	 */
@@ -180,14 +182,11 @@ struct inode {
 /* These flags are kept in i_flag. */
 #define	IN_ACCESS	0x0001		/* Access time update request. */
 #define	IN_CHANGE	0x0002		/* Inode change time update request. */
-#define	IN_EXLOCK	0x0004		/* File has exclusive lock. */
-#define	IN_LOCKED	0x0008		/* Inode lock. */
-#define	IN_LWAIT	0x0010		/* Process waiting on file lock. */
-#define	IN_MODIFIED	0x0020		/* Inode has been modified. */
-#define	IN_RENAME	0x0040		/* Inode is being renamed. */
-#define	IN_SHLOCK	0x0080		/* File has shared lock. */
-#define	IN_UPDATE	0x0100		/* Modification time update request. */
-#define	IN_WANTED	0x0200		/* Inode is wanted by a process. */
+#define IN_UPDATE       0x0004          /* Modification time update request */
+#define	IN_MODIFIED	0x0008		/* Inode has been modified. */
+#define	IN_RENAME	0x0010		/* Inode is being renamed. */
+#define IN_SHLOCK       0x0020          /* File has shared lock. */
+#define	IN_EXLOCK	0x0040		/* File has exclusive lock. */
 
 #ifdef _KERNEL
 /*
@@ -242,6 +241,9 @@ struct indir {
 	}								\
 }
 
+/* Determine if soft dependencies are being done */
+#define DOINGSOFTDEP(vp)      ((vp)->v_mount->mnt_flag & MNT_SOFTDEP)
+
 /* This overlays the fid structure (see mount.h). */
 struct ufid {
 	u_int16_t ufid_len;	/* Length of structure. */
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
index 4dbeed61a92..166d8f43684 100644
--- a/sys/ufs/ufs/ufs_extern.h
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ufs_extern.h,v 1.2 1996/02/27 07:21:25 niklas Exp $	*/
+/*	$OpenBSD: ufs_extern.h,v 1.3 1997/10/06 15:27:36 csapuntz Exp $	*/
 /*	$NetBSD: ufs_extern.h,v 1.5 1996/02/09 22:36:03 christos Exp $	*/
 
 /*-
@@ -54,6 +54,7 @@ struct ufs_args;
 struct ufsmount;
 struct uio;
 struct vattr;
+struct vfsconf;
 struct vnode;
 
 __BEGIN_DECLS
@@ -86,6 +87,7 @@ int	 ufs_readdir	__P((void *));
 int	 ufs_readlink	__P((void *));
 int	 ufs_remove	__P((void *));
 int	 ufs_rename	__P((void *));
+#define  ufs_revoke  vop_revoke
 int	 ufs_rmdir	__P((void *));
 int	 ufs_seek	__P((void *));
 int	 ufs_select	__P((void *));
@@ -117,19 +119,19 @@ void ufs_ihashins __P((struct inode *));
 void ufs_ihashrem __P((struct inode *));
 
 /* ufs_inode.c */
-void ufs_init __P((void));
-int ufs_reclaim __P((struct vnode *));
+int ufs_init __P((struct vfsconf *));
+int ufs_reclaim __P((struct vnode *, struct proc *));
 
 /* ufs_lookup.c */
 void ufs_dirbad __P((struct inode *, doff_t, char *));
 int ufs_dirbadentry __P((struct vnode *, struct direct *, int));
-int ufs_direnter __P((struct inode *, struct vnode *,
-		      struct componentname *));
-int ufs_direnter2 __P((struct vnode *, struct direct *, struct ucred *,
-		       struct proc *));
-int ufs_dirremove __P((struct vnode *, struct componentname *));
+void ufs_makedirentry __P((struct inode *, struct componentname *,
+			   struct direct *));
+int ufs_direnter __P((struct vnode *, struct direct *,
+		      struct componentname *, struct buf *));
+int ufs_dirremove __P((struct vnode *, struct inode *, int, int));
 int ufs_dirrewrite __P((struct inode *, struct inode *,
-			struct componentname *));
+		        ino_t, int, int));
 int ufs_dirempty __P((struct inode *, ino_t, struct ucred *));
 int ufs_checkpath __P((struct inode *, struct inode *, struct ucred *));
 
@@ -165,4 +167,19 @@ int ufs_vinit __P((struct mount *, int (**) __P((void *)),
 		   int (**) __P((void *)), struct vnode **));
 int ufs_makeinode __P((int, struct vnode *, struct vnode **,
 		       struct componentname *));
+
+ 
+/*
+ * Soft dependency function prototypes.
+ */
+void  softdep_setup_directory_add __P((struct buf *, struct inode *, off_t,
+          long, struct buf *));
+void  softdep_change_directoryentry_offset __P((struct inode *, caddr_t,
+          caddr_t, caddr_t, int));
+void  softdep_setup_remove __P((struct buf *,struct inode *, struct inode *,
+          int));
+void  softdep_setup_directory_change __P((struct buf *, struct inode *,
+          struct inode *, long, int));
+void  softdep_increase_linkcnt __P((struct inode *));
+
 __END_DECLS
diff --git a/sys/ufs/ufs/ufs_ihash.c b/sys/ufs/ufs/ufs_ihash.c
index a9b7227942d..84ff51b8b39 100644
--- a/sys/ufs/ufs/ufs_ihash.c
+++ b/sys/ufs/ufs/ufs_ihash.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ufs_ihash.c,v 1.2 1996/02/27 07:21:26 niklas Exp $	*/
+/*	$OpenBSD: ufs_ihash.c,v 1.3 1997/10/06 15:27:37 csapuntz Exp $	*/
 /*	$NetBSD: ufs_ihash.c,v 1.3 1996/02/09 22:36:04 christos Exp $	*/
 
 /*
@@ -49,9 +49,10 @@
 /*
  * Structures associated with inode cacheing.
  */
-struct inode **ihashtbl;
+LIST_HEAD(ihashhead, inode) *ihashtbl;
 u_long	ihash;		/* size of hash table - 1 */
-#define	INOHASH(device, inum)	(((device) + (inum)) & ihash)
+#define	INOHASH(device, inum)	(&ihashtbl[((device) + (inum)) & ihash])
+struct simplelock ufs_ihash_slock;
 
 /*
  * Initialize inode hash table.
@@ -61,6 +62,7 @@ ufs_ihashinit()
 {
 
 	ihashtbl = hashinit(desiredvnodes, M_UFSMNT, &ihash);
+	simple_lock_init(&ufs_ihash_slock);
 }
 
 /*
@@ -68,19 +70,21 @@ ufs_ihashinit()
  * to it. If it is in core, return it, even if it is locked.
  */
 struct vnode *
-ufs_ihashlookup(device, inum)
-	dev_t device;
+ufs_ihashlookup(dev, inum)
+	dev_t dev;
 	ino_t inum;
 {
-	register struct inode *ip;
+        struct inode *ip;
 
-	for (ip = ihashtbl[INOHASH(device, inum)];; ip = ip->i_next) {
-		if (ip == NULL)
-			return (NULL);
-		if (inum == ip->i_number && device == ip->i_dev)
-			return (ITOV(ip));
-	}
-	/* NOTREACHED */
+	simple_lock(&ufs_ihash_slock);
+	for (ip = INOHASH(dev, inum)->lh_first; ip; ip = ip->i_hash.le_next)
+		if (inum == ip->i_number && dev == ip->i_dev)
+			break;
+	simple_unlock(&ufs_ihash_slock);
+
+	if (ip)
+		return (ITOV(ip));
+	return (NULLVP);
 }
 
 /*
@@ -88,30 +92,28 @@ ufs_ihashlookup(device, inum)
  * to it. If it is in core, but locked, wait for it.
  */
 struct vnode *
-ufs_ihashget(device, inum)
-	dev_t device;
+ufs_ihashget(dev, inum)
+	dev_t dev;
 	ino_t inum;
 {
-	register struct inode *ip;
+	struct proc *p = curproc;
+	struct inode *ip;
 	struct vnode *vp;
+loop:
+	simple_lock(&ufs_ihash_slock);
+	for (ip = INOHASH(dev, inum)->lh_first; ip; ip = ip->i_hash.le_next) {
+		if (inum == ip->i_number && dev == ip->i_dev) {
+			vp = ITOV(ip);
+			simple_lock(&vp->v_interlock);
+			simple_unlock(&ufs_ihash_slock);
+			if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p))
+				goto loop;
+			return (vp);
+ 		}
 
-	for (;;)
-		for (ip = ihashtbl[INOHASH(device, inum)];; ip = ip->i_next) {
-			if (ip == NULL)
-				return (NULL);
-			if (inum == ip->i_number && device == ip->i_dev) {
-				if (ip->i_flag & IN_LOCKED) {
-					ip->i_flag |= IN_WANTED;
-					sleep(ip, PINOD);
-					break;
-				}
-				vp = ITOV(ip);
-				if (!vget(vp, 1))
-					return (vp);
-				break;
-			}
-		}
-	/* NOTREACHED */
+	}
+	simple_unlock(&ufs_ihash_slock);
+	return (NULL);
 }
 
 /*
@@ -121,21 +123,16 @@ void
 ufs_ihashins(ip)
 	struct inode *ip;
 {
-	struct inode **ipp, *iq;
+	struct proc *p = curproc;		/* XXX */
+	struct ihashhead *ipp;
 
-	ipp = &ihashtbl[INOHASH(ip->i_dev, ip->i_number)];
-	if ((iq = *ipp) != NULL)
-		iq->i_prev = &ip->i_next;
-	ip->i_next = iq;
-	ip->i_prev = ipp;
-	*ipp = ip;
-	if (ip->i_flag & IN_LOCKED)
-		panic("ufs_ihashins: already locked");
-	if (curproc)
-		ip->i_lockholder = curproc->p_pid;
-	else
-		ip->i_lockholder = -1;
-	ip->i_flag |= IN_LOCKED;
+	/* lock the inode, then put it on the appropriate hash list */
+	lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct simplelock *)0, p);
+ 
+	simple_lock(&ufs_ihash_slock);
+	ipp = INOHASH(ip->i_dev, ip->i_number);
+	LIST_INSERT_HEAD(ipp, ip, i_hash);
+	simple_unlock(&ufs_ihash_slock);
 }
 
 /*
@@ -143,15 +140,14 @@ ufs_ihashins(ip)
  */
 void
 ufs_ihashrem(ip)
-	register struct inode *ip;
+	struct inode *ip;
 {
-	register struct inode *iq;
+	simple_lock(&ufs_ihash_slock);
+	LIST_REMOVE(ip, i_hash);
+ #ifdef DIAGNOSTIC
+	ip->i_hash.le_next = NULL;
+	ip->i_hash.le_prev = NULL;
+ #endif
+	simple_unlock(&ufs_ihash_slock);
 
-	if ((iq = ip->i_next) != NULL)
-		iq->i_prev = ip->i_prev;
-	*ip->i_prev = iq;
-#ifdef DIAGNOSTIC
-	ip->i_next = NULL;
-	ip->i_prev = NULL;
-#endif
 }
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c
index eed08b7f2cf..31437cd4bfd 100644
--- a/sys/ufs/ufs/ufs_inode.c
+++ b/sys/ufs/ufs/ufs_inode.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ufs_inode.c,v 1.4 1997/05/30 08:35:04 downsj Exp $	*/
+/*	$OpenBSD: ufs_inode.c,v 1.5 1997/10/06 15:27:37 csapuntz Exp $	*/
 /*	$NetBSD: ufs_inode.c,v 1.7 1996/05/11 18:27:52 mycroft Exp $	*/
 
 /*
@@ -57,6 +57,7 @@
 
 u_long	nextgennumber;		/* Next generation number to assign. */
 
+#if 0
 void
 ufs_init()
 {
@@ -71,7 +72,7 @@ ufs_init()
 #endif
 	return;
 }
-
+#endif
 /*
  * Last reference to an inode.  If necessary, write or delete it.
  */
@@ -81,39 +82,29 @@ ufs_inactive(v)
 {
 	struct vop_inactive_args /* {
 		struct vnode *a_vp;
+		struct proc *a_p;
 	} */ *ap = v;
-	register struct vnode *vp = ap->a_vp;
-	register struct inode *ip = VTOI(vp);
+	struct vnode *vp = ap->a_vp;
+	struct inode *ip = VTOI(vp);
+	struct proc *p = ap->a_p;
 	struct timespec ts;
-	int mode, error;
+	int mode, error = 0;
 	extern int prtactive;
 
 	if (prtactive && vp->v_usecount != 0)
 		vprint("ffs_inactive: pushing active", vp);
 
-	/* Get rid of inodes related to stale file handles. */
-	if (ip->i_ffs_mode == 0) {
-		if ((vp->v_flag & VXLOCK) == 0)
-			vgone(vp);
-		return (0);
-	}
-
-	error = 0;
-#ifdef DIAGNOSTIC
-	if (VOP_ISLOCKED(vp))
-		panic("ffs_inactive: locked inode");
-	if (curproc)
-		ip->i_lockholder = curproc->p_pid;
-	else
-		ip->i_lockholder = -1;
-#endif
-	ip->i_flag |= IN_LOCKED;
+	/*
+	 * Ignore inodes related to stale file handles.
+	 */
+	if (ip->i_ffs_mode == 0)
+		goto out;
 	if (ip->i_ffs_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 #ifdef QUOTA
 		if (!getinoquota(ip))
 			(void)chkiq(ip, -1, NOCRED, 0);
 #endif
-		error = VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED, NULL);
+		error = VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED, p);
 		ip->i_ffs_rdev = 0;
 		mode = ip->i_ffs_mode;
 		ip->i_ffs_mode = 0;
@@ -124,13 +115,14 @@ ufs_inactive(v)
 		TIMEVAL_TO_TIMESPEC(&time, &ts);
 		VOP_UPDATE(vp, &ts, &ts, 0);
 	}
-	VOP_UNLOCK(vp);
+out:
+	VOP_UNLOCK(vp, 0, p);
 	/*
 	 * If we are done with the inode, reclaim it
 	 * so that it can be reused immediately.
 	 */
-	if (vp->v_usecount == 0 && ip->i_ffs_mode == 0)
-		vgone(vp);
+	if (ip->i_ffs_mode == 0)
+		vrecycle(vp, (struct simplelock *)0, p);
 	return (error);
 }
 
@@ -138,8 +130,9 @@ ufs_inactive(v)
  * Reclaim an inode so that it can be used for other purposes.
  */
 int
-ufs_reclaim(vp)
+ufs_reclaim(vp, p)
 	register struct vnode *vp;
+	struct proc *p;
 {
 	register struct inode *ip;
 	extern int prtactive;
diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c
index 38d828b987e..47587cdd00a 100644
--- a/sys/ufs/ufs/ufs_lookup.c
+++ b/sys/ufs/ufs/ufs_lookup.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ufs_lookup.c,v 1.4 1997/05/30 08:35:08 downsj Exp $	*/
+/*	$OpenBSD: ufs_lookup.c,v 1.5 1997/10/06 15:27:38 csapuntz Exp $	*/
 /*	$NetBSD: ufs_lookup.c,v 1.7 1996/02/09 22:36:06 christos Exp $	*/
 
 /*
@@ -43,12 +43,16 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/kernel.h>
 #include <sys/namei.h>
 #include <sys/buf.h>
 #include <sys/file.h>
+#include <sys/stat.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
 
+#include <vm/vm.h>
+
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/dir.h>
@@ -131,6 +135,7 @@ ufs_lookup(v)
 	struct ucred *cred = cnp->cn_cred;
 	int flags = cnp->cn_flags;
 	int nameiop = cnp->cn_nameiop;
+	struct proc *p = cnp->cn_proc;
 
 	bp = NULL;
 	slotoffset = -1;
@@ -148,6 +153,10 @@ ufs_lookup(v)
 	if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0)
 		return (error);
 
+	if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
+	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
+		return (EROFS);
+
 	/*
 	 * We now have a segment name to search for, and a directory to search.
 	 *
@@ -173,14 +182,14 @@ ufs_lookup(v)
 			VREF(vdp);
 			error = 0;
 		} else if (flags & ISDOTDOT) {
-			VOP_UNLOCK(pdp);
-			error = vget(vdp, 1);
+			VOP_UNLOCK(pdp, 0, p);
+			error = vget(vdp, LK_EXCLUSIVE, p);
 			if (!error && lockparent && (flags & ISLASTCN))
-				error = VOP_LOCK(pdp);
+				error = vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p);
 		} else {
-			error = vget(vdp, 1);
+			error = vget(vdp, LK_EXCLUSIVE, p);
 			if (!lockparent || error || !(flags & ISLASTCN))
-				VOP_UNLOCK(pdp);
+				VOP_UNLOCK(pdp, 0, p);
 		}
 		/*
 		 * Check that the capability number did not change
@@ -191,13 +200,14 @@ ufs_lookup(v)
 				return (0);
 			vput(vdp);
 			if (lockparent && pdp != vdp && (flags & ISLASTCN))
-				VOP_UNLOCK(pdp);
+				VOP_UNLOCK(pdp, 0, p);
 		}
-		if ((error = VOP_LOCK(pdp)) != 0)
+		*vpp = NULL;
+
+		if ((error = vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p)) != 0)
 			return (error);
 		vdp = pdp;
 		dp = VTOI(pdp);
-		*vpp = NULL;
 	}
 
 	/*
@@ -396,7 +406,7 @@ notfound:
 	     (nameiop == DELETE &&
 	      (ap->a_cnp->cn_flags & DOWHITEOUT) &&
 	      (ap->a_cnp->cn_flags & ISWHITEOUT))) &&
-	    (flags & ISLASTCN) && dp->i_ffs_nlink != 0) {
+	    (flags & ISLASTCN) && dp->i_effnlink != 0) {
 		/*
 		 * Access for write is interpreted as allowing
 		 * creation of files in the directory.
@@ -446,7 +456,7 @@ notfound:
 		 */
 		cnp->cn_flags |= SAVENAME;
 		if (!lockparent)
-			VOP_UNLOCK(vdp);
+			VOP_UNLOCK(vdp, 0, p);
 		return (EJUSTRETURN);
 	}
 	/*
@@ -524,7 +534,7 @@ found:
 		}
 		*vpp = tdp;
 		if (!lockparent)
-			VOP_UNLOCK(vdp);
+			VOP_UNLOCK(vdp, 0, p);
 		return (0);
 	}
 
@@ -551,7 +561,7 @@ found:
 		*vpp = tdp;
 		cnp->cn_flags |= SAVENAME;
 		if (!lockparent)
-			VOP_UNLOCK(vdp);
+			VOP_UNLOCK(vdp, 0, p);
 		return (0);
 	}
 
@@ -576,14 +586,14 @@ found:
 	 */
 	pdp = vdp;
 	if (flags & ISDOTDOT) {
-		VOP_UNLOCK(pdp);	/* race to get the inode */
+		VOP_UNLOCK(pdp, 0, p);	/* race to get the inode */
 		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
 		if (error) {
-			VOP_LOCK(pdp);
+			vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p);
 			return (error);
 		}
 		if (lockparent && (flags & ISLASTCN) &&
-		    (error = VOP_LOCK(pdp))) {
+		    (error = vn_lock(pdp, LK_EXCLUSIVE, p))) {
 			vput(tdp);
 			return (error);
 		}
@@ -596,7 +606,7 @@ found:
 		if (error)
 			return (error);
 		if (!lockparent || !(flags & ISLASTCN))
-			VOP_UNLOCK(pdp);
+			VOP_UNLOCK(pdp, 0, p);
 		*vpp = tdp;
 	}
 
@@ -671,108 +681,130 @@ bad:
 }
 
 /*
- * Write a directory entry after a call to namei, using the parameters
- * that it left in nameidata.  The argument ip is the inode which the new
- * directory entry will refer to.  Dvp is a pointer to the directory to
- * be written, which was left locked by namei. Remaining parameters
- * (dp->i_offset, dp->i_count) indicate how the space for the new
- * entry is to be obtained.
+ * Construct a new directory entry after a call to namei, using the
+ * parameters that it left in the componentname argument cnp. The
+ * argument ip is the inode to which the new directory entry will refer.
  */
-int
-ufs_direnter(ip, dvp, cnp)
-	struct inode *ip;
-	struct vnode *dvp;
-	register struct componentname *cnp;
+void
+ufs_makedirentry(ip, cnp, newdirp)
+  	struct inode *ip;
+	struct componentname *cnp;
+	struct direct *newdirp;
 {
-	register struct inode *dp;
-	struct direct newdir;
-
+  
 #ifdef DIAGNOSTIC
-	if ((cnp->cn_flags & SAVENAME) == 0)
-		panic("direnter: missing name");
+  	if ((cnp->cn_flags & SAVENAME) == 0)
+		panic("ufs_makedirentry: missing name");
 #endif
-	dp = VTOI(dvp);
-	newdir.d_ino = ip->i_number;
-	newdir.d_namlen = cnp->cn_namelen;
-	bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
-	if (dvp->v_mount->mnt_maxsymlinklen > 0)
-		newdir.d_type = IFTODT(ip->i_ffs_mode);
-	else {
-		newdir.d_type = 0;
+	newdirp->d_ino = ip->i_number;
+	newdirp->d_namlen = cnp->cn_namelen;
+	bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1);
+	if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0)
+		newdirp->d_type = IFTODT(ip->i_ffs_mode);
+  	else {
+		newdirp->d_type = 0;
 #		if (BYTE_ORDER == LITTLE_ENDIAN)
-			{ u_char tmp = newdir.d_namlen;
-			newdir.d_namlen = newdir.d_type;
-			newdir.d_type = tmp; }
+			{ u_char tmp = newdirp->d_namlen;
+			newdirp->d_namlen = newdirp->d_type;
+			newdirp->d_type = tmp; }
 #		endif
-	}
-	return (ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc));
+  	}
 }
-
+  
 /*
- * Common entry point for directory entry removal used by ufs_direnter
- * and ufs_whiteout
+ * Write a directory entry after a call to namei, using the parameters
+ * that it left in nameidata. The argument dirp is the new directory
+ * entry contents. Dvp is a pointer to the directory to be written,
+ * which was left locked by namei. Remaining parameters (dp->i_offset,
+ * dp->i_count) indicate how the space for the new entry is to be obtained.
+ * Non-null newdirbp indicates that a directory is being created (for the
+ * soft dependency code).
  */
 int
-ufs_direnter2(dvp, dirp, cr, p)
-	struct vnode *dvp;
-	struct direct *dirp;
-	struct ucred *cr;
-	struct proc *p;
+ufs_direnter(dvp, dirp, cnp, newdirbp)
+  	struct vnode *dvp;
+  	struct direct *dirp;
+ 	struct componentname *cnp;
+ 	struct buf *newdirbp;
 {
-	int newentrysize;
-	struct inode *dp;
-	struct buf *bp;
-	struct iovec aiov;
-	struct uio auio;
-	u_int dsize;
-	struct direct *ep, *nep;
-	int error, loc, spacefree;
-	char *dirbuf;
+  	struct ucred *cr;
+  	struct proc *p;
+  	int newentrysize;
+  	struct inode *dp;
+  	struct buf *bp;
+  	u_int dsize;
+  	struct direct *ep, *nep;
+	int error, ret, blkoff, loc, spacefree, flags;
+  	char *dirbuf;
+	struct timespec ts;
 
-	dp = VTOI(dvp);
-	newentrysize = DIRSIZ(FSFMT(dvp), dirp);
+ 	error = 0;
+ 	cr = cnp->cn_cred;
+ 	p = cnp->cn_proc;
+  	dp = VTOI(dvp);
+  	newentrysize = DIRSIZ(FSFMT(dvp), dirp);
 
 	if (dp->i_count == 0) {
 		/*
 		 * If dp->i_count is 0, then namei could find no
 		 * space in the directory. Here, dp->i_offset will
 		 * be on a directory block boundary and we will write the
-		 * new entry into a fresh block.
-		 */
-		if (dp->i_offset & (DIRBLKSIZ - 1))
-			panic("ufs_direnter2: newblk");
-		auio.uio_offset = dp->i_offset;
-		dirp->d_reclen = DIRBLKSIZ;
-		auio.uio_resid = newentrysize;
-		aiov.iov_len = newentrysize;
-		aiov.iov_base = (caddr_t)dirp;
-		auio.uio_iov = &aiov;
-		auio.uio_iovcnt = 1;
-		auio.uio_rw = UIO_WRITE;
-		auio.uio_segflg = UIO_SYSSPACE;
-		auio.uio_procp = (struct proc *)0;
-		error = VOP_WRITE(dvp, &auio, IO_SYNC, cr);
-		if (DIRBLKSIZ >
-		    VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
-			/* XXX should grow with balloc() */
-			panic("ufs_direnter2: frag size");
-		else if (!error) {
-			dp->i_ffs_size = roundup(dp->i_ffs_size, DIRBLKSIZ);
-			dp->i_flag |= IN_CHANGE;
+  		 * new entry into a fresh block.
+  		 */
+  		if (dp->i_offset & (DIRBLKSIZ - 1))
+			panic("ufs_direnter: newblk");
+		flags = B_CLRBUF;
+		if (!DOINGSOFTDEP(dvp))
+			flags |= B_SYNC;
+		if ((error = VOP_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ,
+		    cr, flags, &bp)) != 0) {
+			if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
+				bdwrite(newdirbp);
+			return (error);
 		}
-		return (error);
-	}
-
-	/*
-	 * If dp->i_count is non-zero, then namei found space
-	 * for the new entry in the range dp->i_offset to
-	 * dp->i_offset + dp->i_count in the directory.
-	 * To use this space, we may have to compact the entries located
-	 * there, by copying them together towards the beginning of the
-	 * block, leaving the free space in one usable chunk at the end.
-	 */
-
-	/*
+		dp->i_ffs_size = dp->i_offset + DIRBLKSIZ;
+		dp->i_flag |= IN_CHANGE | IN_UPDATE;
+		vnode_pager_setsize(dvp, (u_long)dp->i_ffs_size);
+  		dirp->d_reclen = DIRBLKSIZ;
+		blkoff = dp->i_offset &
+		    (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1);
+		bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize);
+		if (DOINGSOFTDEP(dvp)) {
+			/*
+			 * Ensure that the entire newly allocated block is a
+			 * valid directory so that future growth within the
+			 * block does not have to ensure that the block is
+			 * written before the inode.
+			 */
+			blkoff += DIRBLKSIZ;
+			while (blkoff < bp->b_bcount) {
+				((struct direct *)
+				   (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
+				blkoff += DIRBLKSIZ;
+			}
+			softdep_setup_directory_add(bp, dp, dp->i_offset,
+			    dirp->d_ino, newdirbp);
+			bdwrite(bp);
+		} else {
+			error = VOP_BWRITE(bp);
+  		}
+		TIMEVAL_TO_TIMESPEC(&time, &ts);
+ 		ret = VOP_UPDATE(dvp, &ts, &ts, !DOINGSOFTDEP(dvp));
+ 		if (error == 0)
+ 			return (ret);
+  		return (error);
+  	}
+  
+  	/*
+	 * If dp->i_count is non-zero, then namei found space for the new
+	 * entry in the range dp->i_offset to dp->i_offset + dp->i_count
+	 * in the directory. To use this space, we may have to compact
+	 * the entries located there, by copying them together towards the
+	 * beginning of the block, leaving the free space in one usable
+	 * chunk at the end.
+  	 */
+  
+  	/*
 	 * Increase size of directory if entry eats into new space.
 	 * This should never push the size past a new multiple of
 	 * DIRBLKSIZE.
@@ -784,15 +816,17 @@ ufs_direnter2(dvp, dirp, cr, p)
 	/*
 	 * Get the block containing the space for the new directory entry.
 	 */
-	error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp);
-	if (error)
-		return (error);
+ 	if ((error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp)) 
+	    != 0) {
+ 		if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
+ 			bdwrite(newdirbp);
+  		return (error);
+ 	}
 	/*
 	 * Find space for the new entry. In the simple case, the entry at
 	 * offset base will have the space. If it does not, then namei
 	 * arranged that compacting the region dp->i_offset to
-	 * dp->i_offset + dp->i_count would yield the
-	 * space.
+	 * dp->i_offset + dp->i_count would yield the space.
 	 */
 	ep = (struct direct *)dirbuf;
 	dsize = DIRSIZ(FSFMT(dvp), ep);
@@ -810,7 +844,11 @@ ufs_direnter2(dvp, dirp, cr, p)
 		dsize = DIRSIZ(FSFMT(dvp), nep);
 		spacefree += nep->d_reclen - dsize;
 		loc += nep->d_reclen;
-		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
+ 		if (DOINGSOFTDEP(dvp))
+ 			softdep_change_directoryentry_offset(dp, dirbuf,
+ 			    (caddr_t)nep, (caddr_t)ep, dsize); 
+ 		else
+ 			bcopy((caddr_t)nep, (caddr_t)ep, dsize);
 	}
 	/*
 	 * Update the pointer fields in the previous entry (if any),
@@ -820,19 +858,26 @@ ufs_direnter2(dvp, dirp, cr, p)
 	    (ep->d_ino == WINO &&
 	     bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) {
 		if (spacefree + dsize < newentrysize)
-			panic("ufs_direnter2: compact1");
+			panic("ufs_direnter: compact1");
 		dirp->d_reclen = spacefree + dsize;
 	} else {
 		if (spacefree < newentrysize)
-			panic("ufs_direnter2: compact2");
+			panic("ufs_direnter: compact2");
 		dirp->d_reclen = spacefree;
 		ep->d_reclen = dsize;
 		ep = (struct direct *)((char *)ep + dsize);
 	}
 	bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize);
-	error = VOP_BWRITE(bp);
+
+  	if (DOINGSOFTDEP(dvp)) {
+  		softdep_setup_directory_add(bp, dp,
+  		    dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp);
+  		bdwrite(bp);
+  	} else {
+  		error = VOP_BWRITE(bp);
+  	}
 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
-	if (!error && dp->i_endoff && dp->i_endoff < dp->i_ffs_size)
+	if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_ffs_size)
 		error = VOP_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, p);
 	return (error);
 }
@@ -850,18 +895,20 @@ ufs_direnter2(dvp, dirp, cr, p)
  * to the size of the previous entry.
  */
 int
-ufs_dirremove(dvp, cnp)
+ufs_dirremove(dvp, ip, flags, isrmdir)
 	struct vnode *dvp;
-	struct componentname *cnp;
+	struct inode *ip;
+	int flags;
+	int isrmdir;
 {
-	register struct inode *dp;
+	struct inode *dp;
 	struct direct *ep;
 	struct buf *bp;
 	int error;
 
 	dp = VTOI(dvp);
 
-	if (cnp->cn_flags & DOWHITEOUT) {
+	if (flags & DOWHITEOUT) {
 		/*
 		 * Whiteout entry: set d_ino to WINO.
 		 */
@@ -871,33 +918,39 @@ ufs_dirremove(dvp, cnp)
 			return (error);
 		ep->d_ino = WINO;
 		ep->d_type = DT_WHT;
-		error = VOP_BWRITE(bp);
-		dp->i_flag |= IN_CHANGE | IN_UPDATE;
-		return (error);
+		goto out;
 	}
 
+ 	if ((error = VOP_BLKATOFF(dvp,
+ 	    (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0)
+ 		return (error);
+
 	if (dp->i_count == 0) {
 		/*
 		 * First entry in block: set d_ino to zero.
 		 */
-		error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep,
-				     &bp);
-		if (error)
-			return (error);
 		ep->d_ino = 0;
+	} else {
+ 		/*
+ 		 * Collapse new free space into previous entry.
+ 		 */
+ 		ep->d_reclen += dp->i_reclen;
+	}
+out:
+ 	if (ip) {
+ 		ip->i_effnlink--;
+ 		ip->i_flag |= IN_CHANGE;
+ 	}
+ 	if (DOINGSOFTDEP(dvp)) {
+ 		if (ip)
+ 			softdep_setup_remove(bp, dp, ip, isrmdir);
+ 		bdwrite(bp);
+ 	} else {
+ 		if (ip)
+ 			ip->i_ffs_nlink--;   /* XXX */
+
 		error = VOP_BWRITE(bp);
-		dp->i_flag |= IN_CHANGE | IN_UPDATE;
-		return (error);
 	}
-	/*
-	 * Collapse new free space into previous entry.
-	 */
-	error = VOP_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count),
-			     (char **)&ep, &bp);
-	if (error)
-		return (error);
-	ep->d_reclen += dp->i_reclen;
-	error = VOP_BWRITE(bp);
 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
 	return (error);
 }
@@ -908,9 +961,11 @@ ufs_dirremove(dvp, cnp)
  * set up by a call to namei.
  */
 int
-ufs_dirrewrite(dp, ip, cnp)
-	struct inode *dp, *ip;
-	struct componentname *cnp;
+ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir)
+	struct inode *dp, *oip;
+	ino_t newinum;
+	int newtype;
+	int isrmdir;
 {
 	struct buf *bp;
 	struct direct *ep;
@@ -920,10 +975,18 @@ ufs_dirrewrite(dp, ip, cnp)
 	error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp);
 	if (error)
 		return (error);
-	ep->d_ino = ip->i_number;
+	ep->d_ino = newinum;
 	if (vdp->v_mount->mnt_maxsymlinklen > 0)
-		ep->d_type = IFTODT(ip->i_ffs_mode);
-	error = VOP_BWRITE(bp);
+ 		ep->d_type = newtype;
+ 	oip->i_effnlink--;
+ 	oip->i_flag |= IN_CHANGE;
+ 	if (DOINGSOFTDEP(vdp)) {
+ 		softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir);
+ 		bdwrite(bp);
+ 	} else {
+ 		oip->i_ffs_nlink--; /* XXX */
+ 		error = VOP_BWRITE(bp);
+ 	}
 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
 	return (error);
 }
@@ -983,7 +1046,7 @@ ufs_dirempty(ip, parentino, cred)
 		 * 1 implies ".", 2 implies ".." if second
 		 * char is also "."
 		 */
-		if (namlen == 1)
+		if (namlen == 1 && dp->d_ino == ip->i_number)
 			continue;
 		if (dp->d_name[1] == '.' && dp->d_ino == parentino)
 			continue;
diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c
index f6ea0606058..bc295d57e26 100644
--- a/sys/ufs/ufs/ufs_quota.c
+++ b/sys/ufs/ufs/ufs_quota.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ufs_quota.c,v 1.3 1997/05/30 08:35:10 downsj Exp $	*/
+/*	$OpenBSD: ufs_quota.c,v 1.4 1997/10/06 15:27:38 csapuntz Exp $	*/
 /*	$NetBSD: ufs_quota.c,v 1.8 1996/02/09 22:36:09 christos Exp $	*/
 
 /*
@@ -376,15 +376,11 @@ quotaon(p, mp, type, fname)
 	if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0)
 		return (error);
 	vp = nd.ni_vp;
-	VOP_UNLOCK(vp);
+	VOP_UNLOCK(vp, 0, p);
 	if (vp->v_type != VREG) {
 		(void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
 		return (EACCES);
 	}
-	if (vfs_busy(mp)) {
-		(void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
-		return (EBUSY);
-	}
 	if (*vpp != vp)
 		quotaoff(p, mp, type);
 	ump->um_qflags[type] |= QTF_OPENING;
@@ -414,9 +410,9 @@ quotaon(p, mp, type, fname)
 again:
 	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) {
 		nextvp = vp->v_mntvnodes.le_next;
-		if (vp->v_writecount == 0)
+		if (vp->v_type == VNON || vp->v_writecount == 0)
 			continue;
-		if (vget(vp, 1))
+		if (vget(vp, LK_EXCLUSIVE, p))
 			goto again;
 		if ((error = getinoquota(VTOI(vp))) != 0) {
 			vput(vp);
@@ -429,7 +425,6 @@ again:
 	ump->um_qflags[type] &= ~QTF_OPENING;
 	if (error)
 		quotaoff(p, mp, type);
-	vfs_unbusy(mp);
 	return (error);
 }
 
@@ -449,8 +444,6 @@ quotaoff(p, mp, type)
 	register struct inode *ip;
 	int error;
 	
-	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
-		panic("quotaoff: not busy");
 	if ((qvp = ump->um_quotas[type]) == NULLVP)
 		return (0);
 	ump->um_qflags[type] |= QTF_CLOSING;
@@ -461,7 +454,9 @@ quotaoff(p, mp, type)
 again:
 	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) {
 		nextvp = vp->v_mntvnodes.le_next;
-		if (vget(vp, 1))
+		if (vp->v_type == VNON)
+			continue;
+		if (vget(vp, LK_EXCLUSIVE, p))
 			goto again;
 		ip = VTOI(vp);
 		dq = ip->i_dquot[type];
@@ -621,16 +616,16 @@ qsync(mp)
 	struct mount *mp;
 {
 	struct ufsmount *ump = VFSTOUFS(mp);
+	struct proc *p = curproc;
 	register struct vnode *vp, *nextvp;
 	register struct dquot *dq;
 	register int i;
+	int error = 0;
 
 	/*
 	 * Check if the mount point has any quotas.
 	 * If not, simply return.
 	 */
-	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
-		panic("qsync: not busy");
 	for (i = 0; i < MAXQUOTAS; i++)
 		if (ump->um_quotas[i] != NULLVP)
 			break;
@@ -640,22 +635,34 @@ qsync(mp)
 	 * Search vnodes associated with this mount point,
 	 * synchronizing any modified dquot structures.
 	 */
+	simple_lock(&mntvnode_slock);
 again:
-	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) {
-		nextvp = vp->v_mntvnodes.le_next;
-		if (VOP_ISLOCKED(vp))
-			continue;
-		if (vget(vp, 1))
+ 	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) {
+		if (vp->v_mount != mp)
 			goto again;
+ 		nextvp = vp->v_mntvnodes.le_next;
+		if (vp->v_type == VNON)
+			continue;
+		simple_lock(&vp->v_interlock);
+		simple_unlock(&mntvnode_slock);
+		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
+		if (error) {
+			simple_lock(&mntvnode_slock);
+			if (error == ENOENT)
+				goto again;
+ 			continue;
+		}
 		for (i = 0; i < MAXQUOTAS; i++) {
 			dq = VTOI(vp)->i_dquot[i];
 			if (dq != NODQUOT && (dq->dq_flags & DQ_MOD))
 				dqsync(vp, dq);
 		}
 		vput(vp);
-		if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp)
-			goto again;
-	}
+		simple_lock(&mntvnode_slock);
+		if (vp->v_mntvnodes.le_next != nextvp)
+ 			goto again;
+ 	}
+	simple_unlock(&mntvnode_slock);
 	return (0);
 }
 
@@ -697,6 +704,7 @@ dqget(vp, id, ump, type, dqp)
 	register int type;
 	struct dquot **dqp;
 {
+	struct proc *p = curproc;
 	register struct dquot *dq;
 	struct dqhash *dqh;
 	register struct vnode *dqvp;
@@ -752,7 +760,7 @@ dqget(vp, id, ump, type, dqp)
 	 * Initialize the contents of the dquot structure.
 	 */
 	if (vp != dqvp)
-		VOP_LOCK(dqvp);
+		vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p);
 	LIST_INSERT_HEAD(dqh, dq, dq_hash);
 	DQREF(dq);
 	dq->dq_flags = DQ_LOCK;
@@ -772,7 +780,7 @@ dqget(vp, id, ump, type, dqp)
 	if (auio.uio_resid == sizeof(struct dqblk) && error == 0)
 		bzero((caddr_t)&dq->dq_dqb, sizeof(struct dqblk));
 	if (vp != dqvp)
-		VOP_UNLOCK(dqvp);
+		VOP_UNLOCK(dqvp, 0, p);
 	if (dq->dq_flags & DQ_WANT)
 		wakeup((caddr_t)dq);
 	dq->dq_flags = 0;
@@ -844,6 +852,7 @@ dqsync(vp, dq)
 	struct vnode *vp;
 	register struct dquot *dq;
 {
+	struct proc *p = curproc;
 	struct vnode *dqvp;
 	struct iovec aiov;
 	struct uio auio;
@@ -856,13 +865,13 @@ dqsync(vp, dq)
 	if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP)
 		panic("dqsync: file");
 	if (vp != dqvp)
-		VOP_LOCK(dqvp);
+		vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p);
 	while (dq->dq_flags & DQ_LOCK) {
 		dq->dq_flags |= DQ_WANT;
 		sleep((caddr_t)dq, PINOD+2);
 		if ((dq->dq_flags & DQ_MOD) == 0) {
 			if (vp != dqvp)
-				VOP_UNLOCK(dqvp);
+				VOP_UNLOCK(dqvp, 0, p);
 			return (0);
 		}
 	}
@@ -883,7 +892,7 @@ dqsync(vp, dq)
 		wakeup((caddr_t)dq);
 	dq->dq_flags &= ~(DQ_MOD|DQ_LOCK|DQ_WANT);
 	if (vp != dqvp)
-		VOP_UNLOCK(dqvp);
+		VOP_UNLOCK(dqvp, 0, p);
 	return (error);
 }
 
diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c
index 604c16fcb90..25148b78f61 100644
--- a/sys/ufs/ufs/ufs_readwrite.c
+++ b/sys/ufs/ufs/ufs_readwrite.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ufs_readwrite.c,v 1.9 1997/05/30 08:35:13 downsj Exp $	*/
+/*	$OpenBSD: ufs_readwrite.c,v 1.10 1997/10/06 15:27:39 csapuntz Exp $	*/
 /*	$NetBSD: ufs_readwrite.c,v 1.9 1996/05/11 18:27:57 mycroft Exp $	*/
 
 /*-
@@ -242,19 +242,13 @@ WRITE(v)
 		xfersize = fs->fs_bsize - blkoffset;
 		if (uio->uio_resid < xfersize)
 			xfersize = uio->uio_resid;
-#ifdef LFS_READWRITE
-		(void)lfs_check(vp, lbn);
-		error = lfs_balloc(vp, blkoffset, xfersize, lbn, &bp);
-#else
 		if (fs->fs_bsize > xfersize)
 			flags |= B_CLRBUF;
 		else
 			flags &= ~B_CLRBUF;
 
-		error = ffs_balloc(ip,
-		    lbn, blkoffset + xfersize, ap->a_cred, &bp, flags);
-#endif
-		if (error)
+		if ((error = VOP_BALLOC(vp, uio->uio_offset, xfersize,
+					ap->a_cred, flags, &bp)) != 0)
 			break;
 		if (uio->uio_offset + xfersize > ip->i_ffs_size) {
 			ip->i_ffs_size = uio->uio_offset + xfersize;
diff --git a/sys/ufs/ufs/ufs_vfsops.c b/sys/ufs/ufs/ufs_vfsops.c
index 0e308fd39dd..11dfa3086c4 100644
--- a/sys/ufs/ufs/ufs_vfsops.c
+++ b/sys/ufs/ufs/ufs_vfsops.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ufs_vfsops.c,v 1.3 1997/05/30 08:35:15 downsj Exp $	*/
+/*	$OpenBSD: ufs_vfsops.c,v 1.4 1997/10/06 15:27:39 csapuntz Exp $	*/
 /*	$NetBSD: ufs_vfsops.c,v 1.4 1996/02/09 22:36:12 christos Exp $	*/
 
 /*
@@ -125,39 +125,64 @@ ufs_quotactl(mp, cmds, uid, arg, p)
 	if ((u_int)type >= MAXQUOTAS)
 		return (EINVAL);
 
+	if (vfs_busy(mp, LK_NOWAIT, 0, p))
+		return (0);
+ 
+
 	switch (cmd) {
 
 	case Q_QUOTAON:
-		return (quotaon(p, mp, type, arg));
+		error = quotaon(p, mp, type, arg);
+		break;
 
 	case Q_QUOTAOFF:
-		if (vfs_busy(mp))
-			return (0);
 		error = quotaoff(p, mp, type);
-		vfs_unbusy(mp);
-		return (error);
+		break;
 
 	case Q_SETQUOTA:
-		return (setquota(mp, uid, type, arg));
+		error = setquota(mp, uid, type, arg) ;
+		break;
 
 	case Q_SETUSE:
-		return (setuse(mp, uid, type, arg));
+		error = setuse(mp, uid, type, arg);
+		break;
 
 	case Q_GETQUOTA:
-		return (getquota(mp, uid, type, arg));
+		error = getquota(mp, uid, type, arg);
+		break;
 
 	case Q_SYNC:
-		if (vfs_busy(mp))
-			return (0);
 		error = qsync(mp);
-		vfs_unbusy(mp);
-		return (error);
+		break;
 
 	default:
-		return (EINVAL);
+		error = EINVAL;
+		break;
 	}
-	/* NOTREACHED */
+
+	vfs_unbusy(mp, p);
+	return (error);
+#endif
+}
+
+
+/*
+ * Initialize UFS filesystems, done only once.
+ */
+int
+ufs_init(vfsp)
+	struct vfsconf *vfsp;
+{
+	static int done;
+
+	if (done)
+		return (0);
+	done = 1;
+	ufs_ihashinit();
+#ifdef QUOTA
+	dqinit();
 #endif
+	return (0);
 }
 
 /*
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index fe58d6e899e..12245ddece3 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ufs_vnops.c,v 1.10 1997/07/03 17:49:49 deraadt Exp $	*/
+/*	$OpenBSD: ufs_vnops.c,v 1.11 1997/10/06 15:27:40 csapuntz Exp $	*/
 /*	$NetBSD: ufs_vnops.c,v 1.18 1996/05/11 18:28:04 mycroft Exp $	*/
 
 /*
@@ -90,6 +90,19 @@ union _qcvt {
 	(q) = tmp.qcvt; \
 }
 
+
+/*
+ * A virgin directory (no blushing please).
+ */
+static struct dirtemplate mastertemplate = {
+	0, 12, DT_DIR, 1, ".",
+	0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
+};
+static struct odirtemplate omastertemplate = {
+	0, 12, 1, ".",
+	0, DIRBLKSIZ - 12, 2, ".."
+};
+
 /*
  * Create a regular file
  */
@@ -117,19 +130,19 @@ ufs_mknod(v)
 	void *v;
 {
 	struct vop_mknod_args /* {
-		struct vnode *a_dvp;
-		struct vnode **a_vpp;
-		struct componentname *a_cnp;
-		struct vattr *a_vap;
-	} */ *ap = v;
-	register struct vattr *vap = ap->a_vap;
-	register struct vnode **vpp = ap->a_vpp;
-	register struct inode *ip;
+				 struct vnode *a_dvp;
+				 struct vnode **a_vpp;
+				 struct componentname *a_cnp;
+				 struct vattr *a_vap;
+				 } */ *ap = v;
+	struct vattr *vap = ap->a_vap;
+        struct vnode **vpp = ap->a_vpp;
+	struct inode *ip;
 	int error;
 
 	if ((error =
-	    ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
-	    ap->a_dvp, vpp, ap->a_cnp)) != 0)
+	     ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
+			   ap->a_dvp, vpp, ap->a_cnp)) != 0)
 		return (error);
 	ip = VTOI(*vpp);
 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
@@ -163,11 +176,11 @@ ufs_open(v)
 	void *v;
 {
 	struct vop_open_args /* {
-		struct vnode *a_vp;
-		int  a_mode;
-		struct ucred *a_cred;
-		struct proc *a_p;
-	} */ *ap = v;
+				struct vnode *a_vp;
+				int  a_mode;
+				struct ucred *a_cred;
+				struct proc *a_p;
+				} */ *ap = v;
 
 	/*
 	 * Files marked append-only must be opened for appending.
@@ -194,11 +207,13 @@ ufs_close(v)
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap = v;
-	register struct vnode *vp = ap->a_vp;
-	register struct inode *ip = VTOI(vp);
+	struct vnode *vp = ap->a_vp;
+	struct inode *ip = VTOI(vp);
 
-	if (vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED))
+	simple_lock(&vp->v_interlock);
+	if (vp->v_usecount > 1)
 		ITIMES(ip, &time, &time);
+	simple_unlock(&vp->v_interlock);
 	return (0);
 }
 
@@ -212,25 +227,27 @@ ufs_access(v)
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap = v;
-	register struct vnode *vp = ap->a_vp;
-	register struct inode *ip = VTOI(vp);
+	struct vnode *vp = ap->a_vp;
+	struct inode *ip = VTOI(vp);
 	mode_t mode = ap->a_mode;
 
-#ifdef DIAGNOSTIC
-	if (!VOP_ISLOCKED(vp)) {
-		vprint("ufs_access: not locked", vp);
-		panic("ufs_access: not locked");
-	}
-#endif
-#ifdef QUOTA
-	if (mode & VWRITE)
+	/*
+	 * Disallow write attempts on read-only file systems;
+	 * unless the file is a socket, fifo, or a block or
+	 * character device resident on the file system.
+	 */
+	if (mode & VWRITE) {
 		switch (vp->v_type) {
 			int error;
 		case VDIR:
 		case VLNK:
 		case VREG:
+			if (vp->v_mount->mnt_flag & MNT_RDONLY)
+				return (EROFS);
+#ifdef QUOTA
 			if ((error = getinoquota(ip)) != 0)
 				return (error);
+#endif
 			break;
 		case VBAD:
 		case VBLK:
@@ -239,8 +256,9 @@ ufs_access(v)
 		case VFIFO:
 		case VNON:
 			break;
+
 		}
-#endif
+	}
 
 	/* If immutable bit set, nobody gets to write it. */
 	if ((mode & VWRITE) && (ip->i_ffs_flags & IMMUTABLE))
@@ -261,9 +279,9 @@ ufs_getattr(v)
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap = v;
-	register struct vnode *vp = ap->a_vp;
-	register struct inode *ip = VTOI(vp);
-	register struct vattr *vap = ap->a_vap;
+	struct vnode *vp = ap->a_vp;
+	struct inode *ip = VTOI(vp);
+	struct vattr *vap = ap->a_vap;
 
 	ITIMES(ip, &time, &time);
 	/*
@@ -272,7 +290,7 @@ ufs_getattr(v)
 	vap->va_fsid = ip->i_dev;
 	vap->va_fileid = ip->i_number;
 	vap->va_mode = ip->i_ffs_mode & ~IFMT;
-	vap->va_nlink = ip->i_ffs_nlink;
+	vap->va_nlink = ip->i_effnlink;
 	vap->va_uid = ip->i_ffs_uid;
 	vap->va_gid = ip->i_ffs_gid;
 	vap->va_rdev = (dev_t)ip->i_ffs_rdev;
@@ -311,11 +329,11 @@ ufs_setattr(v)
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap = v;
-	register struct vattr *vap = ap->a_vap;
-	register struct vnode *vp = ap->a_vp;
-	register struct inode *ip = VTOI(vp);
-	register struct ucred *cred = ap->a_cred;
-	register struct proc *p = ap->a_p;
+	struct vattr *vap = ap->a_vap;
+	struct vnode *vp = ap->a_vp;
+	struct inode *ip = VTOI(vp);
+	struct ucred *cred = ap->a_cred;
+	struct proc *p = ap->a_p;
 	int error;
 
 	/*
@@ -328,6 +346,8 @@ ufs_setattr(v)
 		return (EINVAL);
 	}
 	if (vap->va_flags != VNOVAL) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
 		if (cred->cr_uid != ip->i_ffs_uid &&
 		    (error = suser(cred, &p->p_acflag)))
 			return (error);
@@ -337,7 +357,8 @@ ufs_setattr(v)
 				return (EPERM);
 			ip->i_ffs_flags = vap->va_flags;
 		} else {
-			if (ip->i_ffs_flags & (SF_IMMUTABLE | SF_APPEND))
+			if (ip->i_ffs_flags & (SF_IMMUTABLE | SF_APPEND) ||
+			    (vap->va_flags & UF_SETTABLE) != vap->va_flags)
 				return (EPERM);
 			ip->i_ffs_flags &= SF_SETTABLE;
 			ip->i_ffs_flags |= (vap->va_flags & UF_SETTABLE);
@@ -352,19 +373,36 @@ ufs_setattr(v)
 	 * Go through the fields and update if not VNOVAL.
 	 */
 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
 		error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, p);
 		if (error)
 			return (error);
 	}
 	if (vap->va_size != VNOVAL) {
-		if (vp->v_type == VDIR)
-			return (EISDIR);
-		error = VOP_TRUNCATE(vp, vap->va_size, 0, cred, p);
-		if (error)
-			return (error);
+		/*
+		 * Disallow write attempts on read-only file systems;
+		 * unless the file is a socket, fifo, or a block or
+		 * character device resident on the file system.
+		 */
+		switch (vp->v_type) {
+		case VDIR:
+ 			return (EISDIR);
+		case VLNK:
+		case VREG:
+			if (vp->v_mount->mnt_flag & MNT_RDONLY)
+				return (EROFS);
+			break;
+		default:
+			break;
+		}
+ 		if ((error = VOP_TRUNCATE(vp, vap->va_size, 0, cred, p)) != 0)
+ 			return (error);
 	}
 	ip = VTOI(vp);
 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
 		if (cred->cr_uid != ip->i_ffs_uid &&
 		    (error = suser(cred, &p->p_acflag)) &&
 		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || 
@@ -374,13 +412,16 @@ ufs_setattr(v)
 			ip->i_flag |= IN_ACCESS;
 		if (vap->va_mtime.tv_sec != VNOVAL)
 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
-		error = VOP_UPDATE(vp, &vap->va_atime, &vap->va_mtime, 1);
+		error = VOP_UPDATE(vp, &vap->va_atime, &vap->va_mtime, 0);
 		if (error)
 			return (error);
 	}
 	error = 0;
-	if (vap->va_mode != (mode_t)VNOVAL)
+	if (vap->va_mode != (mode_t)VNOVAL) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
 		error = ufs_chmod(vp, (int)vap->va_mode, cred, p);
+	}
 	return (error);
 }
 
@@ -390,12 +431,12 @@ ufs_setattr(v)
  */
 static int
 ufs_chmod(vp, mode, cred, p)
-	register struct vnode *vp;
-	register int mode;
-	register struct ucred *cred;
+	struct vnode *vp;
+	int mode;
+	struct ucred *cred;
 	struct proc *p;
 {
-	register struct inode *ip = VTOI(vp);
+	struct inode *ip = VTOI(vp);
 	int error;
 
 	if (cred->cr_uid != ip->i_ffs_uid &&
@@ -421,18 +462,18 @@ ufs_chmod(vp, mode, cred, p)
  */
 static int
 ufs_chown(vp, uid, gid, cred, p)
-	register struct vnode *vp;
+	struct vnode *vp;
 	uid_t uid;
 	gid_t gid;
 	struct ucred *cred;
 	struct proc *p;
 {
-	register struct inode *ip = VTOI(vp);
+	struct inode *ip = VTOI(vp);
 	uid_t ouid;
 	gid_t ogid;
 	int error = 0;
 #ifdef QUOTA
-	register int i;
+	int i;
 	long change;
 #endif
 
@@ -614,9 +655,9 @@ ufs_remove(v)
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap = v;
-	register struct inode *ip;
-	register struct vnode *vp = ap->a_vp;
-	register struct vnode *dvp = ap->a_dvp;
+	struct inode *ip;
+	struct vnode *vp = ap->a_vp;
+	struct vnode *dvp = ap->a_dvp;
 	int error;
 
 	ip = VTOI(vp);
@@ -625,10 +666,8 @@ ufs_remove(v)
 		error = EPERM;
 		goto out;
 	}
-	if ((error = ufs_dirremove(dvp, ap->a_cnp)) == 0) {
-		ip->i_ffs_nlink--;
-		ip->i_flag |= IN_CHANGE;
-	}
+	if ((error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0)) != 0)
+		goto out;
 out:
 	if (dvp == vp)
 		vrele(vp);
@@ -650,10 +689,12 @@ ufs_link(v)
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap = v;
-	register struct vnode *dvp = ap->a_dvp;
-	register struct vnode *vp = ap->a_vp;
-	register struct componentname *cnp = ap->a_cnp;
-	register struct inode *ip;
+	struct vnode *dvp = ap->a_dvp;
+	struct vnode *vp = ap->a_vp;
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	struct inode *ip;
+	struct direct newdir;
 	struct timespec ts;
 	int error;
 
@@ -671,7 +712,7 @@ ufs_link(v)
 		error = EXDEV;
 		goto out2;
 	}
-	if (dvp != vp && (error = VOP_LOCK(vp))) {
+	if (dvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) {
 		VOP_ABORTOP(dvp, cnp);
 		goto out2;
 	}
@@ -686,20 +727,25 @@ ufs_link(v)
 		error = EPERM;
 		goto out1;
 	}
+	ip->i_effnlink++;
 	ip->i_ffs_nlink++;
 	ip->i_flag |= IN_CHANGE;
+	if (DOINGSOFTDEP(vp))
+		softdep_increase_linkcnt(ip);
 	TIMEVAL_TO_TIMESPEC(&time, &ts);
-	error = VOP_UPDATE(vp, &ts, &ts, 1);
-	if (!error)
-		error = ufs_direnter(ip, dvp, cnp);
+	if ((error = VOP_UPDATE(vp, &ts, &ts, !DOINGSOFTDEP(vp))) == 0) {
+		ufs_makedirentry(ip, cnp, &newdir);
+		error = ufs_direnter(dvp, &newdir, cnp, NULL);
+	}
 	if (error) {
+		ip->i_effnlink--;
 		ip->i_ffs_nlink--;
 		ip->i_flag |= IN_CHANGE;
 	}
 	FREE(cnp->cn_pnbuf, M_NAMEI);
 out1:
 	if (dvp != vp)
-		VOP_UNLOCK(vp);
+		VOP_UNLOCK(vp, 0, p);
 out2:
 	vput(dvp);
 	return (error);
@@ -742,7 +788,7 @@ ufs_whiteout(v)
 		newdir.d_namlen = cnp->cn_namelen;
 		bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
 		newdir.d_type = DT_WHT;
-		error = ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc);
+		error = ufs_direnter(dvp, &newdir, cnp, NULL);
 		break;
 
 	case DELETE:
@@ -753,8 +799,11 @@ ufs_whiteout(v)
 #endif
 
 		cnp->cn_flags &= ~DOWHITEOUT;
-		error = ufs_dirremove(dvp, cnp);
+		error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0);
 		break;
+	default:
+		panic("ufs_whiteout: unknown op");
+		/* NOTREACHED */
 	}
 	if (cnp->cn_flags & HASBUF) {
 		FREE(cnp->cn_pnbuf, M_NAMEI);
@@ -801,17 +850,17 @@ ufs_rename(v)
 		struct componentname *a_tcnp;
 	} */ *ap = v;
 	struct vnode *tvp = ap->a_tvp;
-	register struct vnode *tdvp = ap->a_tdvp;
+	struct vnode *tdvp = ap->a_tdvp;
 	struct vnode *fvp = ap->a_fvp;
-	register struct vnode *fdvp = ap->a_fdvp;
-	register struct componentname *tcnp = ap->a_tcnp;
-	register struct componentname *fcnp = ap->a_fcnp;
-	register struct inode *ip, *xp, *dp;
-	struct dirtemplate dirbuf;
+	struct vnode *fdvp = ap->a_fdvp;
+	struct componentname *tcnp = ap->a_tcnp;
+	struct componentname *fcnp = ap->a_fcnp;
+	struct proc *p = fcnp->cn_proc;
+	struct inode *ip, *xp, *dp;
+	struct direct newdir;
 	struct timespec ts;
 	int doingdirectory = 0, oldparent = 0, newparent = 0;
 	int error = 0;
-	u_char namlen;
 
 #ifdef DIAGNOSTIC
 	if ((tcnp->cn_flags & HASBUF) == 0 ||
@@ -868,13 +917,13 @@ abortit:
 		(void) relookup(fdvp, &fvp, fcnp);
 		return (VOP_REMOVE(fdvp, fvp, fcnp));
 	}
-	if ((error = VOP_LOCK(fvp)) != 0)
+	if ((error = vn_lock(fvp, LK_EXCLUSIVE, p)) != 0)
 		goto abortit;
 	dp = VTOI(fdvp);
 	ip = VTOI(fvp);
 	if ((ip->i_ffs_flags & (IMMUTABLE | APPEND)) ||
 	    (dp->i_ffs_flags & APPEND)) {
-		VOP_UNLOCK(fvp);
+		VOP_UNLOCK(fvp, 0, p);
 		error = EPERM;
 		goto abortit;
 	}
@@ -883,7 +932,7 @@ abortit:
 		if (!error && tvp)
 			error = VOP_ACCESS(tvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc);
 		if (error) {
-			VOP_UNLOCK(fvp);
+			VOP_UNLOCK(fvp, 0, p);
 			error = EACCES;
 			goto abortit;
 		}
@@ -895,7 +944,7 @@ abortit:
 		    (fcnp->cn_flags & ISDOTDOT) ||
 		    (tcnp->cn_flags & ISDOTDOT) ||
 		    (ip->i_flag & IN_RENAME)) {
-			VOP_UNLOCK(fvp);
+			VOP_UNLOCK(fvp, 0, p);
 			error = EINVAL;
 			goto abortit;
 		}
@@ -920,11 +969,14 @@ abortit:
 	 *    completing our work, the link count
 	 *    may be wrong, but correctable.
 	 */
+	ip->i_effnlink++;
 	ip->i_ffs_nlink++;
 	ip->i_flag |= IN_CHANGE;
+	if (DOINGSOFTDEP(fvp))
+		softdep_increase_linkcnt(ip);
 	TIMEVAL_TO_TIMESPEC(&time, &ts);
-	if ((error = VOP_UPDATE(fvp, &ts, &ts, 1)) != 0) {
-		VOP_UNLOCK(fvp);
+	if ((error = VOP_UPDATE(fvp, &ts, &ts, !DOINGSOFTDEP(fvp))) != 0) {
+		VOP_UNLOCK(fvp, 0, p);
 		goto bad;
 	}
 
@@ -939,7 +991,7 @@ abortit:
 	 * call to checkpath().
 	 */
 	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc);
-	VOP_UNLOCK(fvp);
+	VOP_UNLOCK(fvp, 0, p);
 	if (oldparent != dp->i_number)
 		newparent = dp->i_number;
 	if (doingdirectory && newparent) {
@@ -978,13 +1030,19 @@ abortit:
 				error = EMLINK;
 				goto bad;
 			}
+			dp->i_effnlink++;
 			dp->i_ffs_nlink++;
 			dp->i_flag |= IN_CHANGE;
-			if ((error = VOP_UPDATE(tdvp, &ts, &ts, 1)) != 0)
+			if (DOINGSOFTDEP(tdvp))
+                               softdep_increase_linkcnt(dp);
+			if ((error = VOP_UPDATE(tdvp, &ts, &ts,
+						!DOINGSOFTDEP(tdvp))) != 0)
 				goto bad;
 		}
-		if ((error = ufs_direnter(ip, tdvp, tcnp)) != 0) {
+		ufs_makedirentry(ip, tcnp, &newdir);
+		if ((error = ufs_direnter(tdvp, &newdir, tcnp, NULL)) != 0) {
 			if (doingdirectory && newparent) {
+				dp->i_effnlink--;
 				dp->i_ffs_nlink--;
 				dp->i_flag |= IN_CHANGE;
 				(void)VOP_UPDATE(tdvp, &ts, &ts, 1);
@@ -1018,8 +1076,8 @@ abortit:
 		 * (both directories, or both not directories).
 		 */
 		if ((xp->i_ffs_mode & IFMT) == IFDIR) {
-			if (!ufs_dirempty(xp, dp->i_number, tcnp->cn_cred) || 
-				xp->i_ffs_nlink > 2) {
+			if (xp->i_effnlink > 2 ||
+			    !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
 				error = ENOTEMPTY;
 				goto bad;
 			}
@@ -1032,37 +1090,35 @@ abortit:
 			error = EISDIR;
 			goto bad;
 		}
-		if ((error = ufs_dirrewrite(dp, ip, tcnp)) != 0)
-			goto bad;
-		/*
-		 * If the target directory is in the same
-		 * directory as the source directory,
-		 * decrement the link count on the parent
-		 * of the target directory.
-		 */
-		 if (doingdirectory && !newparent) {
-			dp->i_ffs_nlink--;
-			dp->i_flag |= IN_CHANGE;
-		}
-		vput(tdvp);
-		/*
-		 * Adjust the link count of the target to
-		 * reflect the dirrewrite above.  If this is
-		 * a directory it is empty and there are
-		 * no links to it, so we can squash the inode and
-		 * any space associated with it.  We disallowed
-		 * renaming over top of a directory with links to
-		 * it above, as the remaining link would point to
-		 * a directory without "." or ".." entries.
-		 */
-		xp->i_ffs_nlink--;
+		if ((error = ufs_dirrewrite(dp, xp, ip->i_number,
+		    IFTODT(ip->i_ffs_mode), doingdirectory)) != 0)
+			goto bad;
 		if (doingdirectory) {
-			if (--xp->i_ffs_nlink != 0)
-				panic("rename: linked directory");
-			error = VOP_TRUNCATE(tvp, (off_t)0, IO_SYNC,
-			    tcnp->cn_cred, tcnp->cn_proc);
+			if (!newparent) {
+				dp->i_effnlink--;
+				dp->i_flag |= IN_CHANGE;
+			}
+			xp->i_effnlink--;
+			xp->i_flag |= IN_CHANGE;
 		}
-		xp->i_flag |= IN_CHANGE;
+		if (doingdirectory && !DOINGSOFTDEP(tvp)) {
+			/*
+			 * Truncate inode. Only "." and ".." are left in the
+			 * directory; its "." reference is dropped since we are
+			 * quashing it. The parent loses the ".." reference
+			 * only when the source came from the same directory;
+			 * otherwise the source's ".." takes its place. With
+			 * soft updates these are deferred until the parent
+			 * entry is deleted on disk, so we avoid doing them now.
+			 */
+			if (!newparent)
+				dp->i_ffs_nlink--;
+			xp->i_ffs_nlink--;
+			if ((error = VOP_TRUNCATE(tvp, (off_t)0, IO_SYNC,
+			    tcnp->cn_cred, tcnp->cn_proc)) != 0)
+				goto bad;
+		}
+		vput(tdvp);
 		vput(tvp);
 		xp = NULL;
 	}
@@ -1092,10 +1148,9 @@ abortit:
 	 * changed while the new name has been entered. If the source is
 	 * a file then the entry may have been unlinked or renamed. In
 	 * either case there is no further work to be done. If the source
-	 * is a directory then it cannot have been rmdir'ed; its link
-	 * count of three would cause a rmdir to fail with ENOTEMPTY.
-	 * The IRENAME flag ensures that it cannot be moved by another
-	 * rename.
+	 * is a directory then it cannot have been rmdir'ed; the IN_RENAME 
+	 * flag ensures that it cannot be moved by another rename or removed
+	 * by a rmdir.
 	 */
 	if (xp != ip) {
 		if (doingdirectory)
@@ -1108,44 +1163,11 @@ abortit:
 		 * and ".." set to point to the new parent.
 		 */
 		if (doingdirectory && newparent) {
-			dp->i_ffs_nlink--;
-			dp->i_flag |= IN_CHANGE;
-			error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf,
-				sizeof (struct dirtemplate), (off_t)0,
-				UIO_SYSSPACE, IO_NODELOCKED, 
-				tcnp->cn_cred, (int *)0, (struct proc *)0);
-			if (error == 0) {
-#				if (BYTE_ORDER == LITTLE_ENDIAN)
-					if (fvp->v_mount->mnt_maxsymlinklen <= 0)
-						namlen = dirbuf.dotdot_type;
-					else
-						namlen = dirbuf.dotdot_namlen;
-#				else
-					namlen = dirbuf.dotdot_namlen;
-#				endif
-				if (namlen != 2 ||
-				    dirbuf.dotdot_name[0] != '.' ||
-				    dirbuf.dotdot_name[1] != '.') {
-					ufs_dirbad(xp, (doff_t)12,
-					    "rename: mangled dir");
-				} else {
-					dirbuf.dotdot_ino = newparent;
-					(void) vn_rdwr(UIO_WRITE, fvp,
-					    (caddr_t)&dirbuf,
-					    sizeof (struct dirtemplate),
-					    (off_t)0, UIO_SYSSPACE,
-					    IO_NODELOCKED|IO_SYNC,
-					    tcnp->cn_cred, (int *)0,
-					    (struct proc *)0);
-					cache_purge(fdvp);
-				}
-			}
-		}
-		error = ufs_dirremove(fdvp, fcnp);
-		if (!error) {
-			xp->i_ffs_nlink--;
-			xp->i_flag |= IN_CHANGE;
+			xp->i_offset = mastertemplate.dot_reclen;
+			ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0);
+			cache_purge(fdvp);
 		}
+		error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
 		xp->i_flag &= ~IN_RENAME;
 	}
 	if (dp)
@@ -1162,7 +1184,8 @@ bad:
 out:
 	if (doingdirectory)
 		ip->i_flag &= ~IN_RENAME;
-	if (VOP_LOCK(fvp) == 0) {
+	if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) {
+		ip->i_effnlink--;
 		ip->i_ffs_nlink--;
 		ip->i_flag |= IN_CHANGE;
 		vput(fvp);
@@ -1172,18 +1195,6 @@ out:
 }
 
 /*
- * A virgin directory (no blushing please).
- */
-static struct dirtemplate mastertemplate = {
-	0, 12, DT_DIR, 1, ".",
-	0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
-};
-static struct odirtemplate omastertemplate = {
-	0, 12, 1, ".",
-	0, DIRBLKSIZ - 12, 2, ".."
-};
-
-/*
  * Mkdir system call
  */
 int
@@ -1196,11 +1207,13 @@ ufs_mkdir(v)
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap = v;
-	register struct vnode *dvp = ap->a_dvp;
-	register struct vattr *vap = ap->a_vap;
-	register struct componentname *cnp = ap->a_cnp;
-	register struct inode *ip, *dp;
+	struct vnode *dvp = ap->a_dvp;
+	struct vattr *vap = ap->a_vap;
+	struct componentname *cnp = ap->a_cnp;
+	struct inode *ip, *dp;
 	struct vnode *tvp;
+	struct buf *bp;
+	struct direct newdir;
 	struct dirtemplate dirtemplate, *dtp;
 	struct timespec ts;
 	int error, dmode;
@@ -1239,24 +1252,31 @@ ufs_mkdir(v)
 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
 	ip->i_ffs_mode = dmode;
 	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
+	ip->i_effnlink = 2;
 	ip->i_ffs_nlink = 2;
+	if (DOINGSOFTDEP(tvp))
+		softdep_increase_linkcnt(ip);
+
 	if (cnp->cn_flags & ISWHITEOUT)
 		ip->i_ffs_flags |= UF_OPAQUE;
-	TIMEVAL_TO_TIMESPEC(&time, &ts);
-	error = VOP_UPDATE(tvp, &ts, &ts, 1);
 
 	/*
-	 * Bump link count in parent directory
-	 * to reflect work done below.  Should
-	 * be done before reference is created
-	 * so reparation is possible if we crash.
+	 * Bump link count in parent directory to reflect work done below.
+	 * Should be done before reference is created so cleanup is
+	 * possible if we crash.
 	 */
+	dp->i_effnlink++;
 	dp->i_ffs_nlink++;
 	dp->i_flag |= IN_CHANGE;
-	if ((error = VOP_UPDATE(dvp, &ts, &ts, 1)) != 0)
+	if (DOINGSOFTDEP(dvp))
+		softdep_increase_linkcnt(dp);
+	TIMEVAL_TO_TIMESPEC(&time, &ts);
+	if ((error = VOP_UPDATE(dvp, &ts, &ts, !DOINGSOFTDEP(dvp))) != 0)
 		goto bad;
 
-	/* Initialize directory with "." and ".." from static template. */
+	/* 
+	 * Initialize directory with "." and ".." from static template.
+	 */
 	if (dvp->v_mount->mnt_maxsymlinklen > 0)
 		dtp = &mastertemplate;
 	else
@@ -1264,40 +1284,56 @@ ufs_mkdir(v)
 	dirtemplate = *dtp;
 	dirtemplate.dot_ino = ip->i_number;
 	dirtemplate.dotdot_ino = dp->i_number;
-	error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate,
-	    sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE,
-	    IO_NODELOCKED|IO_SYNC, cnp->cn_cred, (int *)0, (struct proc *)0);
-	if (error) {
-		dp->i_ffs_nlink--;
-		dp->i_flag |= IN_CHANGE;
+
+	if ((error = VOP_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred,
+            B_CLRBUF, &bp)) != 0)
+		goto bad;
+	ip->i_ffs_size = DIRBLKSIZ;
+	ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	vnode_pager_setsize(tvp, (u_long)ip->i_ffs_size);
+	bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
+	if ((error = VOP_UPDATE(tvp, &ts, &ts, !DOINGSOFTDEP(tvp))) != 0) {
+		(void)VOP_BWRITE(bp);
 		goto bad;
-	}
-	if (DIRBLKSIZ > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
-		panic("ufs_mkdir: blksize"); /* XXX should grow with balloc() */
-	else {
-		ip->i_ffs_size = DIRBLKSIZ;
-		ip->i_flag |= IN_CHANGE;
 	}
 
-	/* Directory set up, now install it's entry in the parent directory. */
-	if ((error = ufs_direnter(ip, dvp, cnp)) != 0) {
-		dp->i_ffs_nlink--;
-		dp->i_flag |= IN_CHANGE;
-	}
-bad:
 	/*
-	 * No need to do an explicit VOP_TRUNCATE here, vrele will do this
-	 * for us because we set the link count to 0.
+         * Directory set up, now install its entry in the parent directory.
+         *
+         * If we are not doing soft dependencies, then we must write out the
+         * buffer containing the new directory body before entering the new
+         * name in the parent. If we are doing soft dependencies, then the
+         * buffer containing the new directory body will be passed to and
+         * released in the soft dependency code after the code has attached
+         * an appropriate ordering dependency to the buffer which ensures that
+         * the buffer is written before the new name is written in the parent.
 	 */
-	if (error) {
-		ip->i_ffs_nlink = 0;
-		ip->i_flag |= IN_CHANGE;
+        if (!DOINGSOFTDEP(dvp) && ((error = VOP_BWRITE(bp)) != 0))
+                goto bad;
+        ufs_makedirentry(ip, cnp, &newdir);
+        error = ufs_direnter(dvp, &newdir, cnp, bp);
+  
+bad:
+        if (error == 0) {
+                *ap->a_vpp = tvp;
+        } else {
+                dp->i_effnlink--;
+                dp->i_ffs_nlink--;
+                dp->i_flag |= IN_CHANGE;
+                /*
+                 * No need to do an explicit VOP_TRUNCATE here, vrele will
+                 * do this for us because we set the link count to 0.
+                 */
+                ip->i_effnlink = 0;
+                ip->i_ffs_nlink = 0;
+                ip->i_flag |= IN_CHANGE;
+
 		vput(tvp);
-	} else
-		*ap->a_vpp = tvp;
+	}
 out:
 	FREE(cnp->cn_pnbuf, M_NAMEI);
 	vput(dvp);
+
 	return (error);
 }
 
@@ -1313,10 +1349,10 @@ ufs_rmdir(v)
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap = v;
-	register struct vnode *vp = ap->a_vp;
-	register struct vnode *dvp = ap->a_dvp;
-	register struct componentname *cnp = ap->a_cnp;
-	register struct inode *ip, *dp;
+	struct vnode *vp = ap->a_vp;
+	struct vnode *dvp = ap->a_dvp;
+	struct componentname *cnp = ap->a_cnp;
+	struct inode *ip, *dp;
 	int error;
 
 	ip = VTOI(vp);
@@ -1330,14 +1366,17 @@ ufs_rmdir(v)
 		return (EINVAL);
 	}
 	/*
-	 * Verify the directory is empty (and valid).
-	 * (Rmdir ".." won't be valid since
-	 *  ".." will contain a reference to
-	 *  the current directory and thus be
-	 *  non-empty.)
+         * Do not remove a directory that is in the process of being renamed.
+         * Verify the directory is empty (and valid). Rmdir ".." will not be
+         * valid since ".." will contain a reference to the current directory
+         * and thus be non-empty.
 	 */
 	error = 0;
-	if (ip->i_ffs_nlink != 2 ||
+	if (ip->i_flag & IN_RENAME) {
+		error = EINVAL;
+		goto out;
+	}
+	if (ip->i_effnlink != 2 ||
 	    !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
 		error = ENOTEMPTY;
 		goto out;
@@ -1352,31 +1391,33 @@ ufs_rmdir(v)
 	 * inode.  If we crash in between, the directory
 	 * will be reattached to lost+found,
 	 */
-	if ((error = ufs_dirremove(dvp, cnp)) != 0)
+	if ((error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1)) != 0)
 		goto out;
-	dp->i_ffs_nlink--;
-	dp->i_flag |= IN_CHANGE;
 	cache_purge(dvp);
-	vput(dvp);
-	dvp = NULL;
-	/*
-	 * Truncate inode.  The only stuff left
-	 * in the directory is "." and "..".  The
-	 * "." reference is inconsequential since
-	 * we're quashing it.  The ".." reference
-	 * has already been adjusted above.  We've
-	 * removed the "." reference and the reference
-	 * in the parent directory, but there may be
-	 * other hard links so decrement by 2 and
-	 * worry about them later.
+        /*
+	 * Truncate inode. The only stuff left in the directory is "." and
+	 * "..". The "." reference is inconsequential since we are quashing
+	 * it. We have removed the "." reference and the reference in the
+	 * parent directory, but there may be other hard links. So,
+	 * ufs_dirremove will set the UF_IMMUTABLE flag to ensure that no
+	 * new entries are made. The soft dependency code will arrange to
+	 * do these operations after the parent directory entry has been
+	 * deleted on disk, so when running with that code we avoid doing
+	 * them now.
 	 */
-	ip->i_ffs_nlink -= 2;
-	error = VOP_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred,
-	    cnp->cn_proc);
-	cache_purge(ITOV(ip));
+	dp->i_effnlink--;
+	dp->i_flag |= IN_CHANGE;
+	ip->i_effnlink--;
+	ip->i_flag |= IN_CHANGE;
+	if (!DOINGSOFTDEP(vp)) {
+		dp->i_ffs_nlink--;
+		ip->i_ffs_nlink--;
+		error = VOP_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred,
+		   cnp->cn_proc);
+	}
+	cache_purge(vp);
 out:
-	if (dvp)
-		vput(dvp);
+        vput(dvp);
 	vput(vp);
 	return (error);
 }
@@ -1395,8 +1436,8 @@ ufs_symlink(v)
 		struct vattr *a_vap;
 		char *a_target;
 	} */ *ap = v;
-	register struct vnode *vp, **vpp = ap->a_vpp;
-	register struct inode *ip;
+	struct vnode *vp, **vpp = ap->a_vpp;
+	struct inode *ip;
 	int len, error;
 
 	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
@@ -1436,10 +1477,10 @@ ufs_readdir(v)
 		struct uio *a_uio;
 		struct ucred *a_cred;
 		int *a_eofflag;
-		u_long *a_cookies;
-		int ncookies;
+		u_long **a_cookies;
+		int *a_ncookies;
 	} */ *ap = v;
-	register struct uio *uio = ap->a_uio;
+	struct uio *uio = ap->a_uio;
 	int error;
 	size_t count, lost;
 	off_t off = uio->uio_offset;
@@ -1495,9 +1536,10 @@ ufs_readdir(v)
 		error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
 #	endif
 	if (!error && ap->a_ncookies) {
-		register struct dirent *dp;
-		register u_long *cookies = ap->a_cookies;
-		register int ncookies = ap->a_ncookies;
+		struct dirent *dp, *dpstart;
+		off_t offstart;
+		u_long *cookies;
+		int ncookies;
 
 		/*
 		 * Only the NFS server and emulations use cookies, and they
@@ -1506,17 +1548,28 @@ ufs_readdir(v)
 		 */
 		if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
 			panic("ufs_readdir: lost in space");
-		dp = (struct dirent *)
-		     (uio->uio_iov->iov_base - (uio->uio_offset - off));
-		while (ncookies-- && off < uio->uio_offset) {
-			if (dp->d_reclen == 0)
-				break;
+
+		dpstart = (struct dirent *)
+			(uio->uio_iov->iov_base - (uio->uio_offset - off));
+                offstart = off;
+                for (dp = dpstart, ncookies = 0; off < uio->uio_offset; ) {
+                        if (dp->d_reclen == 0)
+                                break;
+                        off += dp->d_reclen;
+                        ncookies++;
+                        dp = (struct dirent *)((caddr_t)dp + dp->d_reclen);
+                }
+                lost += uio->uio_offset - off;
+                uio->uio_offset = off;
+                MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP,
+                    M_WAITOK);
+                *ap->a_ncookies = ncookies;
+                *ap->a_cookies = cookies;
+                for (off = offstart, dp = dpstart; off < uio->uio_offset; ) {
+                        *(cookies++) = off;
 			off += dp->d_reclen;
-			*(cookies++) = off;
-			dp = (struct dirent *)((caddr_t)dp + dp->d_reclen);
+                        dp = (struct dirent *)((caddr_t)dp + dp->d_reclen);
 		}
-		lost += uio->uio_offset - off;
-		uio->uio_offset = off;
 	}
 	uio->uio_resid += lost;
 	*ap->a_eofflag = VTOI(ap->a_vp)->i_ffs_size <= uio->uio_offset;
@@ -1535,8 +1588,8 @@ ufs_readlink(v)
 		struct uio *a_uio;
 		struct ucred *a_cred;
 	} */ *ap = v;
-	register struct vnode *vp = ap->a_vp;
-	register struct inode *ip = VTOI(vp);
+	struct vnode *vp = ap->a_vp;
+	struct inode *ip = VTOI(vp);
 	int isize;
 
 	isize = ip->i_ffs_size;
@@ -1575,82 +1628,31 @@ ufs_lock(v)
 {
 	struct vop_lock_args /* {
 		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
 	} */ *ap = v;
-	register struct vnode *vp = ap->a_vp;
-	register struct inode *ip;
-#ifdef DIAGNOSTIC
-	struct proc *p = curproc;	/* XXX */
-#endif
+	struct vnode *vp = ap->a_vp;
 
-start:
-	while (vp->v_flag & VXLOCK) {
-		vp->v_flag |= VXWANT;
-		sleep((caddr_t)vp, PINOD);
-	}
-	if (vp->v_tag == VT_NON)
-		return (ENOENT);
-	ip = VTOI(vp);
-	if (ip->i_flag & IN_LOCKED) {
-		ip->i_flag |= IN_WANTED;
-#ifdef DIAGNOSTIC
-		if (p) {
-			if (p->p_pid == ip->i_lockholder)
-				panic("locking against myself");
-			ip->i_lockwaiter = p->p_pid;
-		} else
-			ip->i_lockwaiter = -1;
-#endif
-		(void) sleep((caddr_t)ip, PINOD);
-		goto start;
-	}
-#ifdef DIAGNOSTIC
-	ip->i_lockwaiter = 0;
-	if (ip->i_lockholder != 0)
-		panic("lockholder (%d) != 0", ip->i_lockholder);
-	if (p && p->p_pid == 0)
-		printf("locking by process 0\n");
-	if (p)
-		ip->i_lockholder = p->p_pid;
-	else
-		ip->i_lockholder = -1;
-#endif
-	ip->i_flag |= IN_LOCKED;
-	return (0);
+	return (lockmgr(&VTOI(vp)->i_lock, ap->a_flags, &vp->v_interlock,
+		ap->a_p));
 }
 
 /*
  * Unlock an inode.  If WANT bit is on, wakeup.
  */
-int lockcount = 90;
 int
 ufs_unlock(v)
 	void *v;
 {
 	struct vop_unlock_args /* {
 		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
 	} */ *ap = v;
-	register struct inode *ip = VTOI(ap->a_vp);
-#ifdef DIAGNOSTIC
-	struct proc *p = curproc;	/* XXX */
-#endif
+	struct vnode *vp = ap->a_vp;
 
-#ifdef DIAGNOSTIC
-	if ((ip->i_flag & IN_LOCKED) == 0) {
-		vprint("ufs_unlock: unlocked inode", ap->a_vp);
-		panic("ufs_unlock NOT LOCKED");
-	}
-	if (p && p->p_pid != ip->i_lockholder && p->p_pid > -1 &&
-	    ip->i_lockholder > -1 && lockcount++ < 100)
-		panic("unlocker (%d) != lock holder (%d)",
-		    p->p_pid, ip->i_lockholder);
-	ip->i_lockholder = 0;
-#endif
-	ip->i_flag &= ~IN_LOCKED;
-	if (ip->i_flag & IN_WANTED) {
-		ip->i_flag &= ~IN_WANTED;
-		wakeup((caddr_t)ip);
-	}
-	return (0);
+	return (lockmgr(&VTOI(vp)->i_lock, ap->a_flags | LK_RELEASE,
+		&vp->v_interlock, ap->a_p));
 }
 
 /*
@@ -1664,9 +1666,7 @@ ufs_islocked(v)
 		struct vnode *a_vp;
 	} */ *ap = v;
 
-	if (VTOI(ap->a_vp)->i_flag & IN_LOCKED)
-		return (1);
-	return (0);
+	return (lockstatus(&VTOI(ap->a_vp)->i_lock));
 }
 
 /*
@@ -1680,9 +1680,9 @@ ufs_strategy(v)
 	struct vop_strategy_args /* {
 		struct buf *a_bp;
 	} */ *ap = v;
-	register struct buf *bp = ap->a_bp;
-	register struct vnode *vp = bp->b_vp;
-	register struct inode *ip;
+	struct buf *bp = ap->a_bp;
+	struct vnode *vp = bp->b_vp;
+	struct inode *ip;
 	int error;
 
 	ip = VTOI(vp);
@@ -1720,8 +1720,8 @@ ufs_print(v)
 	struct vop_print_args /* {
 		struct vnode *a_vp;
 	} */ *ap = v;
-	register struct vnode *vp = ap->a_vp;
-	register struct inode *ip = VTOI(vp);
+	struct vnode *vp = ap->a_vp;
+	struct inode *ip = VTOI(vp);
 
 	printf("tag VT_UFS, ino %d, on dev %d, %d", ip->i_number,
 		major(ip->i_dev), minor(ip->i_dev));
@@ -1729,12 +1729,7 @@ ufs_print(v)
 	if (vp->v_type == VFIFO)
 		fifo_printinfo(vp);
 #endif /* FIFO */
-	printf("%s\n", (ip->i_flag & IN_LOCKED) ? " (LOCKED)" : "");
-	if (ip->i_lockholder == 0)
-		return (0);
-	printf("\towner pid %d", ip->i_lockholder);
-	if (ip->i_lockwaiter)
-		printf(" waiting pid %d", ip->i_lockwaiter);
+	lockmgr_printinfo(&ip->i_lock);
 	printf("\n");
 	return (0);
 }
@@ -1796,10 +1791,12 @@ ufsspec_close(v)
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap = v;
-	register struct inode *ip = VTOI(ap->a_vp);
+	struct inode *ip = VTOI(ap->a_vp);
 
-	if (ap->a_vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED))
+	simple_lock(&ap->a_vp->v_interlock);
+	if (ap->a_vp->v_usecount > 1)
 		ITIMES(ip, &time, &time);
+	simple_unlock(&ap->a_vp->v_interlock);
 	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
 }
 
@@ -1864,10 +1861,13 @@ ufsfifo_close(v)
 		struct proc *a_p;
 	} */ *ap = v;
 	extern int (**fifo_vnodeop_p) __P((void *));
-	register struct inode *ip = VTOI(ap->a_vp);
+	struct vnode *vp = ap->a_vp;
+	struct inode *ip = VTOI(vp);
 
-	if (ap->a_vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED))
+	simple_lock(&vp->v_interlock);
+	if (ap->a_vp->v_usecount > 1)
 		ITIMES(ip, &time, &time);
+	simple_unlock(&vp->v_interlock);
 	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
 }
 #endif /* FIFO */
@@ -1924,7 +1924,7 @@ ufs_advlock(v)
 		struct flock *a_fl;
 		int  a_flags;
 	} */ *ap = v;
-	register struct inode *ip = VTOI(ap->a_vp);
+	struct inode *ip = VTOI(ap->a_vp);
 
 	return (lf_advlock(&ip->i_lockf, ip->i_ffs_size, ap->a_id, ap->a_op,
 	    ap->a_fl, ap->a_flags));
@@ -1953,9 +1953,9 @@ ufs_vinit(mntp, specops, fifoops, vpp)
 		if ((nvp = checkalias(vp, ip->i_ffs_rdev, mntp)) != NULL) {
 			/*
 			 * Discard unneeded vnode, but save its inode.
+			 * Note that the lock is carried over in the inode
+			 * to the replacement vnode.
 			 */
-			ufs_ihashrem(ip);
-			VOP_UNLOCK(vp);
 			nvp->v_data = vp->v_data;
 			vp->v_data = NULL;
 			vp->v_op = spec_vnodeop_p;
@@ -1966,7 +1966,6 @@ ufs_vinit(mntp, specops, fifoops, vpp)
 			 */
 			vp = nvp;
 			ip->i_vnode = vp;
-			ufs_ihashins(ip);
 		}
 		break;
 	case VFIFO:
@@ -2005,7 +2004,8 @@ ufs_makeinode(mode, dvp, vpp, cnp)
 	struct vnode **vpp;
 	struct componentname *cnp;
 {
-	register struct inode *ip, *pdir;
+	struct inode *ip, *pdir;
+	struct direct newdir;
 	struct timespec ts;
 	struct vnode *tvp;
 	int error;
@@ -2040,7 +2040,10 @@ ufs_makeinode(mode, dvp, vpp, cnp)
 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
 	ip->i_ffs_mode = mode;
 	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
+	ip->i_effnlink = 1;
 	ip->i_ffs_nlink = 1;
+	if (DOINGSOFTDEP(tvp))
+		softdep_increase_linkcnt(ip);
 	if ((ip->i_ffs_mode & ISGID) &&
 		!groupmember(ip->i_ffs_gid, cnp->cn_cred) &&
 	    suser(cnp->cn_cred, NULL))
@@ -2053,10 +2056,13 @@ ufs_makeinode(mode, dvp, vpp, cnp)
 	 * Make sure inode goes to disk before directory entry.
 	 */
 	TIMEVAL_TO_TIMESPEC(&time, &ts);
-	if ((error = VOP_UPDATE(tvp, &ts, &ts, 1)) != 0)
+	if ((error = VOP_UPDATE(tvp, &ts, &ts, !DOINGSOFTDEP(tvp))) != 0)
 		goto bad;
-	if ((error = ufs_direnter(ip, dvp, cnp)) != 0)
+
+	ufs_makedirentry(ip, cnp, &newdir);
+	if ((error = ufs_direnter(dvp, &newdir, cnp, NULL)) != 0)
 		goto bad;
+
 	if ((cnp->cn_flags & SAVESTART) == 0)
 		FREE(cnp->cn_pnbuf, M_NAMEI);
 	vput(dvp);
@@ -2070,8 +2076,12 @@ bad:
 	 */
 	free(cnp->cn_pnbuf, M_NAMEI);
 	vput(dvp);
+	ip->i_effnlink = 0;
 	ip->i_ffs_nlink = 0;
 	ip->i_flag |= IN_CHANGE;
 	vput(tvp);
+
 	return (error);
 }
+
+