-rw-r--r--  sys/kern/exec_script.c      4
-rw-r--r--  sys/kern/init_main.c        9
-rw-r--r--  sys/kern/kern_acct.c        4
-rw-r--r--  sys/kern/kern_exec.c        6
-rw-r--r--  sys/kern/kern_exit.c        4
-rw-r--r--  sys/kern/kern_ktrace.c     10
-rw-r--r--  sys/kern/kern_lkm.c        47
-rw-r--r--  sys/kern/kern_lock.c      537
-rw-r--r--  sys/kern/kern_sig.c         4
-rw-r--r--  sys/kern/kern_synch.c       4
-rw-r--r--  sys/kern/kern_sysctl.c      9
-rw-r--r--  sys/kern/subr_xxx.c         6
-rw-r--r--  sys/kern/sys_generic.c     22
-rw-r--r--  sys/kern/sys_pipe.c         4
-rw-r--r--  sys/kern/tty.c              6
-rw-r--r--  sys/kern/tty_tty.c         16
-rw-r--r--  sys/kern/uipc_usrreq.c      4
-rw-r--r--  sys/kern/vfs_bio.c        294
-rw-r--r--  sys/kern/vfs_cluster.c     19
-rw-r--r--  sys/kern/vfs_conf.c       172
-rw-r--r--  sys/kern/vfs_init.c        21
-rw-r--r--  sys/kern/vfs_lookup.c      36
-rw-r--r--  sys/kern/vfs_subr.c      1230
-rw-r--r--  sys/kern/vfs_syscalls.c   406
-rw-r--r--  sys/kern/vfs_vnops.c       58
-rw-r--r--  sys/kern/vnode_if.c        76
-rw-r--r--  sys/kern/vnode_if.src     241
27 files changed, 2398 insertions, 851 deletions
diff --git a/sys/kern/exec_script.c b/sys/kern/exec_script.c
index 551c52c7602..91f6d638397 100644
--- a/sys/kern/exec_script.c
+++ b/sys/kern/exec_script.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: exec_script.c,v 1.4 1996/10/20 15:30:07 dm Exp $ */
+/* $OpenBSD: exec_script.c,v 1.5 1997/10/06 15:12:10 csapuntz Exp $ */
/* $NetBSD: exec_script.c,v 1.13 1996/02/04 02:15:06 christos Exp $ */
/*
@@ -228,7 +228,7 @@ check_shell:
scriptvp = epp->ep_vp;
oldpnbuf = epp->ep_ndp->ni_cnd.cn_pnbuf;
- VOP_UNLOCK(scriptvp);
+ VOP_UNLOCK(scriptvp, 0, p);
if ((error = check_exec(p, epp)) == 0) {
/* note that we've clobbered the header */
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index a8f78c7511a..ae47944d74e 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: init_main.c,v 1.24 1997/07/28 09:13:17 deraadt Exp $ */
+/* $OpenBSD: init_main.c,v 1.25 1997/10/06 15:12:12 csapuntz Exp $ */
/* $NetBSD: init_main.c,v 1.84.4.1 1996/06/02 09:08:06 mrg Exp $ */
/*
@@ -335,17 +335,16 @@ main(framep)
schedcpu(NULL);
/* Mount the root file system. */
- if ((*mountroot)())
+ if (vfs_mountroot())
panic("cannot mount root");
mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
- mountlist.cqh_first->mnt_op->vfs_refcount++;
/* Get the vnode for '/'. Set filedesc0.fd_fd.fd_cdir to reference it. */
if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
panic("cannot find root vnode");
filedesc0.fd_fd.fd_cdir = rootvnode;
VREF(filedesc0.fd_fd.fd_cdir);
- VOP_UNLOCK(rootvnode);
+ VOP_UNLOCK(rootvnode, 0, p);
filedesc0.fd_fd.fd_rdir = NULL;
swapinit();
@@ -584,6 +583,6 @@ start_update(p)
*/
p->p_flag |= P_INMEM | P_SYSTEM; /* XXX */
bcopy("update", curproc->p_comm, sizeof ("update"));
- vn_update();
+ sched_sync(p);
/* NOTREACHED */
}
diff --git a/sys/kern/kern_acct.c b/sys/kern/kern_acct.c
index c371c085046..ccf5d0a79db 100644
--- a/sys/kern/kern_acct.c
+++ b/sys/kern/kern_acct.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_acct.c,v 1.2 1996/03/03 17:19:40 niklas Exp $ */
+/* $OpenBSD: kern_acct.c,v 1.3 1997/10/06 15:12:14 csapuntz Exp $ */
/* $NetBSD: kern_acct.c,v 1.42 1996/02/04 02:15:12 christos Exp $ */
/*-
@@ -120,7 +120,7 @@ sys_acct(p, v, retval)
p);
if ((error = vn_open(&nd, FWRITE, 0)) != 0)
return (error);
- VOP_UNLOCK(nd.ni_vp);
+ VOP_UNLOCK(nd.ni_vp, 0, p);
if (nd.ni_vp->v_type != VREG) {
vn_close(nd.ni_vp, FWRITE, p->p_ucred, p);
return (EACCES);
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 4dfb48a0e9c..b70c6bd3cd2 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_exec.c,v 1.12 1997/08/01 22:54:50 deraadt Exp $ */
+/* $OpenBSD: kern_exec.c,v 1.13 1997/10/06 15:12:16 csapuntz Exp $ */
/* $NetBSD: kern_exec.c,v 1.75 1996/02/09 18:59:28 christos Exp $ */
/*-
@@ -188,10 +188,10 @@ check_exec(p, epp)
bad2:
/*
- * unlock and close the vnode, restore the old one, free the
+ * unlock and close the vnode, free the
* pathname buf, and punt.
*/
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
vn_close(vp, FREAD, p->p_ucred, p);
FREE(ndp->ni_cnd.cn_pnbuf, M_NAMEI);
return error;
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 0f74d83a4f3..bab12bfe770 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_exit.c,v 1.9 1997/09/15 05:46:12 millert Exp $ */
+/* $OpenBSD: kern_exit.c,v 1.10 1997/10/06 15:12:17 csapuntz Exp $ */
/* $NetBSD: kern_exit.c,v 1.39 1996/04/22 01:38:25 christos Exp $ */
/*
@@ -179,7 +179,7 @@ exit1(p, rv)
* if we blocked.
*/
if (sp->s_ttyvp)
- vgoneall(sp->s_ttyvp);
+ VOP_REVOKE(sp->s_ttyvp, REVOKEALL);
}
if (sp->s_ttyvp)
vrele(sp->s_ttyvp);
diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c
index ab28fda5075..973ba8e5827 100644
--- a/sys/kern/kern_ktrace.c
+++ b/sys/kern/kern_ktrace.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_ktrace.c,v 1.3 1996/03/03 17:19:46 niklas Exp $ */
+/* $OpenBSD: kern_ktrace.c,v 1.4 1997/10/06 15:12:18 csapuntz Exp $ */
/* $NetBSD: kern_ktrace.c,v 1.23 1996/02/09 18:59:36 christos Exp $ */
/*
@@ -293,7 +293,9 @@ sys_ktrace(curp, v, retval)
return (error);
}
vp = nd.ni_vp;
- VOP_UNLOCK(vp);
+
+ /* FIXME: Should be curp?? */
+ VOP_UNLOCK(vp, 0, p);
if (vp->v_type != VREG) {
(void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp);
curp->p_traceflag &= ~KTRFAC_ACTIVE;
@@ -468,9 +470,9 @@ ktrwrite(vp, kth)
aiov[1].iov_len = kth->ktr_len;
auio.uio_resid += kth->ktr_len;
}
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, p->p_ucred);
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
if (!error)
return;
/*
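
The hunks above show the locking protocol this commit converts callers
to: the bare VOP_LOCK()/VOP_UNLOCK() pair becomes vn_lock() with
explicit lock flags plus the acting process, and VOP_UNLOCK() gains
matching flag and process arguments. A minimal sketch of the pattern,
assuming a vnode vp written on behalf of process p as in ktrwrite()
above (the wrapper function itself is invented):

	/*
	 * Sketch only: Lite2-style vnode locking around a write.
	 */
	int
	locked_write(vp, uio, p)
		struct vnode *vp;
		struct uio *uio;
		struct proc *p;
	{
		int error;

		/* Take an exclusive lock, retrying if the lock drains. */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		error = VOP_WRITE(vp, uio, IO_UNIT|IO_APPEND, p->p_ucred);
		/* Flags are 0 for a plain unlock. */
		VOP_UNLOCK(vp, 0, p);
		return (error);
	}
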
diff --git a/sys/kern/kern_lkm.c b/sys/kern/kern_lkm.c
index 144df985707..524b3671468 100644
--- a/sys/kern/kern_lkm.c
+++ b/sys/kern/kern_lkm.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_lkm.c,v 1.18 1997/09/24 18:16:22 mickey Exp $ */
+/* $OpenBSD: kern_lkm.c,v 1.19 1997/10/06 15:12:19 csapuntz Exp $ */
/* $NetBSD: kern_lkm.c,v 1.31 1996/03/31 21:40:27 christos Exp $ */
/*
@@ -689,57 +689,52 @@ _lkm_vfs(lkmtp, cmd)
struct lkm_table *lkmtp;
int cmd;
{
- struct lkm_vfs *args = lkmtp->private.lkm_vfs;
- int i;
int error = 0;
-
+#if 0
+ struct lkm_vfs *args = lkmtp->private.lkm_vfs;
+ struct vfsconf *vfsp, **vfspp;
+#endif
switch(cmd) {
case LKM_E_LOAD:
/* don't load twice! */
if (lkmexists(lkmtp))
return (EEXIST);
+ return (EEXIST);
+#if 0
/* make sure there's no VFS in the table with this name */
- for (i = 0; i < nvfssw; i++)
- if (vfssw[i] != (struct vfsops *)0 &&
- strncmp(vfssw[i]->vfs_name,
+ for (vfspp = &vfsconf, vfsp = vfsconf;
+ vfsp;
+ vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next)
+ if (strncmp(vfsp->vfc_name,
args->lkm_vfsops->vfs_name,
MFSNAMELEN) == 0)
return (EEXIST);
+
/* pick the last available empty slot */
- for (i = nvfssw - 1; i >= 0; i--)
- if (vfssw[i] == (struct vfsops *)0)
- break;
- if (i == -1) { /* or if none, punt */
- error = EINVAL;
- break;
- }
+ MALLOC (vfsp, struct vfsconf *, sizeof (struct vfsconf),
+ M_VFS, M_WAITOK);
+
+ /* Add to the end of the list */
+ *vfspp = vfsp;
/*
* Set up file system
*/
- vfssw[i] = args->lkm_vfsops;
- vfssw[i]->vfs_refcount = 0;
+ /* FIXME (CPS): Setup new vfsconf structure */
/*
* Call init function for this VFS...
*/
- (*(vfssw[i]->vfs_init))();
+ (*(vfsp->vfc_vfsops->vfs_init))(vfsp);
/* done! */
- args->lkm_offset = i; /* slot in vfssw[] */
+ /* Nope - can't return this */
break;
+#endif
case LKM_E_UNLOAD:
- /* current slot... */
- i = args->lkm_offset;
-
- if (vfssw[i]->vfs_refcount != 0)
- return (EBUSY);
-
- /* replace current slot contents with old contents */
- vfssw[i] = (struct vfsops *)0;
break;
case LKM_E_STAT: /* no special handling... */
diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c
new file mode 100644
index 00000000000..c6793d24f58
--- /dev/null
+++ b/sys/kern/kern_lock.c
@@ -0,0 +1,537 @@
+/*
+ * Copyright (c) 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code contains ideas from software contributed to Berkeley by
+ * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating
+ * System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_lock.c 8.18 (Berkeley) 5/21/95
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/lock.h>
+#include <sys/systm.h>
+
+#include <machine/cpu.h>
+
+void record_stacktrace __P((int *, int));
+void playback_stacktrace __P((int *, int));
+
+/*
+ * Locking primitives implementation.
+ * Locks provide shared/exclusive synchronization.
+ */
+
+#ifdef DEBUG
+#define COUNT(p, x) if (p) (p)->p_locks += (x)
+#else
+#define COUNT(p, x)
+#endif
+
+#if NCPUS > 1
+
+/*
+ * For multiprocessor system, try spin lock first.
+ *
+ * This should be inline expanded below, but we cannot have #if
+ * inside a multiline define.
+ */
+int lock_wait_time = 100;
+#define PAUSE(lkp, wanted) \
+ if (lock_wait_time > 0) { \
+ int i; \
+ \
+ simple_unlock(&lkp->lk_interlock); \
+ for (i = lock_wait_time; i > 0; i--) \
+ if (!(wanted)) \
+ break; \
+ simple_lock(&lkp->lk_interlock); \
+ } \
+ if (!(wanted)) \
+ break;
+
+#else /* NCPUS == 1 */
+
+/*
+ * It is an error to spin on a uniprocessor as nothing will ever cause
+ * the simple lock to clear while we are executing.
+ */
+#define PAUSE(lkp, wanted)
+
+#endif /* NCPUS == 1 */
+
+/*
+ * Acquire a resource.
+ */
+#define ACQUIRE(lkp, error, extflags, wanted) \
+ PAUSE(lkp, wanted); \
+ for (error = 0; wanted; ) { \
+ (lkp)->lk_waitcount++; \
+ simple_unlock(&(lkp)->lk_interlock); \
+ error = tsleep((void *)lkp, (lkp)->lk_prio, \
+ (lkp)->lk_wmesg, (lkp)->lk_timo); \
+ simple_lock(&(lkp)->lk_interlock); \
+ (lkp)->lk_waitcount--; \
+ if (error) \
+ break; \
+ if ((extflags) & LK_SLEEPFAIL) { \
+ error = ENOLCK; \
+ break; \
+ } \
+ }
+
+/*
+ * Initialize a lock; required before use.
+ */
+void
+lockinit(lkp, prio, wmesg, timo, flags)
+ struct lock *lkp;
+ int prio;
+ char *wmesg;
+ int timo;
+ int flags;
+{
+
+ bzero(lkp, sizeof(struct lock));
+ simple_lock_init(&lkp->lk_interlock);
+ lkp->lk_flags = flags & LK_EXTFLG_MASK;
+ lkp->lk_prio = prio;
+ lkp->lk_timo = timo;
+ lkp->lk_wmesg = wmesg;
+ lkp->lk_lockholder = LK_NOPROC;
+}
+
+/*
+ * Determine the status of a lock.
+ */
+int
+lockstatus(lkp)
+ struct lock *lkp;
+{
+ int lock_type = 0;
+
+ simple_lock(&lkp->lk_interlock);
+ if (lkp->lk_exclusivecount != 0)
+ lock_type = LK_EXCLUSIVE;
+ else if (lkp->lk_sharecount != 0)
+ lock_type = LK_SHARED;
+ simple_unlock(&lkp->lk_interlock);
+ return (lock_type);
+}
+
+/*
+ * Set, change, or release a lock.
+ *
+ * Shared requests increment the shared count. Exclusive requests set the
+ * LK_WANT_EXCL flag (preventing further shared locks), and wait for already
+ * accepted shared locks and shared-to-exclusive upgrades to go away.
+ */
+int
+lockmgr(lkp, flags, interlkp, p)
+ __volatile struct lock *lkp;
+ u_int flags;
+ struct simplelock *interlkp;
+ struct proc *p;
+{
+ int error;
+ pid_t pid;
+ int extflags;
+
+ error = 0;
+ if (p)
+ pid = p->p_pid;
+ else
+ pid = LK_KERNPROC;
+ simple_lock(&lkp->lk_interlock);
+ if (flags & LK_INTERLOCK)
+ simple_unlock(interlkp);
+ extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
+#ifdef DIAGNOSTIC
+ /*
+ * Once a lock has drained, the LK_DRAINING flag is set and an
+ * exclusive lock is returned. The only valid operation thereafter
+ * is a single release of that exclusive lock. This final release
+ * clears the LK_DRAINING flag and sets the LK_DRAINED flag. Any
+ * further requests of any sort will result in a panic. The bits
+ * selected for these two flags are chosen so that they will be set
+ * in memory that is freed (freed memory is filled with 0xdeadbeef).
+ * The final release is permitted to give a new lease on life to
+ * the lock by specifying LK_REENABLE.
+ */
+ if (lkp->lk_flags & (LK_DRAINING|LK_DRAINED)) {
+ if (lkp->lk_flags & LK_DRAINED)
+ panic("lockmgr: using decommissioned lock");
+ if ((flags & LK_TYPE_MASK) != LK_RELEASE ||
+ lkp->lk_lockholder != pid)
+ panic("lockmgr: non-release on draining lock: %d\n",
+ flags & LK_TYPE_MASK);
+ lkp->lk_flags &= ~LK_DRAINING;
+ if ((flags & LK_REENABLE) == 0)
+ lkp->lk_flags |= LK_DRAINED;
+ }
+#endif /* DIAGNOSTIC */
+
+ switch (flags & LK_TYPE_MASK) {
+
+ case LK_SHARED:
+ if (lkp->lk_lockholder != pid) {
+ /*
+ * If just polling, check to see if we will block.
+ */
+ if ((extflags & LK_NOWAIT) && (lkp->lk_flags &
+ (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE))) {
+ error = EBUSY;
+ break;
+ }
+ /*
+ * Wait for exclusive locks and upgrades to clear.
+ */
+ ACQUIRE(lkp, error, extflags, lkp->lk_flags &
+ (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE));
+ if (error)
+ break;
+ lkp->lk_sharecount++;
+ COUNT(p, 1);
+ break;
+ }
+ /*
+ * We hold an exclusive lock, so downgrade it to shared.
+ * An alternative would be to fail with EDEADLK.
+ */
+ lkp->lk_sharecount++;
+ COUNT(p, 1);
+ /* fall into downgrade */
+
+ case LK_DOWNGRADE:
+ if (lkp->lk_lockholder != pid || lkp->lk_exclusivecount == 0)
+ panic("lockmgr: not holding exclusive lock");
+ lkp->lk_sharecount += lkp->lk_exclusivecount;
+ lkp->lk_exclusivecount = 0;
+ lkp->lk_flags &= ~LK_HAVE_EXCL;
+ lkp->lk_lockholder = LK_NOPROC;
+ if (lkp->lk_waitcount)
+ wakeup((void *)lkp);
+ break;
+
+ case LK_EXCLUPGRADE:
+ /*
+ * If another process is ahead of us to get an upgrade,
+ * then we want to fail rather than have an intervening
+ * exclusive access.
+ */
+ if (lkp->lk_flags & LK_WANT_UPGRADE) {
+ lkp->lk_sharecount--;
+ COUNT(p, -1);
+ error = EBUSY;
+ break;
+ }
+ /* fall into normal upgrade */
+
+ case LK_UPGRADE:
+ /*
+ * Upgrade a shared lock to an exclusive one. If another
+ * shared lock has already requested an upgrade to an
+ * exclusive lock, our shared lock is released and an
+ * exclusive lock is requested (which will be granted
+ * after the upgrade). If we return an error, the file
+ * will always be unlocked.
+ */
+ if (lkp->lk_lockholder == pid || lkp->lk_sharecount <= 0)
+ panic("lockmgr: upgrade exclusive lock");
+ lkp->lk_sharecount--;
+ COUNT(p, -1);
+ /*
+ * If we are just polling, check to see if we will block.
+ */
+ if ((extflags & LK_NOWAIT) &&
+ ((lkp->lk_flags & LK_WANT_UPGRADE) ||
+ lkp->lk_sharecount > 1)) {
+ error = EBUSY;
+ break;
+ }
+ if ((lkp->lk_flags & LK_WANT_UPGRADE) == 0) {
+ /*
+ * We are first shared lock to request an upgrade, so
+ * request upgrade and wait for the shared count to
+ * drop to zero, then take exclusive lock.
+ */
+ lkp->lk_flags |= LK_WANT_UPGRADE;
+ ACQUIRE(lkp, error, extflags, lkp->lk_sharecount);
+ lkp->lk_flags &= ~LK_WANT_UPGRADE;
+ if (error)
+ break;
+ lkp->lk_flags |= LK_HAVE_EXCL;
+ lkp->lk_lockholder = pid;
+ if (lkp->lk_exclusivecount != 0)
+ panic("lockmgr: non-zero exclusive count");
+ lkp->lk_exclusivecount = 1;
+ COUNT(p, 1);
+ break;
+ }
+ /*
+ * Someone else has requested upgrade. Release our shared
+ * lock, awaken upgrade requestor if we are the last shared
+ * lock, then request an exclusive lock.
+ */
+ if (lkp->lk_sharecount == 0 && lkp->lk_waitcount)
+ wakeup((void *)lkp);
+ /* fall into exclusive request */
+
+ case LK_EXCLUSIVE:
+ if (lkp->lk_lockholder == pid && pid != LK_KERNPROC) {
+ /*
+ * Recursive lock.
+ */
+ if ((extflags & LK_CANRECURSE) == 0)
+ panic("lockmgr: locking against myself");
+ lkp->lk_exclusivecount++;
+ COUNT(p, 1);
+ break;
+ }
+ /*
+ * If we are just polling, check to see if we will sleep.
+ */
+ if ((extflags & LK_NOWAIT) && ((lkp->lk_flags &
+ (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
+ lkp->lk_sharecount != 0)) {
+ error = EBUSY;
+ break;
+ }
+ /*
+ * Try to acquire the want_exclusive flag.
+ */
+ ACQUIRE(lkp, error, extflags, lkp->lk_flags &
+ (LK_HAVE_EXCL | LK_WANT_EXCL));
+ if (error)
+ break;
+ lkp->lk_flags |= LK_WANT_EXCL;
+ /*
+ * Wait for shared locks and upgrades to finish.
+ */
+ ACQUIRE(lkp, error, extflags, lkp->lk_sharecount != 0 ||
+ (lkp->lk_flags & LK_WANT_UPGRADE));
+ lkp->lk_flags &= ~LK_WANT_EXCL;
+ if (error)
+ break;
+ lkp->lk_flags |= LK_HAVE_EXCL;
+ lkp->lk_lockholder = pid;
+ if (lkp->lk_exclusivecount != 0)
+ panic("lockmgr: non-zero exclusive count");
+ lkp->lk_exclusivecount = 1;
+ COUNT(p, 1);
+ break;
+
+ case LK_RELEASE:
+ if (lkp->lk_exclusivecount != 0) {
+ if (pid != lkp->lk_lockholder)
+ panic("lockmgr: pid %d, not %s %d unlocking",
+ pid, "exclusive lock holder",
+ lkp->lk_lockholder);
+ lkp->lk_exclusivecount--;
+ COUNT(p, -1);
+ if (lkp->lk_exclusivecount == 0) {
+ lkp->lk_flags &= ~LK_HAVE_EXCL;
+ lkp->lk_lockholder = LK_NOPROC;
+ }
+ } else if (lkp->lk_sharecount != 0) {
+ lkp->lk_sharecount--;
+ COUNT(p, -1);
+ }
+ if (lkp->lk_waitcount)
+ wakeup((void *)lkp);
+ break;
+
+ case LK_DRAIN:
+ /*
+ * Check that we do not already hold the lock, as it can
+ * never drain if we do. Unfortunately, we have no way to
+ * check for holding a shared lock, but at least we can
+ * check for an exclusive one.
+ */
+ if (lkp->lk_lockholder == pid)
+ panic("lockmgr: draining against myself");
+ /*
+ * If we are just polling, check to see if we will sleep.
+ */
+ if ((extflags & LK_NOWAIT) && ((lkp->lk_flags &
+ (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
+ lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0)) {
+ error = EBUSY;
+ break;
+ }
+ PAUSE(lkp, ((lkp->lk_flags &
+ (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
+ lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0));
+ for (error = 0; ((lkp->lk_flags &
+ (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
+ lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0); ) {
+ lkp->lk_flags |= LK_WAITDRAIN;
+ simple_unlock(&lkp->lk_interlock);
+ if ((error = tsleep((void *)&lkp->lk_flags, lkp->lk_prio,
+ lkp->lk_wmesg, lkp->lk_timo)) != 0)
+ return (error);
+ if ((extflags) & LK_SLEEPFAIL)
+ return (ENOLCK);
+ simple_lock(&lkp->lk_interlock);
+ }
+ lkp->lk_flags |= LK_DRAINING | LK_HAVE_EXCL;
+ lkp->lk_lockholder = pid;
+ lkp->lk_exclusivecount = 1;
+ COUNT(p, 1);
+ break;
+
+ default:
+ simple_unlock(&lkp->lk_interlock);
+ panic("lockmgr: unknown locktype request %d",
+ flags & LK_TYPE_MASK);
+ /* NOTREACHED */
+ }
+ if ((lkp->lk_flags & LK_WAITDRAIN) && ((lkp->lk_flags &
+ (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) == 0 &&
+ lkp->lk_sharecount == 0 && lkp->lk_waitcount == 0)) {
+ lkp->lk_flags &= ~LK_WAITDRAIN;
+ wakeup((void *)&lkp->lk_flags);
+ }
+ simple_unlock(&lkp->lk_interlock);
+ return (error);
+}
+
+/*
+ * Print out information about state of a lock. Used by VOP_PRINT
+ * routines to display status about contained locks.
+ */
+void
+lockmgr_printinfo(lkp)
+ struct lock *lkp;
+{
+
+ if (lkp->lk_sharecount)
+ printf(" lock type %s: SHARED (count %d)", lkp->lk_wmesg,
+ lkp->lk_sharecount);
+ else if (lkp->lk_flags & LK_HAVE_EXCL)
+ printf(" lock type %s: EXCL (count %d) by pid %d",
+ lkp->lk_wmesg, lkp->lk_exclusivecount, lkp->lk_lockholder);
+ if (lkp->lk_waitcount > 0)
+ printf(" with %d pending", lkp->lk_waitcount);
+}
+
+#if defined(DEBUG) && NCPUS == 1
+#include <sys/kernel.h>
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+int lockpausetime = 0;
+struct ctldebug debug2 = { "lockpausetime", &lockpausetime };
+int simplelockrecurse;
+/*
+ * Simple lock functions so that the debugger can see from whence
+ * they are being called.
+ */
+void
+simple_lock_init(alp)
+ struct simplelock *alp;
+{
+
+ alp->lock_data = 0;
+}
+
+void
+_simple_lock(alp, id, l)
+ __volatile struct simplelock *alp;
+ const char *id;
+ int l;
+{
+
+ if (simplelockrecurse)
+ return;
+ if (alp->lock_data == 1) {
+ if (lockpausetime == -1)
+ panic("%s:%d: simple_lock: lock held", id, l);
+ printf("%s:%d: simple_lock: lock held\n", id, l);
+ if (lockpausetime == 1) {
+ BACKTRACE(curproc);
+ } else if (lockpausetime > 1) {
+ printf("%s:%d: simple_lock: lock held...", id, l);
+ tsleep(&lockpausetime, PCATCH | PPAUSE, "slock",
+ lockpausetime * hz);
+ printf(" continuing\n");
+ }
+ }
+ alp->lock_data = 1;
+ if (curproc)
+ curproc->p_simple_locks++;
+}
+
+int
+_simple_lock_try(alp, id, l)
+ __volatile struct simplelock *alp;
+ const char *id;
+ int l;
+{
+
+ if (alp->lock_data)
+ return (0);
+ if (simplelockrecurse)
+ return (1);
+ alp->lock_data = 1;
+ if (curproc)
+ curproc->p_simple_locks++;
+ return (1);
+}
+
+void
+_simple_unlock(alp, id, l)
+ __volatile struct simplelock *alp;
+ const char *id;
+ int l;
+{
+
+ if (simplelockrecurse)
+ return;
+ if (alp->lock_data == 0) {
+ if (lockpausetime == -1)
+ panic("%s:%d: simple_unlock: lock not held", id, l);
+ printf("%s:%d: simple_unlock: lock not held\n", id, l);
+ if (lockpausetime == 1) {
+ BACKTRACE(curproc);
+ } else if (lockpausetime > 1) {
+ printf("%s:%d: simple_unlock: lock not held...", id, l);
+ tsleep(&lockpausetime, PCATCH | PPAUSE, "sunlock",
+ lockpausetime * hz);
+ printf(" continuing\n");
+ }
+ }
+ alp->lock_data = 0;
+ if (curproc)
+ curproc->p_simple_locks--;
+}
+#endif /* DEBUG && NCPUS == 1 */
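
For reference, a minimal sketch of a client of the lockmgr interface
introduced above, using only names defined in this file or used
elsewhere in this commit (the demo wrapper and its "demolk" wait
message are invented):

	/* Sketch only: initialize and use a lockmgr lock. */
	struct lock demo_lock;

	void
	demo(p)
		struct proc *p;
	{
		/* Once: sleep at PVFS priority, message "demolk",
		   no timeout, no external flags. */
		lockinit(&demo_lock, PVFS, "demolk", 0, 0);

		/* Block until the exclusive lock is granted... */
		lockmgr(&demo_lock, LK_EXCLUSIVE, NULL, p);
		/* ...critical section... */
		lockmgr(&demo_lock, LK_RELEASE, NULL, p);
	}
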
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 98bc10fa302..2e29983bea4 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_sig.c,v 1.19 1997/09/15 05:46:13 millert Exp $ */
+/* $OpenBSD: kern_sig.c,v 1.20 1997/10/06 15:12:21 csapuntz Exp $ */
/* $NetBSD: kern_sig.c,v 1.54 1996/04/22 01:38:32 christos Exp $ */
/*
@@ -1213,7 +1213,7 @@ coredump(p)
UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, NULL, p);
}
out:
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
error1 = vn_close(vp, FWRITE, cred, p);
crfree(cred);
if (error == 0)
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 91b556e6067..93d2459035d 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_synch.c,v 1.7 1997/07/28 09:13:17 deraadt Exp $ */
+/* $OpenBSD: kern_synch.c,v 1.8 1997/10/06 15:12:23 csapuntz Exp $ */
/* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */
/*-
@@ -174,7 +174,6 @@ schedcpu(arg)
register int s;
register unsigned int newcpu;
- wakeup((caddr_t)&lbolt);
for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
/*
* Increment time in/out of memory and sleep time
@@ -223,6 +222,7 @@ schedcpu(arg)
splx(s);
}
vmmeter();
+ wakeup((caddr_t)&lbolt);
timeout(schedcpu, (void *)0, hz);
}
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
index 2ab8e6d63c0..923a4bd9077 100644
--- a/sys/kern/kern_sysctl.c
+++ b/sys/kern/kern_sysctl.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_sysctl.c,v 1.19 1997/09/03 13:51:08 kstailey Exp $ */
+/* $OpenBSD: kern_sysctl.c,v 1.20 1997/10/06 15:12:25 csapuntz Exp $ */
/* $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $ */
/*-
@@ -110,7 +110,7 @@ sys___sysctl(p, v, retval)
switch (name[0]) {
case CTL_KERN:
fn = kern_sysctl;
- if (name[2] != KERN_VNODE) /* XXX */
+ if (name[2] == KERN_VNODE) /* XXX */
dolock = 0;
break;
case CTL_HW:
@@ -125,6 +125,9 @@ sys___sysctl(p, v, retval)
case CTL_FS:
fn = fs_sysctl;
break;
+ case CTL_VFS:
+ fn = vfs_sysctl;
+ break;
case CTL_MACHDEP:
fn = cpu_sysctl;
break;
@@ -264,7 +267,7 @@ kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
return (sysctl_rdstruct(oldp, oldlenp, newp, &boottime,
sizeof(struct timeval)));
case KERN_VNODE:
- return (sysctl_vnode(oldp, oldlenp));
+ return (sysctl_vnode(oldp, oldlenp, p));
case KERN_PROC:
return (sysctl_doproc(name + 1, namelen - 1, oldp, oldlenp));
case KERN_FILE:
diff --git a/sys/kern/subr_xxx.c b/sys/kern/subr_xxx.c
index 0d44bc841b4..a19a8a11a1e 100644
--- a/sys/kern/subr_xxx.c
+++ b/sys/kern/subr_xxx.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: subr_xxx.c,v 1.3 1997/02/24 14:19:58 niklas Exp $ */
+/* $OpenBSD: subr_xxx.c,v 1.4 1997/10/06 15:12:26 csapuntz Exp $ */
/* $NetBSD: subr_xxx.c,v 1.10 1996/02/04 02:16:51 christos Exp $ */
/*
@@ -91,8 +91,10 @@ enosys ()
* Return error for operation not supported
* on a specific object or file type.
*/
+/*ARGSUSED*/
int
-eopnotsupp()
+eopnotsupp(v)
+ void *v;
{
return (EOPNOTSUPP);
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index ef811b417ba..ab02e3bb9d2 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: sys_generic.c,v 1.8 1997/08/31 20:42:21 deraadt Exp $ */
+/* $OpenBSD: sys_generic.c,v 1.9 1997/10/06 15:12:28 csapuntz Exp $ */
/* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */
/*
@@ -178,18 +178,12 @@ sys_readv(p, v, retval)
goto done;
auio.uio_resid = 0;
for (i = 0; i < SCARG(uap, iovcnt); i++) {
-#if 0
- /* Cannot happen iov_len is unsigned */
- if (iov->iov_len < 0) {
+ if (auio.uio_resid + iov->iov_len < auio.uio_resid) {
error = EINVAL;
goto done;
}
-#endif
+
auio.uio_resid += iov->iov_len;
- if (auio.uio_resid < 0) {
- error = EINVAL;
- goto done;
- }
iov++;
}
#ifdef KTRACE
@@ -337,18 +331,12 @@ sys_writev(p, v, retval)
goto done;
auio.uio_resid = 0;
for (i = 0; i < SCARG(uap, iovcnt); i++) {
-#if 0
- /* Cannot happen iov_len is unsigned */
- if (iov->iov_len < 0) {
+ if (auio.uio_resid + iov->iov_len < auio.uio_resid) {
error = EINVAL;
goto done;
}
-#endif
+
auio.uio_resid += iov->iov_len;
- if (auio.uio_resid < 0) {
- error = EINVAL;
- goto done;
- }
iov++;
}
#ifdef KTRACE
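
Both sys_readv() and sys_writev() now reject iovec totals that would
wrap: uio_resid is unsigned, so an overflowed sum is strictly smaller
than either operand. The idiom in isolation, as a sketch (the helper
is invented; totalp and len stand in for auio.uio_resid and
iov->iov_len):

	/*
	 * Sketch only: detect unsigned overflow while accumulating
	 * iovec lengths, mirroring the check added above.
	 */
	int
	add_len(totalp, len)
		u_int *totalp;
		u_int len;
	{
		if (*totalp + len < *totalp)	/* sum wrapped */
			return (EINVAL);
		*totalp += len;
		return (0);
	}
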
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 78f38f71f29..b4f48d76604 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: sys_pipe.c,v 1.5 1997/02/24 14:19:58 niklas Exp $ */
+/* $OpenBSD: sys_pipe.c,v 1.6 1997/10/06 15:12:29 csapuntz Exp $ */
/*
* Copyright (c) 1996 John S. Dyson
@@ -77,7 +77,7 @@
#include <vm/vm.h>
#include <vm/vm_prot.h>
#include <vm/vm_param.h>
-#include <vm/lock.h>
+#include <sys/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
diff --git a/sys/kern/tty.c b/sys/kern/tty.c
index 7548fd6befc..9a593842688 100644
--- a/sys/kern/tty.c
+++ b/sys/kern/tty.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: tty.c,v 1.27 1997/03/26 18:03:57 deraadt Exp $ */
+/* $OpenBSD: tty.c,v 1.28 1997/10/06 15:12:31 csapuntz Exp $ */
/* $NetBSD: tty.c,v 1.68.4.2 1996/06/06 16:04:52 thorpej Exp $ */
/*-
@@ -812,9 +812,9 @@ ttioctl(tp, cmd, data, flag, p)
error = namei(&nid);
if (error)
return (error);
- VOP_LOCK(nid.ni_vp);
+ vn_lock(nid.ni_vp, LK_EXCLUSIVE | LK_RETRY, p);
error = VOP_ACCESS(nid.ni_vp, VREAD, p->p_ucred, p);
- VOP_UNLOCK(nid.ni_vp);
+ VOP_UNLOCK(nid.ni_vp, 0, p);
vrele(nid.ni_vp);
if (error)
return (error);
diff --git a/sys/kern/tty_tty.c b/sys/kern/tty_tty.c
index 38c033636a4..0f70fcbda85 100644
--- a/sys/kern/tty_tty.c
+++ b/sys/kern/tty_tty.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: tty_tty.c,v 1.3 1996/04/21 22:27:32 deraadt Exp $ */
+/* $OpenBSD: tty_tty.c,v 1.4 1997/10/06 15:12:32 csapuntz Exp $ */
/* $NetBSD: tty_tty.c,v 1.13 1996/03/30 22:24:46 christos Exp $ */
/*-
@@ -63,7 +63,7 @@ cttyopen(dev, flag, mode, p)
if (ttyvp == NULL)
return (ENXIO);
- VOP_LOCK(ttyvp);
+ vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p);
#ifdef PARANOID
/*
* Since group is tty and mode is 620 on most terminal lines
@@ -78,7 +78,7 @@ cttyopen(dev, flag, mode, p)
if (!error)
#endif /* PARANOID */
error = VOP_OPEN(ttyvp, flag, NOCRED, p);
- VOP_UNLOCK(ttyvp);
+ VOP_UNLOCK(ttyvp, 0, p);
return (error);
}
@@ -89,14 +89,15 @@ cttyread(dev, uio, flag)
struct uio *uio;
int flag;
{
+ struct proc *p = uio->uio_procp;
register struct vnode *ttyvp = cttyvp(uio->uio_procp);
int error;
if (ttyvp == NULL)
return (EIO);
- VOP_LOCK(ttyvp);
+ vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p);
error = VOP_READ(ttyvp, uio, flag, NOCRED);
- VOP_UNLOCK(ttyvp);
+ VOP_UNLOCK(ttyvp, 0, p);
return (error);
}
@@ -107,14 +108,15 @@ cttywrite(dev, uio, flag)
struct uio *uio;
int flag;
{
+ struct proc *p = uio->uio_procp;
register struct vnode *ttyvp = cttyvp(uio->uio_procp);
int error;
if (ttyvp == NULL)
return (EIO);
- VOP_LOCK(ttyvp);
+ vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p);
error = VOP_WRITE(ttyvp, uio, flag, NOCRED);
- VOP_UNLOCK(ttyvp);
+ VOP_UNLOCK(ttyvp, 0, p);
return (error);
}
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index f1843da7ccc..58e0fcc5bad 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uipc_usrreq.c,v 1.4 1997/06/05 08:13:12 deraadt Exp $ */
+/* $OpenBSD: uipc_usrreq.c,v 1.5 1997/10/06 15:12:33 csapuntz Exp $ */
/* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */
/*
@@ -427,7 +427,7 @@ unp_bind(unp, nam, p)
vp->v_socket = unp->unp_socket;
unp->unp_vnode = vp;
unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
return (0);
}
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index d1d4592820c..c8e596db9a9 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_bio.c,v 1.15 1997/06/14 06:10:36 tholo Exp $ */
+/* $OpenBSD: vfs_bio.c,v 1.16 1997/10/06 15:12:35 csapuntz Exp $ */
/* $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $ */
/*-
@@ -63,6 +63,8 @@
#include <vm/vm.h>
+#include <miscfs/specfs/specdev.h>
+
/* Macros to clear/set/test flags. */
#define SET(t, f) (t) |= (f)
#define CLR(t, f) (t) &= ~(f)
@@ -94,6 +96,7 @@ u_long bufhash;
TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
int needbuffer;
+struct bio_ops bioops;
/*
* Insq/Remq for the buffer free lists.
@@ -139,7 +142,6 @@ bufinit()
register int i;
int base, residual;
- TAILQ_INIT(&bdirties);
for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
TAILQ_INIT(dp);
bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash);
@@ -153,6 +155,7 @@ bufinit()
bp->b_wcred = NOCRED;
bp->b_vnbufs.le_next = NOLIST;
bp->b_data = buffers + i * MAXBSIZE;
+ LIST_INIT(&bp->b_dep);
if (i < residual)
bp->b_bufsize = (base + 1) * CLBYTES;
else
@@ -278,7 +281,7 @@ int
bwrite(bp)
struct buf *bp;
{
- int rv, sync, wasdelayed, s;
+ int rv, async, wasdelayed, s;
/*
* Remember buffer type, to switch on it later. If the write was
@@ -287,34 +290,28 @@ bwrite(bp)
* XXX note that this relies on delayed tape writes being converted
* to async, not sync writes (which is safe, but ugly).
*/
- sync = !ISSET(bp->b_flags, B_ASYNC);
- if (sync && bp->b_vp && bp->b_vp->v_mount &&
+ async = ISSET(bp->b_flags, B_ASYNC);
+ if (!async && bp->b_vp && bp->b_vp->v_mount &&
ISSET(bp->b_vp->v_mount->mnt_flag, MNT_ASYNC)) {
bdwrite(bp);
return (0);
}
wasdelayed = ISSET(bp->b_flags, B_DELWRI);
CLR(bp->b_flags, (B_READ | B_DONE | B_ERROR | B_DELWRI));
+
+ s = splbio();
+
/*
- * If this was a delayed write, remove it from the
- * list of dirty blocks now
+ * If not synchronous, pay for the I/O operation and make
+ * sure the buf is on the correct vnode queue. We have
+ * to do this now, because if we don't, the vnode may not
+ * be properly notified that its I/O has completed.
*/
if (wasdelayed)
- TAILQ_REMOVE(&bdirties, bp, b_synclist);
-
- s = splbio();
- if (!sync) {
- /*
- * If not synchronous, pay for the I/O operation and make
- * sure the buf is on the correct vnode queue. We have
- * to do this now, because if we don't, the vnode may not
- * be properly notified that its I/O has completed.
- */
- if (wasdelayed)
- reassignbuf(bp, bp->b_vp);
- else
- curproc->p_stats->p_ru.ru_oublock++;
- }
+ reassignbuf(bp, bp->b_vp);
+ else
+ curproc->p_stats->p_ru.ru_oublock++;
+
/* Initiate disk write. Make sure the appropriate party is charged. */
bp->b_vp->v_numoutput++;
@@ -322,31 +319,18 @@ bwrite(bp)
SET(bp->b_flags, B_WRITEINPROG);
VOP_STRATEGY(bp);
- if (sync) {
- /*
- * If I/O was synchronous, wait for it to complete.
- */
- rv = biowait(bp);
+ if (async)
+ return (0);
- /*
- * Pay for the I/O operation, if it's not been paid for, and
- * make sure it's on the correct vnode queue. (async operatings
- * were payed for above.)
- */
- s = splbio();
- if (wasdelayed)
- reassignbuf(bp, bp->b_vp);
- else
- curproc->p_stats->p_ru.ru_oublock++;
- splx(s);
+ /*
+ * If I/O was synchronous, wait for it to complete.
+ */
+ rv = biowait(bp);
- /* Release the buffer. */
- brelse(bp);
+ /* Release the buffer. */
+ brelse(bp);
- return (rv);
- } else {
- return (0);
- }
+ return (rv);
}
int
@@ -382,23 +366,10 @@ bdwrite(bp)
* (3) Make sure it's on its vnode's correct block list,
* (4) If a buffer is rewritten, move it to end of dirty list
*/
- bp->b_synctime = time.tv_sec + 30;
if (!ISSET(bp->b_flags, B_DELWRI)) {
- /*
- * Add the buffer to the list of dirty blocks.
- * If it is the first entry on the list, schedule
- * a timeout to flush it to disk
- */
- TAILQ_INSERT_TAIL(&bdirties, bp, b_synclist);
- if (bdirties.tqh_first == bp) {
- untimeout((void (*)__P((void *)))wakeup,
- &bdirties); /* XXX */
- timeout((void (*)__P((void *)))wakeup,
- &bdirties, 30 * hz);
- }
SET(bp->b_flags, B_DELWRI);
- curproc->p_stats->p_ru.ru_oublock++; /* XXX */
reassignbuf(bp, bp->b_vp);
+ curproc->p_stats->p_ru.ru_oublock++; /* XXX */
}
/* If this is a tape block, write the block now. */
@@ -426,142 +397,17 @@ bawrite(bp)
VOP_BWRITE(bp);
}
-/*
- * Write out dirty buffers if they have been on the dirty
- * list for more than 30 seconds; scan for such buffers
- * once a second.
- */
void
-vn_update()
-{
- struct mount *mp, *nmp;
- struct timespec ts;
- struct vnode *vp;
+bdirty(bp)
struct buf *bp;
- int async, s;
+{
+ struct proc *p = curproc; /* XXX */
- /*
- * In case any buffers got scheduled for write before the
- * process got started (should never happen)
- */
- untimeout((void (*)__P((void *)))wakeup,
- &bdirties);
- for (;;) {
- s = splbio();
- /*
- * Schedule a wakeup when the next buffer is to
- * be flushed to disk. If no buffers are enqueued,
- * a wakeup will be scheduled at the time a new
- * buffer is enqueued
- */
- if ((bp = bdirties.tqh_first) != NULL) {
- untimeout((void (*)__P((void *)))wakeup,
- &bdirties); /* XXX */
- timeout((void (*)__P((void *)))wakeup,
- &bdirties, (bp->b_synctime - time.tv_sec) * hz);
- }
- tsleep(&bdirties, PZERO - 1, "dirty", 0);
- /*
- * Walk the dirty block list, starting an asyncroneous
- * write of any block that has timed out
- */
- while ((bp = bdirties.tqh_first) != NULL &&
- bp->b_synctime <= time.tv_sec) {
- /*
- * If the block is currently busy (perhaps being
- * written), move it to the end of the dirty list
- * and go to the next block
- */
- if (ISSET(bp->b_flags, B_BUSY)) {
- TAILQ_REMOVE(&bdirties, bp, b_synclist);
- TAILQ_INSERT_TAIL(&bdirties, bp, b_synclist);
- bp->b_synctime = time.tv_sec + 30;
- continue;
- }
- /*
- * Remove the block from the per-vnode dirty
- * list and mark it as busy
- */
- bremfree(bp);
- SET(bp->b_flags, B_BUSY);
- splx(s);
- /*
- * Start an asyncroneous write of the buffer.
- * Note that this will also remove the buffer
- * from the dirty list
- */
- bawrite(bp);
- s = splbio();
- }
- splx(s);
- /*
- * We also need to flush out modified vnodes
- */
- for (mp = mountlist.cqh_last;
- mp != (void *)&mountlist;
- mp = nmp) {
- /*
- * Get the next pointer in case we hang of vfs_busy()
- * while being unmounted
- */
- nmp = mp->mnt_list.cqe_prev;
- /*
- * The lock check below is to avoid races with mount
- * and unmount
- */
- if ((mp->mnt_flag & (MNT_MLOCK | MNT_RDONLY | MNT_MPBUSY)) == 0 &&
- !vfs_busy(mp)) {
- /*
- * Turn off the file system async flag until
- * we are done writing out vnodes
- */
- async = mp->mnt_flag & MNT_ASYNC;
- mp->mnt_flag &= ~MNT_ASYNC;
- /*
- * Walk the vnode list for the file system,
- * writing each modified vnode out
- */
-loop:
- for (vp = mp->mnt_vnodelist.lh_first;
- vp != NULL;
- vp = vp->v_mntvnodes.le_next) {
- /*
- * If the vnode is no longer associated
- * with the file system in question, skip
- * it
- */
- if (vp->v_mount != mp)
- goto loop;
- /*
- * If the vnode is currently locked,
- * ignore it
- */
- if (VOP_ISLOCKED(vp))
- continue;
- /*
- * Lock the vnode, start a write and
- * release the vnode
- */
- if (vget(vp, 1))
- goto loop;
- TIMEVAL_TO_TIMESPEC(&time, &ts);
- VOP_UPDATE(vp, &ts, &ts, 0);
- vput(vp);
- }
- /*
- * Restore the file system async flag if it
- * were previously set for this file system
- */
- mp->mnt_flag |= async;
- /*
- * Get the next pointer again as the next
- * file system might have been unmounted
- * while we were flushing vnodes
- */
- nmp = mp->mnt_list.cqe_prev;
- vfs_unbusy(mp);
- }
- }
+ if (ISSET(bp->b_flags, B_DELWRI) == 0) {
+ SET(bp->b_flags, B_DELWRI);
+ reassignbuf(bp, bp->b_vp);
+ if (p)
+ p->p_stats->p_ru.ru_oublock++;
}
}
@@ -576,18 +422,6 @@ brelse(bp)
struct bqueues *bufq;
int s;
- /* Wake up any processes waiting for any buffer to become free. */
- if (needbuffer) {
- needbuffer = 0;
- wakeup(&needbuffer);
- }
-
- /* Wake up any proceeses waiting for _this_ buffer to become free. */
- if (ISSET(bp->b_flags, B_WANTED)) {
- CLR(bp->b_flags, B_WANTED);
- wakeup(bp);
- }
-
/* Block disk interrupts. */
s = splbio();
@@ -622,11 +456,14 @@ brelse(bp)
* If it's invalid or empty, dissociate it from its vnode
* and put on the head of the appropriate queue.
*/
- if (bp->b_vp)
- brelvp(bp);
- if (ISSET(bp->b_flags, B_DELWRI))
- TAILQ_REMOVE(&bdirties, bp, b_synclist);
+ if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate) {
+ (*bioops.io_deallocate)(bp);
+ }
CLR(bp->b_flags, B_DELWRI);
+ if (bp->b_vp) {
+ reassignbuf(bp, bp->b_vp);
+ brelvp(bp);
+ }
if (bp->b_bufsize <= 0)
/* no data */
bufq = &bufqueues[BQ_EMPTY];
@@ -657,6 +494,18 @@ already_queued:
/* Allow disk interrupts. */
splx(s);
+
+ /* Wake up any processes waiting for any buffer to become free. */
+ if (needbuffer) {
+ needbuffer = 0;
+ wakeup(&needbuffer);
+ }
+
+ /* Wake up any processes waiting for _this_ buffer to become free. */
+ if (ISSET(bp->b_flags, B_WANTED)) {
+ CLR(bp->b_flags, B_WANTED);
+ wakeup(bp);
+ }
}
/*
@@ -806,7 +655,7 @@ allocbuf(bp, size)
/* find a buffer */
while ((nbp = getnewbuf(0, 0)) == NULL)
;
- SET(nbp->b_flags, B_INVAL);
+ SET(nbp->b_flags, B_INVAL);
binshash(nbp, &invalhash);
/* and steal its pages, up to the amount we need */
@@ -875,16 +724,16 @@ getnewbuf(slpflag, slptimeo)
start:
s = splbio();
- if ((bp = bufqueues[BQ_AGE].tqh_first) != NULL ||
- (bp = bufqueues[BQ_LRU].tqh_first) != NULL) {
- bremfree(bp);
- } else {
+ if ((bp = bufqueues[BQ_AGE].tqh_first) == NULL &&
+ (bp = bufqueues[BQ_LRU].tqh_first) == NULL) {
/* wait for a free buffer of any kind */
needbuffer = 1;
tsleep(&needbuffer, slpflag|(PRIBIO+1), "getnewbuf", slptimeo);
splx(s);
return (0);
- }
+ }
+
+ bremfree(bp);
if (ISSET(bp->b_flags, B_VFLUSH)) {
/*
@@ -916,8 +765,12 @@ start:
/* disassociate us from our vnode, if we had one... */
if (bp->b_vp)
brelvp(bp);
+
splx(s);
+ if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate)
+ (*bioops.io_deallocate)(bp);
+
/* clear out various other fields */
bp->b_flags = B_BUSY;
bp->b_dev = NODEV;
@@ -962,7 +815,9 @@ biowait(bp)
if (ISSET(bp->b_flags, B_EINTR)) {
CLR(bp->b_flags, B_EINTR);
return (EINTR);
- } else if (ISSET(bp->b_flags, B_ERROR))
+ }
+
+ if (ISSET(bp->b_flags, B_ERROR))
return (bp->b_error ? bp->b_error : EIO);
else
return (0);
@@ -992,13 +847,18 @@ biodone(bp)
panic("biodone already");
SET(bp->b_flags, B_DONE); /* note that it's done */
- if (!ISSET(bp->b_flags, B_READ)) /* wake up reader */
- vwakeup(bp);
+ if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete)
+ (*bioops.io_complete)(bp);
if (ISSET(bp->b_flags, B_CALL)) { /* if necessary, call out */
CLR(bp->b_flags, B_CALL); /* but note callout done */
(*bp->b_iodone)(bp);
- } else if (ISSET(bp->b_flags, B_ASYNC)) /* if async, release it */
+ }
+
+ if (!ISSET(bp->b_flags, B_READ)) /* wake up reader */
+ vwakeup(bp);
+
+ if (ISSET(bp->b_flags, B_ASYNC)) /* if async, release it */
brelse(bp);
else { /* or just wakeup the buffer */
CLR(bp->b_flags, B_WANTED);
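
vfs_bio.c now consults a global bio_ops vector at buffer lifecycle
points -- io_deallocate in brelse() and getnewbuf(), io_complete in
biodone(), and io_start in the clustering code below -- in place of
the old hard-wired dirty-list bookkeeping. A hypothetical sketch of a
filesystem installing those hooks (only the three callback fields are
evidenced by this commit; the myfs_* names, their void return type,
and the init routine are assumptions):

	extern struct bio_ops bioops;

	void myfs_start __P((struct buf *));
	void myfs_complete __P((struct buf *));
	void myfs_deallocate __P((struct buf *));

	void
	myfs_init()
	{
		bioops.io_start = myfs_start;		/* write starting */
		bioops.io_complete = myfs_complete;	/* I/O finished */
		bioops.io_deallocate = myfs_deallocate;	/* buf discarded */
	}
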
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index d3fc332b376..87b024600bc 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_cluster.c,v 1.8 1997/09/27 06:56:18 niklas Exp $ */
+/* $OpenBSD: vfs_cluster.c,v 1.9 1997/10/06 15:12:36 csapuntz Exp $ */
/* $NetBSD: vfs_cluster.c,v 1.12 1996/04/22 01:39:05 christos Exp $ */
/*-
@@ -48,15 +48,6 @@
#include <vm/vm.h>
-#ifdef DEBUG
-#include <sys/sysctl.h>
-int doreallocblks = 0;
-struct ctldebug debug13 = { "doreallocblks", &doreallocblks };
-#else
-/* XXX for cluster_write */
-#define doreallocblks 0
-#endif
-
/*
* Local declarations
*/
@@ -518,8 +509,7 @@ cluster_write(bp, filesize)
* Otherwise try reallocating to make it sequential.
*/
cursize = vp->v_lastw - vp->v_cstart + 1;
- if (!doreallocblks ||
- (lbn + 1) * bp->b_bcount != filesize ||
+ if ((lbn + 1) * bp->b_bcount != filesize ||
lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
cluster_wbuild(vp, NULL, bp->b_bcount,
vp->v_cstart, cursize, lbn);
@@ -708,13 +698,14 @@ redo:
panic("Clustered write to wrong blocks");
}
+ if (LIST_FIRST(&tbp->b_dep) != NULL && bioops.io_start)
+ (*bioops.io_start)(tbp);
+
pagemove(tbp->b_data, cp, size);
bp->b_bcount += size;
bp->b_bufsize += size;
tbp->b_bufsize -= size;
- if (tbp->b_flags & B_DELWRI)
- TAILQ_REMOVE(&bdirties, tbp, b_synclist);
tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
/*
* We might as well AGE the buffer here; it's either empty, or
diff --git a/sys/kern/vfs_conf.c b/sys/kern/vfs_conf.c
index 800aff67811..527450f20e5 100644
--- a/sys/kern/vfs_conf.c
+++ b/sys/kern/vfs_conf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_conf.c,v 1.5 1997/02/24 14:20:01 niklas Exp $ */
+/* $OpenBSD: vfs_conf.c,v 1.6 1997/10/06 15:12:37 csapuntz Exp $ */
/* $NetBSD: vfs_conf.c,v 1.21.4.1 1995/11/01 00:06:26 jtc Exp $ */
/*
@@ -40,6 +40,28 @@
#include <sys/mount.h>
#include <sys/vnode.h>
+#ifdef FFS
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ffs/ffs_extern.h>
+#endif
+
+#ifdef CD9660
+#include <isofs/cd9660/iso.h>
+#endif
+
+#ifdef MFS
+#include <ufs/mfs/mfs_extern.h>
+#endif
+
+#ifdef NFSCLIENT
+#include <nfs/rpcv2.h>
+#include <nfs/nfsproto.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsmount.h>
+#endif
+
/*
* These define the root filesystem and device.
*/
@@ -50,6 +72,8 @@ struct vnode *rootvnode;
* Set up the filesystem operations for vnodes.
* The types are defined in mount.h.
*/
+
+
#ifdef FFS
extern struct vfsops ffs_vfsops;
#endif
@@ -115,107 +139,102 @@ extern struct vfsops ext2fs_vfsops;
#endif
/*
- * XXX ORDERING MATTERS, for COMPAT_09. when that goes away,
- * empty slots can go away.
+ * Set up the filesystem operations for vnodes.
*/
-struct vfsops *vfssw[] = {
- NULL, /* 0 = MOUNT_NONE */
+static struct vfsconf vfsconflist[] = {
+
+ /* Fast Filesystem */
#ifdef FFS
- &ffs_vfsops, /* 1 = MOUNT_FFS */
-#else
- NULL,
+ { &ffs_vfsops, "ffs", 1, 0, MNT_LOCAL, ffs_mountroot, NULL },
#endif
-#ifdef NFSCLIENT
- &nfs_vfsops, /* 2 = MOUNT_NFS */
-#else
- NULL,
+
+ /* Log-structured Filesystem */
+#ifdef LFS
+ { &lfs_vfsops, "lfs", 5, 0, MNT_LOCAL, lfs_mountroot, NULL },
#endif
+
+ /* Memory-based Filesystem */
#ifdef MFS
- &mfs_vfsops, /* 3 = MOUNT_MFS */
-#else
- NULL,
-#endif
-#ifdef MSDOSFS
- &msdosfs_vfsops, /* 4 = MOUNT_MSDOS */
-#else
- NULL,
-#endif
-#ifdef LFS
- &lfs_vfsops, /* 5 = MOUNT_LFS */
-#else
- NULL,
+ { &mfs_vfsops, "mfs", 3, 0, MNT_LOCAL, mfs_mountroot, NULL },
#endif
- NULL, /* 6 = MOUNT_LOFS */
-#ifdef FDESC
- &fdesc_vfsops, /* 7 = MOUNT_FDESC */
-#else
- NULL,
+
+ /* ISO9660 (aka CDROM) Filesystem */
+#ifdef CD9660
+ { &cd9660_vfsops, "cd9660", 14, 0, MNT_LOCAL, cd9660_mountroot, NULL },
#endif
-#ifdef PORTAL
- &portal_vfsops, /* 8 = MOUNT_PORTAL */
-#else
- NULL,
+
+ /* MSDOS Filesystem */
+#ifdef MSDOSFS
+ { &msdosfs_vfsops, "msdos", 4, 0, MNT_LOCAL, NULL, NULL },
#endif
-#ifdef NULLFS
- &null_vfsops, /* 9 = MOUNT_NULL */
-#else
- NULL,
+
+ /* AmigaDOS Filesystem */
+#ifdef ADOSFS
+ { &adosfs_vfsops, "adosfs", 16, 0, MNT_LOCAL, NULL, NULL },
#endif
-#ifdef UMAPFS
- &umap_vfsops, /* 10 = MOUNT_UMAP */
-#else
- NULL,
+
+ /* Sun-compatible Network Filesystem */
+#ifdef NFSCLIENT
+ { &nfs_vfsops, "nfs", 2, 0, 0, nfs_mountroot, NULL },
#endif
-#ifdef KERNFS
- &kernfs_vfsops, /* 11 = MOUNT_KERNFS */
-#else
- NULL,
+
+ /* Andrew Filesystem */
+#ifdef AFS
+ { &afs_vfsops, "andrewfs", 13, 0, 0, afs_mountroot, NULL },
#endif
+
+ /* /proc Filesystem */
#ifdef PROCFS
- &procfs_vfsops, /* 12 = MOUNT_PROCFS */
-#else
- NULL,
+ { &procfs_vfsops, "procfs", 12, 0, 0, NULL, NULL },
#endif
-#ifdef AFS
- &afs_vfsops, /* 13 = MOUNT_AFS */
-#else
- NULL,
-#endif
-#ifdef CD9660
- &cd9660_vfsops, /* 14 = MOUNT_ISOFS */
-#else
- NULL,
+
+ /* Loopback (Minimal) Filesystem Layer */
+#ifdef NULLFS
+ { &null_vfsops, "loopback", 9, 0, 0, NULL, NULL },
#endif
+
+ /* Union (translucent) Filesystem */
#ifdef UNION
- &union_vfsops, /* 15 = MOUNT_UNION */
-#else
- NULL,
+ { &union_vfsops, "union", 15, 0, 0, NULL, NULL },
#endif
-#ifdef ADOSFS
- &adosfs_vfsops, /* 16 = MOUNT_ADOSFS */
-#else
- NULL,
+
+ /* User/Group Identifier Remapping Filesystem */
+#ifdef UMAPFS
+ { &umap_vfsops, "umap", 10, 0, 0, NULL, NULL },
#endif
-#ifdef EXT2FS
- &ext2fs_vfsops, /* 17 = MOUNT_EXT2FS */
-#else
- NULL,
+
+ /* Portal Filesystem */
+#ifdef PORTAL
+ { &portal_vfsops, "portal", 8, 0, 0, NULL, NULL },
#endif
-#ifdef LKM /* for LKM's. add new FS's before these */
- NULL,
- NULL,
- NULL,
- NULL,
+
+ /* File Descriptor Filesystem */
+#ifdef FDESC
+ { &fdesc_vfsops, "fdesc", 7, 0, 0, NULL, NULL },
#endif
- 0
+
+ /* Kernel Information Filesystem */
+#ifdef KERNFS
+ { &kernfs_vfsops, "kernfs", 11, 0, 0, NULL, NULL },
+#endif
+
};
-int nvfssw = sizeof(vfssw) / sizeof(vfssw[0]);
+
+
+/*
+ * Initially the size of the list; vfsinit() resets maxvfsconf
+ * to one past the highest defined type number.
+ */
+int maxvfsconf = sizeof(vfsconflist) / sizeof (struct vfsconf);
+struct vfsconf *vfsconf = vfsconflist;
+
/*
* vfs_opv_descs enumerates the list of vnode classes, each with it's own
* vnode operation vector. It is consulted at system boot to build operation
* vectors. It is NULL terminated.
*/
+extern struct vnodeopv_desc sync_vnodeop_opv_desc;
extern struct vnodeopv_desc ffs_vnodeop_opv_desc;
extern struct vnodeopv_desc ffs_specop_opv_desc;
extern struct vnodeopv_desc ffs_fifoop_opv_desc;
@@ -246,6 +265,7 @@ extern struct vnodeopv_desc ext2fs_specop_opv_desc;
extern struct vnodeopv_desc ext2fs_fifoop_opv_desc;
struct vnodeopv_desc *vfs_opv_descs[] = {
+ &sync_vnodeop_opv_desc,
#ifdef FFS
&ffs_vnodeop_opv_desc,
&ffs_specop_opv_desc,
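
Each vfsconflist entry above supplies, in order: the vfsops vector,
the canonical name, the historical type number, an initial reference
count, default mount flags, an optional mountroot routine, and the
vfc_next link (filled in later by vfsinit()). A hypothetical entry
for an additional local filesystem would follow the same shape (the
MYFS option, myfs_vfsops symbol, and type number 18 are invented):

	/* Hypothetical local filesystem */
#ifdef MYFS
	{ &myfs_vfsops, "myfs", 18, 0, MNT_LOCAL, NULL, NULL },
#endif
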
diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c
index 2071a8f633f..a10e5a3ff41 100644
--- a/sys/kern/vfs_init.c
+++ b/sys/kern/vfs_init.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_init.c,v 1.4 1997/02/24 14:20:02 niklas Exp $ */
+/* $OpenBSD: vfs_init.c,v 1.5 1997/10/06 15:12:39 csapuntz Exp $ */
/* $NetBSD: vfs_init.c,v 1.6 1996/02/09 19:00:58 christos Exp $ */
/*
@@ -243,7 +243,8 @@ struct vattr va_null;
void
vfsinit()
{
- struct vfsops **vfsp;
+ struct vfsconf *vfsp;
+ int i, maxtypenum;
/*
* Initialize the vnode table
@@ -262,9 +263,15 @@ vfsinit()
* Initialize each file system type.
*/
vattr_null(&va_null);
- for (vfsp = &vfssw[0]; vfsp < &vfssw[nvfssw]; vfsp++) {
- if (*vfsp == NULL)
- continue;
- (*(*vfsp)->vfs_init)();
- }
+ maxtypenum = 0;
+
+ for (vfsp = vfsconf, i = 1; i <= maxvfsconf; i++, vfsp++) {
+ if (i < maxvfsconf)
+ vfsp->vfc_next = vfsp + 1;
+ if (maxtypenum <= vfsp->vfc_typenum)
+ maxtypenum = vfsp->vfc_typenum + 1;
+ (*vfsp->vfc_vfsops->vfs_init)(vfsp);
+ }
+ /* next vfc_typenum to be used */
+ maxvfsconf = maxtypenum;
}
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index 6d3e4f8567c..aa86b97412e 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_lookup.c,v 1.8 1997/06/18 17:37:38 tholo Exp $ */
+/* $OpenBSD: vfs_lookup.c,v 1.9 1997/10/06 15:12:40 csapuntz Exp $ */
/* $NetBSD: vfs_lookup.c,v 1.17 1996/02/09 19:00:59 christos Exp $ */
/*
@@ -88,6 +88,7 @@ namei(ndp)
struct uio auio;
int error, linklen;
struct componentname *cnp = &ndp->ni_cnd;
+ struct proc *p = cnp->cn_proc;
ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_proc->p_ucred;
#ifdef DIAGNOSTIC
@@ -164,7 +165,7 @@ namei(ndp)
return (0);
}
if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
- VOP_UNLOCK(ndp->ni_dvp);
+ VOP_UNLOCK(ndp->ni_dvp, 0, p);
if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
error = ELOOP;
break;
@@ -271,7 +272,7 @@ lookup(ndp)
int error = 0;
int slashes;
struct componentname *cnp = &ndp->ni_cnd;
-
+ struct proc *p = cnp->cn_proc;
/*
* Setup: break out flag bits into variables.
*/
@@ -285,7 +286,7 @@ lookup(ndp)
cnp->cn_flags &= ~ISSYMLINK;
dp = ndp->ni_startdir;
ndp->ni_startdir = NULLVP;
- VOP_LOCK(dp);
+ vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p);
/*
* If we have a leading string of slashes, remove them, and just make
@@ -410,7 +411,7 @@ dirloop:
dp = dp->v_mount->mnt_vnodecovered;
vput(tdp);
VREF(dp);
- VOP_LOCK(dp);
+ vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p);
}
}
@@ -419,6 +420,7 @@ dirloop:
*/
unionlookup:
ndp->ni_dvp = dp;
+ ndp->ni_vp = NULL;
if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) {
#ifdef DIAGNOSTIC
if (ndp->ni_vp != NULL)
@@ -434,7 +436,7 @@ unionlookup:
dp = dp->v_mount->mnt_vnodecovered;
vput(tdp);
VREF(dp);
- VOP_LOCK(dp);
+ vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p);
goto unionlookup;
}
@@ -491,12 +493,11 @@ unionlookup:
*/
while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
(cnp->cn_flags & NOCROSSMOUNT) == 0) {
- if (mp->mnt_flag & MNT_MLOCK) {
- mp->mnt_flag |= MNT_MWAIT;
- sleep((caddr_t)mp, PVFS);
+ if (vfs_busy(mp, 0, 0, p))
continue;
- }
- if ((error = VFS_ROOT(dp->v_mountedhere, &tdp)) != 0)
+ error = VFS_ROOT(mp, &tdp);
+ vfs_unbusy(mp, p);
+ if (error)
goto bad2;
vput(dp);
ndp->ni_vp = dp = tdp;
@@ -558,12 +559,12 @@ terminal:
vrele(ndp->ni_dvp);
}
if ((cnp->cn_flags & LOCKLEAF) == 0)
- VOP_UNLOCK(dp);
+ VOP_UNLOCK(dp, 0, p);
return (0);
bad2:
if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN))
- VOP_UNLOCK(ndp->ni_dvp);
+ VOP_UNLOCK(ndp->ni_dvp, 0, p);
vrele(ndp->ni_dvp);
bad:
vput(dp);
@@ -579,6 +580,7 @@ relookup(dvp, vpp, cnp)
struct vnode *dvp, **vpp;
struct componentname *cnp;
{
+ struct proc *p = cnp->cn_proc;
register struct vnode *dp = 0; /* the directory we are searching */
int docache; /* == 0 do not cache last component */
int wantparent; /* 1 => wantparent or lockparent flag */
@@ -600,7 +602,7 @@ relookup(dvp, vpp, cnp)
rdonly = cnp->cn_flags & RDONLY;
cnp->cn_flags &= ~ISSYMLINK;
dp = dvp;
- VOP_LOCK(dp);
+ vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p);
/* dirloop: */
/*
@@ -694,15 +696,17 @@ relookup(dvp, vpp, cnp)
if (!wantparent)
vrele(dvp);
if ((cnp->cn_flags & LOCKLEAF) == 0)
- VOP_UNLOCK(dp);
+ VOP_UNLOCK(dp, 0, p);
return (0);
bad2:
if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN))
- VOP_UNLOCK(dvp);
+ VOP_UNLOCK(dvp, 0, p);
vrele(dvp);
bad:
vput(dp);
*vpp = NULL;
return (error);
}
+
+
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 10ed04c26ca..f265b15051d 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_subr.c,v 1.10 1997/04/25 09:33:24 deraadt Exp $ */
+/* $OpenBSD: vfs_subr.c,v 1.11 1997/10/06 15:12:42 csapuntz Exp $ */
/* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */
/*
@@ -51,6 +51,7 @@
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
+#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
@@ -88,8 +89,28 @@ int suid_clear = 1; /* 1 => clear SUID / SGID on owner change */
LIST_REMOVE(bp, b_vnbufs); \
(bp)->b_vnbufs.le_next = NOLIST; \
}
-TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
+
+struct freelst vnode_hold_list; /* list of vnodes referencing buffers */
+struct freelst vnode_free_list; /* vnode free list */
+
struct mntlist mountlist; /* mounted filesystem list */
+struct simplelock mountlist_slock;
+static struct simplelock mntid_slock;
+struct simplelock mntvnode_slock;
+struct simplelock vnode_free_list_slock;
+static struct simplelock spechash_slock;
+
+/*
+ * The workitem queue.
+ */
+#define SYNCER_MAXDELAY 32
+int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */
+time_t syncdelay = 30; /* time to delay syncing vnodes */
+
+static int syncer_delayno = 0;
+static long syncer_mask;
+LIST_HEAD(synclist, vnode);
+static struct synclist *syncer_workitem_pending;
int vfs_lock __P((struct mount *));
void vfs_unlock __P((struct mount *));
@@ -107,15 +128,16 @@ int bdevvp __P((dev_t, struct vnode **));
int cdevvp __P((dev_t, struct vnode **));
int getdevvp __P((dev_t, struct vnode **, enum vtype));
struct vnode *checkalias __P((struct vnode *, dev_t, struct mount *));
-int vget __P((struct vnode *, int));
void vref __P((struct vnode *));
void vput __P((struct vnode *));
void vrele __P((struct vnode *));
+int vunref __P((struct vnode *));
void vhold __P((struct vnode *));
void holdrele __P((struct vnode *));
int vflush __P((struct mount *, struct vnode *, int));
void vgoneall __P((struct vnode *));
void vgone __P((struct vnode *));
+void vgonel __P((struct vnode *, struct proc *));
int vcount __P((struct vnode *));
void vprint __P((char *, struct vnode *));
int vfs_mountedon __P((struct vnode *));
@@ -126,10 +148,10 @@ int vaccess __P((mode_t, uid_t, gid_t, mode_t, struct ucred *));
void vfs_unmountall __P((void));
void vfs_shutdown __P((void));
-static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
+int vfs_hang_addrlist __P((struct mount *, struct netexport *,
struct export_args *));
-static int vfs_free_netcred __P((struct radix_node *, void *));
-static void vfs_free_addrlist __P((struct netexport *));
+int vfs_free_netcred __P((struct radix_node *, void *));
+void vfs_free_addrlist __P((struct netexport *));
#ifdef DEBUG
void printlockedvnodes __P((void));
@@ -142,124 +164,191 @@ void
vntblinit()
{
+ simple_lock_init(&mntvnode_slock);
+ simple_lock_init(&mntid_slock);
+ simple_lock_init(&spechash_slock);
+ TAILQ_INIT(&vnode_hold_list);
TAILQ_INIT(&vnode_free_list);
+ simple_lock_init(&vnode_free_list_slock);
CIRCLEQ_INIT(&mountlist);
+ /*
+ * Initialize the filesystem syncer.
+ */
+ syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
+ &syncer_mask);
+ syncer_maxdelay = syncer_mask + 1;
+
}
+
/*
- * Lock a filesystem.
- * Used to prevent access to it while mounting and unmounting.
+ * Mark a mount point as busy. Used to synchronize access and to delay
+ * unmounting. The interlock is not released on failure.
*/
+
int
-vfs_lock(mp)
- register struct mount *mp;
+vfs_busy(mp, flags, interlkp, p)
+ struct mount *mp;
+ int flags;
+ struct simplelock *interlkp;
+ struct proc *p;
{
+ int lkflags;
- while (mp->mnt_flag & MNT_MLOCK) {
+ if (mp->mnt_flag & MNT_UNMOUNT) {
+ if (flags & LK_NOWAIT)
+ return (ENOENT);
mp->mnt_flag |= MNT_MWAIT;
- tsleep((caddr_t)mp, PVFS, "vfslock", 0);
+ if (interlkp)
+ simple_unlock(interlkp);
+ /*
+ * Since all busy locks are shared except the exclusive
+ * lock granted when unmounting, the only place that a
+ * wakeup needs to be done is at the release of the
+ * exclusive lock at the end of dounmount.
+ */
+ sleep((caddr_t)mp, PVFS);
+ if (interlkp)
+ simple_lock(interlkp);
+ return (ENOENT);
}
- mp->mnt_flag |= MNT_MLOCK;
- return (0);
+ lkflags = LK_SHARED;
+ if (interlkp)
+ lkflags |= LK_INTERLOCK;
+ if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
+ panic("vfs_busy: unexpected lock failure");
+ return (0);
}
+
/*
- * Unlock a locked filesystem.
- * Panic if filesystem is not locked.
+ * Free a busy file system
*/
void
-vfs_unlock(mp)
- register struct mount *mp;
+vfs_unbusy(mp, p)
+ struct mount *mp;
+ struct proc *p;
{
-
- if ((mp->mnt_flag & MNT_MLOCK) == 0)
- panic("vfs_unlock: not locked");
- mp->mnt_flag &= ~MNT_MLOCK;
- if (mp->mnt_flag & MNT_MWAIT) {
- mp->mnt_flag &= ~MNT_MWAIT;
- wakeup((caddr_t)mp);
- }
+ lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}
/*
- * Mark a mount point as busy.
- * Used to synchronize access and to delay unmounting.
+ * Lookup a filesystem type, and if found allocate and initialize
+ * a mount structure for it.
+ *
+ * Devname is usually updated by mount(8) after booting.
*/
-int
-vfs_busy(mp)
- register struct mount *mp;
-{
- while(mp->mnt_flag & MNT_MPBUSY) {
- mp->mnt_flag |= MNT_MPWANT;
- tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0);
- }
- if (mp->mnt_flag & MNT_UNMOUNT)
- return (1);
- mp->mnt_flag |= MNT_MPBUSY;
- return (0);
-}
+int
+vfs_rootmountalloc(fstypename, devname, mpp)
+ char *fstypename;
+ char *devname;
+ struct mount **mpp;
+{
+ struct proc *p = curproc; /* XXX */
+ struct vfsconf *vfsp;
+ struct mount *mp;
+
+ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
+ if (!strcmp(vfsp->vfc_name, fstypename))
+ break;
+ if (vfsp == NULL)
+ return (ENODEV);
+ mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
+ bzero((char *)mp, (u_long)sizeof(struct mount));
+ lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
+ (void)vfs_busy(mp, LK_NOWAIT, 0, p);
+ LIST_INIT(&mp->mnt_vnodelist);
+ mp->mnt_vfc = vfsp;
+ mp->mnt_op = vfsp->vfc_vfsops;
+ mp->mnt_flag = MNT_RDONLY;
+ mp->mnt_vnodecovered = NULLVP;
+ vfsp->vfc_refcount++;
+ mp->mnt_stat.f_type = vfsp->vfc_typenum;
+ mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
+ strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
+ mp->mnt_stat.f_mntonname[0] = '/';
+ (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
+ *mpp = mp;
+ return (0);
+}
/*
- * Free a busy filesystem.
- * Panic if filesystem is not busy.
- */
-void
-vfs_unbusy(mp)
- register struct mount *mp;
+ * Find an appropriate filesystem to use for the root. If a filesystem
+ * has not been preselected, walk through the list of known filesystems
+ * trying those that have mountroot routines, and try them until one
+ * works or we have tried them all.
+ */
+int
+vfs_mountroot()
{
-
- if ((mp->mnt_flag & MNT_MPBUSY) == 0)
- panic("vfs_unbusy: not busy");
- mp->mnt_flag &= ~MNT_MPBUSY;
- if (mp->mnt_flag & MNT_MPWANT) {
- mp->mnt_flag &= ~MNT_MPWANT;
- wakeup((caddr_t)&mp->mnt_flag);
- }
+ struct vfsconf *vfsp;
+ extern int (*mountroot)(void);
+ int error;
+
+ if (mountroot != NULL)
+ return ((*mountroot)());
+ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
+ if (vfsp->vfc_mountroot == NULL)
+ continue;
+ if ((error = (*vfsp->vfc_mountroot)()) == 0)
+ return (0);
+ printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
+ }
+ return (ENODEV);
}
-
+
/*
* Lookup a mount point by filesystem identifier.
*/
struct mount *
-getvfs(fsid)
+vfs_getvfs(fsid)
fsid_t *fsid;
{
register struct mount *mp;
+ simple_lock(&mountlist_slock);
for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
- mp = mp->mnt_list.cqe_next)
+ mp = mp->mnt_list.cqe_next) {
if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
- mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
+ mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
+ simple_unlock(&mountlist_slock);
return (mp);
+ }
+ }
+ simple_unlock(&mountlist_slock);
return ((struct mount *)0);
}
+
/*
* Get a new unique fsid
*/
void
-getnewfsid(mp, mtype)
+vfs_getnewfsid(mp)
struct mount *mp;
- int mtype;
{
static u_short xxxfs_mntid;
fsid_t tfsid;
+ int mtype;
- mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + 11, 0); /* XXX */
+ simple_lock(&mntid_slock);
+ mtype = mp->mnt_vfc->vfc_typenum;
+ mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
mp->mnt_stat.f_fsid.val[1] = mtype;
if (xxxfs_mntid == 0)
++xxxfs_mntid;
- tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
+ tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
tfsid.val[1] = mtype;
if (mountlist.cqh_first != (void *)&mountlist) {
- while (getvfs(&tfsid)) {
+ while (vfs_getvfs(&tfsid)) {
tfsid.val[0]++;
xxxfs_mntid++;
}
}
mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
+ simple_unlock(&mntid_slock);
}
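The probe loop above keeps bumping tfsid.val[0] until vfs_getvfs() finds no mount already using it. A toy of the same collision walk, with makedev() modeled as a shift-or and the mount list reduced to a fixed array (all names hypothetical):

    #include <stdio.h>

    #define TOY_MAKEDEV(maj, min)   (((long)(maj) << 8) | (min))

    static long in_use[] = { TOY_MAKEDEV(13, 1), TOY_MAKEDEV(13, 2) };

    static int
    fsid_in_use(long fsid)
    {
        unsigned i;

        for (i = 0; i < sizeof(in_use) / sizeof(in_use[0]); i++)
            if (in_use[i] == fsid)
                return (1);
        return (0);
    }

    int
    main(void)
    {
        /* (nblkdev + vfc_typenum, xxxfs_mntid) in the kernel */
        long fsid = TOY_MAKEDEV(13, 1);

        while (fsid_in_use(fsid))
            fsid++;             /* tfsid.val[0]++ in the kernel loop */
        printf("allocated fsid 0x%lx\n", fsid);
        return (0);
    }
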
/*
@@ -318,20 +407,56 @@ getnewvnode(tag, mp, vops, vpp)
int (**vops) __P((void *));
struct vnode **vpp;
{
- register struct vnode *vp;
+ struct proc *p = curproc; /* XXX */
+ struct freelst *listhd;
+ static int toggle;
+ struct vnode *vp;
#ifdef DIAGNOSTIC
int s;
#endif
- if ((vnode_free_list.tqh_first == NULL &&
- numvnodes < 2 * desiredvnodes) ||
- numvnodes < desiredvnodes) {
+ /*
+ * We must choose whether to allocate a new vnode or recycle an
+ * existing one. The criterion for allocating a new one is that
+ * the total number of vnodes is less than the number desired or
+ * there are no vnodes on either free list. Generally we only
+ * want to recycle vnodes that have no buffers associated with
+ * them, so we look first on the vnode_free_list. If it is empty,
+ * we next consider vnodes with referencing buffers on the
+ * vnode_hold_list. The toggle ensures that half the time we
+ * will use a vnode from the vnode_hold_list, and half the time
+ * we will allocate a new one unless the list has grown to twice
+ * the desired size. We are reluctant to recycle vnodes from the
+ * vnode_hold_list because we will lose the identity of all its
+ * referencing buffers.
+ */
+ toggle ^= 1;
+ if (numvnodes > 2 * desiredvnodes)
+ toggle = 0;
+
+ simple_lock(&vnode_free_list_slock);
+ if ((numvnodes < desiredvnodes) ||
+ ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
+ ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
+ simple_unlock(&vnode_free_list_slock);
vp = (struct vnode *)malloc((u_long)sizeof *vp,
M_VNODE, M_WAITOK);
bzero((char *)vp, sizeof *vp);
numvnodes++;
} else {
- if ((vp = vnode_free_list.tqh_first) == NULL) {
+ for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
+ vp = TAILQ_NEXT(vp, v_freelist)) {
+ if (simple_lock_try(&vp->v_interlock))
+ break;
+ }
+ /*
+ * Unless this is a bad time of the month, at most
+ * the first NCPUS items on the free list are
+ * locked, so this is close enough to being empty.
+ */
+ if (vp == NULLVP) {
+ simple_unlock(&vnode_free_list_slock);
tablefull("vnode");
*vpp = 0;
return (ENFILE);
@@ -340,12 +465,15 @@ getnewvnode(tag, mp, vops, vpp)
vprint("free vnode", vp);
panic("free vnode isn't");
}
- TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ TAILQ_REMOVE(listhd, vp, v_freelist);
/* see comment on why 0xdeadb is set at end of vgone (below) */
- vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
+ vp->v_flag |= VGONEHACK;
+ simple_unlock(&vnode_free_list_slock);
vp->v_lease = NULL;
if (vp->v_type != VBAD)
- vgone(vp);
+ vgonel(vp, p);
+ else
+ simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
if (vp->v_data) {
vprint("cleaned vnode", vp);
@@ -385,18 +513,19 @@ insmntque(vp, mp)
register struct vnode *vp;
register struct mount *mp;
{
-
+ simple_lock(&mntvnode_slock);
/*
* Delete from old mount point vnode list, if on one.
*/
if (vp->v_mount != NULL)
LIST_REMOVE(vp, v_mntvnodes);
/*
* Insert into list of vnodes for the new mount point, if available.
*/
- if ((vp->v_mount = mp) == NULL)
- return;
- LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
+ if ((vp->v_mount = mp) != NULL)
+ LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
+ simple_unlock(&mntvnode_slock);
}
/*
@@ -435,14 +564,15 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
struct buf *nbp, *blist;
int s, error;
- if (flags & V_SAVE) {
+ if ((flags & V_SAVE) && vp->v_dirtyblkhd.lh_first != NULL) {
if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
return (error);
if (vp->v_dirtyblkhd.lh_first != NULL)
panic("vinvalbuf: dirty bufs");
}
for (;;) {
- if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
+ if ((blist = vp->v_cleanblkhd.lh_first) &&
+ (flags & V_SAVEMETA))
while (blist && blist->b_lblkno < 0)
blist = blist->b_vnbufs.le_next;
if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
@@ -562,30 +692,140 @@ brelvp(bp)
register struct buf *bp;
{
struct vnode *vp;
+ struct buf *wasdirty;
- if (bp->b_vp == (struct vnode *) 0)
+ if ((vp = bp->b_vp) == (struct vnode *) 0)
panic("brelvp: NULL");
/*
* Delete from old vnode list, if on one.
*/
+ wasdirty = vp->v_dirtyblkhd.lh_first;
if (bp->b_vnbufs.le_next != NOLIST)
bufremvn(bp);
- vp = bp->b_vp;
+ if (wasdirty && LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
+ LIST_REMOVE(vp, v_synclist);
bp->b_vp = (struct vnode *) 0;
HOLDRELE(vp);
}
/*
- * Reassign a buffer from one vnode to another.
- * Used to assign file specific control information
- * (indirect blocks) to the vnode to which they belong.
+ * The workitem queue.
+ *
+ * It is useful to delay writes of file data and filesystem metadata
+ * for tens of seconds so that quickly created and deleted files need
+ * not waste disk bandwidth being created and removed. To realize this,
+ * we append vnodes to a "workitem" queue. When running with a soft
+ * updates implementation, most pending metadata dependencies should
+ * not wait for more than a few seconds. Thus, mounted on block devices
+ * are delayed only about a half the time that file data is delayed.
+ * Similarly, directory updates are more critical, so are only delayed
+ * about a third the time that file data is delayed. Thus, there are
+ * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
+ * one each second (driven off the filesystem syner process). The
+ * syncer_delayno variable indicates the next queue that is to be processed.
+ * Items that need to be processed soon are placed in this queue:
+ *
+ * syncer_workitem_pending[syncer_delayno]
+ *
+ * A delay of fifteen seconds is done by placing the request fifteen
+ * entries later in the queue:
+ *
+ * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
+ *
+ */
+
+/*
+ * Add an item to the syncer work queue.
+ */
+void
+vn_syncer_add_to_worklist(vp, delay)
+ struct vnode *vp;
+ int delay;
+{
+ int s, slot;
+
+ s = splbio();
+ if (delay > syncer_maxdelay - 2)
+ delay = syncer_maxdelay - 2;
+ slot = (syncer_delayno + delay) & syncer_mask;
+ LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
+ splx(s);
+}
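The slot arithmetic above is a classic timer wheel: one slot is swept per second and a delay is a forward offset wrapped with the power-of-two mask. A small runnable sketch of that behavior (local stand-ins, not kernel symbols):

    #include <stdio.h>

    #define TABLE_SIZE      32              /* must be a power of two */

    static int syncer_delayno;              /* slot currently being swept */
    static long syncer_mask = TABLE_SIZE - 1;

    static int
    slot_for(int delay)
    {
        if (delay > TABLE_SIZE - 2)
            delay = TABLE_SIZE - 2;         /* clamp as the kernel does */
        return ((syncer_delayno + delay) & syncer_mask);
    }

    int
    main(void)
    {
        int tick;

        for (tick = 0; tick < 3; tick++) {
            printf("tick %d: 15s work lands in slot %d\n",
                tick, slot_for(15));
            /* one pass of sched_sync advances the wheel by one slot */
            syncer_delayno = (syncer_delayno + 1) & syncer_mask;
        }
        return (0);
    }
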
+
+/*
+ * System filesystem synchronizer daemon.
+ */
+
+extern int lbolt;
+
+void
+sched_sync(p)
+ struct proc *p;
+{
+ struct synclist *slp;
+ struct vnode *vp;
+ long starttime;
+ int s;
+
+ for (;;) {
+ starttime = time.tv_sec;
+
+ /*
+ * Push files whose dirty time has expired.
+ */
+ s = splbio();
+ slp = &syncer_workitem_pending[syncer_delayno];
+ syncer_delayno += 1;
+ if (syncer_delayno == syncer_maxdelay)
+ syncer_delayno = 0;
+ splx(s);
+ while ((vp = LIST_FIRST(slp)) != NULL) {
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+ (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
+ VOP_UNLOCK(vp, 0, p);
+ if (LIST_FIRST(slp) == vp) {
+ if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
+ panic("sched_sync: fsync failed");
+ /*
+ * Move ourselves to the back of the sync list.
+ */
+ LIST_REMOVE(vp, v_synclist);
+ vn_syncer_add_to_worklist(vp, syncdelay);
+ }
+ }
+
+ /*
+ * Do soft update processing.
+ */
+ if (bioops.io_sync)
+ (*bioops.io_sync)(NULL);
+
+ /*
+ * If it has taken us less than a second to process the
+ * current work, then wait. Otherwise start right over
+ * again. We can still lose time if any single round
+ * takes more than two seconds, but it does not really
+ * matter as we are just trying to generally pace the
+ * filesystem activity.
+ */
+ if (time.tv_sec == starttime)
+ tsleep(&lbolt, PPAUSE, "syncer", 0);
+ }
+}
+
+/*
+ * Reassign a buffer from one vnode to another. Used to assign buffers
+ * to the appropriate clean or dirty list and to add newly dirty vnodes
+ * to the appropriate filesystem syncer list.
*/
void
reassignbuf(bp, newvp)
register struct buf *bp;
register struct vnode *newvp;
{
- register struct buflists *listheadp;
+ struct buflists *listheadp;
+ struct buf *wasdirty;
+ int delay;
if (newvp == NULL) {
printf("reassignbuf: NULL");
@@ -594,16 +834,36 @@ reassignbuf(bp, newvp)
/*
* Delete from old vnode list, if on one.
*/
+ wasdirty = newvp->v_dirtyblkhd.lh_first;
if (bp->b_vnbufs.le_next != NOLIST)
bufremvn(bp);
/*
* If dirty, put on list of dirty buffers;
* otherwise insert onto list of clean buffers.
*/
- if (bp->b_flags & B_DELWRI)
- listheadp = &newvp->v_dirtyblkhd;
- else
+ if ((bp->b_flags & B_DELWRI) == 0) {
listheadp = &newvp->v_cleanblkhd;
+ if (wasdirty && LIST_FIRST(&newvp->v_dirtyblkhd) == NULL)
+ LIST_REMOVE(newvp, v_synclist);
+ } else {
+ listheadp = &newvp->v_dirtyblkhd;
+ if (LIST_FIRST(listheadp) == NULL) {
+ switch (newvp->v_type) {
+ case VDIR:
+ delay = syncdelay / 3;
+ break;
+ case VBLK:
+ if (newvp->v_specmountpoint != NULL) {
+ delay = syncdelay / 2;
+ break;
+ }
+ /* fall through */
+ default:
+ delay = syncdelay;
+ }
+ vn_syncer_add_to_worklist(newvp, delay);
+ }
+ }
bufinsvn(bp, listheadp);
}
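The switch above encodes the delay policy described in the workitem-queue comment: directories at a third of syncdelay, block devices backing mounted filesystems at half, everything else at the full delay. The same tiers as a standalone helper, a sketch only (the enum stands in for the kernel's vtype):

    #include <stdio.h>

    enum vtype_sketch { VREG_S, VDIR_S, VBLK_S };

    static int
    sync_delay_for(enum vtype_sketch type, int mounted, int syncdelay)
    {
        switch (type) {
        case VDIR_S:                    /* directory updates: most urgent */
            return (syncdelay / 3);
        case VBLK_S:
            if (mounted)                /* device holding fs metadata */
                return (syncdelay / 2);
            /* FALLTHROUGH */
        default:                        /* plain file data */
            return (syncdelay);
        }
    }

    int
    main(void)
    {
        printf("dir %d, mounted blk %d, file %d\n",
            sync_delay_for(VDIR_S, 0, 30),
            sync_delay_for(VBLK_S, 1, 30),
            sync_delay_for(VREG_S, 0, 30));
        return (0);
    }
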
@@ -649,8 +909,10 @@ getdevvp(dev, vpp, type)
struct vnode *nvp;
int error;
- if (dev == NODEV)
+ if (dev == NODEV) {
+ *vpp = NULLVP;
return (0);
+ }
error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
if (error) {
*vpp = NULLVP;
@@ -680,6 +942,7 @@ checkalias(nvp, nvp_rdev, mp)
dev_t nvp_rdev;
struct mount *mp;
{
+ struct proc *p = curproc;
register struct vnode *vp;
struct vnode **vpp;
@@ -688,18 +951,23 @@ checkalias(nvp, nvp_rdev, mp)
vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
+ simple_lock(&spechash_slock);
for (vp = *vpp; vp; vp = vp->v_specnext) {
+ simple_lock(&vp->v_interlock);
if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
continue;
/*
* Alias, but not in use, so flush it out.
*/
if (vp->v_usecount == 0) {
- vgone(vp);
+ simple_unlock(&spechash_slock);
+ vgonel(vp, p);
goto loop;
}
- if (vget(vp, 1))
+ if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
+ simple_unlock(&spechash_slock);
goto loop;
+ }
break;
}
if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
@@ -708,18 +976,21 @@ loop:
nvp->v_rdev = nvp_rdev;
nvp->v_hashchain = vpp;
nvp->v_specnext = *vpp;
- nvp->v_specflags = 0;
+ nvp->v_specmountpoint = NULL;
nvp->v_speclockf = NULL;
+ simple_unlock(&spechash_slock);
*vpp = nvp;
- if (vp != NULL) {
+ if (vp != NULLVP) {
nvp->v_flag |= VALIASED;
vp->v_flag |= VALIASED;
vput(vp);
}
return (NULLVP);
}
- VOP_UNLOCK(vp);
- vclean(vp, 0);
+ simple_unlock(&spechash_slock);
+ VOP_UNLOCK(vp, 0, p);
+ simple_lock(&vp->v_interlock);
+ vclean(vp, 0, p);
vp->v_op = nvp->v_op;
vp->v_tag = nvp->v_tag;
nvp->v_type = VNON;
@@ -736,91 +1007,260 @@ loop:
* been changed to a new file system type).
*/
int
-vget(vp, lockflag)
- register struct vnode *vp;
- int lockflag;
+vget(vp, flags, p)
+ struct vnode *vp;
+ int flags;
+ struct proc *p;
{
-
+ int error;
/*
* If the vnode is in the process of being cleaned out for
* another use, we wait for the cleaning to finish and then
- * return failure. Cleaning is determined either by checking
- * that the VXLOCK flag is set, or that the use count is
- * zero with the back pointer set to show that it has been
- * removed from the free list by getnewvnode. The VXLOCK
- * flag may not have been set yet because vclean is blocked in
- * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
+ * return failure. Cleaning is determined by checking that
+ * the VXLOCK flag is set.
*/
- if ((vp->v_flag & VXLOCK) ||
- (vp->v_usecount == 0 &&
- vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
- vp->v_flag |= VXWANT;
+ if ((flags & LK_INTERLOCK) == 0)
+ simple_lock(&vp->v_interlock);
+ if (vp->v_flag & VXLOCK) {
+ vp->v_flag |= VXWANT;
+ simple_unlock(&vp->v_interlock);
tsleep((caddr_t)vp, PINOD, "vget", 0);
- return (1);
+ return (ENOENT);
+ }
+ if (vp->v_usecount == 0) {
+ simple_lock(&vnode_free_list_slock);
+ if (vp->v_holdcnt > 0)
+ TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
+ else
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ simple_unlock(&vnode_free_list_slock);
}
- if (vp->v_usecount == 0)
- TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
- vp->v_usecount++;
- if (lockflag)
- VOP_LOCK(vp);
+ vp->v_usecount++;
+ if (flags & LK_TYPE_MASK) {
+ if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
+ vunref(vp);
+ simple_unlock(&vp->v_interlock);
+ }
+ return (error);
+ }
+ simple_unlock(&vp->v_interlock);
return (0);
}
/*
- * Vnode reference, just increment the count
+ * Stubs to use when there is no locking to be done on the underlying object.
+ * A minimal shared lock is necessary to ensure that the underlying object
+ * is not revoked while an operation is in progress. So, an active shared
+ * count is maintained in an auxiliary vnode lock structure.
+ */
+int
+vop_nolock(v)
+ void *v;
+{
+ struct vop_lock_args /* {
+ struct vnode *a_vp;
+ int a_flags;
+ struct proc *a_p;
+ } */ *ap = v;
+
+#ifdef notyet
+ /*
+ * This code cannot be used until all the non-locking filesystems
+ * (notably NFS) are converted to properly lock and release nodes.
+ * Also, certain vnode operations change the locking state within
+ * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
+ * and symlink). Ideally these operations should not change the
+ * lock state, but should be changed to let the caller of the
+ * function unlock them. Otherwise all intermediate vnode layers
+ * (such as union, umapfs, etc) must catch these functions to do
+ * the necessary locking at their layer. Note that the inactive
+ * and lookup operations also change their lock state, but this
+ * cannot be avoided, so these two operations will always need
+ * to be handled in intermediate layers.
+ */
+ struct vnode *vp = ap->a_vp;
+ int vnflags, flags = ap->a_flags;
+
+ if (vp->v_vnlock == NULL) {
+ if ((flags & LK_TYPE_MASK) == LK_DRAIN)
+ return (0);
+ MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
+ M_VNODE, M_WAITOK);
+ lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
+ }
+ switch (flags & LK_TYPE_MASK) {
+ case LK_DRAIN:
+ vnflags = LK_DRAIN;
+ break;
+ case LK_EXCLUSIVE:
+ case LK_SHARED:
+ vnflags = LK_SHARED;
+ break;
+ case LK_UPGRADE:
+ case LK_EXCLUPGRADE:
+ case LK_DOWNGRADE:
+ return (0);
+ case LK_RELEASE:
+ default:
+ panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
+ }
+ if (flags & LK_INTERLOCK)
+ vnflags |= LK_INTERLOCK;
+ return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
+#else /* for now */
+ /*
+ * Since we are not using the lock manager, we must clear
+ * the interlock here.
+ */
+ if (ap->a_flags & LK_INTERLOCK)
+ simple_unlock(&ap->a_vp->v_interlock);
+ return (0);
+#endif
+}
+
+/*
+ * Decrement the active use count.
+ */
+
+int
+vop_nounlock(v)
+ void *v;
+{
+ struct vop_unlock_args /* {
+ struct vnode *a_vp;
+ int a_flags;
+ struct proc *a_p;
+ } */ *ap = v;
+
+ struct vnode *vp = ap->a_vp;
+
+ if (vp->v_vnlock == NULL)
+ return (0);
+ return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
+}
+
+/*
+ * Return whether or not the node is in use.
+ */
+int
+vop_noislocked(v)
+ void *v;
+{
+ struct vop_islocked_args /* {
+ struct vnode *a_vp;
+ } */ *ap = v;
+
+ struct vnode *vp = ap->a_vp;
+
+ if (vp->v_vnlock == NULL)
+ return (0);
+ return (lockstatus(vp->v_vnlock));
+}
+
+/*
+ * Vnode reference.
*/
void
vref(vp)
struct vnode *vp;
{
-
+ simple_lock(&vp->v_interlock);
if (vp->v_usecount <= 0)
panic("vref used where vget required");
vp->v_usecount++;
+ simple_unlock(&vp->v_interlock);
}
-/*
- * vput(), just unlock and vrele()
- */
-void
-vput(vp)
- register struct vnode *vp;
+
+int
+vunref(vp)
+ struct vnode *vp;
{
+#ifdef DIAGNOSTIC
+ if (vp == NULL)
+ panic("vunref: null vp");
+#endif
+ simple_lock(&vp->v_interlock);
+ vp->v_usecount--;
+ if (vp->v_usecount > 0) {
+ simple_unlock(&vp->v_interlock);
+ return (vp->v_usecount);
+ }
+#ifdef DIAGNOSTIC
+ if (vp->v_usecount < 0 || vp->v_writecount != 0) {
+ vprint("vunref: bad ref count", vp);
+ panic("vunref: ref cnt");
+ }
+#endif
+ /*
+ * insert at tail of LRU list
+ */
+ simple_lock(&vnode_free_list_slock);
+ if (vp->v_holdcnt > 0)
+ TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
+ else
+ TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+ simple_unlock(&vnode_free_list_slock);
- VOP_UNLOCK(vp);
- vrele(vp);
+ return (0);
}
/*
- * Vnode release.
- * If count drops to zero, call inactive routine and return to freelist.
+ * vput(), just unlock and vrele()
*/
void
-vrele(vp)
+vput(vp)
register struct vnode *vp;
{
+ struct proc *p = curproc; /* XXX */
-#ifdef DIAGNOSTIC
+#ifdef DIAGNOSTIC
if (vp == NULL)
- panic("vrele: null vp");
+ panic("vput: null vp");
#endif
+ simple_lock(&vp->v_interlock);
vp->v_usecount--;
- if (vp->v_usecount > 0)
+ if (vp->v_usecount > 0) {
+ simple_unlock(&vp->v_interlock);
+ VOP_UNLOCK(vp, 0, p);
return;
+ }
#ifdef DIAGNOSTIC
- if (vp->v_usecount != 0 || vp->v_writecount != 0) {
- vprint("vrele: bad ref count", vp);
- panic("vrele: ref cnt");
+ if (vp->v_usecount < 0 || vp->v_writecount != 0) {
+ vprint("vput: bad ref count", vp);
+ panic("vput: ref cnt");
}
#endif
/*
* insert at tail of LRU list
*/
- TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
- VOP_INACTIVE(vp);
+ simple_lock(&vnode_free_list_slock);
+ if (vp->v_holdcnt > 0)
+ TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
+ else
+ TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+ simple_unlock(&vnode_free_list_slock);
+ simple_unlock(&vp->v_interlock);
+ VOP_INACTIVE(vp, p);
}
/*
+ * Vnode release - use for active VNODES.
+ * If count drops to zero, call inactive routine and return to freelist.
+ */
+void
+vrele(vp)
+ register struct vnode *vp;
+{
+ struct proc *p = curproc;
+
+ if (vunref(vp) == 0 &&
+ vn_lock(vp, LK_EXCLUSIVE |LK_INTERLOCK, p) == 0)
+ VOP_INACTIVE(vp, p);
+}
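Taken together, vunref()/vput()/vrele() implement one rule: when v_usecount reaches zero the vnode is parked on the hold list if buffers still reference it (v_holdcnt > 0) and on the free list otherwise. A toy model of that decision (local types, not the kernel's structures):

    #include <stdio.h>

    struct toy_vnode {
        int usecount;
        int holdcnt;
    };

    static const char *
    release(struct toy_vnode *vp)
    {
        if (--vp->usecount > 0)
            return ("still active");
        /* held by buffers: keep identity, park on the hold list */
        return (vp->holdcnt > 0 ? "-> vnode_hold_list"
            : "-> vnode_free_list");
    }

    int
    main(void)
    {
        struct toy_vnode a = { 1, 2 };  /* dirty buffers hold it */
        struct toy_vnode b = { 2, 0 };

        printf("a: %s\n", release(&a));
        printf("b: %s\n", release(&b));
        printf("b: %s\n", release(&b));
        return (0);
    }
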
+
+#ifdef DIAGNOSTIC
+/*
* Page or buffer structure gets a reference.
*/
void
@@ -828,7 +1268,27 @@ vhold(vp)
register struct vnode *vp;
{
+ /*
+ * If it is on the freelist and the hold count is currently
+ * zero, move it to the hold list.
+ *
+ * The VGONEHACK flag reflects a call from getnewvnode,
+ * which will remove the vnode from the free list, but
+ * will not increment the ref count until after it calls vgone.
+ * If the ref count were incremented first, vgone would
+ * (incorrectly) try to close the previous instance of the
+ * underlying object.
+ */
+ simple_lock(&vp->v_interlock);
+ if (!(vp->v_flag & VGONEHACK) &&
+ vp->v_holdcnt == 0 && vp->v_usecount == 0) {
+ simple_lock(&vnode_free_list_slock);
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
+ simple_unlock(&vnode_free_list_slock);
+ }
vp->v_holdcnt++;
+ simple_unlock(&vp->v_interlock);
}
/*
@@ -839,10 +1299,26 @@ holdrele(vp)
register struct vnode *vp;
{
+ simple_lock(&vp->v_interlock);
if (vp->v_holdcnt <= 0)
panic("holdrele: holdcnt");
vp->v_holdcnt--;
+ /*
+ * If it is on the holdlist and the hold count drops to
+ * zero, move it to the free list.
+ *
+ * See above for VGONEHACK
+ */
+ if (!(vp->v_flag & VGONEHACK) &&
+ vp->v_holdcnt == 0 && vp->v_usecount == 0) {
+ simple_lock(&vnode_free_list_slock);
+ TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
+ TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+ simple_unlock(&vnode_free_list_slock);
+ }
+ simple_unlock(&vp->v_interlock);
}
+#endif /* DIAGNOSTIC */
/*
* Remove any vnodes in the vnode table belonging to mount point mp.
@@ -863,11 +1339,11 @@ vflush(mp, skipvp, flags)
struct vnode *skipvp;
int flags;
{
+ struct proc *p = curproc;
register struct vnode *vp, *nvp;
int busy = 0;
- if ((mp->mnt_flag & MNT_MPBUSY) == 0)
- panic("vflush: not busy");
+ simple_lock(&mntvnode_slock);
loop:
for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
if (vp->v_mount != mp)
@@ -878,24 +1354,32 @@ loop:
*/
if (vp == skipvp)
continue;
+
+ simple_lock(&vp->v_interlock);
/*
* Skip over a vnodes marked VSYSTEM.
*/
- if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
+ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
+ simple_unlock(&vp->v_interlock);
continue;
+ }
/*
* If WRITECLOSE is set, only flush out regular file
* vnodes open for writing.
*/
if ((flags & WRITECLOSE) &&
- (vp->v_writecount == 0 || vp->v_type != VREG))
+ (vp->v_writecount == 0 || vp->v_type != VREG)) {
+ simple_unlock(&vp->v_interlock);
continue;
+ }
/*
* With v_usecount == 0, all we need to do is clear
* out the vnode data structures and we are done.
*/
if (vp->v_usecount == 0) {
- vgone(vp);
+ simple_unlock(&mntvnode_slock);
+ vgonel(vp, p);
+ simple_lock(&mntvnode_slock);
continue;
}
/*
@@ -904,21 +1388,25 @@ loop:
* anonymous device. For all other files, just kill them.
*/
if (flags & FORCECLOSE) {
+ simple_unlock(&mntvnode_slock);
if (vp->v_type != VBLK && vp->v_type != VCHR) {
- vgone(vp);
+ vgonel(vp, p);
} else {
- vclean(vp, 0);
+ vclean(vp, 0, p);
vp->v_op = spec_vnodeop_p;
insmntque(vp, (struct mount *)0);
}
+ simple_lock(&mntvnode_slock);
continue;
}
#ifdef DEBUG
if (busyprt)
vprint("vflush: busy vnode", vp);
#endif
+ simple_unlock(&vp->v_interlock);
busy++;
}
+ simple_unlock(&mntvnode_slock);
if (busy)
return (EBUSY);
return (0);
@@ -926,11 +1414,13 @@ loop:
/*
* Disassociate the underlying file system from a vnode.
+ * The vnode interlock is held on entry.
*/
void
-vclean(vp, flags)
+vclean(vp, flags, p)
register struct vnode *vp;
int flags;
+ struct proc *p;
{
int active;
@@ -941,15 +1431,8 @@ vclean(vp, flags)
* race against ourselves to recycle it.
*/
if ((active = vp->v_usecount) != 0)
- VREF(vp);
- /*
- * Even if the count is zero, the VOP_INACTIVE routine may still
- * have the object locked while it cleans it out. The VOP_LOCK
- * ensures that the VOP_INACTIVE routine is done with its work.
- * For active vnodes, it ensures that no other activity can
- * occur while the underlying object is being cleaned out.
- */
- VOP_LOCK(vp);
+ vp->v_usecount++;
+
/*
* Prevent the vnode from being recycled or
* brought into use while we clean it out.
@@ -957,32 +1440,57 @@ vclean(vp, flags)
if (vp->v_flag & VXLOCK)
panic("vclean: deadlock");
vp->v_flag |= VXLOCK;
+
/*
- * Clean out any buffers associated with the vnode.
+ * Even if the count is zero, the VOP_INACTIVE routine may still
+ * have the object locked while it cleans it out. The VOP_LOCK
+ * ensures that the VOP_INACTIVE routine is done with its work.
+ * For active vnodes, it ensures that no other activity can
+ * occur while the underlying object is being cleaned out.
*/
- if (flags & DOCLOSE)
- vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
+ VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
+
/*
- * Any other processes trying to obtain this lock must first
- * wait for VXLOCK to clear, then call the new lock operation.
+ * Clean out any buffers associated with the vnode.
*/
- VOP_UNLOCK(vp);
+ if (flags & DOCLOSE)
+ vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
/*
* If purging an active vnode, it must be closed and
- * deactivated before being reclaimed.
+ * deactivated before being reclaimed. Note that the
+ * VOP_INACTIVE will unlock the vnode
*/
if (active) {
if (flags & DOCLOSE)
- VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
- VOP_INACTIVE(vp);
+ VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
+ VOP_INACTIVE(vp, p);
+ } else {
+ /*
+ * Any other processes trying to obtain this lock must first
+ * wait for VXLOCK to clear, then call the new lock operation.
+ */
+ VOP_UNLOCK(vp, 0, p);
}
+
/*
* Reclaim the vnode.
*/
- if (VOP_RECLAIM(vp))
+ if (VOP_RECLAIM(vp, p))
panic("vclean: cannot reclaim");
- if (active)
- vrele(vp);
+ if (active) {
+ if (vunref(vp) == 0 &&
+ vp->v_holdcnt > 0)
+ panic("vclean: not clean");
+ simple_unlock(&vp->v_interlock);
+ }
+ cache_purge(vp);
+ if (vp->v_vnlock) {
+ if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
+ vprint("vclean: lock not drained", vp);
+ FREE(vp->v_vnlock, M_VNODE);
+ vp->v_vnlock = NULL;
+ }
/*
* Done with purge, notify sleepers of the grim news.
@@ -1000,12 +1508,25 @@ vclean(vp, flags)
* Eliminate all activity associated with the requested vnode
* and with all vnodes aliased to the requested vnode.
*/
-void
-vgoneall(vp)
- register struct vnode *vp;
+int
+vop_revoke(v)
+ void *v;
{
- register struct vnode *vq;
+ struct vop_revoke_args /* {
+ struct vnode *a_vp;
+ int a_flags;
+ } */ *ap = v;
+ struct vnode *vp, *vq;
+ struct proc *p = curproc;
+
+#ifdef DIAGNOSTIC
+ if ((ap->a_flags & REVOKEALL) == 0)
+ panic("vop_revoke");
+#endif
+ vp = ap->a_vp;
+ simple_lock(&vp->v_interlock);
+
if (vp->v_flag & VALIASED) {
/*
* If a vgone (or vclean) is already in progress,
@@ -1013,19 +1534,23 @@ vgoneall(vp)
*/
if (vp->v_flag & VXLOCK) {
vp->v_flag |= VXWANT;
- tsleep((caddr_t)vp, PINOD, "vgoneall", 0);
- return;
+ simple_unlock(&vp->v_interlock);
+ tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
+ return(0);
}
/*
* Ensure that vp will not be vgone'd while we
* are eliminating its aliases.
*/
vp->v_flag |= VXLOCK;
+ simple_unlock(&vp->v_interlock);
while (vp->v_flag & VALIASED) {
+ simple_lock(&spechash_slock);
for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
if (vq->v_rdev != vp->v_rdev ||
vq->v_type != vp->v_type || vp == vq)
continue;
+ simple_unlock(&spechash_slock);
vgone(vq);
break;
}
@@ -1035,9 +1560,34 @@ vgoneall(vp)
* really eliminate the vnode after which time
* vgone will awaken any sleepers.
*/
+ simple_lock(&vp->v_interlock);
vp->v_flag &= ~VXLOCK;
}
- vgone(vp);
+ vgonel(vp, p);
+ return (0);
+}
+
+/*
+ * Recycle an unused vnode to the front of the free list.
+ * Release the passed interlock if the vnode will be recycled.
+ */
+int
+vrecycle(vp, inter_lkp, p)
+ struct vnode *vp;
+ struct simplelock *inter_lkp;
+ struct proc *p;
+{
+
+ simple_lock(&vp->v_interlock);
+ if (vp->v_usecount == 0) {
+ if (inter_lkp)
+ simple_unlock(inter_lkp);
+ vgonel(vp, p);
+ return (1);
+ }
+ simple_unlock(&vp->v_interlock);
+ return (0);
}
/*
@@ -1048,6 +1598,20 @@ void
vgone(vp)
register struct vnode *vp;
{
+ struct proc *p = curproc;
+
+ simple_lock(&vp->v_interlock);
+ vgonel(vp, p);
+}
+
+/*
+ * vgone, with the vp interlock held.
+ */
+void
+vgonel(vp, p)
+ struct vnode *vp;
+ struct proc *p;
+{
register struct vnode *vq;
struct vnode *vx;
@@ -1057,21 +1621,25 @@ vgone(vp)
*/
if (vp->v_flag & VXLOCK) {
vp->v_flag |= VXWANT;
+ simple_unlock(&vp->v_interlock);
tsleep((caddr_t)vp, PINOD, "vgone", 0);
return;
}
/*
* Clean out the filesystem specific data.
*/
- vclean(vp, DOCLOSE);
+ vclean(vp, DOCLOSE, p);
/*
* Delete from old mount point vnode list, if on one.
*/
- insmntque(vp, (struct mount *)0);
+ if (vp->v_mount != NULL)
+ insmntque(vp, (struct mount *)0);
/*
- * If special device, remove it from special device alias list.
+ * If special device, remove it from special device alias list
+ * if it is on one.
*/
- if (vp->v_type == VBLK || vp->v_type == VCHR) {
+ if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != NULL) {
+ simple_lock(&spechash_slock);
if (*vp->v_hashchain == vp) {
*vp->v_hashchain = vp->v_specnext;
} else {
@@ -1100,27 +1668,26 @@ vgone(vp)
vx->v_flag &= ~VALIASED;
vp->v_flag &= ~VALIASED;
}
+ simple_unlock(&spechash_slock);
FREE(vp->v_specinfo, M_VNODE);
vp->v_specinfo = NULL;
}
/*
* If it is on the freelist and not already at the head,
- * move it to the head of the list. The test of the back
- * pointer and the reference count of zero is because
- * it will be removed from the free list by getnewvnode,
- * but will not have its reference count incremented until
- * after calling vgone. If the reference count were
- * incremented first, vgone would (incorrectly) try to
- * close the previous instance of the underlying object.
- * So, the back pointer is explicitly set to `0xdeadb' in
- * getnewvnode after removing it from the freelist to ensure
- * that we do not try to move it here.
+ * move it to the head of the list.
+ *
+ * See above about the VGONEHACK
*/
- if (vp->v_usecount == 0 &&
- vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
- vnode_free_list.tqh_first != vp) {
- TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
- TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
+ if (vp->v_usecount == 0) {
+ simple_lock(&vnode_free_list_slock);
+ if (vp->v_holdcnt > 0)
+ panic("vgonel: not clean");
+ if (!(vp->v_flag & VGONEHACK) &&
+ TAILQ_FIRST(&vnode_free_list) != vp) {
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
+ }
+ simple_unlock(&vnode_free_list_slock);
}
vp->v_type = VBAD;
}
@@ -1135,14 +1702,18 @@ vfinddev(dev, type, vpp)
struct vnode **vpp;
{
register struct vnode *vp;
+ int rc = 0;
+ simple_lock(&spechash_slock);
for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
if (dev != vp->v_rdev || type != vp->v_type)
continue;
*vpp = vp;
- return (1);
+ rc = 1;
+ break;
}
- return (0);
+ simple_unlock(&spechash_slock);
+ return (rc);
}
/*
@@ -1150,14 +1721,15 @@ vfinddev(dev, type, vpp)
*/
int
vcount(vp)
- register struct vnode *vp;
+ struct vnode *vp;
{
- register struct vnode *vq, *vnext;
+ struct vnode *vq, *vnext;
int count;
loop:
if ((vp->v_flag & VALIASED) == 0)
return (vp->v_usecount);
+ simple_lock(&spechash_slock);
for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
vnext = vq->v_specnext;
if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
@@ -1166,11 +1738,13 @@ loop:
* Alias, but not in use, so flush it out.
*/
if (vq->v_usecount == 0 && vq != vp) {
+ simple_unlock(&spechash_slock);
vgone(vq);
goto loop;
}
count += vq->v_usecount;
}
+ simple_unlock(&spechash_slock);
return (count);
}
@@ -1225,21 +1799,77 @@ vprint(label, vp)
void
printlockedvnodes()
{
- register struct mount *mp;
+ struct proc *p = curproc;
+ register struct mount *mp, *nmp;
register struct vnode *vp;
printf("Locked vnodes\n");
+ simple_lock(&mountlist_slock);
for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
- mp = mp->mnt_list.cqe_next) {
+ mp = nmp) {
+ if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
+ nmp = mp->mnt_list.cqe_next;
+ continue;
+ }
for (vp = mp->mnt_vnodelist.lh_first;
vp != NULL;
- vp = vp->v_mntvnodes.le_next)
+ vp = vp->v_mntvnodes.le_next) {
if (VOP_ISLOCKED(vp))
vprint((char *)0, vp);
- }
+ }
+ simple_lock(&mountlist_slock);
+ nmp = mp->mnt_list.cqe_next;
+ vfs_unbusy(mp, p);
+ }
+ simple_unlock(&mountlist_slock);
+
}
#endif
+/*
+ * Top level filesystem related information gathering.
+ */
+int
+vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+ int *name;
+ u_int namelen;
+ void *oldp;
+ size_t *oldlenp;
+ void *newp;
+ size_t newlen;
+ struct proc *p;
+{
+ struct vfsconf *vfsp;
+
+ /* all sysctl names at this level are at least name and field */
+ if (namelen < 2)
+ return (ENOTDIR); /* overloaded */
+ if (name[0] != VFS_GENERIC) {
+ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
+ if (vfsp->vfc_typenum == name[0])
+ break;
+ if (vfsp == NULL)
+ return (EOPNOTSUPP);
+ return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
+ oldp, oldlenp, newp, newlen, p));
+ }
+ switch (name[1]) {
+ case VFS_MAXTYPENUM:
+ return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
+ case VFS_CONF:
+ if (namelen < 3)
+ return (ENOTDIR); /* overloaded */
+ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
+ if (vfsp->vfc_typenum == name[2])
+ break;
+ if (vfsp == NULL)
+ return (EOPNOTSUPP);
+ return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
+ sizeof(struct vfsconf)));
+ }
+ return (EOPNOTSUPP);
+}
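From userland, the new vfs_sysctl() surface can be walked with sysctl(3); this is how lsvfs(1)-style tools enumerate the configured filesystems. A sketch assuming the CTL_VFS, VFS_GENERIC, VFS_MAXTYPENUM, and VFS_CONF names introduced by this change are exported via <sys/sysctl.h> and <sys/mount.h>:

    #include <sys/param.h>
    #include <sys/mount.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
        int mib[4], maxtypenum, i;
        struct vfsconf vfc;
        size_t len;

        mib[0] = CTL_VFS;
        mib[1] = VFS_GENERIC;
        mib[2] = VFS_MAXTYPENUM;
        len = sizeof(maxtypenum);
        if (sysctl(mib, 3, &maxtypenum, &len, NULL, 0) == -1)
            return (1);
        mib[2] = VFS_CONF;
        for (i = 0; i < maxtypenum; i++) {
            mib[3] = i;
            len = sizeof(vfc);
            if (sysctl(mib, 4, &vfc, &len, NULL, 0) == -1)
                continue;       /* type number not configured */
            printf("%s: type %d, refcount %d\n",
                vfc.vfc_name, vfc.vfc_typenum, vfc.vfc_refcount);
        }
        return (0);
    }
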
+
int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP 10
@@ -1249,12 +1879,13 @@ int kinfo_vgetfailed;
*/
/* ARGSUSED */
int
-sysctl_vnode(where, sizep)
+sysctl_vnode(where, sizep, p)
char *where;
size_t *sizep;
+ struct proc *p;
{
register struct mount *mp, *nmp;
- struct vnode *vp;
+ struct vnode *vp, *nvp;
register char *bp = where, *savebp;
char *ewhere;
int error;
@@ -1267,27 +1898,32 @@ sysctl_vnode(where, sizep)
}
ewhere = where + *sizep;
+ simple_lock(&mountlist_slock);
for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
- nmp = mp->mnt_list.cqe_next;
- if (vfs_busy(mp))
+ if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
+ nmp = mp->mnt_list.cqe_next;
continue;
+ }
savebp = bp;
again:
for (vp = mp->mnt_vnodelist.lh_first;
vp != NULL;
- vp = vp->v_mntvnodes.le_next) {
+ vp = nvp) {
/*
* Check that the vp is still associated with
* this filesystem. RACE: could have been
* recycled onto the same filesystem.
*/
if (vp->v_mount != mp) {
+ simple_unlock(&mntvnode_slock);
if (kinfo_vdebug)
printf("kinfo: vp changed\n");
bp = savebp;
goto again;
}
+ nvp = vp->v_mntvnodes.le_next;
if (bp + VPTRSZ + VNODESZ > ewhere) {
+ simple_unlock(&mntvnode_slock);
*sizep = bp - where;
return (ENOMEM);
}
@@ -1295,10 +1931,17 @@ again:
(error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
return (error);
bp += VPTRSZ + VNODESZ;
+ simple_lock(&mntvnode_slock);
}
- vfs_unbusy(mp);
+
+ simple_unlock(&mntvnode_slock);
+ simple_lock(&mountlist_slock);
+ nmp = mp->mnt_list.cqe_next;
+ vfs_unbusy(mp, p);
}
+ simple_unlock(&mountlist_slock);
+
*sizep = bp - where;
return (0);
}
@@ -1311,26 +1954,31 @@ vfs_mountedon(vp)
register struct vnode *vp;
{
register struct vnode *vq;
+ int error = 0;
- if (vp->v_specflags & SI_MOUNTEDON)
+ if (vp->v_specmountpoint != NULL)
return (EBUSY);
if (vp->v_flag & VALIASED) {
+ simple_lock(&spechash_slock);
for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
if (vq->v_rdev != vp->v_rdev ||
vq->v_type != vp->v_type)
continue;
- if (vq->v_specflags & SI_MOUNTEDON)
- return (EBUSY);
- }
+ if (vq->v_specmountpoint != NULL) {
+ error = EBUSY;
+ break;
+ }
+ }
+ simple_unlock(&spechash_slock);
}
- return (0);
+ return (error);
}
/*
* Build hash lists of net addresses and hang them off the mount point.
* Called by ufs_mount() to set up the lists of export addresses.
*/
-static int
+int
vfs_hang_addrlist(mp, nep, argp)
struct mount *mp;
struct netexport *nep;
@@ -1404,7 +2052,7 @@ out:
}
/* ARGSUSED */
-static int
+int
vfs_free_netcred(rn, w)
struct radix_node *rn;
void *w;
@@ -1419,7 +2067,7 @@ vfs_free_netcred(rn, w)
/*
* Free the net address hash lists that are hanging off the mount points.
*/
-static void
+void
vfs_free_addrlist(nep)
struct netexport *nep;
{
@@ -1666,3 +2314,161 @@ fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
}
return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
}
+
+/*
+ * Routine to create and manage a filesystem syncer vnode.
+ */
+#define sync_close nullop
+int sync_fsync __P((void *));
+int sync_inactive __P((void *));
+#define sync_reclaim nullop
+#define sync_lock vop_nolock
+#define sync_unlock vop_nounlock
+int sync_print __P((void *));
+#define sync_islocked vop_noislocked
+
+int (**sync_vnodeop_p) __P((void *));
+struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_close_desc, sync_close }, /* close */
+ { &vop_fsync_desc, sync_fsync }, /* fsync */
+ { &vop_inactive_desc, sync_inactive }, /* inactive */
+ { &vop_reclaim_desc, sync_reclaim }, /* reclaim */
+ { &vop_lock_desc, sync_lock }, /* lock */
+ { &vop_unlock_desc, sync_unlock }, /* unlock */
+ { &vop_print_desc, sync_print }, /* print */
+ { &vop_islocked_desc, sync_islocked }, /* islocked */
+ { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL }
+};
+struct vnodeopv_desc sync_vnodeop_opv_desc =
+ { &sync_vnodeop_p, sync_vnodeop_entries };
+
+/*
+ * Create a new filesystem syncer vnode for the specified mount point.
+ */
+int
+vfs_allocate_syncvnode(mp)
+ struct mount *mp;
+{
+ struct vnode *vp;
+ static long start, incr, next;
+ int error;
+
+ /* Allocate a new vnode */
+ if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
+ mp->mnt_syncer = NULL;
+ return (error);
+ }
+ vp->v_writecount = 1;
+ vp->v_type = VNON;
+ /*
+ * Place the vnode onto the syncer worklist. We attempt to
+ * scatter them about on the list so that they will go off
+ * at evenly distributed times even if all the filesystems
+ * are mounted at once.
+ */
+ next += incr;
+ if (next == 0 || next > syncer_maxdelay) {
+ start /= 2;
+ incr /= 2;
+ if (start == 0) {
+ start = syncer_maxdelay / 2;
+ incr = syncer_maxdelay;
+ }
+ next = start;
+ }
+ vn_syncer_add_to_worklist(vp, next);
+ mp->mnt_syncer = vp;
+ return (0);
+}
+
+/*
+ * Do a lazy sync of the filesystem.
+ */
+int
+sync_fsync(v)
+ void *v;
+{
+ struct vop_fsync_args /* {
+ struct vnode *a_vp;
+ struct ucred *a_cred;
+ int a_waitfor;
+ struct proc *a_p;
+ } */ *ap = v;
+
+ struct vnode *syncvp = ap->a_vp;
+ struct mount *mp = syncvp->v_mount;
+ int asyncflag;
+
+ /*
+ * We only need to do something if this is a lazy evaluation.
+ */
+ if (ap->a_waitfor != MNT_LAZY)
+ return (0);
+
+ /*
+ * Move ourselves to the back of the sync list.
+ */
+ LIST_REMOVE(syncvp, v_synclist);
+ vn_syncer_add_to_worklist(syncvp, syncdelay);
+
+ /*
+ * Walk the list of vnodes pushing all that are dirty and
+ * not already on the sync list.
+ */
+ simple_lock(&mountlist_slock);
+ if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, ap->a_p) == 0) {
+ asyncflag = mp->mnt_flag & MNT_ASYNC;
+ mp->mnt_flag &= ~MNT_ASYNC;
+ VFS_SYNC(mp, MNT_LAZY, ap->a_cred, ap->a_p);
+ if (asyncflag)
+ mp->mnt_flag |= MNT_ASYNC;
+ vfs_unbusy(mp, ap->a_p);
+ }
+ return (0);
+}
+
+/*
+ * The syncer vnode is no longer needed and is being decommissioned.
+ */
+int
+sync_inactive(v)
+ void *v;
+
+{
+ struct vop_inactive_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap = v;
+
+ struct vnode *vp = ap->a_vp;
+
+ if (vp->v_usecount == 0)
+ return (0);
+ vp->v_mount->mnt_syncer = NULL;
+ LIST_REMOVE(vp, v_synclist);
+ vp->v_writecount = 0;
+ vput(vp);
+ return (0);
+}
+
+/*
+ * Print out a syncer vnode.
+ */
+int
+sync_print(v)
+ void *v;
+
+{
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap = v;
+ struct vnode *vp = ap->a_vp;
+
+ printf("syncer vnode");
+ if (vp->v_vnlock != NULL)
+ lockmgr_printinfo(vp->v_vnlock);
+ printf("\n");
+ return (0);
+}
+
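The start/incr/next dance in vfs_allocate_syncvnode() above scatters successive syncer vnodes across the wheel in a halving pattern (16, 8, 24, 4, 12, ... for a 32-slot wheel), so filesystems mounted back-to-back do not all fire in the same second. A runnable trace of that sequence (the constant is a stand-in for syncer_maxdelay):

    #include <stdio.h>

    #define MAXDELAY_S      32

    static long start, incr, next;

    static long
    next_slot(void)
    {
        next += incr;
        if (next == 0 || next > MAXDELAY_S) {
            /* refine the stride: halve start and increment */
            start /= 2;
            incr /= 2;
            if (start == 0) {
                start = MAXDELAY_S / 2;
                incr = MAXDELAY_S;
            }
            next = start;
        }
        return (next);
    }

    int
    main(void)
    {
        int i;

        for (i = 0; i < 8; i++)
            printf("mount %d -> initial delay %ld\n", i, next_slot());
        return (0);
    }
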
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 74d914ee7e8..f1e566ae6b8 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_syscalls.c,v 1.25 1997/03/02 09:38:35 millert Exp $ */
+/* $OpenBSD: vfs_syscalls.c,v 1.26 1997/10/06 15:12:43 csapuntz Exp $ */
/* $NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $ */
/*
@@ -102,10 +102,11 @@ sys_mount(p, v, retval)
register struct vnode *vp;
register struct mount *mp;
int error, flag = 0;
- u_long fsindex = 0;
+ u_long fstypenum = 0;
char fstypename[MFSNAMELEN];
struct vattr va;
struct nameidata nd;
+ struct vfsconf *vfsp;
if (usermount == 0 && (error = suser(p->p_ucred, &p->p_acflag)))
return (error);
@@ -156,7 +157,7 @@ sys_mount(p, v, retval)
}
SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
}
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
goto update;
}
/*
@@ -195,12 +196,19 @@ sys_mount(p, v, retval)
* string, we check to see if it matches one of the historic
* filesystem types.
*/
- fsindex = (u_long)SCARG(uap, type);
- if (fsindex >= nvfssw || vfssw[fsindex] == NULL) {
- vput(vp);
- return (ENODEV);
+ fstypenum = (u_long)SCARG(uap, type);
+
+ if (fstypenum < maxvfsconf) {
+ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
+ if (vfsp->vfc_typenum == fstypenum)
+ break;
+ if (vfsp == NULL) {
+ vput(vp);
+ return (ENODEV);
+ }
+ strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN);
+
}
- strncpy(fstypename, vfssw[fsindex]->vfs_name, MFSNAMELEN);
#else
vput(vp);
return (error);
@@ -212,14 +220,16 @@ sys_mount(p, v, retval)
strncpy( fstypename, "ffs", MFSNAMELEN);
}
#endif
- for (fsindex = 0; fsindex < nvfssw; fsindex++)
- if (vfssw[fsindex] != NULL &&
- !strncmp(vfssw[fsindex]->vfs_name, fstypename, MFSNAMELEN))
+ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
+ if (!strcmp(vfsp->vfc_name, fstypename))
break;
- if (fsindex >= nvfssw) {
+ }
+
+ if (vfsp == NULL) {
vput(vp);
return (ENODEV);
}
+
if (vp->v_mountedhere != NULL) {
vput(vp);
return (EBUSY);
@@ -231,14 +241,14 @@ sys_mount(p, v, retval)
mp = (struct mount *)malloc((u_long)sizeof(struct mount),
M_MOUNT, M_WAITOK);
bzero((char *)mp, (u_long)sizeof(struct mount));
- mp->mnt_op = vfssw[fsindex];
- if ((error = vfs_lock(mp)) != 0) {
- free((caddr_t)mp, M_MOUNT);
- vput(vp);
- return (error);
- }
- /* Do this early in case we block later. */
- vfssw[fsindex]->vfs_refcount++;
+ lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
+ vfs_busy(mp, LK_NOWAIT, 0, p);
+ mp->mnt_op = vfsp->vfc_vfsops;
+ mp->mnt_vfc = vfsp;
+ vfsp->vfc_refcount++;
+ mp->mnt_stat.f_type = vfsp->vfc_typenum;
+ mp->mnt_flag |= (vfsp->vfc_flags & MNT_VISFLAGMASK);
+ strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
vp->v_mountedhere = mp;
mp->mnt_vnodecovered = vp;
mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
@@ -266,6 +276,17 @@ update:
(MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR);
if (error)
mp->mnt_flag = flag;
+
+ if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+ if (mp->mnt_syncer == NULL)
+ error = vfs_allocate_syncvnode(mp);
+ } else {
+ if (mp->mnt_syncer != NULL)
+ vgone(mp->mnt_syncer);
+ mp->mnt_syncer = NULL;
+ }
+
+ vfs_unbusy(mp, p);
return (error);
}
/*
@@ -273,16 +294,20 @@ update:
*/
cache_purge(vp);
if (!error) {
+ simple_lock(&mountlist_slock);
CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+ simple_unlock(&mountlist_slock);
checkdirs(vp);
- VOP_UNLOCK(vp);
- vfs_unlock(mp);
+ VOP_UNLOCK(vp, 0, p);
+ if ((mp->mnt_flag & MNT_RDONLY) == 0)
+ error = vfs_allocate_syncvnode(mp);
+ vfs_unbusy(mp, p);
(void) VFS_STATFS(mp, &mp->mnt_stat, p);
- error = VFS_START(mp, 0, p);
+ if ((error = VFS_START(mp, 0, p)) != 0)
+ vrele(vp);
} else {
mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
- vfssw[fsindex]->vfs_refcount--;
- vfs_unlock(mp);
+ vfs_unbusy(mp, p);
free((caddr_t)mp, M_MOUNT);
vput(vp);
}
@@ -397,36 +422,40 @@ dounmount(mp, flags, p)
struct vnode *coveredvp;
int error;
- coveredvp = mp->mnt_vnodecovered;
- if (vfs_busy(mp))
- return (EBUSY);
+ simple_lock(&mountlist_slock);
mp->mnt_flag |= MNT_UNMOUNT;
- if ((error = vfs_lock(mp)) != 0)
+ lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p);
+ mp->mnt_flag &=~ MNT_ASYNC;
+ vnode_pager_umount(mp); /* release cached vnodes */
+ cache_purgevfs(mp); /* remove cache entries for this file sys */
+ if (mp->mnt_syncer != NULL)
+ vgone(mp->mnt_syncer);
+ if (((mp->mnt_flag & MNT_RDONLY) ||
+ (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
+ (flags & MNT_FORCE))
+ error = VFS_UNMOUNT(mp, flags, p);
+ simple_lock(&mountlist_slock);
+ if (error) {
+ if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
+ (void) vfs_allocate_syncvnode(mp);
+ mp->mnt_flag &= ~MNT_UNMOUNT;
+ lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
+ &mountlist_slock, p);
return (error);
-
- mp->mnt_flag &=~ MNT_ASYNC;
- vnode_pager_umount(mp); /* release cached vnodes */
- cache_purgevfs(mp); /* remove cache entries for this file sys */
- if ((error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0 ||
- (flags & MNT_FORCE))
- error = VFS_UNMOUNT(mp, flags, p);
- mp->mnt_flag &= ~MNT_UNMOUNT;
- vfs_unbusy(mp);
- if (error) {
- vfs_unlock(mp);
- } else {
- CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
- if (coveredvp != NULLVP) {
- vrele(coveredvp);
- coveredvp->v_mountedhere = (struct mount *)0;
- }
- mp->mnt_op->vfs_refcount--;
- vfs_unlock(mp);
- if (mp->mnt_vnodelist.lh_first != NULL)
- panic("unmount: dangling vnode");
- free((caddr_t)mp, M_MOUNT);
}
- return (error);
+ CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
+ if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
+ coveredvp->v_mountedhere = (struct mount *)0;
+ vrele(coveredvp);
+ }
+ mp->mnt_vfc->vfc_refcount--;
+ if (mp->mnt_vnodelist.lh_first != NULL)
+ panic("unmount: dangling vnode");
+ lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p);
+ if (mp->mnt_flag & MNT_MWAIT)
+ wakeup((caddr_t)mp);
+ free((caddr_t)mp, M_MOUNT);
+ return (0);
}
/*
@@ -447,31 +476,25 @@ sys_sync(p, v, retval)
register struct mount *mp, *nmp;
int asyncflag;
+ simple_lock(&mountlist_slock);
for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
- /*
- * Get the next pointer in case we hang on vfs_busy
- * while we are being unmounted.
- */
- nmp = mp->mnt_list.cqe_prev;
- /*
- * The lock check below is to avoid races with mount
- * and unmount.
- */
- if ((mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY)) == 0 &&
- !vfs_busy(mp)) {
+ if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
+ nmp = mp->mnt_list.cqe_prev;
+ continue;
+ }
+ if ((mp->mnt_flag & MNT_RDONLY) == 0) {
asyncflag = mp->mnt_flag & MNT_ASYNC;
mp->mnt_flag &= ~MNT_ASYNC;
VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
if (asyncflag)
mp->mnt_flag |= MNT_ASYNC;
- /*
- * Get the next pointer again, as the next filesystem
- * might have been unmounted while we were sync'ing.
- */
- nmp = mp->mnt_list.cqe_prev;
- vfs_unbusy(mp);
}
+ simple_lock(&mountlist_slock);
+ nmp = mp->mnt_list.cqe_prev;
+ vfs_unbusy(mp, p);
}
+ simple_unlock(&mountlist_slock);
+
#ifdef DEBUG
if (syncprt)
vfs_bufstats();
@@ -596,7 +619,7 @@ sys_getfsstat(p, v, retval)
syscallarg(long) bufsize;
syscallarg(int) flags;
} */ *uap = v;
- register struct mount *mp;
+ register struct mount *mp, *nmp;
register struct statfs *sp;
caddr_t sfsp;
long count, maxcount, error;
@@ -604,20 +627,28 @@ sys_getfsstat(p, v, retval)
maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
sfsp = (caddr_t)SCARG(uap, buf);
- for (count = 0, mp = mountlist.cqh_first;
- mp != (void *)&mountlist;
- mp = mp->mnt_list.cqe_next) {
- if (sfsp && count < maxcount &&
- ((mp->mnt_flag & MNT_MLOCK) == 0)) {
+ count = 0;
+ simple_lock(&mountlist_slock);
+ for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
+ if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
+ nmp = mp->mnt_list.cqe_next;
+ continue;
+ }
+ if (sfsp && count < maxcount) {
sp = &mp->mnt_stat;
/*
- * If MNT_NOWAIT is specified, do not refresh the
- * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
- */
- if (((SCARG(uap, flags) & MNT_NOWAIT) == 0 ||
+ * If MNT_NOWAIT or MNT_LAZY is specified, do not
+ * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
+ * overrides MNT_WAIT.
+ */
+ if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
(SCARG(uap, flags) & MNT_WAIT)) &&
- (error = VFS_STATFS(mp, sp, p)))
- continue;
+ (error = VFS_STATFS(mp, sp, p))) {
+ simple_lock(&mountlist_slock);
+ nmp = mp->mnt_list.cqe_next;
+ vfs_unbusy(mp, p);
+ continue;
+ }
sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
if (suser(p->p_ucred, &p->p_acflag)) {
bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
@@ -630,7 +661,11 @@ sys_getfsstat(p, v, retval)
sfsp += sizeof(*sp);
}
count++;
+ simple_lock(&mountlist_slock);
+ nmp = mp->mnt_list.cqe_next;
+ vfs_unbusy(mp, p);
}
+ simple_unlock(&mountlist_slock);
if (sfsp && count > maxcount)
*retval = maxcount;
else
@@ -661,7 +696,7 @@ sys_fchdir(p, v, retval)
return (error);
vp = (struct vnode *)fp->f_data;
VREF(vp);
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (vp->v_type != VDIR)
error = ENOTDIR;
else
@@ -677,11 +712,21 @@ sys_fchdir(p, v, retval)
vput(vp);
vp = tdp;
}
- VOP_UNLOCK(vp);
+ while (!error && (mp = vp->v_mountedhere) != NULL) {
+ if (vfs_busy(mp, 0, 0, p))
+ continue;
+ error = VFS_ROOT(mp, &tdp);
+ vfs_unbusy(mp, p);
+ if (error)
+ break;
+ vput(vp);
+ vp = tdp;
+ }
if (error) {
- vrele(vp);
+ vput(vp);
return (error);
}
+ VOP_UNLOCK(vp, 0, p);
vrele(fdp->fd_cdir);
fdp->fd_cdir = vp;
return (0);
@@ -768,9 +813,10 @@ change_dir(ndp, p)
error = ENOTDIR;
else
error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
- VOP_UNLOCK(vp);
if (error)
- vrele(vp);
+ vput(vp);
+ else
+ VOP_UNLOCK(vp, 0, p);
return (error);
}
@@ -837,7 +883,7 @@ sys_open(p, v, retval)
type = F_FLOCK;
if ((flags & FNONBLOCK) == 0)
type |= F_WAIT;
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
if (error) {
(void) vn_close(vp, fp->f_flag, fp->f_cred, p);
@@ -845,10 +891,10 @@ sys_open(p, v, retval)
fdp->fd_ofiles[indx] = NULL;
return (error);
}
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
fp->f_flag |= FHASLOCK;
}
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
*retval = indx;
return (0);
}
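The sys_open() hunk above is the kernel side of BSD's atomic open-with-lock: O_EXLOCK/O_SHLOCK take an flock-style lock through VOP_ADVLOCK, the vnode lock is dropped around the possibly-sleeping request, and FNONBLOCK maps to a non-waiting attempt. The userland view, as a sketch for BSD systems that support O_EXLOCK:

    #include <sys/types.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int
    main(void)
    {
        int fd;

        /* acquire an exclusive flock-style lock at open time;
           O_NONBLOCK makes a held lock fail instead of waiting */
        fd = open("/tmp/lockdemo",
            O_RDWR | O_CREAT | O_EXLOCK | O_NONBLOCK, 0644);
        if (fd == -1) {
            perror("open");
            return (1);
        }
        printf("holding exclusive lock; close(2) releases it\n");
        close(fd);
        return (0);
    }
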
@@ -1417,7 +1463,7 @@ sys_chflags(p, v, retval)
return (error);
vp = nd.ni_vp;
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (vp->v_mount->mnt_flag & MNT_RDONLY)
error = EROFS;
else {
@@ -1452,7 +1498,7 @@ sys_fchflags(p, v, retval)
return (error);
vp = (struct vnode *)fp->f_data;
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (vp->v_mount->mnt_flag & MNT_RDONLY)
error = EROFS;
else {
@@ -1460,7 +1506,7 @@ sys_fchflags(p, v, retval)
vattr.va_flags = SCARG(uap, flags);
error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
}
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
return (error);
}
@@ -1488,7 +1534,7 @@ sys_chmod(p, v, retval)
return (error);
vp = nd.ni_vp;
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (vp->v_mount->mnt_flag & MNT_RDONLY)
error = EROFS;
else {
@@ -1523,7 +1569,7 @@ sys_fchmod(p, v, retval)
return (error);
vp = (struct vnode *)fp->f_data;
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (vp->v_mount->mnt_flag & MNT_RDONLY)
error = EROFS;
else {
@@ -1531,7 +1577,7 @@ sys_fchmod(p, v, retval)
vattr.va_mode = SCARG(uap, mode) & ALLPERMS;
error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
}
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
return (error);
}
@@ -1561,7 +1607,7 @@ sys_chown(p, v, retval)
return (error);
vp = nd.ni_vp;
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (vp->v_mount->mnt_flag & MNT_RDONLY)
error = EROFS;
else {
@@ -1613,7 +1659,7 @@ sys_lchown(p, v, retval)
return (error);
vp = nd.ni_vp;
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (vp->v_mount->mnt_flag & MNT_RDONLY)
error = EROFS;
else {
@@ -1658,88 +1704,88 @@ sys_fchown(p, v, retval)
struct vattr vattr;
int error;
struct file *fp;
- u_short mode;
-
- if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
- return (error);
- vp = (struct vnode *)fp->f_data;
- VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- VOP_LOCK(vp);
- if (vp->v_mount->mnt_flag & MNT_RDONLY)
- error = EROFS;
- else {
- if (suser(p->p_ucred, &p->p_acflag) ||
- suid_clear) {
- error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
- if (error)
- goto out;
- mode = vattr.va_mode & ~(VSUID | VSGID);
- if (mode == vattr.va_mode)
- mode = VNOVAL;
- }
- else
- mode = VNOVAL;
- VATTR_NULL(&vattr);
- vattr.va_uid = SCARG(uap, uid);
- vattr.va_gid = SCARG(uap, gid);
- vattr.va_mode = mode;
- error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
- }
+ u_short mode;
+
+ if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
+ return (error);
+ vp = (struct vnode *)fp->f_data;
+ VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ if (suser(p->p_ucred, &p->p_acflag) ||
+ suid_clear) {
+ error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
+ if (error)
+ goto out;
+ mode = vattr.va_mode & ~(VSUID | VSGID);
+ if (mode == vattr.va_mode)
+ mode = VNOVAL;
+ }
+ else
+ mode = VNOVAL;
+ VATTR_NULL(&vattr);
+ vattr.va_uid = SCARG(uap, uid);
+ vattr.va_gid = SCARG(uap, gid);
+ vattr.va_mode = mode;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
out:
- VOP_UNLOCK(vp);
- return (error);
+ VOP_UNLOCK(vp, 0, p);
+ return (error);
}
-
/*
* Set the access and modification times given a path name.
*/
/* ARGSUSED */
int
sys_utimes(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
+ struct proc *p;
+ void *v;
+ register_t *retval;
{
- register struct sys_utimes_args /* {
- syscallarg(char *) path;
- syscallarg(struct timeval *) tptr;
- } */ *uap = v;
- register struct vnode *vp;
- struct timeval tv[2];
- struct vattr vattr;
- int error;
- struct nameidata nd;
-
- VATTR_NULL(&vattr);
- if (SCARG(uap, tptr) == NULL) {
- microtime(&tv[0]);
- tv[1] = tv[0];
- vattr.va_vaflags |= VA_UTIMES_NULL;
- } else {
- error = copyin((caddr_t)SCARG(uap, tptr), (caddr_t)tv,
- sizeof (tv));
- if (error)
- return (error);
- }
- NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
- if ((error = namei(&nd)) != 0)
- return (error);
- vp = nd.ni_vp;
- VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- VOP_LOCK(vp);
- if (vp->v_mount->mnt_flag & MNT_RDONLY)
- error = EROFS;
- else {
- vattr.va_atime.tv_sec = tv[0].tv_sec;
- vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000;
- vattr.va_mtime.tv_sec = tv[1].tv_sec;
- vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000;
- error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
- }
+ register struct sys_utimes_args /* {
+ syscallarg(char *) path;
+ syscallarg(struct timeval *) tptr;
+ } */ *uap = v;
+ register struct vnode *vp;
+ struct timeval tv[2];
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ VATTR_NULL(&vattr);
+ if (SCARG(uap, tptr) == NULL) {
+ microtime(&tv[0]);
+ tv[1] = tv[0];
+ vattr.va_vaflags |= VA_UTIMES_NULL;
+ } else {
+ error = copyin((caddr_t)SCARG(uap, tptr), (caddr_t)tv,
+ sizeof (tv));
+ if (error)
+ return (error);
+ }
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
+ if ((error = namei(&nd)) != 0)
+ return (error);
+ vp = nd.ni_vp;
+ VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ vattr.va_atime.tv_sec = tv[0].tv_sec;
+ vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000;
+ vattr.va_mtime.tv_sec = tv[1].tv_sec;
+ vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
vput(vp);
- return (error);
+ return (error);
}
+
/*
* Set the access and modification times given a file descriptor.
*/
@@ -1775,7 +1821,7 @@ sys_futimes(p, v, retval)
return (error);
vp = (struct vnode *)fp->f_data;
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (vp->v_mount->mnt_flag & MNT_RDONLY)
error = EROFS;
else {
@@ -1785,7 +1831,7 @@ sys_futimes(p, v, retval)
vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000;
error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
}
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
return (error);
}
@@ -1814,7 +1860,7 @@ sys_truncate(p, v, retval)
return (error);
vp = nd.ni_vp;
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (vp->v_type == VDIR)
error = EISDIR;
else if ((error = vn_writechk(vp)) == 0 &&
@@ -1853,7 +1899,7 @@ sys_ftruncate(p, v, retval)
return (EINVAL);
vp = (struct vnode *)fp->f_data;
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (vp->v_type == VDIR)
error = EISDIR;
else if ((error = vn_writechk(vp)) == 0) {
@@ -1861,7 +1907,7 @@ sys_ftruncate(p, v, retval)
vattr.va_size = SCARG(uap, length);
error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
}
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
return (error);
}
@@ -1885,9 +1931,9 @@ sys_fsync(p, v, retval)
if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
return (error);
vp = (struct vnode *)fp->f_data;
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
return (error);
}
@@ -2108,11 +2154,11 @@ unionread:
auio.uio_segflg = UIO_USERSPACE;
auio.uio_procp = p;
auio.uio_resid = SCARG(uap, count);
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
loff = auio.uio_offset = fp->f_offset;
- error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, (u_long *)0, 0);
+ error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 0, 0);
fp->f_offset = auio.uio_offset;
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
if (error)
return (error);
if ((SCARG(uap, count) == auio.uio_resid) &&
@@ -2182,17 +2228,13 @@ sys_revoke(p, v, retval)
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
- if (vp->v_type != VCHR && vp->v_type != VBLK) {
- error = EINVAL;
- goto out;
- }
if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
goto out;
if (p->p_ucred->cr_uid != vattr.va_uid &&
(error = suser(p->p_ucred, &p->p_acflag)))
goto out;
if (vp->v_usecount > 1 || (vp->v_flag & VALIASED))
- vgoneall(vp);
+ VOP_REVOKE(vp, REVOKEALL);
out:
vrele(vp);
return (error);
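
Every vfs_syscalls.c hunk above follows the same conversion: the old
argumentless VOP_LOCK()/VOP_UNLOCK() pair becomes vn_lock() with explicit
lock flags plus a VOP_UNLOCK() that names the locking process.  A minimal
sketch of the converted idiom, modeled on the sys_ftruncate() hunk (the
helper name is illustrative, not part of the patch):

    /* Sketch: set a file's size under the new lock protocol. */
    int
    set_size(struct vnode *vp, off_t len, struct ucred *cred, struct proc *p)
    {
            struct vattr va;
            int error;

            vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);  /* was VOP_LOCK(vp) */
            VATTR_NULL(&va);
            va.va_size = len;
            error = VOP_SETATTR(vp, &va, cred, p);
            VOP_UNLOCK(vp, 0, p);                     /* was VOP_UNLOCK(vp) */
            return (error);
    }

With LK_RETRY the lock request is retried until it succeeds, which is why
these converted callers ignore vn_lock()'s return value.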
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index b99a001a165..3037cad20fe 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_vnops.c,v 1.5 1997/08/04 08:24:54 deraadt Exp $ */
+/* $OpenBSD: vfs_vnops.c,v 1.6 1997/10/06 15:12:45 csapuntz Exp $ */
/* $NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $ */
/*
@@ -133,9 +133,9 @@ vn_open(ndp, fmode, cmode)
}
}
if (fmode & O_TRUNC) {
- VOP_UNLOCK(vp); /* XXX */
+ VOP_UNLOCK(vp, 0, p); /* XXX */
VOP_LEASE(vp, p, cred, LEASE_WRITE);
- VOP_LOCK(vp); /* XXX */
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */
VATTR_NULL(&va);
va.va_size = 0;
if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
@@ -153,14 +153,14 @@ bad:
/*
* Check for write permissions on the specified vnode.
- * The read-only status of the file system is checked.
- * Also, prototype text segments cannot be written.
+ * Prototype text segments cannot be written.
*/
int
vn_writechk(vp)
register struct vnode *vp;
{
+#if 0
/*
* Disallow write attempts on read-only file systems;
* unless the file is a socket or a block or character
@@ -175,6 +175,7 @@ vn_writechk(vp)
break;
}
}
+#endif
/*
* If there's shared text associated with
* the vnode, try to free it up once. If
@@ -225,7 +226,7 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
int error;
if ((ioflg & IO_NODELOCKED) == 0)
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
aiov.iov_base = base;
@@ -246,7 +247,7 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
if (auio.uio_resid && error == 0)
error = EIO;
if ((ioflg & IO_NODELOCKED) == 0)
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
return (error);
}
@@ -261,16 +262,17 @@ vn_read(fp, uio, cred)
{
register struct vnode *vp = (struct vnode *)fp->f_data;
int count, error = 0;
+ struct proc *p = uio->uio_procp;
VOP_LEASE(vp, uio->uio_procp, cred, LEASE_READ);
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
uio->uio_offset = fp->f_offset;
count = uio->uio_resid;
if (vp->v_type != VDIR)
error = VOP_READ(vp, uio,
(fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0, cred);
fp->f_offset += count - uio->uio_resid;
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
return (error);
}
@@ -284,14 +286,18 @@ vn_write(fp, uio, cred)
struct ucred *cred;
{
register struct vnode *vp = (struct vnode *)fp->f_data;
+ struct proc *p = uio->uio_procp;
int count, error, ioflag = IO_UNIT;
if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
ioflag |= IO_APPEND;
if (fp->f_flag & FNONBLOCK)
ioflag |= IO_NDELAY;
+ if ((fp->f_flag & O_FSYNC) ||
+ (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
+ ioflag |= IO_SYNC;
VOP_LEASE(vp, uio->uio_procp, cred, LEASE_WRITE);
- VOP_LOCK(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
uio->uio_offset = fp->f_offset;
count = uio->uio_resid;
error = VOP_WRITE(vp, uio, ioflag, cred);
@@ -299,7 +305,7 @@ vn_write(fp, uio, cred)
fp->f_offset = uio->uio_offset;
else
fp->f_offset += count - uio->uio_resid;
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
return (error);
}
@@ -427,6 +433,36 @@ vn_select(fp, which, p)
}
/*
+ * Check that the vnode is still valid, and if so
+ * acquire requested lock.
+ */
+int
+vn_lock(vp, flags, p)
+ struct vnode *vp;
+ int flags;
+ struct proc *p;
+{
+ int error;
+
+ do {
+ if ((flags & LK_INTERLOCK) == 0)
+ simple_lock(&vp->v_interlock);
+ if (vp->v_flag & VXLOCK) {
+ vp->v_flag |= VXWANT;
+ simple_unlock(&vp->v_interlock);
+ tsleep((caddr_t)vp, PINOD, "vn_lock", 0);
+ error = ENOENT;
+ } else {
+ error = VOP_LOCK(vp, flags | LK_INTERLOCK, p);
+ if (error == 0)
+ return (error);
+ }
+ flags &= ~LK_INTERLOCK;
+ } while (flags & LK_RETRY);
+ return (error);
+}
+
+/*
* File table vnode close routine.
*/
int
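
The vn_lock() added above is where VXLOCK is honored: if the vnode is in
the middle of revocation or reclaim, the caller sleeps and then either
retries (LK_RETRY) or fails with ENOENT.  A hedged sketch of a caller that
wants to see that failure instead of spinning (the function name is
illustrative):

    /* Sketch: lock without LK_RETRY so a revoked vnode is detected. */
    int
    use_if_alive(struct vnode *vp, struct proc *p)
    {
            int error;

            if ((error = vn_lock(vp, LK_EXCLUSIVE, p)) != 0)
                    return (error);         /* e.g. ENOENT: vnode died */
            /* ... the vnode is locked and still valid here ... */
            VOP_UNLOCK(vp, 0, p);
            return (0);
    }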
diff --git a/sys/kern/vnode_if.c b/sys/kern/vnode_if.c
index 14f1f0c5a09..b373b57c591 100644
--- a/sys/kern/vnode_if.c
+++ b/sys/kern/vnode_if.c
@@ -218,6 +218,22 @@ struct vnodeop_desc vop_write_desc = {
NULL,
};
+int vop_lease_vp_offsets[] = {
+ VOPARG_OFFSETOF(struct vop_lease_args,a_vp),
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_lease_desc = {
+ 0,
+ "vop_lease",
+ 0,
+ vop_lease_vp_offsets,
+ VDESC_NO_OFFSET,
+ VOPARG_OFFSETOF(struct vop_lease_args, a_cred),
+ VOPARG_OFFSETOF(struct vop_lease_args, a_p),
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
int vop_ioctl_vp_offsets[] = {
VOPARG_OFFSETOF(struct vop_ioctl_args,a_vp),
VDESC_NO_OFFSET
@@ -250,6 +266,22 @@ struct vnodeop_desc vop_select_desc = {
NULL,
};
+int vop_revoke_vp_offsets[] = {
+ VOPARG_OFFSETOF(struct vop_revoke_args,a_vp),
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_revoke_desc = {
+ 0,
+ "vop_revoke",
+ 0,
+ vop_revoke_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
int vop_mmap_vp_offsets[] = {
VOPARG_OFFSETOF(struct vop_mmap_args,a_vp),
VDESC_NO_OFFSET
@@ -459,7 +491,7 @@ struct vnodeop_desc vop_inactive_desc = {
vop_inactive_vp_offsets,
VDESC_NO_OFFSET,
VDESC_NO_OFFSET,
- VDESC_NO_OFFSET,
+ VOPARG_OFFSETOF(struct vop_inactive_args, a_p),
VDESC_NO_OFFSET,
NULL,
};
@@ -475,7 +507,7 @@ struct vnodeop_desc vop_reclaim_desc = {
vop_reclaim_vp_offsets,
VDESC_NO_OFFSET,
VDESC_NO_OFFSET,
- VDESC_NO_OFFSET,
+ VOPARG_OFFSETOF(struct vop_reclaim_args, a_p),
VDESC_NO_OFFSET,
NULL,
};
@@ -491,7 +523,7 @@ struct vnodeop_desc vop_lock_desc = {
vop_lock_vp_offsets,
VDESC_NO_OFFSET,
VDESC_NO_OFFSET,
- VDESC_NO_OFFSET,
+ VOPARG_OFFSETOF(struct vop_lock_args, a_p),
VDESC_NO_OFFSET,
NULL,
};
@@ -507,7 +539,7 @@ struct vnodeop_desc vop_unlock_desc = {
vop_unlock_vp_offsets,
VDESC_NO_OFFSET,
VDESC_NO_OFFSET,
- VDESC_NO_OFFSET,
+ VOPARG_OFFSETOF(struct vop_unlock_args, a_p),
VDESC_NO_OFFSET,
NULL,
};
@@ -624,6 +656,22 @@ struct vnodeop_desc vop_valloc_desc = {
NULL,
};
+int vop_balloc_vp_offsets[] = {
+ VOPARG_OFFSETOF(struct vop_balloc_args,a_vp),
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_balloc_desc = {
+ 0,
+ "vop_balloc",
+ 0,
+ vop_balloc_vp_offsets,
+ VDESC_NO_OFFSET,
+ VOPARG_OFFSETOF(struct vop_balloc_args, a_cred),
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
int vop_reallocblks_vp_offsets[] = {
VOPARG_OFFSETOF(struct vop_reallocblks_args,a_vp),
VDESC_NO_OFFSET
@@ -688,22 +736,6 @@ struct vnodeop_desc vop_update_desc = {
NULL,
};
-int vop_lease_vp_offsets[] = {
- VOPARG_OFFSETOF(struct vop_lease_args,a_vp),
- VDESC_NO_OFFSET
-};
-struct vnodeop_desc vop_lease_desc = {
- 0,
- "vop_lease",
- 0,
- vop_lease_vp_offsets,
- VDESC_NO_OFFSET,
- VOPARG_OFFSETOF(struct vop_lease_args, a_cred),
- VOPARG_OFFSETOF(struct vop_lease_args, a_p),
- VDESC_NO_OFFSET,
- NULL,
-};
-
int vop_whiteout_vp_offsets[] = {
VOPARG_OFFSETOF(struct vop_whiteout_args,a_dvp),
VDESC_NO_OFFSET
@@ -769,8 +801,10 @@ struct vnodeop_desc *vfs_op_descs[] = {
&vop_setattr_desc,
&vop_read_desc,
&vop_write_desc,
+ &vop_lease_desc,
&vop_ioctl_desc,
&vop_select_desc,
+ &vop_revoke_desc,
&vop_mmap_desc,
&vop_fsync_desc,
&vop_seek_desc,
@@ -794,11 +828,11 @@ struct vnodeop_desc *vfs_op_descs[] = {
&vop_advlock_desc,
&vop_blkatoff_desc,
&vop_valloc_desc,
+ &vop_balloc_desc,
&vop_reallocblks_desc,
&vop_vfree_desc,
&vop_truncate_desc,
&vop_update_desc,
- &vop_lease_desc,
&vop_whiteout_desc,
NULL
};
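
vnode_if.c is generated from vnode_if.src; each vnodeop_desc records the
byte offsets of a call's vnode, cred, and proc arguments so that stacked
filesystems can locate them without knowing the concrete argument
structure.  A sketch of that use, assuming the vop_generic_args type and
the VOPARG_OFFSETTO() macro from sys/sys/vnode.h:

    /*
     * Sketch: recover the first vnode argument of an arbitrary VOP
     * call from its descriptor, as a bypass routine would.
     */
    struct vnode *
    first_vnode_arg(struct vop_generic_args *ap)
    {
            struct vnodeop_desc *vd = ap->a_desc;

            if (vd->vdesc_vp_offsets == NULL ||
                vd->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
                    return (NULL);
            return (*VOPARG_OFFSETTO(struct vnode **,
                vd->vdesc_vp_offsets[0], ap));
    }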
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index 0a8c45ace58..76edff456c6 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -1,4 +1,4 @@
-# $OpenBSD: vnode_if.src,v 1.4 1996/05/22 11:47:12 deraadt Exp $
+# $OpenBSD: vnode_if.src,v 1.5 1997/10/06 15:12:48 csapuntz Exp $
# $NetBSD: vnode_if.src,v 1.10 1996/05/11 18:26:27 mycroft Exp $
#
# Copyright (c) 1992, 1993
@@ -34,12 +34,43 @@
#
# @(#)vnode_if.src 8.3 (Berkeley) 2/3/94
#
+
+
+#
+# Above each of the vop descriptors is a specification of the locking
+# protocol used by each vop call. The first column is the name of
+# the variable, the remaining three columns are in, out and error
+# respectively. The "in" column defines the lock state on input,
+# the "out" column defines the state on successful return, and the
+# "error" column defines the locking state on error exit.
+#
+# The lock state can take the following values:
+# L: locked.
+# U: unlocked.
+# -: not applicable. vnode does not yet (or no longer) exist.
+# =: the same on input and output, may be either L or U.
+# X: locked if not nil.
+#
+
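+#
+# For example, the create annotation below ("#% create dvp L U U",
+# "#% create vpp - L -") says the directory vnode must be locked on
+# entry and is unlocked on any return, while the new vnode does not
+# exist on entry and is returned locked on success.  A hedged
+# caller-side sketch of that contract, for illustration only:
+#
+#	error = VOP_CREATE(ndp->ni_dvp, &vp, &ndp->ni_cnd, &vattr);
+#	if (error)
+#		return (error);	/* dvp already unlocked ("U") */
+#	/* dvp unlocked ("U"); vp locked and referenced ("L") */
+#	vput(vp);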
+#
+#% lookup dvp L ? ?
+#% lookup vpp - L -
+#
+# XXX - the lookup locking protocol defies simple description and depends
+# on the flags and operation fields in the (cnp) structure. Note
+# especially that *vpp may equal dvp and both may be locked.
+
vop_lookup {
IN struct vnode *dvp;
INOUT struct vnode **vpp;
IN struct componentname *cnp;
};
+#
+#% create dvp L U U
+#% create vpp - L -
+#
+
vop_create {
IN WILLRELE struct vnode *dvp;
OUT struct vnode **vpp;
@@ -47,6 +78,11 @@ vop_create {
IN struct vattr *vap;
};
+#
+#% mknod dvp L U U
+#% mknod vpp - X -
+#
+
vop_mknod {
IN WILLRELE struct vnode *dvp;
OUT WILLRELE struct vnode **vpp;
@@ -54,6 +90,10 @@ vop_mknod {
IN struct vattr *vap;
};
+#
+#% open vp L L L
+#
+
vop_open {
IN struct vnode *vp;
IN int mode;
@@ -61,6 +101,10 @@ vop_open {
IN struct proc *p;
};
+#
+#% close vp U U U
+#
+
vop_close {
IN struct vnode *vp;
IN int fflag;
@@ -68,6 +112,10 @@ vop_close {
IN struct proc *p;
};
+#
+#% access vp L L L
+#
+
vop_access {
IN struct vnode *vp;
IN int mode;
@@ -75,6 +123,10 @@ vop_access {
IN struct proc *p;
};
+#
+#% getattr vp = = =
+#
+
vop_getattr {
IN struct vnode *vp;
IN struct vattr *vap;
@@ -82,6 +134,11 @@ vop_getattr {
IN struct proc *p;
};
+
+#
+#% setattr vp L L L
+#
+
vop_setattr {
IN struct vnode *vp;
IN struct vattr *vap;
@@ -89,6 +146,10 @@ vop_setattr {
IN struct proc *p;
};
+#
+#% read vp L L L
+#
+
vop_read {
IN struct vnode *vp;
INOUT struct uio *uio;
@@ -96,6 +157,10 @@ vop_read {
IN struct ucred *cred;
};
+#
+#% write vp L L L
+#
+
vop_write {
IN struct vnode *vp;
INOUT struct uio *uio;
@@ -103,6 +168,20 @@ vop_write {
IN struct ucred *cred;
};
+#
+#% lease vp = = =
+#
+vop_lease {
+ IN struct vnode *vp;
+ IN struct proc *p;
+ IN struct ucred *cred;
+ IN int flag;
+};
+
+#
+#% ioctl vp U U U
+#
+
vop_ioctl {
IN struct vnode *vp;
IN u_long command;
@@ -112,7 +191,11 @@ vop_ioctl {
IN struct proc *p;
};
+#
+#% select vp U U U
+#
# Needs work? (fflags)
+#
vop_select {
IN struct vnode *vp;
IN int which;
@@ -121,6 +204,17 @@ vop_select {
IN struct proc *p;
};
+#
+#% revoke vp U U U
+#
+vop_revoke {
+ IN struct vnode *vp;
+ IN int flags;
+};
+
+#
+# XXX - not used
+#
vop_mmap {
IN struct vnode *vp;
IN int fflags;
@@ -128,6 +222,9 @@ vop_mmap {
IN struct proc *p;
};
+#
+#% fsync vp L L L
+#
vop_fsync {
IN struct vnode *vp;
IN struct ucred *cred;
@@ -135,7 +232,10 @@ vop_fsync {
IN struct proc *p;
};
-# Needs word: Is newoff right? What's it mean?
+#
+# XXX - not used
+# Needs work: Is newoff right? What's it mean?
+#
vop_seek {
IN struct vnode *vp;
IN off_t oldoff;
@@ -143,18 +243,34 @@ vop_seek {
IN struct ucred *cred;
};
+#
+#% remove dvp L U U
+#% remove vp L U U
+#
+
vop_remove {
IN WILLRELE struct vnode *dvp;
IN WILLRELE struct vnode *vp;
IN struct componentname *cnp;
};
+#
+#% link vp U U U
+#% link dvp L U U
+#
vop_link {
IN WILLRELE struct vnode *dvp;
IN struct vnode *vp;
IN struct componentname *cnp;
};
+#
+#% rename fdvp U U U
+#% rename fvp U U U
+#% rename tdvp L U U
+#% rename tvp X U U
+#
+
vop_rename {
IN WILLRELE struct vnode *fdvp;
IN WILLRELE struct vnode *fvp;
@@ -164,6 +280,11 @@ vop_rename {
IN struct componentname *tcnp;
};
+#
+#% mkdir dvp L U U
+#% mkdir vpp - L -
+#
+
vop_mkdir {
IN WILLRELE struct vnode *dvp;
OUT struct vnode **vpp;
@@ -171,12 +292,26 @@ vop_mkdir {
IN struct vattr *vap;
};
+#
+#% rmdir dvp L U U
+#% rmdir vp L U U
+#
+
vop_rmdir {
IN WILLRELE struct vnode *dvp;
IN WILLRELE struct vnode *vp;
IN struct componentname *cnp;
};
+#
+#% symlink dvp L U U
+#% symlink vpp - U -
+#
+# XXX - note that the return vnode has already been VRELE'ed
+# by the filesystem layer. To use it you must use vget,
+# possibly with a further namei.
+#
+
vop_symlink {
IN WILLRELE struct vnode *dvp;
OUT WILLRELE struct vnode **vpp;
@@ -185,42 +320,79 @@ vop_symlink {
IN char *target;
};
+#
+#% readdir vp L L L
+#
+
vop_readdir {
IN struct vnode *vp;
INOUT struct uio *uio;
IN struct ucred *cred;
- OUT int *eofflag;
- OUT u_long *cookies;
- IN int ncookies;
+ INOUT int *eofflag;
+ OUT int *ncookies;
+ INOUT u_long **cookies;
};
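+#
+# The cookie protocol changes with this revision: the filesystem now
+# allocates the cookie array and reports its length through *ncookies,
+# and a caller that asked for cookies must free the array.  A hedged
+# sketch (M_TEMP as the allocation pool is an assumption):
+#
+#	int eofflag, ncookies;
+#	u_long *cookies = NULL;
+#
+#	error = VOP_READDIR(vp, &auio, cred, &eofflag, &ncookies,
+#	    &cookies);
+#	if (cookies != NULL)
+#		free(cookies, M_TEMP);	/* fs-allocated; caller frees */
+#
+# Callers that want no cookies pass nil for both, as the getdirentries
+# hunk in vfs_syscalls.c above now does.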
+#
+#% readlink vp L L L
+#
vop_readlink {
IN struct vnode *vp;
INOUT struct uio *uio;
IN struct ucred *cred;
};
+#
+#% abortop dvp = = =
+#
vop_abortop {
IN struct vnode *dvp;
IN struct componentname *cnp;
};
+
+#
+#% inactive vp L U U
+#
vop_inactive {
IN struct vnode *vp;
+ IN struct proc *p;
};
+#
+#% reclaim vp U U U
+#
+
vop_reclaim {
IN struct vnode *vp;
+ IN struct proc *p;
};
+#
+#% lock vp U L U
+#
+
vop_lock {
IN struct vnode *vp;
+ IN int flags;
+ IN struct proc *p;
};
+#
+#% unlock vp L U L
+#
+
vop_unlock {
IN struct vnode *vp;
+ IN int flags;
+ IN struct proc *p;
};
+#
+#% bmap vp L L L
+#% bmap vpp - U -
+#
+
vop_bmap {
IN struct vnode *vp;
IN daddr_t bn;
@@ -229,24 +401,39 @@ vop_bmap {
OUT int *runp;
};
+#
+# Needs work: no vp?
+#
#vop_strategy {
# IN struct buf *bp;
#};
+#
+#% print vp = = =
+#
vop_print {
IN struct vnode *vp;
};
+#
+#% islocked vp = = =
+#
vop_islocked {
IN struct vnode *vp;
};
+#
+#% pathconf vp L L L
+#
vop_pathconf {
IN struct vnode *vp;
IN int name;
OUT register_t *retval;
};
+#
+#% advlock vp U U U
+#
vop_advlock {
IN struct vnode *vp;
IN caddr_t id;
@@ -255,6 +442,9 @@ vop_advlock {
IN int flags;
};
+#
+#% blkatoff vp L L L
+#
vop_blkatoff {
IN struct vnode *vp;
IN off_t offset;
@@ -262,6 +452,9 @@ vop_blkatoff {
OUT struct buf **bpp;
};
+#
+#% valloc pvp L L L
+#
vop_valloc {
IN struct vnode *pvp;
IN int mode;
@@ -269,17 +462,40 @@ vop_valloc {
OUT struct vnode **vpp;
};
+#
+#% balloc vp L L L
+#
+vop_balloc {
+ IN struct vnode *vp;
+ IN off_t startoffset;
+ IN int size;
+ IN struct ucred *cred;
+ IN int flags;
+ OUT struct buf **bpp;
+};
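+#
+# vop_balloc is new in this patch: it pairs block allocation with
+# buffer retrieval for a byte range, so ffs_balloc() can be reached
+# through the vnode interface.  A hedged caller sketch (B_CLRBUF,
+# requesting a zeroed buffer, is an assumption about typical use):
+#
+#	struct buf *bp;
+#
+#	error = VOP_BALLOC(vp, offset, size, cred, B_CLRBUF, &bp);
+#	if (error == 0) {
+#		/* copy new data into bp->b_data, then... */
+#		error = bwrite(bp);
+#	}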
+
+#
+#% reallocblks vp L L L
+#
vop_reallocblks {
IN struct vnode *vp;
IN struct cluster_save *buflist;
};
+#
+#% vfree pvp L L L
+#
+
vop_vfree {
IN struct vnode *pvp;
IN ino_t ino;
IN int mode;
};
+#
+#% truncate vp L L L
+#
+
vop_truncate {
IN struct vnode *vp;
IN off_t length;
@@ -288,6 +504,10 @@ vop_truncate {
IN struct proc *p;
};
+#
+#% update vp L L L
+#
+
vop_update {
IN struct vnode *vp;
IN struct timespec *access;
@@ -295,12 +515,11 @@ vop_update {
IN int waitfor;
};
-vop_lease {
- IN struct vnode *vp;
- IN struct proc *p;
- IN struct ucred *cred;
- IN int flag;
-};
+#
+#% whiteout dvp L L L
+#% whiteout cnp - - -
+#% whiteout flag - - -
+#
vop_whiteout {
IN struct vnode *dvp;