-rw-r--r--  sys/kern/exec_script.c   |    4
-rw-r--r--  sys/kern/init_main.c     |    9
-rw-r--r--  sys/kern/kern_acct.c     |    4
-rw-r--r--  sys/kern/kern_exec.c     |    6
-rw-r--r--  sys/kern/kern_exit.c     |    4
-rw-r--r--  sys/kern/kern_ktrace.c   |   10
-rw-r--r--  sys/kern/kern_lkm.c      |   47
-rw-r--r--  sys/kern/kern_lock.c     |  537
-rw-r--r--  sys/kern/kern_sig.c      |    4
-rw-r--r--  sys/kern/kern_synch.c    |    4
-rw-r--r--  sys/kern/kern_sysctl.c   |    9
-rw-r--r--  sys/kern/subr_xxx.c      |    6
-rw-r--r--  sys/kern/sys_generic.c   |   22
-rw-r--r--  sys/kern/sys_pipe.c      |    4
-rw-r--r--  sys/kern/tty.c           |    6
-rw-r--r--  sys/kern/tty_tty.c       |   16
-rw-r--r--  sys/kern/uipc_usrreq.c   |    4
-rw-r--r--  sys/kern/vfs_bio.c       |  294
-rw-r--r--  sys/kern/vfs_cluster.c   |   19
-rw-r--r--  sys/kern/vfs_conf.c      |  172
-rw-r--r--  sys/kern/vfs_init.c      |   21
-rw-r--r--  sys/kern/vfs_lookup.c    |   36
-rw-r--r--  sys/kern/vfs_subr.c      | 1230
-rw-r--r--  sys/kern/vfs_syscalls.c  |  406
-rw-r--r--  sys/kern/vfs_vnops.c     |   58
-rw-r--r--  sys/kern/vnode_if.c      |   76
-rw-r--r--  sys/kern/vnode_if.src    |  241
27 files changed, 2398 insertions, 851 deletions
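
The pervasive mechanical change in this diff is the Lite2 vnode locking
interface: VOP_LOCK(vp) and VOP_UNLOCK(vp) become vn_lock(vp, flags, p)
and VOP_UNLOCK(vp, flags, p), threading lock-type flags and the calling
process through every lock operation. A minimal before/after sketch,
with vp, uio, flag, and p taken from the cttyread() hunk below:

	int error;
	struct proc *p = uio->uio_procp;

	/* old interface (pre-Lite2): */
	VOP_LOCK(vp);
	error = VOP_READ(vp, uio, flag, NOCRED);
	VOP_UNLOCK(vp);

	/* new interface, as used throughout this diff: */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	error = VOP_READ(vp, uio, flag, NOCRED);
	VOP_UNLOCK(vp, 0, p);
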
diff --git a/sys/kern/exec_script.c b/sys/kern/exec_script.c index 551c52c7602..91f6d638397 100644 --- a/sys/kern/exec_script.c +++ b/sys/kern/exec_script.c @@ -1,4 +1,4 @@ -/* $OpenBSD: exec_script.c,v 1.4 1996/10/20 15:30:07 dm Exp $ */ +/* $OpenBSD: exec_script.c,v 1.5 1997/10/06 15:12:10 csapuntz Exp $ */ /* $NetBSD: exec_script.c,v 1.13 1996/02/04 02:15:06 christos Exp $ */ /* @@ -228,7 +228,7 @@ check_shell: scriptvp = epp->ep_vp; oldpnbuf = epp->ep_ndp->ni_cnd.cn_pnbuf; - VOP_UNLOCK(scriptvp); + VOP_UNLOCK(scriptvp, 0, p); if ((error = check_exec(p, epp)) == 0) { /* note that we've clobbered the header */ diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index a8f78c7511a..ae47944d74e 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: init_main.c,v 1.24 1997/07/28 09:13:17 deraadt Exp $ */ +/* $OpenBSD: init_main.c,v 1.25 1997/10/06 15:12:12 csapuntz Exp $ */ /* $NetBSD: init_main.c,v 1.84.4.1 1996/06/02 09:08:06 mrg Exp $ */ /* @@ -335,17 +335,16 @@ main(framep) schedcpu(NULL); /* Mount the root file system. */ - if ((*mountroot)()) + if (vfs_mountroot()) panic("cannot mount root"); mountlist.cqh_first->mnt_flag |= MNT_ROOTFS; - mountlist.cqh_first->mnt_op->vfs_refcount++; /* Get the vnode for '/'. Set filedesc0.fd_fd.fd_cdir to reference it. */ if (VFS_ROOT(mountlist.cqh_first, &rootvnode)) panic("cannot find root vnode"); filedesc0.fd_fd.fd_cdir = rootvnode; VREF(filedesc0.fd_fd.fd_cdir); - VOP_UNLOCK(rootvnode); + VOP_UNLOCK(rootvnode, 0, p); filedesc0.fd_fd.fd_rdir = NULL; swapinit(); @@ -584,6 +583,6 @@ start_update(p) */ p->p_flag |= P_INMEM | P_SYSTEM; /* XXX */ bcopy("update", curproc->p_comm, sizeof ("update")); - vn_update(); + sched_sync(p); /* NOTREACHED */ } diff --git a/sys/kern/kern_acct.c b/sys/kern/kern_acct.c index c371c085046..ccf5d0a79db 100644 --- a/sys/kern/kern_acct.c +++ b/sys/kern/kern_acct.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_acct.c,v 1.2 1996/03/03 17:19:40 niklas Exp $ */ +/* $OpenBSD: kern_acct.c,v 1.3 1997/10/06 15:12:14 csapuntz Exp $ */ /* $NetBSD: kern_acct.c,v 1.42 1996/02/04 02:15:12 christos Exp $ */ /*- @@ -120,7 +120,7 @@ sys_acct(p, v, retval) p); if ((error = vn_open(&nd, FWRITE, 0)) != 0) return (error); - VOP_UNLOCK(nd.ni_vp); + VOP_UNLOCK(nd.ni_vp, 0, p); if (nd.ni_vp->v_type != VREG) { vn_close(nd.ni_vp, FWRITE, p->p_ucred, p); return (EACCES); diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 4dfb48a0e9c..b70c6bd3cd2 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_exec.c,v 1.12 1997/08/01 22:54:50 deraadt Exp $ */ +/* $OpenBSD: kern_exec.c,v 1.13 1997/10/06 15:12:16 csapuntz Exp $ */ /* $NetBSD: kern_exec.c,v 1.75 1996/02/09 18:59:28 christos Exp $ */ /*- @@ -188,10 +188,10 @@ check_exec(p, epp) bad2: /* - * unlock and close the vnode, restore the old one, free the + * unlock and close the vnode, free the * pathname buf, and punt. */ - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); vn_close(vp, FREAD, p->p_ucred, p); FREE(ndp->ni_cnd.cn_pnbuf, M_NAMEI); return error; diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 0f74d83a4f3..bab12bfe770 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_exit.c,v 1.9 1997/09/15 05:46:12 millert Exp $ */ +/* $OpenBSD: kern_exit.c,v 1.10 1997/10/06 15:12:17 csapuntz Exp $ */ /* $NetBSD: kern_exit.c,v 1.39 1996/04/22 01:38:25 christos Exp $ */ /* @@ -179,7 +179,7 @@ exit1(p, rv) * if we blocked. 
*/ if (sp->s_ttyvp) - vgoneall(sp->s_ttyvp); + VOP_REVOKE(sp->s_ttyvp, REVOKEALL); } if (sp->s_ttyvp) vrele(sp->s_ttyvp); diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c index ab28fda5075..973ba8e5827 100644 --- a/sys/kern/kern_ktrace.c +++ b/sys/kern/kern_ktrace.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_ktrace.c,v 1.3 1996/03/03 17:19:46 niklas Exp $ */ +/* $OpenBSD: kern_ktrace.c,v 1.4 1997/10/06 15:12:18 csapuntz Exp $ */ /* $NetBSD: kern_ktrace.c,v 1.23 1996/02/09 18:59:36 christos Exp $ */ /* @@ -293,7 +293,9 @@ sys_ktrace(curp, v, retval) return (error); } vp = nd.ni_vp; - VOP_UNLOCK(vp); + + /* FIXME: Should be curp?? */ + VOP_UNLOCK(vp, 0, p); if (vp->v_type != VREG) { (void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp); curp->p_traceflag &= ~KTRFAC_ACTIVE; @@ -468,9 +470,9 @@ ktrwrite(vp, kth) aiov[1].iov_len = kth->ktr_len; auio.uio_resid += kth->ktr_len; } - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, p->p_ucred); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); if (!error) return; /* diff --git a/sys/kern/kern_lkm.c b/sys/kern/kern_lkm.c index 144df985707..524b3671468 100644 --- a/sys/kern/kern_lkm.c +++ b/sys/kern/kern_lkm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_lkm.c,v 1.18 1997/09/24 18:16:22 mickey Exp $ */ +/* $OpenBSD: kern_lkm.c,v 1.19 1997/10/06 15:12:19 csapuntz Exp $ */ /* $NetBSD: kern_lkm.c,v 1.31 1996/03/31 21:40:27 christos Exp $ */ /* @@ -689,57 +689,52 @@ _lkm_vfs(lkmtp, cmd) struct lkm_table *lkmtp; int cmd; { - struct lkm_vfs *args = lkmtp->private.lkm_vfs; - int i; int error = 0; - +#if 0 + struct lkm_vfs *args = lkmtp->private.lkm_vfs; + struct vfsconf *vfsp, **vfspp; +#endif switch(cmd) { case LKM_E_LOAD: /* don't load twice! */ if (lkmexists(lkmtp)) return (EEXIST); + return (EEXIST); +#if 0 /* make sure there's no VFS in the table with this name */ - for (i = 0; i < nvfssw; i++) - if (vfssw[i] != (struct vfsops *)0 && - strncmp(vfssw[i]->vfs_name, + for (vfspp = &vfsconf, vfsp = vfsconf; + vfsp; + vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) + if (strncmp(vfsp->vfc_name, args->lkm_vfsops->vfs_name, MFSNAMELEN) == 0) return (EEXIST); + /* pick the last available empty slot */ - for (i = nvfssw - 1; i >= 0; i--) - if (vfssw[i] == (struct vfsops *)0) - break; - if (i == -1) { /* or if none, punt */ - error = EINVAL; - break; - } + MALLOC (vfsp, struct vfsconf *, sizeof (struct vfsconf), + M_VFS, M_WAITOK); + + /* Add tot he end of the list */ + *vfspp = vfsp; /* * Set up file system */ - vfssw[i] = args->lkm_vfsops; - vfssw[i]->vfs_refcount = 0; + /* FIXME (CPS): Setup new vfsconf structure */ /* * Call init function for this VFS... */ - (*(vfssw[i]->vfs_init))(); + (*(vfsp->vfc_vfsops->vfs_init))(vfsp); /* done! */ - args->lkm_offset = i; /* slot in vfssw[] */ + /* Nope - can't return this */ break; +#endif case LKM_E_UNLOAD: - /* current slot... */ - i = args->lkm_offset; - - if (vfssw[i]->vfs_refcount != 0) - return (EBUSY); - - /* replace current slot contents with old contents */ - vfssw[i] = (struct vfsops *)0; break; case LKM_E_STAT: /* no special handling... */ diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c new file mode 100644 index 00000000000..c6793d24f58 --- /dev/null +++ b/sys/kern/kern_lock.c @@ -0,0 +1,537 @@ +/* + * Copyright (c) 1995 + * The Regents of the University of California. All rights reserved. 
+ * + * This code contains ideas from software contributed to Berkeley by + * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating + * System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)kern_lock.c 8.18 (Berkeley) 5/21/95 + */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/lock.h> +#include <sys/systm.h> + +#include <machine/cpu.h> + +void record_stacktrace __P((int *, int)); +void playback_stacktrace __P((int *, int)); + +/* + * Locking primitives implementation. + * Locks provide shared/exclusive sychronization. + */ + +#ifdef DEBUG +#define COUNT(p, x) if (p) (p)->p_locks += (x) +#else +#define COUNT(p, x) +#endif + +#if NCPUS > 1 + +/* + * For multiprocessor system, try spin lock first. + * + * This should be inline expanded below, but we cannot have #if + * inside a multiline define. + */ +int lock_wait_time = 100; +#define PAUSE(lkp, wanted) \ + if (lock_wait_time > 0) { \ + int i; \ + \ + simple_unlock(&lkp->lk_interlock); \ + for (i = lock_wait_time; i > 0; i--) \ + if (!(wanted)) \ + break; \ + simple_lock(&lkp->lk_interlock); \ + } \ + if (!(wanted)) \ + break; + +#else /* NCPUS == 1 */ + +/* + * It is an error to spin on a uniprocessor as nothing will ever cause + * the simple lock to clear while we are executing. + */ +#define PAUSE(lkp, wanted) + +#endif /* NCPUS == 1 */ + +/* + * Acquire a resource. 
+ */ +#define ACQUIRE(lkp, error, extflags, wanted) \ + PAUSE(lkp, wanted); \ + for (error = 0; wanted; ) { \ + (lkp)->lk_waitcount++; \ + simple_unlock(&(lkp)->lk_interlock); \ + error = tsleep((void *)lkp, (lkp)->lk_prio, \ + (lkp)->lk_wmesg, (lkp)->lk_timo); \ + simple_lock(&(lkp)->lk_interlock); \ + (lkp)->lk_waitcount--; \ + if (error) \ + break; \ + if ((extflags) & LK_SLEEPFAIL) { \ + error = ENOLCK; \ + break; \ + } \ + } + +/* + * Initialize a lock; required before use. + */ +void +lockinit(lkp, prio, wmesg, timo, flags) + struct lock *lkp; + int prio; + char *wmesg; + int timo; + int flags; +{ + + bzero(lkp, sizeof(struct lock)); + simple_lock_init(&lkp->lk_interlock); + lkp->lk_flags = flags & LK_EXTFLG_MASK; + lkp->lk_prio = prio; + lkp->lk_timo = timo; + lkp->lk_wmesg = wmesg; + lkp->lk_lockholder = LK_NOPROC; +} + +/* + * Determine the status of a lock. + */ +int +lockstatus(lkp) + struct lock *lkp; +{ + int lock_type = 0; + + simple_lock(&lkp->lk_interlock); + if (lkp->lk_exclusivecount != 0) + lock_type = LK_EXCLUSIVE; + else if (lkp->lk_sharecount != 0) + lock_type = LK_SHARED; + simple_unlock(&lkp->lk_interlock); + return (lock_type); +} + +/* + * Set, change, or release a lock. + * + * Shared requests increment the shared count. Exclusive requests set the + * LK_WANT_EXCL flag (preventing further shared locks), and wait for already + * accepted shared locks and shared-to-exclusive upgrades to go away. + */ +int +lockmgr(lkp, flags, interlkp, p) + __volatile struct lock *lkp; + u_int flags; + struct simplelock *interlkp; + struct proc *p; +{ + int error; + pid_t pid; + int extflags; + + error = 0; + if (p) + pid = p->p_pid; + else + pid = LK_KERNPROC; + simple_lock(&lkp->lk_interlock); + if (flags & LK_INTERLOCK) + simple_unlock(interlkp); + extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK; +#ifdef DIAGNOSTIC + /* + * Once a lock has drained, the LK_DRAINING flag is set and an + * exclusive lock is returned. The only valid operation thereafter + * is a single release of that exclusive lock. This final release + * clears the LK_DRAINING flag and sets the LK_DRAINED flag. Any + * further requests of any sort will result in a panic. The bits + * selected for these two flags are chosen so that they will be set + * in memory that is freed (freed memory is filled with 0xdeadbeef). + * The final release is permitted to give a new lease on life to + * the lock by specifying LK_REENABLE. + */ + if (lkp->lk_flags & (LK_DRAINING|LK_DRAINED)) { + if (lkp->lk_flags & LK_DRAINED) + panic("lockmgr: using decommissioned lock"); + if ((flags & LK_TYPE_MASK) != LK_RELEASE || + lkp->lk_lockholder != pid) + panic("lockmgr: non-release on draining lock: %d\n", + flags & LK_TYPE_MASK); + lkp->lk_flags &= ~LK_DRAINING; + if ((flags & LK_REENABLE) == 0) + lkp->lk_flags |= LK_DRAINED; + } +#endif DIAGNOSTIC + + switch (flags & LK_TYPE_MASK) { + + case LK_SHARED: + if (lkp->lk_lockholder != pid) { + /* + * If just polling, check to see if we will block. + */ + if ((extflags & LK_NOWAIT) && (lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE))) { + error = EBUSY; + break; + } + /* + * Wait for exclusive locks and upgrades to clear. + */ + ACQUIRE(lkp, error, extflags, lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)); + if (error) + break; + lkp->lk_sharecount++; + COUNT(p, 1); + break; + } + /* + * We hold an exclusive lock, so downgrade it to shared. + * An alternative would be to fail with EDEADLK. 
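
(A minimal sketch of the lockmgr() life cycle defined in this file,
assuming a caller running as curproc: the lock is initialized once with
lockinit(), then each request passes a type flag such as LK_SHARED,
LK_EXCLUSIVE, or LK_RELEASE together with the requesting process. The
"examplk" wait message is made up for illustration.)

	struct lock lk;

	lockinit(&lk, PVFS, "examplk", 0, 0);	/* once, before first use */

	if (lockmgr(&lk, LK_SHARED, NULL, curproc) == 0) {
		/* ... any number of readers may hold LK_SHARED ... */
		lockmgr(&lk, LK_RELEASE, NULL, curproc);
	}
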
+ */ + lkp->lk_sharecount++; + COUNT(p, 1); + /* fall into downgrade */ + + case LK_DOWNGRADE: + if (lkp->lk_lockholder != pid || lkp->lk_exclusivecount == 0) + panic("lockmgr: not holding exclusive lock"); + lkp->lk_sharecount += lkp->lk_exclusivecount; + lkp->lk_exclusivecount = 0; + lkp->lk_flags &= ~LK_HAVE_EXCL; + lkp->lk_lockholder = LK_NOPROC; + if (lkp->lk_waitcount) + wakeup((void *)lkp); + break; + + case LK_EXCLUPGRADE: + /* + * If another process is ahead of us to get an upgrade, + * then we want to fail rather than have an intervening + * exclusive access. + */ + if (lkp->lk_flags & LK_WANT_UPGRADE) { + lkp->lk_sharecount--; + COUNT(p, -1); + error = EBUSY; + break; + } + /* fall into normal upgrade */ + + case LK_UPGRADE: + /* + * Upgrade a shared lock to an exclusive one. If another + * shared lock has already requested an upgrade to an + * exclusive lock, our shared lock is released and an + * exclusive lock is requested (which will be granted + * after the upgrade). If we return an error, the file + * will always be unlocked. + */ + if (lkp->lk_lockholder == pid || lkp->lk_sharecount <= 0) + panic("lockmgr: upgrade exclusive lock"); + lkp->lk_sharecount--; + COUNT(p, -1); + /* + * If we are just polling, check to see if we will block. + */ + if ((extflags & LK_NOWAIT) && + ((lkp->lk_flags & LK_WANT_UPGRADE) || + lkp->lk_sharecount > 1)) { + error = EBUSY; + break; + } + if ((lkp->lk_flags & LK_WANT_UPGRADE) == 0) { + /* + * We are first shared lock to request an upgrade, so + * request upgrade and wait for the shared count to + * drop to zero, then take exclusive lock. + */ + lkp->lk_flags |= LK_WANT_UPGRADE; + ACQUIRE(lkp, error, extflags, lkp->lk_sharecount); + lkp->lk_flags &= ~LK_WANT_UPGRADE; + if (error) + break; + lkp->lk_flags |= LK_HAVE_EXCL; + lkp->lk_lockholder = pid; + if (lkp->lk_exclusivecount != 0) + panic("lockmgr: non-zero exclusive count"); + lkp->lk_exclusivecount = 1; + COUNT(p, 1); + break; + } + /* + * Someone else has requested upgrade. Release our shared + * lock, awaken upgrade requestor if we are the last shared + * lock, then request an exclusive lock. + */ + if (lkp->lk_sharecount == 0 && lkp->lk_waitcount) + wakeup((void *)lkp); + /* fall into exclusive request */ + + case LK_EXCLUSIVE: + if (lkp->lk_lockholder == pid && pid != LK_KERNPROC) { + /* + * Recursive lock. + */ + if ((extflags & LK_CANRECURSE) == 0) + panic("lockmgr: locking against myself"); + lkp->lk_exclusivecount++; + COUNT(p, 1); + break; + } + /* + * If we are just polling, check to see if we will sleep. + */ + if ((extflags & LK_NOWAIT) && ((lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) || + lkp->lk_sharecount != 0)) { + error = EBUSY; + break; + } + /* + * Try to acquire the want_exclusive flag. + */ + ACQUIRE(lkp, error, extflags, lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL)); + if (error) + break; + lkp->lk_flags |= LK_WANT_EXCL; + /* + * Wait for shared locks and upgrades to finish. 
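
(Continuing the sketch: the upgrade and downgrade cases above convert a
held lock in place. Per the comment on LK_UPGRADE, a failed upgrade
always leaves the lock released, so a caller must be prepared to
re-acquire from scratch.)

	/* holding LK_SHARED; ask to convert it to an exclusive hold */
	if (lockmgr(&lk, LK_UPGRADE, NULL, curproc) != 0) {
		/* on failure the shared hold is gone; re-acquire */
		lockmgr(&lk, LK_EXCLUSIVE, NULL, curproc);
	}
	/* ... modify the protected object ... */
	lockmgr(&lk, LK_DOWNGRADE, NULL, curproc);	/* back to shared */
	lockmgr(&lk, LK_RELEASE, NULL, curproc);
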
+ */ + ACQUIRE(lkp, error, extflags, lkp->lk_sharecount != 0 || + (lkp->lk_flags & LK_WANT_UPGRADE)); + lkp->lk_flags &= ~LK_WANT_EXCL; + if (error) + break; + lkp->lk_flags |= LK_HAVE_EXCL; + lkp->lk_lockholder = pid; + if (lkp->lk_exclusivecount != 0) + panic("lockmgr: non-zero exclusive count"); + lkp->lk_exclusivecount = 1; + COUNT(p, 1); + break; + + case LK_RELEASE: + if (lkp->lk_exclusivecount != 0) { + if (pid != lkp->lk_lockholder) + panic("lockmgr: pid %d, not %s %d unlocking", + pid, "exclusive lock holder", + lkp->lk_lockholder); + lkp->lk_exclusivecount--; + COUNT(p, -1); + if (lkp->lk_exclusivecount == 0) { + lkp->lk_flags &= ~LK_HAVE_EXCL; + lkp->lk_lockholder = LK_NOPROC; + } + } else if (lkp->lk_sharecount != 0) { + lkp->lk_sharecount--; + COUNT(p, -1); + } + if (lkp->lk_waitcount) + wakeup((void *)lkp); + break; + + case LK_DRAIN: + /* + * Check that we do not already hold the lock, as it can + * never drain if we do. Unfortunately, we have no way to + * check for holding a shared lock, but at least we can + * check for an exclusive one. + */ + if (lkp->lk_lockholder == pid) + panic("lockmgr: draining against myself"); + /* + * If we are just polling, check to see if we will sleep. + */ + if ((extflags & LK_NOWAIT) && ((lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) || + lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0)) { + error = EBUSY; + break; + } + PAUSE(lkp, ((lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) || + lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0)); + for (error = 0; ((lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) || + lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0); ) { + lkp->lk_flags |= LK_WAITDRAIN; + simple_unlock(&lkp->lk_interlock); + if ((error = tsleep((void *)&lkp->lk_flags, lkp->lk_prio, + lkp->lk_wmesg, lkp->lk_timo)) != 0) + return (error); + if ((extflags) & LK_SLEEPFAIL) + return (ENOLCK); + simple_lock(&lkp->lk_interlock); + } + lkp->lk_flags |= LK_DRAINING | LK_HAVE_EXCL; + lkp->lk_lockholder = pid; + lkp->lk_exclusivecount = 1; + COUNT(p, 1); + break; + + default: + simple_unlock(&lkp->lk_interlock); + panic("lockmgr: unknown locktype request %d", + flags & LK_TYPE_MASK); + /* NOTREACHED */ + } + if ((lkp->lk_flags & LK_WAITDRAIN) && ((lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) == 0 && + lkp->lk_sharecount == 0 && lkp->lk_waitcount == 0)) { + lkp->lk_flags &= ~LK_WAITDRAIN; + wakeup((void *)&lkp->lk_flags); + } + simple_unlock(&lkp->lk_interlock); + return (error); +} + +/* + * Print out information about state of a lock. Used by VOP_PRINT + * routines to display ststus about contained locks. + */ +void +lockmgr_printinfo(lkp) + struct lock *lkp; +{ + + if (lkp->lk_sharecount) + printf(" lock type %s: SHARED (count %d)", lkp->lk_wmesg, + lkp->lk_sharecount); + else if (lkp->lk_flags & LK_HAVE_EXCL) + printf(" lock type %s: EXCL (count %d) by pid %d", + lkp->lk_wmesg, lkp->lk_exclusivecount, lkp->lk_lockholder); + if (lkp->lk_waitcount > 0) + printf(" with %d pending", lkp->lk_waitcount); +} + +#if defined(DEBUG) && NCPUS == 1 +#include <sys/kernel.h> +#include <vm/vm.h> +#include <sys/sysctl.h> +int lockpausetime = 0; +struct ctldebug debug2 = { "lockpausetime", &lockpausetime }; +int simplelockrecurse; +/* + * Simple lock functions so that the debugger can see from whence + * they are being called. 
+ */ +void +simple_lock_init(alp) + struct simplelock *alp; +{ + + alp->lock_data = 0; +} + +void +_simple_lock(alp, id, l) + __volatile struct simplelock *alp; + const char *id; + int l; +{ + + if (simplelockrecurse) + return; + if (alp->lock_data == 1) { + if (lockpausetime == -1) + panic("%s:%d: simple_lock: lock held", id, l); + printf("%s:%d: simple_lock: lock held\n", id, l); + if (lockpausetime == 1) { + BACKTRACE(curproc); + } else if (lockpausetime > 1) { + printf("%s:%d: simple_lock: lock held...", id, l); + tsleep(&lockpausetime, PCATCH | PPAUSE, "slock", + lockpausetime * hz); + printf(" continuing\n"); + } + } + alp->lock_data = 1; + if (curproc) + curproc->p_simple_locks++; +} + +int +_simple_lock_try(alp, id, l) + __volatile struct simplelock *alp; + const char *id; + int l; +{ + + if (alp->lock_data) + return (0); + if (simplelockrecurse) + return (1); + alp->lock_data = 1; + if (curproc) + curproc->p_simple_locks++; + return (1); +} + +void +_simple_unlock(alp, id, l) + __volatile struct simplelock *alp; + const char *id; + int l; +{ + + if (simplelockrecurse) + return; + if (alp->lock_data == 0) { + if (lockpausetime == -1) + panic("%s:%d: simple_unlock: lock not held", id, l); + printf("%s:%d: simple_unlock: lock not held\n", id, l); + if (lockpausetime == 1) { + BACKTRACE(curproc); + } else if (lockpausetime > 1) { + printf("%s:%d: simple_unlock: lock not held...", id, l); + tsleep(&lockpausetime, PCATCH | PPAUSE, "sunlock", + lockpausetime * hz); + printf(" continuing\n"); + } + } + alp->lock_data = 0; + if (curproc) + curproc->p_simple_locks--; +} +#endif /* DEBUG && NCPUS == 1 */ diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 98bc10fa302..2e29983bea4 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_sig.c,v 1.19 1997/09/15 05:46:13 millert Exp $ */ +/* $OpenBSD: kern_sig.c,v 1.20 1997/10/06 15:12:21 csapuntz Exp $ */ /* $NetBSD: kern_sig.c,v 1.54 1996/04/22 01:38:32 christos Exp $ */ /* @@ -1213,7 +1213,7 @@ coredump(p) UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, NULL, p); } out: - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); error1 = vn_close(vp, FWRITE, cred, p); crfree(cred); if (error == 0) diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 91b556e6067..93d2459035d 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_synch.c,v 1.7 1997/07/28 09:13:17 deraadt Exp $ */ +/* $OpenBSD: kern_synch.c,v 1.8 1997/10/06 15:12:23 csapuntz Exp $ */ /* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */ /*- @@ -174,7 +174,6 @@ schedcpu(arg) register int s; register unsigned int newcpu; - wakeup((caddr_t)&lbolt); for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { /* * Increment time in/out of memory and sleep time @@ -223,6 +222,7 @@ schedcpu(arg) splx(s); } vmmeter(); + wakeup((caddr_t)&lbolt); timeout(schedcpu, (void *)0, hz); } diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 2ab8e6d63c0..923a4bd9077 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_sysctl.c,v 1.19 1997/09/03 13:51:08 kstailey Exp $ */ +/* $OpenBSD: kern_sysctl.c,v 1.20 1997/10/06 15:12:25 csapuntz Exp $ */ /* $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $ */ /*- @@ -110,7 +110,7 @@ sys___sysctl(p, v, retval) switch (name[0]) { case CTL_KERN: fn = kern_sysctl; - if (name[2] != KERN_VNODE) /* XXX */ + if (name[2] == KERN_VNODE) /* XXX */ dolock = 0; break; case CTL_HW: @@ -125,6 +125,9 @@ 
sys___sysctl(p, v, retval) case CTL_FS: fn = fs_sysctl; break; + case CTL_VFS: + fn = vfs_sysctl; + break; case CTL_MACHDEP: fn = cpu_sysctl; break; @@ -264,7 +267,7 @@ kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) return (sysctl_rdstruct(oldp, oldlenp, newp, &boottime, sizeof(struct timeval))); case KERN_VNODE: - return (sysctl_vnode(oldp, oldlenp)); + return (sysctl_vnode(oldp, oldlenp, p)); case KERN_PROC: return (sysctl_doproc(name + 1, namelen - 1, oldp, oldlenp)); case KERN_FILE: diff --git a/sys/kern/subr_xxx.c b/sys/kern/subr_xxx.c index 0d44bc841b4..a19a8a11a1e 100644 --- a/sys/kern/subr_xxx.c +++ b/sys/kern/subr_xxx.c @@ -1,4 +1,4 @@ -/* $OpenBSD: subr_xxx.c,v 1.3 1997/02/24 14:19:58 niklas Exp $ */ +/* $OpenBSD: subr_xxx.c,v 1.4 1997/10/06 15:12:26 csapuntz Exp $ */ /* $NetBSD: subr_xxx.c,v 1.10 1996/02/04 02:16:51 christos Exp $ */ /* @@ -91,8 +91,10 @@ enosys () * Return error for operation not supported * on a specific object or file type. */ +/*ARGSUSED*/ int -eopnotsupp() +eopnotsupp(v) + void *v; { return (EOPNOTSUPP); diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index ef811b417ba..ab02e3bb9d2 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sys_generic.c,v 1.8 1997/08/31 20:42:21 deraadt Exp $ */ +/* $OpenBSD: sys_generic.c,v 1.9 1997/10/06 15:12:28 csapuntz Exp $ */ /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ /* @@ -178,18 +178,12 @@ sys_readv(p, v, retval) goto done; auio.uio_resid = 0; for (i = 0; i < SCARG(uap, iovcnt); i++) { -#if 0 - /* Cannot happen iov_len is unsigned */ - if (iov->iov_len < 0) { + if (auio.uio_resid + iov->iov_len < auio.uio_resid) { error = EINVAL; goto done; } -#endif + auio.uio_resid += iov->iov_len; - if (auio.uio_resid < 0) { - error = EINVAL; - goto done; - } iov++; } #ifdef KTRACE @@ -337,18 +331,12 @@ sys_writev(p, v, retval) goto done; auio.uio_resid = 0; for (i = 0; i < SCARG(uap, iovcnt); i++) { -#if 0 - /* Cannot happen iov_len is unsigned */ - if (iov->iov_len < 0) { + if (auio.uio_resid + iov->iov_len < auio.uio_resid) { error = EINVAL; goto done; } -#endif + auio.uio_resid += iov->iov_len; - if (auio.uio_resid < 0) { - error = EINVAL; - goto done; - } iov++; } #ifdef KTRACE diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 78f38f71f29..b4f48d76604 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sys_pipe.c,v 1.5 1997/02/24 14:19:58 niklas Exp $ */ +/* $OpenBSD: sys_pipe.c,v 1.6 1997/10/06 15:12:29 csapuntz Exp $ */ /* * Copyright (c) 1996 John S. 
Dyson @@ -77,7 +77,7 @@ #include <vm/vm.h> #include <vm/vm_prot.h> #include <vm/vm_param.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/vm_object.h> #include <vm/vm_kern.h> #include <vm/vm_extern.h> diff --git a/sys/kern/tty.c b/sys/kern/tty.c index 7548fd6befc..9a593842688 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tty.c,v 1.27 1997/03/26 18:03:57 deraadt Exp $ */ +/* $OpenBSD: tty.c,v 1.28 1997/10/06 15:12:31 csapuntz Exp $ */ /* $NetBSD: tty.c,v 1.68.4.2 1996/06/06 16:04:52 thorpej Exp $ */ /*- @@ -812,9 +812,9 @@ ttioctl(tp, cmd, data, flag, p) error = namei(&nid); if (error) return (error); - VOP_LOCK(nid.ni_vp); + vn_lock(nid.ni_vp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(nid.ni_vp, VREAD, p->p_ucred, p); - VOP_UNLOCK(nid.ni_vp); + VOP_UNLOCK(nid.ni_vp, 0, p); vrele(nid.ni_vp); if (error) return (error); diff --git a/sys/kern/tty_tty.c b/sys/kern/tty_tty.c index 38c033636a4..0f70fcbda85 100644 --- a/sys/kern/tty_tty.c +++ b/sys/kern/tty_tty.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tty_tty.c,v 1.3 1996/04/21 22:27:32 deraadt Exp $ */ +/* $OpenBSD: tty_tty.c,v 1.4 1997/10/06 15:12:32 csapuntz Exp $ */ /* $NetBSD: tty_tty.c,v 1.13 1996/03/30 22:24:46 christos Exp $ */ /*- @@ -63,7 +63,7 @@ cttyopen(dev, flag, mode, p) if (ttyvp == NULL) return (ENXIO); - VOP_LOCK(ttyvp); + vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); #ifdef PARANOID /* * Since group is tty and mode is 620 on most terminal lines @@ -78,7 +78,7 @@ cttyopen(dev, flag, mode, p) if (!error) #endif /* PARANOID */ error = VOP_OPEN(ttyvp, flag, NOCRED, p); - VOP_UNLOCK(ttyvp); + VOP_UNLOCK(ttyvp, 0, p); return (error); } @@ -89,14 +89,15 @@ cttyread(dev, uio, flag) struct uio *uio; int flag; { + struct proc *p = uio->uio_procp; register struct vnode *ttyvp = cttyvp(uio->uio_procp); int error; if (ttyvp == NULL) return (EIO); - VOP_LOCK(ttyvp); + vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_READ(ttyvp, uio, flag, NOCRED); - VOP_UNLOCK(ttyvp); + VOP_UNLOCK(ttyvp, 0, p); return (error); } @@ -107,14 +108,15 @@ cttywrite(dev, uio, flag) struct uio *uio; int flag; { + struct proc *p = uio->uio_procp; register struct vnode *ttyvp = cttyvp(uio->uio_procp); int error; if (ttyvp == NULL) return (EIO); - VOP_LOCK(ttyvp); + vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_WRITE(ttyvp, uio, flag, NOCRED); - VOP_UNLOCK(ttyvp); + VOP_UNLOCK(ttyvp, 0, p); return (error); } diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index f1843da7ccc..58e0fcc5bad 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_usrreq.c,v 1.4 1997/06/05 08:13:12 deraadt Exp $ */ +/* $OpenBSD: uipc_usrreq.c,v 1.5 1997/10/06 15:12:33 csapuntz Exp $ */ /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ /* @@ -427,7 +427,7 @@ unp_bind(unp, nam, p) vp->v_socket = unp->unp_socket; unp->unp_vnode = vp; unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (0); } diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index d1d4592820c..c8e596db9a9 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_bio.c,v 1.15 1997/06/14 06:10:36 tholo Exp $ */ +/* $OpenBSD: vfs_bio.c,v 1.16 1997/10/06 15:12:35 csapuntz Exp $ */ /* $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $ */ /*- @@ -63,6 +63,8 @@ #include <vm/vm.h> +#include <miscfs/specfs/specdev.h> + /* Macros to clear/set/test flags. 
*/ #define SET(t, f) (t) |= (f) #define CLR(t, f) (t) &= ~(f) @@ -94,6 +96,7 @@ u_long bufhash; TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES]; int needbuffer; +struct bio_ops bioops; /* * Insq/Remq for the buffer free lists. @@ -139,7 +142,6 @@ bufinit() register int i; int base, residual; - TAILQ_INIT(&bdirties); for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) TAILQ_INIT(dp); bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash); @@ -153,6 +155,7 @@ bufinit() bp->b_wcred = NOCRED; bp->b_vnbufs.le_next = NOLIST; bp->b_data = buffers + i * MAXBSIZE; + LIST_INIT(&bp->b_dep); if (i < residual) bp->b_bufsize = (base + 1) * CLBYTES; else @@ -278,7 +281,7 @@ int bwrite(bp) struct buf *bp; { - int rv, sync, wasdelayed, s; + int rv, async, wasdelayed, s; /* * Remember buffer type, to switch on it later. If the write was @@ -287,34 +290,28 @@ bwrite(bp) * XXX note that this relies on delayed tape writes being converted * to async, not sync writes (which is safe, but ugly). */ - sync = !ISSET(bp->b_flags, B_ASYNC); - if (sync && bp->b_vp && bp->b_vp->v_mount && + async = ISSET(bp->b_flags, B_ASYNC); + if (!async && bp->b_vp && bp->b_vp->v_mount && ISSET(bp->b_vp->v_mount->mnt_flag, MNT_ASYNC)) { bdwrite(bp); return (0); } wasdelayed = ISSET(bp->b_flags, B_DELWRI); CLR(bp->b_flags, (B_READ | B_DONE | B_ERROR | B_DELWRI)); + + s = splbio(); + /* - * If this was a delayed write, remove it from the - * list of dirty blocks now + * If not synchronous, pay for the I/O operation and make + * sure the buf is on the correct vnode queue. We have + * to do this now, because if we don't, the vnode may not + * be properly notified that its I/O has completed. */ if (wasdelayed) - TAILQ_REMOVE(&bdirties, bp, b_synclist); - - s = splbio(); - if (!sync) { - /* - * If not synchronous, pay for the I/O operation and make - * sure the buf is on the correct vnode queue. We have - * to do this now, because if we don't, the vnode may not - * be properly notified that its I/O has completed. - */ - if (wasdelayed) - reassignbuf(bp, bp->b_vp); - else - curproc->p_stats->p_ru.ru_oublock++; - } + reassignbuf(bp, bp->b_vp); + else + curproc->p_stats->p_ru.ru_oublock++; + /* Initiate disk write. Make sure the appropriate party is charged. */ bp->b_vp->v_numoutput++; @@ -322,31 +319,18 @@ bwrite(bp) SET(bp->b_flags, B_WRITEINPROG); VOP_STRATEGY(bp); - if (sync) { - /* - * If I/O was synchronous, wait for it to complete. - */ - rv = biowait(bp); + if (async) + return (0); - /* - * Pay for the I/O operation, if it's not been paid for, and - * make sure it's on the correct vnode queue. (async operatings - * were payed for above.) - */ - s = splbio(); - if (wasdelayed) - reassignbuf(bp, bp->b_vp); - else - curproc->p_stats->p_ru.ru_oublock++; - splx(s); + /* + * If I/O was synchronous, wait for it to complete. + */ + rv = biowait(bp); - /* Release the buffer. */ - brelse(bp); + /* Release the buffer. */ + brelse(bp); - return (rv); - } else { - return (0); - } + return (rv); } int @@ -382,23 +366,10 @@ bdwrite(bp) * (3) Make sure it's on its vnode's correct block list, * (4) If a buffer is rewritten, move it to end of dirty list */ - bp->b_synctime = time.tv_sec + 30; if (!ISSET(bp->b_flags, B_DELWRI)) { - /* - * Add the buffer to the list of dirty blocks. 
- * If it is the first entry on the list, schedule - * a timeout to flush it to disk - */ - TAILQ_INSERT_TAIL(&bdirties, bp, b_synclist); - if (bdirties.tqh_first == bp) { - untimeout((void (*)__P((void *)))wakeup, - &bdirties); /* XXX */ - timeout((void (*)__P((void *)))wakeup, - &bdirties, 30 * hz); - } SET(bp->b_flags, B_DELWRI); - curproc->p_stats->p_ru.ru_oublock++; /* XXX */ reassignbuf(bp, bp->b_vp); + curproc->p_stats->p_ru.ru_oublock++; /* XXX */ } /* If this is a tape block, write the block now. */ @@ -426,142 +397,17 @@ bawrite(bp) VOP_BWRITE(bp); } -/* - * Write out dirty buffers if they have been on the dirty - * list for more than 30 seconds; scan for such buffers - * once a second. - */ void -vn_update() -{ - struct mount *mp, *nmp; - struct timespec ts; - struct vnode *vp; +bdirty(bp) struct buf *bp; - int async, s; +{ + struct proc *p = curproc; /* XXX */ - /* - * In case any buffers got scheduled for write before the - * process got started (should never happen) - */ - untimeout((void (*)__P((void *)))wakeup, - &bdirties); - for (;;) { - s = splbio(); - /* - * Schedule a wakeup when the next buffer is to - * be flushed to disk. If no buffers are enqueued, - * a wakeup will be scheduled at the time a new - * buffer is enqueued - */ - if ((bp = bdirties.tqh_first) != NULL) { - untimeout((void (*)__P((void *)))wakeup, - &bdirties); /* XXX */ - timeout((void (*)__P((void *)))wakeup, - &bdirties, (bp->b_synctime - time.tv_sec) * hz); - } - tsleep(&bdirties, PZERO - 1, "dirty", 0); - /* - * Walk the dirty block list, starting an asyncroneous - * write of any block that has timed out - */ - while ((bp = bdirties.tqh_first) != NULL && - bp->b_synctime <= time.tv_sec) { - /* - * If the block is currently busy (perhaps being - * written), move it to the end of the dirty list - * and go to the next block - */ - if (ISSET(bp->b_flags, B_BUSY)) { - TAILQ_REMOVE(&bdirties, bp, b_synclist); - TAILQ_INSERT_TAIL(&bdirties, bp, b_synclist); - bp->b_synctime = time.tv_sec + 30; - continue; - } - /* - * Remove the block from the per-vnode dirty - * list and mark it as busy - */ - bremfree(bp); - SET(bp->b_flags, B_BUSY); - splx(s); - /* - * Start an asyncroneous write of the buffer. 
- * Note that this will also remove the buffer - * from the dirty list - */ - bawrite(bp); - s = splbio(); - } - splx(s); - /* - * We also need to flush out modified vnodes - */ - for (mp = mountlist.cqh_last; - mp != (void *)&mountlist; - mp = nmp) { - /* - * Get the next pointer in case we hang of vfs_busy() - * while being unmounted - */ - nmp = mp->mnt_list.cqe_prev; - /* - * The lock check below is to avoid races with mount - * and unmount - */ - if ((mp->mnt_flag & (MNT_MLOCK | MNT_RDONLY | MNT_MPBUSY)) == 0 && - !vfs_busy(mp)) { - /* - * Turn off the file system async flag until - * we are done writing out vnodes - */ - async = mp->mnt_flag & MNT_ASYNC; - mp->mnt_flag &= ~MNT_ASYNC; - /* - * Walk the vnode list for the file system, - * writing each modified vnode out - */ -loop: - for (vp = mp->mnt_vnodelist.lh_first; - vp != NULL; - vp = vp->v_mntvnodes.le_next) { - /* - * If the vnode is no longer associated - * with the file system in question, skip - * it - */ - if (vp->v_mount != mp) - goto loop; - /* - * If the vnode is currently locked, - * ignore it - */ - if (VOP_ISLOCKED(vp)) - continue; - /* - * Lock the vnode, start a write and - * release the vnode - */ - if (vget(vp, 1)) - goto loop; - TIMEVAL_TO_TIMESPEC(&time, &ts); - VOP_UPDATE(vp, &ts, &ts, 0); - vput(vp); - } - /* - * Restore the file system async flag if it - * were previously set for this file system - */ - mp->mnt_flag |= async; - /* - * Get the next pointer again as the next - * file system might have been unmounted - * while we were flushing vnodes - */ - nmp = mp->mnt_list.cqe_prev; - vfs_unbusy(mp); - } - } + if (ISSET(bp->b_flags, B_DELWRI) == 0) { + SET(bp->b_flags, B_DELWRI); + reassignbuf(bp, bp->b_vp); + if (p) + p->p_stats->p_ru.ru_oublock++; } } @@ -576,18 +422,6 @@ brelse(bp) struct bqueues *bufq; int s; - /* Wake up any processes waiting for any buffer to become free. */ - if (needbuffer) { - needbuffer = 0; - wakeup(&needbuffer); - } - - /* Wake up any proceeses waiting for _this_ buffer to become free. */ - if (ISSET(bp->b_flags, B_WANTED)) { - CLR(bp->b_flags, B_WANTED); - wakeup(bp); - } - /* Block disk interrupts. */ s = splbio(); @@ -622,11 +456,14 @@ brelse(bp) * If it's invalid or empty, dissociate it from its vnode * and put on the head of the appropriate queue. */ - if (bp->b_vp) - brelvp(bp); - if (ISSET(bp->b_flags, B_DELWRI)) - TAILQ_REMOVE(&bdirties, bp, b_synclist); + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate) { + (*bioops.io_deallocate)(bp); + } CLR(bp->b_flags, B_DELWRI); + if (bp->b_vp) { + reassignbuf(bp, bp->b_vp); + brelvp(bp); + } if (bp->b_bufsize <= 0) /* no data */ bufq = &bufqueues[BQ_EMPTY]; @@ -657,6 +494,18 @@ already_queued: /* Allow disk interrupts. */ splx(s); + + /* Wake up any processes waiting for any buffer to become free. */ + if (needbuffer) { + needbuffer = 0; + wakeup(&needbuffer); + } + + /* Wake up any proceeses waiting for _this_ buffer to become free. 
*/ + if (ISSET(bp->b_flags, B_WANTED)) { + CLR(bp->b_flags, B_WANTED); + wakeup(bp); + } } /* @@ -806,7 +655,7 @@ allocbuf(bp, size) /* find a buffer */ while ((nbp = getnewbuf(0, 0)) == NULL) ; - SET(nbp->b_flags, B_INVAL); + SET(nbp->b_flags, B_INVAL); binshash(nbp, &invalhash); /* and steal its pages, up to the amount we need */ @@ -875,16 +724,16 @@ getnewbuf(slpflag, slptimeo) start: s = splbio(); - if ((bp = bufqueues[BQ_AGE].tqh_first) != NULL || - (bp = bufqueues[BQ_LRU].tqh_first) != NULL) { - bremfree(bp); - } else { + if ((bp = bufqueues[BQ_AGE].tqh_first) == NULL && + (bp = bufqueues[BQ_LRU].tqh_first) == NULL) { /* wait for a free buffer of any kind */ needbuffer = 1; tsleep(&needbuffer, slpflag|(PRIBIO+1), "getnewbuf", slptimeo); splx(s); return (0); - } + } + + bremfree(bp); if (ISSET(bp->b_flags, B_VFLUSH)) { /* @@ -916,8 +765,12 @@ start: /* disassociate us from our vnode, if we had one... */ if (bp->b_vp) brelvp(bp); + splx(s); + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate) + (*bioops.io_deallocate)(bp); + /* clear out various other fields */ bp->b_flags = B_BUSY; bp->b_dev = NODEV; @@ -962,7 +815,9 @@ biowait(bp) if (ISSET(bp->b_flags, B_EINTR)) { CLR(bp->b_flags, B_EINTR); return (EINTR); - } else if (ISSET(bp->b_flags, B_ERROR)) + } + + if (ISSET(bp->b_flags, B_ERROR)) return (bp->b_error ? bp->b_error : EIO); else return (0); @@ -992,13 +847,18 @@ biodone(bp) panic("biodone already"); SET(bp->b_flags, B_DONE); /* note that it's done */ - if (!ISSET(bp->b_flags, B_READ)) /* wake up reader */ - vwakeup(bp); + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete) + (*bioops.io_complete)(bp); if (ISSET(bp->b_flags, B_CALL)) { /* if necessary, call out */ CLR(bp->b_flags, B_CALL); /* but note callout done */ (*bp->b_iodone)(bp); - } else if (ISSET(bp->b_flags, B_ASYNC)) /* if async, release it */ + } + + if (!ISSET(bp->b_flags, B_READ)) /* wake up reader */ + vwakeup(bp); + + if (ISSET(bp->b_flags, B_ASYNC)) /* if async, release it */ brelse(bp); else { /* or just wakeup the buffer */ CLR(bp->b_flags, B_WANTED); diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index d3fc332b376..87b024600bc 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_cluster.c,v 1.8 1997/09/27 06:56:18 niklas Exp $ */ +/* $OpenBSD: vfs_cluster.c,v 1.9 1997/10/06 15:12:36 csapuntz Exp $ */ /* $NetBSD: vfs_cluster.c,v 1.12 1996/04/22 01:39:05 christos Exp $ */ /*- @@ -48,15 +48,6 @@ #include <vm/vm.h> -#ifdef DEBUG -#include <sys/sysctl.h> -int doreallocblks = 0; -struct ctldebug debug13 = { "doreallocblks", &doreallocblks }; -#else -/* XXX for cluster_write */ -#define doreallocblks 0 -#endif - /* * Local declarations */ @@ -518,8 +509,7 @@ cluster_write(bp, filesize) * Otherwise try reallocating to make it sequential. 
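
(The vfs_bio.c changes above replace the old 30-second dirty-buffer
list with a global struct bio_ops callback table; its io_start,
io_complete, and io_deallocate hooks are consulted from the cluster,
brelse, biodone, and getnewbuf paths so that a filesystem can track
per-buffer dependencies. A hedged sketch of how a filesystem might hook
it; myfs_init and myfs_deps_complete are hypothetical names, not part
of this diff:)

	extern struct bio_ops bioops;

	void	myfs_deps_complete __P((struct buf *));	/* hypothetical */

	void
	myfs_init()
	{
		/* registered once at filesystem initialization time */
		bioops.io_complete = myfs_deps_complete;
	}
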
*/ cursize = vp->v_lastw - vp->v_cstart + 1; - if (!doreallocblks || - (lbn + 1) * bp->b_bcount != filesize || + if ((lbn + 1) * bp->b_bcount != filesize || lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) { cluster_wbuild(vp, NULL, bp->b_bcount, vp->v_cstart, cursize, lbn); @@ -708,13 +698,14 @@ redo: panic("Clustered write to wrong blocks"); } + if (LIST_FIRST(&tbp->b_dep) != NULL && bioops.io_start) + (*bioops.io_start)(tbp); + pagemove(tbp->b_data, cp, size); bp->b_bcount += size; bp->b_bufsize += size; tbp->b_bufsize -= size; - if (tbp->b_flags & B_DELWRI) - TAILQ_REMOVE(&bdirties, tbp, b_synclist); tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); /* * We might as well AGE the buffer here; it's either empty, or diff --git a/sys/kern/vfs_conf.c b/sys/kern/vfs_conf.c index 800aff67811..527450f20e5 100644 --- a/sys/kern/vfs_conf.c +++ b/sys/kern/vfs_conf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_conf.c,v 1.5 1997/02/24 14:20:01 niklas Exp $ */ +/* $OpenBSD: vfs_conf.c,v 1.6 1997/10/06 15:12:37 csapuntz Exp $ */ /* $NetBSD: vfs_conf.c,v 1.21.4.1 1995/11/01 00:06:26 jtc Exp $ */ /* @@ -40,6 +40,28 @@ #include <sys/mount.h> #include <sys/vnode.h> +#ifdef FFS +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ffs/ffs_extern.h> +#endif + +#ifdef CD9660 +#include <isofs/cd9660/iso.h> +#endif + +#ifdef MFS +#include <ufs/mfs/mfs_extern.h> +#endif + +#ifdef NFSCLIENT +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfs/nfsnode.h> +#include <nfs/nfs.h> +#include <nfs/nfsmount.h> +#endif + /* * These define the root filesystem and device. */ @@ -50,6 +72,8 @@ struct vnode *rootvnode; * Set up the filesystem operations for vnodes. * The types are defined in mount.h. */ + + #ifdef FFS extern struct vfsops ffs_vfsops; #endif @@ -115,107 +139,102 @@ extern struct vfsops ext2fs_vfsops; #endif /* - * XXX ORDERING MATTERS, for COMPAT_09. when that goes away, - * empty slots can go away. + * Set up the filesystem operations for vnodes. 
*/ -struct vfsops *vfssw[] = { - NULL, /* 0 = MOUNT_NONE */ +static struct vfsconf vfsconflist[] = { + + /* Fast Filesystem */ #ifdef FFS - &ffs_vfsops, /* 1 = MOUNT_FFS */ -#else - NULL, + { &ffs_vfsops, "ffs", 1, 0, MNT_LOCAL, ffs_mountroot, NULL }, #endif -#ifdef NFSCLIENT - &nfs_vfsops, /* 2 = MOUNT_NFS */ -#else - NULL, + + /* Log-based Filesystem */ +#ifdef LFS + { &lfs_vfsops, "lfs", 5, 0, MNT_LOCAL, lfs_mountroot, NULL }, #endif + + /* Memory-based Filesystem */ #ifdef MFS - &mfs_vfsops, /* 3 = MOUNT_MFS */ -#else - NULL, -#endif -#ifdef MSDOSFS - &msdosfs_vfsops, /* 4 = MOUNT_MSDOS */ -#else - NULL, -#endif -#ifdef LFS - &lfs_vfsops, /* 5 = MOUNT_LFS */ -#else - NULL, + { &mfs_vfsops, "mfs", 3, 0, MNT_LOCAL, mfs_mountroot, NULL }, #endif - NULL, /* 6 = MOUNT_LOFS */ -#ifdef FDESC - &fdesc_vfsops, /* 7 = MOUNT_FDESC */ -#else - NULL, + + /* ISO9660 (aka CDROM) Filesystem */ +#ifdef CD9660 + { &cd9660_vfsops, "cd9660", 14, 0, MNT_LOCAL, cd9660_mountroot, NULL }, #endif -#ifdef PORTAL - &portal_vfsops, /* 8 = MOUNT_PORTAL */ -#else - NULL, + + /* MSDOS Filesystem */ +#ifdef MSDOSFS + { &msdosfs_vfsops, "msdos", 4, 0, MNT_LOCAL, NULL, NULL }, #endif -#ifdef NULLFS - &null_vfsops, /* 9 = MOUNT_NULL */ -#else - NULL, + + /* AmigaDOS Filesystem */ +#ifdef ADOSFS + { &adosfs_vfsops, "adosfs", 16, 0, MNT_LOCAL, NULL, NULL }, #endif -#ifdef UMAPFS - &umap_vfsops, /* 10 = MOUNT_UMAP */ -#else - NULL, + + /* Sun-compatible Network Filesystem */ +#ifdef NFSCLIENT + { &nfs_vfsops, "nfs", 2, 0, 0, nfs_mountroot, NULL }, #endif -#ifdef KERNFS - &kernfs_vfsops, /* 11 = MOUNT_KERNFS */ -#else - NULL, + + /* Andrew Filesystem */ +#ifdef AFS + { &afs_vfsops, "andrewfs", 13, 0, 0, afs_mountroot, NULL }, #endif + + /* /proc Filesystem */ #ifdef PROCFS - &procfs_vfsops, /* 12 = MOUNT_PROCFS */ -#else - NULL, + { &procfs_vfsops, "procfs", 12, 0, 0, NULL, NULL }, #endif -#ifdef AFS - &afs_vfsops, /* 13 = MOUNT_AFS */ -#else - NULL, -#endif -#ifdef CD9660 - &cd9660_vfsops, /* 14 = MOUNT_ISOFS */ -#else - NULL, + + /* Loopback (Minimal) Filesystem Layer */ +#ifdef NULLFS + { &null_vfsops, "loopback", 9, 0, 0, NULL, NULL }, #endif + + /* Union (translucent) Filesystem */ #ifdef UNION - &union_vfsops, /* 15 = MOUNT_UNION */ -#else - NULL, + { &union_vfsops, "union", 15, 0, 0, NULL, NULL }, #endif -#ifdef ADOSFS - &adosfs_vfsops, /* 16 = MOUNT_ADOSFS */ -#else - NULL, + + /* User/Group Identifer Remapping Filesystem */ +#ifdef UMAPFS + { &umap_vfsops, "umap", 10, 0, 0, NULL, NULL }, #endif -#ifdef EXT2FS - &ext2fs_vfsops, /* 17 = MOUNT_EXT2FS */ -#else - NULL, + + /* Portal Filesystem */ +#ifdef PORTAL + { &portal_vfsops, "portal", 8, 0, 0, NULL, NULL }, #endif -#ifdef LKM /* for LKM's. add new FS's before these */ - NULL, - NULL, - NULL, - NULL, + + /* File Descriptor Filesystem */ +#ifdef FDESC + { &fdesc_vfsops, "fdesc", 7, 0, 0, NULL, NULL }, #endif - 0 + + /* Kernel Information Filesystem */ +#ifdef KERNFS + { &kernfs_vfsops, "kernfs", 11, 0, 0, NULL, NULL }, +#endif + }; -int nvfssw = sizeof(vfssw) / sizeof(vfssw[0]); + + +/* + * Initially the size of the list, vfs_init will set maxvfsconf + * to the highest defined type number. + */ +int maxvfsconf = sizeof(vfsconflist) / sizeof (struct vfsconf); +struct vfsconf *vfsconf = vfsconflist; + /* * vfs_opv_descs enumerates the list of vnode classes, each with it's own * vnode operation vector. It is consulted at system boot to build operation * vectors. It is NULL terminated. 
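
(The vfsconf table above replaces the fixed vfssw[] array with a linked
list of per-filesystem descriptors. The layout below is reconstructed
from the fields this diff actually touches (vfc_vfsops, vfc_name,
vfc_typenum, vfc_refcount, vfc_flags, vfc_mountroot, vfc_next); the
authoritative definition lives in <sys/mount.h>, not here:)

	struct vfsconf {
		struct vfsops	*vfc_vfsops;	/* filesystem operation vector */
		char	vfc_name[MFSNAMELEN];	/* filesystem type name */
		int	vfc_typenum;		/* historic filesystem type number */
		int	vfc_refcount;		/* number of active mounts */
		int	vfc_flags;		/* permanent flags, e.g. MNT_LOCAL */
		int	(*vfc_mountroot)	/* mountroot hook, or NULL */
			    __P((void));
		struct vfsconf	*vfc_next;	/* next in the singly linked list */
	};
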
*/ +extern struct vnodeopv_desc sync_vnodeop_opv_desc; extern struct vnodeopv_desc ffs_vnodeop_opv_desc; extern struct vnodeopv_desc ffs_specop_opv_desc; extern struct vnodeopv_desc ffs_fifoop_opv_desc; @@ -246,6 +265,7 @@ extern struct vnodeopv_desc ext2fs_specop_opv_desc; extern struct vnodeopv_desc ext2fs_fifoop_opv_desc; struct vnodeopv_desc *vfs_opv_descs[] = { + &sync_vnodeop_opv_desc, #ifdef FFS &ffs_vnodeop_opv_desc, &ffs_specop_opv_desc, diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c index 2071a8f633f..a10e5a3ff41 100644 --- a/sys/kern/vfs_init.c +++ b/sys/kern/vfs_init.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_init.c,v 1.4 1997/02/24 14:20:02 niklas Exp $ */ +/* $OpenBSD: vfs_init.c,v 1.5 1997/10/06 15:12:39 csapuntz Exp $ */ /* $NetBSD: vfs_init.c,v 1.6 1996/02/09 19:00:58 christos Exp $ */ /* @@ -243,7 +243,8 @@ struct vattr va_null; void vfsinit() { - struct vfsops **vfsp; + struct vfsconf *vfsp; + int i, maxtypenum; /* * Initialize the vnode table @@ -262,9 +263,15 @@ vfsinit() * Initialize each file system type. */ vattr_null(&va_null); - for (vfsp = &vfssw[0]; vfsp < &vfssw[nvfssw]; vfsp++) { - if (*vfsp == NULL) - continue; - (*(*vfsp)->vfs_init)(); - } + maxtypenum = 0; + + for (vfsp = vfsconf, i = 1; i <= maxvfsconf; i++, vfsp++) { + if (i < maxvfsconf) + vfsp->vfc_next = vfsp + 1; + if (maxtypenum <= vfsp->vfc_typenum) + maxtypenum = vfsp->vfc_typenum + 1; + (*vfsp->vfc_vfsops->vfs_init)(vfsp); + } + /* next vfc_typenum to be used */ + maxvfsconf = maxtypenum; } diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 6d3e4f8567c..aa86b97412e 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_lookup.c,v 1.8 1997/06/18 17:37:38 tholo Exp $ */ +/* $OpenBSD: vfs_lookup.c,v 1.9 1997/10/06 15:12:40 csapuntz Exp $ */ /* $NetBSD: vfs_lookup.c,v 1.17 1996/02/09 19:00:59 christos Exp $ */ /* @@ -88,6 +88,7 @@ namei(ndp) struct uio auio; int error, linklen; struct componentname *cnp = &ndp->ni_cnd; + struct proc *p = cnp->cn_proc; ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_proc->p_ucred; #ifdef DIAGNOSTIC @@ -164,7 +165,7 @@ namei(ndp) return (0); } if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) - VOP_UNLOCK(ndp->ni_dvp); + VOP_UNLOCK(ndp->ni_dvp, 0, p); if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; break; @@ -271,7 +272,7 @@ lookup(ndp) int error = 0; int slashes; struct componentname *cnp = &ndp->ni_cnd; - + struct proc *p = cnp->cn_proc; /* * Setup: break out flag bits into variables. 
*/ @@ -285,7 +286,7 @@ lookup(ndp) cnp->cn_flags &= ~ISSYMLINK; dp = ndp->ni_startdir; ndp->ni_startdir = NULLVP; - VOP_LOCK(dp); + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); /* * If we have a leading string of slashes, remove them, and just make @@ -410,7 +411,7 @@ dirloop: dp = dp->v_mount->mnt_vnodecovered; vput(tdp); VREF(dp); - VOP_LOCK(dp); + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); } } @@ -419,6 +420,7 @@ dirloop: */ unionlookup: ndp->ni_dvp = dp; + ndp->ni_vp = NULL; if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) { #ifdef DIAGNOSTIC if (ndp->ni_vp != NULL) @@ -434,7 +436,7 @@ unionlookup: dp = dp->v_mount->mnt_vnodecovered; vput(tdp); VREF(dp); - VOP_LOCK(dp); + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); goto unionlookup; } @@ -491,12 +493,11 @@ unionlookup: */ while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && (cnp->cn_flags & NOCROSSMOUNT) == 0) { - if (mp->mnt_flag & MNT_MLOCK) { - mp->mnt_flag |= MNT_MWAIT; - sleep((caddr_t)mp, PVFS); + if (vfs_busy(mp, 0, 0, p)) continue; - } - if ((error = VFS_ROOT(dp->v_mountedhere, &tdp)) != 0) + error = VFS_ROOT(mp, &tdp); + vfs_unbusy(mp, p); + if (error) goto bad2; vput(dp); ndp->ni_vp = dp = tdp; @@ -558,12 +559,12 @@ terminal: vrele(ndp->ni_dvp); } if ((cnp->cn_flags & LOCKLEAF) == 0) - VOP_UNLOCK(dp); + VOP_UNLOCK(dp, 0, p); return (0); bad2: if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(ndp->ni_dvp); + VOP_UNLOCK(ndp->ni_dvp, 0, p); vrele(ndp->ni_dvp); bad: vput(dp); @@ -579,6 +580,7 @@ relookup(dvp, vpp, cnp) struct vnode *dvp, **vpp; struct componentname *cnp; { + struct proc *p = cnp->cn_proc; register struct vnode *dp = 0; /* the directory we are searching */ int docache; /* == 0 do not cache last component */ int wantparent; /* 1 => wantparent or lockparent flag */ @@ -600,7 +602,7 @@ relookup(dvp, vpp, cnp) rdonly = cnp->cn_flags & RDONLY; cnp->cn_flags &= ~ISSYMLINK; dp = dvp; - VOP_LOCK(dp); + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); /* dirloop: */ /* @@ -694,15 +696,17 @@ relookup(dvp, vpp, cnp) if (!wantparent) vrele(dvp); if ((cnp->cn_flags & LOCKLEAF) == 0) - VOP_UNLOCK(dp); + VOP_UNLOCK(dp, 0, p); return (0); bad2: if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(dvp); + VOP_UNLOCK(dvp, 0, p); vrele(dvp); bad: vput(dp); *vpp = NULL; return (error); } + + diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 10ed04c26ca..f265b15051d 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_subr.c,v 1.10 1997/04/25 09:33:24 deraadt Exp $ */ +/* $OpenBSD: vfs_subr.c,v 1.11 1997/10/06 15:12:42 csapuntz Exp $ */ /* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */ /* @@ -51,6 +51,7 @@ #include <sys/mount.h> #include <sys/time.h> #include <sys/fcntl.h> +#include <sys/kernel.h> #include <sys/vnode.h> #include <sys/stat.h> #include <sys/namei.h> @@ -88,8 +89,28 @@ int suid_clear = 1; /* 1 => clear SUID / SGID on owner change */ LIST_REMOVE(bp, b_vnbufs); \ (bp)->b_vnbufs.le_next = NOLIST; \ } -TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ + +struct freelst vnode_hold_list; /* list of vnodes referencing buffers */ +struct freelst vnode_free_list; /* vnode free list */ + struct mntlist mountlist; /* mounted filesystem list */ +struct simplelock mountlist_slock; +static struct simplelock mntid_slock; +struct simplelock mntvnode_slock; +struct simplelock vnode_free_list_slock; +static struct simplelock spechash_slock; + +/* + * The workitem queue. 
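
(The syncer's workitem queue declared below is a hash wheel:
syncer_workitem_pending is an array of vnode lists sized by hashinit(),
indexed by a slot number masked with syncer_mask, and sched_sync,
started from init_main.c above, is expected to advance syncer_delayno
periodically and flush the vnodes in the current bucket. A hypothetical
enqueue, with v_synclist standing in for the vnode's list linkage,
which is not shown in this section:)

	int slot;

	slot = (syncer_delayno + syncdelay) & syncer_mask;
	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
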
+ */ +#define SYNCER_MAXDELAY 32 +int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ +time_t syncdelay = 30; /* time to delay syncing vnodes */ + +static int syncer_delayno = 0; +static long syncer_mask; +LIST_HEAD(synclist, vnode); +static struct synclist *syncer_workitem_pending; int vfs_lock __P((struct mount *)); void vfs_unlock __P((struct mount *)); @@ -107,15 +128,16 @@ int bdevvp __P((dev_t, struct vnode **)); int cdevvp __P((dev_t, struct vnode **)); int getdevvp __P((dev_t, struct vnode **, enum vtype)); struct vnode *checkalias __P((struct vnode *, dev_t, struct mount *)); -int vget __P((struct vnode *, int)); void vref __P((struct vnode *)); void vput __P((struct vnode *)); void vrele __P((struct vnode *)); +int vunref __P((struct vnode *)); void vhold __P((struct vnode *)); void holdrele __P((struct vnode *)); int vflush __P((struct mount *, struct vnode *, int)); void vgoneall __P((struct vnode *)); void vgone __P((struct vnode *)); +void vgonel __P((struct vnode *, struct proc *)); int vcount __P((struct vnode *)); void vprint __P((char *, struct vnode *)); int vfs_mountedon __P((struct vnode *)); @@ -126,10 +148,10 @@ int vaccess __P((mode_t, uid_t, gid_t, mode_t, struct ucred *)); void vfs_unmountall __P((void)); void vfs_shutdown __P((void)); -static int vfs_hang_addrlist __P((struct mount *, struct netexport *, +int vfs_hang_addrlist __P((struct mount *, struct netexport *, struct export_args *)); -static int vfs_free_netcred __P((struct radix_node *, void *)); -static void vfs_free_addrlist __P((struct netexport *)); +int vfs_free_netcred __P((struct radix_node *, void *)); +void vfs_free_addrlist __P((struct netexport *)); #ifdef DEBUG void printlockedvnodes __P((void)); @@ -142,124 +164,191 @@ void vntblinit() { + simple_lock_init(&mntvnode_slock); + simple_lock_init(&mntid_slock); + simple_lock_init(&spechash_slock); + TAILQ_INIT(&vnode_hold_list); TAILQ_INIT(&vnode_free_list); + simple_lock_init(&vnode_free_list_slock); CIRCLEQ_INIT(&mountlist); + /* + * Initialize the filesystem syncer. + */ + syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, + &syncer_mask); + syncer_maxdelay = syncer_mask + 1; + } + /* - * Lock a filesystem. - * Used to prevent access to it while mounting and unmounting. + * Mark a mount point as busy. Used to synchornize access and to delay + * unmounting. Interlock is not released n failure. */ + int -vfs_lock(mp) - register struct mount *mp; +vfs_busy(mp, flags, interlkp, p) + struct mount *mp; + int flags; + struct simplelock *interlkp; + struct proc *p; { + int lkflags; - while (mp->mnt_flag & MNT_MLOCK) { + if (mp->mnt_flag & MNT_UNMOUNT) { + if (flags & LK_NOWAIT) + return (ENOENT); mp->mnt_flag |= MNT_MWAIT; - tsleep((caddr_t)mp, PVFS, "vfslock", 0); + if (interlkp) + simple_unlock(interlkp); + /* + * Since all busy locks are shared except the exclusive + * lock granted when unmounting, the only place that a + * wakeup needs to be done is at the release of the + * exclusive lock at the end of dounmount. + */ + sleep((caddr_t)mp, PVFS); + if (interlkp) + simple_lock(interlkp); + return (ENOENT); } - mp->mnt_flag |= MNT_MLOCK; - return (0); + lkflags = LK_SHARED; + if (interlkp) + lkflags |= LK_INTERLOCK; + if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) + panic("vfs_busy: unexpected lock failure"); + return (0); } + /* - * Unlock a locked filesystem. - * Panic if filesystem is not locked. 
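
(The rewritten vfs_busy() below takes LK_* flags, an optional
interlock, and the calling process, and fails with ENOENT when the
mount is on its way out; vfs_unbusy() drops the shared hold. The usage
pattern, as in the vfs_lookup.c hunk earlier in this diff:)

	if (vfs_busy(mp, 0, 0, p))
		return (ENOENT);	/* mount point is being unmounted */
	error = VFS_ROOT(mp, &vp);
	vfs_unbusy(mp, p);
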
+ * Free a busy file system */ void -vfs_unlock(mp) - register struct mount *mp; +vfs_unbusy(mp, p) + struct mount *mp; + struct proc *p; { - - if ((mp->mnt_flag & MNT_MLOCK) == 0) - panic("vfs_unlock: not locked"); - mp->mnt_flag &= ~MNT_MLOCK; - if (mp->mnt_flag & MNT_MWAIT) { - mp->mnt_flag &= ~MNT_MWAIT; - wakeup((caddr_t)mp); - } + lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); } /* - * Mark a mount point as busy. - * Used to synchronize access and to delay unmounting. + * Lookup a filesystem type, and if found allocate and initialize + * a mount structure for it. + * + * Devname is usually updated by mount(8) after booting. */ -int -vfs_busy(mp) - register struct mount *mp; -{ - while(mp->mnt_flag & MNT_MPBUSY) { - mp->mnt_flag |= MNT_MPWANT; - tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0); - } - if (mp->mnt_flag & MNT_UNMOUNT) - return (1); - mp->mnt_flag |= MNT_MPBUSY; - return (0); -} +int +vfs_rootmountalloc(fstypename, devname, mpp) + char *fstypename; + char *devname; + struct mount **mpp; + { + struct proc *p = curproc; /* XXX */ + struct vfsconf *vfsp; + struct mount *mp; + + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstypename)) + break; + if (vfsp == NULL) + return (ENODEV); + mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); + (void)vfs_busy(mp, LK_NOWAIT, 0, p); + LIST_INIT(&mp->mnt_vnodelist); + mp->mnt_vfc = vfsp; + mp->mnt_op = vfsp->vfc_vfsops; + mp->mnt_flag = MNT_RDONLY; + mp->mnt_vnodecovered = NULLVP; + vfsp->vfc_refcount++; + mp->mnt_stat.f_type = vfsp->vfc_typenum; + mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; + strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); + mp->mnt_stat.f_mntonname[0] = '/'; + (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); + *mpp = mp; + return (0); + } /* - * Free a busy filesystem. - * Panic if filesystem is not busy. - */ -void -vfs_unbusy(mp) - register struct mount *mp; + * Find an appropriate filesystem to use for the root. If a filesystem + * has not been preselected, walk through the list of known filesystems + * trying those that have mountroot routines, and try them until one + * works or we have tried them all. + */ +int +vfs_mountroot() { - - if ((mp->mnt_flag & MNT_MPBUSY) == 0) - panic("vfs_unbusy: not busy"); - mp->mnt_flag &= ~MNT_MPBUSY; - if (mp->mnt_flag & MNT_MPWANT) { - mp->mnt_flag &= ~MNT_MPWANT; - wakeup((caddr_t)&mp->mnt_flag); - } + struct vfsconf *vfsp; + extern int (*mountroot)(void); + int error; + + if (mountroot != NULL) + return ((*mountroot)()); + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { + if (vfsp->vfc_mountroot == NULL) + continue; + if ((error = (*vfsp->vfc_mountroot)()) == 0) + return (0); + printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); + } + return (ENODEV); } - + /* * Lookup a mount point by filesystem identifier. 
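
vfs_mountroot() above simply walks the configured filesystem list and accepts the first mountroot hook that succeeds. A self-contained model of that walk, under invented names:

    #include <errno.h>
    #include <stdio.h>

    struct fsconf {
            const char *name;
            int (*mountroot)(void);  /* NULL if the fs cannot be a root fs */
            struct fsconf *next;
    };

    static int
    try_mountroot(struct fsconf *head)
    {
            struct fsconf *fsp;
            int error;

            for (fsp = head; fsp != NULL; fsp = fsp->next) {
                    if (fsp->mountroot == NULL)
                            continue;
                    if ((error = (*fsp->mountroot)()) == 0)
                            return (0);     /* first filesystem that works wins */
                    printf("%s_mountroot failed: %d\n", fsp->name, error);
            }
            return (ENODEV);                /* nothing could mount the root */
    }

    static int fail_root(void) { return (EIO); }
    static int ok_root(void) { return (0); }

    int
    main(void)
    {
            struct fsconf nfs = { "nfs", ok_root, NULL };
            struct fsconf ffs = { "ffs", fail_root, &nfs };

            return (try_mountroot(&ffs) ? 1 : 0);
    }

As in the kernel code, a preselected mountroot (set by the boot path) short-circuits the walk entirely.
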
*/ struct mount * -getvfs(fsid) +vfs_getvfs(fsid) fsid_t *fsid; { register struct mount *mp; + simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; - mp = mp->mnt_list.cqe_next) + mp = mp->mnt_list.cqe_next) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && - mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) + mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { + simple_unlock(&mountlist_slock); return (mp); + } + } + simple_unlock(&mountlist_slock); return ((struct mount *)0); } + /* * Get a new unique fsid */ void -getnewfsid(mp, mtype) +vfs_getnewfsid(mp) struct mount *mp; - int mtype; { static u_short xxxfs_mntid; fsid_t tfsid; + int mtype; - mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + 11, 0); /* XXX */ + simple_lock(&mntid_slock); + mtype = mp->mnt_vfc->vfc_typenum; + mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); mp->mnt_stat.f_fsid.val[1] = mtype; if (xxxfs_mntid == 0) ++xxxfs_mntid; - tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid); + tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); tfsid.val[1] = mtype; if (mountlist.cqh_first != (void *)&mountlist) { - while (getvfs(&tfsid)) { + while (vfs_getvfs(&tfsid)) { tfsid.val[0]++; xxxfs_mntid++; } } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; + simple_unlock(&mntid_slock); } /* @@ -318,20 +407,56 @@ getnewvnode(tag, mp, vops, vpp) int (**vops) __P((void *)); struct vnode **vpp; { - register struct vnode *vp; + struct proc *p = curproc; /* XXX */ + struct freelst *listhd; + static int toggle; + struct vnode *vp; #ifdef DIAGNOSTIC int s; #endif - if ((vnode_free_list.tqh_first == NULL && - numvnodes < 2 * desiredvnodes) || - numvnodes < desiredvnodes) { + /* + * We must choose whether to allocate a new vnode or recycle an + * existing one. The criterion for allocating a new one is that + * the total number of vnodes is less than the number desired or + * there are no vnodes on either free list. Generally we only + * want to recycle vnodes that have no buffers associated with + * them, so we look first on the vnode_free_list. If it is empty, + * we next consider vnodes with referencing buffers on the + * vnode_hold_list. The toggle ensures that half the time we + * will use a buffer from the vnode_hold_list, and half the time + * we will allocate a new one unless the list has grown to twice + * the desired size. We are reticent to recycle vnodes from the + * vnode_hold_list because we will lose the identity of all its + * referencing buffers. + */ + toggle ^= 1; + if (numvnodes > 2 * desiredvnodes) + toggle = 0; + + + simple_lock(&vnode_free_list_slock); + if ((numvnodes < desiredvnodes) || + ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) && + ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) { + simple_unlock(&vnode_free_list_slock); vp = (struct vnode *)malloc((u_long)sizeof *vp, M_VNODE, M_WAITOK); bzero((char *)vp, sizeof *vp); numvnodes++; } else { - if ((vp = vnode_free_list.tqh_first) == NULL) { + for (vp = TAILQ_FIRST(listhd); vp != NULLVP; + vp = TAILQ_NEXT(vp, v_freelist)) { + if (simple_lock_try(&vp->v_interlock)) + break; + } + /* + * Unless this is a bad time of the month, at most + * the first NCPUS items on the free list are + * locked, so this is close enough to being empty. 
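
vfs_getnewfsid() above probes candidate ids until vfs_getvfs() no longer reports a collision. The same probe loop reduced to a userland toy (no makedev() packing, a flat registry instead of the mountlist; everything here is invented for the sketch):

    #include <stdio.h>

    static unsigned int used[64];   /* ids already handed out */
    static int nused;

    static int
    fsid_in_use(unsigned int id)
    {
            int i;

            for (i = 0; i < nused; i++)
                    if (used[i] == id)
                            return (1);
            return (0);
    }

    /* Probe upward from a type-based seed until an unused id is found. */
    static unsigned int
    new_fsid(unsigned int seed)
    {
            while (fsid_in_use(seed))
                    seed++;
            used[nused++] = seed;
            return (seed);
    }

    int
    main(void)
    {
            printf("%u %u %u\n", new_fsid(100), new_fsid(100), new_fsid(100));
            return (0);     /* prints 100 101 102 */
    }

The mntid_slock in the real function exists because this probe-and-claim sequence must be atomic against concurrent mounts.
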
+		 */
+		if (vp == NULLVP) {
+			simple_unlock(&vnode_free_list_slock);
 			tablefull("vnode");
 			*vpp = 0;
 			return (ENFILE);
@@ -340,12 +465,15 @@
 			vprint("free vnode", vp);
 			panic("free vnode isn't");
 		}
-		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+		TAILQ_REMOVE(listhd, vp, v_freelist);
 		/* see comment on why 0xdeadb is set at end of vgone (below) */
-		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
+		vp->v_flag |= VGONEHACK;
+		simple_unlock(&vnode_free_list_slock);
 		vp->v_lease = NULL;
 		if (vp->v_type != VBAD)
-			vgone(vp);
+			vgonel(vp, p);
+		else
+			simple_unlock(&vp->v_interlock);
 #ifdef DIAGNOSTIC
 		if (vp->v_data) {
 			vprint("cleaned vnode", vp);
@@ -385,18 +513,19 @@ insmntque(vp, mp)
 	register struct vnode *vp;
 	register struct mount *mp;
 {
-
+	simple_lock(&mntvnode_slock);
 	/*
 	 * Delete from old mount point vnode list, if on one.
 	 */
+
 	if (vp->v_mount != NULL)
 		LIST_REMOVE(vp, v_mntvnodes);
 	/*
 	 * Insert into list of vnodes for the new mount point, if available.
 	 */
-	if ((vp->v_mount = mp) == NULL)
-		return;
-	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
+	if ((vp->v_mount = mp) != NULL)
+		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
+	simple_unlock(&mntvnode_slock);
 }
 
 /*
@@ -435,14 +564,15 @@
 	struct buf *nbp, *blist;
 	int s, error;
 
-	if (flags & V_SAVE) {
+	if ((flags & V_SAVE) && vp->v_dirtyblkhd.lh_first != NULL) {
 		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
 			return (error);
 		if (vp->v_dirtyblkhd.lh_first != NULL)
 			panic("vinvalbuf: dirty bufs");
 	}
 	for (;;) {
-		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
+		if ((blist = vp->v_cleanblkhd.lh_first) &&
+		    (flags & V_SAVEMETA))
 			while (blist && blist->b_lblkno < 0)
 				blist = blist->b_vnbufs.le_next;
 		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
@@ -562,30 +692,140 @@
 	register struct buf *bp;
 {
 	struct vnode *vp;
+	struct buf *wasdirty;
 
-	if (bp->b_vp == (struct vnode *) 0)
+	if ((vp = bp->b_vp) == (struct vnode *) 0)
 		panic("brelvp: NULL");
 	/*
 	 * Delete from old vnode list, if on one.
 	 */
+	wasdirty = vp->v_dirtyblkhd.lh_first;
 	if (bp->b_vnbufs.le_next != NOLIST)
 		bufremvn(bp);
-	vp = bp->b_vp;
+	if (wasdirty && LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
+		LIST_REMOVE(vp, v_synclist);
 	bp->b_vp = (struct vnode *) 0;
 	HOLDRELE(vp);
 }
 
 /*
- * Reassign a buffer from one vnode to another.
- * Used to assign file specific control information
- * (indirect blocks) to the vnode to which they belong.
+ * The workitem queue.
+ *
+ * It is useful to delay writes of file data and filesystem metadata
+ * for tens of seconds so that quickly created and deleted files need
+ * not waste disk bandwidth being created and removed. To realize this,
+ * we append vnodes to a "workitem" queue. When running with a soft
+ * updates implementation, most pending metadata dependencies should
+ * not wait for more than a few seconds. Thus, metadata for filesystems
+ * mounted on block devices is delayed only about half the time that
+ * file data is delayed. Similarly, directory updates are more critical,
+ * so they are delayed only about a third of the time that file data is
+ * delayed. Thus, there are SYNCER_MAXDELAY queues that are processed
+ * round-robin at a rate of one each second (driven off the filesystem
+ * syncer process). The syncer_delayno variable indicates the next queue
+ * that is to be processed.
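
The getnewvnode() policy spelled out above—prefer fresh allocation or the buffer-free list, and raid the hold list (losing buffer identity) only on alternating calls—can be restated compactly. A sketch assuming the 4.4BSD <sys/queue.h> macros, with the interlocks, error handling, and the VGONEHACK/vgonel() cleanup dropped:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/queue.h>

    struct vn {
            TAILQ_ENTRY(vn) v_freelist;
    };
    TAILQ_HEAD(freelst, vn);

    static struct freelst vnode_free_list =
        TAILQ_HEAD_INITIALIZER(vnode_free_list);
    static struct freelst vnode_hold_list =
        TAILQ_HEAD_INITIALIZER(vnode_hold_list);
    static int numvnodes, desiredvnodes = 1;

    static struct vn *
    get_vnode(void)
    {
            static int toggle;
            struct freelst *listhd;
            struct vn *vp;

            toggle ^= 1;                    /* alternate hold-list raids */
            if (numvnodes > 2 * desiredvnodes)
                    toggle = 0;             /* far over budget: always recycle */

            if (numvnodes < desiredvnodes ||
                (TAILQ_FIRST(listhd = &vnode_free_list) == NULL &&
                (TAILQ_FIRST(listhd = &vnode_hold_list) == NULL || toggle))) {
                    vp = calloc(1, sizeof(*vp));    /* a fresh vnode */
                    numvnodes++;
                    return (vp);
            }
            vp = TAILQ_FIRST(listhd);       /* recycle least recently used */
            TAILQ_REMOVE(listhd, vp, v_freelist);
            return (vp);
    }

    int
    main(void)
    {
            struct vn *vp = get_vnode();

            TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
            printf("recycled the same vnode: %s\n",
                get_vnode() == vp ? "yes" : "no");
            return (0);
    }

Note how the condition leaves listhd pointing at whichever list will actually be raided, so the recycle path never sees an empty list.
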
+ * Items that need to be processed soon are placed in this queue: + * + * syncer_workitem_pending[syncer_delayno] + * + * A delay of fifteen seconds is done by placing the request fifteen + * entries later in the queue: + * + * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] + * + */ + +/* + * Add an item to the syncer work queue. + */ +void +vn_syncer_add_to_worklist(vp, delay) + struct vnode *vp; + int delay; +{ + int s, slot; + + s = splbio(); + if (delay > syncer_maxdelay - 2) + delay = syncer_maxdelay - 2; + slot = (syncer_delayno + delay) & syncer_mask; + LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); + splx(s); +} + +/* + * System filesystem synchronizer daemon. + */ + +extern int lbolt; + +void +sched_sync(p) + struct proc *p; +{ + struct synclist *slp; + struct vnode *vp; + long starttime; + int s; + + for (;;) { + starttime = time.tv_sec; + + /* + * Push files whose dirty time has expired. + */ + s = splbio(); + slp = &syncer_workitem_pending[syncer_delayno]; + syncer_delayno += 1; + if (syncer_delayno == syncer_maxdelay) + syncer_delayno = 0; + splx(s); + while ((vp = LIST_FIRST(slp)) != NULL) { + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); + VOP_UNLOCK(vp, 0, p); + if (LIST_FIRST(slp) == vp) { + if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL) + panic("sched_sync: fsync failed"); + /* + * Move ourselves to the back of the sync list. + */ + LIST_REMOVE(vp, v_synclist); + vn_syncer_add_to_worklist(vp, syncdelay); + } + } + + /* + * Do soft update processing. + */ + if (bioops.io_sync) + (*bioops.io_sync)(NULL); + + /* + * If it has taken us less than a second to process the + * current work, then wait. Otherwise start right over + * again. We can still lose time if any single round + * takes more than two seconds, but it does not really + * matter as we are just trying to generally pace the + * filesystem activity. + */ + if (time.tv_sec == starttime) + tsleep(&lbolt, PPAUSE, "syncer", 0); + } +} + +/* + * Reassign a buffer from one vnode to another. Used to assign buffers + * to the appropriate clean or dirty list and to add newly dirty vnodes + * to the appropriate filesystem syncer list. */ void reassignbuf(bp, newvp) register struct buf *bp; register struct vnode *newvp; { - register struct buflists *listheadp; + struct buflists *listheadp; + struct buf *wasdirty; + int delay; if (newvp == NULL) { printf("reassignbuf: NULL"); @@ -594,16 +834,36 @@ reassignbuf(bp, newvp) /* * Delete from old vnode list, if on one. */ + wasdirty = newvp->v_dirtyblkhd.lh_first; if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); /* * If dirty, put on list of dirty buffers; * otherwise insert onto list of clean buffers. 
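
vn_syncer_add_to_worklist() and sched_sync() together implement a classic timer wheel: a power-of-two array of buckets, insertion at (current + delay) & mask, and one bucket drained per second. A runnable miniature, with VOP_FSYNC replaced by a printf and the soft-updates hook omitted:

    #include <stdio.h>
    #include <sys/queue.h>

    struct vn {
            LIST_ENTRY(vn) v_synclist;
            const char *name;
    };
    LIST_HEAD(synclist, vn);

    #define WHEEL_SIZE      8               /* must be a power of two */
    static struct synclist wheel[WHEEL_SIZE];
    static int delayno;                     /* next bucket to be processed */

    static void
    add_to_worklist(struct vn *vp, int delay)
    {
            if (delay > WHEEL_SIZE - 2)
                    delay = WHEEL_SIZE - 2; /* clamp, as the kernel does */
            LIST_INSERT_HEAD(&wheel[(delayno + delay) & (WHEEL_SIZE - 1)],
                vp, v_synclist);
    }

    /* One "second" of sched_sync(): drain the current bucket, advance. */
    static void
    tick(void)
    {
            struct synclist *slp = &wheel[delayno];
            struct vn *vp;

            delayno = (delayno + 1) & (WHEEL_SIZE - 1);
            while ((vp = LIST_FIRST(slp)) != NULL) {
                    LIST_REMOVE(vp, v_synclist);
                    printf("sync %s\n", vp->name);  /* stands in for fsync */
            }
    }

    int
    main(void)
    {
            struct vn a, b;
            int i;

            a.name = "a";
            b.name = "b";
            for (i = 0; i < WHEEL_SIZE; i++)
                    LIST_INIT(&wheel[i]);
            add_to_worklist(&a, 2);
            add_to_worklist(&b, 5);
            for (i = 0; i < WHEEL_SIZE; i++)
                    tick();         /* "a" drains on pass 2, "b" on pass 5 */
            return (0);
    }

The real sched_sync() additionally re-queues a vnode whose fsync left it dirty, which is why it checks whether the vnode is still at the head of the slot after the VOP_FSYNC.
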
*/ - if (bp->b_flags & B_DELWRI) - listheadp = &newvp->v_dirtyblkhd; - else + if ((bp->b_flags & B_DELWRI) == 0) { listheadp = &newvp->v_cleanblkhd; + if (wasdirty && LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) + LIST_REMOVE(newvp, v_synclist); + } else { + listheadp = &newvp->v_dirtyblkhd; + if (LIST_FIRST(listheadp) == NULL) { + switch (newvp->v_type) { + case VDIR: + delay = syncdelay / 3; + break; + case VBLK: + if (newvp->v_specmountpoint != NULL) { + delay = syncdelay / 2; + break; + } + /* fall through */ + default: + delay = syncdelay; + } + vn_syncer_add_to_worklist(newvp, delay); + } + } bufinsvn(bp, listheadp); } @@ -649,8 +909,10 @@ getdevvp(dev, vpp, type) struct vnode *nvp; int error; - if (dev == NODEV) + if (dev == NODEV) { + *vpp = NULLVP; return (0); + } error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp); if (error) { *vpp = NULLVP; @@ -680,6 +942,7 @@ checkalias(nvp, nvp_rdev, mp) dev_t nvp_rdev; struct mount *mp; { + struct proc *p = curproc; register struct vnode *vp; struct vnode **vpp; @@ -688,18 +951,23 @@ checkalias(nvp, nvp_rdev, mp) vpp = &speclisth[SPECHASH(nvp_rdev)]; loop: + simple_lock(&spechash_slock); for (vp = *vpp; vp; vp = vp->v_specnext) { + simple_lock(&vp->v_interlock); if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ if (vp->v_usecount == 0) { - vgone(vp); + simple_unlock(&spechash_slock); + vgonel(vp, p); goto loop; } - if (vget(vp, 1)) + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { + simple_unlock(&spechash_slock); goto loop; + } break; } if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) { @@ -708,18 +976,21 @@ loop: nvp->v_rdev = nvp_rdev; nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; - nvp->v_specflags = 0; + nvp->v_specmountpoint = NULL; nvp->v_speclockf = NULL; + simple_unlock(&spechash_slock); *vpp = nvp; - if (vp != NULL) { + if (vp != NULLVP) { nvp->v_flag |= VALIASED; vp->v_flag |= VALIASED; vput(vp); } return (NULLVP); } - VOP_UNLOCK(vp); - vclean(vp, 0); + simple_unlock(&spechash_slock); + VOP_UNLOCK(vp, 0, p); + simple_lock(&vp->v_interlock); + vclean(vp, 0, p); vp->v_op = nvp->v_op; vp->v_tag = nvp->v_tag; nvp->v_type = VNON; @@ -736,91 +1007,260 @@ loop: * been changed to a new file system type). */ int -vget(vp, lockflag) - register struct vnode *vp; - int lockflag; +vget(vp, flags, p) + struct vnode *vp; + int flags; + struct proc *p; { - + int error; /* * If the vnode is in the process of being cleaned out for * another use, we wait for the cleaning to finish and then - * return failure. Cleaning is determined either by checking - * that the VXLOCK flag is set, or that the use count is - * zero with the back pointer set to show that it has been - * removed from the free list by getnewvnode. The VXLOCK - * flag may not have been set yet because vclean is blocked in - * the VOP_LOCK call waiting for the VOP_INACTIVE to complete. + * return failure. Cleaning is determined by checking that + * the VXLOCK flag is set. 
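
The per-type delay switch in reassignbuf() above amounts to a small pure function. Restated here so the policy is visible at a glance; the enum values are stand-ins for the kernel's vnode types:

    enum vtype { VNON, VREG, VDIR, VBLK };

    static int
    sync_delay_for(enum vtype type, int mounted_blkdev, int syncdelay)
    {
            switch (type) {
            case VDIR:
                    return (syncdelay / 3); /* directory updates: most urgent */
            case VBLK:
                    if (mounted_blkdev)
                            return (syncdelay / 2); /* fs metadata on device */
                    /* FALLTHROUGH */
            default:
                    return (syncdelay);     /* plain file data: full delay */
            }
    }

    int
    main(void)
    {
            return (sync_delay_for(VDIR, 0, 30));   /* 10 seconds */
    }

Only a vnode whose dirty list was empty gets queued, so a vnode's position on the wheel reflects the age of its oldest dirty buffer, not its newest.
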
 */
-	if ((vp->v_flag & VXLOCK) ||
-	    (vp->v_usecount == 0 &&
-	     vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
-		vp->v_flag |= VXWANT;
+	if ((flags & LK_INTERLOCK) == 0)
+		simple_lock(&vp->v_interlock);
+	if (vp->v_flag & VXLOCK) {
+		vp->v_flag |= VXWANT;
+		simple_unlock(&vp->v_interlock);
 		tsleep((caddr_t)vp, PINOD, "vget", 0);
-		return (1);
+		return (ENOENT);
+	}
+	if (vp->v_usecount == 0) {
+		simple_lock(&vnode_free_list_slock);
+		if (vp->v_holdcnt > 0)
+			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
+		else
+			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+		simple_unlock(&vnode_free_list_slock);
 	}
-	if (vp->v_usecount == 0)
-		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
-	vp->v_usecount++;
-	if (lockflag)
-		VOP_LOCK(vp);
+	vp->v_usecount++;
+	if (flags & LK_TYPE_MASK) {
+		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
+			vunref(vp);
+			simple_unlock(&vp->v_interlock);
+		}
+		return (error);
+	}
+	simple_unlock(&vp->v_interlock);
 	return (0);
 }
 
 /*
- * Vnode reference, just increment the count
+ * Stubs to use when there is no locking to be done on the underlying object.
+ * A minimal shared lock is necessary to ensure that the underlying object
+ * is not revoked while an operation is in progress. So, an active shared
+ * count is maintained in an auxiliary vnode lock structure.
+ */
+int
+vop_nolock(v)
+	void *v;
+{
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
+	} */ *ap = v;
+
+#ifdef notyet
+	/*
+	 * This code cannot be used until all the non-locking filesystems
+	 * (notably NFS) are converted to properly lock and release nodes.
+	 * Also, certain vnode operations change the locking state within
+	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
+	 * and symlink). Ideally these operations should not change the
+	 * lock state, but should be changed to let the caller of the
+	 * function unlock them. Otherwise all intermediate vnode layers
+	 * (such as union, umapfs, etc) must catch these functions to do
+	 * the necessary locking at their layer. Note that the inactive
+	 * and lookup operations also change their lock state, but this
+	 * cannot be avoided, so these two operations will always need
+	 * to be handled in intermediate layers.
+	 */
+	struct vnode *vp = ap->a_vp;
+	int vnflags, flags = ap->a_flags;
+
+	if (vp->v_vnlock == NULL) {
+		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
+			return (0);
+		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
+		    M_VNODE, M_WAITOK);
+		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
+	}
+	switch (flags & LK_TYPE_MASK) {
+	case LK_DRAIN:
+		vnflags = LK_DRAIN;
+		break;
+	case LK_EXCLUSIVE:
+	case LK_SHARED:
+		vnflags = LK_SHARED;
+		break;
+	case LK_UPGRADE:
+	case LK_EXCLUPGRADE:
+	case LK_DOWNGRADE:
+		return (0);
+	case LK_RELEASE:
+	default:
+		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
+	}
+	if (flags & LK_INTERLOCK)
+		vnflags |= LK_INTERLOCK;
+	return (lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
+#else /* for now */
+	/*
+	 * Since we are not using the lock manager, we must clear
+	 * the interlock here.
+	 */
+	if (ap->a_flags & LK_INTERLOCK)
+		simple_unlock(&ap->a_vp->v_interlock);
+	return (0);
+#endif
+}
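
The essence of the reworked vget() above: refuse vnodes that are being destroyed, and pull a vnode off its LRU list on the 0->1 use-count transition. A locking-free miniature (the kernel additionally sleeps on VXWANT and may take the vnode lock through vn_lock(); the field names here are invented):

    #include <errno.h>

    struct vn {
            int usecount;
            int xlocked;            /* stands in for VXLOCK */
            int on_free_list;
    };

    static int
    vn_get(struct vn *vp)
    {
            if (vp->xlocked)
                    return (ENOENT);        /* being torn down: give up */
            if (vp->usecount == 0)
                    vp->on_free_list = 0;   /* 0->1: no longer reclaimable */
            vp->usecount++;
            return (0);
    }

    int
    main(void)
    {
            struct vn v = { 0, 0, 1 };

            return (vn_get(&v));    /* succeeds, removes v from the LRU */
    }

Returning ENOENT rather than the old boolean 1 lets callers pass a real errno up, which is why the checkalias() and cache lookup paths now loop on failure instead of testing a flag.
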
+
+/*
+ * Decrement the active use count.
+ */
+
+int
+vop_nounlock(v)
+	void *v;
+{
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
+	} */ *ap = v;
+
+	struct vnode *vp = ap->a_vp;
+
+	if (vp->v_vnlock == NULL)
+		return (0);
+	return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
+}
+
+/*
+ * Return whether or not the node is in use.
+ */
+int
+vop_noislocked(v)
+	void *v;
+{
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap = v;
+
+	struct vnode *vp = ap->a_vp;
+
+	if (vp->v_vnlock == NULL)
+		return (0);
+	return (lockstatus(vp->v_vnlock));
+}
+
+/*
+ * Vnode reference.
 */
 void
 vref(vp)
 	struct vnode *vp;
 {
-
+	simple_lock(&vp->v_interlock);
 	if (vp->v_usecount <= 0)
 		panic("vref used where vget required");
 	vp->v_usecount++;
+	simple_unlock(&vp->v_interlock);
 }
 
-/*
- * vput(), just unlock and vrele()
- */
-void
-vput(vp)
-	register struct vnode *vp;
+
+int
+vunref(vp)
+	struct vnode *vp;
 {
+#ifdef DIAGNOSTIC
+	if (vp == NULL)
+		panic("vunref: null vp");
+#endif
+	simple_lock(&vp->v_interlock);
+	vp->v_usecount--;
+	if (vp->v_usecount > 0) {
+		simple_unlock(&vp->v_interlock);
+		return (vp->v_usecount);
+	}
+#ifdef DIAGNOSTIC
+	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
+		vprint("vunref: bad ref count", vp);
+		panic("vunref: ref cnt");
+	}
+#endif
+	/*
+	 * insert at tail of LRU list
+	 */
+	simple_lock(&vnode_free_list_slock);
+	if (vp->v_holdcnt > 0)
+		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
+	else
+		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+	simple_unlock(&vnode_free_list_slock);
 
-	VOP_UNLOCK(vp);
-	vrele(vp);
+	return (0);
 }
 
 /*
- * Vnode release.
- * If count drops to zero, call inactive routine and return to freelist.
+ * vput(), just unlock and vrele()
 */
 void
-vrele(vp)
+vput(vp)
 	register struct vnode *vp;
 {
+	struct proc *p = curproc;	/* XXX */
 
 #ifdef DIAGNOSTIC
 	if (vp == NULL)
-		panic("vrele: null vp");
+		panic("vput: null vp");
 #endif
+	simple_lock(&vp->v_interlock);
 	vp->v_usecount--;
-	if (vp->v_usecount > 0)
+	if (vp->v_usecount > 0) {
+		simple_unlock(&vp->v_interlock);
+		VOP_UNLOCK(vp, 0, p);
 		return;
+	}
 #ifdef DIAGNOSTIC
-	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
-		vprint("vrele: bad ref count", vp);
-		panic("vrele: ref cnt");
+	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
+		vprint("vput: bad ref count", vp);
+		panic("vput: ref cnt");
 	}
 #endif
 	/*
 	 * insert at tail of LRU list
 	 */
-	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
-	VOP_INACTIVE(vp);
+	simple_lock(&vnode_free_list_slock);
+	if (vp->v_holdcnt > 0)
+		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
+	else
+		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+	simple_unlock(&vnode_free_list_slock);
+	simple_unlock(&vp->v_interlock);
+	VOP_INACTIVE(vp, p);
 }
 
 /*
+ * Vnode release - use for active VNODES.
+ * If count drops to zero, call inactive routine and return to freelist.
+ */
+void
+vrele(vp)
+	register struct vnode *vp;
+{
+	struct proc *p = curproc;
+
+	if (vunref(vp) == 0 &&
+	    vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0)
+		VOP_INACTIVE(vp, p);
+}
+
+#ifdef DIAGNOSTIC
+/*
 * Page or buffer structure gets a reference.
 */
 void
@@ -828,7 +1268,27 @@
 vhold(vp)
 	register struct vnode *vp;
 {
 
+	/*
+	 * If it is on the freelist and the hold count is currently
+	 * zero, move it to the hold list.
+	 *
+	 * The VGONEHACK flag reflects a call from getnewvnode,
+	 * which will remove the vnode from the free list, but
+	 * will not increment the ref count until after it calls vgone.
+	 * If the ref count were incremented first, vgone would
+	 * (incorrectly) try to close the previous instance of the
+	 * underlying object.
+	 */
+	simple_lock(&vp->v_interlock);
+	if (!(vp->v_flag & VGONEHACK) &&
+	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
+		simple_lock(&vnode_free_list_slock);
+		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
+		simple_unlock(&vnode_free_list_slock);
+	}
 	vp->v_holdcnt++;
+	simple_unlock(&vp->v_interlock);
 }
 
 /*
@@ -839,10 +1299,26 @@
 holdrele(vp)
 	register struct vnode *vp;
 {
 
+	simple_lock(&vp->v_interlock);
 	if (vp->v_holdcnt <= 0)
 		panic("holdrele: holdcnt");
 	vp->v_holdcnt--;
+	/*
+	 * If it is on the holdlist and the hold count drops to
+	 * zero, move it to the free list.
+	 *
+	 * See above for VGONEHACK.
+	 */
+	if (!(vp->v_flag & VGONEHACK) &&
+	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
+		simple_lock(&vnode_free_list_slock);
+		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
+		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+		simple_unlock(&vnode_free_list_slock);
+	}
+	simple_unlock(&vp->v_interlock);
 }
+#endif /* DIAGNOSTIC */
 
 /*
 * Remove any vnodes in the vnode table belonging to mount point mp.
@@ -863,11 +1339,11 @@
 	struct vnode *skipvp;
 	int flags;
 {
+	struct proc *p = curproc;
 	register struct vnode *vp, *nvp;
 	int busy = 0;
 
-	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
-		panic("vflush: not busy");
+	simple_lock(&mntvnode_slock);
 loop:
 	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
 		if (vp->v_mount != mp)
@@ -878,24 +1354,32 @@
 		 */
 		if (vp == skipvp)
 			continue;
+
+		simple_lock(&vp->v_interlock);
 		/*
 		 * Skip over a vnodes marked VSYSTEM.
 		 */
-		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
+		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
+			simple_unlock(&vp->v_interlock);
 			continue;
+		}
 		/*
 		 * If WRITECLOSE is set, only flush out regular file
 		 * vnodes open for writing.
 		 */
 		if ((flags & WRITECLOSE) &&
-		    (vp->v_writecount == 0 || vp->v_type != VREG))
+		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
+			simple_unlock(&vp->v_interlock);
 			continue;
+		}
 		/*
 		 * With v_usecount == 0, all we need to do is clear
 		 * out the vnode data structures and we are done.
 		 */
 		if (vp->v_usecount == 0) {
-			vgone(vp);
+			simple_unlock(&mntvnode_slock);
+			vgonel(vp, p);
+			simple_lock(&mntvnode_slock);
 			continue;
 		}
 		/*
@@ -904,21 +1388,25 @@
 		 * anonymous device. For all other files, just kill them.
 		 */
 		if (flags & FORCECLOSE) {
+			simple_unlock(&mntvnode_slock);
 			if (vp->v_type != VBLK && vp->v_type != VCHR) {
-				vgone(vp);
+				vgonel(vp, p);
 			} else {
-				vclean(vp, 0);
+				vclean(vp, 0, p);
 				vp->v_op = spec_vnodeop_p;
 				insmntque(vp, (struct mount *)0);
 			}
+			simple_lock(&mntvnode_slock);
 			continue;
 		}
 #ifdef DEBUG
 		if (busyprt)
 			vprint("vflush: busy vnode", vp);
 #endif
+		simple_unlock(&vp->v_interlock);
 		busy++;
 	}
+	simple_unlock(&mntvnode_slock);
 	if (busy)
 		return (EBUSY);
 	return (0);
@@ -926,11 +1414,13 @@
 
 /*
 * Disassociate the underlying file system from a vnode.
+ * The vnode interlock is held on entry.
 */
 void
-vclean(vp, flags)
+vclean(vp, flags, p)
 	register struct vnode *vp;
 	int flags;
+	struct proc *p;
 {
 	int active;
 
@@ -941,15 +1431,8 @@
 	 * race against ourselves to recycle it.
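
vhold()/holdrele() above migrate an unreferenced vnode between vnode_free_list and vnode_hold_list as its hold count crosses zero, so getnewvnode() can prefer victims with no buffers attached. The list mechanics in isolation, assuming <sys/queue.h> and omitting VGONEHACK and the interlocks:

    #include <assert.h>
    #include <sys/queue.h>

    struct vn {
            TAILQ_ENTRY(vn) v_freelist;
            int usecount, holdcnt;
    };
    TAILQ_HEAD(freelst, vn);

    static struct freelst free_list = TAILQ_HEAD_INITIALIZER(free_list);
    static struct freelst hold_list = TAILQ_HEAD_INITIALIZER(hold_list);

    /* An unreferenced vnode lives on exactly one of the two LRU lists;
     * its first buffer reference moves it to the hold list. */
    static void
    vn_hold(struct vn *vp)
    {
            if (vp->holdcnt == 0 && vp->usecount == 0) {
                    TAILQ_REMOVE(&free_list, vp, v_freelist);
                    TAILQ_INSERT_TAIL(&hold_list, vp, v_freelist);
            }
            vp->holdcnt++;
    }

    static void
    vn_holdrele(struct vn *vp)
    {
            assert(vp->holdcnt > 0);
            vp->holdcnt--;
            if (vp->holdcnt == 0 && vp->usecount == 0) {
                    TAILQ_REMOVE(&hold_list, vp, v_freelist);
                    TAILQ_INSERT_TAIL(&free_list, vp, v_freelist);
            }
    }

    int
    main(void)
    {
            struct vn v = { .usecount = 0, .holdcnt = 0 };

            TAILQ_INSERT_TAIL(&free_list, &v, v_freelist);
            vn_hold(&v);            /* migrates to the hold list */
            vn_holdrele(&v);        /* and back to the free list */
            assert(TAILQ_FIRST(&free_list) == &v);
            return (0);
    }

The VGONEHACK test in the real code exists only to keep getnewvnode's victim, already unlinked from its list, from being re-linked mid-teardown.
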
*/ if ((active = vp->v_usecount) != 0) - VREF(vp); - /* - * Even if the count is zero, the VOP_INACTIVE routine may still - * have the object locked while it cleans it out. The VOP_LOCK - * ensures that the VOP_INACTIVE routine is done with its work. - * For active vnodes, it ensures that no other activity can - * occur while the underlying object is being cleaned out. - */ - VOP_LOCK(vp); + vp->v_usecount++; + /* * Prevent the vnode from being recycled or * brought into use while we clean it out. @@ -957,32 +1440,57 @@ vclean(vp, flags) if (vp->v_flag & VXLOCK) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; + + /* - * Clean out any buffers associated with the vnode. + * Even if the count is zero, the VOP_INACTIVE routine may still + * have the object locked while it cleans it out. The VOP_LOCK + * ensures that the VOP_INACTIVE routine is done with its work. + * For active vnodes, it ensures that no other activity can + * occur while the underlying object is being cleaned out. */ - if (flags & DOCLOSE) - vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); + VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); + /* - * Any other processes trying to obtain this lock must first - * wait for VXLOCK to clear, then call the new lock operation. + * Clean out any buffers associated with the vnode. */ - VOP_UNLOCK(vp); + if (flags & DOCLOSE) + vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); /* * If purging an active vnode, it must be closed and - * deactivated before being reclaimed. + * deactivated before being reclaimed. Note that the + * VOP_INACTIVE will unlock the vnode */ if (active) { if (flags & DOCLOSE) - VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL); - VOP_INACTIVE(vp); + VOP_CLOSE(vp, FNONBLOCK, NOCRED, p); + VOP_INACTIVE(vp, p); + } else { + /* + * Any other processes trying to obtain this lock must first + * wait for VXLOCK to clear, then call the new lock operation. + */ + VOP_UNLOCK(vp, 0, p); } + /* * Reclaim the vnode. */ - if (VOP_RECLAIM(vp)) + if (VOP_RECLAIM(vp, p)) panic("vclean: cannot reclaim"); - if (active) - vrele(vp); + if (active) { + if (vunref(vp) == 0 && + vp->v_holdcnt > 0) + panic("vclean: not clean"); + simple_unlock(&vp->v_interlock); + } + cache_purge(vp); + if (vp->v_vnlock) { + if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) + vprint("vclean: lock not drained", vp); + FREE(vp->v_vnlock, M_VNODE); + vp->v_vnlock = NULL; + } /* * Done with purge, notify sleepers of the grim news. @@ -1000,12 +1508,25 @@ vclean(vp, flags) * Eliminate all activity associated with the requested vnode * and with all vnodes aliased to the requested vnode. */ -void -vgoneall(vp) - register struct vnode *vp; +int +vop_revoke(v) + void *v; { - register struct vnode *vq; + struct vop_revoke_args /* { + struct vnode *a_vp; + int a_flags; + } */ *ap = v; + struct vnode *vp, *vq; + struct proc *p = curproc; + +#ifdef DIAGNOSTIC + if ((ap->a_flags & REVOKEALL) == 0) + panic("vop_revoke"); +#endif + vp = ap->a_vp; + simple_lock(&vp->v_interlock); + if (vp->v_flag & VALIASED) { /* * If a vgone (or vclean) is already in progress, @@ -1013,19 +1534,23 @@ vgoneall(vp) */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - tsleep((caddr_t)vp, PINOD, "vgoneall", 0); - return; + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); + return(0); } /* * Ensure that vp will not be vgone'd while we * are eliminating its aliases. 
*/ vp->v_flag |= VXLOCK; + simple_unlock(&vp->v_interlock); while (vp->v_flag & VALIASED) { + simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type || vp == vq) continue; + simple_unlock(&spechash_slock); vgone(vq); break; } @@ -1035,9 +1560,34 @@ vgoneall(vp) * really eliminate the vnode after which time * vgone will awaken any sleepers. */ + simple_lock(&vp->v_interlock); vp->v_flag &= ~VXLOCK; } - vgone(vp); + vgonel(vp, p); + return (0); +} + + +/* + * Recycle an unused vnode to the front of the free list. + * Release the passed interlock if the vnode will be recycled. + */ +int +vrecycle(vp, inter_lkp, p) + struct vnode *vp; + struct simplelock *inter_lkp; + struct proc *p; +{ + + simple_lock(&vp->v_interlock); + if (vp->v_usecount == 0) { + if (inter_lkp) + simple_unlock(inter_lkp); + vgonel(vp, p); + return (1); + } + simple_unlock(&vp->v_interlock); + return (0); } /* @@ -1048,6 +1598,20 @@ void vgone(vp) register struct vnode *vp; { + struct proc *p = curproc; + + simple_lock (&vp->v_interlock); + vgonel(vp, p); +} + +/* + * vgone, with the vp interlock held. + */ +void +vgonel(vp, p) + struct vnode *vp; + struct proc *p; +{ register struct vnode *vq; struct vnode *vx; @@ -1057,21 +1621,25 @@ vgone(vp) */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; + simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vgone", 0); return; } /* * Clean out the filesystem specific data. */ - vclean(vp, DOCLOSE); + vclean(vp, DOCLOSE, p); /* * Delete from old mount point vnode list, if on one. */ - insmntque(vp, (struct mount *)0); + if (vp->v_mount != NULL) + insmntque(vp, (struct mount *)0); /* - * If special device, remove it from special device alias list. + * If special device, remove it from special device alias list + * if it is on one. */ - if (vp->v_type == VBLK || vp->v_type == VCHR) { + if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { + simple_lock(&spechash_slock); if (*vp->v_hashchain == vp) { *vp->v_hashchain = vp->v_specnext; } else { @@ -1100,27 +1668,26 @@ vgone(vp) vx->v_flag &= ~VALIASED; vp->v_flag &= ~VALIASED; } + simple_unlock(&spechash_slock); FREE(vp->v_specinfo, M_VNODE); vp->v_specinfo = NULL; } /* * If it is on the freelist and not already at the head, - * move it to the head of the list. The test of the back - * pointer and the reference count of zero is because - * it will be removed from the free list by getnewvnode, - * but will not have its reference count incremented until - * after calling vgone. If the reference count were - * incremented first, vgone would (incorrectly) try to - * close the previous instance of the underlying object. - * So, the back pointer is explicitly set to `0xdeadb' in - * getnewvnode after removing it from the freelist to ensure - * that we do not try to move it here. + * move it to the head of the list. 
+	 *
+	 * See above about the VGONEHACK.
 	 */
-	if (vp->v_usecount == 0 &&
-	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
-	    vnode_free_list.tqh_first != vp) {
-		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
-		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
+	if (vp->v_usecount == 0) {
+		simple_lock(&vnode_free_list_slock);
+		if (vp->v_holdcnt > 0)
+			panic("vgonel: not clean");
+		if (!(vp->v_flag & VGONEHACK) &&
+		    TAILQ_FIRST(&vnode_free_list) != vp) {
+			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
+		}
+		simple_unlock(&vnode_free_list_slock);
 	}
 	vp->v_type = VBAD;
 }
@@ -1135,14 +1702,18 @@
 	struct vnode **vpp;
 {
 	register struct vnode *vp;
+	int rc = 0;
 
+	simple_lock(&spechash_slock);
 	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
 		if (dev != vp->v_rdev || type != vp->v_type)
 			continue;
 		*vpp = vp;
-		return (1);
+		rc = 1;
+		break;
 	}
-	return (0);
+	simple_unlock(&spechash_slock);
+	return (rc);
 }
 
 /*
@@ -1150,14 +1721,15 @@
 */
 int
 vcount(vp)
-	register struct vnode *vp;
+	struct vnode *vp;
 {
-	register struct vnode *vq, *vnext;
+	struct vnode *vq, *vnext;
 	int count;
 
 loop:
 	if ((vp->v_flag & VALIASED) == 0)
 		return (vp->v_usecount);
+	simple_lock(&spechash_slock);
 	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
 		vnext = vq->v_specnext;
 		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
@@ -1166,11 +1738,13 @@
 		 * Alias, but not in use, so flush it out.
 		 */
 		if (vq->v_usecount == 0 && vq != vp) {
+			simple_unlock(&spechash_slock);
 			vgone(vq);
 			goto loop;
 		}
 		count += vq->v_usecount;
 	}
+	simple_unlock(&spechash_slock);
 	return (count);
 }
 
@@ -1225,21 +1799,77 @@
 void
 printlockedvnodes()
 {
-	register struct mount *mp;
+	struct proc *p = curproc;
+	register struct mount *mp, *nmp;
 	register struct vnode *vp;
 
 	printf("Locked vnodes\n");
+	simple_lock(&mountlist_slock);
 	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
-	    mp = mp->mnt_list.cqe_next) {
+	    mp = nmp) {
+		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
+			nmp = mp->mnt_list.cqe_next;
+			continue;
+		}
 		for (vp = mp->mnt_vnodelist.lh_first; vp != NULL;
-	    vp = vp->v_mntvnodes.le_next)
+		    vp = vp->v_mntvnodes.le_next) {
 			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
-	}
+		simple_lock(&mountlist_slock);
+		nmp = mp->mnt_list.cqe_next;
+		vfs_unbusy(mp, p);
+	}
+	simple_unlock(&mountlist_slock);
+	}
 #endif
 
+/*
+ * Top level filesystem related information gathering.
+ */ +int +vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) + int *name; + u_int namelen; + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; + struct proc *p; +{ + struct vfsconf *vfsp; + + /* all sysctl names at this level are at least name and field */ + if (namelen < 2) + return (ENOTDIR); /* overloaded */ + if (name[0] != VFS_GENERIC) { + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (vfsp->vfc_typenum == name[0]) + break; + if (vfsp == NULL) + return (EOPNOTSUPP); + return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, + oldp, oldlenp, newp, newlen, p)); + } + switch (name[1]) { + case VFS_MAXTYPENUM: + return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf)); + case VFS_CONF: + if (namelen < 3) + return (ENOTDIR); /* overloaded */ + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (vfsp->vfc_typenum == name[2]) + break; + if (vfsp == NULL) + return (EOPNOTSUPP); + return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp, + sizeof(struct vfsconf))); + } + return (EOPNOTSUPP); +} + + int kinfo_vdebug = 1; int kinfo_vgetfailed; #define KINFO_VNODESLOP 10 @@ -1249,12 +1879,13 @@ int kinfo_vgetfailed; */ /* ARGSUSED */ int -sysctl_vnode(where, sizep) +sysctl_vnode(where, sizep, p) char *where; size_t *sizep; + struct proc *p; { register struct mount *mp, *nmp; - struct vnode *vp; + struct vnode *vp, *nvp; register char *bp = where, *savebp; char *ewhere; int error; @@ -1267,27 +1898,32 @@ sysctl_vnode(where, sizep) } ewhere = where + *sizep; + simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { - nmp = mp->mnt_list.cqe_next; - if (vfs_busy(mp)) + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + nmp = mp->mnt_list.cqe_next; continue; + } savebp = bp; again: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; - vp = vp->v_mntvnodes.le_next) { + vp = nvp) { /* * Check that the vp is still associated with * this filesystem. RACE: could have been * recycled onto the same filesystem. */ if (vp->v_mount != mp) { + simple_unlock(&mntvnode_slock); if (kinfo_vdebug) printf("kinfo: vp changed\n"); bp = savebp; goto again; } + nvp = vp->v_mntvnodes.le_next; if (bp + VPTRSZ + VNODESZ > ewhere) { + simple_unlock(&mntvnode_slock); *sizep = bp - where; return (ENOMEM); } @@ -1295,10 +1931,17 @@ again: (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) return (error); bp += VPTRSZ + VNODESZ; + simple_lock(&mntvnode_slock); } - vfs_unbusy(mp); + + simple_unlock(&mntvnode_slock); + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp, p); } + simple_unlock(&mountlist_slock); + *sizep = bp - where; return (0); } @@ -1311,26 +1954,31 @@ vfs_mountedon(vp) register struct vnode *vp; { register struct vnode *vq; + int error = 0; - if (vp->v_specflags & SI_MOUNTEDON) + if (vp->v_specmountpoint != NULL) return (EBUSY); if (vp->v_flag & VALIASED) { + simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; - if (vq->v_specflags & SI_MOUNTEDON) - return (EBUSY); - } + if (vq->v_specmountpoint != NULL) { + error = EBUSY; + break; + } + } + simple_unlock(&spechash_slock); } - return (0); + return (error); } /* * Build hash lists of net addresses and hang them off the mount point. * Called by ufs_mount() to set up the lists of export addresses. 
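
vfs_sysctl() above is a two-level dispatcher: name[0] selects either the generic VFS node or a filesystem type, and the chosen handler sees only the remaining name components. A compact model with one invented filesystem entry (the VFS_GENERIC value and handler names are assumptions for the sketch):

    #include <errno.h>
    #include <stdio.h>

    #define VFS_GENERIC     0

    struct fsent {
            int typenum;
            const char *name;
            int (*handler)(const int *, unsigned int);
    };

    static int
    ffs_sysctl(const int *name, unsigned int namelen)
    {
            printf("ffs handles %u further name component(s)\n", namelen);
            return (0);
    }

    static struct fsent table[] = { { 1, "ffs", ffs_sysctl } };

    static int
    fs_dispatch(const int *name, unsigned int namelen)
    {
            unsigned int i;

            if (namelen < 2)
                    return (ENOTDIR);       /* need at least name and field */
            if (name[0] == VFS_GENERIC)
                    return (0);     /* VFS_MAXTYPENUM / VFS_CONF live here */
            for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
                    if (table[i].typenum == name[0])
                            return (table[i].handler(name + 1, namelen - 1));
            return (EOPNOTSUPP);    /* no such filesystem type */
    }

    int
    main(void)
    {
            int name[2] = { 1, 42 };

            return (fs_dispatch(name, 2));
    }

Stripping name[0] before recursing is what lets each filesystem define its own private sysctl namespace under its type number.
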
*/ -static int +int vfs_hang_addrlist(mp, nep, argp) struct mount *mp; struct netexport *nep; @@ -1404,7 +2052,7 @@ out: } /* ARGSUSED */ -static int +int vfs_free_netcred(rn, w) struct radix_node *rn; void *w; @@ -1419,7 +2067,7 @@ vfs_free_netcred(rn, w) /* * Free the net address hash lists that are hanging off the mount points. */ -static void +void vfs_free_addrlist(nep) struct netexport *nep; { @@ -1666,3 +2314,161 @@ fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) } return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p); } + +/* + * Routine to create and manage a filesystem syncer vnode. + */ +#define sync_close nullop +int sync_fsync __P((void *)); +int sync_inactive __P((void *)); +#define sync_reclaim nullop +#define sync_lock vop_nolock +#define sync_unlock vop_nounlock +int sync_print __P((void *)); +#define sync_islocked vop_noislocked + +int (**sync_vnodeop_p) __P((void *)); +struct vnodeopv_entry_desc sync_vnodeop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_close_desc, sync_close }, /* close */ + { &vop_fsync_desc, sync_fsync }, /* fsync */ + { &vop_inactive_desc, sync_inactive }, /* inactive */ + { &vop_reclaim_desc, sync_reclaim }, /* reclaim */ + { &vop_lock_desc, sync_lock }, /* lock */ + { &vop_unlock_desc, sync_unlock }, /* unlock */ + { &vop_print_desc, sync_print }, /* print */ + { &vop_islocked_desc, sync_islocked }, /* islocked */ + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } +}; +struct vnodeopv_desc sync_vnodeop_opv_desc = + { &sync_vnodeop_p, sync_vnodeop_entries }; + +/* + * Create a new filesystem syncer vnode for the specified mount point. + */ +int +vfs_allocate_syncvnode(mp) + struct mount *mp; +{ + struct vnode *vp; + static long start, incr, next; + int error; + + /* Allocate a new vnode */ + if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { + mp->mnt_syncer = NULL; + return (error); + } + vp->v_writecount = 1; + vp->v_type = VNON; + /* + * Place the vnode onto the syncer worklist. We attempt to + * scatter them about on the list so that they will go off + * at evenly distributed times even if all the filesystems + * are mounted at once. + */ + next += incr; + if (next == 0 || next > syncer_maxdelay) { + start /= 2; + incr /= 2; + if (start == 0) { + start = syncer_maxdelay / 2; + incr = syncer_maxdelay; + } + next = start; + } + vn_syncer_add_to_worklist(vp, next); + mp->mnt_syncer = vp; + return (0); +} + +/* + * Do a lazy sync of the filesystem. + */ +int +sync_fsync(v) + void *v; +{ + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap = v; + + struct vnode *syncvp = ap->a_vp; + struct mount *mp = syncvp->v_mount; + int asyncflag; + + /* + * We only need to do something if this is a lazy evaluation. + */ + if (ap->a_waitfor != MNT_LAZY) + return (0); + + /* + * Move ourselves to the back of the sync list. + */ + LIST_REMOVE(syncvp, v_synclist); + vn_syncer_add_to_worklist(syncvp, syncdelay); + + /* + * Walk the list of vnodes pushing all that are dirty and + * not already on the sync list. + */ + simple_lock(&mountlist_slock); + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, ap->a_p) == 0) { + asyncflag = mp->mnt_flag & MNT_ASYNC; + mp->mnt_flag &= ~MNT_ASYNC; + VFS_SYNC(mp, MNT_LAZY, ap->a_cred, ap->a_p); + if (asyncflag) + mp->mnt_flag |= MNT_ASYNC; + vfs_unbusy(mp, ap->a_p); + } + return (0); +} + +/* + * The syncer vnode is no longer needed and is being decommissioned. 
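
The start/incr/next recurrence in vfs_allocate_syncvnode() below is worth seeing in action: it hands out well-scattered slots (16, 8, 24, 4, 12, ...) so the syncer vnodes of filesystems mounted at the same time do not all fire in the same second. The recurrence is copied from the function; the driver around it is invented, with syncer_maxdelay assumed to be 32:

    #include <stdio.h>

    int
    main(void)
    {
            static long start, incr, next;
            long maxdelay = 32;
            int i;

            for (i = 0; i < 10; i++) {
                    next += incr;
                    if (next == 0 || next > maxdelay) {
                            /* halve the stride to fill in the gaps */
                            start /= 2;
                            incr /= 2;
                            if (start == 0) {
                                    start = maxdelay / 2;
                                    incr = maxdelay;
                            }
                            next = start;
                    }
                    printf("mount %d -> slot %ld\n", i, next);
            }
            return (0);
    }

Each time the stride runs off the end of the wheel it is halved and restarted at half the previous offset, so successive mounts keep bisecting the remaining gaps.
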
+ */ +int +sync_inactive(v) + void *v; + +{ + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap = v; + + struct vnode *vp = ap->a_vp; + + if (vp->v_usecount == 0) + return (0); + vp->v_mount->mnt_syncer = NULL; + LIST_REMOVE(vp, v_synclist); + vp->v_writecount = 0; + vput(vp); + return (0); +} + +/* + * Print out a syncer vnode. + */ +int +sync_print(v) + void *v; + +{ + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + + printf("syncer vnode"); + if (vp->v_vnlock != NULL) + lockmgr_printinfo(vp->v_vnlock); + printf("\n"); + return (0); +} + diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 74d914ee7e8..f1e566ae6b8 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_syscalls.c,v 1.25 1997/03/02 09:38:35 millert Exp $ */ +/* $OpenBSD: vfs_syscalls.c,v 1.26 1997/10/06 15:12:43 csapuntz Exp $ */ /* $NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $ */ /* @@ -102,10 +102,11 @@ sys_mount(p, v, retval) register struct vnode *vp; register struct mount *mp; int error, flag = 0; - u_long fsindex = 0; + u_long fstypenum = 0; char fstypename[MFSNAMELEN]; struct vattr va; struct nameidata nd; + struct vfsconf *vfsp; if (usermount == 0 && (error = suser(p->p_ucred, &p->p_acflag))) return (error); @@ -156,7 +157,7 @@ sys_mount(p, v, retval) } SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); goto update; } /* @@ -195,12 +196,19 @@ sys_mount(p, v, retval) * string, we check to see if it matches one of the historic * filesystem types. */ - fsindex = (u_long)SCARG(uap, type); - if (fsindex >= nvfssw || vfssw[fsindex] == NULL) { - vput(vp); - return (ENODEV); + fstypenum = (u_long)SCARG(uap, type); + + if (fstypenum < maxvfsconf) { + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (vfsp->vfc_typenum == fstypenum) + break; + if (vfsp == NULL) { + vput(vp); + return (ENODEV); + } + strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN); + } - strncpy(fstypename, vfssw[fsindex]->vfs_name, MFSNAMELEN); #else vput(vp); return (error); @@ -212,14 +220,16 @@ sys_mount(p, v, retval) strncpy( fstypename, "ffs", MFSNAMELEN); } #endif - for (fsindex = 0; fsindex < nvfssw; fsindex++) - if (vfssw[fsindex] != NULL && - !strncmp(vfssw[fsindex]->vfs_name, fstypename, MFSNAMELEN)) + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { + if (!strcmp(vfsp->vfc_name, fstypename)) break; - if (fsindex >= nvfssw) { + } + + if (vfsp == NULL) { vput(vp); return (ENODEV); } + if (vp->v_mountedhere != NULL) { vput(vp); return (EBUSY); @@ -231,14 +241,14 @@ sys_mount(p, v, retval) mp = (struct mount *)malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); - mp->mnt_op = vfssw[fsindex]; - if ((error = vfs_lock(mp)) != 0) { - free((caddr_t)mp, M_MOUNT); - vput(vp); - return (error); - } - /* Do this early in case we block later. 
*/ - vfssw[fsindex]->vfs_refcount++; + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); + vfs_busy(mp, LK_NOWAIT, 0, p); + mp->mnt_op = vfsp->vfc_vfsops; + mp->mnt_vfc = vfsp; + vfsp->vfc_refcount++; + mp->mnt_stat.f_type = vfsp->vfc_typenum; + mp->mnt_flag |= (vfsp->vfc_flags & MNT_VISFLAGMASK); + strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); vp->v_mountedhere = mp; mp->mnt_vnodecovered = vp; mp->mnt_stat.f_owner = p->p_ucred->cr_uid; @@ -266,6 +276,17 @@ update: (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR); if (error) mp->mnt_flag = flag; + + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + if (mp->mnt_syncer == NULL) + error = vfs_allocate_syncvnode(mp); + } else { + if (mp->mnt_syncer != NULL) + vgone(mp->mnt_syncer); + mp->mnt_syncer = NULL; + } + + vfs_unbusy(mp, p); return (error); } /* @@ -273,16 +294,20 @@ update: */ cache_purge(vp); if (!error) { + simple_lock(&mountlist_slock); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); + simple_unlock(&mountlist_slock); checkdirs(vp); - VOP_UNLOCK(vp); - vfs_unlock(mp); + VOP_UNLOCK(vp, 0, p); + if ((mp->mnt_flag & MNT_RDONLY) == 0) + error = vfs_allocate_syncvnode(mp); + vfs_unbusy(mp, p); (void) VFS_STATFS(mp, &mp->mnt_stat, p); - error = VFS_START(mp, 0, p); + if ((error = VFS_START(mp, 0, p)) != 0) + vrele(vp); } else { mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0; - vfssw[fsindex]->vfs_refcount--; - vfs_unlock(mp); + vfs_unbusy(mp, p); free((caddr_t)mp, M_MOUNT); vput(vp); } @@ -397,36 +422,40 @@ dounmount(mp, flags, p) struct vnode *coveredvp; int error; - coveredvp = mp->mnt_vnodecovered; - if (vfs_busy(mp)) - return (EBUSY); + simple_lock(&mountlist_slock); mp->mnt_flag |= MNT_UNMOUNT; - if ((error = vfs_lock(mp)) != 0) + lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p); + mp->mnt_flag &=~ MNT_ASYNC; + vnode_pager_umount(mp); /* release cached vnodes */ + cache_purgevfs(mp); /* remove cache entries for this file sys */ + if (mp->mnt_syncer != NULL) + vgone(mp->mnt_syncer); + if (((mp->mnt_flag & MNT_RDONLY) || + (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || + (flags & MNT_FORCE)) + error = VFS_UNMOUNT(mp, flags, p); + simple_lock(&mountlist_slock); + if (error) { + if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) + (void) vfs_allocate_syncvnode(mp); + mp->mnt_flag &= ~MNT_UNMOUNT; + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, + &mountlist_slock, p); return (error); - - mp->mnt_flag &=~ MNT_ASYNC; - vnode_pager_umount(mp); /* release cached vnodes */ - cache_purgevfs(mp); /* remove cache entries for this file sys */ - if ((error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0 || - (flags & MNT_FORCE)) - error = VFS_UNMOUNT(mp, flags, p); - mp->mnt_flag &= ~MNT_UNMOUNT; - vfs_unbusy(mp); - if (error) { - vfs_unlock(mp); - } else { - CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); - if (coveredvp != NULLVP) { - vrele(coveredvp); - coveredvp->v_mountedhere = (struct mount *)0; - } - mp->mnt_op->vfs_refcount--; - vfs_unlock(mp); - if (mp->mnt_vnodelist.lh_first != NULL) - panic("unmount: dangling vnode"); - free((caddr_t)mp, M_MOUNT); } - return (error); + CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); + if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { + coveredvp->v_mountedhere = (struct mount *)0; + vrele(coveredvp); + } + mp->mnt_vfc->vfc_refcount--; + if (mp->mnt_vnodelist.lh_first != NULL) + panic("unmount: dangling vnode"); + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p); + if (mp->mnt_flag & MNT_MWAIT) + 
wakeup((caddr_t)mp); + free((caddr_t)mp, M_MOUNT); + return (0); } /* @@ -447,31 +476,25 @@ sys_sync(p, v, retval) register struct mount *mp, *nmp; int asyncflag; + simple_lock(&mountlist_slock); for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { - /* - * Get the next pointer in case we hang on vfs_busy - * while we are being unmounted. - */ - nmp = mp->mnt_list.cqe_prev; - /* - * The lock check below is to avoid races with mount - * and unmount. - */ - if ((mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY)) == 0 && - !vfs_busy(mp)) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + nmp = mp->mnt_list.cqe_next; + continue; + } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; - /* - * Get the next pointer again, as the next filesystem - * might have been unmounted while we were sync'ing. - */ - nmp = mp->mnt_list.cqe_prev; - vfs_unbusy(mp); } + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp, p); } + simple_unlock(&mountlist_slock); + #ifdef DEBUG if (syncprt) vfs_bufstats(); @@ -596,7 +619,7 @@ sys_getfsstat(p, v, retval) syscallarg(long) bufsize; syscallarg(int) flags; } */ *uap = v; - register struct mount *mp; + register struct mount *mp, *nmp; register struct statfs *sp; caddr_t sfsp; long count, maxcount, error; @@ -604,20 +627,28 @@ sys_getfsstat(p, v, retval) maxcount = SCARG(uap, bufsize) / sizeof(struct statfs); sfsp = (caddr_t)SCARG(uap, buf); - for (count = 0, mp = mountlist.cqh_first; - mp != (void *)&mountlist; - mp = mp->mnt_list.cqe_next) { - if (sfsp && count < maxcount && - ((mp->mnt_flag & MNT_MLOCK) == 0)) { + count = 0; + simple_lock(&mountlist_slock); + for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + nmp = mp->mnt_list.cqe_next; + continue; + } + if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* - * If MNT_NOWAIT is specified, do not refresh the - * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. - */ - if (((SCARG(uap, flags) & MNT_NOWAIT) == 0 || + * If MNT_NOWAIT or MNT_LAZY is specified, do not + * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY + * overrides MNT_WAIT. 
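
sys_sync() and sys_getfsstat() in this hunk share a traversal idiom: hold mountlist_slock while reading the list, try-busy each mount (skipping rather than sleeping, so an unmount in progress cannot deadlock the walk), and resample the successor pointer only after retaking the list lock, since a mount may disappear while it is unbusied. The skeleton of that idiom, using the document's cqh_first/cqe_next fields and trivial stand-ins for the locks:

    #include <stdio.h>
    #include <sys/queue.h>

    struct mnt {
            CIRCLEQ_ENTRY(mnt) mnt_list;
            const char *name;
            int unmounting;         /* stands in for MNT_UNMOUNT */
    };
    CIRCLEQ_HEAD(mntlist, mnt);

    /* try-busy: fail instead of sleeping, as vfs_busy(..., LK_NOWAIT) does */
    static int try_busy(struct mnt *mp) { return (mp->unmounting ? -1 : 0); }
    static void unbusy(struct mnt *mp) { (void)mp; }

    static void
    walk(struct mntlist *mlp)
    {
            struct mnt *mp, *nmp;

            for (mp = mlp->cqh_first; mp != (void *)mlp; mp = nmp) {
                    if (try_busy(mp)) {
                            nmp = mp->mnt_list.cqe_next;
                            continue;       /* skip a dying mount */
                    }
                    printf("visiting %s\n", mp->name);  /* e.g. VFS_SYNC */
                    nmp = mp->mnt_list.cqe_next;  /* sample before unbusy */
                    unbusy(mp);
            }
    }

    int
    main(void)
    {
            struct mntlist mlist;
            struct mnt a = { .name = "a" };
            struct mnt b = { .name = "b", .unmounting = 1 };

            CIRCLEQ_INIT(&mlist);
            CIRCLEQ_INSERT_TAIL(&mlist, &a, mnt_list);
            CIRCLEQ_INSERT_TAIL(&mlist, &b, mnt_list);
            walk(&mlist);           /* visits "a", skips "b" */
            return (0);
    }

In the kernel the successor is sampled with mountlist_slock held on both sides of the work, which is the detail every converted loop in this commit repeats.
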
+ */ + if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 || (SCARG(uap, flags) & MNT_WAIT)) && - (error = VFS_STATFS(mp, sp, p))) - continue; + (error = VFS_STATFS(mp, sp, p))) { + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp, p); + continue; + } sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (suser(p->p_ucred, &p->p_acflag)) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); @@ -630,7 +661,11 @@ sys_getfsstat(p, v, retval) sfsp += sizeof(*sp); } count++; + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp, p); } + simple_unlock(&mountlist_slock); if (sfsp && count > maxcount) *retval = maxcount; else @@ -661,7 +696,7 @@ sys_fchdir(p, v, retval) return (error); vp = (struct vnode *)fp->f_data; VREF(vp); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type != VDIR) error = ENOTDIR; else @@ -677,11 +712,21 @@ sys_fchdir(p, v, retval) vput(vp); vp = tdp; } - VOP_UNLOCK(vp); + while (!error && (mp = vp->v_mountedhere) != NULL) { + if (vfs_busy(mp, 0, 0, p)) + continue; + error = VFS_ROOT(mp, &tdp); + vfs_unbusy(mp, p); + if (error) + break; + vput(vp); + vp = tdp; + } if (error) { - vrele(vp); + vput(vp); return (error); } + VOP_UNLOCK(vp, 0, p); vrele(fdp->fd_cdir); fdp->fd_cdir = vp; return (0); @@ -768,9 +813,10 @@ change_dir(ndp, p) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); - VOP_UNLOCK(vp); if (error) - vrele(vp); + vput(vp); + else + VOP_UNLOCK(vp, 0, p); return (error); } @@ -837,7 +883,7 @@ sys_open(p, v, retval) type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type); if (error) { (void) vn_close(vp, fp->f_flag, fp->f_cred, p); @@ -845,10 +891,10 @@ sys_open(p, v, retval) fdp->fd_ofiles[indx] = NULL; return (error); } - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); fp->f_flag |= FHASLOCK; } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); *retval = indx; return (0); } @@ -1417,7 +1463,7 @@ sys_chflags(p, v, retval) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else { @@ -1452,7 +1498,7 @@ sys_fchflags(p, v, retval) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else { @@ -1460,7 +1506,7 @@ sys_fchflags(p, v, retval) vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1488,7 +1534,7 @@ sys_chmod(p, v, retval) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else { @@ -1523,7 +1569,7 @@ sys_fchmod(p, v, retval) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else { @@ -1531,7 +1577,7 @@ sys_fchmod(p, v, retval) vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1561,7 +1607,7 @@ sys_chown(p, v, retval) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - 
VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else { @@ -1613,7 +1659,7 @@ sys_lchown(p, v, retval) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else { @@ -1658,88 +1704,88 @@ sys_fchown(p, v, retval) struct vattr vattr; int error; struct file *fp; - u_short mode; - - if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) - return (error); - vp = (struct vnode *)fp->f_data; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); - if (vp->v_mount->mnt_flag & MNT_RDONLY) - error = EROFS; - else { - if (suser(p->p_ucred, &p->p_acflag) || - suid_clear) { - error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); - if (error) - goto out; - mode = vattr.va_mode & ~(VSUID | VSGID); - if (mode == vattr.va_mode) - mode = VNOVAL; - } - else - mode = VNOVAL; - VATTR_NULL(&vattr); - vattr.va_uid = SCARG(uap, uid); - vattr.va_gid = SCARG(uap, gid); - vattr.va_mode = mode; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - } + u_short mode; + + if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + vp = (struct vnode *)fp->f_data; + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + if (vp->v_mount->mnt_flag & MNT_RDONLY) + error = EROFS; + else { + if (suser(p->p_ucred, &p->p_acflag) || + suid_clear) { + error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); + if (error) + goto out; + mode = vattr.va_mode & ~(VSUID | VSGID); + if (mode == vattr.va_mode) + mode = VNOVAL; + } + else + mode = VNOVAL; + VATTR_NULL(&vattr); + vattr.va_uid = SCARG(uap, uid); + vattr.va_gid = SCARG(uap, gid); + vattr.va_mode = mode; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + } out: - VOP_UNLOCK(vp); - return (error); + VOP_UNLOCK(vp, 0, p); + return (error); } - /* * Set the access and modification times given a path name. 
*/ /* ARGSUSED */ int sys_utimes(p, v, retval) - struct proc *p; - void *v; - register_t *retval; + struct proc *p; + void *v; + register_t *retval; { - register struct sys_utimes_args /* { - syscallarg(char *) path; - syscallarg(struct timeval *) tptr; - } */ *uap = v; - register struct vnode *vp; - struct timeval tv[2]; - struct vattr vattr; - int error; - struct nameidata nd; - - VATTR_NULL(&vattr); - if (SCARG(uap, tptr) == NULL) { - microtime(&tv[0]); - tv[1] = tv[0]; - vattr.va_vaflags |= VA_UTIMES_NULL; - } else { - error = copyin((caddr_t)SCARG(uap, tptr), (caddr_t)tv, - sizeof (tv)); - if (error) - return (error); - } - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); - if ((error = namei(&nd)) != 0) - return (error); - vp = nd.ni_vp; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); - if (vp->v_mount->mnt_flag & MNT_RDONLY) - error = EROFS; - else { - vattr.va_atime.tv_sec = tv[0].tv_sec; - vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000; - vattr.va_mtime.tv_sec = tv[1].tv_sec; - vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - } + register struct sys_utimes_args /* { + syscallarg(char *) path; + syscallarg(struct timeval *) tptr; + } */ *uap = v; + register struct vnode *vp; + struct timeval tv[2]; + struct vattr vattr; + int error; + struct nameidata nd; + + VATTR_NULL(&vattr); + if (SCARG(uap, tptr) == NULL) { + microtime(&tv[0]); + tv[1] = tv[0]; + vattr.va_vaflags |= VA_UTIMES_NULL; + } else { + error = copyin((caddr_t)SCARG(uap, tptr), (caddr_t)tv, + sizeof (tv)); + if (error) + return (error); + } + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + if (vp->v_mount->mnt_flag & MNT_RDONLY) + error = EROFS; + else { + vattr.va_atime.tv_sec = tv[0].tv_sec; + vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000; + vattr.va_mtime.tv_sec = tv[1].tv_sec; + vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + } vput(vp); - return (error); + return (error); } + /* * Set the access and modification times given a file descriptor. 
+
 /*
  * Set the access and modification times given a file descriptor.
  */
@@ -1775,7 +1821,7 @@ sys_futimes(p, v, retval)
 		return (error);
 	vp = (struct vnode *)fp->f_data;
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
-	VOP_LOCK(vp);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
 		error = EROFS;
 	else {
@@ -1785,7 +1831,7 @@ sys_futimes(p, v, retval)
 		vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000;
 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	}
-	VOP_UNLOCK(vp);
+	VOP_UNLOCK(vp, 0, p);
 	return (error);
 }
@@ -1814,7 +1860,7 @@ sys_truncate(p, v, retval)
 		return (error);
 	vp = nd.ni_vp;
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
-	VOP_LOCK(vp);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if (vp->v_type == VDIR)
 		error = EISDIR;
 	else if ((error = vn_writechk(vp)) == 0 &&
@@ -1853,7 +1899,7 @@ sys_ftruncate(p, v, retval)
 		return (EINVAL);
 	vp = (struct vnode *)fp->f_data;
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
-	VOP_LOCK(vp);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if (vp->v_type == VDIR)
 		error = EISDIR;
 	else if ((error = vn_writechk(vp)) == 0) {
@@ -1861,7 +1907,7 @@ sys_ftruncate(p, v, retval)
 		vattr.va_size = SCARG(uap, length);
 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
 	}
-	VOP_UNLOCK(vp);
+	VOP_UNLOCK(vp, 0, p);
 	return (error);
 }
@@ -1885,9 +1931,9 @@ sys_fsync(p, v, retval)
 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 		return (error);
 	vp = (struct vnode *)fp->f_data;
-	VOP_LOCK(vp);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
-	VOP_UNLOCK(vp);
+	VOP_UNLOCK(vp, 0, p);
 	return (error);
 }
@@ -2108,11 +2154,11 @@ unionread:
 	auio.uio_segflg = UIO_USERSPACE;
 	auio.uio_procp = p;
 	auio.uio_resid = SCARG(uap, count);
-	VOP_LOCK(vp);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	loff = auio.uio_offset = fp->f_offset;
-	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, (u_long *)0, 0);
+	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 0, 0);
 	fp->f_offset = auio.uio_offset;
-	VOP_UNLOCK(vp);
+	VOP_UNLOCK(vp, 0, p);
 	if (error)
 		return (error);
 	if ((SCARG(uap, count) == auio.uio_resid) &&
@@ -2182,17 +2228,13 @@ sys_revoke(p, v, retval)
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
-	if (vp->v_type != VCHR && vp->v_type != VBLK) {
-		error = EINVAL;
-		goto out;
-	}
 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
 		goto out;
 	if (p->p_ucred->cr_uid != vattr.va_uid &&
 	    (error = suser(p->p_ucred, &p->p_acflag)))
 		goto out;
 	if (vp->v_usecount > 1 || (vp->v_flag & VALIASED))
-		vgoneall(vp);
+		VOP_REVOKE(vp, REVOKEALL);
 out:
 	vrele(vp);
 	return (error);
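
Taken together, the vfs_syscalls.c hunks above are one mechanical conversion to the Lite2-style locking interface: the lock and unlock entry points now carry a flags word and the locking process. A sketch of the recurring before/after shape:

	/* Before: */
	VOP_LOCK(vp);
	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
	VOP_UNLOCK(vp);

	/*
	 * After: vn_lock() revalidates the vnode first; LK_RETRY means
	 * "take the lock even if the vnode is being revoked".
	 */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
	VOP_UNLOCK(vp, 0, p);
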
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index b99a001a165..3037cad20fe 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: vfs_vnops.c,v 1.5 1997/08/04 08:24:54 deraadt Exp $	*/
+/*	$OpenBSD: vfs_vnops.c,v 1.6 1997/10/06 15:12:45 csapuntz Exp $	*/
 /*	$NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $	*/
 
 /*
@@ -133,9 +133,9 @@ vn_open(ndp, fmode, cmode)
 		}
 	}
 	if (fmode & O_TRUNC) {
-		VOP_UNLOCK(vp);				/* XXX */
+		VOP_UNLOCK(vp, 0, p);			/* XXX */
 		VOP_LEASE(vp, p, cred, LEASE_WRITE);
-		VOP_LOCK(vp);				/* XXX */
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
 		VATTR_NULL(&va);
 		va.va_size = 0;
 		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
@@ -153,14 +153,14 @@ bad:
 /*
  * Check for write permissions on the specified vnode.
- * The read-only status of the file system is checked.
- * Also, prototype text segments cannot be written.
+ * Prototype text segments cannot be written.
  */
 int
 vn_writechk(vp)
 	register struct vnode *vp;
 {
+#if 0
 	/*
 	 * Disallow write attempts on read-only file systems;
 	 * unless the file is a socket or a block or character
@@ -175,6 +175,7 @@ vn_writechk(vp)
 			break;
 		}
 	}
+#endif
 	/*
 	 * If there's shared text associated with
 	 * the vnode, try to free it up once.  If
@@ -225,7 +226,7 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
 	int error;
 
 	if ((ioflg & IO_NODELOCKED) == 0)
-		VOP_LOCK(vp);
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;
 	aiov.iov_base = base;
@@ -246,7 +247,7 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
 	if (auio.uio_resid && error == 0)
 		error = EIO;
 	if ((ioflg & IO_NODELOCKED) == 0)
-		VOP_UNLOCK(vp);
+		VOP_UNLOCK(vp, 0, p);
 	return (error);
 }
@@ -261,16 +262,17 @@ vn_read(fp, uio, cred)
 {
 	register struct vnode *vp = (struct vnode *)fp->f_data;
 	int count, error = 0;
+	struct proc *p = uio->uio_procp;
 
 	VOP_LEASE(vp, uio->uio_procp, cred, LEASE_READ);
-	VOP_LOCK(vp);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	uio->uio_offset = fp->f_offset;
 	count = uio->uio_resid;
 	if (vp->v_type != VDIR)
 		error = VOP_READ(vp, uio,
 		    (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0, cred);
 	fp->f_offset += count - uio->uio_resid;
-	VOP_UNLOCK(vp);
+	VOP_UNLOCK(vp, 0, p);
 	return (error);
 }
@@ -284,14 +286,18 @@ vn_write(fp, uio, cred)
 	struct ucred *cred;
 {
 	register struct vnode *vp = (struct vnode *)fp->f_data;
+	struct proc *p = uio->uio_procp;
 	int count, error, ioflag = IO_UNIT;
 
 	if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
 		ioflag |= IO_APPEND;
 	if (fp->f_flag & FNONBLOCK)
 		ioflag |= IO_NDELAY;
+	if ((fp->f_flag & O_FSYNC) ||
+	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
+		ioflag |= IO_SYNC;
 	VOP_LEASE(vp, uio->uio_procp, cred, LEASE_WRITE);
-	VOP_LOCK(vp);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	uio->uio_offset = fp->f_offset;
 	count = uio->uio_resid;
 	error = VOP_WRITE(vp, uio, ioflag, cred);
@@ -299,7 +305,7 @@ vn_write(fp, uio, cred)
 		fp->f_offset = uio->uio_offset;
 	else
 		fp->f_offset += count - uio->uio_resid;
-	VOP_UNLOCK(vp);
+	VOP_UNLOCK(vp, 0, p);
 	return (error);
 }
@@ -427,6 +433,36 @@ vn_select(fp, which, p)
 }
 
 /*
+ * Check that the vnode is still valid and, if so, acquire the
+ * requested lock.
+ */
+int
+vn_lock(vp, flags, p)
+	struct vnode *vp;
+	int flags;
+	struct proc *p;
+{
+	int error;
+
+	do {
+		if ((flags & LK_INTERLOCK) == 0)
+			simple_lock(&vp->v_interlock);
+		if (vp->v_flag & VXLOCK) {
+			vp->v_flag |= VXWANT;
+			simple_unlock(&vp->v_interlock);
+			tsleep((caddr_t)vp, PINOD, "vn_lock", 0);
+			error = ENOENT;
+		} else {
+			error = VOP_LOCK(vp, flags | LK_INTERLOCK, p);
+			if (error == 0)
+				return (error);
+		}
+		flags &= ~LK_INTERLOCK;
+	} while (flags & LK_RETRY);
+	return (error);
+}
+
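
The new vn_lock() above is the checked way to take a vnode lock: VXLOCK means the vnode is being revoked, and without LK_RETRY the routine sleeps until the revoke finishes and then fails with ENOENT. A caller that prefers an error to a lock on a dead vnode would look roughly like this sketch (example_getattr is a hypothetical function, not from this commit):

	int
	example_getattr(vp, vap, cred, p)
		struct vnode *vp;
		struct vattr *vap;
		struct ucred *cred;
		struct proc *p;
	{
		int error;

		/* No LK_RETRY: fails with ENOENT if vp is being revoked. */
		if ((error = vn_lock(vp, LK_EXCLUSIVE, p)) != 0)
			return (error);
		error = VOP_GETATTR(vp, vap, cred, p);
		VOP_UNLOCK(vp, 0, p);
		return (error);
	}
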
+/*
  * File table vnode close routine.
  */
 int
diff --git a/sys/kern/vnode_if.c b/sys/kern/vnode_if.c
index 14f1f0c5a09..b373b57c591 100644
--- a/sys/kern/vnode_if.c
+++ b/sys/kern/vnode_if.c
@@ -218,6 +218,22 @@ struct vnodeop_desc vop_write_desc = {
 	NULL,
 };
 
+int vop_lease_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vop_lease_args,a_vp),
+	VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_lease_desc = {
+	0,
+	"vop_lease",
+	0,
+	vop_lease_vp_offsets,
+	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vop_lease_args, a_cred),
+	VOPARG_OFFSETOF(struct vop_lease_args, a_p),
+	VDESC_NO_OFFSET,
+	NULL,
+};
+
 int vop_ioctl_vp_offsets[] = {
 	VOPARG_OFFSETOF(struct vop_ioctl_args,a_vp),
 	VDESC_NO_OFFSET
@@ -250,6 +266,22 @@ struct vnodeop_desc vop_select_desc = {
 	NULL,
 };
 
+int vop_revoke_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vop_revoke_args,a_vp),
+	VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_revoke_desc = {
+	0,
+	"vop_revoke",
+	0,
+	vop_revoke_vp_offsets,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	NULL,
+};
+
 int vop_mmap_vp_offsets[] = {
 	VOPARG_OFFSETOF(struct vop_mmap_args,a_vp),
 	VDESC_NO_OFFSET
@@ -459,7 +491,7 @@ struct vnodeop_desc vop_inactive_desc = {
 	vop_inactive_vp_offsets,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
-	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vop_inactive_args, a_p),
 	VDESC_NO_OFFSET,
 	NULL,
 };
@@ -475,7 +507,7 @@ struct vnodeop_desc vop_reclaim_desc = {
 	vop_reclaim_vp_offsets,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
-	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vop_reclaim_args, a_p),
 	VDESC_NO_OFFSET,
 	NULL,
 };
@@ -491,7 +523,7 @@ struct vnodeop_desc vop_lock_desc = {
 	vop_lock_vp_offsets,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
-	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vop_lock_args, a_p),
 	VDESC_NO_OFFSET,
 	NULL,
 };
@@ -507,7 +539,7 @@ struct vnodeop_desc vop_unlock_desc = {
 	vop_unlock_vp_offsets,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
-	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vop_unlock_args, a_p),
 	VDESC_NO_OFFSET,
 	NULL,
 };
@@ -624,6 +656,22 @@ struct vnodeop_desc vop_valloc_desc = {
 	NULL,
 };
 
+int vop_balloc_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vop_balloc_args,a_vp),
+	VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_balloc_desc = {
+	0,
+	"vop_balloc",
+	0,
+	vop_balloc_vp_offsets,
+	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vop_balloc_args, a_cred),
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	NULL,
+};
+
 int vop_reallocblks_vp_offsets[] = {
 	VOPARG_OFFSETOF(struct vop_reallocblks_args,a_vp),
 	VDESC_NO_OFFSET
@@ -688,22 +736,6 @@ struct vnodeop_desc vop_update_desc = {
 	NULL,
 };
 
-int vop_lease_vp_offsets[] = {
-	VOPARG_OFFSETOF(struct vop_lease_args,a_vp),
-	VDESC_NO_OFFSET
-};
-struct vnodeop_desc vop_lease_desc = {
-	0,
-	"vop_lease",
-	0,
-	vop_lease_vp_offsets,
-	VDESC_NO_OFFSET,
-	VOPARG_OFFSETOF(struct vop_lease_args, a_cred),
-	VOPARG_OFFSETOF(struct vop_lease_args, a_p),
-	VDESC_NO_OFFSET,
-	NULL,
-};
-
 int vop_whiteout_vp_offsets[] = {
 	VOPARG_OFFSETOF(struct vop_whiteout_args,a_dvp),
 	VDESC_NO_OFFSET
@@ -769,8 +801,10 @@ struct vnodeop_desc *vfs_op_descs[] = {
 	&vop_setattr_desc,
 	&vop_read_desc,
 	&vop_write_desc,
+	&vop_lease_desc,
 	&vop_ioctl_desc,
 	&vop_select_desc,
+	&vop_revoke_desc,
 	&vop_mmap_desc,
 	&vop_fsync_desc,
 	&vop_seek_desc,
@@ -794,11 +828,11 @@ struct vnodeop_desc *vfs_op_descs[] = {
 	&vop_advlock_desc,
 	&vop_blkatoff_desc,
 	&vop_valloc_desc,
+	&vop_balloc_desc,
 	&vop_reallocblks_desc,
 	&vop_vfree_desc,
 	&vop_truncate_desc,
 	&vop_update_desc,
-	&vop_lease_desc,
 	&vop_whiteout_desc,
 	NULL
 };
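
The point of the vdesc_*_offset bookkeeping above is that stacking layers can locate the vnode arguments of any operation without knowing its argument structure: vdesc_vp_offsets is a VDESC_NO_OFFSET-terminated array of byte offsets into the op's args block. A sketch of the traversal, modeled on the bypass technique used by null-layer filesystems (descp and ap stand for the descriptor and the argument block handed to the bypass routine):

	struct vnode **vpp;
	int i;

	for (i = 0; descp->vdesc_vp_offsets[i] != VDESC_NO_OFFSET; i++) {
		/* Address of the i-th vnode pointer inside ap. */
		vpp = (struct vnode **)
		    ((char *)ap + descp->vdesc_vp_offsets[i]);
		/* ... substitute or map *vpp here ... */
	}
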
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index 0a8c45ace58..76edff456c6 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -1,4 +1,4 @@
-#	$OpenBSD: vnode_if.src,v 1.4 1996/05/22 11:47:12 deraadt Exp $
+#	$OpenBSD: vnode_if.src,v 1.5 1997/10/06 15:12:48 csapuntz Exp $
 #	$NetBSD: vnode_if.src,v 1.10 1996/05/11 18:26:27 mycroft Exp $
 #
 # Copyright (c) 1992, 1993
@@ -34,12 +34,43 @@
 #
 #	@(#)vnode_if.src	8.3 (Berkeley) 2/3/94
 #
+
+
+#
+# Above each of the vop descriptors is a specification of the locking
+# protocol used by each vop call.  The first column is the name of
+# the variable, the remaining three columns are in, out and error
+# respectively.  The "in" column defines the lock state on input,
+# the "out" column defines the state on successful return, and the
+# "error" column defines the locking state on error exit.
+#
+# The locking value can take the following values:
+# L: locked.
+# U: unlocked.
+# -: not applicable.  vnode does not yet (or no longer) exist.
+# =: the same on input and output, may be either L or U.
+# X: locked if not nil.
+#
+
+#
+#% lookup     dvp     L ? ?
+#% lookup     vpp     - L -
+#
+# XXX - the lookup locking protocol defies simple description and depends
+#       on the flags and operation fields in the (cnp) structure.  Note
+#       especially that *vpp may equal dvp and both may be locked.
+
 vop_lookup {
 	IN struct vnode *dvp;
 	INOUT struct vnode **vpp;
 	IN struct componentname *cnp;
 };
 
+#
+#% create     dvp     L U U
+#% create     vpp     - L -
+#
+
 vop_create {
 	IN WILLRELE struct vnode *dvp;
 	OUT struct vnode **vpp;
@@ -47,6 +78,11 @@ vop_create {
 	IN struct vattr *vap;
 };
 
+#
+#% mknod      dvp     L U U
+#% mknod      vpp     - X -
+#
+
 vop_mknod {
 	IN WILLRELE struct vnode *dvp;
 	OUT WILLRELE struct vnode **vpp;
@@ -54,6 +90,10 @@ vop_mknod {
 	IN struct vattr *vap;
 };
 
+#
+#% open       vp      L L L
+#
+
 vop_open {
 	IN struct vnode *vp;
 	IN int mode;
@@ -61,6 +101,10 @@ vop_open {
 	IN struct proc *p;
 };
 
+#
+#% close      vp      U U U
+#
+
 vop_close {
 	IN struct vnode *vp;
 	IN int fflag;
@@ -68,6 +112,10 @@ vop_close {
 	IN struct proc *p;
 };
 
+#
+#% access     vp      L L L
+#
+
 vop_access {
 	IN struct vnode *vp;
 	IN int mode;
@@ -75,6 +123,10 @@ vop_access {
 	IN struct proc *p;
 };
 
+#
+#% getattr    vp      = = =
+#
+
 vop_getattr {
 	IN struct vnode *vp;
 	IN struct vattr *vap;
@@ -82,6 +134,11 @@ vop_getattr {
 	IN struct proc *p;
 };
 
+
+#
+#% setattr    vp      L L L
+#
+
 vop_setattr {
 	IN struct vnode *vp;
 	IN struct vattr *vap;
@@ -89,6 +146,10 @@ vop_setattr {
 	IN struct proc *p;
 };
 
+#
+#% read       vp      L L L
+#
+
 vop_read {
 	IN struct vnode *vp;
 	INOUT struct uio *uio;
@@ -96,6 +157,10 @@ vop_read {
 	IN struct ucred *cred;
 };
 
+#
+#% write      vp      L L L
+#
+
 vop_write {
 	IN struct vnode *vp;
 	INOUT struct uio *uio;
@@ -103,6 +168,20 @@ vop_write {
 	IN struct ucred *cred;
 };
 
+#
+#% lease      vp      = = =
+#
+vop_lease {
+	IN struct vnode *vp;
+	IN struct proc *p;
+	IN struct ucred *cred;
+	IN int flag;
+};
+
+#
+#% ioctl      vp      U U U
+#
+
 vop_ioctl {
 	IN struct vnode *vp;
 	IN u_long command;
@@ -112,7 +191,11 @@ vop_ioctl {
 	IN struct proc *p;
 };
 
+#
+#% select     vp      U U U
+#
 # Needs work?  (fflags)
+#
 vop_select {
 	IN struct vnode *vp;
 	IN int which;
@@ -121,6 +204,17 @@ vop_select {
 	IN struct proc *p;
 };
 
+#
+#% revoke     vp      U U U
+#
+vop_revoke {
+	IN struct vnode *vp;
+	IN int flags;
+};
+
+#
+# XXX - not used
+#
 vop_mmap {
 	IN struct vnode *vp;
 	IN int fflags;
@@ -128,6 +222,9 @@ vop_mmap {
 	IN struct proc *p;
 };
 
+#
+#% fsync      vp      L L L
+#
 vop_fsync {
 	IN struct vnode *vp;
 	IN struct ucred *cred;
@@ -135,7 +232,10 @@ vop_fsync {
 	IN struct proc *p;
 };
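
To make the notation concrete, the remove entry below reads "#% remove dvp L U U" and "#% remove vp L U U": both vnodes must be locked when the operation is entered, and it hands them back unlocked on success and on error alike. Stated as caller obligations (an illustrative sketch, not code from this commit):

	/* dvp and vp locked on entry ("L")... */
	vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	error = VOP_REMOVE(dvp, vp, cnp);
	/*
	 * ...and unlocked on both exits ("U U"): no VOP_UNLOCK() here,
	 * the operation already released them.
	 */
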
-# Needs word: Is newoff right?  What's it mean?
+#
+# XXX - not used
+# Needs work: Is newoff right?  What's it mean?
+#
 vop_seek {
 	IN struct vnode *vp;
 	IN off_t oldoff;
@@ -143,18 +243,34 @@ vop_seek {
 	IN struct ucred *cred;
 };
 
+#
+#% remove     dvp     L U U
+#% remove     vp      L U U
+#
+
 vop_remove {
 	IN WILLRELE struct vnode *dvp;
 	IN WILLRELE struct vnode *vp;
 	IN struct componentname *cnp;
 };
 
+#
+#% link       vp      U U U
+#% link       tdvp    L U U
+#
 vop_link {
 	IN WILLRELE struct vnode *dvp;
 	IN struct vnode *vp;
 	IN struct componentname *cnp;
 };
 
+#
+#% rename     fdvp    U U U
+#% rename     fvp     U U U
+#% rename     tdvp    L U U
+#% rename     tvp     X U U
+#
+
 vop_rename {
 	IN WILLRELE struct vnode *fdvp;
 	IN WILLRELE struct vnode *fvp;
@@ -164,6 +280,11 @@ vop_rename {
 	IN struct componentname *tcnp;
 };
 
+#
+#% mkdir      dvp     L U U
+#% mkdir      vpp     - L -
+#
+
 vop_mkdir {
 	IN WILLRELE struct vnode *dvp;
 	OUT struct vnode **vpp;
@@ -171,12 +292,26 @@ vop_mkdir {
 	IN struct vattr *vap;
 };
 
+#
+#% rmdir      dvp     L U U
+#% rmdir      vp      L U U
+#
+
 vop_rmdir {
 	IN WILLRELE struct vnode *dvp;
 	IN WILLRELE struct vnode *vp;
 	IN struct componentname *cnp;
 };
 
+#
+#% symlink    dvp     L U U
+#% symlink    vpp     - U -
+#
+# XXX - note that the return vnode has already been VRELE'ed
+#       by the filesystem layer.  To use it you must use vget,
+#       possibly with a further namei.
+#
+
 vop_symlink {
 	IN WILLRELE struct vnode *dvp;
 	OUT WILLRELE struct vnode **vpp;
@@ -185,42 +320,79 @@ vop_symlink {
 	IN char *target;
 };
 
+#
+#% readdir    vp      L L L
+#
+
 vop_readdir {
 	IN struct vnode *vp;
 	INOUT struct uio *uio;
 	IN struct ucred *cred;
-	OUT int *eofflag;
-	OUT u_long *cookies;
-	IN int ncookies;
+	INOUT int *eofflag;
+	OUT int *ncookies;
+	INOUT u_long **cookies;
 };
 
+#
+#% readlink   vp      L L L
+#
 vop_readlink {
 	IN struct vnode *vp;
 	INOUT struct uio *uio;
 	IN struct ucred *cred;
 };
 
+#
+#% abortop    dvp     = = =
+#
 vop_abortop {
 	IN struct vnode *dvp;
 	IN struct componentname *cnp;
 };
 
+
+#
+#% inactive   vp      L U U
+#
 vop_inactive {
 	IN struct vnode *vp;
+	IN struct proc *p;
 };
 
+#
+#% reclaim    vp      U U U
+#
+
 vop_reclaim {
 	IN struct vnode *vp;
+	IN struct proc *p;
 };
 
+#
+#% lock       vp      U L U
+#
+
 vop_lock {
 	IN struct vnode *vp;
+	IN int flags;
+	IN struct proc *p;
 };
 
+#
+#% unlock     vp      L U L
+#
+
 vop_unlock {
 	IN struct vnode *vp;
+	IN int flags;
+	IN struct proc *p;
 };
 
+#
+#% bmap       vp      L L L
+#% bmap       vpp     - U -
+#
+
 vop_bmap {
 	IN struct vnode *vp;
 	IN daddr_t bn;
@@ -229,24 +401,39 @@ vop_bmap {
 	OUT int *runp;
 };
 
+#
+# Needs work: no vp?
+#
 #vop_strategy {
 #	IN struct buf *bp;
 #};
 
+#
+#% print      vp      = = =
+#
 vop_print {
 	IN struct vnode *vp;
 };
 
+#
+#% islocked   vp      = = =
+#
 vop_islocked {
 	IN struct vnode *vp;
 };
 
+#
+#% pathconf   vp      L L L
+#
 vop_pathconf {
 	IN struct vnode *vp;
 	IN int name;
 	OUT register_t *retval;
 };
 
+#
+#% advlock    vp      U U U
+#
 vop_advlock {
 	IN struct vnode *vp;
 	IN caddr_t id;
@@ -255,6 +442,9 @@ vop_advlock {
 	IN int flags;
 };
 
+#
+#% blkatoff   vp      L L L
+#
 vop_blkatoff {
 	IN struct vnode *vp;
 	IN off_t offset;
@@ -262,6 +452,9 @@ vop_blkatoff {
 	OUT struct buf **bpp;
 };
 
+#
+#% valloc     pvp     L L L
+#
 vop_valloc {
 	IN struct vnode *pvp;
 	IN int mode;
@@ -269,17 +462,40 @@ vop_valloc {
 	OUT struct vnode **vpp;
 };
 
+#
+#% balloc     vp      L L L
+#
+vop_balloc {
+	IN struct vnode *vp;
+	IN off_t startoffset;
+	IN int size;
+	IN struct ucred *cred;
+	IN int flags;
+	OUT struct buf **bpp;
+};
+
+#
+#% reallocblks vp     L L L
+#
 vop_reallocblks {
 	IN struct vnode *vp;
 	IN struct cluster_save *buflist;
};
 
+#
+#% vfree      pvp     L L L
+#
+
 vop_vfree {
 	IN struct vnode *pvp;
 	IN ino_t ino;
 	IN int mode;
 };
 
+#
+#% truncate   vp      L L L
+#
+
 vop_truncate {
 	IN struct vnode *vp;
 	IN off_t length;
@@ -288,6 +504,10 @@ vop_truncate {
 	IN struct proc *p;
 };
 
+#
+#% update     vp      L L L
+#
+
 vop_update {
 	IN struct vnode *vp;
 	IN struct timespec *access;
@@ -295,12 +515,11 @@ vop_update {
 	IN int waitfor;
 };
 
-vop_lease {
-	IN struct vnode *vp;
-	IN struct proc *p;
-	IN struct ucred *cred;
-	IN int flag;
-};
+#
+#% whiteout   dvp     L L L
+#% whiteout   cnp     - - -
+#% whiteout   flag    - - -
+#
 
 vop_whiteout {
 	IN struct vnode *dvp;
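
For context on where this file goes: a generator script turns each vnode_if.src entry into the descriptor tables in vnode_if.c seen earlier plus an inline calling stub in vnode_if.h. For the new vop_revoke entry the stub would look roughly like this (a sketch modeled on the 4.4BSD-style generator output, not copied from the tree):

	static __inline int
	VOP_REVOKE(vp, flags)
		struct vnode *vp;
		int flags;
	{
		struct vop_revoke_args a;

		/* Pack the arguments and dispatch through v_op. */
		a.a_desc = VDESC(vop_revoke);
		a.a_vp = vp;
		a.a_flags = flags;
		return (VCALL(vp, VOFFSET(vop_revoke), &a));
	}
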