diff options
author | Claudio Jeker <claudio@cvs.openbsd.org> | 2021-04-28 09:53:54 +0000 |
---|---|---|
committer | Claudio Jeker <claudio@cvs.openbsd.org> | 2021-04-28 09:53:54 +0000 |
commit | 4887a72e0faf0bb613766e1087f0588d3e6f1efb (patch) | |
tree | 5208e287bba068f9bc6327702566be5413feb7b8 /sys/kern | |
parent | 176253037126c3bd71b12369fe410a948e26b2f2 (diff) |
Introduce a global vnode_mtx and use it to make vn_lock() safe to be called
without the KERNEL_LOCK.
This moves VXLOCK and VXWANT to a mutex-protected v_lflag field;
v_lockcount is also protected by this mutex.
The vn_lock() dance is overly complex and all of this should probably be replaced
by a proper lock on the vnode but such a diff is a lot more complex. This
is an intermediate step so that at least some calls can be modified to grab
the KERNEL_LOCK later or not at all.
OK mpi@
Diffstat (limited to 'sys/kern')
-rw-r--r-- | sys/kern/spec_vnops.c | 26 | ||||
-rw-r--r-- | sys/kern/vfs_default.c | 19 | ||||
-rw-r--r-- | sys/kern/vfs_subr.c | 51 | ||||
-rw-r--r-- | sys/kern/vfs_vnops.c | 24 | ||||
-rw-r--r-- | sys/kern/vfs_vops.c | 6 |
5 files changed, 85 insertions, 41 deletions
diff --git a/sys/kern/spec_vnops.c b/sys/kern/spec_vnops.c index b55c3941f8e..0e0071ce719 100644 --- a/sys/kern/spec_vnops.c +++ b/sys/kern/spec_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: spec_vnops.c,v 1.103 2021/03/10 10:21:47 jsg Exp $ */ +/* $OpenBSD: spec_vnops.c,v 1.104 2021/04/28 09:53:53 claudio Exp $ */ /* $NetBSD: spec_vnops.c,v 1.29 1996/04/22 01:42:38 christos Exp $ */ /* @@ -484,7 +484,7 @@ spec_close(void *v) struct vnode *vp = ap->a_vp; dev_t dev = vp->v_rdev; int (*devclose)(dev_t, int, int, struct proc *); - int mode, relock, error; + int mode, relock, xlocked, error; int clone = 0; switch (vp->v_type) { @@ -512,7 +512,10 @@ spec_close(void *v) * of forcibly closing the device, otherwise we only * close on last reference. */ - if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) + mtx_enter(&vnode_mtx); + xlocked = (vp->v_lflag & VXLOCK); + mtx_leave(&vnode_mtx); + if (vcount(vp) > 1 && !xlocked) return (0); } devclose = cdevsw[major(dev)].d_close; @@ -527,10 +530,13 @@ spec_close(void *v) * that, we must lock the vnode. If we are coming from * vclean(), the vnode is already locked. */ - if (!(vp->v_flag & VXLOCK)) + mtx_enter(&vnode_mtx); + xlocked = (vp->v_lflag & VXLOCK); + mtx_leave(&vnode_mtx); + if (!xlocked) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = vinvalbuf(vp, V_SAVE, ap->a_cred, p, 0, INFSLP); - if (!(vp->v_flag & VXLOCK)) + if (!xlocked) VOP_UNLOCK(vp); if (error) return (error); @@ -543,7 +549,10 @@ spec_close(void *v) * sum of the reference counts on all the aliased * vnodes descends to one, we are on last close. 
*/ - if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) + mtx_enter(&vnode_mtx); + xlocked = (vp->v_lflag & VXLOCK); + mtx_leave(&vnode_mtx); + if (vcount(vp) > 1 && !xlocked) return (0); devclose = bdevsw[major(dev)].d_close; mode = S_IFBLK; @@ -554,7 +563,10 @@ spec_close(void *v) } /* release lock if held and this isn't coming from vclean() */ - relock = VOP_ISLOCKED(vp) && !(vp->v_flag & VXLOCK); + mtx_enter(&vnode_mtx); + xlocked = (vp->v_lflag & VXLOCK); + mtx_leave(&vnode_mtx); + relock = VOP_ISLOCKED(vp) && !xlocked; if (relock) VOP_UNLOCK(vp); error = (*devclose)(dev, ap->a_fflag, mode, p); diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 35793370639..04c815f8be4 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_default.c,v 1.47 2020/02/20 16:56:52 visa Exp $ */ +/* $OpenBSD: vfs_default.c,v 1.48 2021/04/28 09:53:53 claudio Exp $ */ /* * Portions of this code are: @@ -86,9 +86,12 @@ vop_generic_revoke(void *v) * If a vgone (or vclean) is already in progress, * wait until it is done and return. */ - if (vp->v_flag & VXLOCK) { - vp->v_flag |= VXWANT; - tsleep_nsec(vp, PINOD, "vop_generic_revokeall", INFSLP); + mtx_enter(&vnode_mtx); + if (vp->v_lflag & VXLOCK) { + vp->v_lflag |= VXWANT; + msleep_nsec(vp, &vnode_mtx, PINOD, + "vop_generic_revokeall", INFSLP); + mtx_leave(&vnode_mtx); return(0); } @@ -96,7 +99,9 @@ vop_generic_revoke(void *v) * Ensure that vp will not be vgone'd while we * are eliminating its aliases. */ - vp->v_flag |= VXLOCK; + vp->v_lflag |= VXLOCK; + mtx_leave(&vnode_mtx); + while (vp->v_flag & VALIASED) { SLIST_FOREACH(vq, vp->v_hashchain, v_specnext) { if (vq->v_rdev != vp->v_rdev || @@ -112,7 +117,9 @@ vop_generic_revoke(void *v) * really eliminate the vnode after which time * vgone will awaken any sleepers. 
*/ - vp->v_flag &= ~VXLOCK; + mtx_enter(&vnode_mtx); + vp->v_lflag &= ~VXLOCK; + mtx_leave(&vnode_mtx); } vgonel(vp, p); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 2a6adabe6af..433ba0030c6 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_subr.c,v 1.304 2021/01/29 10:47:24 claudio Exp $ */ +/* $OpenBSD: vfs_subr.c,v 1.305 2021/04/28 09:53:53 claudio Exp $ */ /* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */ /* @@ -117,6 +117,8 @@ void vputonfreelist(struct vnode *); int vflush_vnode(struct vnode *, void *); int maxvnodes; +struct mutex vnode_mtx = MUTEX_INITIALIZER(IPL_BIO); + void vfs_unmountall(void); #ifdef DEBUG @@ -644,16 +646,19 @@ vget(struct vnode *vp, int flags) * return failure. Cleaning is determined by checking that * the VXLOCK flag is set. */ - - if (vp->v_flag & VXLOCK) { + mtx_enter(&vnode_mtx); + if (vp->v_lflag & VXLOCK) { if (flags & LK_NOWAIT) { + mtx_leave(&vnode_mtx); return (EBUSY); } - vp->v_flag |= VXWANT; - tsleep_nsec(vp, PINOD, "vget", INFSLP); + vp->v_lflag |= VXWANT; + msleep_nsec(vp, &vnode_mtx, PINOD, "vget", INFSLP); + mtx_leave(&vnode_mtx); return (ENOENT); } + mtx_leave(&vnode_mtx); onfreelist = vp->v_bioflag & VBIOONFREELIST; if (vp->v_usecount == 0 && onfreelist) { @@ -982,7 +987,7 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags) void vclean(struct vnode *vp, int flags, struct proc *p) { - int active; + int active, do_wakeup = 0; /* * Check to see if the vnode is in use. @@ -997,9 +1002,10 @@ vclean(struct vnode *vp, int flags, struct proc *p) * Prevent the vnode from being recycled or * brought into use while we clean it out. 
*/ - if (vp->v_flag & VXLOCK) + mtx_enter(&vnode_mtx); + if (vp->v_lflag & VXLOCK) panic("vclean: deadlock"); - vp->v_flag |= VXLOCK; + vp->v_lflag |= VXLOCK; if (vp->v_lockcount > 0) { /* @@ -1007,9 +1013,11 @@ vclean(struct vnode *vp, int flags, struct proc *p) * observed that the vnode is about to be exclusively locked * before continuing. */ - tsleep_nsec(&vp->v_lockcount, PINOD, "vop_lock", INFSLP); + msleep_nsec(&vp->v_lockcount, &vnode_mtx, PINOD, "vop_lock", + INFSLP); KASSERT(vp->v_lockcount == 0); } + mtx_leave(&vnode_mtx); /* * Even if the count is zero, the VOP_INACTIVE routine may still @@ -1067,14 +1075,18 @@ vclean(struct vnode *vp, int flags, struct proc *p) vp->v_op = &dead_vops; VN_KNOTE(vp, NOTE_REVOKE); vp->v_tag = VT_NON; - vp->v_flag &= ~VXLOCK; #ifdef VFSLCKDEBUG vp->v_flag &= ~VLOCKSWORK; #endif - if (vp->v_flag & VXWANT) { - vp->v_flag &= ~VXWANT; + mtx_enter(&vnode_mtx); + vp->v_lflag &= ~VXLOCK; + if (vp->v_lflag & VXWANT) { + vp->v_lflag &= ~VXWANT; + do_wakeup = 1; + } + mtx_leave(&vnode_mtx); + if (do_wakeup) wakeup(vp); - } } /* @@ -1116,11 +1128,14 @@ vgonel(struct vnode *vp, struct proc *p) * If a vgone (or vclean) is already in progress, * wait until it is done and return. */ - if (vp->v_flag & VXLOCK) { - vp->v_flag |= VXWANT; - tsleep_nsec(vp, PINOD, "vgone", INFSLP); + mtx_enter(&vnode_mtx); + if (vp->v_lflag & VXLOCK) { + vp->v_lflag |= VXWANT; + msleep_nsec(vp, &vnode_mtx, PINOD, "vgone", INFSLP); + mtx_leave(&vnode_mtx); return; } + mtx_leave(&vnode_mtx); /* * Clean out the filesystem specific data. 
@@ -1275,9 +1290,9 @@ vprint(char *label, struct vnode *vp) strlcat(buf, "|VTEXT", sizeof buf); if (vp->v_flag & VSYSTEM) strlcat(buf, "|VSYSTEM", sizeof buf); - if (vp->v_flag & VXLOCK) + if (vp->v_lflag & VXLOCK) strlcat(buf, "|VXLOCK", sizeof buf); - if (vp->v_flag & VXWANT) + if (vp->v_lflag & VXWANT) strlcat(buf, "|VXWANT", sizeof buf); if (vp->v_bioflag & VBIOWAIT) strlcat(buf, "|VBIOWAIT", sizeof buf); diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index fefbebf3cfd..945b40935e2 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_vnops.c,v 1.114 2020/04/08 08:07:51 mpi Exp $ */ +/* $OpenBSD: vfs_vnops.c,v 1.115 2021/04/28 09:53:53 claudio Exp $ */ /* $NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $ */ /* @@ -563,19 +563,29 @@ vn_poll(struct file *fp, int events, struct proc *p) int vn_lock(struct vnode *vp, int flags) { - int error; + int error, xlocked, do_wakeup; do { - if (vp->v_flag & VXLOCK) { - vp->v_flag |= VXWANT; - tsleep_nsec(vp, PINOD, "vn_lock", INFSLP); + mtx_enter(&vnode_mtx); + if (vp->v_lflag & VXLOCK) { + vp->v_lflag |= VXWANT; + msleep_nsec(vp, &vnode_mtx, PINOD, "vn_lock", INFSLP); + mtx_leave(&vnode_mtx); error = ENOENT; } else { vp->v_lockcount++; + mtx_leave(&vnode_mtx); + error = VOP_LOCK(vp, flags); + + mtx_enter(&vnode_mtx); vp->v_lockcount--; + do_wakeup = (vp->v_lockcount == 0); + xlocked = vp->v_lflag & VXLOCK; + mtx_leave(&vnode_mtx); + if (error == 0) { - if ((vp->v_flag & VXLOCK) == 0) + if (!xlocked) return (0); /* @@ -585,7 +595,7 @@ vn_lock(struct vnode *vp, int flags) */ error = ENOENT; VOP_UNLOCK(vp); - if (vp->v_lockcount == 0) + if (do_wakeup) wakeup_one(&vp->v_lockcount); } } diff --git a/sys/kern/vfs_vops.c b/sys/kern/vfs_vops.c index d153c9f309e..877daef51b2 100644 --- a/sys/kern/vfs_vops.c +++ b/sys/kern/vfs_vops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_vops.c,v 1.29 2020/10/07 12:33:03 mpi Exp $ */ +/* $OpenBSD: vfs_vops.c,v 1.30 2021/04/28 09:53:53 
claudio Exp $ */ /* * Copyright (c) 2010 Thordur I. Bjornsson <thib@openbsd.org> * @@ -48,8 +48,6 @@ #include <sys/systm.h> #ifdef VFSLCKDEBUG -#include <sys/systm.h> /* for panic() */ - #define ASSERT_VP_ISLOCKED(vp) do { \ if (((vp)->v_flag & VLOCKSWORK) && !VOP_ISLOCKED(vp)) { \ VOP_PRINT(vp); \ @@ -608,6 +606,8 @@ VOP_LOCK(struct vnode *vp, int flags) a.a_vp = vp; a.a_flags = flags; + MUTEX_ASSERT_UNLOCKED(&vnode_mtx); + if (vp->v_op->vop_lock == NULL) return (EOPNOTSUPP); |