summary refs log tree commit diff
path: root/sys/kern
diff options
context:
space:
mode:
author Claudio Jeker <claudio@cvs.openbsd.org> 2021-04-28 09:53:54 +0000
committer Claudio Jeker <claudio@cvs.openbsd.org> 2021-04-28 09:53:54 +0000
commit 4887a72e0faf0bb613766e1087f0588d3e6f1efb (patch)
tree 5208e287bba068f9bc6327702566be5413feb7b8 /sys/kern
parent 176253037126c3bd71b12369fe410a948e26b2f2 (diff)
Introduce a global vnode_mtx and use it to make vn_lock() safe to be called
without the KERNEL_LOCK. This moves VXLOCK and VXWANT to a mutex protected v_lflag field and also v_lockcount is protected by this mutex. The vn_lock() dance is overly complex and all of this should probably replaced by a proper lock on the vnode but such a diff is a lot more complex. This is an intermediate step so that at least some calls can be modified to grab the KERNEL_LOCK later or not at all. OK mpi@
Diffstat (limited to 'sys/kern')
-rw-r--r-- sys/kern/spec_vnops.c   26
-rw-r--r-- sys/kern/vfs_default.c  19
-rw-r--r-- sys/kern/vfs_subr.c     51
-rw-r--r-- sys/kern/vfs_vnops.c    24
-rw-r--r-- sys/kern/vfs_vops.c      6
5 files changed, 85 insertions, 41 deletions
diff --git a/sys/kern/spec_vnops.c b/sys/kern/spec_vnops.c
index b55c3941f8e..0e0071ce719 100644
--- a/sys/kern/spec_vnops.c
+++ b/sys/kern/spec_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: spec_vnops.c,v 1.103 2021/03/10 10:21:47 jsg Exp $ */
+/* $OpenBSD: spec_vnops.c,v 1.104 2021/04/28 09:53:53 claudio Exp $ */
/* $NetBSD: spec_vnops.c,v 1.29 1996/04/22 01:42:38 christos Exp $ */
/*
@@ -484,7 +484,7 @@ spec_close(void *v)
struct vnode *vp = ap->a_vp;
dev_t dev = vp->v_rdev;
int (*devclose)(dev_t, int, int, struct proc *);
- int mode, relock, error;
+ int mode, relock, xlocked, error;
int clone = 0;
switch (vp->v_type) {
@@ -512,7 +512,10 @@ spec_close(void *v)
* of forcibly closing the device, otherwise we only
* close on last reference.
*/
- if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+ mtx_enter(&vnode_mtx);
+ xlocked = (vp->v_lflag & VXLOCK);
+ mtx_leave(&vnode_mtx);
+ if (vcount(vp) > 1 && !xlocked)
return (0);
}
devclose = cdevsw[major(dev)].d_close;
@@ -527,10 +530,13 @@ spec_close(void *v)
* that, we must lock the vnode. If we are coming from
* vclean(), the vnode is already locked.
*/
- if (!(vp->v_flag & VXLOCK))
+ mtx_enter(&vnode_mtx);
+ xlocked = (vp->v_lflag & VXLOCK);
+ mtx_leave(&vnode_mtx);
+ if (!xlocked)
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
error = vinvalbuf(vp, V_SAVE, ap->a_cred, p, 0, INFSLP);
- if (!(vp->v_flag & VXLOCK))
+ if (!xlocked)
VOP_UNLOCK(vp);
if (error)
return (error);
@@ -543,7 +549,10 @@ spec_close(void *v)
* sum of the reference counts on all the aliased
* vnodes descends to one, we are on last close.
*/
- if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+ mtx_enter(&vnode_mtx);
+ xlocked = (vp->v_lflag & VXLOCK);
+ mtx_leave(&vnode_mtx);
+ if (vcount(vp) > 1 && !xlocked)
return (0);
devclose = bdevsw[major(dev)].d_close;
mode = S_IFBLK;
@@ -554,7 +563,10 @@ spec_close(void *v)
}
/* release lock if held and this isn't coming from vclean() */
- relock = VOP_ISLOCKED(vp) && !(vp->v_flag & VXLOCK);
+ mtx_enter(&vnode_mtx);
+ xlocked = (vp->v_lflag & VXLOCK);
+ mtx_leave(&vnode_mtx);
+ relock = VOP_ISLOCKED(vp) && !xlocked;
if (relock)
VOP_UNLOCK(vp);
error = (*devclose)(dev, ap->a_fflag, mode, p);
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 35793370639..04c815f8be4 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_default.c,v 1.47 2020/02/20 16:56:52 visa Exp $ */
+/* $OpenBSD: vfs_default.c,v 1.48 2021/04/28 09:53:53 claudio Exp $ */
/*
* Portions of this code are:
@@ -86,9 +86,12 @@ vop_generic_revoke(void *v)
* If a vgone (or vclean) is already in progress,
* wait until it is done and return.
*/
- if (vp->v_flag & VXLOCK) {
- vp->v_flag |= VXWANT;
- tsleep_nsec(vp, PINOD, "vop_generic_revokeall", INFSLP);
+ mtx_enter(&vnode_mtx);
+ if (vp->v_lflag & VXLOCK) {
+ vp->v_lflag |= VXWANT;
+ msleep_nsec(vp, &vnode_mtx, PINOD,
+ "vop_generic_revokeall", INFSLP);
+ mtx_leave(&vnode_mtx);
return(0);
}
@@ -96,7 +99,9 @@ vop_generic_revoke(void *v)
* Ensure that vp will not be vgone'd while we
* are eliminating its aliases.
*/
- vp->v_flag |= VXLOCK;
+ vp->v_lflag |= VXLOCK;
+ mtx_leave(&vnode_mtx);
+
while (vp->v_flag & VALIASED) {
SLIST_FOREACH(vq, vp->v_hashchain, v_specnext) {
if (vq->v_rdev != vp->v_rdev ||
@@ -112,7 +117,9 @@ vop_generic_revoke(void *v)
* really eliminate the vnode after which time
* vgone will awaken any sleepers.
*/
- vp->v_flag &= ~VXLOCK;
+ mtx_enter(&vnode_mtx);
+ vp->v_lflag &= ~VXLOCK;
+ mtx_leave(&vnode_mtx);
}
vgonel(vp, p);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 2a6adabe6af..433ba0030c6 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_subr.c,v 1.304 2021/01/29 10:47:24 claudio Exp $ */
+/* $OpenBSD: vfs_subr.c,v 1.305 2021/04/28 09:53:53 claudio Exp $ */
/* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */
/*
@@ -117,6 +117,8 @@ void vputonfreelist(struct vnode *);
int vflush_vnode(struct vnode *, void *);
int maxvnodes;
+struct mutex vnode_mtx = MUTEX_INITIALIZER(IPL_BIO);
+
void vfs_unmountall(void);
#ifdef DEBUG
@@ -644,16 +646,19 @@ vget(struct vnode *vp, int flags)
* return failure. Cleaning is determined by checking that
* the VXLOCK flag is set.
*/
-
- if (vp->v_flag & VXLOCK) {
+ mtx_enter(&vnode_mtx);
+ if (vp->v_lflag & VXLOCK) {
if (flags & LK_NOWAIT) {
+ mtx_leave(&vnode_mtx);
return (EBUSY);
}
- vp->v_flag |= VXWANT;
- tsleep_nsec(vp, PINOD, "vget", INFSLP);
+ vp->v_lflag |= VXWANT;
+ msleep_nsec(vp, &vnode_mtx, PINOD, "vget", INFSLP);
+ mtx_leave(&vnode_mtx);
return (ENOENT);
}
+ mtx_leave(&vnode_mtx);
onfreelist = vp->v_bioflag & VBIOONFREELIST;
if (vp->v_usecount == 0 && onfreelist) {
@@ -982,7 +987,7 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags)
void
vclean(struct vnode *vp, int flags, struct proc *p)
{
- int active;
+ int active, do_wakeup = 0;
/*
* Check to see if the vnode is in use.
@@ -997,9 +1002,10 @@ vclean(struct vnode *vp, int flags, struct proc *p)
* Prevent the vnode from being recycled or
* brought into use while we clean it out.
*/
- if (vp->v_flag & VXLOCK)
+ mtx_enter(&vnode_mtx);
+ if (vp->v_lflag & VXLOCK)
panic("vclean: deadlock");
- vp->v_flag |= VXLOCK;
+ vp->v_lflag |= VXLOCK;
if (vp->v_lockcount > 0) {
/*
@@ -1007,9 +1013,11 @@ vclean(struct vnode *vp, int flags, struct proc *p)
* observed that the vnode is about to be exclusively locked
* before continuing.
*/
- tsleep_nsec(&vp->v_lockcount, PINOD, "vop_lock", INFSLP);
+ msleep_nsec(&vp->v_lockcount, &vnode_mtx, PINOD, "vop_lock",
+ INFSLP);
KASSERT(vp->v_lockcount == 0);
}
+ mtx_leave(&vnode_mtx);
/*
* Even if the count is zero, the VOP_INACTIVE routine may still
@@ -1067,14 +1075,18 @@ vclean(struct vnode *vp, int flags, struct proc *p)
vp->v_op = &dead_vops;
VN_KNOTE(vp, NOTE_REVOKE);
vp->v_tag = VT_NON;
- vp->v_flag &= ~VXLOCK;
#ifdef VFSLCKDEBUG
vp->v_flag &= ~VLOCKSWORK;
#endif
- if (vp->v_flag & VXWANT) {
- vp->v_flag &= ~VXWANT;
+ mtx_enter(&vnode_mtx);
+ vp->v_lflag &= ~VXLOCK;
+ if (vp->v_lflag & VXWANT) {
+ vp->v_lflag &= ~VXWANT;
+ do_wakeup = 1;
+ }
+ mtx_leave(&vnode_mtx);
+ if (do_wakeup)
wakeup(vp);
- }
}
/*
@@ -1116,11 +1128,14 @@ vgonel(struct vnode *vp, struct proc *p)
* If a vgone (or vclean) is already in progress,
* wait until it is done and return.
*/
- if (vp->v_flag & VXLOCK) {
- vp->v_flag |= VXWANT;
- tsleep_nsec(vp, PINOD, "vgone", INFSLP);
+ mtx_enter(&vnode_mtx);
+ if (vp->v_lflag & VXLOCK) {
+ vp->v_lflag |= VXWANT;
+ msleep_nsec(vp, &vnode_mtx, PINOD, "vgone", INFSLP);
+ mtx_leave(&vnode_mtx);
return;
}
+ mtx_leave(&vnode_mtx);
/*
* Clean out the filesystem specific data.
@@ -1275,9 +1290,9 @@ vprint(char *label, struct vnode *vp)
strlcat(buf, "|VTEXT", sizeof buf);
if (vp->v_flag & VSYSTEM)
strlcat(buf, "|VSYSTEM", sizeof buf);
- if (vp->v_flag & VXLOCK)
+ if (vp->v_lflag & VXLOCK)
strlcat(buf, "|VXLOCK", sizeof buf);
- if (vp->v_flag & VXWANT)
+ if (vp->v_lflag & VXWANT)
strlcat(buf, "|VXWANT", sizeof buf);
if (vp->v_bioflag & VBIOWAIT)
strlcat(buf, "|VBIOWAIT", sizeof buf);
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index fefbebf3cfd..945b40935e2 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_vnops.c,v 1.114 2020/04/08 08:07:51 mpi Exp $ */
+/* $OpenBSD: vfs_vnops.c,v 1.115 2021/04/28 09:53:53 claudio Exp $ */
/* $NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $ */
/*
@@ -563,19 +563,29 @@ vn_poll(struct file *fp, int events, struct proc *p)
int
vn_lock(struct vnode *vp, int flags)
{
- int error;
+ int error, xlocked, do_wakeup;
do {
- if (vp->v_flag & VXLOCK) {
- vp->v_flag |= VXWANT;
- tsleep_nsec(vp, PINOD, "vn_lock", INFSLP);
+ mtx_enter(&vnode_mtx);
+ if (vp->v_lflag & VXLOCK) {
+ vp->v_lflag |= VXWANT;
+ msleep_nsec(vp, &vnode_mtx, PINOD, "vn_lock", INFSLP);
+ mtx_leave(&vnode_mtx);
error = ENOENT;
} else {
vp->v_lockcount++;
+ mtx_leave(&vnode_mtx);
+
error = VOP_LOCK(vp, flags);
+
+ mtx_enter(&vnode_mtx);
vp->v_lockcount--;
+ do_wakeup = (vp->v_lockcount == 0);
+ xlocked = vp->v_lflag & VXLOCK;
+ mtx_leave(&vnode_mtx);
+
if (error == 0) {
- if ((vp->v_flag & VXLOCK) == 0)
+ if (!xlocked)
return (0);
/*
@@ -585,7 +595,7 @@ vn_lock(struct vnode *vp, int flags)
*/
error = ENOENT;
VOP_UNLOCK(vp);
- if (vp->v_lockcount == 0)
+ if (do_wakeup)
wakeup_one(&vp->v_lockcount);
}
}
diff --git a/sys/kern/vfs_vops.c b/sys/kern/vfs_vops.c
index d153c9f309e..877daef51b2 100644
--- a/sys/kern/vfs_vops.c
+++ b/sys/kern/vfs_vops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_vops.c,v 1.29 2020/10/07 12:33:03 mpi Exp $ */
+/* $OpenBSD: vfs_vops.c,v 1.30 2021/04/28 09:53:53 claudio Exp $ */
/*
* Copyright (c) 2010 Thordur I. Bjornsson <thib@openbsd.org>
*
@@ -48,8 +48,6 @@
#include <sys/systm.h>
#ifdef VFSLCKDEBUG
-#include <sys/systm.h> /* for panic() */
-
#define ASSERT_VP_ISLOCKED(vp) do { \
if (((vp)->v_flag & VLOCKSWORK) && !VOP_ISLOCKED(vp)) { \
VOP_PRINT(vp); \
@@ -608,6 +606,8 @@ VOP_LOCK(struct vnode *vp, int flags)
a.a_vp = vp;
a.a_flags = flags;
+ MUTEX_ASSERT_UNLOCKED(&vnode_mtx);
+
if (vp->v_op->vop_lock == NULL)
return (EOPNOTSUPP);