summary refs log tree commit diff
path: root/sys/kern
diff options
context:
space:
mode:
author Claudio Jeker <claudio@cvs.openbsd.org> 2021-04-28 09:53:54 +0000
committer Claudio Jeker <claudio@cvs.openbsd.org> 2021-04-28 09:53:54 +0000
commit 4887a72e0faf0bb613766e1087f0588d3e6f1efb (patch)
tree 5208e287bba068f9bc6327702566be5413feb7b8 /sys/kern
parent 176253037126c3bd71b12369fe410a948e26b2f2 (diff)
Introduce a global vnode_mtx and use it to make vn_lock() safe to be called
without the KERNEL_LOCK. This moves VXLOCK and VXWANT to a mutex protected v_lflag field and also v_lockcount is protected by this mutex. The vn_lock() dance is overly complex and all of this should probably replaced by a proper lock on the vnode but such a diff is a lot more complex. This is an intermediate step so that at least some calls can be modified to grab the KERNEL_LOCK later or not at all. OK mpi@
Diffstat (limited to 'sys/kern')
-rw-r--r-- sys/kern/spec_vnops.c   26
-rw-r--r-- sys/kern/vfs_default.c  19
-rw-r--r-- sys/kern/vfs_subr.c     51
-rw-r--r-- sys/kern/vfs_vnops.c    24
-rw-r--r-- sys/kern/vfs_vops.c      6
5 files changed, 85 insertions, 41 deletions
diff --git a/sys/kern/spec_vnops.c b/sys/kern/spec_vnops.c
index b55c3941f8e..0e0071ce719 100644
--- a/sys/kern/spec_vnops.c
+++ b/sys/kern/spec_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: spec_vnops.c,v 1.103 2021/03/10 10:21:47 jsg Exp $ */
+/* $OpenBSD: spec_vnops.c,v 1.104 2021/04/28 09:53:53 claudio Exp $ */
/* $NetBSD: spec_vnops.c,v 1.29 1996/04/22 01:42:38 christos Exp $ */
/*
@@ -484,7 +484,7 @@ spec_close(void *v)
struct vnode *vp = ap->a_vp;
dev_t dev = vp->v_rdev;
int (*devclose)(dev_t, int, int, struct proc *);
- int mode, relock, error;
+ int mode, relock, xlocked, error;
int clone = 0;
switch (vp->v_type) {
@@ -512,7 +512,10 @@ spec_close(void *v)
* of forcibly closing the device, otherwise we only
* close on last reference.
*/
- if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+ mtx_enter(&vnode_mtx);
+ xlocked = (vp->v_lflag & VXLOCK);
+ mtx_leave(&vnode_mtx);
+ if (vcount(vp) > 1 && !xlocked)
return (0);
}
devclose = cdevsw[major(dev)].d_close;
@@ -527,10 +530,13 @@ spec_close(void *v)
* that, we must lock the vnode. If we are coming from
* vclean(), the vnode is already locked.
*/
- if (!(vp->v_flag & VXLOCK))
+ mtx_enter(&vnode_mtx);
+ xlocked = (vp->v_lflag & VXLOCK);
+ mtx_leave(&vnode_mtx);
+ if (!xlocked)
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
error = vinvalbuf(vp, V_SAVE, ap->a_cred, p, 0, INFSLP);
- if (!(vp->v_flag & VXLOCK))
+ if (!xlocked)
VOP_UNLOCK(vp);
if (error)
return (error);
@@ -543,7 +549,10 @@ spec_close(void *v)
* sum of the reference counts on all the aliased
* vnodes descends to one, we are on last close.
*/
- if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+ mtx_enter(&vnode_mtx);
+ xlocked = (vp->v_lflag & VXLOCK);
+ mtx_leave(&vnode_mtx);
+ if (vcount(vp) > 1 && !xlocked)
return (0);
devclose = bdevsw[major(dev)].d_close;
mode = S_IFBLK;
@@ -554,7 +563,10 @@ spec_close(void *v)
}
/* release lock if held and this isn't coming from vclean() */
- relock = VOP_ISLOCKED(vp) && !(vp->v_flag & VXLOCK);
+ mtx_enter(&vnode_mtx);
+ xlocked = (vp->v_lflag & VXLOCK);
+ mtx_leave(&vnode_mtx);
+ relock = VOP_ISLOCKED(vp) && !xlocked;
if (relock)
VOP_UNLOCK(vp);
error = (*devclose)(dev, ap->a_fflag, mode, p);
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 35793370639..04c815f8be4 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_default.c,v 1.47 2020/02/20 16:56:52 visa Exp $ */
+/* $OpenBSD: vfs_default.c,v 1.48 2021/04/28 09:53:53 claudio Exp $ */
/*
* Portions of this code are:
@@ -86,9 +86,12 @@ vop_generic_revoke(void *v)
* If a vgone (or vclean) is already in progress,
* wait until it is done and return.
*/
- if (vp->v_flag & VXLOCK) {
- vp->v_flag |= VXWANT;
- tsleep_nsec(vp, PINOD, "vop_generic_revokeall", INFSLP);
+ mtx_enter(&vnode_mtx);
+ if (vp->v_lflag & VXLOCK) {
+ vp->v_lflag |= VXWANT;
+ msleep_nsec(vp, &vnode_mtx, PINOD,
+ "vop_generic_revokeall", INFSLP);
+ mtx_leave(&vnode_mtx);
return(0);
}
@@ -96,7 +99,9 @@ vop_generic_revoke(void *v)
* Ensure that vp will not be vgone'd while we
* are eliminating its aliases.
*/
- vp->v_flag |= VXLOCK;
+ vp->v_lflag |= VXLOCK;
+ mtx_leave(&vnode_mtx);
+
while (vp->v_flag & VALIASED) {
SLIST_FOREACH(vq, vp->v_hashchain, v_specnext) {
if (vq->v_rdev != vp->v_rdev ||
@@ -112,7 +117,9 @@ vop_generic_revoke(void *v)
* really eliminate the vnode after which time
* vgone will awaken any sleepers.
*/
- vp->v_flag &= ~VXLOCK;
+ mtx_enter(&vnode_mtx);
+ vp->v_lflag &= ~VXLOCK;
+ mtx_leave(&vnode_mtx);
}
vgonel(vp, p);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 2a6adabe6af..433ba0030c6 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_subr.c,v 1.304 2021/01/29 10:47:24 claudio Exp $ */
+/* $OpenBSD: vfs_subr.c,v 1.305 2021/04/28 09:53:53 claudio Exp $ */
/* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */
/*
@@ -117,6 +117,8 @@ void vputonfreelist(struct vnode *);
int vflush_vnode(struct vnode *, void *);
int maxvnodes;
+struct mutex vnode_mtx = MUTEX_INITIALIZER(IPL_BIO);
+
void vfs_unmountall(void);
#ifdef DEBUG
@@ -644,16 +646,19 @@ vget(struct vnode *vp, int flags)
* return failure. Cleaning is determined by checking that
* the VXLOCK flag is set.
*/
-
- if (vp->v_flag & VXLOCK) {
+ mtx_enter(&vnode_mtx);
+ if (vp->v_lflag & VXLOCK) {
if (flags & LK_NOWAIT) {
+ mtx_leave(&vnode_mtx);
return (EBUSY);
}
- vp->v_flag |= VXWANT;
- tsleep_nsec(vp, PINOD, "vget", INFSLP);
+ vp->v_lflag |= VXWANT;
+ msleep_nsec(vp, &vnode_mtx, PINOD, "vget", INFSLP);
+ mtx_leave(&vnode_mtx);
return (ENOENT);
}
+ mtx_leave(&vnode_mtx);
onfreelist = vp->v_bioflag & VBIOONFREELIST;
if (vp->v_usecount == 0 && onfreelist) {
@@ -982,7 +987,7 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags)
void
vclean(struct vnode *vp, int flags, struct proc *p)
{
- int active;
+ int active, do_wakeup = 0;
/*
* Check to see if the vnode is in use.
@@ -997,9 +1002,10 @@ vclean(struct vnode *vp, int flags, struct proc *p)
* Prevent the vnode from being recycled or
* brought into use while we clean it out.
*/
- if (vp->v_flag & VXLOCK)
+ mtx_enter(&vnode_mtx);
+ if (vp->v_lflag & VXLOCK)
panic("vclean: deadlock");
- vp->v_flag |= VXLOCK;
+ vp->v_lflag |= VXLOCK;
if (vp->v_lockcount > 0) {
/*
@@ -1007,9 +1013,11 @@ vclean(struct vnode *vp, int flags, struct proc *p)
* observed that the vnode is about to be exclusively locked
* before continuing.
*/
- tsleep_nsec(&vp->v_lockcount, PINOD, "vop_lock", INFSLP);
+ msleep_nsec(&vp->v_lockcount, &vnode_mtx, PINOD, "vop_lock",
+ INFSLP);
KASSERT(vp->v_lockcount == 0);
}
+ mtx_leave(&vnode_mtx);
/*
* Even if the count is zero, the VOP_INACTIVE routine may still
@@ -1067,14 +1075,18 @@ vclean(struct vnode *vp, int flags, struct proc *p)
vp->v_op = &dead_vops;
VN_KNOTE(vp, NOTE_REVOKE);
vp->v_tag = VT_NON;
- vp->v_flag &= ~VXLOCK;
#ifdef VFSLCKDEBUG
vp->v_flag &= ~VLOCKSWORK;
#endif
- if (vp->v_flag & VXWANT) {
- vp->v_flag &= ~VXWANT;
+ mtx_enter(&vnode_mtx);
+ vp->v_lflag &= ~VXLOCK;
+ if (vp->v_lflag & VXWANT) {
+ vp->v_lflag &= ~VXWANT;
+ do_wakeup = 1;
+ }
+ mtx_leave(&vnode_mtx);
+ if (do_wakeup)
wakeup(vp);
- }
}
/*
@@ -1116,11 +1128,14 @@ vgonel(struct vnode *vp, struct proc *p)
* If a vgone (or vclean) is already in progress,
* wait until it is done and return.
*/
- if (vp->v_flag & VXLOCK) {
- vp->v_flag |= VXWANT;
- tsleep_nsec(vp, PINOD, "vgone", INFSLP);
+ mtx_enter(&vnode_mtx);
+ if (vp->v_lflag & VXLOCK) {
+ vp->v_lflag |= VXWANT;
+ msleep_nsec(vp, &vnode_mtx, PINOD, "vgone", INFSLP);
+ mtx_leave(&vnode_mtx);
return;
}
+ mtx_leave(&vnode_mtx);
/*
* Clean out the filesystem specific data.
@@ -1275,9 +1290,9 @@ vprint(char *label, struct vnode *vp)
strlcat(buf, "|VTEXT", sizeof buf);
if (vp->v_flag & VSYSTEM)
strlcat(buf, "|VSYSTEM", sizeof buf);
- if (vp->v_flag & VXLOCK)
+ if (vp->v_lflag & VXLOCK)
strlcat(buf, "|VXLOCK", sizeof buf);
- if (vp->v_flag & VXWANT)
+ if (vp->v_lflag & VXWANT)
strlcat(buf, "|VXWANT", sizeof buf);
if (vp->v_bioflag & VBIOWAIT)
strlcat(buf, "|VBIOWAIT", sizeof buf);
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index fefbebf3cfd..945b40935e2 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_vnops.c,v 1.114 2020/04/08 08:07:51 mpi Exp $ */
+/* $OpenBSD: vfs_vnops.c,v 1.115 2021/04/28 09:53:53 claudio Exp $ */
/* $NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $ */
/*
@@ -563,19 +563,29 @@ vn_poll(struct file *fp, int events, struct proc *p)
int
vn_lock(struct vnode *vp, int flags)
{
- int error;
+ int error, xlocked, do_wakeup;
do {
- if (vp->v_flag & VXLOCK) {
- vp->v_flag |= VXWANT;
- tsleep_nsec(vp, PINOD, "vn_lock", INFSLP);
+ mtx_enter(&vnode_mtx);
+ if (vp->v_lflag & VXLOCK) {
+ vp->v_lflag |= VXWANT;
+ msleep_nsec(vp, &vnode_mtx, PINOD, "vn_lock", INFSLP);
+ mtx_leave(&vnode_mtx);
error = ENOENT;
} else {
vp->v_lockcount++;
+ mtx_leave(&vnode_mtx);
+
error = VOP_LOCK(vp, flags);
+
+ mtx_enter(&vnode_mtx);
vp->v_lockcount--;
+ do_wakeup = (vp->v_lockcount == 0);
+ xlocked = vp->v_lflag & VXLOCK;
+ mtx_leave(&vnode_mtx);
+
if (error == 0) {
- if ((vp->v_flag & VXLOCK) == 0)
+ if (!xlocked)
return (0);
/*
@@ -585,7 +595,7 @@ vn_lock(struct vnode *vp, int flags)
*/
error = ENOENT;
VOP_UNLOCK(vp);
- if (vp->v_lockcount == 0)
+ if (do_wakeup)
wakeup_one(&vp->v_lockcount);
}
}
diff --git a/sys/kern/vfs_vops.c b/sys/kern/vfs_vops.c
index d153c9f309e..877daef51b2 100644
--- a/sys/kern/vfs_vops.c
+++ b/sys/kern/vfs_vops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_vops.c,v 1.29 2020/10/07 12:33:03 mpi Exp $ */
+/* $OpenBSD: vfs_vops.c,v 1.30 2021/04/28 09:53:53 claudio Exp $ */
/*
* Copyright (c) 2010 Thordur I. Bjornsson <thib@openbsd.org>
*
@@ -48,8 +48,6 @@
#include <sys/systm.h>
#ifdef VFSLCKDEBUG
-#include <sys/systm.h> /* for panic() */
-
#define ASSERT_VP_ISLOCKED(vp) do { \
if (((vp)->v_flag & VLOCKSWORK) && !VOP_ISLOCKED(vp)) { \
VOP_PRINT(vp); \
@@ -608,6 +606,8 @@ VOP_LOCK(struct vnode *vp, int flags)
a.a_vp = vp;
a.a_flags = flags;
+ MUTEX_ASSERT_UNLOCKED(&vnode_mtx);
+
if (vp->v_op->vop_lock == NULL)
return (EOPNOTSUPP);