summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBob Beck <beck@cvs.openbsd.org>2024-07-12 08:15:20 +0000
committerBob Beck <beck@cvs.openbsd.org>2024-07-12 08:15:20 +0000
commit89413f200efe95cd81f3067313a5802e1f39bc3b (patch)
tree8ea63569c126d602e0cbd8525d2504f85cd68127
parent0fae8424df55fc9eeb2e693c18ce0c6dc6b59cfc (diff)
Add vdoom() to fix ufs/ext2fs re-use of invalid vnode.
This was noticed by syzkiller and analyzed in isolaiton by mbuhl@ and visa@ two years ago. As the kernel has become more unlocked it has started to appear more and was being hit regularly by jsing@ on the Go builder. The problem was during reclaim of a inode the corresponding vnode could be picked up by a vget() by another thread while the inode was being cleared out in the ufs_inactive routine and the thread running ufs_inactive slept for i/o. When raced the vnode would then not have zero use count and would not be cleared out on exit from ufs_inactive with a dead/invalid vnode being used. While this could get "fixed" by checking for the race happening and trying again in the inactive routine, or by adding "yet another visible vnode locking flag" we choose to add a vdoom() api for the moment that allows the caller to block future attempts to grab this vnode until it is cleared out fully with vclean. Teste by jsing@ on the Go builder and seems to solve the issue. ok kettenis@, claudio@
-rw-r--r--sys/kern/vfs_subr.c30
-rw-r--r--sys/sys/vnode.h4
-rw-r--r--sys/ufs/ext2fs/ext2fs_inode.c16
-rw-r--r--sys/ufs/ufs/ufs_inode.c14
4 files changed, 53 insertions, 11 deletions
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 05c114c6525..0400fca4166 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_subr.c,v 1.320 2024/07/05 05:42:08 jsg Exp $ */
+/* $OpenBSD: vfs_subr.c,v 1.321 2024/07/12 08:15:19 beck Exp $ */
/* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */
/*
@@ -627,6 +627,26 @@ loop:
}
/*
+ * Indicate that this vnode will be recycled by the caller and prevent any other
+ * access to the vnode via vget() until it is cleaned. Callers must ensure
+ * vclean() is called on this vnode. vclean() will permit the vnode to be passed
+ * in with the VXLOCK flag set if VDOOMED is set.
+ */
+void
+vdoom(struct vnode *vp)
+{
+ mtx_enter(&vnode_mtx);
+ KASSERT(vp->v_usecount == 0);
+ if (vp->v_lflag & VXLOCK)
+ panic("vdoom: vnode already locked!");
+ vp->v_lflag |= VXLOCK;
+ if (vp->v_lflag & VDOOMED)
+ panic("vdoom: vnode already doomed!");
+ vp->v_lflag |= VDOOMED;
+ mtx_leave(&vnode_mtx);
+}
+
+/*
* Grab a particular vnode from the free list, increment its
* reference count and lock it. If the vnode lock bit is set,
* the vnode is being eliminated in vgone. In that case, we
@@ -1029,8 +1049,10 @@ vclean(struct vnode *vp, int flags, struct proc *p)
*/
mtx_enter(&vnode_mtx);
if (vp->v_lflag & VXLOCK)
- panic("vclean: deadlock");
+ if (!(vp->v_lflag & VDOOMED))
+ panic("vclean: deadlock");
vp->v_lflag |= VXLOCK;
+ vp->v_lflag &= ~VDOOMED;
if (vp->v_lockcount > 0) {
/*
@@ -1157,7 +1179,7 @@ vgonel(struct vnode *vp, struct proc *p)
* wait until it is done and return.
*/
mtx_enter(&vnode_mtx);
- if (vp->v_lflag & VXLOCK) {
+ if (!(vp->v_lflag & VDOOMED) && vp->v_flag & VXLOCK) {
vp->v_lflag |= VXWANT;
msleep_nsec(vp, &vnode_mtx, PINOD, "vgone", INFSLP);
mtx_leave(&vnode_mtx);
@@ -1328,6 +1350,8 @@ vprint(char *label, struct vnode *vp)
strlcat(buf, "|VBIOONSYNCLIST", sizeof buf);
if (vp->v_flag & VALIASED)
strlcat(buf, "|VALIASED", sizeof buf);
+ if (vp->v_flag & VDOOMED)
+ strlcat(buf, "|VDOOMED", sizeof buf);
if (buf[0] != '\0')
printf(" flags (%s)", &buf[1]);
if (vp->v_data == NULL) {
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index b97d486f14b..4ea623af9cf 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vnode.h,v 1.171 2024/07/05 05:42:08 jsg Exp $ */
+/* $OpenBSD: vnode.h,v 1.172 2024/07/12 08:15:19 beck Exp $ */
/* $NetBSD: vnode.h,v 1.38 1996/02/29 20:59:05 cgd Exp $ */
/*
@@ -146,6 +146,7 @@ struct vnode {
#define VCLONED 0x0400 /* vnode was cloned */
#define VALIASED 0x0800 /* vnode has an alias */
#define VLARVAL 0x1000 /* vnode data not yet set up by higher level */
+#define VDOOMED 0x2000 /* hold vnode with VXLOCK to be cleaned */
#define VLOCKSWORK 0x4000 /* FS supports locking discipline */
#define VCLONE 0x8000 /* vnode is a clone */
@@ -586,6 +587,7 @@ int vaccess(enum vtype, mode_t, uid_t, gid_t, mode_t, struct ucred *);
int vnoperm(struct vnode *);
void vattr_null(struct vattr *);
void vdevgone(int, int, int, enum vtype);
+void vdoom(struct vnode *);
int vcount(struct vnode *);
int vfinddev(dev_t, enum vtype, struct vnode **);
void vflushbuf(struct vnode *, int);
diff --git a/sys/ufs/ext2fs/ext2fs_inode.c b/sys/ufs/ext2fs/ext2fs_inode.c
index 2962628f865..3bbe98b6688 100644
--- a/sys/ufs/ext2fs/ext2fs_inode.c
+++ b/sys/ufs/ext2fs/ext2fs_inode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ext2fs_inode.c,v 1.66 2022/08/12 14:30:53 visa Exp $ */
+/* $OpenBSD: ext2fs_inode.c,v 1.67 2024/07/12 08:15:19 beck Exp $ */
/* $NetBSD: ext2fs_inode.c,v 1.24 2001/06/19 12:59:18 wiz Exp $ */
/*
@@ -105,20 +105,28 @@ ext2fs_inactive(void *v)
struct vnode *vp = ap->a_vp;
struct inode *ip = VTOI(vp);
struct timespec ts;
+ int recycle_vnode = 0;
int error = 0;
#ifdef DIAGNOSTIC
extern int prtactive;
- if (prtactive && vp->v_usecount != 0)
+ if (prtactive && vp->v_usecount != 0)
vprint("ext2fs_inactive: pushing active", vp);
#endif
/* Get rid of inodes related to stale file handles. */
- if (ip->i_e2din == NULL || ip->i_e2fs_mode == 0 || ip->i_e2fs_dtime)
+ if (ip->i_e2din == NULL || ip->i_e2fs_mode == 0 || ip->i_e2fs_dtime) {
+ recycle_vnode = 1;
+ vdoom(vp);
goto out;
+ }
error = 0;
if (ip->i_e2fs_nlink == 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+ /* lock this vnode and promise to vclean it */
+ recycle_vnode = 1;
+ vdoom(vp);
+
if (ext2fs_size(ip) != 0) {
error = ext2fs_truncate(ip, (off_t)0, 0, NOCRED);
}
@@ -136,7 +144,7 @@ out:
* If we are done with the inode, reclaim it
* so that it can be reused immediately.
*/
- if (ip->i_e2din == NULL || ip->i_e2fs_dtime != 0)
+ if (recycle_vnode)
vrecycle(vp, ap->a_p);
return (error);
}
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c
index 4ea385d0477..67182ceb655 100644
--- a/sys/ufs/ufs/ufs_inode.c
+++ b/sys/ufs/ufs/ufs_inode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_inode.c,v 1.45 2024/02/03 18:51:58 beck Exp $ */
+/* $OpenBSD: ufs_inode.c,v 1.46 2024/07/12 08:15:19 beck Exp $ */
/* $NetBSD: ufs_inode.c,v 1.7 1996/05/11 18:27:52 mycroft Exp $ */
/*
@@ -63,6 +63,7 @@ ufs_inactive(void *v)
struct vnode *vp = ap->a_vp;
struct inode *ip = VTOI(vp);
mode_t mode;
+ int recycle_vnode = 0;
int error = 0;
#ifdef DIAGNOSTIC
extern int prtactive;
@@ -74,10 +75,17 @@ ufs_inactive(void *v)
/*
* Ignore inodes related to stale file handles.
*/
- if (ip->i_din1 == NULL || DIP(ip, mode) == 0)
+ if (ip->i_din1 == NULL || DIP(ip, mode) == 0) {
+ recycle_vnode = 1;
+ vdoom(vp);
goto out;
+ }
if (DIP(ip, nlink) <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+ /* lock this vnode and promise to vclean it */
+ recycle_vnode = 1;
+ vdoom(vp);
+
if (getinoquota(ip) == 0)
(void)ufs_quota_free_inode(ip, NOCRED);
@@ -101,7 +109,7 @@ out:
* If we are done with the inode, reclaim it
* so that it can be reused immediately.
*/
- if (ip->i_din1 == NULL || DIP(ip, mode) == 0)
+ if (recycle_vnode)
vrecycle(vp, ap->a_p);
return (error);