summaryrefslogtreecommitdiff
path: root/sys/kern/kern_descrip.c
diff options
context:
space:
mode:
authoranton <anton@cvs.openbsd.org>2019-07-10 16:43:21 +0000
committeranton <anton@cvs.openbsd.org>2019-07-10 16:43:21 +0000
commitd8bf193ec09f26b570daa057ab9961744de1794d (patch)
treefb42455d3d5d0bb3236d5a9551cade60758d5ad7 /sys/kern/kern_descrip.c
parentcfb2b7e76d2d97505ffb3e2ac6a43561e980f07a (diff)
Make read/write of the f_offset field belonging to struct file MP-safe,
as part of the effort to unlock the kernel. Instead of relying on the vnode lock, introduce a dedicated lock per file. Exclusive write access is granted using the new foffset_enter and foffset_leave API. A convenience function foffset_get is also available for threads that only need to read the current offset. The lock acquisition order in vn_write has been changed to match the one in vn_read in order to avoid a potential deadlock. This change also gets rid of a documented race in vn_read(). Inspired by the FreeBSD implementation. With help and ok mpi@ visa@
Diffstat (limited to 'sys/kern/kern_descrip.c')
-rw-r--r--sys/kern/kern_descrip.c81
1 files changed, 76 insertions, 5 deletions
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index e2e4f1c668a..2cf66bb04aa 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_descrip.c,v 1.188 2019/07/03 14:32:02 visa Exp $ */
+/* $OpenBSD: kern_descrip.c,v 1.189 2019/07/10 16:43:19 anton Exp $ */
/* $NetBSD: kern_descrip.c,v 1.42 1996/03/30 22:24:38 christos Exp $ */
/*
@@ -532,12 +532,14 @@ restart:
ktrflock(p, &fl);
#endif
if (fl.l_whence == SEEK_CUR) {
+ off_t offset = foffset_get(fp);
+
if (fl.l_start == 0 && fl.l_len < 0) {
/* lockf(3) compliance hack */
fl.l_len = -fl.l_len;
- fl.l_start = fp->f_offset - fl.l_len;
+ fl.l_start = offset - fl.l_len;
} else
- fl.l_start += fp->f_offset;
+ fl.l_start += offset;
}
switch (fl.l_type) {
@@ -602,12 +604,14 @@ restart:
if (error)
break;
if (fl.l_whence == SEEK_CUR) {
+ off_t offset = foffset_get(fp);
+
if (fl.l_start == 0 && fl.l_len < 0) {
/* lockf(3) compliance hack */
fl.l_len = -fl.l_len;
- fl.l_start = fp->f_offset - fl.l_len;
+ fl.l_start = offset - fl.l_len;
} else
- fl.l_start += fp->f_offset;
+ fl.l_start += offset;
}
if (fl.l_type != F_RDLCK &&
fl.l_type != F_WRLCK &&
@@ -1277,6 +1281,73 @@ fdrop(struct file *fp, struct proc *p)
}
/*
+ * Get the file offset without keeping the offset locked upon return.
+ *
+ * fp->f_offset is read while fp->f_mtx is held and the value read is
+ * returned. This function does not look at fp->f_olock, so it neither
+ * waits for nor excludes a thread that currently holds the offset lock
+ * taken by foffset_enter(). Since the mutex is released before returning,
+ * the result is only a snapshot of the offset.
+ */
+off_t
+foffset_get(struct file *fp)
+{
+ off_t offset;
+
+ mtx_enter(&fp->f_mtx);
+ offset = fp->f_offset;
+ mtx_leave(&fp->f_mtx);
+ return (offset);
+}
+
+/*
+ * Acquire an exclusive lock on the file offset. The calling thread must call
+ * foffset_leave() once done.
+ *
+ * The lock itself is the FOL_LOCKED bit of fp->f_olock. While that bit is
+ * set, the caller bumps fp->f_olock by one, sleeps on &fp->f_olock and
+ * undoes the bump after waking up, then tests FOL_LOCKED again. The
+ * assertions on (fp->f_olock & FOL_NWAIT) suggest that those bits hold the
+ * number of sleeping threads (NOTE: FOL_NWAIT is defined elsewhere;
+ * presumably a mask of the low bits - confirm against its definition).
+ * All accesses to fp->f_olock here are made with fp->f_mtx held.
+ *
+ * Returns the value of fp->f_offset read after FOL_LOCKED has been set.
+ */
+off_t
+foffset_enter(struct file *fp)
+{
+ off_t offset;
+
+ mtx_enter(&fp->f_mtx);
+
+ while (fp->f_olock & FOL_LOCKED) {
+ KASSERT((fp->f_olock & FOL_NWAIT) < FOL_NWAIT); /* room for one more */
+ fp->f_olock++; /* announce a waiter */
+ msleep(&fp->f_olock, &fp->f_mtx, PLOCK, "foffset", 0);
+ KASSERT((fp->f_olock & FOL_NWAIT) > 0);
+ fp->f_olock--; /* no longer waiting */
+ }
+ fp->f_olock |= FOL_LOCKED; /* lock is now ours */
+
+ offset = fp->f_offset;
+
+ mtx_leave(&fp->f_mtx);
+
+ return (offset);
+}
+
+/*
+ * Write a new file offset and release the lock. The calling thread must already
+ * have acquired the lock using foffset_enter().
+ * If FO_NOUPDATE is present in flags, only the lock is released and the offset
+ * remains unmodified.
+ *
+ * The (fp->f_olock & FOL_NWAIT) bits are sampled into a local while
+ * fp->f_mtx is still held; wakeup_one() on &fp->f_olock is issued after
+ * the mutex has been released, and only if that sample was non-zero.
+ * Holding the lock on entry is enforced with KASSERT.
+ */
+void
+foffset_leave(struct file *fp, off_t offset, int flags)
+{
+ unsigned int nwait;
+
+ mtx_enter(&fp->f_mtx);
+
+ KASSERT(fp->f_olock & FOL_LOCKED);
+
+ if ((flags & FO_NOUPDATE) == 0)
+ fp->f_offset = offset;
+ nwait = fp->f_olock & FOL_NWAIT; /* sample under the mutex */
+ fp->f_olock &= ~FOL_LOCKED; /* release the offset lock */
+
+ mtx_leave(&fp->f_mtx);
+
+ if (nwait > 0)
+ wakeup_one(&fp->f_olock); /* same channel foffset_enter() sleeps on */
+}
+
+/*
* Apply an advisory lock on a file descriptor.
*
* Just attempt to get a record lock of the requested type on