From 3271eaf6457d553ced53a5fd2858085b46ee8285 Mon Sep 17 00:00:00 2001 From: Sebastien Marie Date: Sat, 22 Jun 2019 06:49:15 +0000 Subject: push the KERNEL_LOCK deeper on read(2) and write(2) unlocks read(2) and write(2) syscalls families, and push the KERNEL_LOCK deeper in the code path. KERNEL_LOCK is managed per file type in fileops handlers (fo_read, fo_write, and fo_close). read(2) and write(2) on socket are KERNEL_LOCK-free. initial work from mpi@ and ians@ ok mpi@ kettenis@ visa@ ians@ --- sys/kern/sys_generic.c | 7 +++++-- sys/kern/sys_pipe.c | 17 +++++++++++++---- sys/kern/syscalls.master | 18 +++++++++--------- sys/kern/vfs_vnops.c | 22 +++++++++++++++++----- sys/sys/file.h | 9 ++++++++- 5 files changed, 52 insertions(+), 21 deletions(-) (limited to 'sys') diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 5fb4cf04258..f72ca9fd3cb 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sys_generic.c,v 1.124 2019/06/21 09:39:48 visa Exp $ */ +/* $OpenBSD: sys_generic.c,v 1.125 2019/06/22 06:48:25 semarie Exp $ */ /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ /* @@ -366,8 +366,11 @@ dofilewritev(struct proc *p, int fd, struct uio *uio, int flags, if (uio->uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; - if (error == EPIPE) + if (error == EPIPE) { + KERNEL_LOCK(); ptsignal(p, SIGPIPE, STHREAD); + KERNEL_UNLOCK(); + } } cnt -= uio->uio_resid; diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 71522053130..2d2c2210e5e 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sys_pipe.c,v 1.87 2018/11/13 13:02:20 visa Exp $ */ +/* $OpenBSD: sys_pipe.c,v 1.88 2019/06/22 06:48:25 semarie Exp $ */ /* * Copyright (c) 1996 John S. Dyson @@ -314,9 +314,11 @@ pipe_read(struct file *fp, struct uio *uio, int fflags) int error; size_t size, nread = 0; + KERNEL_LOCK(); + error = pipelock(rpipe); if (error) - return (error); + goto done; ++rpipe->pipe_busy; @@ -420,6 +422,8 @@ unlocked_error: if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) pipeselwakeup(rpipe); +done: + KERNEL_UNLOCK(); return (error); } @@ -430,6 +434,8 @@ pipe_write(struct file *fp, struct uio *uio, int fflags) size_t orig_resid; struct pipe *wpipe, *rpipe; + KERNEL_LOCK(); + rpipe = fp->f_data; wpipe = rpipe->pipe_peer; @@ -437,7 +443,8 @@ pipe_write(struct file *fp, struct uio *uio, int fflags) * detect loss of pipe read side, issue SIGPIPE if lost. */ if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { - return (EPIPE); + error = EPIPE; + goto done; } ++wpipe->pipe_busy; @@ -471,7 +478,7 @@ pipe_write(struct file *fp, struct uio *uio, int fflags) wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); wakeup(wpipe); } - return (error); + goto done; } orig_resid = uio->uio_resid; @@ -642,6 +649,8 @@ retrywrite: if (wpipe->pipe_buffer.cnt) pipeselwakeup(wpipe); +done: + KERNEL_UNLOCK(); return (error); } diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 8c6ea62e801..d7439af23cf 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -1,4 +1,4 @@ -; $OpenBSD: syscalls.master,v 1.191 2019/05/13 22:55:27 beck Exp $ +; $OpenBSD: syscalls.master,v 1.192 2019/06/22 06:49:14 semarie Exp $ ; $NetBSD: syscalls.master,v 1.32 1996/04/23 10:24:21 mycroft Exp $ ; @(#)syscalls.master 8.2 (Berkeley) 1/13/94 @@ -51,8 +51,8 @@ 0 INDIR { int sys_syscall(int number, ...); } 1 STD { void sys_exit(int rval); } 2 STD { int sys_fork(void); } -3 STD { ssize_t sys_read(int fd, void *buf, size_t nbyte); } -4 STD { ssize_t sys_write(int fd, const void *buf, \ +3 STD NOLOCK { ssize_t sys_read(int fd, void *buf, size_t nbyte); } +4 STD NOLOCK { ssize_t sys_write(int fd, const void *buf, \ size_t nbyte); } 5 STD { int sys_open(const char *path, \ int flags, ... mode_t mode); } @@ -247,9 +247,9 @@ 118 STD { int sys_getsockopt(int s, int level, int name, \ void *val, socklen_t *avalsize); } 119 STD { int sys_thrkill(pid_t tid, int signum, void *tcb); } -120 STD { ssize_t sys_readv(int fd, \ +120 STD NOLOCK { ssize_t sys_readv(int fd, \ const struct iovec *iovp, int iovcnt); } -121 STD { ssize_t sys_writev(int fd, \ +121 STD NOLOCK { ssize_t sys_writev(int fd, \ const struct iovec *iovp, int iovcnt); } 122 STD { int sys_kill(int pid, int signum); } 123 STD { int sys_fchown(int fd, uid_t uid, gid_t gid); } @@ -316,9 +316,9 @@ 170 OBSOL msgsys10 171 OBSOL shmsys10 172 UNIMPL -173 STD { ssize_t sys_pread(int fd, void *buf, \ +173 STD NOLOCK { ssize_t sys_pread(int fd, void *buf, \ size_t nbyte, int pad, off_t offset); } -174 STD { ssize_t sys_pwrite(int fd, const void *buf, \ +174 STD NOLOCK { ssize_t sys_pwrite(int fd, const void *buf, \ size_t nbyte, int pad, off_t offset); } 175 UNIMPL ntp_gettime 176 UNIMPL ntp_adjtime @@ -452,10 +452,10 @@ 264 STD { int sys_fhopen(const fhandle_t *fhp, int flags); } 265 UNIMPL 266 UNIMPL -267 STD { ssize_t sys_preadv(int fd, \ +267 STD NOLOCK { ssize_t sys_preadv(int fd, \ const struct iovec *iovp, int iovcnt, \ int pad, off_t offset); } -268 STD { ssize_t sys_pwritev(int fd, \ +268 STD NOLOCK { ssize_t sys_pwritev(int fd, \ const struct iovec *iovp, int iovcnt, \ int pad, off_t offset); } 269 STD { int sys_kqueue(void); } diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index c02f8f0cba1..520126602c1 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_vnops.c,v 1.98 2019/06/21 09:39:48 visa Exp $ */ +/* $OpenBSD: vfs_vnops.c,v 1.99 2019/06/22 06:48:25 semarie Exp $ */ /* $NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $ */ /* @@ -343,6 +343,8 @@ vn_read(struct file *fp, struct uio *uio, int fflags) off_t offset; int error; + KERNEL_LOCK(); + /* * Check below can race. We can block on the vnode lock * and resume with a different `fp->f_offset' value. @@ -353,11 +355,15 @@ vn_read(struct file *fp, struct uio *uio, int fflags) offset = uio->uio_offset; /* no wrap around of offsets except on character devices */ - if (vp->v_type != VCHR && count > LLONG_MAX - offset) - return (EINVAL); + if (vp->v_type != VCHR && count > LLONG_MAX - offset) { + error = EINVAL; + goto done; + } - if (vp->v_type == VDIR) - return (EISDIR); + if (vp->v_type == VDIR) { + error = EISDIR; + goto done; + } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if ((fflags & FO_POSITION) == 0) @@ -367,6 +373,8 @@ vn_read(struct file *fp, struct uio *uio, int fflags) if ((fflags & FO_POSITION) == 0) fp->f_offset += count - uio->uio_resid; VOP_UNLOCK(vp); +done: + KERNEL_UNLOCK(); return (error); } @@ -381,6 +389,8 @@ vn_write(struct file *fp, struct uio *uio, int fflags) int error, ioflag = IO_UNIT; size_t count; + KERNEL_LOCK(); + /* note: pwrite/pwritev are unaffected by O_APPEND */ if (vp->v_type == VREG && (fp->f_flag & O_APPEND) && (fflags & FO_POSITION) == 0) @@ -402,6 +412,8 @@ vn_write(struct file *fp, struct uio *uio, int fflags) fp->f_offset += count - uio->uio_resid; } VOP_UNLOCK(vp); + + KERNEL_UNLOCK(); return (error); } diff --git a/sys/sys/file.h b/sys/sys/file.h index b2a1f95e898..0054c6df76b 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -1,4 +1,4 @@ -/* $OpenBSD: file.h,v 1.53 2018/08/20 16:00:22 mpi Exp $ */ +/* $OpenBSD: file.h,v 1.54 2019/06/22 06:48:25 semarie Exp $ */ /* $NetBSD: file.h,v 1.11 1995/03/26 20:24:13 jtc Exp $ */ /* @@ -46,6 +46,13 @@ struct stat; struct file; struct ucred; +/** + * File operations. + * The following entries could be called without KERNEL_LOCK hold: + * - fo_read + * - fo_write + * - fo_close + */ struct fileops { int (*fo_read)(struct file *, struct uio *, int); int (*fo_write)(struct file *, struct uio *, int); -- cgit v1.2.3