diff options
-rw-r--r-- | sys/kern/uipc_socket.c | 12 | ||||
-rw-r--r-- | sys/kern/uipc_socket2.c | 10 | ||||
-rw-r--r-- | sys/kern/uipc_syscalls.c | 6 | ||||
-rw-r--r-- | sys/netinet/in_pcb.h | 34 | ||||
-rw-r--r-- | sys/sys/protosw.h | 8 |
5 files changed, 57 insertions, 13 deletions
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 6c2f93cda67..7a3062cd87f 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket.c,v 1.309 2023/08/08 22:07:25 mvs Exp $ */ +/* $OpenBSD: uipc_socket.c,v 1.310 2023/12/18 13:11:20 bluhm Exp $ */ /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ /* @@ -832,8 +832,10 @@ bad: *mp = NULL; solock_shared(so); + pru_lock(so); restart: if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) { + pru_unlock(so); sounlock_shared(so); return (error); } @@ -900,11 +902,13 @@ restart: SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); sbunlock(so, &so->so_rcv); + pru_unlock(so); error = sbwait(so, &so->so_rcv); if (error) { sounlock_shared(so); return (error); } + pru_lock(so); goto restart; } dontblock: @@ -971,11 +975,13 @@ dontblock: sbsync(&so->so_rcv, nextrecord); if (controlp) { if (pr->pr_domain->dom_externalize) { + pru_unlock(so); sounlock_shared(so); error = (*pr->pr_domain->dom_externalize) (cm, controllen, flags); solock_shared(so); + pru_lock(so); } *controlp = cm; } else { @@ -1049,9 +1055,11 @@ dontblock: SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove"); SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); resid = uio->uio_resid; + pru_unlock(so); sounlock_shared(so); uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio); solock_shared(so); + pru_lock(so); if (uio_error) uio->uio_resid = resid - len; } else @@ -1136,6 +1144,7 @@ dontblock: error = sbwait(so, &so->so_rcv); if (error) { sbunlock(so, &so->so_rcv); + pru_unlock(so); sounlock_shared(so); return (0); } @@ -1182,6 +1191,7 @@ dontblock: *flagsp |= flags; release: sbunlock(so, &so->so_rcv); + pru_unlock(so); sounlock_shared(so); return (error); } diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index f21e0e20ab8..18f7746f611 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket2.c,v 1.138 2023/10/30 13:27:53 bluhm Exp $ */ +/* $OpenBSD: uipc_socket2.c,v 1.139 2023/12/18 13:11:20 bluhm Exp $ */ /* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */ /* @@ -368,7 +368,7 @@ solock_shared(struct socket *so) case PF_INET6: if (so->so_proto->pr_usrreqs->pru_lock != NULL) { NET_LOCK_SHARED(); - pru_lock(so); + rw_enter_write(&so->so_lock); } else NET_LOCK(); break; @@ -427,7 +427,7 @@ sounlock_shared(struct socket *so) case PF_INET: case PF_INET6: if (so->so_proto->pr_usrreqs->pru_unlock != NULL) { - pru_unlock(so); + rw_exit_write(&so->so_lock); NET_UNLOCK_SHARED(); } else NET_UNLOCK(); @@ -463,12 +463,12 @@ sosleep_nsec(struct socket *so, void *ident, int prio, const char *wmesg, case PF_INET6: if (so->so_proto->pr_usrreqs->pru_unlock != NULL && rw_status(&netlock) == RW_READ) { - pru_unlock(so); + rw_exit_write(&so->so_lock); } ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs); if (so->so_proto->pr_usrreqs->pru_lock != NULL && rw_status(&netlock) == RW_READ) { - pru_lock(so); + rw_enter_write(&so->so_lock); } break; default: diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 2919c1c9686..0a58664adca 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_syscalls.c,v 1.214 2023/09/23 09:17:21 jan Exp $ */ +/* $OpenBSD: uipc_syscalls.c,v 1.215 2023/12/18 13:11:20 bluhm Exp $ */ /* $NetBSD: uipc_syscalls.c,v 1.19 1996/02/09 19:00:48 christos Exp $ */ /* @@ -185,9 +185,9 @@ sys_bind(struct proc *p, void *v, register_t *retval) if (KTRPOINT(p, KTR_STRUCT)) ktrsockaddr(p, mtod(nam, caddr_t), SCARG(uap, namelen)); #endif - solock(so); + solock_shared(so); error = sobind(so, nam, p); - sounlock(so); + sounlock_shared(so); m_freem(nam); out: FRELE(fp, p); diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index b618a2e804d..16d1ce324c4 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -1,4 +1,4 @@ -/* $OpenBSD: in_pcb.h,v 1.144 2023/12/15 00:24:56 bluhm Exp $ */ +/* $OpenBSD: in_pcb.h,v 1.145 2023/12/18 13:11:20 bluhm Exp $ */ /* $NetBSD: in_pcb.h,v 1.14 1996/02/13 23:42:00 christos Exp $ */ /* @@ -84,6 +84,38 @@ * p inpcb_mtx pcb mutex */ +/* + * The pcb table mutex guarantees that all inpcb are consistent and + * that bind(2) and connect(2) create unique combinations of + * laddr/faddr/lport/fport/rtalbleid. This mutex is used to protect + * both address consistency and inpcb lookup during protocol input. + * All writes to inp_[lf]addr take table mutex. A per socket lock is + * needed, so that socket layer input have a consistent view at these + * values. + * + * In soconnect() and sosend() pcb mutex cannot be used. They eventually + * can call IP output which takes pf lock which is a sleeping lock. + * Also connect(2) does a route lookup for source selection. There + * route resolve happens, which creates a route, which sends a route + * message, which needs route lock, which is a rw-lock. + * + * On the other hand a mutex should be used in protocol input. It + * does not make sense to do a process switch per packet. Better spin + * until the packet can be processed. + * + * So there are three locks. Table mutex is for writing inp_[lf]addr/port + * and lookup, socket rw-lock to separate sockets in system calls, and + * pcb mutex to protect socket receive buffer. Changing inp_[lf]addr/port + * takes both per socket rw-lock and global table mutex. Protocol + * input only reads inp_[lf]addr/port during lookup and is safe. System + * call only reads when holding socket rw-lock and is safe. The socket + * layer needs pcb mutex only in soreceive(). + * + * Function pru_lock() grabs the pcb mutex and its existence indicates + * that a protocol is MP safe. Otherwise the exclusive net lock is + * used. + */ + struct pf_state_key; union inpaddru { diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h index bf17e7b78a8..78b439b7b88 100644 --- a/sys/sys/protosw.h +++ b/sys/sys/protosw.h @@ -1,4 +1,4 @@ -/* $OpenBSD: protosw.h,v 1.62 2023/05/18 09:59:44 mvs Exp $ */ +/* $OpenBSD: protosw.h,v 1.63 2023/12/18 13:11:20 bluhm Exp $ */ /* $NetBSD: protosw.h,v 1.10 1996/04/09 20:55:32 cgd Exp $ */ /*- @@ -284,13 +284,15 @@ pru_detach(struct socket *so) static inline void pru_lock(struct socket *so) { - (*so->so_proto->pr_usrreqs->pru_lock)(so); + if (so->so_proto->pr_usrreqs->pru_lock) + (*so->so_proto->pr_usrreqs->pru_lock)(so); } static inline void pru_unlock(struct socket *so) { - (*so->so_proto->pr_usrreqs->pru_unlock)(so); + if (so->so_proto->pr_usrreqs->pru_unlock) + (*so->so_proto->pr_usrreqs->pru_unlock)(so); } static inline int |