summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sys/kern/uipc_socket.c12
-rw-r--r--sys/kern/uipc_socket2.c10
-rw-r--r--sys/kern/uipc_syscalls.c6
-rw-r--r--sys/netinet/in_pcb.h34
-rw-r--r--sys/sys/protosw.h8
5 files changed, 57 insertions, 13 deletions
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 6c2f93cda67..7a3062cd87f 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uipc_socket.c,v 1.309 2023/08/08 22:07:25 mvs Exp $ */
+/* $OpenBSD: uipc_socket.c,v 1.310 2023/12/18 13:11:20 bluhm Exp $ */
/* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */
/*
@@ -832,8 +832,10 @@ bad:
*mp = NULL;
solock_shared(so);
+ pru_lock(so);
restart:
if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) {
+ pru_unlock(so);
sounlock_shared(so);
return (error);
}
@@ -900,11 +902,13 @@ restart:
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
sbunlock(so, &so->so_rcv);
+ pru_unlock(so);
error = sbwait(so, &so->so_rcv);
if (error) {
sounlock_shared(so);
return (error);
}
+ pru_lock(so);
goto restart;
}
dontblock:
@@ -971,11 +975,13 @@ dontblock:
sbsync(&so->so_rcv, nextrecord);
if (controlp) {
if (pr->pr_domain->dom_externalize) {
+ pru_unlock(so);
sounlock_shared(so);
error =
(*pr->pr_domain->dom_externalize)
(cm, controllen, flags);
solock_shared(so);
+ pru_lock(so);
}
*controlp = cm;
} else {
@@ -1049,9 +1055,11 @@ dontblock:
SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
resid = uio->uio_resid;
+ pru_unlock(so);
sounlock_shared(so);
uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
solock_shared(so);
+ pru_lock(so);
if (uio_error)
uio->uio_resid = resid - len;
} else
@@ -1136,6 +1144,7 @@ dontblock:
error = sbwait(so, &so->so_rcv);
if (error) {
sbunlock(so, &so->so_rcv);
+ pru_unlock(so);
sounlock_shared(so);
return (0);
}
@@ -1182,6 +1191,7 @@ dontblock:
*flagsp |= flags;
release:
sbunlock(so, &so->so_rcv);
+ pru_unlock(so);
sounlock_shared(so);
return (error);
}
diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c
index f21e0e20ab8..18f7746f611 100644
--- a/sys/kern/uipc_socket2.c
+++ b/sys/kern/uipc_socket2.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uipc_socket2.c,v 1.138 2023/10/30 13:27:53 bluhm Exp $ */
+/* $OpenBSD: uipc_socket2.c,v 1.139 2023/12/18 13:11:20 bluhm Exp $ */
/* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */
/*
@@ -368,7 +368,7 @@ solock_shared(struct socket *so)
case PF_INET6:
if (so->so_proto->pr_usrreqs->pru_lock != NULL) {
NET_LOCK_SHARED();
- pru_lock(so);
+ rw_enter_write(&so->so_lock);
} else
NET_LOCK();
break;
@@ -427,7 +427,7 @@ sounlock_shared(struct socket *so)
case PF_INET:
case PF_INET6:
if (so->so_proto->pr_usrreqs->pru_unlock != NULL) {
- pru_unlock(so);
+ rw_exit_write(&so->so_lock);
NET_UNLOCK_SHARED();
} else
NET_UNLOCK();
@@ -463,12 +463,12 @@ sosleep_nsec(struct socket *so, void *ident, int prio, const char *wmesg,
case PF_INET6:
if (so->so_proto->pr_usrreqs->pru_unlock != NULL &&
rw_status(&netlock) == RW_READ) {
- pru_unlock(so);
+ rw_exit_write(&so->so_lock);
}
ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs);
if (so->so_proto->pr_usrreqs->pru_lock != NULL &&
rw_status(&netlock) == RW_READ) {
- pru_lock(so);
+ rw_enter_write(&so->so_lock);
}
break;
default:
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 2919c1c9686..0a58664adca 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uipc_syscalls.c,v 1.214 2023/09/23 09:17:21 jan Exp $ */
+/* $OpenBSD: uipc_syscalls.c,v 1.215 2023/12/18 13:11:20 bluhm Exp $ */
/* $NetBSD: uipc_syscalls.c,v 1.19 1996/02/09 19:00:48 christos Exp $ */
/*
@@ -185,9 +185,9 @@ sys_bind(struct proc *p, void *v, register_t *retval)
if (KTRPOINT(p, KTR_STRUCT))
ktrsockaddr(p, mtod(nam, caddr_t), SCARG(uap, namelen));
#endif
- solock(so);
+ solock_shared(so);
error = sobind(so, nam, p);
- sounlock(so);
+ sounlock_shared(so);
m_freem(nam);
out:
FRELE(fp, p);
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index b618a2e804d..16d1ce324c4 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: in_pcb.h,v 1.144 2023/12/15 00:24:56 bluhm Exp $ */
+/* $OpenBSD: in_pcb.h,v 1.145 2023/12/18 13:11:20 bluhm Exp $ */
/* $NetBSD: in_pcb.h,v 1.14 1996/02/13 23:42:00 christos Exp $ */
/*
@@ -84,6 +84,38 @@
* p inpcb_mtx pcb mutex
*/
+/*
+ * The pcb table mutex guarantees that all inpcb are consistent and
+ * that bind(2) and connect(2) create unique combinations of
+ * laddr/faddr/lport/fport/rtableid. This mutex is used to protect
+ * both address consistency and inpcb lookup during protocol input.
+ * All writes to inp_[lf]addr take the table mutex. A per socket lock is
+ * needed, so that socket layer input has a consistent view of these
+ * values.
+ *
+ * In soconnect() and sosend() the pcb mutex cannot be used. They can
+ * eventually call IP output, which takes the pf lock, a sleeping lock.
+ * Also connect(2) does a route lookup for source selection. There a
+ * route resolve happens, which creates a route, which sends a route
+ * message, which needs the route lock, which is a rw-lock.
+ *
+ * On the other hand a mutex should be used in protocol input. It
+ * does not make sense to do a process switch per packet. Better spin
+ * until the packet can be processed.
+ *
+ * So there are three locks. Table mutex is for writing inp_[lf]addr/port
+ * and lookup, socket rw-lock to separate sockets in system calls, and
+ * pcb mutex to protect socket receive buffer. Changing inp_[lf]addr/port
+ * takes both per socket rw-lock and global table mutex. Protocol
+ * input only reads inp_[lf]addr/port during lookup and is safe. System
+ * call only reads when holding socket rw-lock and is safe. The socket
+ * layer needs pcb mutex only in soreceive().
+ *
+ * Function pru_lock() grabs the pcb mutex and its existence indicates
+ * that a protocol is MP safe. Otherwise the exclusive net lock is
+ * used.
+ */
+
struct pf_state_key;
union inpaddru {
diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h
index bf17e7b78a8..78b439b7b88 100644
--- a/sys/sys/protosw.h
+++ b/sys/sys/protosw.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: protosw.h,v 1.62 2023/05/18 09:59:44 mvs Exp $ */
+/* $OpenBSD: protosw.h,v 1.63 2023/12/18 13:11:20 bluhm Exp $ */
/* $NetBSD: protosw.h,v 1.10 1996/04/09 20:55:32 cgd Exp $ */
/*-
@@ -284,13 +284,15 @@ pru_detach(struct socket *so)
static inline void
pru_lock(struct socket *so)
{
- (*so->so_proto->pr_usrreqs->pru_lock)(so);
+ if (so->so_proto->pr_usrreqs->pru_lock)
+ (*so->so_proto->pr_usrreqs->pru_lock)(so);
}
static inline void
pru_unlock(struct socket *so)
{
- (*so->so_proto->pr_usrreqs->pru_unlock)(so);
+ if (so->so_proto->pr_usrreqs->pru_unlock)
+ (*so->so_proto->pr_usrreqs->pru_unlock)(so);
}
static inline int