summaryrefslogtreecommitdiff
path: root/sys/kern/uipc_socket.c
diff options
context:
space:
mode:
author: Vitaliy Makkoveev <mvs@cvs.openbsd.org> 2022-07-01 09:56:18 +0000
committer: Vitaliy Makkoveev <mvs@cvs.openbsd.org> 2022-07-01 09:56:18 +0000
commit: 330cd6e874a18fa481c8b57a0e30f3850f24e39c (patch)
tree: 66960465c1d51317668f36763e40af70a3d95ff7 /sys/kern/uipc_socket.c
parent: 02a36ca46695152cacd5ccf441f740f65785e838 (diff)
Make unix(4) domain socket locking fine-grained. Use the per-socket
`so_lock' rwlock(9) instead of the global `unp_lock', which locks the whole layer. The PCBs of unix(4) sockets are linked to each other, and we need to lock them both. This introduces a lock ordering problem: while thread (1) holds the lock on `so1' and tries to lock `so2', thread (2) could hold the lock on `so2' and try to lock `so1'. To solve this we always lock sockets in a strict order. For sockets which are already accessible from userland, we always lock the socket with the smallest memory address first. Sometimes we need to unlock a socket before locking its peer and then lock it again. We use reference counters to prevent destruction of the connected peer during the relock. We also handle the case where the peer socket was replaced by another socket. For newly connected sockets, which are not yet exported to userland by accept(2), we always lock the listening socket `head' first. This allows us to avoid an unwanted relock within the accept(2) syscall. ok claudio@
Diffstat (limited to 'sys/kern/uipc_socket.c')
-rw-r--r-- sys/kern/uipc_socket.c 82
1 files changed, 77 insertions, 5 deletions
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index fc2421e9ca1..aa856abc56a 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uipc_socket.c,v 1.278 2022/06/06 14:45:41 claudio Exp $ */
+/* $OpenBSD: uipc_socket.c,v 1.279 2022/07/01 09:56:17 mvs Exp $ */
/* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */
/*
@@ -52,6 +52,7 @@
#include <sys/atomic.h>
#include <sys/rwlock.h>
#include <sys/time.h>
+#include <sys/refcnt.h>
#ifdef DDB
#include <machine/db_machdep.h>
@@ -146,7 +147,9 @@ soalloc(int prflags)
so = pool_get(&socket_pool, prflags);
if (so == NULL)
return (NULL);
- rw_init(&so->so_lock, "solock");
+ rw_init_flags(&so->so_lock, "solock", RWL_DUPOK);
+ refcnt_init(&so->so_refcnt);
+
return (so);
}
@@ -247,6 +250,8 @@ solisten(struct socket *so, int backlog)
void
sofree(struct socket *so, int keep_lock)
{
+ int persocket = solock_persocket(so);
+
soassertlocked(so);
if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
@@ -255,17 +260,54 @@ sofree(struct socket *so, int keep_lock)
return;
}
if (so->so_head) {
+ struct socket *head = so->so_head;
+
/*
* We must not decommission a socket that's on the accept(2)
* queue. If we do, then accept(2) may hang after select(2)
* indicated that the listening socket was ready.
*/
- if (!soqremque(so, 0)) {
+ if (so->so_onq == &head->so_q) {
if (!keep_lock)
sounlock(so);
return;
}
+
+ if (persocket) {
+ /*
+ * Concurrent close of `head' could
+ * abort `so' due to re-lock.
+ */
+ soref(so);
+ soref(head);
+ sounlock(so);
+ solock(head);
+ solock(so);
+
+ if (so->so_onq != &head->so_q0) {
+ sounlock(head);
+ sounlock(so);
+ sorele(head);
+ sorele(so);
+ return;
+ }
+
+ sorele(head);
+ sorele(so);
+ }
+
+ soqremque(so, 0);
+
+ if (persocket)
+ sounlock(head);
}
+
+ if (persocket) {
+ sounlock(so);
+ refcnt_finalize(&so->so_refcnt, "sofinal");
+ solock(so);
+ }
+
sigio_free(&so->so_sigio);
klist_free(&so->so_rcv.sb_sel.si_note);
klist_free(&so->so_snd.sb_sel.si_note);
@@ -356,13 +398,36 @@ drop:
error = error2;
}
if (so->so_options & SO_ACCEPTCONN) {
+ int persocket = solock_persocket(so);
+
+ if (persocket) {
+ /* Wait concurrent sonewconn() threads. */
+ while (so->so_newconn > 0) {
+ so->so_state |= SS_NEWCONN_WAIT;
+ sosleep_nsec(so, &so->so_newconn, PSOCK,
+ "netlck", INFSLP);
+ }
+ }
+
while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
+ if (persocket)
+ solock(so2);
(void) soqremque(so2, 0);
+ if (persocket)
+ sounlock(so);
(void) soabort(so2);
+ if (persocket)
+ solock(so);
}
while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
+ if (persocket)
+ solock(so2);
(void) soqremque(so2, 1);
+ if (persocket)
+ sounlock(so);
(void) soabort(so2);
+ if (persocket)
+ solock(so);
}
}
discard:
@@ -430,11 +495,18 @@ soconnect(struct socket *so, struct mbuf *nam)
int
soconnect2(struct socket *so1, struct socket *so2)
{
- int error;
+ int persocket, error;
+
+ if ((persocket = solock_persocket(so1)))
+ solock_pair(so1, so2);
+ else
+ solock(so1);
- solock(so1);
error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
(struct mbuf *)so2, NULL, curproc);
+
+ if (persocket)
+ sounlock(so2);
sounlock(so1);
return (error);
}