summary | refs | log | tree | commit | diff
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r-- sys/kern/uipc_socket.c 82
-rw-r--r-- sys/kern/uipc_socket2.c 128
-rw-r--r-- sys/kern/uipc_syscalls.c 69
-rw-r--r-- sys/kern/uipc_usrreq.c 235
-rw-r--r-- sys/miscfs/fifofs/fifo_vnops.c 10
-rw-r--r-- sys/sys/socketvar.h 20
-rw-r--r-- sys/sys/unpcb.h 25
7 files changed, 453 insertions, 116 deletions
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index fc2421e9ca1..aa856abc56a 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uipc_socket.c,v 1.278 2022/06/06 14:45:41 claudio Exp $ */
+/* $OpenBSD: uipc_socket.c,v 1.279 2022/07/01 09:56:17 mvs Exp $ */
/* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */
/*
@@ -52,6 +52,7 @@
#include <sys/atomic.h>
#include <sys/rwlock.h>
#include <sys/time.h>
+#include <sys/refcnt.h>
#ifdef DDB
#include <machine/db_machdep.h>
@@ -146,7 +147,9 @@ soalloc(int prflags)
so = pool_get(&socket_pool, prflags);
if (so == NULL)
return (NULL);
- rw_init(&so->so_lock, "solock");
+ rw_init_flags(&so->so_lock, "solock", RWL_DUPOK);
+ refcnt_init(&so->so_refcnt);
+
return (so);
}
@@ -247,6 +250,8 @@ solisten(struct socket *so, int backlog)
void
sofree(struct socket *so, int keep_lock)
{
+ int persocket = solock_persocket(so);
+
soassertlocked(so);
if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
@@ -255,17 +260,54 @@ sofree(struct socket *so, int keep_lock)
return;
}
if (so->so_head) {
+ struct socket *head = so->so_head;
+
/*
* We must not decommission a socket that's on the accept(2)
* queue. If we do, then accept(2) may hang after select(2)
* indicated that the listening socket was ready.
*/
- if (!soqremque(so, 0)) {
+ if (so->so_onq == &head->so_q) {
if (!keep_lock)
sounlock(so);
return;
}
+
+ if (persocket) {
+ /*
+ * Concurrent close of `head' could
+ * abort `so' due to re-lock.
+ */
+ soref(so);
+ soref(head);
+ sounlock(so);
+ solock(head);
+ solock(so);
+
+ if (so->so_onq != &head->so_q0) {
+ sounlock(head);
+ sounlock(so);
+ sorele(head);
+ sorele(so);
+ return;
+ }
+
+ sorele(head);
+ sorele(so);
+ }
+
+ soqremque(so, 0);
+
+ if (persocket)
+ sounlock(head);
}
+
+ if (persocket) {
+ sounlock(so);
+ refcnt_finalize(&so->so_refcnt, "sofinal");
+ solock(so);
+ }
+
sigio_free(&so->so_sigio);
klist_free(&so->so_rcv.sb_sel.si_note);
klist_free(&so->so_snd.sb_sel.si_note);
@@ -356,13 +398,36 @@ drop:
error = error2;
}
if (so->so_options & SO_ACCEPTCONN) {
+ int persocket = solock_persocket(so);
+
+ if (persocket) {
+ /* Wait concurrent sonewconn() threads. */
+ while (so->so_newconn > 0) {
+ so->so_state |= SS_NEWCONN_WAIT;
+ sosleep_nsec(so, &so->so_newconn, PSOCK,
+ "netlck", INFSLP);
+ }
+ }
+
while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
+ if (persocket)
+ solock(so2);
(void) soqremque(so2, 0);
+ if (persocket)
+ sounlock(so);
(void) soabort(so2);
+ if (persocket)
+ solock(so);
}
while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
+ if (persocket)
+ solock(so2);
(void) soqremque(so2, 1);
+ if (persocket)
+ sounlock(so);
(void) soabort(so2);
+ if (persocket)
+ solock(so);
}
}
discard:
@@ -430,11 +495,18 @@ soconnect(struct socket *so, struct mbuf *nam)
int
soconnect2(struct socket *so1, struct socket *so2)
{
- int error;
+ int persocket, error;
+
+ if ((persocket = solock_persocket(so1)))
+ solock_pair(so1, so2);
+ else
+ solock(so1);
- solock(so1);
error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
(struct mbuf *)so2, NULL, curproc);
+
+ if (persocket)
+ sounlock(so2);
sounlock(so1);
return (error);
}
diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c
index 6b0b36e3150..e3327e14531 100644
--- a/sys/kern/uipc_socket2.c
+++ b/sys/kern/uipc_socket2.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uipc_socket2.c,v 1.124 2022/06/26 05:20:42 visa Exp $ */
+/* $OpenBSD: uipc_socket2.c,v 1.125 2022/07/01 09:56:17 mvs Exp $ */
/* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */
/*
@@ -53,8 +53,6 @@ u_long sb_max = SB_MAX; /* patchable */
extern struct pool mclpools[];
extern struct pool mbpool;
-extern struct rwlock unp_lock;
-
/*
* Procedures to manipulate state flags of socket
* and do appropriate wakeups. Normal sequence from the
@@ -101,10 +99,37 @@ soisconnected(struct socket *so)
soassertlocked(so);
so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
so->so_state |= SS_ISCONNECTED;
- if (head && soqremque(so, 0)) {
+
+ if (head != NULL && so->so_onq == &head->so_q0) {
+ int persocket = solock_persocket(so);
+
+ if (persocket) {
+ soref(so);
+ soref(head);
+
+ sounlock(so);
+ solock(head);
+ solock(so);
+
+ if (so->so_onq != &head->so_q0) {
+ sounlock(head);
+ sorele(head);
+ sorele(so);
+
+ return;
+ }
+
+ sorele(head);
+ sorele(so);
+ }
+
+ soqremque(so, 0);
soqinsque(head, so, 1);
sorwakeup(head);
wakeup_one(&head->so_timeo);
+
+ if (persocket)
+ sounlock(head);
} else {
wakeup(&so->so_timeo);
sorwakeup(so);
@@ -146,7 +171,8 @@ struct socket *
sonewconn(struct socket *head, int connstatus)
{
struct socket *so;
- int soqueue = connstatus ? 1 : 0;
+ int persocket = solock_persocket(head);
+ int error;
/*
* XXXSMP as long as `so' and `head' share the same lock, we
@@ -175,9 +201,17 @@ sonewconn(struct socket *head, int connstatus)
so->so_cpid = head->so_cpid;
/*
+ * Lock order will be `head' -> `so' while these sockets are linked.
+ */
+ if (persocket)
+ solock(so);
+
+ /*
* Inherit watermarks but those may get clamped in low mem situations.
*/
if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
+ if (persocket)
+ sounlock(so);
pool_put(&socket_pool, so);
return (NULL);
}
@@ -193,20 +227,54 @@ sonewconn(struct socket *head, int connstatus)
sigio_init(&so->so_sigio);
sigio_copy(&so->so_sigio, &head->so_sigio);
- soqinsque(head, so, soqueue);
- if ((*so->so_proto->pr_attach)(so, 0)) {
- (void) soqremque(so, soqueue);
+ soqinsque(head, so, 0);
+
+ /*
+ * We need to unlock `head' because PCB layer could release
+ * solock() to enforce desired lock order.
+ */
+ if (persocket) {
+ head->so_newconn++;
+ sounlock(head);
+ }
+
+ error = (*so->so_proto->pr_attach)(so, 0);
+
+	if (persocket) {
+		/* Re-establish the `head' -> `so' lock order after pr_attach. */
+		sounlock(so);
+		solock(head);
+		solock(so);
+
+		/*
+		 * Drop the pending-attach count taken before `head' was
+		 * unlocked.  Pre-decrement so the test sees the new value:
+		 * a post-decrement compares the old, always non-zero count
+		 * and would never wake a thread sleeping on `so_newconn'
+		 * with SS_NEWCONN_WAIT set.
+		 */
+		if ((--head->so_newconn) == 0) {
+			if ((head->so_state & SS_NEWCONN_WAIT) != 0) {
+				head->so_state &= ~SS_NEWCONN_WAIT;
+				wakeup(&head->so_newconn);
+			}
+		}
+	}
+
+ if (error) {
+ soqremque(so, 0);
+ if (persocket)
+ sounlock(so);
sigio_free(&so->so_sigio);
klist_free(&so->so_rcv.sb_sel.si_note);
klist_free(&so->so_snd.sb_sel.si_note);
pool_put(&socket_pool, so);
return (NULL);
}
+
if (connstatus) {
+ so->so_state |= connstatus;
+ soqremque(so, 0);
+ soqinsque(head, so, 1);
sorwakeup(head);
wakeup(&head->so_timeo);
- so->so_state |= connstatus;
}
+
+ if (persocket)
+ sounlock(so);
+
return (so);
}
@@ -214,6 +282,7 @@ void
soqinsque(struct socket *head, struct socket *so, int q)
{
soassertlocked(head);
+ soassertlocked(so);
KASSERT(so->so_onq == NULL);
@@ -233,6 +302,7 @@ soqremque(struct socket *so, int q)
{
struct socket *head = so->so_head;
+ soassertlocked(so);
soassertlocked(head);
if (q == 0) {
@@ -284,15 +354,40 @@ solock(struct socket *so)
case PF_INET6:
NET_LOCK();
break;
- case PF_UNIX:
- rw_enter_write(&unp_lock);
- break;
default:
rw_enter_write(&so->so_lock);
break;
}
}
+/*
+ * Return 1 when `so' belongs to a domain other than PF_INET/PF_INET6,
+ * i.e. when solock() falls through to the socket's own `so_lock';
+ * return 0 for the two inet families.
+ */
+int
+solock_persocket(struct socket *so)
+{
+	const int family = so->so_proto->pr_domain->dom_family;
+
+	if (family == PF_INET || family == PF_INET6)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Lock two distinct sockets of the same type, always taking the one at
+ * the lower address first so every caller agrees on the order.
+ */
+void
+solock_pair(struct socket *so1, struct socket *so2)
+{
+	struct socket *first, *second;
+
+	KASSERT(so1 != so2);
+	KASSERT(so1->so_type == so2->so_type);
+	KASSERT(solock_persocket(so1));
+
+	first = (so1 < so2) ? so1 : so2;
+	second = (so1 < so2) ? so2 : so1;
+
+	solock(first);
+	solock(second);
+}
+
void
sounlock(struct socket *so)
{
@@ -301,9 +396,6 @@ sounlock(struct socket *so)
case PF_INET6:
NET_UNLOCK();
break;
- case PF_UNIX:
- rw_exit_write(&unp_lock);
- break;
default:
rw_exit_write(&so->so_lock);
break;
@@ -318,9 +410,6 @@ soassertlocked(struct socket *so)
case PF_INET6:
NET_ASSERT_LOCKED();
break;
- case PF_UNIX:
- rw_assert_wrlock(&unp_lock);
- break;
default:
rw_assert_wrlock(&so->so_lock);
break;
@@ -338,9 +427,6 @@ sosleep_nsec(struct socket *so, void *ident, int prio, const char *wmesg,
case PF_INET6:
ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs);
break;
- case PF_UNIX:
- ret = rwsleep_nsec(ident, &unp_lock, prio, wmesg, nsecs);
- break;
default:
ret = rwsleep_nsec(ident, &so->so_lock, prio, wmesg, nsecs);
break;
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index a2590014ae9..c99ee6116be 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uipc_syscalls.c,v 1.195 2022/06/06 14:45:41 claudio Exp $ */
+/* $OpenBSD: uipc_syscalls.c,v 1.196 2022/07/01 09:56:17 mvs Exp $ */
/* $NetBSD: uipc_syscalls.c,v 1.19 1996/02/09 19:00:48 christos Exp $ */
/*
@@ -246,7 +246,7 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen,
socklen_t namelen;
int error, tmpfd;
struct socket *head, *so;
- int cloexec, nflag;
+ int cloexec, nflag, persocket;
cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
@@ -269,16 +269,19 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen,
head = headfp->f_data;
solock(head);
+
+ persocket = solock_persocket(head);
+
if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) {
error = EINVAL;
- goto out;
+ goto out_unlock;
}
if ((headfp->f_flag & FNONBLOCK) && head->so_qlen == 0) {
if (head->so_state & SS_CANTRCVMORE)
error = ECONNABORTED;
else
error = EWOULDBLOCK;
- goto out;
+ goto out_unlock;
}
while (head->so_qlen == 0 && head->so_error == 0) {
if (head->so_state & SS_CANTRCVMORE) {
@@ -288,18 +291,22 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen,
error = sosleep_nsec(head, &head->so_timeo, PSOCK | PCATCH,
"netcon", INFSLP);
if (error)
- goto out;
+ goto out_unlock;
}
if (head->so_error) {
error = head->so_error;
head->so_error = 0;
- goto out;
+ goto out_unlock;
}
/*
* Do not sleep after we have taken the socket out of the queue.
*/
so = TAILQ_FIRST(&head->so_q);
+
+ if (persocket)
+ solock(so);
+
if (soqremque(so, 1) == 0)
panic("accept");
@@ -310,30 +317,52 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen,
/* connection has been removed from the listen queue */
KNOTE(&head->so_rcv.sb_sel.si_note, 0);
+ if (persocket)
+ sounlock(head);
+
fp->f_type = DTYPE_SOCKET;
fp->f_flag = FREAD | FWRITE | nflag;
fp->f_ops = &socketops;
fp->f_data = so;
+
error = soaccept(so, nam);
-out:
- sounlock(head);
- if (!error && name != NULL)
+
+ if (persocket)
+ sounlock(so);
+ else
+ sounlock(head);
+
+ if (error)
+ goto out;
+
+ if (name != NULL) {
error = copyaddrout(p, nam, name, namelen, anamelen);
- if (!error) {
- fdplock(fdp);
- fdinsert(fdp, tmpfd, cloexec, fp);
- fdpunlock(fdp);
- FRELE(fp, p);
- *retval = tmpfd;
- } else {
- fdplock(fdp);
- fdremove(fdp, tmpfd);
- fdpunlock(fdp);
- closef(fp, p);
+ if (error)
+ goto out;
}
+ fdplock(fdp);
+ fdinsert(fdp, tmpfd, cloexec, fp);
+ fdpunlock(fdp);
+ FRELE(fp, p);
+ *retval = tmpfd;
+
m_freem(nam);
FRELE(headfp, p);
+
+ return 0;
+
+out_unlock:
+ sounlock(head);
+out:
+ fdplock(fdp);
+ fdremove(fdp, tmpfd);
+ fdpunlock(fdp);
+ closef(fp, p);
+
+ m_freem(nam);
+ FRELE(headfp, p);
+
return (error);
}
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 372a473a75c..0710393d376 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uipc_usrreq.c,v 1.165 2022/06/06 14:45:41 claudio Exp $ */
+/* $OpenBSD: uipc_usrreq.c,v 1.166 2022/07/01 09:56:17 mvs Exp $ */
/* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */
/*
@@ -55,6 +55,7 @@
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
+#include <sys/refcnt.h>
#include "kcov.h"
#if NKCOV > 0
@@ -66,9 +67,10 @@
* I immutable after creation
* D unp_df_lock
* G unp_gc_lock
- * U unp_lock
+ * M unp_ino_mtx
* R unp_rights_mtx
* a atomic
+ * s socket lock
*/
struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock");
@@ -76,6 +78,7 @@ struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk");
struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk");
struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
+struct mutex unp_ino_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
/*
* Stack of sets of files that were passed over a socket but were
@@ -94,6 +97,9 @@ void unp_remove_gcrefs(struct fdpass *, int);
void unp_restore_gcrefs(struct fdpass *, int);
void unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);
+static inline void unp_ref(struct unpcb *);
+static inline void unp_rele(struct unpcb *);
+struct socket *unp_solock_peer(struct socket *);
struct pool unpcb_pool;
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);
@@ -127,6 +133,53 @@ unp_init(void)
IPL_SOFTNET, 0, "unpcb", NULL);
}
+/* Take a reference on `unp' through its `unp_refcnt'. */
+static inline void
+unp_ref(struct unpcb *unp)
+{
+	struct refcnt *rc = &unp->unp_refcnt;
+
+	refcnt_take(rc);
+}
+
+/*
+ * Release a reference taken with unp_ref().  Uses the waking variant of
+ * the release; unp_detach() waits on the same counter with
+ * refcnt_finalize().
+ */
+static inline void
+unp_rele(struct unpcb *unp)
+{
+	struct refcnt *rc = &unp->unp_refcnt;
+
+	refcnt_rele_wake(rc);
+}
+
+/*
+ * Lock the socket `so' is connected to and return it, or return NULL when
+ * `so' has no peer.  `so' itself is locked on entry and on return.
+ *
+ * Locks are ordered by socket address.  When the peer sorts before `so',
+ * `so' has to be unlocked and re-locked after the peer; a reference on the
+ * peer's pcb is held across that window, and the connection is re-checked
+ * afterwards.  A socket connected to itself is returned without taking any
+ * further lock, so callers must compare the result with `so' before
+ * unlocking it.
+ */
+struct socket *
+unp_solock_peer(struct socket *so)
+{
+	struct unpcb *self = so->so_pcb;
+
+	for (;;) {
+		struct unpcb *peer = self->unp_conn;
+		struct socket *peerso;
+
+		if (peer == NULL)
+			return NULL;
+
+		peerso = peer->unp_socket;
+
+		/* Connected to itself: already locked by the caller. */
+		if (so == peerso)
+			return peerso;
+
+		/* Peer sorts after us: lock order already holds. */
+		if (so < peerso) {
+			solock(peerso);
+			return peerso;
+		}
+
+		/* Peer sorts before us: drop our lock and take both in order. */
+		unp_ref(peer);
+		sounlock(so);
+		solock(peerso);
+		solock(so);
+
+		if (self->unp_conn == peer) {
+			unp_rele(peer);
+			return peerso;
+		}
+
+		/* Datagram socket could be reconnected due to re-lock; retry. */
+		sounlock(peerso);
+		unp_rele(peer);
+	}
+}
+
void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
@@ -201,7 +254,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
* if it was bound and we are still connected
* (our peer may have closed already!).
*/
+ so2 = unp_solock_peer(so);
uipc_setaddr(unp->unp_conn, nam);
+ if (so2 != NULL && so2 != so)
+ sounlock(so2);
break;
case PRU_SHUTDOWN:
@@ -218,9 +274,8 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
case SOCK_STREAM:
case SOCK_SEQPACKET:
- if (unp->unp_conn == NULL)
+ if ((so2 = unp_solock_peer(so)) == NULL)
break;
- so2 = unp->unp_conn->unp_socket;
/*
* Adjust backpressure on sender
* and wakeup any waiting to write.
@@ -228,6 +283,7 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
so2->so_snd.sb_cc = so->so_rcv.sb_cc;
sowwakeup(so2);
+ sounlock(so2);
break;
default:
@@ -256,13 +312,16 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
error = unp_connect(so, nam, p);
if (error)
break;
- } else {
- if (unp->unp_conn == NULL) {
+ }
+
+ if ((so2 = unp_solock_peer(so)) == NULL) {
+ if (nam != NULL)
+ error = ECONNREFUSED;
+ else
error = ENOTCONN;
- break;
- }
+ break;
}
- so2 = unp->unp_conn->unp_socket;
+
if (unp->unp_addr)
from = mtod(unp->unp_addr, struct sockaddr *);
else
@@ -273,6 +332,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
control = NULL;
} else
error = ENOBUFS;
+
+ if (so2 != so)
+ sounlock(so2);
+
if (nam)
unp_disconnect(unp);
break;
@@ -284,11 +347,11 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
error = EPIPE;
break;
}
- if (unp->unp_conn == NULL) {
+ if ((so2 = unp_solock_peer(so)) == NULL) {
error = ENOTCONN;
break;
}
- so2 = unp->unp_conn->unp_socket;
+
/*
* Send to paired receive port, and then raise
* send buffer counts to maintain backpressure.
@@ -310,6 +373,8 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
so->so_snd.sb_cc = so2->so_rcv.sb_cc;
if (so2->so_rcv.sb_cc > 0)
sorwakeup(so2);
+
+ sounlock(so2);
m = NULL;
break;
@@ -323,12 +388,7 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
case PRU_ABORT:
unp_detach(unp);
- /*
- * As long as `unp_lock' is taken before entering
- * uipc_usrreq() releasing it here would lead to a
- * double unlock.
- */
- sofree(so, 1);
+ sofree(so, 0);
break;
case PRU_SENSE: {
@@ -336,8 +396,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
sb->st_blksize = so->so_snd.sb_hiwat;
sb->st_dev = NODEV;
+ mtx_enter(&unp_ino_mtx);
if (unp->unp_ino == 0)
unp->unp_ino = unp_ino++;
+ mtx_leave(&unp_ino_mtx);
sb->st_atim.tv_sec =
sb->st_mtim.tv_sec =
sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
@@ -358,7 +420,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
break;
case PRU_PEERADDR:
+ so2 = unp_solock_peer(so);
uipc_setaddr(unp->unp_conn, nam);
+ if (so2 != NULL && so2 != so)
+ sounlock(so2);
break;
case PRU_SLOWTIMO:
@@ -410,8 +475,6 @@ uipc_attach(struct socket *so, int proto)
struct unpcb *unp;
int error;
- rw_assert_wrlock(&unp_lock);
-
if (so->so_pcb)
return EISCONN;
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
@@ -438,6 +501,7 @@ uipc_attach(struct socket *so, int proto)
unp = pool_get(&unpcb_pool, PR_NOWAIT|PR_ZERO);
if (unp == NULL)
return (ENOBUFS);
+ refcnt_init(&unp->unp_refcnt);
unp->unp_socket = so;
so->so_pcb = unp;
getnanotime(&unp->unp_ctime);
@@ -445,12 +509,6 @@ uipc_attach(struct socket *so, int proto)
/*
* Enforce `unp_gc_lock' -> `solock()' lock order.
*/
- /*
- * We also release the lock on listening socket and on our peer
- * socket when called from unp_connect(). This is safe. The
- * listening socket protected by vnode(9) lock. The peer socket
- * has 'UNP_CONNECTING' flag set.
- */
sounlock(so);
rw_enter_write(&unp_gc_lock);
LIST_INSERT_HEAD(&unp_head, unp, unp_link);
@@ -512,14 +570,13 @@ unp_detach(struct unpcb *unp)
{
struct socket *so = unp->unp_socket;
struct vnode *vp = unp->unp_vnode;
-
- rw_assert_wrlock(&unp_lock);
+ struct unpcb *unp2;
unp->unp_vnode = NULL;
/*
* Enforce `unp_gc_lock' -> `solock()' lock order.
- * Enforce `i_lock' -> `unp_lock' lock order.
+ * Enforce `i_lock' -> `solock()' lock order.
*/
sounlock(so);
@@ -538,10 +595,47 @@ unp_detach(struct unpcb *unp)
solock(so);
- if (unp->unp_conn)
+ if (unp->unp_conn != NULL) {
+ /*
+ * Datagram socket could be connected to itself.
+ * Such socket will be disconnected here.
+ */
unp_disconnect(unp);
- while (!SLIST_EMPTY(&unp->unp_refs))
- unp_drop(SLIST_FIRST(&unp->unp_refs), ECONNRESET);
+ }
+
+ while ((unp2 = SLIST_FIRST(&unp->unp_refs)) != NULL) {
+ struct socket *so2 = unp2->unp_socket;
+
+ if (so < so2)
+ solock(so2);
+ else {
+ unp_ref(unp2);
+ sounlock(so);
+ solock(so2);
+ solock(so);
+
+ if (unp2->unp_conn != unp) {
+ /* `unp2' was disconnected due to re-lock. */
+ sounlock(so2);
+ unp_rele(unp2);
+ continue;
+ }
+
+ unp_rele(unp2);
+ }
+
+ unp2->unp_conn = NULL;
+ SLIST_REMOVE(&unp->unp_refs, unp2, unpcb, unp_nextref);
+ so2->so_error = ECONNRESET;
+ so2->so_state &= ~SS_ISCONNECTED;
+
+ sounlock(so2);
+ }
+
+ sounlock(so);
+ refcnt_finalize(&unp->unp_refcnt, "unpfinal");
+ solock(so);
+
soisdisconnected(so);
so->so_pcb = NULL;
m_freem(unp->unp_addr);
@@ -681,24 +775,42 @@ unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
}
if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
goto put;
- solock(so);
so2 = vp->v_socket;
if (so2 == NULL) {
error = ECONNREFUSED;
- goto put_locked;
+ goto put;
}
if (so->so_type != so2->so_type) {
error = EPROTOTYPE;
- goto put_locked;
+ goto put;
}
+
if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+ solock(so2);
+
if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
(so3 = sonewconn(so2, 0)) == NULL) {
error = ECONNREFUSED;
- goto put_locked;
}
+
+ sounlock(so2);
+
+ if (error != 0)
+ goto put;
+
+ /*
+ * Since `so2' is protected by vnode(9) lock, `so3'
+ * can't be PRU_ABORT'ed here.
+ */
+ solock_pair(so, so3);
+
unp2 = sotounpcb(so2);
unp3 = sotounpcb(so3);
+
+ /*
+ * `unp_addr', `unp_connid' and 'UNP_FEIDSBIND' flag
+ * are immutable since we set them in unp_bind().
+ */
if (unp2->unp_addr)
unp3->unp_addr =
m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
@@ -706,15 +818,29 @@ unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
unp3->unp_connid.gid = p->p_ucred->cr_gid;
unp3->unp_connid.pid = p->p_p->ps_pid;
unp3->unp_flags |= UNP_FEIDS;
- so2 = so3;
+
if (unp2->unp_flags & UNP_FEIDSBIND) {
unp->unp_connid = unp2->unp_connid;
unp->unp_flags |= UNP_FEIDS;
}
+
+ so2 = so3;
+ } else {
+ if (so2 != so)
+ solock_pair(so, so2);
+ else
+ solock(so);
}
+
error = unp_connect2(so, so2);
-put_locked:
+
sounlock(so);
+
+ /*
+ * `so2' can't be PRU_ABORT'ed concurrently
+ */
+ if (so2 != so)
+ sounlock(so2);
put:
vput(vp);
unlock:
@@ -738,7 +864,8 @@ unp_connect2(struct socket *so, struct socket *so2)
struct unpcb *unp = sotounpcb(so);
struct unpcb *unp2;
- rw_assert_wrlock(&unp_lock);
+ soassertlocked(so);
+ soassertlocked(so2);
if (so2->so_type != so->so_type)
return (EPROTOTYPE);
@@ -767,11 +894,15 @@ unp_connect2(struct socket *so, struct socket *so2)
void
unp_disconnect(struct unpcb *unp)
{
- struct unpcb *unp2 = unp->unp_conn;
+ struct socket *so2;
+ struct unpcb *unp2;
- if (unp2 == NULL)
+ if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL)
return;
+
+ unp2 = unp->unp_conn;
unp->unp_conn = NULL;
+
switch (unp->unp_socket->so_type) {
case SOCK_DGRAM:
@@ -790,35 +921,31 @@ unp_disconnect(struct unpcb *unp)
soisdisconnected(unp2->unp_socket);
break;
}
+
+ if (so2 != unp->unp_socket)
+ sounlock(so2);
}
void
unp_shutdown(struct unpcb *unp)
{
- struct socket *so;
+ struct socket *so2;
switch (unp->unp_socket->so_type) {
case SOCK_STREAM:
case SOCK_SEQPACKET:
- if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
- socantrcvmore(so);
+ if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL)
+ break;
+
+ socantrcvmore(so2);
+ sounlock(so2);
+
break;
default:
break;
}
}
-void
-unp_drop(struct unpcb *unp, int errno)
-{
- struct socket *so = unp->unp_socket;
-
- rw_assert_wrlock(&unp_lock);
-
- so->so_error = errno;
- unp_disconnect(unp);
-}
-
#ifdef notdef
unp_drain(void)
{
diff --git a/sys/miscfs/fifofs/fifo_vnops.c b/sys/miscfs/fifofs/fifo_vnops.c
index ff62072ccdb..d4f5df4d2c7 100644
--- a/sys/miscfs/fifofs/fifo_vnops.c
+++ b/sys/miscfs/fifofs/fifo_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: fifo_vnops.c,v 1.95 2022/06/26 05:20:42 visa Exp $ */
+/* $OpenBSD: fifo_vnops.c,v 1.96 2022/07/01 09:56:17 mvs Exp $ */
/* $NetBSD: fifo_vnops.c,v 1.18 1996/03/16 23:52:42 christos Exp $ */
/*
@@ -176,15 +176,17 @@ fifo_open(void *v)
solock(wso);
wso->so_state |= SS_CANTSENDMORE;
wso->so_snd.sb_lowat = PIPE_BUF;
+ sounlock(wso);
} else {
rso = fip->fi_readsock;
wso = fip->fi_writesock;
- solock(wso);
}
if (ap->a_mode & FREAD) {
fip->fi_readers++;
if (fip->fi_readers == 1) {
+ solock(wso);
wso->so_state &= ~SS_CANTSENDMORE;
+ sounlock(wso);
if (fip->fi_writers > 0)
wakeup(&fip->fi_writers);
}
@@ -193,16 +195,16 @@ fifo_open(void *v)
fip->fi_writers++;
if ((ap->a_mode & O_NONBLOCK) && fip->fi_readers == 0) {
error = ENXIO;
- sounlock(wso);
goto bad;
}
if (fip->fi_writers == 1) {
+ solock(rso);
rso->so_state &= ~(SS_CANTRCVMORE|SS_ISDISCONNECTED);
+ sounlock(rso);
if (fip->fi_readers > 0)
wakeup(&fip->fi_readers);
}
}
- sounlock(wso);
if ((ap->a_mode & O_NONBLOCK) == 0) {
if ((ap->a_mode & FREAD) && fip->fi_writers == 0) {
VOP_UNLOCK(vp);
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index 7e899aee63b..f82c1b0ac11 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: socketvar.h,v 1.104 2022/06/26 05:20:42 visa Exp $ */
+/* $OpenBSD: socketvar.h,v 1.105 2022/07/01 09:56:17 mvs Exp $ */
/* $NetBSD: socketvar.h,v 1.18 1996/02/09 18:25:38 christos Exp $ */
/*-
@@ -38,6 +38,7 @@
#include <sys/task.h>
#include <sys/timeout.h>
#include <sys/rwlock.h>
+#include <sys/refcnt.h>
#ifndef _SOCKLEN_T_DEFINED_
#define _SOCKLEN_T_DEFINED_
@@ -55,6 +56,7 @@ TAILQ_HEAD(soqhead, socket);
struct socket {
const struct protosw *so_proto; /* protocol handle */
struct rwlock so_lock; /* this socket lock */
+ struct refcnt so_refcnt; /* references to this socket */
void *so_pcb; /* protocol control block */
u_int so_state; /* internal state flags SS_*, below */
short so_type; /* generic type, see socket.h */
@@ -80,6 +82,7 @@ struct socket {
short so_q0len; /* partials on so_q0 */
short so_qlen; /* number of connections on so_q */
short so_qlimit; /* max number queued connections */
+ u_long so_newconn; /* # of pending sonewconn() threads */
short so_timeo; /* connection timeout */
u_long so_oobmark; /* chars to oob mark */
u_int so_error; /* error affecting connection */
@@ -149,6 +152,7 @@ struct socket {
#define SS_CONNECTOUT 0x1000 /* connect, not accept, at this end */
#define SS_ISSENDING 0x2000 /* hint for lower layer */
#define SS_DNS 0x4000 /* created using SOCK_DNS socket(2) */
+#define SS_NEWCONN_WAIT 0x8000 /* waiting sonewconn() relock */
#ifdef _KERNEL
@@ -156,6 +160,18 @@ struct socket {
void soassertlocked(struct socket *);
+/* Take a reference on `so' through its `so_refcnt'. */
+static inline void
+soref(struct socket *so)
+{
+	struct refcnt *rc = &so->so_refcnt;
+
+	refcnt_take(rc);
+}
+
+/*
+ * Release a reference taken with soref().  Uses the waking variant of
+ * the release; sofree() waits on the same counter with refcnt_finalize().
+ */
+static inline void
+sorele(struct socket *so)
+{
+	struct refcnt *rc = &so->so_refcnt;
+
+	refcnt_rele_wake(rc);
+}
+
/*
* Macros for sockets and socket buffering.
*/
@@ -329,6 +345,8 @@ int sockargs(struct mbuf **, const void *, size_t, int);
int sosleep_nsec(struct socket *, void *, int, const char *, uint64_t);
void solock(struct socket *);
+int solock_persocket(struct socket *);
+void solock_pair(struct socket *, struct socket *);
void sounlock(struct socket *);
int sendit(struct proc *, int, struct msghdr *, int, register_t *);
diff --git a/sys/sys/unpcb.h b/sys/sys/unpcb.h
index b3641bde092..54d083b6103 100644
--- a/sys/sys/unpcb.h
+++ b/sys/sys/unpcb.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: unpcb.h,v 1.25 2022/02/25 23:51:04 guenther Exp $ */
+/* $OpenBSD: unpcb.h,v 1.26 2022/07/01 09:56:17 mvs Exp $ */
/* $NetBSD: unpcb.h,v 1.6 1994/06/29 06:46:08 cgd Exp $ */
/*
@@ -32,6 +32,8 @@
* @(#)unpcb.h 8.1 (Berkeley) 6/2/93
*/
+#include <sys/refcnt.h>
+
/*
* Protocol control block for an active
* instance of a UNIX internal protocol.
@@ -60,24 +62,26 @@
* Locks used to protect struct members:
* I immutable after creation
* G unp_gc_lock
- * U unp_lock
+ * s socket lock
*/
struct unpcb {
+ struct refcnt unp_refcnt; /* references to this pcb */
struct socket *unp_socket; /* [I] pointer back to socket */
- struct vnode *unp_vnode; /* [U] if associated with file */
+ struct vnode *unp_vnode; /* [s] if associated with file */
struct file *unp_file; /* [G] backpointer for unp_gc() */
- struct unpcb *unp_conn; /* [U] control block of connected socket */
- ino_t unp_ino; /* [U] fake inode number */
- SLIST_HEAD(,unpcb) unp_refs; /* [U] referencing socket linked list */
- SLIST_ENTRY(unpcb) unp_nextref; /* [U] link in unp_refs list */
- struct mbuf *unp_addr; /* [U] bound address of socket */
+ struct unpcb *unp_conn; /* [s] control block of connected
+ socket */
+ ino_t unp_ino; /* [s] fake inode number */
+ SLIST_HEAD(,unpcb) unp_refs; /* [s] referencing socket linked list */
+ SLIST_ENTRY(unpcb) unp_nextref; /* [s] link in unp_refs list */
+ struct mbuf *unp_addr; /* [s] bound address of socket */
long unp_msgcount; /* [G] references from socket rcv buf */
long unp_gcrefs; /* [G] references from gc */
- int unp_flags; /* [U] this unpcb contains peer eids */
+ int unp_flags; /* [s] this unpcb contains peer eids */
int unp_gcflags; /* [G] garbage collector flags */
- struct sockpeercred unp_connid;/* [U] id of peer process */
+ struct sockpeercred unp_connid;/* [s] id of peer process */
struct timespec unp_ctime; /* [I] holds creation time */
LIST_ENTRY(unpcb) unp_link; /* [G] link in per-AF list of sockets */
};
@@ -114,7 +118,6 @@ int unp_connect(struct socket *, struct mbuf *, struct proc *);
int unp_connect2(struct socket *, struct socket *);
void unp_detach(struct unpcb *);
void unp_disconnect(struct unpcb *);
-void unp_drop(struct unpcb *, int);
void unp_gc(void *);
void unp_shutdown(struct unpcb *);
int unp_externalize(struct mbuf *, socklen_t, int);