diff options
Diffstat (limited to 'sys')
-rw-r--r-- | sys/kern/uipc_socket.c | 82 | ||||
-rw-r--r-- | sys/kern/uipc_socket2.c | 128 | ||||
-rw-r--r-- | sys/kern/uipc_syscalls.c | 69 | ||||
-rw-r--r-- | sys/kern/uipc_usrreq.c | 235 | ||||
-rw-r--r-- | sys/miscfs/fifofs/fifo_vnops.c | 10 | ||||
-rw-r--r-- | sys/sys/socketvar.h | 20 | ||||
-rw-r--r-- | sys/sys/unpcb.h | 25 |
7 files changed, 453 insertions, 116 deletions
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index fc2421e9ca1..aa856abc56a 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket.c,v 1.278 2022/06/06 14:45:41 claudio Exp $ */ +/* $OpenBSD: uipc_socket.c,v 1.279 2022/07/01 09:56:17 mvs Exp $ */ /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ /* @@ -52,6 +52,7 @@ #include <sys/atomic.h> #include <sys/rwlock.h> #include <sys/time.h> +#include <sys/refcnt.h> #ifdef DDB #include <machine/db_machdep.h> @@ -146,7 +147,9 @@ soalloc(int prflags) so = pool_get(&socket_pool, prflags); if (so == NULL) return (NULL); - rw_init(&so->so_lock, "solock"); + rw_init_flags(&so->so_lock, "solock", RWL_DUPOK); + refcnt_init(&so->so_refcnt); + return (so); } @@ -247,6 +250,8 @@ solisten(struct socket *so, int backlog) void sofree(struct socket *so, int keep_lock) { + int persocket = solock_persocket(so); + soassertlocked(so); if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) { @@ -255,17 +260,54 @@ sofree(struct socket *so, int keep_lock) return; } if (so->so_head) { + struct socket *head = so->so_head; + /* * We must not decommission a socket that's on the accept(2) * queue. If we do, then accept(2) may hang after select(2) * indicated that the listening socket was ready. */ - if (!soqremque(so, 0)) { + if (so->so_onq == &head->so_q) { if (!keep_lock) sounlock(so); return; } + + if (persocket) { + /* + * Concurrent close of `head' could + * abort `so' due to re-lock. + */ + soref(so); + soref(head); + sounlock(so); + solock(head); + solock(so); + + if (so->so_onq != &head->so_q0) { + sounlock(head); + sounlock(so); + sorele(head); + sorele(so); + return; + } + + sorele(head); + sorele(so); + } + + soqremque(so, 0); + + if (persocket) + sounlock(head); } + + if (persocket) { + sounlock(so); + refcnt_finalize(&so->so_refcnt, "sofinal"); + solock(so); + } + sigio_free(&so->so_sigio); klist_free(&so->so_rcv.sb_sel.si_note); klist_free(&so->so_snd.sb_sel.si_note); @@ -356,13 +398,36 @@ drop: error = error2; } if (so->so_options & SO_ACCEPTCONN) { + int persocket = solock_persocket(so); + + if (persocket) { + /* Wait concurrent sonewconn() threads. */ + while (so->so_newconn > 0) { + so->so_state |= SS_NEWCONN_WAIT; + sosleep_nsec(so, &so->so_newconn, PSOCK, + "netlck", INFSLP); + } + } + while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) { + if (persocket) + solock(so2); (void) soqremque(so2, 0); + if (persocket) + sounlock(so); (void) soabort(so2); + if (persocket) + solock(so); } while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) { + if (persocket) + solock(so2); (void) soqremque(so2, 1); + if (persocket) + sounlock(so); (void) soabort(so2); + if (persocket) + solock(so); } } discard: @@ -430,11 +495,18 @@ soconnect(struct socket *so, struct mbuf *nam) int soconnect2(struct socket *so1, struct socket *so2) { - int error; + int persocket, error; + + if ((persocket = solock_persocket(so1))) + solock_pair(so1, so2); + else + solock(so1); - solock(so1); error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL, (struct mbuf *)so2, NULL, curproc); + + if (persocket) + sounlock(so2); sounlock(so1); return (error); } diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index 6b0b36e3150..e3327e14531 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket2.c,v 1.124 2022/06/26 05:20:42 visa Exp $ */ +/* $OpenBSD: uipc_socket2.c,v 1.125 2022/07/01 09:56:17 mvs Exp $ */ /* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */ /* @@ -53,8 +53,6 @@ u_long sb_max = SB_MAX; /* patchable */ extern struct pool mclpools[]; extern struct pool mbpool; -extern struct rwlock unp_lock; - /* * Procedures to manipulate state flags of socket * and do appropriate wakeups. Normal sequence from the @@ -101,10 +99,37 @@ soisconnected(struct socket *so) soassertlocked(so); so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); so->so_state |= SS_ISCONNECTED; - if (head && soqremque(so, 0)) { + + if (head != NULL && so->so_onq == &head->so_q0) { + int persocket = solock_persocket(so); + + if (persocket) { + soref(so); + soref(head); + + sounlock(so); + solock(head); + solock(so); + + if (so->so_onq != &head->so_q0) { + sounlock(head); + sorele(head); + sorele(so); + + return; + } + + sorele(head); + sorele(so); + } + + soqremque(so, 0); soqinsque(head, so, 1); sorwakeup(head); wakeup_one(&head->so_timeo); + + if (persocket) + sounlock(head); } else { wakeup(&so->so_timeo); sorwakeup(so); @@ -146,7 +171,8 @@ struct socket * sonewconn(struct socket *head, int connstatus) { struct socket *so; - int soqueue = connstatus ? 1 : 0; + int persocket = solock_persocket(head); + int error; /* * XXXSMP as long as `so' and `head' share the same lock, we @@ -175,9 +201,17 @@ sonewconn(struct socket *head, int connstatus) so->so_cpid = head->so_cpid; /* + * Lock order will be `head' -> `so' while these sockets are linked. + */ + if (persocket) + solock(so); + + /* * Inherit watermarks but those may get clamped in low mem situations. */ if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) { + if (persocket) + sounlock(so); pool_put(&socket_pool, so); return (NULL); } @@ -193,20 +227,54 @@ sonewconn(struct socket *head, int connstatus) sigio_init(&so->so_sigio); sigio_copy(&so->so_sigio, &head->so_sigio); - soqinsque(head, so, soqueue); - if ((*so->so_proto->pr_attach)(so, 0)) { - (void) soqremque(so, soqueue); + soqinsque(head, so, 0); + + /* + * We need to unlock `head' because PCB layer could release + * solock() to enforce desired lock order. + */ + if (persocket) { + head->so_newconn++; + sounlock(head); + } + + error = (*so->so_proto->pr_attach)(so, 0); + + if (persocket) { + sounlock(so); + solock(head); + solock(so); + + if ((head->so_newconn--) == 0) { + if ((head->so_state & SS_NEWCONN_WAIT) != 0) { + head->so_state &= ~SS_NEWCONN_WAIT; + wakeup(&head->so_newconn); + } + } + } + + if (error) { + soqremque(so, 0); + if (persocket) + sounlock(so); sigio_free(&so->so_sigio); klist_free(&so->so_rcv.sb_sel.si_note); klist_free(&so->so_snd.sb_sel.si_note); pool_put(&socket_pool, so); return (NULL); } + if (connstatus) { + so->so_state |= connstatus; + soqremque(so, 0); + soqinsque(head, so, 1); sorwakeup(head); wakeup(&head->so_timeo); - so->so_state |= connstatus; } + + if (persocket) + sounlock(so); + return (so); } @@ -214,6 +282,7 @@ void soqinsque(struct socket *head, struct socket *so, int q) { soassertlocked(head); + soassertlocked(so); KASSERT(so->so_onq == NULL); @@ -233,6 +302,7 @@ soqremque(struct socket *so, int q) { struct socket *head = so->so_head; + soassertlocked(so); soassertlocked(head); if (q == 0) { @@ -284,15 +354,40 @@ solock(struct socket *so) case PF_INET6: NET_LOCK(); break; - case PF_UNIX: - rw_enter_write(&unp_lock); - break; default: rw_enter_write(&so->so_lock); break; } } +int +solock_persocket(struct socket *so) +{ + switch (so->so_proto->pr_domain->dom_family) { + case PF_INET: + case PF_INET6: + return 0; + default: + return 1; + } +} + +void +solock_pair(struct socket *so1, struct socket *so2) +{ + KASSERT(so1 != so2); + KASSERT(so1->so_type == so2->so_type); + KASSERT(solock_persocket(so1)); + + if (so1 < so2) { + solock(so1); + solock(so2); + } else { + solock(so2); + solock(so1); + } +} + void sounlock(struct socket *so) { @@ -301,9 +396,6 @@ sounlock(struct socket *so) case PF_INET6: NET_UNLOCK(); break; - case PF_UNIX: - rw_exit_write(&unp_lock); - break; default: rw_exit_write(&so->so_lock); break; @@ -318,9 +410,6 @@ soassertlocked(struct socket *so) case PF_INET6: NET_ASSERT_LOCKED(); break; - case PF_UNIX: - rw_assert_wrlock(&unp_lock); - break; default: rw_assert_wrlock(&so->so_lock); break; @@ -338,9 +427,6 @@ sosleep_nsec(struct socket *so, void *ident, int prio, const char *wmesg, case PF_INET6: ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs); break; - case PF_UNIX: - ret = rwsleep_nsec(ident, &unp_lock, prio, wmesg, nsecs); - break; default: ret = rwsleep_nsec(ident, &so->so_lock, prio, wmesg, nsecs); break; diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index a2590014ae9..c99ee6116be 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_syscalls.c,v 1.195 2022/06/06 14:45:41 claudio Exp $ */ +/* $OpenBSD: uipc_syscalls.c,v 1.196 2022/07/01 09:56:17 mvs Exp $ */ /* $NetBSD: uipc_syscalls.c,v 1.19 1996/02/09 19:00:48 christos Exp $ */ /* @@ -246,7 +246,7 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen, socklen_t namelen; int error, tmpfd; struct socket *head, *so; - int cloexec, nflag; + int cloexec, nflag, persocket; cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0; @@ -269,16 +269,19 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen, head = headfp->f_data; solock(head); + + persocket = solock_persocket(head); + if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) { error = EINVAL; - goto out; + goto out_unlock; } if ((headfp->f_flag & FNONBLOCK) && head->so_qlen == 0) { if (head->so_state & SS_CANTRCVMORE) error = ECONNABORTED; else error = EWOULDBLOCK; - goto out; + goto out_unlock; } while (head->so_qlen == 0 && head->so_error == 0) { if (head->so_state & SS_CANTRCVMORE) { @@ -288,18 +291,22 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen, error = sosleep_nsec(head, &head->so_timeo, PSOCK | PCATCH, "netcon", INFSLP); if (error) - goto out; + goto out_unlock; } if (head->so_error) { error = head->so_error; head->so_error = 0; - goto out; + goto out_unlock; } /* * Do not sleep after we have taken the socket out of the queue. */ so = TAILQ_FIRST(&head->so_q); + + if (persocket) + solock(so); + if (soqremque(so, 1) == 0) panic("accept"); @@ -310,30 +317,52 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen, /* connection has been removed from the listen queue */ KNOTE(&head->so_rcv.sb_sel.si_note, 0); + if (persocket) + sounlock(head); + fp->f_type = DTYPE_SOCKET; fp->f_flag = FREAD | FWRITE | nflag; fp->f_ops = &socketops; fp->f_data = so; + error = soaccept(so, nam); -out: - sounlock(head); - if (!error && name != NULL) + + if (persocket) + sounlock(so); + else + sounlock(head); + + if (error) + goto out; + + if (name != NULL) { error = copyaddrout(p, nam, name, namelen, anamelen); - if (!error) { - fdplock(fdp); - fdinsert(fdp, tmpfd, cloexec, fp); - fdpunlock(fdp); - FRELE(fp, p); - *retval = tmpfd; - } else { - fdplock(fdp); - fdremove(fdp, tmpfd); - fdpunlock(fdp); - closef(fp, p); + if (error) + goto out; } + fdplock(fdp); + fdinsert(fdp, tmpfd, cloexec, fp); + fdpunlock(fdp); + FRELE(fp, p); + *retval = tmpfd; + m_freem(nam); FRELE(headfp, p); + + return 0; + +out_unlock: + sounlock(head); +out: + fdplock(fdp); + fdremove(fdp, tmpfd); + fdpunlock(fdp); + closef(fp, p); + + m_freem(nam); + FRELE(headfp, p); + return (error); } diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 372a473a75c..0710393d376 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_usrreq.c,v 1.165 2022/06/06 14:45:41 claudio Exp $ */ +/* $OpenBSD: uipc_usrreq.c,v 1.166 2022/07/01 09:56:17 mvs Exp $ */ /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ /* @@ -55,6 +55,7 @@ #include <sys/mutex.h> #include <sys/sysctl.h> #include <sys/lock.h> +#include <sys/refcnt.h> #include "kcov.h" #if NKCOV > 0 @@ -66,9 +67,10 @@ * I immutable after creation * D unp_df_lock * G unp_gc_lock - * U unp_lock + * M unp_ino_mtx * R unp_rights_mtx * a atomic + * s socket lock */ struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock"); @@ -76,6 +78,7 @@ struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk"); struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk"); struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); +struct mutex unp_ino_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); /* * Stack of sets of files that were passed over a socket but were @@ -94,6 +97,9 @@ void unp_remove_gcrefs(struct fdpass *, int); void unp_restore_gcrefs(struct fdpass *, int); void unp_scan(struct mbuf *, void (*)(struct fdpass *, int)); int unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *); +static inline void unp_ref(struct unpcb *); +static inline void unp_rele(struct unpcb *); +struct socket *unp_solock_peer(struct socket *); struct pool unpcb_pool; struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL); @@ -127,6 +133,53 @@ unp_init(void) IPL_SOFTNET, 0, "unpcb", NULL); } +static inline void +unp_ref(struct unpcb *unp) +{ + refcnt_take(&unp->unp_refcnt); +} + +static inline void +unp_rele(struct unpcb *unp) +{ + refcnt_rele_wake(&unp->unp_refcnt); +} + +struct socket * +unp_solock_peer(struct socket *so) +{ + struct unpcb *unp, *unp2; + struct socket *so2; + + unp = so->so_pcb; + +again: + if ((unp2 = unp->unp_conn) == NULL) + return NULL; + + so2 = unp2->unp_socket; + + if (so < so2) + solock(so2); + else if (so > so2){ + unp_ref(unp2); + sounlock(so); + solock(so2); + solock(so); + + /* Datagram socket could be reconnected due to re-lock. */ + if (unp->unp_conn != unp2) { + sounlock(so2); + unp_rele(unp2); + goto again; + } + + unp_rele(unp2); + } + + return so2; +} + void uipc_setaddr(const struct unpcb *unp, struct mbuf *nam) { @@ -201,7 +254,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, * if it was bound and we are still connected * (our peer may have closed already!). */ + so2 = unp_solock_peer(so); uipc_setaddr(unp->unp_conn, nam); + if (so2 != NULL && so2 != so) + sounlock(so2); break; case PRU_SHUTDOWN: @@ -218,9 +274,8 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, case SOCK_STREAM: case SOCK_SEQPACKET: - if (unp->unp_conn == NULL) + if ((so2 = unp_solock_peer(so)) == NULL) break; - so2 = unp->unp_conn->unp_socket; /* * Adjust backpressure on sender * and wakeup any waiting to write. @@ -228,6 +283,7 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt; so2->so_snd.sb_cc = so->so_rcv.sb_cc; sowwakeup(so2); + sounlock(so2); break; default: @@ -256,13 +312,16 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, error = unp_connect(so, nam, p); if (error) break; - } else { - if (unp->unp_conn == NULL) { + } + + if ((so2 = unp_solock_peer(so)) == NULL) { + if (nam != NULL) + error = ECONNREFUSED; + else error = ENOTCONN; - break; - } + break; } - so2 = unp->unp_conn->unp_socket; + if (unp->unp_addr) from = mtod(unp->unp_addr, struct sockaddr *); else @@ -273,6 +332,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, control = NULL; } else error = ENOBUFS; + + if (so2 != so) + sounlock(so2); + if (nam) unp_disconnect(unp); break; @@ -284,11 +347,11 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, error = EPIPE; break; } - if (unp->unp_conn == NULL) { + if ((so2 = unp_solock_peer(so)) == NULL) { error = ENOTCONN; break; } - so2 = unp->unp_conn->unp_socket; + /* * Send to paired receive port, and then raise * send buffer counts to maintain backpressure. @@ -310,6 +373,8 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, so->so_snd.sb_cc = so2->so_rcv.sb_cc; if (so2->so_rcv.sb_cc > 0) sorwakeup(so2); + + sounlock(so2); m = NULL; break; @@ -323,12 +388,7 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, case PRU_ABORT: unp_detach(unp); - /* - * As long as `unp_lock' is taken before entering - * uipc_usrreq() releasing it here would lead to a - * double unlock. - */ - sofree(so, 1); + sofree(so, 0); break; case PRU_SENSE: { @@ -336,8 +396,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, sb->st_blksize = so->so_snd.sb_hiwat; sb->st_dev = NODEV; + mtx_enter(&unp_ino_mtx); if (unp->unp_ino == 0) unp->unp_ino = unp_ino++; + mtx_leave(&unp_ino_mtx); sb->st_atim.tv_sec = sb->st_mtim.tv_sec = sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec; @@ -358,7 +420,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, break; case PRU_PEERADDR: + so2 = unp_solock_peer(so); uipc_setaddr(unp->unp_conn, nam); + if (so2 != NULL && so2 != so) + sounlock(so2); break; case PRU_SLOWTIMO: @@ -410,8 +475,6 @@ uipc_attach(struct socket *so, int proto) struct unpcb *unp; int error; - rw_assert_wrlock(&unp_lock); - if (so->so_pcb) return EISCONN; if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { @@ -438,6 +501,7 @@ uipc_attach(struct socket *so, int proto) unp = pool_get(&unpcb_pool, PR_NOWAIT|PR_ZERO); if (unp == NULL) return (ENOBUFS); + refcnt_init(&unp->unp_refcnt); unp->unp_socket = so; so->so_pcb = unp; getnanotime(&unp->unp_ctime); @@ -445,12 +509,6 @@ uipc_attach(struct socket *so, int proto) /* * Enforce `unp_gc_lock' -> `solock()' lock order. */ - /* - * We also release the lock on listening socket and on our peer - * socket when called from unp_connect(). This is safe. The - * listening socket protected by vnode(9) lock. The peer socket - * has 'UNP_CONNECTING' flag set. - */ sounlock(so); rw_enter_write(&unp_gc_lock); LIST_INSERT_HEAD(&unp_head, unp, unp_link); @@ -512,14 +570,13 @@ unp_detach(struct unpcb *unp) { struct socket *so = unp->unp_socket; struct vnode *vp = unp->unp_vnode; - - rw_assert_wrlock(&unp_lock); + struct unpcb *unp2; unp->unp_vnode = NULL; /* * Enforce `unp_gc_lock' -> `solock()' lock order. - * Enforce `i_lock' -> `unp_lock' lock order. + * Enforce `i_lock' -> `solock()' lock order. */ sounlock(so); @@ -538,10 +595,47 @@ unp_detach(struct unpcb *unp) solock(so); - if (unp->unp_conn) + if (unp->unp_conn != NULL) { + /* + * Datagram socket could be connected to itself. + * Such socket will be disconnected here. + */ unp_disconnect(unp); - while (!SLIST_EMPTY(&unp->unp_refs)) - unp_drop(SLIST_FIRST(&unp->unp_refs), ECONNRESET); + } + + while ((unp2 = SLIST_FIRST(&unp->unp_refs)) != NULL) { + struct socket *so2 = unp2->unp_socket; + + if (so < so2) + solock(so2); + else { + unp_ref(unp2); + sounlock(so); + solock(so2); + solock(so); + + if (unp2->unp_conn != unp) { + /* `unp2' was disconnected due to re-lock. */ + sounlock(so2); + unp_rele(unp2); + continue; + } + + unp_rele(unp2); + } + + unp2->unp_conn = NULL; + SLIST_REMOVE(&unp->unp_refs, unp2, unpcb, unp_nextref); + so2->so_error = ECONNRESET; + so2->so_state &= ~SS_ISCONNECTED; + + sounlock(so2); + } + + sounlock(so); + refcnt_finalize(&unp->unp_refcnt, "unpfinal"); + solock(so); + soisdisconnected(so); so->so_pcb = NULL; m_freem(unp->unp_addr); @@ -681,24 +775,42 @@ unp_connect(struct socket *so, struct mbuf *nam, struct proc *p) } if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0) goto put; - solock(so); so2 = vp->v_socket; if (so2 == NULL) { error = ECONNREFUSED; - goto put_locked; + goto put; } if (so->so_type != so2->so_type) { error = EPROTOTYPE; - goto put_locked; + goto put; } + if (so->so_proto->pr_flags & PR_CONNREQUIRED) { + solock(so2); + if ((so2->so_options & SO_ACCEPTCONN) == 0 || (so3 = sonewconn(so2, 0)) == NULL) { error = ECONNREFUSED; - goto put_locked; } + + sounlock(so2); + + if (error != 0) + goto put; + + /* + * Since `so2' is protected by vnode(9) lock, `so3' + * can't be PRU_ABORT'ed here. + */ + solock_pair(so, so3); + unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); + + /* + * `unp_addr', `unp_connid' and 'UNP_FEIDSBIND' flag + * are immutable since we set them in unp_bind(). + */ if (unp2->unp_addr) unp3->unp_addr = m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT); @@ -706,15 +818,29 @@ unp_connect(struct socket *so, struct mbuf *nam, struct proc *p) unp3->unp_connid.gid = p->p_ucred->cr_gid; unp3->unp_connid.pid = p->p_p->ps_pid; unp3->unp_flags |= UNP_FEIDS; - so2 = so3; + if (unp2->unp_flags & UNP_FEIDSBIND) { unp->unp_connid = unp2->unp_connid; unp->unp_flags |= UNP_FEIDS; } + + so2 = so3; + } else { + if (so2 != so) + solock_pair(so, so2); + else + solock(so); } + error = unp_connect2(so, so2); -put_locked: + sounlock(so); + + /* + * `so2' can't be PRU_ABORT'ed concurrently + */ + if (so2 != so) + sounlock(so2); put: vput(vp); unlock: @@ -738,7 +864,8 @@ unp_connect2(struct socket *so, struct socket *so2) struct unpcb *unp = sotounpcb(so); struct unpcb *unp2; - rw_assert_wrlock(&unp_lock); + soassertlocked(so); + soassertlocked(so2); if (so2->so_type != so->so_type) return (EPROTOTYPE); @@ -767,11 +894,15 @@ unp_connect2(struct socket *so, struct socket *so2) void unp_disconnect(struct unpcb *unp) { - struct unpcb *unp2 = unp->unp_conn; + struct socket *so2; + struct unpcb *unp2; - if (unp2 == NULL) + if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL) return; + + unp2 = unp->unp_conn; unp->unp_conn = NULL; + switch (unp->unp_socket->so_type) { case SOCK_DGRAM: @@ -790,35 +921,31 @@ unp_disconnect(struct unpcb *unp) soisdisconnected(unp2->unp_socket); break; } + + if (so2 != unp->unp_socket) + sounlock(so2); } void unp_shutdown(struct unpcb *unp) { - struct socket *so; + struct socket *so2; switch (unp->unp_socket->so_type) { case SOCK_STREAM: case SOCK_SEQPACKET: - if (unp->unp_conn && (so = unp->unp_conn->unp_socket)) - socantrcvmore(so); + if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL) + break; + + socantrcvmore(so2); + sounlock(so2); + break; default: break; } } -void -unp_drop(struct unpcb *unp, int errno) -{ - struct socket *so = unp->unp_socket; - - rw_assert_wrlock(&unp_lock); - - so->so_error = errno; - unp_disconnect(unp); -} - #ifdef notdef unp_drain(void) { diff --git a/sys/miscfs/fifofs/fifo_vnops.c b/sys/miscfs/fifofs/fifo_vnops.c index ff62072ccdb..d4f5df4d2c7 100644 --- a/sys/miscfs/fifofs/fifo_vnops.c +++ b/sys/miscfs/fifofs/fifo_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fifo_vnops.c,v 1.95 2022/06/26 05:20:42 visa Exp $ */ +/* $OpenBSD: fifo_vnops.c,v 1.96 2022/07/01 09:56:17 mvs Exp $ */ /* $NetBSD: fifo_vnops.c,v 1.18 1996/03/16 23:52:42 christos Exp $ */ /* @@ -176,15 +176,17 @@ fifo_open(void *v) solock(wso); wso->so_state |= SS_CANTSENDMORE; wso->so_snd.sb_lowat = PIPE_BUF; + sounlock(wso); } else { rso = fip->fi_readsock; wso = fip->fi_writesock; - solock(wso); } if (ap->a_mode & FREAD) { fip->fi_readers++; if (fip->fi_readers == 1) { + solock(wso); wso->so_state &= ~SS_CANTSENDMORE; + sounlock(wso); if (fip->fi_writers > 0) wakeup(&fip->fi_writers); } @@ -193,16 +195,16 @@ fifo_open(void *v) fip->fi_writers++; if ((ap->a_mode & O_NONBLOCK) && fip->fi_readers == 0) { error = ENXIO; - sounlock(wso); goto bad; } if (fip->fi_writers == 1) { + solock(rso); rso->so_state &= ~(SS_CANTRCVMORE|SS_ISDISCONNECTED); + sounlock(rso); if (fip->fi_readers > 0) wakeup(&fip->fi_readers); } } - sounlock(wso); if ((ap->a_mode & O_NONBLOCK) == 0) { if ((ap->a_mode & FREAD) && fip->fi_writers == 0) { VOP_UNLOCK(vp); diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index 7e899aee63b..f82c1b0ac11 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: socketvar.h,v 1.104 2022/06/26 05:20:42 visa Exp $ */ +/* $OpenBSD: socketvar.h,v 1.105 2022/07/01 09:56:17 mvs Exp $ */ /* $NetBSD: socketvar.h,v 1.18 1996/02/09 18:25:38 christos Exp $ */ /*- @@ -38,6 +38,7 @@ #include <sys/task.h> #include <sys/timeout.h> #include <sys/rwlock.h> +#include <sys/refcnt.h> #ifndef _SOCKLEN_T_DEFINED_ #define _SOCKLEN_T_DEFINED_ @@ -55,6 +56,7 @@ TAILQ_HEAD(soqhead, socket); struct socket { const struct protosw *so_proto; /* protocol handle */ struct rwlock so_lock; /* this socket lock */ + struct refcnt so_refcnt; /* references to this socket */ void *so_pcb; /* protocol control block */ u_int so_state; /* internal state flags SS_*, below */ short so_type; /* generic type, see socket.h */ @@ -80,6 +82,7 @@ struct socket { short so_q0len; /* partials on so_q0 */ short so_qlen; /* number of connections on so_q */ short so_qlimit; /* max number queued connections */ + u_long so_newconn; /* # of pending sonewconn() threads */ short so_timeo; /* connection timeout */ u_long so_oobmark; /* chars to oob mark */ u_int so_error; /* error affecting connection */ @@ -149,6 +152,7 @@ struct socket { #define SS_CONNECTOUT 0x1000 /* connect, not accept, at this end */ #define SS_ISSENDING 0x2000 /* hint for lower layer */ #define SS_DNS 0x4000 /* created using SOCK_DNS socket(2) */ +#define SS_NEWCONN_WAIT 0x8000 /* waiting sonewconn() relock */ #ifdef _KERNEL @@ -156,6 +160,18 @@ struct socket { void soassertlocked(struct socket *); +static inline void +soref(struct socket *so) +{ + refcnt_take(&so->so_refcnt); +} + +static inline void +sorele(struct socket *so) +{ + refcnt_rele_wake(&so->so_refcnt); +} + /* * Macros for sockets and socket buffering. */ @@ -329,6 +345,8 @@ int sockargs(struct mbuf **, const void *, size_t, int); int sosleep_nsec(struct socket *, void *, int, const char *, uint64_t); void solock(struct socket *); +int solock_persocket(struct socket *); +void solock_pair(struct socket *, struct socket *); void sounlock(struct socket *); int sendit(struct proc *, int, struct msghdr *, int, register_t *); diff --git a/sys/sys/unpcb.h b/sys/sys/unpcb.h index b3641bde092..54d083b6103 100644 --- a/sys/sys/unpcb.h +++ b/sys/sys/unpcb.h @@ -1,4 +1,4 @@ -/* $OpenBSD: unpcb.h,v 1.25 2022/02/25 23:51:04 guenther Exp $ */ +/* $OpenBSD: unpcb.h,v 1.26 2022/07/01 09:56:17 mvs Exp $ */ /* $NetBSD: unpcb.h,v 1.6 1994/06/29 06:46:08 cgd Exp $ */ /* @@ -32,6 +32,8 @@ * @(#)unpcb.h 8.1 (Berkeley) 6/2/93 */ +#include <sys/refcnt.h> + /* * Protocol control block for an active * instance of a UNIX internal protocol. @@ -60,24 +62,26 @@ * Locks used to protect struct members: * I immutable after creation * G unp_gc_lock - * U unp_lock + * s socket lock */ struct unpcb { + struct refcnt unp_refcnt; /* references to this pcb */ struct socket *unp_socket; /* [I] pointer back to socket */ - struct vnode *unp_vnode; /* [U] if associated with file */ + struct vnode *unp_vnode; /* [s] if associated with file */ struct file *unp_file; /* [G] backpointer for unp_gc() */ - struct unpcb *unp_conn; /* [U] control block of connected socket */ - ino_t unp_ino; /* [U] fake inode number */ - SLIST_HEAD(,unpcb) unp_refs; /* [U] referencing socket linked list */ - SLIST_ENTRY(unpcb) unp_nextref; /* [U] link in unp_refs list */ - struct mbuf *unp_addr; /* [U] bound address of socket */ + struct unpcb *unp_conn; /* [s] control block of connected + socket */ + ino_t unp_ino; /* [s] fake inode number */ + SLIST_HEAD(,unpcb) unp_refs; /* [s] referencing socket linked list */ + SLIST_ENTRY(unpcb) unp_nextref; /* [s] link in unp_refs list */ + struct mbuf *unp_addr; /* [s] bound address of socket */ long unp_msgcount; /* [G] references from socket rcv buf */ long unp_gcrefs; /* [G] references from gc */ - int unp_flags; /* [U] this unpcb contains peer eids */ + int unp_flags; /* [s] this unpcb contains peer eids */ int unp_gcflags; /* [G] garbage collector flags */ - struct sockpeercred unp_connid;/* [U] id of peer process */ + struct sockpeercred unp_connid;/* [s] id of peer process */ struct timespec unp_ctime; /* [I] holds creation time */ LIST_ENTRY(unpcb) unp_link; /* [G] link in per-AF list of sockets */ }; @@ -114,7 +118,6 @@ int unp_connect(struct socket *, struct mbuf *, struct proc *); int unp_connect2(struct socket *, struct socket *); void unp_detach(struct unpcb *); void unp_disconnect(struct unpcb *); -void unp_drop(struct unpcb *, int); void unp_gc(void *); void unp_shutdown(struct unpcb *); int unp_externalize(struct mbuf *, socklen_t, int); |