diff options
-rw-r--r-- | sys/conf/GENERIC | 3 | ||||
-rw-r--r-- | sys/kern/uipc_socket.c | 361 | ||||
-rw-r--r-- | sys/netinet/in_proto.c | 4 | ||||
-rw-r--r-- | sys/netinet/tcp_input.c | 36 | ||||
-rw-r--r-- | sys/netinet/tcp_output.c | 8 | ||||
-rw-r--r-- | sys/netinet/tcp_var.h | 4 | ||||
-rw-r--r-- | sys/netinet6/in6_proto.c | 4 | ||||
-rw-r--r-- | sys/sys/protosw.h | 3 | ||||
-rw-r--r-- | sys/sys/socket.h | 11 | ||||
-rw-r--r-- | sys/sys/socketvar.h | 28 |
10 files changed, 434 insertions, 28 deletions
diff --git a/sys/conf/GENERIC b/sys/conf/GENERIC index d41397419f4..2868aa33aaa 100644 --- a/sys/conf/GENERIC +++ b/sys/conf/GENERIC @@ -1,4 +1,4 @@ -# $OpenBSD: GENERIC,v 1.168 2010/10/18 15:01:21 claudio Exp $ +# $OpenBSD: GENERIC,v 1.169 2011/01/07 17:50:42 bluhm Exp $ # # Machine-independent option; used by all architectures for their # GENERIC kernel @@ -42,6 +42,7 @@ option EXT2FS # Second Extended Filesystem option MFS # memory file system option NNPFS # NNPFS filesystem +option SOCKET_SPLICE # Socket Splicing for TCP option TCP_SACK # Selective Acknowledgements for TCP option TCP_ECN # Explicit Congestion Notification for TCP option TCP_SIGNATURE # TCP MD5 Signatures, for BGP routing sessions diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 53648922f37..115bb69a8d1 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket.c,v 1.84 2010/09/24 02:59:45 claudio Exp $ */ +/* $OpenBSD: uipc_socket.c,v 1.85 2011/01/07 17:50:42 bluhm Exp $ */ /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ /* @@ -36,6 +36,7 @@ #include <sys/systm.h> #include <sys/proc.h> #include <sys/file.h> +#include <sys/filedesc.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/domain.h> @@ -50,6 +51,8 @@ #include <net/route.h> #include <sys/pool.h> +int sosplice(struct socket *, int, off_t); +int somove(struct socket *, int); void filt_sordetach(struct knote *kn); int filt_soread(struct knote *kn, long hint); void filt_sowdetach(struct knote *kn); @@ -144,8 +147,13 @@ sobind(struct socket *so, struct mbuf *nam, struct proc *p) int solisten(struct socket *so, int backlog) { - int s = splsoftnet(), error; + int s, error; +#ifdef SOCKET_SPLICE + if (so->so_splice || so->so_spliceback) + return (EOPNOTSUPP); +#endif /* SOCKET_SPLICE */ + s = splsoftnet(); error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL, curproc); if (error) { @@ -183,6 +191,21 @@ sofree(struct socket *so) if (!soqremque(so, 0)) return; } +#ifdef SOCKET_SPLICE + if (so->so_spliceback) { + so->so_snd.sb_flags &= ~SB_SPLICE; + so->so_spliceback->so_rcv.sb_flags &= ~SB_SPLICE; + so->so_spliceback->so_splice = NULL; + if (soreadable(so->so_spliceback)) + sorwakeup(so->so_spliceback); + } + if (so->so_splice) { + so->so_splice->so_snd.sb_flags &= ~SB_SPLICE; + so->so_rcv.sb_flags &= ~SB_SPLICE; + so->so_splice->so_spliceback = NULL; + } + so->so_spliceback = so->so_splice = NULL; +#endif /* SOCKET_SPLICE */ sbrelease(&so->so_snd); sorflush(so); pool_put(&socket_pool, so); @@ -967,6 +990,311 @@ sorflush(struct socket *so) sbrelease(&asb); } +#ifdef SOCKET_SPLICE +int +sosplice(struct socket *so, int fd, off_t max) +{ + struct file *fp; + struct socket *sosp; + int s, error = 0; + + if ((so->so_proto->pr_flags & PR_SPLICE) == 0) + return (EPROTONOSUPPORT); + if (so->so_options & SO_ACCEPTCONN) + return (EOPNOTSUPP); + if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) + return (ENOTCONN); + + /* If no fd is given, unsplice by removing existing link. */ + if (fd < 0) { + s = splsoftnet(); + if (so->so_splice) { + so->so_splice->so_snd.sb_flags &= ~SB_SPLICE; + so->so_rcv.sb_flags &= ~SB_SPLICE; + so->so_splice->so_spliceback = NULL; + so->so_splice = NULL; + if (soreadable(so)) + sorwakeup(so); + } + splx(s); + return (0); + } + + if (max && max < 0) + return (EINVAL); + + /* Find sosp, the drain socket where data will be spliced into. */ + if ((error = getsock(curproc->p_fd, fd, &fp)) != 0) + return (error); + sosp = fp->f_data; + + /* Lock both receive and send buffer. */ + if ((error = sblock(&so->so_rcv, + (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK)) != 0) { + FRELE(fp); + return (error); + } + if ((error = sblock(&sosp->so_snd, M_WAITOK)) != 0) { + sbunlock(&so->so_rcv); + FRELE(fp); + return (error); + } + s = splsoftnet(); + + if (so->so_splice || sosp->so_spliceback) { + error = EBUSY; + goto release; + } + if (sosp->so_proto->pr_usrreq != so->so_proto->pr_usrreq) { + error = EPROTONOSUPPORT; + goto release; + } + if (sosp->so_options & SO_ACCEPTCONN) { + error = EOPNOTSUPP; + goto release; + } + if ((sosp->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) { + error = ENOTCONN; + goto release; + } + + /* Splice so and sosp together. */ + so->so_splice = sosp; + sosp->so_spliceback = so; + so->so_splicelen = 0; + so->so_splicemax = max; + + /* + * To prevent softnet interrupt from calling somove() while + * we sleep, the socket buffers are not marked as spliced yet. + */ + if (somove(so, M_WAIT)) { + so->so_rcv.sb_flags |= SB_SPLICE; + sosp->so_snd.sb_flags |= SB_SPLICE; + } + + release: + splx(s); + sbunlock(&sosp->so_snd); + sbunlock(&so->so_rcv); + FRELE(fp); + return (error); +} + +/* + * Move data from receive buffer of spliced source socket to send + * buffer of drain socket. Try to move as much as possible in one + * big chunk. It is a TCP only implementation. + * Return value 0 means splicing has been finished, 1 continue. + */ +int +somove(struct socket *so, int wait) +{ + struct socket *sosp = so->so_splice; + struct mbuf *m = NULL, **mp; + u_long len, off, oobmark; + long space; + int error = 0, maxreached = 0; + short state; + + splsoftassert(IPL_SOFTNET); + + if (so->so_error) { + error = so->so_error; + goto release; + } + if (sosp->so_state & SS_CANTSENDMORE) { + error = EPIPE; + goto release; + } + if (sosp->so_error) { + error = sosp->so_error; + goto release; + } + if ((sosp->so_state & SS_ISCONNECTED) == 0) + goto release; + + /* Calculate how many bytes can be copied now. */ + len = so->so_rcv.sb_cc; + if (len == 0) + goto release; + if (so->so_splicemax) { + KASSERT(so->so_splicelen < so->so_splicemax); + if (so->so_splicemax <= so->so_splicelen + len) { + len = so->so_splicemax - so->so_splicelen; + maxreached = 1; + } + } + space = sbspace(&sosp->so_snd); + if (so->so_oobmark && so->so_oobmark < len && + so->so_oobmark < space + 1024) + space += 1024; + if (space <= 0) { + maxreached = 0; + goto release; + } + if (space < len) { + maxreached = 0; + if (space < sosp->so_snd.sb_lowat) + goto release; + len = space; + } + sosp->so_state |= SS_ISSENDING; + + /* Take at most len mbufs out of receive buffer. */ + m = so->so_rcv.sb_mb; + for (off = 0, mp = &m; off < len; + off += (*mp)->m_len, mp = &(*mp)->m_next) { + u_long size = len - off; + + if ((*mp)->m_len > size) { + if (!maxreached || (*mp = m_copym( + so->so_rcv.sb_mb, 0, size, wait)) == NULL) { + len -= size; + break; + } + so->so_rcv.sb_mb->m_data += size; + so->so_rcv.sb_mb->m_len -= size; + so->so_rcv.sb_cc -= size; + so->so_rcv.sb_datacc -= size; + } else { + *mp = so->so_rcv.sb_mb; + sbfree(&so->so_rcv, *mp); + so->so_rcv.sb_mb = (*mp)->m_next; + } + } + *mp = NULL; + SB_EMPTY_FIXUP(&so->so_rcv); + so->so_rcv.sb_lastrecord = so->so_rcv.sb_mb; + + SBLASTRECORDCHK(&so->so_rcv, "somove"); + SBLASTMBUFCHK(&so->so_rcv, "somove"); + KDASSERT(m->m_nextpkt == NULL); + KASSERT(so->so_rcv.sb_mb == so->so_rcv.sb_lastrecord); +#ifdef SOCKBUF_DEBUG + sbcheck(&so->so_rcv); +#endif + + /* Send window update to source peer if receive buffer has changed. */ + if (m) + (so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL, + (struct mbuf *)0L, NULL, NULL); + + /* Receive buffer did shrink by len bytes, adjust oob. */ + state = so->so_state; + so->so_state &= ~SS_RCVATMARK; + oobmark = so->so_oobmark; + so->so_oobmark = oobmark > len ? oobmark - len : 0; + if (oobmark) { + if (oobmark == len) + so->so_state |= SS_RCVATMARK; + if (oobmark >= len) + oobmark = 0; + } + + /* + * Handle oob data. If any malloc fails, ignore error. + * TCP urgent data is not very reliable anyway. + */ + while (m && ((state & SS_RCVATMARK) || oobmark) && + (so->so_options & SO_OOBINLINE)) { + struct mbuf *o = NULL; + + if (state & SS_RCVATMARK) { + o = m_get(wait, MT_DATA); + state &= ~SS_RCVATMARK; + } else if (oobmark) { + o = m_split(m, oobmark, wait); + if (o) { + error = (*sosp->so_proto->pr_usrreq)(sosp, + PRU_SEND, m, NULL, NULL, NULL); + m = NULL; + if (error) { + m_freem(o); + if (sosp->so_state & SS_CANTSENDMORE) + error = EPIPE; + goto release; + } + len -= oobmark; + so->so_splicelen += oobmark; + m = o; + o = m_get(wait, MT_DATA); + } + oobmark = 0; + } + if (o) { + o->m_len = 1; + *mtod(o, caddr_t) = *mtod(m, caddr_t); + error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SENDOOB, + o, NULL, NULL, NULL); + if (error) { + if (sosp->so_state & SS_CANTSENDMORE) + error = EPIPE; + goto release; + } + len -= 1; + so->so_splicelen += 1; + if (oobmark) { + oobmark -= 1; + if (oobmark == 0) + state |= SS_RCVATMARK; + } + m_adj(m, 1); + } + } + + /* Append all remaining data to drain socket. */ + if (m) { + if (so->so_rcv.sb_cc == 0) + sosp->so_state &= ~SS_ISSENDING; + error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SEND, m, NULL, + NULL, NULL); + m = NULL; + if (error) { + if (sosp->so_state & SS_CANTSENDMORE) + error = EPIPE; + goto release; + } + so->so_splicelen += len; + } + + release: + if (m) + m_freem(m); + sosp->so_state &= ~SS_ISSENDING; + if (error) + so->so_error = error; + if (((so->so_state & SS_CANTRCVMORE) && so->so_rcv.sb_cc == 0) || + (sosp->so_state & SS_CANTSENDMORE) || maxreached || error) { + sosp->so_snd.sb_flags &= ~SB_SPLICE; + so->so_rcv.sb_flags &= ~SB_SPLICE; + so->so_splice = sosp->so_spliceback = NULL; + if (soreadable(so)) + sorwakeup(so); + return (0); + } + return (1); +} + +void +sorwakeup(struct socket *so) +{ + if (so->so_rcv.sb_flags & SB_SPLICE) { + (void) somove(so, M_DONTWAIT); + return; + } + _sorwakeup(so); +} + +void +sowwakeup(struct socket *so) +{ + if (so->so_snd.sb_flags & SB_SPLICE) + (void) somove(so->so_spliceback, M_DONTWAIT); + _sowwakeup(so); +} +#endif /* SOCKET_SPLICE */ + int sosetopt(struct socket *so, int level, int optname, struct mbuf *m0) { @@ -1096,6 +1424,23 @@ sosetopt(struct socket *so, int level, int optname, struct mbuf *m0) break; } +#ifdef SOCKET_SPLICE + case SO_SPLICE: + if (m == NULL) { + error = sosplice(so, -1, 0); + } else if (m->m_len < sizeof(int)) { + error = EINVAL; + goto bad; + } else if (m->m_len < sizeof(struct splice)) { + error = sosplice(so, *mtod(m, int *), 0); + } else { + error = sosplice(so, + mtod(m, struct splice *)->sp_fd, + mtod(m, struct splice *)->sp_max); + } + break; +#endif /* SOCKET_SPLICE */ + default: error = ENOPROTOOPT; break; @@ -1188,6 +1533,18 @@ sogetopt(struct socket *so, int level, int optname, struct mbuf **mp) break; } +#ifdef SOCKET_SPLICE + case SO_SPLICE: + { + int s = splsoftnet(); + + m->m_len = sizeof(off_t); + *mtod(m, off_t *) = so->so_splicelen; + splx(s); + break; + } +#endif /* SOCKET_SPLICE */ + case SO_PEERCRED: if (so->so_proto->pr_protocol == AF_UNIX) { struct unpcb *unp = sotounpcb(so); diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index 7dd03ac2e51..ee8fff3e74b 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -1,4 +1,4 @@ -/* $OpenBSD: in_proto.c,v 1.54 2010/08/29 09:24:38 gollo Exp $ */ +/* $OpenBSD: in_proto.c,v 1.55 2011/01/07 17:50:42 bluhm Exp $ */ /* $NetBSD: in_proto.c,v 1.14 1996/02/18 18:58:32 christos Exp $ */ /* @@ -189,7 +189,7 @@ struct protosw inetsw[] = { udp_usrreq, udp_init, 0, 0, 0, udp_sysctl }, -{ SOCK_STREAM, &inetdomain, IPPROTO_TCP, PR_CONNREQUIRED|PR_WANTRCVD|PR_ABRTACPTDIS, +{ SOCK_STREAM, &inetdomain, IPPROTO_TCP, PR_CONNREQUIRED|PR_WANTRCVD|PR_ABRTACPTDIS|PR_SPLICE, tcp_input, 0, tcp_ctlinput, tcp_ctloutput, tcp_usrreq, tcp_init, 0, tcp_slowtimo, 0, tcp_sysctl diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 66ad114f222..9a6f406456f 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_input.c,v 1.239 2010/09/29 19:42:11 claudio Exp $ */ +/* $OpenBSD: tcp_input.c,v 1.240 2011/01/07 17:50:42 bluhm Exp $ */ /* $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $ */ /* @@ -328,7 +328,9 @@ present: pool_put(&tcpqe_pool, q); q = nq; } while (q != NULL && q->tcpqe_tcp->th_seq == tp->rcv_nxt); + tp->t_flags |= TF_BLOCKOUTPUT; sorwakeup(so); + tp->t_flags &= ~TF_BLOCKOUTPUT; return (flags); } @@ -368,7 +370,7 @@ tcp_input(struct mbuf *m, ...) struct tcpcb *tp = 0; int tiflags; struct socket *so = NULL; - int todrop, acked, ourfinisacked, needoutput = 0; + int todrop, acked, ourfinisacked; int hdroptlen = 0; short ostate = 0; tcp_seq iss, *reuse = NULL; @@ -1090,9 +1092,13 @@ after_listen: TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur); tcp_update_sndspace(tp); - if (sb_notify(&so->so_snd)) + if (sb_notify(&so->so_snd)) { + tp->t_flags |= TF_BLOCKOUTPUT; sowwakeup(so); - if (so->so_snd.sb_cc) + tp->t_flags &= ~TF_BLOCKOUTPUT; + } + if (so->so_snd.sb_cc || + tp->t_flags & TF_NEEDOUTPUT) (void) tcp_output(tp); return; } @@ -1136,8 +1142,10 @@ after_listen: m_adj(m, iphlen + off); sbappendstream(&so->so_rcv, m); } + tp->t_flags |= TF_BLOCKOUTPUT; sorwakeup(so); - if (tp->t_flags & TF_ACKNOW) + tp->t_flags &= ~TF_BLOCKOUTPUT; + if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT)) (void) tcp_output(tp); return; } @@ -1773,10 +1781,10 @@ trimthenstep6: #if defined(TCP_SACK) && defined(TCP_FACK) /* Force call to tcp_output */ if (tp->snd_awnd < tp->snd_cwnd) - needoutput = 1; + tp->t_flags |= TF_NEEDOUTPUT; #else tp->snd_cwnd += tp->t_maxseg; - needoutput = 1; + tp->t_flags |= TF_NEEDOUTPUT; #endif /* TCP_FACK */ } else { /* Out of fast recovery */ @@ -1844,7 +1852,7 @@ trimthenstep6: */ if (th->th_ack == tp->snd_max) { TCP_TIMER_DISARM(tp, TCPT_REXMT); - needoutput = 1; + tp->t_flags |= TF_NEEDOUTPUT; } else if (TCP_TIMER_ISARMED(tp, TCPT_PERSIST) == 0) TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur); /* @@ -1877,8 +1885,11 @@ trimthenstep6: } tcp_update_sndspace(tp); - if (sb_notify(&so->so_snd)) + if (sb_notify(&so->so_snd)) { + tp->t_flags |= TF_BLOCKOUTPUT; sowwakeup(so); + tp->t_flags &= ~TF_BLOCKOUTPUT; + } /* * If we had a pending ICMP message that referred to data @@ -1996,7 +2007,7 @@ step6: tp->snd_wl2 = th->th_ack; if (tp->snd_wnd > tp->max_sndwnd) tp->max_sndwnd = tp->snd_wnd; - needoutput = 1; + tp->t_flags |= TF_NEEDOUTPUT; } /* @@ -2088,7 +2099,9 @@ dodata: /* XXX */ m_adj(m, hdroptlen); sbappendstream(&so->so_rcv, m); } + tp->t_flags |= TF_BLOCKOUTPUT; sorwakeup(so); + tp->t_flags &= ~TF_BLOCKOUTPUT; } else { m_adj(m, hdroptlen); tiflags = tcp_reass(tp, th, m, &tlen); @@ -2182,9 +2195,8 @@ dodata: /* XXX */ /* * Return any desired output. */ - if (needoutput || (tp->t_flags & TF_ACKNOW)) { + if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT)) (void) tcp_output(tp); - } return; badsyn: diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 08d93835a4a..868b14c65e1 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_output.c,v 1.92 2010/09/24 02:59:45 claudio Exp $ */ +/* $OpenBSD: tcp_output.c,v 1.93 2011/01/07 17:50:42 bluhm Exp $ */ /* $NetBSD: tcp_output.c,v 1.16 1997/06/03 16:17:09 kml Exp $ */ /* @@ -228,6 +228,12 @@ tcp_output(struct tcpcb *tp) int needect; #endif + if (tp->t_flags & TF_BLOCKOUTPUT) { + tp->t_flags |= TF_NEEDOUTPUT; + return (0); + } else + tp->t_flags &= ~TF_NEEDOUTPUT; + #if defined(TCP_SACK) && defined(TCP_SIGNATURE) && defined(DIAGNOSTIC) if (tp->sack_enable && (tp->t_flags & TF_SIGNATURE)) return (EINVAL); diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 6ded3dd733f..0573c7d966b 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_var.h,v 1.97 2010/10/21 11:38:27 bluhm Exp $ */ +/* $OpenBSD: tcp_var.h,v 1.98 2011/01/07 17:50:42 bluhm Exp $ */ /* $NetBSD: tcp_var.h,v 1.17 1996/02/13 23:44:24 christos Exp $ */ /* @@ -95,6 +95,8 @@ struct tcpcb { #define TF_LASTIDLE 0x00100000 /* no outstanding ACK on last send */ #define TF_DEAD 0x00200000 /* dead and to-be-released */ #define TF_PMTUD_PEND 0x00400000 /* Path MTU Discovery pending */ +#define TF_NEEDOUTPUT 0x00800000 /* call tcp_output after tcp_input */ +#define TF_BLOCKOUTPUT 0x01000000 /* avert tcp_output during tcp_input */ struct mbuf *t_template; /* skeletal packet for transmit */ struct inpcb *t_inpcb; /* back pointer to internet pcb */ diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c index 11c29783988..7d40de9db8f 100644 --- a/sys/netinet6/in6_proto.c +++ b/sys/netinet6/in6_proto.c @@ -1,4 +1,4 @@ -/* $OpenBSD: in6_proto.c,v 1.59 2010/07/08 19:42:46 jsg Exp $ */ +/* $OpenBSD: in6_proto.c,v 1.60 2011/01/07 17:50:42 bluhm Exp $ */ /* $KAME: in6_proto.c,v 1.66 2000/10/10 15:35:47 itojun Exp $ */ /* @@ -137,7 +137,7 @@ struct ip6protosw inet6sw[] = { 0, 0, 0, udp_sysctl, }, -{ SOCK_STREAM, &inet6domain, IPPROTO_TCP, PR_CONNREQUIRED|PR_WANTRCVD|PR_ABRTACPTDIS, +{ SOCK_STREAM, &inet6domain, IPPROTO_TCP, PR_CONNREQUIRED|PR_WANTRCVD|PR_ABRTACPTDIS|PR_SPLICE, tcp6_input, 0, tcp6_ctlinput, tcp_ctloutput, tcp_usrreq, #ifdef INET /* don't call initialization and timeout routines twice */ diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h index 40a362e79d9..ea47dbc22e6 100644 --- a/sys/sys/protosw.h +++ b/sys/sys/protosw.h @@ -1,4 +1,4 @@ -/* $OpenBSD: protosw.h,v 1.14 2010/10/18 04:31:01 guenther Exp $ */ +/* $OpenBSD: protosw.h,v 1.15 2011/01/07 17:50:42 bluhm Exp $ */ /* $NetBSD: protosw.h,v 1.10 1996/04/09 20:55:32 cgd Exp $ */ /*- @@ -106,6 +106,7 @@ struct protosw { #define PR_RIGHTS 0x10 /* passes capabilities */ #define PR_ABRTACPTDIS 0x20 /* abort on accept(2) to disconnected socket */ +#define PR_SPLICE 0x40 /* socket splicing is possible */ /* * The arguments to usrreq are: diff --git a/sys/sys/socket.h b/sys/sys/socket.h index 2c9d895a334..f8454d7d4dd 100644 --- a/sys/sys/socket.h +++ b/sys/sys/socket.h @@ -1,4 +1,4 @@ -/* $OpenBSD: socket.h,v 1.70 2010/07/05 22:20:22 tedu Exp $ */ +/* $OpenBSD: socket.h,v 1.71 2011/01/07 17:50:42 bluhm Exp $ */ /* $NetBSD: socket.h,v 1.14 1996/02/09 18:25:36 christos Exp $ */ /* @@ -87,6 +87,7 @@ #define SO_NETPROC 0x1020 /* multiplex; network processing */ #define SO_RTABLE 0x1021 /* routing table to be used */ #define SO_PEERCRED 0x1022 /* get connect-time credentials */ +#define SO_SPLICE 0x1023 /* splice data to other socket */ /* * Structure used for manipulating linger option. @@ -97,6 +98,14 @@ struct linger { }; /* + * Structure used for manipulating splice option. + */ +struct splice { + int sp_fd; /* drain socket file descriptor */ + off_t sp_max; /* if set, maximum bytes to splice */ +}; + +/* * Level number for (get/set)sockopt() to apply to socket itself. */ #define SOL_SOCKET 0xffff /* options for socket level */ diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index 81c6e17267b..fbe2a7a48e8 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: socketvar.h,v 1.47 2010/09/24 02:59:46 claudio Exp $ */ +/* $OpenBSD: socketvar.h,v 1.48 2011/01/07 17:50:42 bluhm Exp $ */ /* $NetBSD: socketvar.h,v 1.18 1996/02/09 18:25:38 christos Exp $ */ /*- @@ -75,6 +75,13 @@ struct socket { uid_t so_siguid; /* uid of process who set so_pgid */ uid_t so_sigeuid; /* euid of process who set so_pgid */ u_long so_oobmark; /* chars to oob mark */ + +#if 1 /*def SOCKET_SPLICE*/ + struct socket *so_splice; /* send data to drain socket */ + struct socket *so_spliceback; /* back ref for notify and cleanup */ + off_t so_splicelen; /* number of bytes spliced so far */ + off_t so_splicemax; /* maximum number of bytes to splice */ +#endif /* SOCKET_SPLICE */ /* * Variables for socket buffering. */ @@ -102,6 +109,7 @@ struct socket { #define SB_ASYNC 0x10 /* ASYNC I/O, need signals */ #define SB_NOINTR 0x40 /* operations not interruptible */ #define SB_KNOTE 0x80 /* kernel note attached */ +#define SB_SPLICE 0x0100 /* buffer is splice source or drain */ void *so_internal; /* Space for svr4 stream data */ void (*so_upcall)(struct socket *so, caddr_t arg, int waitf); @@ -145,7 +153,7 @@ struct socket { * Do we need to notify the other side when I/O is possible? */ #define sb_notify(sb) (((sb)->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC| \ - SB_KNOTE)) != 0) + SB_KNOTE|SB_SPLICE)) != 0) /* * How much space is there in a socket buffer (so->so_snd or so->so_rcv)? @@ -165,7 +173,7 @@ struct socket { ((so)->so_state & SS_ISSENDING) /* can we read something from so? */ -#define soreadable(so) \ +#define _soreadable(so) \ ((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \ ((so)->so_state & SS_CANTRCVMORE) || \ (so)->so_qlen || (so)->so_error) @@ -215,14 +223,24 @@ struct socket { } \ } while (/* CONSTCOND */ 0) -#define sorwakeup(so) do { \ +#define _sorwakeup(so) do { \ sowakeup((so), &(so)->so_rcv); \ if ((so)->so_upcall) \ (*((so)->so_upcall))((so), (so)->so_upcallarg, \ M_DONTWAIT); \ } while (/* CONSTCOND */ 0) -#define sowwakeup(so) sowakeup((so), &(so)->so_snd) +#define _sowwakeup(so) sowakeup((so), &(so)->so_snd) + +#ifdef SOCKET_SPLICE +#define soreadable(so) ((so)->so_splice == NULL && _soreadable(so)) +void sorwakeup(struct socket *); +void sowwakeup(struct socket *); +#else /* SOCKET_SPLICE */ +#define soreadable(so) _soreadable(so) +#define sorwakeup(so) _sorwakeup(so) +#define sowwakeup(so) _sowwakeup(so) +#endif /* SOCKET_SPLICE */ #ifdef _KERNEL extern u_long sb_max; |