diff options
author | Niels Provos <provos@cvs.openbsd.org> | 2000-09-18 22:06:39 +0000 |
---|---|---|
committer | Niels Provos <provos@cvs.openbsd.org> | 2000-09-18 22:06:39 +0000 |
commit | 90175ecf691a7934faa9072f3a1ed18f4e91deb3 (patch) | |
tree | 68bd50ddf0e438117ac0a4f0bb898d7075a341c3 /sys | |
parent | c396a799db246d91ccfc784bce1f77a21e417f2a (diff) |
Path MTU discovery based on NetBSD but with the decision to use the DF
flag delayed to ip_output(). That halves the code and reduces most of
the route lookups. okay deraadt@
Diffstat (limited to 'sys')
-rw-r--r-- | sys/netinet/in.h | 8 | ||||
-rw-r--r-- | sys/netinet/in_pcb.c | 39 | ||||
-rw-r--r-- | sys/netinet/in_pcb.h | 4 | ||||
-rw-r--r-- | sys/netinet/ip_icmp.c | 151 | ||||
-rw-r--r-- | sys/netinet/ip_input.c | 35 | ||||
-rw-r--r-- | sys/netinet/ip_output.c | 10 | ||||
-rw-r--r-- | sys/netinet/ip_var.h | 7 | ||||
-rw-r--r-- | sys/netinet/tcp_input.c | 91 | ||||
-rw-r--r-- | sys/netinet/tcp_output.c | 6 | ||||
-rw-r--r-- | sys/netinet/tcp_subr.c | 55 | ||||
-rw-r--r-- | sys/netinet/tcp_timer.c | 20 | ||||
-rw-r--r-- | sys/netinet/tcp_var.h | 5 |
12 files changed, 375 insertions, 56 deletions
diff --git a/sys/netinet/in.h b/sys/netinet/in.h index 9575c729309..ab6e9c2977e 100644 --- a/sys/netinet/in.h +++ b/sys/netinet/in.h @@ -1,4 +1,4 @@ -/* $OpenBSD: in.h,v 1.40 2000/06/18 07:06:13 itojun Exp $ */ +/* $OpenBSD: in.h,v 1.41 2000/09/18 22:06:36 provos Exp $ */ /* $NetBSD: in.h,v 1.20 1996/02/13 23:41:47 christos Exp $ */ /* @@ -447,7 +447,9 @@ struct ip_mreq { #define IPCTL_IPSEC_FIRSTUSE 24 #define IPCTL_IPSEC_ENC_ALGORITHM 25 #define IPCTL_IPSEC_AUTH_ALGORITHM 26 -#define IPCTL_MAXID 27 +#define IPCTL_MTUDISC 27 /* allow path MTU discovery */ +#define IPCTL_MTUDISCTIMEOUT 28 /* seconds to timeout mtu discovery */ +#define IPCTL_MAXID 29 #define IPCTL_NAMES { \ { 0, 0 }, \ @@ -477,6 +479,8 @@ struct ip_mreq { { "ipsec-firstuse", CTLTYPE_INT }, \ { "ipsec-enc-alg", CTLTYPE_STRING }, \ { "ipsec-auth-alg", CTLTYPE_STRING }, \ + { "mtudisc", CTLTYPE_INT }, \ + { "mtudisctimeout", CTLTYPE_INT }, \ } /* INET6 stuff */ diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index faec511620b..f0fc288ca89 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -1,4 +1,4 @@ -/* $OpenBSD: in_pcb.c,v 1.42 2000/04/27 09:23:21 itojun Exp $ */ +/* $OpenBSD: in_pcb.c,v 1.43 2000/09/18 22:06:37 provos Exp $ */ /* $NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $ */ /* @@ -859,6 +859,43 @@ in_pcblookup(table, faddrp, fport_arg, laddrp, lport_arg, flags) return (match); } +struct rtentry * +in_pcbrtentry(inp) + struct inpcb *inp; +{ + struct route *ro; + + ro = &inp->inp_route; + + /* + * No route yet; acquire one, but only when a foreign address is set.
+ */ + if (ro->ro_rt == NULL) { + switch(sotopf(inp->inp_socket)) { +#ifdef INET6 + case PF_INET6: + if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) + break; + ro->ro_dst.sa_family = AF_INET6; + ro->ro_dst.sa_len = sizeof(struct sockaddr_in6); + ((struct sockaddr_in6 *) &ro->ro_dst)->sin6_addr = + inp->inp_faddr6; + rtalloc(ro); + break; +#endif /* INET6 */ + case PF_INET: + if (inp->inp_faddr.s_addr == INADDR_ANY) + break; + ro->ro_dst.sa_family = AF_INET; + ro->ro_dst.sa_len = sizeof(ro->ro_dst); + satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr; + rtalloc(ro); + break; + } + } + return (ro->ro_rt); +} + struct sockaddr_in * in_selectsrc(sin, ro, soopts, mopts, errorp) struct sockaddr_in *sin; diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 6ac62c01529..7b8dec2ab7c 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -1,4 +1,4 @@ -/* $OpenBSD: in_pcb.h,v 1.25 2000/06/18 17:32:48 itojun Exp $ */ +/* $OpenBSD: in_pcb.h,v 1.26 2000/09/18 22:06:37 provos Exp $ */ /* $NetBSD: in_pcb.h,v 1.14 1996/02/13 23:42:00 christos Exp $ */ /* @@ -260,6 +260,8 @@ void in_setsockaddr __P((struct inpcb *, struct mbuf *)); int in_baddynamic __P((u_int16_t, u_int16_t)); extern struct sockaddr_in *in_selectsrc __P((struct sockaddr_in *, struct route *, int, struct ip_moptions *, int *)); +struct rtentry * + in_pcbrtentry __P((struct inpcb *)); /* INET6 stuff */ int in6_pcbnotify __P((struct inpcbtable *, struct sockaddr *, diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index c8f1e5e29e1..1548bf33095 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_icmp.c,v 1.21 2000/05/15 11:07:32 itojun Exp $ */ +/* $OpenBSD: ip_icmp.c,v 1.22 2000/09/18 22:06:37 provos Exp $ */ /* $NetBSD: ip_icmp.c,v 1.19 1996/02/13 23:42:22 christos Exp $ */ /* @@ -92,6 +92,9 @@ static int ip_next_mtu __P((int, int)); /*static*/ int ip_next_mtu __P((int, int)); #endif +void icmp_mtudisc __P((struct icmp *)); +void
icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *)); + extern struct protosw inetsw[]; /* @@ -391,6 +394,8 @@ icmp_input(m, va_alist) printf("deliver to protocol %d\n", icp->icmp_ip.ip_p); #endif icmpsrc.sin_addr = icp->icmp_ip.ip_dst; + if (code == PRC_MSGSIZE && ip_mtudisc) + icmp_mtudisc(icp); /* * XXX if the packet contains [IPv4 AH TCP], we can't make a * notification to TCP layer. @@ -712,3 +717,147 @@ icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) } /* NOTREACHED */ } + +void +icmp_mtudisc(icp) + struct icmp *icp; +{ + struct rtentry *rt; + struct sockaddr *dst = sintosa(&icmpsrc); + u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */ + int error; + + /* Table of common MTUs: */ + + static u_short mtu_table[] = {65535, 65280, 32000, 17914, 9180, 8166, + 4352, 2002, 1492, 1006, 508, 296, 68, 0}; + + rt = rtalloc1(dst, 1); + if (rt == 0) + return; + + /* If we didn't get a host route, allocate one */ + + if ((rt->rt_flags & RTF_HOST) == 0) { + struct rtentry *nrt; + + error = rtrequest((int) RTM_ADD, dst, + (struct sockaddr *) rt->rt_gateway, + (struct sockaddr *) 0, + RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt); + if (error) { + rtfree(rt); + /* nrt is not assigned when rtrequest() fails; nothing to free */ + return; + } + nrt->rt_rmx = rt->rt_rmx; + rtfree(rt); + rt = nrt; + } + error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q); + if (error) { + rtfree(rt); + return; + } + + if (mtu == 0) { + int i = 0; + + mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */ + /* Some 4.2BSD-based routers incorrectly adjust the ip_len */ + if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0) + mtu -= (icp->icmp_ip.ip_hl << 2); + + /* If we still can't guess a value, try the route */ + + if (mtu == 0) { + mtu = rt->rt_rmx.rmx_mtu; + + /* If no route mtu, default to the interface mtu */ + + if (mtu == 0) + mtu = rt->rt_ifp->if_mtu; + } + + for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++) + if (mtu > mtu_table[i]) { + mtu = mtu_table[i]; + break; + } + } +
+ /* + * XXX: RTV_MTU is overloaded, since the admin can set it + * to turn off PMTU for a route, and the kernel can + * set it to indicate a serious problem with PMTU + * on a route. We should be using a separate flag + * for the kernel to indicate this. + */ + + if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) { + if (mtu < 296 || mtu > rt->rt_ifp->if_mtu) + rt->rt_rmx.rmx_locks |= RTV_MTU; + else if (rt->rt_rmx.rmx_mtu > mtu || + rt->rt_rmx.rmx_mtu == 0) + rt->rt_rmx.rmx_mtu = mtu; + } + + if (rt) + rtfree(rt); +} + +/* + * Return the next larger or smaller MTU plateau (table from RFC 1191) + * given current value MTU. If DIR is less than zero, a larger plateau + * is returned; otherwise, a smaller value is returned. + */ +int +ip_next_mtu(mtu, dir) /* XXX */ + int mtu; + int dir; +{ + static u_short mtutab[] = { + 65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296, + 68, 0 + }; + int i; + + for (i = 0; i < (sizeof mtutab) / (sizeof mtutab[0]); i++) { + if (mtu >= mtutab[i]) + break; + } + + if (dir < 0) { + if (i == 0) { + return 0; + } else { + return mtutab[i - 1]; + } + } else { + if (mtutab[i] == 0) { + return 0; + } else if(mtu > mtutab[i]) { + return mtutab[i]; + } else { + return mtutab[i + 1]; + } + } +} + +void +icmp_mtudisc_timeout(rt, r) + struct rtentry *rt; + struct rttimer *r; +{ + if (rt == NULL) + panic("icmp_mtudisc_timeout: bad route to timeout"); + if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) == + (RTF_DYNAMIC | RTF_HOST)) { + rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt), + rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); + } else { + if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) { + rt->rt_rmx.rmx_mtu = 0; + } + } +} diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 19c3609993c..792dc8886a1 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_input.c,v 1.56 2000/05/15 11:07:33 itojun Exp $ */ +/* $OpenBSD: ip_input.c,v 1.57 2000/09/18 22:06:37 provos Exp $ */ /*
$NetBSD: ip_input.c,v 1.30 1996/03/16 23:53:58 christos Exp $ */ /* @@ -76,6 +76,12 @@ #ifndef IPSENDREDIRECTS #define IPSENDREDIRECTS 1 #endif +#ifndef IPMTUDISC +#define IPMTUDISC 0 +#endif +#ifndef IPMTUDISCTIMEOUT +#define IPMTUDISCTIMEOUT (10 * 60) /* as per RFC 1191 */ +#endif int encdebug = 0; int ipsec_acl = 1; @@ -107,11 +113,15 @@ int ipforwarding = IPFORWARDING; int ipsendredirects = IPSENDREDIRECTS; int ip_dosourceroute = 0; /* no src-routing unless sysctl'd to enable */ int ip_defttl = IPDEFTTL; +int ip_mtudisc = IPMTUDISC; +u_int ip_mtudisc_timeout = IPMTUDISCTIMEOUT; int ip_directedbcast = IPDIRECTEDBCAST; #ifdef DIAGNOSTIC int ipprintfs = 0; #endif +struct rttimer_queue *ip_mtudisc_timeout_q = NULL; + int ipsec_auth_default_level = IPSEC_AUTH_LEVEL_DEFAULT; int ipsec_esp_trans_default_level = IPSEC_ESP_TRANS_LEVEL_DEFAULT; int ipsec_esp_network_default_level = IPSEC_ESP_NETWORK_LEVEL_DEFAULT; @@ -224,6 +234,9 @@ ip_init() LIST_INIT(&ipq); ipintrq.ifq_maxlen = ipqmaxlen; TAILQ_INIT(&in_ifaddr); + if (ip_mtudisc != 0) + ip_mtudisc_timeout_q = + rt_timer_queue_create(ip_mtudisc_timeout); /* Fill in list of ports not to allocate dynamically. */ bzero((void *)&baddynamicports, sizeof(baddynamicports)); @@ -1469,6 +1482,8 @@ ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen) void *newp; size_t newlen; { + int error; + /* All sysctl names at this level are terminal. 
*/ if (namelen != 1) return (ENOTDIR); @@ -1496,6 +1511,24 @@ ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen) case IPCTL_DIRECTEDBCAST: return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_directedbcast)); + case IPCTL_MTUDISC: + error = sysctl_int(oldp, oldlenp, newp, newlen, + &ip_mtudisc); + if (ip_mtudisc != 0 && ip_mtudisc_timeout_q == NULL) { + ip_mtudisc_timeout_q = + rt_timer_queue_create(ip_mtudisc_timeout); + } else if (ip_mtudisc == 0 && ip_mtudisc_timeout_q != NULL) { + rt_timer_queue_destroy(ip_mtudisc_timeout_q, TRUE); + ip_mtudisc_timeout_q = NULL; + } + return error; + case IPCTL_MTUDISCTIMEOUT: + error = sysctl_int(oldp, oldlenp, newp, newlen, + &ip_mtudisc_timeout); + if (ip_mtudisc_timeout_q != NULL) + rt_timer_queue_change(ip_mtudisc_timeout_q, + ip_mtudisc_timeout); + return (error); case IPCTL_IPPORT_FIRSTAUTO: return (sysctl_int(oldp, oldlenp, newp, newlen, &ipport_firstauto)); diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index be360787aa3..93a75eabf13 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_output.c,v 1.79 2000/07/29 22:51:22 angelos Exp $ */ +/* $OpenBSD: ip_output.c,v 1.80 2000/09/18 22:06:37 provos Exp $ */ /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ /* @@ -545,6 +545,14 @@ ip_output(m0, va_alist) m->m_flags &= ~M_BCAST; sendit: + /* + * If we're doing Path MTU discovery, we need to set DF unless + * the route's MTU is locked. + */ + if ((flags & IP_MTUDISC) && ro->ro_rt && + (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) + ip->ip_off |= IP_DF; + #ifdef IPSEC /* * Check if the packet needs encapsulation. 
diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 023b0e2f76e..cb39663c35d 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_var.h,v 1.13 2000/01/02 09:00:19 angelos Exp $ */ +/* $OpenBSD: ip_var.h,v 1.14 2000/09/18 22:06:38 provos Exp $ */ /* $NetBSD: ip_var.h,v 1.16 1996/02/13 23:43:20 christos Exp $ */ /* @@ -154,12 +154,17 @@ struct ipstat { #define IP_RAWOUTPUT 0x2 /* raw ip header exists */ #define IP_ROUTETOIF SO_DONTROUTE /* bypass routing tables */ #define IP_ALLOWBROADCAST SO_BROADCAST /* can send broadcast packets */ +#define IP_MTUDISC 0x0400 /* pmtu discovery, set DF */ #define IP_ENCAPSULATED 0x0800 /* encapsulated already */ struct ipstat ipstat; LIST_HEAD(ipqhead, ipq) ipq; /* ip reass. queue */ int ip_defttl; /* default IP ttl */ +int ip_mtudisc; /* mtu discovery */ +u_int ip_mtudisc_timeout; /* seconds to timeout mtu discovery */ +struct rttimer_queue *ip_mtudisc_timeout_q; + int ip_ctloutput __P((int, struct socket *, int, int, struct mbuf **)); int ip_dooptions __P((struct mbuf *)); void ip_drain __P((void)); diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 33a38b7dee4..3762fc8004c 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_input.c,v 1.69 2000/09/05 21:57:41 provos Exp $ */ +/* $OpenBSD: tcp_input.c,v 1.70 2000/09/18 22:06:38 provos Exp $ */ /* $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $ */ /* @@ -2743,13 +2743,14 @@ tcp_xmit_timer(tp, rtt) int tcp_mss(tp, offer) register struct tcpcb *tp; - u_int offer; + int offer; { struct route *ro; register struct rtentry *rt; struct ifnet *ifp; - register int rtt, mss; + register int rtt, mss, mssopt; u_long bufsize; + int iphlen, is_ipv6 = 0; struct inpcb *inp; struct socket *so; @@ -2796,6 +2797,23 @@ tcp_mss(tp, offer) } ifp = rt->rt_ifp; + mssopt = mss = tcp_mssdflt; + + switch (tp->pf) { +#ifdef INET6 + case AF_INET6: + iphlen = sizeof(struct 
ip6_hdr); + is_ipv6 = 1; + break; +#endif + case AF_INET: + iphlen = sizeof(struct ip); + break; + default: + /* the family does not support path MTU discovery */ + goto out; + } + #ifdef RTV_MTU /* if route characteristics exist ... */ /* * While we're here, check if there's an initial rtt @@ -2823,58 +2841,52 @@ tcp_mss(tp, offer) ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, tp->t_rttmin, TCPTV_REXMTMAX); } + /* * if there's an mtu associated with the route and we support * path MTU discovery for the underlying protocol family, use it. */ - /* - * XXX It's wrong to use PMTU values to determine the MSS we - * are going to advertise; we should only use the input interface's - * MTU instead (see draft-ietf-tcpimpl-pmtud-03.txt). tcp_mss() - * should be changed to be aware whether it's called for input or - * output MSS calculation, and act accordingly. - */ if (rt->rt_rmx.rmx_mtu) { /* * One may wish to lower MSS to take into account options, * especially security-related options. */ - mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcphdr); - switch (tp->pf) { -#ifdef INET6 - case AF_INET6: - mss -= sizeof(struct ip6_hdr); - break; -#endif -#ifdef notdef /* no IPv4 path MTU discovery yet */ - case AF_INET: - mss -= sizeof(struct ip); - break; -#endif - default: - /* the family does not support path MTU discovery */ - mss = 0; - break; - } + mss = rt->rt_rmx.rmx_mtu - iphlen - sizeof(struct tcphdr); } else - mss = 0; -#else - mss = 0; #endif /* RTV_MTU */ - if (mss == 0) { + if (!ifp) /* * ifp may be null and rmx_mtu may be zero in certain * v6 cases (e.g., if ND wasn't able to resolve the * destination host. */ - mss = ifp ? 
ifp->if_mtu - sizeof(struct tcpiphdr) : 0; - switch (tp->pf) { - case AF_INET: - if (!in_localaddr(inp->inp_faddr)) - mss = min(mss, tcp_mssdflt); - break; + goto out; + else if (ip_mtudisc || ifp->if_flags & IFF_LOOPBACK) + mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr); +#ifdef INET6 + else if (is_ipv6) { + if (IN6_IS_ADDR_V4MAPPED(&inp->inp_faddr6)) { + /* mapped addr case */ + struct in_addr d; + bcopy(&inp->inp_faddr6.s6_addr32[3], &d, sizeof(d)); + if (in_localaddr(d)) + mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr); + } else { + if (in6_localaddr(&inp->inp_faddr6)) + mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr); } } +#endif /* INET6 */ + else if (inp && in_localaddr(inp->inp_faddr)) + mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr); + + /* Calculate the value that we offer in TCPOPT_MAXSEG */ + if (offer != -1) { + mssopt = ifp->if_mtu - iphlen - sizeof(struct tcphdr); + mssopt = max(tcp_mssdflt, mssopt); + } + + out: /* * The current mss, t_maxseg, is initialized to the default value. * If we compute a smaller value, reduce the current mss. @@ -2883,7 +2895,7 @@ tcp_mss(tp, offer) * unless we received an offer at least that large from peer. * However, do not accept offers under 32 bytes. */ - if (offer) + if (offer && offer != -1) mss = min(mss, offer); mss = max(mss, 64); /* sanity - at least max opt. space */ /* @@ -2953,7 +2965,8 @@ tcp_mss(tp, offer) tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh); } #endif /* RTV_MTU */ - return (mss); + + return (offer != -1 ? 
mssopt : mss); } #endif /* TUBA_INCLUDE */ diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index a746eea1b41..8b6ad202982 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_output.c,v 1.31 2000/09/05 21:57:41 provos Exp $ */ +/* $OpenBSD: tcp_output.c,v 1.32 2000/09/18 22:06:38 provos Exp $ */ /* $NetBSD: tcp_output.c,v 1.16 1997/06/03 16:17:09 kml Exp $ */ /* @@ -1033,7 +1033,9 @@ send: ip->ip_tos = tp->t_inpcb->inp_ip.ip_tos; } error = ip_output(m, tp->t_inpcb->inp_options, - &tp->t_inpcb->inp_route, so->so_options & SO_DONTROUTE, + &tp->t_inpcb->inp_route, + (ip_mtudisc ? IP_MTUDISC : 0) | + (so->so_options & SO_DONTROUTE), 0, tp->t_inpcb); break; #endif /* INET */ diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 1f2d21aa8d2..6cc41c276a7 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_subr.c,v 1.30 2000/07/11 19:18:17 provos Exp $ */ +/* $OpenBSD: tcp_subr.c,v 1.31 2000/09/18 22:06:38 provos Exp $ */ /* $NetBSD: tcp_subr.c,v 1.22 1996/02/13 23:44:00 christos Exp $ */ /* @@ -406,7 +406,8 @@ tcp_respond(tp, template, m, ack, seq, flags) th->th_sum = in_cksum(m, tlen); ((struct ip *)ti)->ip_len = tlen; ((struct ip *)ti)->ip_ttl = ip_defttl; - ip_output(m, NULL, ro, 0, NULL, tp ? tp->t_inpcb : NULL); + ip_output(m, NULL, ro, ip_mtudisc ? IP_MTUDISC : 0, NULL, + tp ? 
tp->t_inpcb : NULL); } } @@ -729,10 +730,8 @@ tcp6_ctlinput(cmd, sa, d) return; if (cmd == PRC_QUENCH) notify = tcp_quench; -#if 0 else if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc; -#endif else if (!PRC_IS_REDIRECT(cmd) && ((unsigned)cmd > PRC_NCMDS || inet6ctlerrmap[cmd] == 0)) return; @@ -809,6 +808,8 @@ tcp_ctlinput(cmd, sa, v) notify = tcp_quench; else if (PRC_IS_REDIRECT(cmd)) notify = in_rtchange, ip = 0; + else if (cmd == PRC_MSGSIZE && ip_mtudisc) + notify = tcp_mtudisc, ip = 0; else if (cmd == PRC_HOSTDEAD) ip = 0; else if (errno == 0) @@ -839,6 +840,52 @@ tcp_quench(inp, errno) tp->snd_cwnd = tp->t_maxseg; } +/* + * On receipt of path MTU corrections, flush old route and replace it + * with the new one. Retransmit all unacknowledged packets, to ensure + * that all packets will be received. + */ +void +tcp_mtudisc(inp, errno) + struct inpcb *inp; + int errno; +{ + struct tcpcb *tp = intotcpcb(inp); + struct rtentry *rt = in_pcbrtentry(inp); + + if (tp != 0) { + if (rt != 0) { + /* + * If this was not a host route, remove and realloc. + */ + if ((rt->rt_flags & RTF_HOST) == 0) { + in_rtchange(inp, errno); + if ((rt = in_pcbrtentry(inp)) == 0) + return; + } + + /* + * Slow start out of the error condition. We + * use the MTU because we know it's smaller + * than the previously transmitted segment. + * + * Note: This is more conservative than the + * suggestion in RFC 2414 + */ + if (rt->rt_rmx.rmx_mtu != 0) { + tcp_mss(tp, -1); + tp->snd_cwnd = rt->rt_rmx.rmx_mtu; + } + } + + /* + * Resend unacknowledged packets. 
+ */ + tp->snd_nxt = tp->snd_una; + tcp_output(tp); + } +} + #ifdef TCP_SIGNATURE int tcp_signature_tdb_attach() diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index ccc114118bf..4f7b1726439 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_timer.c,v 1.16 1999/12/21 17:49:28 provos Exp $ */ +/* $OpenBSD: tcp_timer.c,v 1.17 2000/09/18 22:06:38 provos Exp $ */ /* $NetBSD: tcp_timer.c,v 1.14 1996/02/13 23:44:09 christos Exp $ */ /* @@ -236,6 +236,24 @@ tcp_timers(tp, timer) rto * tcp_backoff[tp->t_rxtshift], tp->t_rttmin, TCPTV_REXMTMAX); tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; +#if 0 + /* + * If we are losing and we are trying path MTU discovery, + * try turning it off. This will avoid black holes in + * the network which suppress or fail to send "packet + * too big" ICMP messages. We should ideally do + * lots more sophisticated searching to find the right + * value here... + */ + if (ip_mtudisc && tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) { + struct rtentry *rt = NULL; + + if (tp->t_inpcb) + rt = in_pcbrtentry(tp->t_inpcb); + + /* XXX: Black hole recovery code goes here */ + } +#endif /* * If losing, let the lower level know and try for * a better route. 
Also, if we backed off this far, diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index dc3573294cf..e4c25532abd 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_var.h,v 1.29 2000/07/11 16:53:22 provos Exp $ */ +/* $OpenBSD: tcp_var.h,v 1.30 2000/09/18 22:06:38 provos Exp $ */ /* $NetBSD: tcp_var.h,v 1.17 1996/02/13 23:44:24 christos Exp $ */ /* @@ -342,7 +342,8 @@ void tcp_init __P((void)); int tcp6_input __P((struct mbuf **, int *, int)); #endif void tcp_input __P((struct mbuf *, ...)); -int tcp_mss __P((struct tcpcb *, u_int)); +int tcp_mss __P((struct tcpcb *, int)); +void tcp_mtudisc __P((struct inpcb *, int)); struct tcpcb * tcp_newtcpcb __P((struct inpcb *)); void tcp_notify __P((struct inpcb *, int)); |