summaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorNiels Provos <provos@cvs.openbsd.org>2000-09-18 22:06:39 +0000
committerNiels Provos <provos@cvs.openbsd.org>2000-09-18 22:06:39 +0000
commit90175ecf691a7934faa9072f3a1ed18f4e91deb3 (patch)
tree68bd50ddf0e438117ac0a4f0bb898d7075a341c3 /sys
parentc396a799db246d91ccfc784bce1f77a21e417f2a (diff)
Path MTU discovery based on NetBSD but with the decision to use the DF
flag delayed to ip_output(). That halves the code and reduces most of the route lookups. okay deraadt@
Diffstat (limited to 'sys')
-rw-r--r--sys/netinet/in.h8
-rw-r--r--sys/netinet/in_pcb.c39
-rw-r--r--sys/netinet/in_pcb.h4
-rw-r--r--sys/netinet/ip_icmp.c151
-rw-r--r--sys/netinet/ip_input.c35
-rw-r--r--sys/netinet/ip_output.c10
-rw-r--r--sys/netinet/ip_var.h7
-rw-r--r--sys/netinet/tcp_input.c91
-rw-r--r--sys/netinet/tcp_output.c6
-rw-r--r--sys/netinet/tcp_subr.c55
-rw-r--r--sys/netinet/tcp_timer.c20
-rw-r--r--sys/netinet/tcp_var.h5
12 files changed, 375 insertions, 56 deletions
diff --git a/sys/netinet/in.h b/sys/netinet/in.h
index 9575c729309..ab6e9c2977e 100644
--- a/sys/netinet/in.h
+++ b/sys/netinet/in.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: in.h,v 1.40 2000/06/18 07:06:13 itojun Exp $ */
+/* $OpenBSD: in.h,v 1.41 2000/09/18 22:06:36 provos Exp $ */
/* $NetBSD: in.h,v 1.20 1996/02/13 23:41:47 christos Exp $ */
/*
@@ -447,7 +447,9 @@ struct ip_mreq {
#define IPCTL_IPSEC_FIRSTUSE 24
#define IPCTL_IPSEC_ENC_ALGORITHM 25
#define IPCTL_IPSEC_AUTH_ALGORITHM 26
-#define IPCTL_MAXID 27
+#define IPCTL_MTUDISC 27 /* allow path MTU discovery */
+#define IPCTL_MTUDISCTIMEOUT 28 /* seconds to timeout path MTU discovery */
+#define IPCTL_MAXID 29
#define IPCTL_NAMES { \
{ 0, 0 }, \
@@ -477,6 +479,8 @@ struct ip_mreq {
{ "ipsec-firstuse", CTLTYPE_INT }, \
{ "ipsec-enc-alg", CTLTYPE_STRING }, \
{ "ipsec-auth-alg", CTLTYPE_STRING }, \
+ { "mtudisc", CTLTYPE_INT }, \
+ { "mtudisctimeout", CTLTYPE_INT }, \
}
/* INET6 stuff */
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index faec511620b..f0fc288ca89 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: in_pcb.c,v 1.42 2000/04/27 09:23:21 itojun Exp $ */
+/* $OpenBSD: in_pcb.c,v 1.43 2000/09/18 22:06:37 provos Exp $ */
/* $NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $ */
/*
@@ -859,6 +859,43 @@ in_pcblookup(table, faddrp, fport_arg, laddrp, lport_arg, flags)
return (match);
}
+/*
+ * Return the route cached in a PCB, trying to acquire one first if the
+ * cache is empty.
+ *
+ * inp is the PCB whose inp_route is consulted.  When no route is cached
+ * and the PCB has a foreign address, the route's destination is filled in
+ * from that address and rtalloc() is called on it.  A PCB without a
+ * foreign address is left untouched.  The return value is whatever ro_rt
+ * holds afterwards, so it is still NULL when no lookup was attempted.
+ */
+struct rtentry *
+in_pcbrtentry(inp)
+	struct inpcb *inp;
+{
+	struct route *ro;
+
+	ro = &inp->inp_route;
+
+	/*
+	 * No route yet, so try to acquire one.
+	 */
+	if (ro->ro_rt == NULL) {
+		switch(sotopf(inp->inp_socket)) {
+#ifdef INET6
+		case PF_INET6:
+			/* No foreign address, nothing to route to. */
+			if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
+				break;
+			ro->ro_dst.sa_family = AF_INET6;
+			ro->ro_dst.sa_len = sizeof(struct sockaddr_in6);
+			((struct sockaddr_in6 *) &ro->ro_dst)->sin6_addr =
+			    inp->inp_faddr6;
+			rtalloc(ro);
+			break;
+#endif /* INET6 */
+		case PF_INET:
+			/*
+			 * Same rule as the INET6 case: only look up a route
+			 * when a foreign address is set.  The test used to be
+			 * inverted, which skipped every connected PCB.
+			 */
+			if (inp->inp_faddr.s_addr == INADDR_ANY)
+				break;
+			ro->ro_dst.sa_family = AF_INET;
+			ro->ro_dst.sa_len = sizeof(ro->ro_dst);
+			satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr;
+			rtalloc(ro);
+			break;
+		}
+	}
+	return (ro->ro_rt);
+}
+
struct sockaddr_in *
in_selectsrc(sin, ro, soopts, mopts, errorp)
struct sockaddr_in *sin;
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 6ac62c01529..7b8dec2ab7c 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: in_pcb.h,v 1.25 2000/06/18 17:32:48 itojun Exp $ */
+/* $OpenBSD: in_pcb.h,v 1.26 2000/09/18 22:06:37 provos Exp $ */
/* $NetBSD: in_pcb.h,v 1.14 1996/02/13 23:42:00 christos Exp $ */
/*
@@ -260,6 +260,8 @@ void in_setsockaddr __P((struct inpcb *, struct mbuf *));
int in_baddynamic __P((u_int16_t, u_int16_t));
extern struct sockaddr_in *in_selectsrc __P((struct sockaddr_in *,
struct route *, int, struct ip_moptions *, int *));
+struct rtentry *
+ in_pcbrtentry __P((struct inpcb *));
/* INET6 stuff */
int in6_pcbnotify __P((struct inpcbtable *, struct sockaddr *,
diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c
index c8f1e5e29e1..1548bf33095 100644
--- a/sys/netinet/ip_icmp.c
+++ b/sys/netinet/ip_icmp.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ip_icmp.c,v 1.21 2000/05/15 11:07:32 itojun Exp $ */
+/* $OpenBSD: ip_icmp.c,v 1.22 2000/09/18 22:06:37 provos Exp $ */
/* $NetBSD: ip_icmp.c,v 1.19 1996/02/13 23:42:22 christos Exp $ */
/*
@@ -92,6 +92,9 @@ static int ip_next_mtu __P((int, int));
/*static*/ int ip_next_mtu __P((int, int));
#endif
+void icmp_mtudisc __P((struct icmp *));
+void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
+
extern struct protosw inetsw[];
/*
@@ -391,6 +394,8 @@ icmp_input(m, va_alist)
printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
#endif
icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
+ if (code == PRC_MSGSIZE && ip_mtudisc)
+ icmp_mtudisc(icp);
/*
* XXX if the packet contains [IPv4 AH TCP], we can't make a
* notification to TCP layer.
@@ -712,3 +717,147 @@ icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
}
/* NOTREACHED */
}
+
+/*
+ * Record the path MTU reported by an ICMP message.
+ *
+ * icp is the received ICMP message; icmp_input() calls this for
+ * PRC_MSGSIZE when ip_mtudisc is set.  The destination is taken from the
+ * file-level icmpsrc address.  The route to it is looked up and, if it is
+ * not already a host route, a dynamic host route is added that inherits
+ * the metrics of the route found.  icmp_mtudisc_timeout() is then
+ * registered for that route on ip_mtudisc_timeout_q.
+ *
+ * The MTU is icmp_nextmtu; if that field is zero it is guessed from the
+ * length in the returned IP header (falling back to the route or
+ * interface MTU) and rounded down using the table of common MTUs.
+ * Unless the route's MTU is locked, an implausible value locks it and a
+ * plausible smaller value replaces rmx_mtu.
+ */
+void
+icmp_mtudisc(icp)
+	struct icmp *icp;
+{
+	struct rtentry *rt;
+	struct sockaddr *dst = sintosa(&icmpsrc);
+	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
+	int error;
+
+	/* Table of common MTUs: */
+
+	static u_short mtu_table[] = {65535, 65280, 32000, 17914, 9180, 8166,
+				      4352, 2002, 1492, 1006, 508, 296, 68, 0};
+
+	rt = rtalloc1(dst, 1);
+	if (rt == 0)
+		return;
+
+	/* If we didn't get a host route, allocate one */
+
+	if ((rt->rt_flags & RTF_HOST) == 0) {
+		struct rtentry *nrt;
+
+		error = rtrequest((int) RTM_ADD, dst,
+		    (struct sockaddr *) rt->rt_gateway,
+		    (struct sockaddr *) 0,
+		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
+		if (error) {
+			/*
+			 * nrt was never initialized in this function and is
+			 * only meaningful once rtrequest() has succeeded, so
+			 * it must not be released here; drop only the route
+			 * obtained from rtalloc1().
+			 */
+			rtfree(rt);
+			return;
+		}
+		nrt->rt_rmx = rt->rt_rmx;
+		rtfree(rt);
+		rt = nrt;
+	}
+	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
+	if (error) {
+		rtfree(rt);
+		return;
+	}
+
+	if (mtu == 0) {
+		int i;
+
+		mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
+		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
+		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
+			mtu -= (icp->icmp_ip.ip_hl << 2);
+
+		/* If we still can't guess a value, try the route */
+
+		if (mtu == 0) {
+			mtu = rt->rt_rmx.rmx_mtu;
+
+			/* If no route mtu, default to the interface mtu */
+
+			if (mtu == 0)
+				mtu = rt->rt_ifp->if_mtu;
+		}
+
+		/* Step down to the first table entry strictly below the guess. */
+		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
+			if (mtu > mtu_table[i]) {
+				mtu = mtu_table[i];
+				break;
+			}
+	}
+
+	/*
+	 * XXX:   RTV_MTU is overloaded, since the admin can set it
+	 *	  to turn off PMTU for a route, and the kernel can
+	 *	  set it to indicate a serious problem with PMTU
+	 *	  on a route.  We should be using a separate flag
+	 *	  for the kernel to indicate this.
+	 */
+
+	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
+		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
+			rt->rt_rmx.rmx_locks |= RTV_MTU;
+		else if (rt->rt_rmx.rmx_mtu > mtu ||
+		    rt->rt_rmx.rmx_mtu == 0)
+			rt->rt_rmx.rmx_mtu = mtu;
+	}
+
+	/* rt cannot be null here: both ways of obtaining it were checked. */
+	rtfree(rt);
+}
+
+/*
+ * Return the next larger or smaller MTU plateau (table from RFC 1191)
+ * given current value MTU.  If DIR is less than zero, a larger plateau
+ * is returned; otherwise, a smaller value is returned.  Zero is returned
+ * when there is no such plateau, which includes any negative MTU.
+ */
+int
+ip_next_mtu(mtu, dir)	/* XXX */
+	int mtu;
+	int dir;
+{
+	static u_short mtutab[] = {
+		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
+		68, 0
+	};
+	int n = (sizeof mtutab) / (sizeof mtutab[0]);
+	int i;
+
+	/* Find the first plateau not above mtu; i == n when there is none. */
+	for (i = 0; i < n; i++) {
+		if (mtu >= mtutab[i])
+			break;
+	}
+
+	if (dir < 0) {
+		if (i == 0) {
+			return 0;
+		} else {
+			return mtutab[i - 1];
+		}
+	} else {
+		/*
+		 * A negative mtu matches no entry, not even the terminating
+		 * zero, so i may equal n here.  Bail out instead of reading
+		 * past the end of the table.
+		 */
+		if (i >= n || mtutab[i] == 0) {
+			return 0;
+		} else if(mtu > mtutab[i]) {
+			return mtutab[i];
+		} else {
+			return mtutab[i + 1];
+		}
+	}
+}
+
+/*
+ * Timer callback that icmp_mtudisc() registers for a route.
+ *
+ * rt is the route the timer was attached to; r is not used.  A route that
+ * is both RTF_DYNAMIC and RTF_HOST is deleted.  Any other route has its
+ * rmx_mtu reset to zero, unless the MTU is locked.  Panics when rt is NULL.
+ */
+void
+icmp_mtudisc_timeout(rt, r)
+	struct rtentry *rt;
+	struct rttimer *r;
+{
+	if (rt == NULL)
+		panic("icmp_mtudisc_timeout: bad route to timeout");
+
+	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) !=
+	    (RTF_DYNAMIC | RTF_HOST)) {
+		/* Not a dynamic host route: keep it, forget the MTU. */
+		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
+			rt->rt_rmx.rmx_mtu = 0;
+		return;
+	}
+
+	/* Dynamic host route: remove it altogether. */
+	rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
+	    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
+}
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index 19c3609993c..792dc8886a1 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ip_input.c,v 1.56 2000/05/15 11:07:33 itojun Exp $ */
+/* $OpenBSD: ip_input.c,v 1.57 2000/09/18 22:06:37 provos Exp $ */
/* $NetBSD: ip_input.c,v 1.30 1996/03/16 23:53:58 christos Exp $ */
/*
@@ -76,6 +76,12 @@
#ifndef IPSENDREDIRECTS
#define IPSENDREDIRECTS 1
#endif
+#ifndef IPMTUDISC
+#define IPMTUDISC 0
+#endif
+#ifndef IPMTUDISCTIMEOUT
+#define IPMTUDISCTIMEOUT (10 * 60) /* as per RFC 1191 */
+#endif
int encdebug = 0;
int ipsec_acl = 1;
@@ -107,11 +113,15 @@ int ipforwarding = IPFORWARDING;
int ipsendredirects = IPSENDREDIRECTS;
int ip_dosourceroute = 0; /* no src-routing unless sysctl'd to enable */
int ip_defttl = IPDEFTTL;
+int ip_mtudisc = IPMTUDISC;
+u_int ip_mtudisc_timeout = IPMTUDISCTIMEOUT;
int ip_directedbcast = IPDIRECTEDBCAST;
#ifdef DIAGNOSTIC
int ipprintfs = 0;
#endif
+struct rttimer_queue *ip_mtudisc_timeout_q = NULL;
+
int ipsec_auth_default_level = IPSEC_AUTH_LEVEL_DEFAULT;
int ipsec_esp_trans_default_level = IPSEC_ESP_TRANS_LEVEL_DEFAULT;
int ipsec_esp_network_default_level = IPSEC_ESP_NETWORK_LEVEL_DEFAULT;
@@ -224,6 +234,9 @@ ip_init()
LIST_INIT(&ipq);
ipintrq.ifq_maxlen = ipqmaxlen;
TAILQ_INIT(&in_ifaddr);
+ if (ip_mtudisc != 0)
+ ip_mtudisc_timeout_q =
+ rt_timer_queue_create(ip_mtudisc_timeout);
/* Fill in list of ports not to allocate dynamically. */
bzero((void *)&baddynamicports, sizeof(baddynamicports));
@@ -1469,6 +1482,8 @@ ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
void *newp;
size_t newlen;
{
+ int error;
+
/* All sysctl names at this level are terminal. */
if (namelen != 1)
return (ENOTDIR);
@@ -1496,6 +1511,24 @@ ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
case IPCTL_DIRECTEDBCAST:
return (sysctl_int(oldp, oldlenp, newp, newlen,
&ip_directedbcast));
+ case IPCTL_MTUDISC:
+ error = sysctl_int(oldp, oldlenp, newp, newlen,
+ &ip_mtudisc);
+ if (ip_mtudisc != 0 && ip_mtudisc_timeout_q == NULL) {
+ ip_mtudisc_timeout_q =
+ rt_timer_queue_create(ip_mtudisc_timeout);
+ } else if (ip_mtudisc == 0 && ip_mtudisc_timeout_q != NULL) {
+ rt_timer_queue_destroy(ip_mtudisc_timeout_q, TRUE);
+ ip_mtudisc_timeout_q = NULL;
+ }
+ return error;
+ case IPCTL_MTUDISCTIMEOUT:
+ error = sysctl_int(oldp, oldlenp, newp, newlen,
+ &ip_mtudisc_timeout);
+ if (ip_mtudisc_timeout_q != NULL)
+ rt_timer_queue_change(ip_mtudisc_timeout_q,
+ ip_mtudisc_timeout);
+ return (error);
case IPCTL_IPPORT_FIRSTAUTO:
return (sysctl_int(oldp, oldlenp, newp, newlen,
&ipport_firstauto));
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index be360787aa3..93a75eabf13 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ip_output.c,v 1.79 2000/07/29 22:51:22 angelos Exp $ */
+/* $OpenBSD: ip_output.c,v 1.80 2000/09/18 22:06:37 provos Exp $ */
/* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */
/*
@@ -545,6 +545,14 @@ ip_output(m0, va_alist)
m->m_flags &= ~M_BCAST;
sendit:
+ /*
+ * If we're doing Path MTU discovery, we need to set DF unless
+ * the route's MTU is locked.
+ */
+ if ((flags & IP_MTUDISC) && ro->ro_rt &&
+ (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
+ ip->ip_off |= IP_DF;
+
#ifdef IPSEC
/*
* Check if the packet needs encapsulation.
diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h
index 023b0e2f76e..cb39663c35d 100644
--- a/sys/netinet/ip_var.h
+++ b/sys/netinet/ip_var.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ip_var.h,v 1.13 2000/01/02 09:00:19 angelos Exp $ */
+/* $OpenBSD: ip_var.h,v 1.14 2000/09/18 22:06:38 provos Exp $ */
/* $NetBSD: ip_var.h,v 1.16 1996/02/13 23:43:20 christos Exp $ */
/*
@@ -154,12 +154,17 @@ struct ipstat {
#define IP_RAWOUTPUT 0x2 /* raw ip header exists */
#define IP_ROUTETOIF SO_DONTROUTE /* bypass routing tables */
#define IP_ALLOWBROADCAST SO_BROADCAST /* can send broadcast packets */
+#define IP_MTUDISC 0x0400 /* pmtu discovery, set DF */
#define IP_ENCAPSULATED 0x0800 /* encapsulated already */
struct ipstat ipstat;
LIST_HEAD(ipqhead, ipq) ipq; /* ip reass. queue */
int ip_defttl; /* default IP ttl */
+int ip_mtudisc; /* mtu discovery */
+u_int ip_mtudisc_timeout; /* seconds to timeout mtu discovery */
+struct rttimer_queue *ip_mtudisc_timeout_q;
+
int ip_ctloutput __P((int, struct socket *, int, int, struct mbuf **));
int ip_dooptions __P((struct mbuf *));
void ip_drain __P((void));
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 33a38b7dee4..3762fc8004c 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: tcp_input.c,v 1.69 2000/09/05 21:57:41 provos Exp $ */
+/* $OpenBSD: tcp_input.c,v 1.70 2000/09/18 22:06:38 provos Exp $ */
/* $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $ */
/*
@@ -2743,13 +2743,14 @@ tcp_xmit_timer(tp, rtt)
int
tcp_mss(tp, offer)
register struct tcpcb *tp;
- u_int offer;
+ int offer;
{
struct route *ro;
register struct rtentry *rt;
struct ifnet *ifp;
- register int rtt, mss;
+ register int rtt, mss, mssopt;
u_long bufsize;
+ int iphlen, is_ipv6 = 0;
struct inpcb *inp;
struct socket *so;
@@ -2796,6 +2797,23 @@ tcp_mss(tp, offer)
}
ifp = rt->rt_ifp;
+ mssopt = mss = tcp_mssdflt;
+
+ switch (tp->pf) {
+#ifdef INET6
+ case AF_INET6:
+ iphlen = sizeof(struct ip6_hdr);
+ is_ipv6 = 1;
+ break;
+#endif
+ case AF_INET:
+ iphlen = sizeof(struct ip);
+ break;
+ default:
+ /* the family does not support path MTU discovery */
+ goto out;
+ }
+
#ifdef RTV_MTU /* if route characteristics exist ... */
/*
* While we're here, check if there's an initial rtt
@@ -2823,58 +2841,52 @@ tcp_mss(tp, offer)
((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
tp->t_rttmin, TCPTV_REXMTMAX);
}
+
/*
* if there's an mtu associated with the route and we support
* path MTU discovery for the underlying protocol family, use it.
*/
- /*
- * XXX It's wrong to use PMTU values to determine the MSS we
- * are going to advertise; we should only use the input interface's
- * MTU instead (see draft-ietf-tcpimpl-pmtud-03.txt). tcp_mss()
- * should be changed to be aware whether it's called for input or
- * output MSS calculation, and act accordingly.
- */
if (rt->rt_rmx.rmx_mtu) {
/*
* One may wish to lower MSS to take into account options,
* especially security-related options.
*/
- mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcphdr);
- switch (tp->pf) {
-#ifdef INET6
- case AF_INET6:
- mss -= sizeof(struct ip6_hdr);
- break;
-#endif
-#ifdef notdef /* no IPv4 path MTU discovery yet */
- case AF_INET:
- mss -= sizeof(struct ip);
- break;
-#endif
- default:
- /* the family does not support path MTU discovery */
- mss = 0;
- break;
- }
+ mss = rt->rt_rmx.rmx_mtu - iphlen - sizeof(struct tcphdr);
} else
- mss = 0;
-#else
- mss = 0;
#endif /* RTV_MTU */
- if (mss == 0) {
+ if (!ifp)
/*
* ifp may be null and rmx_mtu may be zero in certain
* v6 cases (e.g., if ND wasn't able to resolve the
* destination host.
*/
- mss = ifp ? ifp->if_mtu - sizeof(struct tcpiphdr) : 0;
- switch (tp->pf) {
- case AF_INET:
- if (!in_localaddr(inp->inp_faddr))
- mss = min(mss, tcp_mssdflt);
- break;
+ goto out;
+ else if (ip_mtudisc || ifp->if_flags & IFF_LOOPBACK)
+ mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
+#ifdef INET6
+ else if (is_ipv6) {
+ if (IN6_IS_ADDR_V4MAPPED(&inp->inp_faddr6)) {
+ /* mapped addr case */
+ struct in_addr d;
+ bcopy(&inp->inp_faddr6.s6_addr32[3], &d, sizeof(d));
+ if (in_localaddr(d))
+ mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
+ } else {
+ if (in6_localaddr(&inp->inp_faddr6))
+ mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
}
}
+#endif /* INET6 */
+ else if (inp && in_localaddr(inp->inp_faddr))
+ mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
+
+ /* Calculate the value that we offer in TCPOPT_MAXSEG */
+ if (offer != -1) {
+ mssopt = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
+ mssopt = max(tcp_mssdflt, mssopt);
+ }
+
+ out:
/*
* The current mss, t_maxseg, is initialized to the default value.
* If we compute a smaller value, reduce the current mss.
@@ -2883,7 +2895,7 @@ tcp_mss(tp, offer)
* unless we received an offer at least that large from peer.
* However, do not accept offers under 32 bytes.
*/
- if (offer)
+ if (offer && offer != -1)
mss = min(mss, offer);
mss = max(mss, 64); /* sanity - at least max opt. space */
/*
@@ -2953,7 +2965,8 @@ tcp_mss(tp, offer)
tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
}
#endif /* RTV_MTU */
- return (mss);
+
+ return (offer != -1 ? mssopt : mss);
}
#endif /* TUBA_INCLUDE */
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index a746eea1b41..8b6ad202982 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: tcp_output.c,v 1.31 2000/09/05 21:57:41 provos Exp $ */
+/* $OpenBSD: tcp_output.c,v 1.32 2000/09/18 22:06:38 provos Exp $ */
/* $NetBSD: tcp_output.c,v 1.16 1997/06/03 16:17:09 kml Exp $ */
/*
@@ -1033,7 +1033,9 @@ send:
ip->ip_tos = tp->t_inpcb->inp_ip.ip_tos;
}
error = ip_output(m, tp->t_inpcb->inp_options,
- &tp->t_inpcb->inp_route, so->so_options & SO_DONTROUTE,
+ &tp->t_inpcb->inp_route,
+ (ip_mtudisc ? IP_MTUDISC : 0) |
+ (so->so_options & SO_DONTROUTE),
0, tp->t_inpcb);
break;
#endif /* INET */
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 1f2d21aa8d2..6cc41c276a7 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: tcp_subr.c,v 1.30 2000/07/11 19:18:17 provos Exp $ */
+/* $OpenBSD: tcp_subr.c,v 1.31 2000/09/18 22:06:38 provos Exp $ */
/* $NetBSD: tcp_subr.c,v 1.22 1996/02/13 23:44:00 christos Exp $ */
/*
@@ -406,7 +406,8 @@ tcp_respond(tp, template, m, ack, seq, flags)
th->th_sum = in_cksum(m, tlen);
((struct ip *)ti)->ip_len = tlen;
((struct ip *)ti)->ip_ttl = ip_defttl;
- ip_output(m, NULL, ro, 0, NULL, tp ? tp->t_inpcb : NULL);
+ ip_output(m, NULL, ro, ip_mtudisc ? IP_MTUDISC : 0, NULL,
+ tp ? tp->t_inpcb : NULL);
}
}
@@ -729,10 +730,8 @@ tcp6_ctlinput(cmd, sa, d)
return;
if (cmd == PRC_QUENCH)
notify = tcp_quench;
-#if 0
else if (cmd == PRC_MSGSIZE)
notify = tcp_mtudisc;
-#endif
else if (!PRC_IS_REDIRECT(cmd) &&
((unsigned)cmd > PRC_NCMDS || inet6ctlerrmap[cmd] == 0))
return;
@@ -809,6 +808,8 @@ tcp_ctlinput(cmd, sa, v)
notify = tcp_quench;
else if (PRC_IS_REDIRECT(cmd))
notify = in_rtchange, ip = 0;
+ else if (cmd == PRC_MSGSIZE && ip_mtudisc)
+ notify = tcp_mtudisc, ip = 0;
else if (cmd == PRC_HOSTDEAD)
ip = 0;
else if (errno == 0)
@@ -839,6 +840,52 @@ tcp_quench(inp, errno)
tp->snd_cwnd = tp->t_maxseg;
}
+/*
+ * Path MTU notification handler; tcp_ctlinput() selects it for
+ * PRC_MSGSIZE when ip_mtudisc is set.
+ *
+ * inp is the PCB being notified and errno is passed on to in_rtchange().
+ * If the PCB's cached route is not a host route it is flushed and looked
+ * up again.  When the route carries an MTU, the MSS is recomputed and the
+ * congestion window restarts at that MTU.  All unacknowledged data is then
+ * resent, to ensure that all packets will be received.
+ */
+void
+tcp_mtudisc(inp, errno)
+	struct inpcb *inp;
+	int errno;
+{
+	struct tcpcb *tp;
+	struct rtentry *rt;
+
+	tp = intotcpcb(inp);
+	rt = in_pcbrtentry(inp);
+
+	/* Nothing to do without a TCP control block. */
+	if (tp == 0)
+		return;
+
+	if (rt != 0) {
+		/*
+		 * If this was not a host route, remove and realloc.
+		 * Give up entirely when no route comes back.
+		 */
+		if ((rt->rt_flags & RTF_HOST) == 0) {
+			in_rtchange(inp, errno);
+			rt = in_pcbrtentry(inp);
+			if (rt == 0)
+				return;
+		}
+
+		/*
+		 * Slow start out of the error condition.  We
+		 * use the MTU because we know it's smaller
+		 * than the previously transmitted segment.
+		 *
+		 * Note: This is more conservative than the
+		 * suggestion in RFC 2414
+		 */
+		if (rt->rt_rmx.rmx_mtu != 0) {
+			tcp_mss(tp, -1);
+			tp->snd_cwnd = rt->rt_rmx.rmx_mtu;
+		}
+	}
+
+	/*
+	 * Resend unacknowledged packets.
+	 */
+	tp->snd_nxt = tp->snd_una;
+	tcp_output(tp);
+}
+
#ifdef TCP_SIGNATURE
int
tcp_signature_tdb_attach()
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index ccc114118bf..4f7b1726439 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: tcp_timer.c,v 1.16 1999/12/21 17:49:28 provos Exp $ */
+/* $OpenBSD: tcp_timer.c,v 1.17 2000/09/18 22:06:38 provos Exp $ */
/* $NetBSD: tcp_timer.c,v 1.14 1996/02/13 23:44:09 christos Exp $ */
/*
@@ -236,6 +236,24 @@ tcp_timers(tp, timer)
rto * tcp_backoff[tp->t_rxtshift],
tp->t_rttmin, TCPTV_REXMTMAX);
tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+#if 0
+ /*
+ * If we are losing and we are trying path MTU discovery,
+ * try turning it off. This will avoid black holes in
+ * the network which suppress or fail to send "packet
+ * too big" ICMP messages. We should ideally do
+ * lots more sophisticated searching to find the right
+ * value here...
+ */
+ if (ip_mtudisc && tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) {
+ struct rtentry *rt = NULL;
+
+ if (tp->t_inpcb)
+ rt = in_pcbrtentry(tp->t_inpcb);
+
+ /* XXX: Black hole recovery code goes here */
+ }
+#endif
/*
* If losing, let the lower level know and try for
* a better route. Also, if we backed off this far,
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index dc3573294cf..e4c25532abd 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: tcp_var.h,v 1.29 2000/07/11 16:53:22 provos Exp $ */
+/* $OpenBSD: tcp_var.h,v 1.30 2000/09/18 22:06:38 provos Exp $ */
/* $NetBSD: tcp_var.h,v 1.17 1996/02/13 23:44:24 christos Exp $ */
/*
@@ -342,7 +342,8 @@ void tcp_init __P((void));
int tcp6_input __P((struct mbuf **, int *, int));
#endif
void tcp_input __P((struct mbuf *, ...));
-int tcp_mss __P((struct tcpcb *, u_int));
+int tcp_mss __P((struct tcpcb *, int));
+void tcp_mtudisc __P((struct inpcb *, int));
struct tcpcb *
tcp_newtcpcb __P((struct inpcb *));
void tcp_notify __P((struct inpcb *, int));