diff options
author | Mike Belopuhov <mikeb@cvs.openbsd.org> | 2012-08-07 17:54:21 +0000 |
---|---|---|
committer | Mike Belopuhov <mikeb@cvs.openbsd.org> | 2012-08-07 17:54:21 +0000 |
commit | 030f298c8382a798fd024872370231715cc325a8 (patch) | |
tree | b72ad6db8190800bc079e985b08f5f8373eb3596 /sys/netinet/ip_input.c | |
parent | 74fcc25eb9ddb9d66d0bcb8134e74aefdd995a53 (diff) |
Store the data used to generate an ICMP error message on the stack
instead of allocating a new mbuf. This is the third or fourth
attempt to incorporate a change like this, meaning a handful of
people have lost their hair trying to make it work, namely dlg@,
henning@, deraadt@, and thib@. Unfortunately, the fixed version
was never put back, which is exceptionally unfortunate since the
impact on performance is huge: it nearly doubles the forwarding
performance on selected hardware in simple setups.
So after being beaten in test and production environments on
several architectures, it's ready to be put back again. We're
doing it early in the release cycle so that it will receive
good test exposure.
ok deraadt, henning
Diffstat (limited to 'sys/netinet/ip_input.c')
-rw-r--r-- | sys/netinet/ip_input.c | 33 |
1 files changed, 21 insertions, 12 deletions
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index b39dc35c4f0..6f11b0a6d64 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_input.c,v 1.196 2012/07/16 18:05:36 markus Exp $ */ +/* $OpenBSD: ip_input.c,v 1.197 2012/08/07 17:54:20 mikeb Exp $ */ /* $NetBSD: ip_input.c,v 1.30 1996/03/16 23:53:58 christos Exp $ */ /* @@ -1450,12 +1450,12 @@ int inetctlerrmap[PRC_NCMDS] = { void ip_forward(struct mbuf *m, int srcrt) { + struct mbuf mfake, *mcopy = NULL; struct ip *ip = mtod(m, struct ip *); struct sockaddr_in *sin; struct rtentry *rt; - int error, type = 0, code = 0, destmtu = 0; + int error, type = 0, code = 0, destmtu = 0, fake = 0, len; u_int rtableid = 0; - struct mbuf *mcopy; n_long dest; dest = 0; @@ -1500,11 +1500,19 @@ ip_forward(struct mbuf *m, int srcrt) /* * Save at most 68 bytes of the packet in case * we need to generate an ICMP message to the src. - * Pullup to avoid sharing mbuf cluster between m and mcopy. + * The data is saved in the mbuf on the stack that + * acts as a temporary storage not intended to be + * passed down the IP stack or to the mfree. 
*/ - mcopy = m_copym(m, 0, min(ntohs(ip->ip_len), 68), M_DONTWAIT); - if (mcopy) - mcopy = m_pullup(mcopy, min(ntohs(ip->ip_len), 68)); + bzero(&mfake.m_hdr, sizeof(mfake.m_hdr)); + mfake.m_type = m->m_type; + if (m_dup_pkthdr(&mfake, m, M_DONTWAIT) == 0) { + mfake.m_data = mfake.m_pktdat; + len = min(ntohs(ip->ip_len), 68); + m_copydata(m, 0, len, mfake.m_pktdat); + mfake.m_pkthdr.len = mfake.m_len = len; + fake = 1; + } ip->ip_ttl -= IPTTLDEC; @@ -1553,7 +1561,7 @@ ip_forward(struct mbuf *m, int srcrt) else goto freecopy; } - if (mcopy == NULL) + if (!fake) goto freert; switch (error) { @@ -1604,12 +1612,13 @@ ip_forward(struct mbuf *m, int srcrt) goto freecopy; } - icmp_error(mcopy, type, code, dest, destmtu); - goto freert; + mcopy = m_copym(&mfake, 0, len, M_DONTWAIT); + if (mcopy) + icmp_error(mcopy, type, code, dest, destmtu); freecopy: - if (mcopy) - m_freem(mcopy); + if (fake) + m_tag_delete_chain(&mfake); freert: #ifndef SMALL_KERNEL if (ipmultipath && ipforward_rt.ro_rt && |