summary | refs | log | tree | commit | diff
path: root/sys/net/if_gre.c
diff options
context:
space:
mode:
author: David Gwynne <dlg@cvs.openbsd.org> 2018-02-07 22:31:00 +0000
committer: David Gwynne <dlg@cvs.openbsd.org> 2018-02-07 22:31:00 +0000
commit: d156bb23773c4488a3c312605523cc7a692af0da (patch)
tree: 2bf73a89653721ee43b55027301c98a9984c29ab /sys/net/if_gre.c
parent: 0c80ef07be4e7a1851f58908f8948640a05519f5 (diff)
update the gre driver.
the main new feature is gre keys, supported by the vnetid ioctls. this also adds support for gre over ipv6, the use of hfsc, and allows tx mitigation in the future. this diff removes keepalive support, but i promised claudio@ and patrick@ i would put it back after this goes in. ok claudio@
Diffstat (limited to 'sys/net/if_gre.c')
-rw-r--r-- sys/net/if_gre.c 1130
1 files changed, 808 insertions, 322 deletions
diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c
index 07cf73cc59a..ff4eca1e575 100644
--- a/sys/net/if_gre.c
+++ b/sys/net/if_gre.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: if_gre.c,v 1.90 2018/02/07 01:52:15 dlg Exp $ */
+/* $OpenBSD: if_gre.c,v 1.91 2018/02/07 22:30:59 dlg Exp $ */
/* $NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */
/*
@@ -38,9 +38,6 @@
* Also supported: IP in IP encapsulation (proto 55) per RFC 2004.
*/
-#include "gre.h"
-#if NGRE > 0
-
#include "bpfilter.h"
#include "pf.h"
@@ -50,10 +47,12 @@
#include <sys/sockio.h>
#include <sys/kernel.h>
#include <sys/systm.h>
-#include <sys/timeout.h>
+#include <sys/errno.h>
+#include <sys/tree.h>
#include <net/if.h>
#include <net/if_types.h>
+#include <net/if_media.h>
#include <net/route.h>
#include <netinet/in.h>
@@ -61,6 +60,19 @@
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#endif
+
+#ifdef PIPEX
+#include <net/pipex.h>
+#endif
+
+#ifdef MPLS
+#include <netmpls/mpls.h>
+#endif /* MPLS */
+
#if NBPFILTER > 0
#include <net/bpf.h>
#endif
@@ -71,25 +83,115 @@
#include <net/if_gre.h>
-#ifndef GRE_RECURSION_LIMIT
-#define GRE_RECURSION_LIMIT 3 /* How many levels of recursion allowed */
-#endif /* GRE_RECURSION_LIMIT */
+#include <netinet/ip_gre.h>
+#include <sys/sysctl.h>
/*
- * It is not easy to calculate the right value for a GRE MTU.
- * We leave this task to the admin and use the same default that
- * other vendors use.
+ * packet formats
+ */
+struct gre_header {
+ uint16_t gre_flags;
+#define GRE_CP 0x8000 /* Checksum Present */
+#define GRE_KP 0x2000 /* Key Present */
+#define GRE_SP 0x1000 /* Sequence Present */
+
+#define GRE_VERS_MASK 0x0007
+#define GRE_VERS_0 0x0000
+#define GRE_VERS_1 0x0001
+
+ uint16_t gre_proto;
+} __packed __aligned(4);
+
+struct gre_h_cksum {
+ uint16_t gre_cksum;
+ uint16_t gre_reserved1;
+} __packed __aligned(4);
+
+struct gre_h_key {
+ uint32_t gre_key;
+} __packed __aligned(4);
+
+struct gre_h_seq {
+ uint32_t gre_seq;
+} __packed __aligned(4);
+
+
+/*
+ * GRE tunnel metadata
*/
-#define GREMTU 1476
-int gre_clone_create(struct if_clone *, int);
-int gre_clone_destroy(struct ifnet *);
+struct gre_tunnel {
+ RBT_ENTRY(gre_entry) t_entry;
-struct gre_softc_head gre_softc_list;
+ uint32_t t_key_mask;
+#define GRE_KEY_NONE htonl(0x00000000U)
+#define GRE_KEY_ENTROPY htonl(0xffffff00U)
+#define GRE_KEY_MASK htonl(0xffffffffU)
+ uint32_t t_key;
+
+ u_int t_rtableid;
+ int t_af;
+ uint32_t t_src[4];
+ uint32_t t_dst[4];
+
+ uint8_t t_ttl;
+};
+
+RBT_HEAD(gre_tree, gre_tunnel);
+
+static inline int
+ gre_cmp(const struct gre_tunnel *, const struct gre_tunnel *);
+
+RBT_PROTOTYPE(gre_tree, gre_tunnel, t_entry, gre_cmp);
+
+static int gre_set_tunnel(struct gre_tunnel *, struct if_laddrreq *);
+static int gre_get_tunnel(struct gre_tunnel *, struct if_laddrreq *);
+static int gre_del_tunnel(struct gre_tunnel *);
+
+static int gre_set_vnetid(struct gre_tunnel *, struct ifreq *);
+static int gre_get_vnetid(struct gre_tunnel *, struct ifreq *);
+static int gre_del_vnetid(struct gre_tunnel *);
+
+static int gre_ip_output(const struct gre_tunnel *, struct mbuf *,
+ uint8_t);
+/*
+ * layer 3 GRE tunnels
+ */
+
+struct gre_softc {
+ struct gre_tunnel sc_tunnel; /* must be first */
+ struct ifnet sc_if;
+};
+
+static int gre_clone_create(struct if_clone *, int);
+static int gre_clone_destroy(struct ifnet *);
struct if_clone gre_cloner =
IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
+struct gre_tree gre_softcs = RBT_INITIALIZER();
+
+static int gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct rtentry *);
+static void gre_start(struct ifnet *);
+static int gre_ioctl(struct ifnet *, u_long, caddr_t);
+
+static int gre_up(struct gre_softc *);
+static int gre_down(struct gre_softc *);
+
+static int gre_input_key(struct mbuf **, int *, int, int,
+ struct gre_tunnel *);
+
+static struct mbuf *
+ gre_encap(struct gre_softc *, struct mbuf *, uint8_t *);
+
+/*
+ * It is not easy to calculate the right value for a GRE MTU.
+ * We leave this task to the admin and use the same default that
+ * other vendors use.
+ */
+#define GREMTU 1476
+
/*
* We can control the acceptance of GRE and MobileIP packets by
* altering the sysctl net.inet.gre.allow values
@@ -102,231 +204,460 @@ struct if_clone gre_cloner =
int gre_allow = 0;
int gre_wccp = 0;
-void gre_keepalive(void *);
-void gre_send_keepalive(void *);
-void gre_link_state(struct gre_softc *);
-
void
greattach(int n)
{
- LIST_INIT(&gre_softc_list);
if_clone_attach(&gre_cloner);
}
-int
+static int
gre_clone_create(struct if_clone *ifc, int unit)
{
struct gre_softc *sc;
+ struct ifnet *ifp;
sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d",
ifc->ifc_name, unit);
- sc->sc_if.if_softc = sc;
- sc->sc_if.if_type = IFT_TUNNEL;
- sc->sc_if.if_hdrlen = 24; /* IP + GRE */
- sc->sc_if.if_mtu = GREMTU;
- sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
- sc->sc_if.if_xflags = IFXF_CLONED;
- sc->sc_if.if_output = gre_output;
- sc->sc_if.if_ioctl = gre_ioctl;
- sc->sc_if.if_rtrequest = p2p_rtrequest;
- sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
- sc->sc_ka_state = GRE_STATE_UKNWN;
-
- /* GRE encapsulation */
- sc->g_proto = IPPROTO_GRE;
-
- timeout_set(&sc->sc_ka_hold, gre_keepalive, sc);
- timeout_set_proc(&sc->sc_ka_snd, gre_send_keepalive, sc);
-
- if_attach(&sc->sc_if);
- if_alloc_sadl(&sc->sc_if);
+
+ ifp = &sc->sc_if;
+ ifp->if_softc = sc;
+ ifp->if_type = IFT_TUNNEL;
+ ifp->if_hdrlen = 24; /* IP + GRE */
+ ifp->if_mtu = GREMTU;
+ ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
+ ifp->if_xflags = IFXF_CLONED;
+ ifp->if_output = gre_output;
+ ifp->if_start = gre_start;
+ ifp->if_ioctl = gre_ioctl;
+ ifp->if_rtrequest = p2p_rtrequest;
+
+ if_attach(ifp);
+ if_alloc_sadl(ifp);
#if NBPFILTER > 0
- bpfattach(&sc->sc_if.if_bpf, &sc->sc_if, DLT_LOOP, sizeof(u_int32_t));
+ bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif
- NET_LOCK();
- LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
- NET_UNLOCK();
return (0);
}
-int
+static int
gre_clone_destroy(struct ifnet *ifp)
{
struct gre_softc *sc = ifp->if_softc;
- timeout_del(&sc->sc_ka_snd);
- timeout_del(&sc->sc_ka_hold);
NET_LOCK();
- LIST_REMOVE(sc, sc_list);
+ if (ISSET(ifp->if_flags, IFF_RUNNING))
+ gre_down(sc);
NET_UNLOCK();
if_detach(ifp);
free(sc, M_DEVBUF, sizeof(*sc));
+
return (0);
}
-/*
- * The output routine. Takes a packet and encapsulates it in the protocol
- * given by sc->g_proto. See also RFC 1701 and RFC 2004.
- */
+int
+gre_input(struct mbuf **mp, int *offp, int type, int af)
+{
+ struct mbuf *m = *mp;
+ struct gre_tunnel key;
+ struct ip *ip;
+
+ ip = mtod(m, struct ip *);
+
+ key.t_af = AF_INET;
+ key.t_src[0] = ip->ip_dst.s_addr;
+ key.t_dst[0] = ip->ip_src.s_addr;
+
+ if (gre_input_key(mp, offp, type, af, &key) == -1)
+ return (rip_input(mp, offp, type, af));
+ return (IPPROTO_DONE);
+}
+
+#ifdef INET6
int
+gre_input6(struct mbuf **mp, int *offp, int type, int af)
+{
+ struct mbuf *m = *mp;
+ struct gre_tunnel key;
+ struct ip6_hdr *ip6;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+
+ key.t_af = AF_INET6;
+ memcpy(key.t_src, &ip6->ip6_dst, sizeof(key.t_src));
+ memcpy(key.t_dst, &ip6->ip6_src, sizeof(key.t_dst));
+
+ if (gre_input_key(mp, offp, type, af, &key) == -1)
+ return (rip6_input(mp, offp, type, af));
+
+ return (IPPROTO_DONE);
+}
+#endif /* INET6 */
+
+static int
+gre_input_key(struct mbuf **mp, int *offp, int type, int af,
+ struct gre_tunnel *key)
+{
+ struct mbuf *m = *mp;
+ int iphlen = *offp, hlen;
+ struct gre_softc *sc;
+ struct ifnet *ifp;
+ caddr_t buf;
+ struct gre_header *gh;
+ struct gre_h_key *gkh;
+ void (*input)(struct ifnet *, struct mbuf *);
+ int bpf_af = AF_UNSPEC; /* bpf */
+
+ if (!gre_allow)
+ goto decline;
+
+ hlen = iphlen + sizeof(*gh);
+ if (m->m_pkthdr.len < hlen)
+ goto decline;
+
+ m = m_pullup(m, hlen);
+ if (m == NULL)
+ return (IPPROTO_DONE);
+
+ buf = mtod(m, caddr_t);
+ gh = (struct gre_header *)(buf + iphlen);
+
+ /* check the version */
+ switch (gh->gre_flags & htons(GRE_VERS_MASK)) {
+ case htons(GRE_VERS_0):
+ break;
+
+ case htons(GRE_VERS_1):
+#ifdef PIPEX
+ if (pipex_enable) {
+ struct pipex_session *session;
+
+ session = pipex_pptp_lookup_session(m);
+ if (session != NULL &&
+ pipex_pptp_input(m, session) == NULL)
+ return (IPPROTO_DONE);
+ }
+#endif
+ /* FALLTHROUGH */
+ default:
+ goto decline;
+ }
+
+ /* the only optional bit in the header is K flag */
+ if ((gh->gre_flags & htons(~(GRE_KP|GRE_VERS_MASK))) != htons(0))
+ goto decline;
+
+ if (gh->gre_flags & htons(GRE_KP)) {
+ hlen += sizeof(*gkh);
+ if (m->m_pkthdr.len < hlen)
+ goto decline;
+
+ m = m_pullup(m, hlen);
+ if (m == NULL)
+ return (IPPROTO_DONE);
+
+ buf = mtod(m, caddr_t);
+ gh = (struct gre_header *)(buf + iphlen);
+ gkh = (struct gre_h_key *)(gh + 1);
+
+ key->t_key_mask = GRE_KEY_MASK;
+ key->t_key = gkh->gre_key;
+ } else
+ key->t_key_mask = GRE_KEY_NONE;
+
+ key->t_rtableid = m->m_pkthdr.ph_rtableid;
+
+ switch (gh->gre_proto) {
+ case htons(ETHERTYPE_IP):
+#if NBPFILTER > 0
+ bpf_af = AF_INET;
+#endif
+ input = ipv4_input;
+ break;
+#ifdef INET6
+ case htons(ETHERTYPE_IPV6):
+#if NBPFILTER > 0
+ bpf_af = AF_INET6;
+#endif
+ input = ipv6_input;
+ break;
+#endif
+#ifdef MPLS
+ case htons(ETHERTYPE_MPLS):
+ case htons(ETHERTYPE_MPLS_MCAST):
+#if NBPFILTER > 0
+ bpf_af = AF_MPLS;
+#endif
+ input = mpls_input;
+ break;
+#endif
+
+ case htons(ETHERTYPE_TRANSETHER): /* not yet */
+ default:
+ goto decline;
+ }
+
+ sc = (struct gre_softc *)RBT_FIND(gre_tree, &gre_softcs, key);
+ if (sc == NULL)
+ goto decline;
+
+ ifp = &sc->sc_if;
+
+ m_adj(m, hlen);
+
+ m->m_flags &= ~(M_MCAST|M_BCAST);
+ m->m_pkthdr.ph_ifidx = ifp->if_index;
+ m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
+
+#if NPF > 0
+ pf_pkt_addr_changed(m);
+#endif
+
+ ifp->if_ipackets++;
+ ifp->if_ibytes += m->m_pkthdr.len;
+
+#if NBPFILTER > 0
+ if (ifp->if_bpf)
+ bpf_mtap_af(ifp->if_bpf, bpf_af, m, BPF_DIRECTION_IN);
+#endif
+
+ (*input)(ifp, m);
+ return (IPPROTO_DONE);
+decline:
+ mp = &m;
+ return (-1);
+}
+
+static int
gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
- struct rtentry *rt)
+ struct rtentry *rt)
{
- int error = 0;
- struct gre_softc *sc = (struct gre_softc *) (ifp->if_softc);
- struct greip *gh = NULL;
- struct ip *inp = NULL;
- u_int8_t ip_tos = 0;
- u_int16_t etype = 0;
struct m_tag *mtag;
+ int error = 0;
- if ((ifp->if_flags & IFF_UP) == 0 ||
- sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
- m_freem(m);
- error = ENETDOWN;
- goto end;
+ if (!gre_allow) {
+ error = EACCES;
+ goto drop;
}
-#ifdef DIAGNOSTIC
- if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid)) {
- printf("%s: trying to send packet on wrong domain. "
- "if %d vs. mbuf %d, AF %d\n", ifp->if_xname,
- ifp->if_rdomain, rtable_l2(m->m_pkthdr.ph_rtableid),
- dst->sa_family);
+ if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
+ error = ENETDOWN;
+ goto drop;
}
+
+ switch (dst->sa_family) {
+ case AF_INET:
+#ifdef INET6
+ case AF_INET6:
+#endif
+#ifdef MPLS
+ case AF_MPLS:
#endif
+ break;
+ default:
+ error = EAFNOSUPPORT;
+ goto drop;
+ }
/* Try to limit infinite recursion through misconfiguration. */
for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
- if (!bcmp((caddr_t)(mtag + 1), &ifp, sizeof(struct ifnet *))) {
+ if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
+ sizeof(ifp->if_index)) == 0) {
m_freem(m);
error = EIO;
goto end;
}
}
- mtag = m_tag_get(PACKET_TAG_GRE, sizeof(struct ifnet *), M_NOWAIT);
+ mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
if (mtag == NULL) {
m_freem(m);
error = ENOBUFS;
goto end;
}
- bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
+ memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
m_tag_prepend(m, mtag);
- m->m_flags &= ~(M_BCAST|M_MCAST);
+ m->m_pkthdr.ph_family = dst->sa_family;
+
+ error = if_enqueue(ifp, m);
+end:
+ if (error)
+ ifp->if_oerrors++;
+ return (error);
+drop:
+ m_freem(m);
+ return (error);
+}
+
+void
+gre_start(struct ifnet *ifp)
+{
+ struct gre_softc *sc = ifp->if_softc;
+ struct mbuf *m;
+ uint8_t tos;
#if NBPFILTER > 0
- if (ifp->if_bpf)
- bpf_mtap_af(ifp->if_bpf, dst->sa_family, m, BPF_DIRECTION_OUT);
+ caddr_t if_bpf;
#endif
- if (gre_allow == 0) {
- m_freem(m);
- error = EACCES;
- goto end;
- }
-
- switch(dst->sa_family) {
- case AF_INET:
- if (m->m_len < sizeof(struct ip)) {
- m = m_pullup(m, sizeof(struct ip));
- if (m == NULL) {
- error = ENOBUFS;
- goto end;
- }
+ while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
+#if NBPFILTER > 0
+ if_bpf = ifp->if_bpf;
+ if (if_bpf) {
+ int af = m->m_pkthdr.ph_family;
+ bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
}
+#endif
+
+ m = gre_encap(sc, m, &tos);
+ if (m == NULL || gre_ip_output(&sc->sc_tunnel, m, tos) != 0)
+ ifp->if_oerrors++;
+ }
+}
- inp = mtod(m, struct ip *);
- ip_tos = inp->ip_tos;
- etype = ETHERTYPE_IP;
+static struct mbuf *
+gre_encap(struct gre_softc *sc, struct mbuf *m, uint8_t *tos)
+{
+ struct gre_header *gh;
+ struct gre_h_key *gkh;
+ uint16_t proto;
+ int hlen;
+
+ *tos = 0;
+ switch (m->m_pkthdr.ph_family) {
+ case AF_INET: {
+ proto = htons(ETHERTYPE_IP);
+
+ struct ip *ip = mtod(m, struct ip *);
+ *tos = ip->ip_tos;
break;
+ }
#ifdef INET6
case AF_INET6:
- etype = ETHERTYPE_IPV6;
+ proto = htons(ETHERTYPE_IPV6);
break;
#endif
#ifdef MPLS
case AF_MPLS:
if (m->m_flags & (M_BCAST | M_MCAST))
- etype = ETHERTYPE_MPLS_MCAST;
+ proto = htons(ETHERTYPE_MPLS_MCAST);
else
- etype = ETHERTYPE_MPLS;
+ proto = htons(ETHERTYPE_MPLS);
break;
#endif
default:
- m_freem(m);
- error = EAFNOSUPPORT;
- goto end;
+ unhandled_af(m->m_pkthdr.ph_family);
}
- M_PREPEND(m, sizeof(struct greip), M_DONTWAIT);
+ hlen = sizeof(*gh);
+ if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE)
+ hlen += sizeof(*gkh);
- if (m == NULL) {
- error = ENOBUFS;
- goto end;
- }
+ m = m_prepend(m, hlen, M_DONTWAIT);
+ if (m == NULL)
+ return (NULL);
- gh = mtod(m, struct greip *);
- if (sc->g_proto == IPPROTO_GRE) {
- /* We don't support any GRE flags for now */
+ gh = mtod(m, struct gre_header *);
+ gh->gre_flags = GRE_VERS_0;
+ gh->gre_proto = proto;
+ if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE) {
+ gh->gre_flags |= htons(GRE_KP);
- bzero((void *) &gh->gi_g, sizeof(struct gre_h));
- gh->gi_ptype = htons(etype);
+ gkh = (struct gre_h_key *)(gh + 1);
+ gkh->gre_key = sc->sc_tunnel.t_key;
}
- gh->gi_pr = sc->g_proto;
- gh->gi_src = sc->g_src;
- gh->gi_dst = sc->g_dst;
- ((struct ip *) gh)->ip_hl = (sizeof(struct ip)) >> 2;
- ((struct ip *) gh)->ip_ttl = ip_defttl;
- ((struct ip *) gh)->ip_tos = ip_tos;
- gh->gi_len = htons(m->m_pkthdr.len);
-
- ifp->if_opackets++;
- ifp->if_obytes += m->m_pkthdr.len;
-
+ return (m);
+}
- m->m_pkthdr.ph_rtableid = sc->g_rtableid;
+static int
+gre_ip_output(const struct gre_tunnel *tunnel, struct mbuf *m, uint8_t tos)
+{
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+ m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;
#if NPF > 0
pf_pkt_addr_changed(m);
#endif
- /* Send it off */
- error = ip_output(m, NULL, &sc->route, 0, NULL, NULL, 0);
- end:
- if (error)
- ifp->if_oerrors++;
- return (error);
+ switch (tunnel->t_af) {
+ case AF_INET: {
+ struct ip *ip;
+
+ m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
+ if (m == NULL)
+ return (ENOMEM);
+
+ ip = mtod(m, struct ip *);
+ ip->ip_tos = tos;
+ ip->ip_len = htons(m->m_pkthdr.len);
+ ip->ip_ttl = tunnel->t_ttl;
+ ip->ip_p = IPPROTO_GRE;
+ ip->ip_src.s_addr = tunnel->t_src[0];
+ ip->ip_dst.s_addr = tunnel->t_dst[0];
+
+ ip_send(m);
+ break;
+ }
+#ifdef INET6
+ case AF_INET6: {
+ struct ip6_hdr *ip6;
+ int len = m->m_pkthdr.len;
+
+ m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
+ if (m == NULL)
+ return (ENOMEM);
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6->ip6_flow = ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID) ?
+ htonl(m->m_pkthdr.ph_flowid & M_FLOWID_MASK) : 0;
+ ip6->ip6_vfc |= IPV6_VERSION;
+ ip6->ip6_plen = htons(len);
+ ip6->ip6_nxt = IPPROTO_GRE;
+ ip6->ip6_hlim = tunnel->t_ttl;
+ memcpy(&ip6->ip6_src, tunnel->t_src, sizeof(ip6->ip6_src));
+ memcpy(&ip6->ip6_dst, tunnel->t_dst, sizeof(ip6->ip6_dst));
+
+ ip6_send(m);
+ break;
+ }
+#endif /* INET6 */
+ default:
+ panic("%s: unsupported af %d in %p", __func__, tunnel->t_af,
+ tunnel);
+ }
+
+ return (0);
}
-int
+static int
gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
-
struct ifreq *ifr = (struct ifreq *)data;
- struct if_laddrreq *lifr = (struct if_laddrreq *)data;
- struct ifkalivereq *ikar = (struct ifkalivereq *)data;
struct gre_softc *sc = ifp->if_softc;
- struct sockaddr_in si;
int error = 0;
- struct proc *prc = curproc; /* XXX */
switch(cmd) {
case SIOCSIFADDR:
ifp->if_flags |= IFF_UP;
- break;
- case SIOCSIFDSTADDR:
- break;
+ /* FALLTHROUGH */
case SIOCSIFFLAGS:
+ if (ISSET(ifp->if_flags, IFF_UP)) {
+ if (!ISSET(ifp->if_flags, IFF_RUNNING))
+ error = gre_up(sc);
+ else
+ error = 0;
+ } else {
+ if (ISSET(ifp->if_flags, IFF_RUNNING))
+ error = gre_down(sc);
+ }
break;
case SIOCSIFMTU:
if (ifr->ifr_mtu < 576) {
@@ -338,95 +669,79 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCADDMULTI:
case SIOCDELMULTI:
break;
- case SIOCSETKALIVE:
- if ((error = suser(prc, 0)) != 0)
- break;
- if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
- ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256) {
- error = EINVAL;
- break;
- }
- sc->sc_ka_timout = ikar->ikar_timeo;
- sc->sc_ka_cnt = ikar->ikar_cnt;
- if (sc->sc_ka_timout == 0 || sc->sc_ka_cnt == 0) {
- sc->sc_ka_timout = 0;
- sc->sc_ka_cnt = 0;
- sc->sc_ka_state = GRE_STATE_UKNWN;
- gre_link_state(sc);
+
+ case SIOCSVNETID:
+ if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+ error = EBUSY;
break;
}
- if (!timeout_pending(&sc->sc_ka_snd)) {
- sc->sc_ka_holdmax = sc->sc_ka_cnt;
- timeout_add(&sc->sc_ka_snd, 1);
- timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timout *
- sc->sc_ka_cnt);
- }
+ error = gre_set_vnetid(&sc->sc_tunnel, ifr);
break;
- case SIOCGETKALIVE:
- ikar->ikar_timeo = sc->sc_ka_timout;
- ikar->ikar_cnt = sc->sc_ka_cnt;
+
+ case SIOCGVNETID:
+ error = gre_get_vnetid(&sc->sc_tunnel, ifr);
break;
- case SIOCSLIFPHYADDR:
- if ((error = suser(prc, 0)) != 0)
- break;
- if (lifr->addr.ss_family != AF_INET ||
- lifr->dstaddr.ss_family != AF_INET) {
- error = EAFNOSUPPORT;
- break;
- }
- if (lifr->addr.ss_len != sizeof(si) ||
- lifr->dstaddr.ss_len != sizeof(si)) {
- error = EINVAL;
+ case SIOCDVNETID:
+ if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+ error = EBUSY;
break;
}
- sc->g_src = ((struct sockaddr_in *)&lifr->addr)->sin_addr;
- sc->g_dst = ((struct sockaddr_in *)&lifr->dstaddr)->sin_addr;
- recompute:
- if ((sc->g_src.s_addr != INADDR_ANY) &&
- (sc->g_dst.s_addr != INADDR_ANY)) {
- if (sc->route.ro_rt != NULL) {
- rtfree(sc->route.ro_rt);
- sc->route.ro_rt = NULL;
- }
- /* ip_output() will do the lookup */
- bzero(&sc->route, sizeof(sc->route));
- ifp->if_flags |= IFF_UP;
- }
+ error = gre_del_vnetid(&sc->sc_tunnel);
break;
- case SIOCDIFPHYADDR:
- if ((error = suser(prc, 0)) != 0)
+
+ case SIOCSLIFPHYADDR:
+ if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+ error = EBUSY;
break;
- sc->g_src.s_addr = INADDR_ANY;
- sc->g_dst.s_addr = INADDR_ANY;
+ }
+ error = gre_set_tunnel(&sc->sc_tunnel,
+ (struct if_laddrreq *)data);
break;
case SIOCGLIFPHYADDR:
- if (sc->g_src.s_addr == INADDR_ANY ||
- sc->g_dst.s_addr == INADDR_ANY) {
- error = EADDRNOTAVAIL;
+ error = gre_get_tunnel(&sc->sc_tunnel,
+ (struct if_laddrreq *)data);
+ break;
+ case SIOCDIFPHYADDR:
+ if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+ error = EBUSY;
break;
}
- bzero(&si, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_src.s_addr;
- memcpy(&lifr->addr, &si, sizeof(si));
- si.sin_addr.s_addr = sc->g_dst.s_addr;
- memcpy(&lifr->dstaddr, &si, sizeof(si));
+
+ error = gre_del_tunnel(&sc->sc_tunnel);
break;
+
case SIOCSLIFPHYRTABLE:
- if ((error = suser(prc, 0)) != 0)
+ if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+ error = EBUSY;
break;
+ }
+
if (ifr->ifr_rdomainid < 0 ||
ifr->ifr_rdomainid > RT_TABLEID_MAX ||
!rtable_exists(ifr->ifr_rdomainid)) {
error = EINVAL;
break;
}
- sc->g_rtableid = ifr->ifr_rdomainid;
- goto recompute;
+ sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
+ break;
case SIOCGLIFPHYRTABLE:
- ifr->ifr_rdomainid = sc->g_rtableid;
+ ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
+ break;
+
+ case SIOCSLIFPHYTTL:
+ if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) {
+ error = EINVAL;
+ break;
+ }
+
+ /* commit */
+ sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
+ break;
+
+ case SIOCGLIFPHYTTL:
+ ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
break;
+
default:
error = ENOTTY;
}
@@ -434,151 +749,322 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
return (error);
}
-/*
- * do a checksum of a buffer - much like in_cksum, which operates on
- * mbufs.
- */
-u_int16_t
-gre_in_cksum(u_int16_t *p, u_int len)
+static int
+gre_up(struct gre_softc *sc)
{
- u_int32_t sum = 0;
- int nwords = len >> 1;
-
- while (nwords-- != 0)
- sum += *p++;
-
- if (len & 1) {
- union {
- u_short w;
- u_char c[2];
- } u;
- u.c[0] = *(u_char *) p;
- u.c[1] = 0;
- sum += u.w;
- }
+ int error = 0;
+
+ if (sc->sc_tunnel.t_af == AF_UNSPEC)
+ return (ENXIO);
- /* end-around-carry */
- sum = (sum >> 16) + (sum & 0xffff);
- sum += (sum >> 16);
- return (~sum);
+ NET_ASSERT_LOCKED();
+ if (RBT_INSERT(gre_tree, &gre_softcs, &sc->sc_tunnel) != NULL)
+ return (EBUSY);
+
+ SET(sc->sc_if.if_flags, IFF_RUNNING);
+
+ return (error);
}
-void
-gre_keepalive(void *arg)
+static int
+gre_down(struct gre_softc *sc)
{
- struct gre_softc *sc = arg;
+ NET_ASSERT_LOCKED();
+ RBT_REMOVE(gre_tree, &gre_softcs, &sc->sc_tunnel);
- if (!sc->sc_ka_timout)
- return;
+ CLR(sc->sc_if.if_flags, IFF_RUNNING);
- sc->sc_ka_state = GRE_STATE_DOWN;
- gre_link_state(sc);
+ return (0);
}
-void
-gre_send_keepalive(void *arg)
+static int
+gre_set_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req)
{
- struct gre_softc *sc = arg;
- struct mbuf *m;
- struct ip *ip;
- struct gre_h *gh;
- struct sockaddr dst;
-
- if (sc->sc_ka_timout)
- timeout_add_sec(&sc->sc_ka_snd, sc->sc_ka_timout);
-
- if (sc->g_proto != IPPROTO_GRE)
- return;
- if ((sc->sc_if.if_flags & IFF_UP) == 0 ||
- sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY)
- return;
-
- MGETHDR(m, M_DONTWAIT, MT_DATA);
- if (m == NULL) {
- sc->sc_if.if_oerrors++;
- return;
- }
+ struct sockaddr *src = (struct sockaddr *)&req->addr;
+ struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
+ struct sockaddr_in *src4, *dst4;
+#ifdef INET6
+ struct sockaddr_in6 *src6, *dst6;
+ int error;
+#endif
- m->m_len = m->m_pkthdr.len = sizeof(*ip) + sizeof(*gh);
- MH_ALIGN(m, m->m_len);
+ /* sa_family and sa_len must be equal */
+ if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len)
+ return (EINVAL);
- /* use the interface's rdomain when sending keepalives. */
- m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
+ /* validate */
+ switch (dst->sa_family) {
+ case AF_INET:
+ if (dst->sa_len != sizeof(*dst4))
+ return (EINVAL);
- /* build the ip header */
- ip = mtod(m, struct ip *);
+ src4 = (struct sockaddr_in *)src;
+ if (in_nullhost(src4->sin_addr) ||
+ IN_MULTICAST(src4->sin_addr.s_addr))
+ return (EINVAL);
- ip->ip_v = IPVERSION;
- ip->ip_hl = sizeof(*ip) >> 2;
- ip->ip_tos = IPTOS_LOWDELAY;
- ip->ip_len = htons(m->m_pkthdr.len);
- ip->ip_id = htons(ip_randomid());
- ip->ip_off = htons(IP_DF);
- ip->ip_ttl = ip_defttl;
- ip->ip_p = IPPROTO_GRE;
- ip->ip_src.s_addr = sc->g_dst.s_addr;
- ip->ip_dst.s_addr = sc->g_src.s_addr;
- ip->ip_sum = 0;
- ip->ip_sum = in_cksum(m, sizeof(*ip));
-
- gh = (struct gre_h *)(ip + 1);
- /* We don't support any GRE flags for now */
- bzero(gh, sizeof(*gh));
-
- bzero(&dst, sizeof(dst));
- dst.sa_family = AF_INET;
+ dst4 = (struct sockaddr_in *)dst;
+ if (in_nullhost(dst4->sin_addr) ||
+ IN_MULTICAST(dst4->sin_addr.s_addr))
+ return (EINVAL);
- NET_LOCK();
- /* should we care about the error? */
- gre_output(&sc->sc_if, m, &dst, NULL);
- NET_UNLOCK();
+ tunnel->t_src[0] = src4->sin_addr.s_addr;
+ tunnel->t_dst[0] = dst4->sin_addr.s_addr;
+
+ break;
+#ifdef INET6
+ case AF_INET6:
+ if (dst->sa_len != sizeof(*dst6))
+ return (EINVAL);
+
+ src6 = (struct sockaddr_in6 *)src;
+ if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
+ IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
+ return (EINVAL);
+
+ dst6 = (struct sockaddr_in6 *)dst;
+ if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) ||
+ IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr))
+ return (EINVAL);
+
+ error = in6_embedscope((struct in6_addr *)tunnel->t_src,
+ src6, NULL);
+ if (error != 0)
+ return (error);
+
+ error = in6_embedscope((struct in6_addr *)tunnel->t_dst,
+ dst6, NULL);
+ if (error != 0)
+ return (error);
+
+ break;
+#endif
+ default:
+ return (EAFNOSUPPORT);
+ }
+
+ /* commit */
+ tunnel->t_af = dst->sa_family;
+
+ return (0);
}
-void
-gre_recv_keepalive(struct gre_softc *sc)
+static int
+gre_get_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req)
{
- if (!sc->sc_ka_timout)
- return;
-
- /* link state flap dampening */
- switch (sc->sc_ka_state) {
- case GRE_STATE_UKNWN:
- case GRE_STATE_DOWN:
- sc->sc_ka_state = GRE_STATE_HOLD;
- sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
- sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
- 16 * sc->sc_ka_cnt);
- break;
- case GRE_STATE_HOLD:
- if (--sc->sc_ka_holdcnt < 1) {
- sc->sc_ka_state = GRE_STATE_UP;
- gre_link_state(sc);
- }
+ struct sockaddr *src = (struct sockaddr *)&req->addr;
+ struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
+ struct sockaddr_in *sin;
+#ifdef INET6 /* ifconfig already embeds the scopeid */
+ struct sockaddr_in6 *sin6;
+#endif
+
+ switch (tunnel->t_af) {
+ case AF_UNSPEC:
+ return (EADDRNOTAVAIL);
+ case AF_INET:
+ sin = (struct sockaddr_in *)src;
+ memset(sin, 0, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr.s_addr = tunnel->t_src[0];
+
+ sin = (struct sockaddr_in *)dst;
+ memset(sin, 0, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr.s_addr = tunnel->t_dst[0];
+
break;
- case GRE_STATE_UP:
- sc->sc_ka_holdmax--;
- sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_cnt);
+
+#ifdef INET6
+ case AF_INET6:
+ sin6 = (struct sockaddr_in6 *)src;
+ memset(sin6, 0, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
+ in6_recoverscope(sin6, (struct in6_addr *)tunnel->t_src);
+
+ sin6 = (struct sockaddr_in6 *)dst;
+ memset(sin6, 0, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
+ in6_recoverscope(sin6, (struct in6_addr *)tunnel->t_dst);
+
break;
+#endif
+ default:
+ return (EAFNOSUPPORT);
}
- /* rescedule hold timer */
- timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timout * sc->sc_ka_cnt);
+ return (0);
}
-void
-gre_link_state(struct gre_softc *sc)
+static int
+gre_del_tunnel(struct gre_tunnel *tunnel)
+{
+ /* commit */
+ tunnel->t_af = AF_UNSPEC;
+
+ return (0);
+}
+
+static int
+gre_set_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
+{
+ uint32_t key;
+
+ if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffffffff)
+ return EINVAL;
+
+ key = htonl(ifr->ifr_vnetid);
+
+ if (tunnel->t_key_mask == GRE_KEY_MASK && tunnel->t_key == key)
+ return (0);
+
+ /* commit */
+ tunnel->t_key_mask = GRE_KEY_MASK;
+ tunnel->t_key = key;
+
+ return (0);
+}
+
+static int
+gre_get_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
+{
+ if (tunnel->t_key_mask == GRE_KEY_NONE)
+ return (EADDRNOTAVAIL);
+
+ ifr->ifr_vnetid = (int64_t)ntohl(tunnel->t_key);
+
+ return (0);
+}
+
+static int
+gre_del_vnetid(struct gre_tunnel *tunnel)
+{
+ tunnel->t_key_mask = GRE_KEY_NONE;
+
+ return (0);
+}
+
+int
+gre_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
+ size_t newlen)
+{
+ int error;
+
+ /* All sysctl names at this level are terminal. */
+ if (namelen != 1)
+ return (ENOTDIR);
+
+ switch (name[0]) {
+ case GRECTL_ALLOW:
+ NET_LOCK();
+ error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_allow);
+ NET_UNLOCK();
+ return (error);
+ case GRECTL_WCCP:
+ NET_LOCK();
+ error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_wccp);
+ NET_UNLOCK();
+ return (error);
+ default:
+ return (ENOPROTOOPT);
+ }
+ /* NOTREACHED */
+}
+
+static inline int
+gre_ip_cmp(int af, const uint32_t *a, const uint32_t *b)
{
- struct ifnet *ifp = &sc->sc_if;
- int link_state = LINK_STATE_UNKNOWN;
+ switch (af) {
+#ifdef INET6
+ case AF_INET6:
+ if (a[3] > b[3])
+ return (1);
+ if (a[3] < b[3])
+ return (-1);
+
+ if (a[2] > b[2])
+ return (1);
+ if (a[2] < b[2])
+ return (-1);
+
+ if (a[1] > b[1])
+ return (1);
+ if (a[1] < b[1])
+ return (-1);
+
+ /* FALLTHROUGH */
+#endif /* INET6 */
+ case AF_INET:
+ if (a[0] > b[0])
+ return (1);
+ if (a[0] < b[0])
+ return (-1);
+ break;
+ default:
+ panic("%s: unsupported af %d\n", __func__, af);
+ }
- if (sc->sc_ka_state == GRE_STATE_UP)
- link_state = LINK_STATE_UP;
- else if (sc->sc_ka_state != GRE_STATE_UKNWN)
- link_state = LINK_STATE_KALIVE_DOWN;
+ return (0);
+}
- if (ifp->if_link_state != link_state) {
- ifp->if_link_state = link_state;
- if_link_state_change(ifp);
+static inline int
+gre_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b)
+{
+ uint32_t ka, kb;
+ uint32_t mask;
+ int rv;
+
+ /* sort by routing table */
+ if (a->t_rtableid > b->t_rtableid)
+ return (1);
+ if (a->t_rtableid < b->t_rtableid)
+ return (-1);
+
+ /* sort by address */
+ if (a->t_af > b->t_af)
+ return (1);
+ if (a->t_af < b->t_af)
+ return (-1);
+
+ rv = gre_ip_cmp(a->t_af, a->t_dst, b->t_dst);
+ if (rv != 0)
+ return (rv);
+
+ rv = gre_ip_cmp(a->t_af, a->t_src, b->t_src);
+ if (rv != 0)
+ return (rv);
+
+ /* is K set at all? */
+ ka = a->t_key_mask & GRE_KEY_ENTROPY;
+ kb = b->t_key_mask & GRE_KEY_ENTROPY;
+
+ /* sort by whether K is set */
+ if (ka > kb)
+ return (1);
+ if (ka < kb)
+ return (-1);
+
+ /* is K set on both? */
+ if (ka != GRE_KEY_NONE) {
+ /* get common prefix */
+ mask = a->t_key_mask & b->t_key_mask;
+
+ ka = a->t_key & mask;
+ kb = b->t_key & mask;
+
+ /* sort by common prefix */
+ if (ka > kb)
+ return (1);
+ if (ka < kb)
+ return (-1);
}
+
+ return (0);
}
-#endif
+
+RBT_GENERATE(gre_tree, gre_tunnel, t_entry, gre_cmp);
+