summaryrefslogtreecommitdiff
path: root/sys/net/if_vxlan.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/net/if_vxlan.c')
-rw-r--r--sys/net/if_vxlan.c590
1 files changed, 590 insertions, 0 deletions
diff --git a/sys/net/if_vxlan.c b/sys/net/if_vxlan.c
new file mode 100644
index 00000000000..69323992ccf
--- /dev/null
+++ b/sys/net/if_vxlan.c
@@ -0,0 +1,590 @@
+/* $OpenBSD: if_vxlan.c,v 1.1 2013/10/13 10:10:03 reyk Exp $ */
+
+/*
+ * Copyright (c) 2013 Reyk Floeter <reyk@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "bpfilter.h"
+#include "vxlan.h"
+#include "vlan.h"
+#include "pf.h"
+#include "bridge.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/ioctl.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+#include <net/route.h>
+
+#if NBPFILTER > 0
+#include <net/bpf.h>
+#endif
+
+#if NPF > 0
+#include <net/pfvar.h>
+#endif
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/in_systm.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+#include <netinet/in_pcb.h>
+
+#if NBRIDGE > 0
+#include <net/if_bridge.h>
+#endif
+
+#include <net/if_vxlan.h>
+
+void vxlanattach(int);
+int vxlanioctl(struct ifnet *, u_long, caddr_t);
+void vxlanstart(struct ifnet *);
+int vxlan_clone_create(struct if_clone *, int);
+int vxlan_clone_destroy(struct ifnet *);
+int vxlan_media_change(struct ifnet *);
+void vxlan_media_status(struct ifnet *, struct ifmediareq *);
+int vxlan_config(struct ifnet *, struct sockaddr *, struct sockaddr *);
+int vxlan_output(struct ifnet *, struct mbuf *);
+
+struct if_clone vxlan_cloner =
+ IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy);
+
+int vxlan_enable = 0;
+u_long vxlan_tagmask;
+
+#define VXLAN_TAGHASHSIZE 32
+#define VXLAN_TAGHASH(tag) (tag & vxlan_tagmask)
+LIST_HEAD(vxlan_taghash, vxlan_softc) *vxlan_tagh;
+
+void
+vxlanattach(int count)
+{
+ if ((vxlan_tagh = hashinit(VXLAN_TAGHASHSIZE, M_DEVBUF, M_NOWAIT,
+ &vxlan_tagmask)) == NULL)
+ panic("vxlanattach: hashinit");
+
+ if_clone_attach(&vxlan_cloner);
+}
+
+int
+vxlan_clone_create(struct if_clone *ifc, int unit)
+{
+ struct ifnet *ifp;
+ struct vxlan_softc *sc;
+
+ if ((sc = malloc(sizeof(*sc),
+ M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
+ return (ENOMEM);
+
+ sc->sc_imo.imo_membership = malloc(
+ (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
+ M_WAITOK|M_ZERO);
+ sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
+ sc->sc_dstport = htons(VXLAN_PORT);
+ sc->sc_vnetid = 0;
+
+ ifp = &sc->sc_ac.ac_if;
+ snprintf(ifp->if_xname, sizeof ifp->if_xname, "vxlan%d", unit);
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ether_fakeaddr(ifp);
+
+ ifp->if_softc = sc;
+ ifp->if_ioctl = vxlanioctl;
+ ifp->if_start = vxlanstart;
+ IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
+ IFQ_SET_READY(&ifp->if_snd);
+
+ ifp->if_hardmtu = 0xffff;
+ ifp->if_capabilities = IFCAP_VLAN_MTU;
+
+ ifmedia_init(&sc->sc_media, 0, vxlan_media_change,
+ vxlan_media_status);
+ ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+ ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
+
+ if_attach(ifp);
+ ether_ifattach(ifp);
+
+ /* XXX should we allow IP fragments? */
+ ifp->if_mtu = ETHERMTU - sizeof(struct ether_header);
+#ifdef INET
+ ifp->if_mtu -= sizeof(struct vxlanudpiphdr);
+#endif
+
+ LIST_INSERT_HEAD(&vxlan_tagh[VXLAN_TAGHASH(0)], sc, sc_entry);
+ vxlan_enable++;
+
+ return (0);
+}
+
+int
+vxlan_clone_destroy(struct ifnet *ifp)
+{
+ struct vxlan_softc *sc = ifp->if_softc;
+
+ vxlan_enable--;
+ LIST_REMOVE(sc, sc_entry);
+
+ ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
+ ether_ifdetach(ifp);
+ if_detach(ifp);
+ free(sc->sc_imo.imo_membership, M_IPMOPTS);
+ free(sc, M_DEVBUF);
+
+ return (0);
+}
+
+void
+vxlanstart(struct ifnet *ifp)
+{
+ struct mbuf *m;
+ int s;
+
+ for (;;) {
+ s = splnet();
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ splx(s);
+
+ if (m == NULL)
+ return;
+ ifp->if_opackets++;
+
+#if NBPFILTER > 0
+ if (ifp->if_bpf)
+ bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+#endif
+
+ vxlan_output(ifp, m);
+ }
+}
+
+int
+vxlan_config(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
+{
+ struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc;
+ struct ip_moptions *imo = &sc->sc_imo;
+#ifdef INET
+ struct sockaddr_in *src4, *dst4;
+ struct ifaddr *ifa;
+#endif
+ int reset = 0;
+
+ if (src != NULL && dst != NULL) {
+ /* XXX inet6 is not supported */
+ if (src->sa_family != AF_INET || dst->sa_family != AF_INET)
+ return (EAFNOSUPPORT);
+ } else {
+ /* Reset current configuration */
+ src = (struct sockaddr *)&sc->sc_src;
+ dst = (struct sockaddr *)&sc->sc_dst;
+ reset = 1;
+ }
+
+#ifdef INET
+ src4 = (struct sockaddr_in *)src;
+ dst4 = (struct sockaddr_in *)dst;
+
+ if (src4->sin_len != sizeof(*src4) || dst4->sin_len != sizeof(*dst4))
+ return (EINVAL);
+
+ if (IN_MULTICAST(dst4->sin_addr.s_addr)) {
+ if (src4->sin_addr.s_addr == INADDR_ANY ||
+ IN_MULTICAST(src4->sin_addr.s_addr))
+ return (EINVAL);
+ if ((ifa = ifa_ifwithaddr((struct sockaddr *)src4,
+ sc->sc_rtableid)) == NULL ||
+ ifa->ifa_ifp == NULL ||
+ (ifa->ifa_ifp->if_flags & IFF_MULTICAST) == 0)
+ return (EADDRNOTAVAIL);
+ }
+#endif
+
+ if (imo->imo_num_memberships > 0) {
+ in_delmulti(imo->imo_membership[
+ --imo->imo_num_memberships]);
+ imo->imo_multicast_ifp = NULL;
+ }
+
+#ifdef INET
+ if (IN_MULTICAST(dst4->sin_addr.s_addr)) {
+ if ((imo->imo_membership[0] =
+ in_addmulti(&dst4->sin_addr, ifa->ifa_ifp)) == NULL)
+ return (ENOBUFS);
+ imo->imo_num_memberships++;
+ imo->imo_multicast_ifp = ifa->ifa_ifp;
+ if (sc->sc_ttl > 0)
+ imo->imo_multicast_ttl = sc->sc_ttl;
+ else
+ imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
+ imo->imo_multicast_loop = 0;
+ }
+ if (dst4->sin_port)
+ sc->sc_dstport = dst4->sin_port;
+#endif
+
+ if (!reset) {
+ bzero(&sc->sc_src, sizeof(sc->sc_src));
+ bzero(&sc->sc_dst, sizeof(sc->sc_dst));
+ memcpy(&sc->sc_src, src, src->sa_len);
+ memcpy(&sc->sc_dst, dst, dst->sa_len);
+ }
+
+ LIST_REMOVE(sc, sc_entry);
+ LIST_INSERT_HEAD(&vxlan_tagh[VXLAN_TAGHASH(sc->sc_vnetid)],
+ sc, sc_entry);
+
+ return (0);
+}
+
+/* ARGSUSED */
+int
+vxlanioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc;
+#ifdef INET
+ struct ifaddr *ifa = (struct ifaddr *)data;
+#endif
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct if_laddrreq *lifr = (struct if_laddrreq *)data;
+ struct proc *p = curproc;
+ int error = 0, s;
+ struct ip_moptions *imo = &sc->sc_imo;
+
+ switch (cmd) {
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+#ifdef INET
+ if (ifa->ifa_addr->sa_family == AF_INET)
+ arp_ifinit(&sc->sc_ac, ifa);
+#endif
+ /* FALLTHROUGH */
+
+ case SIOCSIFFLAGS:
+ if (ifp->if_flags & IFF_UP) {
+ ifp->if_flags |= IFF_RUNNING;
+ } else {
+ ifp->if_flags &= ~IFF_RUNNING;
+ }
+ break;
+
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ if (ifr == 0) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ error = (cmd == SIOCADDMULTI) ?
+ ether_addmulti(ifr, &sc->sc_ac) :
+ ether_delmulti(ifr, &sc->sc_ac);
+ if (error == ENETRESET)
+ error = 0;
+ break;
+
+ case SIOCGIFMEDIA:
+ case SIOCSIFMEDIA:
+ error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
+ break;
+
+ case SIOCSLIFPHYADDR:
+ if ((error = suser(p, 0)) != 0)
+ break;
+ s = splnet();
+ error = vxlan_config(ifp,
+ (struct sockaddr *)&lifr->addr,
+ (struct sockaddr *)&lifr->dstaddr);
+ splx(s);
+ break;
+
+ case SIOCDIFPHYADDR:
+ if ((error = suser(p, 0)) != 0)
+ break;
+ s = splnet();
+ if (imo->imo_num_memberships > 0) {
+ in_delmulti(imo->imo_membership[
+ --imo->imo_num_memberships]);
+ imo->imo_multicast_ifp = NULL;
+ }
+ bzero(&sc->sc_src, sizeof(sc->sc_src));
+ bzero(&sc->sc_dst, sizeof(sc->sc_dst));
+ sc->sc_dstport = htons(VXLAN_PORT);
+ splx(s);
+ break;
+
+ case SIOCGLIFPHYADDR:
+ if (sc->sc_dst.ss_family == AF_UNSPEC) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ bzero(&lifr->addr, sizeof(lifr->addr));
+ bzero(&lifr->dstaddr, sizeof(lifr->dstaddr));
+ memcpy(&lifr->addr, &sc->sc_src, sc->sc_src.ss_len);
+ memcpy(&lifr->dstaddr, &sc->sc_dst, sc->sc_dst.ss_len);
+ break;
+
+ case SIOCSLIFPHYRTABLE:
+ if ((error = suser(p, 0)) != 0)
+ break;
+ if (ifr->ifr_rdomainid < 0 ||
+ ifr->ifr_rdomainid > RT_TABLEID_MAX ||
+ !rtable_exists(ifr->ifr_rdomainid)) {
+ error = EINVAL;
+ break;
+ }
+ s = splnet();
+ sc->sc_rtableid = ifr->ifr_rdomainid;
+ (void)vxlan_config(ifp, NULL, NULL);
+ splx(s);
+ break;
+
+ case SIOCGLIFPHYRTABLE:
+ ifr->ifr_rdomainid = sc->sc_rtableid;
+ break;
+
+ case SIOCSLIFPHYTTL:
+ if ((error = suser(p, 0)) != 0)
+ break;
+ if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) {
+ error = EINVAL;
+ break;
+ }
+ if (sc->sc_ttl == (u_int8_t)ifr->ifr_ttl)
+ break;
+ s = splnet();
+ sc->sc_ttl = (u_int8_t)(ifr->ifr_ttl);
+ (void)vxlan_config(ifp, NULL, NULL);
+ splx(s);
+ break;
+
+ case SIOCGLIFPHYTTL:
+ ifr->ifr_ttl = (int)sc->sc_ttl;
+ break;
+
+ case SIOCSVNETID:
+ if ((error = suser(p, 0)) != 0)
+ break;
+ if (ifr->ifr_vnetid < 0 ||
+ ifr->ifr_vnetid > 0x00ffffff) {
+ error = EINVAL;
+ break;
+ }
+ s = splnet();
+ sc->sc_vnetid = (u_int32_t)ifr->ifr_vnetid;
+ (void)vxlan_config(ifp, NULL, NULL);
+ splx(s);
+ break;
+
+ case SIOCGVNETID:
+ ifr->ifr_vnetid = (int)sc->sc_vnetid;
+ break;
+
+ default:
+ error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
+ break;
+ }
+
+ return (error);
+}
+
+int
+vxlan_media_change(struct ifnet *ifp)
+{
+ return (0);
+}
+
+void
+vxlan_media_status(struct ifnet *ifp, struct ifmediareq *imr)
+{
+ imr->ifm_active = IFM_ETHER | IFM_AUTO;
+ imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
+}
+
+int
+vxlan_lookup(struct mbuf *m, struct udphdr *uh, int iphlen,
+ struct sockaddr *srcsa)
+{
+ struct vxlan_softc *sc = NULL;
+ struct vxlan_header v;
+ u_int32_t vni;
+ struct ifnet *ifp;
+ int skip;
+ struct ether_header *eh;
+#if NBRIDGE > 0
+ struct sockaddr *sa;
+#endif
+
+ /* XXX Should verify the UDP port first before copying the packet */
+ skip = iphlen + sizeof(*uh);
+ if (m->m_pkthdr.len - skip < sizeof(v))
+ return (0);
+ m_copydata(m, skip, sizeof(v), (caddr_t)&v);
+ skip += sizeof(v);
+
+ vni = ntohl(v.vxlan_id);
+
+ /* Validate header */
+ if ((vni == 0) || (vni & VXLAN_RESERVED2) ||
+ (ntohl(v.vxlan_flags) != VXLAN_FLAGS_VNI))
+ return (0);
+
+ vni >>= VXLAN_VNI_S;
+ LIST_FOREACH(sc, &vxlan_tagh[VXLAN_TAGHASH(vni)],
+ sc_entry) {
+ if ((uh->uh_dport == sc->sc_dstport) &&
+ vni == sc->sc_vnetid &&
+ sc->sc_rtableid == rtable_l2(m->m_pkthdr.rdomain))
+ goto found;
+ }
+
+ /* not found */
+ return (0);
+
+ found:
+ m_adj(m, skip);
+ ifp = &sc->sc_ac.ac_if;
+ m->m_pkthdr.rcvif = ifp;
+
+ if ((eh = mtod(m, struct ether_header *)) == NULL)
+ return (EINVAL);
+
+#if NBRIDGE > 0
+ /* Store the peer IP address for the bridge */
+ if (ifp->if_bridgeport != NULL &&
+ srcsa->sa_family != AF_UNSPEC &&
+ (sa = bridge_tunneltag(m, srcsa->sa_family)) != NULL)
+ memcpy(sa, srcsa, sa->sa_len);
+#endif
+
+ /* Clear multicast flag from the outer packet */
+ if (sc->sc_imo.imo_num_memberships > 0 &&
+ m->m_flags & (M_MCAST) &&
+ !ETHER_IS_MULTICAST(eh->ether_dhost))
+ m->m_flags &= ~M_MCAST;
+
+#if NBPFILTER > 0
+ if (ifp->if_bpf)
+ bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN);
+#endif
+
+ m_adj(m, ETHER_HDR_LEN);
+
+#if NPF > 0
+ pf_pkt_addr_changed(m);
+#endif
+
+ ifp->if_ipackets++;
+ ether_input(ifp, eh, m);
+
+ /* success */
+ return (1);
+}
+
+int
+vxlan_output(struct ifnet *ifp, struct mbuf *m)
+{
+ struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc;
+#ifdef INET
+ struct udpiphdr *ui;
+ struct vxlanudpiphdr *vi;
+ u_int16_t len = m->m_pkthdr.len;
+ struct ip *ip;
+#if NBRIDGE > 0
+ struct sockaddr_in *sin;
+#endif
+#endif
+ int error;
+
+#ifdef INET
+ /* VXLAN header */
+ M_PREPEND(m, sizeof(*vi), M_DONTWAIT);
+ if (m == NULL) {
+ ifp->if_oerrors++;
+ return (ENOBUFS);
+ }
+
+ len += sizeof(struct vxlan_header);
+
+ ui = mtod(m, struct udpiphdr *);
+ ui->ui_pr = IPPROTO_UDP;
+ ui->ui_src = ((struct sockaddr_in *)&sc->sc_src)->sin_addr;
+ ui->ui_dst = ((struct sockaddr_in *)&sc->sc_dst)->sin_addr;
+ ui->ui_sport = sc->sc_dstport;
+ ui->ui_dport = sc->sc_dstport;
+ ui->ui_ulen = htons(sizeof(struct udphdr) + len);
+
+ ip = (struct ip *)ui;
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = sizeof(struct ip) >> 2;
+ ip->ip_id = htons(ip_randomid());
+ ip->ip_off = 0; /* htons(IP_DF); XXX should we disallow IP fragments? */
+ ip->ip_tos = IPTOS_LOWDELAY;
+ ip->ip_len = htons(sizeof(struct udpiphdr) + len);
+ if (sc->sc_ttl > 0)
+ ip->ip_ttl = sc->sc_ttl;
+ else
+ ip->ip_ttl = IPDEFTTL;
+
+#if NBRIDGE > 0
+ if ((sin = (struct sockaddr_in *)bridge_tunnel(m)) != NULL &&
+ sin->sin_family == AF_INET) {
+ ui->ui_dst = sin->sin_addr;
+
+ /*
+ * If the LINK0 flag is set, send the packet back to
+ * the original source port of the endport, otherwise use
+ * the configured VXLAN port.
+ */
+ if (ifp->if_flags & IFF_LINK0)
+ ui->ui_dport = sin->sin_port;
+ }
+ if (sin != NULL)
+ bridge_tunneluntag(m);
+#endif
+
+ vi = (struct vxlanudpiphdr *)ui;
+ vi->ui_v.vxlan_flags = htonl(VXLAN_FLAGS_VNI);
+ vi->ui_v.vxlan_id = htonl(sc->sc_vnetid << VXLAN_VNI_S);
+
+ /* UDP checksum should be 0 */
+ ui->ui_sum = 0;
+#endif
+
+ ifp->if_opackets++;
+ ifp->if_obytes += m->m_pkthdr.len;
+
+ m->m_pkthdr.rdomain = sc->sc_rtableid;
+
+#if NPF > 0
+ pf_pkt_addr_changed(m);
+#endif
+
+#ifdef INET
+ if ((error =
+ ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))) {
+ ifp->if_oerrors++;
+ }
+#endif
+
+ return (error);
+}