diff options
author | Reyk Floeter <reyk@cvs.openbsd.org> | 2013-10-13 10:10:05 +0000 |
---|---|---|
committer | Reyk Floeter <reyk@cvs.openbsd.org> | 2013-10-13 10:10:05 +0000 |
commit | 728bebe5cfbcacfe290ca38c4777c371e5d0560e (patch) | |
tree | 21318ce06a08909b963e7238f07971e33a148f46 | |
parent | dc9aa5495448804b764c4be959bf635dcc8fb033 (diff) |
Import vxlan(4), the virtual extensible local area network tunnel
interface. VXLAN is a UDP-based tunnelling protocol for overlaying
virtualized layer 2 networks over layer 3 networks. The implementation
is based on draft-mahalingam-dutt-dcops-vxlan-04 and has been tested
with other implementations in the wild.
put it in deraadt@
-rw-r--r-- | sbin/ifconfig/brconfig.c | 13 | ||||
-rw-r--r-- | sbin/ifconfig/ifconfig.8 | 20 | ||||
-rw-r--r-- | sbin/ifconfig/ifconfig.c | 69 | ||||
-rw-r--r-- | share/man/man4/vxlan.4 | 165 | ||||
-rw-r--r-- | sys/conf/GENERIC | 3 | ||||
-rw-r--r-- | sys/conf/files | 4 | ||||
-rw-r--r-- | sys/net/if.h | 4 | ||||
-rw-r--r-- | sys/net/if_bridge.c | 121 | ||||
-rw-r--r-- | sys/net/if_bridge.h | 15 | ||||
-rw-r--r-- | sys/net/if_vxlan.c | 590 | ||||
-rw-r--r-- | sys/net/if_vxlan.h | 77 | ||||
-rw-r--r-- | sys/netinet/udp_usrreq.c | 22 | ||||
-rw-r--r-- | sys/sys/mbuf.h | 3 | ||||
-rw-r--r-- | sys/sys/sockio.h | 8 |
14 files changed, 1086 insertions, 28 deletions
diff --git a/sbin/ifconfig/brconfig.c b/sbin/ifconfig/brconfig.c index 05f29d5c684..5c38d457c3e 100644 --- a/sbin/ifconfig/brconfig.c +++ b/sbin/ifconfig/brconfig.c @@ -1,4 +1,4 @@ -/* $OpenBSD: brconfig.c,v 1.6 2012/12/22 13:20:32 camield Exp $ */ +/* $OpenBSD: brconfig.c,v 1.7 2013/10/13 10:10:00 reyk Exp $ */ /* * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net) @@ -40,6 +40,7 @@ #include <netinet/in.h> #include <netinet/if_ether.h> #include <net/if_bridge.h> +#include <netdb.h> #include <string.h> #include <err.h> #include <errno.h> @@ -609,9 +610,13 @@ bridge_addaddr(const char *ifname, const char *addr) void bridge_addrs(const char *delim, int d) { + char dstaddr[NI_MAXHOST]; + char dstport[NI_MAXSERV]; + const int niflag = NI_NUMERICHOST; struct ifbaconf ifbac; struct ifbareq *ifba; char *inbuf = NULL, buf[sizeof(ifba->ifba_ifsname) + 1], *inb; + struct sockaddr *sa; int i, len = 8192; /* ifconfig will call us with the argv of the command */ @@ -640,7 +645,13 @@ bridge_addrs(const char *delim, int d) strlcpy(buf, ifba->ifba_ifsname, sizeof(buf)); printf("%s%s %s %u ", delim, ether_ntoa(&ifba->ifba_dst), buf, ifba->ifba_age); + sa = (struct sockaddr *)&ifba->ifba_dstsa; printb("flags", ifba->ifba_flags, IFBAFBITS); + if (sa->sa_family != AF_UNSPEC && + getnameinfo(sa, sa->sa_len, + dstaddr, sizeof(dstaddr), + dstport, sizeof(dstport), niflag) == 0) + printf(" tunnel %s:%s", dstaddr, dstport); printf("\n"); } free(inbuf); diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8 index 13f3e7a2b9b..5d3133f9c41 100644 --- a/sbin/ifconfig/ifconfig.8 +++ b/sbin/ifconfig/ifconfig.8 @@ -1,4 +1,4 @@ -.\" $OpenBSD: ifconfig.8,v 1.235 2013/09/13 14:32:52 florian Exp $ +.\" $OpenBSD: ifconfig.8,v 1.236 2013/10/13 10:10:00 reyk Exp $ .\" $NetBSD: ifconfig.8,v 1.11 1996/01/04 21:27:29 pk Exp $ .\" $FreeBSD: ifconfig.8,v 1.16 1998/02/01 07:03:29 steve Exp $ .\" @@ -31,7 +31,7 @@ .\" .\" @(#)ifconfig.8 8.4 (Berkeley) 6/1/94 .\" -.Dd $Mdocdate: September 
13 2013 $ +.Dd $Mdocdate: October 13 2013 $ .Dt IFCONFIG 8 .Os .Sh NAME @@ -1428,6 +1428,7 @@ for a complete list of the available protocols, .Op Oo Fl Oc Ns Cm keepalive Ar period count .Op Cm tunnel Ar src_address dest_address .Op Cm tunneldomain Ar route-id +.Op Cm vnetid Ar network-id .Ek .nr nS 0 .Pp @@ -1454,13 +1455,16 @@ is 2 since the round-trip time of keepalive packets needs to be accounted for. Disable the .Xr gre 4 keepalive mechanism. -.It Cm tunnel Ar src_address dest_address +.It Cm tunnel Ar src_address dest_address Ns Op Ns : Ns Ar dest_port Set the source and destination tunnel addresses on a tunnel interface, including .Xr gif 4 . Packets routed to this interface will be encapsulated in IPv4 or IPv6, depending on the source and destination address families. Both addresses must be of the same family. +The optional destination port can be specified for interfaces such as +.Xr vxlan 4 , +which further encapsulate the packets in UDP datagrams. .It Cm tunneldomain Ar route-id Use routing table .Ar route-id @@ -1470,6 +1474,16 @@ interface itself. .Ar route-id can be set to any valid routing table ID; the corresponding routing domain is derived from this table. +.It Cm tunnelttl Ar ttl +Set the IP or multicast TTL of the tunnel packets. +.It Cm vnetid Ar network-id +Set the virtual network identifier. +This is a number which is used by tunnel protocols such as +.Xr vxlan 4 +to identify packets with a virtual network. +The accepted size of the number depends on the individual tunnel protocol; +it is a 24-bit number for +.Xr vxlan 4 .
.El .\" VLAN .Sh VLAN diff --git a/sbin/ifconfig/ifconfig.c b/sbin/ifconfig/ifconfig.c index 58721e86796..d67a74d530d 100644 --- a/sbin/ifconfig/ifconfig.c +++ b/sbin/ifconfig/ifconfig.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ifconfig.c,v 1.271 2013/10/09 20:23:46 reyk Exp $ */ +/* $OpenBSD: ifconfig.c,v 1.272 2013/10/13 10:10:00 reyk Exp $ */ /* $NetBSD: ifconfig.c,v 1.40 1997/10/01 02:19:43 enami Exp $ */ /* @@ -168,6 +168,8 @@ void setifprefixlen(const char *, int); void settunnel(const char *, const char *); void deletetunnel(const char *, int); void settunnelinst(const char *, int); +void settunnelttl(const char *, int); +void setvnetid(const char *, int); #ifdef INET6 void setia6flags(const char *, int); void setia6pltime(const char *, int); @@ -380,6 +382,8 @@ const struct cmd { { "tunnel", NEXTARG2, 0, NULL, settunnel } , { "deletetunnel", 0, 0, deletetunnel } , { "tunneldomain", NEXTARG, 0, settunnelinst } , + { "tunnelttl", NEXTARG, 0, settunnelttl } , + { "vnetid", NEXTARG, 0, setvnetid }, { "pppoedev", NEXTARG, 0, setpppoe_dev }, { "pppoesvc", NEXTARG, 0, setpppoe_svc }, { "-pppoesvc", 1, 0, setpppoe_svc }, @@ -2694,6 +2698,7 @@ phys_status(int force) const char *ver = ""; const int niflag = NI_NUMERICHOST; struct if_laddrreq req; + in_port_t dstport = 0; psrcaddr[0] = pdstaddr[0] = '\0'; @@ -2713,9 +2718,13 @@ phys_status(int force) ver = "6"; #endif /* INET6 */ + if (req.dstaddr.ss_family == AF_INET) + dstport = ((struct sockaddr_in *)&req.dstaddr)->sin_port; #ifdef INET6 - if (req.dstaddr.ss_family == AF_INET6) + else if (req.dstaddr.ss_family == AF_INET6) { in6_fillscopeid((struct sockaddr_in6 *)&req.dstaddr); + dstport = ((struct sockaddr_in6 *)&req.dstaddr)->sin6_port; + } #endif /* INET6 */ if (getnameinfo((struct sockaddr *)&req.dstaddr, req.dstaddr.ss_len, pdstaddr, sizeof(pdstaddr), 0, 0, niflag) != 0) @@ -2724,6 +2733,15 @@ phys_status(int force) printf("\ttunnel: inet%s %s -> %s", ver, psrcaddr, pdstaddr); + if (dstport) + printf(":%u", 
ntohs(dstport)); + + if (ioctl(s, SIOCGVNETID, (caddr_t)&ifr) == 0 && ifr.ifr_vnetid > 0) + printf(" vnetid %d", ifr.ifr_vnetid); + + if (ioctl(s, SIOCGLIFPHYTTL, (caddr_t)&ifr) == 0 && ifr.ifr_ttl > 0) + printf(" ttl %d", ifr.ifr_ttl); + #ifndef SMALL if (ioctl(s, SIOCGLIFPHYRTABLE, (caddr_t)&ifr) == 0 && (rdomainid != 0 || ifr.ifr_rdomainid != 0)) @@ -3140,15 +3158,28 @@ in6_status(int force) void settunnel(const char *src, const char *dst) { + char buf[MAXHOSTNAMELEN+sizeof (":65535")], *dstport; + const char *dstip; struct addrinfo *srcres, *dstres; int ecode; struct if_laddrreq req; + if (strchr (dst, ':') == NULL) { + dstip = dst; + dstport = NULL; + } else { + if (strlcpy(buf, dst, sizeof(buf)) >= sizeof(buf)) + errx(1, "%s bad value", dst); + dstport = strchr(buf, ':'); + *dstport++ = '\0'; + dstip = buf; + } + if ((ecode = getaddrinfo(src, NULL, NULL, &srcres)) != 0) errx(1, "error in parsing address string: %s", gai_strerror(ecode)); - if ((ecode = getaddrinfo(dst, NULL, NULL, &dstres)) != 0) + if ((ecode = getaddrinfo(dstip, dstport, NULL, &dstres)) != 0) errx(1, "error in parsing address string: %s", gai_strerror(ecode)); @@ -3196,6 +3227,38 @@ settunnelinst(const char *id, int param) } void +settunnelttl(const char *id, int param) +{ + const char *errmsg = NULL; + int ttl; + + ttl = strtonum(id, 0, 0xff, &errmsg); + if (errmsg) + errx(1, "tunnelttl %s: %s", id, errmsg); + + strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); + ifr.ifr_ttl = ttl; + if (ioctl(s, SIOCSLIFPHYTTL, (caddr_t)&ifr) < 0) + warn("SIOCSLIFPHYTTL"); +} + +void +setvnetid(const char *id, int param) +{ + const char *errmsg = NULL; + int vnetid; + + vnetid = strtonum(id, 0, UINT_MAX, &errmsg); + if (errmsg) + errx(1, "vnetid %s: %s", id, errmsg); + + strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); + ifr.ifr_vnetid = vnetid; + if (ioctl(s, SIOCSVNETID, (caddr_t)&ifr) < 0) + warn("SIOCSVNETID"); +} + +void mpe_status(void) { struct shim_hdr shim; diff --git a/share/man/man4/vxlan.4 
b/share/man/man4/vxlan.4 new file mode 100644 index 00000000000..308b7af91ad --- /dev/null +++ b/share/man/man4/vxlan.4 @@ -0,0 +1,165 @@ +.\" $OpenBSD: vxlan.4,v 1.1 2013/10/13 10:10:01 reyk Exp $ +.\" +.\" Copyright (c) 2013 Reyk Floeter <reyk@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: October 13 2013 $ +.Dt VXLAN 4 +.Os +.Sh NAME +.Nm vxlan +.Nd virtual extensible local area network tunnel interface +.Sh SYNOPSIS +.Cd "pseudo-device vxlan" +.Sh DESCRIPTION +The +.Nm +interface is a tunnelling pseudo-device for overlaying virtualized +layer 2 networks over layer 3 networks. +.Pp +A +.Nm +interface can be created using the +.Ic ifconfig vxlan Ns Ar N Ic create +command. +Once configured, the interface encapsulates and decapsulates Ethernet +frames in UDP datagrams that are exchanged with tunnel endpoints. +The default UDP port for VXLAN traffic is 4789. +.Pp +Each +.Nm +interface uses a 24-bit +.Ic vnetid +(virtual network identifier) +that makes it possible to distinguish multiple virtualized layer 2 networks and +their tunnels between identical tunnel endpoints.
+.Pp +The interface can operate in three different tunnel modes: +.Bl -tag -width multicast +.It Ic unicast mode +When a unicast IP address is configured as the tunnel destination, +all traffic is sent to a single tunnel endpoint. +.It Ic multicast mode +When a multicast IP address is configured as the tunnel destination, +all traffic is sent to all the tunnel endpoints that subscribed to the +specified multicast group. +.It Ic dynamic mode +When +.Nm +is configured for multicast mode and added to a +.Xr bridge 4 , +all broadcast and multicast traffic is sent to the multicast group, +but directed traffic is sent to unicast IP addresses of individual tunnel +endpoints, as they are learned by the bridge. +.Xr +.El +.Pp +The configuration can be done at runtime or by setting up a +.Xr hostname.if 5 +configuration file for +.Xr netstart 8 . +.Sh EXAMPLES +Create a tunnel to a unicast tunnel endpoint, using the virtual network +identifier 5: +.Bd -literal -offset indent +# ifconfig vxlan0 tunnel 192.168.1.100 192.168.1.200 vnetid 5 +# ifconfig vxlan0 10.1.1.100/24 +.Ed +.Pp +The following example creates a dynamic tunnel that is attached to a +.Xr bridge 4 : +.Bd -literal -offset indent +# ifconfig vxlan0 tunnel 192.168.1.100 239.1.1.100 vnetid 7395 +# ifconfig vxlan0 10.1.2.100/24 +# ifconfig bridge0 add vxlan0 up +.Ed +.Pp +Prior to the assignment of UDP port 4789 by IANA, some early VXLAN +implementations used port 8472. +A non-standard port can be specified with the tunnel destination +address: +.Bd -literal -offset indent +# ifconfig vxlan0 tunnel 192.168.1.100 239.1.1.100:8472 +.Ed +.Sh SECURITY +.Nm +does not provide any integrated security features. +It is designed to be a simple protocol that can be used in trusted +data center environments, to carry VM traffic between virtual machine +hypervisors, and provide virtualized layer 2 networks in Cloud +infrastructures.
+.Pp +To protect +.Nm +tunnels, the traffic can be protected with IPsec to add authentication +and encryption for confidentiality. +.Pp +The Packet Filter (PF) can be used to filter tunnel traffic with +endpoint policies in +.Xr pf.conf 5 : +.Bd -literal -offset indent +table <vxlantep> { 192.168.1.200 192.168.1.201 } +block in on vmx0 +pass out on vmx0 +pass in on vmx0 proto udp from <vxlantep> to port 4789 +.Ed +.Pp +The Time-to-Live (TTL) value of the tunnel can be set to 1 or a low +value to restrict the traffic to the local network: +.Bd -literal -offset indent +# ifconfig vxlan0 tunnelttl 1 +.Ed +.Sh SEE ALSO +.Xr bridge 4 , +.Xr inet 4 , +.Xr hostname.if 5 , +.Xr ifconfig 8 , +.Xr netstart 8 +.Sh HISTORY +The +.Nm +device first appeared in +.Ox 5.5 . +.Sh AUTHORS +The +.Nm +driver was written by +.An Reyk Floeter Aq Mt reyk@openbsd.org . +.Sh STANDARDS +.Rs +.%A M. Mahalingam +.%A D. Dutt +.%A K. Duda +.%A P. Agarwal +.%A L. Kreeger +.%A T. Sridhar +.%A M. Bursell +.%A C. Wright +.%D May 2013 +.%R draft-mahalingam-dutt-dcops-vxlan-04 +.%T VXLAN: A Framework for Overlaying Virtualized Layer 2 Networks over Layer 3 Networks. +.Re +.Sh CAVEATS +The +.Nm +interface requires at least 50 bytes for the IP, UDP and VXLAN +protocol overhead and optionally 4 bytes for the encapsulated VLAN tag. +The default MTU is set to 1450 bytes but can be adjusted if the +transport interfaces carrying the tunnel traffic support larger MTUs. +It is recommended to set the MTU of the transport interfaces to at +least 1550 bytes and to bump the MTU of the +.Nm +interfaces to 1500 bytes accordingly. +.Pp +The implementation does not support IPv6 tunnel endpoints at present. 
diff --git a/sys/conf/GENERIC b/sys/conf/GENERIC index 7f05181509a..c4929576efa 100644 --- a/sys/conf/GENERIC +++ b/sys/conf/GENERIC @@ -1,4 +1,4 @@ -# $OpenBSD: GENERIC,v 1.202 2013/10/01 06:57:25 dlg Exp $ +# $OpenBSD: GENERIC,v 1.203 2013/10/13 10:10:01 reyk Exp $ # # Machine-independent option; used by all architectures for their # GENERIC kernel @@ -107,6 +107,7 @@ pseudo-device sppp 1 # Sync PPP/HDLC pseudo-device trunk # Trunking support pseudo-device tun # network tunneling over tty pseudo-device vether # Virtual ethernet +#pseudo-device vxlan # Virtual extensible LAN pseudo-device vlan # IEEE 802.1Q VLAN pseudo-device bio 1 # ioctl multiplexing device diff --git a/sys/conf/files b/sys/conf/files index 58635aeb080..94292f0db0a 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,4 +1,4 @@ -# $OpenBSD: files,v 1.555 2013/10/12 12:02:03 henning Exp $ +# $OpenBSD: files,v 1.556 2013/10/13 10:10:02 reyk Exp $ # $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $ # @(#)files.newconf 7.5 (Berkeley) 5/10/93 @@ -541,6 +541,7 @@ pseudo-device trunk: ifnet, ether, ifmedia pseudo-device mpe: ifnet, ether pseudo-device vether: ifnet, ether pseudo-device pppx: ifnet +pseudo-device vxlan: ifnet, ether, ifmedia pseudo-device systrace @@ -800,6 +801,7 @@ file net/if_mpe.c mpe needs-count file net/if_vether.c vether needs-count file net/if_pppx.c pppx needs-count file net/if_aoe.c ether & aoe +file net/if_vxlan.c vxlan needs-count file net80211/ieee80211.c wlan file net80211/ieee80211_amrr.c wlan file net80211/ieee80211_crypto.c wlan diff --git a/sys/net/if.h b/sys/net/if.h index a95ef74c2b1..508407cd6bd 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if.h,v 1.147 2013/10/12 12:13:10 henning Exp $ */ +/* $OpenBSD: if.h,v 1.148 2013/10/13 10:10:02 reyk Exp $ */ /* $NetBSD: if.h,v 1.23 1996/05/07 02:40:27 thorpej Exp $ */ /* @@ -641,6 +641,8 @@ struct ifreq { #define ifr_hardmtu ifr_ifru.ifru_metric /* hardmtu (overload) */ #define ifr_media 
ifr_ifru.ifru_metric /* media options (overload) */ #define ifr_rdomainid ifr_ifru.ifru_metric /* VRF instance (overload) */ +#define ifr_vnetid ifr_ifru.ifru_metric /* Virtual Net Id (overload) */ +#define ifr_ttl ifr_ifru.ifru_metric /* tunnel TTL (overload) */ #define ifr_data ifr_ifru.ifru_data /* for use by interface */ }; diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 9ea29ba1a3f..c1798b0371b 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_bridge.c,v 1.215 2013/10/12 11:55:45 henning Exp $ */ +/* $OpenBSD: if_bridge.c,v 1.216 2013/10/13 10:10:02 reyk Exp $ */ /* * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net) @@ -131,9 +131,9 @@ int bridge_rtfind(struct bridge_softc *, struct ifbaconf *); void bridge_rtage(struct bridge_softc *); int bridge_rtdaddr(struct bridge_softc *, struct ether_addr *); void bridge_rtflush(struct bridge_softc *, int); -struct ifnet * bridge_rtupdate(struct bridge_softc *, - struct ether_addr *, struct ifnet *ifp, int, u_int8_t); -struct ifnet * bridge_rtlookup(struct bridge_softc *, +struct ifnet *bridge_rtupdate(struct bridge_softc *, + struct ether_addr *, struct ifnet *ifp, int, u_int8_t, struct mbuf *); +struct bridge_rtnode *bridge_rtlookup(struct bridge_softc *, struct ether_addr *); u_int32_t bridge_hash(struct bridge_softc *, struct ether_addr *); int bridge_blocknonip(struct ether_header *, struct mbuf *); @@ -161,6 +161,7 @@ int bridge_ipsec(struct bridge_softc *, struct ifnet *, int bridge_clone_create(struct if_clone *, int); int bridge_clone_destroy(struct ifnet *ifp); int bridge_delete(struct bridge_softc *, struct bridge_iflist *); +void bridge_tunnelupdate(struct sockaddr *, struct sockaddr *); #define ETHERADDR_IS_IP_MCAST(a) \ /* struct etheraddr *a; */ \ @@ -550,7 +551,7 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) } ifs = bridge_rtupdate(sc, &bareq->ifba_dst, ifs, 1, - bareq->ifba_flags); + bareq->ifba_flags, NULL); if (ifs == 
NULL) error = ENOMEM; break; @@ -728,7 +729,7 @@ bridge_update(struct ifnet *ifp, struct ether_addr *ea, int delete) if (!delete) { /* Update the bridge table */ - bridge_rtupdate(sc, ea, ifp, 0, IFBAF_DYNAMIC); + bridge_rtupdate(sc, ea, ifp, 0, IFBAF_DYNAMIC, NULL); } } } @@ -946,7 +947,8 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, struct rtentry *rt) { struct ether_header *eh; - struct ifnet *dst_if; + struct ifnet *dst_if = NULL; + struct bridge_rtnode *dst_p = NULL; struct ether_addr *dst; struct bridge_softc *sc; int s, error, len; @@ -990,7 +992,8 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, * If the packet is a broadcast or we don't know a better way to * get there, send to all interfaces. */ - dst_if = bridge_rtlookup(sc, dst); + if ((dst_p = bridge_rtlookup(sc, dst)) != NULL) + dst_if = dst_p->brt_if; if (dst_if == NULL || ETHER_IS_MULTICAST(eh->ether_dhost)) { struct bridge_iflist *p; struct mbuf *mc; @@ -1083,6 +1086,10 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, } sendunicast: + if (dst_p != NULL && dst_p->brt_tunnel.sa.sa_family != AF_UNSPEC && + (sa = bridge_tunneltag(m, dst_p->brt_tunnel.sa.sa_family)) != NULL) + memcpy(sa, &dst_p->brt_tunnel.sa, dst_p->brt_tunnel.sa.sa_len); + bridge_span(sc, NULL, m); if ((dst_if->if_flags & IFF_RUNNING) == 0) { m_freem(m); @@ -1133,6 +1140,7 @@ bridgeintr_frame(struct bridge_softc *sc, struct mbuf *m) int s, len; struct ifnet *src_if, *dst_if; struct bridge_iflist *ifl; + struct bridge_rtnode *dst_p; struct ether_addr *dst, *src; struct ether_header eh; @@ -1175,7 +1183,7 @@ bridgeintr_frame(struct bridge_softc *sc, struct mbuf *m) !(eh.ether_shost[0] == 0 && eh.ether_shost[1] == 0 && eh.ether_shost[2] == 0 && eh.ether_shost[3] == 0 && eh.ether_shost[4] == 0 && eh.ether_shost[5] == 0)) - bridge_rtupdate(sc, src, src_if, 0, IFBAF_DYNAMIC); + bridge_rtupdate(sc, src, src_if, 0, IFBAF_DYNAMIC, m); if ((ifl->bif_flags & IFBIF_STP) && 
(ifl->bif_state == BSTP_IFSTATE_LEARNING)) { @@ -1193,7 +1201,10 @@ bridgeintr_frame(struct bridge_softc *sc, struct mbuf *m) * side of the bridge, drop it. */ if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) { - dst_if = bridge_rtlookup(sc, dst); + if ((dst_p = bridge_rtlookup(sc, dst)) != NULL) + dst_if = dst_p->brt_if; + else + dst_if = NULL; if (dst_if == src_if) { m_freem(m); return; @@ -1422,7 +1433,7 @@ bridge_input(struct ifnet *ifp, struct ether_header *eh, struct mbuf *m) if (srcifl->bif_flags & IFBIF_LEARNING) bridge_rtupdate(sc, (struct ether_addr *)&eh->ether_shost, - ifp, 0, IFBAF_DYNAMIC); + ifp, 0, IFBAF_DYNAMIC, m); if (bridge_filterrule(&srcifl->bif_brlin, eh, m) == BRL_ACTION_BLOCK) { m_freem(m); @@ -1685,12 +1696,18 @@ bridge_span(struct bridge_softc *sc, struct ether_header *eh, struct ifnet * bridge_rtupdate(struct bridge_softc *sc, struct ether_addr *ea, - struct ifnet *ifp, int setflags, u_int8_t flags) + struct ifnet *ifp, int setflags, u_int8_t flags, struct mbuf *m) { struct bridge_rtnode *p, *q; + struct sockaddr *sa = NULL; u_int32_t h; int dir; + if (m != NULL) { + /* Check if the mbuf was tagged with a tunnel endpoint addr */ + sa = bridge_tunnel(m); + } + h = bridge_hash(sc, ea); p = LIST_FIRST(&sc->sc_rts[h]); if (p == NULL) { @@ -1703,6 +1720,7 @@ bridge_rtupdate(struct bridge_softc *sc, struct ether_addr *ea, bcopy(ea, &p->brt_addr, sizeof(p->brt_addr)); p->brt_if = ifp; p->brt_age = 1; + bridge_tunnelupdate(sa, (struct sockaddr *)&p->brt_tunnel); if (setflags) p->brt_flags = flags; @@ -1729,6 +1747,9 @@ bridge_rtupdate(struct bridge_softc *sc, struct ether_addr *ea, if (q->brt_if == ifp) q->brt_age = 1; ifp = q->brt_if; + bridge_tunnelupdate(sa, + (struct sockaddr *)&q->brt_tunnel); + goto want; } @@ -1742,6 +1763,8 @@ bridge_rtupdate(struct bridge_softc *sc, struct ether_addr *ea, bcopy(ea, &p->brt_addr, sizeof(p->brt_addr)); p->brt_if = ifp; p->brt_age = 1; + bridge_tunnelupdate(sa, + (struct sockaddr *)&p->brt_tunnel); if 
(setflags) p->brt_flags = flags; @@ -1763,6 +1786,8 @@ bridge_rtupdate(struct bridge_softc *sc, struct ether_addr *ea, bcopy(ea, &p->brt_addr, sizeof(p->brt_addr)); p->brt_if = ifp; p->brt_age = 1; + bridge_tunnelupdate(sa, + (struct sockaddr *)&p->brt_tunnel); if (setflags) p->brt_flags = flags; @@ -1780,7 +1805,7 @@ want: return (ifp); } -struct ifnet * +struct bridge_rtnode * bridge_rtlookup(struct bridge_softc *sc, struct ether_addr *ea) { struct bridge_rtnode *p; @@ -1791,7 +1816,7 @@ bridge_rtlookup(struct bridge_softc *sc, struct ether_addr *ea) LIST_FOREACH(p, &sc->sc_rts[h], brt_next) { dir = memcmp(ea, &p->brt_addr, sizeof(p->brt_addr)); if (dir == 0) - return (p->brt_if); + return (p); if (dir > 0) goto fail; } @@ -2018,6 +2043,12 @@ bridge_rtfind(struct bridge_softc *sc, struct ifbaconf *baconf) sizeof(bareq.ifba_ifsname)); bcopy(&n->brt_addr, &bareq.ifba_dst, sizeof(bareq.ifba_dst)); + if (n->brt_tunnel.sa.sa_family != AF_UNSPEC) + bcopy(&n->brt_tunnel.sa, + &bareq.ifba_dstsa, + n->brt_tunnel.sa.sa_len); + else + bareq.ifba_dstsa.ss_family = AF_UNSPEC; bareq.ifba_age = n->brt_age; bareq.ifba_flags = n->brt_flags; error = copyout((caddr_t)&bareq, @@ -2823,3 +2854,65 @@ bridge_send_icmp_err(struct bridge_softc *sc, struct ifnet *ifp, m_freem(n); } #endif + +struct sockaddr * +bridge_tunnel(struct mbuf *m) +{ + struct m_tag *mtag; + + if ((mtag = m_tag_find(m, PACKET_TAG_TUNNEL, NULL)) == NULL) + return (NULL); + + return ((struct sockaddr *)(mtag + 1)); +} + +struct sockaddr * +bridge_tunneltag(struct mbuf *m, int af) +{ + struct m_tag *mtag; + size_t len; + struct sockaddr *sa; + + if ((mtag = m_tag_find(m, PACKET_TAG_TUNNEL, NULL)) != NULL) { + sa = (struct sockaddr *)(mtag + 1); + if (sa->sa_family != af) { + m_tag_delete(m, mtag); + mtag = NULL; + } + } + if (mtag == NULL) { + if (af == AF_INET) + len = sizeof(struct sockaddr_in); + else if (af == AF_INET6) + len = sizeof(struct sockaddr_in6); + else + return (NULL); + mtag = 
m_tag_get(PACKET_TAG_TUNNEL, len, M_NOWAIT); + if (mtag == NULL) + return (NULL); + bzero(mtag + 1, len); + sa = (struct sockaddr *)(mtag + 1); + sa->sa_family = af; + sa->sa_len = len; + m_tag_prepend(m, mtag); + } + + return ((struct sockaddr *)(mtag + 1)); +} + +void +bridge_tunneluntag(struct mbuf *m) +{ + struct m_tag *mtag; + if ((mtag = m_tag_find(m, PACKET_TAG_TUNNEL, NULL)) != NULL) + m_tag_delete(m, mtag); +} + +void +bridge_tunnelupdate(struct sockaddr *sa, struct sockaddr *tunnel) +{ + if (sa != NULL && sa->sa_family != AF_UNSPEC) + memcpy(tunnel, sa, sa->sa_len); + else + tunnel->sa_family = AF_UNSPEC; +} diff --git a/sys/net/if_bridge.h b/sys/net/if_bridge.h index 3fb604dba6e..db956d960cd 100644 --- a/sys/net/if_bridge.h +++ b/sys/net/if_bridge.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if_bridge.h,v 1.37 2013/01/23 13:28:36 camield Exp $ */ +/* $OpenBSD: if_bridge.h,v 1.38 2013/10/13 10:10:03 reyk Exp $ */ /* * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net) @@ -122,6 +122,7 @@ struct ifbareq { u_int8_t ifba_age; /* address age */ u_int8_t ifba_flags; /* address flags */ struct ether_addr ifba_dst; /* destination addr */ + struct sockaddr_storage ifba_dstsa; /* tunnel endpoint */ }; #define IFBAF_TYPEMASK 0x03 /* address type mask */ @@ -406,6 +407,15 @@ struct bridge_rtnode { u_int8_t brt_flags; /* address flags */ u_int8_t brt_age; /* age counter */ struct ether_addr brt_addr; /* dst addr */ + union { + struct sockaddr sa; +#ifdef INET + struct sockaddr_in sin; +#endif +#ifdef INET6 + struct sockaddr_in6 sin6; +#endif + } brt_tunnel; /* tunnel endpoint */ }; #ifndef BRIDGE_RTABLE_SIZE @@ -441,6 +451,9 @@ int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *, void bridge_update(struct ifnet *, struct ether_addr *, int); void bridge_rtdelete(struct bridge_softc *, struct ifnet *, int); void bridge_rtagenode(struct ifnet *, int); +struct sockaddr *bridge_tunnel(struct mbuf *); +struct sockaddr *bridge_tunneltag(struct mbuf *, int); 
+void bridge_tunneluntag(struct mbuf *); struct bstp_state *bstp_create(struct ifnet *); void bstp_destroy(struct bstp_state *); diff --git a/sys/net/if_vxlan.c b/sys/net/if_vxlan.c new file mode 100644 index 00000000000..69323992ccf --- /dev/null +++ b/sys/net/if_vxlan.c @@ -0,0 +1,590 @@ +/* $OpenBSD: if_vxlan.c,v 1.1 2013/10/13 10:10:03 reyk Exp $ */ + +/* + * Copyright (c) 2013 Reyk Floeter <reyk@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include "bpfilter.h" +#include "vxlan.h" +#include "vlan.h" +#include "pf.h" +#include "bridge.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/ioctl.h> + +#include <net/if.h> +#include <net/if_dl.h> +#include <net/if_media.h> +#include <net/route.h> + +#if NBPFILTER > 0 +#include <net/bpf.h> +#endif + +#if NPF > 0 +#include <net/pfvar.h> +#endif + +#include <netinet/in.h> +#include <netinet/in_var.h> +#include <netinet/in_systm.h> +#include <netinet/if_ether.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/udp.h> +#include <netinet/udp_var.h> +#include <netinet/in_pcb.h> + +#if NBRIDGE > 0 +#include <net/if_bridge.h> +#endif + +#include <net/if_vxlan.h> + +void vxlanattach(int); +int vxlanioctl(struct ifnet *, u_long, caddr_t); +void vxlanstart(struct ifnet *); +int vxlan_clone_create(struct if_clone *, int); +int vxlan_clone_destroy(struct ifnet *); +int vxlan_media_change(struct ifnet *); +void vxlan_media_status(struct ifnet *, struct ifmediareq *); +int vxlan_config(struct ifnet *, struct sockaddr *, struct sockaddr *); +int vxlan_output(struct ifnet *, struct mbuf *); + +struct if_clone vxlan_cloner = + IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy); + +int vxlan_enable = 0; +u_long vxlan_tagmask; + +#define VXLAN_TAGHASHSIZE 32 +#define VXLAN_TAGHASH(tag) (tag & vxlan_tagmask) +LIST_HEAD(vxlan_taghash, vxlan_softc) *vxlan_tagh; + +void +vxlanattach(int count) +{ + if ((vxlan_tagh = hashinit(VXLAN_TAGHASHSIZE, M_DEVBUF, M_NOWAIT, + &vxlan_tagmask)) == NULL) + panic("vxlanattach: hashinit"); + + if_clone_attach(&vxlan_cloner); +} + +int +vxlan_clone_create(struct if_clone *ifc, int unit) +{ + struct ifnet *ifp; + struct vxlan_softc *sc; + + if ((sc = malloc(sizeof(*sc), + M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) + return (ENOMEM); + + sc->sc_imo.imo_membership = malloc( + (sizeof(struct in_multi *) * 
IP_MIN_MEMBERSHIPS), M_IPMOPTS, + M_WAITOK|M_ZERO); + sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; + sc->sc_dstport = htons(VXLAN_PORT); + sc->sc_vnetid = 0; + + ifp = &sc->sc_ac.ac_if; + snprintf(ifp->if_xname, sizeof ifp->if_xname, "vxlan%d", unit); + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ether_fakeaddr(ifp); + + ifp->if_softc = sc; + ifp->if_ioctl = vxlanioctl; + ifp->if_start = vxlanstart; + IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + IFQ_SET_READY(&ifp->if_snd); + + ifp->if_hardmtu = 0xffff; + ifp->if_capabilities = IFCAP_VLAN_MTU; + + ifmedia_init(&sc->sc_media, 0, vxlan_media_change, + vxlan_media_status); + ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); + + if_attach(ifp); + ether_ifattach(ifp); + + /* XXX should we allow IP fragments? */ + ifp->if_mtu = ETHERMTU - sizeof(struct ether_header); +#ifdef INET + ifp->if_mtu -= sizeof(struct vxlanudpiphdr); +#endif + + LIST_INSERT_HEAD(&vxlan_tagh[VXLAN_TAGHASH(0)], sc, sc_entry); + vxlan_enable++; + + return (0); +} + +int +vxlan_clone_destroy(struct ifnet *ifp) +{ + struct vxlan_softc *sc = ifp->if_softc; + + vxlan_enable--; + LIST_REMOVE(sc, sc_entry); + + ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY); + ether_ifdetach(ifp); + if_detach(ifp); + free(sc->sc_imo.imo_membership, M_IPMOPTS); + free(sc, M_DEVBUF); + + return (0); +} + +void +vxlanstart(struct ifnet *ifp) +{ + struct mbuf *m; + int s; + + for (;;) { + s = splnet(); + IFQ_DEQUEUE(&ifp->if_snd, m); + splx(s); + + if (m == NULL) + return; + ifp->if_opackets++; + +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); +#endif + + vxlan_output(ifp, m); + } +} + +int +vxlan_config(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) +{ + struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; + struct ip_moptions *imo = &sc->sc_imo; +#ifdef INET + struct sockaddr_in *src4, *dst4; + struct ifaddr *ifa; 
+#endif + int reset = 0; + + if (src != NULL && dst != NULL) { + /* XXX inet6 is not supported */ + if (src->sa_family != AF_INET || dst->sa_family != AF_INET) + return (EAFNOSUPPORT); + } else { + /* Reset current configuration */ + src = (struct sockaddr *)&sc->sc_src; + dst = (struct sockaddr *)&sc->sc_dst; + reset = 1; + } + +#ifdef INET + src4 = (struct sockaddr_in *)src; + dst4 = (struct sockaddr_in *)dst; + + if (src4->sin_len != sizeof(*src4) || dst4->sin_len != sizeof(*dst4)) + return (EINVAL); + + if (IN_MULTICAST(dst4->sin_addr.s_addr)) { + if (src4->sin_addr.s_addr == INADDR_ANY || + IN_MULTICAST(src4->sin_addr.s_addr)) + return (EINVAL); + if ((ifa = ifa_ifwithaddr((struct sockaddr *)src4, + sc->sc_rtableid)) == NULL || + ifa->ifa_ifp == NULL || + (ifa->ifa_ifp->if_flags & IFF_MULTICAST) == 0) + return (EADDRNOTAVAIL); + } +#endif + + if (imo->imo_num_memberships > 0) { + in_delmulti(imo->imo_membership[ + --imo->imo_num_memberships]); + imo->imo_multicast_ifp = NULL; + } + +#ifdef INET + if (IN_MULTICAST(dst4->sin_addr.s_addr)) { + if ((imo->imo_membership[0] = + in_addmulti(&dst4->sin_addr, ifa->ifa_ifp)) == NULL) + return (ENOBUFS); + imo->imo_num_memberships++; + imo->imo_multicast_ifp = ifa->ifa_ifp; + if (sc->sc_ttl > 0) + imo->imo_multicast_ttl = sc->sc_ttl; + else + imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; + imo->imo_multicast_loop = 0; + } + if (dst4->sin_port) + sc->sc_dstport = dst4->sin_port; +#endif + + if (!reset) { + bzero(&sc->sc_src, sizeof(sc->sc_src)); + bzero(&sc->sc_dst, sizeof(sc->sc_dst)); + memcpy(&sc->sc_src, src, src->sa_len); + memcpy(&sc->sc_dst, dst, dst->sa_len); + } + + LIST_REMOVE(sc, sc_entry); + LIST_INSERT_HEAD(&vxlan_tagh[VXLAN_TAGHASH(sc->sc_vnetid)], + sc, sc_entry); + + return (0); +} + +/* ARGSUSED */ +int +vxlanioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; +#ifdef INET + struct ifaddr *ifa = (struct ifaddr *)data; +#endif + struct 
ifreq *ifr = (struct ifreq *)data; + struct if_laddrreq *lifr = (struct if_laddrreq *)data; + struct proc *p = curproc; + int error = 0, s; + struct ip_moptions *imo = &sc->sc_imo; + + switch (cmd) { + case SIOCSIFADDR: + ifp->if_flags |= IFF_UP; +#ifdef INET + if (ifa->ifa_addr->sa_family == AF_INET) + arp_ifinit(&sc->sc_ac, ifa); +#endif + /* FALLTHROUGH */ + + case SIOCSIFFLAGS: + if (ifp->if_flags & IFF_UP) { + ifp->if_flags |= IFF_RUNNING; + } else { + ifp->if_flags &= ~IFF_RUNNING; + } + break; + + case SIOCADDMULTI: + case SIOCDELMULTI: + if (ifr == 0) { + error = EAFNOSUPPORT; + break; + } + error = (cmd == SIOCADDMULTI) ? + ether_addmulti(ifr, &sc->sc_ac) : + ether_delmulti(ifr, &sc->sc_ac); + if (error == ENETRESET) + error = 0; + break; + + case SIOCGIFMEDIA: + case SIOCSIFMEDIA: + error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); + break; + + case SIOCSLIFPHYADDR: + if ((error = suser(p, 0)) != 0) + break; + s = splnet(); + error = vxlan_config(ifp, + (struct sockaddr *)&lifr->addr, + (struct sockaddr *)&lifr->dstaddr); + splx(s); + break; + + case SIOCDIFPHYADDR: + if ((error = suser(p, 0)) != 0) + break; + s = splnet(); + if (imo->imo_num_memberships > 0) { + in_delmulti(imo->imo_membership[ + --imo->imo_num_memberships]); + imo->imo_multicast_ifp = NULL; + } + bzero(&sc->sc_src, sizeof(sc->sc_src)); + bzero(&sc->sc_dst, sizeof(sc->sc_dst)); + sc->sc_dstport = htons(VXLAN_PORT); + splx(s); + break; + + case SIOCGLIFPHYADDR: + if (sc->sc_dst.ss_family == AF_UNSPEC) { + error = EADDRNOTAVAIL; + break; + } + bzero(&lifr->addr, sizeof(lifr->addr)); + bzero(&lifr->dstaddr, sizeof(lifr->dstaddr)); + memcpy(&lifr->addr, &sc->sc_src, sc->sc_src.ss_len); + memcpy(&lifr->dstaddr, &sc->sc_dst, sc->sc_dst.ss_len); + break; + + case SIOCSLIFPHYRTABLE: + if ((error = suser(p, 0)) != 0) + break; + if (ifr->ifr_rdomainid < 0 || + ifr->ifr_rdomainid > RT_TABLEID_MAX || + !rtable_exists(ifr->ifr_rdomainid)) { + error = EINVAL; + break; + } + s = splnet(); + 
sc->sc_rtableid = ifr->ifr_rdomainid; + (void)vxlan_config(ifp, NULL, NULL); + splx(s); + break; + + case SIOCGLIFPHYRTABLE: + ifr->ifr_rdomainid = sc->sc_rtableid; + break; + + case SIOCSLIFPHYTTL: + if ((error = suser(p, 0)) != 0) + break; + if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) { + error = EINVAL; + break; + } + if (sc->sc_ttl == (u_int8_t)ifr->ifr_ttl) + break; + s = splnet(); + sc->sc_ttl = (u_int8_t)(ifr->ifr_ttl); + (void)vxlan_config(ifp, NULL, NULL); + splx(s); + break; + + case SIOCGLIFPHYTTL: + ifr->ifr_ttl = (int)sc->sc_ttl; + break; + + case SIOCSVNETID: + if ((error = suser(p, 0)) != 0) + break; + if (ifr->ifr_vnetid < 0 || + ifr->ifr_vnetid > 0x00ffffff) { + error = EINVAL; + break; + } + s = splnet(); + sc->sc_vnetid = (u_int32_t)ifr->ifr_vnetid; + (void)vxlan_config(ifp, NULL, NULL); + splx(s); + break; + + case SIOCGVNETID: + ifr->ifr_vnetid = (int)sc->sc_vnetid; + break; + + default: + error = ether_ioctl(ifp, &sc->sc_ac, cmd, data); + break; + } + + return (error); +} + +int +vxlan_media_change(struct ifnet *ifp) +{ + return (0); +} + +void +vxlan_media_status(struct ifnet *ifp, struct ifmediareq *imr) +{ + imr->ifm_active = IFM_ETHER | IFM_AUTO; + imr->ifm_status = IFM_AVALID | IFM_ACTIVE; +} + +int +vxlan_lookup(struct mbuf *m, struct udphdr *uh, int iphlen, + struct sockaddr *srcsa) +{ + struct vxlan_softc *sc = NULL; + struct vxlan_header v; + u_int32_t vni; + struct ifnet *ifp; + int skip; + struct ether_header *eh; +#if NBRIDGE > 0 + struct sockaddr *sa; +#endif + + /* XXX Should verify the UDP port first before copying the packet */ + skip = iphlen + sizeof(*uh); + if (m->m_pkthdr.len - skip < sizeof(v)) + return (0); + m_copydata(m, skip, sizeof(v), (caddr_t)&v); + skip += sizeof(v); + + vni = ntohl(v.vxlan_id); + + /* Validate header */ + if ((vni == 0) || (vni & VXLAN_RESERVED2) || + (ntohl(v.vxlan_flags) != VXLAN_FLAGS_VNI)) + return (0); + + vni >>= VXLAN_VNI_S; + LIST_FOREACH(sc, &vxlan_tagh[VXLAN_TAGHASH(vni)], + sc_entry) { 
+ if ((uh->uh_dport == sc->sc_dstport) && + vni == sc->sc_vnetid && + sc->sc_rtableid == rtable_l2(m->m_pkthdr.rdomain)) + goto found; + } + + /* not found */ + return (0); + + found: + m_adj(m, skip); + ifp = &sc->sc_ac.ac_if; + m->m_pkthdr.rcvif = ifp; + + if ((eh = mtod(m, struct ether_header *)) == NULL) + return (EINVAL); + +#if NBRIDGE > 0 + /* Store the peer IP address for the bridge */ + if (ifp->if_bridgeport != NULL && + srcsa->sa_family != AF_UNSPEC && + (sa = bridge_tunneltag(m, srcsa->sa_family)) != NULL) + memcpy(sa, srcsa, sa->sa_len); +#endif + + /* Clear multicast flag from the outer packet */ + if (sc->sc_imo.imo_num_memberships > 0 && + m->m_flags & (M_MCAST) && + !ETHER_IS_MULTICAST(eh->ether_dhost)) + m->m_flags &= ~M_MCAST; + +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN); +#endif + + m_adj(m, ETHER_HDR_LEN); + +#if NPF > 0 + pf_pkt_addr_changed(m); +#endif + + ifp->if_ipackets++; + ether_input(ifp, eh, m); + + /* success */ + return (1); +} + +int +vxlan_output(struct ifnet *ifp, struct mbuf *m) +{ + struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; +#ifdef INET + struct udpiphdr *ui; + struct vxlanudpiphdr *vi; + u_int16_t len = m->m_pkthdr.len; + struct ip *ip; +#if NBRIDGE > 0 + struct sockaddr_in *sin; +#endif +#endif + int error; + +#ifdef INET + /* VXLAN header */ + M_PREPEND(m, sizeof(*vi), M_DONTWAIT); + if (m == NULL) { + ifp->if_oerrors++; + return (ENOBUFS); + } + + len += sizeof(struct vxlan_header); + + ui = mtod(m, struct udpiphdr *); + ui->ui_pr = IPPROTO_UDP; + ui->ui_src = ((struct sockaddr_in *)&sc->sc_src)->sin_addr; + ui->ui_dst = ((struct sockaddr_in *)&sc->sc_dst)->sin_addr; + ui->ui_sport = sc->sc_dstport; + ui->ui_dport = sc->sc_dstport; + ui->ui_ulen = htons(sizeof(struct udphdr) + len); + + ip = (struct ip *)ui; + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(struct ip) >> 2; + ip->ip_id = htons(ip_randomid()); + ip->ip_off = 0; /* htons(IP_DF); XXX should we disallow IP 
fragments? */ + ip->ip_tos = IPTOS_LOWDELAY; + ip->ip_len = htons(sizeof(struct udpiphdr) + len); + if (sc->sc_ttl > 0) + ip->ip_ttl = sc->sc_ttl; + else + ip->ip_ttl = IPDEFTTL; + +#if NBRIDGE > 0 + if ((sin = (struct sockaddr_in *)bridge_tunnel(m)) != NULL && + sin->sin_family == AF_INET) { + ui->ui_dst = sin->sin_addr; + + /* + * If the LINK0 flag is set, send the packet back to + * the original source port of the endport, otherwise use + * the configured VXLAN port. + */ + if (ifp->if_flags & IFF_LINK0) + ui->ui_dport = sin->sin_port; + } + if (sin != NULL) + bridge_tunneluntag(m); +#endif + + vi = (struct vxlanudpiphdr *)ui; + vi->ui_v.vxlan_flags = htonl(VXLAN_FLAGS_VNI); + vi->ui_v.vxlan_id = htonl(sc->sc_vnetid << VXLAN_VNI_S); + + /* UDP checksum should be 0 */ + ui->ui_sum = 0; +#endif + + ifp->if_opackets++; + ifp->if_obytes += m->m_pkthdr.len; + + m->m_pkthdr.rdomain = sc->sc_rtableid; + +#if NPF > 0 + pf_pkt_addr_changed(m); +#endif + +#ifdef INET + if ((error = + ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))) { + ifp->if_oerrors++; + } +#endif + + return (error); +} diff --git a/sys/net/if_vxlan.h b/sys/net/if_vxlan.h new file mode 100644 index 00000000000..47518e47e91 --- /dev/null +++ b/sys/net/if_vxlan.h @@ -0,0 +1,77 @@ +/* $OpenBSD: if_vxlan.h,v 1.1 2013/10/13 10:10:03 reyk Exp $ */ + +/* + * Copyright (c) 2013 Reyk Floeter <reyk@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _NET_VXLAN_H +#define _NET_VXLAN_H + +#define VXLANMTU 1492 +#define VXLAN_HDRLEN 8 +#define VXLAN_PORT 4789 + +struct vxlan_header { + u_int32_t vxlan_flags; +#define VXLAN_FLAGS_VNI 0x08000000 +#define VXLAN_RESERVED1 0xf7ffffff + u_int32_t vxlan_id; +#define VXLAN_VNI 0xffffff00 +#define VXLAN_VNI_S 8 +#define VXLAN_RESERVED2 0x000000ff +} __packed; + +#ifdef INET +struct vxlanudpiphdr { + struct ipovly ui_i; + struct udphdr ui_u; + struct vxlan_header ui_v; +} __packed; +#endif + +#define SIOCGETVXLAN SIOCGETPFLOW +#define SIOCSETVXLAN SIOCSETPFLOW + +/* from struct vxlanreq */ +struct vxlanreq { + u_int8_t vxlan_reserved; +}; + +#ifdef _KERNEL +struct vxlan_softc { + struct arpcom sc_ac; + struct ifmedia sc_media; + struct ip_moptions sc_imo; + struct sockaddr_storage sc_src; + struct sockaddr_storage sc_dst; + in_port_t sc_dstport; + u_int sc_rtableid; + u_int32_t sc_vnetid; + u_int8_t sc_ttl; + + LIST_ENTRY(vxlan_softc) sc_entry; +}; + +extern int vxlan_enable; + +int vxlan_lookup(struct mbuf *, struct udphdr *, int, + struct sockaddr *); +struct sockaddr *vxlan_tag_find(struct mbuf *); +struct sockaddr *vxlan_tag_get(struct mbuf *, int); +void vxlan_tag_delete(struct mbuf *); + +#endif /* _KERNEL */ + +#endif /* _NET_VXLAN_H */ diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 634f3f4c82a..beea6cf28a0 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: udp_usrreq.c,v 1.166 2013/09/06 18:35:16 bluhm Exp $ */ +/* $OpenBSD: udp_usrreq.c,v 1.167 2013/10/13 10:10:04 reyk Exp $ */ /* $NetBSD: udp_usrreq.c,v 1.28 1996/03/16 23:54:03 christos Exp $ */ /* 
@@ -78,6 +78,7 @@ #include <sys/sysctl.h> #include <net/if.h> +#include <net/if_media.h> #include <net/route.h> #include <netinet/in.h> @@ -113,6 +114,11 @@ #include <net/pipex.h> #endif +#include "vxlan.h" +#if NVXLAN > 0 +#include <net/if_vxlan.h> +#endif + /* * UDP protocol implementation. * Per RFC 768, August, 1980. @@ -384,6 +390,20 @@ udp_input(struct mbuf *m, ...) #endif /* INET6 */ } +#if NVXLAN > 0 + if (vxlan_enable > 0 && +#if NPF > 0 + !(m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) && +#endif + (error = vxlan_lookup(m, uh, iphlen, &srcsa.sa)) != 0) { + if (error == -1) { + udpstat.udps_hdrops++; + m_freem(m); + } + return; + } +#endif + #ifdef INET6 if ((ip6 && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) || (ip && IN_MULTICAST(ip->ip_dst.s_addr)) || diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 376d1341c04..2fed3c09910 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mbuf.h,v 1.167 2013/08/21 05:21:46 dlg Exp $ */ +/* $OpenBSD: mbuf.h,v 1.168 2013/10/13 10:10:04 reyk Exp $ */ /* $NetBSD: mbuf.h,v 1.19 1996/02/09 18:25:14 christos Exp $ */ /* @@ -454,6 +454,7 @@ struct m_tag *m_tag_next(struct mbuf *, struct m_tag *); #define PACKET_TAG_PIPEX 0x0400 /* pipex session cache */ #define PACKET_TAG_PF_REASSEMBLED 0x0800 /* pf reassembled ipv6 packet */ #define PACKET_TAG_SRCROUTE 0x1000 /* IPv4 source routing options */ +#define PACKET_TAG_TUNNEL 0x2000 /* Tunnel endpoint address */ /* * Maximum tag payload length (that is excluding the m_tag structure). 
diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h index 7f8faba576f..aa220bb6d69 100644 --- a/sys/sys/sockio.h +++ b/sys/sys/sockio.h @@ -1,4 +1,4 @@ -/* $OpenBSD: sockio.h,v 1.52 2013/08/16 12:29:18 mpi Exp $ */ +/* $OpenBSD: sockio.h,v 1.53 2013/10/13 10:10:04 reyk Exp $ */ /* $NetBSD: sockio.h,v 1.5 1995/08/23 00:40:47 thorpej Exp $ */ /*- @@ -184,6 +184,12 @@ #define SIOCGIFHARDMTU _IOWR('i', 165, struct ifreq) /* get ifnet hardmtu */ +#define SIOCSVNETID _IOW('i', 166, struct ifreq) /* set virt net id */ +#define SIOCGVNETID _IOWR('i', 167, struct ifreq) /* get virt net id */ + +#define SIOCSLIFPHYTTL _IOW('i', 168, struct ifreq) /* set tunnel ttl */ +#define SIOCGLIFPHYTTL _IOWR('i', 169, struct ifreq) /* get tunnel ttl */ + #define SIOCSVH _IOWR('i', 245, struct ifreq) /* set carp param */ #define SIOCGVH _IOWR('i', 246, struct ifreq) /* get carp param */ |