summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorReyk Floeter <reyk@cvs.openbsd.org>2013-10-13 10:10:05 +0000
committerReyk Floeter <reyk@cvs.openbsd.org>2013-10-13 10:10:05 +0000
commit728bebe5cfbcacfe290ca38c4777c371e5d0560e (patch)
tree21318ce06a08909b963e7238f07971e33a148f46
parentdc9aa5495448804b764c4be959bf635dcc8fb033 (diff)
Import vxlan(4), the virtual extensible local area network tunnel
interface. VXLAN is a UDP-based tunnelling protocol for overlaying virtualized layer 2 networks over layer 3 networks. The implementation is based on draft-mahalingam-dutt-dcops-vxlan-04 and has been tested with other implementations in the wild. put it in deraadt@
-rw-r--r--sbin/ifconfig/brconfig.c13
-rw-r--r--sbin/ifconfig/ifconfig.820
-rw-r--r--sbin/ifconfig/ifconfig.c69
-rw-r--r--share/man/man4/vxlan.4165
-rw-r--r--sys/conf/GENERIC3
-rw-r--r--sys/conf/files4
-rw-r--r--sys/net/if.h4
-rw-r--r--sys/net/if_bridge.c121
-rw-r--r--sys/net/if_bridge.h15
-rw-r--r--sys/net/if_vxlan.c590
-rw-r--r--sys/net/if_vxlan.h77
-rw-r--r--sys/netinet/udp_usrreq.c22
-rw-r--r--sys/sys/mbuf.h3
-rw-r--r--sys/sys/sockio.h8
14 files changed, 1086 insertions, 28 deletions
diff --git a/sbin/ifconfig/brconfig.c b/sbin/ifconfig/brconfig.c
index 05f29d5c684..5c38d457c3e 100644
--- a/sbin/ifconfig/brconfig.c
+++ b/sbin/ifconfig/brconfig.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: brconfig.c,v 1.6 2012/12/22 13:20:32 camield Exp $ */
+/* $OpenBSD: brconfig.c,v 1.7 2013/10/13 10:10:00 reyk Exp $ */
/*
* Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
@@ -40,6 +40,7 @@
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <net/if_bridge.h>
+#include <netdb.h>
#include <string.h>
#include <err.h>
#include <errno.h>
@@ -609,9 +610,13 @@ bridge_addaddr(const char *ifname, const char *addr)
void
bridge_addrs(const char *delim, int d)
{
+ char dstaddr[NI_MAXHOST];
+ char dstport[NI_MAXSERV];
+ const int niflag = NI_NUMERICHOST;
struct ifbaconf ifbac;
struct ifbareq *ifba;
char *inbuf = NULL, buf[sizeof(ifba->ifba_ifsname) + 1], *inb;
+ struct sockaddr *sa;
int i, len = 8192;
/* ifconfig will call us with the argv of the command */
@@ -640,7 +645,13 @@ bridge_addrs(const char *delim, int d)
strlcpy(buf, ifba->ifba_ifsname, sizeof(buf));
printf("%s%s %s %u ", delim, ether_ntoa(&ifba->ifba_dst),
buf, ifba->ifba_age);
+ sa = (struct sockaddr *)&ifba->ifba_dstsa;
printb("flags", ifba->ifba_flags, IFBAFBITS);
+ if (sa->sa_family != AF_UNSPEC &&
+ getnameinfo(sa, sa->sa_len,
+ dstaddr, sizeof(dstaddr),
+ dstport, sizeof(dstport), niflag) == 0)
+ printf(" tunnel %s:%s", dstaddr, dstport);
printf("\n");
}
free(inbuf);
diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8
index 13f3e7a2b9b..5d3133f9c41 100644
--- a/sbin/ifconfig/ifconfig.8
+++ b/sbin/ifconfig/ifconfig.8
@@ -1,4 +1,4 @@
-.\" $OpenBSD: ifconfig.8,v 1.235 2013/09/13 14:32:52 florian Exp $
+.\" $OpenBSD: ifconfig.8,v 1.236 2013/10/13 10:10:00 reyk Exp $
.\" $NetBSD: ifconfig.8,v 1.11 1996/01/04 21:27:29 pk Exp $
.\" $FreeBSD: ifconfig.8,v 1.16 1998/02/01 07:03:29 steve Exp $
.\"
@@ -31,7 +31,7 @@
.\"
.\" @(#)ifconfig.8 8.4 (Berkeley) 6/1/94
.\"
-.Dd $Mdocdate: September 13 2013 $
+.Dd $Mdocdate: October 13 2013 $
.Dt IFCONFIG 8
.Os
.Sh NAME
@@ -1428,6 +1428,7 @@ for a complete list of the available protocols,
.Op Oo Fl Oc Ns Cm keepalive Ar period count
.Op Cm tunnel Ar src_address dest_address
.Op Cm tunneldomain Ar route-id
+.Op Cm tunnelttl Ar ttl
+.Op Cm vnetid Ar network-id
.Ek
.nr nS 0
.Pp
@@ -1454,13 +1455,16 @@ is 2 since the round-trip time of keepalive packets needs to be accounted for.
Disable the
.Xr gre 4
keepalive mechanism.
-.It Cm tunnel Ar src_address dest_address
+.It Cm tunnel Ar src_address dest_address Ns Op Ns : Ns Ar dest_port
Set the source and destination tunnel addresses on a tunnel interface,
including
.Xr gif 4 .
Packets routed to this interface will be encapsulated in
IPv4 or IPv6, depending on the source and destination address families.
Both addresses must be of the same family.
+The optional destination port can be specified for interfaces such as
+.Xr vxlan 4 ,
+which further encapsulate the packets in UDP datagrams.
.It Cm tunneldomain Ar route-id
Use routing table
.Ar route-id
@@ -1470,6 +1474,16 @@ interface itself.
.Ar route-id
can be set to any valid routing table ID;
the corresponding routing domain is derived from this table.
+.It Cm tunnelttl Ar ttl
+Set the IP or multicast TTL of the tunnel packets.
+.It Cm vnetid Ar network-id
+Set the virtual network identifier.
+This is a number which is used by tunnel protocols such as
+.Xr vxlan 4
+to identify packets with a virtual network.
+The accepted size of the number depends on the individual tunnel protocol;
+it is a 24-bit number for
+.Xr vxlan 4 .
.El
.\" VLAN
.Sh VLAN
diff --git a/sbin/ifconfig/ifconfig.c b/sbin/ifconfig/ifconfig.c
index 58721e86796..d67a74d530d 100644
--- a/sbin/ifconfig/ifconfig.c
+++ b/sbin/ifconfig/ifconfig.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ifconfig.c,v 1.271 2013/10/09 20:23:46 reyk Exp $ */
+/* $OpenBSD: ifconfig.c,v 1.272 2013/10/13 10:10:00 reyk Exp $ */
/* $NetBSD: ifconfig.c,v 1.40 1997/10/01 02:19:43 enami Exp $ */
/*
@@ -168,6 +168,8 @@ void setifprefixlen(const char *, int);
void settunnel(const char *, const char *);
void deletetunnel(const char *, int);
void settunnelinst(const char *, int);
+void settunnelttl(const char *, int);
+void setvnetid(const char *, int);
#ifdef INET6
void setia6flags(const char *, int);
void setia6pltime(const char *, int);
@@ -380,6 +382,8 @@ const struct cmd {
{ "tunnel", NEXTARG2, 0, NULL, settunnel } ,
{ "deletetunnel", 0, 0, deletetunnel } ,
{ "tunneldomain", NEXTARG, 0, settunnelinst } ,
+ { "tunnelttl", NEXTARG, 0, settunnelttl } ,
+ { "vnetid", NEXTARG, 0, setvnetid },
{ "pppoedev", NEXTARG, 0, setpppoe_dev },
{ "pppoesvc", NEXTARG, 0, setpppoe_svc },
{ "-pppoesvc", 1, 0, setpppoe_svc },
@@ -2694,6 +2698,7 @@ phys_status(int force)
const char *ver = "";
const int niflag = NI_NUMERICHOST;
struct if_laddrreq req;
+ in_port_t dstport = 0;
psrcaddr[0] = pdstaddr[0] = '\0';
@@ -2713,9 +2718,13 @@ phys_status(int force)
ver = "6";
#endif /* INET6 */
+ if (req.dstaddr.ss_family == AF_INET)
+ dstport = ((struct sockaddr_in *)&req.dstaddr)->sin_port;
#ifdef INET6
- if (req.dstaddr.ss_family == AF_INET6)
+ else if (req.dstaddr.ss_family == AF_INET6) {
in6_fillscopeid((struct sockaddr_in6 *)&req.dstaddr);
+ dstport = ((struct sockaddr_in6 *)&req.dstaddr)->sin6_port;
+ }
#endif /* INET6 */
if (getnameinfo((struct sockaddr *)&req.dstaddr, req.dstaddr.ss_len,
pdstaddr, sizeof(pdstaddr), 0, 0, niflag) != 0)
@@ -2724,6 +2733,15 @@ phys_status(int force)
printf("\ttunnel: inet%s %s -> %s", ver,
psrcaddr, pdstaddr);
+ if (dstport)
+ printf(":%u", ntohs(dstport));
+
+ if (ioctl(s, SIOCGVNETID, (caddr_t)&ifr) == 0 && ifr.ifr_vnetid > 0)
+ printf(" vnetid %d", ifr.ifr_vnetid);
+
+ if (ioctl(s, SIOCGLIFPHYTTL, (caddr_t)&ifr) == 0 && ifr.ifr_ttl > 0)
+ printf(" ttl %d", ifr.ifr_ttl);
+
#ifndef SMALL
if (ioctl(s, SIOCGLIFPHYRTABLE, (caddr_t)&ifr) == 0 &&
(rdomainid != 0 || ifr.ifr_rdomainid != 0))
@@ -3140,15 +3158,28 @@ in6_status(int force)
void
settunnel(const char *src, const char *dst)
{
+ char buf[MAXHOSTNAMELEN+sizeof (":65535")], *dstport;
+ const char *dstip;
struct addrinfo *srcres, *dstres;
int ecode;
struct if_laddrreq req;
+ if (strchr (dst, ':') == NULL) {
+ dstip = dst;
+ dstport = NULL;
+ } else {
+ if (strlcpy(buf, dst, sizeof(buf)) >= sizeof(buf))
+ errx(1, "%s bad value", dst);
+ dstport = strchr(buf, ':');
+ *dstport++ = '\0';
+ dstip = buf;
+ }
+
if ((ecode = getaddrinfo(src, NULL, NULL, &srcres)) != 0)
errx(1, "error in parsing address string: %s",
gai_strerror(ecode));
- if ((ecode = getaddrinfo(dst, NULL, NULL, &dstres)) != 0)
+ if ((ecode = getaddrinfo(dstip, dstport, NULL, &dstres)) != 0)
errx(1, "error in parsing address string: %s",
gai_strerror(ecode));
@@ -3196,6 +3227,38 @@ settunnelinst(const char *id, int param)
}
+/*
+ * Handler for the "tunnelttl" argument: parse id as a number in the
+ * range 0-255 and pass it to the interface via SIOCSLIFPHYTTL.
+ * A value that does not parse is fatal; a failing ioctl only produces
+ * a warning.  The param argument is not used.
+ */
void
+settunnelttl(const char *id, int param)
+{
+	const char *errstr = NULL;
+	int hops = strtonum(id, 0, 0xff, &errstr);
+
+	if (errstr != NULL)
+		errx(1, "tunnelttl %s: %s", id, errstr);
+
+	strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+	ifr.ifr_ttl = hops;
+
+	if (ioctl(s, SIOCSLIFPHYTTL, (caddr_t)&ifr) < 0)
+		warn("SIOCSLIFPHYTTL");
+}
+
+/*
+ * Handler for the "vnetid" argument: parse id as a non-negative number
+ * and pass it to the interface via SIOCSVNETID.
+ * A value that does not parse or is out of range is fatal; a failing
+ * ioctl only produces a warning.  The param argument is not used.
+ */
+void
+setvnetid(const char *id, int param)
+{
+	const char *errmsg = NULL;
+	int vnetid;
+
+	/*
+	 * The result is stored in an int, so bound the input by INT_MAX.
+	 * With a larger bound, values above INT_MAX would be converted
+	 * on assignment instead of being rejected here.
+	 */
+	vnetid = strtonum(id, 0, INT_MAX, &errmsg);
+	if (errmsg)
+		errx(1, "vnetid %s: %s", id, errmsg);
+
+	strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+	ifr.ifr_vnetid = vnetid;
+	if (ioctl(s, SIOCSVNETID, (caddr_t)&ifr) < 0)
+		warn("SIOCSVNETID");
+}
+
+void
mpe_status(void)
{
struct shim_hdr shim;
diff --git a/share/man/man4/vxlan.4 b/share/man/man4/vxlan.4
new file mode 100644
index 00000000000..308b7af91ad
--- /dev/null
+++ b/share/man/man4/vxlan.4
@@ -0,0 +1,165 @@
+.\" $OpenBSD: vxlan.4,v 1.1 2013/10/13 10:10:01 reyk Exp $
+.\"
+.\" Copyright (c) 2013 Reyk Floeter <reyk@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: October 13 2013 $
+.Dt VXLAN 4
+.Os
+.Sh NAME
+.Nm vxlan
+.Nd virtual extensible local area network tunnel interface
+.Sh SYNOPSIS
+.Cd "pseudo-device vxlan"
+.Sh DESCRIPTION
+The
+.Nm
+interface is a tunnelling pseudo-device for overlaying virtualized
+layer 2 networks over layer 3 networks.
+.Pp
+A
+.Nm
+interface can be created using the
+.Ic ifconfig vxlan Ns Ar N Ic create
+command.
+Once configured, the interface encapsulates and decapsulates Ethernet
+frames in UDP datagrams that are exchanged with tunnel endpoints.
+The default UDP port for VXLAN traffic is 4789.
+.Pp
+Each
+.Nm
+interface uses a 24-bit
+.Ic vnetid
+(virtual network identifier)
+that makes it possible to distinguish multiple virtualized layer 2 networks
+and their tunnels between identical tunnel endpoints.
+.Pp
+The interface can operate in three different tunnel modes:
+.Bl -tag -width multicast
+.It Ic unicast mode
+When a unicast IP address is configured as the tunnel destination,
+all traffic is sent to a single tunnel endpoint.
+.It Ic multicast mode
+When a multicast IP address is configured as the tunnel destination,
+all traffic is sent to all the tunnel endpoints that subscribed for the
+specified multicast group.
+.It Ic dynamic mode
+When
+.Nm
+is configured for multicast mode and added to a
+.Xr bridge 4 ,
+all broadcast and multicast traffic is sent to the multicast group,
+but directed traffic is sent to unicast IP addresses of individual tunnel
+endpoints, as they are learned by the bridge.
+.El
+.Pp
+The configuration can be done at runtime or by setting up a
+.Xr hostname.if 5
+configuration file for
+.Xr netstart 8 .
+.Sh EXAMPLES
+Create a tunnel to a unicast tunnel endpoint, using the virtual network
+identifier 5:
+.Bd -literal -offset indent
+# ifconfig vxlan0 tunnel 192.168.1.100 192.168.1.200 vnetid 5
+# ifconfig vxlan0 10.1.1.100/24
+.Ed
+.Pp
+The following example creates a dynamic tunnel that is attached to a
+.Xr bridge 4 :
+.Bd -literal -offset indent
+# ifconfig vxlan0 tunnel 192.168.1.100 239.1.1.100 vnetid 7395
+# ifconfig vxlan0 10.1.2.100/24
+# ifconfig bridge0 add vxlan0 up
+.Ed
+.Pp
+Prior to the assignment of UDP port 4789 by IANA, some early VXLAN
+implementations used port 8472.
+A non-standard port can be specified with the tunnel destination
+address:
+.Bd -literal -offset indent
+# ifconfig vxlan0 tunnel 192.168.1.100 239.1.1.100:8472
+.Ed
+.Sh SECURITY
+.Nm
+does not provide any integrated security features.
+It is designed to be a simple protocol that can be used in trusted
+data center environments, to carry VM traffic between virtual machine
+hypervisors, and provide virtualized layer 2 networks in Cloud
+infrastructures.
+.Pp
+To protect
+.Nm
+tunnels, the traffic can be protected with IPsec to add authentication
+and encryption for confidentiality.
+.Pp
+The Packet Filter (PF) can be used to filter tunnel traffic with
+endpoint policies in
+.Xr pf.conf 5 :
+.Bd -literal -offset indent
+table <vxlantep> { 192.168.1.200 192.168.1.201 }
+block in on vmx0
+pass out on vmx0
+pass in on vmx0 proto udp from <vxlantep> to port 4789
+.Ed
+.Pp
+The Time-to-Live (TTL) value of the tunnel can be set to 1 or a low
+value to restrict the traffic to the local network:
+.Bd -literal -offset indent
+# ifconfig vxlan0 tunnelttl 1
+.Ed
+.Sh SEE ALSO
+.Xr bridge 4 ,
+.Xr inet 4 ,
+.Xr hostname.if 5 ,
+.Xr ifconfig 8 ,
+.Xr netstart 8
+.Sh HISTORY
+The
+.Nm
+device first appeared in
+.Ox 5.5 .
+.Sh AUTHORS
+The
+.Nm
+driver was written by
+.An Reyk Floeter Aq Mt reyk@openbsd.org .
+.Sh STANDARDS
+.Rs
+.%A M. Mahalingam
+.%A D. Dutt
+.%A K. Duda
+.%A P. Agarwal
+.%A L. Kreeger
+.%A T. Sridhar
+.%A M. Bursell
+.%A C. Wright
+.%D May 2013
+.%R draft-mahalingam-dutt-dcops-vxlan-04
+.%T VXLAN: A Framework for Overlaying Virtualized Layer 2 Networks over Layer 3 Networks.
+.Re
+.Sh CAVEATS
+The
+.Nm
+interface requires at least 50 bytes for the IP, UDP and VXLAN
+protocol overhead and optionally 4 bytes for the encapsulated VLAN tag.
+The default MTU is set to 1450 bytes but can be adjusted if the
+transport interfaces carrying the tunnel traffic support larger MTUs.
+It is recommended to set the MTU of the transport interfaces to at
+least 1550 bytes and to bump the MTU of the
+.Nm
+interfaces to 1500 bytes accordingly.
+.Pp
+The implementation does not support IPv6 tunnel endpoints at present.
diff --git a/sys/conf/GENERIC b/sys/conf/GENERIC
index 7f05181509a..c4929576efa 100644
--- a/sys/conf/GENERIC
+++ b/sys/conf/GENERIC
@@ -1,4 +1,4 @@
-# $OpenBSD: GENERIC,v 1.202 2013/10/01 06:57:25 dlg Exp $
+# $OpenBSD: GENERIC,v 1.203 2013/10/13 10:10:01 reyk Exp $
#
# Machine-independent option; used by all architectures for their
# GENERIC kernel
@@ -107,6 +107,7 @@ pseudo-device sppp 1 # Sync PPP/HDLC
pseudo-device trunk # Trunking support
pseudo-device tun # network tunneling over tty
pseudo-device vether # Virtual ethernet
+#pseudo-device vxlan # Virtual extensible LAN
pseudo-device vlan # IEEE 802.1Q VLAN
pseudo-device bio 1 # ioctl multiplexing device
diff --git a/sys/conf/files b/sys/conf/files
index 58635aeb080..94292f0db0a 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1,4 +1,4 @@
-# $OpenBSD: files,v 1.555 2013/10/12 12:02:03 henning Exp $
+# $OpenBSD: files,v 1.556 2013/10/13 10:10:02 reyk Exp $
# $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $
# @(#)files.newconf 7.5 (Berkeley) 5/10/93
@@ -541,6 +541,7 @@ pseudo-device trunk: ifnet, ether, ifmedia
pseudo-device mpe: ifnet, ether
pseudo-device vether: ifnet, ether
pseudo-device pppx: ifnet
+pseudo-device vxlan: ifnet, ether, ifmedia
pseudo-device systrace
@@ -800,6 +801,7 @@ file net/if_mpe.c mpe needs-count
file net/if_vether.c vether needs-count
file net/if_pppx.c pppx needs-count
file net/if_aoe.c ether & aoe
+file net/if_vxlan.c vxlan needs-count
file net80211/ieee80211.c wlan
file net80211/ieee80211_amrr.c wlan
file net80211/ieee80211_crypto.c wlan
diff --git a/sys/net/if.h b/sys/net/if.h
index a95ef74c2b1..508407cd6bd 100644
--- a/sys/net/if.h
+++ b/sys/net/if.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: if.h,v 1.147 2013/10/12 12:13:10 henning Exp $ */
+/* $OpenBSD: if.h,v 1.148 2013/10/13 10:10:02 reyk Exp $ */
/* $NetBSD: if.h,v 1.23 1996/05/07 02:40:27 thorpej Exp $ */
/*
@@ -641,6 +641,8 @@ struct ifreq {
#define ifr_hardmtu ifr_ifru.ifru_metric /* hardmtu (overload) */
#define ifr_media ifr_ifru.ifru_metric /* media options (overload) */
#define ifr_rdomainid ifr_ifru.ifru_metric /* VRF instance (overload) */
+#define ifr_vnetid ifr_ifru.ifru_metric /* Virtual Net Id (overload) */
+#define ifr_ttl ifr_ifru.ifru_metric /* tunnel TTL (overload) */
#define ifr_data ifr_ifru.ifru_data /* for use by interface */
};
diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c
index 9ea29ba1a3f..c1798b0371b 100644
--- a/sys/net/if_bridge.c
+++ b/sys/net/if_bridge.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: if_bridge.c,v 1.215 2013/10/12 11:55:45 henning Exp $ */
+/* $OpenBSD: if_bridge.c,v 1.216 2013/10/13 10:10:02 reyk Exp $ */
/*
* Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
@@ -131,9 +131,9 @@ int bridge_rtfind(struct bridge_softc *, struct ifbaconf *);
void bridge_rtage(struct bridge_softc *);
int bridge_rtdaddr(struct bridge_softc *, struct ether_addr *);
void bridge_rtflush(struct bridge_softc *, int);
-struct ifnet * bridge_rtupdate(struct bridge_softc *,
- struct ether_addr *, struct ifnet *ifp, int, u_int8_t);
-struct ifnet * bridge_rtlookup(struct bridge_softc *,
+struct ifnet *bridge_rtupdate(struct bridge_softc *,
+ struct ether_addr *, struct ifnet *ifp, int, u_int8_t, struct mbuf *);
+struct bridge_rtnode *bridge_rtlookup(struct bridge_softc *,
struct ether_addr *);
u_int32_t bridge_hash(struct bridge_softc *, struct ether_addr *);
int bridge_blocknonip(struct ether_header *, struct mbuf *);
@@ -161,6 +161,7 @@ int bridge_ipsec(struct bridge_softc *, struct ifnet *,
int bridge_clone_create(struct if_clone *, int);
int bridge_clone_destroy(struct ifnet *ifp);
int bridge_delete(struct bridge_softc *, struct bridge_iflist *);
+void bridge_tunnelupdate(struct sockaddr *, struct sockaddr *);
#define ETHERADDR_IS_IP_MCAST(a) \
/* struct etheraddr *a; */ \
@@ -550,7 +551,7 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
}
ifs = bridge_rtupdate(sc, &bareq->ifba_dst, ifs, 1,
- bareq->ifba_flags);
+ bareq->ifba_flags, NULL);
if (ifs == NULL)
error = ENOMEM;
break;
@@ -728,7 +729,7 @@ bridge_update(struct ifnet *ifp, struct ether_addr *ea, int delete)
if (!delete) {
/* Update the bridge table */
- bridge_rtupdate(sc, ea, ifp, 0, IFBAF_DYNAMIC);
+ bridge_rtupdate(sc, ea, ifp, 0, IFBAF_DYNAMIC, NULL);
}
}
}
@@ -946,7 +947,8 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
struct rtentry *rt)
{
struct ether_header *eh;
- struct ifnet *dst_if;
+ struct ifnet *dst_if = NULL;
+ struct bridge_rtnode *dst_p = NULL;
struct ether_addr *dst;
struct bridge_softc *sc;
int s, error, len;
@@ -990,7 +992,8 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
* If the packet is a broadcast or we don't know a better way to
* get there, send to all interfaces.
*/
- dst_if = bridge_rtlookup(sc, dst);
+ if ((dst_p = bridge_rtlookup(sc, dst)) != NULL)
+ dst_if = dst_p->brt_if;
if (dst_if == NULL || ETHER_IS_MULTICAST(eh->ether_dhost)) {
struct bridge_iflist *p;
struct mbuf *mc;
@@ -1083,6 +1086,10 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
}
sendunicast:
+ if (dst_p != NULL && dst_p->brt_tunnel.sa.sa_family != AF_UNSPEC &&
+ (sa = bridge_tunneltag(m, dst_p->brt_tunnel.sa.sa_family)) != NULL)
+ memcpy(sa, &dst_p->brt_tunnel.sa, dst_p->brt_tunnel.sa.sa_len);
+
bridge_span(sc, NULL, m);
if ((dst_if->if_flags & IFF_RUNNING) == 0) {
m_freem(m);
@@ -1133,6 +1140,7 @@ bridgeintr_frame(struct bridge_softc *sc, struct mbuf *m)
int s, len;
struct ifnet *src_if, *dst_if;
struct bridge_iflist *ifl;
+ struct bridge_rtnode *dst_p;
struct ether_addr *dst, *src;
struct ether_header eh;
@@ -1175,7 +1183,7 @@ bridgeintr_frame(struct bridge_softc *sc, struct mbuf *m)
!(eh.ether_shost[0] == 0 && eh.ether_shost[1] == 0 &&
eh.ether_shost[2] == 0 && eh.ether_shost[3] == 0 &&
eh.ether_shost[4] == 0 && eh.ether_shost[5] == 0))
- bridge_rtupdate(sc, src, src_if, 0, IFBAF_DYNAMIC);
+ bridge_rtupdate(sc, src, src_if, 0, IFBAF_DYNAMIC, m);
if ((ifl->bif_flags & IFBIF_STP) &&
(ifl->bif_state == BSTP_IFSTATE_LEARNING)) {
@@ -1193,7 +1201,10 @@ bridgeintr_frame(struct bridge_softc *sc, struct mbuf *m)
* side of the bridge, drop it.
*/
if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
- dst_if = bridge_rtlookup(sc, dst);
+ if ((dst_p = bridge_rtlookup(sc, dst)) != NULL)
+ dst_if = dst_p->brt_if;
+ else
+ dst_if = NULL;
if (dst_if == src_if) {
m_freem(m);
return;
@@ -1422,7 +1433,7 @@ bridge_input(struct ifnet *ifp, struct ether_header *eh, struct mbuf *m)
if (srcifl->bif_flags & IFBIF_LEARNING)
bridge_rtupdate(sc,
(struct ether_addr *)&eh->ether_shost,
- ifp, 0, IFBAF_DYNAMIC);
+ ifp, 0, IFBAF_DYNAMIC, m);
if (bridge_filterrule(&srcifl->bif_brlin, eh, m) ==
BRL_ACTION_BLOCK) {
m_freem(m);
@@ -1685,12 +1696,18 @@ bridge_span(struct bridge_softc *sc, struct ether_header *eh,
struct ifnet *
bridge_rtupdate(struct bridge_softc *sc, struct ether_addr *ea,
- struct ifnet *ifp, int setflags, u_int8_t flags)
+ struct ifnet *ifp, int setflags, u_int8_t flags, struct mbuf *m)
{
struct bridge_rtnode *p, *q;
+ struct sockaddr *sa = NULL;
u_int32_t h;
int dir;
+ if (m != NULL) {
+ /* Check if the mbuf was tagged with a tunnel endpoint addr */
+ sa = bridge_tunnel(m);
+ }
+
h = bridge_hash(sc, ea);
p = LIST_FIRST(&sc->sc_rts[h]);
if (p == NULL) {
@@ -1703,6 +1720,7 @@ bridge_rtupdate(struct bridge_softc *sc, struct ether_addr *ea,
bcopy(ea, &p->brt_addr, sizeof(p->brt_addr));
p->brt_if = ifp;
p->brt_age = 1;
+ bridge_tunnelupdate(sa, (struct sockaddr *)&p->brt_tunnel);
if (setflags)
p->brt_flags = flags;
@@ -1729,6 +1747,9 @@ bridge_rtupdate(struct bridge_softc *sc, struct ether_addr *ea,
if (q->brt_if == ifp)
q->brt_age = 1;
ifp = q->brt_if;
+ bridge_tunnelupdate(sa,
+ (struct sockaddr *)&q->brt_tunnel);
+
goto want;
}
@@ -1742,6 +1763,8 @@ bridge_rtupdate(struct bridge_softc *sc, struct ether_addr *ea,
bcopy(ea, &p->brt_addr, sizeof(p->brt_addr));
p->brt_if = ifp;
p->brt_age = 1;
+ bridge_tunnelupdate(sa,
+ (struct sockaddr *)&p->brt_tunnel);
if (setflags)
p->brt_flags = flags;
@@ -1763,6 +1786,8 @@ bridge_rtupdate(struct bridge_softc *sc, struct ether_addr *ea,
bcopy(ea, &p->brt_addr, sizeof(p->brt_addr));
p->brt_if = ifp;
p->brt_age = 1;
+ bridge_tunnelupdate(sa,
+ (struct sockaddr *)&p->brt_tunnel);
if (setflags)
p->brt_flags = flags;
@@ -1780,7 +1805,7 @@ want:
return (ifp);
}
-struct ifnet *
+struct bridge_rtnode *
bridge_rtlookup(struct bridge_softc *sc, struct ether_addr *ea)
{
struct bridge_rtnode *p;
@@ -1791,7 +1816,7 @@ bridge_rtlookup(struct bridge_softc *sc, struct ether_addr *ea)
LIST_FOREACH(p, &sc->sc_rts[h], brt_next) {
dir = memcmp(ea, &p->brt_addr, sizeof(p->brt_addr));
if (dir == 0)
- return (p->brt_if);
+ return (p);
if (dir > 0)
goto fail;
}
@@ -2018,6 +2043,12 @@ bridge_rtfind(struct bridge_softc *sc, struct ifbaconf *baconf)
sizeof(bareq.ifba_ifsname));
bcopy(&n->brt_addr, &bareq.ifba_dst,
sizeof(bareq.ifba_dst));
+ if (n->brt_tunnel.sa.sa_family != AF_UNSPEC)
+ bcopy(&n->brt_tunnel.sa,
+ &bareq.ifba_dstsa,
+ n->brt_tunnel.sa.sa_len);
+ else
+ bareq.ifba_dstsa.ss_family = AF_UNSPEC;
bareq.ifba_age = n->brt_age;
bareq.ifba_flags = n->brt_flags;
error = copyout((caddr_t)&bareq,
@@ -2823,3 +2854,65 @@ bridge_send_icmp_err(struct bridge_softc *sc, struct ifnet *ifp,
m_freem(n);
}
#endif
+
+/*
+ * Return the tunnel endpoint address carried by the mbuf's
+ * PACKET_TAG_TUNNEL tag, or NULL if the mbuf has no such tag.
+ * The address is stored directly behind the tag header.
+ */
+struct sockaddr *
+bridge_tunnel(struct mbuf *m)
+{
+	struct m_tag *tag = m_tag_find(m, PACKET_TAG_TUNNEL, NULL);
+
+	return (tag != NULL ? (struct sockaddr *)(tag + 1) : NULL);
+}
+
+/*
+ * Return the tunnel endpoint address of family af that is attached to
+ * the mbuf as a PACKET_TAG_TUNNEL tag, creating a zeroed one (with only
+ * sa_family and sa_len filled in) if the mbuf does not have one yet.
+ *
+ * An existing tag of a different family is deleted first.  Returns NULL
+ * if af is neither AF_INET nor AF_INET6, or if no tag could be
+ * allocated; in both cases a mismatching old tag has already been
+ * removed.
+ */
+struct sockaddr *
+bridge_tunneltag(struct mbuf *m, int af)
+{
+	struct m_tag *tag;
+	struct sockaddr *addr;
+	size_t salen;
+
+	tag = m_tag_find(m, PACKET_TAG_TUNNEL, NULL);
+	if (tag != NULL) {
+		addr = (struct sockaddr *)(tag + 1);
+		if (addr->sa_family == af)
+			return (addr);
+		/* Tag belongs to another address family, replace it */
+		m_tag_delete(m, tag);
+	}
+
+	switch (af) {
+	case AF_INET:
+		salen = sizeof(struct sockaddr_in);
+		break;
+	case AF_INET6:
+		salen = sizeof(struct sockaddr_in6);
+		break;
+	default:
+		return (NULL);
+	}
+
+	tag = m_tag_get(PACKET_TAG_TUNNEL, salen, M_NOWAIT);
+	if (tag == NULL)
+		return (NULL);
+
+	addr = (struct sockaddr *)(tag + 1);
+	bzero(addr, salen);
+	addr->sa_family = af;
+	addr->sa_len = salen;
+	m_tag_prepend(m, tag);
+
+	return (addr);
+}
+
+/*
+ * Remove the PACKET_TAG_TUNNEL tag from the mbuf, if it has one.
+ */
+void
+bridge_tunneluntag(struct mbuf *m)
+{
+	struct m_tag *tag = m_tag_find(m, PACKET_TAG_TUNNEL, NULL);
+
+	if (tag == NULL)
+		return;
+	m_tag_delete(m, tag);
+}
+
+/*
+ * Store the tunnel endpoint sa in tunnel.  If sa is NULL or has the
+ * family AF_UNSPEC, tunnel is marked as unset by setting its family
+ * to AF_UNSPEC; otherwise sa_len bytes of sa are copied.
+ */
+void
+bridge_tunnelupdate(struct sockaddr *sa, struct sockaddr *tunnel)
+{
+	if (sa == NULL || sa->sa_family == AF_UNSPEC) {
+		tunnel->sa_family = AF_UNSPEC;
+		return;
+	}
+
+	memcpy(tunnel, sa, sa->sa_len);
+}
diff --git a/sys/net/if_bridge.h b/sys/net/if_bridge.h
index 3fb604dba6e..db956d960cd 100644
--- a/sys/net/if_bridge.h
+++ b/sys/net/if_bridge.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: if_bridge.h,v 1.37 2013/01/23 13:28:36 camield Exp $ */
+/* $OpenBSD: if_bridge.h,v 1.38 2013/10/13 10:10:03 reyk Exp $ */
/*
* Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
@@ -122,6 +122,7 @@ struct ifbareq {
u_int8_t ifba_age; /* address age */
u_int8_t ifba_flags; /* address flags */
struct ether_addr ifba_dst; /* destination addr */
+ struct sockaddr_storage ifba_dstsa; /* tunnel endpoint */
};
#define IFBAF_TYPEMASK 0x03 /* address type mask */
@@ -406,6 +407,15 @@ struct bridge_rtnode {
u_int8_t brt_flags; /* address flags */
u_int8_t brt_age; /* age counter */
struct ether_addr brt_addr; /* dst addr */
+ union {
+ struct sockaddr sa;
+#ifdef INET
+ struct sockaddr_in sin;
+#endif
+#ifdef INET6
+ struct sockaddr_in6 sin6;
+#endif
+ } brt_tunnel; /* tunnel endpoint */
};
#ifndef BRIDGE_RTABLE_SIZE
@@ -441,6 +451,9 @@ int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *,
void bridge_update(struct ifnet *, struct ether_addr *, int);
void bridge_rtdelete(struct bridge_softc *, struct ifnet *, int);
void bridge_rtagenode(struct ifnet *, int);
+struct sockaddr *bridge_tunnel(struct mbuf *);
+struct sockaddr *bridge_tunneltag(struct mbuf *, int);
+void bridge_tunneluntag(struct mbuf *);
struct bstp_state *bstp_create(struct ifnet *);
void bstp_destroy(struct bstp_state *);
diff --git a/sys/net/if_vxlan.c b/sys/net/if_vxlan.c
new file mode 100644
index 00000000000..69323992ccf
--- /dev/null
+++ b/sys/net/if_vxlan.c
@@ -0,0 +1,590 @@
+/* $OpenBSD: if_vxlan.c,v 1.1 2013/10/13 10:10:03 reyk Exp $ */
+
+/*
+ * Copyright (c) 2013 Reyk Floeter <reyk@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "bpfilter.h"
+#include "vxlan.h"
+#include "vlan.h"
+#include "pf.h"
+#include "bridge.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/ioctl.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+#include <net/route.h>
+
+#if NBPFILTER > 0
+#include <net/bpf.h>
+#endif
+
+#if NPF > 0
+#include <net/pfvar.h>
+#endif
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/in_systm.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+#include <netinet/in_pcb.h>
+
+#if NBRIDGE > 0
+#include <net/if_bridge.h>
+#endif
+
+#include <net/if_vxlan.h>
+
+/* Attach hook, interface entry points and local helpers */
+void	 vxlanattach(int);
+int	 vxlanioctl(struct ifnet *, u_long, caddr_t);
+void	 vxlanstart(struct ifnet *);
+int	 vxlan_clone_create(struct if_clone *, int);
+int	 vxlan_clone_destroy(struct ifnet *);
+int	 vxlan_media_change(struct ifnet *);
+void	 vxlan_media_status(struct ifnet *, struct ifmediareq *);
+int	 vxlan_config(struct ifnet *, struct sockaddr *, struct sockaddr *);
+int	 vxlan_output(struct ifnet *, struct mbuf *);
+
+struct if_clone	vxlan_cloner =
+    IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy);
+
+/* Number of existing vxlan interfaces; udp_input() only calls us if > 0 */
+int	 vxlan_enable = 0;
+/* Bucket mask of vxlan_tagh, filled in by hashinit() */
+u_long	 vxlan_tagmask;
+
+#define VXLAN_TAGHASHSIZE	32
+/* Map a VNI to its hash bucket (argument parenthesized for macro safety) */
+#define VXLAN_TAGHASH(tag)	((tag) & vxlan_tagmask)
+LIST_HEAD(vxlan_taghash, vxlan_softc) *vxlan_tagh;
+
+/*
+ * Attach routine: allocate the softc hash table (indexed through
+ * VXLAN_TAGHASH) and register the "vxlan" interface cloner.
+ * `count' is not used.  Panics if the table cannot be allocated.
+ */
+void
+vxlanattach(int count)
+{
+	vxlan_tagh = hashinit(VXLAN_TAGHASHSIZE, M_DEVBUF, M_NOWAIT,
+	    &vxlan_tagmask);
+	if (vxlan_tagh == NULL)
+		panic("vxlanattach: hashinit");
+
+	if_clone_attach(&vxlan_cloner);
+}
+
+/*
+ * Cloner callback: create the interface "vxlan<unit>".
+ *
+ * Allocates a zeroed softc, sets the defaults (VXLAN_PORT, VNI 0),
+ * attaches the interface as an Ethernet device with a fake MAC address
+ * and links the softc into the hash bucket for VNI 0.
+ * Returns 0 on success or ENOMEM if the softc cannot be allocated.
+ */
+int
+vxlan_clone_create(struct if_clone *ifc, int unit)
+{
+	struct vxlan_softc *sc;
+	struct ifnet *ifp;
+
+	sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
+	if (sc == NULL)
+		return (ENOMEM);
+
+	/* Multicast membership storage used for a multicast tunnel peer */
+	sc->sc_imo.imo_membership = malloc(
+	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
+	    M_WAITOK|M_ZERO);
+	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
+
+	/* Defaults until configured via ioctl */
+	sc->sc_vnetid = 0;
+	sc->sc_dstport = htons(VXLAN_PORT);
+
+	ifp = &sc->sc_ac.ac_if;
+	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "vxlan%d", unit);
+	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+	ether_fakeaddr(ifp);
+
+	ifp->if_softc = sc;
+	ifp->if_start = vxlanstart;
+	ifp->if_ioctl = vxlanioctl;
+	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
+	IFQ_SET_READY(&ifp->if_snd);
+
+	ifp->if_capabilities = IFCAP_VLAN_MTU;
+	ifp->if_hardmtu = 0xffff;
+
+	ifmedia_init(&sc->sc_media, 0, vxlan_media_change,
+	    vxlan_media_status);
+	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
+
+	if_attach(ifp);
+	ether_ifattach(ifp);
+
+	/*
+	 * Leave room for the encapsulation headers.  Done after
+	 * ether_ifattach(), which presumably installs its own default MTU.
+	 * XXX should we allow IP fragments?
+	 */
+#ifdef INET
+	ifp->if_mtu = ETHERMTU - sizeof(struct ether_header) -
+	    sizeof(struct vxlanudpiphdr);
+#else
+	ifp->if_mtu = ETHERMTU - sizeof(struct ether_header);
+#endif
+
+	LIST_INSERT_HEAD(&vxlan_tagh[VXLAN_TAGHASH(0)], sc, sc_entry);
+	vxlan_enable++;
+
+	return (0);
+}
+
+/*
+ * Cloner callback: destroy a vxlan interface.
+ *
+ * Leaves the multicast group joined for a multicast tunnel destination
+ * (if any), unlinks the softc from the VNI hash, detaches the interface
+ * and releases all memory owned by the softc.  Always returns 0.
+ */
+int
+vxlan_clone_destroy(struct ifnet *ifp)
+{
+	struct vxlan_softc *sc = ifp->if_softc;
+	struct ip_moptions *imo = &sc->sc_imo;
+	int s;
+
+	/*
+	 * Drop the group membership before its bookkeeping array is freed;
+	 * otherwise the in_multi reference taken in vxlan_config() would
+	 * never be released.  Same sequence as SIOCDIFPHYADDR.
+	 */
+	s = splnet();
+	if (imo->imo_num_memberships > 0) {
+		in_delmulti(imo->imo_membership[
+		    --imo->imo_num_memberships]);
+		imo->imo_multicast_ifp = NULL;
+	}
+	splx(s);
+
+	vxlan_enable--;
+	LIST_REMOVE(sc, sc_entry);
+
+	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
+	ether_ifdetach(ifp);
+	if_detach(ifp);
+	free(sc->sc_imo.imo_membership, M_IPMOPTS);
+	free(sc, M_DEVBUF);
+
+	return (0);
+}
+
+/*
+ * Interface start routine: drain the send queue.  Each dequeued frame is
+ * counted, shown to bpf listeners (if any) and passed to vxlan_output()
+ * for encapsulation.  The result of vxlan_output() is not examined here.
+ */
+void
+vxlanstart(struct ifnet *ifp)
+{
+	struct mbuf *m0;
+	int s;
+
+	for (;;) {
+		/* Only the dequeue itself runs at splnet */
+		s = splnet();
+		IFQ_DEQUEUE(&ifp->if_snd, m0);
+		splx(s);
+
+		if (m0 == NULL)
+			break;
+
+		ifp->if_opackets++;
+
+#if NBPFILTER > 0
+		if (ifp->if_bpf)
+			bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
+#endif
+
+		vxlan_output(ifp, m0);
+	}
+}
+
+/*
+ * Apply or re-apply the tunnel configuration of a vxlan interface.
+ *
+ * With both `src' and `dst' given they become the new tunnel endpoints
+ * (AF_INET only, EAFNOSUPPORT otherwise).  If either one is NULL, the
+ * addresses already stored in the softc are validated and applied again;
+ * vxlanioctl() does this after changing the rtable, the TTL or the VNI.
+ *
+ * Returns 0 on success or an errno value (EAFNOSUPPORT, EINVAL,
+ * EADDRNOTAVAIL, ENOBUFS).  The callers in this file wrap the call in
+ * splnet()/splx().
+ *
+ * NOTE(review): on the re-apply path a softc without configured addresses
+ * (zeroed, so sin_len is 0) fails the length check with EINVAL before the
+ * rehash at the end is reached; a VNI set before the tunnel addresses
+ * therefore looks like it is not rehashed until the addresses are set -
+ * confirm.
+ */
+int
+vxlan_config(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
+{
+ struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc;
+ struct ip_moptions *imo = &sc->sc_imo;
+#ifdef INET
+ struct sockaddr_in *src4, *dst4;
+ struct ifaddr *ifa;
+#endif
+ int reset = 0;
+
+ if (src != NULL && dst != NULL) {
+ /* XXX inet6 is not supported */
+ if (src->sa_family != AF_INET || dst->sa_family != AF_INET)
+ return (EAFNOSUPPORT);
+ } else {
+ /* Reset current configuration */
+ src = (struct sockaddr *)&sc->sc_src;
+ dst = (struct sockaddr *)&sc->sc_dst;
+ reset = 1;
+ }
+
+#ifdef INET
+ src4 = (struct sockaddr_in *)src;
+ dst4 = (struct sockaddr_in *)dst;
+
+ if (src4->sin_len != sizeof(*src4) || dst4->sin_len != sizeof(*dst4))
+ return (EINVAL);
+
+ /*
+ * A multicast destination needs a specific, non-multicast source
+ * address that is configured (within the tunnel's rtable) on a
+ * multicast capable interface; `ifa' is used for the join below.
+ */
+ if (IN_MULTICAST(dst4->sin_addr.s_addr)) {
+ if (src4->sin_addr.s_addr == INADDR_ANY ||
+ IN_MULTICAST(src4->sin_addr.s_addr))
+ return (EINVAL);
+ if ((ifa = ifa_ifwithaddr((struct sockaddr *)src4,
+ sc->sc_rtableid)) == NULL ||
+ ifa->ifa_ifp == NULL ||
+ (ifa->ifa_ifp->if_flags & IFF_MULTICAST) == 0)
+ return (EADDRNOTAVAIL);
+ }
+#endif
+
+ /* Leave the group joined by a previous configuration, if any */
+ if (imo->imo_num_memberships > 0) {
+ in_delmulti(imo->imo_membership[
+ --imo->imo_num_memberships]);
+ imo->imo_multicast_ifp = NULL;
+ }
+
+#ifdef INET
+ /* Join the destination group on the interface owning the source */
+ if (IN_MULTICAST(dst4->sin_addr.s_addr)) {
+ if ((imo->imo_membership[0] =
+ in_addmulti(&dst4->sin_addr, ifa->ifa_ifp)) == NULL)
+ return (ENOBUFS);
+ imo->imo_num_memberships++;
+ imo->imo_multicast_ifp = ifa->ifa_ifp;
+ if (sc->sc_ttl > 0)
+ imo->imo_multicast_ttl = sc->sc_ttl;
+ else
+ imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
+ imo->imo_multicast_loop = 0;
+ }
+ /* A non-zero destination port replaces the current UDP port */
+ if (dst4->sin_port)
+ sc->sc_dstport = dst4->sin_port;
+#endif
+
+ /* Store the new endpoints; on re-apply they are already in place */
+ if (!reset) {
+ bzero(&sc->sc_src, sizeof(sc->sc_src));
+ bzero(&sc->sc_dst, sizeof(sc->sc_dst));
+ memcpy(&sc->sc_src, src, src->sa_len);
+ memcpy(&sc->sc_dst, dst, dst->sa_len);
+ }
+
+ /* Move the softc to the hash bucket of its (possibly new) VNI */
+ LIST_REMOVE(sc, sc_entry);
+ LIST_INSERT_HEAD(&vxlan_tagh[VXLAN_TAGHASH(sc->sc_vnetid)],
+ sc, sc_entry);
+
+ return (0);
+}
+
+/*
+ * Interface ioctl handler.
+ *
+ * Handles address, flags, multicast and media requests as well as the
+ * tunnel specific ones (endpoint addresses, rtable, TTL, VNI); any other
+ * command is passed on to ether_ioctl().  All requests that modify the
+ * tunnel are checked with suser() first.
+ * Returns 0 on success or an errno value.
+ */
+/* ARGSUSED */
+int
+vxlanioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc;
+#ifdef INET
+ struct ifaddr *ifa = (struct ifaddr *)data;
+#endif
+ /* `data' is interpreted according to `cmd' */
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct if_laddrreq *lifr = (struct if_laddrreq *)data;
+ struct proc *p = curproc;
+ int error = 0, s;
+ struct ip_moptions *imo = &sc->sc_imo;
+
+ switch (cmd) {
+ case SIOCSIFADDR:
+ /* Assigning an address brings the interface up */
+ ifp->if_flags |= IFF_UP;
+#ifdef INET
+ if (ifa->ifa_addr->sa_family == AF_INET)
+ arp_ifinit(&sc->sc_ac, ifa);
+#endif
+ /* FALLTHROUGH */
+
+ case SIOCSIFFLAGS:
+ /* IFF_RUNNING simply follows IFF_UP */
+ if (ifp->if_flags & IFF_UP) {
+ ifp->if_flags |= IFF_RUNNING;
+ } else {
+ ifp->if_flags &= ~IFF_RUNNING;
+ }
+ break;
+
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ if (ifr == 0) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ error = (cmd == SIOCADDMULTI) ?
+ ether_addmulti(ifr, &sc->sc_ac) :
+ ether_delmulti(ifr, &sc->sc_ac);
+ /* ENETRESET is not treated as an error here */
+ if (error == ENETRESET)
+ error = 0;
+ break;
+
+ case SIOCGIFMEDIA:
+ case SIOCSIFMEDIA:
+ error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
+ break;
+
+ case SIOCSLIFPHYADDR:
+ /* Set the tunnel source and destination addresses */
+ if ((error = suser(p, 0)) != 0)
+ break;
+ s = splnet();
+ error = vxlan_config(ifp,
+ (struct sockaddr *)&lifr->addr,
+ (struct sockaddr *)&lifr->dstaddr);
+ splx(s);
+ break;
+
+ case SIOCDIFPHYADDR:
+ /*
+ * Remove the tunnel addresses: leave the multicast group (if
+ * joined), clear both endpoints and restore the default port.
+ */
+ if ((error = suser(p, 0)) != 0)
+ break;
+ s = splnet();
+ if (imo->imo_num_memberships > 0) {
+ in_delmulti(imo->imo_membership[
+ --imo->imo_num_memberships]);
+ imo->imo_multicast_ifp = NULL;
+ }
+ bzero(&sc->sc_src, sizeof(sc->sc_src));
+ bzero(&sc->sc_dst, sizeof(sc->sc_dst));
+ sc->sc_dstport = htons(VXLAN_PORT);
+ splx(s);
+ break;
+
+ case SIOCGLIFPHYADDR:
+ /* Report the tunnel addresses; fails if no destination is set */
+ if (sc->sc_dst.ss_family == AF_UNSPEC) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ bzero(&lifr->addr, sizeof(lifr->addr));
+ bzero(&lifr->dstaddr, sizeof(lifr->dstaddr));
+ memcpy(&lifr->addr, &sc->sc_src, sc->sc_src.ss_len);
+ memcpy(&lifr->dstaddr, &sc->sc_dst, sc->sc_dst.ss_len);
+ break;
+
+ case SIOCSLIFPHYRTABLE:
+ /* Set the routing table used for the tunnel */
+ if ((error = suser(p, 0)) != 0)
+ break;
+ if (ifr->ifr_rdomainid < 0 ||
+ ifr->ifr_rdomainid > RT_TABLEID_MAX ||
+ !rtable_exists(ifr->ifr_rdomainid)) {
+ error = EINVAL;
+ break;
+ }
+ s = splnet();
+ sc->sc_rtableid = ifr->ifr_rdomainid;
+ /* Re-apply the stored configuration; the result is ignored */
+ (void)vxlan_config(ifp, NULL, NULL);
+ splx(s);
+ break;
+
+ case SIOCGLIFPHYRTABLE:
+ ifr->ifr_rdomainid = sc->sc_rtableid;
+ break;
+
+ case SIOCSLIFPHYTTL:
+ /* Set the tunnel TTL (0-255); an unchanged value is a no-op */
+ if ((error = suser(p, 0)) != 0)
+ break;
+ if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) {
+ error = EINVAL;
+ break;
+ }
+ if (sc->sc_ttl == (u_int8_t)ifr->ifr_ttl)
+ break;
+ s = splnet();
+ sc->sc_ttl = (u_int8_t)(ifr->ifr_ttl);
+ /* Re-apply the stored configuration; the result is ignored */
+ (void)vxlan_config(ifp, NULL, NULL);
+ splx(s);
+ break;
+
+ case SIOCGLIFPHYTTL:
+ ifr->ifr_ttl = (int)sc->sc_ttl;
+ break;
+
+ case SIOCSVNETID:
+ /* Set the virtual network identifier, limited to 24 bits */
+ if ((error = suser(p, 0)) != 0)
+ break;
+ if (ifr->ifr_vnetid < 0 ||
+ ifr->ifr_vnetid > 0x00ffffff) {
+ error = EINVAL;
+ break;
+ }
+ s = splnet();
+ sc->sc_vnetid = (u_int32_t)ifr->ifr_vnetid;
+ /* vxlan_config() rehashes the softc; the result is ignored */
+ (void)vxlan_config(ifp, NULL, NULL);
+ splx(s);
+ break;
+
+ case SIOCGVNETID:
+ ifr->ifr_vnetid = (int)sc->sc_vnetid;
+ break;
+
+ default:
+ error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
+ break;
+ }
+
+ return (error);
+}
+
+/*
+ * ifmedia change callback.  There is nothing to reconfigure, so every
+ * request is accepted.  Always returns 0.
+ */
+int
+vxlan_media_change(struct ifnet *ifp)
+{
+	int error = 0;
+
+	return (error);
+}
+
+/*
+ * ifmedia status callback: always reports a valid, active link with
+ * the only media type that was registered (Ethernet, autoselect).
+ */
+void
+vxlan_media_status(struct ifnet *ifp, struct ifmediareq *imr)
+{
+	imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
+	imr->ifm_active = IFM_ETHER | IFM_AUTO;
+}
+
+/*
+ * Input hook for UDP datagrams (called from udp_input() while at least
+ * one vxlan interface exists).
+ *
+ * Parses the VXLAN header that follows the IP (`iphlen' bytes) and UDP
+ * headers, looks up the interface matching the destination port, VNI and
+ * routing table and, if found, feeds the inner Ethernet frame into
+ * ether_input() on that interface.  `srcsa' is the outer source address;
+ * it is attached to the packet for the bridge when the interface is a
+ * bridge member.
+ *
+ * Return value, as interpreted by udp_input():
+ *	 0	not for us; `m' is untouched and normal UDP input continues
+ *	-1	matched an interface but malformed; the caller frees `m'
+ *	other	`m' has been consumed (delivered, or freed on failure)
+ */
+int
+vxlan_lookup(struct mbuf *m, struct udphdr *uh, int iphlen,
+    struct sockaddr *srcsa)
+{
+	struct vxlan_softc *sc = NULL;
+	struct vxlan_header v;
+	u_int32_t vni;
+	struct ifnet *ifp;
+	int skip;
+	struct ether_header *eh;
+#if NBRIDGE > 0
+	struct sockaddr *sa;
+#endif
+
+	/* XXX Should verify the UDP port first before copying the packet */
+	skip = iphlen + sizeof(*uh);
+	/*
+	 * Compare as signed ints: mixing the difference with sizeof() would
+	 * convert a negative value to a huge unsigned one and pass the test.
+	 */
+	if (m->m_pkthdr.len < skip + (int)sizeof(v))
+		return (0);
+	m_copydata(m, skip, sizeof(v), (caddr_t)&v);
+	skip += sizeof(v);
+
+	vni = ntohl(v.vxlan_id);
+
+	/*
+	 * Validate header: the VNI flag must be the only flag set and the
+	 * reserved low byte of the id field must be zero.
+	 * NOTE(review): the `vni == 0' test also rejects VNI 0, which is
+	 * the default sc_vnetid of a new interface - confirm intended.
+	 */
+	if ((vni == 0) || (vni & VXLAN_RESERVED2) ||
+	    (ntohl(v.vxlan_flags) != VXLAN_FLAGS_VNI))
+		return (0);
+
+	vni >>= VXLAN_VNI_S;
+	LIST_FOREACH(sc, &vxlan_tagh[VXLAN_TAGHASH(vni)],
+	    sc_entry) {
+		if ((uh->uh_dport == sc->sc_dstport) &&
+		    vni == sc->sc_vnetid &&
+		    sc->sc_rtableid == rtable_l2(m->m_pkthdr.rdomain))
+			goto found;
+	}
+
+	/* not found */
+	return (0);
+
+ found:
+	ifp = &sc->sc_ac.ac_if;
+
+	/* No room for an inner Ethernet header: let the caller drop it */
+	if (m->m_pkthdr.len - skip < ETHER_HDR_LEN) {
+		ifp->if_ierrors++;
+		return (-1);
+	}
+
+	m_adj(m, skip);
+
+	/*
+	 * mtod() is only valid if the whole Ethernet header lives in the
+	 * first mbuf.  m_pullup() frees the chain on failure, so report the
+	 * packet as consumed with a value other than 0 and -1.
+	 */
+	if (m->m_len < ETHER_HDR_LEN &&
+	    (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
+		ifp->if_ierrors++;
+		return (ENOBUFS);
+	}
+
+	m->m_pkthdr.rcvif = ifp;
+	eh = mtod(m, struct ether_header *);
+
+#if NBRIDGE > 0
+	/* Store the peer IP address for the bridge */
+	if (ifp->if_bridgeport != NULL &&
+	    srcsa->sa_family != AF_UNSPEC &&
+	    (sa = bridge_tunneltag(m, srcsa->sa_family)) != NULL)
+		memcpy(sa, srcsa, sa->sa_len);
+#endif
+
+	/* Clear multicast flag from the outer packet */
+	if (sc->sc_imo.imo_num_memberships > 0 &&
+	    m->m_flags & (M_MCAST) &&
+	    !ETHER_IS_MULTICAST(eh->ether_dhost))
+		m->m_flags &= ~M_MCAST;
+
+#if NBPFILTER > 0
+	if (ifp->if_bpf)
+		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN);
+#endif
+
+	/* ether_input() gets the header via `eh', not in the mbuf data */
+	m_adj(m, ETHER_HDR_LEN);
+
+#if NPF > 0
+	pf_pkt_addr_changed(m);
+#endif
+
+	ifp->if_ipackets++;
+	ether_input(ifp, eh, m);
+
+	/* success */
+	return (1);
+}
+
+/*
+ * Encapsulate an Ethernet frame and send it to the tunnel peer.
+ *
+ * Prepends the IP, UDP and VXLAN headers and hands the datagram to
+ * ip_output().  The destination is the configured tunnel endpoint
+ * unless the bridge attached a per-packet tunnel address, which then
+ * takes precedence (and, with IFF_LINK0 set, also provides the
+ * destination port).
+ *
+ * `m' is always consumed.  Returns 0 on success or an errno value; on
+ * failure if_oerrors is incremented.
+ */
+int
+vxlan_output(struct ifnet *ifp, struct mbuf *m)
+{
+	struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc;
+#ifdef INET
+	struct udpiphdr *ui;
+	struct vxlanudpiphdr *vi;
+	u_int16_t len;
+	struct ip *ip;
+#if NBRIDGE > 0
+	struct sockaddr_in *sin;
+#endif
+#endif
+	int error = 0;
+
+#ifdef INET
+	/*
+	 * The complete outer datagram has to fit into the 16-bit IP total
+	 * length; refuse the frame rather than emit a truncated length.
+	 */
+	if (m->m_pkthdr.len > IP_MAXPACKET - (int)sizeof(*vi)) {
+		m_freem(m);
+		ifp->if_oerrors++;
+		return (EMSGSIZE);
+	}
+	len = m->m_pkthdr.len;
+
+	/* VXLAN header */
+	M_PREPEND(m, sizeof(*vi), M_DONTWAIT);
+	if (m == NULL) {
+		ifp->if_oerrors++;
+		return (ENOBUFS);
+	}
+
+	/* From here on `len' is the UDP payload: VXLAN header + frame */
+	len += sizeof(struct vxlan_header);
+
+	ui = mtod(m, struct udpiphdr *);
+	ui->ui_pr = IPPROTO_UDP;
+	ui->ui_src = ((struct sockaddr_in *)&sc->sc_src)->sin_addr;
+	ui->ui_dst = ((struct sockaddr_in *)&sc->sc_dst)->sin_addr;
+	ui->ui_sport = sc->sc_dstport;
+	ui->ui_dport = sc->sc_dstport;
+	ui->ui_ulen = htons(sizeof(struct udphdr) + len);
+
+	ip = (struct ip *)ui;
+	ip->ip_v = IPVERSION;
+	ip->ip_hl = sizeof(struct ip) >> 2;
+	ip->ip_id = htons(ip_randomid());
+	ip->ip_off = 0; /* htons(IP_DF); XXX should we disallow IP fragments? */
+	ip->ip_tos = IPTOS_LOWDELAY;
+	ip->ip_len = htons(sizeof(struct udpiphdr) + len);
+	if (sc->sc_ttl > 0)
+		ip->ip_ttl = sc->sc_ttl;
+	else
+		ip->ip_ttl = IPDEFTTL;
+
+#if NBRIDGE > 0
+	/* A tunnel address attached by the bridge overrides the default */
+	if ((sin = (struct sockaddr_in *)bridge_tunnel(m)) != NULL &&
+	    sin->sin_family == AF_INET) {
+		ui->ui_dst = sin->sin_addr;
+
+		/*
+		 * If the LINK0 flag is set, send the packet back to
+		 * the original source port of the endpoint, otherwise use
+		 * the configured VXLAN port.
+		 */
+		if (ifp->if_flags & IFF_LINK0)
+			ui->ui_dport = sin->sin_port;
+	}
+	if (sin != NULL)
+		bridge_tunneluntag(m);
+#endif
+
+	vi = (struct vxlanudpiphdr *)ui;
+	vi->ui_v.vxlan_flags = htonl(VXLAN_FLAGS_VNI);
+	vi->ui_v.vxlan_id = htonl(sc->sc_vnetid << VXLAN_VNI_S);
+
+	/* UDP checksum should be 0 */
+	ui->ui_sum = 0;
+#endif
+
+	/*
+	 * if_opackets is not touched here: vxlanstart() already counts
+	 * every frame it dequeues before calling us.
+	 */
+	ifp->if_obytes += m->m_pkthdr.len;
+
+	m->m_pkthdr.rdomain = sc->sc_rtableid;
+
+#if NPF > 0
+	pf_pkt_addr_changed(m);
+#endif
+
+#ifdef INET
+	if ((error =
+	    ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))) {
+		ifp->if_oerrors++;
+	}
+#else
+	/* No transport compiled in: drop instead of leaking the mbuf */
+	m_freem(m);
+	ifp->if_oerrors++;
+	error = EAFNOSUPPORT;
+#endif
+
+	return (error);
+}
diff --git a/sys/net/if_vxlan.h b/sys/net/if_vxlan.h
new file mode 100644
index 00000000000..47518e47e91
--- /dev/null
+++ b/sys/net/if_vxlan.h
@@ -0,0 +1,77 @@
+/* $OpenBSD: if_vxlan.h,v 1.1 2013/10/13 10:10:03 reyk Exp $ */
+
+/*
+ * Copyright (c) 2013 Reyk Floeter <reyk@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _NET_VXLAN_H
+#define _NET_VXLAN_H
+
+#define VXLANMTU 1492
+#define VXLAN_HDRLEN 8
+#define VXLAN_PORT 4789
+
+/*
+ * VXLAN header as found on the wire, directly after the UDP header.
+ * Both words are converted with htonl()/ntohl() by the code using them.
+ */
+struct vxlan_header {
+ /* Flags word; VXLAN_FLAGS_VNI marks a valid VNI, the rest is reserved */
+ u_int32_t vxlan_flags;
+#define VXLAN_FLAGS_VNI 0x08000000
+#define VXLAN_RESERVED1 0xf7ffffff
+ /* 24-bit VNI in the upper bits (VXLAN_VNI_S), low byte reserved */
+ u_int32_t vxlan_id;
+#define VXLAN_VNI 0xffffff00
+#define VXLAN_VNI_S 8
+#define VXLAN_RESERVED2 0x000000ff
+} __packed;
+
+#ifdef INET
+/*
+ * Complete IPv4 encapsulation header: IP overlay header, UDP header and
+ * VXLAN header, laid out back to back.
+ */
+struct vxlanudpiphdr {
+ struct ipovly ui_i;
+ struct udphdr ui_u;
+ struct vxlan_header ui_v;
+} __packed;
+#endif
+
+#define SIOCGETVXLAN SIOCGETPFLOW
+#define SIOCSETVXLAN SIOCSETPFLOW
+
+/*
+ * Request structure for SIOCGETVXLAN/SIOCSETVXLAN.  It currently carries
+ * a single reserved byte only.
+ * NOTE(review): no user of this structure is visible here - confirm.
+ */
+struct vxlanreq {
+ u_int8_t vxlan_reserved;
+};
+
+#ifdef _KERNEL
+/* Per-interface state of a vxlan interface */
+struct vxlan_softc {
+ /* Ethernet common state; sc_ac.ac_if is the interface itself */
+ struct arpcom sc_ac;
+ /* ifmedia state (a single fixed Ethernet/auto entry) */
+ struct ifmedia sc_media;
+ /* Multicast options, used when the tunnel destination is a group */
+ struct ip_moptions sc_imo;
+ /* Tunnel source and destination address */
+ struct sockaddr_storage sc_src;
+ struct sockaddr_storage sc_dst;
+ /* UDP port in network byte order, htons(VXLAN_PORT) by default */
+ in_port_t sc_dstport;
+ /* Routing table used for the tunnel traffic */
+ u_int sc_rtableid;
+ /* Virtual network identifier (24 bit), host byte order */
+ u_int32_t sc_vnetid;
+ /* Tunnel TTL; 0 selects the system default */
+ u_int8_t sc_ttl;
+
+ /* Linkage in the VNI hash table */
+ LIST_ENTRY(vxlan_softc) sc_entry;
+};
+
+/* Number of existing vxlan interfaces; checked before vxlan_lookup() */
+extern int vxlan_enable;
+
+/* UDP input hook: mbuf, UDP header, IP header length, source address */
+int vxlan_lookup(struct mbuf *, struct udphdr *, int,
+ struct sockaddr *);
+/*
+ * NOTE(review): no definitions of the vxlan_tag_*() functions are visible
+ * in if_vxlan.c, which uses the bridge_tunnel*() helpers instead -
+ * confirm whether these prototypes are still needed.
+ */
+struct sockaddr *vxlan_tag_find(struct mbuf *);
+struct sockaddr *vxlan_tag_get(struct mbuf *, int);
+void vxlan_tag_delete(struct mbuf *);
+
+#endif /* _KERNEL */
+
+#endif /* _NET_VXLAN_H */
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index 634f3f4c82a..beea6cf28a0 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: udp_usrreq.c,v 1.166 2013/09/06 18:35:16 bluhm Exp $ */
+/* $OpenBSD: udp_usrreq.c,v 1.167 2013/10/13 10:10:04 reyk Exp $ */
/* $NetBSD: udp_usrreq.c,v 1.28 1996/03/16 23:54:03 christos Exp $ */
/*
@@ -78,6 +78,7 @@
#include <sys/sysctl.h>
#include <net/if.h>
+#include <net/if_media.h>
#include <net/route.h>
#include <netinet/in.h>
@@ -113,6 +114,11 @@
#include <net/pipex.h>
#endif
+#include "vxlan.h"
+#if NVXLAN > 0
+#include <net/if_vxlan.h>
+#endif
+
/*
* UDP protocol implementation.
* Per RFC 768, August, 1980.
@@ -384,6 +390,20 @@ udp_input(struct mbuf *m, ...)
#endif /* INET6 */
}
+#if NVXLAN > 0
+ if (vxlan_enable > 0 &&
+#if NPF > 0
+ !(m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) &&
+#endif
+ (error = vxlan_lookup(m, uh, iphlen, &srcsa.sa)) != 0) {
+ if (error == -1) {
+ udpstat.udps_hdrops++;
+ m_freem(m);
+ }
+ return;
+ }
+#endif
+
#ifdef INET6
if ((ip6 && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) ||
(ip && IN_MULTICAST(ip->ip_dst.s_addr)) ||
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index 376d1341c04..2fed3c09910 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: mbuf.h,v 1.167 2013/08/21 05:21:46 dlg Exp $ */
+/* $OpenBSD: mbuf.h,v 1.168 2013/10/13 10:10:04 reyk Exp $ */
/* $NetBSD: mbuf.h,v 1.19 1996/02/09 18:25:14 christos Exp $ */
/*
@@ -454,6 +454,7 @@ struct m_tag *m_tag_next(struct mbuf *, struct m_tag *);
#define PACKET_TAG_PIPEX 0x0400 /* pipex session cache */
#define PACKET_TAG_PF_REASSEMBLED 0x0800 /* pf reassembled ipv6 packet */
#define PACKET_TAG_SRCROUTE 0x1000 /* IPv4 source routing options */
+#define PACKET_TAG_TUNNEL 0x2000 /* Tunnel endpoint address */
/*
* Maximum tag payload length (that is excluding the m_tag structure).
diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h
index 7f8faba576f..aa220bb6d69 100644
--- a/sys/sys/sockio.h
+++ b/sys/sys/sockio.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: sockio.h,v 1.52 2013/08/16 12:29:18 mpi Exp $ */
+/* $OpenBSD: sockio.h,v 1.53 2013/10/13 10:10:04 reyk Exp $ */
/* $NetBSD: sockio.h,v 1.5 1995/08/23 00:40:47 thorpej Exp $ */
/*-
@@ -184,6 +184,12 @@
#define SIOCGIFHARDMTU _IOWR('i', 165, struct ifreq) /* get ifnet hardmtu */
+#define SIOCSVNETID _IOW('i', 166, struct ifreq) /* set virt net id */
+#define SIOCGVNETID _IOWR('i', 167, struct ifreq) /* get virt net id */
+
+#define SIOCSLIFPHYTTL _IOW('i', 168, struct ifreq) /* set tunnel ttl */
+#define SIOCGLIFPHYTTL _IOWR('i', 169, struct ifreq) /* get tunnel ttl */
+
#define SIOCSVH _IOWR('i', 245, struct ifreq) /* set carp param */
#define SIOCGVH _IOWR('i', 246, struct ifreq) /* get carp param */