diff options
author | Marco Pfatschbacher <mpf@cvs.openbsd.org> | 2008-06-15 06:56:10 +0000 |
---|---|---|
committer | Marco Pfatschbacher <mpf@cvs.openbsd.org> | 2008-06-15 06:56:10 +0000 |
commit | 74c7f11fe6f9d45c4bff30198f3a7eb688b078b8 (patch) | |
tree | 416a4c0aaf59983030ba52a4ed8c8d11b0068789 | |
parent | fd6388fffd794075b228479299e84d73c2bdb675 (diff) |
Add 802.3ad LACP support for trunk(4).
Implementation from NetBSD. Ported via FreeBSD's version in trunk^Wlagg(4).
This is still work in progress. Tested with a HP ProCurve 3500.
OK reyk@
-rw-r--r-- | sbin/ifconfig/ifconfig.c | 16 | ||||
-rw-r--r-- | sys/conf/files | 3 | ||||
-rw-r--r-- | sys/net/if_trunk.c | 142 | ||||
-rw-r--r-- | sys/net/if_trunk.h | 51 | ||||
-rw-r--r-- | sys/net/trunklacp.c | 1887 | ||||
-rw-r--r-- | sys/net/trunklacp.h | 333 |
6 files changed, 2416 insertions, 16 deletions
diff --git a/sbin/ifconfig/ifconfig.c b/sbin/ifconfig/ifconfig.c index 35ff1961498..a78786d78c1 100644 --- a/sbin/ifconfig/ifconfig.c +++ b/sbin/ifconfig/ifconfig.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ifconfig.c,v 1.199 2008/06/14 21:46:22 reyk Exp $ */ +/* $OpenBSD: ifconfig.c,v 1.200 2008/06/15 06:56:09 mpf Exp $ */ /* $NetBSD: ifconfig.c,v 1.40 1997/10/01 02:19:43 enami Exp $ */ /* @@ -4013,6 +4013,7 @@ trunk_status(void) struct trunk_protos tpr[] = TRUNK_PROTOS; struct trunk_reqport rp, rpbuf[TRUNK_MAX_PORTS]; struct trunk_reqall ra; + struct lacp_opreq *lp; const char *proto = "<unknown>"; int i, isport = 0; @@ -4030,6 +4031,8 @@ trunk_status(void) ra.ra_port = rpbuf; if (ioctl(s, SIOCGTRUNK, &ra) == 0) { + lp = (struct lacp_opreq *)&ra.ra_lacpreq; + for (i = 0; i < (sizeof(tpr) / sizeof(tpr[0])); i++) { if (ra.ra_proto == tpr[i].tpr_proto) { proto = tpr[i].tpr_name; @@ -4041,6 +4044,17 @@ trunk_status(void) if (isport) printf(" trunkdev %s", rp.rp_ifname); putchar('\n'); + if (ra.ra_proto == TRUNK_PROTO_LACP) { + printf("\ttrunk id: [(%04X,%s,%04X,%04X,%04X),\n" + "\t\t (%04X,%s,%04X,%04X,%04X)]\n", + lp->actor_prio, + ether_ntoa((struct ether_addr*)lp->actor_mac), + lp->actor_key, lp->actor_portprio, lp->actor_portno, + lp->partner_prio, + ether_ntoa((struct ether_addr*)lp->partner_mac), + lp->partner_key, lp->partner_portprio, + lp->partner_portno); + } for (i = 0; i < ra.ra_ports; i++) { printf("\t\ttrunkport %s ", rpbuf[i].rp_portname); diff --git a/sys/conf/files b/sys/conf/files index c841b5267ce..0b9f9238b09 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,4 +1,4 @@ -# $OpenBSD: files,v 1.435 2008/06/12 06:58:38 deraadt Exp $ +# $OpenBSD: files,v 1.436 2008/06/15 06:56:09 mpf Exp $ # $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $ # @(#)files.newconf 7.5 (Berkeley) 5/10/93 @@ -777,6 +777,7 @@ file net/slcompress.c sl | ppp | strip file net/if_enc.c enc needs-count file net/if_gre.c gre needs-count file net/if_trunk.c trunk needs-count +file 
net/trunklacp.c trunk file net/if_mpe.c mpe needs-count file net80211/ieee80211.c wlan file net80211/ieee80211_amrr.c wlan diff --git a/sys/net/if_trunk.c b/sys/net/if_trunk.c index 3f6c0306f68..0105cebdabc 100644 --- a/sys/net/if_trunk.c +++ b/sys/net/if_trunk.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_trunk.c,v 1.45 2008/06/14 01:18:53 mpf Exp $ */ +/* $OpenBSD: if_trunk.c,v 1.46 2008/06/15 06:56:09 mpf Exp $ */ /* * Copyright (c) 2005, 2006, 2007 Reyk Floeter <reyk@openbsd.org> @@ -56,6 +56,8 @@ #include <net/if_vlan_var.h> #include <net/if_trunk.h> +#include <net/trunklacp.h> + SLIST_HEAD(__trhead, trunk_softc) trunk_list; /* list of trunks */ @@ -126,6 +128,13 @@ int trunk_bcast_start(struct trunk_softc *, struct mbuf *); int trunk_bcast_input(struct trunk_softc *, struct trunk_port *, struct ether_header *, struct mbuf *); +/* 802.3ad LACP */ +int trunk_lacp_attach(struct trunk_softc *); +int trunk_lacp_detach(struct trunk_softc *); +int trunk_lacp_start(struct trunk_softc *, struct mbuf *); +int trunk_lacp_input(struct trunk_softc *, struct trunk_port *, + struct ether_header *, struct mbuf *); + /* Trunk protocol table */ static const struct { enum trunk_proto ti_proto; @@ -135,6 +144,7 @@ static const struct { { TRUNK_PROTO_FAILOVER, trunk_fail_attach }, { TRUNK_PROTO_LOADBALANCE, trunk_lb_attach }, { TRUNK_PROTO_BROADCAST, trunk_bcast_attach }, + { TRUNK_PROTO_LACP, trunk_lacp_attach }, { TRUNK_PROTO_NONE, NULL } }; @@ -576,12 +586,35 @@ void trunk_port2req(struct trunk_port *tp, struct trunk_reqport *rp) { struct trunk_softc *tr = (struct trunk_softc *)tp->tp_trunk; + strlcpy(rp->rp_ifname, tr->tr_ifname, sizeof(rp->rp_ifname)); strlcpy(rp->rp_portname, tp->tp_if->if_xname, sizeof(rp->rp_portname)); rp->rp_prio = tp->tp_prio; - rp->rp_flags = tp->tp_flags; - if (TRUNK_PORTACTIVE(tp)) - rp->rp_flags |= TRUNK_PORT_ACTIVE; + if (tr->tr_portreq != NULL) + (*tr->tr_portreq)(tp, (caddr_t)&rp->rp_psc); + + /* Add protocol specific flags */ + switch (tr->tr_proto) { + 
case TRUNK_PROTO_FAILOVER: + case TRUNK_PROTO_ROUNDROBIN: + case TRUNK_PROTO_LOADBALANCE: + case TRUNK_PROTO_BROADCAST: + if (TRUNK_PORTACTIVE(tp)) + rp->rp_flags |= TRUNK_PORT_ACTIVE; + break; + + case TRUNK_PROTO_LACP: + /* LACP has a different definition of active */ + if (lacp_isactive(tp)) + rp->rp_flags |= TRUNK_PORT_ACTIVE; + if (lacp_iscollecting(tp)) + rp->rp_flags |= TRUNK_PORT_COLLECTING; + if (lacp_isdistributing(tp)) + rp->rp_flags |= TRUNK_PORT_DISTRIBUTING; + break; + default: + break; + } } int @@ -606,6 +639,8 @@ trunk_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) switch (cmd) { case SIOCGTRUNK: ra->ra_proto = tr->tr_proto; + if (tr->tr_req != NULL) + (*tr->tr_req)(tr, (caddr_t)&ra->ra_psc); ra->ra_ports = i = 0; tp = SLIST_FIRST(&tr->tr_ports); while (tp && ra->ra_size >= @@ -698,7 +733,6 @@ trunk_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; case SIOCSIFADDR: ifp->if_flags |= IFF_UP; - #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) arp_ifinit(&tr->tr_ac, ifa); @@ -1200,6 +1234,8 @@ trunk_rr_attach(struct trunk_softc *tr) tr->tr_port_create = NULL; tr->tr_port_destroy = trunk_rr_port_destroy; tr->tr_capabilities = IFCAP_TRUNK_FULLDUPLEX; + tr->tr_req = NULL; + tr->tr_portreq = NULL; tp = SLIST_FIRST(&tr->tr_ports); tr->tr_psc = (caddr_t)tp; @@ -1272,6 +1308,8 @@ trunk_fail_attach(struct trunk_softc *tr) tr->tr_port_create = NULL; tr->tr_port_destroy = NULL; tr->tr_linkstate = NULL; + tr->tr_req = NULL; + tr->tr_portreq = NULL; return (0); } @@ -1349,6 +1387,8 @@ trunk_lb_attach(struct trunk_softc *tr) tr->tr_port_destroy = trunk_lb_port_destroy; tr->tr_linkstate = NULL; tr->tr_capabilities = IFCAP_TRUNK_FULLDUPLEX; + tr->tr_req = NULL; + tr->tr_portreq = NULL; lb->lb_key = arc4random(); tr->tr_psc = (caddr_t)lb; @@ -1468,6 +1508,8 @@ trunk_bcast_attach(struct trunk_softc *tr) tr->tr_port_create = NULL; tr->tr_port_destroy = NULL; tr->tr_linkstate = NULL; + tr->tr_req = NULL; + tr->tr_portreq = NULL; return (0); } @@ -1520,3 
+1562,93 @@ trunk_bcast_input(struct trunk_softc *tr, struct trunk_port *tp, m->m_pkthdr.rcvif = ifp; return (0); } + +/* + * 802.3ad LACP + */ + +int +trunk_lacp_attach(struct trunk_softc *tr) +{ + struct trunk_port *tp; + int error; + + tr->tr_detach = trunk_lacp_detach; + tr->tr_port_create = lacp_port_create; + tr->tr_port_destroy = lacp_port_destroy; + tr->tr_linkstate = lacp_linkstate; + tr->tr_start = trunk_lacp_start; + tr->tr_input = trunk_lacp_input; + tr->tr_init = lacp_init; + tr->tr_stop = lacp_stop; + tr->tr_req = lacp_req; + tr->tr_portreq = lacp_portreq; + + error = lacp_attach(tr); + if (error) + return (error); + + SLIST_FOREACH(tp, &tr->tr_ports, tp_entries) + lacp_port_create(tp); + + return (error); +} + +int +trunk_lacp_detach(struct trunk_softc *tr) +{ + struct trunk_port *tp; + int error; + + SLIST_FOREACH(tp, &tr->tr_ports, tp_entries) + lacp_port_destroy(tp); + + /* unlocking is safe here */ + error = lacp_detach(tr); + + return (error); +} + +int +trunk_lacp_start(struct trunk_softc *tr, struct mbuf *m) +{ + struct trunk_port *tp; + + tp = lacp_select_tx_port(tr, m); + if (tp == NULL) { + m_freem(m); + return (EBUSY); + } + + /* Send mbuf */ + return (trunk_enqueue(tp->tp_if, m)); +} + +int +trunk_lacp_input(struct trunk_softc *tr, struct trunk_port *tp, + struct ether_header *eh, struct mbuf *m) +{ + struct ifnet *ifp = &tr->tr_ac.ac_if; + u_short etype; + + etype = ntohs(eh->ether_type); + + /* Tap off LACP control messages */ + if (etype == ETHERTYPE_SLOW) { + m = lacp_input(tp, m); + if (m == NULL) + return (-1); + } + + /* + * If the port is not collecting or not in the active aggregator then + * free and return. 
+ */ + if (lacp_iscollecting(tp) == 0 || lacp_isactive(tp) == 0) { + m_freem(m); + return (-1); + } + + m->m_pkthdr.rcvif = ifp; + return (0); +} diff --git a/sys/net/if_trunk.h b/sys/net/if_trunk.h index e00b57f3157..6b26d02876b 100644 --- a/sys/net/if_trunk.h +++ b/sys/net/if_trunk.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if_trunk.h,v 1.15 2008/06/13 07:03:45 mpf Exp $ */ +/* $OpenBSD: if_trunk.h,v 1.16 2008/06/15 06:56:09 mpf Exp $ */ /* * Copyright (c) 2005, 2006, 2007 Reyk Floeter <reyk@openbsd.org> @@ -28,13 +28,16 @@ #define TRUNK_MAX_STACKING 4 /* maximum number of stacked trunks */ /* Port flags */ -#define TRUNK_PORT_SLAVE 0x00000000 /* normal enslaved port */ -#define TRUNK_PORT_MASTER 0x00000001 /* primary port */ -#define TRUNK_PORT_STACK 0x00000002 /* stacked trunk port */ -#define TRUNK_PORT_ACTIVE 0x00000004 /* port is active */ -#define TRUNK_PORT_GLOBAL 0x80000000 /* IOCTL: global flag */ -#define TRUNK_PORT_BITS \ - "\20\01MASTER\02STACK\03ACTIVE" +#define TRUNK_PORT_SLAVE 0x00000000 /* normal enslaved port */ +#define TRUNK_PORT_MASTER 0x00000001 /* primary port */ +#define TRUNK_PORT_STACK 0x00000002 /* stacked trunk port */ +#define TRUNK_PORT_ACTIVE 0x00000004 /* port is active */ +#define TRUNK_PORT_COLLECTING 0x00000008 /* port is receiving frames */ +#define TRUNK_PORT_DISTRIBUTING 0x00000010 /* port is sending frames */ +#define TRUNK_PORT_DISABLED 0x00000020 /* port is disabled */ +#define TRUNK_PORT_GLOBAL 0x80000000 /* IOCTL: global flag */ +#define TRUNK_PORT_BITS "\20\01MASTER\02STACK\03ACTIVE" \ + "\04COLLECTING\05DISTRIBUTING\06DISABLED" /* Supported trunk PROTOs */ enum trunk_proto { @@ -43,7 +46,8 @@ enum trunk_proto { TRUNK_PROTO_FAILOVER = 2, /* active failover */ TRUNK_PROTO_LOADBALANCE = 3, /* loadbalance */ TRUNK_PROTO_BROADCAST = 4, /* broadcast */ - TRUNK_PROTO_MAX = 5 + TRUNK_PROTO_LACP = 5, /* 802.3ad LACP */ + TRUNK_PROTO_MAX = 6 }; struct trunk_protos { @@ -55,6 +59,7 @@ struct trunk_protos { #define TRUNK_PROTOS { \ { 
"roundrobin", TRUNK_PROTO_ROUNDROBIN }, \ { "failover", TRUNK_PROTO_FAILOVER }, \ + { "lacp", TRUNK_PROTO_LACP }, \ { "loadbalance", TRUNK_PROTO_LOADBALANCE }, \ { "broadcast", TRUNK_PROTO_BROADCAST }, \ { "none", TRUNK_PROTO_NONE }, \ @@ -65,12 +70,34 @@ struct trunk_protos { * Trunk ioctls. */ +/* + * LACP current operational parameters structure. + */ +struct lacp_opreq { + u_int16_t actor_prio; + u_int8_t actor_mac[ETHER_ADDR_LEN]; + u_int16_t actor_key; + u_int16_t actor_portprio; + u_int16_t actor_portno; + u_int8_t actor_state; + u_int16_t partner_prio; + u_int8_t partner_mac[ETHER_ADDR_LEN]; + u_int16_t partner_key; + u_int16_t partner_portprio; + u_int16_t partner_portno; + u_int8_t partner_state; +}; + /* Trunk port settings */ struct trunk_reqport { char rp_ifname[IFNAMSIZ]; /* name of the trunk */ char rp_portname[IFNAMSIZ]; /* name of the port */ u_int32_t rp_prio; /* port priority */ u_int32_t rp_flags; /* port flags */ + union { + struct lacp_opreq rpsc_lacp; + } rp_psc; +#define rp_lacpreq rp_psc.rpsc_lacp }; #define SIOCGTRUNKPORT _IOWR('i', 140, struct trunk_reqport) @@ -85,6 +112,10 @@ struct trunk_reqall { size_t ra_size; /* size of buffer */ struct trunk_reqport *ra_port; /* allocated buffer */ int ra_ports; /* total port count */ + union { + struct lacp_opreq rpsc_lacp; + } ra_psc; +#define ra_lacpreq ra_psc.rpsc_lacp }; #define SIOCGTRUNK _IOWR('i', 143, struct trunk_reqall) @@ -169,6 +200,8 @@ struct trunk_softc { void (*tr_linkstate)(struct trunk_port *); void (*tr_init)(struct trunk_softc *); void (*tr_stop)(struct trunk_softc *); + void (*tr_req)(struct trunk_softc *, caddr_t); + void (*tr_portreq)(struct trunk_port *, caddr_t); }; #define tr_ifflags tr_ac.ac_if.if_flags /* flags */ diff --git a/sys/net/trunklacp.c b/sys/net/trunklacp.c new file mode 100644 index 00000000000..61e1492fed6 --- /dev/null +++ b/sys/net/trunklacp.c @@ -0,0 +1,1887 @@ +/* $OpenBSD: trunklacp.c,v 1.1 2008/06/15 06:56:09 mpf Exp $ */ +/* $NetBSD: 
ieee8023ad_lacp.c,v 1.3 2005/12/11 12:24:54 christos Exp $ */ +/* $FreeBSD:ieee8023ad_lacp.c,v 1.15 2008/03/16 19:25:30 thompsa Exp $ */ + +/* + * Copyright (c)2005 YAMAMOTO Takashi, + * Copyright (c)2008 Andrew Thompson <thompsa@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> + +#include <sys/param.h> +#include <sys/mbuf.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/kernel.h> /* hz */ +#include <sys/socket.h> /* for net/if.h */ +#include <sys/sockio.h> +#include <sys/lock.h> +#include <sys/rwlock.h> +#include <sys/queue.h> +#include <sys/timeout.h> +#include <dev/rndvar.h> + +#include <net/if.h> +#include <net/if_dl.h> +#include <net/ethertypes.h> +#include <net/if_media.h> +#include <net/if_types.h> + +#include <netinet/in.h> +#include <netinet/if_ether.h> + +#include "if_trunk.h" +#include "trunklacp.h" + +/* + * actor system priority and port priority. + * XXX should be configurable. + */ + +#define LACP_SYSTEM_PRIO 0x8000 +#define LACP_PORT_PRIO 0x8000 + +const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] = + { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 }; + +static const struct tlv_template lacp_info_tlv_template[] = { + { LACP_TYPE_ACTORINFO, + sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) }, + { LACP_TYPE_PARTNERINFO, + sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) }, + { LACP_TYPE_COLLECTORINFO, + sizeof(struct tlvhdr) + sizeof(struct lacp_collectorinfo) }, + { 0, 0 }, +}; + +static const struct tlv_template marker_info_tlv_template[] = { + { MARKER_TYPE_INFO, + sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) }, + { 0, 0 }, +}; + +static const struct tlv_template marker_response_tlv_template[] = { + { MARKER_TYPE_RESPONSE, + sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) }, + { 0, 0 }, +}; + +typedef void (*lacp_timer_func_t)(struct lacp_port *); + +static void lacp_fill_actorinfo(struct lacp_port *, struct lacp_peerinfo *); +static void lacp_fill_markerinfo(struct lacp_port *, + struct lacp_markerinfo *); + +static uint64_t lacp_aggregator_bandwidth(struct lacp_aggregator *); +static void lacp_suppress_distributing(struct lacp_softc *, + struct lacp_aggregator *); +static void lacp_transit_expire(void *); +static void lacp_update_portmap(struct 
lacp_softc *); +static void lacp_select_active_aggregator(struct lacp_softc *); +static uint16_t lacp_compose_key(struct lacp_port *); +static int tlv_check(const void *, size_t, const struct tlvhdr *, + const struct tlv_template *, int); +static void lacp_tick(void *); + +static void lacp_fill_aggregator_id(struct lacp_aggregator *, + const struct lacp_port *); +static void lacp_fill_aggregator_id_peer(struct lacp_peerinfo *, + const struct lacp_peerinfo *); +static int lacp_aggregator_is_compatible(const struct lacp_aggregator *, + const struct lacp_port *); +static int lacp_peerinfo_is_compatible(const struct lacp_peerinfo *, + const struct lacp_peerinfo *); + +static struct lacp_aggregator *lacp_aggregator_get(struct lacp_softc *, + struct lacp_port *); +static void lacp_aggregator_addref(struct lacp_softc *, + struct lacp_aggregator *); +static void lacp_aggregator_delref(struct lacp_softc *, + struct lacp_aggregator *); + +/* receive machine */ + +static int lacp_pdu_input(struct lacp_port *, struct mbuf *); +static int lacp_marker_input(struct lacp_port *, struct mbuf *); +static void lacp_sm_rx(struct lacp_port *, const struct lacpdu *); +static void lacp_sm_rx_timer(struct lacp_port *); +static void lacp_sm_rx_set_expired(struct lacp_port *); +static void lacp_sm_rx_update_ntt(struct lacp_port *, + const struct lacpdu *); +static void lacp_sm_rx_record_pdu(struct lacp_port *, + const struct lacpdu *); +static void lacp_sm_rx_update_selected(struct lacp_port *, + const struct lacpdu *); +static void lacp_sm_rx_record_default(struct lacp_port *); +static void lacp_sm_rx_update_default_selected(struct lacp_port *); +static void lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *, + const struct lacp_peerinfo *); + +/* mux machine */ + +static void lacp_sm_mux(struct lacp_port *); +static void lacp_set_mux(struct lacp_port *, enum lacp_mux_state); +static void lacp_sm_mux_timer(struct lacp_port *); + +/* periodic transmit machine */ + +static void 
lacp_sm_ptx_update_timeout(struct lacp_port *, uint8_t); +static void lacp_sm_ptx_tx_schedule(struct lacp_port *); +static void lacp_sm_ptx_timer(struct lacp_port *); + +/* transmit machine */ + +static void lacp_sm_tx(struct lacp_port *); +static void lacp_sm_assert_ntt(struct lacp_port *); + +static void lacp_run_timers(struct lacp_port *); +static int lacp_compare_peerinfo(const struct lacp_peerinfo *, + const struct lacp_peerinfo *); +static int lacp_compare_systemid(const struct lacp_systemid *, + const struct lacp_systemid *); +static void lacp_port_enable(struct lacp_port *); +static void lacp_port_disable(struct lacp_port *); +static void lacp_select(struct lacp_port *); +static void lacp_unselect(struct lacp_port *); +static void lacp_disable_collecting(struct lacp_port *); +static void lacp_enable_collecting(struct lacp_port *); +static void lacp_disable_distributing(struct lacp_port *); +static void lacp_enable_distributing(struct lacp_port *); +static int lacp_xmit_lacpdu(struct lacp_port *); +static int lacp_xmit_marker(struct lacp_port *); + +#if defined(LACP_DEBUG) +static void lacp_dump_lacpdu(const struct lacpdu *); +static const char *lacp_format_partner(const struct lacp_peerinfo *, char *, + size_t); +static const char *lacp_format_lagid(const struct lacp_peerinfo *, + const struct lacp_peerinfo *, char *, size_t); +static const char *lacp_format_lagid_aggregator(const struct lacp_aggregator *, + char *, size_t); +static const char *lacp_format_state(uint8_t, char *, size_t); +static const char *lacp_format_mac(const uint8_t *, char *, size_t); +static const char *lacp_format_systemid(const struct lacp_systemid *, char *, + size_t); +static const char *lacp_format_portid(const struct lacp_portid *, char *, + size_t); +static void lacp_dprintf(const struct lacp_port *, const char *, ...) 
+ __attribute__((__format__(__printf__, 2, 3))); +#define LACP_DPRINTF(a) lacp_dprintf a +#else +#define LACP_DPRINTF(a) /* nothing */ +#endif + +/* + * partner administration variables. + * XXX should be configurable. + */ + +static const struct lacp_peerinfo lacp_partner_admin = { + { 0xffff }, /* lip_systemid.lsi_prio */ + 0, /* lip_key */ + { 0xffff }, /* lip_portid.lpi_prio */ +#if 1 + /* optimistic lip_state */ + LACP_STATE_SYNC | LACP_STATE_AGGREGATION | + LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING +#else + /* pessimistic lip_state */ + 0 +#endif +}; + +static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = { + [LACP_TIMER_CURRENT_WHILE] = lacp_sm_rx_timer, + [LACP_TIMER_PERIODIC] = lacp_sm_ptx_timer, + [LACP_TIMER_WAIT_WHILE] = lacp_sm_mux_timer, +}; + +struct mbuf * +lacp_input(struct trunk_port *tp, struct mbuf *m) +{ + struct lacp_port *lp = LACP_PORT(tp); + uint8_t subtype; + + if (m->m_pkthdr.len < sizeof(struct ether_header) + sizeof(subtype)) { + m_freem(m); + return (NULL); + } + + m_copydata(m, sizeof(struct ether_header), sizeof(subtype), &subtype); + switch (subtype) { + /* FALLTHROUGH */ + case SLOWPROTOCOLS_SUBTYPE_LACP: + lacp_pdu_input(lp, m); + return (NULL); + + case SLOWPROTOCOLS_SUBTYPE_MARKER: + lacp_marker_input(lp, m); + return (NULL); + } + + /* Not a subtype we are interested in */ + return (m); +} + +/* + * lacp_pdu_input: process lacpdu + */ +static int +lacp_pdu_input(struct lacp_port *lp, struct mbuf *m) +{ + struct lacpdu *du; + int error = 0; + + if (m->m_pkthdr.len != sizeof(*du)) { + goto bad; + } + + if ((m->m_flags & M_MCAST) == 0) { + goto bad; + } + + if (m->m_len < sizeof(*du)) { + m = m_pullup(m, sizeof(*du)); + if (m == NULL) { + return (ENOMEM); + } + } + + du = mtod(m, struct lacpdu *); + + if (memcmp(&du->ldu_eh.ether_dhost, + &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) { + goto bad; + } + + /* + * ignore the version for compatibility with + * the future protocol revisions. 
+ */ +#if 0 + if (du->ldu_sph.sph_version != 1) { + goto bad; + } +#endif + + /* + * ignore tlv types for compatibility with + * the future protocol revisions. + */ + if (tlv_check(du, sizeof(*du), &du->ldu_tlv_actor, + lacp_info_tlv_template, 0)) { + goto bad; + } + +#if defined(LACP_DEBUG) + LACP_DPRINTF((lp, "lacpdu receive\n")); + lacp_dump_lacpdu(du); +#endif /* defined(LACP_DEBUG) */ + + lacp_sm_rx(lp, du); + + m_freem(m); + return (error); + +bad: + m_freem(m); + return (EINVAL); +} + +static void +lacp_fill_actorinfo(struct lacp_port *lp, struct lacp_peerinfo *info) +{ + struct trunk_port *tp = lp->lp_trunk; + struct trunk_softc *sc = tp->tp_trunk; + + info->lip_systemid.lsi_prio = htons(LACP_SYSTEM_PRIO); + memcpy(&info->lip_systemid.lsi_mac, + sc->tr_ac.ac_enaddr, ETHER_ADDR_LEN); + info->lip_portid.lpi_prio = htons(LACP_PORT_PRIO); + info->lip_portid.lpi_portno = htons(lp->lp_ifp->if_index); + info->lip_state = lp->lp_state; +} + +static void +lacp_fill_markerinfo(struct lacp_port *lp, struct lacp_markerinfo *info) +{ + struct ifnet *ifp = lp->lp_ifp; + + /* Fill in the port index and system id (encoded as the MAC) */ + info->mi_rq_port = htons(ifp->if_index); + memcpy(&info->mi_rq_system, lp->lp_systemid.lsi_mac, ETHER_ADDR_LEN); + info->mi_rq_xid = htonl(0); +} + +static int +lacp_xmit_lacpdu(struct lacp_port *lp) +{ + struct trunk_port *tp = lp->lp_trunk; + struct mbuf *m; + struct lacpdu *du; + int error; + + m = m_gethdr(M_DONTWAIT, MT_DATA); + if (m == NULL) { + return (ENOMEM); + } + m->m_len = m->m_pkthdr.len = sizeof(*du); + + du = mtod(m, struct lacpdu *); + memset(du, 0, sizeof(*du)); + + memcpy(&du->ldu_eh.ether_dhost, ethermulticastaddr_slowprotocols, + ETHER_ADDR_LEN); + memcpy(&du->ldu_eh.ether_shost, tp->tp_lladdr, ETHER_ADDR_LEN); + du->ldu_eh.ether_type = htons(ETHERTYPE_SLOW); + + du->ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP; + du->ldu_sph.sph_version = 1; + + TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, 
sizeof(du->ldu_actor)); + du->ldu_actor = lp->lp_actor; + + TLV_SET(&du->ldu_tlv_partner, LACP_TYPE_PARTNERINFO, + sizeof(du->ldu_partner)); + du->ldu_partner = lp->lp_partner; + + TLV_SET(&du->ldu_tlv_collector, LACP_TYPE_COLLECTORINFO, + sizeof(du->ldu_collector)); + du->ldu_collector.lci_maxdelay = 0; + +#if defined(LACP_DEBUG) + LACP_DPRINTF((lp, "lacpdu transmit\n")); + lacp_dump_lacpdu(du); +#endif /* defined(LACP_DEBUG) */ + + m->m_flags |= M_MCAST; + + /* + * XXX should use higher priority queue. + * otherwise network congestion can break aggregation. + */ + + error = trunk_enqueue(lp->lp_ifp, m); + return (error); +} + +static int +lacp_xmit_marker(struct lacp_port *lp) +{ + struct trunk_port *tp = lp->lp_trunk; + struct mbuf *m; + struct markerdu *mdu; + int error; + + m = m_gethdr(M_DONTWAIT, MT_DATA); + if (m == NULL) { + return (ENOMEM); + } + m->m_len = m->m_pkthdr.len = sizeof(*mdu); + + mdu = mtod(m, struct markerdu *); + memset(mdu, 0, sizeof(*mdu)); + + memcpy(&mdu->mdu_eh.ether_dhost, ethermulticastaddr_slowprotocols, + ETHER_ADDR_LEN); + memcpy(&mdu->mdu_eh.ether_shost, tp->tp_lladdr, ETHER_ADDR_LEN); + mdu->mdu_eh.ether_type = htons(ETHERTYPE_SLOW); + + mdu->mdu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_MARKER; + mdu->mdu_sph.sph_version = 1; + + /* Bump the transaction id and copy over the marker info */ + lp->lp_marker.mi_rq_xid = htonl(ntohl(lp->lp_marker.mi_rq_xid) + 1); + TLV_SET(&mdu->mdu_tlv, MARKER_TYPE_INFO, sizeof(mdu->mdu_info)); + mdu->mdu_info = lp->lp_marker; + + LACP_DPRINTF((lp, "marker transmit, port=%u, sys=%6D, id=%u\n", + ntohs(mdu->mdu_info.mi_rq_port), mdu->mdu_info.mi_rq_system, ":", + ntohl(mdu->mdu_info.mi_rq_xid))); + + m->m_flags |= M_MCAST; + error = trunk_enqueue(lp->lp_ifp, m); + return (error); +} + +void +lacp_linkstate(struct trunk_port *tp) +{ + struct lacp_port *lp = LACP_PORT(tp); + uint8_t old_state; + uint16_t old_key; + + old_state = lp->lp_state; + old_key = lp->lp_key; + + /* + * If the port is not an 
active full duplex Ethernet link then it can + * not be aggregated. + */ + + if (tp->tp_link_state == LINK_STATE_UNKNOWN || + tp->tp_link_state == LINK_STATE_FULL_DUPLEX) + lacp_port_enable(lp); + else + lacp_port_disable(lp); + + lp->lp_key = lacp_compose_key(lp); + + if (old_state != lp->lp_state || old_key != lp->lp_key) { + LACP_DPRINTF((lp, "-> UNSELECTED\n")); + lp->lp_selected = LACP_UNSELECTED; + } +} + +static void +lacp_tick(void *arg) +{ + struct lacp_softc *lsc = arg; + struct lacp_port *lp; + + LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) { + if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) + continue; + + lacp_run_timers(lp); + + lacp_select(lp); + lacp_sm_mux(lp); + lacp_sm_tx(lp); + lacp_sm_ptx_tx_schedule(lp); + } + timeout_add(&lsc->lsc_callout, hz); +} + +int +lacp_port_create(struct trunk_port *tp) +{ + struct trunk_softc *sc = tp->tp_trunk; + struct lacp_softc *lsc = LACP_SOFTC(sc); + struct lacp_port *lp; + struct ifnet *ifp = tp->tp_if; + struct ifreq ifr; + struct ifmediareq ifmr; + int error; + + int active = 1; /* XXX should be configurable */ + int fast = 0; /* XXX should be configurable */ + + bzero(&ifr, sizeof(ifr)); + ifr.ifr_addr.sa_family = AF_UNSPEC; + ifr.ifr_addr.sa_len = ETHER_ADDR_LEN; + bcopy(&ethermulticastaddr_slowprotocols, + ifr.ifr_addr.sa_data, ETHER_ADDR_LEN); + + error = ether_addmulti(&ifr, (struct arpcom *)ifp); + if (error && error != ENETRESET) { + printf("%s: ADDMULTI failed on %s\n", __func__, tp->tp_ifname); + return (error); + } + + lp = malloc(sizeof(struct lacp_port), + M_DEVBUF, M_NOWAIT|M_ZERO); + if (lp == NULL) + return (ENOMEM); + + tp->tp_psc = (caddr_t)lp; + lp->lp_ifp = ifp; + lp->lp_trunk = tp; + lp->lp_lsc = lsc; + + LIST_INSERT_HEAD(&lsc->lsc_ports, lp, lp_next); + + lacp_fill_actorinfo(lp, &lp->lp_actor); + lacp_fill_markerinfo(lp, &lp->lp_marker); + lp->lp_state = + (active ? LACP_STATE_ACTIVITY : 0) | + (fast ? 
LACP_STATE_TIMEOUT : 0); + lp->lp_aggregator = NULL; + lacp_sm_rx_set_expired(lp); + + bzero((char *)&ifmr, sizeof(ifmr)); + error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr); + if (error == 0) + lp->lp_media = ifmr.ifm_active; + + lacp_linkstate(tp); + + return (0); +} + +void +lacp_port_destroy(struct trunk_port *tp) +{ + struct lacp_port *lp = LACP_PORT(tp); + int i; + + for (i = 0; i < LACP_NTIMER; i++) { + LACP_TIMER_DISARM(lp, i); + } + + lacp_disable_collecting(lp); + lacp_disable_distributing(lp); + lacp_unselect(lp); + + LIST_REMOVE(lp, lp_next); + free(lp, M_DEVBUF); +} + +void +lacp_req(struct trunk_softc *sc, caddr_t data) +{ + struct lacp_opreq *req = (struct lacp_opreq *)data; + struct lacp_softc *lsc = LACP_SOFTC(sc); + struct lacp_aggregator *la = lsc->lsc_active_aggregator; + + bzero(req, sizeof(struct lacp_opreq)); + if (la != NULL) { + req->actor_prio = ntohs(la->la_actor.lip_systemid.lsi_prio); + memcpy(&req->actor_mac, &la->la_actor.lip_systemid.lsi_mac, + ETHER_ADDR_LEN); + req->actor_key = ntohs(la->la_actor.lip_key); + req->actor_portprio = ntohs(la->la_actor.lip_portid.lpi_prio); + req->actor_portno = ntohs(la->la_actor.lip_portid.lpi_portno); + req->actor_state = la->la_actor.lip_state; + + req->partner_prio = ntohs(la->la_partner.lip_systemid.lsi_prio); + memcpy(&req->partner_mac, &la->la_partner.lip_systemid.lsi_mac, + ETHER_ADDR_LEN); + req->partner_key = ntohs(la->la_partner.lip_key); + req->partner_portprio = ntohs(la->la_partner.lip_portid.lpi_prio); + req->partner_portno = ntohs(la->la_partner.lip_portid.lpi_portno); + req->partner_state = la->la_partner.lip_state; + } +} + +void +lacp_portreq(struct trunk_port *tp, caddr_t data) +{ + struct lacp_opreq *req = (struct lacp_opreq *)data; + struct lacp_port *lp = LACP_PORT(tp); + + req->actor_prio = ntohs(lp->lp_actor.lip_systemid.lsi_prio); + memcpy(&req->actor_mac, &lp->lp_actor.lip_systemid.lsi_mac, + ETHER_ADDR_LEN); + req->actor_key = ntohs(lp->lp_actor.lip_key); + 
req->actor_portprio = ntohs(lp->lp_actor.lip_portid.lpi_prio); + req->actor_portno = ntohs(lp->lp_actor.lip_portid.lpi_portno); + req->actor_state = lp->lp_actor.lip_state; + + req->partner_prio = ntohs(lp->lp_partner.lip_systemid.lsi_prio); + memcpy(&req->partner_mac, &lp->lp_partner.lip_systemid.lsi_mac, + ETHER_ADDR_LEN); + req->partner_key = ntohs(lp->lp_partner.lip_key); + req->partner_portprio = ntohs(lp->lp_partner.lip_portid.lpi_prio); + req->partner_portno = ntohs(lp->lp_partner.lip_portid.lpi_portno); + req->partner_state = lp->lp_partner.lip_state; +} + +static void +lacp_disable_collecting(struct lacp_port *lp) +{ + LACP_DPRINTF((lp, "collecting disabled\n")); + lp->lp_state &= ~LACP_STATE_COLLECTING; +} + +static void +lacp_enable_collecting(struct lacp_port *lp) +{ + LACP_DPRINTF((lp, "collecting enabled\n")); + lp->lp_state |= LACP_STATE_COLLECTING; +} + +static void +lacp_disable_distributing(struct lacp_port *lp) +{ + struct lacp_aggregator *la = lp->lp_aggregator; + struct lacp_softc *lsc = lp->lp_lsc; +#if defined(LACP_DEBUG) + char buf[LACP_LAGIDSTR_MAX+1]; +#endif /* defined(LACP_DEBUG) */ + + if (la == NULL || (lp->lp_state & LACP_STATE_DISTRIBUTING) == 0) { + return; + } + + KASSERT(!TAILQ_EMPTY(&la->la_ports)); + KASSERT(la->la_nports > 0); + KASSERT(la->la_refcnt >= la->la_nports); + + LACP_DPRINTF((lp, "disable distributing on aggregator %s, " + "nports %d -> %d\n", + lacp_format_lagid_aggregator(la, buf, sizeof(buf)), + la->la_nports, la->la_nports - 1)); + + TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q); + la->la_nports--; + + if (lsc->lsc_active_aggregator == la) { + lacp_suppress_distributing(lsc, la); + lacp_select_active_aggregator(lsc); + /* regenerate the port map, the active aggregator has changed */ + lacp_update_portmap(lsc); + } + + lp->lp_state &= ~LACP_STATE_DISTRIBUTING; +} + +static void +lacp_enable_distributing(struct lacp_port *lp) +{ + struct lacp_aggregator *la = lp->lp_aggregator; + struct lacp_softc *lsc = lp->lp_lsc; 
+#if defined(LACP_DEBUG) + char buf[LACP_LAGIDSTR_MAX+1]; +#endif /* defined(LACP_DEBUG) */ + + if ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0) { + return; + } + + LACP_DPRINTF((lp, "enable distributing on aggregator %s, " + "nports %d -> %d\n", + lacp_format_lagid_aggregator(la, buf, sizeof(buf)), + la->la_nports, la->la_nports + 1)); + + KASSERT(la->la_refcnt > la->la_nports); + TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q); + la->la_nports++; + + lp->lp_state |= LACP_STATE_DISTRIBUTING; + + if (lsc->lsc_active_aggregator == la) { + lacp_suppress_distributing(lsc, la); + lacp_update_portmap(lsc); + } else + /* try to become the active aggregator */ + lacp_select_active_aggregator(lsc); +} + +static void +lacp_transit_expire(void *vp) +{ + struct lacp_softc *lsc = vp; + + LACP_DPRINTF((NULL, "%s\n", __func__)); + lsc->lsc_suppress_distributing = 0; +} + +int +lacp_attach(struct trunk_softc *sc) +{ + struct lacp_softc *lsc; + + lsc = malloc(sizeof(struct lacp_softc), + M_DEVBUF, M_NOWAIT|M_ZERO); + if (lsc == NULL) + return (ENOMEM); + + sc->tr_psc = (caddr_t)lsc; + lsc->lsc_softc = sc; + + lsc->lsc_hashkey = arc4random(); + lsc->lsc_active_aggregator = NULL; + TAILQ_INIT(&lsc->lsc_aggregators); + LIST_INIT(&lsc->lsc_ports); + + timeout_set(&lsc->lsc_transit_callout, lacp_transit_expire, lsc); + timeout_set(&lsc->lsc_callout, lacp_tick, lsc); + + /* if the trunk is already up then do the same */ + if (sc->tr_ac.ac_if.if_flags & IFF_RUNNING) + lacp_init(sc); + + return (0); +} + +int +lacp_detach(struct trunk_softc *sc) +{ + struct lacp_softc *lsc = LACP_SOFTC(sc); + + KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators)); + KASSERT(lsc->lsc_active_aggregator == NULL); + + sc->tr_psc = NULL; + timeout_del(&lsc->lsc_transit_callout); + timeout_del(&lsc->lsc_callout); + + free(lsc, M_DEVBUF); + return (0); +} + +void +lacp_init(struct trunk_softc *sc) +{ + struct lacp_softc *lsc = LACP_SOFTC(sc); + + timeout_add(&lsc->lsc_callout, hz); +} + +void +lacp_stop(struct 
trunk_softc *sc) +{ + struct lacp_softc *lsc = LACP_SOFTC(sc); + + timeout_del(&lsc->lsc_transit_callout); + timeout_del(&lsc->lsc_callout); +} + +struct trunk_port * +lacp_select_tx_port(struct trunk_softc *sc, struct mbuf *m) +{ + struct lacp_softc *lsc = LACP_SOFTC(sc); + struct lacp_portmap *pm; + struct lacp_port *lp; + uint32_t hash; + + if (__predict_false(lsc->lsc_suppress_distributing)) { + LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__)); + return (NULL); + } + + pm = &lsc->lsc_pmap[lsc->lsc_activemap]; + if (pm->pm_count == 0) { + LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__)); + return (NULL); + } + + hash = trunk_hashmbuf(m, lsc->lsc_hashkey); + hash %= pm->pm_count; + lp = pm->pm_map[hash]; + + KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0); + + return (lp->lp_trunk); +} +/* + * lacp_suppress_distributing: drop transmit packets for a while + * to preserve packet ordering. + */ + +static void +lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la) +{ + struct lacp_port *lp; + + if (lsc->lsc_active_aggregator != la) { + return; + } + + LACP_DPRINTF((NULL, "%s\n", __func__)); + lsc->lsc_suppress_distributing = 1; + + /* send a marker frame down each port to verify the queues are empty */ + LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) { + lp->lp_flags |= LACP_PORT_MARK; + lacp_xmit_marker(lp); + } + + /* set a timeout for the marker frames */ + timeout_add(&lsc->lsc_transit_callout, LACP_TRANSIT_DELAY * hz / 1000); +} + +static int +lacp_compare_peerinfo(const struct lacp_peerinfo *a, + const struct lacp_peerinfo *b) +{ + return (memcmp(a, b, offsetof(struct lacp_peerinfo, lip_state))); +} + +static int +lacp_compare_systemid(const struct lacp_systemid *a, + const struct lacp_systemid *b) +{ + return (memcmp(a, b, sizeof(*a))); +} + +#if 0 /* unused */ +static int +lacp_compare_portid(const struct lacp_portid *a, + const struct lacp_portid *b) +{ + return (memcmp(a, b, sizeof(*a))); +} +#endif + +static 
uint64_t +lacp_aggregator_bandwidth(struct lacp_aggregator *la) +{ + struct lacp_port *lp; + uint64_t speed; + + lp = TAILQ_FIRST(&la->la_ports); + if (lp == NULL) { + return (0); + } + + speed = lp->lp_ifp->if_baudrate; + speed = ifmedia_baudrate(lp->lp_media); + speed *= la->la_nports; + if (speed == 0) { + LACP_DPRINTF((lp, "speed 0? media=0x%x nports=%d\n", + lp->lp_media, la->la_nports)); + } + + return (speed); +} + +/* + * lacp_select_active_aggregator: select an aggregator to be used to transmit + * packets from trunk(4) interface. + */ + +static void +lacp_select_active_aggregator(struct lacp_softc *lsc) +{ + struct lacp_aggregator *la; + struct lacp_aggregator *best_la = NULL; + uint64_t best_speed = 0; +#if defined(LACP_DEBUG) + char buf[LACP_LAGIDSTR_MAX+1]; +#endif /* defined(LACP_DEBUG) */ + + LACP_DPRINTF((NULL, "%s:\n", __func__)); + + TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) { + uint64_t speed; + + if (la->la_nports == 0) { + continue; + } + + speed = lacp_aggregator_bandwidth(la); + LACP_DPRINTF((NULL, "%s, speed=%jd, nports=%d\n", + lacp_format_lagid_aggregator(la, buf, sizeof(buf)), + speed, la->la_nports)); + + /* This aggregator is chosen if + * the partner has a better system priority + * or, the total aggregated speed is higher + * or, it is already the chosen aggregator + */ + if ((best_la != NULL && LACP_SYS_PRI(la->la_partner) < + LACP_SYS_PRI(best_la->la_partner)) || + speed > best_speed || + (speed == best_speed && + la == lsc->lsc_active_aggregator)) { + best_la = la; + best_speed = speed; + } + } + + KASSERT(best_la == NULL || best_la->la_nports > 0); + KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports)); + +#if defined(LACP_DEBUG) + if (lsc->lsc_active_aggregator != best_la) { + LACP_DPRINTF((NULL, "active aggregator changed\n")); + LACP_DPRINTF((NULL, "old %s\n", + lacp_format_lagid_aggregator(lsc->lsc_active_aggregator, + buf, sizeof(buf)))); + } else { + LACP_DPRINTF((NULL, "active aggregator not changed\n")); + } + 
LACP_DPRINTF((NULL, "new %s\n", + lacp_format_lagid_aggregator(best_la, buf, sizeof(buf)))); +#endif /* defined(LACP_DEBUG) */ + + if (lsc->lsc_active_aggregator != best_la) { + lsc->lsc_active_aggregator = best_la; + lacp_update_portmap(lsc); + if (best_la) { + lacp_suppress_distributing(lsc, best_la); + } + } +} + +/* + * Updated the inactive portmap array with the new list of ports and + * make it live. + */ +static void +lacp_update_portmap(struct lacp_softc *lsc) +{ + struct lacp_aggregator *la; + struct lacp_portmap *p; + struct lacp_port *lp; + u_int newmap; + int i; + + newmap = lsc->lsc_activemap == 0 ? 1 : 0; + p = &lsc->lsc_pmap[newmap]; + la = lsc->lsc_active_aggregator; + bzero(p, sizeof(struct lacp_portmap)); + + if (la != NULL && la->la_nports > 0) { + p->pm_count = la->la_nports; + i = 0; + TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q) + p->pm_map[i++] = lp; + KASSERT(i == p->pm_count); + } + + /* switch the active portmap over */ + lsc->lsc_activemap = newmap; + LACP_DPRINTF((NULL, "Set table %d with %d ports\n", + lsc->lsc_activemap, + lsc->lsc_pmap[lsc->lsc_activemap].pm_count)); +} + +static uint16_t +lacp_compose_key(struct lacp_port *lp) +{ + struct trunk_port *tp = lp->lp_trunk; + struct trunk_softc *sc = tp->tp_trunk; + u_int media = lp->lp_media; + uint16_t key; + + if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) { + + /* + * non-aggregatable links should have unique keys. + * + * XXX this isn't really unique as if_index is 16 bit. 
+ */ + + /* bit 0..14: (some bits of) if_index of this port */ + key = lp->lp_ifp->if_index; + /* bit 15: 1 */ + key |= 0x8000; + } else { + u_int subtype = IFM_SUBTYPE(media); + + KASSERT(IFM_TYPE(media) == IFM_ETHER); + KASSERT((media & IFM_FDX) != 0); + + /* bit 0..4: IFM_SUBTYPE */ + key = subtype; + /* bit 5..14: (some bits of) if_index of trunk device */ + key |= 0x7fe0 & ((sc->tr_ac.ac_if.if_index) << 5); + /* bit 15: 0 */ + } + return (htons(key)); +} + +static void +lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la) +{ +#if defined(LACP_DEBUG) + char buf[LACP_LAGIDSTR_MAX+1]; +#endif + + LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n", + __func__, + lacp_format_lagid(&la->la_actor, &la->la_partner, + buf, sizeof(buf)), + la->la_refcnt, la->la_refcnt + 1)); + + KASSERT(la->la_refcnt > 0); + la->la_refcnt++; + KASSERT(la->la_refcnt > la->la_nports); +} + +static void +lacp_aggregator_delref(struct lacp_softc *lsc, struct lacp_aggregator *la) +{ +#if defined(LACP_DEBUG) + char buf[LACP_LAGIDSTR_MAX+1]; +#endif + + LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n", + __func__, + lacp_format_lagid(&la->la_actor, &la->la_partner, + buf, sizeof(buf)), + la->la_refcnt, la->la_refcnt - 1)); + + KASSERT(la->la_refcnt > la->la_nports); + la->la_refcnt--; + if (la->la_refcnt > 0) { + return; + } + + KASSERT(la->la_refcnt == 0); + KASSERT(lsc->lsc_active_aggregator != la); + + TAILQ_REMOVE(&lsc->lsc_aggregators, la, la_q); + + free(la, M_DEVBUF); +} + +/* + * lacp_aggregator_get: allocate an aggregator. 
+ */
+
+/*
+ * Returns a new aggregator that is already linked at the tail of
+ * lsc->lsc_aggregators and holds one reference, or NULL when the
+ * M_NOWAIT allocation fails.
+ *
+ * The allocation is zeroed (M_ZERO) because the aggregator becomes
+ * reachable through the list before the caller fills in la_actor and
+ * la_partner; anything walking the list must never see indeterminate
+ * bytes there.  Zeroing also leaves la_nports and la_pending at 0, so no
+ * explicit stores are needed for them.
+ *
+ * The lp argument is not used by this function.
+ */
+
+static struct lacp_aggregator *
+lacp_aggregator_get(struct lacp_softc *lsc, struct lacp_port *lp)
+{
+	struct lacp_aggregator *la;
+
+	la = malloc(sizeof(*la), M_DEVBUF, M_NOWAIT|M_ZERO);
+	if (la == NULL)
+		return (NULL);
+
+	la->la_refcnt = 1;
+	TAILQ_INIT(&la->la_ports);
+	TAILQ_INSERT_TAIL(&lsc->lsc_aggregators, la, la_q);
+
+	return (la);
+}
+
+/*
+ * lacp_fill_aggregator_id: setup a newly allocated aggregator from a port.
+ *
+ * The aggregator takes the system id and key of the port's partner and
+ * of the port's actor.  Of the port's own state only the
+ * LACP_STATE_AGGREGATION bit is carried over into la_actor.lip_state.
+ */
+
+static void
+lacp_fill_aggregator_id(struct lacp_aggregator *la, const struct lacp_port *lp)
+{
+	lacp_fill_aggregator_id_peer(&la->la_partner, &lp->lp_partner);
+	lacp_fill_aggregator_id_peer(&la->la_actor, &lp->lp_actor);
+
+	la->la_actor.lip_state = lp->lp_state & LACP_STATE_AGGREGATION;
+}
+
+/*
+ * Clears *lpi_aggr and then copies only the system id and the key from
+ * lpi_port; the port id and the state of the aggregator side stay zero.
+ */
+static void
+lacp_fill_aggregator_id_peer(struct lacp_peerinfo *lpi_aggr,
+    const struct lacp_peerinfo *lpi_port)
+{
+	memset(lpi_aggr, 0, sizeof(*lpi_aggr));
+	lpi_aggr->lip_systemid = lpi_port->lip_systemid;
+	lpi_aggr->lip_key = lpi_port->lip_key;
+}
+
+/*
+ * lacp_aggregator_is_compatible: check if a port can join to an aggregator.
+ */
+
+/*
+ * Returns 1 when port lp may join aggregator la and 0 otherwise.  The
+ * port, the port's partner and the aggregator's actor must all carry
+ * LACP_STATE_AGGREGATION, and the partner and actor information of the
+ * aggregator must match that of the port (see
+ * lacp_peerinfo_is_compatible).
+ */
+static int
+lacp_aggregator_is_compatible(const struct lacp_aggregator *la,
+    const struct lacp_port *lp)
+{
+	/* every party involved has to be aggregatable */
+	if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0)
+		return (0);
+	if ((lp->lp_partner.lip_state & LACP_STATE_AGGREGATION) == 0)
+		return (0);
+	if ((la->la_actor.lip_state & LACP_STATE_AGGREGATION) == 0)
+		return (0);
+
+	/* partner first, then actor */
+	return (lacp_peerinfo_is_compatible(&la->la_partner, &lp->lp_partner) &&
+	    lacp_peerinfo_is_compatible(&la->la_actor, &lp->lp_actor));
+}
+
+/*
+ * Two peers are compatible when both their system id and their key are
+ * bytewise equal.  Returns 1 on a match, 0 otherwise.
+ */
+static int
+lacp_peerinfo_is_compatible(const struct lacp_peerinfo *a,
+    const struct lacp_peerinfo *b)
+{
+	if (memcmp(&a->lip_systemid, &b->lip_systemid,
+	    sizeof(a->lip_systemid)) != 0)
+		return (0);
+
+	return (memcmp(&a->lip_key, &b->lip_key, sizeof(a->lip_key)) == 0);
+}
+
+/*
+ * Mark the port as aggregatable.
+ */
+static void
+lacp_port_enable(struct lacp_port *lp)
+{
+	lp->lp_state |= LACP_STATE_AGGREGATION;
+}
+
+/*
+ * Take the port out of aggregation: the mux is forced to DETACHED first,
+ * then the port is unselected, the partner information is reset to the
+ * defaults and the AGGREGATION/EXPIRED bits are cleared.
+ */
+static void
+lacp_port_disable(struct lacp_port *lp)
+{
+	/* must run before any of the state below is touched */
+	lacp_set_mux(lp, LACP_MUX_DETACHED);
+
+	lp->lp_selected = LACP_UNSELECTED;
+	lp->lp_state &= ~LACP_STATE_AGGREGATION;
+
+	/* replaces lp_partner, so the partner bit is cleared afterwards */
+	lacp_sm_rx_record_default(lp);
+	lp->lp_partner.lip_state &= ~LACP_STATE_AGGREGATION;
+
+	lp->lp_state &= ~LACP_STATE_EXPIRED;
+}
+
+/*
+ * lacp_select: select an aggregator. create one if necessary.
+ */
+static void
+lacp_select(struct lacp_port *lp)
+{
+	struct lacp_softc *lsc = lp->lp_lsc;
+	struct lacp_aggregator *la;
+#if defined(LACP_DEBUG)
+	char buf[LACP_LAGIDSTR_MAX+1];
+#endif
+
+	/* nothing to do for a port that already has an aggregator */
+	if (lp->lp_aggregator != NULL)
+		return;
+
+	KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE));
+
+	LACP_DPRINTF((lp, "port lagid=%s\n",
+	    lacp_format_lagid(&lp->lp_actor, &lp->lp_partner,
+	    buf, sizeof(buf))));
+
+	/* first compatible aggregator wins; la ends up NULL if there is none */
+	for (la = TAILQ_FIRST(&lsc->lsc_aggregators); la != NULL;
+	    la = TAILQ_NEXT(la, la_q)) {
+		if (lacp_aggregator_is_compatible(la, lp))
+			break;
+	}
+
+	if (la != NULL) {
+		LACP_DPRINTF((lp, "compatible aggregator found\n"));
+
+		/* the aggregator is full, leave the port unselected */
+		if (la->la_refcnt == LACP_MAX_PORTS)
+			return;
+
+		lacp_aggregator_addref(lsc, la);
+	} else {
+		la = lacp_aggregator_get(lsc, lp);
+		if (la == NULL) {
+			LACP_DPRINTF((lp, "aggregator creation failed\n"));
+
+			/* will retry on the next tick. */
+			return;
+		}
+
+		/* the new aggregator already holds the reference for lp */
+		lacp_fill_aggregator_id(la, lp);
+		LACP_DPRINTF((lp, "aggregator created\n"));
+	}
+
+	LACP_DPRINTF((lp, "aggregator lagid=%s\n",
+	    lacp_format_lagid(&la->la_actor, &la->la_partner,
+	    buf, sizeof(buf))));
+
+	lp->lp_aggregator = la;
+	lp->lp_selected = LACP_SELECTED;
+}
+
+/*
+ * lacp_unselect: finish unselect/detach process.
+ */ + +static void +lacp_unselect(struct lacp_port *lp) +{ + struct lacp_softc *lsc = lp->lp_lsc; + struct lacp_aggregator *la = lp->lp_aggregator; + + KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE)); + + if (la == NULL) { + return; + } + + lp->lp_aggregator = NULL; + lacp_aggregator_delref(lsc, la); +} + +/* mux machine */ + +static void +lacp_sm_mux(struct lacp_port *lp) +{ + enum lacp_mux_state new_state; + int p_sync = + (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0; + int p_collecting = + (lp->lp_partner.lip_state & LACP_STATE_COLLECTING) != 0; + enum lacp_selected selected = lp->lp_selected; + struct lacp_aggregator *la; + + /* LACP_DPRINTF((lp, "%s: state %d\n", __func__, lp->lp_mux_state)); */ + +re_eval: + la = lp->lp_aggregator; + KASSERT(lp->lp_mux_state == LACP_MUX_DETACHED || la != NULL); + new_state = lp->lp_mux_state; + switch (lp->lp_mux_state) { + case LACP_MUX_DETACHED: + if (selected != LACP_UNSELECTED) { + new_state = LACP_MUX_WAITING; + } + break; + case LACP_MUX_WAITING: + KASSERT(la->la_pending > 0 || + !LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE)); + if (selected == LACP_SELECTED && la->la_pending == 0) { + new_state = LACP_MUX_ATTACHED; + } else if (selected == LACP_UNSELECTED) { + new_state = LACP_MUX_DETACHED; + } + break; + case LACP_MUX_ATTACHED: + if (selected == LACP_SELECTED && p_sync) { + new_state = LACP_MUX_COLLECTING; + } else if (selected != LACP_SELECTED) { + new_state = LACP_MUX_DETACHED; + } + break; + case LACP_MUX_COLLECTING: + if (selected == LACP_SELECTED && p_sync && p_collecting) { + new_state = LACP_MUX_DISTRIBUTING; + } else if (selected != LACP_SELECTED || !p_sync) { + new_state = LACP_MUX_ATTACHED; + } + break; + case LACP_MUX_DISTRIBUTING: + if (selected != LACP_SELECTED || !p_sync || !p_collecting) { + new_state = LACP_MUX_COLLECTING; + } + break; + default: + panic("%s: unknown state", __func__); + } + + if (lp->lp_mux_state == new_state) { + return; + } + + lacp_set_mux(lp, new_state); + goto 
re_eval; +} + +static void +lacp_set_mux(struct lacp_port *lp, enum lacp_mux_state new_state) +{ + struct lacp_aggregator *la = lp->lp_aggregator; + + if (lp->lp_mux_state == new_state) { + return; + } + + switch (new_state) { + case LACP_MUX_DETACHED: + lp->lp_state &= ~LACP_STATE_SYNC; + lacp_disable_distributing(lp); + lacp_disable_collecting(lp); + lacp_sm_assert_ntt(lp); + /* cancel timer */ + if (LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE)) { + KASSERT(la->la_pending > 0); + la->la_pending--; + } + LACP_TIMER_DISARM(lp, LACP_TIMER_WAIT_WHILE); + lacp_unselect(lp); + break; + case LACP_MUX_WAITING: + LACP_TIMER_ARM(lp, LACP_TIMER_WAIT_WHILE, + LACP_AGGREGATE_WAIT_TIME); + la->la_pending++; + break; + case LACP_MUX_ATTACHED: + lp->lp_state |= LACP_STATE_SYNC; + lacp_disable_collecting(lp); + lacp_sm_assert_ntt(lp); + break; + case LACP_MUX_COLLECTING: + lacp_enable_collecting(lp); + lacp_disable_distributing(lp); + lacp_sm_assert_ntt(lp); + break; + case LACP_MUX_DISTRIBUTING: + lacp_enable_distributing(lp); + break; + default: + panic("%s: unknown state", __func__); + } + + LACP_DPRINTF((lp, "mux_state %d -> %d\n", lp->lp_mux_state, new_state)); + + lp->lp_mux_state = new_state; +} + +static void +lacp_sm_mux_timer(struct lacp_port *lp) +{ + struct lacp_aggregator *la = lp->lp_aggregator; +#if defined(LACP_DEBUG) + char buf[LACP_LAGIDSTR_MAX+1]; +#endif + + KASSERT(la->la_pending > 0); + + LACP_DPRINTF((lp, "%s: aggregator %s, pending %d -> %d\n", __func__, + lacp_format_lagid(&la->la_actor, &la->la_partner, + buf, sizeof(buf)), + la->la_pending, la->la_pending - 1)); + + la->la_pending--; +} + +/* periodic transmit machine */ + +static void +lacp_sm_ptx_update_timeout(struct lacp_port *lp, uint8_t oldpstate) +{ + if (LACP_STATE_EQ(oldpstate, lp->lp_partner.lip_state, + LACP_STATE_TIMEOUT)) { + return; + } + + LACP_DPRINTF((lp, "partner timeout changed\n")); + + /* + * FAST_PERIODIC -> SLOW_PERIODIC + * or + * SLOW_PERIODIC (-> PERIODIC_TX) -> FAST_PERIODIC 
+ * + * let lacp_sm_ptx_tx_schedule to update timeout. + */ + + LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC); + + /* + * if timeout has been shortened, assert NTT. + */ + + if ((lp->lp_partner.lip_state & LACP_STATE_TIMEOUT)) { + lacp_sm_assert_ntt(lp); + } +} + +static void +lacp_sm_ptx_tx_schedule(struct lacp_port *lp) +{ + int timeout; + + if (!(lp->lp_state & LACP_STATE_ACTIVITY) && + !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) { + + /* + * NO_PERIODIC + */ + + LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC); + return; + } + + if (LACP_TIMER_ISARMED(lp, LACP_TIMER_PERIODIC)) { + return; + } + + timeout = (lp->lp_partner.lip_state & LACP_STATE_TIMEOUT) ? + LACP_FAST_PERIODIC_TIME : LACP_SLOW_PERIODIC_TIME; + + LACP_TIMER_ARM(lp, LACP_TIMER_PERIODIC, timeout); +} + +static void +lacp_sm_ptx_timer(struct lacp_port *lp) +{ + lacp_sm_assert_ntt(lp); +} + +static void +lacp_sm_rx(struct lacp_port *lp, const struct lacpdu *du) +{ + int timeout; + + /* + * check LACP_DISABLED first + */ + + if (!(lp->lp_state & LACP_STATE_AGGREGATION)) { + return; + } + + /* + * check loopback condition. + */ + + if (!lacp_compare_systemid(&du->ldu_actor.lip_systemid, + &lp->lp_actor.lip_systemid)) { + return; + } + + /* + * EXPIRED, DEFAULTED, CURRENT -> CURRENT + */ + + lacp_sm_rx_update_selected(lp, du); + lacp_sm_rx_update_ntt(lp, du); + lacp_sm_rx_record_pdu(lp, du); + + timeout = (lp->lp_state & LACP_STATE_TIMEOUT) ? + LACP_SHORT_TIMEOUT_TIME : LACP_LONG_TIMEOUT_TIME; + LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, timeout); + + lp->lp_state &= ~LACP_STATE_EXPIRED; + + /* + * kick transmit machine without waiting the next tick. 
+ */ + + lacp_sm_tx(lp); +} + +static void +lacp_sm_rx_set_expired(struct lacp_port *lp) +{ + lp->lp_partner.lip_state &= ~LACP_STATE_SYNC; + lp->lp_partner.lip_state |= LACP_STATE_TIMEOUT; + LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, LACP_SHORT_TIMEOUT_TIME); + lp->lp_state |= LACP_STATE_EXPIRED; +} + +static void +lacp_sm_rx_timer(struct lacp_port *lp) +{ + if ((lp->lp_state & LACP_STATE_EXPIRED) == 0) { + /* CURRENT -> EXPIRED */ + LACP_DPRINTF((lp, "%s: CURRENT -> EXPIRED\n", __func__)); + lacp_sm_rx_set_expired(lp); + } else { + /* EXPIRED -> DEFAULTED */ + LACP_DPRINTF((lp, "%s: EXPIRED -> DEFAULTED\n", __func__)); + lacp_sm_rx_update_default_selected(lp); + lacp_sm_rx_record_default(lp); + lp->lp_state &= ~LACP_STATE_EXPIRED; + } +} + +static void +lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du) +{ + int active; + uint8_t oldpstate; +#if defined(LACP_DEBUG) + char buf[LACP_STATESTR_MAX+1]; +#endif + + /* LACP_DPRINTF((lp, "%s\n", __func__)); */ + + oldpstate = lp->lp_partner.lip_state; + + active = (du->ldu_actor.lip_state & LACP_STATE_ACTIVITY) + || ((lp->lp_state & LACP_STATE_ACTIVITY) && + (du->ldu_partner.lip_state & LACP_STATE_ACTIVITY)); + + lp->lp_partner = du->ldu_actor; + if (active && + ((LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state, + LACP_STATE_AGGREGATION) && + !lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner)) + || (du->ldu_partner.lip_state & LACP_STATE_AGGREGATION) == 0)) { + /* XXX nothing? 
*/ + } else { + lp->lp_partner.lip_state &= ~LACP_STATE_SYNC; + } + + lp->lp_state &= ~LACP_STATE_DEFAULTED; + + if (oldpstate != lp->lp_partner.lip_state) { + LACP_DPRINTF((lp, "old pstate %s\n", + lacp_format_state(oldpstate, buf, sizeof(buf)))); + LACP_DPRINTF((lp, "new pstate %s\n", + lacp_format_state(lp->lp_partner.lip_state, buf, + sizeof(buf)))); + } + + lacp_sm_ptx_update_timeout(lp, oldpstate); +} + +static void +lacp_sm_rx_update_ntt(struct lacp_port *lp, const struct lacpdu *du) +{ + /* LACP_DPRINTF((lp, "%s\n", __func__)); */ + + if (lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner) || + !LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state, + LACP_STATE_ACTIVITY | LACP_STATE_SYNC | LACP_STATE_AGGREGATION)) { + LACP_DPRINTF((lp, "%s: assert ntt\n", __func__)); + lacp_sm_assert_ntt(lp); + } +} + +static void +lacp_sm_rx_record_default(struct lacp_port *lp) +{ + uint8_t oldpstate; + + /* LACP_DPRINTF((lp, "%s\n", __func__)); */ + + oldpstate = lp->lp_partner.lip_state; + lp->lp_partner = lacp_partner_admin; + lp->lp_state |= LACP_STATE_DEFAULTED; + lacp_sm_ptx_update_timeout(lp, oldpstate); +} + +static void +lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp, + const struct lacp_peerinfo *info) +{ + /* LACP_DPRINTF((lp, "%s\n", __func__)); */ + + if (lacp_compare_peerinfo(&lp->lp_partner, info) || + !LACP_STATE_EQ(lp->lp_partner.lip_state, info->lip_state, + LACP_STATE_AGGREGATION)) { + lp->lp_selected = LACP_UNSELECTED; + /* mux machine will clean up lp->lp_aggregator */ + } +} + +static void +lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du) +{ + /* LACP_DPRINTF((lp, "%s\n", __func__)); */ + + lacp_sm_rx_update_selected_from_peerinfo(lp, &du->ldu_actor); +} + +static void +lacp_sm_rx_update_default_selected(struct lacp_port *lp) +{ + /* LACP_DPRINTF((lp, "%s\n", __func__)); */ + + lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin); +} + +/* transmit machine */ + +static void +lacp_sm_tx(struct 
lacp_port *lp) +{ + int error; + + if (!(lp->lp_state & LACP_STATE_AGGREGATION) +#if 1 + || (!(lp->lp_state & LACP_STATE_ACTIVITY) + && !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) +#endif + ) { + lp->lp_flags &= ~LACP_PORT_NTT; + } + + if (!(lp->lp_flags & LACP_PORT_NTT)) { + return; + } + + /* Rate limit to 3 PDUs per LACP_FAST_PERIODIC_TIME */ + if (ppsratecheck(&lp->lp_last_lacpdu, &lp->lp_lacpdu_sent, + (3 / LACP_FAST_PERIODIC_TIME)) == 0) { + LACP_DPRINTF((lp, "rate limited pdu\n")); + return; + } + + error = lacp_xmit_lacpdu(lp); + + if (error == 0) { + lp->lp_flags &= ~LACP_PORT_NTT; + } else { + LACP_DPRINTF((lp, "lacpdu transmit failure, error %d\n", + error)); + } +} + +static void +lacp_sm_assert_ntt(struct lacp_port *lp) +{ + + lp->lp_flags |= LACP_PORT_NTT; +} + +static void +lacp_run_timers(struct lacp_port *lp) +{ + int i; + + for (i = 0; i < LACP_NTIMER; i++) { + KASSERT(lp->lp_timer[i] >= 0); + if (lp->lp_timer[i] == 0) { + continue; + } else if (--lp->lp_timer[i] <= 0) { + if (lacp_timer_funcs[i]) { + (*lacp_timer_funcs[i])(lp); + } + } + } +} + +int +lacp_marker_input(struct lacp_port *lp, struct mbuf *m) +{ + struct lacp_softc *lsc = lp->lp_lsc; + struct trunk_port *tp = lp->lp_trunk; + struct lacp_port *lp2; + struct markerdu *mdu; + int error = 0; + int pending = 0; + + if (m->m_pkthdr.len != sizeof(*mdu)) { + goto bad; + } + + if ((m->m_flags & M_MCAST) == 0) { + goto bad; + } + + if (m->m_len < sizeof(*mdu)) { + m = m_pullup(m, sizeof(*mdu)); + if (m == NULL) { + return (ENOMEM); + } + } + + mdu = mtod(m, struct markerdu *); + + if (memcmp(&mdu->mdu_eh.ether_dhost, + ðermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) { + goto bad; + } + + if (mdu->mdu_sph.sph_version != 1) { + goto bad; + } + + switch (mdu->mdu_tlv.tlv_type) { + case MARKER_TYPE_INFO: + if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv, + marker_info_tlv_template, 1)) { + goto bad; + } + mdu->mdu_tlv.tlv_type = MARKER_TYPE_RESPONSE; + memcpy(&mdu->mdu_eh.ether_dhost, + 
ðermulticastaddr_slowprotocols, ETHER_ADDR_LEN); + memcpy(&mdu->mdu_eh.ether_shost, + tp->tp_lladdr, ETHER_ADDR_LEN); + error = trunk_enqueue(lp->lp_ifp, m); + break; + + case MARKER_TYPE_RESPONSE: + if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv, + marker_response_tlv_template, 1)) { + goto bad; + } + LACP_DPRINTF((lp, "marker response, port=%u, sys=%6D, id=%u\n", + ntohs(mdu->mdu_info.mi_rq_port), mdu->mdu_info.mi_rq_system, + ":", ntohl(mdu->mdu_info.mi_rq_xid))); + + /* Verify that it is the last marker we sent out */ + if (memcmp(&mdu->mdu_info, &lp->lp_marker, + sizeof(struct lacp_markerinfo))) + goto bad; + + lp->lp_flags &= ~LACP_PORT_MARK; + + if (lsc->lsc_suppress_distributing) { + /* Check if any ports are waiting for a response */ + LIST_FOREACH(lp2, &lsc->lsc_ports, lp_next) { + if (lp2->lp_flags & LACP_PORT_MARK) { + pending = 1; + break; + } + } + + if (pending == 0) { + /* All interface queues are clear */ + LACP_DPRINTF((NULL, "queue flush complete\n")); + lsc->lsc_suppress_distributing = 0; + } + } + m_freem(m); + break; + + default: + goto bad; + } + + return (error); + +bad: + LACP_DPRINTF((lp, "bad marker frame\n")); + m_freem(m); + return (EINVAL); +} + +static int +tlv_check(const void *p, size_t size, const struct tlvhdr *tlv, + const struct tlv_template *tmpl, int check_type) +{ + while (/* CONSTCOND */ 1) { + if ((const char *)tlv - (const char *)p + sizeof(*tlv) > size) { + return (EINVAL); + } + if ((check_type && tlv->tlv_type != tmpl->tmpl_type) || + tlv->tlv_length != tmpl->tmpl_length) { + return (EINVAL); + } + if (tmpl->tmpl_type == 0) { + break; + } + tlv = (const struct tlvhdr *) + ((const char *)tlv + tlv->tlv_length); + tmpl++; + } + + return (0); +} + +#if defined(LACP_DEBUG) +const char * +lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen) +{ + snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X", + (int)mac[0], + (int)mac[1], + (int)mac[2], + (int)mac[3], + (int)mac[4], + (int)mac[5]); + + return (buf); +} + 
+const char * +lacp_format_systemid(const struct lacp_systemid *sysid, + char *buf, size_t buflen) +{ + char macbuf[LACP_MACSTR_MAX+1]; + + snprintf(buf, buflen, "%04X,%s", + ntohs(sysid->lsi_prio), + lacp_format_mac(sysid->lsi_mac, macbuf, sizeof(macbuf))); + + return (buf); +} + +const char * +lacp_format_portid(const struct lacp_portid *portid, char *buf, size_t buflen) +{ + snprintf(buf, buflen, "%04X,%04X", + ntohs(portid->lpi_prio), + ntohs(portid->lpi_portno)); + + return (buf); +} + +const char * +lacp_format_partner(const struct lacp_peerinfo *peer, char *buf, size_t buflen) +{ + char sysid[LACP_SYSTEMIDSTR_MAX+1]; + char portid[LACP_PORTIDSTR_MAX+1]; + + snprintf(buf, buflen, "(%s,%04X,%s)", + lacp_format_systemid(&peer->lip_systemid, sysid, sizeof(sysid)), + ntohs(peer->lip_key), + lacp_format_portid(&peer->lip_portid, portid, sizeof(portid))); + + return (buf); +} + +const char * +lacp_format_lagid(const struct lacp_peerinfo *a, + const struct lacp_peerinfo *b, char *buf, size_t buflen) +{ + char astr[LACP_PARTNERSTR_MAX+1]; + char bstr[LACP_PARTNERSTR_MAX+1]; + +#if 0 + /* + * there's a convention to display small numbered peer + * in the left. 
+ */ + + if (lacp_compare_peerinfo(a, b) > 0) { + const struct lacp_peerinfo *t; + + t = a; + a = b; + b = t; + } +#endif + + snprintf(buf, buflen, "[%s,%s]", + lacp_format_partner(a, astr, sizeof(astr)), + lacp_format_partner(b, bstr, sizeof(bstr))); + + return (buf); +} + +const char * +lacp_format_lagid_aggregator(const struct lacp_aggregator *la, + char *buf, size_t buflen) +{ + if (la == NULL) { + return ("(none)"); + } + + return (lacp_format_lagid(&la->la_actor, &la->la_partner, buf, buflen)); +} + +const char * +lacp_format_state(uint8_t state, char *buf, size_t buflen) +{ + snprintf(buf, buflen, "%b", state, LACP_STATE_BITS); + return (buf); +} + +static void +lacp_dump_lacpdu(const struct lacpdu *du) +{ + char buf[LACP_PARTNERSTR_MAX+1]; + char buf2[LACP_STATESTR_MAX+1]; + + printf("actor=%s\n", + lacp_format_partner(&du->ldu_actor, buf, sizeof(buf))); + printf("actor.state=%s\n", + lacp_format_state(du->ldu_actor.lip_state, buf2, sizeof(buf2))); + printf("partner=%s\n", + lacp_format_partner(&du->ldu_partner, buf, sizeof(buf))); + printf("partner.state=%s\n", + lacp_format_state(du->ldu_partner.lip_state, buf2, sizeof(buf2))); + + printf("maxdelay=%d\n", ntohs(du->ldu_collector.lci_maxdelay)); +} + +static void +lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...) +{ + va_list va; + + if (lp) { + printf("%s: ", lp->tp_if->if_xname); + } + + va_start(va, fmt); + vprintf(fmt, va); + va_end(va); +} +#endif diff --git a/sys/net/trunklacp.h b/sys/net/trunklacp.h new file mode 100644 index 00000000000..25c789df89c --- /dev/null +++ b/sys/net/trunklacp.h @@ -0,0 +1,333 @@ +/* $OpenBSD: trunklacp.h,v 1.1 2008/06/15 06:56:09 mpf Exp $ */ +/* $NetBSD: ieee8023ad_impl.h,v 1.2 2005/12/10 23:21:39 elad Exp $ */ + +/* + * Copyright (c)2005 YAMAMOTO Takashi, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/net/ieee8023ad_lacp.h,v 1.11 2008/03/17 01:26:44 thompsa Exp $ + */ + +/* + * IEEE802.3ad LACP + * + * implementation details. + */ + +#define LACP_TIMER_CURRENT_WHILE 0 +#define LACP_TIMER_PERIODIC 1 +#define LACP_TIMER_WAIT_WHILE 2 +#define LACP_NTIMER 3 + +#define LACP_TIMER_ARM(port, timer, val) \ + (port)->lp_timer[(timer)] = (val) +#define LACP_TIMER_DISARM(port, timer) \ + (port)->lp_timer[(timer)] = 0 +#define LACP_TIMER_ISARMED(port, timer) \ + ((port)->lp_timer[(timer)] > 0) + +/* + * IEEE802.3ad LACP + * + * protocol definitions. 
+ */ + +#define LACP_STATE_ACTIVITY (1<<0) +#define LACP_STATE_TIMEOUT (1<<1) +#define LACP_STATE_AGGREGATION (1<<2) +#define LACP_STATE_SYNC (1<<3) +#define LACP_STATE_COLLECTING (1<<4) +#define LACP_STATE_DISTRIBUTING (1<<5) +#define LACP_STATE_DEFAULTED (1<<6) +#define LACP_STATE_EXPIRED (1<<7) + +#define LACP_PORT_NTT 0x00000001 +#define LACP_PORT_MARK 0x00000002 + +#define LACP_STATE_BITS \ + "\020" \ + "\001ACTIVITY" \ + "\002TIMEOUT" \ + "\003AGGREGATION" \ + "\004SYNC" \ + "\005COLLECTING" \ + "\006DISTRIBUTING" \ + "\007DEFAULTED" \ + "\010EXPIRED" + +/* + * IEEE802.3 slow protocols + * + * protocol (on-wire) definitions. + * + * XXX should be elsewhere. + */ + +#define SLOWPROTOCOLS_SUBTYPE_LACP 1 +#define SLOWPROTOCOLS_SUBTYPE_MARKER 2 + +struct slowprothdr { + uint8_t sph_subtype; + uint8_t sph_version; +} __packed; + +/* + * TLV on-wire structure. + */ + +struct tlvhdr { + uint8_t tlv_type; + uint8_t tlv_length; + /* uint8_t tlv_value[]; */ +} __packed; + +/* + * ... and our implementation. 
+ */
+
+/*
+ * TLV_SET: fill in a TLV header.  tlv_length is the size of the header
+ * itself plus `length' bytes of value.  Every argument is parenthesized
+ * in the expansion (including inside sizeof) so that any pointer
+ * expression can be passed; note that `tlv' is evaluated more than once.
+ */
+#define	TLV_SET(tlv, type, length) \
+	do { \
+		(tlv)->tlv_type = (type); \
+		(tlv)->tlv_length = sizeof(*(tlv)) + (length); \
+	} while (/*CONSTCOND*/0)
+
+/*
+ * Expected type/length of one TLV; tlv_check() walks an array of these
+ * and stops at the entry whose tmpl_type is 0.
+ */
+struct tlv_template {
+	uint8_t			tmpl_type;
+	uint8_t			tmpl_length;
+};
+
+/* system identifier: priority plus MAC address */
+struct lacp_systemid {
+	uint16_t		lsi_prio;	/* converted with ntohs() by readers */
+	uint8_t			lsi_mac[6];
+} __packed;
+
+/* port identifier: priority plus port number */
+struct lacp_portid {
+	uint16_t		lpi_prio;	/* converted with ntohs() by readers */
+	uint16_t		lpi_portno;	/* converted with ntohs() by readers */
+} __packed;
+
+/* actor or partner information as carried in a LACPDU */
+struct lacp_peerinfo {
+	struct lacp_systemid	lip_systemid;
+	uint16_t		lip_key;	/* converted with ntohs() by readers */
+	struct lacp_portid	lip_portid;
+	uint8_t			lip_state;	/* LACP_STATE_* bits */
+	uint8_t			lip_resv[3];
+} __packed;
+
+/* collector information as carried in a LACPDU */
+struct lacp_collectorinfo {
+	uint16_t		lci_maxdelay;	/* converted with ntohs() by readers */
+	uint8_t			lci_resv[12];
+} __packed;
+
+/* complete LACPDU frame, Ethernet header included */
+struct lacpdu {
+	struct ether_header	ldu_eh;
+	struct slowprothdr	ldu_sph;
+
+	struct tlvhdr		ldu_tlv_actor;
+	struct lacp_peerinfo	ldu_actor;
+	struct tlvhdr		ldu_tlv_partner;
+	struct lacp_peerinfo	ldu_partner;
+	struct tlvhdr		ldu_tlv_collector;
+	struct lacp_collectorinfo ldu_collector;
+	struct tlvhdr		ldu_tlv_term;
+	uint8_t			ldu_resv[50];
+} __packed;
+
+/*
+ * IEEE802.3ad marker protocol
+ *
+ * protocol (on-wire) definitions.
+ */
+
+/* Body of a marker TLV: requester port, requester system and transaction id. */
+struct lacp_markerinfo {
+	uint16_t		mi_rq_port;
+	uint8_t			mi_rq_system[ETHER_ADDR_LEN];
+	uint32_t		mi_rq_xid;
+	uint8_t			mi_pad[2];
+} __packed;
+
+/*
+ * Complete marker frame: Ethernet header, slow protocols header, one
+ * marker TLV (header plus lacp_markerinfo), a terminating TLV header
+ * and 90 reserved bytes.
+ */
+struct markerdu {
+	struct ether_header	mdu_eh;
+	struct slowprothdr	mdu_sph;
+
+	struct tlvhdr		mdu_tlv;
+	struct lacp_markerinfo	mdu_info;
+	struct tlvhdr		mdu_tlv_term;
+	uint8_t			mdu_resv[90];
+} __packed;
+
+#define MARKER_TYPE_INFO	0x01
+#define MARKER_TYPE_RESPONSE	0x02
+
+/* Value type of lacp_port.lp_selected. */
+enum lacp_selected {
+	LACP_UNSELECTED,
+	LACP_STANDBY,	/* not used in this implementation */
+	LACP_SELECTED,
+};
+
+/* Value type of lacp_port.lp_mux_state. */
+enum lacp_mux_state {
+	LACP_MUX_DETACHED,
+	LACP_MUX_WAITING,
+	LACP_MUX_ATTACHED,
+	LACP_MUX_COLLECTING,
+	LACP_MUX_DISTRIBUTING,
+};
+
+#define LACP_MAX_PORTS		32
+
+/* Table of up to LACP_MAX_PORTS port pointers; pm_count is the entry count. */
+struct lacp_portmap {
+	int			pm_count;
+	struct lacp_port	*pm_map[LACP_MAX_PORTS];
+};
+
+/* Per-port LACP state. */
+struct lacp_port {
+	TAILQ_ENTRY(lacp_port)	lp_dist_q;
+	LIST_ENTRY(lacp_port)	lp_next;
+	struct lacp_softc	*lp_lsc;
+	struct trunk_port	*lp_trunk;
+	struct ifnet		*lp_ifp;
+	struct lacp_peerinfo	lp_partner;
+	struct lacp_peerinfo	lp_actor;
+	struct lacp_markerinfo	lp_marker;
+/* shorthands for members of lp_actor */
+#define lp_state	lp_actor.lip_state
+#define lp_key		lp_actor.lip_key
+#define lp_systemid	lp_actor.lip_systemid
+	struct timeval		lp_last_lacpdu;
+	int			lp_lacpdu_sent;
+	enum lacp_mux_state	lp_mux_state;
+	enum lacp_selected	lp_selected;
+	int			lp_flags;
+	u_int			lp_media; /* XXX redundant */
+	int			lp_timer[LACP_NTIMER];
+	struct ifmultiaddr	*lp_ifma;
+
+	struct lacp_aggregator	*lp_aggregator;
+};
+
+/* One aggregator: the set of ports that share an actor/partner pair. */
+struct lacp_aggregator {
+	TAILQ_ENTRY(lacp_aggregator) la_q;
+	int			la_refcnt; /* num of ports which selected us */
+	int			la_nports; /* num of distributing ports */
+	TAILQ_HEAD(, lacp_port)	la_ports; /* distributing ports */
+	struct lacp_peerinfo	la_partner;
+	struct lacp_peerinfo	la_actor;
+	int			la_pending; /* number of ports in wait_while */
+};
+
+/*
+ * Per-trunk LACP state.
+ * NOTE(review): lsc_activemap presumably selects which of the two
+ * lsc_pmap[] entries is current -- confirm in trunklacp.c.
+ */
+struct lacp_softc {
+	struct trunk_softc	*lsc_softc;
+	struct lacp_aggregator	*lsc_active_aggregator;
+	TAILQ_HEAD(, lacp_aggregator) lsc_aggregators;
+	int			lsc_suppress_distributing;
+	struct timeout		lsc_transit_callout;
+	struct timeout		lsc_callout;
+	LIST_HEAD(, lacp_port)	lsc_ports;
+	struct lacp_portmap	lsc_pmap[2];
+	volatile u_int		lsc_activemap;
+	u_int32_t		lsc_hashkey;
+};
+
+#define LACP_TYPE_ACTORINFO	1
+#define LACP_TYPE_PARTNERINFO	2
+#define LACP_TYPE_COLLECTORINFO	3
+
+/* timeout values (in sec) */
+#define LACP_FAST_PERIODIC_TIME		(1)
+#define LACP_SLOW_PERIODIC_TIME		(30)
+#define LACP_SHORT_TIMEOUT_TIME		(3 * LACP_FAST_PERIODIC_TIME)
+#define LACP_LONG_TIMEOUT_TIME		(3 * LACP_SLOW_PERIODIC_TIME)
+#define LACP_CHURN_DETECTION_TIME	(60)
+#define LACP_AGGREGATE_WAIT_TIME	(2)
+#define LACP_TRANSIT_DELAY		3000	/* in msec */
+
+/* True when s1 and s2 agree on every bit that is set in mask. */
+#define LACP_STATE_EQ(s1, s2, mask) \
+	((((s1) ^ (s2)) & (mask)) == 0)
+
+/* System priority of a struct lacp_peerinfo; usable as an lvalue. */
+#define LACP_SYS_PRI(peer)	((peer).lip_systemid.lsi_prio)
+
+/* Protocol-private data hung off a trunk port / trunk softc. */
+#define LACP_PORT(_lp)	((struct lacp_port *)(_lp)->tp_psc)
+#define LACP_SOFTC(_sc)	((struct lacp_softc *)(_sc)->tr_psc)
+
+/*
+ * NOTE(review): struct lacp_softc above declares no lsc_mtx member, so
+ * none of these macros can compile if expanded.  They look like
+ * leftovers from the FreeBSD version this file was ported from --
+ * confirm they are unused and either drop them or add the lock.
+ */
+#define LACP_LOCK_INIT(_lsc)		mtx_init(&(_lsc)->lsc_mtx, \
+					    "lacp mtx", NULL, MTX_DEF)
+#define LACP_LOCK_DESTROY(_lsc)		mtx_destroy(&(_lsc)->lsc_mtx)
+#define LACP_LOCK(_lsc)			mtx_lock(&(_lsc)->lsc_mtx)
+#define LACP_UNLOCK(_lsc)		mtx_unlock(&(_lsc)->lsc_mtx)
+#define LACP_LOCK_ASSERT(_lsc)		mtx_assert(&(_lsc)->lsc_mtx, MA_OWNED)
+
+struct mbuf	*lacp_input(struct trunk_port *, struct mbuf *);
+struct trunk_port *lacp_select_tx_port(struct trunk_softc *, struct mbuf *);
+int		lacp_attach(struct trunk_softc *);
+int		lacp_detach(struct trunk_softc *);
+void		lacp_init(struct trunk_softc *);
+void		lacp_stop(struct trunk_softc *);
+int		lacp_port_create(struct trunk_port *);
+void		lacp_port_destroy(struct trunk_port *);
+void		lacp_linkstate(struct trunk_port *);
+void		lacp_req(struct trunk_softc *, caddr_t);
+void		lacp_portreq(struct trunk_port *, caddr_t);
+
+/*
+ * Return 1 if the port has an aggregator and that aggregator is the
+ * softc's active aggregator, 0 otherwise.
+ */
+static __inline int
+lacp_isactive(struct trunk_port *lgp)
+{
+	struct lacp_port *lp = LACP_PORT(lgp);
+	struct lacp_softc *lsc = lp->lp_lsc;
+	struct lacp_aggregator *la = lp->lp_aggregator;
+
+	/* This port is joined to the active aggregator */
+	return (la != NULL && la == lsc->lsc_active_aggregator);
+}
+
+/* Return 1 if LACP_STATE_COLLECTING is set in the port's actor state. */
+static __inline int
+lacp_iscollecting(struct trunk_port *lgp)
+{
+	struct lacp_port *lp = LACP_PORT(lgp);
+
+	return ((lp->lp_state & LACP_STATE_COLLECTING) != 0);
+}
+
+/* Return 1 if LACP_STATE_DISTRIBUTING is set in the port's actor state. */
+static __inline int
+lacp_isdistributing(struct trunk_port *lgp)
+{
+	struct lacp_port *lp = LACP_PORT(lgp);
+
+	return ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0);
+}
+
+/* following constants don't include terminating NUL */
+#define LACP_MACSTR_MAX		(2*6 + 5)
+#define LACP_SYSTEMPRIOSTR_MAX	(4)
+#define LACP_SYSTEMIDSTR_MAX	(LACP_SYSTEMPRIOSTR_MAX + 1 + LACP_MACSTR_MAX)
+#define LACP_PORTPRIOSTR_MAX	(4)
+#define LACP_PORTNOSTR_MAX	(4)
+#define LACP_PORTIDSTR_MAX	(LACP_PORTPRIOSTR_MAX + 1 + LACP_PORTNOSTR_MAX)
+#define LACP_KEYSTR_MAX		(4)
+#define LACP_PARTNERSTR_MAX \
+	(1 + LACP_SYSTEMIDSTR_MAX + 1 + LACP_KEYSTR_MAX + 1 \
+	+ LACP_PORTIDSTR_MAX + 1)
+#define LACP_LAGIDSTR_MAX \
+	(1 + LACP_PARTNERSTR_MAX + 1 + LACP_PARTNERSTR_MAX + 1)
+#define LACP_STATESTR_MAX	(255) /* XXX */ |